docs: improve settings documentation

2026-06-01 20:58:47 +02:00 · 2026-02-11 10:19:05 +05:30
parent 10ceb3098e
commit b873598b77
3 changed files with 12 additions and 6 deletions
@@ -1,4 +1,5 @@
-# Copy this file to config.toml and edit the configuration to your liking.
+# Rename this file to config.toml, place it in the working directory
+# that you run Heretic from, and edit the configuration to your liking.

 # List of PyTorch dtypes to try when loading model tensors.
 # If loading with a dtype fails, the next dtype in the list will be tried.
@@ -77,9 +78,11 @@ row_normalization = "none"
 # larger output files and may slow down evaluation.
 full_normalization_lora_rank = 3

-# The symmetric winsorization to apply to each layer of the per-prompt residuals,
+# The symmetric winsorization to apply to the per-prompt, per-layer residual vectors,
 # expressed as the quantile to clamp to (between 0 and 1). Disabled by default.
-# Example: winsorization_quantile = 0.95 applies a 95% winsorization.
+# This can tame so-called "massive activations" that occur in some models.
+# Example: winsorization_quantile = 0.95 computes the 0.95-quantile of the absolute values
+# of the components, then clamps the magnitudes of all components to that quantile.
 winsorization_quantile = 1.0

 # Number of abliteration trials to run during optimization.
@@ -1,4 +1,5 @@
-# Copy this file to config.toml and edit the configuration to your liking.
+# Rename this file to config.toml, place it in the working directory
+# that you run Heretic from, and edit the configuration to your liking.

 max_response_length = 300

@@ -207,9 +207,11 @@ class Settings(BaseSettings):
    winsorization_quantile: float = Field(
        default=1.0,
        description=(
-            "The symmetric winsorization to apply to each layer of the per-prompt residuals, "
+            "The symmetric winsorization to apply to the per-prompt, per-layer residual vectors, "
            "expressed as the quantile to clamp to (between 0 and 1). Disabled by default. "
-            "Example: winsorization_quantile = 0.95 applies a 95% winsorization."
+            'This can tame so-called "massive activations" that occur in some models. '
+            "Example: winsorization_quantile = 0.95 computes the 0.95-quantile of the absolute values "
+            "of the components, then clamps the magnitudes of all components to that quantile."
        ),
    )