mirror of
https://github.com/p-e-w/heretic.git
synced 2026-06-01 20:58:47 +02:00
docs: improve settings documentation
This commit is contained in:
+6
-3
@@ -1,4 +1,5 @@
|
||||
# Copy this file to config.toml and edit the configuration to your liking.
|
||||
# Rename this file to config.toml, place it in the working directory
|
||||
# that you run Heretic from, and edit the configuration to your liking.
|
||||
|
||||
# List of PyTorch dtypes to try when loading model tensors.
|
||||
# If loading with a dtype fails, the next dtype in the list will be tried.
|
||||
@@ -77,9 +78,11 @@ row_normalization = "none"
|
||||
# larger output files and may slow down evaluation.
|
||||
full_normalization_lora_rank = 3
|
||||
|
||||
# The symmetric winsorization to apply to each layer of the per-prompt residuals,
|
||||
# The symmetric winsorization to apply to the per-prompt, per-layer residual vectors,
|
||||
# expressed as the quantile to clamp to (between 0 and 1). The default of 1.0 disables winsorization.
|
||||
# Example: winsorization_quantile = 0.95 applies a 95% winsorization.
|
||||
# This can tame so-called "massive activations" that occur in some models.
|
||||
# Example: winsorization_quantile = 0.95 computes the 0.95-quantile of the absolute values
|
||||
# of the components, then clamps the magnitudes of all components to that quantile.
|
||||
winsorization_quantile = 1.0
|
||||
|
||||
# Number of abliteration trials to run during optimization.
|
||||
|
||||
+2
-1
@@ -1,4 +1,5 @@
|
||||
# Copy this file to config.toml and edit the configuration to your liking.
|
||||
# Rename this file to config.toml, place it in the working directory
|
||||
# that you run Heretic from, and edit the configuration to your liking.
|
||||
|
||||
max_response_length = 300
|
||||
|
||||
|
||||
@@ -207,9 +207,11 @@ class Settings(BaseSettings):
|
||||
winsorization_quantile: float = Field(
|
||||
default=1.0,
|
||||
description=(
|
||||
"The symmetric winsorization to apply to each layer of the per-prompt residuals, "
|
||||
"The symmetric winsorization to apply to the per-prompt, per-layer residual vectors, "
|
||||
"expressed as the quantile to clamp to (between 0 and 1). Disabled by default. "
|
||||
"Example: winsorization_quantile = 0.95 applies a 95% winsorization."
|
||||
'This can tame so-called "massive activations" that occur in some models. '
|
||||
"Example: winsorization_quantile = 0.95 computes the 0.95-quantile of the absolute values "
|
||||
"of the components, then clamps the magnitudes of all components to that quantile."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user