feat(goose): honor GOOSE_FAST_MODEL env var in ModelConfig::with_fast (#9296)

Signed-off-by: Vladislav Dobromyslov <vladik.dobrik@gmail.com>
Signed-off-by: Douwe Osinga <douwe@squareup.com>
Co-authored-by: Douwe Osinga <douwe@squareup.com>
2026-06-02 06:14:27 +02:00 · 2026-05-22 16:47:50 +03:00
parent 603deb88bb
commit 2045e63d8d
2 changed files with 10 additions and 2 deletions
@@ -372,8 +372,12 @@ impl ModelConfig {
        fast_model_name: &str,
        provider_name: &str,
    ) -> Result<Self, ConfigError> {
-        // Create a full ModelConfig for the fast model with proper canonical lookup
-        let fast_config = ModelConfig::new(fast_model_name)?.with_canonical_limits(provider_name);
+        let name = std::env::var("GOOSE_FAST_MODEL")
+            .ok()
+            .map(|v| v.trim().to_string())
+            .filter(|v| !v.is_empty())
+            .unwrap_or_else(|| fast_model_name.to_string());
+        let fast_config = ModelConfig::new(&name)?.with_canonical_limits(provider_name);
        self.fast_model_config = Some(Box::new(fast_config));
        Ok(self)
    }
@@ -18,6 +18,7 @@ These are the minimum required variables to get started with goose.
 |----------|---------|---------|---------|
 | `GOOSE_PROVIDER` | Specifies the LLM provider to use | [See available providers](/docs/getting-started/providers#available-providers) | None (must be [configured](/docs/getting-started/providers#configure-provider-and-model)) |
 | `GOOSE_MODEL` | Specifies which model to use from the provider | Model name (e.g., "gpt-4", "claude-sonnet-4-20250514") | None (must be [configured](/docs/getting-started/providers#configure-provider-and-model)) |
+| `GOOSE_FAST_MODEL` | Overrides the provider's default fast model used for auxiliary calls (tool selection, classification, session titles). Empty or whitespace-only values are ignored and the provider default is used | Model name (e.g., "gpt-4o-mini", "google/gemini-2.5-flash") | Provider-specific default |
 | `GOOSE_TEMPERATURE` | Sets the [temperature](https://medium.com/@kelseyywang/a-comprehensive-guide-to-llm-temperature-%EF%B8%8F-363a40bbc91f) for model responses | Float between 0.0 and 1.0 | Model-specific default |
 | `GOOSE_MAX_TOKENS` | Sets the maximum number of tokens for each model response (truncates longer responses) | Positive integer (e.g., 4096, 8192) | Model-specific default |

@@ -29,6 +30,9 @@ export GOOSE_PROVIDER="anthropic"
 export GOOSE_MODEL="claude-sonnet-4-5-20250929"
 export GOOSE_TEMPERATURE=0.7

+# Override the fast model used for auxiliary calls (tool selection, classification, etc.);
+# use a model name from the provider configured above
+export GOOSE_FAST_MODEL="claude-haiku-4-5-20251001"
+
 # Set a lower limit for shorter interactions
 export GOOSE_MAX_TOKENS=4096