Add unified thinking effort control across all providers (#9242)

Signed-off-by: jh-block <jhugo@block.xyz>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jh-block
2026-05-20 10:42:42 +02:00
committed by GitHub
parent c467e7f998
commit 98a54e9ec6
30 changed files with 2175 additions and 526 deletions
+22 -80
View File
@@ -23,8 +23,6 @@ use goose::model::ModelConfig;
#[cfg(feature = "telemetry")]
use goose::posthog::{get_telemetry_choice, TELEMETRY_ENABLED_KEY};
use goose::providers::base::ConfigKey;
use goose::providers::chatgpt_codex::reasoning_levels_for_model;
use goose::providers::formats::anthropic::supports_adaptive_thinking;
use goose::providers::provider_test::test_provider_configuration;
use goose::providers::{create, providers, retry_operation, RetryConfig};
use goose::session::SessionType;
@@ -738,15 +736,13 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
let spin = spinner();
spin.start("Attempting to fetch supported models...");
let models_res = {
let temp_model_config =
ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
retry_operation(&RetryConfig::default(), || async {
temp_provider.fetch_recommended_models().await
})
.await
};
let temp_model_config =
ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
let models_res = retry_operation(&RetryConfig::default(), || async {
temp_provider.fetch_recommended_models().await
})
.await;
spin.stop(style("Model fetch complete").green());
// Select a model: on fetch error show styled error and abort; if models available, show list; otherwise free-text input
@@ -766,78 +762,24 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
}
};
if model.to_lowercase().starts_with("gemini-3") {
let thinking_level: &str = cliclack::select("Select thinking level for Gemini 3:")
.item("low", "Low - Better latency, lighter reasoning", "")
.item("high", "High - Deeper reasoning, higher latency", "")
.interact()?;
config.set_gemini3_thinking_level(thinking_level)?;
}
{
let supports_thinking = match temp_provider.fetch_model_info(&model).await {
Ok(model_info) => model_info.reasoning,
Err(_) => goose::model::ModelConfig::new(&model)
.map(|c| c.is_reasoning_model())
.unwrap_or(false),
};
if model.to_lowercase().starts_with("claude-") {
let supports_adaptive = supports_adaptive_thinking(&model);
let mut thinking_select = cliclack::select("Select extended thinking mode for Claude:");
if supports_adaptive {
thinking_select = thinking_select.item(
"adaptive",
"Adaptive - Claude decides when and how much to think (recommended)",
"",
);
}
thinking_select = thinking_select
.item("enabled", "Enabled - Fixed token budget for thinking", "")
.item("disabled", "Disabled - No extended thinking", "");
if supports_adaptive {
thinking_select = thinking_select.initial_value("adaptive");
} else {
thinking_select = thinking_select.initial_value("disabled");
}
let thinking_type: &str = thinking_select.interact()?;
config.set_claude_thinking_type(thinking_type)?;
if thinking_type == "adaptive" {
let effort: &str = cliclack::select("Select adaptive thinking effort level:")
.item("low", "Low - Minimal thinking, fastest responses", "")
if supports_thinking {
let effort: &str = cliclack::select("Select thinking effort:")
.item("off", "Off - No extended thinking", "")
.item("low", "Low - Better latency, lighter reasoning", "")
.item("medium", "Medium - Moderate thinking", "")
.item("high", "High - Deep reasoning (default)", "")
.item(
"max",
"Max - No constraints on thinking depth (Opus 4.6 only)",
"",
)
.initial_value("high")
.item("high", "High - Deep reasoning", "")
.item("max", "Max - No constraints on thinking depth", "")
.initial_value("off")
.interact()?;
config.set_claude_thinking_effort(effort)?;
} else if thinking_type == "enabled" {
let budget: String = cliclack::input("Enter thinking budget (tokens):")
.default_input("16000")
.validate(|input: &String| match input.parse::<i32>() {
Ok(n) if n > 0 => Ok(()),
_ => Err("Please enter a valid positive number"),
})
.interact()?;
config.set_claude_thinking_budget(budget.parse::<i32>()?)?;
}
}
if provider_name == "chatgpt_codex" {
let valid_levels = reasoning_levels_for_model(&model);
if !valid_levels.is_empty() {
let mut select = cliclack::select("Select reasoning effort level:");
for &level in valid_levels {
let description = match level {
"low" => "Low - Fast responses with lighter reasoning",
"medium" => "Medium - Balances speed and reasoning depth for everyday tasks",
"high" => "High - Greater reasoning depth for complex problems",
"xhigh" => "Extra High - Extra high reasoning depth for complex problems",
_ => "",
};
select = select.item(level, description, "");
}
select = select.initial_value("medium");
let effort: &str = select.interact()?;
config.set_chatgpt_codex_reasoning_effort(effort.to_string())?;
config.set_goose_thinking_effort(effort)?;
}
}
+1
View File
@@ -268,6 +268,7 @@ fn resolve_provider_and_model(
.is_some_and(|mc| mc.model_name == model_name)
{
let mut config = saved_model_config.unwrap();
config.normalize_effort_suffix();
if let Some(temp) = recipe_settings.and_then(|s| s.temperature) {
config = config.with_temperature(Some(temp));
}
+4 -1
View File
@@ -5,7 +5,7 @@ use goose::config::permission::PermissionLevel;
use goose::config::ExtensionEntry;
use goose::conversation::Conversation;
use goose::download_manager::{DownloadProgress, DownloadStatus};
use goose::model::ModelConfig;
use goose::model::{ModelConfig, ThinkingEffort};
use goose::permission::permission_confirmation::{Permission, PrincipalType};
use goose::providers::base::{ConfigKey, ModelInfo, ProviderMetadata, ProviderType};
use goose::session::{Session, SessionInsights, SessionType, SystemInfo};
@@ -397,6 +397,7 @@ derive_utoipa!(IconTheme as IconThemeSchema);
super::routes::config_management::read_all_config,
super::routes::config_management::providers,
super::routes::config_management::get_provider_models,
super::routes::config_management::get_provider_model_info,
super::routes::config_management::get_slash_commands,
super::routes::config_management::upsert_permissions,
super::routes::config_management::create_custom_provider,
@@ -573,6 +574,8 @@ derive_utoipa!(IconTheme as IconThemeSchema);
PrincipalType,
ModelInfo,
ModelConfig,
ThinkingEffort,
super::routes::config_management::ProviderModelInfoQuery,
Session,
goose::config::goose_mode::GooseMode,
SessionInsights,
+11 -3
View File
@@ -1,3 +1,4 @@
use crate::routes::config_management::resolve_provider_model_info;
use crate::routes::errors::ErrorResponse;
use crate::routes::recipe_utils::{
apply_recipe_to_agent, build_recipe_with_parameter_values, load_recipe_by_id, validate_recipe,
@@ -595,7 +596,7 @@ async fn update_agent_provider(
}
};
let model_config = ModelConfig::new(&model)
let mut model_config = ModelConfig::new(&model)
.map_err(|e| {
(
StatusCode::BAD_REQUEST,
@@ -603,8 +604,15 @@ async fn update_agent_provider(
)
})?
.with_canonical_limits(&payload.provider)
.with_context_limit(payload.context_limit)
.with_request_params(payload.request_params);
.with_context_limit(payload.context_limit);
if let Some(request_params) = payload.request_params {
model_config = model_config.with_merged_request_params(request_params);
}
let model_info = resolve_provider_model_info(&payload.provider, &model)
.await
.map_err(|e| (e.status, e.message))?;
model_config.reasoning = Some(model_info.reasoning);
let extensions =
EnabledExtensionsState::for_session(state.session_manager(), &payload.session_id, config)
@@ -13,7 +13,7 @@ use goose::config::ExtensionEntry;
use goose::config::{Config, ConfigError};
use goose::custom_requests::SourceType;
use goose::model::ModelConfig;
use goose::providers::base::{ProviderMetadata, ProviderType};
use goose::providers::base::{ModelInfo, ProviderMetadata, ProviderType};
use goose::providers::canonical::maybe_get_canonical_model;
use goose::providers::catalog::{
get_provider_template, get_providers_by_format, ProviderCatalogEntry, ProviderFormat,
@@ -418,7 +418,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
("name" = String, Path, description = "Provider name (e.g., openai)")
),
responses(
(status = 200, description = "Models fetched successfully", body = [String]),
(status = 200, description = "Models fetched successfully", body = [ModelInfo]),
(status = 400, description = "Unknown provider, provider not configured, or authentication error"),
(status = 429, description = "Rate limit exceeded"),
(status = 500, description = "Internal server error")
@@ -426,7 +426,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
)]
pub async fn get_provider_models(
Path(name): Path<String>,
) -> Result<Json<Vec<String>>, ErrorResponse> {
) -> Result<Json<Vec<ModelInfo>>, ErrorResponse> {
let all = get_providers().await.into_iter().collect::<Vec<_>>();
let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else {
return Err(ErrorResponse::bad_request(format!(
@@ -444,7 +444,7 @@ pub async fn get_provider_models(
let model_config = ModelConfig::new(&metadata.default_model)?.with_canonical_limits(&name);
let provider = goose::providers::create(&name, model_config, Vec::new()).await?;
let models_result = provider.fetch_recommended_models().await;
let models_result = provider.fetch_recommended_model_info().await;
match models_result {
Ok(models) => Ok(Json(models)),
@@ -452,6 +452,70 @@ pub async fn get_provider_models(
}
}
// Request body of `POST /config/providers/{name}/model-info`.
// NOTE(review): plain `//` comments are used on purpose; `///` doc comments on a
// `ToSchema` type are emitted as descriptions in the generated OpenAPI schema.
#[derive(Deserialize, ToSchema)]
pub struct ProviderModelInfoQuery {
// Model identifier whose metadata should be resolved; the handler forwards it
// unchanged to `resolve_provider_model_info`.
pub model: String,
}
/// Resolves metadata for `model` on the provider registered as `name`.
///
/// The provider is asked first via `fetch_model_info`. If that call fails for
/// any reason, the failure is logged at debug level and the result is built
/// locally from the [`ModelConfig`]: its context limit plus the
/// `is_reasoning_model` heuristic.
///
/// # Errors
///
/// Returns a bad-request [`ErrorResponse`] when `name` is not a known provider
/// or the provider is not configured. Failures from building the
/// [`ModelConfig`] or creating the provider are propagated with `?`.
pub async fn resolve_provider_model_info(
    name: &str,
    model: &str,
) -> Result<ModelInfo, ErrorResponse> {
    let (metadata, provider_type) = get_providers()
        .await
        .into_iter()
        .find(|(candidate, _)| candidate.name == name)
        .ok_or_else(|| ErrorResponse::bad_request(format!("Unknown provider: {}", name)))?;

    if !check_provider_configured(&metadata, provider_type) {
        return Err(ErrorResponse::bad_request(format!(
            "Provider '{}' is not configured",
            name
        )));
    }

    // The config is cloned because the fallback path below still needs it
    // after the provider has taken ownership of its copy.
    let model_config = ModelConfig::new(model)?.with_canonical_limits(name);
    let provider = goose::providers::create(name, model_config.clone(), Vec::new()).await?;

    let fetch_error = match provider.fetch_model_info(model).await {
        Ok(info) => return Ok(info),
        Err(error) => error,
    };

    let mut fallback = ModelInfo::new(model, model_config.context_limit());
    fallback.reasoning = model_config.is_reasoning_model();
    tracing::debug!(
        provider = name,
        model,
        error = %fetch_error,
        "Falling back to local model metadata"
    );
    Ok(fallback)
}
#[utoipa::path(
post,
path = "/config/providers/{name}/model-info",
params(
("name" = String, Path, description = "Provider name (e.g., openai)")
),
request_body = ProviderModelInfoQuery,
responses(
(status = 200, description = "Model metadata fetched successfully", body = ModelInfo),
(status = 400, description = "Unknown provider, provider not configured, or authentication error"),
(status = 429, description = "Rate limit exceeded"),
(status = 500, description = "Internal server error")
)
)]
pub async fn get_provider_model_info(
Path(name): Path<String>,
Json(query): Json<ProviderModelInfoQuery>,
) -> Result<Json<ModelInfo>, ErrorResponse> {
resolve_provider_model_info(&name, &query.model)
.await
.map(Json)
}
#[derive(Deserialize, utoipa::IntoParams)]
pub struct SlashCommandsQuery {
/// Optional working directory to discover local skills from
@@ -523,6 +587,7 @@ pub struct ModelInfoData {
pub model: String,
pub context_limit: usize,
pub max_output_tokens: Option<usize>,
pub reasoning: bool,
pub input_token_cost: Option<f64>,
pub output_token_cost: Option<f64>,
pub cache_read_token_cost: Option<f64>,
@@ -560,6 +625,9 @@ pub async fn get_canonical_model_info(
model: query.model.clone(),
context_limit: canonical_model.limit.context,
max_output_tokens: canonical_model.limit.output,
reasoning: canonical_model
.reasoning
.unwrap_or_else(|| ModelConfig::new_or_fail(&query.model).is_reasoning_model()),
// Costs are per million tokens - client handles division for display
input_token_cost: canonical_model.cost.input,
output_token_cost: canonical_model.cost.output,
@@ -926,6 +994,10 @@ pub fn routes(state: Arc<AppState>) -> Router {
.route("/config/extensions/{name}", delete(remove_extension))
.route("/config/providers", get(providers))
.route("/config/providers/{name}/models", get(get_provider_models))
.route(
"/config/providers/{name}/model-info",
post(get_provider_model_info),
)
.route("/config/provider-catalog", get(get_provider_catalog))
.route(
"/config/provider-catalog/{id}",
+40 -4
View File
@@ -1039,6 +1039,34 @@ async fn resolve_provider_and_model_from_config(
Ok((provider_name, model_config))
}
/// Builds the request params for a model switch within a session.
///
/// Layers are applied in this order, later ones overwriting earlier keys:
/// 1. whatever `model_config` already carries;
/// 2. the `thinking_effort` of `current_model_config`, but only when
///    `model_config` has no `thinking_effort` key of its own;
/// 3. the caller-supplied `request_params`, if any.
fn with_preserved_session_request_params(
    mut model_config: crate::model::ModelConfig,
    current_model_config: Option<&crate::model::ModelConfig>,
    request_params: Option<HashMap<String, serde_json::Value>>,
) -> crate::model::ModelConfig {
    fn effort_of(config: &crate::model::ModelConfig) -> Option<&serde_json::Value> {
        config.request_params.as_ref()?.get("thinking_effort")
    }

    let inherited_effort = if effort_of(&model_config).is_none() {
        current_model_config
            .and_then(|current| effort_of(current))
            .cloned()
    } else {
        None
    };

    if let Some(effort) = inherited_effort {
        let carried_over = HashMap::from([("thinking_effort".into(), effort)]);
        model_config = model_config.with_merged_request_params(carried_over);
    }

    match request_params {
        Some(overrides) => model_config.with_merged_request_params(overrides),
        None => model_config,
    }
}
/// Convenience wrapper: reads config from disk, then resolves provider + model.
/// Cheap enough to call from `on_new_session` (file + registry reads, no network).
async fn resolve_provider_and_model(
@@ -3223,11 +3251,14 @@ impl GooseAcpAgent {
.await
.internal_err_ctx("Failed to get provider")?;
let provider_name = current_provider.get_name().to_string();
let current_model_config = current_provider.get_model_config();
let extensions =
EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await;
let model_config = crate::model::ModelConfig::new(model_id)
.invalid_params_err_ctx("Invalid model config")?
.with_canonical_limits(&provider_name);
let model_config =
with_preserved_session_request_params(model_config, Some(&current_model_config), None);
let session = self
.session_manager
.get_session(session_id, false)
@@ -3333,7 +3364,8 @@ impl GooseAcpAgent {
.await
.internal_err_ctx("Failed to get provider")?;
let current_provider_name = current_provider.get_name();
let current_model = current_provider.get_model_config().model_name;
let current_model_config = current_provider.get_model_config();
let current_model = current_model_config.model_name.clone();
let has_default_overrides =
model_name.is_some() || context_limit.is_some() || request_params.is_some();
let use_default_provider = provider_name == DEFAULT_PROVIDER_ID;
@@ -3357,11 +3389,15 @@ impl GooseAcpAgent {
current_model
};
let model = model_name.unwrap_or(&default_model);
let model_config = crate::model::ModelConfig::new(model)
let mut model_config = crate::model::ModelConfig::new(model)
.invalid_params_err_ctx("Invalid model config")?
.with_canonical_limits(&resolved_provider_name)
.with_context_limit(context_limit)
.with_request_params(request_params);
.with_context_limit(context_limit);
model_config = with_preserved_session_request_params(
model_config,
(!is_changing_provider).then_some(&current_model_config),
request_params,
);
let extensions =
EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await;
+40 -5
View File
@@ -1098,7 +1098,6 @@ config_value!(CLAUDE_CODE_COMMAND, String, "claude");
config_value!(GEMINI_CLI_COMMAND, String, "gemini");
config_value!(CURSOR_AGENT_COMMAND, String, "cursor-agent");
config_value!(CODEX_COMMAND, String, "codex");
config_value!(CODEX_REASONING_EFFORT, String, "high");
config_value!(CODEX_ENABLE_SKILLS, String, "true");
config_value!(CODEX_SKIP_GIT_CHECK, String, "false");
config_value!(CHATGPT_CODEX_REASONING_EFFORT, String, "medium");
@@ -1137,12 +1136,48 @@ config_value!(GOOSE_PROMPT_EDITOR_ALWAYS, Option<bool>);
config_value!(GOOSE_MAX_ACTIVE_AGENTS, usize);
config_value!(GOOSE_DISABLE_SESSION_NAMING, bool);
config_value!(GOOSE_DISABLE_TOOL_CALL_SUMMARY, bool);
config_value!(GEMINI3_THINKING_LEVEL, String);
config_value!(CLAUDE_THINKING_TYPE, String);
config_value!(CLAUDE_THINKING_EFFORT, String);
config_value!(CLAUDE_THINKING_BUDGET, i32);
config_value!(GOOSE_THINKING_EFFORT, String);
config_value!(GOOSE_DEFAULT_EXTENSION_TIMEOUT, u64);
/// Picks the directory to search for files shipped next to the binary.
///
/// Walks upward from the running executable and returns the first ancestor
/// directory whose `Cargo.toml` is readable and contains the text
/// `[workspace]`. If no ancestor qualifies, the executable's own directory is
/// returned. Yields `None` only when the executable path cannot be determined
/// or has no parent.
fn find_workspace_or_exe_root() -> Option<PathBuf> {
    let exe = std::env::current_exe().ok()?;
    let exe_dir = exe.parent()?;

    // `ancestors()` starts with the executable itself, so skip it and inspect
    // only the enclosing directories, nearest first.
    let workspace_root = exe.ancestors().skip(1).find(|dir| {
        let manifest = dir.join("Cargo.toml");
        manifest.exists()
            && std::fs::read_to_string(&manifest)
                .is_ok_and(|content| content.contains("[workspace]"))
    });

    Some(workspace_root.unwrap_or(exe_dir).to_path_buf())
}
pub fn load_init_config_from_workspace() -> Result<Mapping, ConfigError> {
let root = find_workspace_or_exe_root().ok_or_else(|| {
ConfigError::FileError(std::io::Error::new(
std::io::ErrorKind::NotFound,
"Could not determine executable path",
))
})?;
let init_config_path = root.join("init-config.yaml");
if !init_config_path.exists() {
return Err(ConfigError::NotFound(
"init-config.yaml not found".to_string(),
));
}
let init_content = std::fs::read_to_string(&init_config_path)?;
parse_yaml_content(&init_content)
}
#[cfg(test)]
mod tests {
use super::*;
+455 -14
View File
@@ -1,12 +1,51 @@
use once_cell::sync::Lazy;
use serde::de::Deserializer;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use thiserror::Error;
use utoipa::ToSchema;
pub const DEFAULT_CONTEXT_LIMIT: usize = 128_000;
// Thinking/reasoning effort level shared by all providers.
// Serialized and deserialized as the lowercase variant name
// (`off`, `low`, `medium`, `high`, `max`) through `rename_all = "lowercase"`.
// NOTE(review): `//` comments are used so the `ToSchema` output is unchanged;
// switch to `///` if descriptions are wanted in the generated schema.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum ThinkingEffort {
// `FromStr` also accepts "disabled" and "none" for this level.
Off,
Low,
// `FromStr` also accepts "med" for this level.
Medium,
High,
// `FromStr` also accepts "xhigh" for this level.
Max,
}
/// Case-insensitive parsing of an effort level, including legacy aliases
/// ("disabled"/"none" for off, "med" for medium, "xhigh" for max).
impl FromStr for ThinkingEffort {
    type Err = String;

    /// Returns the matching level, or an error message quoting the lowercased
    /// input when it is not a recognised name or alias.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let effort = match normalized.as_str() {
            "off" | "disabled" | "none" => Self::Off,
            "low" => Self::Low,
            "medium" | "med" => Self::Medium,
            "high" => Self::High,
            "max" | "xhigh" => Self::Max,
            _ => return Err(format!("unknown thinking effort: '{normalized}'")),
        };
        Ok(effort)
    }
}
/// Renders the canonical lowercase name; every rendered value parses back to
/// the same variant through `FromStr`.
impl fmt::Display for ThinkingEffort {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Off => "off",
            Self::Low => "low",
            Self::Medium => "medium",
            Self::High => "high",
            Self::Max => "max",
        };
        f.write_str(name)
    }
}
#[derive(Debug, Clone, Deserialize)]
struct PredefinedModel {
name: String,
@@ -44,7 +83,7 @@ pub enum ConfigError {
InvalidRange(String, String),
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
pub struct ModelConfig {
pub model_name: String,
pub context_limit: Option<usize>,
@@ -61,6 +100,44 @@ pub struct ModelConfig {
pub reasoning: Option<bool>,
}
impl<'de> Deserialize<'de> for ModelConfig {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
struct RawModelConfig {
model_name: String,
context_limit: Option<usize>,
temperature: Option<f32>,
max_tokens: Option<i32>,
toolshim: bool,
toolshim_model: Option<String>,
#[serde(default)]
fast_model_config: Option<Box<ModelConfig>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
request_params: Option<HashMap<String, Value>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
reasoning: Option<bool>,
}
let raw = RawModelConfig::deserialize(deserializer)?;
let mut config = Self {
model_name: raw.model_name,
context_limit: raw.context_limit,
temperature: raw.temperature,
max_tokens: raw.max_tokens,
toolshim: raw.toolshim,
toolshim_model: raw.toolshim_model,
fast_model_config: raw.fast_model_config,
request_params: raw.request_params,
reasoning: raw.reasoning,
};
config.normalize_effort_suffix();
Ok(config)
}
}
impl ModelConfig {
pub fn new(model_name: &str) -> Result<Self, ConfigError> {
Self::new_base(model_name.to_string(), None)
@@ -114,13 +191,14 @@ impl ModelConfig {
let toolshim = Self::parse_toolshim()?;
let toolshim_model = Self::parse_toolshim_model()?;
// Pick up request_params from predefined models (always applies)
// Pick up predefined model settings before legacy suffix normalization.
let predefined = find_predefined_model(&model_name);
let predefined_context_limit = predefined.as_ref().and_then(|pm| pm.context_limit);
let request_params = predefined.and_then(|pm| pm.request_params);
Ok(Self {
let mut config = Self {
model_name,
context_limit,
context_limit: context_limit.or(predefined_context_limit),
temperature,
max_tokens,
toolshim,
@@ -128,7 +206,9 @@ impl ModelConfig {
fast_model_config: None,
request_params,
reasoning: None,
})
};
config.normalize_effort_suffix();
Ok(config)
}
pub fn with_canonical_limits(mut self, provider_name: &str) -> Self {
@@ -298,8 +378,17 @@ impl ModelConfig {
Ok(self)
}
pub fn with_request_params(mut self, params: Option<HashMap<String, Value>>) -> Self {
self.request_params = params;
/// Merges `params` into the config's request params and returns the config.
///
/// Keys already present are overwritten by the incoming value; when the config
/// has no request params yet, `params` becomes the whole map.
pub fn with_merged_request_params(mut self, params: HashMap<String, Value>) -> Self {
    if let Some(existing) = self.request_params.as_mut() {
        existing.extend(params);
    } else {
        self.request_params = Some(params);
    }
    self
}
@@ -319,6 +408,21 @@ impl ModelConfig {
crate::providers::utils::is_openai_responses_model(&self.model_name)
}
/// Whether this model should be offered thinking/reasoning controls.
///
/// An explicit `reasoning` flag on the config always wins. Without one, the
/// answer is a name-based guess: OpenAI reasoning models, any name containing
/// "claude" (case-insensitive), or a Gemini 3 name.
pub fn is_reasoning_model(&self) -> bool {
    self.reasoning.unwrap_or_else(|| {
        self.is_openai_reasoning_model()
            || self.model_name.to_lowercase().contains("claude")
            || Self::is_gemini3_reasoning_model_name(&self.model_name)
    })
}
/// Case-insensitive check for a Gemini 3 model name.
///
/// Matches when "gemini-3" opens the name or appears directly after a `/` or
/// `-` separator (e.g. `google/gemini-3-pro`, `vertex-gemini-3`).
fn is_gemini3_reasoning_model_name(model_name: &str) -> bool {
    const MARKER: &str = "gemini-3";

    let lowered = model_name.to_lowercase();
    lowered.match_indices(MARKER).any(|(start, _)| {
        // `start - 1` is only evaluated for non-zero offsets. Both separators
        // are ASCII, so comparing the single preceding byte is sufficient.
        start == 0 || matches!(lowered.as_bytes()[start - 1], b'/' | b'-')
    })
}
pub fn max_output_tokens(&self) -> i32 {
if let Some(tokens) = self.max_tokens {
return tokens;
@@ -327,6 +431,82 @@ impl ModelConfig {
4_096
}
/// Converts a legacy effort suffix on an OpenAI reasoning model name into a
/// `thinking_effort` request param.
///
/// For names such as `o3-mini-high`, the trailing `-none`, `-low`, `-medium`,
/// `-high` or `-xhigh` segment is removed from `model_name`. The matching
/// effort is stored under `request_params["thinking_effort"]` unless that key
/// is already present, in which case the explicit value is kept and only the
/// name is cleaned up.
///
/// Does nothing when the model is not an OpenAI reasoning model, when the name
/// has no recognised suffix, or when stripping the suffix would leave an empty
/// model name.
pub fn normalize_effort_suffix(&mut self) {
    if !self.is_openai_reasoning_model() {
        return;
    }

    let Some((base, suffix)) = self.model_name.rsplit_once('-') else {
        return;
    };
    // Never reduce the model name to nothing (e.g. a name that is just "-high").
    if base.is_empty() {
        return;
    }

    // Only the historical suffix spellings are recognised here; this is
    // deliberately narrower than `ThinkingEffort::from_str`.
    let effort = match suffix {
        "none" => ThinkingEffort::Off,
        "low" => ThinkingEffort::Low,
        "medium" => ThinkingEffort::Medium,
        "high" => ThinkingEffort::High,
        "xhigh" => ThinkingEffort::Max,
        _ => return,
    };

    self.model_name = base.to_string();

    // An explicitly configured effort takes precedence over the suffix.
    self.request_params
        .get_or_insert_with(HashMap::new)
        .entry("thinking_effort".to_string())
        .or_insert_with(|| serde_json::json!(effort.to_string()));
}
/// Resolves the effective thinking effort for this config.
///
/// The `thinking_effort` request param / `GOOSE_THINKING_EFFORT` setting is
/// consulted first through `get_config_param`. If it is absent or does not
/// parse as a [`ThinkingEffort`], the legacy provider-specific settings are
/// tried instead.
pub fn thinking_effort(&self) -> Option<ThinkingEffort> {
    let configured: Option<String> =
        self.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT");

    match configured.and_then(|raw| raw.parse::<ThinkingEffort>().ok()) {
        Some(effort) => Some(effort),
        None => Self::legacy_thinking_effort(),
    }
}
/// Maps the legacy per-provider settings onto a [`ThinkingEffort`].
///
/// Sources are tried in order and the first one that yields a value wins:
/// 1. `CLAUDE_THINKING_TYPE`: "adaptive"/"enabled" map to high, "disabled" to
///    off; any other value falls through to the next source.
/// 2. `CLAUDE_THINKING_ENABLED`: `true` maps to high, `false` to off.
/// 3. `GEMINI3_THINKING_LEVEL`: mapped by `legacy_gemini3_thinking_effort`.
fn legacy_thinking_effort() -> Option<ThinkingEffort> {
    let config = crate::config::Config::global();

    let from_claude_type = || -> Option<ThinkingEffort> {
        let mode = config.get_param::<String>("CLAUDE_THINKING_TYPE").ok()?;
        match mode.to_lowercase().as_str() {
            "adaptive" | "enabled" => Some(ThinkingEffort::High),
            "disabled" => Some(ThinkingEffort::Off),
            _ => None,
        }
    };
    let from_claude_enabled = || -> Option<ThinkingEffort> {
        let enabled = config.get_param::<bool>("CLAUDE_THINKING_ENABLED").ok()?;
        Some(if enabled {
            ThinkingEffort::High
        } else {
            ThinkingEffort::Off
        })
    };
    let from_gemini3_level = || -> Option<ThinkingEffort> {
        let level = config.get_param::<String>("GEMINI3_THINKING_LEVEL").ok()?;
        Self::legacy_gemini3_thinking_effort(&level)
    };

    // `or_else` keeps the later lookups lazy: a setting is only read when the
    // earlier ones produced nothing.
    from_claude_type()
        .or_else(from_claude_enabled)
        .or_else(from_gemini3_level)
}
/// Translates a legacy `GEMINI3_THINKING_LEVEL` value (case-insensitive).
///
/// Only "low" and "high" are recognised; anything else yields `None`.
fn legacy_gemini3_thinking_effort(value: &str) -> Option<ThinkingEffort> {
    let level = value.to_lowercase();
    if level == "low" {
        Some(ThinkingEffort::Low)
    } else if level == "high" {
        Some(ThinkingEffort::High)
    } else {
        None
    }
}
pub fn get_config_param<T: for<'de> serde::Deserialize<'de>>(
&self,
request_key: &str,
@@ -419,13 +599,10 @@ mod tests {
#[test]
fn test_get_config_param() {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_EFFORT", Some("high")),
("CLAUDE_THINKING_TYPE", None::<&str>),
]);
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
let mut params = HashMap::new();
params.insert("effort".to_string(), serde_json::json!("low"));
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
let config_with_params = ModelConfig {
model_name: "test".to_string(),
@@ -439,11 +616,13 @@ mod tests {
};
assert_eq!(
config_with_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
config_with_params
.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
Some("low".to_string())
);
assert_eq!(
config_without_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
config_without_params
.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
Some("high".to_string())
);
assert_eq!(
@@ -453,6 +632,236 @@ mod tests {
);
}
// The hand-written `Deserialize` impl must still populate the nested
// `fast_model_config`, and `use_fast_model` must hand that nested config back.
#[test]
fn test_deserialize_preserves_fast_model_config() {
    let fast_model = serde_json::json!({
        "model_name": "fast-model",
        "context_limit": 4096,
        "temperature": null,
        "max_tokens": 1024,
        "toolshim": false,
        "toolshim_model": null
    });
    let primary = serde_json::json!({
        "model_name": "primary-model",
        "context_limit": null,
        "temperature": null,
        "max_tokens": null,
        "toolshim": false,
        "toolshim_model": null,
        "fast_model_config": fast_model
    });

    let config: ModelConfig = serde_json::from_value(primary).unwrap();

    let nested = config.fast_model_config.as_deref().unwrap();
    assert_eq!(nested.model_name, "fast-model");
    assert_eq!(nested.context_limit, Some(4096));
    assert_eq!(nested.max_tokens, Some(1024));
    assert_eq!(config.use_fast_model().model_name, "fast-model");
}
mod thinking_effort_tests {
use super::*;
#[test]
fn from_request_params() {
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
let mut params = HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("medium"));
let config = ModelConfig {
model_name: "test".to_string(),
request_params: Some(params),
..Default::default()
};
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Medium));
}
#[test]
fn from_env_var() {
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
let config = ModelConfig {
model_name: "test".to_string(),
..Default::default()
};
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
}
#[test]
fn request_params_override_env() {
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
let mut params = HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
let config = ModelConfig {
model_name: "test".to_string(),
request_params: Some(params),
..Default::default()
};
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low));
}
#[test]
fn legacy_claude_thinking_type_fallback() {
for value in ["enabled", "adaptive"] {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", None::<&str>),
("CLAUDE_THINKING_TYPE", Some(value)),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("GEMINI3_THINKING_LEVEL", None::<&str>),
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>),
("GEMINI25_THINKING_BUDGET", None::<&str>),
]);
let config = ModelConfig {
model_name: "test".to_string(),
..Default::default()
};
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
}
}
#[test]
fn legacy_gemini3_thinking_level_mapping() {
assert_eq!(
ModelConfig::legacy_gemini3_thinking_effort("low"),
Some(ThinkingEffort::Low)
);
assert_eq!(
ModelConfig::legacy_gemini3_thinking_effort("high"),
Some(ThinkingEffort::High)
);
assert_eq!(ModelConfig::legacy_gemini3_thinking_effort("auto"), None);
}
#[test]
fn legacy_gemini3_thinking_level_fallback() {
let temp_dir = tempfile::tempdir().unwrap();
let temp_root = temp_dir.path().to_string_lossy().to_string();
let _guard = env_lock::lock_env([
("GOOSE_PATH_ROOT", Some(temp_root.as_str())),
("GOOSE_THINKING_EFFORT", None::<&str>),
("CLAUDE_THINKING_TYPE", None::<&str>),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("GEMINI3_THINKING_LEVEL", Some("high")),
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>),
("GEMINI25_THINKING_BUDGET", None::<&str>),
]);
let config = ModelConfig {
model_name: "gemini-3-pro".to_string(),
..Default::default()
};
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
}
#[test]
fn effort_suffix_stripped_from_model_name() {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", None::<&str>),
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_TEMPERATURE", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
("GOOSE_TOOLSHIM", None::<&str>),
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
]);
let config = ModelConfig::new("o3-mini-high").unwrap();
assert_eq!(config.model_name, "o3-mini");
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
}
#[test]
fn none_suffix_stripped_from_model_name() {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", Some("high")),
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_TEMPERATURE", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
("GOOSE_TOOLSHIM", None::<&str>),
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
]);
let config = ModelConfig::new("o3-mini-none").unwrap();
assert_eq!(config.model_name, "o3-mini");
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Off));
}
#[test]
fn xhigh_suffix_stripped_from_model_name() {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", Some("low")),
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_TEMPERATURE", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
("GOOSE_TOOLSHIM", None::<&str>),
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
]);
let config = ModelConfig::new("gpt-5.4-xhigh").unwrap();
assert_eq!(config.model_name, "gpt-5.4");
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Max));
}
#[test]
fn effort_suffix_not_stripped_when_thinking_effort_set() {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", None::<&str>),
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_TEMPERATURE", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
("GOOSE_TOOLSHIM", None::<&str>),
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
]);
let mut params = HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
let mut config = ModelConfig::new("o3-mini-high").unwrap();
// Suffix was already normalized during new(), but if request_params
// were set before construction, the suffix would not be stripped.
// Verify the normalized state:
assert_eq!(config.model_name, "o3-mini");
// Now simulate setting explicit effort after construction
config.request_params = Some(params);
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low));
}
#[test]
fn no_suffix_no_change() {
let _guard = env_lock::lock_env([
("GOOSE_THINKING_EFFORT", None::<&str>),
("GOOSE_MAX_TOKENS", None::<&str>),
("GOOSE_TEMPERATURE", None::<&str>),
("GOOSE_CONTEXT_LIMIT", None::<&str>),
("GOOSE_TOOLSHIM", None::<&str>),
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
]);
let config = ModelConfig::new("o3-mini").unwrap();
assert_eq!(config.model_name, "o3-mini");
}
#[test]
fn non_reasoning_model_suffix_not_stripped() {
    // Run with all model-related env overrides cleared.
    let cleared_env: [(&str, Option<&str>); 6] = [
        ("GOOSE_THINKING_EFFORT", None),
        ("GOOSE_MAX_TOKENS", None),
        ("GOOSE_TEMPERATURE", None),
        ("GOOSE_CONTEXT_LIMIT", None),
        ("GOOSE_TOOLSHIM", None),
        ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None),
    ];
    let _guard = env_lock::lock_env(cleared_env);

    // The trailing `-high` on this Claude model name is kept as part of the name.
    let suffixed_name = "claude-sonnet-4-high";
    let config = ModelConfig::new(suffixed_name).unwrap();
    assert_eq!(config.model_name, suffixed_name);
}
#[test]
fn parse_aliases() {
    // Each alias on the left must parse to the effort level on the right.
    let accepted = [
        ("off", ThinkingEffort::Off),
        ("disabled", ThinkingEffort::Off),
        ("med", ThinkingEffort::Medium),
        ("max", ThinkingEffort::Max),
        ("xhigh", ThinkingEffort::Max),
    ];
    for (alias, expected) in accepted {
        assert_eq!(alias.parse::<ThinkingEffort>(), Ok(expected));
    }

    // Unknown spellings are rejected rather than mapped to a default.
    let rejected = "invalid".parse::<ThinkingEffort>();
    assert!(rejected.is_err());
}
}
mod with_canonical_limits {
use super::*;
@@ -597,4 +1006,36 @@ mod tests {
assert!(!ModelConfig::new_or_fail("llama-3-70b").is_openai_reasoning_model());
}
}
mod is_reasoning_model {
    use super::*;

    /// Env vars cleared for the duration of each test in this module.
    const ENV_LOCK_KEYS: [(&str, Option<&str>); 5] = [
        ("GOOSE_MAX_TOKENS", None),
        ("GOOSE_TEMPERATURE", None),
        ("GOOSE_CONTEXT_LIMIT", None),
        ("GOOSE_TOOLSHIM", None),
        ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None),
    ];

    #[test]
    fn includes_reasoning_model_families() {
        let _guard = env_lock::lock_env(ENV_LOCK_KEYS);

        // One representative name per family expected to count as reasoning.
        for model_name in ["o3-mini", "claude-sonnet-4", "gemini-3-pro"] {
            assert!(ModelConfig::new_or_fail(model_name).is_reasoning_model());
        }
    }

    #[test]
    fn uses_explicit_metadata_first() {
        let _guard = env_lock::lock_env(ENV_LOCK_KEYS);

        // An explicit `reasoning = Some(true)` is honoured for this alias name.
        let mut flagged_alias = ModelConfig::new_or_fail("provider-alias");
        flagged_alias.reasoning = Some(true);
        assert!(flagged_alias.is_reasoning_model());

        // An explicit `reasoning = Some(false)` overrides a Claude model name.
        let mut unflagged_claude = ModelConfig::new_or_fail("claude-sonnet-4");
        unflagged_claude.reasoning = Some(false);
        assert!(!unflagged_claude.is_reasoning_model());
    }
}
}
+57 -10
View File
@@ -395,6 +395,9 @@ pub struct ModelInfo {
pub currency: Option<String>,
/// Whether this model supports cache control
pub supports_cache_control: Option<bool>,
/// Whether this model supports reasoning/thinking controls
#[serde(default)]
pub reasoning: bool,
}
impl ModelInfo {
@@ -407,6 +410,7 @@ impl ModelInfo {
output_token_cost: None,
currency: None,
supports_cache_control: None,
reasoning: false,
}
}
@@ -424,10 +428,37 @@ impl ModelInfo {
output_token_cost: Some(output_cost),
currency: Some("$".to_string()),
supports_cache_control: None,
reasoning: false,
}
}
}
/// Builds a [`ModelInfo`] for `model_name` as served by `provider_name`.
///
/// The `reasoning` flag comes from the bundled canonical model registry when
/// the model maps to an entry that carries an explicit value; otherwise it
/// falls back to `ModelConfig::is_reasoning_model`. The context limit is the
/// one produced by `with_canonical_limits(provider_name)`. No pricing or
/// cache-control data is filled in.
fn model_info_for_provider_model(provider_name: &str, model_name: &str) -> ModelInfo {
    // A registry that fails to load is treated the same as "no canonical entry".
    let registry = CanonicalModelRegistry::bundled().ok();
    let canonical = registry.as_ref().and_then(|registry| {
        let canonical_id = map_to_canonical_model(provider_name, model_name, registry)?;
        // Canonical ids are split into their `provider/model` halves for the lookup.
        let (provider, model) = canonical_id.split_once('/')?;
        registry.get(provider, model)
    });

    let reasoning = match canonical.as_ref().and_then(|model| model.reasoning) {
        Some(flag) => flag,
        None => ModelConfig::new_or_fail(model_name).is_reasoning_model(),
    };

    let context_limit = ModelConfig::new_or_fail(model_name)
        .with_canonical_limits(provider_name)
        .context_limit();

    ModelInfo {
        name: model_name.to_string(),
        context_limit,
        input_token_cost: None,
        output_token_cost: None,
        currency: None,
        supports_cache_control: None,
        reasoning,
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
pub enum ProviderType {
Preferred,
@@ -478,16 +509,7 @@ impl ProviderMetadata {
default_model: default_model.to_string(),
known_models: model_names
.iter()
.map(|&model_name| ModelInfo {
name: model_name.to_string(),
context_limit: ModelConfig::new_or_fail(model_name)
.with_canonical_limits(name)
.context_limit(),
input_token_cost: None,
output_token_cost: None,
currency: None,
supports_cache_control: None,
})
.map(|&model_name| model_info_for_provider_model(name, model_name))
.collect(),
model_doc_link: model_doc_link.to_string(),
config_keys,
@@ -917,6 +939,19 @@ pub trait Provider: Send + Sync {
Ok(vec![])
}
/// Returns a [`ModelInfo`] for every name reported by `fetch_supported_models`.
///
/// Errors from `fetch_supported_models` are propagated unchanged.
async fn fetch_supported_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
    let model_names = self.fetch_supported_models().await?;
    let mut infos = Vec::with_capacity(model_names.len());
    for model_name in &model_names {
        infos.push(model_info_for_provider_model(self.get_name(), model_name));
    }
    Ok(infos)
}
/// Returns the [`ModelInfo`] for a single model name under this provider.
///
/// The default implementation never fails; it derives everything locally
/// through `model_info_for_provider_model`.
async fn fetch_model_info(&self, model_name: &str) -> Result<ModelInfo, ProviderError> {
    let info = model_info_for_provider_model(self.get_name(), model_name);
    Ok(info)
}
/// Defaults to `false`.
/// NOTE(review): presumably lets a provider opt out of canonical-model
/// filtering of its model list — confirm against the callers.
fn skip_canonical_filtering(&self) -> bool {
false
}
@@ -982,6 +1017,15 @@ pub trait Provider: Send + Sync {
}
}
/// Returns a [`ModelInfo`] for every name reported by `fetch_recommended_models`.
///
/// Errors from `fetch_recommended_models` are propagated unchanged.
async fn fetch_recommended_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
    let recommended = self.fetch_recommended_models().await?;
    let mut infos = Vec::with_capacity(recommended.len());
    for model_name in &recommended {
        infos.push(model_info_for_provider_model(self.get_name(), model_name));
    }
    Ok(infos)
}
async fn map_to_canonical_model(
&self,
provider_model: &str,
@@ -1739,6 +1783,7 @@ mod tests {
output_token_cost: None,
currency: None,
supports_cache_control: None,
reasoning: false,
};
assert_eq!(info.context_limit, 1000);
@@ -1750,6 +1795,7 @@ mod tests {
output_token_cost: None,
currency: None,
supports_cache_control: None,
reasoning: false,
};
assert_eq!(info, info2);
@@ -1761,6 +1807,7 @@ mod tests {
output_token_cost: None,
currency: None,
supports_cache_control: None,
reasoning: false,
};
assert_ne!(info, info3);
}
+67 -2
View File
@@ -229,6 +229,29 @@ fn get_reasoning_effort(model_name: &str) -> String {
}
}
/// Picks the `reasoning.effort` string to send for `model_config`.
///
/// * No unified thinking effort configured: the model's default from
///   `get_reasoning_effort` is used.
/// * `ThinkingEffort::Off`: returns `None`, so the caller sends no reasoning block.
/// * Any other effort: returns the first level in that effort's preference
///   list that `reasoning_levels_for_model` accepts for this model, or `None`
///   when none of them is accepted.
fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option<String> {
    use crate::model::ThinkingEffort;

    let effort = match model_config.thinking_effort() {
        Some(effort) => effort,
        None => return Some(get_reasoning_effort(&model_config.model_name)),
    };

    let valid_levels = reasoning_levels_for_model(&model_config.model_name);
    // Each list starts with the requested level, followed by fallbacks in
    // order of preference.
    let preferred_levels: &[&str] = match effort {
        ThinkingEffort::Off => return None,
        ThinkingEffort::Low => &["low", "medium", "high", "xhigh"],
        ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"],
        ThinkingEffort::High => &["high", "medium", "xhigh", "low"],
        ThinkingEffort::Max => &["xhigh", "high", "medium", "low"],
    };

    let chosen = preferred_levels
        .iter()
        .find(|level| valid_levels.contains(level))
        .map(|level| (*level).to_string());
    chosen
}
fn create_codex_request(
model_config: &ModelConfig,
system: &str,
@@ -236,7 +259,7 @@ fn create_codex_request(
tools: &[Tool],
) -> Result<Value> {
let input_items = build_input_items(messages)?;
let reasoning_effort = get_reasoning_effort(&model_config.model_name);
let reasoning_effort = reasoning_effort_for_config(model_config);
let instructions = match model_config.model_name.as_str() {
"gpt-5.3-codex" => format!("{GPT_53_CODEX_TOOL_PREAMBLE}\n\n{system}"),
@@ -247,7 +270,6 @@ fn create_codex_request(
"model": model_config.model_name,
"input": input_items,
"store": false,
"reasoning": {"effort": reasoning_effort},
"instructions": instructions,
});
@@ -277,6 +299,13 @@ fn create_codex_request(
payload_obj.insert("temperature".to_string(), json!(temp));
}
if let Some(reasoning_effort) = reasoning_effort {
payload_obj.insert(
"reasoning".to_string(),
json!({ "effort": reasoning_effort }),
);
}
Ok(payload)
}
@@ -1177,6 +1206,42 @@ mod tests {
);
}
#[test]
fn test_create_codex_request_reasoning_effort_from_unified_thinking() {
    // Unified `thinking_effort = max` on this model is sent as `xhigh`.
    let mut config = ModelConfig::new("gpt-5.3-codex").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));

    let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();

    // The effort lives under `reasoning.effort`, never as a top-level key.
    assert_eq!(payload["reasoning"]["effort"], "xhigh");
    assert!(payload.get("reasoning_effort").is_none());
}
#[test]
fn test_create_codex_request_caps_unified_thinking_to_supported_level() {
    // For this unrecognised model name, `max` is expected to fall back to `high`.
    let mut config = ModelConfig::new("unknown-model").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));

    let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();

    assert_eq!(payload["reasoning"]["effort"], "high");
    assert!(payload.get("reasoning_effort").is_none());
}
#[test]
fn test_create_codex_request_off_omits_reasoning_for_codex_models() {
    // `thinking_effort = off` must leave the request without any reasoning block.
    let mut config = ModelConfig::new("gpt-5.2-codex").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("off"),
    )]));

    let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();

    assert!(payload.get("reasoning").is_none());
    assert!(payload.get("reasoning_effort").is_none());
}
#[test_case(
JwtClaims {
chatgpt_account_id: Some("account-1".to_string()),
+92 -46
View File
@@ -16,7 +16,7 @@ use super::base::{
};
use super::errors::ProviderError;
use super::utils::{filter_extensions_from_system_prompt, RequestLog};
use crate::config::base::{CodexCommand, CodexReasoningEffort, CodexSkipGitCheck};
use crate::config::base::{CodexCommand, CodexSkipGitCheck};
use crate::config::paths::Paths;
use crate::config::search_path::SearchPaths;
use crate::config::{Config, ExtensionConfig, GooseMode};
@@ -50,7 +50,7 @@ pub struct CodexProvider {
#[serde(skip)]
name: String,
/// Reasoning effort level (none, low, medium, high, xhigh)
reasoning_effort: String,
reasoning_effort: Option<String>,
/// Whether to skip git repo check
skip_git_check: bool,
/// CLI config overrides for MCP servers
@@ -60,12 +60,40 @@ pub struct CodexProvider {
}
impl CodexProvider {
fn supports_reasoning_effort(model_name: &str, reasoning_effort: &str) -> bool {
if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
return false;
}
fn legacy_reasoning_effort() -> Option<crate::model::ThinkingEffort> {
Config::global()
.get_param::<String>("CODEX_REASONING_EFFORT")
.ok()
.and_then(|effort| match effort.to_lowercase().as_str() {
"none" => Some(crate::model::ThinkingEffort::Off),
"low" => Some(crate::model::ThinkingEffort::Low),
"medium" => Some(crate::model::ThinkingEffort::Medium),
"high" => Some(crate::model::ThinkingEffort::High),
"xhigh" => Some(crate::model::ThinkingEffort::Max),
_ => None,
})
}
if reasoning_effort == "none" && model_name.contains("codex") {
/// Converts a unified thinking effort into the Codex reasoning-effort string.
///
/// Resolution order: the explicit `effort`, then the legacy
/// `CODEX_REASONING_EFFORT` setting, then `ThinkingEffort::High`. The model
/// name is currently unused, and the result is always `Some`.
fn map_thinking_effort(
    _model_name: &str,
    effort: Option<crate::model::ThinkingEffort>,
) -> Option<String> {
    use crate::model::ThinkingEffort;

    let resolved = match effort {
        Some(explicit) => explicit,
        // Only consult the legacy setting when nothing explicit was given.
        None => Self::legacy_reasoning_effort().unwrap_or(ThinkingEffort::High),
    };
    let level = match resolved {
        ThinkingEffort::Off => "none",
        ThinkingEffort::Low => "low",
        ThinkingEffort::Medium => "medium",
        ThinkingEffort::High => "high",
        ThinkingEffort::Max => "xhigh",
    };
    Some(level.to_string())
}
#[cfg(test)]
fn supports_reasoning_effort(_model_name: &str, reasoning_effort: &str) -> bool {
if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
return false;
}
@@ -115,7 +143,7 @@ impl CodexProvider {
println!("=== CODEX PROVIDER DEBUG ===");
println!("Command: {:?}", self.command);
println!("Model: {}", self.model.model_name);
println!("Reasoning effort: {}", self.reasoning_effort);
println!("Reasoning effort: {:?}", self.reasoning_effort);
println!("Skip git check: {}", self.skip_git_check);
println!("Prompt length: {} chars", prompt.len());
println!("Prompt: {}", prompt);
@@ -142,11 +170,10 @@ impl CodexProvider {
cmd.arg("-m").arg(&self.model.model_name);
}
// Reasoning effort configuration
cmd.arg("-c").arg(format!(
"model_reasoning_effort=\"{}\"",
self.reasoning_effort
));
if let Some(reasoning_effort) = &self.reasoning_effort {
cmd.arg("-c")
.arg(format!("model_reasoning_effort=\"{}\"", reasoning_effort));
}
for override_config in &self.mcp_config_overrides {
cmd.arg("-c").arg(override_config);
@@ -604,7 +631,6 @@ impl ProviderDef for CodexProvider {
CODEX_DOC_URL,
vec![
ConfigKey::from_value_type::<CodexCommand>(true, false, true),
ConfigKey::from_value_type::<CodexReasoningEffort>(false, false, true),
ConfigKey::from_value_type::<CodexSkipGitCheck>(false, false, true),
],
)
@@ -619,24 +645,8 @@ impl ProviderDef for CodexProvider {
let command: String = config.get_codex_command().unwrap_or_default().into();
let resolved_command = SearchPaths::builder().with_npm().resolve(command)?;
// Get reasoning effort from config, default to "high"
let reasoning_effort = config
.get_codex_reasoning_effort()
.map(String::from)
.unwrap_or_else(|_| "high".to_string());
// Validate reasoning effort
let reasoning_effort =
if Self::supports_reasoning_effort(&model.model_name, &reasoning_effort) {
reasoning_effort
} else {
tracing::warn!(
"Invalid CODEX_REASONING_EFFORT '{}' for model '{}', using 'high'",
reasoning_effort,
model.model_name
);
"high".to_string()
};
Self::map_thinking_effort(&model.model_name, model.thinking_effort());
// Get skip_git_check from config, default to false
let skip_git_check = config
@@ -925,7 +935,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -946,7 +956,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -980,7 +990,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -1005,7 +1015,7 @@ mod tests {
#[test]
fn test_reasoning_effort_support_by_model() {
assert!(CodexProvider::supports_reasoning_effort("gpt-5.2", "none"));
assert!(!CodexProvider::supports_reasoning_effort(
assert!(CodexProvider::supports_reasoning_effort(
"gpt-5.2-codex",
"none"
));
@@ -1029,7 +1039,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -1055,7 +1065,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -1128,7 +1138,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -1145,7 +1155,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
@@ -1214,20 +1224,56 @@ mod tests {
#[test]
fn test_config_keys() {
let metadata = CodexProvider::metadata();
assert_eq!(metadata.config_keys.len(), 3);
assert_eq!(metadata.config_keys.len(), 2);
// First key should be CODEX_COMMAND (required)
assert_eq!(metadata.config_keys[0].name, "CODEX_COMMAND");
assert!(metadata.config_keys[0].required);
assert!(!metadata.config_keys[0].secret);
// Second key should be CODEX_REASONING_EFFORT (optional)
assert_eq!(metadata.config_keys[1].name, "CODEX_REASONING_EFFORT");
// Second key should be CODEX_SKIP_GIT_CHECK (optional)
assert_eq!(metadata.config_keys[1].name, "CODEX_SKIP_GIT_CHECK");
assert!(!metadata.config_keys[1].required);
}
// Third key should be CODEX_SKIP_GIT_CHECK (optional)
assert_eq!(metadata.config_keys[2].name, "CODEX_SKIP_GIT_CHECK");
assert!(!metadata.config_keys[2].required);
#[test]
fn test_map_thinking_effort() {
    use crate::model::ThinkingEffort;

    // Neither the legacy nor the unified env override may leak into this test.
    let _guard = env_lock::lock_env([
        ("CODEX_REASONING_EFFORT", None::<&str>),
        ("GOOSE_THINKING_EFFORT", None::<&str>),
    ]);

    // (model name, requested effort, expected Codex level)
    let cases = [
        ("gpt-5.2-codex", Some(ThinkingEffort::Off), "none"),
        ("gpt-5.2", Some(ThinkingEffort::Off), "none"),
        ("gpt-5.2-codex", Some(ThinkingEffort::Max), "xhigh"),
        // With nothing configured anywhere, the default is `high`.
        ("gpt-5.2-codex", None, "high"),
    ];
    for (model_name, effort, expected) in cases {
        assert_eq!(
            CodexProvider::map_thinking_effort(model_name, effort),
            Some(expected.to_string())
        );
    }
}
#[test]
fn test_map_thinking_effort_uses_legacy_codex_env() {
    // Only the legacy variable is set; the unified one is cleared.
    let _guard = env_lock::lock_env([
        ("CODEX_REASONING_EFFORT", Some("low")),
        ("GOOSE_THINKING_EFFORT", None::<&str>),
    ]);

    // With no explicit effort, the legacy value decides the level.
    let mapped = CodexProvider::map_thinking_effort("gpt-5.2-codex", None);
    assert_eq!(mapped, Some("low".to_string()));
}
#[test]
@@ -1236,7 +1282,7 @@ mod tests {
command: PathBuf::from("codex"),
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
name: "codex".to_string(),
reasoning_effort: "high".to_string(),
reasoning_effort: Some("high".to_string()),
skip_git_check: false,
mcp_config_overrides: Vec::new(),
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
+456 -20
View File
@@ -3,12 +3,14 @@ use async_trait::async_trait;
use futures::future::BoxFuture;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashSet;
use std::sync::LazyLock;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::time::{Duration, Instant};
use super::api_client::{ApiClient, AuthMethod, AuthProvider};
use super::base::{
ConfigKey, MessageStream, Provider, ProviderDef, ProviderMetadata,
ConfigKey, MessageStream, ModelInfo, Provider, ProviderDef, ProviderMetadata,
DEFAULT_PROVIDER_TIMEOUT_SECS,
};
use super::embedding::EmbeddingCapable;
@@ -21,7 +23,7 @@ use super::openai_compatible::{
stream_openai_compat, stream_responses_compat,
};
use super::retry::ProviderRetry;
use super::utils::{ImageFormat, RequestLog};
use super::utils::{is_openai_responses_model, ImageFormat, RequestLog};
use crate::config::ConfigError;
use crate::conversation::message::Message;
use crate::instance_id::get_instance_id;
@@ -33,11 +35,35 @@ use crate::providers::retry::{
use rmcp::model::Tool;
use serde_json::json;
/// Metadata describing one Databricks serving endpoint.
#[derive(Debug, Clone)]
struct DatabricksEndpointInfo {
/// Endpoint name.
name: String,
/// Name of the model served behind the endpoint, when one could be determined.
upstream_model_name: Option<String>,
/// Provider reported alongside the upstream model, if any.
upstream_model_provider: Option<String>,
/// Whether the endpoint is considered reasoning-capable; `None` means unknown.
reasoning: Option<bool>,
}
/// A candidate upstream model extracted from an endpoint's served entities.
#[derive(Debug, Clone)]
struct DatabricksUpstreamModel {
/// Model (or entity) name as it appears in the endpoint metadata.
name: String,
/// Provider string attached to the model, when the metadata carries one.
provider: Option<String>,
}
/// Cache entry pairing endpoint metadata with the time it was stored.
#[derive(Debug, Clone)]
struct CachedDatabricksEndpointInfo {
/// The cached endpoint metadata.
info: DatabricksEndpointInfo,
/// When this entry was stored; used to decide whether the entry is still fresh.
fetched_at: Instant,
}
const DEFAULT_CLIENT_ID: &str = "databricks-cli";
const DEFAULT_REDIRECT_URL: &str = "http://localhost";
const DEFAULT_SCOPES: &[&str] = &["all-apis", "offline_access"];
const DATABRICKS_PROVIDER_NAME: &str = "databricks";
// How long (in seconds) a cached endpoint-metadata entry is considered fresh.
const DATABRICKS_ENDPOINT_METADATA_TTL_SECS: u64 = 60;
// Process-wide, lazily initialised cache of resolved endpoint metadata,
// keyed by a `String` and guarded by a std `Mutex`.
static DATABRICKS_ENDPOINT_INFO_CACHE: LazyLock<
Mutex<std::collections::HashMap<String, CachedDatabricksEndpointInfo>>,
> = LazyLock::new(|| Mutex::new(std::collections::HashMap::new()));
pub const DATABRICKS_DEFAULT_MODEL: &str = "databricks-claude-sonnet-4";
const DATABRICKS_DEFAULT_FAST_MODEL: &str = "databricks-claude-haiku-4-5";
pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[
@@ -116,6 +142,8 @@ impl AuthProvider for DatabricksAuthProvider {
pub struct DatabricksProvider {
#[serde(skip)]
api_client: ApiClient,
#[serde(skip)]
host: String,
auth: DatabricksAuth,
model: ModelConfig,
image_format: ImageFormat,
@@ -172,13 +200,14 @@ impl DatabricksProvider {
}));
let api_client = ApiClient::with_timeout(
host,
host.clone(),
auth_method,
Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS),
)?;
let mut provider = Self {
api_client,
host,
auth,
model: model.clone(),
image_format: ImageFormat::OpenAi,
@@ -240,13 +269,14 @@ impl DatabricksProvider {
}));
let api_client = ApiClient::with_timeout(
host,
host.clone(),
auth_method,
Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS),
)?;
Ok(Self {
api_client,
host,
auth,
model,
image_format: ImageFormat::OpenAi,
@@ -270,7 +300,252 @@ impl DatabricksProvider {
}
fn is_responses_model(model_name: &str) -> bool {
super::utils::is_openai_responses_model(model_name)
is_openai_responses_model(model_name)
}
/// Returns `true` when `model_name` contains `claude`, ignoring case.
fn is_claude_model(model_name: &str) -> bool {
    let lowered = model_name.to_lowercase();
    lowered.contains("claude")
}
/// Name-based heuristic: a model counts as reasoning-capable when it is a
/// Claude model or an OpenAI Responses-style model.
fn is_reasoning_capable_model_name(model_name: &str) -> bool {
    if Self::is_claude_model(model_name) {
        return true;
    }
    Self::is_responses_model(model_name)
}
fn endpoint_model_candidates(value: &Value) -> Vec<DatabricksUpstreamModel> {
let mut candidates: Vec<DatabricksUpstreamModel> = Vec::new();
fn get_string_at(value: &Value, path: &[&str]) -> Option<String> {
path.iter()
.try_fold(value, |current, key| current.get(*key))
.and_then(|v| v.as_str())
.filter(|s| !s.is_empty())
.map(ToString::to_string)
}
fn push_candidate(
name: Option<String>,
provider: Option<String>,
candidates: &mut Vec<DatabricksUpstreamModel>,
) {
if let Some(name) = name {
if !candidates.iter().any(|candidate| candidate.name == name) {
candidates.push(DatabricksUpstreamModel { name, provider });
}
}
}
for config_key in ["config", "pending_config"] {
let Some(config) = value.get(config_key) else {
continue;
};
for collection_key in ["served_entities", "served_models"] {
let Some(entities) = config.get(collection_key).and_then(|v| v.as_array()) else {
continue;
};
for entity in entities {
push_candidate(
get_string_at(entity, &["external_model", "name"]),
get_string_at(entity, &["external_model", "provider"]),
&mut candidates,
);
push_candidate(
get_string_at(entity, &["foundation_model", "name"]),
get_string_at(entity, &["foundation_model", "provider"]),
&mut candidates,
);
push_candidate(
get_string_at(entity, &["entity_name"]),
None,
&mut candidates,
);
}
}
}
candidates
}
/// Builds a [`DatabricksEndpointInfo`] from one endpoint JSON object.
///
/// Returns `None` when the object has no string `name`. The upstream model is
/// the first candidate whose name differs from the endpoint name. The
/// reasoning flag is derived from the upstream model name when there is one,
/// otherwise from the endpoint name, so it is always `Some`.
fn endpoint_info_from_value(endpoint: &Value) -> Option<DatabricksEndpointInfo> {
    let name = endpoint.get("name")?.as_str()?.to_string();

    let upstream = Self::endpoint_model_candidates(endpoint)
        .into_iter()
        .find(|candidate| candidate.name != name);
    let (upstream_model_name, upstream_model_provider) = match upstream {
        Some(model) => (Some(model.name), model.provider),
        None => (None, None),
    };

    let reasoning_source = upstream_model_name.as_deref().unwrap_or(&name);
    let reasoning = Some(Self::is_reasoning_capable_model_name(reasoning_source));

    Some(DatabricksEndpointInfo {
        name,
        upstream_model_name,
        upstream_model_provider,
        reasoning,
    })
}
/// Fetches and parses the metadata of a single serving endpoint.
///
/// Issues a GET to `api/2.0/serving-endpoints/{endpoint_name}` (the name is
/// URL-encoded).
///
/// # Errors
/// Returns `ProviderError::RequestFailed` when the request fails, the status
/// is not a success, the body is not valid JSON, or the JSON lacks the
/// fields `endpoint_info_from_value` requires.
async fn fetch_endpoint_info(
    &self,
    endpoint_name: &str,
) -> Result<DatabricksEndpointInfo, ProviderError> {
    let path = format!(
        "api/2.0/serving-endpoints/{}",
        urlencoding::encode(endpoint_name)
    );
    let response = match self.api_client.request(None, &path).response_get().await {
        Ok(response) => response,
        Err(e) => {
            return Err(ProviderError::RequestFailed(format!(
                "Failed to fetch Databricks endpoint metadata: {}",
                e
            )))
        }
    };

    let status = response.status();
    if !status.is_success() {
        // The body is best-effort detail only; an unreadable body becomes "".
        let detail = response.text().await.unwrap_or_default();
        return Err(ProviderError::RequestFailed(format!(
            "Failed to fetch Databricks endpoint metadata: {} {}",
            status, detail
        )));
    }

    let body: Value = match response.json().await {
        Ok(body) => body,
        Err(e) => {
            return Err(ProviderError::RequestFailed(format!(
                "Failed to parse Databricks endpoint metadata: {}",
                e
            )))
        }
    };

    match Self::endpoint_info_from_value(&body) {
        Some(info) => Ok(info),
        None => Err(ProviderError::RequestFailed(
            "Unexpected response format from Databricks endpoint metadata".to_string(),
        )),
    }
}
/// Resolves `endpoint_name` to its metadata, following chains where an
/// endpoint's upstream provider is `databricks-model-serving` (i.e. the
/// upstream model name is itself treated as another endpoint to look up).
///
/// At most `MAX_MODEL_SERVING_HOPS` endpoints are fetched, and an endpoint
/// that was already visited is never fetched again. The returned info always
/// carries the originally requested name in `name`; the upstream fields and
/// the reasoning flag come from the last endpoint reached.
///
/// # Errors
/// Propagates any error from `fetch_endpoint_info`, and returns
/// `ProviderError::RequestFailed` if the loop ends without any metadata.
async fn resolve_endpoint_info(
&self,
endpoint_name: &str,
) -> Result<DatabricksEndpointInfo, ProviderError> {
const MAX_MODEL_SERVING_HOPS: usize = 4;
let original_endpoint_name = endpoint_name.to_string();
let mut current_endpoint_name = endpoint_name.to_string();
let mut visited = HashSet::new();
// Metadata of the most recent endpoint that pointed at a further hop.
let mut last_info: Option<DatabricksEndpointInfo> = None;
for _ in 0..MAX_MODEL_SERVING_HOPS {
// Stop rather than refetch an endpoint that was already visited.
if !visited.insert(current_endpoint_name.clone()) {
break;
}
let info = self.fetch_endpoint_info(&current_endpoint_name).await?;
// Only follow the upstream name when it is served by Databricks model
// serving and has not been visited yet.
let next_endpoint_name = match (
info.upstream_model_provider.as_deref(),
info.upstream_model_name.as_deref(),
) {
(Some("databricks-model-serving"), Some(next_endpoint_name))
if !visited.contains(next_endpoint_name) =>
{
Some(next_endpoint_name.to_string())
}
_ => None,
};
if let Some(next_endpoint_name) = next_endpoint_name {
last_info = Some(info);
current_endpoint_name = next_endpoint_name;
continue;
}
// Terminal endpoint reached: report it under the originally requested name.
return Ok(if info.name == original_endpoint_name {
info
} else {
// If the terminal endpoint names no upstream model, its own name is
// used as the upstream model name.
let upstream_model_name = info
.upstream_model_name
.clone()
.or_else(|| Some(info.name.clone()));
DatabricksEndpointInfo {
name: original_endpoint_name,
upstream_model_name,
upstream_model_provider: info.upstream_model_provider.clone(),
reasoning: info.reasoning,
}
});
}
// The loop ended without reaching a terminal endpoint: fall back to the
// last metadata fetched, relabelled with the requested name.
last_info
.map(|info| DatabricksEndpointInfo {
name: original_endpoint_name,
upstream_model_name: info.upstream_model_name,
upstream_model_provider: info.upstream_model_provider,
reasoning: info.reasoning,
})
.ok_or_else(|| {
ProviderError::RequestFailed(
"Failed to resolve Databricks endpoint metadata".to_string(),
)
})
}
/// Cached wrapper around `resolve_endpoint_info`.
///
/// Entries are keyed by `"{host}:{endpoint_name}"` and reused while they are
/// younger than `DATABRICKS_ENDPOINT_METADATA_TTL_SECS`. Only successful
/// resolutions are stored; errors are propagated and never cached.
///
/// A poisoned cache mutex is recovered instead of panicking. The map is only
/// touched through single `get`/`insert` calls, so its contents stay valid
/// even if another thread panicked while holding the lock. Without this, one
/// panic would make every later call on this path panic as well.
async fn resolve_endpoint_info_cached(
    &self,
    endpoint_name: &str,
) -> Result<DatabricksEndpointInfo, ProviderError> {
    let cache_key = format!("{}:{}", self.host, endpoint_name);

    // The guard is a temporary of this statement, so the lock is released
    // before the network round-trip below.
    let cached = DATABRICKS_ENDPOINT_INFO_CACHE
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .get(&cache_key)
        .cloned();
    if let Some(cached) = cached {
        if cached.fetched_at.elapsed()
            < Duration::from_secs(DATABRICKS_ENDPOINT_METADATA_TTL_SECS)
        {
            return Ok(cached.info);
        }
    }

    let info = self.resolve_endpoint_info(endpoint_name).await?;
    DATABRICKS_ENDPOINT_INFO_CACHE
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .insert(
            cache_key,
            CachedDatabricksEndpointInfo {
                info: info.clone(),
                fetched_at: Instant::now(),
            },
        );
    Ok(info)
}
fn model_info_from_endpoint(info: DatabricksEndpointInfo) -> ModelInfo {
let context_model = info.upstream_model_name.as_deref().unwrap_or(&info.name);
let context_limit = ModelConfig::new_or_fail(context_model)
.with_canonical_limits(DATABRICKS_PROVIDER_NAME)
.context_limit();
let reasoning = info
.reasoning
.unwrap_or_else(|| ModelConfig::new_or_fail(context_model).is_reasoning_model());
ModelInfo {
name: info.name,
context_limit,
input_token_cost: None,
output_token_cost: None,
currency: None,
supports_cache_control: None,
reasoning,
}
}
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
@@ -378,11 +653,49 @@ impl Provider for DatabricksProvider {
messages: &[Message],
tools: &[Tool],
) -> Result<MessageStream, ProviderError> {
let path = self.get_endpoint_path(&model_config.model_name, false);
let (endpoint_name, _) = super::utils::extract_reasoning_effort(&model_config.model_name);
let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await.ok();
let effective_model_name = endpoint_info
.as_ref()
.and_then(|info| info.upstream_model_name.as_deref())
.unwrap_or(&model_config.model_name);
let is_responses_model = Self::is_responses_model(&model_config.model_name)
|| Self::is_responses_model(effective_model_name);
let path = if is_responses_model {
"serving-endpoints/responses".to_string()
} else {
self.get_endpoint_path(&model_config.model_name, false)
};
let client_request_id = self.build_client_request_id(session_id);
if Self::is_responses_model(&model_config.model_name) {
let mut payload = create_responses_request(model_config, system, messages, tools)?;
if is_responses_model {
let responses_model_config;
let request_model_config = if effective_model_name != model_config.model_name {
responses_model_config = {
let mut config = model_config.clone();
config.model_name = effective_model_name.to_string();
config
};
&responses_model_config
} else {
model_config
};
let mut payload =
create_responses_request(request_model_config, system, messages, tools)?;
payload["model"] = Value::String(endpoint_name.clone());
if payload.get("reasoning").is_none() {
if let Some(effort) = model_config.thinking_effort().and_then(|effort| {
super::utils::openai_reasoning_effort_for_thinking(effective_model_name, effort)
}) {
payload.as_object_mut().unwrap().insert(
"reasoning".to_string(),
json!({
"effort": effort,
"summary": "auto",
}),
);
}
}
payload["stream"] = Value::Bool(true);
if let Some(ref client_request_id) = client_request_id {
payload["client_request_id"] = Value::String(client_request_id.clone());
@@ -406,8 +719,27 @@ impl Provider for DatabricksProvider {
stream_responses_compat(response, log)
} else {
let mut payload =
create_request(model_config, system, messages, tools, &self.image_format)?;
let format_model_config;
let request_model_config = if Self::is_claude_model(effective_model_name)
&& !Self::is_claude_model(&model_config.model_name)
{
format_model_config = {
let mut config = model_config.clone();
config.model_name = effective_model_name.to_string();
config
};
&format_model_config
} else {
model_config
};
let mut payload = create_request(
request_model_config,
system,
messages,
tools,
&self.image_format,
)?;
payload
.as_object_mut()
.expect("payload should have model key")
@@ -498,6 +830,15 @@ impl Provider for DatabricksProvider {
}
/// Returns the names of all supported models, taken from
/// `fetch_supported_model_info` in the same order.
async fn fetch_supported_models(&self) -> Result<Vec<String>, ProviderError> {
    let infos = self.fetch_supported_model_info().await?;
    let mut names = Vec::with_capacity(infos.len());
    for info in infos {
        names.push(info.name);
    }
    Ok(names)
}
async fn fetch_supported_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
let response = self
.api_client
.request(None, "api/2.0/serving-endpoints")
@@ -530,18 +871,25 @@ impl Provider for DatabricksProvider {
)
})?;
let models: Vec<String> = endpoints
.iter()
.filter_map(|endpoint| {
endpoint
.get("name")
.and_then(|v| v.as_str())
.map(|name| name.to_string())
})
.collect();
let mut models = Vec::new();
for endpoint in endpoints {
if let Some(endpoint_info) = Self::endpoint_info_from_value(endpoint) {
models.push(Self::model_info_from_endpoint(endpoint_info));
}
}
Ok(models)
}
/// Returns the [`ModelInfo`] for one model, resolved through the (cached)
/// endpoint metadata.
///
/// The endpoint name is the first element returned by
/// `extract_reasoning_effort(model_name)`. Resolution errors are propagated.
async fn fetch_model_info(&self, model_name: &str) -> Result<ModelInfo, ProviderError> {
    let endpoint_name = super::utils::extract_reasoning_effort(model_name).0;
    self.resolve_endpoint_info_cached(&endpoint_name)
        .await
        .map(Self::model_info_from_endpoint)
}
/// Returns exactly what `fetch_supported_model_info` returns; no separate
/// recommendation step is applied here.
async fn fetch_recommended_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
self.fetch_supported_model_info().await
}
}
#[async_trait]
@@ -596,6 +944,7 @@ mod tests {
super::super::api_client::AuthMethod::NoAuth,
)
.unwrap(),
host: "https://example.com".to_string(),
auth: DatabricksAuth::Token("fake".into()),
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
image_format: ImageFormat::OpenAi,
@@ -628,4 +977,91 @@ mod tests {
);
}
}
#[test]
fn endpoint_metadata_marks_reasoning_alias_from_external_model() {
    // An alias endpoint ("goose") fronting an external Anthropic model.
    let payload = json!({
        "name": "goose",
        "config": {
            "served_entities": [{
                "name": "current",
                "external_model": {
                    "name": "claude-opus-4.6",
                    "provider": "anthropic",
                    "task": "llm/v1/chat"
                }
            }]
        }
    });

    let parsed = DatabricksProvider::endpoint_info_from_value(&payload).unwrap();

    // The alias keeps its own name, while the reasoning flag follows the
    // upstream Claude model.
    assert_eq!(parsed.name, "goose");
    assert_eq!(
        parsed.upstream_model_name.as_deref(),
        Some("claude-opus-4.6")
    );
    assert_eq!(parsed.reasoning, Some(true));
}
#[test]
fn endpoint_metadata_captures_databricks_model_serving_hop() {
    // The upstream "model" here is another Databricks serving endpoint.
    let payload = json!({
        "name": "goose",
        "config": {
            "served_entities": [{
                "external_model": {
                    "name": "databricks-claude-opus-4-6",
                    "provider": "databricks-model-serving",
                    "task": "llm/v1/chat"
                }
            }]
        }
    });

    let parsed = DatabricksProvider::endpoint_info_from_value(&payload).unwrap();

    assert_eq!(parsed.name, "goose");
    // Both the upstream name and its provider must be captured so the hop
    // can be recognised later.
    let upstream_name = parsed.upstream_model_name.as_deref();
    assert_eq!(upstream_name, Some("databricks-claude-opus-4-6"));
    let upstream_provider = parsed.upstream_model_provider.as_deref();
    assert_eq!(upstream_provider, Some("databricks-model-serving"));
    assert_eq!(parsed.reasoning, Some(true));
}
#[test]
fn endpoint_metadata_marks_reasoning_alias_from_pending_gpt_model() {
    // Only `pending_config` is present; it must still be inspected.
    let payload = json!({
        "name": "goose",
        "pending_config": {
            "served_entities": [{
                "external_model": {
                    "name": "gpt-5.5",
                    "provider": "openai",
                    "task": "llm/v1/chat"
                }
            }]
        }
    });

    let parsed = DatabricksProvider::endpoint_info_from_value(&payload).unwrap();

    assert_eq!(parsed.name, "goose");
    assert_eq!(parsed.upstream_model_name.as_deref(), Some("gpt-5.5"));
    assert_eq!(parsed.reasoning, Some(true));
}
/// With no served entities at all, no upstream model is recorded, yet an
/// endpoint named `goose-gpt-5-5` is still flagged as reasoning.
#[test]
fn endpoint_metadata_uses_endpoint_name_when_no_upstream_model_exists() {
    let payload = json!({
        "name": "goose-gpt-5-5"
    });

    let parsed = DatabricksProvider::endpoint_info_from_value(&payload).unwrap();

    assert_eq!(parsed.name, "goose-gpt-5-5");
    assert_eq!(parsed.upstream_model_name, None);
    assert_eq!(parsed.reasoning, Some(true));
}
}
+86 -91
View File
@@ -1,6 +1,6 @@
use crate::conversation::message::{Message, MessageContent};
use crate::mcp_utils::extract_text_from_resource;
use crate::model::ModelConfig;
use crate::model::{ModelConfig, ThinkingEffort};
use crate::providers::base::Usage;
use crate::providers::errors::ProviderError;
use crate::providers::utils::{convert_image, ImageFormat};
@@ -37,7 +37,6 @@ macro_rules! string_enum {
}
string_enum!(ThinkingType { Adaptive => "adaptive", Enabled => "enabled", Disabled => "disabled" });
string_enum!(ThinkingEffort { Low => "low", Medium => "medium", High => "high", Max => "max" });
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AnthropicFormatOptions {
@@ -80,33 +79,16 @@ pub fn thinking_type(model_config: &ModelConfig) -> ThinkingType {
}
let is_adaptive_model = supports_adaptive_thinking(&model_config.model_name);
let effort = model_config.thinking_effort();
if let Some(s) =
model_config.get_config_param::<String>("thinking_type", "CLAUDE_THINKING_TYPE")
{
let tt = s.parse::<ThinkingType>().unwrap_or_else(|e| {
tracing::warn!("{e}");
ThinkingType::Disabled
});
if tt == ThinkingType::Adaptive && !is_adaptive_model {
tracing::warn!(
"Adaptive thinking not supported for {}, disabling thinking",
model_config.model_name
);
return ThinkingType::Disabled;
}
return tt;
if effort.is_none() && legacy_thinking_budget_tokens().is_some() {
return ThinkingType::Enabled;
}
if is_adaptive_model {
ThinkingType::Adaptive
} else if std::env::var("CLAUDE_THINKING_ENABLED").is_ok() {
tracing::warn!(
"CLAUDE_THINKING_ENABLED is deprecated, use CLAUDE_THINKING_TYPE=enabled instead"
);
ThinkingType::Enabled
} else {
ThinkingType::Disabled
match effort.unwrap_or(ThinkingEffort::Off) {
ThinkingEffort::Off => ThinkingType::Disabled,
_ if is_adaptive_model => ThinkingType::Adaptive,
_ => ThinkingType::Enabled,
}
}
@@ -510,35 +492,45 @@ pub fn get_usage(data: &Value) -> Result<Usage> {
}
pub fn thinking_effort(model_config: &ModelConfig) -> ThinkingEffort {
match model_config.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT") {
Some(s) => s.parse().unwrap_or_else(|e| {
tracing::warn!("{e}, defaulting to 'high'");
ThinkingEffort::High
}),
None => ThinkingEffort::High,
}
model_config
.thinking_effort()
.unwrap_or(ThinkingEffort::High)
}
fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
let request_param = model_config
pub fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
if let Some(request_param) = model_config
.request_params
.as_ref()
.and_then(|params| params.get("budget_tokens"))
.and_then(|v| serde_json::from_value(v.clone()).ok());
.and_then(|v| serde_json::from_value::<i32>(v.clone()).ok())
{
return request_param.max(1024);
}
request_param
.or_else(|| {
crate::config::Config::global()
.get_param::<i32>("ANTHROPIC_THINKING_BUDGET")
.ok()
})
.or_else(|| {
crate::config::Config::global()
.get_param::<i32>("CLAUDE_THINKING_BUDGET")
.ok()
})
.unwrap_or(16000)
.max(1024)
if let Some(budget) = legacy_thinking_budget_tokens() {
return budget;
}
let effort = model_config
.thinking_effort()
.unwrap_or(ThinkingEffort::High);
match effort {
ThinkingEffort::Off => 1024,
ThinkingEffort::Low => 4000,
ThinkingEffort::Medium => 10000,
ThinkingEffort::High => 16000,
ThinkingEffort::Max => 32000,
}
}
/// Looks up a thinking budget from the legacy configuration keys.
///
/// `ANTHROPIC_THINKING_BUDGET` is consulted first, then
/// `CLAUDE_THINKING_BUDGET`; the first key that yields an `i32` wins and the
/// value is raised to at least 1024. Returns `None` when neither key yields a
/// value.
fn legacy_thinking_budget_tokens() -> Option<i32> {
    let config = crate::config::Config::global();
    ["ANTHROPIC_THINKING_BUDGET", "CLAUDE_THINKING_BUDGET"]
        .iter()
        .find_map(|&key| config.get_param::<i32>(key).ok())
        .map(|budget| budget.max(1024))
}
fn apply_thinking_config(
@@ -1181,14 +1173,14 @@ mod tests {
#[test]
fn test_create_request_adaptive_thinking_for_46_models() -> Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", Some("adaptive")),
("CLAUDE_THINKING_EFFORT", Some("high")),
("CLAUDE_THINKING_ENABLED", None::<&str>),
]);
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), json!("high"));
let mut config = cfg("claude-opus-4-6");
config.max_tokens = Some(4096);
config.request_params = Some(params);
let messages = vec![Message::user().with_text("Hello")];
let payload = create_request(&config, "system", &messages, &[])?;
@@ -1202,27 +1194,20 @@ mod tests {
#[test]
fn test_create_request_enabled_thinking_with_budget() -> Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", None::<&str>),
("CLAUDE_THINKING_EFFORT", None::<&str>),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>),
("GOOSE_THINKING_EFFORT", None::<&str>),
("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>),
]);
let mut params = std::collections::HashMap::new();
params.insert("thinking_type".to_string(), json!("enabled"));
params.insert("budget_tokens".to_string(), json!(10000));
let mut config = cfg("claude-3-7-sonnet-20250219");
let mut config = cfg_with_effort("claude-3-7-sonnet-20250219", "high");
config.max_tokens = Some(4096);
config.request_params = Some(params);
let messages = vec![Message::user().with_text("Hello")];
let payload = create_request(&config, "system", &messages, &[])?;
assert_eq!(payload["thinking"]["type"], "enabled");
assert_eq!(payload["thinking"]["budget_tokens"], 10000);
assert_eq!(payload["max_tokens"], 4096 + 10000);
let budget = payload["thinking"]["budget_tokens"].as_i64().unwrap();
assert!(budget > 0);
assert_eq!(payload["max_tokens"], 4096 + budget);
Ok(())
}
@@ -1230,12 +1215,11 @@ mod tests {
#[test]
fn test_create_request_disabled_thinking_no_thinking_field() -> Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", None::<&str>),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("GOOSE_THINKING_EFFORT", None::<&str>),
("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>),
]);
let config = cfg("claude-sonnet-4-20250514");
let config = cfg_with_effort("claude-sonnet-4-20250514", "off");
let messages = vec![Message::user().with_text("Hello")];
let payload = create_request(&config, "system", &messages, &[])?;
@@ -1449,9 +1433,9 @@ mod tests {
}
}
fn cfg_with_thinking(name: &str, tt: &str) -> ModelConfig {
fn cfg_with_effort(name: &str, effort: &str) -> ModelConfig {
let mut params = std::collections::HashMap::new();
params.insert("thinking_type".to_string(), json!(tt));
params.insert("thinking_effort".to_string(), json!(effort));
ModelConfig {
model_name: name.to_string(),
request_params: Some(params),
@@ -1460,50 +1444,61 @@ mod tests {
}
#[test]
fn test_thinking_type_explicit_params() {
fn test_thinking_type_from_effort() {
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
// Adaptive model with effort → adaptive
assert_eq!(
thinking_type(&cfg_with_thinking("claude-opus-4-6", "adaptive")),
thinking_type(&cfg_with_effort("claude-opus-4-6", "high")),
ThinkingType::Adaptive
);
// Adaptive model with off → disabled
assert_eq!(
thinking_type(&cfg_with_thinking("claude-opus-4-6", "disabled")),
thinking_type(&cfg_with_effort("claude-opus-4-6", "off")),
ThinkingType::Disabled
);
// Non-adaptive Claude with effort → enabled
assert_eq!(
thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "enabled")),
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "high")),
ThinkingType::Enabled
);
// Non-adaptive Claude with off → disabled
assert_eq!(
thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "adaptive")),
ThinkingType::Disabled
);
assert_eq!(
thinking_type(&cfg_with_thinking("claude-opus-4-6", "adapttive")),
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")),
ThinkingType::Disabled
);
}
/// A legacy `ANTHROPIC_THINKING_BUDGET` value is returned as the budget even
/// though the model config carries an explicit `high` effort.
#[test]
fn test_thinking_budget_uses_legacy_env() {
    // Hold the env lock for the whole test so the legacy variable stays set.
    let _env = env_lock::lock_env([
        ("GOOSE_THINKING_EFFORT", None::<&str>),
        ("ANTHROPIC_THINKING_BUDGET", Some("8192")),
        ("CLAUDE_THINKING_BUDGET", None::<&str>),
    ]);

    let model_config = cfg_with_effort("claude-3-7-sonnet-20250219", "high");
    let budget = thinking_budget_tokens(&model_config);

    assert_eq!(budget, 8192);
}
#[test]
fn test_thinking_type_non_claude_always_disabled() {
assert_eq!(thinking_type(&cfg("gpt-4o")), ThinkingType::Disabled);
assert_eq!(
thinking_type(&cfg_with_thinking("gpt-4o", "enabled")),
thinking_type(&cfg_with_effort("gpt-4o", "off")),
ThinkingType::Disabled
);
assert_eq!(
thinking_type(&cfg_with_effort("gpt-4o", "high")),
ThinkingType::Disabled
);
}
#[test]
fn test_thinking_type_env_var_override() {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", Some("adaptive")),
("CLAUDE_THINKING_ENABLED", None::<&str>),
]);
fn test_thinking_type_off_means_disabled() {
assert_eq!(
thinking_type(&cfg("claude-opus-4-6")),
ThinkingType::Adaptive
thinking_type(&cfg_with_effort("claude-opus-4-6", "off")),
ThinkingType::Disabled
);
assert_eq!(
thinking_type(&cfg("claude-3-7-sonnet-20250219")),
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")),
ThinkingType::Disabled
);
}
@@ -1,10 +1,12 @@
use crate::conversation::message::{Message, MessageContent};
use crate::model::ModelConfig;
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
use crate::providers::formats::anthropic::{
thinking_budget_tokens, thinking_effort, thinking_type, ThinkingType,
};
use crate::providers::utils::{
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking,
safely_parse_json, sanitize_function_name, ImageFormat,
};
use anyhow::{anyhow, Error};
use rmcp::model::{
@@ -245,11 +247,7 @@ fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig)
);
}
ThinkingType::Enabled => {
let budget_tokens = model_config
.get_config_param::<i32>("budget_tokens", "CLAUDE_THINKING_BUDGET")
.unwrap_or(16000)
.max(1024);
let budget_tokens = thinking_budget_tokens(model_config);
let max_tokens = model_config.max_output_tokens() + budget_tokens;
obj.insert("max_tokens".to_string(), json!(max_tokens));
obj.insert(
@@ -582,8 +580,17 @@ pub fn create_request(
));
}
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let is_openai_reasoning_model = is_openai_responses_model(&model_name);
let reasoning_effort = if is_openai_reasoning_model {
model_config
.thinking_effort()
.map_or(legacy_reasoning_effort, |effort| {
openai_reasoning_effort_for_thinking(&model_name, effort)
})
} else {
None
};
let system_message = DatabricksMessage {
role: "system".to_string(),
@@ -652,6 +659,9 @@ pub fn create_request(
if let Some(params) = &model_config.request_params {
if let Some(obj) = payload.as_object_mut() {
for (key, value) in params {
if key == "thinking_effort" {
continue;
}
obj.insert(key.clone(), value.clone());
}
}
@@ -1042,15 +1052,17 @@ mod tests {
#[test]
fn test_create_request_reasoning_effort() -> anyhow::Result<()> {
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
let model_config = ModelConfig {
model_name: "o3-mini-high".to_string(),
model_name: "o3-mini".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
request_params: Some(params),
reasoning: None,
};
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
@@ -1058,6 +1070,48 @@ mod tests {
Ok(())
}
/// `thinking_effort = "off"` on a Databricks o3-mini endpoint is sent as
/// `reasoning_effort = "none"`, and the `thinking_effort` key itself is not
/// copied into the request.
#[test]
fn test_create_request_off_effort_preserves_none() -> anyhow::Result<()> {
    let request_params = std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        serde_json::json!("off"),
    )]);
    let model_config = ModelConfig {
        model_name: "databricks-o3-mini".to_string(),
        request_params: Some(request_params),
        context_limit: Some(4096),
        max_tokens: Some(1024),
        temperature: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        reasoning: None,
    };

    let payload = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(payload["reasoning_effort"], "none");
    assert!(payload.get("thinking_effort").is_none());
    Ok(())
}
/// `thinking_effort = "max"` on a gpt-5.2-pro endpoint is sent as
/// `reasoning_effort = "high"`, and the `thinking_effort` key itself is not
/// copied into the request.
#[test]
fn test_create_request_max_effort_uses_supported_level() -> anyhow::Result<()> {
    let request_params = std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        serde_json::json!("max"),
    )]);
    let model_config = ModelConfig {
        model_name: "databricks-gpt-5.2-pro".to_string(),
        request_params: Some(request_params),
        context_limit: Some(4096),
        max_tokens: Some(1024),
        temperature: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        reasoning: None,
    };

    let payload = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(payload["reasoning_effort"], "high");
    assert!(payload.get("thinking_effort").is_none());
    Ok(())
}
#[test]
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
let model_config = ModelConfig {
@@ -1117,15 +1171,11 @@ mod tests {
#[test]
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", Some("adaptive")),
("CLAUDE_THINKING_EFFORT", Some("low")),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>),
]);
let mut model_config = ModelConfig::new_or_fail("databricks-claude-opus-4-6");
model_config.max_tokens = Some(4096);
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
model_config.request_params = Some(params);
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
@@ -1140,30 +1190,47 @@ mod tests {
#[test]
fn test_create_request_enabled_thinking_with_budget() -> anyhow::Result<()> {
let _guard = env_lock::lock_env([
("CLAUDE_THINKING_TYPE", None::<&str>),
("CLAUDE_THINKING_ENABLED", None::<&str>),
("CLAUDE_THINKING_BUDGET", Some("10000")),
]);
let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet");
model_config.max_tokens = Some(4096);
model_config = model_config.with_request_params(Some(std::collections::HashMap::from([(
"thinking_type".to_string(),
json!("enabled"),
)])));
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
model_config.request_params = Some(params);
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
assert_eq!(request["thinking"]["type"], "enabled");
assert_eq!(request["thinking"]["budget_tokens"], 10000);
assert_eq!(request["max_tokens"], 14096);
assert_eq!(request["thinking"]["budget_tokens"], 16000);
assert_eq!(request["max_tokens"], 20096);
assert_eq!(request["temperature"], 2);
assert!(request.get("max_completion_tokens").is_none());
Ok(())
}
/// For a non-adaptive Claude model, each thinking effort level maps to a fixed
/// `budget_tokens` value, and `max_tokens` is the configured output limit plus
/// that budget.
#[test]
fn test_create_request_enabled_thinking_budget_tracks_effort() -> anyhow::Result<()> {
    // The legacy budget variables are returned ahead of the effort-derived
    // budget, so an ambient value (or another test that sets one while holding
    // the env lock) would change the numbers asserted below. Take the lock and
    // clear them for the duration of the test.
    let _guard = env_lock::lock_env([
        ("GOOSE_THINKING_EFFORT", None::<&str>),
        ("ANTHROPIC_THINKING_BUDGET", None::<&str>),
        ("CLAUDE_THINKING_BUDGET", None::<&str>),
    ]);

    for (effort, expected_budget) in [
        ("low", 4000),
        ("medium", 10000),
        ("high", 16000),
        ("max", 32000),
    ] {
        let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet");
        model_config.max_tokens = Some(4096);
        let mut params = std::collections::HashMap::new();
        params.insert("thinking_effort".to_string(), serde_json::json!(effort));
        model_config.request_params = Some(params);

        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

        assert_eq!(request["thinking"]["type"], "enabled");
        assert_eq!(request["thinking"]["budget_tokens"], expected_budget);
        assert_eq!(request["max_tokens"], 4096 + expected_budget);
    }
    Ok(())
}
#[test]
fn test_response_to_message_claude_thinking() -> anyhow::Result<()> {
let response = json!({
+27 -18
View File
@@ -542,22 +542,18 @@ fn get_thinking_config(model_config: &ModelConfig) -> Option<ThinkingConfig> {
}
if is_gemini_3 {
let thinking_level_str = model_config
.get_config_param::<String>("thinking_level", "GEMINI3_THINKING_LEVEL")
.map(|s| s.to_lowercase())
.unwrap_or_else(|| "low".to_string());
let thinking_level = match thinking_level_str.as_str() {
"high" => ThinkingLevel::High,
"low" => ThinkingLevel::Low,
invalid => {
tracing::warn!(
"Invalid thinking level '{}' for model '{}'. Valid levels: low, high. Using 'low'.",
invalid,
model_config.model_name,
);
use crate::model::ThinkingEffort;
let effort = model_config
.thinking_effort()
.unwrap_or(ThinkingEffort::Off);
if effort == ThinkingEffort::Off {
return None;
}
let thinking_level = match effort {
ThinkingEffort::Off | ThinkingEffort::Low | ThinkingEffort::Medium => {
ThinkingLevel::Low
}
ThinkingEffort::High | ThinkingEffort::Max => ThinkingLevel::High,
};
Some(ThinkingConfig {
@@ -1378,7 +1374,11 @@ data: [DONE]"#;
fn test_get_thinking_config() {
use crate::model::ModelConfig;
let config = ModelConfig::new("gemini-3-pro").unwrap();
// Test 1: Gemini 3 model with low thinking effort
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
let mut config = ModelConfig::new("gemini-3-pro").unwrap();
config.request_params = Some(params);
let result = get_thinking_config(&config);
assert!(result.is_some());
let thinking_config = result.unwrap();
@@ -1386,9 +1386,18 @@ data: [DONE]"#;
assert!(thinking_config.thinking_budget.is_none());
assert!(thinking_config.include_thoughts);
let config = ModelConfig::new("Gemini-3-Flash").unwrap();
// Test 2: Gemini 3 model with high thinking effort
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
let mut config = ModelConfig::new("Gemini-3-Flash").unwrap();
config.request_params = Some(params);
let result = get_thinking_config(&config);
assert!(result.is_some());
let thinking_config = result.unwrap();
assert!(matches!(
thinking_config.thinking_level,
Some(ThinkingLevel::High)
));
let config = ModelConfig::new("gemini-2.5-flash").unwrap();
let result = get_thinking_config(&config);
@@ -1405,7 +1414,7 @@ data: [DONE]"#;
params.insert("thinking_budget".to_string(), json!(4096));
let config = ModelConfig::new("gemini-2.5-flash")
.unwrap()
.with_request_params(Some(params));
.with_merged_request_params(params);
let result = get_thinking_config(&config);
assert!(result.is_some());
let thinking_config = result.unwrap();
@@ -1415,7 +1424,7 @@ data: [DONE]"#;
params.insert("thinking_budget".to_string(), json!(-1));
let config = ModelConfig::new("gemini-2.5-flash")
.unwrap()
.with_request_params(Some(params));
.with_merged_request_params(params);
let result = get_thinking_config(&config);
assert!(result.is_some());
let thinking_config = result.unwrap();
+113 -12
View File
@@ -5,8 +5,8 @@ use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usa
use crate::providers::errors::ProviderError;
use crate::providers::utils::{
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
ImageFormat,
is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking,
safely_parse_json, sanitize_function_name, ImageFormat,
};
use anyhow::{anyhow, Error};
use async_stream::try_stream;
@@ -1239,8 +1239,17 @@ pub fn create_request_with_options(
));
}
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let is_reasoning_model = is_openai_responses_model(&model_name);
let reasoning_effort = if is_reasoning_model {
model_config
.thinking_effort()
.map_or(legacy_reasoning_effort, |effort| {
openai_reasoning_effort_for_thinking(&model_name, effort)
})
} else {
None
};
let system_message = json!({
"role": if is_reasoning_model { "developer" } else { "system" },
@@ -1299,7 +1308,7 @@ pub fn create_request_with_options(
if let Some(params) = &model_config.request_params {
if let Some(obj) = payload.as_object_mut() {
for (key, value) in params {
if !is_reserved_request_param_key(key) {
if key != "thinking_effort" && !is_reserved_request_param_key(key) {
obj.insert(key.clone(), value.clone());
}
}
@@ -2070,8 +2079,7 @@ mod tests {
fn test_create_request_omits_max_tokens_when_unset() -> anyhow::Result<()> {
// Unknown models on OpenAI-compatible local providers (llama_swap,
// lmstudio) have no canonical record and no GOOSE_MAX_TOKENS, so the
// request must not pin the legacy 4096 default — the server should
// pick its own ceiling. See issue #9007.
// request must not pin the legacy 4096 default. See issue #9007.
let model_config = ModelConfig {
model_name: "some-unknown-local-model".to_string(),
context_limit: None,
@@ -2164,8 +2172,6 @@ mod tests {
#[test]
fn test_create_request_o1_default() -> anyhow::Result<()> {
// Without an explicit effort suffix the API picks its own default;
// we should omit reasoning_effort entirely but still use "developer" role.
let model_config = ModelConfig {
model_name: "o1".to_string(),
context_limit: Some(4096),
@@ -2209,17 +2215,111 @@ mod tests {
}
#[test]
fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> {
// Test custom reasoning effort for O3 model
fn test_create_request_o1_medium_effort() -> anyhow::Result<()> {
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), json!("medium"));
let model_config = ModelConfig {
model_name: "o3-mini-high".to_string(),
model_name: "o1".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: None,
request_params: Some(params),
reasoning: None,
};
let request = create_request(
&model_config,
"system",
&[],
&[],
&ImageFormat::OpenAi,
false,
)?;
let obj = request.as_object().unwrap();
assert_eq!(obj.get("reasoning_effort"), Some(&json!("medium")));
assert!(obj.get("thinking_effort").is_none());
Ok(())
}
/// `thinking_effort = "off"` on `o3` is sent as `reasoning_effort = "none"`,
/// and the `thinking_effort` key itself is not copied into the request.
#[test]
fn test_create_request_o3_off_effort_preserves_none() -> anyhow::Result<()> {
    let request_params =
        std::collections::HashMap::from([("thinking_effort".to_string(), json!("off"))]);
    let model_config = ModelConfig {
        model_name: "o3".to_string(),
        request_params: Some(request_params),
        context_limit: Some(4096),
        max_tokens: Some(1024),
        temperature: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        reasoning: None,
    };

    let payload = create_request(
        &model_config,
        "system",
        &[],
        &[],
        &ImageFormat::OpenAi,
        false,
    )?;

    let fields = payload.as_object().unwrap();
    assert_eq!(fields.get("reasoning_effort"), Some(&json!("none")));
    assert!(fields.get("thinking_effort").is_none());
    Ok(())
}
/// `thinking_effort = "max"` on a dated gpt-5.2-pro model is sent as
/// `reasoning_effort = "high"`, and the `thinking_effort` key itself is not
/// copied into the request.
#[test]
fn test_create_request_gpt5_pro_max_effort_uses_supported_level() -> anyhow::Result<()> {
    let request_params =
        std::collections::HashMap::from([("thinking_effort".to_string(), json!("max"))]);
    let model_config = ModelConfig {
        model_name: "gpt-5.2-pro-2025-12-11".to_string(),
        request_params: Some(request_params),
        context_limit: Some(4096),
        max_tokens: Some(1024),
        temperature: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        reasoning: None,
    };

    let payload = create_request(
        &model_config,
        "system",
        &[],
        &[],
        &ImageFormat::OpenAi,
        false,
    )?;

    let fields = payload.as_object().unwrap();
    assert_eq!(fields.get("reasoning_effort"), Some(&json!("high")));
    assert!(fields.get("thinking_effort").is_none());
    Ok(())
}
#[test]
fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> {
let mut params = std::collections::HashMap::new();
params.insert("thinking_effort".to_string(), json!("high"));
let model_config = ModelConfig {
model_name: "o3-mini".to_string(),
context_limit: Some(4096),
temperature: None,
max_tokens: Some(1024),
toolshim: false,
toolshim_model: None,
fast_model_config: None,
request_params: Some(params),
reasoning: None,
};
let request = create_request(
@@ -2246,6 +2346,7 @@ mod tests {
for (key, value) in expected.as_object().unwrap() {
assert_eq!(obj.get(key).unwrap(), value);
}
assert!(obj.get("thinking_effort").is_none());
Ok(())
}
@@ -2,7 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
use crate::mcp_utils::extract_text_from_resource;
use crate::model::ModelConfig;
use crate::providers::base::{ProviderUsage, Usage};
use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model};
use crate::providers::utils::{
extract_reasoning_effort, is_openai_responses_model, openai_reasoning_effort_for_thinking,
};
use anyhow::{anyhow, Error};
use async_stream::try_stream;
use chrono;
@@ -541,11 +543,26 @@ pub fn create_responses_request(
add_message_items(&mut input_items, messages);
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
// All models routed here are responses-capable; temperature is rejected
// by the API for reasoning models regardless of whether an explicit
// effort suffix was provided.
let is_reasoning_model = is_openai_responses_model(&model_name);
let reasoning_effort = if is_reasoning_model {
if let Some(effort) = legacy_reasoning_effort.as_deref() {
effort
.parse()
.ok()
.and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort))
.or(legacy_reasoning_effort)
} else {
model_config
.thinking_effort()
.and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort))
}
} else {
None
};
let mut payload = json!({
"model": model_name,
@@ -1268,6 +1285,17 @@ mod tests {
}
}
/// A `-high` suffix on the model name is stripped from the `model` field and
/// surfaces as `reasoning.effort`, alongside an `auto` reasoning summary.
#[test]
fn test_responses_request_with_normalized_effort_suffix() {
    let model_config = ModelConfig::new("o3-mini-high").unwrap();

    let request =
        create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap();

    let reasoning = &request["reasoning"];
    assert_eq!(request["model"], "o3-mini");
    assert_eq!(reasoning["effort"], "high");
    assert_eq!(reasoning["summary"], "auto");
}
#[test]
fn test_responses_request_without_effort_suffix_omits_reasoning() {
for model_name in ["gpt-5.4", "o3", "gpt-5-nano"] {
@@ -1294,6 +1322,30 @@ mod tests {
}
}
/// With `GOOSE_THINKING_EFFORT=high` set in the environment, a request for
/// `gpt-4o` still carries no `reasoning` section.
#[test]
fn test_responses_request_non_reasoning_model_ignores_global_thinking_effort() {
    // Keep the lock alive for the whole test so the variable stays set.
    let _env = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
    let model_config = ModelConfig {
        model_name: "gpt-4o".to_string(),
        request_params: None,
        reasoning: None,
        context_limit: None,
        max_tokens: None,
        temperature: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
    };

    let request =
        create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap();

    assert_eq!(request["model"], "gpt-4o");
    assert!(
        request.get("reasoning").is_none(),
        "non-reasoning models should not receive reasoning config"
    );
}
#[test]
fn test_user_image_serialized_in_responses_request() {
use crate::conversation::message::Message;
@@ -1,4 +1,5 @@
use crate::conversation::message::{Message, MessageContent, ProviderMetadata};
use crate::model::{ModelConfig, ThinkingEffort};
use crate::providers::formats::openai;
use rmcp::model::Role;
use serde_json::{json, Value};
@@ -87,9 +88,40 @@ pub fn add_reasoning_details_to_request(payload: &mut Value, messages: &[Message
}
}
/// Translates a unified [`ThinkingEffort`] into the effort string used for
/// OpenRouter requests: `Off` becomes `"none"`, `Max` becomes `"xhigh"`, and
/// the remaining levels keep their lowercase names.
fn reasoning_effort_for_openrouter(effort: ThinkingEffort) -> &'static str {
    match effort {
        ThinkingEffort::Max => "xhigh",
        ThinkingEffort::High => "high",
        ThinkingEffort::Medium => "medium",
        ThinkingEffort::Low => "low",
        ThinkingEffort::Off => "none",
    }
}
pub fn apply_reasoning_config(payload: &mut Value, model_config: &ModelConfig) {
let Some(effort) = model_config.thinking_effort() else {
return;
};
if let Some(obj) = payload.as_object_mut() {
let clamped_effort = obj
.remove("reasoning_effort")
.and_then(|value| value.as_str().map(str::to_owned));
if clamped_effort.is_none() && !model_config.is_reasoning_model() {
return;
}
obj.insert(
"reasoning".to_string(),
json!({ "effort": clamped_effort.as_deref().unwrap_or_else(|| reasoning_effort_for_openrouter(effort)) }),
);
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashMap;
#[test]
fn test_extract_reasoning_details() {
@@ -149,4 +181,89 @@ mod tests {
let details = get_reasoning_details(&tool_request.metadata).unwrap();
assert_eq!(details.len(), 1);
}
/// A `reasoning_effort` already in the payload is moved into the `reasoning`
/// object and wins over the configured `max` effort.
#[test]
fn test_apply_reasoning_config_uses_openrouter_reasoning_object() {
    let mut model_config = ModelConfig::new_or_fail("openai/gpt-5");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));
    let mut request = json!({
        "model": "openai/gpt-5",
        "messages": [],
        "reasoning_effort": "high"
    });

    apply_reasoning_config(&mut request, &model_config);

    assert_eq!(request["reasoning"], json!({ "effort": "high" }));
    assert!(request.get("reasoning_effort").is_none());
}
/// A model explicitly marked `reasoning = Some(true)` receives the configured
/// effort in the `reasoning` object.
#[test]
fn test_apply_reasoning_config_uses_reasoning_metadata() {
    let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4");
    model_config.reasoning = Some(true);
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    let mut request = json!({
        "model": "x-ai/grok-4",
        "messages": []
    });

    apply_reasoning_config(&mut request, &model_config);

    assert_eq!(request["reasoning"], json!({ "effort": "high" }));
}
/// Without an explicit `reasoning` flag, `anthropic/claude-sonnet-4` still
/// receives the configured effort in the `reasoning` object.
#[test]
fn test_apply_reasoning_config_uses_model_detection() {
    let mut model_config = ModelConfig::new_or_fail("anthropic/claude-sonnet-4");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    let mut request = json!({
        "model": "anthropic/claude-sonnet-4",
        "messages": []
    });

    apply_reasoning_config(&mut request, &model_config);

    assert_eq!(request["reasoning"], json!({ "effort": "high" }));
}
/// A model marked `reasoning = Some(false)` gets no `reasoning` object even
/// when a thinking effort is configured.
#[test]
fn test_apply_reasoning_config_skips_non_reasoning_models() {
    let mut model_config = ModelConfig::new_or_fail("openai/gpt-4o");
    model_config.reasoning = Some(false);
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    let mut request = json!({
        "model": "openai/gpt-4o",
        "messages": []
    });

    apply_reasoning_config(&mut request, &model_config);

    assert!(request.get("reasoning").is_none());
}
/// An `off` effort on a reasoning model is sent as `"effort": "none"`.
#[test]
fn test_apply_reasoning_config_off_disables_reasoning() {
    let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4");
    model_config.reasoning = Some(true);
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("off"),
    )]));
    let mut request = json!({
        "model": "x-ai/grok-4",
        "messages": []
    });

    apply_reasoning_config(&mut request, &model_config);

    assert_eq!(request["reasoning"], json!({ "effort": "none" }));
}
}
+1
View File
@@ -278,6 +278,7 @@ impl Provider for OpenRouterProvider {
if is_gemini_model(&model_config.model_name) {
openrouter_format::add_reasoning_details_to_request(&mut payload, messages);
}
openrouter_format::apply_reasoning_config(&mut payload, model_config);
if let Some(obj) = payload.as_object_mut() {
obj.insert("transforms".to_string(), json!(["middle-out"]));
@@ -182,6 +182,7 @@ impl ProviderRegistry {
output_token_cost: m.output_token_cost,
currency: m.currency.clone(),
supports_cache_control: Some(m.supports_cache_control.unwrap_or(false)),
reasoning: m.reasoning,
})
.collect();
+44 -1
View File
@@ -1,7 +1,7 @@
use super::base::Usage;
use super::errors::GoogleErrorCode;
use crate::config::paths::Paths;
use crate::model::ModelConfig;
use crate::model::{ModelConfig, ThinkingEffort};
use crate::providers::errors::ProviderError;
use anyhow::{anyhow, Result};
use base64::Engine;
@@ -237,6 +237,49 @@ pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
(model_name.to_string(), None)
}
/// Maps a unified `ThinkingEffort` onto the reasoning-effort string to use for
/// `model_name`.
///
/// `Off` always maps to `"none"`. Every other level walks a preference list
/// (the requested level first, then fallbacks) and returns the first entry
/// that `openai_reasoning_efforts_for_model` lists for the model. Returns
/// `None` only when no entry of the preference list is listed for the model.
pub fn openai_reasoning_effort_for_thinking(
    model_name: &str,
    effort: ThinkingEffort,
) -> Option<String> {
    // `Off` short-circuits inside the match, so no unreachable arm is needed.
    let fallback_order: [&str; 4] = match effort {
        ThinkingEffort::Off => return Some("none".to_string()),
        ThinkingEffort::Low => ["low", "medium", "high", "xhigh"],
        ThinkingEffort::Medium => ["medium", "high", "low", "xhigh"],
        ThinkingEffort::High => ["high", "medium", "xhigh", "low"],
        ThinkingEffort::Max => ["xhigh", "high", "medium", "low"],
    };

    let available = openai_reasoning_efforts_for_model(model_name);
    fallback_order
        .iter()
        .copied()
        .find(|candidate| available.contains(candidate))
        .map(String::from)
}
/// Returns the reasoning-effort levels this module lists for `model_name`.
///
/// Matching is a case-insensitive substring check:
/// - names without `gpt-5` get `low`/`medium`/`high`;
/// - `gpt-5` names containing `-pro` or `/pro` get only `high`;
/// - `gpt-5` names containing `gpt-5.4`, `gpt-5-4`, `gpt-5.5` or `gpt-5-5`
///   additionally get `xhigh`;
/// - every other `gpt-5` name gets `low`/`medium`/`high`.
fn openai_reasoning_efforts_for_model(model_name: &str) -> &'static [&'static str] {
    const STANDARD: &[&str] = &["low", "medium", "high"];
    const EXTENDED: &[&str] = &["low", "medium", "high", "xhigh"];
    const PRO_ONLY: &[&str] = &["high"];

    let name = model_name.to_ascii_lowercase();
    let mentions_any = |markers: &[&str]| markers.iter().any(|marker| name.contains(*marker));

    if !name.contains("gpt-5") {
        return STANDARD;
    }
    // The pro check comes first, so e.g. a "gpt-5.4-pro" name is still high-only.
    if mentions_any(&["-pro", "/pro"]) {
        return PRO_ONLY;
    }
    if mentions_any(&["gpt-5.4", "gpt-5-4", "gpt-5.5", "gpt-5-5"]) {
        EXTENDED
    } else {
        STANDARD
    }
}
pub fn sanitize_function_name(name: &str) -> String {
static RE: OnceLock<Regex> = OnceLock::new();
let re = RE.get_or_init(|| Regex::new(r"[^a-zA-Z0-9_-]").unwrap());
+1 -1
View File
@@ -330,7 +330,7 @@ GOOSE_PROVIDER=claude-code GOOSE_MODE=approve goose session
| `GOOSE_PROVIDER` | Set to `codex` to use this provider | None |
| `GOOSE_MODEL` | Model to use (only known models are passed to CLI) | `gpt-5.2-codex` |
| `CODEX_COMMAND` | Path to the Codex CLI command | `codex` |
| `GOOSE_THINKING_EFFORT` | Unified thinking effort (`off`, `low`, `medium`, `high`, `max`). Mapped to Codex CLI effort levels (`none/low/medium/high/xhigh`). | `high` |
| `CODEX_REASONING_EFFORT` | Reasoning effort level: `low`, `medium`, `high`, or `xhigh` (`none` is only supported on non-codex models like `gpt-5.2`) | `high` |
| `CODEX_ENABLE_SKILLS` | Enable Codex skills: `true` or `false` | `true` |
| `CODEX_SKIP_GIT_CHECK` | Skip git repository requirement: `true` or `false` | `false` |
+80 -1
View File
@@ -1369,6 +1369,56 @@
}
}
},
"/config/providers/{name}/model-info": {
"post": {
"tags": [
"super::routes::config_management"
],
"operationId": "get_provider_model_info",
"parameters": [
{
"name": "name",
"in": "path",
"description": "Provider name (e.g., openai)",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ProviderModelInfoQuery"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Model metadata fetched successfully",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ModelInfo"
}
}
}
},
"400": {
"description": "Unknown provider, provider not configured, or authentication error"
},
"429": {
"description": "Rate limit exceeded"
},
"500": {
"description": "Internal server error"
}
}
}
},
"/config/providers/{name}/models": {
"get": {
"tags": [
@@ -1394,7 +1444,7 @@
"schema": {
"type": "array",
"items": {
"type": "string"
"$ref": "#/components/schemas/ModelInfo"
}
}
}
@@ -6533,6 +6583,10 @@
"description": "Cost per token for output in USD (optional)",
"nullable": true
},
"reasoning": {
"type": "boolean",
"description": "Whether this model supports reasoning/thinking controls"
},
"supports_cache_control": {
"type": "boolean",
"description": "Whether this model supports cache control",
@@ -6546,6 +6600,7 @@
"provider",
"model",
"context_limit",
"reasoning",
"currency"
],
"properties": {
@@ -6586,6 +6641,9 @@
},
"provider": {
"type": "string"
},
"reasoning": {
"type": "boolean"
}
}
},
@@ -6973,6 +7031,17 @@
}
}
},
"ProviderModelInfoQuery": {
"type": "object",
"required": [
"model"
],
"properties": {
"model": {
"type": "string"
}
}
},
"ProviderTemplate": {
"type": "object",
"required": [
@@ -8568,6 +8637,16 @@
}
}
},
"ThinkingEffort": {
"type": "string",
"enum": [
"off",
"low",
"medium",
"high",
"max"
]
},
"TokenState": {
"type": "object",
"required": [
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+48 -1
View File
@@ -818,6 +818,10 @@ export type ModelInfo = {
* Cost per token for output in USD (optional)
*/
output_token_cost?: number | null;
/**
* Whether this model supports reasoning/thinking controls
*/
reasoning?: boolean;
/**
* Whether this model supports cache control
*/
@@ -834,6 +838,7 @@ export type ModelInfoData = {
model: string;
output_token_cost?: number | null;
provider: string;
reasoning: boolean;
};
export type ModelInfoQuery = {
@@ -1000,6 +1005,10 @@ export type ProviderMetadata = {
setup_steps?: Array<string>;
};
export type ProviderModelInfoQuery = {
model: string;
};
export type ProviderTemplate = {
api_url: string;
doc_url: string;
@@ -1482,6 +1491,8 @@ export type ThinkingContent = {
thinking: string;
};
export type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'max';
export type TokenState = {
accumulatedCost?: number | null;
accumulatedInputTokens: number;
@@ -2728,6 +2739,42 @@ export type CleanupProviderCacheResponses = {
export type CleanupProviderCacheResponse = CleanupProviderCacheResponses[keyof CleanupProviderCacheResponses];
export type GetProviderModelInfoData = {
body: ProviderModelInfoQuery;
path: {
/**
* Provider name (e.g., openai)
*/
name: string;
};
query?: never;
url: '/config/providers/{name}/model-info';
};
export type GetProviderModelInfoErrors = {
/**
* Unknown provider, provider not configured, or authentication error
*/
400: unknown;
/**
* Rate limit exceeded
*/
429: unknown;
/**
* Internal server error
*/
500: unknown;
};
export type GetProviderModelInfoResponses = {
/**
* Model metadata fetched successfully
*/
200: ModelInfo;
};
export type GetProviderModelInfoResponse = GetProviderModelInfoResponses[keyof GetProviderModelInfoResponses];
export type GetProviderModelsData = {
body?: never;
path: {
@@ -2759,7 +2806,7 @@ export type GetProviderModelsResponses = {
/**
* Models fetched successfully
*/
200: Array<string>;
200: Array<ModelInfo>;
};
export type GetProviderModelsResponse = GetProviderModelsResponses[keyof GetProviderModelsResponses];
@@ -108,8 +108,8 @@ export const RecipeModelSelector = ({
const modelList = models || [];
const options = modelList.map((m) => ({
value: m,
label: m,
value: m.name,
label: m.name,
provider: p.name,
}));
@@ -1,4 +1,10 @@
import { ProviderDetails, getProviderModels, listLocalModels } from '../../../api';
import {
ProviderDetails,
ThinkingEffort,
getProviderModelInfo,
getProviderModels,
listLocalModels,
} from '../../../api';
import { errorMessage as getErrorMessage } from '../../../utils/conversionUtils';
export default interface Model {
@@ -9,7 +15,8 @@ export default interface Model {
alias?: string; // optional model display name
subtext?: string; // goes below model name if not the provider
context_limit?: number; // optional context limit override
request_params?: Record<string, unknown>; // provider-specific request parameters
reasoning?: boolean; // optional reasoning/thinking support metadata
request_params?: Record<string, unknown> & { thinking_effort?: ThinkingEffort }; // provider-specific request parameters
}
export function createModelStruct(
@@ -45,7 +52,7 @@ export async function getProviderMetadata(
export interface ProviderModelsResult {
provider: ProviderDetails;
models: string[] | null;
models: Model[] | null;
error: string | null;
warning: string | null;
}
@@ -61,7 +68,7 @@ export async function fetchModelsForProviders(
const allModels = response.data || [];
const downloadedModels = allModels
.filter((m) => m.status.state === 'Downloaded')
.map((m) => m.id);
.map((m) => ({ name: m.id, provider: p.name }) as Model);
return { provider: p, models: downloadedModels, error: null, warning: null };
}
@@ -69,12 +76,28 @@ export async function fetchModelsForProviders(
path: { name: p.name },
throwOnError: true,
});
const models = response.data || [];
const models = (response.data || []).map(
(m) =>
({
name: m.name,
provider: p.name,
context_limit: m.context_limit,
reasoning: m.reasoning ?? undefined,
}) as Model
);
return { provider: p, models, error: null, warning: null };
} catch (e: unknown) {
// For custom providers, fall back to the configured model list
if (p.provider_type === 'Custom') {
const fallbackModels = p.metadata.known_models.map((m) => m.name);
const fallbackModels = p.metadata.known_models.map(
(m) =>
({
name: m.name,
provider: p.name,
context_limit: m.context_limit,
reasoning: m.reasoning ?? undefined,
}) as Model
);
if (fallbackModels.length > 0) {
console.warn(`Failed to fetch models for ${p.name}:`, getErrorMessage(e));
return {
@@ -99,3 +122,19 @@ export async function fetchModelsForProviders(
return await Promise.all(modelPromises);
}
/**
 * Asks the backend whether `model` on `provider` supports reasoning.
 *
 * Resolves to the `reasoning` flag from the model-info response. When the
 * response carries no flag, or the request throws, it resolves to `fallback`
 * if one was given and to `null` otherwise. This function never rejects.
 */
export async function fetchModelReasoning(
  provider: string,
  model: string,
  fallback?: boolean
): Promise<boolean | null> {
  const whenUnknown = fallback ?? null;
  try {
    const { data } = await getProviderModelInfo({
      path: { name: provider },
      body: { model },
    });
    return data?.reasoning ?? whenUnknown;
  } catch {
    // Best-effort lookup: any failure degrades to the caller-supplied fallback.
    return whenUnknown;
  }
}
@@ -17,12 +17,20 @@ import { Select } from '../../../ui/Select';
import { useConfig } from '../../../ConfigContext';
import { useModelAndProvider } from '../../../ModelAndProviderContext';
import type { View } from '../../../../utils/navigationUtils';
import Model, { getProviderMetadata, fetchModelsForProviders } from '../modelInterface';
import Model, {
fetchModelReasoning,
fetchModelsForProviders,
getProviderMetadata,
} from '../modelInterface';
import { getPredefinedModelsFromEnv, shouldShowPredefinedModels } from '../predefinedModelsUtils';
import { ProviderType } from '../../../../api';
import type { ProviderType, ThinkingEffort } from '../../../../api';
import { trackModelChanged } from '../../../../utils/analytics';
const i18n = defineMessages({
thinkingEffortOff: {
id: 'switchModelModal.thinkingEffortOff',
defaultMessage: 'Off - No extended thinking',
},
thinkingLevelLow: {
id: 'switchModelModal.thinkingLevelLow',
defaultMessage: 'Low - Better latency, lighter reasoning',
@@ -185,16 +193,7 @@ const i18n = defineMessages({
},
});
// THINKING_LEVEL_OPTIONS and CLAUDE_THINKING_EFFORT_OPTIONS are created inside the component to support i18n.
function isClaudeModel(name: string | null | undefined): boolean {
return !!name && name.toLowerCase().startsWith('claude-');
}
function supportsAdaptiveThinking(name: string): boolean {
const lower = name.toLowerCase();
return lower.includes('claude-opus-4-6') || lower.includes('claude-sonnet-4-6');
}
// Thinking effort options are created inside the component to support i18n.
const PREFERRED_MODEL_PATTERNS = [
/claude-sonnet-4/i,
@@ -256,12 +255,8 @@ export const SwitchModelModal = ({
}: SwitchModelModalProps) => {
const intl = useIntl();
const THINKING_LEVEL_OPTIONS = [
{ value: 'low', label: intl.formatMessage(i18n.thinkingLevelLow) },
{ value: 'high', label: intl.formatMessage(i18n.thinkingLevelHigh) },
];
const CLAUDE_THINKING_EFFORT_OPTIONS = [
const THINKING_EFFORT_OPTIONS: { value: ThinkingEffort; label: string }[] = [
{ value: 'off', label: intl.formatMessage(i18n.thinkingEffortOff) },
{ value: 'low', label: intl.formatMessage(i18n.claudeEffortLow) },
{ value: 'medium', label: intl.formatMessage(i18n.claudeEffortMedium) },
{ value: 'high', label: intl.formatMessage(i18n.claudeEffortHigh) },
@@ -278,7 +273,13 @@ export const SwitchModelModal = ({
const currentModel = sessionModel ?? configModel;
const currentProvider = sessionProvider ?? configProvider;
const [providerOptions, setProviderOptions] = useState<{ value: string; label: string }[]>([]);
type ModelOption = { value: string; label: string; provider: string; isDisabled?: boolean };
// One selectable entry in the model dropdown.
type ModelOption = {
// Value and label shown for the entry; the model list sets both to the model's name.
value: string;
label: string;
// Name of the provider this model belongs to.
provider: string;
isDisabled?: boolean;
// Reasoning-support flag copied from the fetched model metadata; may be absent.
reasoning?: boolean;
};
const [modelOptions, setModelOptions] = useState<{ options: ModelOption[] }[]>([]);
const [provider, setProvider] = useState<string | null>(
initialProvider || currentProvider || null
@@ -304,43 +305,56 @@ export const SwitchModelModal = ({
import('../../../../api').ProviderDetails[]
>([]);
const fetchedProviders = useRef<Set<string>>(new Set());
const [thinkingLevel, setThinkingLevel] = useState<string>('low');
const [claudeThinkingType, setClaudeThinkingType] = useState<string>('disabled');
const [claudeThinkingEffort, setClaudeThinkingEffort] = useState<string>('high');
const [claudeThinkingBudget, setClaudeThinkingBudget] = useState<string>('16000');
const reasoningRequestId = useRef(0);
const [thinkingEffort, setThinkingEffort] = useState<ThinkingEffort | null>(null);
const [selectedModelReasoning, setSelectedModelReasoning] = useState<boolean | null>(null);
const modelName = usePredefinedModels ? selectedPredefinedModel?.name : model;
const isGemini3Model = modelName?.toLowerCase().startsWith('gemini-3') ?? false;
const showClaudeThinking = isClaudeModel(modelName);
const modelSupportsAdaptive = modelName ? supportsAdaptiveThinking(modelName) : false;
const modelReasoning = selectedModelReasoning ?? selectedPredefinedModel?.reasoning;
const showThinkingControl = modelReasoning === true;
// Resolves whether the given provider/model supports reasoning and stores the
// result in `selectedModelReasoning`. The state is first set to `fallback`
// (or null) and then replaced by the fetched value once it arrives.
const resolveSelectedModelReasoning = useCallback(
  (providerName: string, modelName: string, fallback?: boolean) => {
    // Stamp this lookup so a slower, older response cannot overwrite a newer one.
    reasoningRequestId.current += 1;
    const ticket = reasoningRequestId.current;

    setSelectedModelReasoning(fallback ?? null);

    fetchModelReasoning(providerName, modelName, fallback).then((reasoning) => {
      const isLatestRequest = ticket === reasoningRequestId.current;
      if (isLatestRequest) {
        setSelectedModelReasoning(reasoning);
      }
    });
  },
  []
);
useEffect(() => {
if (!showClaudeThinking) return;
if (claudeThinkingType === 'adaptive' && !modelSupportsAdaptive) {
setClaudeThinkingType('disabled');
}
}, [modelName, showClaudeThinking, modelSupportsAdaptive, claudeThinkingType]);
useEffect(() => {
const readConfig = async (key: string): Promise<string | null> => {
try {
const val = (await read(key, false)) as string;
return val || null;
} catch (e) {
console.warn(`Could not read ${key}, using default:`, e);
return null;
}
};
(async () => {
const tt = await readConfig('CLAUDE_THINKING_TYPE');
if (tt) setClaudeThinkingType(tt);
const effort = await readConfig('CLAUDE_THINKING_EFFORT');
if (effort) setClaudeThinkingEffort(effort);
const budget = await readConfig('CLAUDE_THINKING_BUDGET');
if (budget) setClaudeThinkingBudget(budget);
try {
const effort = (await read('GOOSE_THINKING_EFFORT', false)) as ThinkingEffort;
if (effort) setThinkingEffort(effort);
} catch (e) {
console.warn('Could not read GOOSE_THINKING_EFFORT, using default:', e);
}
})();
}, [read]);
// Keep the reasoning flag in sync with the currently selected provider/model pair.
useEffect(() => {
// Nothing to resolve until both a provider and a model are set.
if (!provider || !model) return;
// Look for the selection among the options already loaded for the dropdown.
const selectedOption = modelOptions
.flatMap((group) => group.options)
.find((option) => option.provider === provider && option.value === model);
if (selectedOption) {
// Listed model: resolve right away, passing the listed flag as the fallback.
resolveSelectedModelReasoning(provider, model, selectedOption.reasoning);
return;
}
// Not in the loaded list (presumably a hand-typed custom model name; confirm):
// clear the flag and wait 400 ms before asking the backend.
setSelectedModelReasoning(null);
const timeout = setTimeout(() => {
resolveSelectedModelReasoning(provider, model);
}, 400);
// Cancel the pending lookup when the effect is cleaned up before the timer fires.
return () => clearTimeout(timeout);
}, [model, provider, modelOptions, resolveSelectedModelReasoning]);
// Validate form data
const validateForm = useCallback(() => {
const errors = {
@@ -393,36 +407,18 @@ export const SwitchModelModal = ({
subtext: providerDisplayName,
} as Model;
}
modelObj = {
...modelObj,
reasoning: selectedModelReasoning ?? modelObj.reasoning,
};
if (isGemini3Model) {
if (showThinkingControl) {
const effort = thinkingEffort ?? modelObj.request_params?.thinking_effort ?? 'off';
modelObj = {
...modelObj,
request_params: { ...modelObj.request_params, thinking_level: thinkingLevel },
request_params: { ...modelObj.request_params, thinking_effort: effort },
};
}
if (showClaudeThinking) {
const params: Record<string, unknown> = {
...modelObj.request_params,
thinking_type: claudeThinkingType,
};
if (claudeThinkingType === 'adaptive') {
params.effort = claudeThinkingEffort;
} else if (claudeThinkingType === 'enabled') {
params.budget_tokens = parseInt(claudeThinkingBudget, 10) || 16000;
}
modelObj = { ...modelObj, request_params: params };
upsert('CLAUDE_THINKING_TYPE', claudeThinkingType, false).catch(console.warn);
if (claudeThinkingType === 'adaptive') {
upsert('CLAUDE_THINKING_EFFORT', claudeThinkingEffort, false).catch(console.warn);
} else if (claudeThinkingType === 'enabled') {
upsert(
'CLAUDE_THINKING_BUDGET',
parseInt(claudeThinkingBudget, 10) || 16000,
false
).catch(console.warn);
}
upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn);
}
const success = await changeModel(sessionId, modelObj);
@@ -450,8 +446,13 @@ export const SwitchModelModal = ({
const matchingModel = models.find((m) => m.name === currentModel);
if (matchingModel) {
setSelectedPredefinedModel(matchingModel);
resolveSelectedModelReasoning(
matchingModel.provider,
matchingModel.name,
matchingModel.reasoning
);
}
}, [usePredefinedModels, currentModel]);
}, [usePredefinedModels, currentModel, resolveSelectedModelReasoning]);
// For manual mode: one-time sync of provider/model when session data
// arrives after the modal has already mounted. Uses a ref so it only
@@ -515,7 +516,7 @@ export const SwitchModelModal = ({
if (cancelled) return;
const newGroupedOptions: {
options: { value: string; label: string; provider: string; providerType: ProviderType }[];
options: (ModelOption & { providerType: ProviderType })[];
}[] = [];
const newErrors: Record<string, string> = {};
const newWarnings: Record<string, string> = {};
@@ -536,11 +537,13 @@ export const SwitchModelModal = ({
label: string;
provider: string;
providerType: ProviderType;
reasoning?: boolean;
}[] = modelList.map((m) => ({
value: m,
label: m,
value: m.name,
label: m.name,
provider: p.name,
providerType: p.provider_type,
reasoning: m.reasoning,
}));
if (p.provider_type !== 'Custom') {
@@ -613,30 +616,51 @@ export const SwitchModelModal = ({
}
}, [provider, modelOptions, loadingModels, model, isCustomModel, userClearedModel, activeProvidersList]);
// Selects a predefined model and starts resolving its reasoning support,
// using the model's own `reasoning` flag as the fallback.
const handlePredefinedModelChange = (model: Model) => {
  const { provider: modelProvider, name: modelName, reasoning: knownReasoning } = model;
  setSelectedPredefinedModel(model);
  resolveSelectedModelReasoning(modelProvider, modelName, knownReasoning);
};
// Handle model selection change
const handleModelChange = (newValue: unknown) => {
const selectedOption = newValue as { value: string; label: string; provider: string } | null;
const selectedOption = newValue as {
value: string;
label: string;
provider: string;
reasoning?: boolean;
} | null;
if (selectedOption?.value === 'custom') {
setIsCustomModel(true);
setModel('');
setProvider(selectedOption.provider);
setSelectedModelReasoning(null);
setUserClearedModel(false);
} else if (selectedOption === null) {
// User cleared the selection
setIsCustomModel(false);
setModel('');
setSelectedModelReasoning(null);
setUserClearedModel(true);
} else {
setIsCustomModel(false);
setModel(selectedOption?.value || '');
setProvider(selectedOption?.provider || '');
if (selectedOption?.provider && selectedOption.value) {
resolveSelectedModelReasoning(
selectedOption.provider,
selectedOption.value,
selectedOption.reasoning
);
} else {
setSelectedModelReasoning(selectedOption?.reasoning ?? null);
}
setUserClearedModel(false);
}
};
// Store the original model options in state, initialized from modelOptions
const [originalModelOptions, setOriginalModelOptions] =
useState<{ options: { value: string; label: string; provider: string }[] }[]>(modelOptions);
useState<{ options: ModelOption[] }[]>(modelOptions);
const handleInputChange = (inputValue: string) => {
if (!provider) return;
@@ -680,54 +704,20 @@ export const SwitchModelModal = ({
}
};
const claudeThinkingTypeOptions = [
...(modelSupportsAdaptive
? [{ value: 'adaptive', label: intl.formatMessage(i18n.claudeAdaptive) }]
: []),
{ value: 'enabled', label: intl.formatMessage(i18n.claudeEnabled) },
{ value: 'disabled', label: intl.formatMessage(i18n.claudeDisabled) },
];
const claudeThinkingControls = showClaudeThinking && (
<div className="mt-2 flex flex-col gap-3">
<div>
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.extendedThinking)}</label>
<Select
options={claudeThinkingTypeOptions}
value={claudeThinkingTypeOptions.find((o) => o.value === claudeThinkingType)}
onChange={(newValue: unknown) => {
const option = newValue as { value: string; label: string } | null;
setClaudeThinkingType(option?.value || 'disabled');
}}
placeholder={intl.formatMessage(i18n.selectThinkingMode)}
/>
</div>
{claudeThinkingType === 'adaptive' && (
<div>
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.thinkingEffort)}</label>
<Select
options={CLAUDE_THINKING_EFFORT_OPTIONS}
value={CLAUDE_THINKING_EFFORT_OPTIONS.find((o) => o.value === claudeThinkingEffort)}
onChange={(newValue: unknown) => {
const option = newValue as { value: string; label: string } | null;
setClaudeThinkingEffort(option?.value || 'high');
}}
placeholder={intl.formatMessage(i18n.selectEffortLevel)}
/>
</div>
)}
{claudeThinkingType === 'enabled' && (
<div>
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.thinkingBudget)}</label>
<Input
className="border-2 px-4 py-2"
type="number"
min="1024"
value={claudeThinkingBudget}
onChange={(e) => setClaudeThinkingBudget(e.target.value)}
/>
</div>
)}
// Thinking-effort dropdown. It evaluates to `false` unless `showThinkingControl`
// is true. With no effort chosen yet (`thinkingEffort` is null) the 'off' option
// is displayed, and clearing the select also falls back to 'off'.
const thinkingEffortControl = showThinkingControl && (
<div className="mt-2">
<label className="text-sm text-textSubtle mb-1 block">
{intl.formatMessage(i18n.thinkingEffort)}
</label>
<Select
options={THINKING_EFFORT_OPTIONS}
value={THINKING_EFFORT_OPTIONS.find((o) => o.value === (thinkingEffort ?? 'off'))}
onChange={(newValue: unknown) => {
const option = newValue as { value: ThinkingEffort; label: string } | null;
setThinkingEffort(option?.value || 'off');
}}
placeholder={intl.formatMessage(i18n.selectEffortLevel)}
/>
</div>
);
@@ -760,7 +750,7 @@ export const SwitchModelModal = ({
? 'bg-background-secondary'
: 'bg-background-primary hover:bg-background-secondary'
} rounded-lg transition-all`}
onClick={() => setSelectedPredefinedModel(model)}
onClick={() => handlePredefinedModelChange(model)}
>
<div className="flex-1">
<div className="flex items-center justify-between">
@@ -786,7 +776,7 @@ export const SwitchModelModal = ({
name="predefined-model"
value={model.name}
checked={selectedPredefinedModel?.name === model.name}
onChange={() => setSelectedPredefinedModel(model)}
onChange={() => handlePredefinedModelChange(model)}
className="peer sr-only"
/>
<div
@@ -805,25 +795,7 @@ export const SwitchModelModal = ({
<div className="text-red-500 text-sm mt-1">{validationErrors.model}</div>
)}
{isGemini3Model && (
<div className="mt-2">
<label className="text-sm text-textSubtle mb-1 block">
{intl.formatMessage(i18n.thinkingLevel)}
<span className="text-xs text-textMuted ml-2">{intl.formatMessage(i18n.geminiOnly)}</span>
</label>
<Select
options={THINKING_LEVEL_OPTIONS}
value={THINKING_LEVEL_OPTIONS.find((o) => o.value === thinkingLevel)}
onChange={(newValue: unknown) => {
const option = newValue as { value: string; label: string } | null;
setThinkingLevel(option?.value || 'low');
}}
placeholder={intl.formatMessage(i18n.selectThinkingLevel)}
/>
</div>
)}
{claudeThinkingControls}
{thinkingEffortControl}
</div>
) : (
/* Manual Provider/Model Selection */
@@ -970,25 +942,7 @@ export const SwitchModelModal = ({
</div>
)}
{isGemini3Model && (
<div className="mt-2">
<label className="text-sm text-textSubtle mb-1 block">
Thinking Level
<span className="text-xs text-textMuted ml-2">(Gemini 3 models only)</span>
</label>
<Select
options={THINKING_LEVEL_OPTIONS}
value={THINKING_LEVEL_OPTIONS.find((o) => o.value === thinkingLevel)}
onChange={(newValue: unknown) => {
const option = newValue as { value: string; label: string } | null;
setThinkingLevel(option?.value || 'low');
}}
placeholder="Select thinking level"
/>
</div>
)}
{claudeThinkingControls}
{thinkingEffortControl}
</>
)}
</div>
+3
View File
@@ -4475,6 +4475,9 @@
"switchModelModal.thinkingEffort": {
"defaultMessage": "Thinking Effort"
},
"switchModelModal.thinkingEffortOff": {
"defaultMessage": "Off - No extended thinking"
},
"switchModelModal.thinkingLevel": {
"defaultMessage": "Thinking Level"
},