mirror of
https://github.com/aaif-goose/goose.git
synced 2026-06-01 22:09:18 +02:00
Add unified thinking effort control across all providers (#9242)
Signed-off-by: jh-block <jhugo@block.xyz> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -23,8 +23,6 @@ use goose::model::ModelConfig;
|
||||
#[cfg(feature = "telemetry")]
|
||||
use goose::posthog::{get_telemetry_choice, TELEMETRY_ENABLED_KEY};
|
||||
use goose::providers::base::ConfigKey;
|
||||
use goose::providers::chatgpt_codex::reasoning_levels_for_model;
|
||||
use goose::providers::formats::anthropic::supports_adaptive_thinking;
|
||||
use goose::providers::provider_test::test_provider_configuration;
|
||||
use goose::providers::{create, providers, retry_operation, RetryConfig};
|
||||
use goose::session::SessionType;
|
||||
@@ -738,15 +736,13 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
|
||||
|
||||
let spin = spinner();
|
||||
spin.start("Attempting to fetch supported models...");
|
||||
let models_res = {
|
||||
let temp_model_config =
|
||||
ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
|
||||
let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
|
||||
retry_operation(&RetryConfig::default(), || async {
|
||||
temp_provider.fetch_recommended_models().await
|
||||
})
|
||||
.await
|
||||
};
|
||||
let temp_model_config =
|
||||
ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
|
||||
let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
|
||||
let models_res = retry_operation(&RetryConfig::default(), || async {
|
||||
temp_provider.fetch_recommended_models().await
|
||||
})
|
||||
.await;
|
||||
spin.stop(style("Model fetch complete").green());
|
||||
|
||||
// Select a model: on fetch error show styled error and abort; if models available, show list; otherwise free-text input
|
||||
@@ -766,78 +762,24 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
|
||||
}
|
||||
};
|
||||
|
||||
if model.to_lowercase().starts_with("gemini-3") {
|
||||
let thinking_level: &str = cliclack::select("Select thinking level for Gemini 3:")
|
||||
.item("low", "Low - Better latency, lighter reasoning", "")
|
||||
.item("high", "High - Deeper reasoning, higher latency", "")
|
||||
.interact()?;
|
||||
config.set_gemini3_thinking_level(thinking_level)?;
|
||||
}
|
||||
{
|
||||
let supports_thinking = match temp_provider.fetch_model_info(&model).await {
|
||||
Ok(model_info) => model_info.reasoning,
|
||||
Err(_) => goose::model::ModelConfig::new(&model)
|
||||
.map(|c| c.is_reasoning_model())
|
||||
.unwrap_or(false),
|
||||
};
|
||||
|
||||
if model.to_lowercase().starts_with("claude-") {
|
||||
let supports_adaptive = supports_adaptive_thinking(&model);
|
||||
|
||||
let mut thinking_select = cliclack::select("Select extended thinking mode for Claude:");
|
||||
if supports_adaptive {
|
||||
thinking_select = thinking_select.item(
|
||||
"adaptive",
|
||||
"Adaptive - Claude decides when and how much to think (recommended)",
|
||||
"",
|
||||
);
|
||||
}
|
||||
thinking_select = thinking_select
|
||||
.item("enabled", "Enabled - Fixed token budget for thinking", "")
|
||||
.item("disabled", "Disabled - No extended thinking", "");
|
||||
if supports_adaptive {
|
||||
thinking_select = thinking_select.initial_value("adaptive");
|
||||
} else {
|
||||
thinking_select = thinking_select.initial_value("disabled");
|
||||
}
|
||||
let thinking_type: &str = thinking_select.interact()?;
|
||||
config.set_claude_thinking_type(thinking_type)?;
|
||||
|
||||
if thinking_type == "adaptive" {
|
||||
let effort: &str = cliclack::select("Select adaptive thinking effort level:")
|
||||
.item("low", "Low - Minimal thinking, fastest responses", "")
|
||||
if supports_thinking {
|
||||
let effort: &str = cliclack::select("Select thinking effort:")
|
||||
.item("off", "Off - No extended thinking", "")
|
||||
.item("low", "Low - Better latency, lighter reasoning", "")
|
||||
.item("medium", "Medium - Moderate thinking", "")
|
||||
.item("high", "High - Deep reasoning (default)", "")
|
||||
.item(
|
||||
"max",
|
||||
"Max - No constraints on thinking depth (Opus 4.6 only)",
|
||||
"",
|
||||
)
|
||||
.initial_value("high")
|
||||
.item("high", "High - Deep reasoning", "")
|
||||
.item("max", "Max - No constraints on thinking depth", "")
|
||||
.initial_value("off")
|
||||
.interact()?;
|
||||
config.set_claude_thinking_effort(effort)?;
|
||||
} else if thinking_type == "enabled" {
|
||||
let budget: String = cliclack::input("Enter thinking budget (tokens):")
|
||||
.default_input("16000")
|
||||
.validate(|input: &String| match input.parse::<i32>() {
|
||||
Ok(n) if n > 0 => Ok(()),
|
||||
_ => Err("Please enter a valid positive number"),
|
||||
})
|
||||
.interact()?;
|
||||
config.set_claude_thinking_budget(budget.parse::<i32>()?)?;
|
||||
}
|
||||
}
|
||||
|
||||
if provider_name == "chatgpt_codex" {
|
||||
let valid_levels = reasoning_levels_for_model(&model);
|
||||
if !valid_levels.is_empty() {
|
||||
let mut select = cliclack::select("Select reasoning effort level:");
|
||||
for &level in valid_levels {
|
||||
let description = match level {
|
||||
"low" => "Low - Fast responses with lighter reasoning",
|
||||
"medium" => "Medium - Balances speed and reasoning depth for everyday tasks",
|
||||
"high" => "High - Greater reasoning depth for complex problems",
|
||||
"xhigh" => "Extra High - Extra high reasoning depth for complex problems",
|
||||
_ => "",
|
||||
};
|
||||
select = select.item(level, description, "");
|
||||
}
|
||||
select = select.initial_value("medium");
|
||||
let effort: &str = select.interact()?;
|
||||
config.set_chatgpt_codex_reasoning_effort(effort.to_string())?;
|
||||
config.set_goose_thinking_effort(effort)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -268,6 +268,7 @@ fn resolve_provider_and_model(
|
||||
.is_some_and(|mc| mc.model_name == model_name)
|
||||
{
|
||||
let mut config = saved_model_config.unwrap();
|
||||
config.normalize_effort_suffix();
|
||||
if let Some(temp) = recipe_settings.and_then(|s| s.temperature) {
|
||||
config = config.with_temperature(Some(temp));
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use goose::config::permission::PermissionLevel;
|
||||
use goose::config::ExtensionEntry;
|
||||
use goose::conversation::Conversation;
|
||||
use goose::download_manager::{DownloadProgress, DownloadStatus};
|
||||
use goose::model::ModelConfig;
|
||||
use goose::model::{ModelConfig, ThinkingEffort};
|
||||
use goose::permission::permission_confirmation::{Permission, PrincipalType};
|
||||
use goose::providers::base::{ConfigKey, ModelInfo, ProviderMetadata, ProviderType};
|
||||
use goose::session::{Session, SessionInsights, SessionType, SystemInfo};
|
||||
@@ -397,6 +397,7 @@ derive_utoipa!(IconTheme as IconThemeSchema);
|
||||
super::routes::config_management::read_all_config,
|
||||
super::routes::config_management::providers,
|
||||
super::routes::config_management::get_provider_models,
|
||||
super::routes::config_management::get_provider_model_info,
|
||||
super::routes::config_management::get_slash_commands,
|
||||
super::routes::config_management::upsert_permissions,
|
||||
super::routes::config_management::create_custom_provider,
|
||||
@@ -573,6 +574,8 @@ derive_utoipa!(IconTheme as IconThemeSchema);
|
||||
PrincipalType,
|
||||
ModelInfo,
|
||||
ModelConfig,
|
||||
ThinkingEffort,
|
||||
super::routes::config_management::ProviderModelInfoQuery,
|
||||
Session,
|
||||
goose::config::goose_mode::GooseMode,
|
||||
SessionInsights,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::routes::config_management::resolve_provider_model_info;
|
||||
use crate::routes::errors::ErrorResponse;
|
||||
use crate::routes::recipe_utils::{
|
||||
apply_recipe_to_agent, build_recipe_with_parameter_values, load_recipe_by_id, validate_recipe,
|
||||
@@ -595,7 +596,7 @@ async fn update_agent_provider(
|
||||
}
|
||||
};
|
||||
|
||||
let model_config = ModelConfig::new(&model)
|
||||
let mut model_config = ModelConfig::new(&model)
|
||||
.map_err(|e| {
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
@@ -603,8 +604,15 @@ async fn update_agent_provider(
|
||||
)
|
||||
})?
|
||||
.with_canonical_limits(&payload.provider)
|
||||
.with_context_limit(payload.context_limit)
|
||||
.with_request_params(payload.request_params);
|
||||
.with_context_limit(payload.context_limit);
|
||||
|
||||
if let Some(request_params) = payload.request_params {
|
||||
model_config = model_config.with_merged_request_params(request_params);
|
||||
}
|
||||
let model_info = resolve_provider_model_info(&payload.provider, &model)
|
||||
.await
|
||||
.map_err(|e| (e.status, e.message))?;
|
||||
model_config.reasoning = Some(model_info.reasoning);
|
||||
|
||||
let extensions =
|
||||
EnabledExtensionsState::for_session(state.session_manager(), &payload.session_id, config)
|
||||
|
||||
@@ -13,7 +13,7 @@ use goose::config::ExtensionEntry;
|
||||
use goose::config::{Config, ConfigError};
|
||||
use goose::custom_requests::SourceType;
|
||||
use goose::model::ModelConfig;
|
||||
use goose::providers::base::{ProviderMetadata, ProviderType};
|
||||
use goose::providers::base::{ModelInfo, ProviderMetadata, ProviderType};
|
||||
use goose::providers::canonical::maybe_get_canonical_model;
|
||||
use goose::providers::catalog::{
|
||||
get_provider_template, get_providers_by_format, ProviderCatalogEntry, ProviderFormat,
|
||||
@@ -418,7 +418,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
|
||||
("name" = String, Path, description = "Provider name (e.g., openai)")
|
||||
),
|
||||
responses(
|
||||
(status = 200, description = "Models fetched successfully", body = [String]),
|
||||
(status = 200, description = "Models fetched successfully", body = [ModelInfo]),
|
||||
(status = 400, description = "Unknown provider, provider not configured, or authentication error"),
|
||||
(status = 429, description = "Rate limit exceeded"),
|
||||
(status = 500, description = "Internal server error")
|
||||
@@ -426,7 +426,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
|
||||
)]
|
||||
pub async fn get_provider_models(
|
||||
Path(name): Path<String>,
|
||||
) -> Result<Json<Vec<String>>, ErrorResponse> {
|
||||
) -> Result<Json<Vec<ModelInfo>>, ErrorResponse> {
|
||||
let all = get_providers().await.into_iter().collect::<Vec<_>>();
|
||||
let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else {
|
||||
return Err(ErrorResponse::bad_request(format!(
|
||||
@@ -444,7 +444,7 @@ pub async fn get_provider_models(
|
||||
let model_config = ModelConfig::new(&metadata.default_model)?.with_canonical_limits(&name);
|
||||
let provider = goose::providers::create(&name, model_config, Vec::new()).await?;
|
||||
|
||||
let models_result = provider.fetch_recommended_models().await;
|
||||
let models_result = provider.fetch_recommended_model_info().await;
|
||||
|
||||
match models_result {
|
||||
Ok(models) => Ok(Json(models)),
|
||||
@@ -452,6 +452,70 @@ pub async fn get_provider_models(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize, ToSchema)]
|
||||
pub struct ProviderModelInfoQuery {
|
||||
pub model: String,
|
||||
}
|
||||
|
||||
pub async fn resolve_provider_model_info(
|
||||
name: &str,
|
||||
model: &str,
|
||||
) -> Result<ModelInfo, ErrorResponse> {
|
||||
let all = get_providers().await.into_iter().collect::<Vec<_>>();
|
||||
let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else {
|
||||
return Err(ErrorResponse::bad_request(format!(
|
||||
"Unknown provider: {}",
|
||||
name
|
||||
)));
|
||||
};
|
||||
if !check_provider_configured(&metadata, provider_type) {
|
||||
return Err(ErrorResponse::bad_request(format!(
|
||||
"Provider '{}' is not configured",
|
||||
name
|
||||
)));
|
||||
}
|
||||
|
||||
let model_config = ModelConfig::new(model)?.with_canonical_limits(name);
|
||||
let provider = goose::providers::create(name, model_config.clone(), Vec::new()).await?;
|
||||
match provider.fetch_model_info(model).await {
|
||||
Ok(info) => Ok(info),
|
||||
Err(error) => {
|
||||
let mut info = ModelInfo::new(model, model_config.context_limit());
|
||||
info.reasoning = model_config.is_reasoning_model();
|
||||
tracing::debug!(
|
||||
provider = name,
|
||||
model,
|
||||
error = %error,
|
||||
"Falling back to local model metadata"
|
||||
);
|
||||
Ok(info)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[utoipa::path(
|
||||
post,
|
||||
path = "/config/providers/{name}/model-info",
|
||||
params(
|
||||
("name" = String, Path, description = "Provider name (e.g., openai)")
|
||||
),
|
||||
request_body = ProviderModelInfoQuery,
|
||||
responses(
|
||||
(status = 200, description = "Model metadata fetched successfully", body = ModelInfo),
|
||||
(status = 400, description = "Unknown provider, provider not configured, or authentication error"),
|
||||
(status = 429, description = "Rate limit exceeded"),
|
||||
(status = 500, description = "Internal server error")
|
||||
)
|
||||
)]
|
||||
pub async fn get_provider_model_info(
|
||||
Path(name): Path<String>,
|
||||
Json(query): Json<ProviderModelInfoQuery>,
|
||||
) -> Result<Json<ModelInfo>, ErrorResponse> {
|
||||
resolve_provider_model_info(&name, &query.model)
|
||||
.await
|
||||
.map(Json)
|
||||
}
|
||||
|
||||
#[derive(Deserialize, utoipa::IntoParams)]
|
||||
pub struct SlashCommandsQuery {
|
||||
/// Optional working directory to discover local skills from
|
||||
@@ -523,6 +587,7 @@ pub struct ModelInfoData {
|
||||
pub model: String,
|
||||
pub context_limit: usize,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub reasoning: bool,
|
||||
pub input_token_cost: Option<f64>,
|
||||
pub output_token_cost: Option<f64>,
|
||||
pub cache_read_token_cost: Option<f64>,
|
||||
@@ -560,6 +625,9 @@ pub async fn get_canonical_model_info(
|
||||
model: query.model.clone(),
|
||||
context_limit: canonical_model.limit.context,
|
||||
max_output_tokens: canonical_model.limit.output,
|
||||
reasoning: canonical_model
|
||||
.reasoning
|
||||
.unwrap_or_else(|| ModelConfig::new_or_fail(&query.model).is_reasoning_model()),
|
||||
// Costs are per million tokens - client handles division for display
|
||||
input_token_cost: canonical_model.cost.input,
|
||||
output_token_cost: canonical_model.cost.output,
|
||||
@@ -926,6 +994,10 @@ pub fn routes(state: Arc<AppState>) -> Router {
|
||||
.route("/config/extensions/{name}", delete(remove_extension))
|
||||
.route("/config/providers", get(providers))
|
||||
.route("/config/providers/{name}/models", get(get_provider_models))
|
||||
.route(
|
||||
"/config/providers/{name}/model-info",
|
||||
post(get_provider_model_info),
|
||||
)
|
||||
.route("/config/provider-catalog", get(get_provider_catalog))
|
||||
.route(
|
||||
"/config/provider-catalog/{id}",
|
||||
|
||||
@@ -1039,6 +1039,34 @@ async fn resolve_provider_and_model_from_config(
|
||||
Ok((provider_name, model_config))
|
||||
}
|
||||
|
||||
fn with_preserved_session_request_params(
|
||||
mut model_config: crate::model::ModelConfig,
|
||||
current_model_config: Option<&crate::model::ModelConfig>,
|
||||
request_params: Option<HashMap<String, serde_json::Value>>,
|
||||
) -> crate::model::ModelConfig {
|
||||
let has_model_effort = model_config
|
||||
.request_params
|
||||
.as_ref()
|
||||
.and_then(|params| params.get("thinking_effort"))
|
||||
.is_some();
|
||||
if !has_model_effort {
|
||||
if let Some(thinking_effort) = current_model_config
|
||||
.and_then(|config| config.request_params.as_ref())
|
||||
.and_then(|params| params.get("thinking_effort"))
|
||||
.cloned()
|
||||
{
|
||||
model_config = model_config.with_merged_request_params(HashMap::from([(
|
||||
"thinking_effort".into(),
|
||||
thinking_effort,
|
||||
)]));
|
||||
}
|
||||
}
|
||||
if let Some(request_params) = request_params {
|
||||
model_config = model_config.with_merged_request_params(request_params);
|
||||
}
|
||||
model_config
|
||||
}
|
||||
|
||||
/// Convenience wrapper: reads config from disk, then resolves provider + model.
|
||||
/// Cheap enough to call from `on_new_session` (file + registry reads, no network).
|
||||
async fn resolve_provider_and_model(
|
||||
@@ -3223,11 +3251,14 @@ impl GooseAcpAgent {
|
||||
.await
|
||||
.internal_err_ctx("Failed to get provider")?;
|
||||
let provider_name = current_provider.get_name().to_string();
|
||||
let current_model_config = current_provider.get_model_config();
|
||||
let extensions =
|
||||
EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await;
|
||||
let model_config = crate::model::ModelConfig::new(model_id)
|
||||
.invalid_params_err_ctx("Invalid model config")?
|
||||
.with_canonical_limits(&provider_name);
|
||||
let model_config =
|
||||
with_preserved_session_request_params(model_config, Some(¤t_model_config), None);
|
||||
let session = self
|
||||
.session_manager
|
||||
.get_session(session_id, false)
|
||||
@@ -3333,7 +3364,8 @@ impl GooseAcpAgent {
|
||||
.await
|
||||
.internal_err_ctx("Failed to get provider")?;
|
||||
let current_provider_name = current_provider.get_name();
|
||||
let current_model = current_provider.get_model_config().model_name;
|
||||
let current_model_config = current_provider.get_model_config();
|
||||
let current_model = current_model_config.model_name.clone();
|
||||
let has_default_overrides =
|
||||
model_name.is_some() || context_limit.is_some() || request_params.is_some();
|
||||
let use_default_provider = provider_name == DEFAULT_PROVIDER_ID;
|
||||
@@ -3357,11 +3389,15 @@ impl GooseAcpAgent {
|
||||
current_model
|
||||
};
|
||||
let model = model_name.unwrap_or(&default_model);
|
||||
let model_config = crate::model::ModelConfig::new(model)
|
||||
let mut model_config = crate::model::ModelConfig::new(model)
|
||||
.invalid_params_err_ctx("Invalid model config")?
|
||||
.with_canonical_limits(&resolved_provider_name)
|
||||
.with_context_limit(context_limit)
|
||||
.with_request_params(request_params);
|
||||
.with_context_limit(context_limit);
|
||||
model_config = with_preserved_session_request_params(
|
||||
model_config,
|
||||
(!is_changing_provider).then_some(¤t_model_config),
|
||||
request_params,
|
||||
);
|
||||
|
||||
let extensions =
|
||||
EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await;
|
||||
|
||||
@@ -1098,7 +1098,6 @@ config_value!(CLAUDE_CODE_COMMAND, String, "claude");
|
||||
config_value!(GEMINI_CLI_COMMAND, String, "gemini");
|
||||
config_value!(CURSOR_AGENT_COMMAND, String, "cursor-agent");
|
||||
config_value!(CODEX_COMMAND, String, "codex");
|
||||
config_value!(CODEX_REASONING_EFFORT, String, "high");
|
||||
config_value!(CODEX_ENABLE_SKILLS, String, "true");
|
||||
config_value!(CODEX_SKIP_GIT_CHECK, String, "false");
|
||||
config_value!(CHATGPT_CODEX_REASONING_EFFORT, String, "medium");
|
||||
@@ -1137,12 +1136,48 @@ config_value!(GOOSE_PROMPT_EDITOR_ALWAYS, Option<bool>);
|
||||
config_value!(GOOSE_MAX_ACTIVE_AGENTS, usize);
|
||||
config_value!(GOOSE_DISABLE_SESSION_NAMING, bool);
|
||||
config_value!(GOOSE_DISABLE_TOOL_CALL_SUMMARY, bool);
|
||||
config_value!(GEMINI3_THINKING_LEVEL, String);
|
||||
config_value!(CLAUDE_THINKING_TYPE, String);
|
||||
config_value!(CLAUDE_THINKING_EFFORT, String);
|
||||
config_value!(CLAUDE_THINKING_BUDGET, i32);
|
||||
config_value!(GOOSE_THINKING_EFFORT, String);
|
||||
config_value!(GOOSE_DEFAULT_EXTENSION_TIMEOUT, u64);
|
||||
|
||||
/// Locates the directory that serves as root for workspace-level files.
///
/// Walks upward from the running executable and returns the first ancestor
/// directory whose `Cargo.toml` contains the text `[workspace]`. When no such
/// ancestor exists, the directory containing the executable is returned.
///
/// Returns `None` only if the executable path cannot be determined or has no
/// parent directory.
fn find_workspace_or_exe_root() -> Option<PathBuf> {
    let exe = std::env::current_exe().ok()?;
    let exe_dir = exe.parent()?.to_path_buf();

    // `ancestors()` starts with the path itself; skip it so only real parent
    // directories are probed.
    let workspace_root = exe.ancestors().skip(1).find(|dir| {
        let manifest = dir.join("Cargo.toml");
        manifest.exists()
            && std::fs::read_to_string(&manifest)
                .is_ok_and(|content| content.contains("[workspace]"))
    });

    Some(match workspace_root {
        Some(dir) => dir.to_path_buf(),
        None => exe_dir,
    })
}
|
||||
|
||||
pub fn load_init_config_from_workspace() -> Result<Mapping, ConfigError> {
|
||||
let root = find_workspace_or_exe_root().ok_or_else(|| {
|
||||
ConfigError::FileError(std::io::Error::new(
|
||||
std::io::ErrorKind::NotFound,
|
||||
"Could not determine executable path",
|
||||
))
|
||||
})?;
|
||||
|
||||
let init_config_path = root.join("init-config.yaml");
|
||||
if !init_config_path.exists() {
|
||||
return Err(ConfigError::NotFound(
|
||||
"init-config.yaml not found".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let init_content = std::fs::read_to_string(&init_config_path)?;
|
||||
parse_yaml_content(&init_content)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
+455
-14
@@ -1,12 +1,51 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::de::Deserializer;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use thiserror::Error;
|
||||
use utoipa::ToSchema;
|
||||
|
||||
pub const DEFAULT_CONTEXT_LIMIT: usize = 128_000;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ThinkingEffort {
|
||||
Off,
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
Max,
|
||||
}
|
||||
|
||||
impl FromStr for ThinkingEffort {
|
||||
type Err = String;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"off" | "disabled" | "none" => Ok(Self::Off),
|
||||
"low" => Ok(Self::Low),
|
||||
"medium" | "med" => Ok(Self::Medium),
|
||||
"high" => Ok(Self::High),
|
||||
"max" | "xhigh" => Ok(Self::Max),
|
||||
other => Err(format!("unknown thinking effort: '{other}'")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ThinkingEffort {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Off => write!(f, "off"),
|
||||
Self::Low => write!(f, "low"),
|
||||
Self::Medium => write!(f, "medium"),
|
||||
Self::High => write!(f, "high"),
|
||||
Self::Max => write!(f, "max"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct PredefinedModel {
|
||||
name: String,
|
||||
@@ -44,7 +83,7 @@ pub enum ConfigError {
|
||||
InvalidRange(String, String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
|
||||
#[derive(Debug, Clone, Default, Serialize, ToSchema)]
|
||||
pub struct ModelConfig {
|
||||
pub model_name: String,
|
||||
pub context_limit: Option<usize>,
|
||||
@@ -61,6 +100,44 @@ pub struct ModelConfig {
|
||||
pub reasoning: Option<bool>,
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for ModelConfig {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
struct RawModelConfig {
|
||||
model_name: String,
|
||||
context_limit: Option<usize>,
|
||||
temperature: Option<f32>,
|
||||
max_tokens: Option<i32>,
|
||||
toolshim: bool,
|
||||
toolshim_model: Option<String>,
|
||||
#[serde(default)]
|
||||
fast_model_config: Option<Box<ModelConfig>>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
request_params: Option<HashMap<String, Value>>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
reasoning: Option<bool>,
|
||||
}
|
||||
|
||||
let raw = RawModelConfig::deserialize(deserializer)?;
|
||||
let mut config = Self {
|
||||
model_name: raw.model_name,
|
||||
context_limit: raw.context_limit,
|
||||
temperature: raw.temperature,
|
||||
max_tokens: raw.max_tokens,
|
||||
toolshim: raw.toolshim,
|
||||
toolshim_model: raw.toolshim_model,
|
||||
fast_model_config: raw.fast_model_config,
|
||||
request_params: raw.request_params,
|
||||
reasoning: raw.reasoning,
|
||||
};
|
||||
config.normalize_effort_suffix();
|
||||
Ok(config)
|
||||
}
|
||||
}
|
||||
|
||||
impl ModelConfig {
|
||||
pub fn new(model_name: &str) -> Result<Self, ConfigError> {
|
||||
Self::new_base(model_name.to_string(), None)
|
||||
@@ -114,13 +191,14 @@ impl ModelConfig {
|
||||
let toolshim = Self::parse_toolshim()?;
|
||||
let toolshim_model = Self::parse_toolshim_model()?;
|
||||
|
||||
// Pick up request_params from predefined models (always applies)
|
||||
// Pick up predefined model settings before legacy suffix normalization.
|
||||
let predefined = find_predefined_model(&model_name);
|
||||
let predefined_context_limit = predefined.as_ref().and_then(|pm| pm.context_limit);
|
||||
let request_params = predefined.and_then(|pm| pm.request_params);
|
||||
|
||||
Ok(Self {
|
||||
let mut config = Self {
|
||||
model_name,
|
||||
context_limit,
|
||||
context_limit: context_limit.or(predefined_context_limit),
|
||||
temperature,
|
||||
max_tokens,
|
||||
toolshim,
|
||||
@@ -128,7 +206,9 @@ impl ModelConfig {
|
||||
fast_model_config: None,
|
||||
request_params,
|
||||
reasoning: None,
|
||||
})
|
||||
};
|
||||
config.normalize_effort_suffix();
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
pub fn with_canonical_limits(mut self, provider_name: &str) -> Self {
|
||||
@@ -298,8 +378,17 @@ impl ModelConfig {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn with_request_params(mut self, params: Option<HashMap<String, Value>>) -> Self {
|
||||
self.request_params = params;
|
||||
/// Merges `params` into this config's request params and returns the config.
///
/// The map is created if the config had none. Entries in `params` replace
/// existing entries with the same key; all other existing entries are kept.
pub fn with_merged_request_params(mut self, params: HashMap<String, Value>) -> Self {
    self.request_params
        .get_or_insert_with(HashMap::new)
        .extend(params);
    self
}
|
||||
|
||||
@@ -319,6 +408,21 @@ impl ModelConfig {
|
||||
crate::providers::utils::is_openai_responses_model(&self.model_name)
|
||||
}
|
||||
|
||||
pub fn is_reasoning_model(&self) -> bool {
|
||||
if let Some(reasoning) = self.reasoning {
|
||||
return reasoning;
|
||||
}
|
||||
|
||||
self.is_openai_reasoning_model()
|
||||
|| self.model_name.to_lowercase().contains("claude")
|
||||
|| Self::is_gemini3_reasoning_model_name(&self.model_name)
|
||||
}
|
||||
|
||||
/// Returns `true` when `model_name` (case-insensitive) starts with `gemini-3`
/// or contains `gemini-3` directly after a `/` or `-`.
fn is_gemini3_reasoning_model_name(model_name: &str) -> bool {
    let lower = model_name.to_lowercase();
    lower.starts_with("gemini-3")
        || ["/gemini-3", "-gemini-3"]
            .iter()
            .any(|&marker| lower.contains(marker))
}
|
||||
|
||||
pub fn max_output_tokens(&self) -> i32 {
|
||||
if let Some(tokens) = self.max_tokens {
|
||||
return tokens;
|
||||
@@ -327,6 +431,82 @@ impl ModelConfig {
|
||||
4_096
|
||||
}
|
||||
|
||||
pub fn normalize_effort_suffix(&mut self) {
|
||||
if !self.is_openai_reasoning_model() {
|
||||
return;
|
||||
}
|
||||
let parts: Vec<&str> = self.model_name.split('-').collect();
|
||||
let last = match parts.last() {
|
||||
Some(l) => *l,
|
||||
None => return,
|
||||
};
|
||||
let effort = match last {
|
||||
"none" => ThinkingEffort::Off,
|
||||
"low" => ThinkingEffort::Low,
|
||||
"medium" => ThinkingEffort::Medium,
|
||||
"high" => ThinkingEffort::High,
|
||||
"xhigh" => ThinkingEffort::Max,
|
||||
_ => return,
|
||||
};
|
||||
self.model_name = parts[..parts.len() - 1].join("-");
|
||||
let has_explicit_effort = self
|
||||
.request_params
|
||||
.as_ref()
|
||||
.and_then(|p| p.get("thinking_effort"))
|
||||
.is_some();
|
||||
if !has_explicit_effort {
|
||||
let params = self.request_params.get_or_insert_with(HashMap::new);
|
||||
params.insert(
|
||||
"thinking_effort".to_string(),
|
||||
serde_json::json!(effort.to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn thinking_effort(&self) -> Option<ThinkingEffort> {
|
||||
self.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT")
|
||||
.and_then(|s| s.parse::<ThinkingEffort>().ok())
|
||||
.or_else(Self::legacy_thinking_effort)
|
||||
}
|
||||
|
||||
fn legacy_thinking_effort() -> Option<ThinkingEffort> {
|
||||
let config = crate::config::Config::global();
|
||||
|
||||
if let Ok(value) = config.get_param::<String>("CLAUDE_THINKING_TYPE") {
|
||||
if let Some(effort) = match value.to_lowercase().as_str() {
|
||||
"adaptive" | "enabled" => Some(ThinkingEffort::High),
|
||||
"disabled" => Some(ThinkingEffort::Off),
|
||||
_ => None,
|
||||
} {
|
||||
return Some(effort);
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(enabled) = config.get_param::<bool>("CLAUDE_THINKING_ENABLED") {
|
||||
return Some(if enabled {
|
||||
ThinkingEffort::High
|
||||
} else {
|
||||
ThinkingEffort::Off
|
||||
});
|
||||
}
|
||||
|
||||
if let Ok(value) = config.get_param::<String>("GEMINI3_THINKING_LEVEL") {
|
||||
if let Some(effort) = Self::legacy_gemini3_thinking_effort(&value) {
|
||||
return Some(effort);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn legacy_gemini3_thinking_effort(value: &str) -> Option<ThinkingEffort> {
|
||||
match value.to_lowercase().as_str() {
|
||||
"low" => Some(ThinkingEffort::Low),
|
||||
"high" => Some(ThinkingEffort::High),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_config_param<T: for<'de> serde::Deserialize<'de>>(
|
||||
&self,
|
||||
request_key: &str,
|
||||
@@ -419,13 +599,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_get_config_param() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_EFFORT", Some("high")),
|
||||
("CLAUDE_THINKING_TYPE", None::<&str>),
|
||||
]);
|
||||
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
|
||||
|
||||
let mut params = HashMap::new();
|
||||
params.insert("effort".to_string(), serde_json::json!("low"));
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
|
||||
|
||||
let config_with_params = ModelConfig {
|
||||
model_name: "test".to_string(),
|
||||
@@ -439,11 +616,13 @@ mod tests {
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
config_with_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
|
||||
config_with_params
|
||||
.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
|
||||
Some("low".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
config_without_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
|
||||
config_without_params
|
||||
.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
|
||||
Some("high".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -453,6 +632,236 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// A nested `fast_model_config` must survive the custom `Deserialize` impl and
// be the config returned by `use_fast_model`.
#[test]
fn test_deserialize_preserves_fast_model_config() {
    let payload = serde_json::json!({
        "model_name": "primary-model",
        "context_limit": null,
        "temperature": null,
        "max_tokens": null,
        "toolshim": false,
        "toolshim_model": null,
        "fast_model_config": {
            "model_name": "fast-model",
            "context_limit": 4096,
            "temperature": null,
            "max_tokens": 1024,
            "toolshim": false,
            "toolshim_model": null
        }
    });
    let config: ModelConfig = serde_json::from_value(payload).unwrap();

    let fast_config = config.fast_model_config.as_ref().unwrap();
    assert_eq!(fast_config.model_name, "fast-model");
    assert_eq!(fast_config.context_limit, Some(4096));
    assert_eq!(fast_config.max_tokens, Some(1024));
    assert_eq!(config.use_fast_model().model_name, "fast-model");
}
|
||||
|
||||
mod thinking_effort_tests {
|
||||
use super::*;
|
||||
|
||||
// With the env var cleared, the effort comes from the request params.
#[test]
fn from_request_params() {
    let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
    let params = HashMap::from([(
        "thinking_effort".to_string(),
        serde_json::json!("medium"),
    )]);
    let config = ModelConfig {
        model_name: "test".to_string(),
        request_params: Some(params),
        ..Default::default()
    };
    assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Medium));
}
|
||||
|
||||
#[test]
|
||||
fn from_env_var() {
|
||||
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
|
||||
let config = ModelConfig {
|
||||
model_name: "test".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_params_override_env() {
|
||||
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
|
||||
let mut params = HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
|
||||
let config = ModelConfig {
|
||||
model_name: "test".to_string(),
|
||||
request_params: Some(params),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_claude_thinking_type_fallback() {
|
||||
for value in ["enabled", "adaptive"] {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("CLAUDE_THINKING_TYPE", Some(value)),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("GEMINI3_THINKING_LEVEL", None::<&str>),
|
||||
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
|
||||
("CLAUDE_THINKING_BUDGET", None::<&str>),
|
||||
("GEMINI25_THINKING_BUDGET", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig {
|
||||
model_name: "test".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_gemini3_thinking_level_mapping() {
|
||||
assert_eq!(
|
||||
ModelConfig::legacy_gemini3_thinking_effort("low"),
|
||||
Some(ThinkingEffort::Low)
|
||||
);
|
||||
assert_eq!(
|
||||
ModelConfig::legacy_gemini3_thinking_effort("high"),
|
||||
Some(ThinkingEffort::High)
|
||||
);
|
||||
assert_eq!(ModelConfig::legacy_gemini3_thinking_effort("auto"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_gemini3_thinking_level_fallback() {
|
||||
let temp_dir = tempfile::tempdir().unwrap();
|
||||
let temp_root = temp_dir.path().to_string_lossy().to_string();
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_PATH_ROOT", Some(temp_root.as_str())),
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("CLAUDE_THINKING_TYPE", None::<&str>),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("GEMINI3_THINKING_LEVEL", Some("high")),
|
||||
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
|
||||
("CLAUDE_THINKING_BUDGET", None::<&str>),
|
||||
("GEMINI25_THINKING_BUDGET", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig {
|
||||
model_name: "gemini-3-pro".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn effort_suffix_stripped_from_model_name() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig::new("o3-mini-high").unwrap();
|
||||
assert_eq!(config.model_name, "o3-mini");
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn none_suffix_stripped_from_model_name() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", Some("high")),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig::new("o3-mini-none").unwrap();
|
||||
assert_eq!(config.model_name, "o3-mini");
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Off));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn xhigh_suffix_stripped_from_model_name() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", Some("low")),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig::new("gpt-5.4-xhigh").unwrap();
|
||||
assert_eq!(config.model_name, "gpt-5.4");
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Max));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn effort_suffix_not_stripped_when_thinking_effort_set() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let mut params = HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
|
||||
let mut config = ModelConfig::new("o3-mini-high").unwrap();
|
||||
// Suffix was already normalized during new(), but if request_params
|
||||
// were set before construction, the suffix would not be stripped.
|
||||
// Verify the normalized state:
|
||||
assert_eq!(config.model_name, "o3-mini");
|
||||
|
||||
// Now simulate setting explicit effort after construction
|
||||
config.request_params = Some(params);
|
||||
assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_suffix_no_change() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig::new("o3-mini").unwrap();
|
||||
assert_eq!(config.model_name, "o3-mini");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_reasoning_model_suffix_not_stripped() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("GOOSE_MAX_TOKENS", None::<&str>),
|
||||
("GOOSE_TEMPERATURE", None::<&str>),
|
||||
("GOOSE_CONTEXT_LIMIT", None::<&str>),
|
||||
("GOOSE_TOOLSHIM", None::<&str>),
|
||||
("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>),
|
||||
]);
|
||||
let config = ModelConfig::new("claude-sonnet-4-high").unwrap();
|
||||
assert_eq!(config.model_name, "claude-sonnet-4-high");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_aliases() {
|
||||
assert_eq!("off".parse::<ThinkingEffort>(), Ok(ThinkingEffort::Off));
|
||||
assert_eq!(
|
||||
"disabled".parse::<ThinkingEffort>(),
|
||||
Ok(ThinkingEffort::Off)
|
||||
);
|
||||
assert_eq!("med".parse::<ThinkingEffort>(), Ok(ThinkingEffort::Medium));
|
||||
assert_eq!("max".parse::<ThinkingEffort>(), Ok(ThinkingEffort::Max));
|
||||
assert_eq!("xhigh".parse::<ThinkingEffort>(), Ok(ThinkingEffort::Max));
|
||||
assert!("invalid".parse::<ThinkingEffort>().is_err());
|
||||
}
|
||||
}
|
||||
|
||||
mod with_canonical_limits {
|
||||
use super::*;
|
||||
|
||||
@@ -597,4 +1006,36 @@ mod tests {
|
||||
assert!(!ModelConfig::new_or_fail("llama-3-70b").is_openai_reasoning_model());
|
||||
}
|
||||
}
|
||||
|
||||
mod is_reasoning_model {
    use super::*;

    /// Model-related environment variables that are unset while each test runs.
    const ENV_LOCK_KEYS: [(&str, Option<&str>); 5] = [
        ("GOOSE_MAX_TOKENS", None),
        ("GOOSE_TEMPERATURE", None),
        ("GOOSE_CONTEXT_LIMIT", None),
        ("GOOSE_TOOLSHIM", None),
        ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None),
    ];

    #[test]
    fn includes_reasoning_model_families() {
        let _guard = env_lock::lock_env(ENV_LOCK_KEYS);
        // One representative model name per family.
        for family_member in ["o3-mini", "claude-sonnet-4", "gemini-3-pro"] {
            assert!(ModelConfig::new_or_fail(family_member).is_reasoning_model());
        }
    }

    #[test]
    fn uses_explicit_metadata_first() {
        let _guard = env_lock::lock_env(ENV_LOCK_KEYS);

        // An explicit `true` marks an otherwise unknown alias as reasoning-capable.
        let mut forced_on = ModelConfig::new_or_fail("provider-alias");
        forced_on.reasoning = Some(true);
        assert!(forced_on.is_reasoning_model());

        // An explicit `false` overrides name-based detection.
        let mut forced_off = ModelConfig::new_or_fail("claude-sonnet-4");
        forced_off.reasoning = Some(false);
        assert!(!forced_off.is_reasoning_model());
    }
}
|
||||
}
|
||||
|
||||
@@ -395,6 +395,9 @@ pub struct ModelInfo {
|
||||
pub currency: Option<String>,
|
||||
/// Whether this model supports cache control
|
||||
pub supports_cache_control: Option<bool>,
|
||||
/// Whether this model supports reasoning/thinking controls
|
||||
#[serde(default)]
|
||||
pub reasoning: bool,
|
||||
}
|
||||
|
||||
impl ModelInfo {
|
||||
@@ -407,6 +410,7 @@ impl ModelInfo {
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,10 +428,37 @@ impl ModelInfo {
|
||||
output_token_cost: Some(output_cost),
|
||||
currency: Some("$".to_string()),
|
||||
supports_cache_control: None,
|
||||
reasoning: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn model_info_for_provider_model(provider_name: &str, model_name: &str) -> ModelInfo {
|
||||
let registry = CanonicalModelRegistry::bundled().ok();
|
||||
let canonical = registry.as_ref().and_then(|registry| {
|
||||
let canonical_id = map_to_canonical_model(provider_name, model_name, registry)?;
|
||||
let (provider, model) = canonical_id.split_once('/')?;
|
||||
registry.get(provider, model)
|
||||
});
|
||||
|
||||
let reasoning = canonical
|
||||
.as_ref()
|
||||
.and_then(|model| model.reasoning)
|
||||
.unwrap_or_else(|| ModelConfig::new_or_fail(model_name).is_reasoning_model());
|
||||
|
||||
ModelInfo {
|
||||
name: model_name.to_string(),
|
||||
context_limit: ModelConfig::new_or_fail(model_name)
|
||||
.with_canonical_limits(provider_name)
|
||||
.context_limit(),
|
||||
input_token_cost: None,
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
|
||||
pub enum ProviderType {
|
||||
Preferred,
|
||||
@@ -478,16 +509,7 @@ impl ProviderMetadata {
|
||||
default_model: default_model.to_string(),
|
||||
known_models: model_names
|
||||
.iter()
|
||||
.map(|&model_name| ModelInfo {
|
||||
name: model_name.to_string(),
|
||||
context_limit: ModelConfig::new_or_fail(model_name)
|
||||
.with_canonical_limits(name)
|
||||
.context_limit(),
|
||||
input_token_cost: None,
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
})
|
||||
.map(|&model_name| model_info_for_provider_model(name, model_name))
|
||||
.collect(),
|
||||
model_doc_link: model_doc_link.to_string(),
|
||||
config_keys,
|
||||
@@ -917,6 +939,19 @@ pub trait Provider: Send + Sync {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn fetch_supported_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
|
||||
Ok(self
|
||||
.fetch_supported_models()
|
||||
.await?
|
||||
.iter()
|
||||
.map(|model_name| model_info_for_provider_model(self.get_name(), model_name))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn fetch_model_info(&self, model_name: &str) -> Result<ModelInfo, ProviderError> {
|
||||
Ok(model_info_for_provider_model(self.get_name(), model_name))
|
||||
}
|
||||
|
||||
/// Default implementation: returns `false`.
// NOTE(review): presumably lets a provider opt out of canonical-model filtering of its
// model list by overriding this to `true` — confirm against the callers.
fn skip_canonical_filtering(&self) -> bool {
|
||||
false
|
||||
}
|
||||
@@ -982,6 +1017,15 @@ pub trait Provider: Send + Sync {
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_recommended_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
|
||||
Ok(self
|
||||
.fetch_recommended_models()
|
||||
.await?
|
||||
.iter()
|
||||
.map(|model_name| model_info_for_provider_model(self.get_name(), model_name))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn map_to_canonical_model(
|
||||
&self,
|
||||
provider_model: &str,
|
||||
@@ -1739,6 +1783,7 @@ mod tests {
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning: false,
|
||||
};
|
||||
assert_eq!(info.context_limit, 1000);
|
||||
|
||||
@@ -1750,6 +1795,7 @@ mod tests {
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning: false,
|
||||
};
|
||||
assert_eq!(info, info2);
|
||||
|
||||
@@ -1761,6 +1807,7 @@ mod tests {
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning: false,
|
||||
};
|
||||
assert_ne!(info, info3);
|
||||
}
|
||||
|
||||
@@ -229,6 +229,29 @@ fn get_reasoning_effort(model_name: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option<String> {
|
||||
use crate::model::ThinkingEffort;
|
||||
|
||||
model_config
|
||||
.thinking_effort()
|
||||
.map(|effort| {
|
||||
let valid_levels = reasoning_levels_for_model(&model_config.model_name);
|
||||
let preferred_levels: &[&str] = match effort {
|
||||
ThinkingEffort::Off => return None,
|
||||
ThinkingEffort::Low => &["low", "medium", "high", "xhigh"],
|
||||
ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"],
|
||||
ThinkingEffort::High => &["high", "medium", "xhigh", "low"],
|
||||
ThinkingEffort::Max => &["xhigh", "high", "medium", "low"],
|
||||
};
|
||||
|
||||
preferred_levels
|
||||
.iter()
|
||||
.find(|level| valid_levels.contains(level))
|
||||
.map(|level| (*level).to_string())
|
||||
})
|
||||
.unwrap_or_else(|| Some(get_reasoning_effort(&model_config.model_name)))
|
||||
}
|
||||
|
||||
fn create_codex_request(
|
||||
model_config: &ModelConfig,
|
||||
system: &str,
|
||||
@@ -236,7 +259,7 @@ fn create_codex_request(
|
||||
tools: &[Tool],
|
||||
) -> Result<Value> {
|
||||
let input_items = build_input_items(messages)?;
|
||||
let reasoning_effort = get_reasoning_effort(&model_config.model_name);
|
||||
let reasoning_effort = reasoning_effort_for_config(model_config);
|
||||
|
||||
let instructions = match model_config.model_name.as_str() {
|
||||
"gpt-5.3-codex" => format!("{GPT_53_CODEX_TOOL_PREAMBLE}\n\n{system}"),
|
||||
@@ -247,7 +270,6 @@ fn create_codex_request(
|
||||
"model": model_config.model_name,
|
||||
"input": input_items,
|
||||
"store": false,
|
||||
"reasoning": {"effort": reasoning_effort},
|
||||
"instructions": instructions,
|
||||
});
|
||||
|
||||
@@ -277,6 +299,13 @@ fn create_codex_request(
|
||||
payload_obj.insert("temperature".to_string(), json!(temp));
|
||||
}
|
||||
|
||||
if let Some(reasoning_effort) = reasoning_effort {
|
||||
payload_obj.insert(
|
||||
"reasoning".to_string(),
|
||||
json!({ "effort": reasoning_effort }),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(payload)
|
||||
}
|
||||
|
||||
@@ -1177,6 +1206,42 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_create_codex_request_reasoning_effort_from_unified_thinking() {
    // Unified "max" is sent as the codex level "xhigh" for a model that supports it.
    let mut config = ModelConfig::new("gpt-5.3-codex").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));

    let request = create_codex_request(&config, "sys", &[], &[]).unwrap();

    assert_eq!(request["reasoning"]["effort"], "xhigh");
    assert!(request.get("reasoning_effort").is_none());
}
|
||||
|
||||
#[test]
fn test_create_codex_request_caps_unified_thinking_to_supported_level() {
    // For a model name the provider does not recognise, "max" resolves to "high".
    let mut config = ModelConfig::new("unknown-model").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));

    let request = create_codex_request(&config, "sys", &[], &[]).unwrap();

    assert_eq!(request["reasoning"]["effort"], "high");
    assert!(request.get("reasoning_effort").is_none());
}
|
||||
|
||||
#[test]
fn test_create_codex_request_off_omits_reasoning_for_codex_models() {
    // "off" must leave both reasoning keys out of the payload.
    let mut config = ModelConfig::new("gpt-5.2-codex").unwrap();
    config.request_params = Some(std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        json!("off"),
    )]));

    let request = create_codex_request(&config, "sys", &[], &[]).unwrap();

    assert!(request.get("reasoning").is_none());
    assert!(request.get("reasoning_effort").is_none());
}
|
||||
|
||||
#[test_case(
|
||||
JwtClaims {
|
||||
chatgpt_account_id: Some("account-1".to_string()),
|
||||
|
||||
@@ -16,7 +16,7 @@ use super::base::{
|
||||
};
|
||||
use super::errors::ProviderError;
|
||||
use super::utils::{filter_extensions_from_system_prompt, RequestLog};
|
||||
use crate::config::base::{CodexCommand, CodexReasoningEffort, CodexSkipGitCheck};
|
||||
use crate::config::base::{CodexCommand, CodexSkipGitCheck};
|
||||
use crate::config::paths::Paths;
|
||||
use crate::config::search_path::SearchPaths;
|
||||
use crate::config::{Config, ExtensionConfig, GooseMode};
|
||||
@@ -50,7 +50,7 @@ pub struct CodexProvider {
|
||||
#[serde(skip)]
|
||||
name: String,
|
||||
/// Reasoning effort level (none, low, medium, high, xhigh)
|
||||
reasoning_effort: String,
|
||||
reasoning_effort: Option<String>,
|
||||
/// Whether to skip git repo check
|
||||
skip_git_check: bool,
|
||||
/// CLI config overrides for MCP servers
|
||||
@@ -60,12 +60,40 @@ pub struct CodexProvider {
|
||||
}
|
||||
|
||||
impl CodexProvider {
|
||||
fn supports_reasoning_effort(model_name: &str, reasoning_effort: &str) -> bool {
|
||||
if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
|
||||
return false;
|
||||
}
|
||||
fn legacy_reasoning_effort() -> Option<crate::model::ThinkingEffort> {
|
||||
Config::global()
|
||||
.get_param::<String>("CODEX_REASONING_EFFORT")
|
||||
.ok()
|
||||
.and_then(|effort| match effort.to_lowercase().as_str() {
|
||||
"none" => Some(crate::model::ThinkingEffort::Off),
|
||||
"low" => Some(crate::model::ThinkingEffort::Low),
|
||||
"medium" => Some(crate::model::ThinkingEffort::Medium),
|
||||
"high" => Some(crate::model::ThinkingEffort::High),
|
||||
"xhigh" => Some(crate::model::ThinkingEffort::Max),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
if reasoning_effort == "none" && model_name.contains("codex") {
|
||||
fn map_thinking_effort(
|
||||
_model_name: &str,
|
||||
effort: Option<crate::model::ThinkingEffort>,
|
||||
) -> Option<String> {
|
||||
use crate::model::ThinkingEffort;
|
||||
match effort
|
||||
.or_else(Self::legacy_reasoning_effort)
|
||||
.unwrap_or(ThinkingEffort::High)
|
||||
{
|
||||
ThinkingEffort::Off => Some("none".to_string()),
|
||||
ThinkingEffort::Low => Some("low".to_string()),
|
||||
ThinkingEffort::Medium => Some("medium".to_string()),
|
||||
ThinkingEffort::High => Some("high".to_string()),
|
||||
ThinkingEffort::Max => Some("xhigh".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn supports_reasoning_effort(_model_name: &str, reasoning_effort: &str) -> bool {
|
||||
if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -115,7 +143,7 @@ impl CodexProvider {
|
||||
println!("=== CODEX PROVIDER DEBUG ===");
|
||||
println!("Command: {:?}", self.command);
|
||||
println!("Model: {}", self.model.model_name);
|
||||
println!("Reasoning effort: {}", self.reasoning_effort);
|
||||
println!("Reasoning effort: {:?}", self.reasoning_effort);
|
||||
println!("Skip git check: {}", self.skip_git_check);
|
||||
println!("Prompt length: {} chars", prompt.len());
|
||||
println!("Prompt: {}", prompt);
|
||||
@@ -142,11 +170,10 @@ impl CodexProvider {
|
||||
cmd.arg("-m").arg(&self.model.model_name);
|
||||
}
|
||||
|
||||
// Reasoning effort configuration
|
||||
cmd.arg("-c").arg(format!(
|
||||
"model_reasoning_effort=\"{}\"",
|
||||
self.reasoning_effort
|
||||
));
|
||||
if let Some(reasoning_effort) = &self.reasoning_effort {
|
||||
cmd.arg("-c")
|
||||
.arg(format!("model_reasoning_effort=\"{}\"", reasoning_effort));
|
||||
}
|
||||
|
||||
for override_config in &self.mcp_config_overrides {
|
||||
cmd.arg("-c").arg(override_config);
|
||||
@@ -604,7 +631,6 @@ impl ProviderDef for CodexProvider {
|
||||
CODEX_DOC_URL,
|
||||
vec![
|
||||
ConfigKey::from_value_type::<CodexCommand>(true, false, true),
|
||||
ConfigKey::from_value_type::<CodexReasoningEffort>(false, false, true),
|
||||
ConfigKey::from_value_type::<CodexSkipGitCheck>(false, false, true),
|
||||
],
|
||||
)
|
||||
@@ -619,24 +645,8 @@ impl ProviderDef for CodexProvider {
|
||||
let command: String = config.get_codex_command().unwrap_or_default().into();
|
||||
let resolved_command = SearchPaths::builder().with_npm().resolve(command)?;
|
||||
|
||||
// Get reasoning effort from config, default to "high"
|
||||
let reasoning_effort = config
|
||||
.get_codex_reasoning_effort()
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|_| "high".to_string());
|
||||
|
||||
// Validate reasoning effort
|
||||
let reasoning_effort =
|
||||
if Self::supports_reasoning_effort(&model.model_name, &reasoning_effort) {
|
||||
reasoning_effort
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"Invalid CODEX_REASONING_EFFORT '{}' for model '{}', using 'high'",
|
||||
reasoning_effort,
|
||||
model.model_name
|
||||
);
|
||||
"high".to_string()
|
||||
};
|
||||
Self::map_thinking_effort(&model.model_name, model.thinking_effort());
|
||||
|
||||
// Get skip_git_check from config, default to false
|
||||
let skip_git_check = config
|
||||
@@ -925,7 +935,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -946,7 +956,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -980,7 +990,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -1005,7 +1015,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_reasoning_effort_support_by_model() {
|
||||
assert!(CodexProvider::supports_reasoning_effort("gpt-5.2", "none"));
|
||||
assert!(!CodexProvider::supports_reasoning_effort(
|
||||
assert!(CodexProvider::supports_reasoning_effort(
|
||||
"gpt-5.2-codex",
|
||||
"none"
|
||||
));
|
||||
@@ -1029,7 +1039,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -1055,7 +1065,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -1128,7 +1138,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -1145,7 +1155,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
@@ -1214,20 +1224,56 @@ mod tests {
|
||||
#[test]
|
||||
fn test_config_keys() {
|
||||
let metadata = CodexProvider::metadata();
|
||||
assert_eq!(metadata.config_keys.len(), 3);
|
||||
assert_eq!(metadata.config_keys.len(), 2);
|
||||
|
||||
// First key should be CODEX_COMMAND (required)
|
||||
assert_eq!(metadata.config_keys[0].name, "CODEX_COMMAND");
|
||||
assert!(metadata.config_keys[0].required);
|
||||
assert!(!metadata.config_keys[0].secret);
|
||||
|
||||
// Second key should be CODEX_REASONING_EFFORT (optional)
|
||||
assert_eq!(metadata.config_keys[1].name, "CODEX_REASONING_EFFORT");
|
||||
// Second key should be CODEX_SKIP_GIT_CHECK (optional)
|
||||
assert_eq!(metadata.config_keys[1].name, "CODEX_SKIP_GIT_CHECK");
|
||||
assert!(!metadata.config_keys[1].required);
|
||||
}
|
||||
|
||||
// Third key should be CODEX_SKIP_GIT_CHECK (optional)
|
||||
assert_eq!(metadata.config_keys[2].name, "CODEX_SKIP_GIT_CHECK");
|
||||
assert!(!metadata.config_keys[2].required);
|
||||
#[test]
fn test_map_thinking_effort() {
    use crate::model::ThinkingEffort;

    // Both effort variables are unset so only the explicit argument (or the built-in
    // default) decides the outcome.
    let _guard = env_lock::lock_env([
        ("CODEX_REASONING_EFFORT", None::<&str>),
        ("GOOSE_THINKING_EFFORT", None::<&str>),
    ]);

    // (model name, requested effort, expected level)
    let cases = [
        ("gpt-5.2-codex", Some(ThinkingEffort::Off), "none"),
        ("gpt-5.2", Some(ThinkingEffort::Off), "none"),
        ("gpt-5.2-codex", Some(ThinkingEffort::Max), "xhigh"),
        ("gpt-5.2-codex", None, "high"),
    ];

    for (model_name, requested, expected_level) in cases {
        assert_eq!(
            CodexProvider::map_thinking_effort(model_name, requested),
            Some(expected_level.to_string())
        );
    }
}
|
||||
|
||||
#[test]
fn test_map_thinking_effort_uses_legacy_codex_env() {
    // With no explicit effort, the legacy CODEX_REASONING_EFFORT value is honoured.
    let _guard = env_lock::lock_env([
        ("CODEX_REASONING_EFFORT", Some("low")),
        ("GOOSE_THINKING_EFFORT", None::<&str>),
    ]);

    let mapped = CodexProvider::map_thinking_effort("gpt-5.2-codex", None);
    assert_eq!(mapped, Some("low".to_string()));
}
|
||||
|
||||
#[test]
|
||||
@@ -1236,7 +1282,7 @@ mod tests {
|
||||
command: PathBuf::from("codex"),
|
||||
model: ModelConfig::new("gpt-5.2-codex").unwrap(),
|
||||
name: "codex".to_string(),
|
||||
reasoning_effort: "high".to_string(),
|
||||
reasoning_effort: Some("high".to_string()),
|
||||
skip_git_check: false,
|
||||
mcp_config_overrides: Vec::new(),
|
||||
mode_by_session: tokio::sync::RwLock::new(HashMap::new()),
|
||||
|
||||
@@ -3,12 +3,14 @@ use async_trait::async_trait;
|
||||
use futures::future::BoxFuture;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use super::api_client::{ApiClient, AuthMethod, AuthProvider};
|
||||
use super::base::{
|
||||
ConfigKey, MessageStream, Provider, ProviderDef, ProviderMetadata,
|
||||
ConfigKey, MessageStream, ModelInfo, Provider, ProviderDef, ProviderMetadata,
|
||||
DEFAULT_PROVIDER_TIMEOUT_SECS,
|
||||
};
|
||||
use super::embedding::EmbeddingCapable;
|
||||
@@ -21,7 +23,7 @@ use super::openai_compatible::{
|
||||
stream_openai_compat, stream_responses_compat,
|
||||
};
|
||||
use super::retry::ProviderRetry;
|
||||
use super::utils::{ImageFormat, RequestLog};
|
||||
use super::utils::{is_openai_responses_model, ImageFormat, RequestLog};
|
||||
use crate::config::ConfigError;
|
||||
use crate::conversation::message::Message;
|
||||
use crate::instance_id::get_instance_id;
|
||||
@@ -33,11 +35,35 @@ use crate::providers::retry::{
|
||||
use rmcp::model::Tool;
|
||||
use serde_json::json;
|
||||
|
||||
/// Metadata resolved for a single Databricks serving endpoint.
#[derive(Debug, Clone)]
|
||||
struct DatabricksEndpointInfo {
|
||||
/// The endpoint's own `name` field.
name: String,
|
||||
/// Name of the first upstream model candidate whose name differs from the endpoint
/// name, if any.
upstream_model_name: Option<String>,
|
||||
// NOTE(review): presumably the provider reported for that upstream model — confirm.
upstream_model_provider: Option<String>,
|
||||
// NOTE(review): presumably `None` means reasoning support could not be determined — confirm.
reasoning: Option<bool>,
|
||||
}
|
||||
|
||||
/// One upstream model candidate extracted from an endpoint's served entities.
#[derive(Debug, Clone)]
|
||||
struct DatabricksUpstreamModel {
|
||||
/// Non-empty model name taken from `external_model.name`, `foundation_model.name`
/// or `entity_name`.
name: String,
|
||||
/// Matching `provider` field when present; always `None` for `entity_name` candidates.
provider: Option<String>,
|
||||
}
|
||||
|
||||
/// Endpoint metadata paired with the instant it was fetched; this is the value type
/// stored in `DATABRICKS_ENDPOINT_INFO_CACHE`.
#[derive(Debug, Clone)]
|
||||
struct CachedDatabricksEndpointInfo {
|
||||
/// The cached endpoint metadata.
info: DatabricksEndpointInfo,
|
||||
// NOTE(review): presumably compared against DATABRICKS_ENDPOINT_METADATA_TTL_SECS to
// expire stale entries — confirm at the lookup site.
fetched_at: Instant,
|
||||
}
|
||||
|
||||
const DEFAULT_CLIENT_ID: &str = "databricks-cli";
|
||||
const DEFAULT_REDIRECT_URL: &str = "http://localhost";
|
||||
const DEFAULT_SCOPES: &[&str] = &["all-apis", "offline_access"];
|
||||
|
||||
/// Provider identifier string for Databricks.
const DATABRICKS_PROVIDER_NAME: &str = "databricks";
|
||||
// NOTE(review): presumably the number of seconds cached endpoint metadata is treated as
// fresh — confirm where the cache is read.
const DATABRICKS_ENDPOINT_METADATA_TTL_SECS: u64 = 60;
|
||||
/// Process-wide, lazily initialised, mutex-guarded map of cached endpoint metadata.
// NOTE(review): the meaning of the `String` key (endpoint name? host + endpoint?) is not
// visible here — verify against the insert/lookup code.
static DATABRICKS_ENDPOINT_INFO_CACHE: LazyLock<
|
||||
Mutex<std::collections::HashMap<String, CachedDatabricksEndpointInfo>>,
|
||||
> = LazyLock::new(|| Mutex::new(std::collections::HashMap::new()));
|
||||
pub const DATABRICKS_DEFAULT_MODEL: &str = "databricks-claude-sonnet-4";
|
||||
const DATABRICKS_DEFAULT_FAST_MODEL: &str = "databricks-claude-haiku-4-5";
|
||||
pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[
|
||||
@@ -116,6 +142,8 @@ impl AuthProvider for DatabricksAuthProvider {
|
||||
pub struct DatabricksProvider {
|
||||
#[serde(skip)]
|
||||
api_client: ApiClient,
|
||||
#[serde(skip)]
|
||||
host: String,
|
||||
auth: DatabricksAuth,
|
||||
model: ModelConfig,
|
||||
image_format: ImageFormat,
|
||||
@@ -172,13 +200,14 @@ impl DatabricksProvider {
|
||||
}));
|
||||
|
||||
let api_client = ApiClient::with_timeout(
|
||||
host,
|
||||
host.clone(),
|
||||
auth_method,
|
||||
Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS),
|
||||
)?;
|
||||
|
||||
let mut provider = Self {
|
||||
api_client,
|
||||
host,
|
||||
auth,
|
||||
model: model.clone(),
|
||||
image_format: ImageFormat::OpenAi,
|
||||
@@ -240,13 +269,14 @@ impl DatabricksProvider {
|
||||
}));
|
||||
|
||||
let api_client = ApiClient::with_timeout(
|
||||
host,
|
||||
host.clone(),
|
||||
auth_method,
|
||||
Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS),
|
||||
)?;
|
||||
|
||||
Ok(Self {
|
||||
api_client,
|
||||
host,
|
||||
auth,
|
||||
model,
|
||||
image_format: ImageFormat::OpenAi,
|
||||
@@ -270,7 +300,252 @@ impl DatabricksProvider {
|
||||
}
|
||||
|
||||
fn is_responses_model(model_name: &str) -> bool {
|
||||
super::utils::is_openai_responses_model(model_name)
|
||||
is_openai_responses_model(model_name)
|
||||
}
|
||||
|
||||
/// Returns `true` when `model_name` contains "claude", ignoring letter case.
fn is_claude_model(model_name: &str) -> bool {
    let lowered = model_name.to_lowercase();
    lowered.contains("claude")
}
|
||||
|
||||
fn is_reasoning_capable_model_name(model_name: &str) -> bool {
|
||||
Self::is_claude_model(model_name) || Self::is_responses_model(model_name)
|
||||
}
|
||||
|
||||
fn endpoint_model_candidates(value: &Value) -> Vec<DatabricksUpstreamModel> {
|
||||
let mut candidates: Vec<DatabricksUpstreamModel> = Vec::new();
|
||||
|
||||
fn get_string_at(value: &Value, path: &[&str]) -> Option<String> {
|
||||
path.iter()
|
||||
.try_fold(value, |current, key| current.get(*key))
|
||||
.and_then(|v| v.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ToString::to_string)
|
||||
}
|
||||
|
||||
fn push_candidate(
|
||||
name: Option<String>,
|
||||
provider: Option<String>,
|
||||
candidates: &mut Vec<DatabricksUpstreamModel>,
|
||||
) {
|
||||
if let Some(name) = name {
|
||||
if !candidates.iter().any(|candidate| candidate.name == name) {
|
||||
candidates.push(DatabricksUpstreamModel { name, provider });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for config_key in ["config", "pending_config"] {
|
||||
let Some(config) = value.get(config_key) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for collection_key in ["served_entities", "served_models"] {
|
||||
let Some(entities) = config.get(collection_key).and_then(|v| v.as_array()) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
for entity in entities {
|
||||
push_candidate(
|
||||
get_string_at(entity, &["external_model", "name"]),
|
||||
get_string_at(entity, &["external_model", "provider"]),
|
||||
&mut candidates,
|
||||
);
|
||||
push_candidate(
|
||||
get_string_at(entity, &["foundation_model", "name"]),
|
||||
get_string_at(entity, &["foundation_model", "provider"]),
|
||||
&mut candidates,
|
||||
);
|
||||
push_candidate(
|
||||
get_string_at(entity, &["entity_name"]),
|
||||
None,
|
||||
&mut candidates,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
candidates
|
||||
}
|
||||
|
||||
fn endpoint_info_from_value(endpoint: &Value) -> Option<DatabricksEndpointInfo> {
|
||||
let name = endpoint.get("name")?.as_str()?.to_string();
|
||||
let upstream_model = Self::endpoint_model_candidates(endpoint)
|
||||
.into_iter()
|
||||
.find(|candidate| candidate.name != name);
|
||||
let upstream_model_name = upstream_model.as_ref().map(|model| model.name.clone());
|
||||
let upstream_model_provider = upstream_model.and_then(|model| model.provider);
|
||||
|
||||
let reasoning = upstream_model_name
|
||||
.as_deref()
|
||||
.map(Self::is_reasoning_capable_model_name)
|
||||
.or_else(|| Some(Self::is_reasoning_capable_model_name(&name)));
|
||||
|
||||
Some(DatabricksEndpointInfo {
|
||||
name,
|
||||
upstream_model_name,
|
||||
upstream_model_provider,
|
||||
reasoning,
|
||||
})
|
||||
}
|
||||
|
||||
async fn fetch_endpoint_info(
|
||||
&self,
|
||||
endpoint_name: &str,
|
||||
) -> Result<DatabricksEndpointInfo, ProviderError> {
|
||||
let response = self
|
||||
.api_client
|
||||
.request(
|
||||
None,
|
||||
&format!(
|
||||
"api/2.0/serving-endpoints/{}",
|
||||
urlencoding::encode(endpoint_name)
|
||||
),
|
||||
)
|
||||
.response_get()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ProviderError::RequestFailed(format!(
|
||||
"Failed to fetch Databricks endpoint metadata: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let detail = response.text().await.unwrap_or_default();
|
||||
return Err(ProviderError::RequestFailed(format!(
|
||||
"Failed to fetch Databricks endpoint metadata: {} {}",
|
||||
status, detail
|
||||
)));
|
||||
}
|
||||
|
||||
let json: Value = response.json().await.map_err(|e| {
|
||||
ProviderError::RequestFailed(format!(
|
||||
"Failed to parse Databricks endpoint metadata: {}",
|
||||
e
|
||||
))
|
||||
})?;
|
||||
|
||||
Self::endpoint_info_from_value(&json).ok_or_else(|| {
|
||||
ProviderError::RequestFailed(
|
||||
"Unexpected response format from Databricks endpoint metadata".to_string(),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
async fn resolve_endpoint_info(
|
||||
&self,
|
||||
endpoint_name: &str,
|
||||
) -> Result<DatabricksEndpointInfo, ProviderError> {
|
||||
const MAX_MODEL_SERVING_HOPS: usize = 4;
|
||||
|
||||
let original_endpoint_name = endpoint_name.to_string();
|
||||
let mut current_endpoint_name = endpoint_name.to_string();
|
||||
let mut visited = HashSet::new();
|
||||
let mut last_info: Option<DatabricksEndpointInfo> = None;
|
||||
|
||||
for _ in 0..MAX_MODEL_SERVING_HOPS {
|
||||
if !visited.insert(current_endpoint_name.clone()) {
|
||||
break;
|
||||
}
|
||||
|
||||
let info = self.fetch_endpoint_info(¤t_endpoint_name).await?;
|
||||
let next_endpoint_name = match (
|
||||
info.upstream_model_provider.as_deref(),
|
||||
info.upstream_model_name.as_deref(),
|
||||
) {
|
||||
(Some("databricks-model-serving"), Some(next_endpoint_name))
|
||||
if !visited.contains(next_endpoint_name) =>
|
||||
{
|
||||
Some(next_endpoint_name.to_string())
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(next_endpoint_name) = next_endpoint_name {
|
||||
last_info = Some(info);
|
||||
current_endpoint_name = next_endpoint_name;
|
||||
continue;
|
||||
}
|
||||
|
||||
return Ok(if info.name == original_endpoint_name {
|
||||
info
|
||||
} else {
|
||||
let upstream_model_name = info
|
||||
.upstream_model_name
|
||||
.clone()
|
||||
.or_else(|| Some(info.name.clone()));
|
||||
DatabricksEndpointInfo {
|
||||
name: original_endpoint_name,
|
||||
upstream_model_name,
|
||||
upstream_model_provider: info.upstream_model_provider.clone(),
|
||||
reasoning: info.reasoning,
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
last_info
|
||||
.map(|info| DatabricksEndpointInfo {
|
||||
name: original_endpoint_name,
|
||||
upstream_model_name: info.upstream_model_name,
|
||||
upstream_model_provider: info.upstream_model_provider,
|
||||
reasoning: info.reasoning,
|
||||
})
|
||||
.ok_or_else(|| {
|
||||
ProviderError::RequestFailed(
|
||||
"Failed to resolve Databricks endpoint metadata".to_string(),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
async fn resolve_endpoint_info_cached(
|
||||
&self,
|
||||
endpoint_name: &str,
|
||||
) -> Result<DatabricksEndpointInfo, ProviderError> {
|
||||
let cache_key = format!("{}:{}", self.host, endpoint_name);
|
||||
let cached = DATABRICKS_ENDPOINT_INFO_CACHE
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get(&cache_key)
|
||||
.cloned();
|
||||
|
||||
if let Some(cached) = cached {
|
||||
if cached.fetched_at.elapsed()
|
||||
< Duration::from_secs(DATABRICKS_ENDPOINT_METADATA_TTL_SECS)
|
||||
{
|
||||
return Ok(cached.info);
|
||||
}
|
||||
}
|
||||
|
||||
let info = self.resolve_endpoint_info(endpoint_name).await?;
|
||||
DATABRICKS_ENDPOINT_INFO_CACHE.lock().unwrap().insert(
|
||||
cache_key,
|
||||
CachedDatabricksEndpointInfo {
|
||||
info: info.clone(),
|
||||
fetched_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn model_info_from_endpoint(info: DatabricksEndpointInfo) -> ModelInfo {
|
||||
let context_model = info.upstream_model_name.as_deref().unwrap_or(&info.name);
|
||||
let context_limit = ModelConfig::new_or_fail(context_model)
|
||||
.with_canonical_limits(DATABRICKS_PROVIDER_NAME)
|
||||
.context_limit();
|
||||
let reasoning = info
|
||||
.reasoning
|
||||
.unwrap_or_else(|| ModelConfig::new_or_fail(context_model).is_reasoning_model());
|
||||
|
||||
ModelInfo {
|
||||
name: info.name,
|
||||
context_limit,
|
||||
input_token_cost: None,
|
||||
output_token_cost: None,
|
||||
currency: None,
|
||||
supports_cache_control: None,
|
||||
reasoning,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
|
||||
@@ -378,11 +653,49 @@ impl Provider for DatabricksProvider {
|
||||
messages: &[Message],
|
||||
tools: &[Tool],
|
||||
) -> Result<MessageStream, ProviderError> {
|
||||
let path = self.get_endpoint_path(&model_config.model_name, false);
|
||||
let (endpoint_name, _) = super::utils::extract_reasoning_effort(&model_config.model_name);
|
||||
let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await.ok();
|
||||
let effective_model_name = endpoint_info
|
||||
.as_ref()
|
||||
.and_then(|info| info.upstream_model_name.as_deref())
|
||||
.unwrap_or(&model_config.model_name);
|
||||
let is_responses_model = Self::is_responses_model(&model_config.model_name)
|
||||
|| Self::is_responses_model(effective_model_name);
|
||||
let path = if is_responses_model {
|
||||
"serving-endpoints/responses".to_string()
|
||||
} else {
|
||||
self.get_endpoint_path(&model_config.model_name, false)
|
||||
};
|
||||
let client_request_id = self.build_client_request_id(session_id);
|
||||
|
||||
if Self::is_responses_model(&model_config.model_name) {
|
||||
let mut payload = create_responses_request(model_config, system, messages, tools)?;
|
||||
if is_responses_model {
|
||||
let responses_model_config;
|
||||
let request_model_config = if effective_model_name != model_config.model_name {
|
||||
responses_model_config = {
|
||||
let mut config = model_config.clone();
|
||||
config.model_name = effective_model_name.to_string();
|
||||
config
|
||||
};
|
||||
&responses_model_config
|
||||
} else {
|
||||
model_config
|
||||
};
|
||||
let mut payload =
|
||||
create_responses_request(request_model_config, system, messages, tools)?;
|
||||
payload["model"] = Value::String(endpoint_name.clone());
|
||||
if payload.get("reasoning").is_none() {
|
||||
if let Some(effort) = model_config.thinking_effort().and_then(|effort| {
|
||||
super::utils::openai_reasoning_effort_for_thinking(effective_model_name, effort)
|
||||
}) {
|
||||
payload.as_object_mut().unwrap().insert(
|
||||
"reasoning".to_string(),
|
||||
json!({
|
||||
"effort": effort,
|
||||
"summary": "auto",
|
||||
}),
|
||||
);
|
||||
}
|
||||
}
|
||||
payload["stream"] = Value::Bool(true);
|
||||
if let Some(ref client_request_id) = client_request_id {
|
||||
payload["client_request_id"] = Value::String(client_request_id.clone());
|
||||
@@ -406,8 +719,27 @@ impl Provider for DatabricksProvider {
|
||||
|
||||
stream_responses_compat(response, log)
|
||||
} else {
|
||||
let mut payload =
|
||||
create_request(model_config, system, messages, tools, &self.image_format)?;
|
||||
let format_model_config;
|
||||
let request_model_config = if Self::is_claude_model(effective_model_name)
|
||||
&& !Self::is_claude_model(&model_config.model_name)
|
||||
{
|
||||
format_model_config = {
|
||||
let mut config = model_config.clone();
|
||||
config.model_name = effective_model_name.to_string();
|
||||
config
|
||||
};
|
||||
&format_model_config
|
||||
} else {
|
||||
model_config
|
||||
};
|
||||
|
||||
let mut payload = create_request(
|
||||
request_model_config,
|
||||
system,
|
||||
messages,
|
||||
tools,
|
||||
&self.image_format,
|
||||
)?;
|
||||
payload
|
||||
.as_object_mut()
|
||||
.expect("payload should have model key")
|
||||
@@ -498,6 +830,15 @@ impl Provider for DatabricksProvider {
|
||||
}
|
||||
|
||||
/// Lists the names of all supported models, taken from
/// `fetch_supported_model_info` in the order it returns them.
async fn fetch_supported_models(&self) -> Result<Vec<String>, ProviderError> {
    let infos = self.fetch_supported_model_info().await?;
    let mut names = Vec::with_capacity(infos.len());
    for info in infos {
        names.push(info.name);
    }
    Ok(names)
}
|
||||
|
||||
async fn fetch_supported_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
|
||||
let response = self
|
||||
.api_client
|
||||
.request(None, "api/2.0/serving-endpoints")
|
||||
@@ -530,18 +871,25 @@ impl Provider for DatabricksProvider {
|
||||
)
|
||||
})?;
|
||||
|
||||
let models: Vec<String> = endpoints
|
||||
.iter()
|
||||
.filter_map(|endpoint| {
|
||||
endpoint
|
||||
.get("name")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|name| name.to_string())
|
||||
})
|
||||
.collect();
|
||||
let mut models = Vec::new();
|
||||
for endpoint in endpoints {
|
||||
if let Some(endpoint_info) = Self::endpoint_info_from_value(endpoint) {
|
||||
models.push(Self::model_info_from_endpoint(endpoint_info));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(models)
|
||||
}
|
||||
|
||||
async fn fetch_model_info(&self, model_name: &str) -> Result<ModelInfo, ProviderError> {
|
||||
let (endpoint_name, _) = super::utils::extract_reasoning_effort(model_name);
|
||||
let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await?;
|
||||
Ok(Self::model_info_from_endpoint(endpoint_info))
|
||||
}
|
||||
|
||||
/// Recommended models are exactly the supported models: this delegates to
/// `fetch_supported_model_info` without filtering.
async fn fetch_recommended_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
    self.fetch_supported_model_info().await
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -596,6 +944,7 @@ mod tests {
|
||||
super::super::api_client::AuthMethod::NoAuth,
|
||||
)
|
||||
.unwrap(),
|
||||
host: "https://example.com".to_string(),
|
||||
auth: DatabricksAuth::Token("fake".into()),
|
||||
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
|
||||
image_format: ImageFormat::OpenAi,
|
||||
@@ -628,4 +977,91 @@ mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn endpoint_metadata_marks_reasoning_alias_from_external_model() {
    // An alias endpoint ("goose") that fronts an external Claude model.
    let external_model = json!({
        "name": "claude-opus-4.6",
        "provider": "anthropic",
        "task": "llm/v1/chat"
    });
    let endpoint = json!({
        "name": "goose",
        "config": {
            "served_entities": [{
                "name": "current",
                "external_model": external_model
            }]
        }
    });

    let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap();

    assert_eq!(info.name, "goose");
    assert_eq!(info.upstream_model_name.as_deref(), Some("claude-opus-4.6"));
    assert_eq!(info.reasoning, Some(true));
}
|
||||
|
||||
#[test]
fn endpoint_metadata_captures_databricks_model_serving_hop() {
    // The upstream here is itself a Databricks serving endpoint, so both its
    // name and the "databricks-model-serving" provider must be captured.
    let external_model = json!({
        "name": "databricks-claude-opus-4-6",
        "provider": "databricks-model-serving",
        "task": "llm/v1/chat"
    });
    let endpoint = json!({
        "name": "goose",
        "config": {
            "served_entities": [{ "external_model": external_model }]
        }
    });

    let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap();

    assert_eq!(info.name, "goose");
    assert_eq!(
        info.upstream_model_name.as_deref(),
        Some("databricks-claude-opus-4-6")
    );
    assert_eq!(
        info.upstream_model_provider.as_deref(),
        Some("databricks-model-serving")
    );
    assert_eq!(info.reasoning, Some(true));
}
|
||||
|
||||
#[test]
fn endpoint_metadata_marks_reasoning_alias_from_pending_gpt_model() {
    // Only `pending_config` is present; the upstream model must still be found.
    let external_model = json!({
        "name": "gpt-5.5",
        "provider": "openai",
        "task": "llm/v1/chat"
    });
    let endpoint = json!({
        "name": "goose",
        "pending_config": {
            "served_entities": [{ "external_model": external_model }]
        }
    });

    let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap();

    assert_eq!(info.name, "goose");
    assert_eq!(info.upstream_model_name.as_deref(), Some("gpt-5.5"));
    assert_eq!(info.reasoning, Some(true));
}
|
||||
|
||||
#[test]
fn endpoint_metadata_uses_endpoint_name_when_no_upstream_model_exists() {
    // With no config at all, the reasoning flag is derived from the endpoint name.
    let endpoint = json!({ "name": "goose-gpt-5-5" });

    let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap();

    assert_eq!(info.name, "goose-gpt-5-5");
    assert!(info.upstream_model_name.is_none());
    assert_eq!(info.reasoning, Some(true));
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::conversation::message::{Message, MessageContent};
|
||||
use crate::mcp_utils::extract_text_from_resource;
|
||||
use crate::model::ModelConfig;
|
||||
use crate::model::{ModelConfig, ThinkingEffort};
|
||||
use crate::providers::base::Usage;
|
||||
use crate::providers::errors::ProviderError;
|
||||
use crate::providers::utils::{convert_image, ImageFormat};
|
||||
@@ -37,7 +37,6 @@ macro_rules! string_enum {
|
||||
}
|
||||
|
||||
string_enum!(ThinkingType { Adaptive => "adaptive", Enabled => "enabled", Disabled => "disabled" });
|
||||
string_enum!(ThinkingEffort { Low => "low", Medium => "medium", High => "high", Max => "max" });
|
||||
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
pub struct AnthropicFormatOptions {
|
||||
@@ -80,33 +79,16 @@ pub fn thinking_type(model_config: &ModelConfig) -> ThinkingType {
|
||||
}
|
||||
|
||||
let is_adaptive_model = supports_adaptive_thinking(&model_config.model_name);
|
||||
let effort = model_config.thinking_effort();
|
||||
|
||||
if let Some(s) =
|
||||
model_config.get_config_param::<String>("thinking_type", "CLAUDE_THINKING_TYPE")
|
||||
{
|
||||
let tt = s.parse::<ThinkingType>().unwrap_or_else(|e| {
|
||||
tracing::warn!("{e}");
|
||||
ThinkingType::Disabled
|
||||
});
|
||||
if tt == ThinkingType::Adaptive && !is_adaptive_model {
|
||||
tracing::warn!(
|
||||
"Adaptive thinking not supported for {}, disabling thinking",
|
||||
model_config.model_name
|
||||
);
|
||||
return ThinkingType::Disabled;
|
||||
}
|
||||
return tt;
|
||||
if effort.is_none() && legacy_thinking_budget_tokens().is_some() {
|
||||
return ThinkingType::Enabled;
|
||||
}
|
||||
|
||||
if is_adaptive_model {
|
||||
ThinkingType::Adaptive
|
||||
} else if std::env::var("CLAUDE_THINKING_ENABLED").is_ok() {
|
||||
tracing::warn!(
|
||||
"CLAUDE_THINKING_ENABLED is deprecated, use CLAUDE_THINKING_TYPE=enabled instead"
|
||||
);
|
||||
ThinkingType::Enabled
|
||||
} else {
|
||||
ThinkingType::Disabled
|
||||
match effort.unwrap_or(ThinkingEffort::Off) {
|
||||
ThinkingEffort::Off => ThinkingType::Disabled,
|
||||
_ if is_adaptive_model => ThinkingType::Adaptive,
|
||||
_ => ThinkingType::Enabled,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -510,35 +492,45 @@ pub fn get_usage(data: &Value) -> Result<Usage> {
|
||||
}
|
||||
|
||||
pub fn thinking_effort(model_config: &ModelConfig) -> ThinkingEffort {
|
||||
match model_config.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT") {
|
||||
Some(s) => s.parse().unwrap_or_else(|e| {
|
||||
tracing::warn!("{e}, defaulting to 'high'");
|
||||
ThinkingEffort::High
|
||||
}),
|
||||
None => ThinkingEffort::High,
|
||||
}
|
||||
model_config
|
||||
.thinking_effort()
|
||||
.unwrap_or(ThinkingEffort::High)
|
||||
}
|
||||
|
||||
fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
|
||||
let request_param = model_config
|
||||
pub fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
|
||||
if let Some(request_param) = model_config
|
||||
.request_params
|
||||
.as_ref()
|
||||
.and_then(|params| params.get("budget_tokens"))
|
||||
.and_then(|v| serde_json::from_value(v.clone()).ok());
|
||||
.and_then(|v| serde_json::from_value::<i32>(v.clone()).ok())
|
||||
{
|
||||
return request_param.max(1024);
|
||||
}
|
||||
|
||||
request_param
|
||||
.or_else(|| {
|
||||
crate::config::Config::global()
|
||||
.get_param::<i32>("ANTHROPIC_THINKING_BUDGET")
|
||||
.ok()
|
||||
})
|
||||
.or_else(|| {
|
||||
crate::config::Config::global()
|
||||
.get_param::<i32>("CLAUDE_THINKING_BUDGET")
|
||||
.ok()
|
||||
})
|
||||
.unwrap_or(16000)
|
||||
.max(1024)
|
||||
if let Some(budget) = legacy_thinking_budget_tokens() {
|
||||
return budget;
|
||||
}
|
||||
|
||||
let effort = model_config
|
||||
.thinking_effort()
|
||||
.unwrap_or(ThinkingEffort::High);
|
||||
match effort {
|
||||
ThinkingEffort::Off => 1024,
|
||||
ThinkingEffort::Low => 4000,
|
||||
ThinkingEffort::Medium => 10000,
|
||||
ThinkingEffort::High => 16000,
|
||||
ThinkingEffort::Max => 32000,
|
||||
}
|
||||
}
|
||||
|
||||
fn legacy_thinking_budget_tokens() -> Option<i32> {
|
||||
let config = crate::config::Config::global();
|
||||
for key in ["ANTHROPIC_THINKING_BUDGET", "CLAUDE_THINKING_BUDGET"] {
|
||||
if let Ok(budget) = config.get_param::<i32>(key) {
|
||||
return Some(budget.max(1024));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn apply_thinking_config(
|
||||
@@ -1181,14 +1173,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_request_adaptive_thinking_for_46_models() -> Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", Some("adaptive")),
|
||||
("CLAUDE_THINKING_EFFORT", Some("high")),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
]);
|
||||
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
|
||||
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), json!("high"));
|
||||
|
||||
let mut config = cfg("claude-opus-4-6");
|
||||
config.max_tokens = Some(4096);
|
||||
config.request_params = Some(params);
|
||||
let messages = vec![Message::user().with_text("Hello")];
|
||||
let payload = create_request(&config, "system", &messages, &[])?;
|
||||
|
||||
@@ -1202,27 +1194,20 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_request_enabled_thinking_with_budget() -> Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", None::<&str>),
|
||||
("CLAUDE_THINKING_EFFORT", None::<&str>),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("ANTHROPIC_THINKING_BUDGET", None::<&str>),
|
||||
("CLAUDE_THINKING_BUDGET", None::<&str>),
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>),
|
||||
]);
|
||||
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_type".to_string(), json!("enabled"));
|
||||
params.insert("budget_tokens".to_string(), json!(10000));
|
||||
|
||||
let mut config = cfg("claude-3-7-sonnet-20250219");
|
||||
let mut config = cfg_with_effort("claude-3-7-sonnet-20250219", "high");
|
||||
config.max_tokens = Some(4096);
|
||||
config.request_params = Some(params);
|
||||
|
||||
let messages = vec![Message::user().with_text("Hello")];
|
||||
let payload = create_request(&config, "system", &messages, &[])?;
|
||||
|
||||
assert_eq!(payload["thinking"]["type"], "enabled");
|
||||
assert_eq!(payload["thinking"]["budget_tokens"], 10000);
|
||||
assert_eq!(payload["max_tokens"], 4096 + 10000);
|
||||
let budget = payload["thinking"]["budget_tokens"].as_i64().unwrap();
|
||||
assert!(budget > 0);
|
||||
assert_eq!(payload["max_tokens"], 4096 + budget);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1230,12 +1215,11 @@ mod tests {
|
||||
#[test]
|
||||
fn test_create_request_disabled_thinking_no_thinking_field() -> Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", None::<&str>),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("GOOSE_THINKING_EFFORT", None::<&str>),
|
||||
("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>),
|
||||
]);
|
||||
|
||||
let config = cfg("claude-sonnet-4-20250514");
|
||||
let config = cfg_with_effort("claude-sonnet-4-20250514", "off");
|
||||
let messages = vec![Message::user().with_text("Hello")];
|
||||
let payload = create_request(&config, "system", &messages, &[])?;
|
||||
|
||||
@@ -1449,9 +1433,9 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn cfg_with_thinking(name: &str, tt: &str) -> ModelConfig {
|
||||
fn cfg_with_effort(name: &str, effort: &str) -> ModelConfig {
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_type".to_string(), json!(tt));
|
||||
params.insert("thinking_effort".to_string(), json!(effort));
|
||||
ModelConfig {
|
||||
model_name: name.to_string(),
|
||||
request_params: Some(params),
|
||||
@@ -1460,50 +1444,61 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thinking_type_explicit_params() {
|
||||
fn test_thinking_type_from_effort() {
|
||||
let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
|
||||
// Adaptive model with effort → adaptive
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("claude-opus-4-6", "adaptive")),
|
||||
thinking_type(&cfg_with_effort("claude-opus-4-6", "high")),
|
||||
ThinkingType::Adaptive
|
||||
);
|
||||
// Adaptive model with off → disabled
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("claude-opus-4-6", "disabled")),
|
||||
thinking_type(&cfg_with_effort("claude-opus-4-6", "off")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
// Non-adaptive Claude with effort → enabled
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "enabled")),
|
||||
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "high")),
|
||||
ThinkingType::Enabled
|
||||
);
|
||||
// Non-adaptive Claude with off → disabled
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "adaptive")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("claude-opus-4-6", "adapttive")),
|
||||
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_thinking_budget_uses_legacy_env() {
    // Only the legacy ANTHROPIC_THINKING_BUDGET variable is set; it must
    // determine the budget even though the config asks for "high" effort.
    let env_overrides = [
        ("GOOSE_THINKING_EFFORT", None::<&str>),
        ("ANTHROPIC_THINKING_BUDGET", Some("8192")),
        ("CLAUDE_THINKING_BUDGET", None::<&str>),
    ];
    let _guard = env_lock::lock_env(env_overrides);

    let config = cfg_with_effort("claude-3-7-sonnet-20250219", "high");
    let budget = thinking_budget_tokens(&config);

    assert_eq!(budget, 8192);
}
|
||||
|
||||
#[test]
|
||||
fn test_thinking_type_non_claude_always_disabled() {
|
||||
assert_eq!(thinking_type(&cfg("gpt-4o")), ThinkingType::Disabled);
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_thinking("gpt-4o", "enabled")),
|
||||
thinking_type(&cfg_with_effort("gpt-4o", "off")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
assert_eq!(
|
||||
thinking_type(&cfg_with_effort("gpt-4o", "high")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thinking_type_env_var_override() {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", Some("adaptive")),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
]);
|
||||
fn test_thinking_type_off_means_disabled() {
|
||||
assert_eq!(
|
||||
thinking_type(&cfg("claude-opus-4-6")),
|
||||
ThinkingType::Adaptive
|
||||
thinking_type(&cfg_with_effort("claude-opus-4-6", "off")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
assert_eq!(
|
||||
thinking_type(&cfg("claude-3-7-sonnet-20250219")),
|
||||
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")),
|
||||
ThinkingType::Disabled
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
use crate::conversation::message::{Message, MessageContent};
|
||||
use crate::model::ModelConfig;
|
||||
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
|
||||
use crate::providers::formats::anthropic::{
|
||||
thinking_budget_tokens, thinking_effort, thinking_type, ThinkingType,
|
||||
};
|
||||
use crate::providers::utils::{
|
||||
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
|
||||
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
|
||||
ImageFormat,
|
||||
is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking,
|
||||
safely_parse_json, sanitize_function_name, ImageFormat,
|
||||
};
|
||||
use anyhow::{anyhow, Error};
|
||||
use rmcp::model::{
|
||||
@@ -245,11 +247,7 @@ fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig)
|
||||
);
|
||||
}
|
||||
ThinkingType::Enabled => {
|
||||
let budget_tokens = model_config
|
||||
.get_config_param::<i32>("budget_tokens", "CLAUDE_THINKING_BUDGET")
|
||||
.unwrap_or(16000)
|
||||
.max(1024);
|
||||
|
||||
let budget_tokens = thinking_budget_tokens(model_config);
|
||||
let max_tokens = model_config.max_output_tokens() + budget_tokens;
|
||||
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
||||
obj.insert(
|
||||
@@ -582,8 +580,17 @@ pub fn create_request(
|
||||
));
|
||||
}
|
||||
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let is_openai_reasoning_model = is_openai_responses_model(&model_name);
|
||||
let reasoning_effort = if is_openai_reasoning_model {
|
||||
model_config
|
||||
.thinking_effort()
|
||||
.map_or(legacy_reasoning_effort, |effort| {
|
||||
openai_reasoning_effort_for_thinking(&model_name, effort)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let system_message = DatabricksMessage {
|
||||
role: "system".to_string(),
|
||||
@@ -652,6 +659,9 @@ pub fn create_request(
|
||||
if let Some(params) = &model_config.request_params {
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
for (key, value) in params {
|
||||
if key == "thinking_effort" {
|
||||
continue;
|
||||
}
|
||||
obj.insert(key.clone(), value.clone());
|
||||
}
|
||||
}
|
||||
@@ -1042,15 +1052,17 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_request_reasoning_effort() -> anyhow::Result<()> {
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
|
||||
let model_config = ModelConfig {
|
||||
model_name: "o3-mini-high".to_string(),
|
||||
model_name: "o3-mini".to_string(),
|
||||
context_limit: Some(4096),
|
||||
temperature: None,
|
||||
max_tokens: Some(1024),
|
||||
toolshim: false,
|
||||
toolshim_model: None,
|
||||
fast_model_config: None,
|
||||
request_params: None,
|
||||
request_params: Some(params),
|
||||
reasoning: None,
|
||||
};
|
||||
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
|
||||
@@ -1058,6 +1070,48 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
fn test_create_request_off_effort_preserves_none() -> anyhow::Result<()> {
    // "off" must reach the payload as reasoning_effort "none", and the
    // internal thinking_effort key must not be copied into the request.
    let request_params = std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        serde_json::json!("off"),
    )]);
    let model_config = ModelConfig {
        model_name: "databricks-o3-mini".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: Some(request_params),
        reasoning: None,
    };

    let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(request["reasoning_effort"], "none");
    assert!(request.get("thinking_effort").is_none());
    Ok(())
}
|
||||
|
||||
#[test]
fn test_create_request_max_effort_uses_supported_level() -> anyhow::Result<()> {
    // "max" is expected to be sent as "high" for this model, and the internal
    // thinking_effort key must not be copied into the request.
    let request_params = std::collections::HashMap::from([(
        "thinking_effort".to_string(),
        serde_json::json!("max"),
    )]);
    let model_config = ModelConfig {
        model_name: "databricks-gpt-5.2-pro".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: Some(request_params),
        reasoning: None,
    };

    let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;

    assert_eq!(request["reasoning_effort"], "high");
    assert!(request.get("thinking_effort").is_none());
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
|
||||
let model_config = ModelConfig {
|
||||
@@ -1117,15 +1171,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", Some("adaptive")),
|
||||
("CLAUDE_THINKING_EFFORT", Some("low")),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("CLAUDE_THINKING_BUDGET", None::<&str>),
|
||||
]);
|
||||
|
||||
let mut model_config = ModelConfig::new_or_fail("databricks-claude-opus-4-6");
|
||||
model_config.max_tokens = Some(4096);
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
|
||||
model_config.request_params = Some(params);
|
||||
|
||||
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
|
||||
|
||||
@@ -1140,30 +1190,47 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_request_enabled_thinking_with_budget() -> anyhow::Result<()> {
|
||||
let _guard = env_lock::lock_env([
|
||||
("CLAUDE_THINKING_TYPE", None::<&str>),
|
||||
("CLAUDE_THINKING_ENABLED", None::<&str>),
|
||||
("CLAUDE_THINKING_BUDGET", Some("10000")),
|
||||
]);
|
||||
|
||||
let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet");
|
||||
model_config.max_tokens = Some(4096);
|
||||
model_config = model_config.with_request_params(Some(std::collections::HashMap::from([(
|
||||
"thinking_type".to_string(),
|
||||
json!("enabled"),
|
||||
)])));
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
|
||||
model_config.request_params = Some(params);
|
||||
|
||||
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
|
||||
|
||||
assert_eq!(request["thinking"]["type"], "enabled");
|
||||
assert_eq!(request["thinking"]["budget_tokens"], 10000);
|
||||
assert_eq!(request["max_tokens"], 14096);
|
||||
assert_eq!(request["thinking"]["budget_tokens"], 16000);
|
||||
assert_eq!(request["max_tokens"], 20096);
|
||||
assert_eq!(request["temperature"], 2);
|
||||
assert!(request.get("max_completion_tokens").is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// For `databricks-claude-3-7-sonnet`, each thinking effort level must map to its
/// expected `budget_tokens`, with `max_tokens` raised by that same budget on top of
/// the configured 4096.
#[test]
fn test_create_request_enabled_thinking_budget_tracks_effort() -> anyhow::Result<()> {
    let cases = [
        ("low", 4000),
        ("medium", 10000),
        ("high", 16000),
        ("max", 32000),
    ];

    for (effort, expected_budget) in cases {
        let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet");
        model_config.max_tokens = Some(4096);
        model_config.request_params = Some(std::collections::HashMap::from([(
            "thinking_effort".to_string(),
            serde_json::json!(effort),
        )]));

        let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
        let thinking = &request["thinking"];

        assert_eq!(thinking["type"], "enabled");
        assert_eq!(thinking["budget_tokens"], expected_budget);
        assert_eq!(request["max_tokens"], 4096 + expected_budget);
    }

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn test_response_to_message_claude_thinking() -> anyhow::Result<()> {
|
||||
let response = json!({
|
||||
|
||||
@@ -542,22 +542,18 @@ fn get_thinking_config(model_config: &ModelConfig) -> Option<ThinkingConfig> {
|
||||
}
|
||||
|
||||
if is_gemini_3 {
|
||||
let thinking_level_str = model_config
|
||||
.get_config_param::<String>("thinking_level", "GEMINI3_THINKING_LEVEL")
|
||||
.map(|s| s.to_lowercase())
|
||||
.unwrap_or_else(|| "low".to_string());
|
||||
|
||||
let thinking_level = match thinking_level_str.as_str() {
|
||||
"high" => ThinkingLevel::High,
|
||||
"low" => ThinkingLevel::Low,
|
||||
invalid => {
|
||||
tracing::warn!(
|
||||
"Invalid thinking level '{}' for model '{}'. Valid levels: low, high. Using 'low'.",
|
||||
invalid,
|
||||
model_config.model_name,
|
||||
);
|
||||
use crate::model::ThinkingEffort;
|
||||
let effort = model_config
|
||||
.thinking_effort()
|
||||
.unwrap_or(ThinkingEffort::Off);
|
||||
if effort == ThinkingEffort::Off {
|
||||
return None;
|
||||
}
|
||||
let thinking_level = match effort {
|
||||
ThinkingEffort::Off | ThinkingEffort::Low | ThinkingEffort::Medium => {
|
||||
ThinkingLevel::Low
|
||||
}
|
||||
ThinkingEffort::High | ThinkingEffort::Max => ThinkingLevel::High,
|
||||
};
|
||||
|
||||
Some(ThinkingConfig {
|
||||
@@ -1378,7 +1374,11 @@ data: [DONE]"#;
|
||||
fn test_get_thinking_config() {
|
||||
use crate::model::ModelConfig;
|
||||
|
||||
let config = ModelConfig::new("gemini-3-pro").unwrap();
|
||||
// Test 1: Gemini 3 model with low thinking effort
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("low"));
|
||||
let mut config = ModelConfig::new("gemini-3-pro").unwrap();
|
||||
config.request_params = Some(params);
|
||||
let result = get_thinking_config(&config);
|
||||
assert!(result.is_some());
|
||||
let thinking_config = result.unwrap();
|
||||
@@ -1386,9 +1386,18 @@ data: [DONE]"#;
|
||||
assert!(thinking_config.thinking_budget.is_none());
|
||||
assert!(thinking_config.include_thoughts);
|
||||
|
||||
let config = ModelConfig::new("Gemini-3-Flash").unwrap();
|
||||
// Test 2: Gemini 3 model with high thinking effort
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), serde_json::json!("high"));
|
||||
let mut config = ModelConfig::new("Gemini-3-Flash").unwrap();
|
||||
config.request_params = Some(params);
|
||||
let result = get_thinking_config(&config);
|
||||
assert!(result.is_some());
|
||||
let thinking_config = result.unwrap();
|
||||
assert!(matches!(
|
||||
thinking_config.thinking_level,
|
||||
Some(ThinkingLevel::High)
|
||||
));
|
||||
|
||||
let config = ModelConfig::new("gemini-2.5-flash").unwrap();
|
||||
let result = get_thinking_config(&config);
|
||||
@@ -1405,7 +1414,7 @@ data: [DONE]"#;
|
||||
params.insert("thinking_budget".to_string(), json!(4096));
|
||||
let config = ModelConfig::new("gemini-2.5-flash")
|
||||
.unwrap()
|
||||
.with_request_params(Some(params));
|
||||
.with_merged_request_params(params);
|
||||
let result = get_thinking_config(&config);
|
||||
assert!(result.is_some());
|
||||
let thinking_config = result.unwrap();
|
||||
@@ -1415,7 +1424,7 @@ data: [DONE]"#;
|
||||
params.insert("thinking_budget".to_string(), json!(-1));
|
||||
let config = ModelConfig::new("gemini-2.5-flash")
|
||||
.unwrap()
|
||||
.with_request_params(Some(params));
|
||||
.with_merged_request_params(params);
|
||||
let result = get_thinking_config(&config);
|
||||
assert!(result.is_some());
|
||||
let thinking_config = result.unwrap();
|
||||
|
||||
@@ -5,8 +5,8 @@ use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usa
|
||||
use crate::providers::errors::ProviderError;
|
||||
use crate::providers::utils::{
|
||||
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
|
||||
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
|
||||
ImageFormat,
|
||||
is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking,
|
||||
safely_parse_json, sanitize_function_name, ImageFormat,
|
||||
};
|
||||
use anyhow::{anyhow, Error};
|
||||
use async_stream::try_stream;
|
||||
@@ -1239,8 +1239,17 @@ pub fn create_request_with_options(
|
||||
));
|
||||
}
|
||||
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let is_reasoning_model = is_openai_responses_model(&model_name);
|
||||
let reasoning_effort = if is_reasoning_model {
|
||||
model_config
|
||||
.thinking_effort()
|
||||
.map_or(legacy_reasoning_effort, |effort| {
|
||||
openai_reasoning_effort_for_thinking(&model_name, effort)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let system_message = json!({
|
||||
"role": if is_reasoning_model { "developer" } else { "system" },
|
||||
@@ -1299,7 +1308,7 @@ pub fn create_request_with_options(
|
||||
if let Some(params) = &model_config.request_params {
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
for (key, value) in params {
|
||||
if !is_reserved_request_param_key(key) {
|
||||
if key != "thinking_effort" && !is_reserved_request_param_key(key) {
|
||||
obj.insert(key.clone(), value.clone());
|
||||
}
|
||||
}
|
||||
@@ -2070,8 +2079,7 @@ mod tests {
|
||||
fn test_create_request_omits_max_tokens_when_unset() -> anyhow::Result<()> {
|
||||
// Unknown models on OpenAI-compatible local providers (llama_swap,
|
||||
// lmstudio) have no canonical record and no GOOSE_MAX_TOKENS, so the
|
||||
// request must not pin the legacy 4096 default — the server should
|
||||
// pick its own ceiling. See issue #9007.
|
||||
// request must not pin the legacy 4096 default. See issue #9007.
|
||||
let model_config = ModelConfig {
|
||||
model_name: "some-unknown-local-model".to_string(),
|
||||
context_limit: None,
|
||||
@@ -2164,8 +2172,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_create_request_o1_default() -> anyhow::Result<()> {
|
||||
// Without an explicit effort suffix the API picks its own default;
|
||||
// we should omit reasoning_effort entirely but still use "developer" role.
|
||||
let model_config = ModelConfig {
|
||||
model_name: "o1".to_string(),
|
||||
context_limit: Some(4096),
|
||||
@@ -2209,17 +2215,111 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> {
|
||||
// Test custom reasoning effort for O3 model
|
||||
fn test_create_request_o1_medium_effort() -> anyhow::Result<()> {
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), json!("medium"));
|
||||
let model_config = ModelConfig {
|
||||
model_name: "o3-mini-high".to_string(),
|
||||
model_name: "o1".to_string(),
|
||||
context_limit: Some(4096),
|
||||
temperature: None,
|
||||
max_tokens: Some(1024),
|
||||
toolshim: false,
|
||||
toolshim_model: None,
|
||||
fast_model_config: None,
|
||||
request_params: None,
|
||||
request_params: Some(params),
|
||||
reasoning: None,
|
||||
};
|
||||
let request = create_request(
|
||||
&model_config,
|
||||
"system",
|
||||
&[],
|
||||
&[],
|
||||
&ImageFormat::OpenAi,
|
||||
false,
|
||||
)?;
|
||||
let obj = request.as_object().unwrap();
|
||||
|
||||
assert_eq!(obj.get("reasoning_effort"), Some(&json!("medium")));
|
||||
assert!(obj.get("thinking_effort").is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// With `thinking_effort = "off"` on `o3`, the request must carry
/// `reasoning_effort = "none"` and must not forward the `thinking_effort` key.
#[test]
fn test_create_request_o3_off_effort_preserves_none() -> anyhow::Result<()> {
    let request_params =
        std::collections::HashMap::from([("thinking_effort".to_string(), json!("off"))]);
    let model_config = ModelConfig {
        model_name: "o3".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: Some(request_params),
        reasoning: None,
    };

    let request = create_request(
        &model_config,
        "system",
        &[],
        &[],
        &ImageFormat::OpenAi,
        false,
    )?;
    let fields = request.as_object().unwrap();

    assert_eq!(fields.get("reasoning_effort"), Some(&json!("none")));
    assert!(fields.get("thinking_effort").is_none());
    Ok(())
}
|
||||
|
||||
/// With `thinking_effort = "max"` on a GPT-5 pro model, the request must carry
/// `reasoning_effort = "high"` and must not forward the `thinking_effort` key.
#[test]
fn test_create_request_gpt5_pro_max_effort_uses_supported_level() -> anyhow::Result<()> {
    let request_params =
        std::collections::HashMap::from([("thinking_effort".to_string(), json!("max"))]);
    let model_config = ModelConfig {
        model_name: "gpt-5.2-pro-2025-12-11".to_string(),
        context_limit: Some(4096),
        temperature: None,
        max_tokens: Some(1024),
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: Some(request_params),
        reasoning: None,
    };

    let request = create_request(
        &model_config,
        "system",
        &[],
        &[],
        &ImageFormat::OpenAi,
        false,
    )?;
    let fields = request.as_object().unwrap();

    assert_eq!(fields.get("reasoning_effort"), Some(&json!("high")));
    assert!(fields.get("thinking_effort").is_none());
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> {
|
||||
let mut params = std::collections::HashMap::new();
|
||||
params.insert("thinking_effort".to_string(), json!("high"));
|
||||
let model_config = ModelConfig {
|
||||
model_name: "o3-mini".to_string(),
|
||||
context_limit: Some(4096),
|
||||
temperature: None,
|
||||
max_tokens: Some(1024),
|
||||
toolshim: false,
|
||||
toolshim_model: None,
|
||||
fast_model_config: None,
|
||||
request_params: Some(params),
|
||||
reasoning: None,
|
||||
};
|
||||
let request = create_request(
|
||||
@@ -2246,6 +2346,7 @@ mod tests {
|
||||
for (key, value) in expected.as_object().unwrap() {
|
||||
assert_eq!(obj.get(key).unwrap(), value);
|
||||
}
|
||||
assert!(obj.get("thinking_effort").is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2,7 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
|
||||
use crate::mcp_utils::extract_text_from_resource;
|
||||
use crate::model::ModelConfig;
|
||||
use crate::providers::base::{ProviderUsage, Usage};
|
||||
use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model};
|
||||
use crate::providers::utils::{
|
||||
extract_reasoning_effort, is_openai_responses_model, openai_reasoning_effort_for_thinking,
|
||||
};
|
||||
use anyhow::{anyhow, Error};
|
||||
use async_stream::try_stream;
|
||||
use chrono;
|
||||
@@ -541,11 +543,26 @@ pub fn create_responses_request(
|
||||
|
||||
add_message_items(&mut input_items, messages);
|
||||
|
||||
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
|
||||
// All models routed here are responses-capable; temperature is rejected
|
||||
// by the API for reasoning models regardless of whether an explicit
|
||||
// effort suffix was provided.
|
||||
let is_reasoning_model = is_openai_responses_model(&model_name);
|
||||
let reasoning_effort = if is_reasoning_model {
|
||||
if let Some(effort) = legacy_reasoning_effort.as_deref() {
|
||||
effort
|
||||
.parse()
|
||||
.ok()
|
||||
.and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort))
|
||||
.or(legacy_reasoning_effort)
|
||||
} else {
|
||||
model_config
|
||||
.thinking_effort()
|
||||
.and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut payload = json!({
|
||||
"model": model_name,
|
||||
@@ -1268,6 +1285,17 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// A model name with an effort suffix (`o3-mini-high`) must be sent as the bare
/// model (`o3-mini`) with the suffix moved into `reasoning.effort`, alongside
/// `reasoning.summary = "auto"`.
#[test]
fn test_responses_request_with_normalized_effort_suffix() {
    let config = ModelConfig::new("o3-mini-high").unwrap();

    let request = create_responses_request(&config, "You are helpful.", &[], &[]).unwrap();
    let reasoning = &request["reasoning"];

    assert_eq!(request["model"], "o3-mini");
    assert_eq!(reasoning["effort"], "high");
    assert_eq!(reasoning["summary"], "auto");
}
|
||||
|
||||
#[test]
|
||||
fn test_responses_request_without_effort_suffix_omits_reasoning() {
|
||||
for model_name in ["gpt-5.4", "o3", "gpt-5-nano"] {
|
||||
@@ -1294,6 +1322,30 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// Even when `GOOSE_THINKING_EFFORT` is set in the environment, a request for
/// `gpt-4o` must not contain a `reasoning` section.
#[test]
fn test_responses_request_non_reasoning_model_ignores_global_thinking_effort() {
    // Hold the env lock for the whole test so the override stays in place.
    let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
    let config = ModelConfig {
        model_name: "gpt-4o".to_string(),
        context_limit: None,
        temperature: None,
        max_tokens: None,
        toolshim: false,
        toolshim_model: None,
        fast_model_config: None,
        request_params: None,
        reasoning: None,
    };

    let request = create_responses_request(&config, "You are helpful.", &[], &[]).unwrap();

    assert_eq!(request["model"], "gpt-4o");
    assert!(
        request.get("reasoning").is_none(),
        "non-reasoning models should not receive reasoning config"
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_user_image_serialized_in_responses_request() {
|
||||
use crate::conversation::message::Message;
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use crate::conversation::message::{Message, MessageContent, ProviderMetadata};
|
||||
use crate::model::{ModelConfig, ThinkingEffort};
|
||||
use crate::providers::formats::openai;
|
||||
use rmcp::model::Role;
|
||||
use serde_json::{json, Value};
|
||||
@@ -87,9 +88,40 @@ pub fn add_reasoning_details_to_request(payload: &mut Value, messages: &[Message
|
||||
}
|
||||
}
|
||||
|
||||
fn reasoning_effort_for_openrouter(effort: ThinkingEffort) -> &'static str {
|
||||
match effort {
|
||||
ThinkingEffort::Off => "none",
|
||||
ThinkingEffort::Low => "low",
|
||||
ThinkingEffort::Medium => "medium",
|
||||
ThinkingEffort::High => "high",
|
||||
ThinkingEffort::Max => "xhigh",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply_reasoning_config(payload: &mut Value, model_config: &ModelConfig) {
|
||||
let Some(effort) = model_config.thinking_effort() else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
let clamped_effort = obj
|
||||
.remove("reasoning_effort")
|
||||
.and_then(|value| value.as_str().map(str::to_owned));
|
||||
if clamped_effort.is_none() && !model_config.is_reasoning_model() {
|
||||
return;
|
||||
}
|
||||
|
||||
obj.insert(
|
||||
"reasoning".to_string(),
|
||||
json!({ "effort": clamped_effort.as_deref().unwrap_or_else(|| reasoning_effort_for_openrouter(effort)) }),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
|
||||
fn test_extract_reasoning_details() {
|
||||
@@ -149,4 +181,89 @@ mod tests {
|
||||
let details = get_reasoning_details(&tool_request.metadata).unwrap();
|
||||
assert_eq!(details.len(), 1);
|
||||
}
|
||||
|
||||
/// A `reasoning_effort` already present in the payload must be moved into
/// `reasoning.effort` unchanged (here `"high"`, although the config asks for
/// `"max"`), and the flat key must be gone afterwards.
#[test]
fn test_apply_reasoning_config_uses_openrouter_reasoning_object() {
    let mut model_config = ModelConfig::new_or_fail("openai/gpt-5");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("max"),
    )]));
    let mut payload = json!({
        "model": "openai/gpt-5",
        "messages": [],
        "reasoning_effort": "high"
    });

    apply_reasoning_config(&mut payload, &model_config);

    assert_eq!(payload["reasoning"], json!({ "effort": "high" }));
    assert!(payload.get("reasoning_effort").is_none());
}
|
||||
|
||||
/// When the config explicitly marks the model as reasoning-capable, the configured
/// effort must be written to `reasoning.effort` even though the payload carries no
/// `reasoning_effort` of its own.
#[test]
fn test_apply_reasoning_config_uses_reasoning_metadata() {
    let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    model_config.reasoning = Some(true);
    let mut payload = json!({
        "model": "x-ai/grok-4",
        "messages": []
    });

    apply_reasoning_config(&mut payload, &model_config);

    assert_eq!(payload["reasoning"], json!({ "effort": "high" }));
}
|
||||
|
||||
/// Without explicit `reasoning` metadata on the config, `anthropic/claude-sonnet-4`
/// must still receive a `reasoning` object carrying the configured effort.
#[test]
fn test_apply_reasoning_config_uses_model_detection() {
    let mut model_config = ModelConfig::new_or_fail("anthropic/claude-sonnet-4");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    let mut payload = json!({
        "model": "anthropic/claude-sonnet-4",
        "messages": []
    });

    apply_reasoning_config(&mut payload, &model_config);

    assert_eq!(payload["reasoning"], json!({ "effort": "high" }));
}
|
||||
|
||||
/// A model explicitly marked as non-reasoning must not receive a `reasoning`
/// object, even when a thinking effort is configured.
#[test]
fn test_apply_reasoning_config_skips_non_reasoning_models() {
    let mut model_config = ModelConfig::new_or_fail("openai/gpt-4o");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("high"),
    )]));
    model_config.reasoning = Some(false);
    let mut payload = json!({
        "model": "openai/gpt-4o",
        "messages": []
    });

    apply_reasoning_config(&mut payload, &model_config);

    assert!(payload.get("reasoning").is_none());
}
|
||||
|
||||
/// `thinking_effort = "off"` on a reasoning model must be sent as
/// `reasoning.effort = "none"` rather than being omitted.
#[test]
fn test_apply_reasoning_config_off_disables_reasoning() {
    let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4");
    model_config.request_params = Some(HashMap::from([(
        "thinking_effort".to_string(),
        json!("off"),
    )]));
    model_config.reasoning = Some(true);
    let mut payload = json!({
        "model": "x-ai/grok-4",
        "messages": []
    });

    apply_reasoning_config(&mut payload, &model_config);

    assert_eq!(payload["reasoning"], json!({ "effort": "none" }));
}
|
||||
}
|
||||
|
||||
@@ -278,6 +278,7 @@ impl Provider for OpenRouterProvider {
|
||||
if is_gemini_model(&model_config.model_name) {
|
||||
openrouter_format::add_reasoning_details_to_request(&mut payload, messages);
|
||||
}
|
||||
openrouter_format::apply_reasoning_config(&mut payload, model_config);
|
||||
|
||||
if let Some(obj) = payload.as_object_mut() {
|
||||
obj.insert("transforms".to_string(), json!(["middle-out"]));
|
||||
|
||||
@@ -182,6 +182,7 @@ impl ProviderRegistry {
|
||||
output_token_cost: m.output_token_cost,
|
||||
currency: m.currency.clone(),
|
||||
supports_cache_control: Some(m.supports_cache_control.unwrap_or(false)),
|
||||
reasoning: m.reasoning,
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use super::base::Usage;
|
||||
use super::errors::GoogleErrorCode;
|
||||
use crate::config::paths::Paths;
|
||||
use crate::model::ModelConfig;
|
||||
use crate::model::{ModelConfig, ThinkingEffort};
|
||||
use crate::providers::errors::ProviderError;
|
||||
use anyhow::{anyhow, Result};
|
||||
use base64::Engine;
|
||||
@@ -237,6 +237,49 @@ pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
|
||||
(model_name.to_string(), None)
|
||||
}
|
||||
|
||||
pub fn openai_reasoning_effort_for_thinking(
|
||||
model_name: &str,
|
||||
effort: ThinkingEffort,
|
||||
) -> Option<String> {
|
||||
if effort == ThinkingEffort::Off {
|
||||
return Some("none".to_string());
|
||||
}
|
||||
|
||||
let supported = openai_reasoning_efforts_for_model(model_name);
|
||||
let preferred: &[&str] = match effort {
|
||||
ThinkingEffort::Off => unreachable!(),
|
||||
ThinkingEffort::Low => &["low", "medium", "high", "xhigh"],
|
||||
ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"],
|
||||
ThinkingEffort::High => &["high", "medium", "xhigh", "low"],
|
||||
ThinkingEffort::Max => &["xhigh", "high", "medium", "low"],
|
||||
};
|
||||
|
||||
preferred
|
||||
.iter()
|
||||
.find(|level| supported.contains(level))
|
||||
.map(|level| (*level).to_string())
|
||||
}
|
||||
|
||||
/// Returns the reasoning effort levels treated as supported for `model_name`.
///
/// Matching is case-insensitive and substring based:
/// * names without `gpt-5` get `low`/`medium`/`high`;
/// * `gpt-5` names containing `-pro` or `/pro` get only `high`;
/// * `gpt-5.4` / `gpt-5-4` / `gpt-5.5` / `gpt-5-5` names additionally get `xhigh`;
/// * any other `gpt-5` name gets `low`/`medium`/`high`.
fn openai_reasoning_efforts_for_model(model_name: &str) -> &'static [&'static str] {
    const STANDARD: &[&str] = &["low", "medium", "high"];
    const PRO_ONLY: &[&str] = &["high"];
    const EXTENDED: &[&str] = &["low", "medium", "high", "xhigh"];
    const XHIGH_FAMILIES: [&str; 4] = ["gpt-5.4", "gpt-5-4", "gpt-5.5", "gpt-5-5"];

    let lowered = model_name.to_ascii_lowercase();

    if !lowered.contains("gpt-5") {
        return STANDARD;
    }
    // The pro check comes first, so e.g. a "gpt-5.4-pro" name is still high-only.
    if lowered.contains("-pro") || lowered.contains("/pro") {
        return PRO_ONLY;
    }
    if XHIGH_FAMILIES.iter().any(|family| lowered.contains(*family)) {
        EXTENDED
    } else {
        STANDARD
    }
}
|
||||
|
||||
pub fn sanitize_function_name(name: &str) -> String {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re = RE.get_or_init(|| Regex::new(r"[^a-zA-Z0-9_-]").unwrap());
|
||||
|
||||
@@ -330,7 +330,7 @@ GOOSE_PROVIDER=claude-code GOOSE_MODE=approve goose session
|
||||
| `GOOSE_PROVIDER` | Set to `codex` to use this provider | None |
|
||||
| `GOOSE_MODEL` | Model to use (only known models are passed to CLI) | `gpt-5.2-codex` |
|
||||
| `CODEX_COMMAND` | Path to the Codex CLI command | `codex` |
|
||||
| `GOOSE_THINKING_EFFORT` | Unified thinking effort (`off`, `low`, `medium`, `high`, `max`). Mapped to Codex CLI effort levels (`none/low/medium/high/xhigh`). | `high` |
|
||||
| `CODEX_REASONING_EFFORT` | Reasoning effort level: `low`, `medium`, `high`, or `xhigh` (`none` is only supported on non-codex models like `gpt-5.2`) | `high` |
|
||||
| `CODEX_ENABLE_SKILLS` | Enable Codex skills: `true` or `false` | `true` |
|
||||
| `CODEX_SKIP_GIT_CHECK` | Skip git repository requirement: `true` or `false` | `false` |
|
||||
|
||||
|
||||
+80
-1
@@ -1369,6 +1369,56 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/config/providers/{name}/model-info": {
|
||||
"post": {
|
||||
"tags": [
|
||||
"super::routes::config_management"
|
||||
],
|
||||
"operationId": "get_provider_model_info",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "name",
|
||||
"in": "path",
|
||||
"description": "Provider name (e.g., openai)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ProviderModelInfoQuery"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Model metadata fetched successfully",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ModelInfo"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Unknown provider, provider not configured, or authentication error"
|
||||
},
|
||||
"429": {
|
||||
"description": "Rate limit exceeded"
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal server error"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/config/providers/{name}/models": {
|
||||
"get": {
|
||||
"tags": [
|
||||
@@ -1394,7 +1444,7 @@
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
"$ref": "#/components/schemas/ModelInfo"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6533,6 +6583,10 @@
|
||||
"description": "Cost per token for output in USD (optional)",
|
||||
"nullable": true
|
||||
},
|
||||
"reasoning": {
|
||||
"type": "boolean",
|
||||
"description": "Whether this model supports reasoning/thinking controls"
|
||||
},
|
||||
"supports_cache_control": {
|
||||
"type": "boolean",
|
||||
"description": "Whether this model supports cache control",
|
||||
@@ -6546,6 +6600,7 @@
|
||||
"provider",
|
||||
"model",
|
||||
"context_limit",
|
||||
"reasoning",
|
||||
"currency"
|
||||
],
|
||||
"properties": {
|
||||
@@ -6586,6 +6641,9 @@
|
||||
},
|
||||
"provider": {
|
||||
"type": "string"
|
||||
},
|
||||
"reasoning": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -6973,6 +7031,17 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"ProviderModelInfoQuery": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
"model"
|
||||
],
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ProviderTemplate": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
@@ -8568,6 +8637,16 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"ThinkingEffort": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"off",
|
||||
"low",
|
||||
"medium",
|
||||
"high",
|
||||
"max"
|
||||
]
|
||||
},
|
||||
"TokenState": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -818,6 +818,10 @@ export type ModelInfo = {
|
||||
* Cost per token for output in USD (optional)
|
||||
*/
|
||||
output_token_cost?: number | null;
|
||||
/**
|
||||
* Whether this model supports reasoning/thinking controls
|
||||
*/
|
||||
reasoning?: boolean;
|
||||
/**
|
||||
* Whether this model supports cache control
|
||||
*/
|
||||
@@ -834,6 +838,7 @@ export type ModelInfoData = {
|
||||
model: string;
|
||||
output_token_cost?: number | null;
|
||||
provider: string;
|
||||
reasoning: boolean;
|
||||
};
|
||||
|
||||
export type ModelInfoQuery = {
|
||||
@@ -1000,6 +1005,10 @@ export type ProviderMetadata = {
|
||||
setup_steps?: Array<string>;
|
||||
};
|
||||
|
||||
export type ProviderModelInfoQuery = {
|
||||
model: string;
|
||||
};
|
||||
|
||||
export type ProviderTemplate = {
|
||||
api_url: string;
|
||||
doc_url: string;
|
||||
@@ -1482,6 +1491,8 @@ export type ThinkingContent = {
|
||||
thinking: string;
|
||||
};
|
||||
|
||||
export type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'max';
|
||||
|
||||
export type TokenState = {
|
||||
accumulatedCost?: number | null;
|
||||
accumulatedInputTokens: number;
|
||||
@@ -2728,6 +2739,42 @@ export type CleanupProviderCacheResponses = {
|
||||
|
||||
export type CleanupProviderCacheResponse = CleanupProviderCacheResponses[keyof CleanupProviderCacheResponses];
|
||||
|
||||
export type GetProviderModelInfoData = {
|
||||
body: ProviderModelInfoQuery;
|
||||
path: {
|
||||
/**
|
||||
* Provider name (e.g., openai)
|
||||
*/
|
||||
name: string;
|
||||
};
|
||||
query?: never;
|
||||
url: '/config/providers/{name}/model-info';
|
||||
};
|
||||
|
||||
export type GetProviderModelInfoErrors = {
|
||||
/**
|
||||
* Unknown provider, provider not configured, or authentication error
|
||||
*/
|
||||
400: unknown;
|
||||
/**
|
||||
* Rate limit exceeded
|
||||
*/
|
||||
429: unknown;
|
||||
/**
|
||||
* Internal server error
|
||||
*/
|
||||
500: unknown;
|
||||
};
|
||||
|
||||
export type GetProviderModelInfoResponses = {
|
||||
/**
|
||||
* Model metadata fetched successfully
|
||||
*/
|
||||
200: ModelInfo;
|
||||
};
|
||||
|
||||
export type GetProviderModelInfoResponse = GetProviderModelInfoResponses[keyof GetProviderModelInfoResponses];
|
||||
|
||||
export type GetProviderModelsData = {
|
||||
body?: never;
|
||||
path: {
|
||||
@@ -2759,7 +2806,7 @@ export type GetProviderModelsResponses = {
|
||||
/**
|
||||
* Models fetched successfully
|
||||
*/
|
||||
200: Array<string>;
|
||||
200: Array<ModelInfo>;
|
||||
};
|
||||
|
||||
export type GetProviderModelsResponse = GetProviderModelsResponses[keyof GetProviderModelsResponses];
|
||||
|
||||
@@ -108,8 +108,8 @@ export const RecipeModelSelector = ({
|
||||
|
||||
const modelList = models || [];
|
||||
const options = modelList.map((m) => ({
|
||||
value: m,
|
||||
label: m,
|
||||
value: m.name,
|
||||
label: m.name,
|
||||
provider: p.name,
|
||||
}));
|
||||
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import { ProviderDetails, getProviderModels, listLocalModels } from '../../../api';
|
||||
import {
|
||||
ProviderDetails,
|
||||
ThinkingEffort,
|
||||
getProviderModelInfo,
|
||||
getProviderModels,
|
||||
listLocalModels,
|
||||
} from '../../../api';
|
||||
import { errorMessage as getErrorMessage } from '../../../utils/conversionUtils';
|
||||
|
||||
export default interface Model {
|
||||
@@ -9,7 +15,8 @@ export default interface Model {
|
||||
alias?: string; // optional model display name
|
||||
subtext?: string; // goes below model name if not the provider
|
||||
context_limit?: number; // optional context limit override
|
||||
request_params?: Record<string, unknown>; // provider-specific request parameters
|
||||
reasoning?: boolean; // optional reasoning/thinking support metadata
|
||||
request_params?: Record<string, unknown> & { thinking_effort?: ThinkingEffort }; // provider-specific request parameters
|
||||
}
|
||||
|
||||
export function createModelStruct(
|
||||
@@ -45,7 +52,7 @@ export async function getProviderMetadata(
|
||||
|
||||
export interface ProviderModelsResult {
|
||||
provider: ProviderDetails;
|
||||
models: string[] | null;
|
||||
models: Model[] | null;
|
||||
error: string | null;
|
||||
warning: string | null;
|
||||
}
|
||||
@@ -61,7 +68,7 @@ export async function fetchModelsForProviders(
|
||||
const allModels = response.data || [];
|
||||
const downloadedModels = allModels
|
||||
.filter((m) => m.status.state === 'Downloaded')
|
||||
.map((m) => m.id);
|
||||
.map((m) => ({ name: m.id, provider: p.name }) as Model);
|
||||
return { provider: p, models: downloadedModels, error: null, warning: null };
|
||||
}
|
||||
|
||||
@@ -69,12 +76,28 @@ export async function fetchModelsForProviders(
|
||||
path: { name: p.name },
|
||||
throwOnError: true,
|
||||
});
|
||||
const models = response.data || [];
|
||||
const models = (response.data || []).map(
|
||||
(m) =>
|
||||
({
|
||||
name: m.name,
|
||||
provider: p.name,
|
||||
context_limit: m.context_limit,
|
||||
reasoning: m.reasoning ?? undefined,
|
||||
}) as Model
|
||||
);
|
||||
return { provider: p, models, error: null, warning: null };
|
||||
} catch (e: unknown) {
|
||||
// For custom providers, fall back to the configured model list
|
||||
if (p.provider_type === 'Custom') {
|
||||
const fallbackModels = p.metadata.known_models.map((m) => m.name);
|
||||
const fallbackModels = p.metadata.known_models.map(
|
||||
(m) =>
|
||||
({
|
||||
name: m.name,
|
||||
provider: p.name,
|
||||
context_limit: m.context_limit,
|
||||
reasoning: m.reasoning ?? undefined,
|
||||
}) as Model
|
||||
);
|
||||
if (fallbackModels.length > 0) {
|
||||
console.warn(`Failed to fetch models for ${p.name}:`, getErrorMessage(e));
|
||||
return {
|
||||
@@ -99,3 +122,19 @@ export async function fetchModelsForProviders(
|
||||
|
||||
return await Promise.all(modelPromises);
|
||||
}
|
||||
|
||||
export async function fetchModelReasoning(
|
||||
provider: string,
|
||||
model: string,
|
||||
fallback?: boolean
|
||||
): Promise<boolean | null> {
|
||||
try {
|
||||
const response = await getProviderModelInfo({
|
||||
path: { name: provider },
|
||||
body: { model },
|
||||
});
|
||||
return response.data?.reasoning ?? fallback ?? null;
|
||||
} catch {
|
||||
return fallback ?? null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,12 +17,20 @@ import { Select } from '../../../ui/Select';
|
||||
import { useConfig } from '../../../ConfigContext';
|
||||
import { useModelAndProvider } from '../../../ModelAndProviderContext';
|
||||
import type { View } from '../../../../utils/navigationUtils';
|
||||
import Model, { getProviderMetadata, fetchModelsForProviders } from '../modelInterface';
|
||||
import Model, {
|
||||
fetchModelReasoning,
|
||||
fetchModelsForProviders,
|
||||
getProviderMetadata,
|
||||
} from '../modelInterface';
|
||||
import { getPredefinedModelsFromEnv, shouldShowPredefinedModels } from '../predefinedModelsUtils';
|
||||
import { ProviderType } from '../../../../api';
|
||||
import type { ProviderType, ThinkingEffort } from '../../../../api';
|
||||
import { trackModelChanged } from '../../../../utils/analytics';
|
||||
|
||||
const i18n = defineMessages({
|
||||
thinkingEffortOff: {
|
||||
id: 'switchModelModal.thinkingEffortOff',
|
||||
defaultMessage: 'Off - No extended thinking',
|
||||
},
|
||||
thinkingLevelLow: {
|
||||
id: 'switchModelModal.thinkingLevelLow',
|
||||
defaultMessage: 'Low - Better latency, lighter reasoning',
|
||||
@@ -185,16 +193,7 @@ const i18n = defineMessages({
|
||||
},
|
||||
});
|
||||
|
||||
// THINKING_LEVEL_OPTIONS and CLAUDE_THINKING_EFFORT_OPTIONS are created inside the component to support i18n.
|
||||
|
||||
function isClaudeModel(name: string | null | undefined): boolean {
|
||||
return !!name && name.toLowerCase().startsWith('claude-');
|
||||
}
|
||||
|
||||
function supportsAdaptiveThinking(name: string): boolean {
|
||||
const lower = name.toLowerCase();
|
||||
return lower.includes('claude-opus-4-6') || lower.includes('claude-sonnet-4-6');
|
||||
}
|
||||
// Thinking effort options are created inside the component to support i18n.
|
||||
|
||||
const PREFERRED_MODEL_PATTERNS = [
|
||||
/claude-sonnet-4/i,
|
||||
@@ -256,12 +255,8 @@ export const SwitchModelModal = ({
|
||||
}: SwitchModelModalProps) => {
|
||||
const intl = useIntl();
|
||||
|
||||
const THINKING_LEVEL_OPTIONS = [
|
||||
{ value: 'low', label: intl.formatMessage(i18n.thinkingLevelLow) },
|
||||
{ value: 'high', label: intl.formatMessage(i18n.thinkingLevelHigh) },
|
||||
];
|
||||
|
||||
const CLAUDE_THINKING_EFFORT_OPTIONS = [
|
||||
const THINKING_EFFORT_OPTIONS: { value: ThinkingEffort; label: string }[] = [
|
||||
{ value: 'off', label: intl.formatMessage(i18n.thinkingEffortOff) },
|
||||
{ value: 'low', label: intl.formatMessage(i18n.claudeEffortLow) },
|
||||
{ value: 'medium', label: intl.formatMessage(i18n.claudeEffortMedium) },
|
||||
{ value: 'high', label: intl.formatMessage(i18n.claudeEffortHigh) },
|
||||
@@ -278,7 +273,13 @@ export const SwitchModelModal = ({
|
||||
const currentModel = sessionModel ?? configModel;
|
||||
const currentProvider = sessionProvider ?? configProvider;
|
||||
const [providerOptions, setProviderOptions] = useState<{ value: string; label: string }[]>([]);
|
||||
type ModelOption = { value: string; label: string; provider: string; isDisabled?: boolean };
|
||||
type ModelOption = {
|
||||
value: string;
|
||||
label: string;
|
||||
provider: string;
|
||||
isDisabled?: boolean;
|
||||
reasoning?: boolean;
|
||||
};
|
||||
const [modelOptions, setModelOptions] = useState<{ options: ModelOption[] }[]>([]);
|
||||
const [provider, setProvider] = useState<string | null>(
|
||||
initialProvider || currentProvider || null
|
||||
@@ -304,43 +305,56 @@ export const SwitchModelModal = ({
|
||||
import('../../../../api').ProviderDetails[]
|
||||
>([]);
|
||||
const fetchedProviders = useRef<Set<string>>(new Set());
|
||||
const [thinkingLevel, setThinkingLevel] = useState<string>('low');
|
||||
const [claudeThinkingType, setClaudeThinkingType] = useState<string>('disabled');
|
||||
const [claudeThinkingEffort, setClaudeThinkingEffort] = useState<string>('high');
|
||||
const [claudeThinkingBudget, setClaudeThinkingBudget] = useState<string>('16000');
|
||||
const reasoningRequestId = useRef(0);
|
||||
const [thinkingEffort, setThinkingEffort] = useState<ThinkingEffort | null>(null);
|
||||
const [selectedModelReasoning, setSelectedModelReasoning] = useState<boolean | null>(null);
|
||||
|
||||
const modelName = usePredefinedModels ? selectedPredefinedModel?.name : model;
|
||||
const isGemini3Model = modelName?.toLowerCase().startsWith('gemini-3') ?? false;
|
||||
const showClaudeThinking = isClaudeModel(modelName);
|
||||
const modelSupportsAdaptive = modelName ? supportsAdaptiveThinking(modelName) : false;
|
||||
const modelReasoning = selectedModelReasoning ?? selectedPredefinedModel?.reasoning;
|
||||
const showThinkingControl = modelReasoning === true;
|
||||
const resolveSelectedModelReasoning = useCallback(
|
||||
(providerName: string, modelName: string, fallback?: boolean) => {
|
||||
const requestId = ++reasoningRequestId.current;
|
||||
setSelectedModelReasoning(fallback ?? null);
|
||||
fetchModelReasoning(providerName, modelName, fallback).then((reasoning) => {
|
||||
if (requestId === reasoningRequestId.current) {
|
||||
setSelectedModelReasoning(reasoning);
|
||||
}
|
||||
});
|
||||
},
|
||||
[]
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (!showClaudeThinking) return;
|
||||
if (claudeThinkingType === 'adaptive' && !modelSupportsAdaptive) {
|
||||
setClaudeThinkingType('disabled');
|
||||
}
|
||||
}, [modelName, showClaudeThinking, modelSupportsAdaptive, claudeThinkingType]);
|
||||
|
||||
useEffect(() => {
|
||||
const readConfig = async (key: string): Promise<string | null> => {
|
||||
try {
|
||||
const val = (await read(key, false)) as string;
|
||||
return val || null;
|
||||
} catch (e) {
|
||||
console.warn(`Could not read ${key}, using default:`, e);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
(async () => {
|
||||
const tt = await readConfig('CLAUDE_THINKING_TYPE');
|
||||
if (tt) setClaudeThinkingType(tt);
|
||||
const effort = await readConfig('CLAUDE_THINKING_EFFORT');
|
||||
if (effort) setClaudeThinkingEffort(effort);
|
||||
const budget = await readConfig('CLAUDE_THINKING_BUDGET');
|
||||
if (budget) setClaudeThinkingBudget(budget);
|
||||
try {
|
||||
const effort = (await read('GOOSE_THINKING_EFFORT', false)) as ThinkingEffort;
|
||||
if (effort) setThinkingEffort(effort);
|
||||
} catch (e) {
|
||||
console.warn('Could not read GOOSE_THINKING_EFFORT, using default:', e);
|
||||
}
|
||||
})();
|
||||
}, [read]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!provider || !model) return;
|
||||
|
||||
const selectedOption = modelOptions
|
||||
.flatMap((group) => group.options)
|
||||
.find((option) => option.provider === provider && option.value === model);
|
||||
|
||||
if (selectedOption) {
|
||||
resolveSelectedModelReasoning(provider, model, selectedOption.reasoning);
|
||||
return;
|
||||
}
|
||||
|
||||
setSelectedModelReasoning(null);
|
||||
const timeout = setTimeout(() => {
|
||||
resolveSelectedModelReasoning(provider, model);
|
||||
}, 400);
|
||||
|
||||
return () => clearTimeout(timeout);
|
||||
}, [model, provider, modelOptions, resolveSelectedModelReasoning]);
|
||||
|
||||
// Validate form data
|
||||
const validateForm = useCallback(() => {
|
||||
const errors = {
|
||||
@@ -393,36 +407,18 @@ export const SwitchModelModal = ({
|
||||
subtext: providerDisplayName,
|
||||
} as Model;
|
||||
}
|
||||
modelObj = {
|
||||
...modelObj,
|
||||
reasoning: selectedModelReasoning ?? modelObj.reasoning,
|
||||
};
|
||||
|
||||
if (isGemini3Model) {
|
||||
if (showThinkingControl) {
|
||||
const effort = thinkingEffort ?? modelObj.request_params?.thinking_effort ?? 'off';
|
||||
modelObj = {
|
||||
...modelObj,
|
||||
request_params: { ...modelObj.request_params, thinking_level: thinkingLevel },
|
||||
request_params: { ...modelObj.request_params, thinking_effort: effort },
|
||||
};
|
||||
}
|
||||
|
||||
if (showClaudeThinking) {
|
||||
const params: Record<string, unknown> = {
|
||||
...modelObj.request_params,
|
||||
thinking_type: claudeThinkingType,
|
||||
};
|
||||
if (claudeThinkingType === 'adaptive') {
|
||||
params.effort = claudeThinkingEffort;
|
||||
} else if (claudeThinkingType === 'enabled') {
|
||||
params.budget_tokens = parseInt(claudeThinkingBudget, 10) || 16000;
|
||||
}
|
||||
modelObj = { ...modelObj, request_params: params };
|
||||
|
||||
upsert('CLAUDE_THINKING_TYPE', claudeThinkingType, false).catch(console.warn);
|
||||
if (claudeThinkingType === 'adaptive') {
|
||||
upsert('CLAUDE_THINKING_EFFORT', claudeThinkingEffort, false).catch(console.warn);
|
||||
} else if (claudeThinkingType === 'enabled') {
|
||||
upsert(
|
||||
'CLAUDE_THINKING_BUDGET',
|
||||
parseInt(claudeThinkingBudget, 10) || 16000,
|
||||
false
|
||||
).catch(console.warn);
|
||||
}
|
||||
upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn);
|
||||
}
|
||||
|
||||
const success = await changeModel(sessionId, modelObj);
|
||||
@@ -450,8 +446,13 @@ export const SwitchModelModal = ({
|
||||
const matchingModel = models.find((m) => m.name === currentModel);
|
||||
if (matchingModel) {
|
||||
setSelectedPredefinedModel(matchingModel);
|
||||
resolveSelectedModelReasoning(
|
||||
matchingModel.provider,
|
||||
matchingModel.name,
|
||||
matchingModel.reasoning
|
||||
);
|
||||
}
|
||||
}, [usePredefinedModels, currentModel]);
|
||||
}, [usePredefinedModels, currentModel, resolveSelectedModelReasoning]);
|
||||
|
||||
// For manual mode: one-time sync of provider/model when session data
|
||||
// arrives after the modal has already mounted. Uses a ref so it only
|
||||
@@ -515,7 +516,7 @@ export const SwitchModelModal = ({
|
||||
if (cancelled) return;
|
||||
|
||||
const newGroupedOptions: {
|
||||
options: { value: string; label: string; provider: string; providerType: ProviderType }[];
|
||||
options: (ModelOption & { providerType: ProviderType })[];
|
||||
}[] = [];
|
||||
const newErrors: Record<string, string> = {};
|
||||
const newWarnings: Record<string, string> = {};
|
||||
@@ -536,11 +537,13 @@ export const SwitchModelModal = ({
|
||||
label: string;
|
||||
provider: string;
|
||||
providerType: ProviderType;
|
||||
reasoning?: boolean;
|
||||
}[] = modelList.map((m) => ({
|
||||
value: m,
|
||||
label: m,
|
||||
value: m.name,
|
||||
label: m.name,
|
||||
provider: p.name,
|
||||
providerType: p.provider_type,
|
||||
reasoning: m.reasoning,
|
||||
}));
|
||||
|
||||
if (p.provider_type !== 'Custom') {
|
||||
@@ -613,30 +616,51 @@ export const SwitchModelModal = ({
|
||||
}
|
||||
}, [provider, modelOptions, loadingModels, model, isCustomModel, userClearedModel, activeProvidersList]);
|
||||
|
||||
const handlePredefinedModelChange = (model: Model) => {
|
||||
setSelectedPredefinedModel(model);
|
||||
resolveSelectedModelReasoning(model.provider, model.name, model.reasoning);
|
||||
};
|
||||
|
||||
// Handle model selection change
|
||||
const handleModelChange = (newValue: unknown) => {
|
||||
const selectedOption = newValue as { value: string; label: string; provider: string } | null;
|
||||
const selectedOption = newValue as {
|
||||
value: string;
|
||||
label: string;
|
||||
provider: string;
|
||||
reasoning?: boolean;
|
||||
} | null;
|
||||
if (selectedOption?.value === 'custom') {
|
||||
setIsCustomModel(true);
|
||||
setModel('');
|
||||
setProvider(selectedOption.provider);
|
||||
setSelectedModelReasoning(null);
|
||||
setUserClearedModel(false);
|
||||
} else if (selectedOption === null) {
|
||||
// User cleared the selection
|
||||
setIsCustomModel(false);
|
||||
setModel('');
|
||||
setSelectedModelReasoning(null);
|
||||
setUserClearedModel(true);
|
||||
} else {
|
||||
setIsCustomModel(false);
|
||||
setModel(selectedOption?.value || '');
|
||||
setProvider(selectedOption?.provider || '');
|
||||
if (selectedOption?.provider && selectedOption.value) {
|
||||
resolveSelectedModelReasoning(
|
||||
selectedOption.provider,
|
||||
selectedOption.value,
|
||||
selectedOption.reasoning
|
||||
);
|
||||
} else {
|
||||
setSelectedModelReasoning(selectedOption?.reasoning ?? null);
|
||||
}
|
||||
setUserClearedModel(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Store the original model options in state, initialized from modelOptions
|
||||
const [originalModelOptions, setOriginalModelOptions] =
|
||||
useState<{ options: { value: string; label: string; provider: string }[] }[]>(modelOptions);
|
||||
useState<{ options: ModelOption[] }[]>(modelOptions);
|
||||
|
||||
const handleInputChange = (inputValue: string) => {
|
||||
if (!provider) return;
|
||||
@@ -680,54 +704,20 @@ export const SwitchModelModal = ({
|
||||
}
|
||||
};
|
||||
|
||||
const claudeThinkingTypeOptions = [
|
||||
...(modelSupportsAdaptive
|
||||
? [{ value: 'adaptive', label: intl.formatMessage(i18n.claudeAdaptive) }]
|
||||
: []),
|
||||
{ value: 'enabled', label: intl.formatMessage(i18n.claudeEnabled) },
|
||||
{ value: 'disabled', label: intl.formatMessage(i18n.claudeDisabled) },
|
||||
];
|
||||
|
||||
const claudeThinkingControls = showClaudeThinking && (
|
||||
<div className="mt-2 flex flex-col gap-3">
|
||||
<div>
|
||||
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.extendedThinking)}</label>
|
||||
<Select
|
||||
options={claudeThinkingTypeOptions}
|
||||
value={claudeThinkingTypeOptions.find((o) => o.value === claudeThinkingType)}
|
||||
onChange={(newValue: unknown) => {
|
||||
const option = newValue as { value: string; label: string } | null;
|
||||
setClaudeThinkingType(option?.value || 'disabled');
|
||||
}}
|
||||
placeholder={intl.formatMessage(i18n.selectThinkingMode)}
|
||||
/>
|
||||
</div>
|
||||
{claudeThinkingType === 'adaptive' && (
|
||||
<div>
|
||||
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.thinkingEffort)}</label>
|
||||
<Select
|
||||
options={CLAUDE_THINKING_EFFORT_OPTIONS}
|
||||
value={CLAUDE_THINKING_EFFORT_OPTIONS.find((o) => o.value === claudeThinkingEffort)}
|
||||
onChange={(newValue: unknown) => {
|
||||
const option = newValue as { value: string; label: string } | null;
|
||||
setClaudeThinkingEffort(option?.value || 'high');
|
||||
}}
|
||||
placeholder={intl.formatMessage(i18n.selectEffortLevel)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{claudeThinkingType === 'enabled' && (
|
||||
<div>
|
||||
<label className="text-sm text-textSubtle mb-1 block">{intl.formatMessage(i18n.thinkingBudget)}</label>
|
||||
<Input
|
||||
className="border-2 px-4 py-2"
|
||||
type="number"
|
||||
min="1024"
|
||||
value={claudeThinkingBudget}
|
||||
onChange={(e) => setClaudeThinkingBudget(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
const thinkingEffortControl = showThinkingControl && (
|
||||
<div className="mt-2">
|
||||
<label className="text-sm text-textSubtle mb-1 block">
|
||||
{intl.formatMessage(i18n.thinkingEffort)}
|
||||
</label>
|
||||
<Select
|
||||
options={THINKING_EFFORT_OPTIONS}
|
||||
value={THINKING_EFFORT_OPTIONS.find((o) => o.value === (thinkingEffort ?? 'off'))}
|
||||
onChange={(newValue: unknown) => {
|
||||
const option = newValue as { value: ThinkingEffort; label: string } | null;
|
||||
setThinkingEffort(option?.value || 'off');
|
||||
}}
|
||||
placeholder={intl.formatMessage(i18n.selectEffortLevel)}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -760,7 +750,7 @@ export const SwitchModelModal = ({
|
||||
? 'bg-background-secondary'
|
||||
: 'bg-background-primary hover:bg-background-secondary'
|
||||
} rounded-lg transition-all`}
|
||||
onClick={() => setSelectedPredefinedModel(model)}
|
||||
onClick={() => handlePredefinedModelChange(model)}
|
||||
>
|
||||
<div className="flex-1">
|
||||
<div className="flex items-center justify-between">
|
||||
@@ -786,7 +776,7 @@ export const SwitchModelModal = ({
|
||||
name="predefined-model"
|
||||
value={model.name}
|
||||
checked={selectedPredefinedModel?.name === model.name}
|
||||
onChange={() => setSelectedPredefinedModel(model)}
|
||||
onChange={() => handlePredefinedModelChange(model)}
|
||||
className="peer sr-only"
|
||||
/>
|
||||
<div
|
||||
@@ -805,25 +795,7 @@ export const SwitchModelModal = ({
|
||||
<div className="text-red-500 text-sm mt-1">{validationErrors.model}</div>
|
||||
)}
|
||||
|
||||
{isGemini3Model && (
|
||||
<div className="mt-2">
|
||||
<label className="text-sm text-textSubtle mb-1 block">
|
||||
{intl.formatMessage(i18n.thinkingLevel)}
|
||||
<span className="text-xs text-textMuted ml-2">{intl.formatMessage(i18n.geminiOnly)}</span>
|
||||
</label>
|
||||
<Select
|
||||
options={THINKING_LEVEL_OPTIONS}
|
||||
value={THINKING_LEVEL_OPTIONS.find((o) => o.value === thinkingLevel)}
|
||||
onChange={(newValue: unknown) => {
|
||||
const option = newValue as { value: string; label: string } | null;
|
||||
setThinkingLevel(option?.value || 'low');
|
||||
}}
|
||||
placeholder={intl.formatMessage(i18n.selectThinkingLevel)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{claudeThinkingControls}
|
||||
{thinkingEffortControl}
|
||||
</div>
|
||||
) : (
|
||||
/* Manual Provider/Model Selection */
|
||||
@@ -970,25 +942,7 @@ export const SwitchModelModal = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
{isGemini3Model && (
|
||||
<div className="mt-2">
|
||||
<label className="text-sm text-textSubtle mb-1 block">
|
||||
Thinking Level
|
||||
<span className="text-xs text-textMuted ml-2">(Gemini 3 models only)</span>
|
||||
</label>
|
||||
<Select
|
||||
options={THINKING_LEVEL_OPTIONS}
|
||||
value={THINKING_LEVEL_OPTIONS.find((o) => o.value === thinkingLevel)}
|
||||
onChange={(newValue: unknown) => {
|
||||
const option = newValue as { value: string; label: string } | null;
|
||||
setThinkingLevel(option?.value || 'low');
|
||||
}}
|
||||
placeholder="Select thinking level"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{claudeThinkingControls}
|
||||
{thinkingEffortControl}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -4475,6 +4475,9 @@
|
||||
"switchModelModal.thinkingEffort": {
|
||||
"defaultMessage": "Thinking Effort"
|
||||
},
|
||||
"switchModelModal.thinkingEffortOff": {
|
||||
"defaultMessage": "Off - No extended thinking"
|
||||
},
|
||||
"switchModelModal.thinkingLevel": {
|
||||
"defaultMessage": "Thinking Level"
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user