mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-06-01 22:09:04 +02:00
Add per-model thinking toggles
This commit is contained in:
+6
-3
@@ -28,9 +28,12 @@ MODEL="nvidia_nim/z-ai/glm4.7"
|
||||
|
||||
|
||||
# Thinking output
|
||||
# Global switch for provider reasoning requests and Claude thinking blocks.
|
||||
# Set false to suppress thinking across NIM, OpenRouter, LM Studio, and llama.cpp.
|
||||
ENABLE_THINKING=true
|
||||
# Per-Claude-model-family switches for provider reasoning requests and Claude thinking blocks.
|
||||
# MODEL_ENABLE_THINKING applies when the incoming Claude model is not Opus, Sonnet, or Haiku.
|
||||
OPUS_ENABLE_THINKING=true
|
||||
SONNET_ENABLE_THINKING=true
|
||||
HAIKU_ENABLE_THINKING=true
|
||||
MODEL_ENABLE_THINKING=true
|
||||
|
||||
|
||||
# Provider config
|
||||
|
||||
@@ -81,8 +81,11 @@ MODEL_SONNET="nvidia_nim/moonshotai/kimi-k2-thinking"
|
||||
MODEL_HAIKU="nvidia_nim/stepfun-ai/step-3.5-flash"
|
||||
MODEL="nvidia_nim/z-ai/glm4.7" # fallback
|
||||
|
||||
# Global switch for provider reasoning requests and Claude thinking blocks.
|
||||
ENABLE_THINKING=true
|
||||
# Per-Claude-model-family switches for provider reasoning requests and Claude thinking blocks.
|
||||
OPUS_ENABLE_THINKING=true
|
||||
SONNET_ENABLE_THINKING=true
|
||||
HAIKU_ENABLE_THINKING=true
|
||||
MODEL_ENABLE_THINKING=true
|
||||
```
|
||||
|
||||
</details>
|
||||
@@ -158,7 +161,7 @@ MODEL="nvidia_nim/z-ai/glm4.7" # fallback
|
||||
|
||||
</details>
|
||||
|
||||
> Migration: `NIM_ENABLE_THINKING` was removed in this release. Rename it to `ENABLE_THINKING`.
|
||||
> Migration: `NIM_ENABLE_THINKING` and `ENABLE_THINKING` were removed in this release. Use `MODEL_ENABLE_THINKING` for fallback behavior, or `OPUS_ENABLE_THINKING`, `SONNET_ENABLE_THINKING`, and `HAIKU_ENABLE_THINKING` for family-specific control.
|
||||
|
||||
<details>
|
||||
<summary><b>Optional Authentication</b> (restrict access to your proxy)</summary>
|
||||
@@ -321,7 +324,7 @@ free-claude-code # starts the server
|
||||
- **Per-model routing**: Opus / Sonnet / Haiku requests resolve to their model-specific backend, with `MODEL` as fallback
|
||||
- **Request optimization**: 5 categories of trivial requests (quota probes, title generation, prefix detection, suggestions, filepath extraction) are intercepted and responded to locally without using API quota
|
||||
- **Format translation**: Requests are translated from Anthropic format to the provider's OpenAI-compatible format and streamed back
|
||||
- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when `ENABLE_THINKING=true`
|
||||
- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when the matching `*_ENABLE_THINKING` flag is true
|
||||
|
||||
The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /v1/messages`, `POST /v1/messages/count_tokens`, plus `HEAD`/`OPTIONS` support for the common probe endpoints.
|
||||
|
||||
@@ -507,7 +510,10 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
|
||||
| `MODEL_SONNET` | Model for Claude Sonnet requests (falls back to `MODEL`) | `open_router/arcee-ai/trinity-large-preview:free` |
|
||||
| `MODEL_HAIKU` | Model for Claude Haiku requests (falls back to `MODEL`) | `open_router/stepfun/step-3.5-flash:free` |
|
||||
| `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM |
|
||||
| `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` |
|
||||
| `OPUS_ENABLE_THINKING` | Thinking switch for Claude Opus requests | `true` |
|
||||
| `SONNET_ENABLE_THINKING` | Thinking switch for Claude Sonnet requests | `true` |
|
||||
| `HAIKU_ENABLE_THINKING` | Thinking switch for Claude Haiku requests | `true` |
|
||||
| `MODEL_ENABLE_THINKING` | Thinking switch for unrecognized Claude model names and fallback `MODEL` requests | `true` |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter |
|
||||
| `DEEPSEEK_API_KEY` | DeepSeek API key | required for DeepSeek |
|
||||
| `LM_STUDIO_BASE_URL` | LM Studio server URL | `http://localhost:1234/v1` |
|
||||
|
||||
+21
-35
@@ -29,6 +29,22 @@ def _get_proxy_value(settings: Settings, attr_name: str) -> str:
|
||||
return value if isinstance(value, str) else ""
|
||||
|
||||
|
||||
def _provider_config_kwargs(settings: Settings) -> dict:
    """Return the ``ProviderConfig`` keyword arguments shared by all providers.

    Gathers the rate-limit, concurrency, HTTP-timeout and per-model-family
    thinking values from ``settings`` into one dict, so a provider branch can
    pass ``**_provider_config_kwargs(settings)`` instead of repeating the
    same keyword list.

    Args:
        settings: Application settings exposing the ``provider_*``,
            ``http_*_timeout`` and ``*_enable_thinking`` attributes read below.

    Returns:
        A new dict keyed by ``ProviderConfig`` field name.
    """
    return {
        # Throttling / concurrency limits.
        "rate_limit": settings.provider_rate_limit,
        "rate_window": settings.provider_rate_window,
        "max_concurrency": settings.provider_max_concurrency,
        # HTTP client timeouts.
        "http_read_timeout": settings.http_read_timeout,
        "http_write_timeout": settings.http_write_timeout,
        "http_connect_timeout": settings.http_connect_timeout,
        # Per-Claude-model-family thinking switches.
        "opus_enable_thinking": settings.opus_enable_thinking,
        "sonnet_enable_thinking": settings.sonnet_enable_thinking,
        "haiku_enable_thinking": settings.haiku_enable_thinking,
        "model_enable_thinking": settings.model_enable_thinking,
    }
|
||||
|
||||
|
||||
def _create_provider_for_type(provider_type: str, settings: Settings) -> BaseProvider:
|
||||
"""Construct and return a new provider instance for the given provider type."""
|
||||
_proxy_map = {
|
||||
@@ -48,14 +64,8 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
|
||||
config = ProviderConfig(
|
||||
api_key=settings.nvidia_nim_api_key,
|
||||
base_url=NVIDIA_NIM_BASE_URL,
|
||||
rate_limit=settings.provider_rate_limit,
|
||||
rate_window=settings.provider_rate_window,
|
||||
max_concurrency=settings.provider_max_concurrency,
|
||||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
proxy=proxy,
|
||||
**_provider_config_kwargs(settings),
|
||||
)
|
||||
return NvidiaNimProvider(config, nim_settings=settings.nim)
|
||||
if provider_type == "open_router":
|
||||
@@ -67,14 +77,8 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
|
||||
config = ProviderConfig(
|
||||
api_key=settings.open_router_api_key,
|
||||
base_url=OPENROUTER_BASE_URL,
|
||||
rate_limit=settings.provider_rate_limit,
|
||||
rate_window=settings.provider_rate_window,
|
||||
max_concurrency=settings.provider_max_concurrency,
|
||||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
proxy=proxy,
|
||||
**_provider_config_kwargs(settings),
|
||||
)
|
||||
return OpenRouterProvider(config)
|
||||
if provider_type == "deepseek":
|
||||
@@ -86,41 +90,23 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
|
||||
config = ProviderConfig(
|
||||
api_key=settings.deepseek_api_key,
|
||||
base_url=DEEPSEEK_BASE_URL,
|
||||
rate_limit=settings.provider_rate_limit,
|
||||
rate_window=settings.provider_rate_window,
|
||||
max_concurrency=settings.provider_max_concurrency,
|
||||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
**_provider_config_kwargs(settings),
|
||||
)
|
||||
return DeepSeekProvider(config)
|
||||
if provider_type == "lmstudio":
|
||||
config = ProviderConfig(
|
||||
api_key="lm-studio",
|
||||
base_url=settings.lm_studio_base_url,
|
||||
rate_limit=settings.provider_rate_limit,
|
||||
rate_window=settings.provider_rate_window,
|
||||
max_concurrency=settings.provider_max_concurrency,
|
||||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
proxy=proxy,
|
||||
**_provider_config_kwargs(settings),
|
||||
)
|
||||
return LMStudioProvider(config)
|
||||
if provider_type == "llamacpp":
|
||||
config = ProviderConfig(
|
||||
api_key="llamacpp",
|
||||
base_url=settings.llamacpp_base_url,
|
||||
rate_limit=settings.provider_rate_limit,
|
||||
rate_window=settings.provider_rate_window,
|
||||
max_concurrency=settings.provider_max_concurrency,
|
||||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
proxy=proxy,
|
||||
**_provider_config_kwargs(settings),
|
||||
)
|
||||
return LlamaCppProvider(config)
|
||||
logger.error(
|
||||
|
||||
+44
-14
@@ -56,21 +56,29 @@ def _env_file_contains_key(path: Path, key: str) -> bool:
|
||||
|
||||
def _removed_env_var_message(model_config: Mapping[str, Any]) -> str | None:
    """Return a migration error for removed env vars, if present.

    For each removed variable, the process environment is checked first and
    then every env file reported by ``_configured_env_files(model_config)``.
    The first hit wins.

    Args:
        model_config: Settings model configuration, forwarded unchanged to
            ``_configured_env_files``.

    Returns:
        A human-readable migration message naming the removed variable (and
        the env file when it was found in one), or ``None`` when no removed
        variable is in use.
    """
    # Removed variable name -> migration hint appended to the error message.
    removed_vars = {
        "NIM_ENABLE_THINKING": (
            "Use MODEL_ENABLE_THINKING for fallback behavior, or "
            "OPUS_ENABLE_THINKING, SONNET_ENABLE_THINKING, and "
            "HAIKU_ENABLE_THINKING for family-specific control."
        ),
        "ENABLE_THINKING": (
            "Rename it to MODEL_ENABLE_THINKING for fallback behavior, or "
            "use OPUS_ENABLE_THINKING, SONNET_ENABLE_THINKING, and "
            "HAIKU_ENABLE_THINKING for family-specific control."
        ),
    }

    for removed_key, replacement in removed_vars.items():
        if removed_key in os.environ:
            return f"{removed_key} has been removed in this release. {replacement}"

        for env_file in _configured_env_files(model_config):
            if _env_file_contains_key(env_file, removed_key):
                return (
                    f"{removed_key} has been removed in this release. "
                    f"{replacement} Found in {env_file}."
                )

    return None
|
||||
|
||||
@@ -130,7 +138,18 @@ class Settings(BaseSettings):
|
||||
provider_max_concurrency: int = Field(
|
||||
default=5, validation_alias="PROVIDER_MAX_CONCURRENCY"
|
||||
)
|
||||
enable_thinking: bool = Field(default=True, validation_alias="ENABLE_THINKING")
|
||||
opus_enable_thinking: bool = Field(
|
||||
default=True, validation_alias="OPUS_ENABLE_THINKING"
|
||||
)
|
||||
sonnet_enable_thinking: bool = Field(
|
||||
default=True, validation_alias="SONNET_ENABLE_THINKING"
|
||||
)
|
||||
haiku_enable_thinking: bool = Field(
|
||||
default=True, validation_alias="HAIKU_ENABLE_THINKING"
|
||||
)
|
||||
model_enable_thinking: bool = Field(
|
||||
default=True, validation_alias="MODEL_ENABLE_THINKING"
|
||||
)
|
||||
|
||||
# ==================== HTTP Client Timeouts ====================
|
||||
http_read_timeout: float = Field(
|
||||
@@ -287,6 +306,17 @@ class Settings(BaseSettings):
|
||||
return self.model_sonnet
|
||||
return self.model
|
||||
|
||||
def resolve_thinking_enabled(self, claude_model_name: str) -> bool:
    """Resolve the configured thinking flag for a Claude model family.

    The family is detected by case-insensitive substring match on the model
    name, tested in the order opus, haiku, sonnet. Names matching none of
    them use ``model_enable_thinking``.

    Args:
        claude_model_name: Incoming Claude model name, e.g.
            ``"claude-opus-4-20250514"``.

    Returns:
        The ``*_enable_thinking`` setting for the matched family.
    """
    name_lower = claude_model_name.lower()
    if "opus" in name_lower:
        return self.opus_enable_thinking
    if "haiku" in name_lower:
        return self.haiku_enable_thinking
    if "sonnet" in name_lower:
        return self.sonnet_enable_thinking
    # Unrecognized family: fall back to the generic switch.
    return self.model_enable_thinking
|
||||
|
||||
@staticmethod
|
||||
def parse_provider_type(model_string: str) -> str:
|
||||
"""Extract provider type from any 'provider/model' string."""
|
||||
|
||||
+20
-2
@@ -22,7 +22,10 @@ class ProviderConfig(BaseModel):
|
||||
http_read_timeout: float = 300.0
|
||||
http_write_timeout: float = 10.0
|
||||
http_connect_timeout: float = 2.0
|
||||
enable_thinking: bool = True
|
||||
opus_enable_thinking: bool = True
|
||||
sonnet_enable_thinking: bool = True
|
||||
haiku_enable_thinking: bool = True
|
||||
model_enable_thinking: bool = True
|
||||
proxy: str = ""
|
||||
|
||||
|
||||
@@ -40,7 +43,22 @@ class BaseProvider(ABC):
|
||||
if thinking is not None and hasattr(thinking, "enabled")
|
||||
else True
|
||||
)
|
||||
return self._config.enable_thinking and request_enabled
|
||||
request_model = getattr(request, "original_model", None) or getattr(
|
||||
request, "model", ""
|
||||
)
|
||||
model_enabled = self._resolve_model_thinking_enabled(str(request_model))
|
||||
return model_enabled and request_enabled
|
||||
|
||||
def _resolve_model_thinking_enabled(self, model: str) -> bool:
    """Return the configured thinking flag for a Claude model family.

    The family is detected by case-insensitive substring match on ``model``,
    tested in the order opus, haiku, sonnet. Any other name uses
    ``model_enable_thinking`` from the provider config.

    Args:
        model: Model name taken from the request.

    Returns:
        The matching ``*_enable_thinking`` value from ``self._config``.
    """
    model_lower = model.lower()
    if "opus" in model_lower:
        return self._config.opus_enable_thinking
    if "haiku" in model_lower:
        return self._config.haiku_enable_thinking
    if "sonnet" in model_lower:
        return self._config.sonnet_enable_thinking
    # Unrecognized family: fall back to the generic switch.
    return self._config.model_enable_thinking
|
||||
|
||||
@abstractmethod
|
||||
async def cleanup(self) -> None:
|
||||
|
||||
@@ -32,7 +32,10 @@ def _make_mock_settings(**overrides):
|
||||
mock.http_read_timeout = 300.0
|
||||
mock.http_write_timeout = 10.0
|
||||
mock.http_connect_timeout = 2.0
|
||||
mock.enable_thinking = True
|
||||
mock.opus_enable_thinking = True
|
||||
mock.sonnet_enable_thinking = True
|
||||
mock.haiku_enable_thinking = True
|
||||
mock.model_enable_thinking = True
|
||||
for key, value in overrides.items():
|
||||
setattr(mock, key, value)
|
||||
return mock
|
||||
@@ -134,7 +137,7 @@ async def test_get_provider_deepseek():
|
||||
assert isinstance(provider, DeepSeekProvider)
|
||||
assert provider._base_url == "https://api.deepseek.com"
|
||||
assert provider._api_key == "test_deepseek_key"
|
||||
assert provider._config.enable_thinking is True
|
||||
assert provider._config.model_enable_thinking is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -152,18 +155,38 @@ async def test_get_provider_deepseek_uses_fixed_base_url():
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_provider_deepseek_passes_model_enable_thinking():
    """DeepSeek provider receives the fallback thinking toggle."""
    with patch("api.dependencies.get_settings") as mock_settings:
        mock_settings.return_value = _make_mock_settings(
            provider_type="deepseek",
            model_enable_thinking=False,
        )

        # Must run while get_settings is patched so the mock settings are used.
        provider = get_provider()

        assert isinstance(provider, DeepSeekProvider)
        assert provider._config.model_enable_thinking is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_provider_passes_per_model_thinking_flags():
    """Provider config receives every per-model thinking toggle."""
    with patch("api.dependencies.get_settings") as mock_settings:
        # Alternate the flags so a swapped or dropped field is detectable.
        mock_settings.return_value = _make_mock_settings(
            opus_enable_thinking=False,
            sonnet_enable_thinking=True,
            haiku_enable_thinking=False,
            model_enable_thinking=True,
        )

        # Must run while get_settings is patched so the mock settings are used.
        provider = get_provider()

        assert isinstance(provider, NvidiaNimProvider)
        assert provider._config.opus_enable_thinking is False
        assert provider._config.sonnet_enable_thinking is True
        assert provider._config.haiku_enable_thinking is False
        assert provider._config.model_enable_thinking is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -29,7 +29,10 @@ class TestSettings:
|
||||
assert isinstance(settings.provider_rate_window, int)
|
||||
assert isinstance(settings.nim.temperature, float)
|
||||
assert isinstance(settings.fast_prefix_detection, bool)
|
||||
assert isinstance(settings.enable_thinking, bool)
|
||||
assert isinstance(settings.opus_enable_thinking, bool)
|
||||
assert isinstance(settings.sonnet_enable_thinking, bool)
|
||||
assert isinstance(settings.haiku_enable_thinking, bool)
|
||||
assert isinstance(settings.model_enable_thinking, bool)
|
||||
assert settings.http_read_timeout == 120.0
|
||||
|
||||
def test_get_settings_cached(self):
|
||||
@@ -110,20 +113,34 @@ class TestSettings:
|
||||
settings = Settings()
|
||||
assert settings.http_connect_timeout == 5.0
|
||||
|
||||
def test_per_model_thinking_from_env(self, monkeypatch):
    """Per-model thinking env vars are loaded into settings."""
    from config.settings import Settings

    # Alternate true/false so each variable is shown to map to its own field.
    monkeypatch.setenv("OPUS_ENABLE_THINKING", "false")
    monkeypatch.setenv("SONNET_ENABLE_THINKING", "true")
    monkeypatch.setenv("HAIKU_ENABLE_THINKING", "false")
    monkeypatch.setenv("MODEL_ENABLE_THINKING", "true")
    settings = Settings()
    assert settings.opus_enable_thinking is False
    assert settings.sonnet_enable_thinking is True
    assert settings.haiku_enable_thinking is False
    assert settings.model_enable_thinking is True
|
||||
|
||||
def test_removed_nim_enable_thinking_raises(self, monkeypatch):
    """NIM_ENABLE_THINKING now fails fast with a migration message."""
    from config.settings import Settings

    monkeypatch.setenv("NIM_ENABLE_THINKING", "false")
    # The migration hint must point at the per-model replacement variables.
    with pytest.raises(ValidationError, match="MODEL_ENABLE_THINKING"):
        Settings()
|
||||
|
||||
def test_removed_enable_thinking_raises(self, monkeypatch):
    """ENABLE_THINKING now fails fast with a migration message."""
    from config.settings import Settings

    monkeypatch.setenv("ENABLE_THINKING", "false")
    # The migration hint must point at the per-model replacement variables.
    with pytest.raises(ValidationError, match="MODEL_ENABLE_THINKING"):
        Settings()
|
||||
|
||||
|
||||
@@ -494,6 +511,29 @@ class TestPerModelMapping:
|
||||
s.model_opus = "open_router/opus-model"
|
||||
assert s.resolve_model("Claude-OPUS-4") == "open_router/opus-model"
|
||||
|
||||
def test_resolve_thinking_enabled_per_model_family(self):
    """resolve_thinking_enabled returns the matching model-family flag."""
    from config.settings import Settings

    s = Settings()
    s.opus_enable_thinking = False
    s.sonnet_enable_thinking = True
    s.haiku_enable_thinking = False
    s.model_enable_thinking = True

    assert s.resolve_thinking_enabled("claude-opus-4-20250514") is False
    assert s.resolve_thinking_enabled("claude-sonnet-4-20250514") is True
    assert s.resolve_thinking_enabled("claude-haiku-4-20250514") is False
    # A name with no opus/sonnet/haiku marker uses the fallback flag.
    assert s.resolve_thinking_enabled("claude-2.1") is True
|
||||
|
||||
def test_resolve_thinking_enabled_case_insensitive(self):
    """Thinking model-family classification is case-insensitive."""
    from config.settings import Settings

    s = Settings()
    s.opus_enable_thinking = False
    assert s.resolve_thinking_enabled("Claude-OPUS-4") is False
|
||||
|
||||
def test_parse_provider_type(self):
|
||||
"""parse_provider_type extracts provider from model string."""
|
||||
from config.settings import Settings
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
from providers.base import BaseProvider, ProviderConfig
|
||||
|
||||
|
||||
class DummyProvider(BaseProvider):
    """Minimal concrete ``BaseProvider`` used to exercise base-class logic."""

    async def cleanup(self) -> None:
        """Do nothing; the dummy holds no resources."""
        return None

    async def stream_response(self, request, input_tokens=0, *, request_id=None):
        """Yield nothing.

        The unreachable ``yield`` makes this an async generator function, so
        calling it returns an empty async iterator.
        """
        if False:
            yield ""
|
||||
|
||||
|
||||
class DummyThinking:
    """Stand-in for a request's thinking config, exposing only ``enabled``."""

    def __init__(self, enabled: bool):
        self.enabled = enabled
|
||||
|
||||
|
||||
class DummyRequest:
    """Stand-in request carrying only ``model``, ``original_model`` and ``thinking``."""

    def __init__(
        self,
        *,
        model: str,
        original_model: str | None = None,
        thinking: DummyThinking | None = None,
    ):
        self.model = model
        self.original_model = original_model
        self.thinking = thinking
|
||||
|
||||
|
||||
def test_is_thinking_enabled_uses_original_model_family():
    """The family flag is chosen from ``original_model`` when it is set."""
    provider = DummyProvider(
        ProviderConfig(
            api_key="test",
            opus_enable_thinking=False,
            model_enable_thinking=True,
        )
    )
    # ``model`` matches no family; only ``original_model`` names Opus.
    request = DummyRequest(
        model="provider-model",
        original_model="claude-opus-4-20250514",
        thinking=DummyThinking(True),
    )

    assert provider._is_thinking_enabled(request) is False
|
||||
|
||||
|
||||
def test_is_thinking_enabled_falls_back_to_request_model():
    """With no ``original_model``, the family is taken from ``model``."""
    provider = DummyProvider(
        ProviderConfig(
            api_key="test",
            sonnet_enable_thinking=False,
            model_enable_thinking=True,
        )
    )
    request = DummyRequest(model="claude-sonnet-4-20250514")

    assert provider._is_thinking_enabled(request) is False
|
||||
|
||||
|
||||
def test_is_thinking_enabled_unknown_model_uses_fallback_flag():
    """A model name matching no family uses ``model_enable_thinking``."""
    provider = DummyProvider(
        ProviderConfig(api_key="test", model_enable_thinking=False)
    )
    request = DummyRequest(model="provider-model")

    assert provider._is_thinking_enabled(request) is False
|
||||
|
||||
|
||||
def test_is_thinking_enabled_respects_request_disable():
    """A request that disables thinking wins over an enabled family flag."""
    provider = DummyProvider(ProviderConfig(api_key="test", opus_enable_thinking=True))
    request = DummyRequest(
        model="provider-model",
        original_model="claude-opus-4-20250514",
        thinking=DummyThinking(False),
    )

    assert provider._is_thinking_enabled(request) is False
|
||||
@@ -44,7 +44,6 @@ def deepseek_config():
|
||||
base_url=DEEPSEEK_BASE_URL,
|
||||
rate_limit=10,
|
||||
rate_window=60,
|
||||
enable_thinking=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -86,15 +85,15 @@ def test_build_request_body_enables_thinking_for_chat_model(deepseek_provider):
|
||||
assert body["messages"][0]["role"] == "system"
|
||||
|
||||
|
||||
def test_build_request_body_global_disable_blocks_request_thinking():
|
||||
"""Global disable suppresses provider-side thinking even if the request enables it."""
|
||||
def test_build_request_body_model_disable_blocks_request_thinking():
|
||||
"""Model disable suppresses provider-side thinking even if the request enables it."""
|
||||
provider = DeepSeekProvider(
|
||||
ProviderConfig(
|
||||
api_key="test_deepseek_key",
|
||||
base_url=DEEPSEEK_BASE_URL,
|
||||
rate_limit=10,
|
||||
rate_window=60,
|
||||
enable_thinking=False,
|
||||
model_enable_thinking=False,
|
||||
)
|
||||
)
|
||||
req = MockRequest(model="deepseek-chat")
|
||||
@@ -103,8 +102,8 @@ def test_build_request_body_global_disable_blocks_request_thinking():
|
||||
assert "extra_body" not in body or "thinking" not in body["extra_body"]
|
||||
|
||||
|
||||
def test_build_request_body_request_disable_blocks_global_thinking(deepseek_provider):
|
||||
"""Request-level disable suppresses provider-side thinking when global is enabled."""
|
||||
def test_build_request_body_request_disable_blocks_model_thinking(deepseek_provider):
|
||||
"""Request-level disable suppresses provider-side thinking when model is enabled."""
|
||||
req = MockRequest(model="deepseek-chat")
|
||||
req.thinking.enabled = False
|
||||
body = deepseek_provider._build_request_body(req)
|
||||
|
||||
@@ -111,9 +111,9 @@ def test_init_base_url_strips_trailing_slash():
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_response_omits_thinking_when_globally_disabled(llamacpp_config):
|
||||
async def test_stream_response_omits_thinking_when_model_disabled(llamacpp_config):
|
||||
provider = LlamaCppProvider(
|
||||
llamacpp_config.model_copy(update={"enable_thinking": False})
|
||||
llamacpp_config.model_copy(update={"model_enable_thinking": False})
|
||||
)
|
||||
req = MockRequest()
|
||||
|
||||
|
||||
@@ -111,9 +111,9 @@ def test_init_base_url_strips_trailing_slash():
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_response_omits_thinking_when_globally_disabled(lmstudio_config):
|
||||
async def test_stream_response_omits_thinking_when_model_disabled(lmstudio_config):
|
||||
provider = LMStudioProvider(
|
||||
lmstudio_config.model_copy(update={"enable_thinking": False})
|
||||
lmstudio_config.model_copy(update={"model_enable_thinking": False})
|
||||
)
|
||||
req = MockRequest()
|
||||
|
||||
|
||||
@@ -121,13 +121,13 @@ async def test_build_request_body(provider_config):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_build_request_body_omits_reasoning_when_globally_disabled(
|
||||
async def test_build_request_body_omits_reasoning_when_model_disabled(
|
||||
provider_config,
|
||||
):
|
||||
from config.nim import NimSettings
|
||||
|
||||
provider = NvidiaNimProvider(
|
||||
provider_config.model_copy(update={"enable_thinking": False}),
|
||||
provider_config.model_copy(update={"model_enable_thinking": False}),
|
||||
nim_settings=NimSettings(),
|
||||
)
|
||||
req = MockRequest()
|
||||
@@ -244,7 +244,7 @@ async def test_stream_response_suppresses_thinking_when_disabled(provider_config
|
||||
from config.nim import NimSettings
|
||||
|
||||
provider = NvidiaNimProvider(
|
||||
provider_config.model_copy(update={"enable_thinking": False}),
|
||||
provider_config.model_copy(update={"model_enable_thinking": False}),
|
||||
nim_settings=NimSettings(),
|
||||
)
|
||||
req = MockRequest()
|
||||
|
||||
@@ -105,9 +105,9 @@ def test_build_request_body_has_reasoning_extra(open_router_provider):
|
||||
assert body["extra_body"]["reasoning"]["enabled"] is True
|
||||
|
||||
|
||||
def test_build_request_body_omits_reasoning_when_globally_disabled(open_router_config):
|
||||
def test_build_request_body_omits_reasoning_when_model_disabled(open_router_config):
|
||||
provider = OpenRouterProvider(
|
||||
open_router_config.model_copy(update={"enable_thinking": False})
|
||||
open_router_config.model_copy(update={"model_enable_thinking": False})
|
||||
)
|
||||
req = MockRequest()
|
||||
body = provider._build_request_body(req)
|
||||
@@ -228,7 +228,7 @@ async def test_stream_response_reasoning_content(open_router_provider):
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_response_suppresses_reasoning_when_disabled(open_router_config):
|
||||
provider = OpenRouterProvider(
|
||||
open_router_config.model_copy(update={"enable_thinking": False})
|
||||
open_router_config.model_copy(update={"model_enable_thinking": False})
|
||||
)
|
||||
req = MockRequest()
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ def _make_provider_with_thinking_enabled(enabled: bool):
|
||||
base_url="https://test.api.nvidia.com/v1",
|
||||
rate_limit=10,
|
||||
rate_window=60,
|
||||
enable_thinking=enabled,
|
||||
model_enable_thinking=enabled,
|
||||
)
|
||||
return NvidiaNimProvider(config, nim_settings=NimSettings())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user