diff --git a/README.md b/README.md index fa3976b..11e19ff 100644 --- a/README.md +++ b/README.md @@ -305,6 +305,8 @@ Claude Code 2.1.126 or later reads this proxy's `/v1/models` endpoint when `ANTH The proxy lists models for configured provider keys and referenced local providers. Picker-safe IDs are routed back to the real provider/model automatically, so no `.env` edit or separate launcher script is needed after startup. +Each provider model also has a `(no thinking)` picker variant. Use it when a model does not support Claude Code thinking or fails with adaptive-thinking requests. It routes to the same upstream model while asking Claude Code to send a non-thinking request. + ## Optional Integrations ### Discord And Telegram Bots diff --git a/api/gateway_model_ids.py b/api/gateway_model_ids.py new file mode 100644 index 0000000..bb0564a --- /dev/null +++ b/api/gateway_model_ids.py @@ -0,0 +1,54 @@ +"""Gateway-safe model id encoding for Claude Code model discovery.""" + +from __future__ import annotations + +from dataclasses import dataclass + +GATEWAY_MODEL_ID_PREFIX = "anthropic" + +# Claude Code currently treats any model id containing ``claude-3-`` as not +# supporting thinking. This intentionally uses that client-side capability +# heuristic while keeping the real provider/model ref reversible for routing. +NO_THINKING_GATEWAY_MODEL_ID_PREFIX = "claude-3-freecc-no-thinking" + + +@dataclass(frozen=True, slots=True) +class DecodedGatewayModelId: + provider_id: str + provider_model: str + force_thinking_enabled: bool | None = None + + +def gateway_model_id(provider_model_ref: str) -> str: + """Return the normal Claude Code-discoverable id for a provider/model ref.""" + return f"{GATEWAY_MODEL_ID_PREFIX}/{provider_model_ref}" + + +def no_thinking_gateway_model_id(provider_model_ref: str) -> str: + """Return a Claude Code-discoverable id that disables client thinking.""" + return f"{NO_THINKING_GATEWAY_MODEL_ID_PREFIX}/{provider_model_ref}" + + +def decode_gateway_model_id(model_name: str) -> DecodedGatewayModelId | None: + """Decode a model id advertised by this gateway, if it is one.""" + prefix, separator, remainder = model_name.partition("/") + if not separator: + return None + + force_thinking_enabled: bool | None + if prefix == GATEWAY_MODEL_ID_PREFIX: + force_thinking_enabled = None + elif prefix == NO_THINKING_GATEWAY_MODEL_ID_PREFIX: + force_thinking_enabled = False + else: + return None + + provider_id, provider_separator, provider_model = remainder.partition("/") + if not provider_separator or not provider_model: + return None + + return DecodedGatewayModelId( + provider_id=provider_id, + provider_model=provider_model, + force_thinking_enabled=force_thinking_enabled, + ) diff --git a/api/model_router.py b/api/model_router.py index c36a400..687407f 100644 --- a/api/model_router.py +++ b/api/model_router.py @@ -9,10 +9,9 @@ from loguru import logger from config.provider_ids import SUPPORTED_PROVIDER_IDS from config.settings import Settings +from .gateway_model_ids import decode_gateway_model_id from .models.anthropic import MessagesRequest, TokenCountRequest -GATEWAY_MODEL_ID_PREFIX = "anthropic" - @dataclass(frozen=True, slots=True) class ResolvedModel: @@ -42,16 +41,23 @@ class ModelRouter: self._settings = settings def resolve(self, claude_model_name: str) -> ResolvedModel: - direct_provider_id, direct_provider_model = self._direct_provider_model( - claude_model_name - ) + ( + direct_provider_id, + direct_provider_model, + force_thinking_enabled, + ) = self._direct_provider_model(claude_model_name) if direct_provider_id is not None and direct_provider_model is not None: - thinking_enabled = self._settings.resolve_thinking(direct_provider_model) + thinking_enabled = ( + force_thinking_enabled + if force_thinking_enabled is not None + else self._settings.resolve_thinking(direct_provider_model) + ) logger.debug( - "MODEL DIRECT: '{}' -> provider='{}' model='{}'", + "MODEL DIRECT: '{}' -> provider='{}' model='{}' thinking={}", claude_model_name, direct_provider_id, direct_provider_model, + thinking_enabled, ) return ResolvedModel( original_model=claude_model_name, @@ -77,29 +83,27 @@ class ModelRouter: thinking_enabled=thinking_enabled, ) - def _direct_provider_model(self, model_name: str) -> tuple[str | None, str | None]: - provider_id, separator, provider_model = model_name.partition("/") - if not separator: - return None, None - if provider_id == GATEWAY_MODEL_ID_PREFIX: - return self._gateway_encoded_provider_model(provider_model) - if provider_id not in SUPPORTED_PROVIDER_IDS: - return None, None - if not provider_model: - return None, None - return provider_id, provider_model - - def _gateway_encoded_provider_model( + def _direct_provider_model( self, model_name: str - ) -> tuple[str | None, str | None]: + ) -> tuple[str | None, str | None, bool | None]: + decoded = decode_gateway_model_id(model_name) + if decoded is not None: + if decoded.provider_id not in SUPPORTED_PROVIDER_IDS: + return None, None, None + return ( + decoded.provider_id, + decoded.provider_model, + decoded.force_thinking_enabled, + ) + provider_id, separator, provider_model = model_name.partition("/") if not separator: - return None, None + return None, None, None if provider_id not in SUPPORTED_PROVIDER_IDS: - return None, None + return None, None, None if not provider_model: - return None, None - return provider_id, provider_model + return None, None, None + return provider_id, provider_model, None def resolve_messages_request( self, request: MessagesRequest diff --git a/api/routes.py b/api/routes.py index fe72b01..1f81a51 100644 --- a/api/routes.py +++ b/api/routes.py @@ -9,6 +9,7 @@ from providers.registry import ProviderRegistry from . import dependencies from .dependencies import get_settings, require_api_key +from .gateway_model_ids import gateway_model_id, no_thinking_gateway_model_id from .models.anthropic import MessagesRequest, TokenCountRequest from .models.responses import ModelResponse, ModelsListResponse from .services import ClaudeProxyService @@ -16,7 +17,6 @@ from .services import ClaudeProxyService router = APIRouter() DISCOVERED_MODEL_CREATED_AT = "1970-01-01T00:00:00Z" -GATEWAY_MODEL_ID_PREFIX = "anthropic" SUPPORTED_CLAUDE_MODELS = [ @@ -77,10 +77,6 @@ def _probe_response(allow: str) -> Response: return Response(status_code=204, headers={"Allow": allow}) -def _gateway_model_id(provider_model_ref: str) -> str: - return f"{GATEWAY_MODEL_ID_PREFIX}/{provider_model_ref}" - - def _discovered_model_response(model_id: str, *, display_name: str) -> ModelResponse: return ModelResponse( id=model_id, @@ -98,6 +94,27 @@ def _append_unique_model( models.append(model) +def _append_provider_model_variants( + models: list[ModelResponse], seen: set[str], provider_model_ref: str +) -> None: + _append_unique_model( + models, + seen, + _discovered_model_response( + gateway_model_id(provider_model_ref), + display_name=provider_model_ref, + ), + ) + _append_unique_model( + models, + seen, + _discovered_model_response( + no_thinking_gateway_model_id(provider_model_ref), + display_name=f"{provider_model_ref} (no thinking)", + ), + ) + + def _build_models_list_response( settings: Settings, provider_registry: ProviderRegistry | None ) -> ModelsListResponse: @@ -105,23 +122,11 @@ def _build_models_list_response( seen: set[str] = set() for ref in settings.configured_chat_model_refs(): - _append_unique_model( - models, - seen, - _discovered_model_response( - _gateway_model_id(ref.model_ref), display_name=ref.model_ref - ), - ) + _append_provider_model_variants(models, seen, ref.model_ref) if provider_registry is not None: for model_ref in provider_registry.cached_prefixed_model_refs(): - _append_unique_model( - models, - seen, - _discovered_model_response( - _gateway_model_id(model_ref), display_name=model_ref - ), - ) + _append_provider_model_variants(models, seen, model_ref) for model in SUPPORTED_CLAUDE_MODELS: _append_unique_model(models, seen, model) diff --git a/tests/api/test_model_listing.py b/tests/api/test_model_listing.py index 7e11194..40f6f15 100644 --- a/tests/api/test_model_listing.py +++ b/tests/api/test_model_listing.py @@ -34,18 +34,29 @@ def test_models_list_includes_configured_refs_cached_provider_models_and_aliases data = response.json() ids = [item["id"] for item in data["data"]] - assert ids[:3] == [ + assert ids[:6] == [ "anthropic/deepseek/deepseek-chat", + "claude-3-freecc-no-thinking/deepseek/deepseek-chat", "anthropic/open_router/anthropic/claude-opus", + "claude-3-freecc-no-thinking/open_router/anthropic/claude-opus", "anthropic/open_router/meta/llama-3.3", + "claude-3-freecc-no-thinking/open_router/meta/llama-3.3", ] assert ids.count("anthropic/deepseek/deepseek-chat") == 1 + assert ids.count("claude-3-freecc-no-thinking/deepseek/deepseek-chat") == 1 assert ids.count("anthropic/open_router/anthropic/claude-opus") == 1 + assert ( + ids.count("claude-3-freecc-no-thinking/open_router/anthropic/claude-opus") == 1 + ) display_names = {item["id"]: item["display_name"] for item in data["data"]} assert ( display_names["anthropic/open_router/meta/llama-3.3"] == "open_router/meta/llama-3.3" ) + assert ( + display_names["claude-3-freecc-no-thinking/open_router/meta/llama-3.3"] + == "open_router/meta/llama-3.3 (no thinking)" + ) assert "claude-sonnet-4-20250514" in ids assert data["first_id"] == ids[0] assert data["last_id"] == ids[-1] @@ -64,8 +75,10 @@ def test_models_list_works_without_provider_registry(): assert response.status_code == 200 ids = [item["id"] for item in response.json()["data"]] - assert ids[:2] == [ + assert ids[:4] == [ "anthropic/deepseek/deepseek-chat", + "claude-3-freecc-no-thinking/deepseek/deepseek-chat", "anthropic/open_router/anthropic/claude-opus", + "claude-3-freecc-no-thinking/open_router/anthropic/claude-opus", ] assert "claude-sonnet-4-20250514" in ids diff --git a/tests/api/test_model_router.py b/tests/api/test_model_router.py index 9440c48..9152928 100644 --- a/tests/api/test_model_router.py +++ b/tests/api/test_model_router.py @@ -131,6 +131,27 @@ def test_model_router_routes_gateway_encoded_provider_model_directly(settings): ) +def test_model_router_routes_no_thinking_gateway_model_directly(settings): + settings.enable_model_thinking = True + + routed = ModelRouter(settings).resolve_messages_request( + MessagesRequest( + model="claude-3-freecc-no-thinking/nvidia_nim/deepseek-ai/deepseek-v4-pro", + max_tokens=100, + messages=[Message(role="user", content="hello")], + ) + ) + + assert routed.request.model == "deepseek-ai/deepseek-v4-pro" + assert ( + routed.resolved.original_model + == "claude-3-freecc-no-thinking/nvidia_nim/deepseek-ai/deepseek-v4-pro" + ) + assert routed.resolved.provider_id == "nvidia_nim" + assert routed.resolved.provider_model == "deepseek-ai/deepseek-v4-pro" + assert routed.resolved.thinking_enabled is False + + def test_model_router_direct_prefixed_model_uses_provider_model_for_thinking(settings): settings.enable_model_thinking = False settings.enable_opus_thinking = True