Fix live provider smoke defaults

Update live smoke model defaults for NIM, OpenRouter, and Gemini; normalize non-integer tool-call indexes to 0; downgrade DeepSeek forced `tool_choice` to `auto`; honor a `LOG_FILE` environment override for the server log path; and add coverage for the provider smoke fixes.
2026-06-01 22:09:04 +02:00 · 2026-05-31 13:02:15 -07:00
parent e4d6dc1f94
commit d501e5223a
21 changed files with 187 additions and 40 deletions
@@ -127,7 +127,7 @@ Browse models at [build.nvidia.com](https://build.nvidia.com/explore/discover).

 Get a key at [openrouter.ai/keys](https://openrouter.ai/keys).

-In the Admin UI, paste it into `OPENROUTER_API_KEY`, then set `MODEL` to an OpenRouter slug such as `open_router/stepfun/step-3.5-flash:free`.
+In the Admin UI, paste it into `OPENROUTER_API_KEY`, then set `MODEL` to an OpenRouter slug such as `open_router/openrouter/free`.

 Browse [all models](https://openrouter.ai/models) or [free models](https://openrouter.ai/collections/free-models).

@@ -135,14 +135,13 @@ Browse [all models](https://openrouter.ai/models) or [free models](https://openr

 Get a Gemini API key at [Google AI Studio](https://aistudio.google.com/apikey) (see Google's [Gemini OpenAI compatibility](https://ai.google.dev/gemini-api/docs/openai) docs).

-In the Admin UI, paste it into `GEMINI_API_KEY`, then set `MODEL` to a Gemini model slug such as `gemini/gemini-2.5-flash` or `gemini/gemini-3.1-flash-lite`.
+In the Admin UI, paste it into `GEMINI_API_KEY`, then set `MODEL` to a Gemini model slug such as `gemini/models/gemini-3.1-flash-lite`.

 The Gemini API exposes an OpenAI-compatible endpoint at `https://generativelanguage.googleapis.com/v1beta/openai/`. Free tier quotas are per-model; prompts may be used to improve Google's products outside the UK/CH/EEA/EU unless your account region says otherwise—see Google's terms.

 Popular examples:

- `gemini/gemini-2.5-flash`
- `gemini/gemini-3.1-flash-lite`
+- `gemini/models/gemini-3.1-flash-lite`

 ### 4. [DeepSeek](https://platform.deepseek.com/)

@@ -305,7 +304,7 @@ In the Admin UI, keep or update `OLLAMA_BASE_URL`, then set `MODEL` to the same

 Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`.

-For example, you can route Opus to `nvidia_nim/moonshotai/kimi-k2.5`, Sonnet to `open_router/deepseek/deepseek-r1-0528:free`, Haiku to `lmstudio/unsloth/GLM-4.7-Flash-GGUF`, and keep the fallback `MODEL` on `zai/glm-5.1`.
+For example, you can route Opus to `nvidia_nim/moonshotai/kimi-k2.6`, Sonnet to `open_router/openrouter/free`, Haiku to `lmstudio/qwen3.5-coder`, and keep the fallback `MODEL` on `zai/glm-5.1`.

 ## Connect Claude Code

@@ -412,7 +412,7 @@ FIELDS: tuple[ConfigFieldSpec, ...] = (
        "Default Model",
        "models",
        settings_attr="model",
-        default="nvidia_nim/z-ai/glm4.7",
+        default="nvidia_nim/nvidia/nemotron-3-super-120b-a12b",
        description="Fallback provider/model route for all Claude model names.",
    ),
    ConfigFieldSpec(
@@ -1,7 +1,9 @@
 """FastAPI application factory and configuration."""

+import os
 import traceback
 from contextlib import asynccontextmanager
+from pathlib import Path
 from typing import Any

 from fastapi import FastAPI, Request
@@ -85,9 +87,8 @@ class GracefulLifespanApp:
 def create_app(*, lifespan_enabled: bool = True) -> FastAPI:
    """Create and configure the FastAPI application."""
    settings = get_settings()
-    configure_logging(
-        server_log_path(), verbose_third_party=settings.log_raw_api_payloads
-    )
+    log_path = Path(os.getenv("LOG_FILE", server_log_path()))
+    configure_logging(log_path, verbose_third_party=settings.log_raw_api_payloads)

    app_kwargs: dict[str, Any] = {
        "title": "Claude Code Proxy",
@@ -180,7 +180,7 @@ class Settings(BaseSettings):
    # ==================== Model ====================
    # All Claude model requests are mapped to this single model (fallback)
    # Format: provider_type/model/name
-    model: str = "nvidia_nim/z-ai/glm4.7"
+    model: str = "nvidia_nim/nvidia/nemotron-3-super-120b-a12b"

    # Per-model overrides (optional, falls back to MODEL)
    # Each can use a different provider
@@ -395,6 +395,7 @@ def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
        data["messages"] = _strip_unsupported_attachment_blocks(data["messages"])
    _validate_deepseek_native_request_dict(data)
    data.pop("extra_body", None)
+    _downgrade_forced_tool_choice(data)

    has_tool_history = _has_tool_history(data)
    has_replayable_tool_thinking = _has_replayable_tool_thinking(data)
@@ -456,3 +457,19 @@ def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
        len(data.get("tools", [])),
    )
    return data
+
+
+def _downgrade_forced_tool_choice(data: dict[str, Any]) -> None:
+    tool_choice = data.get("tool_choice")
+    if not isinstance(tool_choice, dict):
+        return
+    if tool_choice.get("type") != "tool" or not isinstance(
+        tool_choice.get("name"), str
+    ):
+        return
+    logger.debug(
+        "DEEPSEEK_REQUEST: downgrading forced tool_choice to auto for unsupported "
+        "native request shape tool={}",
+        tool_choice["name"],
+    )
+    data["tool_choice"] = {"type": "auto"}
@@ -234,7 +234,8 @@ class OpenAIChatTransport(BaseProvider):
        tool_argument_alias_buffers: dict[int, str] | None = None,
    ) -> Iterator[str]:
        """Process a single tool call delta and yield SSE events."""
-        tc_index = tc.get("index", 0)
+        raw_index = tc.get("index", 0)
+        tc_index = raw_index if isinstance(raw_index, int) else 0
        if tc_index < 0:
            tc_index = len(sse.blocks.tool_states)

@@ -31,6 +31,8 @@ _UPSTREAM_UNAVAILABLE_MARKERS = (
    "overloaded",
    "capacity",
    "upstream provider",
+    "provider api request failed",
+    "httpstatuserror",
 )
 _HTTP_429_PATTERNS = (
    r'HTTP/1\.[01]" 429\b',
@@ -694,7 +696,15 @@ def _has_proxy_regression(log_delta: str) -> bool:


 def _has_proxy_request(log_delta: str) -> bool:
-    return "POST /v1/messages" in log_delta or "API_REQUEST:" in log_delta
+    return (
+        "POST /v1/messages" in log_delta
+        or "API_REQUEST:" in log_delta
+        or '"event": "api.request.received"' in log_delta
+        or (
+            '"http_method": "POST"' in log_delta
+            and '"http_path": "/v1/messages"' in log_delta
+        )
+    )


 def _tool_catalog_has(log_delta: str, tool_name: str) -> bool:
@@ -755,7 +765,8 @@ def _has_upstream_unavailable_text(text: str) -> bool:
 def _request_count(log_delta: str) -> int:
    access_log_count = log_delta.count("POST /v1/messages")
    service_log_count = log_delta.count("API_REQUEST:")
-    return max(access_log_count, service_log_count)
+    structured_log_count = log_delta.count('"event": "api.request.received"')
+    return max(access_log_count, service_log_count, structured_log_count)


 def _marker(scope: str, prefix: str) -> str:
@@ -42,8 +42,8 @@ TARGET_ALIASES = {
 SECRET_KEY_PARTS = ("KEY", "TOKEN", "SECRET", "WEBHOOK", "AUTH")

 PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
-    "nvidia_nim": "nvidia_nim/z-ai/glm4.7",
-    "open_router": "open_router/stepfun/step-3.5-flash:free",
+    "nvidia_nim": "nvidia_nim/nvidia/nemotron-3-super-120b-a12b",
+    "open_router": "open_router/openrouter/free",
    "mistral": "mistral/devstral-small-latest",
    "mistral_codestral": "mistral_codestral/codestral-latest",
    "deepseek": "deepseek/deepseek-v4-pro",
@@ -54,7 +54,7 @@ PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
    "opencode": "opencode/gpt-5.3-codex",
    "opencode_go": "opencode_go/minimax-m2.7",
    "zai": "zai/glm-5.1",
-    "gemini": "gemini/gemini-2.5-flash",
+    "gemini": "gemini/models/gemini-3.1-flash-lite",
    "groq": "groq/llama-3.3-70b-versatile",
    "cerebras": "cerebras/llama3.1-8b",
 }
@@ -31,7 +31,7 @@ def test_fcc_init_scaffolds_user_config(
        check=False,
    )
    assert result.returncode == 0, result.stderr or result.stdout
-    assert (tmp_path / ".config" / "free-claude-code" / ".env").is_file()
+    assert (tmp_path / ".fcc" / ".env").is_file()


 def test_free_claude_code_entrypoint_starts_server(smoke_config: SmokeConfig) -> None:
@@ -9,7 +9,10 @@ from core.anthropic.stream_contracts import (
 from smoke.lib.config import SmokeConfig
 from smoke.lib.http import collect_message_stream, message_payload
 from smoke.lib.server import start_server
-from smoke.lib.skips import skip_if_upstream_unavailable_events
+from smoke.lib.skips import (
+    skip_if_upstream_unavailable_events,
+    skip_if_upstream_unavailable_exception,
+)

 pytestmark = [pytest.mark.live, pytest.mark.smoke_target("tools")]

@@ -50,7 +53,11 @@ def test_live_tool_use_when_configured_model_supports_tools(
        },
        name="tools",
    ) as server:
-        events = collect_message_stream(server, payload, smoke_config)
+        try:
+            events = collect_message_stream(server, payload, smoke_config)
+        except Exception as exc:
+            skip_if_upstream_unavailable_exception(exc)
+            raise
    skip_if_upstream_unavailable_events(events)
    assert_anthropic_stream_contract(events)
    assert has_tool_use(events), "model did not emit a tool_use block"
@@ -30,7 +30,7 @@ def test_entrypoint_init_e2e(smoke_config: SmokeConfig, tmp_path: Path) -> None:
        check=False,
    )
    assert result.returncode == 0, result.stderr or result.stdout
-    env_file = tmp_path / ".config" / "free-claude-code" / ".env"
+    env_file = tmp_path / ".fcc" / ".env"
    assert env_file.is_file()
    assert env_file.read_text(encoding="utf-8").strip()

@@ -91,8 +91,9 @@ async def test_cli_session_stop_kills_child_e2e(tmp_path: Path) -> None:
    process.wait = AsyncMock(side_effect=[asyncio.TimeoutError, 0])
    session.process = process

-    stopped = await session.stop()
+    with patch("cli.session.kill_pid_tree_best_effort") as kill_tree:
+        stopped = await session.stop()

    assert stopped is True
-    process.terminate.assert_called_once()
+    kill_tree.assert_called_once_with(process.pid)
    process.kill.assert_called_once()
@@ -138,19 +138,28 @@ def test_proxy_timeout_config_e2e(smoke_config: SmokeConfig, tmp_path) -> None:

@pytest.mark.smoke_target("extensibility")
 def test_provider_registry_e2e() -> None:
-    settings = Settings(
-        open_router_api_key="openrouter-key",
-        deepseek_api_key="deepseek-key",
-        nvidia_nim_api_key="nim-key",
-        lm_studio_base_url="http://localhost:1234/v1",
-        llamacpp_base_url="http://localhost:8080/v1",
-    )
+    settings_kwargs: dict[str, str] = {}
+    for descriptor in PROVIDER_DESCRIPTORS.values():
+        if descriptor.credential_attr is not None:
+            settings_kwargs[_settings_init_key(descriptor.credential_attr)] = (
+                f"{descriptor.provider_id}-key"
+            )
+        if descriptor.base_url_attr is not None and descriptor.default_base_url:
+            settings_kwargs[_settings_init_key(descriptor.base_url_attr)] = (
+                descriptor.default_base_url
+            )
+    settings = Settings.model_validate(settings_kwargs)
    for descriptor in PROVIDER_DESCRIPTORS.values():
        config = build_provider_config(descriptor, settings)
        assert config.base_url
        assert config.api_key


+def _settings_init_key(field_name: str) -> str:
+    alias = Settings.model_fields[field_name].validation_alias
+    return alias if isinstance(alias, str) else field_name
+
+
@pytest.mark.smoke_target("extensibility")
 def test_platform_factory_e2e() -> None:
    assert create_messaging_platform("not-a-platform") is None
@@ -219,7 +219,7 @@ def test_admin_apply_writes_gemini_key_and_masks_preview(monkeypatch, tmp_path):
        "/admin/api/config/apply",
        json={
            "values": {
-                "MODEL": "gemini/gemini-2.5-flash",
+                "MODEL": "gemini/models/gemini-3.1-flash-lite",
                "GEMINI_API_KEY": "gm-secret",
            }
        },
@@ -231,7 +231,7 @@ def test_admin_apply_writes_gemini_key_and_masks_preview(monkeypatch, tmp_path):
    assert "GEMINI_API_KEY=********" in body["env_preview"]
    env_file = tmp_path / ".fcc" / ".env"
    text = env_file.read_text(encoding="utf-8")
-    assert "MODEL=gemini/gemini-2.5-flash" in text
+    assert "MODEL=gemini/models/gemini-3.1-flash-lite" in text
    assert "GEMINI_API_KEY=gm-secret" in text


@@ -644,3 +644,26 @@ def test_create_app_writes_server_log_under_fcc_home(monkeypatch, tmp_path):
    assert canonical_log.is_file()
    assert "canonical log path test" in canonical_log.read_text(encoding="utf-8")
    assert not (run_dir / "logs" / "server.log").exists()
+
+
+def test_create_app_honors_process_log_file_override(monkeypatch, tmp_path):
+    """Smoke subprocesses can redirect app logs without changing Settings."""
+    from loguru import logger
+
+    import config.logging_config as logging_config_mod
+    from api.app import create_app
+    from config.paths import server_log_path
+
+    custom_log = tmp_path / "smoke" / "server.log"
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
+    monkeypatch.setenv("LOG_FILE", str(custom_log))
+    monkeypatch.setattr(logging_config_mod, "_configured", False)
+
+    create_app(lifespan_enabled=False)
+    logger.info("process log path test")
+    logger.complete()
+
+    assert custom_log.is_file()
+    assert "process log path test" in custom_log.read_text(encoding="utf-8")
+    assert not server_log_path().exists()
@@ -32,7 +32,7 @@ class TestSettings:
        monkeypatch.delenv("HTTP_CONNECT_TIMEOUT", raising=False)
        monkeypatch.setitem(Settings.model_config, "env_file", ())
        settings = Settings()
-        assert settings.model == "nvidia_nim/z-ai/glm4.7"
+        assert settings.model == "nvidia_nim/nvidia/nemotron-3-super-120b-a12b"
        assert isinstance(settings.provider_rate_limit, int)
        assert isinstance(settings.provider_rate_window, int)
        assert isinstance(settings.nim.temperature, float)
@@ -770,7 +770,10 @@ class TestPerModelMapping:
        assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
        assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
        assert Settings.parse_provider_type("wafer/DeepSeek-V4-Pro") == "wafer"
-        assert Settings.parse_provider_type("gemini/gemini-2.5-flash") == "gemini"
+        assert (
+            Settings.parse_provider_type("gemini/models/gemini-3.1-flash-lite")
+            == "gemini"
+        )
        assert Settings.parse_provider_type("groq/llama-3.3-70b-versatile") == "groq"
        assert Settings.parse_provider_type("cerebras/llama3.1-8b") == "cerebras"

@@ -793,7 +796,8 @@ class TestPerModelMapping:
        assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
        assert Settings.parse_model_name("wafer/DeepSeek-V4-Pro") == "DeepSeek-V4-Pro"
        assert (
-            Settings.parse_model_name("gemini/gemini-2.5-flash") == "gemini-2.5-flash"
+            Settings.parse_model_name("gemini/models/gemini-3.1-flash-lite")
+            == "models/gemini-3.1-flash-lite"
        )
        assert (
            Settings.parse_model_name("groq/llama-3.3-70b-versatile")
@@ -331,6 +331,36 @@ def test_cli_matrix_agent_prompt_text_without_tool_evidence_does_not_pass(
    assert outcome.token_evidence["agent_tool_count"] == 0


+def test_cli_matrix_structured_provider_error_is_upstream_unavailable(
+    tmp_path: Path,
+) -> None:
+    run = ClaudeCliRun(
+        command=("claude", "-p", "x"),
+        returncode=0,
+        stdout="Provider API request failed. (request_id=req_123)",
+        stderr="",
+        duration_s=0.1,
+    )
+    outcome = make_outcome(
+        model="minimax/minimax-m2.5:free",
+        full_model="open_router/minimax/minimax-m2.5:free",
+        source="openrouter_free_cli_default",
+        feature="tool_use_roundtrip",
+        marker="FCC_OPENROUTER_FREE_TOOL",
+        run=run,
+        log_delta=(
+            '{"event": "api.request.received", "http_method": "POST", '
+            '"http_path": "/v1/messages"}\n'
+            '{"event": "provider.response.error", "exc_type": "HTTPStatusError"}'
+        ),
+        log_path=tmp_path / "server.log",
+        requires_tool_result=True,
+    )
+
+    assert outcome.classification == "upstream_unavailable"
+    assert outcome.request_count == 1
+
+
 def test_nvidia_nim_cli_timeout_is_not_model_missing(
    tmp_path: Path,
 ) -> None:
@@ -147,7 +147,9 @@ def test_provider_smoke_model_override_accepts_model_name_without_prefix(
 def test_provider_smoke_model_override_accepts_owner_model_name(
    monkeypatch,
 ) -> None:
-    monkeypatch.setenv("FCC_SMOKE_MODEL_NVIDIA_NIM", "z-ai/glm4.7")
+    monkeypatch.setenv(
+        "FCC_SMOKE_MODEL_NVIDIA_NIM", "nvidia/nemotron-3-super-120b-a12b"
+    )
    config = _smoke_config(
        settings=_settings(
            model="deepseek/deepseek-chat",
@@ -160,7 +162,7 @@ def test_provider_smoke_model_override_accepts_owner_model_name(

    models = config.provider_smoke_models()

-    assert models[0].full_model == "nvidia_nim/z-ai/glm4.7"
+    assert models[0].full_model == "nvidia_nim/nvidia/nemotron-3-super-120b-a12b"
    assert models[0].source == "FCC_SMOKE_MODEL_NVIDIA_NIM"


@@ -154,6 +154,31 @@ def test_build_request_body_tool_choice_keeps_thinking(deepseek_provider):
    assert body["tool_choice"] == {"type": "auto"}


+def test_build_request_body_forced_tool_choice_downgrades_to_auto(
+    deepseek_provider,
+):
+    request = MessagesRequest.model_validate(
+        {
+            "model": "m",
+            "messages": [{"role": "user", "content": "x"}],
+            "tool_choice": {"type": "tool", "name": "Read"},
+            "tools": [
+                {
+                    "name": "Read",
+                    "description": "Read a file",
+                    "input_schema": {"type": "object", "properties": {}},
+                }
+            ],
+            "thinking": {"type": "enabled", "budget_tokens": 2000},
+        }
+    )
+
+    body = deepseek_provider._build_request_body(request)
+
+    assert body["thinking"] == {"type": "enabled", "budget_tokens": 2000}
+    assert body["tool_choice"] == {"type": "auto"}
+
+
 def test_build_request_body_respects_global_thinking_disable():
    provider = DeepSeekProvider(
        ProviderConfig(
@@ -17,7 +17,7 @@ class MockMessage:

 class MockRequest:
    def __init__(self, **kwargs):
-        self.model = "gemini-2.5-flash"
+        self.model = "models/gemini-3.1-flash-lite"
        self.messages = [MockMessage("user", "Hello")]
        self.max_tokens = 100
        self.temperature = 0.5
@@ -97,7 +97,7 @@ def test_build_request_body_basic(gemini_provider):
    req = MockRequest()
    body = gemini_provider._build_request_body(req)

-    assert body["model"] == "gemini-2.5-flash"
+    assert body["model"] == "models/gemini-3.1-flash-lite"
    assert body["messages"][0]["role"] == "system"
    assert "reasoning_effort" not in body
    eb = body.get("extra_body")
@@ -25,7 +25,7 @@ class MockMessage:

 class MockRequest:
    def __init__(self, **kwargs):
-        self.model = "stepfun/step-3.5-flash:free"
+        self.model = "moonshotai/kimi-k2.6:free"
        self.messages = [MockMessage("user", "Hello")]
        self.max_tokens = 100
        self.temperature = 0.5
@@ -132,7 +132,7 @@ def test_build_request_body_is_native_anthropic(open_router_provider):
    req = MockRequest()
    body = open_router_provider._build_request_body(req)

-    assert body["model"] == "stepfun/step-3.5-flash:free"
+    assert body["model"] == "moonshotai/kimi-k2.6:free"
    assert body["temperature"] == 0.5
    assert body["stream"] is True
    assert body["messages"] == [{"role": "user", "content": "Hello"}]
@@ -708,6 +708,23 @@ class TestProcessToolCall:
        # Should not crash, should still emit events
        assert len(events) > 0

+    def test_none_tool_index_defaults_to_zero(self):
+        """Gemini may stream tool_call deltas with a null index."""
+        provider = _make_provider()
+        from core.anthropic import SSEBuilder
+
+        sse = SSEBuilder("msg_test", "test-model")
+        tc = {
+            "index": None,
+            "id": "call_none",
+            "function": {"name": "test", "arguments": "{}"},
+        }
+        events = list(provider._process_tool_call(tc, sse))
+        event_text = "".join(events)
+
+        assert "tool_use" in event_text
+        assert "call_none" in event_text
+
    def test_tool_args_emitted_as_delta(self):
        """Arguments are emitted as input_json_delta events."""
        provider = _make_provider()