Reorder providers in README and other places

2026-06-02 06:13:46 +02:00 · 2026-05-23 19:16:22 -07:00
parent a4d7d76040
commit 26c5b35698
5 changed files with 180 additions and 144 deletions
@@ -64,7 +64,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
 # All Claude model requests are mapped to these models, plain model is fallback
 # Format: provider_type/model/name
-# Valid providers: "nvidia_nim" | "open_router" | "mistral" | "mistral_codestral" | "deepseek" | "kimi" | "wafer" | "lmstudio" | "llamacpp" | "ollama" | "opencode" | "opencode_go" | "zai" | "fireworks" | "gemini" | "groq" | "cerebras"
+# Valid providers: "nvidia_nim" | "open_router" | "gemini" | "deepseek" | "mistral" | "mistral_codestral" | "opencode" | "opencode_go" | "wafer" | "kimi" | "cerebras" | "groq" | "fireworks" | "zai" | "lmstudio" | "llamacpp" | "ollama"
 MODEL_OPUS=
 MODEL_SONNET=
 MODEL_HAIKU=
@@ -37,7 +37,7 @@ Free Claude Code routes Anthropic Messages API traffic from Claude Code to any p
 ## What You Get
 - Drop-in proxy for Claude Code's Anthropic API calls.
- Seventeen provider backends: NVIDIA NIM, OpenRouter, Mistral La Plateforme, Mistral Codestral, DeepSeek, Kimi, Wafer, LM Studio, llama.cpp, Ollama, OpenCode Zen, OpenCode Go, Z.ai, Fireworks AI, Google AI Studio (Gemini), Groq, and Cerebras Inference.
+- Seventeen provider backends: NVIDIA NIM, OpenRouter, Google AI Studio (Gemini), DeepSeek, Mistral La Plateforme, Mistral Codestral, OpenCode Zen, OpenCode Go, Wafer, Kimi, Cerebras Inference, Groq, Fireworks AI, Z.ai, LM Studio, llama.cpp, and Ollama.
 - Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers.
 - Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)).
 - Streaming, tool use, reasoning/thinking block handling, and local request optimizations.
@@ -183,34 +183,7 @@ Popular examples:
 This provider uses Wafer's Anthropic-compatible endpoint at `https://pass.wafer.ai/v1/messages`.
-### 8. [LM Studio](https://lmstudio.ai/)
+### 8. [OpenCode Zen](https://opencode.ai/)
 Start LM Studio's local server and load a model. In the Admin UI, keep or update `LM_STUDIO_BASE_URL`, then set `MODEL` to the model identifier shown by LM Studio, prefixed with `lmstudio/`.
 Prefer models with tool-use support for Claude Code workflows.
 ### 9. [llama.cpp](https://github.com/ggml-org/llama.cpp)
 Start `llama-server` with an Anthropic-compatible `/v1/messages` endpoint and enough context for Claude Code requests.
 In the Admin UI, keep or update `LLAMACPP_BASE_URL`, then set `MODEL` to the local model slug, prefixed with `llamacpp/`.
 For local coding models, context size matters. If llama.cpp returns HTTP 400 for normal Claude Code requests, increase `--ctx-size` and verify the model/server build supports the requested features.
 ### 10. [Ollama](https://ollama.com/)
 Run Ollama and pull a model:
 ```bash
 ollama pull llama3.1
 ollama serve
 ```
 In the Admin UI, keep or update `OLLAMA_BASE_URL`, then set `MODEL` to the same tag shown by `ollama list`, prefixed with `ollama/`.
 `OLLAMA_BASE_URL` is the Ollama server root; do not append `/v1`. Example model slugs include `ollama/llama3.1` and `ollama/llama3.1:8b`.
 ### 11. [OpenCode Zen](https://opencode.ai/)
 Get an API key at [opencode.ai/auth](https://opencode.ai/auth).
@@ -229,7 +202,7 @@ Popular examples:
 Browse available models at [opencode.ai](https://opencode.ai).
-### 12. [OpenCode Go](https://opencode.ai/)
+### 9. [OpenCode Go](https://opencode.ai/)
 Get an API key at [opencode.ai/auth](https://opencode.ai/auth) (same as OpenCode Zen).
@@ -243,7 +216,7 @@ Popular examples:
 Browse available models at [opencode.ai](https://opencode.ai).
-### 13. [Z.ai](https://z.ai/)
+### 10. [Z.ai](https://z.ai/)
 Get an API key at [Z.ai/manage-apikey/apikey-list](https://z.ai/manage-apikey/apikey-list).
@@ -258,7 +231,7 @@ Popular examples:
 Browse models at [Z.ai](https://z.ai).
-### 14. [Fireworks AI](https://fireworks.ai/)
+### 11. [Fireworks AI](https://fireworks.ai/)
 Get an API key at [fireworks.ai/account/api-keys](https://fireworks.ai/account/api-keys).
@@ -268,7 +241,7 @@ Fireworks exposes an **Anthropic-compatible** Messages API at `https://api.firew
 Browse models at [fireworks.ai/models](https://fireworks.ai/models).
-### 15. [Google AI Studio (Gemini)](https://aistudio.google.com/)
+### 12. [Google AI Studio (Gemini)](https://aistudio.google.com/)
 Get a Gemini API key at [Google AI Studio](https://aistudio.google.com/apikey) (see Google's [Gemini OpenAI compatibility](https://ai.google.dev/gemini-api/docs/openai) docs).
@@ -281,7 +254,7 @@ Popular examples:
 - `gemini/gemini-2.5-flash`
 - `gemini/gemini-3.1-flash-lite`
-### 16. [Groq](https://console.groq.com/)
+### 13. [Groq](https://console.groq.com/)
 Get an API key at [console.groq.com/keys](https://console.groq.com/keys).
@@ -293,7 +266,7 @@ Reasoning-heavy models expose extra knobs documented under [Groq reasoning](http
 Browse models at [console.groq.com/docs/models](https://console.groq.com/docs/models).
-### 17. [Cerebras Inference](https://inference-docs.cerebras.ai/quickstart)
+### 14. [Cerebras Inference](https://inference-docs.cerebras.ai/quickstart)
 Sign up and create an API key in the [Cerebras Cloud Console](https://cloud.cerebras.ai) (see [Quickstart](https://inference-docs.cerebras.ai/quickstart)).
@@ -301,6 +274,33 @@ In the Admin UI, set `CEREBRAS_API_KEY`, then route with `MODEL` such as `cerebr
 Cerebras exposes an OpenAI-compatible API at `https://api.cerebras.ai/v1` ([OpenAI compatibility](https://inference-docs.cerebras.ai/resources/openai)). Non-standard request fields should go in `extra_body` when using the OpenAI client; see the same page. For reasoning models and parameters, see [Reasoning](https://inference-docs.cerebras.ai/capabilities/reasoning). This proxy follows other OpenAI-compat adapters for thinking via `reasoning_content` when Claude-style thinking is enabled.
 ### 15. [LM Studio](https://lmstudio.ai/)
 Start LM Studio's local server and load a model. In the Admin UI, keep or update `LM_STUDIO_BASE_URL`, then set `MODEL` to the model identifier shown by LM Studio, prefixed with `lmstudio/`.
 Prefer models with tool-use support for Claude Code workflows.
 ### 16. [llama.cpp](https://github.com/ggml-org/llama.cpp)
 Start `llama-server` with an Anthropic-compatible `/v1/messages` endpoint and enough context for Claude Code requests.
 In the Admin UI, keep or update `LLAMACPP_BASE_URL`, then set `MODEL` to the local model slug, prefixed with `llamacpp/`.
 For local coding models, context size matters. If llama.cpp returns HTTP 400 for normal Claude Code requests, increase `--ctx-size` and verify the model/server build supports the requested features.
 ### 17. [Ollama](https://ollama.com/)
 Run Ollama and pull a model:
 ```bash
 ollama pull llama3.1
 ollama serve
 ```
 In the Admin UI, keep or update `OLLAMA_BASE_URL`, then set `MODEL` to the same tag shown by `ollama list`, prefixed with `ollama/`.
 `OLLAMA_BASE_URL` is the Ollama server root; do not append `/v1`. Example model slugs include `ollama/llama3.1` and `ollama/llama3.1:8b`.
 ### 18. Mix Providers By Model Tier
 Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`.
@@ -75,6 +75,25 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
        proxy_attr="open_router_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
    ),
    "gemini": ProviderDescriptor(
        provider_id="gemini",
        transport_type="openai_chat",
        credential_env="GEMINI_API_KEY",
        credential_url="https://aistudio.google.com/apikey",
        credential_attr="gemini_api_key",
        default_base_url=GEMINI_DEFAULT_BASE,
        proxy_attr="gemini_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "deepseek": ProviderDescriptor(
        provider_id="deepseek",
        transport_type="anthropic_messages",
        credential_env="DEEPSEEK_API_KEY",
        credential_url="https://platform.deepseek.com/api_keys",
        credential_attr="deepseek_api_key",
        default_base_url=DEEPSEEK_ANTHROPIC_DEFAULT_BASE,
        capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
    ),
    "mistral": ProviderDescriptor(
        provider_id="mistral",
        transport_type="openai_chat",
@@ -95,13 +114,34 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
        proxy_attr="codestral_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
-    "deepseek": ProviderDescriptor(
+    "opencode": ProviderDescriptor(
-        provider_id="deepseek",
+        provider_id="opencode",
        transport_type="openai_chat",
        credential_env="OPENCODE_API_KEY",
        credential_url="https://opencode.ai/auth",
        credential_attr="opencode_api_key",
        default_base_url=OPENCODE_DEFAULT_BASE,
        proxy_attr="opencode_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "opencode_go": ProviderDescriptor(
        provider_id="opencode_go",
        transport_type="openai_chat",
        credential_env="OPENCODE_API_KEY",
        credential_url="https://opencode.ai/auth",
        credential_attr="opencode_api_key",
        default_base_url=OPENCODE_GO_DEFAULT_BASE,
        proxy_attr="opencode_go_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "wafer": ProviderDescriptor(
        provider_id="wafer",
        transport_type="anthropic_messages",
-        credential_env="DEEPSEEK_API_KEY",
+        credential_env="WAFER_API_KEY",
-        credential_url="https://platform.deepseek.com/api_keys",
+        credential_url="https://www.wafer.ai/pass",
-        credential_attr="deepseek_api_key",
+        credential_attr="wafer_api_key",
-        default_base_url=DEEPSEEK_ANTHROPIC_DEFAULT_BASE,
+        default_base_url=WAFER_DEFAULT_BASE,
        proxy_attr="wafer_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
    ),
    "kimi": ProviderDescriptor(
@@ -120,15 +160,58 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
            "native_anthropic",
        ),
    ),
-    "wafer": ProviderDescriptor(
+    "cerebras": ProviderDescriptor(
-        provider_id="wafer",
+        provider_id="cerebras",
        transport_type="openai_chat",
        credential_env="CEREBRAS_API_KEY",
        credential_url="https://cloud.cerebras.ai",
        credential_attr="cerebras_api_key",
        default_base_url=CEREBRAS_DEFAULT_BASE,
        proxy_attr="cerebras_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "groq": ProviderDescriptor(
        provider_id="groq",
        transport_type="openai_chat",
        credential_env="GROQ_API_KEY",
        credential_url="https://console.groq.com/keys",
        credential_attr="groq_api_key",
        default_base_url=GROQ_DEFAULT_BASE,
        proxy_attr="groq_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "fireworks": ProviderDescriptor(
        provider_id="fireworks",
        transport_type="anthropic_messages",
-        credential_env="WAFER_API_KEY",
+        credential_env="FIREWORKS_API_KEY",
-        credential_url="https://www.wafer.ai/pass",
+        credential_url="https://fireworks.ai/account/api-keys",
-        credential_attr="wafer_api_key",
+        credential_attr="fireworks_api_key",
-        default_base_url=WAFER_DEFAULT_BASE,
+        default_base_url=FIREWORKS_DEFAULT_BASE,
-        proxy_attr="wafer_proxy",
+        proxy_attr="fireworks_proxy",
-        capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
+        capabilities=(
            "chat",
            "streaming",
            "tools",
            "thinking",
            "native_anthropic",
            "rate_limit",
        ),
    ),
    "zai": ProviderDescriptor(
        provider_id="zai",
        transport_type="anthropic_messages",
        credential_env="ZAI_API_KEY",
        credential_attr="zai_api_key",
        default_base_url=ZAI_DEFAULT_BASE,
        proxy_attr="zai_proxy",
        capabilities=(
            "chat",
            "streaming",
            "tools",
            "thinking",
            "native_anthropic",
            "rate_limit",
        ),
    ),
    "lmstudio": ProviderDescriptor(
        provider_id="lmstudio",
@@ -163,92 +246,13 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
            "local",
        ),
    ),
    "opencode": ProviderDescriptor(
        provider_id="opencode",
        transport_type="openai_chat",
        credential_env="OPENCODE_API_KEY",
        credential_url="https://opencode.ai/auth",
        credential_attr="opencode_api_key",
        default_base_url=OPENCODE_DEFAULT_BASE,
        proxy_attr="opencode_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "opencode_go": ProviderDescriptor(
        provider_id="opencode_go",
        transport_type="openai_chat",
        credential_env="OPENCODE_API_KEY",
        credential_url="https://opencode.ai/auth",
        credential_attr="opencode_api_key",
        default_base_url=OPENCODE_GO_DEFAULT_BASE,
        proxy_attr="opencode_go_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "zai": ProviderDescriptor(
        provider_id="zai",
        transport_type="anthropic_messages",
        credential_env="ZAI_API_KEY",
        credential_attr="zai_api_key",
        default_base_url=ZAI_DEFAULT_BASE,
        proxy_attr="zai_proxy",
        capabilities=(
            "chat",
            "streaming",
            "tools",
            "thinking",
            "native_anthropic",
            "rate_limit",
        ),
    ),
    "fireworks": ProviderDescriptor(
        provider_id="fireworks",
        transport_type="anthropic_messages",
        credential_env="FIREWORKS_API_KEY",
        credential_url="https://fireworks.ai/account/api-keys",
        credential_attr="fireworks_api_key",
        default_base_url=FIREWORKS_DEFAULT_BASE,
        proxy_attr="fireworks_proxy",
        capabilities=(
            "chat",
            "streaming",
            "tools",
            "thinking",
            "native_anthropic",
            "rate_limit",
        ),
    ),
    "gemini": ProviderDescriptor(
        provider_id="gemini",
        transport_type="openai_chat",
        credential_env="GEMINI_API_KEY",
        credential_url="https://aistudio.google.com/apikey",
        credential_attr="gemini_api_key",
        default_base_url=GEMINI_DEFAULT_BASE,
        proxy_attr="gemini_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "groq": ProviderDescriptor(
        provider_id="groq",
        transport_type="openai_chat",
        credential_env="GROQ_API_KEY",
        credential_url="https://console.groq.com/keys",
        credential_attr="groq_api_key",
        default_base_url=GROQ_DEFAULT_BASE,
        proxy_attr="groq_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
    "cerebras": ProviderDescriptor(
        provider_id="cerebras",
        transport_type="openai_chat",
        credential_env="CEREBRAS_API_KEY",
        credential_url="https://cloud.cerebras.ai",
        credential_attr="cerebras_api_key",
        default_base_url=CEREBRAS_DEFAULT_BASE,
        proxy_attr="cerebras_proxy",
        capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
    ),
 }
-# Order matches docs / historical error text; must match PROVIDER_CATALOG keys.
+# Key order:
 # NVIDIA NIM comes first (README default), DeepSeek fourth, Wafer ninth, and Kimi tenth; then the
 # cerebras / groq / fireworks overlap; the remaining providers and the local backends come last, per
 # the project plan (the github.com/cheahjs/free-llm-api-resources "Free Providers" TOC serves as a
 # rough guide beyond the fixed slots).
 # ``SUPPORTED_PROVIDER_IDS`` inherits this insertion order for UI and error-message listing.
 SUPPORTED_PROVIDER_IDS: tuple[str, ...] = tuple(PROVIDER_CATALOG.keys())
 if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
@@ -139,21 +139,21 @@ def _create_cerebras(config: ProviderConfig, _settings: Settings) -> BaseProvide
 PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
    "nvidia_nim": _create_nvidia_nim,
    "open_router": _create_open_router,
    "gemini": _create_gemini,
    "deepseek": _create_deepseek,
    "mistral": _create_mistral,
    "mistral_codestral": _create_mistral_codestral,
-    "deepseek": _create_deepseek,
+    "opencode": _create_opencode,
    "opencode_go": _create_opencode_go,
    "wafer": _create_wafer,
    "kimi": _create_kimi,
    "cerebras": _create_cerebras,
    "groq": _create_groq,
    "fireworks": _create_fireworks,
    "zai": _create_zai,
    "lmstudio": _create_lmstudio,
    "llamacpp": _create_llamacpp,
    "ollama": _create_ollama,
    "kimi": _create_kimi,
    "wafer": _create_wafer,
    "opencode": _create_opencode,
    "opencode_go": _create_opencode_go,
    "zai": _create_zai,
    "fireworks": _create_fireworks,
    "gemini": _create_gemini,
    "groq": _create_groq,
    "cerebras": _create_cerebras,
 }
 if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
@@ -0,0 +1,32 @@
 """Freeze ``PROVIDER_CATALOG`` insertion order used as canonical provider ranking."""
 from __future__ import annotations
 from config.provider_catalog import PROVIDER_CATALOG, SUPPORTED_PROVIDER_IDS
 # Expected provider ranking, listed by provider id. The test in this module requires both
 # ``PROVIDER_CATALOG``'s key order and ``SUPPORTED_PROVIDER_IDS`` to equal this tuple exactly,
 # so any reordering of the catalog must be mirrored here.
 _EXPECTED_PROVIDER_ORDER: tuple[str, ...] = (
    "nvidia_nim",
    "open_router",
    "gemini",
    "deepseek",
    "mistral",
    "mistral_codestral",
    "opencode",
    "opencode_go",
    "wafer",
    "kimi",
    "cerebras",
    "groq",
    "fireworks",
    "zai",
    "lmstudio",
    "llamacpp",
    "ollama",
 )
 def test_provider_catalog_key_order_matches_canonical_plan() -> None:
    """Pin the provider ranking.

    Both the catalog's key order and ``SUPPORTED_PROVIDER_IDS`` must equal
    ``_EXPECTED_PROVIDER_ORDER`` (NIM first; DeepSeek fourth; Wafer ninth / Kimi tenth).
    """
    # Iterating a dict yields its keys in insertion order.
    catalog_order = tuple(PROVIDER_CATALOG)
    assert catalog_order == _EXPECTED_PROVIDER_ORDER
    assert SUPPORTED_PROVIDER_IDS == _EXPECTED_PROVIDER_ORDER