Reorder providers in README and other places

This commit is contained in:
Alishahryar1
2026-05-23 19:16:22 -07:00
parent a4d7d76040
commit 26c5b35698
5 changed files with 180 additions and 144 deletions
+1 -1
View File
@@ -64,7 +64,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
# All Claude model requests are mapped to these models, plain model is fallback # All Claude model requests are mapped to these models, plain model is fallback
# Format: provider_type/model/name # Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "mistral" | "mistral_codestral" | "deepseek" | "kimi" | "wafer" | "lmstudio" | "llamacpp" | "ollama" | "opencode" | "opencode_go" | "zai" | "fireworks" | "gemini" | "groq" | "cerebras" # Valid providers: "nvidia_nim" | "open_router" | "gemini" | "deepseek" | "mistral" | "mistral_codestral" | "opencode" | "opencode_go" | "wafer" | "kimi" | "cerebras" | "groq" | "fireworks" | "zai" | "lmstudio" | "llamacpp" | "ollama"
MODEL_OPUS= MODEL_OPUS=
MODEL_SONNET= MODEL_SONNET=
MODEL_HAIKU= MODEL_HAIKU=
+35 -35
View File
@@ -37,7 +37,7 @@ Free Claude Code routes Anthropic Messages API traffic from Claude Code to any p
## What You Get ## What You Get
- Drop-in proxy for Claude Code's Anthropic API calls. - Drop-in proxy for Claude Code's Anthropic API calls.
- Seventeen provider backends: NVIDIA NIM, OpenRouter, Mistral La Plateforme, Mistral Codestral, DeepSeek, Kimi, Wafer, LM Studio, llama.cpp, Ollama, OpenCode Zen, OpenCode Go, Z.ai, Fireworks AI, Google AI Studio (Gemini), Groq, and Cerebras Inference. - Seventeen provider backends: NVIDIA NIM, OpenRouter, Google AI Studio (Gemini), DeepSeek, Mistral La Plateforme, Mistral Codestral, OpenCode Zen, OpenCode Go, Wafer, Kimi, Cerebras Inference, Groq, Fireworks AI, Z.ai, LM Studio, llama.cpp, and Ollama.
- Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers. - Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers.
- Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)). - Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)).
- Streaming, tool use, reasoning/thinking block handling, and local request optimizations. - Streaming, tool use, reasoning/thinking block handling, and local request optimizations.
@@ -183,34 +183,7 @@ Popular examples:
This provider uses Wafer's Anthropic-compatible endpoint at `https://pass.wafer.ai/v1/messages`. This provider uses Wafer's Anthropic-compatible endpoint at `https://pass.wafer.ai/v1/messages`.
### 8. [LM Studio](https://lmstudio.ai/) ### 8. [OpenCode Zen](https://opencode.ai/)
Start LM Studio's local server and load a model. In the Admin UI, keep or update `LM_STUDIO_BASE_URL`, then set `MODEL` to the model identifier shown by LM Studio, prefixed with `lmstudio/`.
Prefer models with tool-use support for Claude Code workflows.
### 9. [llama.cpp](https://github.com/ggml-org/llama.cpp)
Start `llama-server` with an Anthropic-compatible `/v1/messages` endpoint and enough context for Claude Code requests.
In the Admin UI, keep or update `LLAMACPP_BASE_URL`, then set `MODEL` to the local model slug, prefixed with `llamacpp/`.
For local coding models, context size matters. If llama.cpp returns HTTP 400 for normal Claude Code requests, increase `--ctx-size` and verify the model/server build supports the requested features.
### 10. [Ollama](https://ollama.com/)
Run Ollama and pull a model:
```bash
ollama pull llama3.1
ollama serve
```
In the Admin UI, keep or update `OLLAMA_BASE_URL`, then set `MODEL` to the same tag shown by `ollama list`, prefixed with `ollama/`.
`OLLAMA_BASE_URL` is the Ollama server root; do not append `/v1`. Example model slugs include `ollama/llama3.1` and `ollama/llama3.1:8b`.
### 11. [OpenCode Zen](https://opencode.ai/)
Get an API key at [opencode.ai/auth](https://opencode.ai/auth). Get an API key at [opencode.ai/auth](https://opencode.ai/auth).
@@ -229,7 +202,7 @@ Popular examples:
Browse available models at [opencode.ai](https://opencode.ai). Browse available models at [opencode.ai](https://opencode.ai).
### 12. [OpenCode Go](https://opencode.ai/) ### 9. [OpenCode Go](https://opencode.ai/)
Get an API key at [opencode.ai/auth](https://opencode.ai/auth) (same as OpenCode Zen). Get an API key at [opencode.ai/auth](https://opencode.ai/auth) (same as OpenCode Zen).
@@ -243,7 +216,7 @@ Popular examples:
Browse available models at [opencode.ai](https://opencode.ai). Browse available models at [opencode.ai](https://opencode.ai).
### 13. [Z.ai](https://z.ai/) ### 10. [Z.ai](https://z.ai/)
Get an API key at [Z.ai/manage-apikey/apikey-list](https://z.ai/manage-apikey/apikey-list). Get an API key at [Z.ai/manage-apikey/apikey-list](https://z.ai/manage-apikey/apikey-list).
@@ -258,7 +231,7 @@ Popular examples:
Browse models at [Z.ai](https://z.ai). Browse models at [Z.ai](https://z.ai).
### 14. [Fireworks AI](https://fireworks.ai/) ### 11. [Fireworks AI](https://fireworks.ai/)
Get an API key at [fireworks.ai/account/api-keys](https://fireworks.ai/account/api-keys). Get an API key at [fireworks.ai/account/api-keys](https://fireworks.ai/account/api-keys).
@@ -268,7 +241,7 @@ Fireworks exposes an **Anthropic-compatible** Messages API at `https://api.firew
Browse models at [fireworks.ai/models](https://fireworks.ai/models). Browse models at [fireworks.ai/models](https://fireworks.ai/models).
### 15. [Google AI Studio (Gemini)](https://aistudio.google.com/) ### 12. [Google AI Studio (Gemini)](https://aistudio.google.com/)
Get a Gemini API key at [Google AI Studio](https://aistudio.google.com/apikey) (see Google's [Gemini OpenAI compatibility](https://ai.google.dev/gemini-api/docs/openai) docs). Get a Gemini API key at [Google AI Studio](https://aistudio.google.com/apikey) (see Google's [Gemini OpenAI compatibility](https://ai.google.dev/gemini-api/docs/openai) docs).
@@ -281,7 +254,7 @@ Popular examples:
- `gemini/gemini-2.5-flash` - `gemini/gemini-2.5-flash`
- `gemini/gemini-3.1-flash-lite` - `gemini/gemini-3.1-flash-lite`
### 16. [Groq](https://console.groq.com/) ### 13. [Groq](https://console.groq.com/)
Get an API key at [console.groq.com/keys](https://console.groq.com/keys). Get an API key at [console.groq.com/keys](https://console.groq.com/keys).
@@ -293,7 +266,7 @@ Reasoning-heavy models expose extra knobs documented under [Groq reasoning](http
Browse models at [console.groq.com/docs/models](https://console.groq.com/docs/models). Browse models at [console.groq.com/docs/models](https://console.groq.com/docs/models).
### 17. [Cerebras Inference](https://inference-docs.cerebras.ai/quickstart) ### 14. [Cerebras Inference](https://inference-docs.cerebras.ai/quickstart)
Sign up and create an API key in the [Cerebras Cloud Console](https://cloud.cerebras.ai) (see [Quickstart](https://inference-docs.cerebras.ai/quickstart)). Sign up and create an API key in the [Cerebras Cloud Console](https://cloud.cerebras.ai) (see [Quickstart](https://inference-docs.cerebras.ai/quickstart)).
@@ -301,6 +274,33 @@ In the Admin UI, set `CEREBRAS_API_KEY`, then route with `MODEL` such as `cerebr
Cerebras exposes an OpenAI-compatible API at `https://api.cerebras.ai/v1` ([OpenAI compatibility](https://inference-docs.cerebras.ai/resources/openai)). Non-standard request fields should go in `extra_body` when using the OpenAI client; see the same page. For reasoning models and parameters, see [Reasoning](https://inference-docs.cerebras.ai/capabilities/reasoning). This proxy follows other OpenAI-compat adapters for thinking via `reasoning_content` when Claude-style thinking is enabled. Cerebras exposes an OpenAI-compatible API at `https://api.cerebras.ai/v1` ([OpenAI compatibility](https://inference-docs.cerebras.ai/resources/openai)). Non-standard request fields should go in `extra_body` when using the OpenAI client; see the same page. For reasoning models and parameters, see [Reasoning](https://inference-docs.cerebras.ai/capabilities/reasoning). This proxy follows other OpenAI-compat adapters for thinking via `reasoning_content` when Claude-style thinking is enabled.
### 15. [LM Studio](https://lmstudio.ai/)
Start LM Studio's local server and load a model. In the Admin UI, keep or update `LM_STUDIO_BASE_URL`, then set `MODEL` to the model identifier shown by LM Studio, prefixed with `lmstudio/`.
Prefer models with tool-use support for Claude Code workflows.
### 16. [llama.cpp](https://github.com/ggml-org/llama.cpp)
Start `llama-server` with an Anthropic-compatible `/v1/messages` endpoint and enough context for Claude Code requests.
In the Admin UI, keep or update `LLAMACPP_BASE_URL`, then set `MODEL` to the local model slug, prefixed with `llamacpp/`.
For local coding models, context size matters. If llama.cpp returns HTTP 400 for normal Claude Code requests, increase `--ctx-size` and verify the model/server build supports the requested features.
### 17. [Ollama](https://ollama.com/)
Run Ollama and pull a model:
```bash
ollama pull llama3.1
ollama serve
```
In the Admin UI, keep or update `OLLAMA_BASE_URL`, then set `MODEL` to the same tag shown by `ollama list`, prefixed with `ollama/`.
`OLLAMA_BASE_URL` is the Ollama server root; do not append `/v1`. Example model slugs include `ollama/llama3.1` and `ollama/llama3.1:8b`.
### 18. Mix Providers By Model Tier ### 18. Mix Providers By Model Tier
Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`. Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`.
+102 -98
View File
@@ -75,6 +75,25 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
proxy_attr="open_router_proxy", proxy_attr="open_router_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"), capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
), ),
"gemini": ProviderDescriptor(
provider_id="gemini",
transport_type="openai_chat",
credential_env="GEMINI_API_KEY",
credential_url="https://aistudio.google.com/apikey",
credential_attr="gemini_api_key",
default_base_url=GEMINI_DEFAULT_BASE,
proxy_attr="gemini_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"deepseek": ProviderDescriptor(
provider_id="deepseek",
transport_type="anthropic_messages",
credential_env="DEEPSEEK_API_KEY",
credential_url="https://platform.deepseek.com/api_keys",
credential_attr="deepseek_api_key",
default_base_url=DEEPSEEK_ANTHROPIC_DEFAULT_BASE,
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
),
"mistral": ProviderDescriptor( "mistral": ProviderDescriptor(
provider_id="mistral", provider_id="mistral",
transport_type="openai_chat", transport_type="openai_chat",
@@ -95,13 +114,34 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
proxy_attr="codestral_proxy", proxy_attr="codestral_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"), capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
), ),
"deepseek": ProviderDescriptor( "opencode": ProviderDescriptor(
provider_id="deepseek", provider_id="opencode",
transport_type="openai_chat",
credential_env="OPENCODE_API_KEY",
credential_url="https://opencode.ai/auth",
credential_attr="opencode_api_key",
default_base_url=OPENCODE_DEFAULT_BASE,
proxy_attr="opencode_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"opencode_go": ProviderDescriptor(
provider_id="opencode_go",
transport_type="openai_chat",
credential_env="OPENCODE_API_KEY",
credential_url="https://opencode.ai/auth",
credential_attr="opencode_api_key",
default_base_url=OPENCODE_GO_DEFAULT_BASE,
proxy_attr="opencode_go_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"wafer": ProviderDescriptor(
provider_id="wafer",
transport_type="anthropic_messages", transport_type="anthropic_messages",
credential_env="DEEPSEEK_API_KEY", credential_env="WAFER_API_KEY",
credential_url="https://platform.deepseek.com/api_keys", credential_url="https://www.wafer.ai/pass",
credential_attr="deepseek_api_key", credential_attr="wafer_api_key",
default_base_url=DEEPSEEK_ANTHROPIC_DEFAULT_BASE, default_base_url=WAFER_DEFAULT_BASE,
proxy_attr="wafer_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"), capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
), ),
"kimi": ProviderDescriptor( "kimi": ProviderDescriptor(
@@ -120,15 +160,58 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
"native_anthropic", "native_anthropic",
), ),
), ),
"wafer": ProviderDescriptor( "cerebras": ProviderDescriptor(
provider_id="wafer", provider_id="cerebras",
transport_type="openai_chat",
credential_env="CEREBRAS_API_KEY",
credential_url="https://cloud.cerebras.ai",
credential_attr="cerebras_api_key",
default_base_url=CEREBRAS_DEFAULT_BASE,
proxy_attr="cerebras_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"groq": ProviderDescriptor(
provider_id="groq",
transport_type="openai_chat",
credential_env="GROQ_API_KEY",
credential_url="https://console.groq.com/keys",
credential_attr="groq_api_key",
default_base_url=GROQ_DEFAULT_BASE,
proxy_attr="groq_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"fireworks": ProviderDescriptor(
provider_id="fireworks",
transport_type="anthropic_messages", transport_type="anthropic_messages",
credential_env="WAFER_API_KEY", credential_env="FIREWORKS_API_KEY",
credential_url="https://www.wafer.ai/pass", credential_url="https://fireworks.ai/account/api-keys",
credential_attr="wafer_api_key", credential_attr="fireworks_api_key",
default_base_url=WAFER_DEFAULT_BASE, default_base_url=FIREWORKS_DEFAULT_BASE,
proxy_attr="wafer_proxy", proxy_attr="fireworks_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"), capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"rate_limit",
),
),
"zai": ProviderDescriptor(
provider_id="zai",
transport_type="anthropic_messages",
credential_env="ZAI_API_KEY",
credential_attr="zai_api_key",
default_base_url=ZAI_DEFAULT_BASE,
proxy_attr="zai_proxy",
capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"rate_limit",
),
), ),
"lmstudio": ProviderDescriptor( "lmstudio": ProviderDescriptor(
provider_id="lmstudio", provider_id="lmstudio",
@@ -163,92 +246,13 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
"local", "local",
), ),
), ),
"opencode": ProviderDescriptor(
provider_id="opencode",
transport_type="openai_chat",
credential_env="OPENCODE_API_KEY",
credential_url="https://opencode.ai/auth",
credential_attr="opencode_api_key",
default_base_url=OPENCODE_DEFAULT_BASE,
proxy_attr="opencode_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"opencode_go": ProviderDescriptor(
provider_id="opencode_go",
transport_type="openai_chat",
credential_env="OPENCODE_API_KEY",
credential_url="https://opencode.ai/auth",
credential_attr="opencode_api_key",
default_base_url=OPENCODE_GO_DEFAULT_BASE,
proxy_attr="opencode_go_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"zai": ProviderDescriptor(
provider_id="zai",
transport_type="anthropic_messages",
credential_env="ZAI_API_KEY",
credential_attr="zai_api_key",
default_base_url=ZAI_DEFAULT_BASE,
proxy_attr="zai_proxy",
capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"rate_limit",
),
),
"fireworks": ProviderDescriptor(
provider_id="fireworks",
transport_type="anthropic_messages",
credential_env="FIREWORKS_API_KEY",
credential_url="https://fireworks.ai/account/api-keys",
credential_attr="fireworks_api_key",
default_base_url=FIREWORKS_DEFAULT_BASE,
proxy_attr="fireworks_proxy",
capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"rate_limit",
),
),
"gemini": ProviderDescriptor(
provider_id="gemini",
transport_type="openai_chat",
credential_env="GEMINI_API_KEY",
credential_url="https://aistudio.google.com/apikey",
credential_attr="gemini_api_key",
default_base_url=GEMINI_DEFAULT_BASE,
proxy_attr="gemini_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"groq": ProviderDescriptor(
provider_id="groq",
transport_type="openai_chat",
credential_env="GROQ_API_KEY",
credential_url="https://console.groq.com/keys",
credential_attr="groq_api_key",
default_base_url=GROQ_DEFAULT_BASE,
proxy_attr="groq_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
"cerebras": ProviderDescriptor(
provider_id="cerebras",
transport_type="openai_chat",
credential_env="CEREBRAS_API_KEY",
credential_url="https://cloud.cerebras.ai",
credential_attr="cerebras_api_key",
default_base_url=CEREBRAS_DEFAULT_BASE,
proxy_attr="cerebras_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
} }
# Order matches docs / historical error text; must match PROVIDER_CATALOG keys. # Key order:
# Fixed slots: NVIDIA NIM first (README default), DeepSeek fourth, Wafer ninth, Kimi tenth.
# After those come the cerebras / groq / fireworks overlap, then the remainder, with local
# providers last, per the project plan. Beyond the fixed slots, the Free Providers TOC at
# github.com/cheahjs/free-llm-api-resources serves as a rough guide.
# ``SUPPORTED_PROVIDER_IDS`` inherits this insertion order for UI and error-message listing.
SUPPORTED_PROVIDER_IDS: tuple[str, ...] = tuple(PROVIDER_CATALOG.keys()) SUPPORTED_PROVIDER_IDS: tuple[str, ...] = tuple(PROVIDER_CATALOG.keys())
if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS): if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
+10 -10
View File
@@ -139,21 +139,21 @@ def _create_cerebras(config: ProviderConfig, _settings: Settings) -> BaseProvide
PROVIDER_FACTORIES: dict[str, ProviderFactory] = { PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"nvidia_nim": _create_nvidia_nim, "nvidia_nim": _create_nvidia_nim,
"open_router": _create_open_router, "open_router": _create_open_router,
"gemini": _create_gemini,
"deepseek": _create_deepseek,
"mistral": _create_mistral, "mistral": _create_mistral,
"mistral_codestral": _create_mistral_codestral, "mistral_codestral": _create_mistral_codestral,
"deepseek": _create_deepseek, "opencode": _create_opencode,
"opencode_go": _create_opencode_go,
"wafer": _create_wafer,
"kimi": _create_kimi,
"cerebras": _create_cerebras,
"groq": _create_groq,
"fireworks": _create_fireworks,
"zai": _create_zai,
"lmstudio": _create_lmstudio, "lmstudio": _create_lmstudio,
"llamacpp": _create_llamacpp, "llamacpp": _create_llamacpp,
"ollama": _create_ollama, "ollama": _create_ollama,
"kimi": _create_kimi,
"wafer": _create_wafer,
"opencode": _create_opencode,
"opencode_go": _create_opencode_go,
"zai": _create_zai,
"fireworks": _create_fireworks,
"gemini": _create_gemini,
"groq": _create_groq,
"cerebras": _create_cerebras,
} }
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set( if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
@@ -0,0 +1,32 @@
"""Freeze ``PROVIDER_CATALOG`` insertion order used as canonical provider ranking."""
from __future__ import annotations
from config.provider_catalog import PROVIDER_CATALOG, SUPPORTED_PROVIDER_IDS
# Canonical provider ranking: the exact key order ``PROVIDER_CATALOG`` (and therefore
# ``SUPPORTED_PROVIDER_IDS``) is expected to have. Update this tuple deliberately whenever
# the catalog is reordered or a provider is added or removed.
_EXPECTED_PROVIDER_ORDER: tuple[str, ...] = (
"nvidia_nim",
"open_router",
"gemini",
"deepseek",
"mistral",
"mistral_codestral",
"opencode",
"opencode_go",
"wafer",
"kimi",
"cerebras",
"groq",
"fireworks",
"zai",
"lmstudio",
"llamacpp",
"ollama",
)
def test_provider_catalog_key_order_matches_canonical_plan() -> None:
    """Pin the catalog's insertion order to the canonical provider ranking.

    Both the key sequence of ``PROVIDER_CATALOG`` and the derived
    ``SUPPORTED_PROVIDER_IDS`` tuple must equal ``_EXPECTED_PROVIDER_ORDER``
    (NIM first, DeepSeek fourth, Wafer ninth, Kimi tenth).
    """
    # Iterating a dict yields its keys in insertion order.
    catalog_order = tuple(PROVIDER_CATALOG)
    assert catalog_order == _EXPECTED_PROVIDER_ORDER
    assert SUPPORTED_PROVIDER_IDS == _EXPECTED_PROVIDER_ORDER