mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-06-02 06:13:46 +02:00
feat: add Wafer provider
This commit is contained in:
+7
-1
@@ -14,6 +14,10 @@ DEEPSEEK_API_KEY=""
|
||||
KIMI_API_KEY=""
|
||||
|
||||
|
||||
# Wafer Config (uses native Anthropic Messages at pass.wafer.ai/v1/messages)
|
||||
WAFER_API_KEY=""
|
||||
|
||||
|
||||
# LM Studio Config (local provider, no API key required)
|
||||
LM_STUDIO_BASE_URL="http://localhost:1234/v1"
|
||||
|
||||
@@ -28,7 +32,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
|
||||
|
||||
# All Claude model requests are mapped to these models, plain model is fallback
|
||||
# Format: provider_type/model/name
|
||||
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama" | "kimi"
|
||||
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama" | "kimi" | "wafer"
|
||||
MODEL_OPUS=
|
||||
MODEL_SONNET=
|
||||
MODEL_HAIKU=
|
||||
@@ -44,6 +48,7 @@ FCC_SMOKE_MODEL_LMSTUDIO=
|
||||
FCC_SMOKE_MODEL_LLAMACPP=
|
||||
FCC_SMOKE_MODEL_OLLAMA=
|
||||
FCC_SMOKE_MODEL_KIMI=
|
||||
FCC_SMOKE_MODEL_WAFER=
|
||||
|
||||
|
||||
# Thinking output
|
||||
@@ -62,6 +67,7 @@ OPENROUTER_PROXY=""
|
||||
LMSTUDIO_PROXY=""
|
||||
LLAMACPP_PROXY=""
|
||||
KIMI_PROXY=""
|
||||
WAFER_PROXY=""
|
||||
|
||||
PROVIDER_RATE_LIMIT=1
|
||||
PROVIDER_RATE_WINDOW=3
|
||||
|
||||
@@ -12,7 +12,7 @@ Use Claude Code CLI, VS Code, JetBrains ACP, or chat bots through your own Anthr
|
||||
[Ruff](https://github.com/astral-sh/ruff)
|
||||
[Loguru](https://github.com/Delgan/loguru)
|
||||
|
||||
Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDIA NIM, OpenRouter, DeepSeek, LM Studio, llama.cpp, or Ollama. It keeps Claude Code's client-side protocol stable while letting you choose free, paid, or local models.
|
||||
Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDIA NIM, Kimi, Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, or Ollama. It keeps Claude Code's client-side protocol stable while letting you choose free, paid, or local models.
|
||||
|
||||
[Quick Start](#quick-start) · [Providers](#choose-a-provider) · [Clients](#connect-claude-code) · [Troubleshooting](#troubleshooting) · [Development](#development)
|
||||
|
||||
@@ -37,7 +37,7 @@ Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDI
|
||||
## What You Get
|
||||
|
||||
- Drop-in proxy for Claude Code's Anthropic API calls.
|
||||
- Six provider backends: NVIDIA NIM, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama.
|
||||
- Eight provider backends: NVIDIA NIM, Kimi, Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama.
|
||||
- Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers.
|
||||
- Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)).
|
||||
- Streaming, tool use, reasoning/thinking block handling, and local request optimizations.
|
||||
@@ -136,6 +136,7 @@ provider_id/model/name
|
||||
| --- | --- | --- | --- | --- |
|
||||
| <img src="https://cdn.simpleicons.org/nvidia/76B900" alt="" width="18" height="18"> NVIDIA NIM | `nvidia_nim/...` | OpenAI chat translation | `NVIDIA_NIM_API_KEY` | `https://integrate.api.nvidia.com/v1` |
|
||||
| <img src="https://raw.githubusercontent.com/lobehub/lobe-icons/refs/heads/master/packages/static-avatar/avatars/kimi.webp" alt="" width="18" height="18"> Kimi | `kimi/...` | OpenAI chat translation | `KIMI_API_KEY` | `https://api.moonshot.ai/v1` |
|
||||
| Wafer | `wafer/...` | Anthropic Messages | `WAFER_API_KEY` | `https://pass.wafer.ai/v1` |
|
||||
| <img src="https://cdn.simpleicons.org/openrouter/6C47FF" alt="" width="18" height="18"> OpenRouter | `open_router/...` | Anthropic Messages | `OPENROUTER_API_KEY` | `https://openrouter.ai/api/v1` |
|
||||
| <img src="https://cdn.simpleicons.org/deepseek/4D6BFF" alt="" width="18" height="18"> DeepSeek | `deepseek/...` | Anthropic Messages | `DEEPSEEK_API_KEY` | `https://api.deepseek.com/anthropic` |
|
||||
| <img src="https://github.com/lmstudio-ai.png?size=64" alt="" width="18" height="18"> LM Studio | `lmstudio/...` | Anthropic Messages | none | `http://localhost:1234/v1` |
|
||||
@@ -178,6 +179,27 @@ Browse [all models](https://openrouter.ai/models) or [free models](https://openr
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Wafer</b></summary>
|
||||
|
||||
Add your Wafer key and choose a model returned by Wafer Pass:
|
||||
|
||||
```dotenv
|
||||
WAFER_API_KEY="your-wafer-key"
|
||||
MODEL="wafer/DeepSeek-V4-Pro"
|
||||
```
|
||||
|
||||
Popular examples:
|
||||
|
||||
- `wafer/DeepSeek-V4-Pro`
|
||||
- `wafer/MiniMax-M2.7`
|
||||
- `wafer/Qwen3.5-397B-A17B`
|
||||
- `wafer/GLM-5.1`
|
||||
|
||||
This provider uses Wafer's Anthropic-compatible endpoint at `https://pass.wafer.ai/v1/messages`.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><img src="https://cdn.simpleicons.org/deepseek/4D6BFF" alt="" width="18" height="18"> <b>DeepSeek</b></summary>
|
||||
|
||||
@@ -249,11 +271,12 @@ Each tier can use a different provider:
|
||||
```dotenv
|
||||
NVIDIA_NIM_API_KEY="nvapi-your-key"
|
||||
OPENROUTER_API_KEY="sk-or-your-key"
|
||||
WAFER_API_KEY="your-wafer-key"
|
||||
|
||||
MODEL_OPUS="nvidia_nim/moonshotai/kimi-k2.5"
|
||||
MODEL_SONNET="open_router/deepseek/deepseek-r1-0528:free"
|
||||
MODEL_HAIKU="lmstudio/unsloth/GLM-4.7-Flash-GGUF"
|
||||
MODEL="nvidia_nim/z-ai/glm4.7"
|
||||
MODEL="wafer/DeepSeek-V4-Pro"
|
||||
```
|
||||
|
||||
</details>
|
||||
@@ -393,6 +416,7 @@ Blank per-tier values inherit the fallback. Blank thinking overrides inherit `EN
|
||||
NVIDIA_NIM_API_KEY=""
|
||||
OPENROUTER_API_KEY=""
|
||||
DEEPSEEK_API_KEY=""
|
||||
WAFER_API_KEY=""
|
||||
LM_STUDIO_BASE_URL="http://localhost:1234/v1"
|
||||
LLAMACPP_BASE_URL="http://localhost:8080/v1"
|
||||
OLLAMA_BASE_URL="http://localhost:11434"
|
||||
@@ -405,6 +429,7 @@ NVIDIA_NIM_PROXY=""
|
||||
OPENROUTER_PROXY=""
|
||||
LMSTUDIO_PROXY=""
|
||||
LLAMACPP_PROXY=""
|
||||
WAFER_PROXY=""
|
||||
```
|
||||
|
||||
### Rate Limits And Timeouts
|
||||
@@ -488,7 +513,7 @@ Free Claude Code proxy (:8082)
|
||||
|
|
||||
| provider-specific request/stream adapter
|
||||
v
|
||||
NIM / OpenRouter / DeepSeek / LM Studio / llama.cpp / Ollama
|
||||
NIM / Kimi / Wafer / OpenRouter / DeepSeek / LM Studio / llama.cpp / Ollama
|
||||
```
|
||||
|
||||
Important pieces:
|
||||
@@ -496,7 +521,7 @@ Important pieces:
|
||||
- FastAPI exposes Anthropic-compatible routes such as `/v1/messages`, `/v1/messages/count_tokens`, and `/v1/models`.
|
||||
- Model routing resolves the Claude model name to `MODEL_OPUS`, `MODEL_SONNET`, `MODEL_HAIKU`, or `MODEL`.
|
||||
- NIM and Kimi use OpenAI chat streaming translated into Anthropic SSE.
|
||||
- OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports.
|
||||
- Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports.
|
||||
- The proxy normalizes thinking blocks, tool calls, token usage metadata, and provider errors into the shape Claude Code expects.
|
||||
- Request optimizations answer trivial Claude Code probes locally to save latency and quota.
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ TransportType = Literal["openai_chat", "anthropic_messages"]
|
||||
# Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
|
||||
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
|
||||
KIMI_DEFAULT_BASE = "https://api.moonshot.ai/v1"
|
||||
WAFER_DEFAULT_BASE = "https://pass.wafer.ai/v1"
|
||||
# DeepSeek Anthropic-compatible Messages API (not OpenAI ``/v1`` chat completions).
|
||||
DEEPSEEK_ANTHROPIC_DEFAULT_BASE = "https://api.deepseek.com/anthropic"
|
||||
# Historical export name: DeepSeek upstream is the native Anthropic path above.
|
||||
@@ -113,6 +114,16 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
|
||||
proxy_attr="kimi_proxy",
|
||||
capabilities=("chat", "streaming", "tools"),
|
||||
),
|
||||
"wafer": ProviderDescriptor(
|
||||
provider_id="wafer",
|
||||
transport_type="anthropic_messages",
|
||||
credential_env="WAFER_API_KEY",
|
||||
credential_url="https://www.wafer.ai/pass",
|
||||
credential_attr="wafer_api_key",
|
||||
default_base_url=WAFER_DEFAULT_BASE,
|
||||
proxy_attr="wafer_proxy",
|
||||
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
|
||||
),
|
||||
}
|
||||
|
||||
# Order matches docs / historical error text; must match PROVIDER_CATALOG keys.
|
||||
|
||||
@@ -115,6 +115,9 @@ class Settings(BaseSettings):
|
||||
# ==================== Kimi Config ====================
|
||||
kimi_api_key: str = Field(default="", validation_alias="KIMI_API_KEY")
|
||||
|
||||
# ==================== Wafer Config ====================
|
||||
wafer_api_key: str = Field(default="", validation_alias="WAFER_API_KEY")
|
||||
|
||||
# ==================== Messaging Platform Selection ====================
|
||||
# Valid: "telegram" | "discord" | "none"
|
||||
messaging_platform: str = Field(
|
||||
@@ -165,6 +168,7 @@ class Settings(BaseSettings):
|
||||
lmstudio_proxy: str = Field(default="", validation_alias="LMSTUDIO_PROXY")
|
||||
llamacpp_proxy: str = Field(default="", validation_alias="LLAMACPP_PROXY")
|
||||
kimi_proxy: str = Field(default="", validation_alias="KIMI_PROXY")
|
||||
wafer_proxy: str = Field(default="", validation_alias="WAFER_PROXY")
|
||||
|
||||
# ==================== Provider Rate Limiting ====================
|
||||
provider_rate_limit: int = Field(default=40, validation_alias="PROVIDER_RATE_LIMIT")
|
||||
|
||||
@@ -99,7 +99,22 @@ def _serialize_value(value: Any) -> Any:
|
||||
def _dump_request_fields(request_data: Any) -> dict[str, Any]:
|
||||
"""Extract the public request fields (OpenRouter-style explicit field list)."""
|
||||
if isinstance(request_data, BaseModel):
|
||||
return request_data.model_dump(exclude_none=True)
|
||||
raw = request_data.model_dump(exclude_none=True)
|
||||
return {
|
||||
field: raw[field]
|
||||
for field in _REQUEST_FIELDS
|
||||
if field in raw and raw[field] is not None
|
||||
}
|
||||
|
||||
dump = getattr(request_data, "model_dump", None)
|
||||
if callable(dump):
|
||||
raw = dump(exclude_none=True)
|
||||
if isinstance(raw, dict):
|
||||
return {
|
||||
field: raw[field]
|
||||
for field in _REQUEST_FIELDS
|
||||
if field in raw and raw[field] is not None
|
||||
}
|
||||
|
||||
dumped: dict[str, Any] = {}
|
||||
for field in _REQUEST_FIELDS:
|
||||
@@ -204,7 +219,7 @@ def build_base_native_anthropic_request_body(
|
||||
thinking_enabled: bool,
|
||||
) -> dict[str, Any]:
|
||||
"""Serialize a Pydantic messages request to a generic native Anthropic body."""
|
||||
body = request.model_dump(exclude_none=True)
|
||||
body = dump_raw_messages_request(request)
|
||||
|
||||
body.pop("extra_body", None)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from config.provider_catalog import (
|
||||
NVIDIA_NIM_DEFAULT_BASE,
|
||||
OLLAMA_DEFAULT_BASE,
|
||||
OPENROUTER_DEFAULT_BASE,
|
||||
WAFER_DEFAULT_BASE,
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
@@ -20,4 +21,5 @@ __all__ = (
|
||||
"NVIDIA_NIM_DEFAULT_BASE",
|
||||
"OLLAMA_DEFAULT_BASE",
|
||||
"OPENROUTER_DEFAULT_BASE",
|
||||
"WAFER_DEFAULT_BASE",
|
||||
)
|
||||
|
||||
@@ -74,6 +74,12 @@ def _create_kimi(config: ProviderConfig, _settings: Settings) -> BaseProvider:
|
||||
return KimiProvider(config)
|
||||
|
||||
|
||||
def _create_wafer(config: ProviderConfig, _settings: Settings) -> BaseProvider:
    """Factory registered for the ``wafer`` provider id.

    ``_settings`` is accepted so the signature matches the other provider
    factories; it is not read here. The provider class is imported inside the
    function body rather than at module import time.
    """
    from providers.wafer import WaferProvider as _WaferProvider

    provider = _WaferProvider(config)
    return provider
|
||||
|
||||
|
||||
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
|
||||
"nvidia_nim": _create_nvidia_nim,
|
||||
"open_router": _create_open_router,
|
||||
@@ -82,6 +88,7 @@ PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
|
||||
"llamacpp": _create_llamacpp,
|
||||
"ollama": _create_ollama,
|
||||
"kimi": _create_kimi,
|
||||
"wafer": _create_wafer,
|
||||
}
|
||||
|
||||
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
"""Wafer provider exports."""
|
||||
|
||||
from providers.defaults import WAFER_DEFAULT_BASE
|
||||
|
||||
from .client import WaferProvider
|
||||
|
||||
__all__ = [
|
||||
"WAFER_DEFAULT_BASE",
|
||||
"WaferProvider",
|
||||
]
|
||||
@@ -0,0 +1,40 @@
|
||||
"""Wafer provider implementation (native Anthropic-compatible Messages)."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers.anthropic_messages import AnthropicMessagesTransport
|
||||
from providers.base import ProviderConfig
|
||||
from providers.defaults import WAFER_DEFAULT_BASE
|
||||
|
||||
_ANTHROPIC_VERSION = "2023-06-01"
|
||||
|
||||
|
||||
class WaferProvider(AnthropicMessagesTransport):
    """Wafer Pass provider speaking native Anthropic Messages.

    Upstream endpoint: ``https://pass.wafer.ai/v1/messages``.
    """

    def __init__(self, config: ProviderConfig):
        transport_options = {
            "provider_name": "WAFER",
            "default_base_url": WAFER_DEFAULT_BASE,
        }
        super().__init__(config, **transport_options)

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the native request body, always carrying a ``thinking`` entry.

        Wafer rejects omitted thinking as ``reasoning_effort=none``, so when the
        body produced by the base transport has no ``thinking`` key an enabled
        one is added. A ``thinking`` value that is already present is kept.
        """
        payload = super()._build_request_body(
            request, thinking_enabled=thinking_enabled
        )
        payload.setdefault("thinking", {"type": "enabled"})
        return payload

    def _request_headers(self) -> dict[str, str]:
        """Return headers for the Messages call: SSE accept, Bearer auth, JSON body."""
        headers: dict[str, str] = {}
        headers["Accept"] = "text/event-stream"
        headers["Authorization"] = f"Bearer {self._api_key}"
        headers["Content-Type"] = "application/json"
        headers["anthropic-version"] = _ANTHROPIC_VERSION
        return headers

    def _model_list_headers(self) -> dict[str, str]:
        """Return headers for model listing: only the Bearer authorization."""
        return dict(Authorization=f"Bearer {self._api_key}")
|
||||
@@ -44,6 +44,7 @@ PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
|
||||
"lmstudio": "lmstudio/local-model",
|
||||
"llamacpp": "llamacpp/local-model",
|
||||
"ollama": "ollama/llama3.1",
|
||||
"wafer": "wafer/DeepSeek-V4-Pro",
|
||||
}
|
||||
|
||||
|
||||
@@ -185,6 +186,8 @@ class SmokeConfig:
|
||||
return bool(self.settings.llamacpp_base_url.strip())
|
||||
if provider == "ollama":
|
||||
return bool(self.settings.ollama_base_url.strip())
|
||||
if provider == "wafer":
|
||||
return bool(self.settings.wafer_api_key.strip())
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -107,6 +107,24 @@ def test_build_base_native_body_includes_cache_control() -> None:
|
||||
assert body["tools"][0]["cache_control"] == {"type": "ephemeral"}
|
||||
|
||||
|
||||
def test_build_base_native_body_drops_unknown_top_level_client_hints() -> None:
    """Unknown top-level client hints must be absent from the native body."""
    payload = dict(
        model="m",
        max_tokens=20,
        messages=[{"role": "user", "content": "x"}],
        reasoning_effort="none",
        unknown_client_hint={"mode": "local"},
    )
    body = build_base_native_anthropic_request_body(
        MessagesRequest.model_validate(payload),
        default_max_tokens=ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
        thinking_enabled=False,
    )
    for dropped_key in ("reasoning_effort", "unknown_client_hint"):
        assert dropped_key not in body
|
||||
|
||||
|
||||
def test_pydantic_discriminator_still_distinguishes_blocks() -> None:
|
||||
m = Message.model_validate(
|
||||
{
|
||||
|
||||
@@ -22,6 +22,7 @@ from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from providers.registry import ProviderRegistry
|
||||
from providers.wafer import WaferProvider
|
||||
|
||||
|
||||
def _make_mock_settings(**overrides):
|
||||
@@ -35,8 +36,13 @@ def _make_mock_settings(**overrides):
|
||||
mock.provider_max_concurrency = 5
|
||||
mock.open_router_api_key = "test_openrouter_key"
|
||||
mock.deepseek_api_key = "test_deepseek_key"
|
||||
mock.wafer_api_key = "test_wafer_key"
|
||||
mock.lm_studio_base_url = "http://localhost:1234/v1"
|
||||
mock.ollama_base_url = "http://localhost:11434"
|
||||
mock.lmstudio_proxy = ""
|
||||
mock.llamacpp_proxy = ""
|
||||
mock.kimi_proxy = ""
|
||||
mock.wafer_proxy = ""
|
||||
mock.nim = NimSettings()
|
||||
mock.http_read_timeout = 300.0
|
||||
mock.http_write_timeout = 10.0
|
||||
@@ -188,6 +194,19 @@ async def test_get_provider_deepseek_passes_enable_model_thinking():
|
||||
assert provider._config.enable_thinking is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_provider_wafer():
    """``provider_type="wafer"`` yields a WaferProvider with the default URL and key."""
    with patch("api.dependencies.get_settings") as get_settings_mock:
        get_settings_mock.return_value = _make_mock_settings(provider_type="wafer")
        provider = get_provider()

    assert isinstance(provider, WaferProvider)
    assert provider._base_url == "https://pass.wafer.ai/v1"
    assert provider._api_key == "test_wafer_key"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_provider_lmstudio_uses_lm_studio_base_url():
|
||||
"""LM Studio provider uses lm_studio_base_url from settings."""
|
||||
@@ -324,6 +343,23 @@ async def test_get_provider_deepseek_missing_api_key():
|
||||
assert "platform.deepseek.com" in exc_info.value.detail
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_provider_wafer_missing_api_key():
    """An empty Wafer key makes ``get_provider`` raise HTTP 503 naming the env var."""
    with patch("api.dependencies.get_settings") as get_settings_mock:
        get_settings_mock.return_value = _make_mock_settings(
            provider_type="wafer", wafer_api_key=""
        )
        with pytest.raises(HTTPException) as raised:
            get_provider()

    error = raised.value
    assert error.status_code == 503
    assert "WAFER_API_KEY" in error.detail
    assert "wafer.ai" in error.detail
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_provider_unknown_type():
|
||||
"""Unknown ``provider_type`` raises :exc:`~providers.exceptions.UnknownProviderTypeError`."""
|
||||
|
||||
@@ -123,6 +123,31 @@ def test_models_list_uses_cached_metadata_for_configured_openrouter_refs():
|
||||
assert ids[0] == "claude-3-freecc-no-thinking/open_router/plain-model"
|
||||
|
||||
|
||||
def test_models_list_includes_cached_wafer_models():
    """Model ids cached for ``wafer`` appear in ``/v1/models`` under both id prefixes."""
    app = create_app(lifespan_enabled=False)
    settings = _settings(
        model="wafer/DeepSeek-V4-Pro", model_opus=None, model_haiku=None
    )

    registry = ProviderRegistry()
    registry.cache_model_ids("wafer", {"DeepSeek-V4-Pro", "MiniMax-M2.7"})
    app.state.provider_registry = registry
    app.dependency_overrides[get_settings] = lambda: settings

    try:
        response = TestClient(app).get("/v1/models")
    finally:
        # Always drop the override so later tests see the real dependency.
        app.dependency_overrides.clear()

    assert response.status_code == 200
    listed = [entry["id"] for entry in response.json()["data"]]
    expected_ids = (
        "anthropic/wafer/DeepSeek-V4-Pro",
        "claude-3-freecc-no-thinking/wafer/DeepSeek-V4-Pro",
        "anthropic/wafer/MiniMax-M2.7",
        "claude-3-freecc-no-thinking/wafer/MiniMax-M2.7",
    )
    for model_id in expected_ids:
        assert model_id in listed
|
||||
|
||||
|
||||
def test_models_list_works_without_provider_registry():
|
||||
app = create_app(lifespan_enabled=False)
|
||||
settings = _settings()
|
||||
|
||||
@@ -109,6 +109,21 @@ def test_model_router_routes_prefixed_provider_model_directly(settings):
|
||||
assert routed.resolved.provider_model_ref == "deepseek/deepseek-chat"
|
||||
|
||||
|
||||
def test_model_router_routes_wafer_provider_model_directly(settings):
    """A ``wafer/``-prefixed model resolves to the wafer provider with the prefix stripped."""
    router = ModelRouter(settings)
    incoming = MessagesRequest(
        model="wafer/DeepSeek-V4-Pro",
        max_tokens=100,
        messages=[Message(role="user", content="hello")],
    )

    routed = router.resolve_messages_request(incoming)

    assert routed.request.model == "DeepSeek-V4-Pro"
    resolved = routed.resolved
    assert resolved.provider_id == "wafer"
    assert resolved.provider_model == "DeepSeek-V4-Pro"
    assert resolved.provider_model_ref == "wafer/DeepSeek-V4-Pro"
|
||||
|
||||
|
||||
def test_model_router_routes_gateway_encoded_provider_model_directly(settings):
|
||||
routed = ModelRouter(settings).resolve_messages_request(
|
||||
MessagesRequest(
|
||||
|
||||
@@ -158,6 +158,14 @@ class TestSettings:
|
||||
settings = Settings()
|
||||
assert settings.enable_model_thinking is False
|
||||
|
||||
def test_wafer_api_key_from_env(self, monkeypatch):
    """Settings picks up ``wafer_api_key`` from the WAFER_API_KEY environment variable."""
    from config.settings import Settings

    expected_key = "wafer-key"
    monkeypatch.setenv("WAFER_API_KEY", expected_key)

    loaded = Settings()
    assert loaded.wafer_api_key == expected_key
|
||||
|
||||
def test_per_model_thinking_from_env(self, monkeypatch):
|
||||
"""Per-model thinking env vars are loaded into settings."""
|
||||
from config.settings import Settings
|
||||
@@ -507,6 +515,7 @@ class TestPerModelMapping:
|
||||
"open_router/anthropic/claude-3-haiku",
|
||||
),
|
||||
({"MODEL": "deepseek/deepseek-chat"}, "deepseek/deepseek-chat", None),
|
||||
({"MODEL": "wafer/DeepSeek-V4-Pro"}, "wafer/DeepSeek-V4-Pro", None),
|
||||
({"MODEL": "lmstudio/qwen2.5-7b"}, "lmstudio/qwen2.5-7b", None),
|
||||
({"MODEL": "llamacpp/local-model"}, "llamacpp/local-model", None),
|
||||
({"MODEL": "ollama/llama3.1"}, "ollama/llama3.1", None),
|
||||
@@ -647,6 +656,7 @@ class TestPerModelMapping:
|
||||
assert Settings.parse_provider_type("lmstudio/qwen") == "lmstudio"
|
||||
assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
|
||||
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
|
||||
assert Settings.parse_provider_type("wafer/DeepSeek-V4-Pro") == "wafer"
|
||||
|
||||
def test_parse_model_name(self):
|
||||
"""parse_model_name extracts model name from model string."""
|
||||
@@ -657,6 +667,7 @@ class TestPerModelMapping:
|
||||
assert Settings.parse_model_name("lmstudio/qwen") == "qwen"
|
||||
assert Settings.parse_model_name("llamacpp/model") == "model"
|
||||
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
|
||||
assert Settings.parse_model_name("wafer/DeepSeek-V4-Pro") == "DeepSeek-V4-Pro"
|
||||
|
||||
def test_configured_chat_model_refs_collects_unique_models_with_sources(
|
||||
self, monkeypatch
|
||||
|
||||
@@ -11,6 +11,7 @@ from providers.lmstudio import LMStudioProvider
|
||||
from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from providers.wafer import WaferProvider
|
||||
from smoke.features import FEATURE_INVENTORY, README_FEATURES, feature_ids
|
||||
|
||||
VALID_SOURCE = {"readme", "public_surface"}
|
||||
@@ -73,6 +74,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
|
||||
"lmstudio": LMStudioProvider,
|
||||
"llamacpp": LlamaCppProvider,
|
||||
"ollama": OllamaProvider,
|
||||
"wafer": WaferProvider,
|
||||
}
|
||||
for provider_class in provider_classes.values():
|
||||
assert issubclass(provider_class, BaseProvider)
|
||||
|
||||
@@ -20,6 +20,7 @@ def _settings(**overrides):
|
||||
"nvidia_nim_api_key": "",
|
||||
"open_router_api_key": "",
|
||||
"deepseek_api_key": "",
|
||||
"wafer_api_key": "",
|
||||
"lm_studio_base_url": "",
|
||||
"llamacpp_base_url": "",
|
||||
"ollama_base_url": "http://localhost:11434",
|
||||
@@ -83,6 +84,22 @@ def test_provider_smoke_models_cover_configured_providers_independent_of_model_m
|
||||
assert models[0].source == "provider_default"
|
||||
|
||||
|
||||
def test_wafer_provider_configuration_uses_api_key(monkeypatch) -> None:
    """A Wafer API key alone marks the provider configured and selects its default smoke model."""
    monkeypatch.delenv("FCC_SMOKE_MODEL_WAFER", raising=False)
    settings = _settings(
        model="ollama/llama3.1",
        ollama_base_url="",
        wafer_api_key="wafer-key",
    )
    config = _smoke_config(settings=settings)

    assert config.has_provider_configuration("wafer")
    first_model = config.provider_smoke_models()[0]
    assert first_model.provider == "wafer"
    assert first_model.full_model == PROVIDER_SMOKE_DEFAULT_MODELS["wafer"]
|
||||
|
||||
|
||||
def test_provider_smoke_model_override_accepts_model_name_without_prefix(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
|
||||
@@ -20,6 +20,7 @@ from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from providers.registry import ProviderRegistry
|
||||
from providers.wafer import WaferProvider
|
||||
|
||||
|
||||
def _settings(
|
||||
@@ -31,6 +32,7 @@ def _settings(
|
||||
nvidia_nim_api_key: str = "",
|
||||
open_router_api_key: str = "",
|
||||
deepseek_api_key: str = "",
|
||||
wafer_api_key: str = "",
|
||||
) -> Settings:
|
||||
return Settings.model_construct(
|
||||
model=model,
|
||||
@@ -40,6 +42,7 @@ def _settings(
|
||||
nvidia_nim_api_key=nvidia_nim_api_key,
|
||||
open_router_api_key=open_router_api_key,
|
||||
deepseek_api_key=deepseek_api_key,
|
||||
wafer_api_key=wafer_api_key,
|
||||
log_api_error_tracebacks=False,
|
||||
)
|
||||
|
||||
@@ -100,6 +103,22 @@ async def test_deepseek_lists_models_from_root_endpoint() -> None:
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_wafer_lists_models_from_default_models_endpoint() -> None:
    """Model listing issues one GET to ``/models`` with Bearer auth and returns the ids."""
    provider = WaferProvider(ProviderConfig(api_key="wafer-key"))
    canned = _response(200, {"data": [{"id": "DeepSeek-V4-Pro"}]})

    with patch.object(
        provider._client, "get", new_callable=AsyncMock, return_value=canned
    ) as mock_get:
        model_ids = await provider.list_model_ids()
        assert model_ids == frozenset({"DeepSeek-V4-Pro"})

    mock_get.assert_awaited_once_with(
        "/models", headers={"Authorization": "Bearer wafer-key"}
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_openrouter_lists_only_tool_capable_models() -> None:
|
||||
provider = OpenRouterProvider(ProviderConfig(api_key="open-router-key"))
|
||||
|
||||
@@ -18,6 +18,7 @@ from providers.registry import (
|
||||
ProviderRegistry,
|
||||
create_provider,
|
||||
)
|
||||
from providers.wafer import WaferProvider
|
||||
|
||||
|
||||
def _make_settings(**overrides):
|
||||
@@ -27,6 +28,7 @@ def _make_settings(**overrides):
|
||||
mock.nvidia_nim_api_key = "test_key"
|
||||
mock.open_router_api_key = "test_openrouter_key"
|
||||
mock.deepseek_api_key = "test_deepseek_key"
|
||||
mock.wafer_api_key = "test_wafer_key"
|
||||
mock.lm_studio_base_url = "http://localhost:1234/v1"
|
||||
mock.llamacpp_base_url = "http://localhost:8080/v1"
|
||||
mock.ollama_base_url = "http://localhost:11434"
|
||||
@@ -34,6 +36,8 @@ def _make_settings(**overrides):
|
||||
mock.open_router_proxy = ""
|
||||
mock.lmstudio_proxy = ""
|
||||
mock.llamacpp_proxy = ""
|
||||
mock.kimi_proxy = ""
|
||||
mock.wafer_proxy = ""
|
||||
mock.provider_rate_limit = 40
|
||||
mock.provider_rate_window = 60
|
||||
mock.provider_max_concurrency = 5
|
||||
@@ -94,6 +98,7 @@ def test_create_provider_instantiates_each_builtin():
|
||||
"lmstudio": LMStudioProvider,
|
||||
"llamacpp": LlamaCppProvider,
|
||||
"ollama": OllamaProvider,
|
||||
"wafer": WaferProvider,
|
||||
}
|
||||
|
||||
with (
|
||||
|
||||
@@ -0,0 +1,256 @@
|
||||
"""Tests for Wafer native Anthropic Messages provider."""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from api.models.anthropic import Message, MessagesRequest, Tool
|
||||
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||
from providers.base import ProviderConfig
|
||||
from providers.wafer import WAFER_DEFAULT_BASE, WaferProvider
|
||||
from tests.stream_contract import assert_canonical_stream_error_envelope
|
||||
|
||||
|
||||
class FakeResponse:
    """Minimal stand-in for a streamed HTTP response used by the Wafer stream tests."""

    def __init__(self, *, status_code=200, lines=None, text=""):
        self.status_code = status_code
        self._lines = lines if lines else []
        self._text = text
        self.is_closed = False
        self.headers = httpx.Headers()
        self.request = httpx.Request("POST", "https://pass.wafer.ai/v1/messages")

    async def aiter_lines(self):
        """Yield the configured lines one at a time."""
        for entry in self._lines:
            yield entry

    async def aclose(self):
        """Record that the response was closed."""
        self.is_closed = True

    async def aiter_bytes(self, chunk_size: int = 65_536):
        """Yield the UTF-8 encoded text in slices of at most ``chunk_size`` bytes."""
        payload = self._text.encode("utf-8")
        for start in range(0, len(payload), chunk_size):
            stop = start + chunk_size
            yield payload[start:stop]

    def raise_for_status(self):
        """Delegate to a real ``httpx.Response`` built from this fake's status, request and text."""
        httpx.Response(
            self.status_code,
            request=self.request,
            text=self._text,
        ).raise_for_status()
|
||||
|
||||
|
||||
@pytest.fixture
def wafer_config():
    """Provider config pointing at the default Wafer base URL with a test key."""
    options = {
        "api_key": "test-wafer-key",
        "base_url": WAFER_DEFAULT_BASE,
        "rate_limit": 10,
        "rate_window": 60,
    }
    return ProviderConfig(**options)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_rate_limiter():
    """Patch the transport's ``GlobalRateLimiter`` with a pass-through mock for every test.

    Yields the mock returned by ``get_scoped_instance`` so tests can inspect it.
    """

    @asynccontextmanager
    async def _slot():
        # No-op async context manager used as the concurrency slot.
        yield

    with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
        instance = mock.get_scoped_instance.return_value

        async def _passthrough(fn, *args, **kwargs):
            # Await the wrapped callable once with its arguments unchanged.
            return await fn(*args, **kwargs)

        instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
        instance.concurrency_slot.side_effect = _slot
        yield instance
|
||||
|
||||
|
||||
@pytest.fixture
def wafer_provider(wafer_config):
    """WaferProvider built from the ``wafer_config`` fixture."""
    provider = WaferProvider(wafer_config)
    return provider
|
||||
|
||||
|
||||
def test_default_base_url():
    """The exported default base URL is the Wafer Pass ``/v1`` root."""
    expected = "https://pass.wafer.ai/v1"
    assert WAFER_DEFAULT_BASE == expected
|
||||
|
||||
|
||||
def test_init_uses_default_base_url_and_strips_trailing_slash():
    """A trailing slash on the configured base URL is gone after construction."""
    config = ProviderConfig(
        api_key="test-wafer-key", base_url=WAFER_DEFAULT_BASE + "/"
    )
    # Patch the HTTP client class so construction creates no real client.
    with patch("httpx.AsyncClient"):
        provider = WaferProvider(config)

    assert provider._api_key == "test-wafer-key"
    assert provider._base_url == WAFER_DEFAULT_BASE
    assert provider._provider_name == "WAFER"
|
||||
|
||||
|
||||
def test_request_headers_use_bearer_auth_not_x_api_key(wafer_provider):
    """Request headers carry Bearer auth and never an ``x-api-key`` entry."""
    headers = wafer_provider._request_headers()

    expected = {
        "Authorization": "Bearer test-wafer-key",
        "Accept": "text/event-stream",
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01",
    }
    for name, value in expected.items():
        assert headers[name] == value
    assert "x-api-key" not in headers

    model_list_headers = wafer_provider._model_list_headers()
    assert model_list_headers == {"Authorization": "Bearer test-wafer-key"}
|
||||
|
||||
|
||||
def test_build_request_body_native_shape_and_defaults(wafer_provider):
    """Model, messages, tools and client thinking pass through; max_tokens and stream are filled in."""
    user_message = Message(role="user", content="Hello")
    echo_tool = Tool(
        name="echo",
        description="Echo input",
        input_schema={"type": "object", "properties": {}},
    )
    request = MessagesRequest.model_validate(
        {
            "model": "DeepSeek-V4-Pro",
            "messages": [user_message],
            "tools": [echo_tool],
            "thinking": {"type": "enabled", "budget_tokens": 2048},
        }
    )

    body = wafer_provider._build_request_body(request)

    assert body["model"] == "DeepSeek-V4-Pro"
    first_message = body["messages"][0]
    assert first_message["role"] == "user"
    first_tool = body["tools"][0]
    assert first_tool["name"] == "echo"
    assert body["thinking"] == {"type": "enabled", "budget_tokens": 2048}
    assert body["max_tokens"] == ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
    assert body["stream"] is True
|
||||
|
||||
|
||||
def test_build_request_body_drops_reasoning_effort_none(wafer_provider):
    """``reasoning_effort`` is not forwarded and an enabled ``thinking`` entry is present."""
    raw_request = {
        "model": "DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Explore the codebase."}],
        "reasoning_effort": "none",
    }
    request = MessagesRequest.model_validate(raw_request)

    body = wafer_provider._build_request_body(request)

    assert "reasoning_effort" not in body
    assert body["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
def test_build_request_body_keeps_upstream_thinking_enabled_when_client_disables_it(
    wafer_provider,
):
    """The upstream body still says thinking is enabled when the client disabled it."""
    raw_request = {
        "model": "DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Explore the codebase."}],
        "thinking": {"type": "disabled"},
    }
    request = MessagesRequest.model_validate(raw_request)

    body = wafer_provider._build_request_body(request, thinking_enabled=False)

    assert body["thinking"] == {"type": "enabled"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lists_models_from_openai_compatible_models_endpoint(wafer_provider):
    """An OpenAI-style ``{"object": "list", "data": [...]}`` payload yields the model ids."""
    listing = httpx.Response(
        200,
        json={
            "object": "list",
            "data": [
                {"id": "DeepSeek-V4-Pro", "object": "model"},
                {"id": "MiniMax-M2.7", "object": "model"},
            ],
        },
        request=httpx.Request("GET", "https://pass.wafer.ai/v1/models"),
    )

    with patch.object(
        wafer_provider._client, "get", new_callable=AsyncMock, return_value=listing
    ) as mock_get:
        model_ids = await wafer_provider.list_model_ids()
        assert model_ids == frozenset({"DeepSeek-V4-Pro", "MiniMax-M2.7"})

    mock_get.assert_awaited_once_with(
        "/models", headers={"Authorization": "Bearer test-wafer-key"}
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_uses_post_messages_path(wafer_provider):
    """Streaming POSTs to ``/messages`` with Bearer auth and yields each line newline-terminated."""
    request = MessagesRequest(
        model="MiniMax-M2.7",
        messages=[Message(role="user", content="hi")],
    )
    upstream_lines = [
        "event: message_start",
        'data: {"type":"message_start"}',
        "",
    ]
    response = FakeResponse(lines=upstream_lines)

    with (
        patch.object(
            wafer_provider._client, "build_request", return_value=MagicMock()
        ) as mock_build,
        patch.object(
            wafer_provider._client,
            "send",
            new_callable=AsyncMock,
            return_value=response,
        ),
    ):
        events = []
        async for event in wafer_provider.stream_response(request):
            events.append(event)

    assert events == [
        "event: message_start\n",
        'data: {"type":"message_start"}\n',
        "\n",
    ]
    assert response.is_closed

    build_call = mock_build.call_args
    assert build_call.args[:2] == ("POST", "/messages")
    assert build_call.kwargs["headers"]["Authorization"] == "Bearer test-wafer-key"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_non_200_maps_to_anthropic_error_event(wafer_provider):
    """An upstream 500 produces the canonical stream error envelope tagged with the request id."""
    request = MessagesRequest(
        model="GLM-5.1",
        messages=[Message(role="user", content="hi")],
    )
    response = FakeResponse(status_code=500, text="Internal Server Error")

    with (
        patch.object(wafer_provider._client, "build_request", return_value=MagicMock()),
        patch.object(
            wafer_provider._client,
            "send",
            new_callable=AsyncMock,
            return_value=response,
        ),
    ):
        events = []
        async for event in wafer_provider.stream_response(
            request, request_id="REQ_WAFER"
        ):
            events.append(event)

    assert response.is_closed
    assert_canonical_stream_error_envelope(
        events, user_message_substr="Provider API request failed"
    )
    joined = "".join(events)
    assert "REQ_WAFER" in joined
|
||||
Reference in New Issue
Block a user