feat: add Wafer provider

This commit is contained in:
Alishahryar1
2026-05-08 23:43:16 -07:00
parent 8687fb3cbb
commit 5294661aa4
20 changed files with 535 additions and 8 deletions
+7 -1
View File
@@ -14,6 +14,10 @@ DEEPSEEK_API_KEY=""
KIMI_API_KEY=""
# Wafer Config (uses native Anthropic Messages at pass.wafer.ai/v1/messages)
WAFER_API_KEY=""
# LM Studio Config (local provider, no API key required)
LM_STUDIO_BASE_URL="http://localhost:1234/v1"
@@ -28,7 +32,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
# All Claude model requests are mapped to these models, plain model is fallback
# Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama" | "kimi"
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama" | "kimi" | "wafer"
MODEL_OPUS=
MODEL_SONNET=
MODEL_HAIKU=
@@ -44,6 +48,7 @@ FCC_SMOKE_MODEL_LMSTUDIO=
FCC_SMOKE_MODEL_LLAMACPP=
FCC_SMOKE_MODEL_OLLAMA=
FCC_SMOKE_MODEL_KIMI=
FCC_SMOKE_MODEL_WAFER=
# Thinking output
@@ -62,6 +67,7 @@ OPENROUTER_PROXY=""
LMSTUDIO_PROXY=""
LLAMACPP_PROXY=""
KIMI_PROXY=""
WAFER_PROXY=""
PROVIDER_RATE_LIMIT=1
PROVIDER_RATE_WINDOW=3
+30 -5
View File
@@ -12,7 +12,7 @@ Use Claude Code CLI, VS Code, JetBrains ACP, or chat bots through your own Anthr
[![Code style: Ruff](https://img.shields.io/badge/code%20formatting-ruff-f5a623.svg?style=for-the-badge)](https://github.com/astral-sh/ruff)
[![Logging: Loguru](https://img.shields.io/badge/logging-loguru-4ecdc4.svg?style=for-the-badge)](https://github.com/Delgan/loguru)
Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDIA NIM, OpenRouter, DeepSeek, LM Studio, llama.cpp, or Ollama. It keeps Claude Code's client-side protocol stable while letting you choose free, paid, or local models.
Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDIA NIM, Kimi, Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, or Ollama. It keeps Claude Code's client-side protocol stable while letting you choose free, paid, or local models.
[Quick Start](#quick-start) · [Providers](#choose-a-provider) · [Clients](#connect-claude-code) · [Troubleshooting](#troubleshooting) · [Development](#development)
@@ -37,7 +37,7 @@ Free Claude Code routes Anthropic Messages API traffic from Claude Code to NVIDI
## What You Get
- Drop-in proxy for Claude Code's Anthropic API calls.
- Six provider backends: NVIDIA NIM, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama.
- Eight provider backends: NVIDIA NIM, Kimi, Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama.
- Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers.
- Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)).
- Streaming, tool use, reasoning/thinking block handling, and local request optimizations.
@@ -136,6 +136,7 @@ provider_id/model/name
| --- | --- | --- | --- | --- |
| <img src="https://cdn.simpleicons.org/nvidia/76B900" alt="" width="18" height="18"> NVIDIA NIM | `nvidia_nim/...` | OpenAI chat translation | `NVIDIA_NIM_API_KEY` | `https://integrate.api.nvidia.com/v1` |
| <img src="https://raw.githubusercontent.com/lobehub/lobe-icons/refs/heads/master/packages/static-avatar/avatars/kimi.webp" alt="" width="18" height="18"> Kimi | `kimi/...` | OpenAI chat translation | `KIMI_API_KEY` | `https://api.moonshot.ai/v1` |
| Wafer | `wafer/...` | Anthropic Messages | `WAFER_API_KEY` | `https://pass.wafer.ai/v1` |
| <img src="https://cdn.simpleicons.org/openrouter/6C47FF" alt="" width="18" height="18"> OpenRouter | `open_router/...` | Anthropic Messages | `OPENROUTER_API_KEY` | `https://openrouter.ai/api/v1` |
| <img src="https://cdn.simpleicons.org/deepseek/4D6BFF" alt="" width="18" height="18"> DeepSeek | `deepseek/...` | Anthropic Messages | `DEEPSEEK_API_KEY` | `https://api.deepseek.com/anthropic` |
| <img src="https://github.com/lmstudio-ai.png?size=64" alt="" width="18" height="18"> LM Studio | `lmstudio/...` | Anthropic Messages | none | `http://localhost:1234/v1` |
@@ -178,6 +179,27 @@ Browse [all models](https://openrouter.ai/models) or [free models](https://openr
</details>
<details>
<summary><b>Wafer</b></summary>
Add your Wafer key and choose a model returned by Wafer Pass:
```dotenv
WAFER_API_KEY="your-wafer-key"
MODEL="wafer/DeepSeek-V4-Pro"
```
Popular examples:
- `wafer/DeepSeek-V4-Pro`
- `wafer/MiniMax-M2.7`
- `wafer/Qwen3.5-397B-A17B`
- `wafer/GLM-5.1`
This provider uses Wafer's Anthropic-compatible endpoint at `https://pass.wafer.ai/v1/messages`.
</details>
<details>
<summary><img src="https://cdn.simpleicons.org/deepseek/4D6BFF" alt="" width="18" height="18"> <b>DeepSeek</b></summary>
@@ -249,11 +271,12 @@ Each tier can use a different provider:
```dotenv
NVIDIA_NIM_API_KEY="nvapi-your-key"
OPENROUTER_API_KEY="sk-or-your-key"
WAFER_API_KEY="your-wafer-key"
MODEL_OPUS="nvidia_nim/moonshotai/kimi-k2.5"
MODEL_SONNET="open_router/deepseek/deepseek-r1-0528:free"
MODEL_HAIKU="lmstudio/unsloth/GLM-4.7-Flash-GGUF"
MODEL="nvidia_nim/z-ai/glm4.7"
MODEL="wafer/DeepSeek-V4-Pro"
```
</details>
@@ -393,6 +416,7 @@ Blank per-tier values inherit the fallback. Blank thinking overrides inherit `EN
NVIDIA_NIM_API_KEY=""
OPENROUTER_API_KEY=""
DEEPSEEK_API_KEY=""
WAFER_API_KEY=""
LM_STUDIO_BASE_URL="http://localhost:1234/v1"
LLAMACPP_BASE_URL="http://localhost:8080/v1"
OLLAMA_BASE_URL="http://localhost:11434"
@@ -405,6 +429,7 @@ NVIDIA_NIM_PROXY=""
OPENROUTER_PROXY=""
LMSTUDIO_PROXY=""
LLAMACPP_PROXY=""
WAFER_PROXY=""
```
### Rate Limits And Timeouts
@@ -488,7 +513,7 @@ Free Claude Code proxy (:8082)
|
| provider-specific request/stream adapter
v
NIM / OpenRouter / DeepSeek / LM Studio / llama.cpp / Ollama
NIM / Kimi / Wafer / OpenRouter / DeepSeek / LM Studio / llama.cpp / Ollama
```
Important pieces:
@@ -496,7 +521,7 @@ Important pieces:
- FastAPI exposes Anthropic-compatible routes such as `/v1/messages`, `/v1/messages/count_tokens`, and `/v1/models`.
- Model routing resolves the Claude model name to `MODEL_OPUS`, `MODEL_SONNET`, `MODEL_HAIKU`, or `MODEL`.
- NIM and Kimi use OpenAI chat streaming translated into Anthropic SSE.
- OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports.
- Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports.
- The proxy normalizes thinking blocks, tool calls, token usage metadata, and provider errors into the shape Claude Code expects.
- Request optimizations answer trivial Claude Code probes locally to save latency and quota.
+11
View File
@@ -14,6 +14,7 @@ TransportType = Literal["openai_chat", "anthropic_messages"]
# Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
KIMI_DEFAULT_BASE = "https://api.moonshot.ai/v1"
WAFER_DEFAULT_BASE = "https://pass.wafer.ai/v1"
# DeepSeek Anthropic-compatible Messages API (not OpenAI ``/v1`` chat completions).
DEEPSEEK_ANTHROPIC_DEFAULT_BASE = "https://api.deepseek.com/anthropic"
# Historical export name: DeepSeek upstream is the native Anthropic path above.
@@ -113,6 +114,16 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
proxy_attr="kimi_proxy",
capabilities=("chat", "streaming", "tools"),
),
"wafer": ProviderDescriptor(
provider_id="wafer",
transport_type="anthropic_messages",
credential_env="WAFER_API_KEY",
credential_url="https://www.wafer.ai/pass",
credential_attr="wafer_api_key",
default_base_url=WAFER_DEFAULT_BASE,
proxy_attr="wafer_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "native_anthropic"),
),
}
# Order matches docs / historical error text; must match PROVIDER_CATALOG keys.
+4
View File
@@ -115,6 +115,9 @@ class Settings(BaseSettings):
# ==================== Kimi Config ====================
kimi_api_key: str = Field(default="", validation_alias="KIMI_API_KEY")
# ==================== Wafer Config ====================
wafer_api_key: str = Field(default="", validation_alias="WAFER_API_KEY")
# ==================== Messaging Platform Selection ====================
# Valid: "telegram" | "discord" | "none"
messaging_platform: str = Field(
@@ -165,6 +168,7 @@ class Settings(BaseSettings):
lmstudio_proxy: str = Field(default="", validation_alias="LMSTUDIO_PROXY")
llamacpp_proxy: str = Field(default="", validation_alias="LLAMACPP_PROXY")
kimi_proxy: str = Field(default="", validation_alias="KIMI_PROXY")
wafer_proxy: str = Field(default="", validation_alias="WAFER_PROXY")
# ==================== Provider Rate Limiting ====================
provider_rate_limit: int = Field(default=40, validation_alias="PROVIDER_RATE_LIMIT")
+17 -2
View File
@@ -99,7 +99,22 @@ def _serialize_value(value: Any) -> Any:
def _dump_request_fields(request_data: Any) -> dict[str, Any]:
"""Extract the public request fields (OpenRouter-style explicit field list)."""
if isinstance(request_data, BaseModel):
return request_data.model_dump(exclude_none=True)
raw = request_data.model_dump(exclude_none=True)
return {
field: raw[field]
for field in _REQUEST_FIELDS
if field in raw and raw[field] is not None
}
dump = getattr(request_data, "model_dump", None)
if callable(dump):
raw = dump(exclude_none=True)
if isinstance(raw, dict):
return {
field: raw[field]
for field in _REQUEST_FIELDS
if field in raw and raw[field] is not None
}
dumped: dict[str, Any] = {}
for field in _REQUEST_FIELDS:
@@ -204,7 +219,7 @@ def build_base_native_anthropic_request_body(
thinking_enabled: bool,
) -> dict[str, Any]:
"""Serialize a Pydantic messages request to a generic native Anthropic body."""
body = request.model_dump(exclude_none=True)
body = dump_raw_messages_request(request)
body.pop("extra_body", None)
+2
View File
@@ -9,6 +9,7 @@ from config.provider_catalog import (
NVIDIA_NIM_DEFAULT_BASE,
OLLAMA_DEFAULT_BASE,
OPENROUTER_DEFAULT_BASE,
WAFER_DEFAULT_BASE,
)
__all__ = (
@@ -20,4 +21,5 @@ __all__ = (
"NVIDIA_NIM_DEFAULT_BASE",
"OLLAMA_DEFAULT_BASE",
"OPENROUTER_DEFAULT_BASE",
"WAFER_DEFAULT_BASE",
)
+7
View File
@@ -74,6 +74,12 @@ def _create_kimi(config: ProviderConfig, _settings: Settings) -> BaseProvider:
return KimiProvider(config)
def _create_wafer(config: ProviderConfig, _settings: Settings) -> BaseProvider:
    """Build the Wafer provider from ``config``; ``_settings`` is not used here."""
    # Function-scope import: the Wafer module is loaded only when this factory runs.
    import providers.wafer as wafer_module

    return wafer_module.WaferProvider(config)
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"nvidia_nim": _create_nvidia_nim,
"open_router": _create_open_router,
@@ -82,6 +88,7 @@ PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"llamacpp": _create_llamacpp,
"ollama": _create_ollama,
"kimi": _create_kimi,
"wafer": _create_wafer,
}
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
+10
View File
@@ -0,0 +1,10 @@
"""Wafer provider exports."""
from providers.defaults import WAFER_DEFAULT_BASE
from .client import WaferProvider
__all__ = [
"WAFER_DEFAULT_BASE",
"WaferProvider",
]
+40
View File
@@ -0,0 +1,40 @@
"""Wafer provider implementation (native Anthropic-compatible Messages)."""
from typing import Any
from providers.anthropic_messages import AnthropicMessagesTransport
from providers.base import ProviderConfig
from providers.defaults import WAFER_DEFAULT_BASE
_ANTHROPIC_VERSION = "2023-06-01"
class WaferProvider(AnthropicMessagesTransport):
    """Wafer provider built on the Anthropic Messages transport.

    Targets ``https://pass.wafer.ai/v1/messages`` by default and authenticates
    with an ``Authorization: Bearer`` header built from the configured API key.
    """

    def __init__(self, config: ProviderConfig):
        """Initialise the transport with Wafer's provider name and default base URL."""
        super().__init__(
            config,
            default_base_url=WAFER_DEFAULT_BASE,
            provider_name="WAFER",
        )

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Return the parent-built body, guaranteeing a ``thinking`` entry.

        Wafer rejects a body with ``thinking`` omitted (it treats that as
        ``reasoning_effort=none``), so ``{"type": "enabled"}`` is filled in
        whenever the parent leaves the key out. An existing ``thinking`` value
        is left untouched.
        """
        payload = super()._build_request_body(request, thinking_enabled=thinking_enabled)
        payload.setdefault("thinking", {"type": "enabled"})
        return payload

    def _request_headers(self) -> dict[str, str]:
        """Return the headers for a streaming Messages request."""
        bearer = f"Bearer {self._api_key}"
        return {
            "Accept": "text/event-stream",
            "Authorization": bearer,
            "Content-Type": "application/json",
            "anthropic-version": _ANTHROPIC_VERSION,
        }

    def _model_list_headers(self) -> dict[str, str]:
        """Return the headers for the model-listing request (bearer auth only)."""
        bearer = f"Bearer {self._api_key}"
        return {"Authorization": bearer}
+3
View File
@@ -44,6 +44,7 @@ PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
"lmstudio": "lmstudio/local-model",
"llamacpp": "llamacpp/local-model",
"ollama": "ollama/llama3.1",
"wafer": "wafer/DeepSeek-V4-Pro",
}
@@ -185,6 +186,8 @@ class SmokeConfig:
return bool(self.settings.llamacpp_base_url.strip())
if provider == "ollama":
return bool(self.settings.ollama_base_url.strip())
if provider == "wafer":
return bool(self.settings.wafer_api_key.strip())
return False
@@ -107,6 +107,24 @@ def test_build_base_native_body_includes_cache_control() -> None:
assert body["tools"][0]["cache_control"] == {"type": "ephemeral"}
def test_build_base_native_body_drops_unknown_top_level_client_hints() -> None:
    """Top-level keys outside the Messages schema must not reach the native body."""
    payload = {
        "model": "m",
        "max_tokens": 20,
        "messages": [{"role": "user", "content": "x"}],
        "reasoning_effort": "none",
        "unknown_client_hint": {"mode": "local"},
    }
    validated = MessagesRequest.model_validate(payload)

    native_body = build_base_native_anthropic_request_body(
        validated,
        default_max_tokens=ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
        thinking_enabled=False,
    )

    for dropped_key in ("reasoning_effort", "unknown_client_hint"):
        assert dropped_key not in native_body
def test_pydantic_discriminator_still_distinguishes_blocks() -> None:
m = Message.model_validate(
{
+36
View File
@@ -22,6 +22,7 @@ from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import ProviderRegistry
from providers.wafer import WaferProvider
def _make_mock_settings(**overrides):
@@ -35,8 +36,13 @@ def _make_mock_settings(**overrides):
mock.provider_max_concurrency = 5
mock.open_router_api_key = "test_openrouter_key"
mock.deepseek_api_key = "test_deepseek_key"
mock.wafer_api_key = "test_wafer_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.ollama_base_url = "http://localhost:11434"
mock.lmstudio_proxy = ""
mock.llamacpp_proxy = ""
mock.kimi_proxy = ""
mock.wafer_proxy = ""
mock.nim = NimSettings()
mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0
@@ -188,6 +194,19 @@ async def test_get_provider_deepseek_passes_enable_model_thinking():
assert provider._config.enable_thinking is False
@pytest.mark.asyncio
async def test_get_provider_wafer():
    """``provider_type=wafer`` yields a WaferProvider with the default URL and key."""
    wafer_settings = _make_mock_settings(provider_type="wafer")
    with patch("api.dependencies.get_settings") as mock_settings:
        mock_settings.return_value = wafer_settings
        resolved = get_provider()

        assert isinstance(resolved, WaferProvider)
        assert resolved._base_url == "https://pass.wafer.ai/v1"
        assert resolved._api_key == "test_wafer_key"
@pytest.mark.asyncio
async def test_get_provider_lmstudio_uses_lm_studio_base_url():
"""LM Studio provider uses lm_studio_base_url from settings."""
@@ -324,6 +343,23 @@ async def test_get_provider_deepseek_missing_api_key():
assert "platform.deepseek.com" in exc_info.value.detail
@pytest.mark.asyncio
async def test_get_provider_wafer_missing_api_key():
    """An empty Wafer API key surfaces as HTTP 503 naming the env var and site."""
    keyless_settings = _make_mock_settings(
        provider_type="wafer",
        wafer_api_key="",
    )
    with patch("api.dependencies.get_settings") as mock_settings:
        mock_settings.return_value = keyless_settings
        with pytest.raises(HTTPException) as exc_info:
            get_provider()

    raised = exc_info.value
    assert raised.status_code == 503
    assert "WAFER_API_KEY" in raised.detail
    assert "wafer.ai" in raised.detail
@pytest.mark.asyncio
async def test_get_provider_unknown_type():
"""Unknown ``provider_type`` raises :exc:`~providers.exceptions.UnknownProviderTypeError`."""
+25
View File
@@ -123,6 +123,31 @@ def test_models_list_uses_cached_metadata_for_configured_openrouter_refs():
assert ids[0] == "claude-3-freecc-no-thinking/open_router/plain-model"
def test_models_list_includes_cached_wafer_models():
    """Cached Wafer model ids appear in ``/v1/models`` under both id prefixes."""
    app = create_app(lifespan_enabled=False)
    wafer_settings = _settings(
        model="wafer/DeepSeek-V4-Pro",
        model_opus=None,
        model_haiku=None,
    )
    provider_registry = ProviderRegistry()
    provider_registry.cache_model_ids("wafer", {"DeepSeek-V4-Pro", "MiniMax-M2.7"})
    app.state.provider_registry = provider_registry
    app.dependency_overrides[get_settings] = lambda: wafer_settings

    try:
        response = TestClient(app).get("/v1/models")
    finally:
        # Always drop the override so it cannot leak into other tests.
        app.dependency_overrides.clear()

    assert response.status_code == 200
    listed_ids = [entry["id"] for entry in response.json()["data"]]
    for expected_id in (
        "anthropic/wafer/DeepSeek-V4-Pro",
        "claude-3-freecc-no-thinking/wafer/DeepSeek-V4-Pro",
        "anthropic/wafer/MiniMax-M2.7",
        "claude-3-freecc-no-thinking/wafer/MiniMax-M2.7",
    ):
        assert expected_id in listed_ids
def test_models_list_works_without_provider_registry():
app = create_app(lifespan_enabled=False)
settings = _settings()
+15
View File
@@ -109,6 +109,21 @@ def test_model_router_routes_prefixed_provider_model_directly(settings):
assert routed.resolved.provider_model_ref == "deepseek/deepseek-chat"
def test_model_router_routes_wafer_provider_model_directly(settings):
    """A ``wafer/``-prefixed model is routed to Wafer with the prefix stripped."""
    incoming = MessagesRequest(
        model="wafer/DeepSeek-V4-Pro",
        max_tokens=100,
        messages=[Message(role="user", content="hello")],
    )

    routed = ModelRouter(settings).resolve_messages_request(incoming)
    resolution = routed.resolved

    assert routed.request.model == "DeepSeek-V4-Pro"
    assert resolution.provider_id == "wafer"
    assert resolution.provider_model == "DeepSeek-V4-Pro"
    assert resolution.provider_model_ref == "wafer/DeepSeek-V4-Pro"
def test_model_router_routes_gateway_encoded_provider_model_directly(settings):
routed = ModelRouter(settings).resolve_messages_request(
MessagesRequest(
+11
View File
@@ -158,6 +158,14 @@ class TestSettings:
settings = Settings()
assert settings.enable_model_thinking is False
def test_wafer_api_key_from_env(self, monkeypatch):
    """Settings picks up ``WAFER_API_KEY`` from the process environment."""
    from config.settings import Settings

    env_value = "wafer-key"
    monkeypatch.setenv("WAFER_API_KEY", env_value)

    loaded = Settings()
    assert loaded.wafer_api_key == env_value
def test_per_model_thinking_from_env(self, monkeypatch):
"""Per-model thinking env vars are loaded into settings."""
from config.settings import Settings
@@ -507,6 +515,7 @@ class TestPerModelMapping:
"open_router/anthropic/claude-3-haiku",
),
({"MODEL": "deepseek/deepseek-chat"}, "deepseek/deepseek-chat", None),
({"MODEL": "wafer/DeepSeek-V4-Pro"}, "wafer/DeepSeek-V4-Pro", None),
({"MODEL": "lmstudio/qwen2.5-7b"}, "lmstudio/qwen2.5-7b", None),
({"MODEL": "llamacpp/local-model"}, "llamacpp/local-model", None),
({"MODEL": "ollama/llama3.1"}, "ollama/llama3.1", None),
@@ -647,6 +656,7 @@ class TestPerModelMapping:
assert Settings.parse_provider_type("lmstudio/qwen") == "lmstudio"
assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
assert Settings.parse_provider_type("wafer/DeepSeek-V4-Pro") == "wafer"
def test_parse_model_name(self):
"""parse_model_name extracts model name from model string."""
@@ -657,6 +667,7 @@ class TestPerModelMapping:
assert Settings.parse_model_name("lmstudio/qwen") == "qwen"
assert Settings.parse_model_name("llamacpp/model") == "model"
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
assert Settings.parse_model_name("wafer/DeepSeek-V4-Pro") == "DeepSeek-V4-Pro"
def test_configured_chat_model_refs_collects_unique_models_with_sources(
self, monkeypatch
+2
View File
@@ -11,6 +11,7 @@ from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.wafer import WaferProvider
from smoke.features import FEATURE_INVENTORY, README_FEATURES, feature_ids
VALID_SOURCE = {"readme", "public_surface"}
@@ -73,6 +74,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
"wafer": WaferProvider,
}
for provider_class in provider_classes.values():
assert issubclass(provider_class, BaseProvider)
+17
View File
@@ -20,6 +20,7 @@ def _settings(**overrides):
"nvidia_nim_api_key": "",
"open_router_api_key": "",
"deepseek_api_key": "",
"wafer_api_key": "",
"lm_studio_base_url": "",
"llamacpp_base_url": "",
"ollama_base_url": "http://localhost:11434",
@@ -83,6 +84,22 @@ def test_provider_smoke_models_cover_configured_providers_independent_of_model_m
assert models[0].source == "provider_default"
def test_wafer_provider_configuration_uses_api_key(monkeypatch) -> None:
    """A Wafer API key alone marks the provider configured for smoke runs."""
    # Remove any override so the provider default model is the one selected.
    monkeypatch.delenv("FCC_SMOKE_MODEL_WAFER", raising=False)
    smoke_settings = _settings(
        model="ollama/llama3.1",
        ollama_base_url="",
        wafer_api_key="wafer-key",
    )
    config = _smoke_config(settings=smoke_settings)

    assert config.has_provider_configuration("wafer")

    first_model = config.provider_smoke_models()[0]
    assert first_model.provider == "wafer"
    assert first_model.full_model == PROVIDER_SMOKE_DEFAULT_MODELS["wafer"]
def test_provider_smoke_model_override_accepts_model_name_without_prefix(
monkeypatch,
) -> None:
+19
View File
@@ -20,6 +20,7 @@ from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import ProviderRegistry
from providers.wafer import WaferProvider
def _settings(
@@ -31,6 +32,7 @@ def _settings(
nvidia_nim_api_key: str = "",
open_router_api_key: str = "",
deepseek_api_key: str = "",
wafer_api_key: str = "",
) -> Settings:
return Settings.model_construct(
model=model,
@@ -40,6 +42,7 @@ def _settings(
nvidia_nim_api_key=nvidia_nim_api_key,
open_router_api_key=open_router_api_key,
deepseek_api_key=deepseek_api_key,
wafer_api_key=wafer_api_key,
log_api_error_tracebacks=False,
)
@@ -100,6 +103,22 @@ async def test_deepseek_lists_models_from_root_endpoint() -> None:
)
@pytest.mark.asyncio
async def test_wafer_lists_models_from_default_models_endpoint() -> None:
    """Model listing issues one GET to ``/models`` with the bearer header."""
    provider = WaferProvider(ProviderConfig(api_key="wafer-key"))
    upstream_reply = _response(200, {"data": [{"id": "DeepSeek-V4-Pro"}]})

    with patch.object(
        provider._client,
        "get",
        new_callable=AsyncMock,
        return_value=upstream_reply,
    ) as mock_get:
        listed = await provider.list_model_ids()
        assert listed == frozenset({"DeepSeek-V4-Pro"})

    mock_get.assert_awaited_once_with(
        "/models", headers={"Authorization": "Bearer wafer-key"}
    )
@pytest.mark.asyncio
async def test_openrouter_lists_only_tool_capable_models() -> None:
provider = OpenRouterProvider(ProviderConfig(api_key="open-router-key"))
+5
View File
@@ -18,6 +18,7 @@ from providers.registry import (
ProviderRegistry,
create_provider,
)
from providers.wafer import WaferProvider
def _make_settings(**overrides):
@@ -27,6 +28,7 @@ def _make_settings(**overrides):
mock.nvidia_nim_api_key = "test_key"
mock.open_router_api_key = "test_openrouter_key"
mock.deepseek_api_key = "test_deepseek_key"
mock.wafer_api_key = "test_wafer_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.llamacpp_base_url = "http://localhost:8080/v1"
mock.ollama_base_url = "http://localhost:11434"
@@ -34,6 +36,8 @@ def _make_settings(**overrides):
mock.open_router_proxy = ""
mock.lmstudio_proxy = ""
mock.llamacpp_proxy = ""
mock.kimi_proxy = ""
mock.wafer_proxy = ""
mock.provider_rate_limit = 40
mock.provider_rate_window = 60
mock.provider_max_concurrency = 5
@@ -94,6 +98,7 @@ def test_create_provider_instantiates_each_builtin():
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
"wafer": WaferProvider,
}
with (
+256
View File
@@ -0,0 +1,256 @@
"""Tests for Wafer native Anthropic Messages provider."""
from contextlib import asynccontextmanager
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from api.models.anthropic import Message, MessagesRequest, Tool
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
from providers.base import ProviderConfig
from providers.wafer import WAFER_DEFAULT_BASE, WaferProvider
from tests.stream_contract import assert_canonical_stream_error_envelope
class FakeResponse:
    """Minimal stand-in for a streamed ``httpx`` response used by these tests.

    Holds a status code, a list of lines to stream, and a text body, and
    records whether ``aclose`` has been called.
    """

    def __init__(self, *, status_code=200, lines=None, text=""):
        self.status_code = status_code
        self._lines = lines if lines else []
        self._text = text
        self.is_closed = False
        self.headers = httpx.Headers()
        self.request = httpx.Request("POST", "https://pass.wafer.ai/v1/messages")

    async def aiter_lines(self):
        """Yield the configured lines one at a time."""
        for entry in self._lines:
            yield entry

    async def aclose(self):
        """Record that the response has been closed."""
        self.is_closed = True

    async def aiter_bytes(self, chunk_size: int = 65_536):
        """Yield the UTF-8 encoded text body in ``chunk_size``-byte slices."""
        encoded = self._text.encode("utf-8")
        total = len(encoded)
        for start in range(0, total, chunk_size):
            stop = start + chunk_size
            yield encoded[start:stop]

    def raise_for_status(self):
        """Delegate status checking to a real ``httpx.Response`` with the same data."""
        real_response = httpx.Response(
            self.status_code,
            request=self.request,
            text=self._text,
        )
        real_response.raise_for_status()
@pytest.fixture
def wafer_config():
    """Provider config pointing at the default Wafer base URL with a test key."""
    config = ProviderConfig(
        api_key="test-wafer-key",
        base_url=WAFER_DEFAULT_BASE,
        rate_limit=10,
        rate_window=60,
    )
    return config
@pytest.fixture(autouse=True)
def mock_rate_limiter():
    """Patch the global rate limiter so calls pass straight through.

    ``execute_with_retry`` awaits the wrapped callable directly and
    ``concurrency_slot`` yields an empty async context manager.
    """

    async def _passthrough(fn, *args, **kwargs):
        return await fn(*args, **kwargs)

    @asynccontextmanager
    async def _slot():
        yield

    with patch("providers.anthropic_messages.GlobalRateLimiter") as limiter_cls:
        limiter = limiter_cls.get_scoped_instance.return_value
        limiter.execute_with_retry = AsyncMock(side_effect=_passthrough)
        limiter.concurrency_slot.side_effect = _slot
        yield limiter
@pytest.fixture
def wafer_provider(wafer_config):
    """WaferProvider built from the ``wafer_config`` fixture."""
    provider = WaferProvider(wafer_config)
    return provider
def test_default_base_url():
    """The exported default base URL is the Wafer Pass ``/v1`` root."""
    expected_base = "https://pass.wafer.ai/v1"
    assert WAFER_DEFAULT_BASE == expected_base
def test_init_uses_default_base_url_and_strips_trailing_slash():
    """A base URL with a trailing slash is stored without it."""
    slashed_config = ProviderConfig(
        api_key="test-wafer-key",
        base_url=f"{WAFER_DEFAULT_BASE}/",
    )
    with patch("httpx.AsyncClient"):
        provider = WaferProvider(slashed_config)

        assert provider._api_key == "test-wafer-key"
        assert provider._base_url == WAFER_DEFAULT_BASE
        assert provider._provider_name == "WAFER"
def test_request_headers_use_bearer_auth_not_x_api_key(wafer_provider):
    """Requests carry bearer auth and never an ``x-api-key`` header."""
    request_headers = wafer_provider._request_headers()

    expected_values = {
        "Authorization": "Bearer test-wafer-key",
        "Accept": "text/event-stream",
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01",
    }
    for header_name, header_value in expected_values.items():
        assert request_headers[header_name] == header_value
    assert "x-api-key" not in request_headers

    listing_headers = wafer_provider._model_list_headers()
    assert listing_headers == {"Authorization": "Bearer test-wafer-key"}
def test_build_request_body_native_shape_and_defaults(wafer_provider):
    """The body keeps model, messages, tools and thinking, and fills defaults."""
    echo_tool = Tool(
        name="echo",
        description="Echo input",
        input_schema={"type": "object", "properties": {}},
    )
    thinking_config = {"type": "enabled", "budget_tokens": 2048}
    request = MessagesRequest.model_validate(
        {
            "model": "DeepSeek-V4-Pro",
            "messages": [Message(role="user", content="Hello")],
            "tools": [echo_tool],
            "thinking": thinking_config,
        }
    )

    built = wafer_provider._build_request_body(request)

    assert built["model"] == "DeepSeek-V4-Pro"
    assert built["messages"][0]["role"] == "user"
    assert built["tools"][0]["name"] == "echo"
    assert built["thinking"] == {"type": "enabled", "budget_tokens": 2048}
    # max_tokens was not supplied, so the default output limit is expected.
    assert built["max_tokens"] == ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
    assert built["stream"] is True
def test_build_request_body_drops_reasoning_effort_none(wafer_provider):
    """``reasoning_effort`` is dropped and ``thinking`` defaults to enabled."""
    payload = {
        "model": "DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Explore the codebase."}],
        "reasoning_effort": "none",
    }
    request = MessagesRequest.model_validate(payload)

    built = wafer_provider._build_request_body(request)

    assert "reasoning_effort" not in built
    assert built["thinking"] == {"type": "enabled"}
def test_build_request_body_keeps_upstream_thinking_enabled_when_client_disables_it(
    wafer_provider,
):
    """Even with thinking disabled by the client, the upstream body enables it."""
    payload = {
        "model": "DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Explore the codebase."}],
        "thinking": {"type": "disabled"},
    }
    request = MessagesRequest.model_validate(payload)

    built = wafer_provider._build_request_body(request, thinking_enabled=False)

    assert built["thinking"] == {"type": "enabled"}
@pytest.mark.asyncio
async def test_lists_models_from_openai_compatible_models_endpoint(wafer_provider):
    """An OpenAI-style ``{"object": "list", "data": [...]}`` reply is parsed into ids."""
    models_reply = httpx.Response(
        200,
        json={
            "object": "list",
            "data": [
                {"id": "DeepSeek-V4-Pro", "object": "model"},
                {"id": "MiniMax-M2.7", "object": "model"},
            ],
        },
        request=httpx.Request("GET", "https://pass.wafer.ai/v1/models"),
    )

    with patch.object(
        wafer_provider._client,
        "get",
        new_callable=AsyncMock,
        return_value=models_reply,
    ) as mock_get:
        listed = await wafer_provider.list_model_ids()
        assert listed == frozenset({"DeepSeek-V4-Pro", "MiniMax-M2.7"})

    mock_get.assert_awaited_once_with(
        "/models", headers={"Authorization": "Bearer test-wafer-key"}
    )
@pytest.mark.asyncio
async def test_stream_uses_post_messages_path(wafer_provider):
    """Streaming POSTs to ``/messages`` with bearer auth and relays each line."""
    request = MessagesRequest(
        model="MiniMax-M2.7",
        messages=[Message(role="user", content="hi")],
    )
    upstream_lines = [
        "event: message_start",
        'data: {"type":"message_start"}',
        "",
    ]
    response = FakeResponse(lines=upstream_lines)

    build_patch = patch.object(
        wafer_provider._client, "build_request", return_value=MagicMock()
    )
    send_patch = patch.object(
        wafer_provider._client,
        "send",
        new_callable=AsyncMock,
        return_value=response,
    )
    with build_patch as mock_build, send_patch:
        events = []
        async for event in wafer_provider.stream_response(request):
            events.append(event)

    assert events == [
        "event: message_start\n",
        'data: {"type":"message_start"}\n',
        "\n",
    ]
    assert response.is_closed

    build_call = mock_build.call_args
    assert build_call.args[:2] == ("POST", "/messages")
    assert build_call.kwargs["headers"]["Authorization"] == (
        "Bearer test-wafer-key"
    )
@pytest.mark.asyncio
async def test_stream_non_200_maps_to_anthropic_error_event(wafer_provider):
    """A 500 from upstream becomes a canonical error stream tagged with the request id."""
    request = MessagesRequest(
        model="GLM-5.1",
        messages=[Message(role="user", content="hi")],
    )
    response = FakeResponse(status_code=500, text="Internal Server Error")

    build_patch = patch.object(
        wafer_provider._client, "build_request", return_value=MagicMock()
    )
    send_patch = patch.object(
        wafer_provider._client,
        "send",
        new_callable=AsyncMock,
        return_value=response,
    )
    with build_patch, send_patch:
        events = []
        async for event in wafer_provider.stream_response(
            request, request_id="REQ_WAFER"
        ):
            events.append(event)

    assert response.is_closed
    assert_canonical_stream_error_envelope(
        events, user_message_substr="Provider API request failed"
    )
    assert "REQ_WAFER" in "".join(events)