mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-06-02 06:13:46 +02:00
feat(providers): native Anthropic Messages for Kimi, Fireworks, Z.ai
Route these providers through POST /messages with vendor headers and bases (including Kimi model list on OpenAI /v1/models). Remove Z.ai from OpenAI-chat server-tool rejection; extend tests and README.
This commit is contained in:
+3
-3
@@ -14,7 +14,7 @@ MISTRAL_API_KEY=""
|
|||||||
DEEPSEEK_API_KEY=""
|
DEEPSEEK_API_KEY=""
|
||||||
|
|
||||||
|
|
||||||
# Kimi Config (Moonshot OpenAI-compatible API)
|
# Kimi Config (Anthropic-compatible Messages at api.moonshot.ai/anthropic/v1)
|
||||||
KIMI_API_KEY=""
|
KIMI_API_KEY=""
|
||||||
|
|
||||||
|
|
||||||
@@ -26,11 +26,11 @@ WAFER_API_KEY=""
|
|||||||
OPENCODE_API_KEY=""
|
OPENCODE_API_KEY=""
|
||||||
|
|
||||||
|
|
||||||
# Z.ai Config (Anthropic-compatible Messages at api.z.ai/api/anthropic)
|
# Z.ai Config (Anthropic-compatible Messages at api.z.ai/api/anthropic/v1)
|
||||||
ZAI_API_KEY=""
|
ZAI_API_KEY=""
|
||||||
|
|
||||||
|
|
||||||
# Fireworks AI Config (OpenAI-compatible Chat Completions at api.fireworks.ai/inference/v1)
|
# Fireworks AI Config (Anthropic-compatible Messages at api.fireworks.ai/inference/v1)
|
||||||
FIREWORKS_API_KEY=""
|
FIREWORKS_API_KEY=""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -158,6 +158,8 @@ Get a key at [platform.moonshot.ai/console/api-keys](https://platform.moonshot.a
|
|||||||
|
|
||||||
In the Admin UI, paste it into `KIMI_API_KEY`, then set `MODEL` to a Kimi slug such as `kimi/kimi-k2.5`.
|
In the Admin UI, paste it into `KIMI_API_KEY`, then set `MODEL` to a Kimi slug such as `kimi/kimi-k2.5`.
|
||||||
|
|
||||||
|
This provider calls Kimi's **Anthropic-compatible** Messages API (`https://api.moonshot.ai/anthropic/v1/messages`; model discovery uses OpenAI-compat `GET https://api.moonshot.ai/v1/models`). It is **not** the OpenAI Chat Completions path.
|
||||||
|
|
||||||
Browse models at [platform.moonshot.ai](https://platform.moonshot.ai).
|
Browse models at [platform.moonshot.ai](https://platform.moonshot.ai).
|
||||||
|
|
||||||
### 6. [Wafer](https://wafer.ai/)
|
### 6. [Wafer](https://wafer.ai/)
|
||||||
@@ -239,7 +241,7 @@ Get an API key at [Z.ai/manage-apikey/apikey-list](https://z.ai/manage-apikey/ap
|
|||||||
|
|
||||||
In the Admin UI, paste it into `ZAI_API_KEY`, then set `MODEL` to a Z.ai model slug such as `zai/glm-5.1`.
|
In the Admin UI, paste it into `ZAI_API_KEY`, then set `MODEL` to a Z.ai model slug such as `zai/glm-5.1`.
|
||||||
|
|
||||||
Z.ai provides GLM models through the OpenAI-compatible Coding Plan endpoint at `https://api.z.ai/api/coding/paas/v4`.
|
This provider calls Z.ai's **Anthropic-compatible** Messages API (`https://api.z.ai/api/anthropic/v1/messages`). The former OpenAI Coding Plan base (`https://api.z.ai/api/coding/paas/v4`) is **not** used by this gateway.
|
||||||
|
|
||||||
Popular examples:
|
Popular examples:
|
||||||
|
|
||||||
@@ -254,7 +256,7 @@ Get an API key at [fireworks.ai/account/api-keys](https://fireworks.ai/account/a
|
|||||||
|
|
||||||
In the Admin UI, paste it into `FIREWORKS_API_KEY`, then set `MODEL` to a Fireworks model slug such as `fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`.
|
In the Admin UI, paste it into `FIREWORKS_API_KEY`, then set `MODEL` to a Fireworks model slug such as `fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct`.
|
||||||
|
|
||||||
Fireworks exposes an OpenAI-compatible Chat Completions API at `https://api.fireworks.ai/inference/v1`.
|
Fireworks exposes an **Anthropic-compatible** Messages API at `https://api.fireworks.ai/inference/v1/messages` (same inference host as before; Chat Completions is not used here). Vendor-specific JSON keys can still be merged from request `extra_body` when allowed.
|
||||||
|
|
||||||
Browse models at [fireworks.ai/models](https://fireworks.ai/models).
|
Browse models at [fireworks.ai/models](https://fireworks.ai/models).
|
||||||
|
|
||||||
@@ -443,8 +445,8 @@ Important pieces:
|
|||||||
|
|
||||||
- FastAPI exposes Anthropic-compatible routes such as `/v1/messages`, `/v1/messages/count_tokens`, and `/v1/models`.
|
- FastAPI exposes Anthropic-compatible routes such as `/v1/messages`, `/v1/messages/count_tokens`, and `/v1/models`.
|
||||||
- Model routing resolves the Claude model name to `MODEL_OPUS`, `MODEL_SONNET`, `MODEL_HAIKU`, or `MODEL`.
|
- Model routing resolves the Claude model name to `MODEL_OPUS`, `MODEL_SONNET`, `MODEL_HAIKU`, or `MODEL`.
|
||||||
- NIM, OpenCode Zen, OpenCode Go, Z.ai use OpenAI chat streaming translated into Anthropic SSE.
|
- NIM, OpenCode Zen, and OpenCode Go use OpenAI chat streaming translated into Anthropic SSE.
|
||||||
- Wafer, OpenRouter, DeepSeek, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports.
|
- Wafer, OpenRouter, DeepSeek, Kimi, Fireworks AI, Z.ai, LM Studio, llama.cpp, and Ollama use Anthropic Messages style transports where applicable (with provider-specific quirks and model-list URLs).
|
||||||
- The proxy normalizes thinking blocks, tool calls, token usage metadata, and provider errors into the shape Claude Code expects.
|
- The proxy normalizes thinking blocks, tool calls, token usage metadata, and provider errors into the shape Claude Code expects.
|
||||||
- Request optimizations answer trivial Claude Code probes locally to save latency and quota.
|
- Request optimizations answer trivial Claude Code probes locally to save latency and quota.
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -34,7 +34,7 @@ TokenCounter = Callable[[list[Any], str | list[Any] | None, list[Any] | None], i
|
|||||||
ProviderGetter = Callable[[str], BaseProvider]
|
ProviderGetter = Callable[[str], BaseProvider]
|
||||||
|
|
||||||
# Providers that use ``/chat/completions`` + Anthropic-to-OpenAI conversion (not native Messages).
|
# Providers that use ``/chat/completions`` + Anthropic-to-OpenAI conversion (not native Messages).
|
||||||
_OPENAI_CHAT_UPSTREAM_IDS = frozenset({"nvidia_nim", "opencode", "opencode_go", "zai"})
|
_OPENAI_CHAT_UPSTREAM_IDS = frozenset({"nvidia_nim", "opencode", "opencode_go"})
|
||||||
|
|
||||||
|
|
||||||
def anthropic_sse_streaming_response(
|
def anthropic_sse_streaming_response(
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ def openai_chat_upstream_server_tool_error(
|
|||||||
)
|
)
|
||||||
if not forced and has_listed_anthropic_server_tools(request):
|
if not forced and has_listed_anthropic_server_tools(request):
|
||||||
return (
|
return (
|
||||||
"OpenAI Chat upstreams (NVIDIA NIM) cannot use listed Anthropic server tools "
|
"OpenAI Chat upstreams cannot use listed Anthropic server tools "
|
||||||
"(web_search / web_fetch) without the local web server tool handler. Use a native "
|
"(web_search / web_fetch) without the local web server tool handler. Use a native "
|
||||||
"Anthropic transport, set ENABLE_WEB_SERVER_TOOLS=true and force the tool with "
|
"Anthropic transport, set ENABLE_WEB_SERVER_TOOLS=true and force the tool with "
|
||||||
"tool_choice, or remove these tools from the request."
|
"tool_choice, or remove these tools from the request."
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ TransportType = Literal["openai_chat", "anthropic_messages"]
|
|||||||
|
|
||||||
# Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
|
# Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
|
||||||
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
|
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
|
||||||
KIMI_DEFAULT_BASE = "https://api.moonshot.ai/v1"
|
# Moonshot Kimi Anthropic-compatible Messages API (POST …/messages).
|
||||||
|
KIMI_DEFAULT_BASE = "https://api.moonshot.ai/anthropic/v1"
|
||||||
WAFER_DEFAULT_BASE = "https://pass.wafer.ai/v1"
|
WAFER_DEFAULT_BASE = "https://pass.wafer.ai/v1"
|
||||||
# DeepSeek Anthropic-compatible Messages API (not OpenAI ``/v1`` chat completions).
|
# DeepSeek Anthropic-compatible Messages API (not OpenAI ``/v1`` chat completions).
|
||||||
DEEPSEEK_ANTHROPIC_DEFAULT_BASE = "https://api.deepseek.com/anthropic"
|
DEEPSEEK_ANTHROPIC_DEFAULT_BASE = "https://api.deepseek.com/anthropic"
|
||||||
@@ -27,7 +28,8 @@ LLAMACPP_DEFAULT_BASE = "http://localhost:8080/v1"
|
|||||||
OLLAMA_DEFAULT_BASE = "http://localhost:11434"
|
OLLAMA_DEFAULT_BASE = "http://localhost:11434"
|
||||||
OPENCODE_DEFAULT_BASE = "https://opencode.ai/zen/v1"
|
OPENCODE_DEFAULT_BASE = "https://opencode.ai/zen/v1"
|
||||||
OPENCODE_GO_DEFAULT_BASE = "https://opencode.ai/zen/go/v1"
|
OPENCODE_GO_DEFAULT_BASE = "https://opencode.ai/zen/go/v1"
|
||||||
ZAI_DEFAULT_BASE = "https://api.z.ai/api/coding/paas/v4"
|
# Z.ai Anthropic-compatible Messages API (not OpenAI Coding Plan chat completions).
|
||||||
|
ZAI_DEFAULT_BASE = "https://api.z.ai/api/anthropic/v1"
|
||||||
# Google AI Studio Gemini API OpenAI-compat layer (not Vertex AI).
|
# Google AI Studio Gemini API OpenAI-compat layer (not Vertex AI).
|
||||||
GEMINI_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/"
|
GEMINI_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||||
GROQ_DEFAULT_BASE = "https://api.groq.com/openai/v1"
|
GROQ_DEFAULT_BASE = "https://api.groq.com/openai/v1"
|
||||||
@@ -92,13 +94,19 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
|
|||||||
),
|
),
|
||||||
"kimi": ProviderDescriptor(
|
"kimi": ProviderDescriptor(
|
||||||
provider_id="kimi",
|
provider_id="kimi",
|
||||||
transport_type="openai_chat",
|
transport_type="anthropic_messages",
|
||||||
credential_env="KIMI_API_KEY",
|
credential_env="KIMI_API_KEY",
|
||||||
credential_url="https://platform.moonshot.cn/console/api-keys",
|
credential_url="https://platform.moonshot.cn/console/api-keys",
|
||||||
credential_attr="kimi_api_key",
|
credential_attr="kimi_api_key",
|
||||||
default_base_url=KIMI_DEFAULT_BASE,
|
default_base_url=KIMI_DEFAULT_BASE,
|
||||||
proxy_attr="kimi_proxy",
|
proxy_attr="kimi_proxy",
|
||||||
capabilities=("chat", "streaming", "tools"),
|
capabilities=(
|
||||||
|
"chat",
|
||||||
|
"streaming",
|
||||||
|
"tools",
|
||||||
|
"thinking",
|
||||||
|
"native_anthropic",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
"wafer": ProviderDescriptor(
|
"wafer": ProviderDescriptor(
|
||||||
provider_id="wafer",
|
provider_id="wafer",
|
||||||
@@ -165,22 +173,36 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
|
|||||||
),
|
),
|
||||||
"zai": ProviderDescriptor(
|
"zai": ProviderDescriptor(
|
||||||
provider_id="zai",
|
provider_id="zai",
|
||||||
transport_type="openai_chat",
|
transport_type="anthropic_messages",
|
||||||
credential_env="ZAI_API_KEY",
|
credential_env="ZAI_API_KEY",
|
||||||
credential_attr="zai_api_key",
|
credential_attr="zai_api_key",
|
||||||
default_base_url=ZAI_DEFAULT_BASE,
|
default_base_url=ZAI_DEFAULT_BASE,
|
||||||
proxy_attr="zai_proxy",
|
proxy_attr="zai_proxy",
|
||||||
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
|
capabilities=(
|
||||||
|
"chat",
|
||||||
|
"streaming",
|
||||||
|
"tools",
|
||||||
|
"thinking",
|
||||||
|
"native_anthropic",
|
||||||
|
"rate_limit",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
"fireworks": ProviderDescriptor(
|
"fireworks": ProviderDescriptor(
|
||||||
provider_id="fireworks",
|
provider_id="fireworks",
|
||||||
transport_type="openai_chat",
|
transport_type="anthropic_messages",
|
||||||
credential_env="FIREWORKS_API_KEY",
|
credential_env="FIREWORKS_API_KEY",
|
||||||
credential_url="https://fireworks.ai/account/api-keys",
|
credential_url="https://fireworks.ai/account/api-keys",
|
||||||
credential_attr="fireworks_api_key",
|
credential_attr="fireworks_api_key",
|
||||||
default_base_url=FIREWORKS_DEFAULT_BASE,
|
default_base_url=FIREWORKS_DEFAULT_BASE,
|
||||||
proxy_attr="fireworks_proxy",
|
proxy_attr="fireworks_proxy",
|
||||||
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
|
capabilities=(
|
||||||
|
"chat",
|
||||||
|
"streaming",
|
||||||
|
"tools",
|
||||||
|
"thinking",
|
||||||
|
"native_anthropic",
|
||||||
|
"rate_limit",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
"gemini": ProviderDescriptor(
|
"gemini": ProviderDescriptor(
|
||||||
provider_id="gemini",
|
provider_id="gemini",
|
||||||
|
|||||||
@@ -1,33 +1,45 @@
|
|||||||
"""Fireworks AI provider implementation."""
|
"""Fireworks AI provider using native Anthropic-compatible Messages."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from providers.anthropic_messages import AnthropicMessagesTransport
|
||||||
from providers.base import ProviderConfig
|
from providers.base import ProviderConfig
|
||||||
from providers.openai_compat import OpenAIChatTransport
|
|
||||||
|
|
||||||
from .request import build_request_body
|
from .request import build_request_body
|
||||||
|
|
||||||
FIREWORKS_BASE_URL = "https://api.fireworks.ai/inference/v1"
|
FIREWORKS_BASE_URL = "https://api.fireworks.ai/inference/v1"
|
||||||
|
_ANTHROPIC_VERSION = "2023-06-01"
|
||||||
|
|
||||||
|
|
||||||
class FireworksProvider(OpenAIChatTransport):
|
class FireworksProvider(AnthropicMessagesTransport):
|
||||||
"""Fireworks AI provider using OpenAI-compatible chat completions."""
|
"""Fireworks AI using Anthropic-compatible Messages."""
|
||||||
|
|
||||||
def __init__(self, config: ProviderConfig):
|
def __init__(self, config: ProviderConfig):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
config,
|
config,
|
||||||
provider_name="FIREWORKS",
|
provider_name="FIREWORKS",
|
||||||
base_url=config.base_url or FIREWORKS_BASE_URL,
|
default_base_url=FIREWORKS_BASE_URL,
|
||||||
api_key=config.api_key,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_request_body(
|
def _build_request_body(
|
||||||
self, request: Any, thinking_enabled: bool | None = None
|
self, request: Any, thinking_enabled: bool | None = None
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Build request body for Fireworks AI."""
|
|
||||||
if thinking_enabled is None:
|
if thinking_enabled is None:
|
||||||
thinking_enabled = self._is_thinking_enabled(request)
|
thinking_enabled = self._is_thinking_enabled(request)
|
||||||
return build_request_body(
|
return build_request_body(
|
||||||
request,
|
request,
|
||||||
thinking_enabled=thinking_enabled,
|
thinking_enabled=thinking_enabled,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _request_headers(self) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"Accept": "text/event-stream",
|
||||||
|
"Authorization": f"Bearer {self._api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"anthropic-version": _ANTHROPIC_VERSION,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _model_list_headers(self) -> dict[str, str]:
|
||||||
|
return {"Authorization": f"Bearer {self._api_key}"}
|
||||||
|
|||||||
@@ -1,39 +1,46 @@
|
|||||||
"""Request builder for Fireworks AI provider."""
|
"""Native Anthropic Messages request builder for Fireworks AI."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from core.anthropic import ReasoningReplayMode, build_base_request_body
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
from core.anthropic.conversion import OpenAIConversionError
|
from core.anthropic.native_messages_request import (
|
||||||
|
OpenRouterExtraBodyError,
|
||||||
|
build_base_native_anthropic_request_body,
|
||||||
|
validate_openrouter_extra_body,
|
||||||
|
)
|
||||||
from providers.exceptions import InvalidRequestError
|
from providers.exceptions import InvalidRequestError
|
||||||
|
|
||||||
|
|
||||||
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
||||||
"""Build OpenAI-format request body from Anthropic request for Fireworks AI."""
|
"""Build JSON for Fireworks Anthropic-compat ``POST …/messages``."""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"FIREWORKS_REQUEST: conversion start model={} msgs={}",
|
"FIREWORKS_REQUEST: native build model={} msgs={}",
|
||||||
getattr(request_data, "model", "?"),
|
getattr(request_data, "model", "?"),
|
||||||
len(getattr(request_data, "messages", [])),
|
len(getattr(request_data, "messages", [])),
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
body = build_base_request_body(
|
|
||||||
request_data,
|
|
||||||
reasoning_replay=ReasoningReplayMode.REASONING_CONTENT,
|
|
||||||
)
|
|
||||||
except OpenAIConversionError as exc:
|
|
||||||
raise InvalidRequestError(str(exc)) from exc
|
|
||||||
|
|
||||||
extra_body: dict[str, Any] = {}
|
body = build_base_native_anthropic_request_body(
|
||||||
request_extra = getattr(request_data, "extra_body", None)
|
request_data,
|
||||||
if request_extra:
|
default_max_tokens=ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
|
||||||
extra_body.update(request_extra)
|
thinking_enabled=thinking_enabled,
|
||||||
|
)
|
||||||
|
|
||||||
if extra_body:
|
extra = getattr(request_data, "extra_body", None)
|
||||||
body["extra_body"] = extra_body
|
if isinstance(extra, dict) and extra:
|
||||||
|
try:
|
||||||
|
validate_openrouter_extra_body(extra)
|
||||||
|
except OpenRouterExtraBodyError as exc:
|
||||||
|
raise InvalidRequestError(str(exc)) from exc
|
||||||
|
body.update(extra)
|
||||||
|
|
||||||
|
body["stream"] = True
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"FIREWORKS_REQUEST: conversion done model={} msgs={} tools={}",
|
"FIREWORKS_REQUEST: build done model={} msgs={} tools={}",
|
||||||
body.get("model"),
|
body.get("model"),
|
||||||
len(body.get("messages", [])),
|
len(body.get("messages", [])),
|
||||||
len(body.get("tools", [])),
|
len(body.get("tools", [])),
|
||||||
|
|||||||
@@ -1,25 +1,29 @@
|
|||||||
"""Kimi (Moonshot) provider implementation."""
|
"""Kimi (Moonshot) provider using native Anthropic-compatible Messages."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from providers.anthropic_messages import AnthropicMessagesTransport
|
||||||
from providers.base import ProviderConfig
|
from providers.base import ProviderConfig
|
||||||
from providers.defaults import KIMI_DEFAULT_BASE
|
from providers.defaults import KIMI_DEFAULT_BASE
|
||||||
from providers.openai_compat import OpenAIChatTransport
|
|
||||||
|
|
||||||
from .request import build_request_body
|
from .request import build_request_body
|
||||||
|
|
||||||
|
_MOONSHOT_OPENAI_MODELS_URL = "https://api.moonshot.ai/v1/models"
|
||||||
|
_ANTHROPIC_VERSION = "2023-06-01"
|
||||||
|
|
||||||
class KimiProvider(OpenAIChatTransport):
|
|
||||||
"""Kimi provider using the OpenAI-compatible chat completions API."""
|
class KimiProvider(AnthropicMessagesTransport):
|
||||||
|
"""Kimi provider using Anthropic-compatible Messages at api.moonshot.ai/anthropic/v1."""
|
||||||
|
|
||||||
def __init__(self, config: ProviderConfig):
|
def __init__(self, config: ProviderConfig):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
config,
|
config,
|
||||||
provider_name="KIMI",
|
provider_name="KIMI",
|
||||||
base_url=config.base_url or KIMI_DEFAULT_BASE,
|
default_base_url=KIMI_DEFAULT_BASE,
|
||||||
api_key=config.api_key,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_request_body(
|
def _build_request_body(
|
||||||
@@ -29,3 +33,21 @@ class KimiProvider(OpenAIChatTransport):
|
|||||||
request,
|
request,
|
||||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _request_headers(self) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"Accept": "text/event-stream",
|
||||||
|
"Authorization": f"Bearer {self._api_key}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"anthropic-version": _ANTHROPIC_VERSION,
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _send_model_list_request(self) -> httpx.Response:
|
||||||
|
"""Models are listed from the OpenAI-compat root, not ``/anthropic/v1``."""
|
||||||
|
return await self._client.get(
|
||||||
|
_MOONSHOT_OPENAI_MODELS_URL,
|
||||||
|
headers=self._model_list_headers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def _model_list_headers(self) -> dict[str, str]:
|
||||||
|
return {"Authorization": f"Bearer {self._api_key}"}
|
||||||
|
|||||||
+21
-12
@@ -1,31 +1,40 @@
|
|||||||
"""Request builder for Kimi (Moonshot) provider."""
|
"""Native Anthropic Messages request builder for Kimi (Moonshot)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from core.anthropic import ReasoningReplayMode, build_base_request_body
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
from core.anthropic.conversion import OpenAIConversionError
|
from core.anthropic.native_messages_request import (
|
||||||
|
build_base_native_anthropic_request_body,
|
||||||
|
)
|
||||||
from providers.exceptions import InvalidRequestError
|
from providers.exceptions import InvalidRequestError
|
||||||
|
|
||||||
|
|
||||||
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
||||||
"""Build OpenAI-format request body from Anthropic request."""
|
"""Build JSON for Kimi Anthropic-compat ``POST …/messages``."""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"KIMI_REQUEST: conversion start model={} msgs={}",
|
"KIMI_REQUEST: native build model={} msgs={}",
|
||||||
getattr(request_data, "model", "?"),
|
getattr(request_data, "model", "?"),
|
||||||
len(getattr(request_data, "messages", [])),
|
len(getattr(request_data, "messages", [])),
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
body = build_base_request_body(
|
body = build_base_native_anthropic_request_body(
|
||||||
request_data,
|
request_data,
|
||||||
reasoning_replay=ReasoningReplayMode.REASONING_CONTENT,
|
default_max_tokens=ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
|
||||||
|
thinking_enabled=thinking_enabled,
|
||||||
|
)
|
||||||
|
extra = getattr(request_data, "extra_body", None)
|
||||||
|
if extra:
|
||||||
|
raise InvalidRequestError(
|
||||||
|
"Kimi native Messages API does not support extra_body on requests."
|
||||||
)
|
)
|
||||||
except OpenAIConversionError as exc:
|
body["stream"] = True
|
||||||
raise InvalidRequestError(str(exc)) from exc
|
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"KIMI_REQUEST: conversion done model={} msgs={} tools={}",
|
"KIMI_REQUEST: build done model={} msgs={} tools={}",
|
||||||
body.get("model"),
|
body.get("model"),
|
||||||
len(body.get("messages", [])),
|
len(body.get("messages", [])),
|
||||||
len(body.get("tools", [])),
|
len(body.get("tools", [])),
|
||||||
|
|||||||
+21
-6
@@ -1,25 +1,26 @@
|
|||||||
"""Z.ai provider implementation (OpenAI-compatible Coding Plan API)."""
|
"""Z.ai provider implementation (Anthropic-compatible Messages API)."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from providers.anthropic_messages import AnthropicMessagesTransport
|
||||||
from providers.base import ProviderConfig
|
from providers.base import ProviderConfig
|
||||||
from providers.defaults import ZAI_DEFAULT_BASE
|
from providers.defaults import ZAI_DEFAULT_BASE
|
||||||
from providers.openai_compat import OpenAIChatTransport
|
|
||||||
|
|
||||||
from .request import build_request_body
|
from .request import build_request_body
|
||||||
|
|
||||||
|
_ANTHROPIC_VERSION = "2023-06-01"
|
||||||
|
|
||||||
class ZaiProvider(OpenAIChatTransport):
|
|
||||||
"""Z.ai using OpenAI-compatible Coding Plan API."""
|
class ZaiProvider(AnthropicMessagesTransport):
|
||||||
|
"""Z.ai using Anthropic-compatible Messages at api.z.ai/api/anthropic/v1."""
|
||||||
|
|
||||||
def __init__(self, config: ProviderConfig):
|
def __init__(self, config: ProviderConfig):
|
||||||
super().__init__(
|
super().__init__(
|
||||||
config,
|
config,
|
||||||
provider_name="ZAI",
|
provider_name="ZAI",
|
||||||
base_url=config.base_url or ZAI_DEFAULT_BASE,
|
default_base_url=ZAI_DEFAULT_BASE,
|
||||||
api_key=config.api_key,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_request_body(
|
def _build_request_body(
|
||||||
@@ -29,3 +30,17 @@ class ZaiProvider(OpenAIChatTransport):
|
|||||||
request,
|
request,
|
||||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _request_headers(self) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"Accept": "text/event-stream",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"x-api-key": self._api_key,
|
||||||
|
"anthropic-version": _ANTHROPIC_VERSION,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _model_list_headers(self) -> dict[str, str]:
|
||||||
|
return {
|
||||||
|
"x-api-key": self._api_key,
|
||||||
|
"anthropic-version": _ANTHROPIC_VERSION,
|
||||||
|
}
|
||||||
|
|||||||
+21
-12
@@ -1,31 +1,40 @@
|
|||||||
"""Request builder for Z.ai OpenAI-compatible Coding Plan API."""
|
"""Native Anthropic Messages request builder for Z.ai."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from core.anthropic import ReasoningReplayMode, build_base_request_body
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
from core.anthropic.conversion import OpenAIConversionError
|
from core.anthropic.native_messages_request import (
|
||||||
|
build_base_native_anthropic_request_body,
|
||||||
|
)
|
||||||
from providers.exceptions import InvalidRequestError
|
from providers.exceptions import InvalidRequestError
|
||||||
|
|
||||||
|
|
||||||
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
|
||||||
"""Build OpenAI-format request body from Anthropic request."""
|
"""Build JSON for Z.ai Anthropic-compat ``POST …/messages``."""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"ZAI_REQUEST: conversion start model={} msgs={}",
|
"ZAI_REQUEST: native build model={} msgs={}",
|
||||||
getattr(request_data, "model", "?"),
|
getattr(request_data, "model", "?"),
|
||||||
len(getattr(request_data, "messages", [])),
|
len(getattr(request_data, "messages", [])),
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
body = build_base_request_body(
|
body = build_base_native_anthropic_request_body(
|
||||||
request_data,
|
request_data,
|
||||||
reasoning_replay=ReasoningReplayMode.REASONING_CONTENT,
|
default_max_tokens=ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
|
||||||
|
thinking_enabled=thinking_enabled,
|
||||||
|
)
|
||||||
|
extra = getattr(request_data, "extra_body", None)
|
||||||
|
if extra:
|
||||||
|
raise InvalidRequestError(
|
||||||
|
"Z.ai native Messages API does not support extra_body on requests."
|
||||||
)
|
)
|
||||||
except OpenAIConversionError as exc:
|
body["stream"] = True
|
||||||
raise InvalidRequestError(str(exc)) from exc
|
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"ZAI_REQUEST: conversion done model={} msgs={} tools={}",
|
"ZAI_REQUEST: build done model={} msgs={} tools={}",
|
||||||
body.get("model"),
|
body.get("model"),
|
||||||
len(body.get("messages", [])),
|
len(body.get("messages", [])),
|
||||||
len(body.get("tools", [])),
|
len(body.get("tools", [])),
|
||||||
|
|||||||
@@ -619,3 +619,28 @@ def test_listed_server_tools_routed_on_open_router() -> None:
|
|||||||
)
|
)
|
||||||
service.create_message(request)
|
service.create_message(request)
|
||||||
mock_provider.preflight_stream.assert_called()
|
mock_provider.preflight_stream.assert_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_listed_server_tools_routed_on_zai() -> None:
|
||||||
|
"""Z.ai uses native Anthropic Messages; listed server tools are not OpenAI-chat blocked."""
|
||||||
|
settings = Settings()
|
||||||
|
|
||||||
|
async def fake_stream(*_a, **_k):
|
||||||
|
yield 'event: message_start\ndata: {"type":"message_start"}\n\n'
|
||||||
|
yield 'event: message_stop\ndata: {"type":"message_stop"}\n\n'
|
||||||
|
|
||||||
|
mock_provider = MagicMock()
|
||||||
|
mock_provider.stream_response = fake_stream
|
||||||
|
service = ClaudeProxyService(
|
||||||
|
settings,
|
||||||
|
provider_getter=lambda _: mock_provider,
|
||||||
|
model_router=FixedProviderModelRouter(settings, "zai"),
|
||||||
|
)
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="m",
|
||||||
|
max_tokens=20,
|
||||||
|
messages=[Message(role="user", content="q")],
|
||||||
|
tools=[Tool(name="web_search", type="web_search_20250305")],
|
||||||
|
)
|
||||||
|
service.create_message(request)
|
||||||
|
mock_provider.preflight_stream.assert_called()
|
||||||
|
|||||||
@@ -7,8 +7,10 @@ from messaging.platforms.factory import create_messaging_platform
|
|||||||
from providers.base import BaseProvider
|
from providers.base import BaseProvider
|
||||||
from providers.cerebras import CerebrasProvider
|
from providers.cerebras import CerebrasProvider
|
||||||
from providers.deepseek import DeepSeekProvider
|
from providers.deepseek import DeepSeekProvider
|
||||||
|
from providers.fireworks import FireworksProvider
|
||||||
from providers.gemini import GeminiProvider
|
from providers.gemini import GeminiProvider
|
||||||
from providers.groq import GroqProvider
|
from providers.groq import GroqProvider
|
||||||
|
from providers.kimi import KimiProvider
|
||||||
from providers.llamacpp import LlamaCppProvider
|
from providers.llamacpp import LlamaCppProvider
|
||||||
from providers.lmstudio import LMStudioProvider
|
from providers.lmstudio import LMStudioProvider
|
||||||
from providers.mistral import MistralProvider
|
from providers.mistral import MistralProvider
|
||||||
@@ -78,6 +80,8 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
|
|||||||
"open_router": OpenRouterProvider,
|
"open_router": OpenRouterProvider,
|
||||||
"mistral": MistralProvider,
|
"mistral": MistralProvider,
|
||||||
"deepseek": DeepSeekProvider,
|
"deepseek": DeepSeekProvider,
|
||||||
|
"kimi": KimiProvider,
|
||||||
|
"fireworks": FireworksProvider,
|
||||||
"lmstudio": LMStudioProvider,
|
"lmstudio": LMStudioProvider,
|
||||||
"llamacpp": LlamaCppProvider,
|
"llamacpp": LlamaCppProvider,
|
||||||
"ollama": OllamaProvider,
|
"ollama": OllamaProvider,
|
||||||
|
|||||||
+102
-128
@@ -1,43 +1,17 @@
|
|||||||
"""Tests for Fireworks AI provider."""
|
"""Tests for Fireworks AI native Anthropic Messages provider."""
|
||||||
|
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from api.models.anthropic import Message, MessagesRequest
|
||||||
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
from providers.base import ProviderConfig
|
from providers.base import ProviderConfig
|
||||||
|
from providers.exceptions import InvalidRequestError
|
||||||
from providers.fireworks import FIREWORKS_BASE_URL, FireworksProvider
|
from providers.fireworks import FIREWORKS_BASE_URL, FireworksProvider
|
||||||
|
|
||||||
|
|
||||||
class MockMessage:
|
|
||||||
def __init__(self, role, content):
|
|
||||||
self.role = role
|
|
||||||
self.content = content
|
|
||||||
|
|
||||||
|
|
||||||
class MockBlock:
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
for key, value in kwargs.items():
|
|
||||||
setattr(self, key, value)
|
|
||||||
|
|
||||||
|
|
||||||
class MockRequest:
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
self.model = "accounts/fireworks/models/glm-5p1"
|
|
||||||
self.messages = [MockMessage("user", "Hello")]
|
|
||||||
self.max_tokens = 100
|
|
||||||
self.temperature = 0.5
|
|
||||||
self.top_p = 0.9
|
|
||||||
self.system = "System prompt"
|
|
||||||
self.stop_sequences = None
|
|
||||||
self.tools = []
|
|
||||||
self.extra_body = {}
|
|
||||||
self.thinking = MagicMock()
|
|
||||||
self.thinking.enabled = True
|
|
||||||
for key, value in kwargs.items():
|
|
||||||
setattr(self, key, value)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def fireworks_config():
|
def fireworks_config():
|
||||||
return ProviderConfig(
|
return ProviderConfig(
|
||||||
@@ -51,13 +25,11 @@ def fireworks_config():
|
|||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def mock_rate_limiter():
|
def mock_rate_limiter():
|
||||||
"""Mock the global rate limiter to prevent waiting."""
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def _slot():
|
async def _slot():
|
||||||
yield
|
yield
|
||||||
|
|
||||||
with patch("providers.openai_compat.GlobalRateLimiter") as mock:
|
with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
|
||||||
instance = mock.get_scoped_instance.return_value
|
instance = mock.get_scoped_instance.return_value
|
||||||
|
|
||||||
async def _passthrough(fn, *args, **kwargs):
|
async def _passthrough(fn, *args, **kwargs):
|
||||||
@@ -74,136 +46,138 @@ def fireworks_provider(fireworks_config):
|
|||||||
|
|
||||||
|
|
||||||
def test_init(fireworks_config):
|
def test_init(fireworks_config):
|
||||||
"""Test provider initialization."""
|
with patch("httpx.AsyncClient") as mock_client:
|
||||||
with patch("providers.openai_compat.AsyncOpenAI") as mock_openai:
|
|
||||||
provider = FireworksProvider(fireworks_config)
|
provider = FireworksProvider(fireworks_config)
|
||||||
assert provider._api_key == "test_fireworks_key"
|
assert provider._api_key == "test_fireworks_key"
|
||||||
assert provider._base_url == FIREWORKS_BASE_URL
|
assert provider._base_url == FIREWORKS_BASE_URL
|
||||||
mock_openai.assert_called_once()
|
assert mock_client.called
|
||||||
|
|
||||||
|
|
||||||
def test_base_url_constant():
|
def test_base_url_constant():
|
||||||
"""FIREWORKS_BASE_URL points to the Fireworks AI inference endpoint."""
|
|
||||||
assert FIREWORKS_BASE_URL == "https://api.fireworks.ai/inference/v1"
|
assert FIREWORKS_BASE_URL == "https://api.fireworks.ai/inference/v1"
|
||||||
|
|
||||||
|
|
||||||
def test_build_request_body_basic(fireworks_provider):
|
def test_request_headers(fireworks_provider):
|
||||||
"""Basic request body conversion works for Fireworks AI."""
|
h = fireworks_provider._request_headers()
|
||||||
req = MockRequest()
|
assert h["Authorization"] == "Bearer test_fireworks_key"
|
||||||
body = fireworks_provider._build_request_body(req)
|
assert h["anthropic-version"] == "2023-06-01"
|
||||||
|
assert h["Accept"] == "text/event-stream"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_native_shape(fireworks_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="accounts/fireworks/models/glm-5p1",
|
||||||
|
max_tokens=100,
|
||||||
|
messages=[Message(role="user", content="Hello")],
|
||||||
|
system="System prompt",
|
||||||
|
)
|
||||||
|
body = fireworks_provider._build_request_body(request)
|
||||||
assert body["model"] == "accounts/fireworks/models/glm-5p1"
|
assert body["model"] == "accounts/fireworks/models/glm-5p1"
|
||||||
assert body["messages"][0]["role"] == "system"
|
assert body["stream"] is True
|
||||||
|
assert body["max_tokens"] == 100
|
||||||
|
assert body["system"] == "System prompt"
|
||||||
|
assert body["messages"][0]["role"] == "user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_default_max_tokens(fireworks_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="m",
|
||||||
|
messages=[Message(role="user", content="x")],
|
||||||
|
)
|
||||||
|
body = fireworks_provider._build_request_body(request)
|
||||||
|
assert body["max_tokens"] == ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
|
|
||||||
|
|
||||||
def test_build_request_body_global_disable_blocks_thinking():
|
def test_build_request_body_global_disable_blocks_thinking():
|
||||||
"""Global disable suppresses provider-side thinking."""
|
|
||||||
provider = FireworksProvider(
|
provider = FireworksProvider(
|
||||||
ProviderConfig(
|
ProviderConfig(
|
||||||
api_key="test_fireworks_key",
|
api_key="k",
|
||||||
base_url=FIREWORKS_BASE_URL,
|
base_url=FIREWORKS_BASE_URL,
|
||||||
rate_limit=10,
|
rate_limit=1,
|
||||||
rate_window=60,
|
rate_window=1,
|
||||||
enable_thinking=False,
|
enable_thinking=False,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
req = MockRequest()
|
request = MessagesRequest.model_validate(
|
||||||
body = provider._build_request_body(req)
|
{
|
||||||
|
"model": "m",
|
||||||
# When thinking is disabled, no thinking-related fields should appear
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
assert "extra_body" not in body or "thinking" not in body.get("extra_body", {})
|
"thinking": {"type": "enabled", "budget_tokens": 1},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
body = provider._build_request_body(request)
|
||||||
|
assert "thinking" not in body
|
||||||
|
|
||||||
|
|
||||||
def test_build_request_body_request_disable_blocks_thinking(fireworks_provider):
|
def test_build_request_body_request_disable_blocks_thinking(fireworks_provider):
|
||||||
"""Request-level disable suppresses provider-side thinking when global is enabled."""
|
request = MessagesRequest.model_validate(
|
||||||
req = MockRequest()
|
{
|
||||||
req.thinking.enabled = False
|
"model": "m",
|
||||||
body = fireworks_provider._build_request_body(req)
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
|
"thinking": {"enabled": False},
|
||||||
assert "extra_body" not in body or "thinking" not in body.get("extra_body", {})
|
}
|
||||||
|
|
||||||
|
|
||||||
def test_build_request_body_preserves_caller_extra_body(fireworks_provider):
|
|
||||||
"""Caller-provided extra_body should be preserved."""
|
|
||||||
req = MockRequest(
|
|
||||||
extra_body={"custom_param": "value"},
|
|
||||||
)
|
)
|
||||||
body = fireworks_provider._build_request_body(req)
|
body = fireworks_provider._build_request_body(request)
|
||||||
|
assert "thinking" not in body
|
||||||
|
|
||||||
assert body["extra_body"]["custom_param"] == "value"
|
|
||||||
|
def test_build_request_body_merges_safe_extra_body(fireworks_provider):
|
||||||
|
request = MessagesRequest.model_validate(
|
||||||
|
{
|
||||||
|
"model": "m",
|
||||||
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
|
"extra_body": {"custom_param": "value"},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
body = fireworks_provider._build_request_body(request)
|
||||||
|
assert body["custom_param"] == "value"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_rejects_reserved_extra_body_keys(fireworks_provider):
|
||||||
|
request = MessagesRequest.model_validate(
|
||||||
|
{
|
||||||
|
"model": "m",
|
||||||
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
|
"extra_body": {"temperature": 0.1},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
with pytest.raises(InvalidRequestError, match="extra_body must not override"):
|
||||||
|
fireworks_provider._build_request_body(request)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_stream_response_text(fireworks_provider):
|
async def test_stream_uses_post_messages_path(fireworks_provider):
|
||||||
"""Text content deltas are emitted as text blocks."""
|
request = MessagesRequest(
|
||||||
req = MockRequest()
|
model="m",
|
||||||
|
messages=[Message(role="user", content="hi")],
|
||||||
|
)
|
||||||
|
called: dict[str, str] = {}
|
||||||
|
|
||||||
mock_chunk = MagicMock()
|
async def fake_send(request, *args, **kwargs):
|
||||||
mock_chunk.choices = [
|
called["path"] = request.url.path
|
||||||
MagicMock(
|
mock_resp = MagicMock()
|
||||||
delta=MagicMock(
|
mock_resp.status_code = 200
|
||||||
content="Hello back!",
|
mock_resp.is_closed = False
|
||||||
reasoning_content=None,
|
mock_resp.raise_for_status = lambda: None
|
||||||
tool_calls=None,
|
|
||||||
),
|
|
||||||
finish_reason="stop",
|
|
||||||
)
|
|
||||||
]
|
|
||||||
mock_chunk.usage = MagicMock(completion_tokens=5, prompt_tokens=10)
|
|
||||||
|
|
||||||
async def mock_stream():
|
async def aiter():
|
||||||
yield mock_chunk
|
if False: # pragma: no cover
|
||||||
|
yield ""
|
||||||
|
|
||||||
with patch.object(
|
mock_resp.aiter_lines = aiter
|
||||||
fireworks_provider._client.chat.completions, "create", new_callable=AsyncMock
|
mock_resp.aclose = AsyncMock()
|
||||||
) as mock_create:
|
return mock_resp
|
||||||
mock_create.return_value = mock_stream()
|
|
||||||
|
|
||||||
events = [event async for event in fireworks_provider.stream_response(req)]
|
fireworks_provider._client.send = fake_send
|
||||||
|
_ = [x async for x in fireworks_provider.stream_response(request, request_id="r1")]
|
||||||
|
|
||||||
assert any(
|
assert called["path"].endswith("/messages")
|
||||||
'"text_delta"' in event and "Hello back!" in event for event in events
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_stream_response_reasoning_content(fireworks_provider):
|
async def test_cleanup_aclose(fireworks_provider):
|
||||||
"""reasoning_content deltas are emitted as thinking blocks."""
|
|
||||||
req = MockRequest()
|
|
||||||
|
|
||||||
mock_chunk = MagicMock()
|
|
||||||
mock_chunk.choices = [
|
|
||||||
MagicMock(
|
|
||||||
delta=MagicMock(
|
|
||||||
content=None,
|
|
||||||
reasoning_content="Thinking...",
|
|
||||||
tool_calls=None,
|
|
||||||
),
|
|
||||||
finish_reason="stop",
|
|
||||||
)
|
|
||||||
]
|
|
||||||
mock_chunk.usage = MagicMock(completion_tokens=2, prompt_tokens=10)
|
|
||||||
|
|
||||||
async def mock_stream():
|
|
||||||
yield mock_chunk
|
|
||||||
|
|
||||||
with patch.object(
|
|
||||||
fireworks_provider._client.chat.completions, "create", new_callable=AsyncMock
|
|
||||||
) as mock_create:
|
|
||||||
mock_create.return_value = mock_stream()
|
|
||||||
|
|
||||||
events = [event async for event in fireworks_provider.stream_response(req)]
|
|
||||||
|
|
||||||
assert any(
|
|
||||||
'"thinking_delta"' in event and "Thinking..." in event for event in events
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_cleanup(fireworks_provider):
|
|
||||||
"""cleanup closes the OpenAI client."""
|
|
||||||
fireworks_provider._client = AsyncMock()
|
fireworks_provider._client = AsyncMock()
|
||||||
|
|
||||||
await fireworks_provider.cleanup()
|
await fireworks_provider.cleanup()
|
||||||
|
|
||||||
fireworks_provider._client.close.assert_called_once()
|
fireworks_provider._client.aclose.assert_awaited_once()
|
||||||
|
|||||||
@@ -0,0 +1,121 @@
|
|||||||
|
"""Tests for Kimi (Moonshot) native Anthropic Messages provider."""
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from api.models.anthropic import Message, MessagesRequest
|
||||||
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
|
from providers.base import ProviderConfig
|
||||||
|
from providers.defaults import KIMI_DEFAULT_BASE
|
||||||
|
from providers.exceptions import InvalidRequestError
|
||||||
|
from providers.kimi import KimiProvider
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def kimi_config():
|
||||||
|
return ProviderConfig(
|
||||||
|
api_key="test_kimi_key",
|
||||||
|
base_url=KIMI_DEFAULT_BASE,
|
||||||
|
rate_limit=10,
|
||||||
|
rate_window=60,
|
||||||
|
enable_thinking=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def mock_rate_limiter():
|
||||||
|
@asynccontextmanager
|
||||||
|
async def _slot():
|
||||||
|
yield
|
||||||
|
|
||||||
|
with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
|
||||||
|
instance = mock.get_scoped_instance.return_value
|
||||||
|
|
||||||
|
async def _passthrough(fn, *args, **kwargs):
|
||||||
|
return await fn(*args, **kwargs)
|
||||||
|
|
||||||
|
instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
|
||||||
|
instance.concurrency_slot.side_effect = _slot
|
||||||
|
yield instance
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def kimi_provider(kimi_config):
|
||||||
|
return KimiProvider(kimi_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_init(kimi_config):
|
||||||
|
with patch("httpx.AsyncClient") as mock_client:
|
||||||
|
provider = KimiProvider(kimi_config)
|
||||||
|
assert provider._api_key == "test_kimi_key"
|
||||||
|
assert provider._base_url == KIMI_DEFAULT_BASE
|
||||||
|
assert mock_client.called
|
||||||
|
|
||||||
|
|
||||||
|
def test_request_headers(kimi_provider):
|
||||||
|
h = kimi_provider._request_headers()
|
||||||
|
assert h["Authorization"] == "Bearer test_kimi_key"
|
||||||
|
assert h["anthropic-version"] == "2023-06-01"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_native(kimi_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="kimi-k2.5",
|
||||||
|
max_tokens=50,
|
||||||
|
messages=[Message(role="user", content="hi")],
|
||||||
|
)
|
||||||
|
body = kimi_provider._build_request_body(request)
|
||||||
|
assert body["model"] == "kimi-k2.5"
|
||||||
|
assert body["stream"] is True
|
||||||
|
assert body["messages"][0]["role"] == "user"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_default_max_tokens(kimi_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="m",
|
||||||
|
messages=[Message(role="user", content="x")],
|
||||||
|
)
|
||||||
|
body = kimi_provider._build_request_body(request)
|
||||||
|
assert body["max_tokens"] == ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_rejects_extra_body(kimi_provider):
|
||||||
|
request = MessagesRequest.model_validate(
|
||||||
|
{
|
||||||
|
"model": "m",
|
||||||
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
|
"extra_body": {"x": 1},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
with pytest.raises(InvalidRequestError, match="does not support extra_body"):
|
||||||
|
kimi_provider._build_request_body(request)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_model_list_uses_moonshot_openai_url(kimi_provider):
|
||||||
|
called: dict[str, str] = {}
|
||||||
|
|
||||||
|
async def fake_get(url: str, **_k):
|
||||||
|
called["url"] = url
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.raise_for_status = lambda: None
|
||||||
|
mock_resp.json = lambda: {"data": [{"id": "kimi-k2.5"}]}
|
||||||
|
mock_resp.aclose = AsyncMock()
|
||||||
|
return mock_resp
|
||||||
|
|
||||||
|
kimi_provider._client.get = fake_get
|
||||||
|
|
||||||
|
await kimi_provider.list_model_infos()
|
||||||
|
|
||||||
|
assert called["url"] == "https://api.moonshot.ai/v1/models"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_cleanup_aclose(kimi_provider):
|
||||||
|
kimi_provider._client = AsyncMock()
|
||||||
|
|
||||||
|
await kimi_provider.cleanup()
|
||||||
|
|
||||||
|
kimi_provider._client.aclose.assert_awaited_once()
|
||||||
@@ -10,8 +10,10 @@ from config.provider_ids import SUPPORTED_PROVIDER_IDS
|
|||||||
from providers.cerebras import CerebrasProvider
|
from providers.cerebras import CerebrasProvider
|
||||||
from providers.deepseek import DeepSeekProvider
|
from providers.deepseek import DeepSeekProvider
|
||||||
from providers.exceptions import UnknownProviderTypeError
|
from providers.exceptions import UnknownProviderTypeError
|
||||||
|
from providers.fireworks import FireworksProvider
|
||||||
from providers.gemini import GeminiProvider
|
from providers.gemini import GeminiProvider
|
||||||
from providers.groq import GroqProvider
|
from providers.groq import GroqProvider
|
||||||
|
from providers.kimi import KimiProvider
|
||||||
from providers.llamacpp import LlamaCppProvider
|
from providers.llamacpp import LlamaCppProvider
|
||||||
from providers.lmstudio import LMStudioProvider
|
from providers.lmstudio import LMStudioProvider
|
||||||
from providers.mistral import MistralProvider
|
from providers.mistral import MistralProvider
|
||||||
@@ -49,12 +51,13 @@ def _make_settings(**overrides):
|
|||||||
mock.llamacpp_proxy = ""
|
mock.llamacpp_proxy = ""
|
||||||
mock.mistral_proxy = ""
|
mock.mistral_proxy = ""
|
||||||
mock.kimi_proxy = ""
|
mock.kimi_proxy = ""
|
||||||
|
mock.kimi_api_key = "test_kimi_key"
|
||||||
mock.wafer_proxy = ""
|
mock.wafer_proxy = ""
|
||||||
mock.opencode_proxy = ""
|
mock.opencode_proxy = ""
|
||||||
mock.opencode_go_proxy = ""
|
mock.opencode_go_proxy = ""
|
||||||
mock.zai_proxy = ""
|
mock.zai_proxy = ""
|
||||||
mock.fireworks_proxy = ""
|
mock.fireworks_proxy = ""
|
||||||
mock.fireworks_api_key = ""
|
mock.fireworks_api_key = "test_fireworks_key"
|
||||||
mock.gemini_api_key = ""
|
mock.gemini_api_key = ""
|
||||||
mock.gemini_proxy = ""
|
mock.gemini_proxy = ""
|
||||||
mock.groq_api_key = ""
|
mock.groq_api_key = ""
|
||||||
@@ -162,11 +165,15 @@ def test_create_provider_instantiates_each_builtin():
|
|||||||
gemini_api_key="test_gemini_key",
|
gemini_api_key="test_gemini_key",
|
||||||
groq_api_key="test_groq_key",
|
groq_api_key="test_groq_key",
|
||||||
cerebras_api_key="test_cerebras_key",
|
cerebras_api_key="test_cerebras_key",
|
||||||
|
fireworks_api_key="test_fireworks_key",
|
||||||
|
kimi_api_key="test_kimi_key",
|
||||||
)
|
)
|
||||||
cases = {
|
cases = {
|
||||||
"nvidia_nim": NvidiaNimProvider,
|
"nvidia_nim": NvidiaNimProvider,
|
||||||
"mistral": MistralProvider,
|
"mistral": MistralProvider,
|
||||||
"deepseek": DeepSeekProvider,
|
"deepseek": DeepSeekProvider,
|
||||||
|
"kimi": KimiProvider,
|
||||||
|
"fireworks": FireworksProvider,
|
||||||
"lmstudio": LMStudioProvider,
|
"lmstudio": LMStudioProvider,
|
||||||
"llamacpp": LlamaCppProvider,
|
"llamacpp": LlamaCppProvider,
|
||||||
"ollama": OllamaProvider,
|
"ollama": OllamaProvider,
|
||||||
|
|||||||
@@ -0,0 +1,107 @@
|
|||||||
|
"""Tests for Z.ai native Anthropic Messages provider."""
|
||||||
|
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from api.models.anthropic import Message, MessagesRequest
|
||||||
|
from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
|
from providers.base import ProviderConfig
|
||||||
|
from providers.defaults import ZAI_DEFAULT_BASE
|
||||||
|
from providers.exceptions import InvalidRequestError
|
||||||
|
from providers.zai import ZaiProvider
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def zai_config():
|
||||||
|
return ProviderConfig(
|
||||||
|
api_key="test_zai_key",
|
||||||
|
base_url=ZAI_DEFAULT_BASE,
|
||||||
|
rate_limit=10,
|
||||||
|
rate_window=60,
|
||||||
|
enable_thinking=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def mock_rate_limiter():
|
||||||
|
@asynccontextmanager
|
||||||
|
async def _slot():
|
||||||
|
yield
|
||||||
|
|
||||||
|
with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
|
||||||
|
instance = mock.get_scoped_instance.return_value
|
||||||
|
|
||||||
|
async def _passthrough(fn, *args, **kwargs):
|
||||||
|
return await fn(*args, **kwargs)
|
||||||
|
|
||||||
|
instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
|
||||||
|
instance.concurrency_slot.side_effect = _slot
|
||||||
|
yield instance
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def zai_provider(zai_config):
|
||||||
|
return ZaiProvider(zai_config)
|
||||||
|
|
||||||
|
|
||||||
|
def test_init(zai_config):
|
||||||
|
with patch("httpx.AsyncClient") as mock_client:
|
||||||
|
provider = ZaiProvider(zai_config)
|
||||||
|
assert provider._api_key == "test_zai_key"
|
||||||
|
assert provider._base_url == ZAI_DEFAULT_BASE
|
||||||
|
assert mock_client.called
|
||||||
|
|
||||||
|
|
||||||
|
def test_request_headers(zai_provider):
|
||||||
|
h = zai_provider._request_headers()
|
||||||
|
assert h["x-api-key"] == "test_zai_key"
|
||||||
|
assert h["anthropic-version"] == "2023-06-01"
|
||||||
|
|
||||||
|
|
||||||
|
def test_model_list_headers(zai_provider):
|
||||||
|
h = zai_provider._model_list_headers()
|
||||||
|
assert h["x-api-key"] == "test_zai_key"
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_native(zai_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="glm-5.1",
|
||||||
|
max_tokens=100,
|
||||||
|
messages=[Message(role="user", content="Hello")],
|
||||||
|
)
|
||||||
|
body = zai_provider._build_request_body(request)
|
||||||
|
assert body["model"] == "glm-5.1"
|
||||||
|
assert body["stream"] is True
|
||||||
|
assert body["max_tokens"] == 100
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_default_max_tokens(zai_provider):
|
||||||
|
request = MessagesRequest(
|
||||||
|
model="m",
|
||||||
|
messages=[Message(role="user", content="x")],
|
||||||
|
)
|
||||||
|
body = zai_provider._build_request_body(request)
|
||||||
|
assert body["max_tokens"] == ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_request_body_rejects_extra_body(zai_provider):
|
||||||
|
request = MessagesRequest.model_validate(
|
||||||
|
{
|
||||||
|
"model": "m",
|
||||||
|
"messages": [{"role": "user", "content": "x"}],
|
||||||
|
"extra_body": {"x": 1},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
with pytest.raises(InvalidRequestError, match="does not support extra_body"):
|
||||||
|
zai_provider._build_request_body(request)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_cleanup_aclose(zai_provider):
|
||||||
|
zai_provider._client = AsyncMock()
|
||||||
|
|
||||||
|
await zai_provider.cleanup()
|
||||||
|
|
||||||
|
zai_provider._client.aclose.assert_awaited_once()
|
||||||
Reference in New Issue
Block a user