Add Groq Provider

This commit is contained in:
Alishahryar1
2026-05-23 16:31:48 -07:00
parent 1324c36da5
commit b2f66db0bb
19 changed files with 512 additions and 4 deletions
+7 -1
View File
@@ -38,6 +38,10 @@ FIREWORKS_API_KEY=""
GEMINI_API_KEY="" GEMINI_API_KEY=""
# Groq Cloud (OpenAI-compatible Chat Completions; see https://console.groq.com/docs/openai)
GROQ_API_KEY=""
# LM Studio Config (local provider, no API key required) # LM Studio Config (local provider, no API key required)
LM_STUDIO_BASE_URL="http://localhost:1234/v1" LM_STUDIO_BASE_URL="http://localhost:1234/v1"
@@ -52,7 +56,7 @@ OLLAMA_BASE_URL="http://localhost:11434"
# All Claude model requests are mapped to these models, plain model is fallback # All Claude model requests are mapped to these models, plain model is fallback
# Format: provider_type/model/name # Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "mistral" | "deepseek" | "kimi" | "wafer" | "lmstudio" | "llamacpp" | "ollama" | "opencode" | "opencode_go" | "zai" | "fireworks" | "gemini" # Valid providers: "nvidia_nim" | "open_router" | "mistral" | "deepseek" | "kimi" | "wafer" | "lmstudio" | "llamacpp" | "ollama" | "opencode" | "opencode_go" | "zai" | "fireworks" | "gemini" | "groq"
MODEL_OPUS= MODEL_OPUS=
MODEL_SONNET= MODEL_SONNET=
MODEL_HAIKU= MODEL_HAIKU=
@@ -75,6 +79,7 @@ FCC_SMOKE_MODEL_OPENCODE_GO=
FCC_SMOKE_MODEL_ZAI= FCC_SMOKE_MODEL_ZAI=
FCC_SMOKE_MODEL_FIREWORKS= FCC_SMOKE_MODEL_FIREWORKS=
FCC_SMOKE_MODEL_GEMINI= FCC_SMOKE_MODEL_GEMINI=
FCC_SMOKE_MODEL_GROQ=
FCC_SMOKE_NIM_MODELS= FCC_SMOKE_NIM_MODELS=
FCC_SMOKE_NIM_EXTRA_MODELS= FCC_SMOKE_NIM_EXTRA_MODELS=
FCC_SMOKE_OPENROUTER_FREE_MODELS= FCC_SMOKE_OPENROUTER_FREE_MODELS=
@@ -104,6 +109,7 @@ OPENCODE_GO_PROXY=""
ZAI_PROXY="" ZAI_PROXY=""
FIREWORKS_PROXY="" FIREWORKS_PROXY=""
GEMINI_PROXY="" GEMINI_PROXY=""
GROQ_PROXY=""
PROVIDER_RATE_LIMIT=1 PROVIDER_RATE_LIMIT=1
PROVIDER_RATE_WINDOW=3 PROVIDER_RATE_WINDOW=3
+14 -2
View File
@@ -37,7 +37,7 @@ Free Claude Code routes Anthropic Messages API traffic from Claude Code to any p
## What You Get ## What You Get
- Drop-in proxy for Claude Code's Anthropic API calls. - Drop-in proxy for Claude Code's Anthropic API calls.
- Fourteen provider backends: NVIDIA NIM, OpenRouter, Mistral La Plateforme, DeepSeek, Kimi, Wafer, LM Studio, llama.cpp, Ollama, OpenCode Zen, OpenCode Go, Z.ai, Fireworks AI, and Google AI Studio (Gemini). - Fifteen provider backends: NVIDIA NIM, OpenRouter, Mistral La Plateforme, DeepSeek, Kimi, Wafer, LM Studio, llama.cpp, Ollama, OpenCode Zen, OpenCode Go, Z.ai, Fireworks AI, Google AI Studio (Gemini), and Groq.
- Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers. - Per-model routing: send Opus, Sonnet, Haiku, and fallback traffic to different providers.
- Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)). - Native Claude Code `/model` picker support through the proxy's `/v1/models` endpoint (Claude Code must opt in to Gateway model discovery; see [Model Picker](#model-picker)).
- Streaming, tool use, reasoning/thinking block handling, and local request optimizations. - Streaming, tool use, reasoning/thinking block handling, and local request optimizations.
@@ -271,7 +271,19 @@ Popular examples:
- `gemini/gemini-2.5-flash` - `gemini/gemini-2.5-flash`
- `gemini/gemini-3.1-flash-lite` - `gemini/gemini-3.1-flash-lite`
### 15. Mix Providers By Model Tier ### 15. [Groq](https://console.groq.com/)
Get an API key at [console.groq.com/keys](https://console.groq.com/keys).
In the Admin UI, paste it into `GROQ_API_KEY`, then set `MODEL` to a Groq OpenAI-compat model slug such as `groq/llama-3.3-70b-versatile`.
Groq routes through `https://api.groq.com/openai/v1` ([OpenAI-compatible Chat Completions](https://console.groq.com/docs/openai)). Some request fields yield HTTP 400; this adapter strips known-unsupported shapes (documented in Groq's compatibility notes).
Reasoning-heavy models expose extra knobs documented under [Groq reasoning](https://console.groq.com/docs/reasoning). Like the other OpenAI-compat adapters, this release surfaces thinking through `reasoning_content` deltas when Claude-style thinking is enabled; you can tune advanced parameters through the request's `extra_body` when needed.
Browse models at [console.groq.com/docs/models](https://console.groq.com/docs/models).
### 16. Mix Providers By Model Tier
Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`. Each model tier can use a different provider by setting `MODEL_OPUS`, `MODEL_SONNET`, and `MODEL_HAIKU` in the Admin UI. Leave a tier blank to inherit `MODEL`.
+28
View File
@@ -213,6 +213,19 @@ FIELDS: tuple[ConfigFieldSpec, ...] = (
"outside the UK/CH/EEA/EU." "outside the UK/CH/EEA/EU."
), ),
), ),
ConfigFieldSpec(
"GROQ_API_KEY",
"Groq API Key",
"providers",
"secret",
settings_attr="groq_api_key",
secret=True,
description=(
"GroqCloud OpenAI-compatible API key ([console.groq.com/keys]("
"https://console.groq.com/keys)); see Groq "
"[OpenAI compatibility docs](https://console.groq.com/docs/openai)."
),
),
ConfigFieldSpec( ConfigFieldSpec(
"LM_STUDIO_BASE_URL", "LM_STUDIO_BASE_URL",
"LM Studio Base URL", "LM Studio Base URL",
@@ -342,6 +355,15 @@ FIELDS: tuple[ConfigFieldSpec, ...] = (
secret=True, secret=True,
advanced=True, advanced=True,
), ),
ConfigFieldSpec(
"GROQ_PROXY",
"Groq Proxy",
"providers",
"secret",
settings_attr="groq_proxy",
secret=True,
advanced=True,
),
ConfigFieldSpec( ConfigFieldSpec(
"MODEL", "MODEL",
"Default Model", "Default Model",
@@ -814,6 +836,12 @@ FIELDS: tuple[ConfigFieldSpec, ...] = (
"smoke", "smoke",
advanced=True, advanced=True,
), ),
ConfigFieldSpec(
"FCC_SMOKE_MODEL_GROQ",
"Smoke Groq Model",
"smoke",
advanced=True,
),
ConfigFieldSpec( ConfigFieldSpec(
"FCC_SMOKE_NIM_MODELS", "FCC_SMOKE_NIM_MODELS",
"Smoke NIM Models", "Smoke NIM Models",
+11
View File
@@ -30,6 +30,7 @@ OPENCODE_GO_DEFAULT_BASE = "https://opencode.ai/zen/go/v1"
ZAI_DEFAULT_BASE = "https://api.z.ai/api/coding/paas/v4" ZAI_DEFAULT_BASE = "https://api.z.ai/api/coding/paas/v4"
# Google AI Studio Gemini API OpenAI-compat layer (not Vertex AI). # Google AI Studio Gemini API OpenAI-compat layer (not Vertex AI).
GEMINI_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/" GEMINI_DEFAULT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai/"
GROQ_DEFAULT_BASE = "https://api.groq.com/openai/v1"
@dataclass(frozen=True, slots=True) @dataclass(frozen=True, slots=True)
@@ -190,6 +191,16 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
proxy_attr="gemini_proxy", proxy_attr="gemini_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"), capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
), ),
"groq": ProviderDescriptor(
provider_id="groq",
transport_type="openai_chat",
credential_env="GROQ_API_KEY",
credential_url="https://console.groq.com/keys",
credential_attr="groq_api_key",
default_base_url=GROQ_DEFAULT_BASE,
proxy_attr="groq_proxy",
capabilities=("chat", "streaming", "tools", "thinking", "rate_limit"),
),
} }
# Order matches docs / historical error text; must match PROVIDER_CATALOG keys. # Order matches docs / historical error text; must match PROVIDER_CATALOG keys.
+4
View File
@@ -135,6 +135,9 @@ class Settings(BaseSettings):
# ==================== Google Gemini (Google AI Studio) ==================== # ==================== Google Gemini (Google AI Studio) ====================
gemini_api_key: str = Field(default="", validation_alias="GEMINI_API_KEY") gemini_api_key: str = Field(default="", validation_alias="GEMINI_API_KEY")
# ==================== Groq (OpenAI-compatible) ====================
groq_api_key: str = Field(default="", validation_alias="GROQ_API_KEY")
# ==================== Messaging Platform Selection ==================== # ==================== Messaging Platform Selection ====================
# Valid: "telegram" | "discord" | "none" # Valid: "telegram" | "discord" | "none"
messaging_platform: str = Field( messaging_platform: str = Field(
@@ -192,6 +195,7 @@ class Settings(BaseSettings):
zai_proxy: str = Field(default="", validation_alias="ZAI_PROXY") zai_proxy: str = Field(default="", validation_alias="ZAI_PROXY")
fireworks_proxy: str = Field(default="", validation_alias="FIREWORKS_PROXY") fireworks_proxy: str = Field(default="", validation_alias="FIREWORKS_PROXY")
gemini_proxy: str = Field(default="", validation_alias="GEMINI_PROXY") gemini_proxy: str = Field(default="", validation_alias="GEMINI_PROXY")
groq_proxy: str = Field(default="", validation_alias="GROQ_PROXY")
# ==================== Provider Rate Limiting ==================== # ==================== Provider Rate Limiting ====================
provider_rate_limit: int = Field(default=40, validation_alias="PROVIDER_RATE_LIMIT") provider_rate_limit: int = Field(default=40, validation_alias="PROVIDER_RATE_LIMIT")
+2
View File
@@ -4,6 +4,7 @@ from config.provider_catalog import (
DEEPSEEK_ANTHROPIC_DEFAULT_BASE, DEEPSEEK_ANTHROPIC_DEFAULT_BASE,
DEEPSEEK_DEFAULT_BASE, DEEPSEEK_DEFAULT_BASE,
GEMINI_DEFAULT_BASE, GEMINI_DEFAULT_BASE,
GROQ_DEFAULT_BASE,
KIMI_DEFAULT_BASE, KIMI_DEFAULT_BASE,
LLAMACPP_DEFAULT_BASE, LLAMACPP_DEFAULT_BASE,
LMSTUDIO_DEFAULT_BASE, LMSTUDIO_DEFAULT_BASE,
@@ -21,6 +22,7 @@ __all__ = (
"DEEPSEEK_ANTHROPIC_DEFAULT_BASE", "DEEPSEEK_ANTHROPIC_DEFAULT_BASE",
"DEEPSEEK_DEFAULT_BASE", "DEEPSEEK_DEFAULT_BASE",
"GEMINI_DEFAULT_BASE", "GEMINI_DEFAULT_BASE",
"GROQ_DEFAULT_BASE",
"KIMI_DEFAULT_BASE", "KIMI_DEFAULT_BASE",
"LLAMACPP_DEFAULT_BASE", "LLAMACPP_DEFAULT_BASE",
"LMSTUDIO_DEFAULT_BASE", "LMSTUDIO_DEFAULT_BASE",
+7
View File
@@ -0,0 +1,7 @@
"""Groq Cloud (OpenAI-compat) adapter."""
from providers.defaults import GROQ_DEFAULT_BASE
from .client import GroqProvider
__all__ = ["GROQ_DEFAULT_BASE", "GroqProvider"]
+31
View File
@@ -0,0 +1,31 @@
"""Groq provider implementation (OpenAI-compatible chat completions)."""
from __future__ import annotations
from typing import Any
from providers.base import ProviderConfig
from providers.defaults import GROQ_DEFAULT_BASE
from providers.openai_compat import OpenAIChatTransport
from .request import build_request_body
class GroqProvider(OpenAIChatTransport):
    """Groq API using ``https://api.groq.com/openai/v1/chat/completions``.

    The base URL comes from the provider config when it is set and falls back
    to ``GROQ_DEFAULT_BASE`` otherwise.
    """

    def __init__(self, config: ProviderConfig):
        # An empty/missing base URL in the config selects the Groq default.
        resolved_base_url = config.base_url or GROQ_DEFAULT_BASE
        super().__init__(
            config,
            provider_name="GROQ",
            base_url=resolved_base_url,
            api_key=config.api_key,
        )

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Convert *request* into a Groq chat-completions body.

        The thinking flag is resolved through ``_is_thinking_enabled`` before
        it is handed to the Groq request builder.
        """
        effective_thinking = self._is_thinking_enabled(request, thinking_enabled)
        return build_request_body(request, thinking_enabled=effective_thinking)
+83
View File
@@ -0,0 +1,83 @@
"""Request builder for Groq (OpenAI-compatible chat completions).
See the Groq docs (https://console.groq.com/docs/openai): ``messages[].name`` and
the unsupported ``logprobs`` / ``logit_bias`` / ``top_logprobs`` fields yield HTTP
400; ``max_completion_tokens`` is preferred over deprecated ``max_tokens``.
"""
from __future__ import annotations
from typing import Any
from loguru import logger
from core.anthropic import ReasoningReplayMode, build_base_request_body
from core.anthropic.conversion import OpenAIConversionError
from providers.exceptions import InvalidRequestError
# Top-level request fields that are removed from the body before it is returned
# by the Groq request builder.
_GROQ_UNSUPPORTED_TOP_KEYS = frozenset({"logprobs", "logit_bias", "top_logprobs"})
def _strip_message_names(messages: Any) -> None:
"""Remove ``name`` from each chat message (Groq rejects ``messages[].name``)."""
if not isinstance(messages, list):
return
for msg in messages:
if isinstance(msg, dict):
msg.pop("name", None)
def _strip_unsupported_body_keys(body: dict[str, Any]) -> None:
    """Delete, in place, every key of *body* listed in ``_GROQ_UNSUPPORTED_TOP_KEYS``."""
    for unsupported in _GROQ_UNSUPPORTED_TOP_KEYS:
        if unsupported in body:
            del body[unsupported]
def _normalize_max_completion_tokens(body: dict[str, Any]) -> None:
if "max_completion_tokens" in body:
body.pop("max_tokens", None)
return
if "max_tokens" in body and body["max_tokens"] is not None:
body["max_completion_tokens"] = body.pop("max_tokens")
def _normalize_n_candidates(body: dict[str, Any]) -> None:
"""Groq only supports ``n`` = 1; coerce if present."""
if body.get("n") is None:
return
body["n"] = 1
def build_request_body(request_data: Any, *, thinking_enabled: bool) -> dict:
    """Build OpenAI-format request body from an Anthropic request for Groq.

    The shared converter produces the base body; Groq-specific clean-up then
    removes ``messages[].name``, removes the unsupported top-level fields,
    folds ``max_tokens`` into ``max_completion_tokens`` and forces ``n`` to 1.

    Args:
        request_data: Anthropic-style request object. Its ``model``,
            ``messages`` and optional ``extra_body`` attributes are read here;
            everything else is consumed by the shared converter.
        thinking_enabled: When true, reasoning is replayed as
            ``reasoning_content``; otherwise reasoning replay is disabled.

    Returns:
        The request body dict, sanitized for Groq.

    Raises:
        InvalidRequestError: If the shared converter rejects the request.
    """
    # ``or []`` keeps the debug log from raising when ``messages`` is None.
    logger.debug(
        "GROQ_REQUEST: conversion start model={} msgs={}",
        getattr(request_data, "model", "?"),
        len(getattr(request_data, "messages", None) or []),
    )
    replay_mode = (
        ReasoningReplayMode.REASONING_CONTENT
        if thinking_enabled
        else ReasoningReplayMode.DISABLED
    )
    try:
        body = build_base_request_body(request_data, reasoning_replay=replay_mode)
    except OpenAIConversionError as exc:
        raise InvalidRequestError(str(exc)) from exc

    request_extra = getattr(request_data, "extra_body", None)
    if isinstance(request_extra, dict) and request_extra:
        # Merge on top of anything the converter already placed in
        # ``extra_body`` instead of discarding it; caller-supplied keys win.
        converter_extra = body.get("extra_body")
        merged = dict(converter_extra) if isinstance(converter_extra, dict) else {}
        merged.update(request_extra)
        # NOTE(review): presumably the transport forwards ``extra_body`` into
        # the JSON payload, so unsupported fields must not slip through here
        # either -- confirm against the OpenAI-compat transport.
        _strip_unsupported_body_keys(merged)
        if merged:
            body["extra_body"] = merged

    _strip_message_names(body.get("messages"))
    _strip_unsupported_body_keys(body)
    _normalize_max_completion_tokens(body)
    _normalize_n_candidates(body)

    # ``or []`` tolerates keys that are present but set to None.
    logger.debug(
        "GROQ_REQUEST: conversion done model={} msgs={} tools={}",
        body.get("model"),
        len(body.get("messages") or []),
        len(body.get("tools") or []),
    )
    return body
+7
View File
@@ -116,6 +116,12 @@ def _create_gemini(config: ProviderConfig, _settings: Settings) -> BaseProvider:
return GeminiProvider(config) return GeminiProvider(config)
def _create_groq(config: ProviderConfig, _settings: Settings) -> BaseProvider:
    """Build a ``GroqProvider`` from *config*; ``_settings`` is not used.

    The provider class is imported inside the function body rather than at
    module import time.
    """
    from providers.groq import GroqProvider as _GroqProvider

    return _GroqProvider(config)
PROVIDER_FACTORIES: dict[str, ProviderFactory] = { PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"nvidia_nim": _create_nvidia_nim, "nvidia_nim": _create_nvidia_nim,
"open_router": _create_open_router, "open_router": _create_open_router,
@@ -131,6 +137,7 @@ PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"zai": _create_zai, "zai": _create_zai,
"fireworks": _create_fireworks, "fireworks": _create_fireworks,
"gemini": _create_gemini, "gemini": _create_gemini,
"groq": _create_groq,
} }
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set( if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
+1
View File
@@ -121,6 +121,7 @@ uv run pytest smoke/product -n 0 -s --tb=short
`FCC_SMOKE_MODEL_MISTRAL`, `FCC_SMOKE_MODEL_DEEPSEEK`, `FCC_SMOKE_MODEL_KIMI`, `FCC_SMOKE_MODEL_MISTRAL`, `FCC_SMOKE_MODEL_DEEPSEEK`, `FCC_SMOKE_MODEL_KIMI`,
`FCC_SMOKE_MODEL_WAFER`, `FCC_SMOKE_MODEL_OPENCODE`, `FCC_SMOKE_MODEL_OPENCODE_GO`, `FCC_SMOKE_MODEL_WAFER`, `FCC_SMOKE_MODEL_OPENCODE`, `FCC_SMOKE_MODEL_OPENCODE_GO`,
`FCC_SMOKE_MODEL_ZAI`, `FCC_SMOKE_MODEL_FIREWORKS`, `FCC_SMOKE_MODEL_GEMINI`, `FCC_SMOKE_MODEL_ZAI`, `FCC_SMOKE_MODEL_FIREWORKS`, `FCC_SMOKE_MODEL_GEMINI`,
`FCC_SMOKE_MODEL_GROQ`,
`FCC_SMOKE_MODEL_LMSTUDIO`, `FCC_SMOKE_MODEL_LMSTUDIO`,
`FCC_SMOKE_MODEL_LLAMACPP`, `FCC_SMOKE_MODEL_OLLAMA`: optional per-provider `FCC_SMOKE_MODEL_LLAMACPP`, `FCC_SMOKE_MODEL_OLLAMA`: optional per-provider
smoke model overrides. Values may include the provider prefix or just the model smoke model overrides. Values may include the provider prefix or just the model
+3
View File
@@ -54,6 +54,7 @@ PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
"opencode_go": "opencode_go/minimax-m2.7", "opencode_go": "opencode_go/minimax-m2.7",
"zai": "zai/glm-5.1", "zai": "zai/glm-5.1",
"gemini": "gemini/gemini-2.5-flash", "gemini": "gemini/gemini-2.5-flash",
"groq": "groq/llama-3.3-70b-versatile",
} }
NVIDIA_NIM_CLI_DEFAULT_MODELS: tuple[str, ...] = ( NVIDIA_NIM_CLI_DEFAULT_MODELS: tuple[str, ...] = (
@@ -250,6 +251,8 @@ class SmokeConfig:
return bool(self.settings.zai_api_key.strip()) return bool(self.settings.zai_api_key.strip())
if provider == "gemini": if provider == "gemini":
return bool(self.settings.gemini_api_key.strip()) return bool(self.settings.gemini_api_key.strip())
if provider == "groq":
return bool(self.settings.groq_api_key.strip())
return False return False
+26
View File
@@ -104,6 +104,7 @@ def test_admin_config_masks_secrets_and_exposes_manifest(monkeypatch, tmp_path):
assert "OPENROUTER_API_KEY" in keys assert "OPENROUTER_API_KEY" in keys
assert "FIREWORKS_API_KEY" in keys assert "FIREWORKS_API_KEY" in keys
assert "GEMINI_API_KEY" in keys assert "GEMINI_API_KEY" in keys
assert "GROQ_API_KEY" in keys
assert "ZAI_BASE_URL" not in keys assert "ZAI_BASE_URL" not in keys
assert "CLAUDE_WORKSPACE" not in keys assert "CLAUDE_WORKSPACE" not in keys
assert "CLAUDE_CLI_BIN" not in keys assert "CLAUDE_CLI_BIN" not in keys
@@ -233,6 +234,31 @@ def test_admin_apply_writes_gemini_key_and_masks_preview(monkeypatch, tmp_path):
assert "GEMINI_API_KEY=gm-secret" in text assert "GEMINI_API_KEY=gm-secret" in text
def test_admin_apply_writes_groq_key_and_masks_preview(monkeypatch, tmp_path):
    """Applying a Groq model and key masks the key in the preview but writes it to the env file."""
    _set_home(monkeypatch, tmp_path)
    _clear_process_config(monkeypatch)
    client = _local_client(create_app(lifespan_enabled=False))
    payload = {
        "values": {
            "MODEL": "groq/llama-3.3-70b-versatile",
            "GROQ_API_KEY": "gq-secret",
        }
    }

    response = client.post("/admin/api/config/apply", json=payload)

    assert response.status_code == 200
    result = response.json()
    assert result["applied"] is True
    # The preview must never echo the raw secret.
    assert "GROQ_API_KEY=********" in result["env_preview"]

    # The persisted file, by contrast, holds the real values.
    written = (tmp_path / ".fcc" / ".env").read_text(encoding="utf-8")
    assert "MODEL=groq/llama-3.3-70b-versatile" in written
    assert "GROQ_API_KEY=gq-secret" in written
def test_admin_apply_preserves_hidden_diagnostics_and_smoke_values( def test_admin_apply_preserves_hidden_diagnostics_and_smoke_values(
monkeypatch, tmp_path monkeypatch, tmp_path
): ):
+36
View File
@@ -18,6 +18,7 @@ from config.nim import NimSettings
from providers.deepseek import DeepSeekProvider from providers.deepseek import DeepSeekProvider
from providers.exceptions import ServiceUnavailableError, UnknownProviderTypeError from providers.exceptions import ServiceUnavailableError, UnknownProviderTypeError
from providers.gemini import GeminiProvider from providers.gemini import GeminiProvider
from providers.groq import GroqProvider
from providers.lmstudio import LMStudioProvider from providers.lmstudio import LMStudioProvider
from providers.mistral import MistralProvider from providers.mistral import MistralProvider
from providers.nvidia_nim import NvidiaNimProvider from providers.nvidia_nim import NvidiaNimProvider
@@ -59,6 +60,8 @@ def _make_mock_settings(**overrides):
mock.fireworks_proxy = "" mock.fireworks_proxy = ""
mock.gemini_api_key = "" mock.gemini_api_key = ""
mock.gemini_proxy = "" mock.gemini_proxy = ""
mock.groq_api_key = ""
mock.groq_proxy = ""
mock.nim = NimSettings() mock.nim = NimSettings()
mock.http_read_timeout = 300.0 mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0 mock.http_write_timeout = 10.0
@@ -258,6 +261,39 @@ async def test_get_provider_gemini_missing_api_key():
assert "aistudio.google.com" in exc_info.value.detail assert "aistudio.google.com" in exc_info.value.detail
@pytest.mark.asyncio
async def test_get_provider_groq():
    """provider_type=groq yields a GroqProvider with the default base URL and the configured key."""
    groq_settings = _make_mock_settings(provider_type="groq", groq_api_key="secret")
    with patch("api.dependencies.get_settings", return_value=groq_settings):
        provider = get_provider()

        assert isinstance(provider, GroqProvider)
        assert provider._base_url == "https://api.groq.com/openai/v1"
        assert provider._api_key == "secret"
@pytest.mark.asyncio
async def test_get_provider_groq_missing_api_key():
    """An empty Groq API key surfaces as HTTP 503 naming the env var and the key console."""
    keyless_settings = _make_mock_settings(provider_type="groq", groq_api_key="")
    with patch("api.dependencies.get_settings", return_value=keyless_settings):
        with pytest.raises(HTTPException) as exc_info:
            get_provider()

    error = exc_info.value
    assert error.status_code == 503
    assert "GROQ_API_KEY" in error.detail
    assert "console.groq.com" in error.detail
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_get_provider_wafer(): async def test_get_provider_wafer():
"""Test that provider_type=wafer returns WaferProvider.""" """Test that provider_type=wafer returns WaferProvider."""
+5
View File
@@ -767,6 +767,7 @@ class TestPerModelMapping:
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama" assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
assert Settings.parse_provider_type("wafer/DeepSeek-V4-Pro") == "wafer" assert Settings.parse_provider_type("wafer/DeepSeek-V4-Pro") == "wafer"
assert Settings.parse_provider_type("gemini/gemini-2.5-flash") == "gemini" assert Settings.parse_provider_type("gemini/gemini-2.5-flash") == "gemini"
assert Settings.parse_provider_type("groq/llama-3.3-70b-versatile") == "groq"
def test_parse_model_name(self): def test_parse_model_name(self):
"""parse_model_name extracts model name from model string.""" """parse_model_name extracts model name from model string."""
@@ -785,6 +786,10 @@ class TestPerModelMapping:
assert ( assert (
Settings.parse_model_name("gemini/gemini-2.5-flash") == "gemini-2.5-flash" Settings.parse_model_name("gemini/gemini-2.5-flash") == "gemini-2.5-flash"
) )
assert (
Settings.parse_model_name("groq/llama-3.3-70b-versatile")
== "llama-3.3-70b-versatile"
)
def test_configured_chat_model_refs_collects_unique_models_with_sources( def test_configured_chat_model_refs_collects_unique_models_with_sources(
self, monkeypatch self, monkeypatch
+2
View File
@@ -7,6 +7,7 @@ from messaging.platforms.factory import create_messaging_platform
from providers.base import BaseProvider from providers.base import BaseProvider
from providers.deepseek import DeepSeekProvider from providers.deepseek import DeepSeekProvider
from providers.gemini import GeminiProvider from providers.gemini import GeminiProvider
from providers.groq import GroqProvider
from providers.llamacpp import LlamaCppProvider from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider from providers.lmstudio import LMStudioProvider
from providers.mistral import MistralProvider from providers.mistral import MistralProvider
@@ -84,6 +85,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
"opencode_go": OpenCodeProvider, "opencode_go": OpenCodeProvider,
"zai": ZaiProvider, "zai": ZaiProvider,
"gemini": GeminiProvider, "gemini": GeminiProvider,
"groq": GroqProvider,
} }
for provider_class in provider_classes.values(): for provider_class in provider_classes.values():
assert issubclass(provider_class, BaseProvider) assert issubclass(provider_class, BaseProvider)
+1
View File
@@ -32,6 +32,7 @@ def _settings(**overrides):
"opencode_api_key": "", "opencode_api_key": "",
"zai_api_key": "", "zai_api_key": "",
"gemini_api_key": "", "gemini_api_key": "",
"groq_api_key": "",
"fireworks_api_key": "", "fireworks_api_key": "",
"lm_studio_base_url": "", "lm_studio_base_url": "",
"llamacpp_base_url": "", "llamacpp_base_url": "",
+236
View File
@@ -0,0 +1,236 @@
"""Tests for Groq (OpenAI-compatible) provider."""
from contextlib import asynccontextmanager
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from providers.base import ProviderConfig
from providers.groq import GROQ_DEFAULT_BASE, GroqProvider
class MockMessage:
    """Bare message stand-in exposing only ``role`` and ``content``."""

    def __init__(self, role, content):
        self.role, self.content = role, content
class MockRequest:
    """Request stand-in with Groq-flavoured defaults.

    Keyword arguments override a default attribute or add a new one.
    """

    def __init__(self, **kwargs):
        thinking = MagicMock()
        thinking.enabled = True
        defaults = {
            "model": "llama-3.3-70b-versatile",
            "messages": [MockMessage("user", "Hello")],
            "max_tokens": 100,
            "temperature": 0.5,
            "top_p": 0.9,
            "system": "System prompt",
            "stop_sequences": None,
            "tools": [],
            "thinking": thinking,
        }
        # Overrides are applied last so they win over the defaults.
        for attr_name, attr_value in {**defaults, **kwargs}.items():
            setattr(self, attr_name, attr_value)
@pytest.fixture
def groq_config():
    """Provider config pointing at the default Groq base URL with thinking enabled."""
    config_kwargs = {
        "api_key": "test_groq_key",
        "base_url": GROQ_DEFAULT_BASE,
        "rate_limit": 10,
        "rate_window": 60,
        "enable_thinking": True,
    }
    return ProviderConfig(**config_kwargs)
@pytest.fixture(autouse=True)
def mock_rate_limiter():
    """Swap the global rate limiter for a pass-through so tests never wait."""

    @asynccontextmanager
    async def _slot():
        # No-op concurrency slot: entering and leaving it does nothing.
        yield

    async def _passthrough(fn, *args, **kwargs):
        # Call the wrapped coroutine function directly, without retry logic.
        return await fn(*args, **kwargs)

    with patch("providers.openai_compat.GlobalRateLimiter") as limiter_cls:
        limiter = limiter_cls.get_scoped_instance.return_value
        limiter.execute_with_retry = AsyncMock(side_effect=_passthrough)
        limiter.concurrency_slot.side_effect = _slot
        yield limiter
@pytest.fixture
def groq_provider(groq_config):
    """GroqProvider built from the ``groq_config`` fixture."""
    provider = GroqProvider(groq_config)
    return provider
def test_init(groq_config):
    """Construction stores the key and base URL and creates the OpenAI client exactly once."""
    with patch("providers.openai_compat.AsyncOpenAI") as client_cls:
        provider = GroqProvider(groq_config)

        stored = (provider._api_key, provider._base_url)
        assert stored == ("test_groq_key", GROQ_DEFAULT_BASE)
        client_cls.assert_called_once()
def test_default_base_url_constant():
    """The exported default base URL is Groq's OpenAI-compatible endpoint."""
    expected_base = "https://api.groq.com/openai/v1"
    assert GROQ_DEFAULT_BASE == expected_base
def test_build_request_body_basic(groq_provider):
    """A default request converts with the model kept, a leading system message and a completion-token cap."""
    body = groq_provider._build_request_body(MockRequest())

    assert body["model"] == "llama-3.3-70b-versatile"
    leading_message = body["messages"][0]
    assert leading_message["role"] == "system"
    assert "max_completion_tokens" in body
def test_build_request_body_global_disable_blocks_reasoning_mapping():
    """With thinking disabled in the provider config, no reasoning is replayed.

    The request itself asks for thinking (``MockRequest.thinking.enabled`` is
    true), so this exercises the provider-level switch.
    """
    provider = GroqProvider(
        ProviderConfig(
            api_key="test_groq_key",
            base_url=GROQ_DEFAULT_BASE,
            rate_limit=10,
            rate_window=60,
            enable_thinking=False,
        )
    )
    req = MockRequest()
    body = provider._build_request_body(req)
    messages = body.get("messages", [])
    roles = [m.get("role") for m in messages]
    assert "assistant_reasoning_content" not in roles
    # The role check alone can never fail; reasoning replay would show up as a
    # ``reasoning_content`` key on a message, so check for that as well.
    assert all("reasoning_content" not in m for m in messages)
def test_build_request_body_sanitizes_and_remaps_via_mock_converter(groq_provider):
    """Groq clean-up strips names and unsupported fields, remaps ``max_tokens`` and forces ``n`` to 1.

    The shared converter is mocked so the test controls exactly which
    Groq-incompatible fields reach the sanitizing steps.
    """
    with patch("providers.groq.request.build_base_request_body") as mock_convert:
        mock_convert.return_value = {
            "model": "llama-3.3-70b-versatile",
            "messages": [
                {"role": "user", "name": "bad", "content": "hello"},
                {
                    "role": "assistant",
                    "tool_calls": [],
                    "name": "nope",
                    "content": "ok",
                },
            ],
            "logprobs": True,
            "logit_bias": {"1": -100},
            "top_logprobs": 2,
            "max_tokens": 42,
            "n": 4,
        }
        req = MockRequest()
        body = groq_provider._build_request_body(req)

    msgs = body["messages"]
    # Assert key absence: ``.get(...) is None`` would also accept a key that
    # was left in place with a null value.
    assert all("name" not in msg for msg in msgs)
    for key in ("logprobs", "logit_bias", "top_logprobs"):
        assert key not in body
    assert "max_tokens" not in body
    assert body["max_completion_tokens"] == 42
    assert body["n"] == 1
def test_build_request_body_prefers_existing_max_completion_tokens(groq_provider):
    """When both limits are present, ``max_completion_tokens`` is kept and ``max_tokens`` is dropped."""
    converted = {
        "model": "llama-3.3-70b-versatile",
        "messages": [{"role": "user", "content": "x"}],
        "max_completion_tokens": 77,
        "max_tokens": 999,
    }
    with patch(
        "providers.groq.request.build_base_request_body", return_value=converted
    ):
        body = groq_provider._build_request_body(MockRequest())

    assert body["max_completion_tokens"] == 77
    assert "max_tokens" not in body
def test_build_request_body_preserves_caller_extra_body(groq_provider):
    """A caller-supplied ``extra_body`` dict is carried into the built body."""
    caller_extra = {"metadata": {"user": "u1"}}
    body = groq_provider._build_request_body(MockRequest(extra_body=caller_extra))

    forwarded = body.get("extra_body")
    assert isinstance(forwarded, dict)
    assert forwarded.get("metadata") == {"user": "u1"}
@pytest.mark.asyncio
async def test_stream_response_text(groq_provider):
    """A content delta from the upstream stream is emitted as a ``text_delta`` event."""
    req = MockRequest()

    delta = MagicMock(content="Hello back!", reasoning_content=None, tool_calls=None)
    chunk = MagicMock()
    chunk.choices = [MagicMock(delta=delta, finish_reason="stop")]
    chunk.usage = MagicMock(completion_tokens=5, prompt_tokens=10)

    async def single_chunk_stream():
        yield chunk

    completions = groq_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as create_mock:
        create_mock.return_value = single_chunk_stream()
        events = []
        async for event in groq_provider.stream_response(req):
            events.append(event)

        matching = [e for e in events if '"text_delta"' in e and "Hello back!" in e]
        assert matching
@pytest.mark.asyncio
async def test_stream_response_reasoning_content(groq_provider):
    """A ``reasoning_content`` delta from upstream is emitted as a ``thinking_delta`` event."""
    req = MockRequest()

    delta = MagicMock(content=None, reasoning_content="Thinking...", tool_calls=None)
    chunk = MagicMock()
    chunk.choices = [MagicMock(delta=delta, finish_reason="stop")]
    chunk.usage = MagicMock(completion_tokens=2, prompt_tokens=10)

    async def single_chunk_stream():
        yield chunk

    completions = groq_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as create_mock:
        create_mock.return_value = single_chunk_stream()
        events = []
        async for event in groq_provider.stream_response(req):
            events.append(event)

        matching = [
            e for e in events if '"thinking_delta"' in e and "Thinking..." in e
        ]
        assert matching
@pytest.mark.asyncio
async def test_cleanup(groq_provider):
    """``cleanup`` closes the underlying client exactly once."""
    fake_client = AsyncMock()
    groq_provider._client = fake_client

    await groq_provider.cleanup()

    groq_provider._client.close.assert_called_once()
+8 -1
View File
@@ -10,6 +10,7 @@ from config.provider_ids import SUPPORTED_PROVIDER_IDS
from providers.deepseek import DeepSeekProvider from providers.deepseek import DeepSeekProvider
from providers.exceptions import UnknownProviderTypeError from providers.exceptions import UnknownProviderTypeError
from providers.gemini import GeminiProvider from providers.gemini import GeminiProvider
from providers.groq import GroqProvider
from providers.llamacpp import LlamaCppProvider from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider from providers.lmstudio import LMStudioProvider
from providers.mistral import MistralProvider from providers.mistral import MistralProvider
@@ -55,6 +56,8 @@ def _make_settings(**overrides):
mock.fireworks_api_key = "" mock.fireworks_api_key = ""
mock.gemini_api_key = "" mock.gemini_api_key = ""
mock.gemini_proxy = "" mock.gemini_proxy = ""
mock.groq_api_key = ""
mock.groq_proxy = ""
mock.provider_rate_limit = 40 mock.provider_rate_limit = 40
mock.provider_rate_window = 60 mock.provider_rate_window = 60
mock.provider_max_concurrency = 5 mock.provider_max_concurrency = 5
@@ -152,7 +155,10 @@ def test_create_provider_uses_native_openrouter_by_default():
def test_create_provider_instantiates_each_builtin(): def test_create_provider_instantiates_each_builtin():
settings = _make_settings(gemini_api_key="test_gemini_key") settings = _make_settings(
gemini_api_key="test_gemini_key",
groq_api_key="test_groq_key",
)
cases = { cases = {
"nvidia_nim": NvidiaNimProvider, "nvidia_nim": NvidiaNimProvider,
"mistral": MistralProvider, "mistral": MistralProvider,
@@ -165,6 +171,7 @@ def test_create_provider_instantiates_each_builtin():
"opencode_go": OpenCodeProvider, "opencode_go": OpenCodeProvider,
"zai": ZaiProvider, "zai": ZaiProvider,
"gemini": GeminiProvider, "gemini": GeminiProvider,
"groq": GroqProvider,
} }
with ( with (