mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-06-01 22:09:04 +02:00
Add NVIDIA NIM CLI smoke matrix and tool schema aliasing
This commit is contained in:
@@ -49,6 +49,8 @@ FCC_SMOKE_MODEL_LLAMACPP=
|
||||
FCC_SMOKE_MODEL_OLLAMA=
|
||||
FCC_SMOKE_MODEL_KIMI=
|
||||
FCC_SMOKE_MODEL_WAFER=
|
||||
FCC_SMOKE_NIM_MODELS=
|
||||
FCC_SMOKE_NIM_EXTRA_MODELS=
|
||||
|
||||
|
||||
# Thinking output
|
||||
|
||||
@@ -12,10 +12,12 @@ from providers.defaults import NVIDIA_NIM_DEFAULT_BASE
|
||||
from providers.openai_compat import OpenAIChatTransport
|
||||
|
||||
from .request import (
|
||||
body_without_nim_tool_argument_aliases,
|
||||
build_request_body,
|
||||
clone_body_without_chat_template,
|
||||
clone_body_without_reasoning_budget,
|
||||
clone_body_without_reasoning_content,
|
||||
nim_tool_argument_aliases_from_body,
|
||||
)
|
||||
|
||||
|
||||
@@ -41,6 +43,14 @@ class NvidiaNimProvider(OpenAIChatTransport):
|
||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||
)
|
||||
|
||||
def _prepare_create_body(self, body: dict[str, Any]) -> dict[str, Any]:
    """Return the body to send to NVIDIA NIM, without private alias metadata.

    Delegates to ``body_without_nim_tool_argument_aliases`` so the
    proxy-private alias map is stripped before the upstream call.
    """
    upstream_body = body_without_nim_tool_argument_aliases(body)
    return upstream_body
|
||||
|
||||
def _tool_argument_aliases(self, body: dict[str, Any]) -> dict[str, dict[str, str]]:
    """Return the NIM tool argument aliases recorded in ``body``.

    Delegates to ``nim_tool_argument_aliases_from_body``, which reads the
    alias metadata captured while the request body was built.
    """
    recorded_aliases = nim_tool_argument_aliases_from_body(body)
    return recorded_aliases
|
||||
|
||||
def _get_retry_request_body(self, error: Exception, body: dict) -> dict | None:
|
||||
"""Retry once with a downgraded body when NIM rejects a known field."""
|
||||
status_code = getattr(error, "status_code", None)
|
||||
|
||||
@@ -34,6 +34,9 @@ _SCHEMA_LIST_KEYS = frozenset({"allOf", "anyOf", "oneOf", "prefixItems"})
|
||||
_SCHEMA_MAP_KEYS = frozenset(
|
||||
{"properties", "patternProperties", "$defs", "definitions", "dependentSchemas"}
|
||||
)
|
||||
# Private request-body key under which the per-tool alias maps are stored;
# it is stripped again before the body is sent upstream.
NIM_TOOL_ARGUMENT_ALIASES_KEY = "_fcc_nim_tool_argument_aliases"
# Prefix prepended to every generated tool-parameter alias name.
_NIM_TOOL_PARAMETER_ALIAS_PREFIX = "_fcc_arg_"
# Tool parameter names that are replaced by an alias in NIM tool schemas.
_NIM_UNSAFE_TOOL_PARAMETER_NAMES = frozenset({"type"})
|
||||
|
||||
|
||||
def _clone_strip_extra_body(
|
||||
@@ -123,12 +126,135 @@ def _sanitize_nim_schema_node(value: Any) -> tuple[bool, Any]:
|
||||
return True, value
|
||||
|
||||
|
||||
def _needs_nim_tool_parameter_alias(name: str) -> bool:
    """Return True when ``name`` is listed as an unsafe NIM tool parameter name."""
    is_unsafe = name in _NIM_UNSAFE_TOOL_PARAMETER_NAMES
    return is_unsafe
|
||||
|
||||
|
||||
def _make_nim_tool_parameter_alias(name: str, reserved: set[str]) -> str:
    """Build a unique alias for ``name`` and record it in ``reserved``.

    Every character that is neither alphanumeric nor ``_`` becomes ``_``;
    leading/trailing underscores are trimmed and an empty result falls back
    to ``"arg"``. The alias is the module prefix plus that tail. If it is
    already reserved, ``_2``, ``_3``, ... are appended until it is free.

    Args:
        name: Original tool parameter name.
        reserved: Names already in use; mutated to include the new alias.

    Returns:
        The newly reserved alias.
    """
    sanitized = "".join(
        "_" if not (ch.isalnum() or ch == "_") else ch for ch in name
    ).strip("_") or "arg"
    base = f"{_NIM_TOOL_PARAMETER_ALIAS_PREFIX}{sanitized}"

    alias = base
    attempt = 1
    while alias in reserved:
        # First collision yields "<base>_2", matching the numbering scheme.
        attempt += 1
        alias = f"{base}_{attempt}"

    reserved.add(alias)
    return alias
|
||||
|
||||
|
||||
def _collect_nim_tool_property_names(value: Any) -> set[str]:
|
||||
names: set[str] = set()
|
||||
if isinstance(value, dict):
|
||||
properties = value.get("properties")
|
||||
if isinstance(properties, dict):
|
||||
for property_name, property_schema in properties.items():
|
||||
if isinstance(property_name, str):
|
||||
names.add(property_name)
|
||||
names.update(_collect_nim_tool_property_names(property_schema))
|
||||
for key, item in value.items():
|
||||
if key != "properties":
|
||||
names.update(_collect_nim_tool_property_names(item))
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
names.update(_collect_nim_tool_property_names(item))
|
||||
return names
|
||||
|
||||
|
||||
def _alias_nim_schema_property_names(
    value: Any,
    *,
    reserved: set[str],
    alias_to_original: dict[str, str],
    original_to_alias: dict[str, str],
) -> Any:
    """Return a copy of a schema node with unsafe property names aliased.

    Lists are rebuilt element by element, dicts are rebuilt key by key, and
    any other value is returned unchanged. Inside a dict whose
    ``"properties"`` entry is a dict, each property name flagged by
    ``_needs_nim_tool_parameter_alias`` is replaced by an alias. Entries of
    a sibling ``"required"`` list that name an aliased property of the same
    dict are rewritten to the alias.

    A ``"properties"`` entry that is not a dict is carried over verbatim.
    Previously such an entry was silently dropped from the output.

    Args:
        value: Schema node to rewrite (dict, list, or scalar).
        reserved: Names that new aliases must not collide with; mutated as
            aliases are created.
        alias_to_original: Output map ``alias -> original``; mutated.
        original_to_alias: Output map ``original -> alias``; mutated, and
            consulted so that one original name always gets the same alias.

    Returns:
        The rewritten node. In the rebuilt dict ``"properties"`` comes first,
        followed by the remaining keys in their original order.
    """

    def _recurse(node: Any) -> Any:
        # Thread the shared bookkeeping through every nested call.
        return _alias_nim_schema_property_names(
            node,
            reserved=reserved,
            alias_to_original=alias_to_original,
            original_to_alias=original_to_alias,
        )

    if isinstance(value, list):
        return [_recurse(item) for item in value]
    if not isinstance(value, dict):
        return value

    # Aliases applied to THIS dict's own properties; only these may rewrite
    # this dict's "required" list.
    local_aliases: dict[str, str] = {}
    aliased_value: dict[str, Any] = {}

    if "properties" in value:
        properties = value["properties"]
        if isinstance(properties, dict):
            aliased_properties: dict[str, Any] = {}
            for property_name, property_schema in properties.items():
                # Nested schemas are rewritten before this name is aliased.
                aliased_schema = _recurse(property_schema)
                if isinstance(property_name, str) and _needs_nim_tool_parameter_alias(
                    property_name
                ):
                    alias = original_to_alias.get(property_name)
                    if alias is None:
                        alias = _make_nim_tool_parameter_alias(property_name, reserved)
                        alias_to_original[alias] = property_name
                        original_to_alias[property_name] = alias
                    local_aliases[property_name] = alias
                    aliased_properties[alias] = aliased_schema
                else:
                    aliased_properties[property_name] = aliased_schema
            aliased_value["properties"] = aliased_properties
        else:
            # Not a property map: keep the value untouched instead of
            # dropping the key.
            aliased_value["properties"] = properties

    for key, item in value.items():
        if key == "properties":
            continue  # already handled above
        if key == "required" and isinstance(item, list):
            aliased_value[key] = [
                local_aliases.get(required_item, required_item)
                if isinstance(required_item, str)
                else required_item
                for required_item in item
            ]
            continue
        aliased_value[key] = _recurse(item)

    return aliased_value
|
||||
|
||||
|
||||
def _alias_nim_tool_parameters(
    parameters: dict[str, Any],
) -> tuple[dict[str, Any], dict[str, str]]:
    """Alias unsafe property names in one tool's parameter schema.

    Every property name already present in ``parameters`` is reserved, so
    generated aliases cannot collide with real names.

    Args:
        parameters: The tool's parameter schema.

    Returns:
        ``(schema, alias_to_original)``. When nothing needed aliasing, the
        original ``parameters`` object and an empty dict are returned.
    """
    alias_map: dict[str, str] = {}
    rewritten = _alias_nim_schema_property_names(
        parameters,
        reserved=_collect_nim_tool_property_names(parameters),
        alias_to_original=alias_map,
        original_to_alias={},
    )
    if alias_map:
        return rewritten, alias_map
    return parameters, {}
|
||||
|
||||
|
||||
def _sanitize_nim_tool_schemas(body: dict[str, Any]) -> None:
|
||||
"""Sanitize only tool parameter schemas, preserving tool calls/history."""
|
||||
tools = body.get("tools")
|
||||
if not isinstance(tools, list):
|
||||
return
|
||||
|
||||
tool_argument_aliases: dict[str, dict[str, str]] = {}
|
||||
sanitized_tools: list[Any] = []
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
@@ -141,11 +267,52 @@ def _sanitize_nim_tool_schemas(body: dict[str, Any]) -> None:
|
||||
parameters = function.get("parameters")
|
||||
if isinstance(parameters, dict):
|
||||
_, sanitized_parameters = _sanitize_nim_schema_node(parameters)
|
||||
sanitized_parameters, argument_aliases = _alias_nim_tool_parameters(
|
||||
sanitized_parameters
|
||||
)
|
||||
sanitized_function["parameters"] = sanitized_parameters
|
||||
tool_name = function.get("name")
|
||||
if argument_aliases and isinstance(tool_name, str) and tool_name:
|
||||
tool_argument_aliases[tool_name] = argument_aliases
|
||||
sanitized_tool["function"] = sanitized_function
|
||||
sanitized_tools.append(sanitized_tool)
|
||||
|
||||
body["tools"] = sanitized_tools
|
||||
if tool_argument_aliases:
|
||||
body[NIM_TOOL_ARGUMENT_ALIASES_KEY] = tool_argument_aliases
|
||||
else:
|
||||
body.pop(NIM_TOOL_ARGUMENT_ALIASES_KEY, None)
|
||||
|
||||
|
||||
def nim_tool_argument_aliases_from_body(
    body: dict[str, Any],
) -> dict[str, dict[str, str]]:
    """Extract and validate the private NIM alias maps stored in ``body``.

    Args:
        body: A built request body that may carry alias metadata under
            ``NIM_TOOL_ARGUMENT_ALIASES_KEY``.

    Returns:
        ``{tool_name: {alias: original}}`` containing only entries whose
        tool name, alias and original are all strings. Tools left with no
        valid alias are omitted; a missing or non-dict entry yields ``{}``.
    """
    stored = body.get(NIM_TOOL_ARGUMENT_ALIASES_KEY)
    validated: dict[str, dict[str, str]] = {}
    if isinstance(stored, dict):
        for tool_name, mapping in stored.items():
            if isinstance(tool_name, str) and isinstance(mapping, dict):
                clean = {
                    alias: original
                    for alias, original in mapping.items()
                    if isinstance(alias, str) and isinstance(original, str)
                }
                if clean:
                    validated[tool_name] = clean
    return validated
|
||||
|
||||
|
||||
def body_without_nim_tool_argument_aliases(body: dict[str, Any]) -> dict[str, Any]:
    """Return ``body`` without the private alias metadata key.

    When the key is absent the same ``body`` object is returned. Otherwise
    a shallow copy without the key is returned and ``body`` is not mutated.
    """
    if NIM_TOOL_ARGUMENT_ALIASES_KEY in body:
        return {
            key: item
            for key, item in body.items()
            if key != NIM_TOOL_ARGUMENT_ALIASES_KEY
        }
    return body
|
||||
|
||||
|
||||
def _set_extra(
|
||||
|
||||
+126
-7
@@ -128,11 +128,20 @@ class OpenAIChatTransport(BaseProvider):
|
||||
"""Return a modified request body for one retry, or None."""
|
||||
return None
|
||||
|
||||
def _prepare_create_body(self, body: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Return the body passed to the upstream OpenAI-compatible client."""
|
||||
return body
|
||||
|
||||
def _tool_argument_aliases(self, body: dict[str, Any]) -> dict[str, dict[str, str]]:
|
||||
"""Return provider-specific per-tool argument aliases for this request."""
|
||||
return {}
|
||||
|
||||
async def _create_stream(self, body: dict) -> tuple[Any, dict]:
|
||||
"""Create a streaming chat completion, optionally retrying once."""
|
||||
try:
|
||||
create_body = self._prepare_create_body(body)
|
||||
stream = await self._global_rate_limiter.execute_with_retry(
|
||||
self._client.chat.completions.create, **body, stream=True
|
||||
self._client.chat.completions.create, **create_body, stream=True
|
||||
)
|
||||
return stream, body
|
||||
except Exception as error:
|
||||
@@ -140,13 +149,49 @@ class OpenAIChatTransport(BaseProvider):
|
||||
if retry_body is None:
|
||||
raise
|
||||
|
||||
create_retry_body = self._prepare_create_body(retry_body)
|
||||
stream = await self._global_rate_limiter.execute_with_retry(
|
||||
self._client.chat.completions.create, **retry_body, stream=True
|
||||
self._client.chat.completions.create, **create_retry_body, stream=True
|
||||
)
|
||||
return stream, retry_body
|
||||
|
||||
def _restore_aliased_tool_arguments(
|
||||
self, argument_json: str, aliases: dict[str, str]
|
||||
) -> str | None:
|
||||
try:
|
||||
parsed = json.loads(argument_json)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
if not isinstance(parsed, dict):
|
||||
return argument_json
|
||||
restored = self._restore_aliased_tool_argument_value(parsed, aliases)
|
||||
return json.dumps(restored)
|
||||
|
||||
def _restore_aliased_tool_argument_value(
|
||||
self, value: Any, aliases: dict[str, str]
|
||||
) -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {
|
||||
aliases.get(key, key): self._restore_aliased_tool_argument_value(
|
||||
item, aliases
|
||||
)
|
||||
for key, item in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [
|
||||
self._restore_aliased_tool_argument_value(item, aliases)
|
||||
for item in value
|
||||
]
|
||||
return value
|
||||
|
||||
def _emit_tool_arg_delta(
|
||||
self, sse: SSEBuilder, tc_index: int, args: str
|
||||
self,
|
||||
sse: SSEBuilder,
|
||||
tc_index: int,
|
||||
args: str,
|
||||
*,
|
||||
tool_argument_aliases: dict[str, dict[str, str]] | None = None,
|
||||
tool_argument_alias_buffers: dict[int, str] | None = None,
|
||||
) -> Iterator[str]:
|
||||
"""Emit one argument fragment for a started tool block (Task buffer or raw JSON)."""
|
||||
if not args:
|
||||
@@ -159,9 +204,34 @@ class OpenAIChatTransport(BaseProvider):
|
||||
if parsed is not None:
|
||||
yield sse.emit_tool_delta(tc_index, json.dumps(parsed))
|
||||
return
|
||||
aliases = (
|
||||
tool_argument_aliases.get(state.name, {}) if tool_argument_aliases else {}
|
||||
)
|
||||
if aliases:
|
||||
if tool_argument_alias_buffers is None:
|
||||
restored = self._restore_aliased_tool_arguments(args, aliases)
|
||||
if restored is not None:
|
||||
yield sse.emit_tool_delta(tc_index, restored)
|
||||
return
|
||||
|
||||
buffered_args = tool_argument_alias_buffers.get(tc_index, "") + args
|
||||
restored = self._restore_aliased_tool_arguments(buffered_args, aliases)
|
||||
if restored is None:
|
||||
tool_argument_alias_buffers[tc_index] = buffered_args
|
||||
return
|
||||
tool_argument_alias_buffers.pop(tc_index, None)
|
||||
yield sse.emit_tool_delta(tc_index, restored)
|
||||
return
|
||||
yield sse.emit_tool_delta(tc_index, args)
|
||||
|
||||
def _process_tool_call(self, tc: dict, sse: SSEBuilder) -> Iterator[str]:
|
||||
def _process_tool_call(
|
||||
self,
|
||||
tc: dict,
|
||||
sse: SSEBuilder,
|
||||
*,
|
||||
tool_argument_aliases: dict[str, dict[str, str]] | None = None,
|
||||
tool_argument_alias_buffers: dict[int, str] | None = None,
|
||||
) -> Iterator[str]:
|
||||
"""Process a single tool call delta and yield SSE events."""
|
||||
tc_index = tc.get("index", 0)
|
||||
if tc_index < 0:
|
||||
@@ -193,7 +263,13 @@ class OpenAIChatTransport(BaseProvider):
|
||||
if state.pre_start_args:
|
||||
pre = state.pre_start_args
|
||||
state.pre_start_args = ""
|
||||
yield from self._emit_tool_arg_delta(sse, tc_index, pre)
|
||||
yield from self._emit_tool_arg_delta(
|
||||
sse,
|
||||
tc_index,
|
||||
pre,
|
||||
tool_argument_aliases=tool_argument_aliases,
|
||||
tool_argument_alias_buffers=tool_argument_alias_buffers,
|
||||
)
|
||||
|
||||
state = sse.blocks.tool_states.get(tc_index)
|
||||
if not arguments:
|
||||
@@ -204,13 +280,43 @@ class OpenAIChatTransport(BaseProvider):
|
||||
state.pre_start_args += arguments
|
||||
return
|
||||
|
||||
yield from self._emit_tool_arg_delta(sse, tc_index, arguments)
|
||||
yield from self._emit_tool_arg_delta(
|
||||
sse,
|
||||
tc_index,
|
||||
arguments,
|
||||
tool_argument_aliases=tool_argument_aliases,
|
||||
tool_argument_alias_buffers=tool_argument_alias_buffers,
|
||||
)
|
||||
|
||||
def _flush_task_arg_buffers(self, sse: SSEBuilder) -> Iterator[str]:
    """Yield one tool-delta event per buffered Task argument payload.

    Each ``(tool_index, payload)`` pair returned by
    ``sse.blocks.flush_task_arg_buffers()`` is emitted as a single JSON
    delta (best-effort).
    """
    flushed = sse.blocks.flush_task_arg_buffers()
    for entry in flushed:
        tool_index, payload = entry
        yield sse.emit_tool_delta(tool_index, payload)
|
||||
|
||||
def _flush_tool_argument_alias_buffers(
    self,
    sse: SSEBuilder,
    tool_argument_aliases: dict[str, dict[str, str]],
    tool_argument_alias_buffers: dict[int, str],
) -> Iterator[str]:
    """Emit whatever aliased tool arguments are still buffered.

    For each buffered tool index with a known non-Task tool state and a
    non-empty alias map, the buffered text is restored and emitted. If it
    still does not parse as JSON, the raw buffered text is emitted instead
    so no data is lost. Emitted and empty buffers are removed from
    ``tool_argument_alias_buffers``; all other entries are left in place.
    """
    # Snapshot the entries: the dict is mutated while this generator runs.
    snapshot = list(tool_argument_alias_buffers.items())
    for index, pending in snapshot:
        if pending:
            tool_state = sse.blocks.tool_states.get(index)
            if tool_state is not None and tool_state.name != "Task":
                alias_map = tool_argument_aliases.get(tool_state.name, {})
                if alias_map:
                    restored = self._restore_aliased_tool_arguments(
                        pending, alias_map
                    )
                    payload = pending if restored is None else restored
                    yield sse.emit_tool_delta(index, payload)
                    tool_argument_alias_buffers.pop(index, None)
        else:
            tool_argument_alias_buffers.pop(index, None)
|
||||
|
||||
async def stream_response(
|
||||
self,
|
||||
request: Any,
|
||||
@@ -262,10 +368,13 @@ class OpenAIChatTransport(BaseProvider):
|
||||
heuristic_parser = HeuristicToolParser()
|
||||
finish_reason = None
|
||||
usage_info = None
|
||||
tool_argument_aliases: dict[str, dict[str, str]] = {}
|
||||
tool_argument_alias_buffers: dict[int, str] = {}
|
||||
|
||||
async with self._global_rate_limiter.concurrency_slot():
|
||||
try:
|
||||
stream, body = await self._create_stream(body)
|
||||
tool_argument_aliases = self._tool_argument_aliases(body)
|
||||
async for chunk in stream:
|
||||
if getattr(chunk, "usage", None):
|
||||
usage_info = chunk.usage
|
||||
@@ -335,7 +444,12 @@ class OpenAIChatTransport(BaseProvider):
|
||||
"arguments": tc.function.arguments,
|
||||
},
|
||||
}
|
||||
for event in self._process_tool_call(tc_info, sse):
|
||||
for event in self._process_tool_call(
|
||||
tc_info,
|
||||
sse,
|
||||
tool_argument_aliases=tool_argument_aliases,
|
||||
tool_argument_alias_buffers=tool_argument_alias_buffers,
|
||||
):
|
||||
yield event
|
||||
|
||||
except asyncio.CancelledError, GeneratorExit:
|
||||
@@ -409,6 +523,11 @@ class OpenAIChatTransport(BaseProvider):
|
||||
yield event
|
||||
yield sse.emit_text_delta(" ")
|
||||
|
||||
for event in self._flush_tool_argument_alias_buffers(
|
||||
sse, tool_argument_aliases, tool_argument_alias_buffers
|
||||
):
|
||||
yield event
|
||||
|
||||
for event in self._flush_task_arg_buffers(sse):
|
||||
yield event
|
||||
|
||||
|
||||
+21
-4
@@ -58,10 +58,11 @@ Default targets do not send real bot messages or load voice backends:
|
||||
| `llamacpp` | local `/models` plus native `/messages` through proxy | running llama-server |
|
||||
| `ollama` | local `/api/tags` plus native Anthropic messages through proxy | running Ollama server |
|
||||
|
||||
Side-effectful targets are opt-in:
|
||||
Heavy/side-effectful targets are opt-in:
|
||||
|
||||
| Target | Product scenarios | Required environment |
|
||||
| --- | --- | --- |
|
||||
| `nvidia_nim_cli` | Claude Code CLI feature matrix across NIM models | `NVIDIA_NIM_API_KEY`, Claude CLI |
|
||||
| `telegram` | getMe, send, edit, delete, optional manual inbound | token and chat/user ID |
|
||||
| `discord` | channel access, send, edit, delete, optional manual inbound | token and channel ID |
|
||||
| `voice` | generated WAV through local Whisper or NVIDIA NIM transcription | `VOICE_NOTE_ENABLED=true`, `FCC_SMOKE_RUN_VOICE=1` |
|
||||
@@ -88,6 +89,13 @@ $env:FCC_SMOKE_RUN_VOICE = "1"
|
||||
uv run pytest smoke/product -n 0 -s --tb=short
|
||||
```
|
||||
|
||||
```powershell
|
||||
$env:FCC_LIVE_SMOKE = "1"
|
||||
$env:FCC_SMOKE_TARGETS = "nvidia_nim_cli"
|
||||
$env:FCC_SMOKE_NIM_MODELS = "z-ai/glm-5.1,moonshotai/kimi-k2.6,minimaxai/minimax-m2.7,nvidia/nemotron-3-super-120b-a12b,deepseek-ai/deepseek-v4-pro,deepseek-ai/deepseek-v4-flash"
|
||||
uv run pytest smoke/product -n 0 -s --tb=short
|
||||
```
|
||||
|
||||
```powershell
|
||||
$env:FCC_LIVE_SMOKE = "1"
|
||||
$env:FCC_SMOKE_TARGETS = "messaging,config,extensibility"
|
||||
@@ -106,6 +114,10 @@ uv run pytest smoke/product -n 0 -s --tb=short
|
||||
`FCC_SMOKE_MODEL_LLAMACPP`, `FCC_SMOKE_MODEL_OLLAMA`: optional per-provider
|
||||
smoke model overrides. Values may include the provider prefix or just the model
|
||||
name for that provider.
|
||||
- `FCC_SMOKE_NIM_MODELS`: optional comma-separated NVIDIA NIM CLI matrix models
|
||||
that replace the default characterization set.
|
||||
- `FCC_SMOKE_NIM_EXTRA_MODELS`: optional comma-separated NVIDIA NIM CLI matrix
|
||||
models appended to the default or replacement set.
|
||||
- `FCC_SMOKE_TIMEOUT_S`: per-request/subprocess timeout, default `45`.
|
||||
- `FCC_SMOKE_CLAUDE_BIN`: Claude CLI executable name, default `claude`.
|
||||
- `FCC_SMOKE_TELEGRAM_CHAT_ID`: Telegram chat/user ID for send/edit/delete.
|
||||
@@ -129,10 +141,15 @@ names contain `KEY`, `TOKEN`, `SECRET`, `WEBHOOK`, or `AUTH`.
|
||||
opt-in flag is absent.
|
||||
- `upstream_unavailable`: a real provider, bot API, or local model server is not
|
||||
reachable.
|
||||
- `probe_timeout`: the smoke driver reached the target, but the CLI/probe did
|
||||
not complete within the smoke timeout.
|
||||
- `product_failure`: the app accepted the scenario but returned the wrong shape,
|
||||
crashed, leaked state, or violated the product contract.
|
||||
- `harness_bug`: the smoke test or driver made an invalid assumption.
|
||||
- `target_disabled`: skipped because `FCC_SMOKE_TARGETS` intentionally selected
|
||||
a different target.
|
||||
|
||||
`product_failure` and `harness_bug` are failures. `missing_env` and
|
||||
`upstream_unavailable` are skips except when the user explicitly selected a
|
||||
provider in `FCC_SMOKE_PROVIDER_MATRIX`; selected-but-missing providers fail.
|
||||
`product_failure` and `harness_bug` are failures. `missing_env`,
|
||||
`upstream_unavailable`, and `probe_timeout` are skips except when the user
|
||||
explicitly selected a provider in `FCC_SMOKE_PROVIDER_MATRIX`;
|
||||
selected-but-missing providers fail.
|
||||
|
||||
@@ -411,7 +411,7 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
|
||||
"stream-json events and session id mapping",
|
||||
"stderr/error event and process cleanup",
|
||||
("tests/cli/test_cli.py",),
|
||||
("test_claude_cli_prompt_when_available",),
|
||||
("test_claude_cli_prompt_when_available", "test_nvidia_nim_cli_matrix_e2e"),
|
||||
),
|
||||
CapabilityContract(
|
||||
"extensibility",
|
||||
|
||||
+5
-3
@@ -72,10 +72,11 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
|
||||
(
|
||||
"test_api_basic_conversation_e2e",
|
||||
"test_claude_cli_adaptive_thinking_e2e",
|
||||
"test_nvidia_nim_cli_matrix_e2e",
|
||||
"test_vscode_protocol_e2e",
|
||||
"test_jetbrains_protocol_e2e",
|
||||
),
|
||||
("api", "cli", "clients"),
|
||||
("api", "cli", "clients", "nvidia_nim_cli"),
|
||||
("configured provider", "FCC_SMOKE_CLAUDE_BIN for real Claude CLI"),
|
||||
"skip real CLI when binary is absent; configured providers must pass",
|
||||
),
|
||||
@@ -384,9 +385,10 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
|
||||
(
|
||||
"test_claude_cli_adaptive_thinking_e2e",
|
||||
"test_claude_cli_multiturn_tool_protocol_e2e",
|
||||
"test_nvidia_nim_cli_matrix_e2e",
|
||||
),
|
||||
("cli",),
|
||||
("FCC_SMOKE_CLAUDE_BIN", "configured provider"),
|
||||
("cli", "nvidia_nim_cli"),
|
||||
("FCC_SMOKE_CLAUDE_BIN", "configured provider", "NVIDIA_NIM_API_KEY"),
|
||||
"skip only when Claude CLI binary is absent",
|
||||
),
|
||||
FeatureCoverage(
|
||||
|
||||
+59
-1
@@ -28,9 +28,11 @@ DEFAULT_TARGETS = frozenset(
|
||||
}
|
||||
)
|
||||
SIDE_EFFECT_TARGETS = frozenset({"discord", "telegram", "voice"})
|
||||
ALL_TARGETS = DEFAULT_TARGETS | SIDE_EFFECT_TARGETS
|
||||
OPT_IN_TARGETS = frozenset({"nvidia_nim_cli"})
|
||||
ALL_TARGETS = DEFAULT_TARGETS | SIDE_EFFECT_TARGETS | OPT_IN_TARGETS
|
||||
TARGET_ALIASES = {
|
||||
"contract": "api",
|
||||
"nim_cli": "nvidia_nim_cli",
|
||||
"optimizations": "api",
|
||||
"thinking": "providers",
|
||||
"vscode": "clients",
|
||||
@@ -47,6 +49,15 @@ PROVIDER_SMOKE_DEFAULT_MODELS: dict[str, str] = {
|
||||
"wafer": "wafer/DeepSeek-V4-Pro",
|
||||
}
|
||||
|
||||
# Default NVIDIA NIM models for the Claude Code CLI matrix, used when
# FCC_SMOKE_NIM_MODELS does not supply a replacement list. Order is preserved.
NVIDIA_NIM_CLI_DEFAULT_MODELS: tuple[str, ...] = (
    "z-ai/glm-5.1",
    "moonshotai/kimi-k2.6",
    "minimaxai/minimax-m2.7",
    "nvidia/nemotron-3-super-120b-a12b",
    "deepseek-ai/deepseek-v4-pro",
    "deepseek-ai/deepseek-v4-flash",
)
|
||||
|
||||
|
||||
TARGET_REQUIRED_ENV: dict[str, tuple[str, ...]] = {
|
||||
"api": (),
|
||||
@@ -62,6 +73,10 @@ TARGET_REQUIRED_ENV: dict[str, tuple[str, ...]] = {
|
||||
"lmstudio": ("LM_STUDIO_BASE_URL with a running LM Studio server",),
|
||||
"llamacpp": ("LLAMACPP_BASE_URL with a running llama-server",),
|
||||
"ollama": ("OLLAMA_BASE_URL with a running Ollama server",),
|
||||
"nvidia_nim_cli": (
|
||||
"NVIDIA_NIM_API_KEY",
|
||||
"FCC_SMOKE_CLAUDE_BIN or claude on PATH",
|
||||
),
|
||||
"telegram": (
|
||||
"TELEGRAM_BOT_TOKEN",
|
||||
"ALLOWED_TELEGRAM_USER_ID or FCC_SMOKE_TELEGRAM_CHAT_ID",
|
||||
@@ -161,6 +176,13 @@ class SmokeConfig:
|
||||
)
|
||||
return models
|
||||
|
||||
def nvidia_nim_cli_models(self) -> list[ProviderModel]:
    """Return the NVIDIA NIM models for Claude Code CLI characterization.

    One ``ProviderModel`` (provider ``"nvidia_nim"``) is built per entry of
    ``nvidia_nim_cli_model_refs()``, keeping that mapping's order and
    recording each model's source.
    """
    refs = nvidia_nim_cli_model_refs()
    return [
        ProviderModel(provider="nvidia_nim", full_model=model_ref, source=origin)
        for model_ref, origin in refs.items()
    ]
|
||||
|
||||
def _include_provider_in_smoke(
|
||||
self, provider: str, mapped_providers: set[str]
|
||||
) -> bool:
|
||||
@@ -197,6 +219,12 @@ def _parse_csv(raw: str | None) -> frozenset[str]:
|
||||
return frozenset(part.strip() for part in raw.split(",") if part.strip())
|
||||
|
||||
|
||||
def _parse_csv_ordered(raw: str | None) -> tuple[str, ...]:
|
||||
if not raw:
|
||||
return ()
|
||||
return tuple(part.strip() for part in raw.split(",") if part.strip())
|
||||
|
||||
|
||||
def _parse_targets(raw: str | None) -> frozenset[str]:
|
||||
if not raw:
|
||||
return DEFAULT_TARGETS
|
||||
@@ -237,6 +265,36 @@ def _normalize_provider_model(provider: str, raw_model: str) -> str:
|
||||
return f"{provider}/{model}"
|
||||
|
||||
|
||||
def nvidia_nim_cli_model_refs(
    env: Mapping[str, str] | None = None,
) -> dict[str, str]:
    """Return normalized NIM CLI matrix model refs in deterministic order.

    The base list is ``FCC_SMOKE_NIM_MODELS`` when it lists any model,
    otherwise the built-in defaults; ``FCC_SMOKE_NIM_EXTRA_MODELS`` entries
    are appended after it. Each name is normalized for the ``nvidia_nim``
    provider, and the first occurrence of a normalized name wins.

    Args:
        env: Mapping to read the variables from; defaults to ``os.environ``.

    Returns:
        ``full_model -> source`` in de-duplicated insertion order, where
        source records which variable (or the default set) supplied it.

    Raises:
        ValueError: If ``FCC_SMOKE_NIM_MODELS`` is present but lists no model.
    """
    environ = os.environ if env is None else env
    replacement = _parse_csv_ordered(environ.get("FCC_SMOKE_NIM_MODELS"))
    appended = _parse_csv_ordered(environ.get("FCC_SMOKE_NIM_EXTRA_MODELS"))

    if not replacement and "FCC_SMOKE_NIM_MODELS" in environ:
        raise ValueError("FCC_SMOKE_NIM_MODELS must list at least one model")

    if replacement:
        base_group, base_label = replacement, "FCC_SMOKE_NIM_MODELS"
    else:
        base_group, base_label = NVIDIA_NIM_CLI_DEFAULT_MODELS, "nvidia_nim_cli_default"

    refs: dict[str, str] = {}
    groups = ((base_label, base_group), ("FCC_SMOKE_NIM_EXTRA_MODELS", appended))
    for label, group in groups:
        for raw_model in group:
            full_model = _normalize_provider_model("nvidia_nim", raw_model)
            if full_model not in refs:
                # Keep the first source that introduced this model.
                refs[full_model] = label
    return refs
|
||||
|
||||
|
||||
def auth_headers(token: str | None = None) -> dict[str, str]:
|
||||
settings = get_settings()
|
||||
resolved = token if token is not None else settings.anthropic_auth_token
|
||||
|
||||
@@ -0,0 +1,350 @@
|
||||
"""Claude Code CLI characterization helpers for NVIDIA NIM smoke tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from smoke.lib.config import SmokeConfig, redacted
|
||||
from smoke.lib.server import RunningServer
|
||||
|
||||
# Classifications that count as regressions (the README lists
# `product_failure` and `harness_bug` as the failing classes).
REGRESSION_CLASSIFICATIONS = frozenset({"harness_bug", "product_failure"})

# Regexes over server access-log text for `POST /v1/messages` responses:
# the first matches 4xx statuses other than 401/403/404/408/409, the second
# matches any 5xx status.
_HTTP_REGRESSION_PATTERNS = (
    r'POST /v1/messages[^"\n]* HTTP/1\.1" 4(?!01|03|04|08|09)\d\d',
    r'POST /v1/messages[^"\n]* HTTP/1\.1" 5\d\d',
)
# Lowercase substrings; classify_probe reports `upstream_unavailable` when a
# failed probe's lowercased output contains any of them.
_UPSTREAM_UNAVAILABLE_MARKERS = (
    "upstream_unavailable",
    "readtimeout",
    "connecterror",
    "connection refused",
    "timed out",
    "rate limit",
    "429",
    "overloaded",
    "capacity",
    "upstream provider",
)
# Lowercase substrings; classify_probe reports `missing_env` when a non-zero
# exit's lowercased output contains any of them.
_MISSING_ENV_MARKERS = (
    "api key",
    "not logged in",
    "authentication",
    "permission denied",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ClaudeCliRun:
|
||||
command: tuple[str, ...]
|
||||
returncode: int | None
|
||||
stdout: str
|
||||
stderr: str
|
||||
duration_s: float
|
||||
timed_out: bool = False
|
||||
|
||||
@property
|
||||
def combined_output(self) -> str:
|
||||
return f"{self.stdout}\n{self.stderr}"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class NimCliMatrixOutcome:
    """Immutable record of one model/feature probe in the NIM CLI matrix.

    NOTE(review): field meanings below are read from names and types only;
    the code that builds this record is outside this view — confirm there.
    """

    model: str
    full_model: str
    # Where the model entry came from (presumably the env var or default set).
    source: str
    feature: str
    outcome: str
    classification: str
    duration_s: float
    # None is allowed; presumably mirrors ClaudeCliRun.returncode on timeout.
    cli_returncode: int | None
    token_evidence: dict[str, Any]
    request_count: int
    log_path: str
    stdout_excerpt: str
    stderr_excerpt: str
    log_excerpt: str
|
||||
|
||||
|
||||
def run_claude_cli(
    *,
    claude_bin: str,
    server: RunningServer,
    config: SmokeConfig,
    cwd: Path,
    prompt: str,
    tools: str | None,
    extra_args: tuple[str, ...] = (),
    session_id: str | None = None,
    resume_session_id: str | None = None,
    no_session_persistence: bool = True,
) -> ClaudeCliRun:
    """Run Claude Code CLI against the local smoke proxy.

    The child's output is decoded as UTF-8 with undecodable bytes replaced,
    rather than with the platform locale encoding. On Windows code pages
    the CLI's UTF-8 stream-json output could otherwise raise
    ``UnicodeDecodeError`` or be mis-decoded.

    Args:
        claude_bin: Claude CLI executable to launch.
        server: Running smoke proxy; its ``base_url`` is exported to the CLI.
        config: Smoke configuration; supplies the timeout and auth token.
        cwd: Working directory for the CLI (created if missing).
        prompt: Prompt passed with ``-p``.
        tools: Value for ``--tools``. ``None`` omits the flag. A non-empty
            value is also passed to ``--allowedTools``.
        extra_args: Extra CLI arguments inserted before ``-p``.
        session_id: Optional value for ``--session-id``.
        resume_session_id: Optional value for ``--resume``.
        no_session_persistence: Append ``--no-session-persistence`` when true.

    Returns:
        A ``ClaudeCliRun`` with the command, exit status, captured output
        and duration. On timeout ``returncode`` is ``None`` and
        ``timed_out`` is true.
    """
    cwd.mkdir(parents=True, exist_ok=True)

    cmd: list[str] = [claude_bin, "--bare"]
    if resume_session_id:
        cmd.extend(["--resume", resume_session_id])
    if session_id:
        cmd.extend(["--session-id", session_id])
    cmd.extend(
        [
            "--output-format",
            "stream-json",
            "--include-partial-messages",
            "--verbose",
            "--permission-mode",
            "bypassPermissions",
            "--dangerously-skip-permissions",
            "--model",
            "sonnet",
        ]
    )
    if no_session_persistence:
        cmd.append("--no-session-persistence")
    if tools is not None:
        cmd.extend(["--tools", tools])
        if tools:
            cmd.extend(["--allowedTools", tools])
    cmd.extend(extra_args)
    cmd.extend(["-p", prompt])

    # Point the CLI at the local proxy instead of the real Anthropic API.
    env = os.environ.copy()
    env["ANTHROPIC_BASE_URL"] = server.base_url
    env["ANTHROPIC_API_URL"] = f"{server.base_url}/v1"
    env.setdefault("ANTHROPIC_API_KEY", "sk-smoke-proxy")
    if config.settings.anthropic_auth_token:
        env["ANTHROPIC_AUTH_TOKEN"] = config.settings.anthropic_auth_token
    env["TERM"] = "dumb"
    env["NO_COLOR"] = "1"
    env["PYTHONIOENCODING"] = "utf-8"

    started = time.monotonic()
    try:
        result = subprocess.run(
            cmd,
            cwd=cwd,
            env=env,
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8; never fail on stray bytes.
            encoding="utf-8",
            errors="replace",
            timeout=config.timeout_s,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Partial output on timeout may be bytes or None; the helper
        # normalizes it to text.
        return ClaudeCliRun(
            command=tuple(cmd),
            returncode=None,
            stdout=_coerce_timeout_text(exc.stdout),
            stderr=_coerce_timeout_text(exc.stderr),
            duration_s=time.monotonic() - started,
            timed_out=True,
        )

    return ClaudeCliRun(
        command=tuple(cmd),
        returncode=result.returncode,
        stdout=result.stdout,
        stderr=result.stderr,
        duration_s=time.monotonic() - started,
    )
|
||||
|
||||
|
||||
def read_log_offset(log_path: Path) -> int:
    """Return the current length, in decoded characters, of a smoke server log.

    The file is read as UTF-8 with undecodable bytes replaced. A path that
    is not an existing regular file yields ``0``.
    """
    if log_path.is_file():
        contents = log_path.read_text(encoding="utf-8", errors="replace")
        return len(contents)
    return 0
|
||||
|
||||
|
||||
def read_log_delta(log_path: Path, offset: int) -> str:
    """Return the smoke server log text found after character ``offset``.

    The file is read as UTF-8 with undecodable bytes replaced. A path that
    is not an existing regular file yields an empty string.
    """
    if log_path.is_file():
        contents = log_path.read_text(encoding="utf-8", errors="replace")
        return contents[offset:]
    return ""
|
||||
|
||||
|
||||
def token_evidence(
    *,
    feature: str,
    marker: str,
    run: ClaudeCliRun,
    log_delta: str,
) -> dict[str, Any]:
    """Collect compact evidence for a CLI feature probe.

    All checks are plain substring/regex tests over the CLI's combined
    output followed by ``log_delta``.

    Args:
        feature: Feature name, echoed back in the result.
        marker: Expected marker text; an empty marker is never "present".
        run: The CLI run whose output is inspected.
        log_delta: Server log text written during the probe.

    Returns:
        A dict of counts and boolean flags describing what was observed.
    """
    combined = "{}\n{}".format(run.combined_output, log_delta)
    lowered = combined.lower()

    def mentions(text: str) -> bool:
        return text in combined

    task_mentions = combined.count('"name": "Task"') + combined.count('"name":"Task"')
    server_error = re.search(r'HTTP/1\.1" 5\d\d', combined)

    evidence: dict[str, Any] = {}
    evidence["feature"] = feature
    evidence["marker_present"] = bool(marker and mentions(marker))
    evidence["thinking_delta_count"] = combined.count("thinking_delta")
    evidence["tool_use_count"] = combined.count('"tool_use"')
    evidence["tool_result_count"] = combined.count('"tool_result"')
    evidence["task_tool_count"] = task_mentions
    # Loose heuristic: both tokens appear somewhere, not necessarily together.
    evidence["run_in_background_false"] = (
        mentions("run_in_background") and "false" in lowered
    )
    evidence["compact_boundary"] = mentions("compact_boundary")
    evidence["compact_metadata"] = mentions("compact_metadata")
    evidence["http_422"] = mentions('HTTP/1.1" 422')
    evidence["http_500"] = server_error is not None
    evidence["timed_out"] = run.timed_out
    return evidence
|
||||
|
||||
|
||||
def classify_probe(
    *,
    run: ClaudeCliRun,
    log_delta: str,
    marker: str,
    requires_tool_result: bool = False,
    requires_task: bool = False,
    requires_compact: bool = False,
) -> tuple[str, str]:
    """Classify a probe without failing compatibility characterization failures.

    Returns an ``(outcome, classification)`` pair. Checks run in a fixed order:
    proxy regression in the server log, missing-environment markers on a
    non-zero exit, CLI timeout, full success, upstream-unavailable markers,
    no proxy request seen, and finally a model feature failure.
    """
    haystack = f"{run.combined_output}\n{log_delta}"
    lowered = haystack.lower()

    if _has_proxy_regression(log_delta):
        return "failed", "product_failure"

    exited_nonzero = run.returncode != 0
    if exited_nonzero and any(text in lowered for text in _MISSING_ENV_MARKERS):
        return "skipped", "missing_env"

    if run.timed_out:
        return "failed", "probe_timeout"

    marker_ok = not marker or marker in haystack
    tool_ok = not requires_tool_result or '"tool_result"' in haystack
    task_named = '"name": "Task"' in haystack or '"name":"Task"' in haystack
    task_ok = not requires_task or (
        task_named and "run_in_background" in haystack and "false" in lowered
    )
    compact_tokens = ("compact_boundary", "compact_metadata", "/compact")
    compact_ok = not requires_compact or (
        any(token in haystack for token in compact_tokens) or "compact" in lowered
    )
    cli_ok = run.returncode == 0

    if all((cli_ok, marker_ok, tool_ok, task_ok, compact_ok)):
        return "passed", "passed"

    # From here on the probe failed; the remaining checks only pick the label.
    if any(text in lowered for text in _UPSTREAM_UNAVAILABLE_MARKERS):
        return "failed", "upstream_unavailable"
    if not _has_proxy_request(log_delta):
        return "failed", "harness_bug"
    return "failed", "model_feature_failure"
|
||||
|
||||
|
||||
def make_outcome(
    *,
    model: str,
    full_model: str,
    source: str,
    feature: str,
    marker: str,
    run: ClaudeCliRun,
    log_delta: str,
    log_path: Path,
    requires_tool_result: bool = False,
    requires_task: bool = False,
    requires_compact: bool = False,
) -> NimCliMatrixOutcome:
    """Assemble one matrix report row from a CLI run and its server log delta.

    The run is classified, token evidence is collected, and the stdout,
    stderr and log texts are stored as excerpts produced by ``_excerpt``.
    """
    status, classification = classify_probe(
        run=run,
        log_delta=log_delta,
        marker=marker,
        requires_tool_result=requires_tool_result,
        requires_task=requires_task,
        requires_compact=requires_compact,
    )
    evidence = token_evidence(
        feature=feature,
        marker=marker,
        run=run,
        log_delta=log_delta,
    )
    rounded_duration = round(run.duration_s, 3)
    requests_seen = _request_count(log_delta)
    return NimCliMatrixOutcome(
        model=model,
        full_model=full_model,
        source=source,
        feature=feature,
        outcome=status,
        classification=classification,
        duration_s=rounded_duration,
        cli_returncode=run.returncode,
        token_evidence=evidence,
        request_count=requests_seen,
        log_path=str(log_path),
        stdout_excerpt=_excerpt(run.stdout),
        stderr_excerpt=_excerpt(run.stderr),
        log_excerpt=_excerpt(log_delta),
    )
|
||||
|
||||
|
||||
def write_matrix_report(
    config: SmokeConfig,
    outcomes: list[NimCliMatrixOutcome],
) -> Path:
    """Write the NVIDIA NIM CLI compatibility matrix as a JSON file.

    The file is created under ``config.results_dir`` (made on demand) and is
    named after the worker id and the current epoch second. Returns the path
    of the written report.
    """
    results_dir = config.results_dir
    results_dir.mkdir(parents=True, exist_ok=True)
    filename = f"nvidia-nim-cli-matrix-{config.worker_id}-{int(time.time())}.json"
    report_path = results_dir / filename

    # NOTE: despite its name, "started_at" is stamped when the report is written.
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    distinct_models = {row.full_model for row in outcomes}
    payload = {
        "started_at": timestamp,
        "worker_id": config.worker_id,
        "target": "nvidia_nim_cli",
        "models": sorted(distinct_models),
        "outcomes": [asdict(row) for row in outcomes],
    }
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    report_path.write_text(serialized, encoding="utf-8")
    return report_path
|
||||
|
||||
|
||||
def regression_failures(outcomes: list[NimCliMatrixOutcome]) -> list[str]:
    """Return one report line per outcome whose classification fails pytest.

    Only classifications listed in ``REGRESSION_CLASSIFICATIONS`` are reported;
    input order is preserved.
    """
    lines: list[str] = []
    for row in outcomes:
        if row.classification not in REGRESSION_CLASSIFICATIONS:
            continue
        lines.append(f"{row.full_model} {row.feature}: {row.classification}")
    return lines
|
||||
|
||||
|
||||
def _has_proxy_regression(log_delta: str) -> bool:
    """Tell whether the server log delta shows a proxy-side failure.

    True when the log contains ``CREATE_MESSAGE_ERROR`` or matches any pattern
    in ``_HTTP_REGRESSION_PATTERNS``.
    """
    return "CREATE_MESSAGE_ERROR" in log_delta or any(
        re.search(pattern, log_delta) for pattern in _HTTP_REGRESSION_PATTERNS
    )
|
||||
|
||||
|
||||
def _has_proxy_request(log_delta: str) -> bool:
|
||||
return "POST /v1/messages" in log_delta or "API_REQUEST:" in log_delta
|
||||
|
||||
|
||||
def _request_count(log_delta: str) -> int:
|
||||
access_log_count = log_delta.count("POST /v1/messages")
|
||||
service_log_count = log_delta.count("API_REQUEST:")
|
||||
return max(access_log_count, service_log_count)
|
||||
|
||||
|
||||
def _excerpt(value: str, *, max_chars: int = 2400) -> str:
    """Return the redacted tail of ``value``, at most ``max_chars`` characters.

    Redaction runs on the full text before truncation. Cutting first could
    split a secret at the excerpt boundary; the remaining fragment would then
    presumably no longer match what ``redacted`` looks for and would end up
    in the report unmasked.

    Args:
        value: Raw CLI output or server log text.
        max_chars: Upper bound on the length of the returned excerpt.

    Returns:
        The redacted text, shortened to its last ``max_chars`` characters
        when it is longer than that.
    """
    cleaned = redacted(value)
    if len(cleaned) <= max_chars:
        return cleaned
    # Index from the front: ``cleaned[-0:]`` would return the whole string
    # when ``max_chars`` is 0.
    return cleaned[len(cleaned) - max_chars :]
|
||||
|
||||
|
||||
def _coerce_timeout_text(value: str | bytes | None) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
return value
|
||||
@@ -69,6 +69,8 @@ def classify_outcome(*, nodeid: str, outcome: str, detail: str) -> str:
|
||||
|
||||
text = f"{nodeid}\n{detail}".lower()
|
||||
if outcome == "skipped":
|
||||
if "smoke target disabled" in text:
|
||||
return "target_disabled"
|
||||
if any(
|
||||
marker in text
|
||||
for marker in (
|
||||
|
||||
@@ -0,0 +1,325 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from smoke.lib.config import ProviderModel, SmokeConfig
|
||||
from smoke.lib.e2e import SmokeServerDriver
|
||||
from smoke.lib.nvidia_nim_cli import (
|
||||
ClaudeCliRun,
|
||||
NimCliMatrixOutcome,
|
||||
make_outcome,
|
||||
read_log_delta,
|
||||
read_log_offset,
|
||||
regression_failures,
|
||||
run_claude_cli,
|
||||
write_matrix_report,
|
||||
)
|
||||
from smoke.lib.server import RunningServer
|
||||
|
||||
pytestmark = [pytest.mark.live, pytest.mark.smoke_target("nvidia_nim_cli")]
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_matrix_e2e(smoke_config: SmokeConfig, tmp_path: Path) -> None:
    """Run every CLI feature probe against each configured NVIDIA NIM model.

    One smoke server is started per model. All outcomes go into a single
    matrix report, and the test fails only for classifications that
    ``regression_failures`` reports.
    """
    if not smoke_config.has_provider_configuration("nvidia_nim"):
        pytest.skip("missing_env: NVIDIA_NIM_API_KEY is not configured")

    claude_bin = shutil.which(smoke_config.claude_bin)
    if not claude_bin:
        pytest.skip(f"missing_env: Claude CLI not found: {smoke_config.claude_bin}")

    provider_models = smoke_config.nvidia_nim_cli_models()
    if not provider_models:
        pytest.skip("missing_env: no NVIDIA NIM CLI smoke models configured")

    # Probes run in this order for every model.
    probes = (
        _basic_text,
        _thinking,
        _tool_use_roundtrip,
        _interleaved_thinking_tool,
        _subagent_task,
        _compact_command,
    )

    outcomes: list[NimCliMatrixOutcome] = []
    for provider_model in provider_models:
        slug = _slug(provider_model.model_name)
        driver = SmokeServerDriver(
            smoke_config,
            name=f"product-nvidia-nim-cli-{slug}",
            env_overrides={
                "MODEL": provider_model.full_model,
                "MESSAGING_PLATFORM": "none",
                "ENABLE_MODEL_THINKING": "true",
                "LOG_RAW_API_PAYLOADS": "true",
                "LOG_RAW_SSE_EVENTS": "true",
            },
        )
        with driver.run() as server:
            model_dir = tmp_path / slug
            outcomes.extend(
                [
                    probe(claude_bin, server, smoke_config, provider_model, model_dir)
                    for probe in probes
                ]
            )

    report_path = write_matrix_report(smoke_config, outcomes)
    failures = regression_failures(outcomes)
    assert not failures, (
        f"NVIDIA NIM CLI matrix regressions written to {report_path}:\n"
        + "\n".join(failures)
    )
|
||||
|
||||
|
||||
def _basic_text(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe a plain text reply: the model must echo a fresh marker, no tools."""
    token = _marker("BASIC")
    instruction = f"Reply with exactly {token} and no other text."
    return _run_probe(
        claude_bin=claude_bin,
        server=server,
        smoke_config=smoke_config,
        provider_model=provider_model,
        workspace=model_dir / "basic_text",
        feature="basic_text",
        marker=token,
        prompt=instruction,
        tools="",
    )
|
||||
|
||||
|
||||
def _thinking(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe a text reply with ``--effort high`` passed to the CLI, no tools."""
    token = _marker("THINK")
    instruction = (
        "Think privately about the request, then reply with exactly "
        f"{token} and no other text."
    )
    return _run_probe(
        claude_bin=claude_bin,
        server=server,
        smoke_config=smoke_config,
        provider_model=provider_model,
        workspace=model_dir / "thinking",
        feature="thinking",
        marker=token,
        prompt=instruction,
        tools="",
        extra_args=("--effort", "high"),
    )
|
||||
|
||||
|
||||
def _tool_use_roundtrip(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe a Read tool round trip: the marker lives only in a workspace file."""
    token = _marker("TOOL")
    workspace = model_dir / "tool_use_roundtrip"
    workspace.mkdir(parents=True, exist_ok=True)
    secret_file = workspace / "smoke-read.txt"
    secret_file.write_text(token, encoding="utf-8")
    instruction = (
        "Use the Read tool to read smoke-read.txt. Reply with exactly the "
        "secret token from that file and no other text."
    )
    return _run_probe(
        claude_bin=claude_bin,
        server=server,
        smoke_config=smoke_config,
        provider_model=provider_model,
        workspace=workspace,
        feature="tool_use_roundtrip",
        marker=token,
        prompt=instruction,
        tools="Read",
        requires_tool_result=True,
    )
|
||||
|
||||
|
||||
def _interleaved_thinking_tool(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe a Read tool round trip with ``--effort high`` passed to the CLI."""
    token = _marker("INTERLEAVED")
    workspace = model_dir / "interleaved_thinking_tool"
    workspace.mkdir(parents=True, exist_ok=True)
    secret_file = workspace / "smoke-interleaved.txt"
    secret_file.write_text(token, encoding="utf-8")
    instruction = (
        "Think privately, use Read on smoke-interleaved.txt, then reply with "
        "exactly the secret token from that file and no other text."
    )
    return _run_probe(
        claude_bin=claude_bin,
        server=server,
        smoke_config=smoke_config,
        provider_model=provider_model,
        workspace=workspace,
        feature="interleaved_thinking_tool",
        marker=token,
        prompt=instruction,
        tools="Read",
        extra_args=("--effort", "high"),
        requires_tool_result=True,
    )
|
||||
|
||||
|
||||
def _subagent_task(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe delegation to a ``smoke_reader`` subagent declared via ``--agents``.

    The marker is written to a workspace file. The probe requires a tool
    result in the output but does not set ``requires_task``.
    """
    token = _marker("TASK")
    workspace = model_dir / "subagent_task"
    workspace.mkdir(parents=True, exist_ok=True)
    secret_file = workspace / "smoke-subagent.txt"
    secret_file.write_text(token, encoding="utf-8")

    reader_agent = {
        "description": "Reads one requested file and returns its token.",
        "prompt": (
            "Read the requested file with Read and return only the token "
            "inside it."
        ),
    }
    agents = json.dumps({"smoke_reader": reader_agent})
    instruction = (
        "Use the smoke_reader subagent with Task to read smoke-subagent.txt. "
        "Reply with exactly the token the subagent returns and no other text."
    )
    return _run_probe(
        claude_bin=claude_bin,
        server=server,
        smoke_config=smoke_config,
        provider_model=provider_model,
        workspace=workspace,
        feature="subagent_task",
        marker=token,
        prompt=instruction,
        tools="Task,Read",
        extra_args=("--agents", agents),
        requires_tool_result=True,
    )
|
||||
|
||||
|
||||
def _compact_command(
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    model_dir: Path,
) -> NimCliMatrixOutcome:
    """Probe ``/compact`` across two CLI turns that share one persisted session.

    The first turn seeds the session; the second resumes it and issues the
    compact command. Both runs are merged into a single synthetic
    ``ClaudeCliRun`` and classified with ``requires_compact=True`` and an
    empty marker.
    """
    marker = _marker("COMPACT")
    workspace = model_dir / "compact_command"
    session_id = str(uuid.uuid4())

    def _turn(prompt: str, **session_args: str) -> ClaudeCliRun:
        # Both turns share everything except the prompt and the session flag.
        return run_claude_cli(
            claude_bin=claude_bin,
            server=server,
            config=smoke_config,
            cwd=workspace,
            prompt=prompt,
            tools="",
            no_session_persistence=False,
            **session_args,
        )

    offset = read_log_offset(server.log_path)
    first = _turn(
        f"Remember this smoke token: {marker}. Reply with exactly {marker}.",
        session_id=session_id,
    )
    second = _turn(f"/compact preserve {marker}", resume_session_id=session_id)
    log_delta = read_log_delta(server.log_path, offset)

    # Report the first turn's exit status unless it succeeded.
    if first.returncode == 0:
        merged_returncode = second.returncode
    else:
        merged_returncode = first.returncode

    merged = ClaudeCliRun(
        command=(*first.command, "&&", *second.command),
        returncode=merged_returncode,
        stdout=f"{first.stdout}\n{second.stdout}",
        stderr=f"{first.stderr}\n{second.stderr}",
        duration_s=first.duration_s + second.duration_s,
        timed_out=first.timed_out or second.timed_out,
    )
    return make_outcome(
        model=provider_model.model_name,
        full_model=provider_model.full_model,
        source=provider_model.source,
        feature="compact_command",
        marker="",
        run=merged,
        log_delta=log_delta,
        log_path=server.log_path,
        requires_compact=True,
    )
|
||||
|
||||
|
||||
def _run_probe(
    *,
    claude_bin: str,
    server: RunningServer,
    smoke_config: SmokeConfig,
    provider_model: ProviderModel,
    workspace: Path,
    feature: str,
    marker: str,
    prompt: str,
    tools: str | None,
    extra_args: tuple[str, ...] = (),
    requires_tool_result: bool = False,
    requires_task: bool = False,
) -> NimCliMatrixOutcome:
    """Run one single-turn CLI probe and turn it into a matrix outcome.

    The server log offset is taken before the CLI starts, so only log text
    written during this probe is used for classification.
    """
    log_path = server.log_path
    start_offset = read_log_offset(log_path)
    cli_run = run_claude_cli(
        claude_bin=claude_bin,
        server=server,
        config=smoke_config,
        cwd=workspace,
        prompt=prompt,
        tools=tools,
        extra_args=extra_args,
    )
    new_log_text = read_log_delta(log_path, start_offset)
    return make_outcome(
        model=provider_model.model_name,
        full_model=provider_model.full_model,
        source=provider_model.source,
        feature=feature,
        marker=marker,
        run=cli_run,
        log_delta=new_log_text,
        log_path=log_path,
        requires_tool_result=requires_tool_result,
        requires_task=requires_task,
    )
|
||||
|
||||
|
||||
def _marker(prefix: str) -> str:
|
||||
return f"FCC_NIM_{prefix}_{uuid.uuid4().hex[:8].upper()}"
|
||||
|
||||
|
||||
def _slug(value: str) -> str:
|
||||
return "".join(char if char.isalnum() else "-" for char in value).strip("-")
|
||||
@@ -0,0 +1,196 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from config.settings import Settings
|
||||
from smoke.lib.config import DEFAULT_TARGETS, SmokeConfig
|
||||
from smoke.lib.nvidia_nim_cli import (
|
||||
ClaudeCliRun,
|
||||
make_outcome,
|
||||
regression_failures,
|
||||
write_matrix_report,
|
||||
)
|
||||
|
||||
|
||||
def _smoke_config(tmp_path: Path) -> SmokeConfig:
    """Build an offline ``SmokeConfig`` rooted at ``tmp_path`` for report tests."""
    settings = Settings.model_construct(anthropic_auth_token="")
    results_dir = tmp_path / ".smoke-results"
    return SmokeConfig(
        root=tmp_path,
        results_dir=results_dir,
        live=False,
        interactive=False,
        targets=DEFAULT_TARGETS,
        provider_matrix=frozenset(),
        timeout_s=45.0,
        prompt="Reply with exactly: FCC_SMOKE_PONG",
        claude_bin="claude",
        worker_id="test-worker",
        settings=settings,
    )
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_matrix_report_shape_and_redaction(
    tmp_path: Path, monkeypatch
) -> None:
    """A passing probe is reported with the expected shape and no API key text."""
    monkeypatch.setenv("NVIDIA_NIM_API_KEY", "secret-nim-key")
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "redacted"),
        returncode=0,
        stdout="FCC_NIM_BASIC secret-nim-key",
        stderr="",
        duration_s=1.25,
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="basic_text",
        marker="FCC_NIM_BASIC",
        run=cli_run,
        log_delta='POST /v1/messages HTTP/1.1" 200 OK secret-nim-key',
        log_path=tmp_path / "server.log",
    )

    report_path = write_matrix_report(_smoke_config(tmp_path), [row])
    report_text = report_path.read_text(encoding="utf-8")
    payload = json.loads(report_text)

    assert report_path.name.startswith("nvidia-nim-cli-matrix-test-worker-")
    assert payload["target"] == "nvidia_nim_cli"
    assert payload["models"] == ["nvidia_nim/z-ai/glm-5.1"]
    saved = payload["outcomes"][0]
    assert saved["feature"] == "basic_text"
    assert saved["classification"] == "passed"
    assert saved["request_count"] == 1
    assert saved["token_evidence"]["marker_present"] is True
    # The key appeared in both stdout and the log; neither copy may survive.
    assert "secret-nim-key" not in report_text
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_matrix_regression_detection(tmp_path: Path) -> None:
    """An HTTP 500 in the server log is a product failure and a regression."""
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "x"),
        returncode=0,
        stdout="",
        stderr="",
        duration_s=0.1,
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="basic_text",
        marker="FCC_NIM_BASIC",
        run=cli_run,
        log_delta='POST /v1/messages HTTP/1.1" 500 Internal Server Error',
        log_path=tmp_path / "server.log",
    )

    assert row.classification == "product_failure"
    expected_lines = ["nvidia_nim/z-ai/glm-5.1 basic_text: product_failure"]
    assert regression_failures([row]) == expected_lines
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_matrix_model_feature_failures_do_not_regress(
    tmp_path: Path,
) -> None:
    """A missing tool result is a model feature failure, not a regression."""
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "x"),
        returncode=0,
        stdout="ordinary answer",
        stderr="",
        duration_s=0.1,
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="tool_use_roundtrip",
        marker="FCC_NIM_TOOL",
        run=cli_run,
        log_delta='POST /v1/messages HTTP/1.1" 200 OK',
        log_path=tmp_path / "server.log",
        requires_tool_result=True,
    )

    assert row.classification == "model_feature_failure"
    assert regression_failures([row]) == []
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_raw_payload_log_counts_as_proxy_request(
    tmp_path: Path,
) -> None:
    """An ``API_REQUEST:`` service-log line alone counts as one proxy request."""
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "x"),
        returncode=0,
        stdout="ordinary answer",
        stderr="",
        duration_s=0.1,
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="subagent_task",
        marker="FCC_NIM_TASK",
        run=cli_run,
        log_delta="API_REQUEST: request_id=req_1 model=z-ai/glm-5.1 messages=2",
        log_path=tmp_path / "server.log",
        requires_task=True,
    )

    # With a request on record the failure is blamed on the model, not the harness.
    assert row.classification == "model_feature_failure"
    assert row.request_count == 1
    assert regression_failures([row]) == []
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_timeout_is_not_model_missing(
    tmp_path: Path,
) -> None:
    """A timed-out CLI run is a probe timeout even when the marker was printed."""
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "x"),
        returncode=None,
        stdout='{"type":"assistant","content":[{"type":"text","text":"FCC_NIM_TOOL"}]}',
        stderr="",
        duration_s=45.0,
        timed_out=True,
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="tool_use_roundtrip",
        marker="FCC_NIM_TOOL",
        run=cli_run,
        log_delta="API_REQUEST: request_id=req_1 model=z-ai/glm-5.1 messages=2",
        log_path=tmp_path / "server.log",
    )

    assert row.classification == "probe_timeout"
    assert row.token_evidence["timed_out"] is True
    assert regression_failures([row]) == []
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_success_beats_verbose_timeout_words(tmp_path: Path) -> None:
    """The word "timeout" in a log line does not override a successful probe."""
    cli_run = ClaudeCliRun(
        command=("claude", "-p", "x"),
        returncode=0,
        stdout="FCC_NIM_THINK",
        stderr="",
        duration_s=0.1,
    )
    verbose_log = (
        "API_REQUEST: request_id=req_1 model=z-ai/glm-5.1 messages=1 "
        "read_timeout_s=300"
    )
    row = make_outcome(
        model="z-ai/glm-5.1",
        full_model="nvidia_nim/z-ai/glm-5.1",
        source="nvidia_nim_cli_default",
        feature="thinking",
        marker="FCC_NIM_THINK",
        run=cli_run,
        log_delta=verbose_log,
        log_path=tmp_path / "server.log",
    )

    assert row.classification == "passed"
    assert row.request_count == 1
|
||||
@@ -4,10 +4,14 @@ from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from smoke.lib.config import (
|
||||
ALL_TARGETS,
|
||||
DEFAULT_TARGETS,
|
||||
NVIDIA_NIM_CLI_DEFAULT_MODELS,
|
||||
OPT_IN_TARGETS,
|
||||
PROVIDER_SMOKE_DEFAULT_MODELS,
|
||||
TARGET_REQUIRED_ENV,
|
||||
SmokeConfig,
|
||||
nvidia_nim_cli_model_refs,
|
||||
)
|
||||
|
||||
|
||||
@@ -52,6 +56,13 @@ def test_ollama_is_default_smoke_target() -> None:
|
||||
assert "ollama" in TARGET_REQUIRED_ENV
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_is_opt_in_smoke_target() -> None:
    """The NIM CLI matrix target is known and opt-in, never a default target."""
    target = "nvidia_nim_cli"
    assert target not in DEFAULT_TARGETS
    assert target in OPT_IN_TARGETS
    assert target in ALL_TARGETS
    assert target in TARGET_REQUIRED_ENV
|
||||
|
||||
|
||||
def test_ollama_provider_configuration_uses_base_url() -> None:
|
||||
config = _smoke_config()
|
||||
|
||||
@@ -190,3 +201,67 @@ def test_provider_smoke_does_not_include_default_local_urls_when_unmapped(
|
||||
config = _smoke_config(settings=_settings(model="nvidia_nim/test"))
|
||||
|
||||
assert config.provider_smoke_models() == []
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_default_models_are_normalized() -> None:
    """With no overrides, every default model gets the ``nvidia_nim/`` prefix."""
    refs = nvidia_nim_cli_model_refs({})

    expected_refs = tuple(
        f"nvidia_nim/{model}" for model in NVIDIA_NIM_CLI_DEFAULT_MODELS
    )
    assert tuple(refs) == expected_refs
    assert "nvidia_nim/deepseek-ai/deepseek-v4-pro" in refs
    assert "nvidia_nim/deepseek-ai/deepseek-v4-flash" in refs
    # Every default carries the same source label.
    assert set(refs.values()) == {"nvidia_nim_cli_default"}
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_models_override_and_append() -> None:
    """Override models come first, extras are appended, duplicates keep first source."""
    env = {
        "FCC_SMOKE_NIM_MODELS": "z-ai/glm-5.1,nvidia_nim/custom/model",
        "FCC_SMOKE_NIM_EXTRA_MODELS": "moonshotai/kimi-k2.6,z-ai/glm-5.1",
    }
    refs = nvidia_nim_cli_model_refs(env)

    expected_order = (
        "nvidia_nim/z-ai/glm-5.1",
        "nvidia_nim/custom/model",
        "nvidia_nim/moonshotai/kimi-k2.6",
    )
    assert tuple(refs) == expected_order
    assert refs["nvidia_nim/z-ai/glm-5.1"] == "FCC_SMOKE_NIM_MODELS"
    assert refs["nvidia_nim/moonshotai/kimi-k2.6"] == "FCC_SMOKE_NIM_EXTRA_MODELS"
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_models_reject_empty_override() -> None:
    """An override holding only separators raises ``ValueError`` naming the variable."""
    error = None
    try:
        nvidia_nim_cli_model_refs({"FCC_SMOKE_NIM_MODELS": " , "})
    except ValueError as exc:
        error = exc

    if error is None:
        raise AssertionError("expected empty NVIDIA NIM CLI model override to fail")
    assert "FCC_SMOKE_NIM_MODELS" in str(error)
|
||||
|
||||
|
||||
def test_nvidia_nim_cli_models_reject_wrong_provider_prefix() -> None:
    """A model reference with another provider's prefix raises ``ValueError``."""
    error = None
    try:
        nvidia_nim_cli_model_refs({"FCC_SMOKE_NIM_MODELS": "open_router/model"})
    except ValueError as exc:
        error = exc

    if error is None:
        raise AssertionError("expected wrong provider prefix to fail")
    assert "nvidia_nim" in str(error)
|
||||
|
||||
|
||||
def test_smoke_config_returns_nvidia_nim_cli_provider_models(monkeypatch) -> None:
    """With both model variables unset, the first model comes from the defaults."""
    for variable in ("FCC_SMOKE_NIM_MODELS", "FCC_SMOKE_NIM_EXTRA_MODELS"):
        monkeypatch.delenv(variable, raising=False)
    settings = _settings(
        model="nvidia_nim/z-ai/glm-5.1",
        nvidia_nim_api_key="nim-key",
        ollama_base_url="",
    )
    config = _smoke_config(settings=settings)

    first = config.nvidia_nim_cli_models()[0]

    assert first.provider == "nvidia_nim"
    assert first.full_model == "nvidia_nim/z-ai/glm-5.1"
    assert first.source == "nvidia_nim_cli_default"
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from smoke.lib.report import classify_outcome
|
||||
from smoke.lib.report_summary import format_summary, summarize_reports
|
||||
|
||||
|
||||
@@ -32,3 +33,13 @@ def test_smoke_report_summary_counts_regression_classes(tmp_path: Path) -> None:
|
||||
assert summary.classifications["product_failure"] == 1
|
||||
assert summary.has_regression
|
||||
assert "status=regression" in format_summary(summary)
|
||||
|
||||
|
||||
def test_target_disabled_skip_is_not_missing_env() -> None:
    """A skip caused by a disabled smoke target is classified ``target_disabled``."""
    nodeid = "smoke/product/test_api_product_live.py::test_api_basic_conversation_e2e"
    result = classify_outcome(
        nodeid=nodeid,
        outcome="skipped",
        detail="Skipped: smoke target disabled: api",
    )

    assert result == "target_disabled"
|
||||
|
||||
@@ -8,6 +8,7 @@ from httpx import Request, Response
|
||||
from config.nim import NimSettings
|
||||
from providers.defaults import NVIDIA_NIM_DEFAULT_BASE
|
||||
from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.nvidia_nim.request import NIM_TOOL_ARGUMENT_ALIASES_KEY
|
||||
|
||||
|
||||
# Mock data classes
|
||||
@@ -47,6 +48,46 @@ class MockRequest:
|
||||
setattr(self, k, v)
|
||||
|
||||
|
||||
def _input_json_deltas(events):
|
||||
deltas = []
|
||||
for event in events:
|
||||
if "event: content_block_delta" not in event:
|
||||
continue
|
||||
for line in event.splitlines():
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
payload = json.loads(line[6:])
|
||||
delta = payload.get("delta", {})
|
||||
if delta.get("type") == "input_json_delta":
|
||||
deltas.append(delta.get("partial_json", ""))
|
||||
return deltas
|
||||
|
||||
|
||||
def _tool_call_chunk(
|
||||
*,
|
||||
name,
|
||||
arguments,
|
||||
tool_id="call_1",
|
||||
index=0,
|
||||
finish_reason=None,
|
||||
):
|
||||
mock_tc = MagicMock()
|
||||
mock_tc.index = index
|
||||
mock_tc.id = tool_id
|
||||
mock_tc.function.name = name
|
||||
mock_tc.function.arguments = arguments
|
||||
|
||||
mock_chunk = MagicMock()
|
||||
mock_chunk.choices = [
|
||||
MagicMock(
|
||||
delta=MagicMock(content=None, reasoning_content="", tool_calls=[mock_tc]),
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
]
|
||||
mock_chunk.usage = None
|
||||
return mock_chunk
|
||||
|
||||
|
||||
def _make_bad_request_error(message: str) -> openai.BadRequestError:
|
||||
response = Response(
|
||||
status_code=400,
|
||||
@@ -434,6 +475,195 @@ async def test_tool_call_stream(nim_provider):
|
||||
assert "search" in starts[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_response_restores_aliased_tool_arguments(nim_provider):
    """NIM-safe argument aliases are restored before Anthropic SSE emission."""
    grep_schema = {
        "type": "object",
        "properties": {
            "pattern": {"type": "string"},
            "-A": {"type": "number"},
            "type": {"type": "string"},
        },
        "required": ["pattern"],
    }
    request = MockRequest(
        tools=[MockTool("Grep", "Search file contents", grep_schema)]
    )
    upstream_args = {"pattern": "needle", "-A": 2, "_fcc_arg_type": "py"}
    chunk = _tool_call_chunk(name="Grep", arguments=json.dumps(upstream_args))

    async def mock_stream():
        yield chunk

    completions = nim_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as mock_create:
        mock_create.return_value = mock_stream()

        events = [event async for event in nim_provider.stream_response(request)]

    # What was sent upstream: the private alias map is stripped and only the
    # "type" property is renamed.
    await_args = mock_create.await_args
    assert await_args is not None
    sent_kwargs = await_args.kwargs
    assert NIM_TOOL_ARGUMENT_ALIASES_KEY not in sent_kwargs
    sent_properties = sent_kwargs["tools"][0]["function"]["parameters"]["properties"]
    assert "-A" in sent_properties
    assert "type" not in sent_properties
    assert "_fcc_arg_A" not in sent_properties
    assert "_fcc_arg_type" in sent_properties

    # What the client receives: the original argument names.
    deltas = _input_json_deltas(events)
    assert len(deltas) == 1
    assert json.loads(deltas[0]) == {"pattern": "needle", "-A": 2, "type": "py"}
    assert "_fcc_arg_type" not in deltas[0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_response_buffers_chunked_aliased_tool_arguments(nim_provider):
    """Chunked aliased args are emitted once as restored Claude Code args."""
    grep_schema = {
        "type": "object",
        "properties": {
            "pattern": {"type": "string"},
            "type": {"type": "string"},
        },
        "required": ["pattern"],
    }
    request = MockRequest(
        tools=[MockTool("Grep", "Search file contents", grep_schema)]
    )
    # The JSON arguments are split mid-object across two upstream chunks.
    chunks = [
        _tool_call_chunk(
            name="Grep",
            arguments='{"pattern": "needle", ',
            tool_id="call_chunked",
        ),
        _tool_call_chunk(
            name=None,
            arguments='"_fcc_arg_type": "py"}',
            tool_id="call_chunked",
        ),
    ]

    async def mock_stream():
        for chunk in chunks:
            yield chunk

    completions = nim_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as mock_create:
        mock_create.return_value = mock_stream()

        events = [event async for event in nim_provider.stream_response(request)]

    deltas = _input_json_deltas(events)
    assert len(deltas) == 1
    assert json.loads(deltas[0]) == {"pattern": "needle", "type": "py"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_response_restores_nested_aliased_tool_arguments(nim_provider):
    """Aliased property names inside nested objects are restored as well."""
    parent_schema = {
        "type": "object",
        "properties": {
            "type": {"type": "string"},
            "id": {"type": "string"},
        },
        "required": ["type", "id"],
    }
    tool_schema = {
        "type": "object",
        "properties": {"parent": parent_schema},
        "required": ["parent"],
    }
    request = MockRequest(
        tools=[MockTool("NotionLike", "Nested type schema", tool_schema)]
    )
    upstream_args = {"parent": {"_fcc_arg_type": "page_id", "id": "page_123"}}
    chunk = _tool_call_chunk(name="NotionLike", arguments=json.dumps(upstream_args))

    async def mock_stream():
        yield chunk

    completions = nim_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as mock_create:
        mock_create.return_value = mock_stream()

        events = [event async for event in nim_provider.stream_response(request)]

    deltas = _input_json_deltas(events)
    assert len(deltas) == 1
    assert json.loads(deltas[0]) == {"parent": {"type": "page_id", "id": "page_123"}}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_response_task_tool_still_forces_background_false(nim_provider):
    """A Task call asking for background execution is emitted with it disabled."""
    task_schema = {
        "type": "object",
        "properties": {
            "description": {"type": "string"},
            "prompt": {"type": "string"},
            "run_in_background": {"type": "boolean"},
        },
        "required": ["description", "prompt"],
    }
    request = MockRequest(tools=[MockTool("Task", "Run a subagent", task_schema)])

    # The upstream call explicitly sets run_in_background to True.
    upstream_arguments = {
        "description": "Inspect",
        "prompt": "Read the marker",
        "run_in_background": True,
    }
    mock_chunk = _tool_call_chunk(
        name="Task",
        arguments=json.dumps(upstream_arguments),
        tool_id="call_task",
    )

    async def mock_stream():
        yield mock_chunk

    completions = nim_provider._client.chat.completions
    with patch.object(completions, "create", new_callable=AsyncMock) as mock_create:
        mock_create.return_value = mock_stream()
        events = [event async for event in nim_provider.stream_response(request)]

    deltas = _input_json_deltas(events)
    assert len(deltas) == 1
    assert json.loads(deltas[0])["run_in_background"] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_response_retries_without_reasoning_budget(nim_provider):
|
||||
req = MockRequest()
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
"""Tests for providers/nvidia_nim/request.py."""
|
||||
|
||||
from copy import deepcopy
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
@@ -8,12 +10,36 @@ import pytest
|
||||
from config.nim import NimSettings
|
||||
from core.anthropic import set_if_not_none
|
||||
from providers.nvidia_nim.request import (
|
||||
NIM_TOOL_ARGUMENT_ALIASES_KEY,
|
||||
_set_extra,
|
||||
body_without_nim_tool_argument_aliases,
|
||||
build_request_body,
|
||||
clone_body_without_chat_template,
|
||||
clone_body_without_reasoning_content,
|
||||
nim_tool_argument_aliases_from_body,
|
||||
)
|
||||
|
||||
# Grep tool input schema used as a shared fixture. It has a property literally
# named "type" next to dash-prefixed flag names ("-A", "-i", ...).
GREP_SCHEMA_FROM_SERVER_LOG: dict[str, Any] = {
    "type": "object",
    "properties": {
        "pattern": {"type": "string", "description": "The regular expression"},
        "path": {"type": "string", "description": "File or directory to search"},
        "glob": {"type": "string", "description": "Glob to filter files"},
        "output_mode": {
            "type": "string",
            "enum": ["content", "files_with_matches", "count"],
        },
        "-A": {"type": "number", "description": "Lines after match"},
        "-B": {"type": "number", "description": "Lines before match"},
        "-C": {"type": "number", "description": "Lines around match"},
        "-i": {"type": "boolean", "description": "Case insensitive"},
        "-n": {"type": "boolean", "description": "Show line numbers"},
        "type": {"type": "string", "description": "File type to search"},
    },
    "additionalProperties": False,
    "required": ["pattern"],
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def req():
|
||||
@@ -121,6 +147,129 @@ class TestBuildRequestBody:
|
||||
assert tool_schema["additionalProperties"] is False
|
||||
assert tool_schema["properties"]["nested"]["additionalProperties"] is False
|
||||
|
||||
def test_grep_schema_type_parameter_is_aliased_without_mutating_request(self, req):
    """Alias Grep's ``type`` property around a colliding ``_fcc_arg_type`` name."""
    tool_schema = deepcopy(GREP_SCHEMA_FROM_SERVER_LOG)
    # Occupy the alias name up front so a suffixed alias is expected instead.
    tool_schema["properties"]["_fcc_arg_type"] = {
        "type": "string",
        "description": "Existing safe property that collides with the alias",
    }
    tool_schema["required"] = ["pattern", "-A", "_fcc_arg_type"]
    original_schema = deepcopy(tool_schema)
    grep_tool = SimpleNamespace(
        name="Grep",
        description="Search file contents",
        input_schema=tool_schema,
    )
    req.tools = [grep_tool]

    body = build_request_body(req, NimSettings(), thinking_enabled=False)

    parameters = body["tools"][0]["function"]["parameters"]
    properties = parameters["properties"]
    aliases = body[NIM_TOOL_ARGUMENT_ALIASES_KEY]["Grep"]
    expected = original_schema["properties"]

    assert "additionalProperties" not in parameters
    # Dash-prefixed flags pass through untouched.
    for flag in ("-A", "-B", "-C", "-i", "-n"):
        assert properties[flag] == expected[flag]
    assert "type" not in properties
    assert properties["pattern"] == expected["pattern"]
    assert properties["output_mode"]["enum"] == [
        "content",
        "files_with_matches",
        "count",
    ]
    assert properties["_fcc_arg_type"] == expected["_fcc_arg_type"]
    assert aliases == {"_fcc_arg_type_2": "type"}
    assert properties["_fcc_arg_type_2"] == expected["type"]
    assert "-A" in parameters["required"]
    assert "_fcc_arg_type" in parameters["required"]
    # The caller's schema object must be left exactly as it was supplied.
    assert tool_schema == original_schema
|
||||
|
||||
def test_safe_tool_schema_does_not_add_alias_metadata(self, req):
    """A schema needing no aliases leaves the body free of alias metadata."""
    safe_properties = {
        "pattern": {"type": "string"},
        "path": {"type": "string"},
        "output_mode": {"type": "string", "enum": ["content", "count"]},
    }
    tool_schema = {
        "type": "object",
        "properties": safe_properties,
        "required": ["pattern"],
    }
    glob_tool = SimpleNamespace(
        name="Glob",
        description="Find files",
        input_schema=tool_schema,
    )
    req.tools = [glob_tool]

    body = build_request_body(req, NimSettings(), thinking_enabled=False)

    assert NIM_TOOL_ARGUMENT_ALIASES_KEY not in body
    parameters = body["tools"][0]["function"]["parameters"]
    assert parameters["properties"] == tool_schema["properties"]
    assert parameters["required"] == ["pattern"]
|
||||
|
||||
def test_nested_schema_keyword_properties_are_aliased_without_mutating_request(
    self, req
):
    """A nested ``type`` property and its ``required`` entry take the alias name."""
    nested_parent = {
        "type": "object",
        "properties": {
            "type": {"type": "string", "enum": ["page_id"]},
            "id": {"type": "string"},
        },
        "required": ["type", "id"],
    }
    tool_schema = {
        "type": "object",
        "properties": {"parent": nested_parent},
        "required": ["parent"],
    }
    original_schema = deepcopy(tool_schema)
    notion_tool = SimpleNamespace(
        name="NotionLike",
        description="Nested type schema",
        input_schema=tool_schema,
    )
    req.tools = [notion_tool]

    body = build_request_body(req, NimSettings(), thinking_enabled=False)

    aliases = body[NIM_TOOL_ARGUMENT_ALIASES_KEY]["NotionLike"]
    parent = body["tools"][0]["function"]["parameters"]["properties"]["parent"]
    parent_properties = parent["properties"]

    assert "type" not in parent_properties
    assert parent_properties["_fcc_arg_type"] == {
        "type": "string",
        "enum": ["page_id"],
    }
    assert parent["required"] == ["_fcc_arg_type", "id"]
    assert aliases == {"_fcc_arg_type": "type"}
    # The caller's schema object must be left exactly as it was supplied.
    assert tool_schema == original_schema
|
||||
|
||||
def test_private_alias_metadata_is_stripped_without_mutating_body(self):
    """Stripping alias metadata returns a cleaned body and keeps the input intact."""
    grep_aliases = {"Grep": {"_fcc_arg_A": "-A"}}
    body = {"model": "test", NIM_TOOL_ARGUMENT_ALIASES_KEY: grep_aliases}

    upstream_body = body_without_nim_tool_argument_aliases(body)

    assert NIM_TOOL_ARGUMENT_ALIASES_KEY not in upstream_body
    # Compare against fresh literals so mutation of the stored mapping is caught.
    assert body[NIM_TOOL_ARGUMENT_ALIASES_KEY] == {"Grep": {"_fcc_arg_A": "-A"}}
    assert nim_tool_argument_aliases_from_body(body) == {
        "Grep": {"_fcc_arg_A": "-A"}
    }
|
||||
|
||||
def test_reasoning_params_in_extra_body(self):
|
||||
req = MagicMock()
|
||||
req.model = "test"
|
||||
|
||||
Reference in New Issue
Block a user