Try mid stream retries

This commit is contained in:
Alishahryar1
2026-05-31 16:28:44 -07:00
parent 52ebb7417b
commit 0eee1da072
13 changed files with 2049 additions and 210 deletions
+252 -3
View File
@@ -5,10 +5,33 @@ from __future__ import annotations
import uuid
from collections.abc import Iterator
from contextlib import suppress
from dataclasses import dataclass, field
from typing import Any
from core.anthropic.sse import SSEBuilder, format_sse_event
from core.anthropic.stream_contracts import SSEEvent, event_index, parse_sse_lines
from core.anthropic.stream_contracts import SSEEvent, parse_sse_lines
from core.anthropic.stream_recovery import (
ToolSchema,
accept_tool_json_repair,
continuation_suffix,
parse_complete_tool_input,
)
@dataclass
class EmittedBlockState:
"""Tracked downstream block payload emitted to the client."""
index: int
block_type: str
open: bool = True
tool_id: str = ""
name: str = ""
parts: list[str] = field(default_factory=list)
@property
def content(self) -> str:
return "".join(self.parts)
class EmittedNativeSseTracker:
@@ -18,8 +41,11 @@ class EmittedNativeSseTracker:
self._buf = ""
self._open_stack: list[int] = []
self._max_index = -1
self._blocks: dict[int, EmittedBlockState] = {}
self.message_id: str | None = None
self.model: str = ""
self.stop_reason: str | None = None
self.message_stopped = False
def feed(self, chunk: str) -> None:
"""Record SSE frames completed by ``chunk`` (handles splitting across reads)."""
@@ -48,18 +74,79 @@ class EmittedNativeSseTracker:
return
if event.event == "content_block_start":
idx = event_index(event)
raw_index = event.data.get("index")
if not isinstance(raw_index, int):
return
idx = raw_index
self._max_index = max(self._max_index, idx)
self._open_stack.append(idx)
block = event.data.get("content_block")
if isinstance(block, dict):
block_type = str(block.get("type", ""))
state = EmittedBlockState(index=idx, block_type=block_type)
if block_type == "tool_use":
tool_id = block.get("id")
name = block.get("name")
state.tool_id = tool_id if isinstance(tool_id, str) else ""
state.name = name if isinstance(name, str) else ""
elif block_type == "text":
text = block.get("text")
if isinstance(text, str) and text:
state.parts.append(text)
elif block_type == "thinking":
thinking = block.get("thinking")
if isinstance(thinking, str) and thinking:
state.parts.append(thinking)
self._blocks[idx] = state
return
if event.event == "content_block_delta":
raw_index = event.data.get("index")
if not isinstance(raw_index, int):
return
idx = raw_index
state = self._blocks.get(idx)
delta = event.data.get("delta")
if state is not None and isinstance(delta, dict):
if state.block_type == "text":
text = delta.get("text")
if isinstance(text, str):
state.parts.append(text)
elif state.block_type == "thinking":
thinking = delta.get("thinking")
if isinstance(thinking, str):
state.parts.append(thinking)
elif state.block_type == "tool_use":
partial = delta.get("partial_json")
if isinstance(partial, str):
state.parts.append(partial)
return
if event.event == "content_block_stop":
idx = event_index(event)
raw_index = event.data.get("index")
if not isinstance(raw_index, int):
return
idx = raw_index
if self._open_stack and self._open_stack[-1] == idx:
self._open_stack.pop()
else:
with suppress(ValueError):
self._open_stack.remove(idx)
state = self._blocks.get(idx)
if state is not None:
state.open = False
return
if event.event == "message_delta":
delta = event.data.get("delta")
if isinstance(delta, dict):
stop_reason = delta.get("stop_reason")
if isinstance(stop_reason, str):
self.stop_reason = stop_reason
return
if event.event == "message_stop":
self.message_stopped = True
def next_content_index(self) -> int:
"""Next unused content block index based on emitted starts."""
@@ -69,11 +156,166 @@ class EmittedNativeSseTracker:
"""Yield ``content_block_stop`` events for blocks that were started but not stopped."""
while self._open_stack:
idx = self._open_stack.pop()
state = self._blocks.get(idx)
if state is not None:
state.open = False
yield format_sse_event(
"content_block_stop",
{"type": "content_block_stop", "index": idx},
)
def emitted_text(self) -> str:
return "".join(
block.content
for block in self._blocks.values()
if block.block_type == "text"
)
def emitted_thinking(self) -> str:
return "".join(
block.content
for block in self._blocks.values()
if block.block_type == "thinking"
)
def has_tool_block(self) -> bool:
return any(block.block_type == "tool_use" for block in self._blocks.values())
def has_content_block(self) -> bool:
return bool(self._blocks)
def has_terminal_message(self) -> bool:
return self.message_stopped
def tool_blocks(self) -> list[EmittedBlockState]:
return [
block for block in self._blocks.values() if block.block_type == "tool_use"
]
def can_salvage_tool_use(self, schemas: dict[str, ToolSchema]) -> bool:
tool_blocks = self.tool_blocks()
if not tool_blocks:
return False
for block in tool_blocks:
if not block.tool_id or not block.name:
return False
if parse_complete_tool_input(block.content, block.name, schemas) is None:
return False
return True
def append_text_suffix(self, suffix: str) -> Iterator[str]:
if not suffix:
return
active = self._last_open_block("text")
if active is None:
index = self.next_content_index()
self._max_index = max(self._max_index, index)
active = EmittedBlockState(index=index, block_type="text")
self._blocks[index] = active
self._open_stack.append(index)
yield format_sse_event(
"content_block_start",
{
"type": "content_block_start",
"index": index,
"content_block": {"type": "text", "text": ""},
},
)
active.parts.append(suffix)
yield format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": active.index,
"delta": {"type": "text_delta", "text": suffix},
},
)
def append_thinking_suffix(self, suffix: str) -> Iterator[str]:
if not suffix:
return
active = self._last_open_block("thinking")
if active is None:
index = self.next_content_index()
self._max_index = max(self._max_index, index)
active = EmittedBlockState(index=index, block_type="thinking")
self._blocks[index] = active
self._open_stack.append(index)
yield format_sse_event(
"content_block_start",
{
"type": "content_block_start",
"index": index,
"content_block": {"type": "thinking", "thinking": ""},
},
)
active.parts.append(suffix)
yield format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": active.index,
"delta": {"type": "thinking_delta", "thinking": suffix},
},
)
def append_tool_repair_suffix(
self,
tool_index: int,
suffix: str,
) -> Iterator[str]:
tool_blocks = self.tool_blocks()
if tool_index >= len(tool_blocks) or not suffix:
return
block = tool_blocks[tool_index]
block.parts.append(suffix)
yield format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": block.index,
"delta": {"type": "input_json_delta", "partial_json": suffix},
},
)
def iter_success_tail(self, stop_reason: str) -> Iterator[str]:
yield from self.iter_close_unclosed_blocks()
if self.stop_reason is None:
yield format_sse_event(
"message_delta",
{
"type": "message_delta",
"delta": {"stop_reason": stop_reason, "stop_sequence": None},
"usage": {"input_tokens": 0, "output_tokens": 1},
},
)
if not self.message_stopped:
yield format_sse_event("message_stop", {"type": "message_stop"})
def accept_tool_repair(
self,
tool_index: int,
candidate: str,
schemas: dict[str, ToolSchema],
) -> str | None:
tool_blocks = self.tool_blocks()
if tool_index >= len(tool_blocks):
return None
block = tool_blocks[tool_index]
repair = accept_tool_json_repair(
block.content,
candidate,
tool_name=block.name,
schemas=schemas,
)
return repair.suffix if repair is not None else None
def continuation_text_suffix(self, candidate: str) -> str | None:
return continuation_suffix(self.emitted_text(), candidate)
def continuation_thinking_suffix(self, candidate: str) -> str | None:
return continuation_suffix(self.emitted_thinking(), candidate)
def iter_midstream_error_tail(
self,
error_message: str,
@@ -95,3 +337,10 @@ class EmittedNativeSseTracker:
yield from sse.emit_error(error_message)
yield sse.message_delta("end_turn", 1)
yield sse.message_stop()
def _last_open_block(self, block_type: str) -> EmittedBlockState | None:
for index in reversed(self._open_stack):
block = self._blocks.get(index)
if block is not None and block.block_type == block_type and block.open:
return block
return None
+345
View File
@@ -0,0 +1,345 @@
"""Always-on recovery helpers for truncated provider streams."""
from __future__ import annotations
import json
import time
from collections.abc import Callable
from copy import deepcopy
from dataclasses import dataclass
from typing import Any
import httpx
import jsonschema
import openai
from loguru import logger
EARLY_TRANSPARENT_RETRIES = 3
MIDSTREAM_RECOVERY_ATTEMPTS = 2
EARLY_HOLDBACK_SECONDS = 0.75
RECOVERY_BUFFER_MAX_BYTES = 65_536
_RECOVERY_USER_PREFIX = (
"The previous provider stream was interrupted. Continue the assistant response "
"exactly where it stopped. Do not repeat text already written."
)
class TruncatedProviderStreamError(RuntimeError):
"""Raised internally when an upstream stream ends without a terminal marker."""
@dataclass(frozen=True, slots=True)
class ToolSchema:
"""Tool schema resolved from the original Anthropic request."""
name: str
input_schema: dict[str, Any]
@dataclass(frozen=True, slots=True)
class ToolRepair:
"""Accepted append-only tool JSON repair."""
suffix: str
parsed_input: dict[str, Any]
class RecoveryHoldbackBuffer:
"""Briefly hold downstream SSE so early stream cutoffs can be retried invisibly."""
def __init__(
self,
*,
holdback_seconds: float = EARLY_HOLDBACK_SECONDS,
max_bytes: int = RECOVERY_BUFFER_MAX_BYTES,
now: Callable[[], float] | None = None,
) -> None:
self._holdback_seconds = holdback_seconds
self._max_bytes = max_bytes
self._now = now or time.monotonic
self._events: list[str] = []
self._bytes = 0
self._started_at: float | None = None
self.committed = False
def push(self, event: str) -> list[str]:
"""Buffer ``event`` until holdback expires or cap is reached."""
if self.committed:
return [event]
if self._started_at is None:
self._started_at = self._now()
self._events.append(event)
self._bytes += len(event.encode("utf-8", errors="replace"))
if (
self._bytes >= self._max_bytes
or self._now() - self._started_at >= self._holdback_seconds
):
return self.flush()
return []
def flush(self) -> list[str]:
"""Commit and return all held events."""
if self.committed:
return []
self.committed = True
events = self._events
self._events = []
self._bytes = 0
self._started_at = None
return events
def discard(self) -> None:
"""Drop held events without committing them downstream."""
self._events = []
self._bytes = 0
self._started_at = None
@property
def has_buffered(self) -> bool:
return bool(self._events)
def is_retryable_stream_error(exc: BaseException) -> bool:
"""Return whether a provider stream error can be retried/recovered."""
if isinstance(exc, TruncatedProviderStreamError):
return True
if isinstance(exc, openai.AuthenticationError | openai.BadRequestError):
return False
if isinstance(exc, httpx.HTTPStatusError):
status = exc.response.status_code
return status == 429 or 500 <= status <= 599
if isinstance(exc, openai.RateLimitError):
return True
if isinstance(exc, openai.APIStatusError):
status = getattr(exc, "status_code", None)
return isinstance(status, int) and (status == 429 or 500 <= status <= 599)
return isinstance(
exc,
(
TimeoutError,
httpx.ReadTimeout,
httpx.ReadError,
httpx.RemoteProtocolError,
httpx.ConnectError,
httpx.NetworkError,
openai.APITimeoutError,
openai.APIConnectionError,
),
)
def tool_schemas_by_name(request: Any) -> dict[str, ToolSchema]:
"""Return Anthropic tool input schemas keyed by tool name."""
schemas: dict[str, ToolSchema] = {}
tools = getattr(request, "tools", None)
if not tools:
return schemas
for tool in tools:
name = _tool_attr(tool, "name")
if not isinstance(name, str) or not name:
continue
schema = _tool_attr(tool, "input_schema")
if not isinstance(schema, dict):
schema = {"type": "object"}
schemas[name] = ToolSchema(name=name, input_schema=deepcopy(schema))
return schemas
def validate_tool_input(
tool_name: str, parsed_input: dict[str, Any], schemas: dict[str, ToolSchema]
) -> bool:
"""Validate tool input against its JSON schema; unknown tools accept any object."""
tool_schema = schemas.get(tool_name)
if tool_schema is None:
return True
try:
validator_cls = jsonschema.validators.validator_for(tool_schema.input_schema)
validator_cls.check_schema(tool_schema.input_schema)
validator_cls(tool_schema.input_schema).validate(parsed_input)
except jsonschema.exceptions.SchemaError as exc:
logger.warning("Skipping invalid tool schema for {}: {}", tool_name, exc)
return True
except jsonschema.exceptions.ValidationError:
return False
return True
def parse_complete_tool_input(
raw_json: str, tool_name: str, schemas: dict[str, ToolSchema]
) -> dict[str, Any] | None:
"""Return parsed input when raw JSON is complete and schema-valid."""
try:
parsed = json.loads(raw_json)
except json.JSONDecodeError:
return None
if not isinstance(parsed, dict):
return None
if not validate_tool_input(tool_name, parsed, schemas):
return None
return parsed
def accept_tool_json_repair(
prefix: str,
candidate: str,
*,
tool_name: str,
schemas: dict[str, ToolSchema],
) -> ToolRepair | None:
"""Accept only append-only JSON repairs that make ``prefix`` valid."""
suffix_candidates = _repair_suffix_candidates(prefix, candidate)
for suffix in suffix_candidates:
combined = prefix + suffix
parsed = parse_complete_tool_input(combined, tool_name, schemas)
if parsed is not None:
return ToolRepair(suffix=suffix, parsed_input=parsed)
return None
def continuation_suffix(existing: str, candidate: str) -> str | None:
"""Return only the new suffix from a text/thinking continuation candidate."""
existing = existing or ""
candidate = candidate or ""
if not candidate:
return ""
if not existing:
return candidate
if candidate.startswith(existing):
return candidate[len(existing) :]
max_overlap = min(len(existing), len(candidate))
for size in range(max_overlap, 0, -1):
if existing.endswith(candidate[:size]):
return candidate[size:]
# Accept short standalone continuations, but reject full unrelated rewrites.
if len(candidate) < max(200, len(existing) // 2):
return candidate
return None
def make_openai_text_recovery_body(
body: dict[str, Any], partial: str
) -> dict[str, Any]:
"""Build a text-only OpenAI-chat continuation request."""
recovery = deepcopy(body)
recovery.pop("tools", None)
recovery.pop("tool_choice", None)
recovery["stream"] = True
messages = _copied_messages(recovery)
if partial:
messages.append({"role": "assistant", "content": partial})
messages.append({"role": "user", "content": _RECOVERY_USER_PREFIX})
recovery["messages"] = messages
return recovery
def make_openai_tool_repair_body(
body: dict[str, Any],
*,
tool_name: str,
prefix: str,
input_schema: dict[str, Any] | None,
) -> dict[str, Any]:
"""Build a text-only OpenAI-chat request asking for a JSON suffix."""
recovery = deepcopy(body)
recovery.pop("tools", None)
recovery.pop("tool_choice", None)
recovery["stream"] = True
messages = _copied_messages(recovery)
messages.append(
{
"role": "user",
"content": _tool_repair_prompt(
tool_name=tool_name, prefix=prefix, input_schema=input_schema
),
}
)
recovery["messages"] = messages
return recovery
def make_native_text_recovery_body(
body: dict[str, Any], partial: str
) -> dict[str, Any]:
"""Build a text-only native Anthropic continuation request."""
recovery = deepcopy(body)
recovery.pop("tools", None)
recovery.pop("tool_choice", None)
recovery["stream"] = True
messages = _copied_messages(recovery)
if partial:
messages.append({"role": "assistant", "content": partial})
messages.append({"role": "user", "content": _RECOVERY_USER_PREFIX})
recovery["messages"] = messages
return recovery
def make_native_tool_repair_body(
body: dict[str, Any],
*,
tool_name: str,
prefix: str,
input_schema: dict[str, Any] | None,
) -> dict[str, Any]:
"""Build a text-only native Anthropic request asking for a JSON suffix."""
recovery = deepcopy(body)
recovery.pop("tools", None)
recovery.pop("tool_choice", None)
recovery["stream"] = True
messages = _copied_messages(recovery)
messages.append(
{
"role": "user",
"content": _tool_repair_prompt(
tool_name=tool_name, prefix=prefix, input_schema=input_schema
),
}
)
recovery["messages"] = messages
return recovery
def _tool_attr(tool: Any, attr: str) -> Any:
if isinstance(tool, dict):
return tool.get(attr)
return getattr(tool, attr, None)
def _copied_messages(body: dict[str, Any]) -> list[Any]:
messages = body.get("messages")
return deepcopy(messages) if isinstance(messages, list) else []
def _repair_suffix_candidates(prefix: str, candidate: str) -> list[str]:
raw = candidate.strip()
if not raw:
return []
candidates: list[str] = []
if raw.startswith("```"):
lines = raw.splitlines()
if lines and lines[0].startswith("```"):
lines = lines[1:]
if lines and lines[-1].strip() == "```":
lines = lines[:-1]
raw = "\n".join(lines).strip()
candidates.append(raw)
if raw.startswith(prefix):
candidates.append(raw[len(prefix) :])
return list(dict.fromkeys(candidates))
def _tool_repair_prompt(
*, tool_name: str, prefix: str, input_schema: dict[str, Any] | None
) -> str:
schema_text = json.dumps(input_schema or {"type": "object"}, separators=(",", ":"))
return (
"A streamed tool call was interrupted while writing JSON arguments.\n"
f"Tool name: {tool_name}\n"
f"JSON schema: {schema_text}\n"
f"Already emitted JSON prefix: {prefix}\n\n"
"Return only the exact missing JSON suffix needed to complete the same object. "
"Do not repeat the prefix. Do not include markdown or explanation."
)
+341 -76
View File
@@ -23,6 +23,20 @@ from core.anthropic.native_sse_block_policy import (
NativeSseBlockPolicyState,
transform_native_sse_block_event,
)
from core.anthropic.stream_contracts import parse_sse_text
from core.anthropic.stream_recovery import (
EARLY_TRANSPARENT_RETRIES,
MIDSTREAM_RECOVERY_ATTEMPTS,
RecoveryHoldbackBuffer,
TruncatedProviderStreamError,
accept_tool_json_repair,
continuation_suffix,
is_retryable_stream_error,
make_native_text_recovery_body,
make_native_tool_repair_body,
parse_complete_tool_input,
tool_schemas_by_name,
)
from core.trace import provider_native_messages_body_snapshot, trace_event
from providers.base import BaseProvider, ProviderConfig
from providers.error_mapping import (
@@ -290,6 +304,19 @@ class AnthropicMessagesTransport(BaseProvider):
)
return base_message
async def _validated_stream_send(
self, body: dict, *, req_tag: str
) -> httpx.Response:
"""Send request and raise mapped HTTP errors before yielding body chunks."""
send_response = await self._send_stream_request(body)
if send_response.status_code != 200:
try:
await self._raise_for_status(send_response, req_tag=req_tag)
finally:
if not send_response.is_closed:
await _maybe_await_aclose(send_response)
return send_response
def _emit_error_events(
self,
*,
@@ -344,6 +371,168 @@ class AnthropicMessagesTransport(BaseProvider):
if output_event is not None:
yield output_event
async def _collect_native_recovery_text(
self,
body: dict[str, Any],
*,
req_tag: str,
thinking_enabled: bool,
) -> tuple[str, str]:
"""Collect text/thinking from an internal native recovery request."""
last_error: Exception | None = None
for attempt in range(MIDSTREAM_RECOVERY_ATTEMPTS):
response: httpx.Response | None = None
try:
response = await self._global_rate_limiter.execute_with_retry(
self._validated_stream_send, body, req_tag=req_tag
)
state = self._new_stream_state(None, thinking_enabled=thinking_enabled)
chunks = [
chunk
async for chunk in self._iter_stream_chunks(
response,
state=state,
thinking_enabled=thinking_enabled,
)
]
text_parts: list[str] = []
thinking_parts: list[str] = []
for event in parse_sse_text("".join(chunks)):
delta = event.data.get("delta")
if not isinstance(delta, dict):
continue
text = delta.get("text")
if isinstance(text, str):
text_parts.append(text)
thinking = delta.get("thinking")
if isinstance(thinking, str):
thinking_parts.append(thinking)
return "".join(text_parts), "".join(thinking_parts)
except Exception as error:
last_error = error
if not is_retryable_stream_error(error):
raise
trace_event(
stage="provider",
event="provider.recovery.retry",
source="provider",
provider=self._provider_name,
recovery_kind="native_text",
attempt=attempt + 1,
max_attempts=MIDSTREAM_RECOVERY_ATTEMPTS,
exc_type=type(error).__name__,
)
finally:
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
if last_error is not None:
raise last_error
return "", ""
async def _native_recovery_events(
self,
*,
body: dict[str, Any],
request: Any,
tracker: EmittedNativeSseTracker,
error: Exception,
request_id: str | None,
req_tag: str,
thinking_enabled: bool,
) -> list[str] | None:
if not is_retryable_stream_error(error):
return None
schemas = tool_schemas_by_name(request)
if tracker.has_tool_block():
repair_events: list[str] = []
for index, block in enumerate(tracker.tool_blocks()):
if (
block.tool_id
and block.name
and parse_complete_tool_input(block.content, block.name, schemas)
is not None
):
continue
schema = schemas.get(block.name)
recovery_body = make_native_tool_repair_body(
body,
tool_name=block.name,
prefix=block.content,
input_schema=schema.input_schema if schema is not None else None,
)
accepted_suffix: str | None = None
for attempt in range(MIDSTREAM_RECOVERY_ATTEMPTS):
text, _ = await self._collect_native_recovery_text(
recovery_body,
req_tag=req_tag,
thinking_enabled=thinking_enabled,
)
repair = accept_tool_json_repair(
block.content,
text,
tool_name=block.name,
schemas=schemas,
)
if repair is not None:
accepted_suffix = repair.suffix
trace_event(
stage="provider",
event="provider.recovery.tool_repaired",
source="provider",
provider=self._provider_name,
tool_name=block.name,
attempt=attempt + 1,
)
break
if accepted_suffix is None:
return None
repair_events.extend(
tracker.append_tool_repair_suffix(index, accepted_suffix)
)
if not tracker.can_salvage_tool_use(schemas):
return None
events = list(repair_events)
events.extend(tracker.iter_success_tail("tool_use"))
trace_event(
stage="provider",
event="provider.recovery.tool_salvaged",
source="provider",
provider=self._provider_name,
request_id=request_id,
)
return events
partial_text = tracker.emitted_text()
partial_thinking = tracker.emitted_thinking()
if not partial_text and not partial_thinking:
return None
recovery_body = make_native_text_recovery_body(body, partial_text)
text, thinking = await self._collect_native_recovery_text(
recovery_body,
req_tag=req_tag,
thinking_enabled=thinking_enabled,
)
text_suffix = continuation_suffix(partial_text, text)
thinking_suffix = continuation_suffix(partial_thinking, thinking)
events: list[str] = []
if thinking_suffix:
events.extend(tracker.append_thinking_suffix(thinking_suffix))
if text_suffix:
events.extend(tracker.append_text_suffix(text_suffix))
if not events:
return None
events.extend(tracker.iter_success_tail("end_turn"))
trace_event(
stage="provider",
event="provider.recovery.continued",
source="provider",
provider=self._provider_name,
request_id=request_id,
)
return events
async def stream_response(
self,
request: Any,
@@ -374,87 +563,163 @@ class AnthropicMessagesTransport(BaseProvider):
sent_any_event = False
state = self._new_stream_state(request, thinking_enabled=thinking_enabled)
emitted_tracker = EmittedNativeSseTracker()
holdback = RecoveryHoldbackBuffer()
async with self._global_rate_limiter.concurrency_slot():
try:
async def _validated_stream_send() -> httpx.Response:
"""Send request; retries apply to 429/5xx raises after structured logging."""
send_response = await self._send_stream_request(body)
if send_response.status_code != 200:
try:
await self._raise_for_status(send_response, req_tag=req_tag)
finally:
if not send_response.is_closed:
await _maybe_await_aclose(send_response)
return send_response
response = await self._global_rate_limiter.execute_with_retry(
_validated_stream_send
)
chunk_count = 0
chunk_bytes = 0
async for chunk in self._iter_stream_chunks(
response,
state=state,
thinking_enabled=thinking_enabled,
):
chunk_count += 1
chunk_bytes += len(chunk.encode("utf-8", errors="replace"))
sent_any_event = True
emitted_tracker.feed(chunk)
yield chunk
trace_event(
stage="provider",
event="provider.response.completed",
source="provider",
provider=self._provider_name,
gateway_model=request.model,
sse_chunks_out=chunk_count,
sse_bytes_out=chunk_bytes,
)
except Exception as error:
if not isinstance(error, httpx.HTTPStatusError):
self._log_stream_transport_error(
tag, req_tag, error, request_id=request_id
early_retries = 0
while True:
stream_opened = False
try:
response = await self._global_rate_limiter.execute_with_retry(
self._validated_stream_send, body, req_tag=req_tag
)
error_message = self._get_error_message(error, request_id)
stream_opened = True
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
chunk_count = 0
chunk_bytes = 0
trace_event(
stage="provider",
event="provider.response.error",
source="provider",
provider=self._provider_name,
error_message=error_message,
exc_type=type(error).__name__,
mid_stream=sent_any_event,
)
if sent_any_event:
for event in emitted_tracker.iter_close_unclosed_blocks():
yield event
for event in emitted_tracker.iter_midstream_error_tail(
error_message,
request=request,
input_tokens=input_tokens,
log_raw_sse_events=self._config.log_raw_sse_events,
async for chunk in self._iter_stream_chunks(
response,
state=state,
thinking_enabled=thinking_enabled,
):
chunk_count += 1
chunk_bytes += len(chunk.encode("utf-8", errors="replace"))
emitted_tracker.feed(chunk)
for event in holdback.push(chunk):
sent_any_event = True
yield event
if not emitted_tracker.has_terminal_message():
raise TruncatedProviderStreamError(
"Provider stream ended without message_stop."
)
trace_event(
stage="provider",
event="provider.response.completed",
source="provider",
provider=self._provider_name,
gateway_model=request.model,
sse_chunks_out=chunk_count,
sse_bytes_out=chunk_bytes,
)
for event in holdback.flush():
sent_any_event = True
yield event
else:
for event in self._emit_error_events(
request=request,
input_tokens=input_tokens,
return
except Exception as error:
committed = holdback.committed
generated_output = emitted_tracker.has_content_block()
complete_tool_salvageable = (
generated_output
and emitted_tracker.can_salvage_tool_use(
tool_schemas_by_name(request)
)
)
if (
not committed
and stream_opened
and is_retryable_stream_error(error)
and not complete_tool_salvageable
and early_retries < EARLY_TRANSPARENT_RETRIES
):
early_retries += 1
holdback.discard()
holdback = RecoveryHoldbackBuffer()
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
response = None
state = self._new_stream_state(
request, thinking_enabled=thinking_enabled
)
emitted_tracker = EmittedNativeSseTracker()
sent_any_event = False
trace_event(
stage="provider",
event="provider.recovery.early_retry",
source="provider",
provider=self._provider_name,
request_id=request_id,
attempt=early_retries,
max_attempts=EARLY_TRANSPARENT_RETRIES,
exc_type=type(error).__name__,
)
continue
if generated_output and is_retryable_stream_error(error):
try:
recovery_events = await self._native_recovery_events(
body=body,
request=request,
tracker=emitted_tracker,
error=error,
request_id=request_id,
req_tag=req_tag,
thinking_enabled=thinking_enabled,
)
except Exception as recovery_error:
trace_event(
stage="provider",
event="provider.recovery.failed",
source="provider",
provider=self._provider_name,
request_id=request_id,
exc_type=type(recovery_error).__name__,
)
recovery_events = None
if recovery_events is not None:
if not committed:
for event in holdback.flush():
sent_any_event = True
yield event
for event in recovery_events:
yield event
return
if not isinstance(error, httpx.HTTPStatusError):
self._log_stream_transport_error(
tag, req_tag, error, request_id=request_id
)
error_message = self._get_error_message(error, request_id)
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
trace_event(
stage="provider",
event="provider.response.error",
source="provider",
provider=self._provider_name,
error_message=error_message,
sent_any_event=False,
):
yield event
return
finally:
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
exc_type=type(error).__name__,
mid_stream=sent_any_event or committed or holdback.has_buffered,
)
if committed or holdback.has_buffered:
if not committed:
for event in holdback.flush():
sent_any_event = True
yield event
for event in emitted_tracker.iter_close_unclosed_blocks():
yield event
for event in emitted_tracker.iter_midstream_error_tail(
error_message,
request=request,
input_tokens=input_tokens,
log_raw_sse_events=self._config.log_raw_sse_events,
):
yield event
else:
holdback.discard()
for event in self._emit_error_events(
request=request,
input_tokens=input_tokens,
error_message=error_message,
sent_any_event=False,
):
yield event
return
finally:
if response is not None and not response.is_closed:
await _maybe_await_aclose(response)
+470 -129
View File
@@ -22,6 +22,19 @@ from core.anthropic import (
ThinkTagParser,
map_stop_reason,
)
from core.anthropic.stream_recovery import (
EARLY_TRANSPARENT_RETRIES,
MIDSTREAM_RECOVERY_ATTEMPTS,
RecoveryHoldbackBuffer,
TruncatedProviderStreamError,
accept_tool_json_repair,
continuation_suffix,
is_retryable_stream_error,
make_openai_text_recovery_body,
make_openai_tool_repair_body,
parse_complete_tool_input,
tool_schemas_by_name,
)
from core.trace import provider_chat_body_snapshot, trace_event
from providers.base import BaseProvider, ProviderConfig
from providers.error_mapping import (
@@ -367,6 +380,227 @@ class OpenAIChatTransport(BaseProvider):
)
tool_argument_alias_buffers.pop(tool_index, None)
def _has_committed_sse_output(self, sse: SSEBuilder) -> bool:
return (
sse.blocks.text_index != -1
or sse.blocks.thinking_index != -1
or sse.blocks.has_emitted_tool_block()
)
def _openai_error_message(self, error: Exception, request_id: str | None) -> str:
mapped_error = map_error(error, rate_limiter=self._global_rate_limiter)
return user_visible_message_for_mapped_provider_error(
mapped_error,
provider_name=self._provider_name,
read_timeout_s=self._config.http_read_timeout,
detail=extract_provider_error_detail(error),
request_id=request_id,
)
async def _collect_recovery_text(self, body: dict[str, Any]) -> tuple[str, str]:
"""Collect text/reasoning from an internal recovery request."""
last_error: Exception | None = None
for attempt in range(MIDSTREAM_RECOVERY_ATTEMPTS):
try:
stream, _ = await self._create_stream(body)
text_parts: list[str] = []
thinking_parts: list[str] = []
async for chunk in stream:
if not getattr(chunk, "choices", None):
continue
choice = chunk.choices[0]
delta = choice.delta
if delta is None:
continue
reasoning = getattr(delta, "reasoning_content", None)
if isinstance(reasoning, str) and reasoning:
thinking_parts.append(reasoning)
content = getattr(delta, "content", None)
if isinstance(content, str) and content:
text_parts.append(content)
return "".join(text_parts), "".join(thinking_parts)
except Exception as error:
last_error = error
if not is_retryable_stream_error(error):
raise
trace_event(
stage="provider",
event="provider.recovery.retry",
source="provider",
provider=self._provider_name,
recovery_kind="openai_text",
attempt=attempt + 1,
max_attempts=MIDSTREAM_RECOVERY_ATTEMPTS,
exc_type=type(error).__name__,
)
if last_error is not None:
raise last_error
return "", ""
def _started_tool_states(self, sse: SSEBuilder) -> list[tuple[int, Any]]:
return [
(tool_index, state)
for tool_index, state in sse.blocks.tool_states.items()
if state.started
]
def _all_started_tools_complete(self, sse: SSEBuilder, request: Any) -> bool:
schemas = tool_schemas_by_name(request)
started = self._started_tool_states(sse)
if not started:
return False
for _, state in started:
raw = "".join(state.contents)
if parse_complete_tool_input(raw, state.name, schemas) is None:
return False
return True
async def _repair_openai_tool_args(
self,
*,
body: dict[str, Any],
sse: SSEBuilder,
request: Any,
tool_argument_alias_buffers: dict[int, str],
) -> list[str] | None:
schemas = tool_schemas_by_name(request)
events: list[str] = []
for tool_index, state in self._started_tool_states(sse):
emitted_prefix = "".join(state.contents)
repair_prefix = emitted_prefix
if not repair_prefix and state.name == "Task" and state.task_arg_buffer:
repair_prefix = state.task_arg_buffer
if not repair_prefix and tool_index in tool_argument_alias_buffers:
repair_prefix = tool_argument_alias_buffers[tool_index]
if (
parse_complete_tool_input(repair_prefix, state.name, schemas)
is not None
):
if not emitted_prefix:
yield_text = repair_prefix
if yield_text:
events.append(sse.emit_tool_delta(tool_index, yield_text))
continue
schema = schemas.get(state.name)
recovery_body = make_openai_tool_repair_body(
body,
tool_name=state.name,
prefix=repair_prefix,
input_schema=schema.input_schema if schema is not None else None,
)
accepted_suffix: str | None = None
for attempt in range(MIDSTREAM_RECOVERY_ATTEMPTS):
text, _ = await self._collect_recovery_text(recovery_body)
repair = accept_tool_json_repair(
repair_prefix,
text,
tool_name=state.name,
schemas=schemas,
)
if repair is not None:
accepted_suffix = repair.suffix
trace_event(
stage="provider",
event="provider.recovery.tool_repaired",
source="provider",
provider=self._provider_name,
tool_name=state.name,
attempt=attempt + 1,
)
break
if accepted_suffix is None:
return None
to_emit = (
accepted_suffix if emitted_prefix else repair_prefix + accepted_suffix
)
if to_emit:
events.append(sse.emit_tool_delta(tool_index, to_emit))
if not self._all_started_tools_complete(sse, request):
return None
return events
async def _openai_recovery_events(
self,
*,
body: dict[str, Any],
sse: SSEBuilder,
request: Any,
request_id: str | None,
error: Exception,
tool_argument_alias_buffers: dict[int, str],
) -> list[str] | None:
if not is_retryable_stream_error(error):
return None
if sse.blocks.has_emitted_tool_block():
if not self._all_started_tools_complete(sse, request):
repair_events = await self._repair_openai_tool_args(
body=body,
sse=sse,
request=request,
tool_argument_alias_buffers=tool_argument_alias_buffers,
)
if repair_events is None:
return None
else:
repair_events = []
events = list(repair_events)
events.extend(sse.close_all_blocks())
events.append(sse.message_delta("tool_use", sse.estimate_output_tokens()))
events.append(sse.message_stop())
trace_event(
stage="provider",
event="provider.recovery.tool_salvaged",
source="provider",
provider=self._provider_name,
request_id=request_id,
)
return events
partial_text = sse.accumulated_text
partial_thinking = sse.accumulated_reasoning
if not partial_text and not partial_thinking:
return None
recovery_body = make_openai_text_recovery_body(body, partial_text)
text, thinking = await self._collect_recovery_text(recovery_body)
text_suffix = continuation_suffix(partial_text, text)
thinking_suffix = continuation_suffix(partial_thinking, thinking)
events: list[str] = []
if thinking_suffix:
for event in sse.ensure_thinking_block():
events.append(event)
events.append(sse.emit_thinking_delta(thinking_suffix))
if text_suffix:
for event in sse.ensure_text_block():
events.append(event)
events.append(sse.emit_text_delta(text_suffix))
if not events:
return None
events.extend(sse.close_all_blocks())
events.append(sse.message_delta("end_turn", sse.estimate_output_tokens()))
events.append(sse.message_stop())
trace_event(
stage="provider",
event="provider.recovery.continued",
source="provider",
provider=self._provider_name,
request_id=request_id,
)
return events
def _emit_openai_error_tail(
self, sse: SSEBuilder, error_message: str
) -> Iterator[str]:
yield from sse.close_all_blocks()
if sse.blocks.has_emitted_tool_block():
yield sse.emit_top_level_error(error_message)
else:
yield from sse.emit_error(error_message)
yield sse.message_delta("end_turn", 1)
yield sse.message_stop()
async def stream_response(
self,
request: Any,
@@ -393,12 +627,24 @@ class OpenAIChatTransport(BaseProvider):
"""Shared streaming implementation."""
tag = self._provider_name
message_id = f"msg_{uuid.uuid4()}"
sse = SSEBuilder(
message_id,
request.model,
input_tokens,
log_raw_events=self._config.log_raw_sse_events,
)
def new_sse_builder() -> SSEBuilder:
return SSEBuilder(
message_id,
request.model,
input_tokens,
log_raw_events=self._config.log_raw_sse_events,
)
sse = new_sse_builder()
holdback = RecoveryHoldbackBuffer()
def hold_event(event: str) -> Iterator[str]:
yield from holdback.push(event)
def hold_events(events: Iterator[str]) -> Iterator[str]:
for event in events:
yield from hold_event(event)
body = self._build_request_body(request, thinking_enabled=thinking_enabled)
thinking_enabled = self._is_thinking_enabled(request, thinking_enabled)
@@ -425,122 +671,204 @@ class OpenAIChatTransport(BaseProvider):
tool_argument_alias_buffers: dict[int, str] = {}
async with self._global_rate_limiter.concurrency_slot():
try:
stream, body = await self._create_stream(body)
tool_argument_aliases = self._tool_argument_aliases(body)
async for chunk in stream:
if getattr(chunk, "usage", None):
usage_info = chunk.usage
early_retries = 0
while True:
stream_opened = False
try:
stream, body = await self._create_stream(body)
stream_opened = True
tool_argument_aliases = self._tool_argument_aliases(body)
async for chunk in stream:
if getattr(chunk, "usage", None):
usage_info = chunk.usage
if not chunk.choices:
continue
if not chunk.choices:
continue
choice = chunk.choices[0]
delta = choice.delta
if delta is None:
continue
choice = chunk.choices[0]
delta = choice.delta
if delta is None:
continue
if choice.finish_reason:
finish_reason = choice.finish_reason
logger.debug("{} finish_reason: {}", tag, finish_reason)
if choice.finish_reason:
finish_reason = choice.finish_reason
logger.debug("{} finish_reason: {}", tag, finish_reason)
# Handle reasoning_content (OpenAI extended format)
reasoning = getattr(delta, "reasoning_content", None)
if thinking_enabled and reasoning:
for event in sse.ensure_thinking_block():
yield event
yield sse.emit_thinking_delta(reasoning)
# Provider-specific extra reasoning (e.g. OpenRouter reasoning_details)
for event in self._handle_extra_reasoning(
delta,
sse,
thinking_enabled=thinking_enabled,
):
yield event
# Handle text content
if delta.content:
for part in think_parser.feed(delta.content):
if part.type == ContentType.THINKING:
if not thinking_enabled:
continue
for event in sse.ensure_thinking_block():
yield event
yield sse.emit_thinking_delta(part.content)
else:
filtered_text, detected_tools = heuristic_parser.feed(
part.content
)
if filtered_text:
for event in sse.ensure_text_block():
yield event
yield sse.emit_text_delta(filtered_text)
for tool_use in detected_tools:
for event in _iter_heuristic_tool_use_sse(
sse, tool_use
):
yield event
# Handle native tool calls
if delta.tool_calls:
for event in sse.close_content_blocks():
yield event
for tc in delta.tool_calls:
extra_content = _tool_call_extra_content(tc)
tc_info = {
"index": tc.index,
"id": tc.id,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
if extra_content:
tc_info["extra_content"] = extra_content
for event in self._process_tool_call(
tc_info,
sse,
tool_argument_aliases=tool_argument_aliases,
tool_argument_alias_buffers=tool_argument_alias_buffers,
):
# Handle reasoning_content (OpenAI extended format)
reasoning = getattr(delta, "reasoning_content", None)
if thinking_enabled and reasoning:
for event in hold_events(sse.ensure_thinking_block()):
yield event
for event in hold_event(sse.emit_thinking_delta(reasoning)):
yield event
except asyncio.CancelledError, GeneratorExit:
raise
except Exception as e:
self._log_stream_transport_error(tag, req_tag, e, request_id=request_id)
mapped_e = map_error(e, rate_limiter=self._global_rate_limiter)
base_message = user_visible_message_for_mapped_provider_error(
mapped_e,
provider_name=tag,
read_timeout_s=self._config.http_read_timeout,
detail=extract_provider_error_detail(e),
request_id=request_id,
)
error_message = base_message
trace_event(
stage="provider",
event="provider.response.error",
source="provider",
provider=tag,
error_message=error_message,
mapped_error_type=type(mapped_e).__name__,
)
for event in sse.close_all_blocks():
yield event
if sse.blocks.has_emitted_tool_block():
# Avoid a second assistant text block after an emitted tool_use, which
# breaks OpenAI history replay (issue #206) when Claude Code stores it.
yield sse.emit_top_level_error(error_message)
else:
for event in sse.emit_error(error_message):
# Provider-specific extra reasoning (e.g. OpenRouter reasoning_details)
for event in self._handle_extra_reasoning(
delta,
sse,
thinking_enabled=thinking_enabled,
):
for out_event in hold_event(event):
yield out_event
# Handle text content
if delta.content:
for part in think_parser.feed(delta.content):
if part.type == ContentType.THINKING:
if not thinking_enabled:
continue
for event in hold_events(
sse.ensure_thinking_block()
):
yield event
for event in hold_event(
sse.emit_thinking_delta(part.content)
):
yield event
else:
(
filtered_text,
detected_tools,
) = heuristic_parser.feed(part.content)
if filtered_text:
for event in hold_events(
sse.ensure_text_block()
):
yield event
for event in hold_event(
sse.emit_text_delta(filtered_text)
):
yield event
for tool_use in detected_tools:
for event in _iter_heuristic_tool_use_sse(
sse, tool_use
):
for out_event in hold_event(event):
yield out_event
# Handle native tool calls
if delta.tool_calls:
for event in hold_events(sse.close_content_blocks()):
yield event
for tc in delta.tool_calls:
extra_content = _tool_call_extra_content(tc)
tc_info = {
"index": tc.index,
"id": tc.id,
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
if extra_content:
tc_info["extra_content"] = extra_content
for event in self._process_tool_call(
tc_info,
sse,
tool_argument_aliases=tool_argument_aliases,
tool_argument_alias_buffers=tool_argument_alias_buffers,
):
for out_event in hold_event(event):
yield out_event
if finish_reason is None:
raise TruncatedProviderStreamError(
"Provider stream ended without finish_reason."
)
break
except asyncio.CancelledError, GeneratorExit:
raise
except Exception as e:
committed = holdback.committed
generated_output = self._has_committed_sse_output(sse)
complete_tool_salvageable = (
generated_output
and sse.blocks.has_emitted_tool_block()
and self._all_started_tools_complete(sse, request)
)
if (
not committed
and stream_opened
and is_retryable_stream_error(e)
and not complete_tool_salvageable
and early_retries < EARLY_TRANSPARENT_RETRIES
):
early_retries += 1
holdback.discard()
holdback = RecoveryHoldbackBuffer()
sse = new_sse_builder()
think_parser = ThinkTagParser()
heuristic_parser = HeuristicToolParser()
finish_reason = None
usage_info = None
tool_argument_aliases = {}
tool_argument_alias_buffers = {}
trace_event(
stage="provider",
event="provider.recovery.early_retry",
source="provider",
provider=tag,
request_id=request_id,
attempt=early_retries,
max_attempts=EARLY_TRANSPARENT_RETRIES,
exc_type=type(e).__name__,
)
continue
if generated_output and is_retryable_stream_error(e):
try:
recovery_events = await self._openai_recovery_events(
body=body,
sse=sse,
request=request,
request_id=request_id,
error=e,
tool_argument_alias_buffers=tool_argument_alias_buffers,
)
except Exception as recovery_error:
trace_event(
stage="provider",
event="provider.recovery.failed",
source="provider",
provider=tag,
request_id=request_id,
exc_type=type(recovery_error).__name__,
)
recovery_events = None
if recovery_events is not None:
if not committed:
for event in holdback.flush():
yield event
for event in recovery_events:
yield event
return
self._log_stream_transport_error(
tag, req_tag, e, request_id=request_id
)
error_message = self._openai_error_message(e, request_id)
trace_event(
stage="provider",
event="provider.response.error",
source="provider",
provider=tag,
error_message=error_message,
mapped_error_type=type(
map_error(e, rate_limiter=self._global_rate_limiter)
).__name__,
)
if not committed and holdback.has_buffered:
for event in holdback.flush():
yield event
elif not committed:
holdback.discard()
sse = new_sse_builder()
for event in self._emit_openai_error_tail(sse, error_message):
yield event
yield sse.message_delta("end_turn", 1)
yield sse.message_stop()
return
return
# Flush remaining content
remaining = think_parser.flush()
@@ -549,17 +877,20 @@ class OpenAIChatTransport(BaseProvider):
if not thinking_enabled:
remaining = None
else:
for event in sse.ensure_thinking_block():
for event in hold_events(sse.ensure_thinking_block()):
yield event
for event in hold_event(sse.emit_thinking_delta(remaining.content)):
yield event
yield sse.emit_thinking_delta(remaining.content)
if remaining and remaining.type == ContentType.TEXT:
for event in sse.ensure_text_block():
for event in hold_events(sse.ensure_text_block()):
yield event
for event in hold_event(sse.emit_text_delta(remaining.content)):
yield event
yield sse.emit_text_delta(remaining.content)
for tool_use in heuristic_parser.flush():
for event in _iter_heuristic_tool_use_sse(sse, tool_use):
yield event
for out_event in hold_event(event):
yield out_event
has_started_tool = any(s.started for s in sse.blocks.tool_states.values())
has_content_blocks = (
@@ -568,9 +899,10 @@ class OpenAIChatTransport(BaseProvider):
or has_started_tool
)
if not has_content_blocks:
for event in sse.ensure_text_block():
for event in hold_events(sse.ensure_text_block()):
yield event
for event in hold_event(sse.emit_text_delta(" ")):
yield event
yield sse.emit_text_delta(" ")
elif (
not has_started_tool
and not sse.accumulated_text.strip()
@@ -579,19 +911,22 @@ class OpenAIChatTransport(BaseProvider):
# Some OpenAI-compatible models (e.g. NIM reasoning templates) stream only
# ``reasoning_content`` with no ``content``; emit a minimal text block so
# clients and smoke ``text_content()`` see a completed assistant message.
for event in sse.ensure_text_block():
for event in hold_events(sse.ensure_text_block()):
yield event
for event in hold_event(sse.emit_text_delta(" ")):
yield event
yield sse.emit_text_delta(" ")
for event in self._flush_tool_argument_alias_buffers(
sse, tool_argument_aliases, tool_argument_alias_buffers
):
yield event
for out_event in hold_event(event):
yield out_event
for event in self._flush_task_arg_buffers(sse):
yield event
for out_event in hold_event(event):
yield out_event
for event in sse.close_all_blocks():
for event in hold_events(sse.close_all_blocks()):
yield event
completion = (
@@ -621,5 +956,11 @@ class OpenAIChatTransport(BaseProvider):
output_tokens=output_tokens,
prompt_tokens_estimate=input_tokens,
)
yield sse.message_delta(map_stop_reason(finish_reason), output_tokens)
yield sse.message_stop()
for event in hold_event(
sse.message_delta(map_stop_reason(finish_reason), output_tokens)
):
yield event
for event in hold_event(sse.message_stop()):
yield event
for event in holdback.flush():
yield event
+1
View File
@@ -22,6 +22,7 @@ dependencies = [
"openai>=2.38.0",
"loguru>=0.7.0",
"aiohttp>=3.13.4",
"jsonschema>=4.25.0",
]
[project.scripts]
@@ -0,0 +1,104 @@
"""Unit tests for resilient stream recovery helpers."""
import httpx
from core.anthropic.stream_recovery import (
RecoveryHoldbackBuffer,
ToolSchema,
accept_tool_json_repair,
continuation_suffix,
is_retryable_stream_error,
)
def test_retryable_stream_error_classifies_transport_and_http_status() -> None:
assert is_retryable_stream_error(httpx.ReadError("cut off"))
request = httpx.Request("GET", "https://example.test")
assert is_retryable_stream_error(
httpx.HTTPStatusError(
"server error", request=request, response=httpx.Response(503)
)
)
assert not is_retryable_stream_error(
httpx.HTTPStatusError(
"bad request", request=request, response=httpx.Response(400)
)
)
def test_continuation_suffix_trims_overlap() -> None:
assert continuation_suffix("hello wor", "world") == "ld"
assert continuation_suffix("alpha", "alpha beta") == " beta"
assert continuation_suffix("", "fresh") == "fresh"
def test_tool_json_repair_requires_append_only_schema_valid_json() -> None:
schemas = {
"Echo": ToolSchema(
name="Echo",
input_schema={
"type": "object",
"properties": {"message": {"type": "string"}},
"required": ["message"],
"additionalProperties": False,
},
)
}
accepted = accept_tool_json_repair(
'{"message":',
'"ok"}',
tool_name="Echo",
schemas=schemas,
)
assert accepted is not None
assert accepted.suffix == '"ok"}'
assert accepted.parsed_input == {"message": "ok"}
assert (
accept_tool_json_repair(
'{"message":',
"1}",
tool_name="Echo",
schemas=schemas,
)
is None
)
def test_holdback_buffers_until_delay_then_commits() -> None:
now = [10.0]
holdback = RecoveryHoldbackBuffer(holdback_seconds=0.75, now=lambda: now[0])
assert holdback.push("event: content_block_start\n\n") == []
now[0] += 0.74
assert holdback.push("event: content_block_delta\n\n") == []
assert not holdback.committed
now[0] += 0.01
flushed = holdback.push("event: content_block_stop\n\n")
assert flushed == [
"event: content_block_start\n\n",
"event: content_block_delta\n\n",
"event: content_block_stop\n\n",
]
assert holdback.committed
assert holdback.push("event: message_stop\n\n") == ["event: message_stop\n\n"]
def test_holdback_flushes_at_internal_buffer_cap() -> None:
holdback = RecoveryHoldbackBuffer(max_bytes=5, now=lambda: 1.0)
assert holdback.push("ab") == []
assert holdback.push("cde") == ["ab", "cde"]
assert holdback.committed
def test_holdback_discard_drops_uncommitted_events() -> None:
holdback = RecoveryHoldbackBuffer(now=lambda: 1.0)
assert holdback.push("hidden") == []
holdback.discard()
assert holdback.flush() == []
+258 -1
View File
@@ -51,11 +51,13 @@ class FakeResponse:
lines=None,
text="",
raise_after_line_index: int | None = None,
raise_error: Exception | None = None,
):
self.status_code = status_code
self._lines = lines or []
self._text = text
self._raise_after_line_index = raise_after_line_index
self._raise_error = raise_error or RuntimeError("mid-stream failure")
self.is_closed = False
self.request = httpx.Request("POST", "https://example.test/v1/messages")
self.headers = httpx.Headers()
@@ -67,7 +69,7 @@ class FakeResponse:
self._raise_after_line_index is not None
and i >= self._raise_after_line_index
):
raise RuntimeError("mid-stream failure")
raise self._raise_error
async def aread(self):
return self._text.encode()
@@ -89,6 +91,13 @@ class FakeResponse:
yield data[offset : offset + chunk_size]
def _lines_from_events(*events: str) -> list[str]:
lines: list[str] = []
for event in events:
lines.extend(event.splitlines())
return lines
@pytest.fixture
def provider_config():
return ProviderConfig(
@@ -175,6 +184,9 @@ async def test_stream_uses_retry_builds_request_and_closes_response(
"event: message_start",
'data: {"type":"message_start"}',
"",
"event: message_stop",
'data: {"type":"message_stop"}',
"",
]
)
@@ -195,6 +207,9 @@ async def test_stream_uses_retry_builds_request_and_closes_response(
"event: message_start\n",
'data: {"type":"message_start"}\n',
"\n",
"event: message_stop\n",
'data: {"type":"message_stop"}\n',
"\n",
]
assert response.is_closed
assert mock_build.call_args.args[:2] == ("POST", "/messages")
@@ -293,3 +308,245 @@ async def test_midstream_error_closes_open_block_and_uses_fresh_content_index(
assert event_index(starts[0]) == 0
assert event_index(starts[-1]) == 1
assert {event_index(e) for e in parsed if e.event == "content_block_stop"} == {0, 1}
@pytest.mark.asyncio
async def test_clean_eof_after_complete_native_tool_call_salvages_tool_use(
provider_config,
):
"""Native stream EOF after complete tool args gets a deterministic tool_use tail."""
provider = NativeProvider(provider_config)
req = MockRequest()
msg_start = format_sse_event(
"message_start",
{
"type": "message_start",
"message": {
"id": "msg_tool_eof",
"type": "message",
"role": "assistant",
"content": [],
"model": "test-model",
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 1, "output_tokens": 1},
},
},
)
block_start = format_sse_event(
"content_block_start",
{
"type": "content_block_start",
"index": 0,
"content_block": {
"type": "tool_use",
"id": "toolu_eof",
"name": "echo_smoke",
"input": {},
},
},
)
args = format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "input_json_delta", "partial_json": "{}"},
},
)
lines: list[str] = []
for blob in (msg_start, block_start, args):
lines.extend(blob.splitlines())
response = FakeResponse(lines=lines)
with (
patch.object(provider._client, "build_request", return_value=MagicMock()),
patch.object(
provider._client,
"send",
new_callable=AsyncMock,
return_value=response,
),
):
events = [e async for e in provider.stream_response(req)]
parsed = parse_sse_text("".join(events))
assert parsed[-1].event == "message_stop"
assert any(
event.event == "message_delta"
and event.data.get("delta", {}).get("stop_reason") == "tool_use"
for event in parsed
)
assert not any(event.event == "error" for event in parsed)
@pytest.mark.asyncio
async def test_clean_eof_after_native_text_continues_with_overlap_trim(
provider_config,
):
"""Native text truncation is continued and overlap-trimmed."""
provider = NativeProvider(provider_config)
req = MockRequest()
msg_start = format_sse_event(
"message_start",
{
"type": "message_start",
"message": {
"id": "msg_text_eof",
"type": "message",
"role": "assistant",
"content": [],
"model": "test-model",
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 1, "output_tokens": 1},
},
},
)
block_start = format_sse_event(
"content_block_start",
{
"type": "content_block_start",
"index": 0,
"content_block": {"type": "text", "text": ""},
},
)
text_delta = format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "text_delta", "text": "hello wor"},
},
)
lines: list[str] = []
for blob in (msg_start, block_start, text_delta):
lines.extend(blob.splitlines())
response = FakeResponse(lines=lines)
with (
patch.object(provider._client, "build_request", return_value=MagicMock()),
patch.object(
provider._client,
"send",
new_callable=AsyncMock,
return_value=response,
),
patch.object(
provider,
"_collect_native_recovery_text",
new_callable=AsyncMock,
return_value=("world", ""),
),
):
events = [e async for e in provider.stream_response(req)]
parsed = parse_sse_text("".join(events))
text = "".join(
event.data.get("delta", {}).get("text", "")
for event in parsed
if event.event == "content_block_delta"
)
assert text == "hello world"
assert any(
event.event == "message_delta"
and event.data.get("delta", {}).get("stop_reason") == "end_turn"
for event in parsed
)
assert not any(event.event == "error" for event in parsed)
@pytest.mark.asyncio
async def test_precommit_native_holdback_retries_without_leaking_partial(
provider_config,
):
"""A retryable early cutoff before holdback commit is retried invisibly."""
provider = NativeProvider(provider_config)
req = MockRequest()
msg_start = format_sse_event(
"message_start",
{
"type": "message_start",
"message": {
"id": "msg_holdback",
"type": "message",
"role": "assistant",
"content": [],
"model": "test-model",
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": 1, "output_tokens": 1},
},
},
)
block_start = format_sse_event(
"content_block_start",
{
"type": "content_block_start",
"index": 0,
"content_block": {"type": "text", "text": ""},
},
)
hidden_delta = format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "text_delta", "text": "hidden"},
},
)
visible_delta = format_sse_event(
"content_block_delta",
{
"type": "content_block_delta",
"index": 0,
"delta": {"type": "text_delta", "text": "visible"},
},
)
block_stop = format_sse_event(
"content_block_stop",
{"type": "content_block_stop", "index": 0},
)
message_delta = format_sse_event(
"message_delta",
{
"type": "message_delta",
"delta": {"stop_reason": "end_turn", "stop_sequence": None},
"usage": {"input_tokens": 1, "output_tokens": 1},
},
)
message_stop = format_sse_event("message_stop", {"type": "message_stop"})
first_lines = _lines_from_events(msg_start, block_start, hidden_delta)
first = FakeResponse(
lines=first_lines,
raise_after_line_index=len(first_lines) - 1,
raise_error=httpx.ReadError("early cutoff"),
)
second = FakeResponse(
lines=_lines_from_events(
msg_start,
block_start,
visible_delta,
block_stop,
message_delta,
message_stop,
),
)
with (
patch.object(provider._client, "build_request", return_value=MagicMock()),
patch.object(
provider._client,
"send",
new_callable=AsyncMock,
side_effect=[first, second],
) as mock_send,
):
events = [e async for e in provider.stream_response(req)]
event_text = "".join(events)
assert mock_send.await_count == 2
assert "hidden" not in event_text
assert "visible" in event_text
assert parse_sse_text(event_text)[-1].event == "message_stop"
@@ -41,6 +41,9 @@ async def test_native_stream_retries_on_http_429_then_streams(provider_config):
"event: message_start",
'data: {"type":"message_start"}',
"",
"event: message_stop",
'data: {"type":"message_stop"}',
"",
]
ok_response = FakeResponse(lines=ok_lines)
too_many = FakeResponse(status_code=429, text="rate limited")
@@ -75,6 +78,9 @@ async def test_native_stream_retries_on_http_429_then_streams(provider_config):
"event: message_start\n",
'data: {"type":"message_start"}\n',
"\n",
"event: message_stop\n",
'data: {"type":"message_stop"}\n',
"\n",
]
finally:
GlobalRateLimiter.reset_instance()
@@ -95,6 +101,9 @@ async def test_native_stream_retries_on_http_5xx_then_streams(
"event: message_start",
'data: {"type":"message_start"}',
"",
"event: message_stop",
'data: {"type":"message_stop"}',
"",
]
ok_response = FakeResponse(lines=ok_lines)
bad = FakeResponse(status_code=status_code, text="upstream error")
@@ -129,6 +138,9 @@ async def test_native_stream_retries_on_http_5xx_then_streams(
"event: message_start\n",
'data: {"type":"message_start"}\n',
"\n",
"event: message_stop\n",
'data: {"type":"message_stop"}\n',
"\n",
]
finally:
GlobalRateLimiter.reset_instance()
+9
View File
@@ -294,9 +294,18 @@ async def test_stream_response_thinking_reasoning_content(nim_provider):
)
]
mock_chunk.usage = None
stop_chunk = MagicMock()
stop_chunk.choices = [
MagicMock(
delta=MagicMock(content=None, reasoning_content=None, tool_calls=None),
finish_reason="stop",
)
]
stop_chunk.usage = None
async def mock_stream():
yield mock_chunk
yield stop_chunk
with patch.object(
nim_provider._client.chat.completions, "create", new_callable=AsyncMock
@@ -42,7 +42,7 @@ async def test_nim_stream_retries_on_openai_5xx_then_streams(status_code):
mock_chunk.choices = [
MagicMock(
delta=MagicMock(content="Hi", reasoning_content=""),
finish_reason=None,
finish_reason="stop",
)
]
mock_chunk.usage = None
+142
View File
@@ -600,6 +600,148 @@ class TestStreamingExceptionHandling:
assert "Request ID: REQ_TOOL_BODY" in event_text
_assert_error_not_in_text_deltas_after_tool(events, "bad after tool")
@pytest.mark.asyncio
async def test_clean_eof_after_complete_tool_call_salvages_tool_use(self):
"""A complete tool JSON payload missing finish_reason is committed as tool_use."""
provider = _make_provider()
request = _make_request()
tool_chunk = _make_tool_calls_chunk(
name="echo_smoke", arguments='{"message":"ok"}', tool_id="call_eof"
)
stream_mock = AsyncStreamMock([tool_chunk])
with patch.object(
provider._client.chat.completions,
"create",
new_callable=AsyncMock,
return_value=stream_mock,
):
events = await _collect_stream(provider, request)
parsed = parse_sse_text("".join(events))
assert parsed[-1].event == "message_stop"
assert any(
event.event == "message_delta"
and event.data.get("delta", {}).get("stop_reason") == "tool_use"
for event in parsed
)
assert not any(event.event == "error" for event in parsed)
@pytest.mark.asyncio
async def test_precommit_openai_holdback_retries_without_leaking_partial(self):
"""A retryable early cutoff before holdback commit is retried invisibly."""
provider = _make_provider()
request = _make_request()
first_stream = AsyncStreamMock(
[_make_chunk(content="hidden")],
error=httpx.ReadError("early cutoff"),
)
second_stream = AsyncStreamMock(
[
_make_chunk(content="visible"),
_make_chunk(finish_reason="stop"),
]
)
with patch.object(
provider._client.chat.completions,
"create",
new_callable=AsyncMock,
side_effect=[first_stream, second_stream],
) as mock_create:
events = await _collect_stream(provider, request)
event_text = "".join(events)
assert mock_create.await_count == 2
assert "hidden" not in event_text
assert "visible" in event_text
assert parse_sse_text(event_text)[-1].event == "message_stop"
@pytest.mark.asyncio
async def test_clean_eof_after_text_continues_with_overlap_trim(self):
"""A truncated text stream is continued and duplicate overlap is trimmed."""
provider = _make_provider()
request = _make_request()
stream_mock = AsyncStreamMock([_make_chunk(content="hello wor")])
with (
patch.object(
provider._client.chat.completions,
"create",
new_callable=AsyncMock,
return_value=stream_mock,
),
patch.object(
provider,
"_collect_recovery_text",
new_callable=AsyncMock,
return_value=("world", ""),
),
):
events = await _collect_stream(provider, request)
parsed = parse_sse_text("".join(events))
text = "".join(
event.data.get("delta", {}).get("text", "")
for event in parsed
if event.event == "content_block_delta"
)
assert text == "hello world"
assert any(
event.event == "message_delta"
and event.data.get("delta", {}).get("stop_reason") == "end_turn"
for event in parsed
)
assert not any(event.event == "error" for event in parsed)
@pytest.mark.asyncio
async def test_incomplete_tool_call_repair_appends_schema_valid_suffix(self):
"""A truncated tool JSON prefix is repaired append-only before tool_use tail."""
provider = _make_provider()
request = _make_request()
request.tools = [
SimpleNamespace(
name="echo_smoke",
description="Echo",
input_schema={
"type": "object",
"properties": {"message": {"type": "string"}},
"required": ["message"],
"additionalProperties": False,
},
)
]
tool_chunk = _make_tool_calls_chunk(
name="echo_smoke", arguments='{"message":', tool_id="call_repair"
)
stream_mock = AsyncStreamMock([tool_chunk])
with (
patch.object(
provider._client.chat.completions,
"create",
new_callable=AsyncMock,
return_value=stream_mock,
),
patch.object(
provider,
"_collect_recovery_text",
new_callable=AsyncMock,
return_value=('"ok"}', ""),
),
):
events = await _collect_stream(provider, request)
event_text = "".join(events)
parsed = parse_sse_text(event_text)
assert '"partial_json": "\\"ok\\"}"' in event_text
assert any(
event.event == "message_delta"
and event.data.get("delta", {}).get("stop_reason") == "tool_use"
for event in parsed
)
assert not any(event.event == "error" for event in parsed)
@pytest.mark.asyncio
async def test_stream_rate_limited_retries_via_execute_with_retry(self):
"""When rate limited, execute_with_retry handles retries transparently."""
+6
View File
@@ -197,6 +197,9 @@ async def test_stream_uses_post_messages_path(wafer_provider):
"event: message_start",
'data: {"type":"message_start"}',
"",
"event: message_stop",
'data: {"type":"message_stop"}',
"",
]
)
@@ -217,6 +220,9 @@ async def test_stream_uses_post_messages_path(wafer_provider):
"event: message_start\n",
'data: {"type":"message_start"}\n',
"\n",
"event: message_stop\n",
'data: {"type":"message_stop"}\n',
"\n",
]
assert response.is_closed
assert mock_build.call_args.args[:2] == ("POST", "/messages")
Generated
+108
View File
@@ -561,6 +561,7 @@ dependencies = [
{ name = "discord-py" },
{ name = "fastapi", extra = ["standard"] },
{ name = "httpx", extra = ["socks"] },
{ name = "jsonschema" },
{ name = "loguru" },
{ name = "markdown-it-py" },
{ name = "openai" },
@@ -604,6 +605,7 @@ requires-dist = [
{ name = "grpcio", marker = "extra == 'voice'", specifier = ">=1.80.0" },
{ name = "grpcio-tools", marker = "extra == 'voice'", specifier = ">=1.80.0" },
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1" },
{ name = "jsonschema", specifier = ">=4.25.0" },
{ name = "librosa", marker = "extra == 'voice-local'", specifier = ">=0.10.0" },
{ name = "loguru", specifier = ">=0.7.0" },
{ name = "markdown-it-py", specifier = ">=4.2.0" },
@@ -897,6 +899,33 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
]
[[package]]
name = "jsonschema"
version = "4.26.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "jsonschema-specifications" },
{ name = "referencing" },
{ name = "rpds-py" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
]
[[package]]
name = "jsonschema-specifications"
version = "2025.9.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "referencing" },
]
sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
]
[[package]]
name = "lazy-loader"
version = "0.4"
@@ -1663,6 +1692,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]
[[package]]
name = "referencing"
version = "0.37.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
{ name = "rpds-py" },
]
sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
]
[[package]]
name = "regex"
version = "2026.1.15"
@@ -1783,6 +1825,72 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" },
]
[[package]]
name = "rpds-py"
version = "2026.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459, upload-time = "2026-05-28T12:02:13.232Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756, upload-time = "2026-05-28T12:00:20.217Z" },
{ url = "https://files.pythonhosted.org/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831, upload-time = "2026-05-28T12:00:21.711Z" },
{ url = "https://files.pythonhosted.org/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127, upload-time = "2026-05-28T12:00:23.326Z" },
{ url = "https://files.pythonhosted.org/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034, upload-time = "2026-05-28T12:00:24.89Z" },
{ url = "https://files.pythonhosted.org/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823, upload-time = "2026-05-28T12:00:26.676Z" },
{ url = "https://files.pythonhosted.org/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144, upload-time = "2026-05-28T12:00:28.577Z" },
{ url = "https://files.pythonhosted.org/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959, upload-time = "2026-05-28T12:00:30.304Z" },
{ url = "https://files.pythonhosted.org/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558, upload-time = "2026-05-28T12:00:31.764Z" },
{ url = "https://files.pythonhosted.org/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789, upload-time = "2026-05-28T12:00:33.247Z" },
{ url = "https://files.pythonhosted.org/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405, upload-time = "2026-05-28T12:00:34.819Z" },
{ url = "https://files.pythonhosted.org/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975, upload-time = "2026-05-28T12:00:36.268Z" },
{ url = "https://files.pythonhosted.org/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701, upload-time = "2026-05-28T12:00:37.82Z" },
{ url = "https://files.pythonhosted.org/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806, upload-time = "2026-05-28T12:00:39.492Z" },
{ url = "https://files.pythonhosted.org/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985, upload-time = "2026-05-28T12:00:40.94Z" },
{ url = "https://files.pythonhosted.org/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219, upload-time = "2026-05-28T12:00:43.15Z" },
{ url = "https://files.pythonhosted.org/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148, upload-time = "2026-05-28T12:00:44.638Z" },
{ url = "https://files.pythonhosted.org/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196, upload-time = "2026-05-28T12:00:46.14Z" },
{ url = "https://files.pythonhosted.org/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981, upload-time = "2026-05-28T12:00:47.531Z" },
{ url = "https://files.pythonhosted.org/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961, upload-time = "2026-05-28T12:00:49.216Z" },
{ url = "https://files.pythonhosted.org/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965, upload-time = "2026-05-28T12:00:51.111Z" },
{ url = "https://files.pythonhosted.org/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526, upload-time = "2026-05-28T12:00:52.77Z" },
{ url = "https://files.pythonhosted.org/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190, upload-time = "2026-05-28T12:00:54.215Z" },
{ url = "https://files.pythonhosted.org/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921, upload-time = "2026-05-28T12:00:55.673Z" },
{ url = "https://files.pythonhosted.org/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766, upload-time = "2026-05-28T12:00:57.518Z" },
{ url = "https://files.pythonhosted.org/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343, upload-time = "2026-05-28T12:00:58.978Z" },
{ url = "https://files.pythonhosted.org/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 618502, upload-time = "2026-05-28T12:01:00.656Z" },
{ url = "https://files.pythonhosted.org/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916, upload-time = "2026-05-28T12:01:02.22Z" },
{ url = "https://files.pythonhosted.org/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855, upload-time = "2026-05-28T12:01:03.821Z" },
{ url = "https://files.pythonhosted.org/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422, upload-time = "2026-05-28T12:01:05.194Z" },
{ url = "https://files.pythonhosted.org/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576, upload-time = "2026-05-28T12:01:06.722Z" },
{ url = "https://files.pythonhosted.org/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640, upload-time = "2026-05-28T12:01:08.441Z" },
{ url = "https://files.pythonhosted.org/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322, upload-time = "2026-05-28T12:01:09.934Z" },
{ url = "https://files.pythonhosted.org/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066, upload-time = "2026-05-28T12:01:11.48Z" },
{ url = "https://files.pythonhosted.org/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586, upload-time = "2026-05-28T12:01:13.051Z" },
{ url = "https://files.pythonhosted.org/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415, upload-time = "2026-05-28T12:01:14.631Z" },
{ url = "https://files.pythonhosted.org/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427, upload-time = "2026-05-28T12:01:16.153Z" },
{ url = "https://files.pythonhosted.org/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615, upload-time = "2026-05-28T12:01:17.809Z" },
{ url = "https://files.pythonhosted.org/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786, upload-time = "2026-05-28T12:01:19.419Z" },
{ url = "https://files.pythonhosted.org/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583, upload-time = "2026-05-28T12:01:21.013Z" },
{ url = "https://files.pythonhosted.org/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941, upload-time = "2026-05-28T12:01:22.548Z" },
{ url = "https://files.pythonhosted.org/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349, upload-time = "2026-05-28T12:01:24.118Z" },
{ url = "https://files.pythonhosted.org/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922, upload-time = "2026-05-28T12:01:25.522Z" },
{ url = "https://files.pythonhosted.org/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003, upload-time = "2026-05-28T12:01:27.062Z" },
{ url = "https://files.pythonhosted.org/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245, upload-time = "2026-05-28T12:01:28.51Z" },
{ url = "https://files.pythonhosted.org/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015, upload-time = "2026-05-28T12:01:30.214Z" },
{ url = "https://files.pythonhosted.org/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016, upload-time = "2026-05-28T12:01:31.656Z" },
{ url = "https://files.pythonhosted.org/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775, upload-time = "2026-05-28T12:01:33.136Z" },
{ url = "https://files.pythonhosted.org/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270, upload-time = "2026-05-28T12:01:34.574Z" },
{ url = "https://files.pythonhosted.org/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285, upload-time = "2026-05-28T12:01:36.127Z" },
{ url = "https://files.pythonhosted.org/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581, upload-time = "2026-05-28T12:01:37.635Z" },
{ url = "https://files.pythonhosted.org/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041, upload-time = "2026-05-28T12:01:39.307Z" },
{ url = "https://files.pythonhosted.org/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946, upload-time = "2026-05-28T12:01:41.043Z" },
{ url = "https://files.pythonhosted.org/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526, upload-time = "2026-05-28T12:01:43.032Z" },
{ url = "https://files.pythonhosted.org/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165, upload-time = "2026-05-28T12:01:44.648Z" },
{ url = "https://files.pythonhosted.org/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778, upload-time = "2026-05-28T12:01:46.337Z" },
{ url = "https://files.pythonhosted.org/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839, upload-time = "2026-05-28T12:01:48.109Z" },
{ url = "https://files.pythonhosted.org/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866, upload-time = "2026-05-28T12:01:49.648Z" },
{ url = "https://files.pythonhosted.org/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441, upload-time = "2026-05-28T12:01:51.408Z" },
]
[[package]]
name = "ruff"
version = "0.15.15"