fix: prevent tool-use marker leakage in toolshim output (#8310)

Signed-off-by: Eugenio La Cava <eugeniolcv@gmail.com>
Signed-off-by: Michael Neale <michael.neale@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Michael Neale <michael.neale@gmail.com>
This commit is contained in:
Eugenio
2026-05-14 07:49:37 +02:00
committed by GitHub
parent 826cce0257
commit 7fc3537751
6 changed files with 1161 additions and 60 deletions
+6
View File
@@ -64,6 +64,12 @@ do_not_version/
/working_dir
# Local build scripts and generated snapshot artifacts
/build.bat
/build.ps1
/build_check.ps1
/crates/goose/src/agents/snapshots/*.snap.new
# Error log artifacts from mcp replay tests
crates/goose/tests/mcp_replays/*errors.txt
+19 -2
View File
@@ -1,8 +1,7 @@
use anyhow::Result;
use goose_cli::cli::cli;
#[tokio::main]
async fn main() -> Result<()> {
async fn run() -> Result<()> {
if let Err(e) = goose_cli::logging::setup_logging(None) {
eprintln!("Warning: Failed to initialize logging: {}", e);
}
@@ -17,3 +16,21 @@ async fn main() -> Result<()> {
result
}
fn main() -> Result<()> {
let handle = std::thread::Builder::new()
.name("goose-cli-main".to_string())
.stack_size(8 * 1024 * 1024)
.spawn(|| {
let runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.expect("Failed to build Tokio runtime");
runtime.block_on(run())
})
.map_err(|e| anyhow::anyhow!("Failed to spawn goose-cli main thread: {}", e))?;
handle
.join()
.map_err(|_| anyhow::anyhow!("goose-cli main thread panicked"))?
}
+69 -18
View File
@@ -17,10 +17,11 @@ use crate::providers::base::stream_from_single_message;
use crate::providers::base::{MessageStream, Provider, ProviderUsage};
use crate::providers::errors::ProviderError;
use crate::providers::toolshim::{
augment_message_with_tool_calls, convert_tool_messages_to_text,
modify_system_prompt_for_tool_json, OllamaInterpreter,
augment_message_with_selected_tool_interpreter, convert_tool_messages_to_text,
modify_system_prompt_for_tool_json, sanitize_residual_markers,
};
use rmcp::model::Tool;
use tracing::warn;
async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
let ProviderError::RequestFailed(ref msg) = error else {
@@ -123,13 +124,16 @@ async fn toolshim_postprocess(
response: Message,
toolshim_tools: &[Tool],
) -> Result<Message, ProviderError> {
let interpreter = OllamaInterpreter::new().map_err(|e| {
ProviderError::ExecutionError(format!("Failed to create OllamaInterpreter: {}", e))
})?;
augment_message_with_tool_calls(&interpreter, response, toolshim_tools)
.await
.map_err(|e| ProviderError::ExecutionError(format!("Failed to augment message: {}", e)))
match augment_message_with_selected_tool_interpreter(response.clone(), toolshim_tools).await {
Ok(message) => Ok(message),
Err(e) => {
warn!(
"Toolshim augmentation failed, skipping tool augmentation: {}",
e
);
Ok(sanitize_residual_markers(response))
}
}
}
impl Agent {
@@ -302,20 +306,67 @@ impl Agent {
};
Ok(Box::pin(try_stream! {
while let Some(result) = stream.next().await {
let (mut message, usage) = result?;
if config.toolshim {
// Toolshim mode: accumulate the full response before processing
// so that tool-use markers spanning multiple chunks are detected
// and stripped before any output reaches the UI.
let mut accumulated_message: Option<Message> = None;
let mut final_usage: Option<ProviderUsage> = None;
// Store the model information in the global store
if let Some(usage) = usage.as_ref() {
crate::providers::base::set_current_model(&usage.model);
while let Some(result) = stream.next().await {
let (msg_opt, usage_opt) = result?;
if let Some(usage) = usage_opt.as_ref() {
crate::providers::base::set_current_model(&usage.model);
}
if let Some(msg) = msg_opt {
accumulated_message = Some(match accumulated_message {
Some(mut prev) => {
for new_content in msg.content {
match (&mut prev.content.last_mut(), &new_content) {
(
Some(MessageContent::Text(last_text)),
MessageContent::Text(new_text),
) => {
last_text.text.push_str(&new_text.text);
}
_ => {
prev.content.push(new_content);
}
}
}
prev
}
None => msg,
});
}
if let Some(usage) = usage_opt {
final_usage = Some(usage);
}
// Yield empty item so the agent loop can check cancellation
yield (None, None);
}
// Post-process / structure the response only if tool interpretation is enabled
if message.is_some() && config.toolshim {
message = Some(toolshim_postprocess(message.unwrap(), &toolshim_tools).await?);
if let Some(msg) = accumulated_message {
let processed = toolshim_postprocess(msg, &toolshim_tools).await?;
yield (Some(processed), final_usage);
} else if final_usage.is_some() {
// Preserve usage-only responses (no message content)
yield (None, final_usage);
}
} else {
while let Some(result) = stream.next().await {
let (message, usage) = result?;
yield (message, usage);
if let Some(usage) = usage.as_ref() {
crate::providers::base::set_current_model(&usage.model);
}
yield (message, usage);
}
}
}))
}
File diff suppressed because it is too large Load Diff
+72 -7
View File
@@ -479,7 +479,7 @@ impl Drop for RequestLog {
/// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
/// This function first attempts to parse the input string as-is. If that fails,
/// it applies control character escaping and tries again.
/// it applies control character escaping and truncated JSON repair and tries again.
///
/// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
/// (which contains a literal \n but is perfectly valid JSON) while still fixing
@@ -490,13 +490,71 @@ pub fn safely_parse_json(s: &str) -> Result<serde_json::Value, serde_json::Error
match serde_json::from_str(s) {
Ok(value) => Ok(value),
Err(_) => {
// If that fails, try with control character escaping
let escaped = json_escape_control_chars_in_string(s);
serde_json::from_str(&escaped)
for candidate in [
repair_truncated_json(s),
json_escape_control_chars_in_string(s),
] {
if let Ok(value) = serde_json::from_str(&candidate) {
return Ok(value);
}
}
let repaired = repair_truncated_json(&json_escape_control_chars_in_string(s));
serde_json::from_str(&repaired)
}
}
}
fn repair_truncated_json(s: &str) -> String {
let mut repaired = String::with_capacity(s.len() + 8);
let mut in_string = false;
let mut escape_next = false;
let mut closers = Vec::new();
for c in s.chars() {
repaired.push(c);
if in_string {
if escape_next {
escape_next = false;
continue;
}
match c {
'\\' => escape_next = true,
'"' => in_string = false,
_ => {}
}
continue;
}
match c {
'"' => in_string = true,
'{' => closers.push('}'),
'[' => closers.push(']'),
'}' | ']' => {
if closers.last() == Some(&c) {
closers.pop();
}
}
_ => {}
}
}
if in_string {
if escape_next {
repaired.push('\\');
}
repaired.push('"');
}
while let Some(closer) = closers.pop() {
repaired.push(closer);
}
repaired
}
/// Helper to escape control characters in a string that is supposed to be a JSON document.
/// This function iterates through the input string `s` and replaces any literal
/// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
@@ -809,9 +867,16 @@ mod tests {
let result = safely_parse_json(good_json).unwrap();
assert_eq!(result["test"], "value");
// Test completely invalid JSON that can't be fixed
let broken_json = r#"{"key": "unclosed_string"#;
assert!(safely_parse_json(broken_json).is_err());
// Test truncated JSON with unclosed string, object, and array
let truncated_json = r#"{"key": "unclosed_string","nested": {"items": [1, 2, 3"#;
let result = safely_parse_json(truncated_json).unwrap();
assert_eq!(result["key"], "unclosed_string");
assert_eq!(result["nested"]["items"], json!([1, 2, 3]));
// Test dangling backslash at end of a truncated string
let dangling_escape_json = String::from(r#"{"path":"abc\"#);
let result = safely_parse_json(&dangling_escape_json).unwrap();
assert_eq!(result["path"], "abc\\");
// Test empty object
let empty_json = "{}";
+11 -1
View File
@@ -94,14 +94,24 @@ export function getTextAndImageContent(message: Message): {
}
}
// Strip <think> tags from assistant text — the thinking is surfaced via getThinkingContent
// Strip assistant-only markup that shouldn't appear in rendered text
if (message.role === 'assistant') {
textContent = stripToolCallMarkers(textContent);
textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
}
return { textContent, imagePaths };
}
function stripToolCallMarkers(text: string): string {
// Remove all tool call XML markers and their content
return text
.replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, '')
.replace(/<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g, '')
.replace(/<\|tool_call_argument_begin\|>[\s\S]*?<\|tool_call_argument_end\|>/g, '')
.trim();
}
export function getThinkingContent(message: Message): string | null {
const parts: string[] = [];