fix: prevent tool-use marker leakage in toolshim output (#8310)

Signed-off-by: Eugenio La Cava <eugeniolcv@gmail.com> Signed-off-by: Michael Neale <michael.neale@gmail.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Michael Neale <michael.neale@gmail.com>
2026-06-01 22:09:18 +02:00 · 2026-05-14 07:49:37 +02:00
parent 826cce0257
commit 7fc3537751
6 changed files with 1161 additions and 60 deletions
@@ -64,6 +64,12 @@ do_not_version/
 /working_dir
 # Local build scripts and generated snapshot artifacts
 /build.bat
 /build.ps1
 /build_check.ps1
 /crates/goose/src/agents/snapshots/*.snap.new
 # Error log artifacts from mcp replay tests
 crates/goose/tests/mcp_replays/*errors.txt
@@ -1,8 +1,7 @@
 use anyhow::Result;
 use goose_cli::cli::cli;
-#[tokio::main]
+async fn run() -> Result<()> {
 async fn main() -> Result<()> {
    if let Err(e) = goose_cli::logging::setup_logging(None) {
        eprintln!("Warning: Failed to initialize logging: {}", e);
    }
@@ -17,3 +16,21 @@ async fn main() -> Result<()> {
    result
 }
 /// Process entry point: runs the async CLI (`run`) on a dedicated, named thread
 /// with an 8 MiB stack and returns whatever `run` returns.
 ///
 /// # Errors
 /// - the thread could not be spawned,
 /// - the Tokio runtime could not be built (propagated as an error instead of
 ///   panicking inside the thread, so the cause reaches the caller),
 /// - the thread panicked,
 /// - `run` itself returned an error.
 fn main() -> Result<()> {
    let handle = std::thread::Builder::new()
        .name("goose-cli-main".to_string())
        .stack_size(8 * 1024 * 1024)
        .spawn(|| {
            // NOTE(review): the stack size above is set on this thread only; if
            // Tokio worker threads need the same headroom, that is configured via
            // `Builder::thread_stack_size` — confirm whether it is required.
            let runtime = tokio::runtime::Builder::new_multi_thread()
                .enable_all()
                .build()
                .map_err(|e| anyhow::anyhow!("Failed to build Tokio runtime: {}", e))?;
            runtime.block_on(run())
        })
        .map_err(|e| anyhow::anyhow!("Failed to spawn goose-cli main thread: {}", e))?;
    // `join` yields Err only when the thread panicked; otherwise the inner
    // `Result` produced by the closure becomes this function's return value.
    handle
        .join()
        .map_err(|_| anyhow::anyhow!("goose-cli main thread panicked"))?
 }
@@ -17,10 +17,11 @@ use crate::providers::base::stream_from_single_message;
 use crate::providers::base::{MessageStream, Provider, ProviderUsage};
 use crate::providers::errors::ProviderError;
 use crate::providers::toolshim::{
-    augment_message_with_tool_calls, convert_tool_messages_to_text,
+    augment_message_with_selected_tool_interpreter, convert_tool_messages_to_text,
-    modify_system_prompt_for_tool_json, OllamaInterpreter,
+    modify_system_prompt_for_tool_json, sanitize_residual_markers,
 };
 use rmcp::model::Tool;
 use tracing::warn;
 async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
    let ProviderError::RequestFailed(ref msg) = error else {
@@ -123,13 +124,16 @@ async fn toolshim_postprocess(
    response: Message,
    toolshim_tools: &[Tool],
 ) -> Result<Message, ProviderError> {
-    let interpreter = OllamaInterpreter::new().map_err(|e| {
+    match augment_message_with_selected_tool_interpreter(response.clone(), toolshim_tools).await {
-        ProviderError::ExecutionError(format!("Failed to create OllamaInterpreter: {}", e))
+        Ok(message) => Ok(message),
-    })?;
+        Err(e) => {
-
+            warn!(
-    augment_message_with_tool_calls(&interpreter, response, toolshim_tools)
+                "Toolshim augmentation failed, skipping tool augmentation: {}",
-        .await
+                e
-        .map_err(|e| ProviderError::ExecutionError(format!("Failed to augment message: {}", e)))
+            );
            Ok(sanitize_residual_markers(response))
        }
    }
 }
 impl Agent {
@@ -302,20 +306,67 @@ impl Agent {
        };
        Ok(Box::pin(try_stream! {
-            while let Some(result) = stream.next().await {
+            if config.toolshim {
-                let (mut message, usage) = result?;
+                // Toolshim mode: accumulate the full response before processing
                // so that tool-use markers spanning multiple chunks are detected
                // and stripped before any output reaches the UI.
                let mut accumulated_message: Option<Message> = None;
                let mut final_usage: Option<ProviderUsage> = None;
-                // Store the model information in the global store
+                while let Some(result) = stream.next().await {
-                if let Some(usage) = usage.as_ref() {
+                    let (msg_opt, usage_opt) = result?;
-                    crate::providers::base::set_current_model(&usage.model);
+
                    if let Some(usage) = usage_opt.as_ref() {
                        crate::providers::base::set_current_model(&usage.model);
                    }
                    if let Some(msg) = msg_opt {
                        accumulated_message = Some(match accumulated_message {
                            Some(mut prev) => {
                                for new_content in msg.content {
                                    match (&mut prev.content.last_mut(), &new_content) {
                                        (
                                            Some(MessageContent::Text(last_text)),
                                            MessageContent::Text(new_text),
                                        ) => {
                                            last_text.text.push_str(&new_text.text);
                                        }
                                        _ => {
                                            prev.content.push(new_content);
                                        }
                                    }
                                }
                                prev
                            }
                            None => msg,
                        });
                    }
                    if let Some(usage) = usage_opt {
                        final_usage = Some(usage);
                    }
                    // Yield empty item so the agent loop can check cancellation
                    yield (None, None);
                }
-                // Post-process / structure the response only if tool interpretation is enabled
+                if let Some(msg) = accumulated_message {
-                if message.is_some() && config.toolshim {
+                    let processed = toolshim_postprocess(msg, &toolshim_tools).await?;
-                    message = Some(toolshim_postprocess(message.unwrap(), &toolshim_tools).await?);
+                    yield (Some(processed), final_usage);
                } else if final_usage.is_some() {
                    // Preserve usage-only responses (no message content)
                    yield (None, final_usage);
                }
            } else {
                while let Some(result) = stream.next().await {
                    let (message, usage) = result?;
-                yield (message, usage);
+                    if let Some(usage) = usage.as_ref() {
                        crate::providers::base::set_current_model(&usage.model);
                    }
                    yield (message, usage);
                }
            }
        }))
    }
@@ -479,7 +479,7 @@ impl Drop for RequestLog {
 /// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
 /// This function first attempts to parse the input string as-is. If that fails,
-/// it applies control character escaping and tries again.
+/// it retries with truncated-JSON repair, then with control character escaping,
+/// and finally with both combined.
 ///
 /// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
 /// (which contains a literal \n but is perfectly valid JSON) while still fixing
@@ -490,13 +490,71 @@ pub fn safely_parse_json(s: &str) -> Result<serde_json::Value, serde_json::Error
    match serde_json::from_str(s) {
        Ok(value) => Ok(value),
        Err(_) => {
-            // If that fails, try with control character escaping
+            for candidate in [
-            let escaped = json_escape_control_chars_in_string(s);
+                repair_truncated_json(s),
-            serde_json::from_str(&escaped)
+                json_escape_control_chars_in_string(s),
            ] {
                if let Ok(value) = serde_json::from_str(&candidate) {
                    return Ok(value);
                }
            }
            let repaired = repair_truncated_json(&json_escape_control_chars_in_string(s));
            serde_json::from_str(&repaired)
        }
    }
 }
 /// Best-effort repair of a JSON document that was cut off before it finished.
 ///
 /// `s` is copied verbatim while the scanner tracks whether it is inside a string
 /// literal and which `{` / `[` are still open. At end of input it appends what is
 /// needed to terminate the document:
 /// - an incomplete `\uXXXX` escape at the very end of an open string is dropped
 ///   (a partial escape can never be made valid by appending a quote),
 /// - a lone trailing backslash inside an open string is doubled, turning it into
 ///   an escaped literal backslash,
 /// - an open string receives its closing quote,
 /// - open objects and arrays are closed innermost-first.
 ///
 /// The result is not guaranteed to parse: input that stops right after a `:` or
 /// `,`, for example, is still closed as-is. Callers must parse the returned text.
 fn repair_truncated_json(s: &str) -> String {
    let mut repaired = String::with_capacity(s.len() + 8);
    let mut in_string = false;
    let mut escape_next = false;
    // Hex digits still expected for the `\u` escape currently being scanned
    // (0 when no such escape is in progress).
    let mut unicode_digits_left = 0usize;
    // Byte offset in `repaired` of the backslash that opened that escape.
    let mut unicode_escape_start = 0usize;
    let mut closers = Vec::new();
    for c in s.chars() {
        let char_start = repaired.len();
        repaired.push(c);
        if in_string {
            if escape_next {
                escape_next = false;
                if c == 'u' {
                    unicode_digits_left = 4;
                    // The backslash is the single-byte char pushed just before `u`.
                    unicode_escape_start = char_start - 1;
                }
                continue;
            }
            if unicode_digits_left > 0 {
                if c.is_ascii_hexdigit() {
                    unicode_digits_left -= 1;
                    continue;
                }
                // Malformed escape in the middle of the input: stop tracking it
                // and let `c` be handled like any other string character.
                unicode_digits_left = 0;
            }
            match c {
                '\\' => escape_next = true,
                '"' => in_string = false,
                _ => {}
            }
            continue;
        }
        match c {
            '"' => in_string = true,
            '{' => closers.push('}'),
            '[' => closers.push(']'),
            '}' | ']' => {
                // Only a closer matching the innermost open container pops it;
                // stray closers are copied through without touching the stack.
                if closers.last() == Some(&c) {
                    closers.pop();
                }
            }
            _ => {}
        }
    }
    if in_string {
        if unicode_digits_left > 0 {
            // Input ended inside `\uXXXX`: remove the partial escape entirely.
            repaired.truncate(unicode_escape_start);
        } else if escape_next {
            // Input ended on a bare backslash: complete it as `\\`.
            repaired.push('\\');
        }
        repaired.push('"');
    }
    // Close the remaining containers, innermost first.
    repaired.extend(closers.into_iter().rev());
    repaired
 }
 /// Helper to escape control characters in a string that is supposed to be a JSON document.
 /// This function iterates through the input string `s` and replaces any literal
 /// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
@@ -809,9 +867,16 @@ mod tests {
        let result = safely_parse_json(good_json).unwrap();
        assert_eq!(result["test"], "value");
-        // Test completely invalid JSON that can't be fixed
+        // Test truncated JSON with unclosed string, object, and array
-        let broken_json = r#"{"key": "unclosed_string"#;
+        let truncated_json = r#"{"key": "unclosed_string","nested": {"items": [1, 2, 3"#;
-        assert!(safely_parse_json(broken_json).is_err());
+        let result = safely_parse_json(truncated_json).unwrap();
        assert_eq!(result["key"], "unclosed_string");
        assert_eq!(result["nested"]["items"], json!([1, 2, 3]));
        // Test dangling backslash at end of a truncated string
        let dangling_escape_json = String::from(r#"{"path":"abc\"#);
        let result = safely_parse_json(&dangling_escape_json).unwrap();
        assert_eq!(result["path"], "abc\\");
        // Test empty object
        let empty_json = "{}";
@@ -94,14 +94,24 @@ export function getTextAndImageContent(message: Message): {
    }
  }
-  // Strip <think> tags from assistant text — the thinking is surfaced via getThinkingContent
+  // Strip assistant-only markup that shouldn't appear in rendered text
  if (message.role === 'assistant') {
    textContent = stripToolCallMarkers(textContent);
    textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
  }
  return { textContent, imagePaths };
 }
 /**
  * Removes tool-call marker spans from `text` and trims the result.
  *
  * A span is a `<|…_begin|>` token, the matching `<|…_end|>` token, and
  * everything between them (shortest match). Section spans are removed first,
  * then individual call spans, then argument spans. A begin token with no
  * matching end token is left untouched.
  */
 function stripToolCallMarkers(text: string): string {
  // Order matters: outer section spans go before the spans nested inside them.
  const markerSpans: RegExp[] = [
    /<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g,
    /<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g,
    /<\|tool_call_argument_begin\|>[\s\S]*?<\|tool_call_argument_end\|>/g,
  ];
  let cleaned = text;
  for (const span of markerSpans) {
    cleaned = cleaned.replace(span, '');
  }
  return cleaned.trim();
 }
 export function getThinkingContent(message: Message): string | null {
  const parts: string[] = [];