fix: prevent tool-use marker leakage in toolshim output (#8310)

Signed-off-by: Eugenio La Cava <eugeniolcv@gmail.com> Signed-off-by: Michael Neale <michael.neale@gmail.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Michael Neale <michael.neale@gmail.com>
2026-06-01 22:09:18 +02:00 · 2026-05-14 07:49:37 +02:00
parent 826cce0257
commit 7fc3537751
6 changed files with 1161 additions and 60 deletions
@@ -64,6 +64,12 @@ do_not_version/

 /working_dir

+# Local build scripts and generated snapshot artifacts
+/build.bat
+/build.ps1
+/build_check.ps1
+/crates/goose/src/agents/snapshots/*.snap.new
+
 # Error log artifacts from mcp replay tests
 crates/goose/tests/mcp_replays/*errors.txt

@@ -1,8 +1,7 @@
 use anyhow::Result;
 use goose_cli::cli::cli;

-#[tokio::main]
-async fn main() -> Result<()> {
+async fn run() -> Result<()> {
    if let Err(e) = goose_cli::logging::setup_logging(None) {
        eprintln!("Warning: Failed to initialize logging: {}", e);
    }
@@ -17,3 +16,21 @@ async fn main() -> Result<()> {

    result
 }
+
+fn main() -> Result<()> {
+    let handle = std::thread::Builder::new()
+        .name("goose-cli-main".to_string())
+        .stack_size(8 * 1024 * 1024)
+        .spawn(|| {
+            let runtime = tokio::runtime::Builder::new_multi_thread()
+                .enable_all()
+                .build()
+                .expect("Failed to build Tokio runtime");
+            runtime.block_on(run())
+        })
+        .map_err(|e| anyhow::anyhow!("Failed to spawn goose-cli main thread: {}", e))?;
+
+    handle
+        .join()
+        .map_err(|_| anyhow::anyhow!("goose-cli main thread panicked"))?
+}
@@ -17,10 +17,11 @@ use crate::providers::base::stream_from_single_message;
 use crate::providers::base::{MessageStream, Provider, ProviderUsage};
 use crate::providers::errors::ProviderError;
 use crate::providers::toolshim::{
-    augment_message_with_tool_calls, convert_tool_messages_to_text,
-    modify_system_prompt_for_tool_json, OllamaInterpreter,
+    augment_message_with_selected_tool_interpreter, convert_tool_messages_to_text,
+    modify_system_prompt_for_tool_json, sanitize_residual_markers,
 };
 use rmcp::model::Tool;
+use tracing::warn;

 async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
    let ProviderError::RequestFailed(ref msg) = error else {
@@ -123,13 +124,16 @@ async fn toolshim_postprocess(
    response: Message,
    toolshim_tools: &[Tool],
 ) -> Result<Message, ProviderError> {
-    let interpreter = OllamaInterpreter::new().map_err(|e| {
-        ProviderError::ExecutionError(format!("Failed to create OllamaInterpreter: {}", e))
-    })?;
-
-    augment_message_with_tool_calls(&interpreter, response, toolshim_tools)
-        .await
-        .map_err(|e| ProviderError::ExecutionError(format!("Failed to augment message: {}", e)))
+    match augment_message_with_selected_tool_interpreter(response.clone(), toolshim_tools).await {
+        Ok(message) => Ok(message),
+        Err(e) => {
+            warn!(
+                "Toolshim augmentation failed, skipping tool augmentation: {}",
+                e
+            );
+            Ok(sanitize_residual_markers(response))
+        }
+    }
 }

 impl Agent {
@@ -302,20 +306,67 @@ impl Agent {
        };

        Ok(Box::pin(try_stream! {
-            while let Some(result) = stream.next().await {
-                let (mut message, usage) = result?;
+            if config.toolshim {
+                // Toolshim mode: accumulate the full response before processing
+                // so that tool-use markers spanning multiple chunks are detected
+                // and stripped before any output reaches the UI.
+                let mut accumulated_message: Option<Message> = None;
+                let mut final_usage: Option<ProviderUsage> = None;

-                // Store the model information in the global store
-                if let Some(usage) = usage.as_ref() {
-                    crate::providers::base::set_current_model(&usage.model);
+                while let Some(result) = stream.next().await {
+                    let (msg_opt, usage_opt) = result?;
+
+                    if let Some(usage) = usage_opt.as_ref() {
+                        crate::providers::base::set_current_model(&usage.model);
+                    }
+
+                    if let Some(msg) = msg_opt {
+                        accumulated_message = Some(match accumulated_message {
+                            Some(mut prev) => {
+                                for new_content in msg.content {
+                                    match (&mut prev.content.last_mut(), &new_content) {
+                                        (
+                                            Some(MessageContent::Text(last_text)),
+                                            MessageContent::Text(new_text),
+                                        ) => {
+                                            last_text.text.push_str(&new_text.text);
+                                        }
+                                        _ => {
+                                            prev.content.push(new_content);
+                                        }
+                                    }
+                                }
+                                prev
+                            }
+                            None => msg,
+                        });
+                    }
+
+                    if let Some(usage) = usage_opt {
+                        final_usage = Some(usage);
+                    }
+
+                    // Yield empty item so the agent loop can check cancellation
+                    yield (None, None);
                }

-                // Post-process / structure the response only if tool interpretation is enabled
-                if message.is_some() && config.toolshim {
-                    message = Some(toolshim_postprocess(message.unwrap(), &toolshim_tools).await?);
+                if let Some(msg) = accumulated_message {
+                    let processed = toolshim_postprocess(msg, &toolshim_tools).await?;
+                    yield (Some(processed), final_usage);
+                } else if final_usage.is_some() {
+                    // Preserve usage-only responses (no message content)
+                    yield (None, final_usage);
                }
+            } else {
+                while let Some(result) = stream.next().await {
+                    let (message, usage) = result?;

-                yield (message, usage);
+                    if let Some(usage) = usage.as_ref() {
+                        crate::providers::base::set_current_model(&usage.model);
+                    }
+
+                    yield (message, usage);
+                }
            }
        }))
    }
@@ -479,7 +479,7 @@ impl Drop for RequestLog {

 /// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
 /// This function first attempts to parse the input string as-is. If that fails,
-/// it applies control character escaping and tries again.
+/// it applies control character escaping and truncated JSON repair and tries again.
 ///
 /// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
 /// (which contains a literal \n but is perfectly valid JSON) while still fixing
@@ -490,13 +490,71 @@ pub fn safely_parse_json(s: &str) -> Result<serde_json::Value, serde_json::Error
    match serde_json::from_str(s) {
        Ok(value) => Ok(value),
        Err(_) => {
-            // If that fails, try with control character escaping
-            let escaped = json_escape_control_chars_in_string(s);
-            serde_json::from_str(&escaped)
+            for candidate in [
+                repair_truncated_json(s),
+                json_escape_control_chars_in_string(s),
+            ] {
+                if let Ok(value) = serde_json::from_str(&candidate) {
+                    return Ok(value);
+                }
+            }
+
+            let repaired = repair_truncated_json(&json_escape_control_chars_in_string(s));
+            serde_json::from_str(&repaired)
        }
    }
 }

+fn repair_truncated_json(s: &str) -> String {
+    let mut repaired = String::with_capacity(s.len() + 8);
+    let mut in_string = false;
+    let mut escape_next = false;
+    let mut closers = Vec::new();
+
+    for c in s.chars() {
+        repaired.push(c);
+
+        if in_string {
+            if escape_next {
+                escape_next = false;
+                continue;
+            }
+
+            match c {
+                '\\' => escape_next = true,
+                '"' => in_string = false,
+                _ => {}
+            }
+            continue;
+        }
+
+        match c {
+            '"' => in_string = true,
+            '{' => closers.push('}'),
+            '[' => closers.push(']'),
+            '}' | ']' => {
+                if closers.last() == Some(&c) {
+                    closers.pop();
+                }
+            }
+            _ => {}
+        }
+    }
+
+    if in_string {
+        if escape_next {
+            repaired.push('\\');
+        }
+        repaired.push('"');
+    }
+
+    while let Some(closer) = closers.pop() {
+        repaired.push(closer);
+    }
+
+    repaired
+}
+
 /// Helper to escape control characters in a string that is supposed to be a JSON document.
 /// This function iterates through the input string `s` and replaces any literal
 /// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
@@ -809,9 +867,16 @@ mod tests {
        let result = safely_parse_json(good_json).unwrap();
        assert_eq!(result["test"], "value");

-        // Test completely invalid JSON that can't be fixed
-        let broken_json = r#"{"key": "unclosed_string"#;
-        assert!(safely_parse_json(broken_json).is_err());
+        // Test truncated JSON with unclosed string, object, and array
+        let truncated_json = r#"{"key": "unclosed_string","nested": {"items": [1, 2, 3"#;
+        let result = safely_parse_json(truncated_json).unwrap();
+        assert_eq!(result["key"], "unclosed_string");
+        assert_eq!(result["nested"]["items"], json!([1, 2, 3]));
+
+        // Test dangling backslash at end of a truncated string
+        let dangling_escape_json = String::from(r#"{"path":"abc\"#);
+        let result = safely_parse_json(&dangling_escape_json).unwrap();
+        assert_eq!(result["path"], "abc\\");

        // Test empty object
        let empty_json = "{}";
@@ -94,14 +94,24 @@ export function getTextAndImageContent(message: Message): {
    }
  }

-  // Strip <think> tags from assistant text — the thinking is surfaced via getThinkingContent
+  // Strip assistant-only markup that shouldn't appear in rendered text
  if (message.role === 'assistant') {
+    textContent = stripToolCallMarkers(textContent);
    textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
  }

  return { textContent, imagePaths };
 }

+function stripToolCallMarkers(text: string): string {
+  // Remove all tool call XML markers and their content
+  return text
+    .replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, '')
+    .replace(/<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g, '')
+    .replace(/<\|tool_call_argument_begin\|>[\s\S]*?<\|tool_call_argument_end\|>/g, '')
+    .trim();
+}
+
 export function getThinkingContent(message: Message): string | null {
  const parts: string[] = [];