mirror of
https://github.com/aaif-goose/goose.git
synced 2026-06-01 22:09:18 +02:00
fix: prevent tool-use marker leakage in toolshim output (#8310)
Signed-off-by: Eugenio La Cava <eugeniolcv@gmail.com> Signed-off-by: Michael Neale <michael.neale@gmail.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Michael Neale <michael.neale@gmail.com>
This commit is contained in:
@@ -64,6 +64,12 @@ do_not_version/
|
|||||||
|
|
||||||
/working_dir
|
/working_dir
|
||||||
|
|
||||||
|
# Local build scripts and generated snapshot artifacts
|
||||||
|
/build.bat
|
||||||
|
/build.ps1
|
||||||
|
/build_check.ps1
|
||||||
|
/crates/goose/src/agents/snapshots/*.snap.new
|
||||||
|
|
||||||
# Error log artifacts from mcp replay tests
|
# Error log artifacts from mcp replay tests
|
||||||
crates/goose/tests/mcp_replays/*errors.txt
|
crates/goose/tests/mcp_replays/*errors.txt
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use goose_cli::cli::cli;
|
use goose_cli::cli::cli;
|
||||||
|
|
||||||
#[tokio::main]
|
async fn run() -> Result<()> {
|
||||||
async fn main() -> Result<()> {
|
|
||||||
if let Err(e) = goose_cli::logging::setup_logging(None) {
|
if let Err(e) = goose_cli::logging::setup_logging(None) {
|
||||||
eprintln!("Warning: Failed to initialize logging: {}", e);
|
eprintln!("Warning: Failed to initialize logging: {}", e);
|
||||||
}
|
}
|
||||||
@@ -17,3 +16,21 @@ async fn main() -> Result<()> {
|
|||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let handle = std::thread::Builder::new()
|
||||||
|
.name("goose-cli-main".to_string())
|
||||||
|
.stack_size(8 * 1024 * 1024)
|
||||||
|
.spawn(|| {
|
||||||
|
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||||
|
.enable_all()
|
||||||
|
.build()
|
||||||
|
.expect("Failed to build Tokio runtime");
|
||||||
|
runtime.block_on(run())
|
||||||
|
})
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to spawn goose-cli main thread: {}", e))?;
|
||||||
|
|
||||||
|
handle
|
||||||
|
.join()
|
||||||
|
.map_err(|_| anyhow::anyhow!("goose-cli main thread panicked"))?
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,10 +17,11 @@ use crate::providers::base::stream_from_single_message;
|
|||||||
use crate::providers::base::{MessageStream, Provider, ProviderUsage};
|
use crate::providers::base::{MessageStream, Provider, ProviderUsage};
|
||||||
use crate::providers::errors::ProviderError;
|
use crate::providers::errors::ProviderError;
|
||||||
use crate::providers::toolshim::{
|
use crate::providers::toolshim::{
|
||||||
augment_message_with_tool_calls, convert_tool_messages_to_text,
|
augment_message_with_selected_tool_interpreter, convert_tool_messages_to_text,
|
||||||
modify_system_prompt_for_tool_json, OllamaInterpreter,
|
modify_system_prompt_for_tool_json, sanitize_residual_markers,
|
||||||
};
|
};
|
||||||
use rmcp::model::Tool;
|
use rmcp::model::Tool;
|
||||||
|
use tracing::warn;
|
||||||
|
|
||||||
async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
|
async fn enhance_model_error(error: ProviderError, provider: &Arc<dyn Provider>) -> ProviderError {
|
||||||
let ProviderError::RequestFailed(ref msg) = error else {
|
let ProviderError::RequestFailed(ref msg) = error else {
|
||||||
@@ -123,13 +124,16 @@ async fn toolshim_postprocess(
|
|||||||
response: Message,
|
response: Message,
|
||||||
toolshim_tools: &[Tool],
|
toolshim_tools: &[Tool],
|
||||||
) -> Result<Message, ProviderError> {
|
) -> Result<Message, ProviderError> {
|
||||||
let interpreter = OllamaInterpreter::new().map_err(|e| {
|
match augment_message_with_selected_tool_interpreter(response.clone(), toolshim_tools).await {
|
||||||
ProviderError::ExecutionError(format!("Failed to create OllamaInterpreter: {}", e))
|
Ok(message) => Ok(message),
|
||||||
})?;
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
augment_message_with_tool_calls(&interpreter, response, toolshim_tools)
|
"Toolshim augmentation failed, skipping tool augmentation: {}",
|
||||||
.await
|
e
|
||||||
.map_err(|e| ProviderError::ExecutionError(format!("Failed to augment message: {}", e)))
|
);
|
||||||
|
Ok(sanitize_residual_markers(response))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Agent {
|
impl Agent {
|
||||||
@@ -302,20 +306,67 @@ impl Agent {
|
|||||||
};
|
};
|
||||||
|
|
||||||
Ok(Box::pin(try_stream! {
|
Ok(Box::pin(try_stream! {
|
||||||
while let Some(result) = stream.next().await {
|
if config.toolshim {
|
||||||
let (mut message, usage) = result?;
|
// Toolshim mode: accumulate the full response before processing
|
||||||
|
// so that tool-use markers spanning multiple chunks are detected
|
||||||
|
// and stripped before any output reaches the UI.
|
||||||
|
let mut accumulated_message: Option<Message> = None;
|
||||||
|
let mut final_usage: Option<ProviderUsage> = None;
|
||||||
|
|
||||||
// Store the model information in the global store
|
while let Some(result) = stream.next().await {
|
||||||
if let Some(usage) = usage.as_ref() {
|
let (msg_opt, usage_opt) = result?;
|
||||||
crate::providers::base::set_current_model(&usage.model);
|
|
||||||
|
if let Some(usage) = usage_opt.as_ref() {
|
||||||
|
crate::providers::base::set_current_model(&usage.model);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(msg) = msg_opt {
|
||||||
|
accumulated_message = Some(match accumulated_message {
|
||||||
|
Some(mut prev) => {
|
||||||
|
for new_content in msg.content {
|
||||||
|
match (&mut prev.content.last_mut(), &new_content) {
|
||||||
|
(
|
||||||
|
Some(MessageContent::Text(last_text)),
|
||||||
|
MessageContent::Text(new_text),
|
||||||
|
) => {
|
||||||
|
last_text.text.push_str(&new_text.text);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
prev.content.push(new_content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prev
|
||||||
|
}
|
||||||
|
None => msg,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(usage) = usage_opt {
|
||||||
|
final_usage = Some(usage);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Yield empty item so the agent loop can check cancellation
|
||||||
|
yield (None, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Post-process / structure the response only if tool interpretation is enabled
|
if let Some(msg) = accumulated_message {
|
||||||
if message.is_some() && config.toolshim {
|
let processed = toolshim_postprocess(msg, &toolshim_tools).await?;
|
||||||
message = Some(toolshim_postprocess(message.unwrap(), &toolshim_tools).await?);
|
yield (Some(processed), final_usage);
|
||||||
|
} else if final_usage.is_some() {
|
||||||
|
// Preserve usage-only responses (no message content)
|
||||||
|
yield (None, final_usage);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
while let Some(result) = stream.next().await {
|
||||||
|
let (message, usage) = result?;
|
||||||
|
|
||||||
yield (message, usage);
|
if let Some(usage) = usage.as_ref() {
|
||||||
|
crate::providers::base::set_current_model(&usage.model);
|
||||||
|
}
|
||||||
|
|
||||||
|
yield (message, usage);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -479,7 +479,7 @@ impl Drop for RequestLog {
|
|||||||
|
|
||||||
/// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
|
/// Safely parse a JSON string that may contain doubly-encoded or malformed JSON.
|
||||||
/// This function first attempts to parse the input string as-is. If that fails,
|
/// This function first attempts to parse the input string as-is. If that fails,
|
||||||
/// it applies control character escaping and tries again.
|
/// it applies control character escaping and truncated JSON repair and tries again.
|
||||||
///
|
///
|
||||||
/// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
|
/// This approach preserves valid JSON like `{"key1": "value1",\n"key2": "value"}`
|
||||||
/// (which contains a literal \n but is perfectly valid JSON) while still fixing
|
/// (which contains a literal \n but is perfectly valid JSON) while still fixing
|
||||||
@@ -490,13 +490,71 @@ pub fn safely_parse_json(s: &str) -> Result<serde_json::Value, serde_json::Error
|
|||||||
match serde_json::from_str(s) {
|
match serde_json::from_str(s) {
|
||||||
Ok(value) => Ok(value),
|
Ok(value) => Ok(value),
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
// If that fails, try with control character escaping
|
for candidate in [
|
||||||
let escaped = json_escape_control_chars_in_string(s);
|
repair_truncated_json(s),
|
||||||
serde_json::from_str(&escaped)
|
json_escape_control_chars_in_string(s),
|
||||||
|
] {
|
||||||
|
if let Ok(value) = serde_json::from_str(&candidate) {
|
||||||
|
return Ok(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let repaired = repair_truncated_json(&json_escape_control_chars_in_string(s));
|
||||||
|
serde_json::from_str(&repaired)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn repair_truncated_json(s: &str) -> String {
|
||||||
|
let mut repaired = String::with_capacity(s.len() + 8);
|
||||||
|
let mut in_string = false;
|
||||||
|
let mut escape_next = false;
|
||||||
|
let mut closers = Vec::new();
|
||||||
|
|
||||||
|
for c in s.chars() {
|
||||||
|
repaired.push(c);
|
||||||
|
|
||||||
|
if in_string {
|
||||||
|
if escape_next {
|
||||||
|
escape_next = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match c {
|
||||||
|
'\\' => escape_next = true,
|
||||||
|
'"' => in_string = false,
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match c {
|
||||||
|
'"' => in_string = true,
|
||||||
|
'{' => closers.push('}'),
|
||||||
|
'[' => closers.push(']'),
|
||||||
|
'}' | ']' => {
|
||||||
|
if closers.last() == Some(&c) {
|
||||||
|
closers.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if in_string {
|
||||||
|
if escape_next {
|
||||||
|
repaired.push('\\');
|
||||||
|
}
|
||||||
|
repaired.push('"');
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(closer) = closers.pop() {
|
||||||
|
repaired.push(closer);
|
||||||
|
}
|
||||||
|
|
||||||
|
repaired
|
||||||
|
}
|
||||||
|
|
||||||
/// Helper to escape control characters in a string that is supposed to be a JSON document.
|
/// Helper to escape control characters in a string that is supposed to be a JSON document.
|
||||||
/// This function iterates through the input string `s` and replaces any literal
|
/// This function iterates through the input string `s` and replaces any literal
|
||||||
/// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
|
/// control characters (U+0000 to U+001F) with their JSON-escaped equivalents
|
||||||
@@ -809,9 +867,16 @@ mod tests {
|
|||||||
let result = safely_parse_json(good_json).unwrap();
|
let result = safely_parse_json(good_json).unwrap();
|
||||||
assert_eq!(result["test"], "value");
|
assert_eq!(result["test"], "value");
|
||||||
|
|
||||||
// Test completely invalid JSON that can't be fixed
|
// Test truncated JSON with unclosed string, object, and array
|
||||||
let broken_json = r#"{"key": "unclosed_string"#;
|
let truncated_json = r#"{"key": "unclosed_string","nested": {"items": [1, 2, 3"#;
|
||||||
assert!(safely_parse_json(broken_json).is_err());
|
let result = safely_parse_json(truncated_json).unwrap();
|
||||||
|
assert_eq!(result["key"], "unclosed_string");
|
||||||
|
assert_eq!(result["nested"]["items"], json!([1, 2, 3]));
|
||||||
|
|
||||||
|
// Test dangling backslash at end of a truncated string
|
||||||
|
let dangling_escape_json = String::from(r#"{"path":"abc\"#);
|
||||||
|
let result = safely_parse_json(&dangling_escape_json).unwrap();
|
||||||
|
assert_eq!(result["path"], "abc\\");
|
||||||
|
|
||||||
// Test empty object
|
// Test empty object
|
||||||
let empty_json = "{}";
|
let empty_json = "{}";
|
||||||
|
|||||||
@@ -94,14 +94,24 @@ export function getTextAndImageContent(message: Message): {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Strip <think> tags from assistant text — the thinking is surfaced via getThinkingContent
|
// Strip assistant-only markup that shouldn't appear in rendered text
|
||||||
if (message.role === 'assistant') {
|
if (message.role === 'assistant') {
|
||||||
|
textContent = stripToolCallMarkers(textContent);
|
||||||
textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
textContent = textContent.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
return { textContent, imagePaths };
|
return { textContent, imagePaths };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function stripToolCallMarkers(text: string): string {
|
||||||
|
// Remove all tool call XML markers and their content
|
||||||
|
return text
|
||||||
|
.replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, '')
|
||||||
|
.replace(/<\|tool_call_begin\|>[\s\S]*?<\|tool_call_end\|>/g, '')
|
||||||
|
.replace(/<\|tool_call_argument_begin\|>[\s\S]*?<\|tool_call_argument_end\|>/g, '')
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
export function getThinkingContent(message: Message): string | null {
|
export function getThinkingContent(message: Message): string | null {
|
||||||
const parts: string[] = [];
|
const parts: string[] = [];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user