Add the importers too

This commit is contained in:
Douwe Osinga
2026-05-28 17:18:37 -04:00
parent b76e555c22
commit 1426fc0f6b
5 changed files with 1328 additions and 3 deletions
@@ -0,0 +1,382 @@
//! Converter for Claude Code `.jsonl` transcript files.
//!
//! Claude Code stores each session as a JSON-Lines file under
//! `~/.claude/projects/<encoded-cwd>/<session-id>.jsonl`. Every line is a typed
//! event; the ones we care about are `user`, `assistant`, and `ai-title`.
//! Most other lines (attachments, queue operations, internal hooks) are
//! transcript noise and are skipped.
use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use rmcp::model::{CallToolRequestParams, CallToolResult, Content, ErrorCode, ErrorData};
use serde_json::{json, Map, Value};
use crate::conversation::message::Message;
use crate::conversation::Conversation;
pub fn convert(content: &str) -> Result<String> {
let lines: Vec<Value> = content
.lines()
.filter(|l| !l.trim().is_empty())
.filter_map(|l| serde_json::from_str::<Value>(l).ok())
.collect();
if lines.is_empty() {
return Err(anyhow!("Claude Code import: no parseable JSON lines"));
}
let cwd = lines
.iter()
.find_map(|l| l.get("cwd").and_then(|v| v.as_str()))
.unwrap_or("")
.to_string();
let session_id = lines
.iter()
.find_map(|l| l.get("sessionId").and_then(|v| v.as_str()))
.unwrap_or("imported")
.to_string();
let ai_title = lines.iter().find_map(|l| {
if l.get("type").and_then(|v| v.as_str()) == Some("ai-title") {
l.get("aiTitle")
.and_then(|v| v.as_str())
.map(str::to_string)
} else {
None
}
});
let mut messages: Vec<Message> = Vec::new();
let mut total_input: i64 = 0;
let mut total_output: i64 = 0;
let mut first_ts: Option<DateTime<Utc>> = None;
let mut last_ts: Option<DateTime<Utc>> = None;
let mut first_user_text: Option<String> = None;
for line in &lines {
let line_type = line.get("type").and_then(|v| v.as_str()).unwrap_or("");
let timestamp = line
.get("timestamp")
.and_then(|v| v.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|dt| dt.with_timezone(&Utc));
if let Some(ts) = timestamp {
first_ts.get_or_insert(ts);
last_ts = Some(ts);
}
match line_type {
"user" => {
if let Some(msg) = convert_user_message(line, timestamp) {
if first_user_text.is_none() {
first_user_text = extract_first_text(&msg);
}
messages.push(msg);
}
}
"assistant" => {
if let Some(msg) = convert_assistant_message(line, timestamp) {
if let Some(usage) = line
.get("message")
.and_then(|m| m.get("usage"))
.and_then(|u| u.as_object())
{
total_input += usage
.get("input_tokens")
.and_then(|v| v.as_i64())
.unwrap_or(0);
total_input += usage
.get("cache_creation_input_tokens")
.and_then(|v| v.as_i64())
.unwrap_or(0);
total_input += usage
.get("cache_read_input_tokens")
.and_then(|v| v.as_i64())
.unwrap_or(0);
total_output += usage
.get("output_tokens")
.and_then(|v| v.as_i64())
.unwrap_or(0);
}
messages.push(msg);
}
}
_ => {} // attachments, ai-title, queue-operation, etc.
}
}
let name = ai_title
.or_else(|| first_user_text.as_deref().map(super::summarize_first_line))
.unwrap_or_else(|| format!("Imported Claude Code session {}", session_id));
let working_dir = if cwd.is_empty() {
std::env::current_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|_| ".".to_string())
} else {
cwd
};
let created_at = first_ts.unwrap_or_else(Utc::now);
let updated_at = last_ts.unwrap_or(created_at);
let conversation = Conversation::new_unvalidated(messages);
let session_json = build_session_json(
&session_id,
&working_dir,
&name,
created_at,
updated_at,
Some(total_input as i32),
Some(total_output as i32),
conversation,
);
serde_json::to_string_pretty(&session_json).map_err(Into::into)
}
/// Build a goose user message from a Claude Code `user` line.
///
/// `message.content` may be a plain string or an array of blocks. `text`,
/// `tool_result` and `image` blocks are mapped; any other block type is
/// ignored. Returns `None` when the line has no `message.content` or when no
/// content ended up on the message.
fn convert_user_message(line: &Value, timestamp: Option<DateTime<Utc>>) -> Option<Message> {
    let content = line.get("message")?.get("content")?;

    // Claude Code puts tool results inside `user` messages as `tool_result`
    // blocks; goose also attaches tool responses to a user-role message, so
    // they are kept on the same message here.
    let mut msg = Message::user();
    msg.created = match timestamp {
        Some(t) => t.timestamp(),
        None => Utc::now().timestamp(),
    };

    match content {
        Value::String(text) => {
            msg = msg.with_text(text.clone());
        }
        Value::Array(blocks) => {
            for block in blocks {
                let str_field = |key: &str| block.get(key).and_then(|v| v.as_str());
                match str_field("type").unwrap_or("") {
                    "text" => {
                        if let Some(text) = str_field("text") {
                            msg = msg.with_text(text);
                        }
                    }
                    "tool_result" => {
                        let id = str_field("tool_use_id").unwrap_or("").to_string();
                        let is_error = block
                            .get("is_error")
                            .and_then(|v| v.as_bool())
                            .unwrap_or(false);
                        let result = build_tool_result(block.get("content"), is_error);
                        msg = msg.with_tool_response(id, result);
                    }
                    "image" => {
                        // Both the payload and its media type are required.
                        let source = block.get("source");
                        let data = source
                            .and_then(|s| s.get("data"))
                            .and_then(|v| v.as_str());
                        let mime = source
                            .and_then(|s| s.get("media_type"))
                            .and_then(|v| v.as_str());
                        if let (Some(data), Some(mime)) = (data, mime) {
                            msg = msg.with_image(data, mime);
                        }
                    }
                    _ => {}
                }
            }
        }
        _ => {}
    }

    if msg.content.is_empty() {
        None
    } else {
        Some(msg)
    }
}
/// Build a goose assistant message from a Claude Code `assistant` line.
///
/// `message.content` must be an array of blocks. Non-empty `text` and
/// `thinking` blocks and all `tool_use` blocks are mapped; other block types
/// are ignored. Returns `None` when the content array is missing or nothing
/// was added to the message.
fn convert_assistant_message(line: &Value, timestamp: Option<DateTime<Utc>>) -> Option<Message> {
    let blocks = line.get("message")?.get("content")?.as_array()?;

    let mut msg = Message::assistant();
    msg.created = match timestamp {
        Some(t) => t.timestamp(),
        None => Utc::now().timestamp(),
    };

    for block in blocks {
        let str_field = |key: &str| block.get(key).and_then(|v| v.as_str());
        match str_field("type").unwrap_or("") {
            "text" => {
                if let Some(text) = str_field("text").filter(|t| !t.is_empty()) {
                    msg = msg.with_text(text);
                }
            }
            "thinking" => {
                let thought = str_field("thinking").unwrap_or("");
                let signature = str_field("signature").unwrap_or("");
                if !thought.is_empty() {
                    msg = msg.with_thinking(thought, signature);
                }
            }
            "tool_use" => {
                let id = str_field("id").unwrap_or("").to_string();
                let tool_name = str_field("name").unwrap_or("unknown_tool");
                // A missing or non-object `input` becomes an empty argument map.
                let arguments = block
                    .get("input")
                    .and_then(|v| v.as_object())
                    .cloned()
                    .unwrap_or_default();
                let params =
                    CallToolRequestParams::new(tool_name.to_string()).with_arguments(arguments);
                msg = msg.with_tool_request(id, Ok(params));
            }
            _ => {}
        }
    }

    if msg.content.is_empty() {
        None
    } else {
        Some(msg)
    }
}
/// Flatten a Claude Code `tool_result` payload into a single text result.
///
/// A string payload is used verbatim. An array payload is rendered block by
/// block and joined with newlines: `text` blocks contribute their text,
/// `tool_reference` blocks a `[tool_reference: NAME]` marker, and any other
/// block its JSON serialization. `text`/`tool_reference` blocks missing their
/// string field are dropped. Any other JSON value is stringified; an absent
/// payload yields an empty string.
///
/// With `is_error` set the text is returned as an internal-error `ErrorData`,
/// otherwise as a successful `CallToolResult`.
fn build_tool_result(content: Option<&Value>, is_error: bool) -> Result<CallToolResult, ErrorData> {
    let text = match content {
        None => String::new(),
        Some(Value::String(raw)) => raw.clone(),
        Some(Value::Array(blocks)) => {
            let mut parts: Vec<String> = Vec::new();
            for block in blocks {
                let kind = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
                let rendered = match kind {
                    "text" => block
                        .get("text")
                        .and_then(|v| v.as_str())
                        .map(str::to_string),
                    "tool_reference" => block
                        .get("tool_name")
                        .and_then(|v| v.as_str())
                        .map(|n| format!("[tool_reference: {}]", n)),
                    _ => Some(serde_json::to_string(block).unwrap_or_default()),
                };
                if let Some(part) = rendered {
                    parts.push(part);
                }
            }
            parts.join("\n")
        }
        Some(other) => other.to_string(),
    };

    if is_error {
        return Err(ErrorData::new(ErrorCode::INTERNAL_ERROR, text, None));
    }
    Ok(CallToolResult::success(vec![Content::text(text)]))
}
/// Return a copy of the first text content item in `msg`, or `None` when the
/// message carries no text content.
fn extract_first_text(msg: &Message) -> Option<String> {
    use crate::conversation::message::MessageContent;

    msg.content.iter().find_map(|item| match item {
        MessageContent::Text(t) => Some(t.text.clone()),
        _ => None,
    })
}
/// Assemble the goose session JSON object for an imported conversation.
///
/// The total token count (and its `accumulated_*` twin) is only present when
/// both the input and the output count are known; otherwise it is `null`.
/// Fields the importer has no data for (cost, schedule, recipe, provider,
/// model config, archive/project ids) are written as `null`.
///
/// # Panics
///
/// Panics if `conversation` cannot be converted to a JSON value.
// The arguments mirror the individual session fields one-to-one.
#[allow(clippy::too_many_arguments)]
fn build_session_json(
    session_id: &str,
    working_dir: &str,
    name: &str,
    created_at: DateTime<Utc>,
    updated_at: DateTime<Utc>,
    input_tokens: Option<i32>,
    output_tokens: Option<i32>,
    conversation: Conversation,
) -> Value {
    // Saturate rather than overflow: two large i32 counts can exceed
    // i32::MAX, which would panic in debug builds and wrap in release.
    let total = match (input_tokens, output_tokens) {
        (Some(a), Some(b)) => Some(a.saturating_add(b)),
        _ => None,
    };

    let mut obj = Map::new();
    obj.insert("id".into(), json!(session_id));
    obj.insert("working_dir".into(), json!(working_dir));
    obj.insert("name".into(), json!(name));
    obj.insert("user_set_name".into(), json!(false));
    obj.insert("session_type".into(), json!("user"));
    obj.insert("created_at".into(), json!(created_at.to_rfc3339()));
    obj.insert("updated_at".into(), json!(updated_at.to_rfc3339()));
    obj.insert("extension_data".into(), json!({}));
    obj.insert("total_tokens".into(), json!(total));
    obj.insert("input_tokens".into(), json!(input_tokens));
    obj.insert("output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_total_tokens".into(), json!(total));
    obj.insert("accumulated_input_tokens".into(), json!(input_tokens));
    obj.insert("accumulated_output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_cost".into(), json!(null));
    obj.insert("schedule_id".into(), json!(null));
    obj.insert("recipe".into(), json!(null));
    obj.insert("user_recipe_values".into(), json!(null));
    obj.insert(
        "conversation".into(),
        serde_json::to_value(&conversation).unwrap(),
    );
    obj.insert("message_count".into(), json!(conversation.messages().len()));
    obj.insert("provider_name".into(), json!(null));
    obj.insert("model_config".into(), json!(null));
    obj.insert("goose_mode".into(), json!("auto"));
    obj.insert("archived_at".into(), json!(null));
    obj.insert("project_id".into(), json!(null));
    Value::Object(obj)
}
// Unit tests for the Claude Code converter. Fixtures are inline JSONL raw
// strings, one transcript line per physical line.
#[cfg(test)]
mod tests {
use super::*;
// A user prompt, an assistant `tool_use`, and a user `tool_result` should
// become three messages, with the tool call and its response mapped to
// goose's `toolRequest` / `toolResponse` content items.
#[test]
fn converts_tool_use_and_result() {
let jsonl = r#"{"type":"user","sessionId":"s","uuid":"u1","timestamp":"2026-01-01T00:00:00.000Z","cwd":"/tmp","message":{"role":"user","content":"do it"}}
{"type":"assistant","sessionId":"s","uuid":"u2","timestamp":"2026-01-01T00:00:01.000Z","cwd":"/tmp","message":{"role":"assistant","content":[{"type":"tool_use","id":"toolu_1","name":"bash","input":{"command":"ls"}}]}}
{"type":"user","sessionId":"s","uuid":"u3","timestamp":"2026-01-01T00:00:02.000Z","cwd":"/tmp","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_1","content":[{"type":"text","text":"file.txt"}]}]}}"#;
let json = convert(jsonl).unwrap();
let v: Value = serde_json::from_str(&json).unwrap();
// The test expects `conversation` to serialize as a JSON array of messages.
let msgs = v["conversation"].as_array().unwrap();
assert_eq!(msgs.len(), 3);
// assistant message should contain a toolRequest
let assistant = &msgs[1];
let content = assistant["content"].as_array().unwrap();
assert!(content.iter().any(|c| c["type"] == "toolRequest"));
// user response should contain a toolResponse
let resp = &msgs[2];
let content = resp["content"].as_array().unwrap();
assert!(content.iter().any(|c| c["type"] == "toolResponse"));
}
// Line types other than `user` / `assistant` (here `attachment` and
// `queue-operation`) must not produce messages.
#[test]
fn skips_unknown_lines() {
let jsonl = r#"{"type":"attachment","sessionId":"s","uuid":"u0","timestamp":"2026-01-01T00:00:00Z"}
{"type":"queue-operation","sessionId":"s","timestamp":"2026-01-01T00:00:00Z"}
{"type":"user","sessionId":"s","uuid":"u1","timestamp":"2026-01-01T00:00:01Z","cwd":"/tmp","message":{"role":"user","content":"hi"}}"#;
let json = convert(jsonl).unwrap();
let v: Value = serde_json::from_str(&json).unwrap();
assert_eq!(v["message_count"], 1);
}
}
@@ -0,0 +1,401 @@
//! Converter for Codex (OpenAI) `.jsonl` rollout files.
//!
//! Codex stores sessions under `~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl`.
//! Each line is `{"timestamp":..,"type":..,"payload":{..}}` with these
//! top-level `type`s:
//!
//! - `session_meta` — header (cwd, id, model, instructions, …)
//! - `response_item` — the real conversation: `message`, `reasoning`,
//! `function_call`, `function_call_output`, `web_search_call`, …
//! - `event_msg` — UI events (`task_started`, `agent_message`, `web_search_end`).
//! Redundant with `response_item`; skipped except to harvest token usage.
//! - `turn_context`, `compacted`, … — metadata, skipped.
//!
//! Assistant-side `response_item` payloads (`message` with `role:"assistant"`,
//! `reasoning`, `function_call`) reuse the existing OpenAI Responses API
//! types from `providers::formats::openai_responses` — so we get argument
//! parsing, reasoning summary handling, and schema validation for free.
//! User-side items (`message` with `role:"user"`, `function_call_output`,
//! `web_search_call`) are rollout-specific and handled locally.
use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use rmcp::model::{CallToolRequestParams, CallToolResult, Content};
use serde_json::{json, Map, Value};
use crate::conversation::message::Message;
use crate::conversation::Conversation;
use crate::providers::formats::openai_responses::{ResponseOutputItem, ResponsesApiResponse};
/// Convert a Codex `.jsonl` rollout into a pretty-printed goose session JSON
/// string.
///
/// Blank lines and lines that are not valid JSON are ignored. The working
/// directory and session id come from the first `session_meta` line. Only
/// `response_item` lines produce messages; `event_msg` lines are read solely
/// for `payload.usage` token counts, and every other line type is skipped.
/// The session name is derived from the first user text that is not a
/// harness-injected context blob.
///
/// # Errors
///
/// Returns an error when no line parses as JSON, or when the resulting session
/// object cannot be serialized.
pub fn convert(content: &str) -> Result<String> {
    let lines: Vec<Value> = content
        .lines()
        .filter(|l| !l.trim().is_empty())
        .filter_map(|l| serde_json::from_str::<Value>(l).ok())
        .collect();
    if lines.is_empty() {
        return Err(anyhow!("Codex import: no parseable JSON lines"));
    }

    let meta = lines
        .iter()
        .find(|v| v.get("type").and_then(|t| t.as_str()) == Some("session_meta"))
        .and_then(|v| v.get("payload"));
    let cwd = meta
        .and_then(|m| m.get("cwd"))
        .and_then(|v| v.as_str())
        .unwrap_or("")
        .to_string();
    let session_id = meta
        .and_then(|m| m.get("id"))
        .and_then(|v| v.as_str())
        .unwrap_or("imported")
        .to_string();

    let mut messages: Vec<Message> = Vec::new();
    let mut first_ts: Option<DateTime<Utc>> = None;
    let mut last_ts: Option<DateTime<Utc>> = None;
    let mut first_user_text: Option<String> = None;
    let mut total_input: i64 = 0;
    let mut total_output: i64 = 0;

    for line in &lines {
        let line_type = line.get("type").and_then(|v| v.as_str()).unwrap_or("");
        let timestamp = line
            .get("timestamp")
            .and_then(|v| v.as_str())
            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
            .map(|dt| dt.with_timezone(&Utc));
        if let Some(ts) = timestamp {
            first_ts.get_or_insert(ts);
            last_ts = Some(ts);
        }

        // UI events duplicate `response_item` content; only harvest usage.
        if line_type == "event_msg" {
            if let Some(usage) = line
                .get("payload")
                .and_then(|p| p.get("usage"))
                .and_then(|u| u.as_object())
            {
                total_input += usage
                    .get("input_tokens")
                    .and_then(|v| v.as_i64())
                    .unwrap_or(0);
                total_output += usage
                    .get("output_tokens")
                    .and_then(|v| v.as_i64())
                    .unwrap_or(0);
            }
            continue;
        }
        if line_type != "response_item" {
            continue;
        }
        let Some(payload) = line.get("payload") else {
            continue;
        };
        let pt = payload.get("type").and_then(|v| v.as_str()).unwrap_or("");
        let created = timestamp
            .map(|t| t.timestamp())
            .unwrap_or_else(|| Utc::now().timestamp());

        // Role-bearing items that are not assistant output are handled
        // locally: developer/system prompts are dropped, user messages are
        // reduced to their text.
        if let Some(role) = payload.get("role").and_then(|v| v.as_str()) {
            if role == "developer" || role == "system" {
                continue; // harness-injected prompts, skip
            }
            if role == "user" {
                let text = collect_user_text(payload.get("content"));
                if !text.trim().is_empty() {
                    if first_user_text.is_none() && !is_context_blob(&text) {
                        first_user_text = Some(text.clone());
                    }
                    let mut msg = Message::user();
                    msg.created = created;
                    msg = msg.with_text(text);
                    messages.push(msg);
                }
                continue;
            }
        }

        // Next try the provider-defined Responses API types, which cover the
        // assistant-side output items. Payloads that fail to deserialize or
        // decode to no content fall through to the local handling below.
        if let Ok(item) = serde_json::from_value::<ResponseOutputItem>(payload.clone()) {
            // Wrap the single item in a stub `ResponsesApiResponse` so we can
            // reuse the existing decoder without duplicating its logic.
            let stub = ResponsesApiResponse {
                id: session_id.clone(),
                object: "response".to_string(),
                created_at: created,
                status: "completed".to_string(),
                model: String::new(),
                output: vec![item],
                reasoning: None,
                usage: None,
            };
            if let Ok(decoded) =
                crate::providers::formats::openai_responses::responses_api_to_message(&stub)
            {
                if !decoded.content.is_empty() {
                    let mut msg = Message::assistant();
                    msg.created = created;
                    for c in decoded.content {
                        msg.content.push(c);
                    }
                    messages.push(msg);
                    continue;
                }
            }
        }

        // Items handled locally: function_call_output, web_search_call.
        match pt {
            "function_call_output" => {
                let call_id = payload
                    .get("call_id")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                // Keep structured (non-string) outputs as their JSON text
                // instead of silently replacing them with an empty string.
                let output = match payload.get("output") {
                    Some(Value::String(s)) => s.clone(),
                    Some(Value::Null) | None => String::new(),
                    Some(other) => other.to_string(),
                };
                let mut msg = Message::user();
                msg.created = created;
                msg = msg.with_tool_response(
                    call_id,
                    Ok(CallToolResult::success(vec![Content::text(output)])),
                );
                messages.push(msg);
            }
            "web_search_call" => {
                let action = payload.get("action");
                let query = action
                    .and_then(|a| a.get("query"))
                    .and_then(|v| v.as_str())
                    .unwrap_or("");
                let url = action
                    .and_then(|a| a.get("url"))
                    .and_then(|v| v.as_str())
                    .unwrap_or("");
                let mut args = Map::new();
                if !query.is_empty() {
                    args.insert("query".into(), json!(query));
                }
                if !url.is_empty() {
                    args.insert("url".into(), json!(url));
                }
                // `created` only has one-second resolution, so the current
                // message index is appended to keep synthetic ids unique when
                // several searches share a timestamp.
                let id = format!("codex_websearch_{}_{}", created, messages.len());
                let params =
                    CallToolRequestParams::new("web_search".to_string()).with_arguments(args);
                let mut req = Message::assistant();
                req.created = created;
                req = req.with_tool_request(id.clone(), Ok(params));
                messages.push(req);

                let status = payload
                    .get("status")
                    .and_then(|v| v.as_str())
                    .unwrap_or("completed");
                let mut resp = Message::user();
                resp.created = created;
                resp = resp.with_tool_response(
                    id,
                    Ok(CallToolResult::success(vec![Content::text(format!(
                        "[web_search {}]",
                        status
                    ))])),
                );
                messages.push(resp);
            }
            _ => {}
        }
    }
    messages.retain(|m| !m.content.is_empty());

    let working_dir = if cwd.is_empty() {
        std::env::current_dir()
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or_else(|_| ".".to_string())
    } else {
        cwd
    };
    let name = first_user_text
        .as_deref()
        .map(super::summarize_first_line)
        .unwrap_or_else(|| format!("Imported Codex session {}", session_id));
    let created_at = first_ts.unwrap_or_else(Utc::now);
    let updated_at = last_ts.unwrap_or(created_at);

    // The session schema stores token counts as i32. A plain `as i32` cast
    // would silently wrap on very large totals, so saturate instead.
    let to_i32 = |n: i64| n.clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32;

    let conversation = Conversation::new_unvalidated(messages);
    let session_json = build_session_json(
        &session_id,
        &working_dir,
        &name,
        created_at,
        updated_at,
        // Non-positive totals are reported as unknown rather than zero.
        if total_input > 0 {
            Some(to_i32(total_input))
        } else {
            None
        },
        if total_output > 0 {
            Some(to_i32(total_output))
        } else {
            None
        },
        conversation,
    );
    serde_json::to_string_pretty(&session_json).map_err(Into::into)
}
/// Extract the plain text of a user message's `content`.
///
/// For an array, the `text` of every `input_text`, `text` or `output_text`
/// block is collected and joined with newlines. For anything else the value
/// is used if it is a string; otherwise the result is empty.
fn collect_user_text(content: Option<&Value>) -> String {
    match content {
        Some(Value::Array(blocks)) => blocks
            .iter()
            .filter(|block| {
                let kind = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
                matches!(kind, "input_text" | "text" | "output_text")
            })
            .filter_map(|block| block.get("text").and_then(|v| v.as_str()))
            .collect::<Vec<_>>()
            .join("\n"),
        other => other.and_then(|v| v.as_str()).unwrap_or("").to_string(),
    }
}
/// Heuristic check for harness-injected context.
///
/// Codex's first "user" message is often an `<environment_context>` /
/// AGENTS.md blob written by the harness rather than the user's own prompt.
/// Such messages stay in the transcript but make a poor session name, so this
/// reports whether `text` (ignoring leading whitespace) starts with one of the
/// known markers.
fn is_context_blob(text: &str) -> bool {
    const HARNESS_PREFIXES: [&str; 4] = [
        "<environment_context>",
        "<app-context>",
        "<permissions instructions>",
        "# AGENTS.md",
    ];
    let body = text.trim_start();
    HARNESS_PREFIXES
        .iter()
        .any(|prefix| body.starts_with(*prefix))
}
/// Assemble the goose session JSON object for an imported conversation.
///
/// The total token count (and its `accumulated_*` twin) is only present when
/// both the input and the output count are known; otherwise it is `null`.
/// Fields the importer has no data for (cost, schedule, recipe, provider,
/// model config, archive/project ids) are written as `null`.
///
/// # Panics
///
/// Panics if `conversation` cannot be converted to a JSON value.
// The arguments mirror the individual session fields one-to-one.
#[allow(clippy::too_many_arguments)]
fn build_session_json(
    session_id: &str,
    working_dir: &str,
    name: &str,
    created_at: DateTime<Utc>,
    updated_at: DateTime<Utc>,
    input_tokens: Option<i32>,
    output_tokens: Option<i32>,
    conversation: Conversation,
) -> Value {
    // Saturate rather than overflow: two large i32 counts can exceed
    // i32::MAX, which would panic in debug builds and wrap in release.
    let total = match (input_tokens, output_tokens) {
        (Some(a), Some(b)) => Some(a.saturating_add(b)),
        _ => None,
    };

    let mut obj = Map::new();
    obj.insert("id".into(), json!(session_id));
    obj.insert("working_dir".into(), json!(working_dir));
    obj.insert("name".into(), json!(name));
    obj.insert("user_set_name".into(), json!(false));
    obj.insert("session_type".into(), json!("user"));
    obj.insert("created_at".into(), json!(created_at.to_rfc3339()));
    obj.insert("updated_at".into(), json!(updated_at.to_rfc3339()));
    obj.insert("extension_data".into(), json!({}));
    obj.insert("total_tokens".into(), json!(total));
    obj.insert("input_tokens".into(), json!(input_tokens));
    obj.insert("output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_total_tokens".into(), json!(total));
    obj.insert("accumulated_input_tokens".into(), json!(input_tokens));
    obj.insert("accumulated_output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_cost".into(), json!(null));
    obj.insert("schedule_id".into(), json!(null));
    obj.insert("recipe".into(), json!(null));
    obj.insert("user_recipe_values".into(), json!(null));
    obj.insert(
        "conversation".into(),
        serde_json::to_value(&conversation).unwrap(),
    );
    obj.insert(
        "message_count".into(),
        json!(conversation.messages().len()),
    );
    obj.insert("provider_name".into(), json!(null));
    obj.insert("model_config".into(), json!(null));
    obj.insert("goose_mode".into(), json!("auto"));
    obj.insert("archived_at".into(), json!(null));
    obj.insert("project_id".into(), json!(null));
    Value::Object(obj)
}
// Unit tests for the Codex converter. Fixtures are inline JSONL raw strings,
// one rollout line per physical line.
#[cfg(test)]
mod tests {
use super::*;
// `developer` (and `system`) role messages are harness prompts: they must
// not appear in the transcript or influence the session name.
#[test]
fn skips_developer_and_system_messages() {
let jsonl = r#"{"timestamp":"2026-05-22T13:37:22.526Z","type":"session_meta","payload":{"id":"abc","cwd":"/tmp"}}
{"timestamp":"2026-05-22T13:37:23.000Z","type":"response_item","payload":{"type":"message","role":"developer","content":[{"type":"input_text","text":"<huge system prompt>"}]}}
{"timestamp":"2026-05-22T13:37:23.946Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"the real question"}]}}"#;
let json = convert(jsonl).unwrap();
let v: Value = serde_json::from_str(&json).unwrap();
assert_eq!(v["message_count"], 1);
assert_eq!(v["name"], "the real question");
}
// A `function_call` followed by its `function_call_output` should become
// an assistant tool request and a user tool response.
#[test]
fn converts_function_call_and_output() {
let jsonl = r#"{"timestamp":"2026-05-22T13:37:22Z","type":"session_meta","payload":{"id":"s","cwd":"/w"}}
{"timestamp":"2026-05-22T13:37:23Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"run ls"}]}}
{"timestamp":"2026-05-22T13:37:24Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"ls\"}","call_id":"call_1"}}
{"timestamp":"2026-05-22T13:37:25Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_1","output":"file.txt\n"}}"#;
let json = convert(jsonl).unwrap();
let v: Value = serde_json::from_str(&json).unwrap();
// The test expects `conversation` to serialize as a JSON array of messages.
let msgs = v["conversation"].as_array().unwrap();
assert_eq!(msgs.len(), 3);
// assistant message with a tool request, decoded via the provider
// crate so arguments-as-JSON-string is parsed automatically
let req_block = msgs[1]["content"]
.as_array()
.unwrap()
.iter()
.find(|c| c["type"] == "toolRequest")
.expect("expected a toolRequest");
assert_eq!(req_block["toolCall"]["status"], "success");
assert_eq!(req_block["toolCall"]["value"]["arguments"]["cmd"], "ls");
// user message with the tool response
assert!(msgs[2]["content"]
.as_array()
.unwrap()
.iter()
.any(|c| c["type"] == "toolResponse"));
}
// A leading `<environment_context>` blob stays in the transcript (two
// messages) but the session name comes from the next real user prompt.
#[test]
fn first_user_text_skips_context_blobs() {
let jsonl = r#"{"timestamp":"2026-05-22T13:37:22Z","type":"session_meta","payload":{"id":"s","cwd":"/w"}}
{"timestamp":"2026-05-22T13:37:23Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"<environment_context>\n <cwd>/w</cwd>\n</environment_context>"}]}}
{"timestamp":"2026-05-22T13:37:24Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"actual prompt"}]}}"#;
let json = convert(jsonl).unwrap();
let v: Value = serde_json::from_str(&json).unwrap();
assert_eq!(v["name"], "actual prompt");
assert_eq!(v["message_count"], 2);
}
}
@@ -0,0 +1,122 @@
//! Importers for non-goose session formats.
//!
//! Goose's native session export is a JSON-serialized [`crate::session::Session`].
//! These submodules let users also import sessions exported by other coding
//! agents — currently:
//!
//! - **Claude Code** (`.jsonl` files under `~/.claude/projects/...`)
//! - **Codex** (`.jsonl` rollouts under `~/.codex/sessions/YYYY/MM/DD/...`)
//! - **Pi** (`.jsonl` files under `~/.pi/agent/sessions/...`)
//!
//! The strategy is to convert any supported foreign format into goose's
//! native [`Session`](crate::session::Session) JSON, then hand it off to the
//! existing `SessionManager::import_session` pipeline.
use anyhow::Result;
pub mod claude_code;
pub mod codex;
pub mod pi;
/// Detected import source format.
///
/// Returned by [`detect_format`] and used by
/// [`convert_to_goose_session_json`] to choose which converter to run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImportFormat {
/// Native goose session export — a JSON object representing a `Session`.
/// Also the result when no other format is recognized.
Goose,
/// Claude Code `.jsonl` transcript (one JSON object per line, no header).
/// Recognized by the `sessionId` field on its lines.
ClaudeCode,
/// Codex (OpenAI) `.jsonl` rollout file. First line is `{"type":"session_meta",...}`.
Codex,
/// Pi-mono `.jsonl` transcript (first line is `{"type":"session",...}` header).
Pi,
}
/// Sniff the format of an import payload.
///
/// Checks run in this order, and the first match wins:
///
/// 1. The first non-blank line, parsed on its own as JSON:
///    - `"type": "session_meta"` → [`ImportFormat::Codex`].
///    - `"type": "session"` with either a `version` field or both `cwd` and
///      `id` → [`ImportFormat::Pi`].
///    - an object with `sessionId` plus `type` or `uuid` →
///      [`ImportFormat::ClaudeCode`].
/// 2. The whole payload, parsed as a single JSON value with a top-level
///    `working_dir` or `workingDir` → [`ImportFormat::Goose`].
/// 3. Any of the first five non-blank lines parsing as JSON with a
///    `sessionId` → [`ImportFormat::ClaudeCode`].
///
/// Anything else is reported as [`ImportFormat::Goose`] so that the native
/// import path gets to produce the error.
pub fn detect_format(content: &str) -> ImportFormat {
    let first_line = content.lines().find(|l| !l.trim().is_empty()).unwrap_or("");
    if let Ok(v) = serde_json::from_str::<serde_json::Value>(first_line) {
        let line_type = v.get("type").and_then(|t| t.as_str());

        // Codex rollouts always start with `{"type":"session_meta",...}`.
        if line_type == Some("session_meta") {
            return ImportFormat::Codex;
        }
        // Pi sessions start with a `{"type":"session",...}` header. Older
        // fixtures lack `version` but always have `cwd` + `id`.
        if line_type == Some("session")
            && (v.get("version").is_some() || (v.get("cwd").is_some() && v.get("id").is_some()))
        {
            return ImportFormat::Pi;
        }
        // Claude Code lines carry a sessionId. A pretty-printed goose export
        // never gets here: its first line is a lone `{`, which does not parse.
        if v.is_object()
            && v.get("sessionId").is_some()
            && (v.get("type").is_some() || v.get("uuid").is_some())
        {
            return ImportFormat::ClaudeCode;
        }
    }

    // A goose export is one JSON document spanning the whole payload; only
    // the presence of the working-directory key matters, so nothing is cloned.
    let looks_like_goose = serde_json::from_str::<serde_json::Value>(content)
        .map(|v| {
            v.get("working_dir")
                .or_else(|| v.get("workingDir"))
                .is_some()
        })
        .unwrap_or(false);
    if looks_like_goose {
        return ImportFormat::Goose;
    }

    // Fallback for transcripts whose first line is not a regular Claude Code
    // event: accept a `sessionId` on any of the first five non-blank lines.
    let saw_claude_marker = content
        .lines()
        .filter(|l| !l.trim().is_empty())
        .take(5)
        .filter_map(|l| serde_json::from_str::<serde_json::Value>(l).ok())
        .any(|v| v.get("sessionId").is_some());
    if saw_claude_marker {
        return ImportFormat::ClaudeCode;
    }

    ImportFormat::Goose
}
/// Turn an import payload of any supported format into goose-native session
/// JSON.
///
/// The format is sniffed with [`detect_format`]. Payloads detected as
/// [`ImportFormat::Goose`] are returned unchanged; every other format is
/// handed to its converter, whose error (if any) is returned as-is.
pub fn convert_to_goose_session_json(content: &str) -> Result<String> {
    let converter: fn(&str) -> Result<String> = match detect_format(content) {
        ImportFormat::Goose => return Ok(content.to_string()),
        ImportFormat::ClaudeCode => claude_code::convert,
        ImportFormat::Codex => codex::convert,
        ImportFormat::Pi => pi::convert,
    };
    converter(content)
}
/// Derive a short session-name candidate from free text.
///
/// Uses the first line that is not blank (or the whole input when every line
/// is blank), trimmed of surrounding whitespace. Results longer than 80
/// characters are cut to 77 characters followed by `...`. Lengths are counted
/// in `char`s, so multi-byte text is never split mid-character.
pub(crate) fn summarize_first_line(s: &str) -> String {
    const MAX_CHARS: usize = 80;
    const ELLIPSIS: &str = "...";

    let candidate = s
        .lines()
        .find(|l| !l.trim().is_empty())
        .unwrap_or(s)
        .trim();

    if candidate.chars().count() > MAX_CHARS {
        let mut shortened: String = candidate
            .chars()
            .take(MAX_CHARS - ELLIPSIS.len())
            .collect();
        shortened.push_str(ELLIPSIS);
        return shortened;
    }
    candidate.to_string()
}
@@ -0,0 +1,423 @@
//! Converter for pi-mono `.jsonl` session files.
//!
//! Pi sessions start with a header line `{"type":"session","version":N,"cwd":..}`
//! followed by entries with `type` in `{message, model_change, compaction,
//! branch_summary, thinking_level_change, custom, ...}`. The interesting
//! ones for replay-in-goose are `message`, whose `message` field carries an
//! `AgentMessage` (`role` is one of `user`, `assistant`, `toolResult`,
//! `bashExecution`, ...).
//!
//! Format reference: pi-mono `packages/coding-agent/docs/session.md`.
use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use rmcp::model::{CallToolRequestParams, CallToolResult, Content, ErrorCode, ErrorData};
use serde_json::{json, Map, Value};
use crate::conversation::message::Message;
use crate::conversation::Conversation;
pub fn convert(content: &str) -> Result<String> {
let mut lines = content.lines().filter(|l| !l.trim().is_empty());
let header: Value = match lines.next() {
Some(l) => serde_json::from_str(l)
.map_err(|e| anyhow!("Pi import: header is not valid JSON: {e}"))?,
None => return Err(anyhow!("Pi import: empty file")),
};
if header.get("type").and_then(|v| v.as_str()) != Some("session") {
return Err(anyhow!("Pi import: missing session header"));
}
let cwd = header
.get("cwd")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let session_id = header
.get("id")
.and_then(|v| v.as_str())
.unwrap_or("imported")
.to_string();
let header_ts = header
.get("timestamp")
.and_then(|v| v.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|dt| dt.with_timezone(&Utc));
let mut messages: Vec<Message> = Vec::new();
let mut total_input: i64 = 0;
let mut total_output: i64 = 0;
let mut total_cost: f64 = 0.0;
let mut first_ts: Option<DateTime<Utc>> = header_ts;
let mut last_ts: Option<DateTime<Utc>> = header_ts;
let mut first_user_text: Option<String> = None;
let entries: Vec<Value> = lines
.filter_map(|l| serde_json::from_str::<Value>(l).ok())
.collect();
// Pi entries form a tree, but in practice the file is written in
// chronological order and the linear view is what users expect on import.
// We just walk top-to-bottom.
for entry in &entries {
let entry_type = entry.get("type").and_then(|v| v.as_str()).unwrap_or("");
let ts = entry
.get("timestamp")
.and_then(|v| v.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|dt| dt.with_timezone(&Utc));
if let Some(t) = ts {
first_ts.get_or_insert(t);
last_ts = Some(t);
}
if entry_type != "message" {
continue;
}
let Some(inner) = entry.get("message") else {
continue;
};
let role = inner.get("role").and_then(|v| v.as_str()).unwrap_or("");
let created = ts
.map(|t| t.timestamp())
.unwrap_or_else(|| Utc::now().timestamp());
if let Some(usage) = inner.get("usage").and_then(|u| u.as_object()) {
total_input += usage.get("input").and_then(|v| v.as_i64()).unwrap_or(0);
total_input += usage.get("cacheRead").and_then(|v| v.as_i64()).unwrap_or(0);
total_input += usage
.get("cacheWrite")
.and_then(|v| v.as_i64())
.unwrap_or(0);
total_output += usage.get("output").and_then(|v| v.as_i64()).unwrap_or(0);
if let Some(cost) = usage
.get("cost")
.and_then(|c| c.get("total"))
.and_then(|v| v.as_f64())
{
total_cost += cost;
}
}
match role {
"user" => {
let mut msg = Message::user();
msg.created = created;
msg = apply_user_content(msg, inner.get("content"));
if !msg.content.is_empty() {
if first_user_text.is_none() {
first_user_text = extract_first_text(&msg);
}
messages.push(msg);
}
}
"assistant" => {
let mut msg = Message::assistant();
msg.created = created;
msg = apply_assistant_content(msg, inner.get("content"));
if !msg.content.is_empty() {
messages.push(msg);
}
}
"toolResult" => {
let id = inner
.get("toolCallId")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let is_error = inner
.get("isError")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let result = build_tool_result(inner.get("content"), is_error);
let mut msg = Message::user();
msg.created = created;
msg = msg.with_tool_response(id, result);
messages.push(msg);
}
"bashExecution" => {
// Synthesize a bash tool round-trip so the export reads naturally.
let command = inner
.get("command")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let output = inner
.get("output")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let exit_code = inner.get("exitCode").and_then(|v| v.as_i64());
let mut args = Map::new();
args.insert("command".into(), json!(command));
let params = CallToolRequestParams::new("bash".to_string()).with_arguments(args);
let id = format!("pi_bash_{}", created);
let mut req = Message::assistant();
req.created = created;
req = req.with_tool_request(id.clone(), Ok(params));
messages.push(req);
let result_text = match exit_code {
Some(code) if code != 0 => format!("exit {}\n{}", code, output),
_ => output,
};
let mut resp = Message::user();
resp.created = created;
resp = resp.with_tool_response(
id,
Ok(CallToolResult::success(vec![Content::text(result_text)])),
);
messages.push(resp);
}
_ => {
// custom / branchSummary / compactionSummary — emit as text
// notes from the assistant so the context is preserved.
if let Some(s) = inner.get("summary").and_then(|v| v.as_str()) {
let mut msg = Message::assistant();
msg.created = created;
msg = msg.with_text(format!("[{}] {}", role, s));
messages.push(msg);
}
}
}
}
let working_dir = if cwd.is_empty() {
std::env::current_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|_| ".".to_string())
} else {
cwd
};
let name = first_user_text
.as_deref()
.map(super::summarize_first_line)
.unwrap_or_else(|| format!("Imported pi session {}", session_id));
let created_at = first_ts.unwrap_or_else(Utc::now);
let updated_at = last_ts.unwrap_or(created_at);
let conversation = Conversation::new_unvalidated(messages);
let session_json = build_session_json(
&session_id,
&working_dir,
&name,
created_at,
updated_at,
Some(total_input as i32),
Some(total_output as i32),
if total_cost > 0.0 {
Some(total_cost)
} else {
None
},
conversation,
);
serde_json::to_string_pretty(&session_json).map_err(Into::into)
}
/// Appends the content of a user message to `msg` and returns it.
///
/// `content` is either a plain JSON string, which becomes a single text
/// block, or an array of typed blocks. Within an array, `text` blocks with a
/// string `text` field are appended as text, and `image` blocks carrying both
/// string `data` and `mimeType` fields are appended as images. Every other
/// block type, and any other shape of `content`, is ignored.
fn apply_user_content(mut msg: Message, content: Option<&Value>) -> Message {
    /// Looks up `key` on `block` and returns it when it is a JSON string.
    fn str_field<'a>(block: &'a Value, key: &str) -> Option<&'a str> {
        block.get(key).and_then(|v| v.as_str())
    }

    let items = match content {
        Some(Value::String(text)) => return msg.with_text(text.clone()),
        Some(Value::Array(items)) => items,
        _ => return msg,
    };

    for item in items {
        match str_field(item, "type") {
            Some("text") => {
                if let Some(text) = str_field(item, "text") {
                    msg = msg.with_text(text);
                }
            }
            Some("image") => {
                // An image is only usable when both payload and MIME type are present.
                let payload = str_field(item, "data");
                let mime_type = str_field(item, "mimeType");
                if let (Some(data), Some(mime)) = (payload, mime_type) {
                    msg = msg.with_image(data, mime);
                }
            }
            _ => {}
        }
    }
    msg
}
/// Appends the content of an assistant message to `msg` and returns it.
///
/// `content` is either a plain JSON string (treated as one text block) or an
/// array of typed blocks. Recognized block types:
///
/// - `text`: the string `text` field is appended as text.
/// - `thinking`: the string `thinking` field is appended via `with_thinking`,
///   with an empty string as its second argument.
/// - `toolCall`: appended as a tool request built from `id` (default `""`),
///   `name` (default `"unknown_tool"`) and the `arguments` object (default
///   empty).
///
/// Empty text is skipped in both the plain-string and the array form, so a
/// message whose only content is an empty string comes back with no content
/// blocks instead of a single empty text block. Unknown block types and any
/// other shape of `content` are ignored.
fn apply_assistant_content(mut msg: Message, content: Option<&Value>) -> Message {
    let blocks = match content {
        Some(Value::Array(blocks)) => blocks,
        // Mirror the array branch below: never emit an empty text block.
        Some(Value::String(text)) if text.is_empty() => return msg,
        Some(Value::String(text)) => return msg.with_text(text.clone()),
        _ => return msg,
    };

    for block in blocks {
        let block_type = block.get("type").and_then(|v| v.as_str()).unwrap_or("");
        match block_type {
            "text" => {
                if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
                    if !text.is_empty() {
                        msg = msg.with_text(text);
                    }
                }
            }
            "thinking" => {
                let thinking = block.get("thinking").and_then(|v| v.as_str()).unwrap_or("");
                if !thinking.is_empty() {
                    msg = msg.with_thinking(thinking, "");
                }
            }
            "toolCall" => {
                let id = block
                    .get("id")
                    .and_then(|v| v.as_str())
                    .unwrap_or("")
                    .to_string();
                let name = block
                    .get("name")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown_tool");
                // Non-object (or missing) arguments fall back to an empty map.
                let args = block
                    .get("arguments")
                    .and_then(|v| v.as_object())
                    .cloned()
                    .unwrap_or_default();
                let params = CallToolRequestParams::new(name.to_string()).with_arguments(args);
                msg = msg.with_tool_request(id, Ok(params));
            }
            _ => {}
        }
    }
    msg
}
/// Converts the `content` of a tool-result message into a tool call outcome.
///
/// The content is flattened to one string: a JSON string is used as-is; an
/// array contributes one line per block (the `text` field for `text` blocks,
/// the block's JSON serialization for every other type); any other JSON value
/// is rendered with `to_string`; missing content yields an empty string.
///
/// Returns `Err` with an `INTERNAL_ERROR` code carrying that string when
/// `is_error` is set, otherwise a successful result with a single text item.
fn build_tool_result(content: Option<&Value>, is_error: bool) -> Result<CallToolResult, ErrorData> {
    let text = match content {
        None => String::new(),
        Some(Value::String(raw)) => raw.clone(),
        Some(Value::Array(blocks)) => {
            let mut parts: Vec<String> = Vec::new();
            for block in blocks {
                let is_text = block.get("type").and_then(|v| v.as_str()) == Some("text");
                if is_text {
                    // A `text` block without a string `text` field contributes nothing.
                    if let Some(body) = block.get("text").and_then(|v| v.as_str()) {
                        parts.push(body.to_string());
                    }
                } else {
                    parts.push(serde_json::to_string(block).unwrap_or_default());
                }
            }
            parts.join("\n")
        }
        Some(other) => other.to_string(),
    };

    if !is_error {
        return Ok(CallToolResult::success(vec![Content::text(text)]));
    }
    Err(ErrorData::new(ErrorCode::INTERNAL_ERROR, text, None))
}
/// Returns a copy of the first text block in `msg`, or `None` when the
/// message contains no text content.
fn extract_first_text(msg: &Message) -> Option<String> {
    use crate::conversation::message::MessageContent;
    msg.content.iter().find_map(|part| match part {
        MessageContent::Text(t) => Some(t.text.clone()),
        _ => None,
    })
}
/// Assembles the JSON object describing an imported session.
///
/// The object carries the session identity (`id`, `working_dir`, `name`),
/// RFC 3339 `created_at` / `updated_at` timestamps, token and cost totals,
/// the serialized `conversation` with its `message_count`, and a set of
/// fields that an import cannot know and therefore fills with fixed values
/// (`null`, `false`, `"user"`, `"auto"`, `{}`).
///
/// `total_tokens` is the sum of `input_tokens` and `output_tokens` when both
/// are present and `null` otherwise. The sum saturates at the `i32` bounds
/// rather than overflowing, so oversized counts can neither panic in debug
/// builds nor wrap to a negative total in release builds. The `accumulated_*`
/// fields repeat the same values as their non-accumulated counterparts.
///
/// # Panics
///
/// Panics if `conversation` cannot be serialized to a JSON value.
// Every argument maps to a distinct top-level field of the output object.
#[allow(clippy::too_many_arguments)]
fn build_session_json(
    session_id: &str,
    working_dir: &str,
    name: &str,
    created_at: DateTime<Utc>,
    updated_at: DateTime<Utc>,
    input_tokens: Option<i32>,
    output_tokens: Option<i32>,
    cost: Option<f64>,
    conversation: Conversation,
) -> Value {
    let total = match (input_tokens, output_tokens) {
        // Plain `a + b` would overflow for very large token counts.
        (Some(a), Some(b)) => Some(a.saturating_add(b)),
        _ => None,
    };
    let mut obj = Map::new();
    obj.insert("id".into(), json!(session_id));
    obj.insert("working_dir".into(), json!(working_dir));
    obj.insert("name".into(), json!(name));
    obj.insert("user_set_name".into(), json!(false));
    obj.insert("session_type".into(), json!("user"));
    obj.insert("created_at".into(), json!(created_at.to_rfc3339()));
    obj.insert("updated_at".into(), json!(updated_at.to_rfc3339()));
    obj.insert("extension_data".into(), json!({}));
    obj.insert("total_tokens".into(), json!(total));
    obj.insert("input_tokens".into(), json!(input_tokens));
    obj.insert("output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_total_tokens".into(), json!(total));
    obj.insert("accumulated_input_tokens".into(), json!(input_tokens));
    obj.insert("accumulated_output_tokens".into(), json!(output_tokens));
    obj.insert("accumulated_cost".into(), json!(cost));
    obj.insert("schedule_id".into(), json!(null));
    obj.insert("recipe".into(), json!(null));
    obj.insert("user_recipe_values".into(), json!(null));
    obj.insert(
        "conversation".into(),
        serde_json::to_value(&conversation).unwrap(),
    );
    obj.insert("message_count".into(), json!(conversation.messages().len()));
    obj.insert("provider_name".into(), json!(null));
    obj.insert("model_config".into(), json!(null));
    obj.insert("goose_mode".into(), json!("auto"));
    obj.insert("archived_at".into(), json!(null));
    obj.insert("project_id".into(), json!(null));
    Value::Object(obj)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `convert` on `jsonl` and returns the entries of the exported
    /// `conversation` array.
    fn exported_messages(jsonl: &str) -> Vec<Value> {
        let exported = convert(jsonl).unwrap();
        let session: Value = serde_json::from_str(&exported).unwrap();
        session["conversation"].as_array().unwrap().clone()
    }

    /// Reports whether any content item of `message` has the given `type`.
    fn has_content_of_type(message: &Value, kind: &str) -> bool {
        message["content"]
            .as_array()
            .unwrap()
            .iter()
            .any(|item| item["type"] == kind)
    }

    /// A tool call followed by its result yields a request and a response message.
    #[test]
    fn converts_tool_call_and_result() {
        let jsonl = r#"{"type":"session","version":3,"id":"s","timestamp":"2024-12-03T14:00:00.000Z","cwd":"/w"}
{"type":"message","id":"a","parentId":null,"timestamp":"2024-12-03T14:00:01.000Z","message":{"role":"user","content":"list files"}}
{"type":"message","id":"b","parentId":"a","timestamp":"2024-12-03T14:00:02.000Z","message":{"role":"assistant","content":[{"type":"toolCall","id":"t1","name":"bash","arguments":{"command":"ls"}}]}}
{"type":"message","id":"c","parentId":"b","timestamp":"2024-12-03T14:00:03.000Z","message":{"role":"toolResult","toolCallId":"t1","toolName":"bash","content":[{"type":"text","text":"a.txt\nb.txt"}],"isError":false}}"#;
        let messages = exported_messages(jsonl);
        assert_eq!(messages.len(), 3);
        assert!(has_content_of_type(&messages[1], "toolRequest"));
        assert!(has_content_of_type(&messages[2], "toolResponse"));
    }

    /// A `bashExecution` entry expands into two messages after the user prompt.
    #[test]
    fn synthesizes_bash_execution() {
        let jsonl = r#"{"type":"session","version":3,"id":"s","timestamp":"2024-12-03T14:00:00.000Z","cwd":"/w"}
{"type":"message","id":"a","parentId":null,"timestamp":"2024-12-03T14:00:01.000Z","message":{"role":"user","content":"!ls"}}
{"type":"message","id":"b","parentId":"a","timestamp":"2024-12-03T14:00:02.000Z","message":{"role":"bashExecution","command":"ls","output":"file.txt","exitCode":0,"cancelled":false,"truncated":false}}"#;
        let messages = exported_messages(jsonl);
        assert_eq!(messages.len(), 3);
    }
}
@@ -609,9 +609,6 @@ const SessionListView: React.FC<SessionListViewProps> = React.memo(
);
const handleImportClick = useCallback(async () => {
// Prefer the native picker: it can show hidden directories (so users can
// reach `~/.claude/projects/...` or `~/.pi/agent/sessions/...`) and we
// can filter for .json/.jsonl in one place.
const native = window.electron?.selectImportSessionFile;
if (typeof native === 'function') {
try {