diff --git a/cmd/entire/cli/transcript/compact/codex.go b/cmd/entire/cli/transcript/compact/codex.go new file mode 100644 index 000000000..7309a1585 --- /dev/null +++ b/cmd/entire/cli/transcript/compact/codex.go @@ -0,0 +1,396 @@ +package compact + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "strings" + + "github.com/entireio/cli/cmd/entire/cli/textutil" + "github.com/entireio/cli/cmd/entire/cli/transcript" +) + +const ( + codexTypeMessage = "message" + codexTypeFunctionCall = "function_call" + codexTypeFunctionCallOutput = "function_call_output" +) + +// isCodexFormat checks whether JSONL content uses the Codex format. +func isCodexFormat(content []byte) bool { + scanner := bufio.NewScanner(bytes.NewReader(content)) + scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024) + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + continue + } + var probe struct { + Type string `json:"type"` + } + if json.Unmarshal(line, &probe) != nil { + continue + } + return probe.Type == "session_meta" + } + if scanner.Err() != nil { + return false + } + return false +} + +// codexLine is the raw parsed form of one Codex JSONL line. +type codexLine struct { + Timestamp string `json:"timestamp"` + Type string `json:"type"` + Payload json.RawMessage `json:"payload"` +} + +// codexPayload captures the common fields across Codex payload types. +type codexPayload struct { + Type string `json:"type"` + Role string `json:"role"` + Content json.RawMessage `json:"content"` + Phase string `json:"phase"` + Name string `json:"name"` + Arguments string `json:"arguments"` + CallID string `json:"call_id"` + Output string `json:"output"` +} + +// compactCodex converts a Codex JSONL transcript into the compact format. +func compactCodex(content []byte, opts MetadataFields) ([]byte, error) { + lines, err := parseCodexLines(content) + if err != nil { + return nil, err + } + + if opts.StartLine > 0 { + lines = codexSliceFromResponseItem(lines, opts.StartLine) + if len(lines) == 0 { + return []byte{}, nil + } + } + + base := newTranscriptLine(opts) + var result []byte + var pendingInTok, pendingOutTok int + + for i := 0; i < len(lines); i++ { + cl := lines[i] + + // Consume token_count lines at the top level (e.g. before any assistant). + if isCodexTokenCountLine(cl) { + pendingInTok, pendingOutTok = codexTokenCount(cl.Payload) + continue + } + + var p codexPayload + if json.Unmarshal(cl.Payload, &p) != nil { + continue + } + + ts, err := json.Marshal(cl.Timestamp) + if err != nil { + continue + } + + switch { + case p.Type == codexTypeMessage && p.Role == "user": + text := codexUserText(p.Content) + if text == "" { + continue + } + contentJSON, err := json.Marshal([]userTextBlock{{Text: text}}) + if err != nil { + continue + } + line := base + line.Type = transcript.TypeUser + line.TS = ts + line.Content = contentJSON + appendLine(&result, line) + + case p.Type == codexTypeMessage && p.Role == "assistant": + text := codexAssistantText(p.Content) + if text == "" { + continue + } + + // Collect any function_calls that follow this assistant message. + var toolBlocks []map[string]json.RawMessage + inTok, outTok := pendingInTok, pendingOutTok + pendingInTok, pendingOutTok = 0, 0 + for i+1 < len(lines) { + next := lines[i+1] + if isCodexTokenCountLine(next) { + inTok, outTok = codexTokenCount(next.Payload) + i++ + continue + } + var np codexPayload + if json.Unmarshal(next.Payload, &np) != nil { + break + } + if np.Type == codexTypeFunctionCall { + tb := codexToolUseBlock(np) + i++ // consume the function_call line + // Skip token_count lines between function_call and output. + for i+1 < len(lines) && isCodexTokenCountLine(lines[i+1]) { + inTok, outTok = codexTokenCount(lines[i+1].Payload) + i++ + } + // Look ahead for the matching output. + if i+1 < len(lines) { + var outp codexPayload + if json.Unmarshal(lines[i+1].Payload, &outp) == nil && outp.Type == codexTypeFunctionCallOutput && outp.CallID == np.CallID { + tb["result"] = buildToolResult(toolResultEntry{output: outp.Output}) + i++ // consume the output line + } + } + toolBlocks = append(toolBlocks, tb) + continue + } + // function_call_output without a preceding function_call — skip. + if np.Type == codexTypeFunctionCallOutput { + i++ + continue + } + break + } + + contentArr := codexBuildContent(text, toolBlocks) + line := base + line.Type = transcript.TypeAssistant + line.TS = ts + line.InputTokens = inTok + line.OutputTokens = outTok + line.Content = contentArr + appendLine(&result, line) + + case p.Type == codexTypeFunctionCall: + // Standalone function_call not preceded by assistant text. + tb := codexToolUseBlock(p) + inTok, outTok := pendingInTok, pendingOutTok + pendingInTok, pendingOutTok = 0, 0 + // Skip token_count lines between function_call and output. + for i+1 < len(lines) && isCodexTokenCountLine(lines[i+1]) { + inTok, outTok = codexTokenCount(lines[i+1].Payload) + i++ + } + if i+1 < len(lines) { + var np codexPayload + if json.Unmarshal(lines[i+1].Payload, &np) == nil && np.Type == codexTypeFunctionCallOutput && np.CallID == p.CallID { + tb["result"] = buildToolResult(toolResultEntry{output: np.Output}) + i++ + } + } + // Also consume any trailing token_count. + for i+1 < len(lines) && isCodexTokenCountLine(lines[i+1]) { + inTok, outTok = codexTokenCount(lines[i+1].Payload) + i++ + } + contentArr, err := json.Marshal([]map[string]json.RawMessage{tb}) + if err != nil { + continue + } + line := base + line.Type = transcript.TypeAssistant + line.TS = ts + line.InputTokens = inTok + line.OutputTokens = outTok + line.Content = contentArr + appendLine(&result, line) + } + } + + return result, nil +} + +// parseCodexLines reads all JSONL lines, keeping response_item and +// token_count event_msg entries. +func parseCodexLines(content []byte) ([]codexLine, error) { + reader := bufio.NewReader(bytes.NewReader(content)) + var lines []codexLine + + for { + lineBytes, err := reader.ReadBytes('\n') + if err != nil && err != io.EOF { + return nil, fmt.Errorf("reading codex line: %w", err) + } + + if len(bytes.TrimSpace(lineBytes)) > 0 { + var cl codexLine + if json.Unmarshal(lineBytes, &cl) == nil { + if cl.Type == "response_item" || isCodexTokenCountLine(cl) { + lines = append(lines, cl) + } + } + } + + if err == io.EOF { + break + } + } + return lines, nil +} + +// isCodexTokenCountLine checks if a codexLine is an event_msg with token_count payload. +func isCodexTokenCountLine(cl codexLine) bool { + if cl.Type != "event_msg" { + return false + } + var p struct { + Type string `json:"type"` + } + return json.Unmarshal(cl.Payload, &p) == nil && p.Type == "token_count" +} + +// codexTokenCount extracts input/output tokens from a token_count payload. +func codexTokenCount(payload json.RawMessage) (input, output int) { + var tc struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` + } + if json.Unmarshal(payload, &tc) == nil { + return tc.InputTokens, tc.OutputTokens + } + return 0, 0 +} + +// codexUserText extracts the actual user prompt text from a Codex user message, +// dropping system-injected content (AGENTS.md, environment_context, permissions, +// turn_aborted, etc.). +func codexUserText(raw json.RawMessage) string { + var blocks []struct { + Type string `json:"type"` + Text string `json:"text"` + } + if json.Unmarshal(raw, &blocks) != nil { + return "" + } + + var texts []string + for _, b := range blocks { + if b.Type != "input_text" { + continue + } + // Skip system-injected content. + if isCodexSystemContent(b.Text) { + continue + } + stripped := textutil.StripIDEContextTags(b.Text) + if stripped != "" { + texts = append(texts, stripped) + } + } + + return strings.Join(texts, "\n\n") +} + +// isCodexSystemContent returns true for content blocks that are system-injected +// rather than user-authored. +func isCodexSystemContent(text string) bool { + prefixes := []string{ + "", + "", + "", + "", + "# AGENTS.md", + } + for _, p := range prefixes { + if len(text) >= len(p) && text[:len(p)] == p { + return true + } + } + return false +} + +// codexAssistantText extracts text from a Codex assistant message content array. +func codexAssistantText(raw json.RawMessage) string { + var blocks []struct { + Type string `json:"type"` + Text string `json:"text"` + } + if json.Unmarshal(raw, &blocks) != nil { + return "" + } + var texts []string + for _, b := range blocks { + if b.Type == "output_text" && b.Text != "" { + texts = append(texts, b.Text) + } + } + return strings.Join(texts, "\n\n") +} + +// codexSliceFromResponseItem returns a suffix of lines starting after skipping +// n response_item entries. token_count lines do not count toward the offset. +func codexSliceFromResponseItem(lines []codexLine, n int) []codexLine { + if n <= 0 { + return lines + } + + seen := 0 + for i, line := range lines { + if line.Type == "response_item" { + seen++ + } + if seen >= n { + return lines[i+1:] + } + } + + return nil +} + +// codexToolUseBlock builds a compact tool_use content block from a function_call. +func codexToolUseBlock(p codexPayload) map[string]json.RawMessage { + block := map[string]json.RawMessage{ + "type": mustJSON(transcript.ContentTypeToolUse), + "name": mustJSON(p.Name), + } + if p.CallID != "" { + block["id"] = mustJSON(p.CallID) + } + + // Parse the arguments JSON string into a raw object for the "input" field. + var args json.RawMessage + if json.Unmarshal([]byte(p.Arguments), &args) == nil { + block["input"] = args + } + + return block +} + +// mustJSON marshals v to JSON, panicking on error (only used for simple types). +func mustJSON(v interface{}) json.RawMessage { + b, err := json.Marshal(v) + if err != nil { + panic(fmt.Sprintf("compact: mustJSON: %v", err)) + } + return b +} + +// codexBuildContent builds the compact content array from assistant text and +// optional tool_use blocks. +func codexBuildContent(text string, toolBlocks []map[string]json.RawMessage) json.RawMessage { + var content []map[string]json.RawMessage + + if text != "" { + content = append(content, map[string]json.RawMessage{ + "type": mustJSON(transcript.ContentTypeText), + "text": mustJSON(text), + }) + } + content = append(content, toolBlocks...) + + b, err := json.Marshal(content) + if err != nil { + return nil + } + return b +} diff --git a/cmd/entire/cli/transcript/compact/codex_test.go b/cmd/entire/cli/transcript/compact/codex_test.go new file mode 100644 index 000000000..a6a1bc839 --- /dev/null +++ b/cmd/entire/cli/transcript/compact/codex_test.go @@ -0,0 +1,179 @@ +package compact + +import ( + "strings" + "testing" +) + +func TestCompact_CodexFixture(t *testing.T) { + t.Parallel() + + codexOpts := agentOpts("codex") + assertFixtureTransform(t, codexOpts, "testdata/codex_full.jsonl", "testdata/codex_expected.jsonl") +} + +func TestCompact_CodexInlineCases(t *testing.T) { + t.Parallel() + + codexOpts := agentOpts("codex") + + tests := []struct { + name string + input []byte + expected []string + }{ + { + name: "user message with system content filtered", + input: []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"response_item","payload":{"type":"message","role":"developer","content":[{"type":"input_text","text":"sandbox"}]}} +{"timestamp":"t3","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"# AGENTS.md\ninstructions"},{"type":"input_text","text":"hello world"}]}} +`), + expected: []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"user","ts":"t3","content":[{"text":"hello world"}]}`, + }, + }, + { + name: "assistant with function call and output", + input: []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Running the command."}]}} +{"timestamp":"t3","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"ls\"}","call_id":"call_1"}} +{"timestamp":"t4","type":"response_item","payload":{"type":"function_call_output","call_id":"call_1","output":"file1.txt\nfile2.txt"}} +`), + expected: []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"t2","content":[{"type":"text","text":"Running the command."},{"type":"tool_use","id":"call_1","name":"exec_command","input":{"cmd":"ls"},"result":{"output":"file1.txt\nfile2.txt","status":"success"}}]}`, + }, + }, + { + name: "assistant with multiple output text blocks", + input: []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"first line"},{"type":"output_text","text":"second line"}]}} +`), + expected: []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"t2","content":[{"type":"text","text":"first line\n\nsecond line"}]}`, + }, + }, + { + name: "drops reasoning and event_msg", + input: []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"event_msg","payload":{"type":"task_started"}} +{"timestamp":"t3","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"hi"}]}} +{"timestamp":"t4","type":"response_item","payload":{"type":"reasoning","summary":[]}} +{"timestamp":"t5","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hello"}]}} +`), + expected: []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"user","ts":"t3","content":[{"text":"hi"}]}`, + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"t5","content":[{"type":"text","text":"hello"}]}`, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + result, err := Compact(tc.input, codexOpts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertJSONLines(t, result, tc.expected) + }) + } +} + +func TestIsCodexFormat(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + want bool + }{ + { + name: "codex session_meta", + input: []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}}` + "\n"), + want: true, + }, + { + name: "claude code format", + input: []byte(`{"type":"user","uuid":"u1","timestamp":"t1","message":{"content":"hi"}}` + "\n"), + want: false, + }, + { + name: "droid format", + input: []byte(`{"type":"session_start","id":"s1"}` + "\n" + `{"type":"message","id":"m1","message":{"role":"user","content":"hi"}}` + "\n"), + want: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := isCodexFormat(tc.input); got != tc.want { + t.Errorf("isCodexFormat() = %v, want %v", got, tc.want) + } + }) + } +} + +func TestIsCodexFormat_LargeFirstLine(t *testing.T) { + t.Parallel() + + large := strings.Repeat("x", 70*1024) + input := []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1","blob":"` + large + `"}}` + "\n") + if got := isCodexFormat(input); !got { + t.Fatalf("isCodexFormat() = %v, want true", got) + } +} + +func TestCompact_CodexStartLine(t *testing.T) { + t.Parallel() + + // StartLine skips the first N response_item entries (not raw JSONL lines). + // There are 6 response_items here; StartLine=4 skips developer, AGENTS.md + // user, first-prompt user, and first assistant — leaving second user + assistant. + opts := MetadataFields{Agent: "codex", CLIVersion: "0.5.1", StartLine: 4} + + input := []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"response_item","payload":{"type":"message","role":"developer","content":[{"type":"input_text","text":"sandbox"}]}} +{"timestamp":"t3","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"# AGENTS.md\ninstructions"}]}} +{"timestamp":"t4","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"first prompt"}]}} +{"timestamp":"t5","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"response to first"}]}} +{"timestamp":"t6","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"second prompt"}]}} +{"timestamp":"t7","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"response to second"}]}} +`) + + expected := []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"user","ts":"t6","content":[{"text":"second prompt"}]}`, + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"t7","content":[{"type":"text","text":"response to second"}]}`, + } + + result, err := Compact(input, opts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertJSONLines(t, result, expected) +} + +func TestCompact_CodexStartLine_IgnoresTokenCountEvents(t *testing.T) { + t.Parallel() + + // StartLine=1 should skip exactly one response_item (the first user), + // not the token_count event line. + opts := MetadataFields{Agent: "codex", CLIVersion: "0.5.1", StartLine: 1} + + input := []byte(`{"timestamp":"t1","type":"session_meta","payload":{"id":"s1"}} +{"timestamp":"t2","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"first prompt"}]}} +{"timestamp":"t3","type":"event_msg","payload":{"type":"token_count","input_tokens":10,"output_tokens":0}} +{"timestamp":"t4","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"second entry"}]}} +`) + + expected := []string{ + `{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"t4","input_tokens":10,"content":[{"type":"text","text":"second entry"}]}`, + } + + result, err := Compact(input, opts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertJSONLines(t, result, expected) +} diff --git a/cmd/entire/cli/transcript/compact/compact.go b/cmd/entire/cli/transcript/compact/compact.go index 09282b678..e9676748d 100644 --- a/cmd/entire/cli/transcript/compact/compact.go +++ b/cmd/entire/cli/transcript/compact/compact.go @@ -76,10 +76,11 @@ type userTextBlock struct { // {"v":1,"agent":"claude-code","cli_version":"0.42.0","type":"user","ts":"...","content":"..."} // {"v":1,"agent":"claude-code","cli_version":"0.42.0","type":"assistant","ts":"...","id":"msg_xxx","content":[{"type":"text","text":"..."},{"type":"tool_use","id":"...","name":"...","input":{...},"result":{"output":"...","status":"..."}}]} func Compact(content []byte, opts MetadataFields) ([]byte, error) { - // Single-object formats (OpenCode, Gemini) must be detected on the raw - // content. SliceFromLine operates on newline offsets which would cut a - // JSON object mid-value and break parsing. These compactors handle - // StartLine internally as a message-index offset. + // Formats that need detection on raw content before line truncation: + // - Single-object formats (OpenCode, Gemini): SliceFromLine would cut + // a JSON object mid-value. They handle StartLine as a message-index offset. + // - Codex: session_meta header is only on the first line. Codex handles + // StartLine as a response_item index offset. if isOpenCodeFormat(content) { return compactOpenCode(content, opts) } @@ -88,11 +89,19 @@ func Compact(content []byte, opts MetadataFields) ([]byte, error) { return compactGemini(content, opts) } + if isCodexFormat(content) { + return compactCodex(content, opts) + } + truncated := transcript.SliceFromLine(content, opts.StartLine) if truncated == nil { truncated = []byte{} } + if isDroidFormat(truncated) { + return compactDroid(truncated, opts) + } + return compactJSONL(truncated, opts) } @@ -132,6 +141,9 @@ func normalizeKind(raw map[string]json.RawMessage) string { return "" } +// linePreprocessor transforms a parsed JSONL line before conversion. +type linePreprocessor func(map[string]json.RawMessage) map[string]json.RawMessage + // parsedEntry is an intermediate representation of a JSONL line used during // the two-pass compact conversion. type parsedEntry struct { @@ -150,10 +162,14 @@ type parsedEntry struct { // compactJSONL converts JSONL transcripts (Claude Code, Cursor) into the // transcript.jsonl format. func compactJSONL(content []byte, opts MetadataFields) ([]byte, error) { + return compactJSONLWith(content, opts, nil) +} + +func compactJSONLWith(content []byte, opts MetadataFields, preprocess linePreprocessor) ([]byte, error) { base := newTranscriptLine(opts) // Pass 1: parse all lines into intermediate entries. - entries, err := parseJSONLEntries(content) + entries, err := parseJSONLEntries(content, preprocess) if err != nil { return nil, err } @@ -258,7 +274,7 @@ func appendLine(result *[]byte, line transcriptLine) { // parseJSONLEntries parses all JSONL lines into intermediate entries, // filtering dropped types and malformed lines. -func parseJSONLEntries(content []byte) ([]parsedEntry, error) { +func parseJSONLEntries(content []byte, preprocess linePreprocessor) ([]parsedEntry, error) { reader := bufio.NewReader(bytes.NewReader(content)) var entries []parsedEntry @@ -269,7 +285,7 @@ func parseJSONLEntries(content []byte) ([]parsedEntry, error) { } if len(bytes.TrimSpace(lineBytes)) > 0 { - if e, ok := parseLine(lineBytes); ok { + if e, ok := parseLine(lineBytes, preprocess); ok { entries = append(entries, e) } } @@ -284,12 +300,16 @@ func parseJSONLEntries(content []byte) ([]parsedEntry, error) { // parseLine converts a single JSONL line into a parsedEntry. // Returns ok=false for dropped/malformed lines. -func parseLine(lineBytes []byte) (parsedEntry, bool) { +func parseLine(lineBytes []byte, preprocess linePreprocessor) (parsedEntry, bool) { var raw map[string]json.RawMessage if err := json.Unmarshal(lineBytes, &raw); err != nil { return parsedEntry{}, false } + if preprocess != nil { + raw = preprocess(raw) + } + kind := normalizeKind(raw) if kind == "" { return parsedEntry{}, false diff --git a/cmd/entire/cli/transcript/compact/droid.go b/cmd/entire/cli/transcript/compact/droid.go new file mode 100644 index 000000000..09996b800 --- /dev/null +++ b/cmd/entire/cli/transcript/compact/droid.go @@ -0,0 +1,99 @@ +package compact + +import ( + "bufio" + "bytes" + "encoding/json" + + "github.com/entireio/cli/cmd/entire/cli/transcript" +) + +// isDroidFormat checks whether JSONL content uses Factory AI Droid's envelope +// format. It scans lines looking for one with a recognizable "type" field - if +// the first such line has type "message" with a nested "message" object, it's +// Droid format. Unrecognized types (session_start, session_event) are skipped. +func isDroidFormat(content []byte) bool { + scanner := bufio.NewScanner(bytes.NewReader(content)) + scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024) + for scanner.Scan() { + line := bytes.TrimSpace(scanner.Bytes()) + if len(line) == 0 { + continue + } + var probe struct { + Type string `json:"type"` + Message *json.RawMessage `json:"message"` + } + if json.Unmarshal(line, &probe) != nil { + continue + } + if probe.Type == "message" && probe.Message != nil { + return true + } + // If we hit a known Claude Code/Cursor type, it's not Droid. + if userAliases[probe.Type] || probe.Type == transcript.TypeAssistant || droppedTypes[probe.Type] { + return false + } + } + if scanner.Err() != nil { + return false + } + return false +} + +// compactDroid converts Factory AI Droid JSONL transcripts into the compact +// format. Droid uses the same Anthropic Messages API structure as Claude Code +// and Cursor, but wraps each message in an envelope that must be unwrapped first. +func compactDroid(content []byte, opts MetadataFields) ([]byte, error) { + return compactJSONLWith(content, opts, unwrapDroidEnvelope) +} + +// unwrapDroidEnvelope flattens a Droid envelope line in place: +// +// {"type":"message","id":"m1","timestamp":"t1","message":{"role":"user","content":...}} +// +// becomes a map with role promoted to type, outer timestamp/id carried over, +// and the outer id injected into the inner message so parseLine can extract it. +// Non-envelope lines are returned unchanged (and dropped by normalizeKind). +func unwrapDroidEnvelope(raw map[string]json.RawMessage) map[string]json.RawMessage { + if unquote(raw["type"]) != "message" { + return raw + } + + msgRaw, ok := raw["message"] + if !ok { + return raw + } + + var inner map[string]json.RawMessage + if json.Unmarshal(msgRaw, &inner) != nil { + return raw + } + + role := unquote(inner["role"]) + if !userAliases[role] && role != transcript.TypeAssistant { + return raw + } + + // Build flat line: promote role to type, carry over outer timestamp/id. + flat := make(map[string]json.RawMessage, 4) + flat["type"] = inner["role"] + if v, has := raw["timestamp"]; has { + flat["timestamp"] = v + } + + // Inject outer id into inner message so parseMessage can find it. + if outerID, has := raw["id"]; has { + if _, hasInner := inner["id"]; !hasInner { + inner["id"] = outerID + } + } + rebuilt, err := json.Marshal(inner) + if err != nil { + flat["message"] = msgRaw + } else { + flat["message"] = rebuilt + } + + return flat +} diff --git a/cmd/entire/cli/transcript/compact/droid_test.go b/cmd/entire/cli/transcript/compact/droid_test.go new file mode 100644 index 000000000..ead4495ed --- /dev/null +++ b/cmd/entire/cli/transcript/compact/droid_test.go @@ -0,0 +1,142 @@ +package compact + +import ( + "strings" + "testing" +) + +// --- Factory AI Droid tests --- + +func TestCompact_DroidFixture(t *testing.T) { + t.Parallel() + + droidOpts := agentOpts("factoryai-droid") + assertFixtureTransform(t, droidOpts, "testdata/droid_full.jsonl", "testdata/droid_expected.jsonl") +} + +func TestCompact_FactoryDroidInlineCases(t *testing.T) { + t.Parallel() + + droidOpts := agentOpts("factoryai-droid") + + tests := []struct { + name string + input []byte + expected []string + }{ + { + name: "envelope", + input: []byte(`{"type":"message","id":"m1","timestamp":"t1","message":{"role":"user","content":[{"type":"text","text":"create a file"}]}} +{"type":"message","id":"m2","timestamp":"t2","message":{"role":"assistant","content":[{"type":"text","text":"Done!"},{"type":"tool_use","id":"tu-1","name":"Write","input":{"file_path":"hello.txt","content":"hi"}}]}} +`), + expected: []string{ + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"t1","content":[{"text":"create a file"}]}`, + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"t2","id":"m2","content":[{"type":"text","text":"Done!"},{"type":"tool_use","id":"tu-1","name":"Write","input":{"file_path":"hello.txt","content":"hi"}}]}`, + }, + }, + { + name: "tool result", + input: []byte(`{"type":"message","id":"m1","timestamp":"t1","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu-1","content":"success"},{"type":"text","text":"next step"}]}} +`), + expected: []string{ + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"t1","content":[{"text":"next step"}]}`, + }, + }, + { + name: "drops non-message entries", + input: []byte(`{"type":"session_start","id":"sess-1","title":"test"} +{"type":"message","id":"m1","timestamp":"t1","message":{"role":"user","content":"hello"}} +{"type":"session_event","data":"something"} +{"type":"message","id":"m2","timestamp":"t2","message":{"role":"assistant","content":[{"type":"text","text":"hi"}]}} +`), + expected: []string{ + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"t1","content":[{"text":"hello"}]}`, + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"t2","id":"m2","content":[{"type":"text","text":"hi"}]}`, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + result, err := Compact(tc.input, droidOpts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertJSONLines(t, result, tc.expected) + }) + } +} + +func TestIsDroidFormat(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []byte + want bool + }{ + { + name: "droid envelope", + input: []byte(`{"type":"session_start","id":"s1"}` + "\n" + `{"type":"message","id":"m1","message":{"role":"user","content":"hi"}}` + "\n"), + want: true, + }, + { + name: "claude code format", + input: []byte(`{"type":"user","uuid":"u1","timestamp":"t1","message":{"content":"hi"}}` + "\n"), + want: false, + }, + { + name: "empty", + input: []byte{}, + want: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := isDroidFormat(tc.input); got != tc.want { + t.Errorf("isDroidFormat() = %v, want %v", got, tc.want) + } + }) + } +} + +func TestIsDroidFormat_LargeLeadingLine(t *testing.T) { + t.Parallel() + + large := strings.Repeat("x", 70*1024) + input := []byte( + `{"type":"session_start","title":"` + large + `"}` + "\n" + + `{"type":"message","id":"m1","message":{"role":"user","content":"hi"}}` + "\n", + ) + + if got := isDroidFormat(input); !got { + t.Fatalf("isDroidFormat() = %v, want true", got) + } +} + +func TestCompact_DroidStartLine(t *testing.T) { + t.Parallel() + + opts := MetadataFields{Agent: "factoryai-droid", CLIVersion: "0.5.1", StartLine: 2} + + input := []byte(`{"type":"session_start","id":"sess-1","title":"test"} +{"type":"message","id":"m1","timestamp":"t1","message":{"role":"user","content":"first prompt"}} +{"type":"message","id":"m2","timestamp":"t2","message":{"role":"user","content":"second prompt"}} +{"type":"message","id":"m3","timestamp":"t3","message":{"role":"assistant","content":[{"type":"text","text":"response"}]}} +`) + + expected := []string{ + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"t2","content":[{"text":"second prompt"}]}`, + `{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"t3","id":"m3","content":[{"type":"text","text":"response"}]}`, + } + + result, err := Compact(input, opts) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + assertJSONLines(t, result, expected) +} diff --git a/cmd/entire/cli/transcript/compact/testdata/codex_expected.jsonl b/cmd/entire/cli/transcript/compact/testdata/codex_expected.jsonl new file mode 100644 index 000000000..a759f2f2b --- /dev/null +++ b/cmd/entire/cli/transcript/compact/testdata/codex_expected.jsonl @@ -0,0 +1,4 @@ +{"v":1,"agent":"codex","cli_version":"0.5.1","type":"user","ts":"2026-04-01T23:31:27.000Z","content":[{"text":"create a hello.txt file with a greeting"}]} +{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"2026-04-01T23:31:31.001Z","input_tokens":500,"output_tokens":80,"content":[{"text":"Creating the file now.","type":"text"},{"id":"call_abc123","input":{"cmd":"cat > hello.txt << 'EOF'\nHello, world!\nEOF","workdir":"/repo"},"name":"exec_command","result":{"output":"Command: cat > hello.txt\nWall time: 0.001 seconds\nProcess exited with code 0","status":"success"},"type":"tool_use"}]} +{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"2026-04-01T23:31:35.001Z","content":[{"text":"Verifying the file was created.","type":"text"},{"id":"call_def456","input":{"cmd":"cat hello.txt","workdir":"/repo"},"name":"exec_command","result":{"output":"Command: cat hello.txt\nWall time: 0.001 seconds\nProcess exited with code 0\nOutput:\nHello, world!","status":"success"},"type":"tool_use"}]} +{"v":1,"agent":"codex","cli_version":"0.5.1","type":"assistant","ts":"2026-04-01T23:31:38.501Z","input_tokens":600,"output_tokens":150,"content":[{"text":"Done — `hello.txt` created with \"Hello, world!\".","type":"text"}]} diff --git a/cmd/entire/cli/transcript/compact/testdata/codex_full.jsonl b/cmd/entire/cli/transcript/compact/testdata/codex_full.jsonl new file mode 100644 index 000000000..75e978250 --- /dev/null +++ b/cmd/entire/cli/transcript/compact/testdata/codex_full.jsonl @@ -0,0 +1,26 @@ +{"timestamp":"2026-04-01T23:31:26.000Z","type":"session_meta","payload":{"id":"019d4f9e-feac-7be2-a885-bf0727a50a92","timestamp":"2026-04-01T23:31:26.000Z","cwd":"/repo","originator":"codex-tui","cli_version":"0.118.0","source":"cli","model_provider":"openai"}} +{"timestamp":"2026-04-01T23:31:26.001Z","type":"event_msg","payload":{"type":"task_started","turn_id":"turn-1","model_context_window":258400,"collaboration_mode_kind":"default"}} +{"timestamp":"2026-04-01T23:31:26.002Z","type":"response_item","payload":{"type":"message","role":"developer","content":[{"type":"input_text","text":"\nYou are sandboxed.\n"}]}} +{"timestamp":"2026-04-01T23:31:26.003Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"# AGENTS.md\nThese are the project instructions."},{"type":"input_text","text":"\n/repo\n"}]}} +{"timestamp":"2026-04-01T23:31:26.004Z","type":"turn_context","payload":{"turn_id":"turn-1"}} +{"timestamp":"2026-04-01T23:31:27.000Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"create a hello.txt file with a greeting"}]}} +{"timestamp":"2026-04-01T23:31:27.001Z","type":"event_msg","payload":{"type":"user_message","message":"create a hello.txt file with a greeting","images":[]}} +{"timestamp":"2026-04-01T23:31:27.002Z","type":"event_msg","payload":{"type":"token_count","input_tokens":500,"output_tokens":0}} +{"timestamp":"2026-04-01T23:31:30.000Z","type":"response_item","payload":{"type":"reasoning","summary":[],"content":null,"encrypted_content":"REDACTED"}} +{"timestamp":"2026-04-01T23:31:31.000Z","type":"event_msg","payload":{"type":"agent_message","message":"Creating the file now."}} +{"timestamp":"2026-04-01T23:31:31.001Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Creating the file now."}],"phase":"commentary"}} +{"timestamp":"2026-04-01T23:31:32.000Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"cat > hello.txt << 'EOF'\\nHello, world!\\nEOF\",\"workdir\":\"/repo\"}","call_id":"call_abc123"}} +{"timestamp":"2026-04-01T23:31:32.001Z","type":"event_msg","payload":{"type":"token_count","input_tokens":500,"output_tokens":80}} +{"timestamp":"2026-04-01T23:31:32.500Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_abc123","process_id":"12345","turn_id":"turn-1","command":["/bin/zsh","-lc","cat > hello.txt << 'EOF'\nHello, world!\nEOF"]}} +{"timestamp":"2026-04-01T23:31:32.501Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_abc123","output":"Command: cat > hello.txt\nWall time: 0.001 seconds\nProcess exited with code 0"}} +{"timestamp":"2026-04-01T23:31:34.000Z","type":"response_item","payload":{"type":"reasoning","summary":[],"content":null,"encrypted_content":"REDACTED"}} +{"timestamp":"2026-04-01T23:31:35.000Z","type":"event_msg","payload":{"type":"agent_message","message":"Verifying the file was created."}} +{"timestamp":"2026-04-01T23:31:35.001Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Verifying the file was created."}],"phase":"commentary"}} +{"timestamp":"2026-04-01T23:31:36.000Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"cat hello.txt\",\"workdir\":\"/repo\"}","call_id":"call_def456"}} +{"timestamp":"2026-04-01T23:31:36.500Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"call_def456","process_id":"12346","turn_id":"turn-1"}} +{"timestamp":"2026-04-01T23:31:36.501Z","type":"response_item","payload":{"type":"function_call_output","call_id":"call_def456","output":"Command: cat hello.txt\nWall time: 0.001 seconds\nProcess exited with code 0\nOutput:\nHello, world!"}} +{"timestamp":"2026-04-01T23:31:38.000Z","type":"response_item","payload":{"type":"reasoning","summary":[],"content":null,"encrypted_content":"REDACTED"}} +{"timestamp":"2026-04-01T23:31:38.500Z","type":"event_msg","payload":{"type":"agent_message","message":"Done — `hello.txt` created with \"Hello, world!\"."}} +{"timestamp":"2026-04-01T23:31:38.501Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Done — `hello.txt` created with \"Hello, world!\"."}],"phase":"final"}} +{"timestamp":"2026-04-01T23:31:39.000Z","type":"event_msg","payload":{"type":"token_count","input_tokens":600,"output_tokens":150}} +{"timestamp":"2026-04-01T23:31:40.000Z","type":"event_msg","payload":{"type":"task_completed","turn_id":"turn-1"}} diff --git a/cmd/entire/cli/transcript/compact/testdata/droid_expected.jsonl b/cmd/entire/cli/transcript/compact/testdata/droid_expected.jsonl new file mode 100644 index 000000000..6a3da777a --- /dev/null +++ b/cmd/entire/cli/transcript/compact/testdata/droid_expected.jsonl @@ -0,0 +1,8 @@ +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"2026-03-30T18:50:09.639Z","content":[{"text":"create a markdown file called hello.md with a greeting"}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"2026-03-30T18:50:12.100Z","id":"a1b2c3d4-0001-0001-0001-000000000001","input_tokens":500,"output_tokens":80,"content":[{"type":"text","text":"I'll create hello.md for you."},{"type":"tool_use","id":"tu-1","name":"Write","input":{"file_path":"hello.md","content":"# Hello\n\nWelcome!"},"result":{"output":"File written successfully","status":"success"}}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"2026-03-30T18:50:14.500Z","id":"a1b2c3d4-0001-0001-0001-000000000003","input_tokens":600,"output_tokens":25,"content":[{"type":"text","text":"Done! I've created `hello.md` with a greeting heading and welcome message."}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"2026-03-30T18:50:20.000Z","content":[{"text":"now add a second section about testing"}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"2026-03-30T18:50:22.100Z","id":"a1b2c3d4-0001-0001-0001-000000000005","input_tokens":700,"output_tokens":60,"content":[{"type":"text","text":"I'll read the file first, then add the section."},{"type":"tool_use","id":"tu-2","name":"Read","input":{"file_path":"hello.md"},"result":{"output":"# Hello\n\nWelcome!","status":"success"}}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"user","ts":"2026-03-30T18:50:23.000Z","content":[{"text":"looks good so far"}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"2026-03-30T18:50:25.500Z","id":"a1b2c3d4-0001-0001-0001-000000000007","input_tokens":800,"output_tokens":45,"content":[{"type":"tool_use","id":"tu-3","name":"Edit","input":{"file_path":"hello.md","old_string":"Welcome!","new_string":"Welcome!\n\n## Testing\n\nThis section is for testing."},"result":{"output":"","status":"success"}}]} +{"v":1,"agent":"factoryai-droid","cli_version":"0.5.1","type":"assistant","ts":"2026-03-30T18:50:28.000Z","id":"a1b2c3d4-0001-0001-0001-000000000009","input_tokens":850,"output_tokens":15,"content":[{"type":"text","text":"Added a Testing section to hello.md."}]} diff --git a/cmd/entire/cli/transcript/compact/testdata/droid_full.jsonl b/cmd/entire/cli/transcript/compact/testdata/droid_full.jsonl new file mode 100644 index 000000000..b2900ae88 --- /dev/null +++ b/cmd/entire/cli/transcript/compact/testdata/droid_full.jsonl @@ -0,0 +1,12 @@ +{"type":"session_start","id":"a31fbbb0-0922-4b2b-be85-9e9f0283a4d8","title":"create a markdown file","sessionTitle":"New Session","owner":"testuser","version":2,"cwd":"/repo"} +{"type":"message","id":"14b7187c-fdb4-4f52-9539-46fff2d45652","timestamp":"2026-03-30T18:50:09.639Z","message":{"role":"user","content":[{"type":"text","text":"create a markdown file called hello.md with a greeting"}]}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000001","timestamp":"2026-03-30T18:50:12.100Z","message":{"role":"assistant","content":[{"type":"thinking","thinking":"Let me create the file..."},{"type":"text","text":"I'll create hello.md for you."},{"type":"tool_use","id":"tu-1","name":"Write","input":{"file_path":"hello.md","content":"# Hello\n\nWelcome!"}}],"usage":{"input_tokens":500,"output_tokens":80}}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000002","timestamp":"2026-03-30T18:50:13.200Z","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu-1","content":"File written successfully"}]}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000003","timestamp":"2026-03-30T18:50:14.500Z","message":{"role":"assistant","content":[{"type":"text","text":"Done! I've created `hello.md` with a greeting heading and welcome message."}],"usage":{"input_tokens":600,"output_tokens":25}}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000004","timestamp":"2026-03-30T18:50:20.000Z","message":{"role":"user","content":[{"type":"text","text":"now add a second section about testing"}]}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000005","timestamp":"2026-03-30T18:50:22.100Z","message":{"role":"assistant","content":[{"type":"text","text":"I'll read the file first, then add the section."},{"type":"tool_use","id":"tu-2","name":"Read","input":{"file_path":"hello.md"}}],"usage":{"input_tokens":700,"output_tokens":60}}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000006","timestamp":"2026-03-30T18:50:23.000Z","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu-2","content":"# Hello\n\nWelcome!"},{"type":"text","text":"looks good so far"}]}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000007","timestamp":"2026-03-30T18:50:25.500Z","message":{"role":"assistant","content":[{"type":"tool_use","id":"tu-3","name":"Edit","input":{"file_path":"hello.md","old_string":"Welcome!","new_string":"Welcome!\n\n## Testing\n\nThis section is for testing."}}],"usage":{"input_tokens":800,"output_tokens":45}}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000008","timestamp":"2026-03-30T18:50:26.500Z","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu-3","content":"","is_error":false}]}} +{"type":"message","id":"a1b2c3d4-0001-0001-0001-000000000009","timestamp":"2026-03-30T18:50:28.000Z","message":{"role":"assistant","content":[{"type":"text","text":"Added a Testing section to hello.md."}],"usage":{"input_tokens":850,"output_tokens":15}}} +{"type":"session_event","data":"session_ended"}