"""Conversation log formatting for execution analysis. Converts ``conversations.jsonl`` entries into a priority-based text block suitable for LLM analysis prompts. All functions are pure (stateless). Priority levels (lower = more important): 0 — CRITICAL : User instruction (never truncated) 1 — CRITICAL : Final iteration assistant response (never truncated) 2 — HIGH : Tool calls (name + args) AND tool errors — kept together 3 — HIGH : Non-final assistant reasoning; tool results with embedded summary 4 — MEDIUM : Tool success results (try to preserve) 5 — LOW : System guidance messages between iterations SKIP : Skill injection text, verbose system prompts (not included; skill & tool info are provided separately in the prompt) """ from __future__ import annotations import re from typing import Any, Dict, List, Optional # Per-section truncation limits (kept in sync with analyzer constants) TOOL_ERROR_MAX_CHARS = 1000 TOOL_SUCCESS_MAX_CHARS = 800 TOOL_ARGS_MAX_CHARS = 500 TOOL_SUMMARY_MAX_CHARS = 1500 def format_conversations( conversations: List[Dict[str, Any]], budget: int, ) -> str: """Format ``conversations.jsonl`` entries into a readable text block. Uses priority-based truncation instead of simple tail-truncation. When total exceeds *budget*: 1. Include all priority ≤ 3 (CRITICAL + HIGH) segments in full. 2. Add MEDIUM + LOW segments until budget is exhausted, truncating if possible. 3. If even HIGH content exceeds budget, keep priority 0-1 in full, budget-allocate priority 2, and summarize priority 3. """ # Count total iterations for priority assignment total_iters = sum( 1 for c in conversations if c.get("type") == "iteration" ) # Phase 1: Collect all segments in chronological order with priority segments: List[Dict[str, Any]] = [] for conv in conversations: conv_type = conv.get("type", "") if conv_type == "setup": _collect_setup_segments(conv, segments) elif conv_type == "iteration": _collect_iteration_segments(conv, total_iters, segments) # Phase 2: Assemble with budget management return _assemble_with_budget(segments, budget) def _collect_setup_segments( conv: Dict[str, Any], segments: List[Dict[str, Any]], ) -> None: """Extract segments from a ``type: "setup"`` conversation entry. Only the user instruction is extracted. System prompts (including skill injection text and tool descriptions) are skipped — they are provided in dedicated sections of the analysis prompt. """ for msg in conv.get("messages", []): role = msg.get("role", "") content = msg.get("content", "") if not isinstance(content, str): content = str(content) if role == "user": segments.append({ "priority": 0, # CRITICAL — always keep "text": f"[USER INSTRUCTION]\n{content}", "iteration": 0, "role": "user", "truncatable_to": None, }) def _collect_iteration_segments( conv: Dict[str, Any], total_iters: int, segments: List[Dict[str, Any]], ) -> None: """Extract segments from a ``type: "iteration"`` conversation entry. Key design decisions: - Tool calls and tool errors share the SAME high priority (2) - Tool success results get MEDIUM priority (4) - Shell agent results with embedded "Execution Summary" get HIGH (3). """ iteration = conv.get("iteration", "?") is_last = (iteration == total_iters) if isinstance(iteration, int) else False # Process delta_messages in order for msg in conv.get("delta_messages", []): role = msg.get("role", "") content = msg.get("content", "") if not isinstance(content, str): content = str(content) if role == "assistant": # Assistant reasoning if content: priority = 1 if is_last else 3 segments.append({ "priority": priority, "text": f"[Iter {iteration}] ASSISTANT: {content}", "iteration": iteration, "role": "assistant", "truncatable_to": None, }) # Tool calls for tc in msg.get("tool_calls", []): fn = tc.get("function", {}) fn_name = fn.get("name", "?") fn_args = fn.get("arguments", "") if isinstance(fn_args, str) and len(fn_args) > TOOL_ARGS_MAX_CHARS: fn_args = fn_args[:TOOL_ARGS_MAX_CHARS] + "..." segments.append({ "priority": 2, # HIGH — paired with tool results/errors "text": f"[Iter {iteration}] TOOL_CALL: {fn_name}({fn_args})", "iteration": iteration, "role": "tool_call", "truncatable_to": None, }) elif role == "tool": # Tool result is_error = _is_error_result(content) if is_error: truncated = content[:TOOL_ERROR_MAX_CHARS] if len(content) > TOOL_ERROR_MAX_CHARS: truncated += f"... [truncated, total {len(content)} chars]" segments.append({ "priority": 2, # HIGH — errors are critical, same tier as tool calls "text": f"[Iter {iteration}] TOOL_ERROR: {truncated}", "iteration": iteration, "role": "tool_error", "truncatable_to": None, }) else: # Check if result contains a self-generated summary # (e.g. shell_agent produces "Execution Summary (N steps):") summary = _extract_embedded_summary(content) if summary: # Show the embedded summary (high value, compact) segments.append({ "priority": 3, # HIGH — self-generated summaries are informative "text": f"[Iter {iteration}] TOOL_RESULT (with summary):\n{summary}", "iteration": iteration, "role": "tool_result", "truncatable_to": 500, }) else: truncated = content[:TOOL_SUCCESS_MAX_CHARS] if len(content) > TOOL_SUCCESS_MAX_CHARS: truncated += f"... [truncated, total {len(content)} chars]" segments.append({ "priority": 4, # MEDIUM — try to preserve success results "text": f"[Iter {iteration}] TOOL_RESULT: {truncated}", "iteration": iteration, "role": "tool_result", "truncatable_to": 300, }) elif role == "system": # System guidance between iterations (e.g. "Iteration N complete...") if content: segments.append({ "priority": 5, # LOW — guidance messages "text": f"[Iter {iteration}] SYSTEM: {content}", "iteration": iteration, "role": "system", "truncatable_to": 150, }) def _assemble_with_budget( segments: List[Dict[str, Any]], budget: int, ) -> str: """Assemble segments into final text respecting the character budget. Strategy: 1. Include all segments with priority ≤ 3 (CRITICAL + HIGH) in full. 2. Add MEDIUM + LOW segments in chronological order until budget is hit. 3. If even HIGH-priority content exceeds budget, progressively truncate older iterations while preserving user instruction and final iteration. """ # Calculate essential (priority ≤ 3) size essential = [s for s in segments if s["priority"] <= 3] essential_chars = sum(len(s["text"]) for s in essential) remaining_budget = budget - essential_chars if remaining_budget < 0: # Essential content alone exceeds budget — need to reduce # Keep priority 0-1 (user instruction + final iteration) in full # Truncate priority 2-3 (tool calls/errors + older assistant content) return _assemble_essential_only(segments, budget) # Build output in chronological order output_parts: List[str] = [] used_chars = 0 skipped_count = 0 for seg in segments: text = seg["text"] priority = seg["priority"] if priority <= 3: # Essential — always include output_parts.append(text) used_chars += len(text) + 1 elif used_chars + len(text) + 1 <= budget: # Within budget — include output_parts.append(text) used_chars += len(text) + 1 else: # Over budget — try truncation truncatable_to = seg.get("truncatable_to") if truncatable_to and len(text) > truncatable_to: truncated = text[:truncatable_to] + "... [budget-truncated]" if used_chars + len(truncated) + 1 <= budget: output_parts.append(truncated) used_chars += len(truncated) + 1 continue skipped_count += 1 if skipped_count > 0: output_parts.append( f"\n[... {skipped_count} lower-priority segment(s) omitted due to length ...]" ) return "\n\n".join(output_parts) def _assemble_essential_only( segments: List[Dict[str, Any]], budget: int, ) -> str: """Fallback: even essential content exceeds budget. Keep: - User instruction (priority 0) — never truncated - Final iteration (priority 1) — never truncated - Tool calls + tool errors (priority 2) — budget-allocated, truncated if needed - Non-final assistant reasoning (priority 3) — heavily summarized """ output_parts: List[str] = [] used_chars = 0 # Pass 1: priority 0 and 1 (user instruction + final iteration) for seg in segments: if seg["priority"] <= 1: output_parts.append(seg["text"]) used_chars += len(seg["text"]) + 1 remaining = budget - used_chars # Pass 2: priority 2 (tool calls + tool errors) — budget-allocated tool_segments = [s for s in segments if s["priority"] == 2] if tool_segments: per_segment_budget = max(400, remaining // (len(tool_segments) + 1)) for seg in tool_segments: text = seg["text"] if len(text) > per_segment_budget: text = text[:per_segment_budget] + "... [budget-truncated]" if used_chars + len(text) + 1 <= budget: output_parts.append(text) used_chars += len(text) + 1 # Pass 3: priority 3 (non-final assistant reasoning) — one-line summaries assistants = [s for s in segments if s["priority"] == 3] if assistants and used_chars < budget: output_parts.append("\n--- Older iteration summaries ---") for seg in assistants: first_line = seg["text"].split("\n", 1)[0][:200] if used_chars + len(first_line) + 1 > budget: output_parts.append("[... remaining iterations omitted ...]") break output_parts.append(first_line) used_chars += len(first_line) + 1 return "\n\n".join(output_parts) def _is_error_result(content: str) -> bool: """Detect if a tool result represents an error.""" if not content: return False # Check common error patterns in the first 200 chars head = content[:200].lower() return ( content.startswith("[ERROR]") or content.startswith("ERROR") or "error" in head[:50] or "task failed" in head or "connection refused" in head or "timed out" in head or "traceback" in head ) def _extract_embedded_summary(content: str) -> Optional[str]: """Extract self-generated summary from tool result content. Shell agent results often contain an ``Execution Summary (N steps):`` block that provides a compact view of what happened internally. This is more informative than the raw output. """ # Look for "Execution Summary (N steps):" pattern match = re.search( r"(Execution Summary \(\d+ steps?\):.*?)(?:={10,}|$)", content, re.DOTALL, ) if match: summary = match.group(1).strip() # Also capture any "Summary:" line after the steps summary_match = re.search(r"\nSummary:\s*(.+)", content) if summary_match: summary += f"\nConclusion: {summary_match.group(1).strip()}" return summary[:TOOL_SUMMARY_MAX_CHARS] return None