Spaces:
Running
Running
File size: 6,505 Bytes
dc71cad | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | """
agent/trajectory_logger.py
──────────────────────────
Trajectory logger — records every attempt as JSONL.
Each line in the trajectory file is one attempt:
{
"instance_id": "django__django-12345",
"repo": "django/django",
"attempt": 1,
"patch": "<unified diff>",
"test_stdout": "<pytest output>",
"fail_to_pass_results": {"tests/test_foo.py::test_x": true},
"pass_to_pass_results": {"tests/test_foo.py::test_y": true},
"resolved": false,
"failure_category": "wrong_file_edit",
"elapsed_seconds": 12.3,
"token_cost": {"prompt_tokens": 1200, "completion_tokens": 400},
"localised_files": ["django/db/models/query.py"],
"timestamp": "2025-05-01T14:23:01.123456+00:00"
}
The JSONL dataset is filtered in Phase 7:
- Keep: instances with known failure_category (not 'unknown')
- Focus: syntax_error, hallucinated_api, wrong_file_edit — these are
the most learnable patterns for fine-tuning
"""
from __future__ import annotations

import json
import logging
import threading
import time
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
@dataclass
class TrajectoryEntry:
    """
    One recorded attempt; serialises to a single line of the trajectory JSONL.

    The first ten fields are required. The remaining fields have defaults,
    so they may be omitted when constructing an entry.
    """

    # Identifier of the task instance, e.g. "django__django-12345".
    instance_id: str
    # Repository the instance belongs to, e.g. "django/django".
    repo: str
    # Attempt number recorded for this entry.
    attempt: int
    # Patch text produced by this attempt.
    patch: str
    # Captured output of the test run for this attempt.
    test_stdout: str
    # Per-test boolean results, keyed by test id.
    fail_to_pass_results: dict[str, bool]
    pass_to_pass_results: dict[str, bool]
    # Whether this attempt was recorded as resolving the instance.
    resolved: bool
    # Failure label, e.g. "wrong_file_edit", "syntax_error" or "unknown".
    failure_category: str
    # Duration recorded for the attempt, in seconds.
    elapsed_seconds: float
    # Token usage counters, e.g. {"prompt_tokens": ..., "completion_tokens": ...}.
    token_cost: dict[str, int] = field(default_factory=dict)
    # Paths of the files selected as relevant for this attempt.
    localised_files: list[str] = field(default_factory=list)
    # Issue text; only used when building fine-tuning pairs.
    problem_statement: str = ""
    # Defaults to the creation time in UTC, ISO-8601 with a "+00:00" offset.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_jsonl_line(self) -> str:
        """Return this entry as one JSON object string (no trailing newline)."""
        return json.dumps(asdict(self))

    def to_instruction_pair(
        self,
        *,
        max_issue_chars: int = 800,
        max_output_chars: int = 1000,
    ) -> dict:
        """
        Format as an instruction-following pair for fine-tuning (Phase 7).

        Schema:
            system:    role description
            user:      issue + file list + failure message
            assistant: the patch stored on this entry (``self.patch``)
            metadata:  instance_id, attempt, failure_category, resolved

        The assistant turn is whatever patch this entry holds; this method
        does not check that the patch is a correct fix.

        Args:
            max_issue_chars: keep at most this many leading characters of
                ``problem_statement``; values <= 0 yield an empty string.
            max_output_chars: keep at most this many trailing characters of
                ``test_stdout``; values <= 0 yield an empty string.

        Returns:
            A dict with the keys ``system``, ``user``, ``assistant`` and
            ``metadata``.
        """
        # Fields can be None when an entry was rebuilt from JSON containing
        # nulls; treat None like "empty" instead of raising TypeError.
        issue_text = (self.problem_statement or "")[: max(max_issue_chars, 0)]
        stdout_text = self.test_stdout or ""
        # s[-0:] would return the whole string, so a zero limit needs a guard.
        failure_excerpt = (
            stdout_text[-max_output_chars:] if max_output_chars > 0 else ""
        )
        file_context = "\n\n".join(
            f"# File: {fp}" for fp in (self.localised_files or [])
        )
        return {
            "system": (
                "You are an expert Python software engineer. "
                "You fix bugs by generating minimal unified diffs."
            ),
            "user": (
                f"## GitHub Issue\n{issue_text}\n\n"
                f"## Relevant Files\n{file_context}\n\n"
                f"## Previous Attempt Failed\n"
                f"Category: {self.failure_category}\n"
                f"Test output:\n{failure_excerpt}"
            ),
            "assistant": self.patch,
            "metadata": {
                "instance_id": self.instance_id,
                "attempt": self.attempt,
                "failure_category": self.failure_category,
                "resolved": self.resolved,
            },
        }
class TrajectoryLogger:
    """
    Appends trajectory entries to a JSONL file, one JSON object per line.

    Appends made through one instance are serialised with an in-process
    ``threading.Lock``, so threads of a single process can share a logger.
    This is not an OS-level file lock: it does not coordinate separate
    processes or separate ``TrajectoryLogger`` instances writing to the
    same path.

    Files are read and written as UTF-8 regardless of the platform default
    encoding.
    """

    def __init__(self, output_path: Path):
        """
        Args:
            output_path: JSONL file to append to. Its parent directory is
                created if missing; the file itself is created on first
                ``log`` call.
        """
        self.output_path = Path(output_path)
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self._count = 0
        # Guards the append and the counter update in log().
        self._lock = threading.Lock()
        logger.info("TrajectoryLogger writing to %s", self.output_path)

    def log(self, entry: TrajectoryEntry) -> None:
        """Append one trajectory entry to the JSONL file."""
        # Serialise outside the lock so only the file append is serialised.
        line = entry.to_jsonl_line() + "\n"
        with self._lock:
            with self.output_path.open("a", encoding="utf-8") as f:
                f.write(line)
            # Only reached when the write succeeded.
            self._count += 1

    @property
    def total_logged(self) -> int:
        """Number of entries appended through this instance.

        Lines already present in the file before this instance was created
        are not counted.
        """
        return self._count

    def load_all(self) -> list[TrajectoryEntry]:
        """
        Load all logged trajectories from file.

        Blank lines are ignored. Lines that are not valid JSON, or whose
        content cannot be passed to ``TrajectoryEntry`` as keyword
        arguments, are skipped with a warning.

        Returns:
            The parsed entries in file order; an empty list when the file
            does not exist.
        """
        entries: list[TrajectoryEntry] = []
        try:
            f = self.output_path.open(encoding="utf-8")
        except FileNotFoundError:
            # Nothing has been logged to this path yet.
            return entries
        with f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    entries.append(TrajectoryEntry(**data))
                except (json.JSONDecodeError, TypeError) as e:
                    logger.warning("Skipping malformed trajectory line: %s", e)
        return entries

    def stats(self) -> dict:
        """
        Summary statistics over all logged trajectories.

        Every figure except ``unique_instances`` is computed per file line
        (one line per entry), not per instance: ``resolved_rate`` is the
        fraction of entries with ``resolved`` set, and ``avg_attempts`` is
        the mean of the entries' ``attempt`` values.

        Returns:
            ``{"total": 0}`` when there are no entries; otherwise a dict
            with ``total``, ``resolved``, ``resolved_rate``,
            ``avg_attempts``, ``failure_categories`` (category -> count)
            and ``unique_instances``.
        """
        entries = self.load_all()
        if not entries:
            return {"total": 0}
        total = len(entries)
        resolved_count = sum(1 for e in entries if e.resolved)
        categories: dict[str, int] = {}
        for e in entries:
            categories[e.failure_category] = categories.get(e.failure_category, 0) + 1
        return {
            "total": total,
            "resolved": resolved_count,
            "resolved_rate": resolved_count / total,
            "avg_attempts": sum(e.attempt for e in entries) / total,
            "failure_categories": categories,
            "unique_instances": len({e.instance_id for e in entries}),
        }

    def export_for_finetuning(
        self,
        output_path: Path,
        filter_categories: list[str] | None = None,
        resolved_only: bool = False,
    ) -> int:
        """
        Export trajectory entries as instruction-following pairs (Phase 7).

        The output file is overwritten. Entries with an empty
        ``problem_statement`` or an empty ``patch`` are skipped.

        Args:
            output_path: where to write the fine-tuning JSONL
            filter_categories: only export entries with these categories;
                ``None`` or an empty list disables the filter
            resolved_only: only export successfully resolved instances

        Returns:
            Number of pairs exported
        """
        entries = self.load_all()
        if filter_categories:
            entries = [e for e in entries if e.failure_category in filter_categories]
        if resolved_only:
            entries = [e for e in entries if e.resolved]
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        count = 0
        with output_path.open("w", encoding="utf-8") as f:
            for entry in entries:
                # A pair without an issue or without a patch is not exported.
                if entry.problem_statement and entry.patch:
                    pair = entry.to_instruction_pair()
                    f.write(json.dumps(pair) + "\n")
                    count += 1
        logger.info("Exported %d fine-tuning pairs to %s", count, output_path)
        return count
|