File size: 6,505 Bytes
dc71cad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
"""
agent/trajectory_logger.py
────────────────────────────
Trajectory logger — records every attempt as JSONL.

Each line in the trajectory file is one attempt:
{
    "instance_id": "django__django-12345",
    "repo": "django/django",
    "attempt": 1,
    "patch": "<unified diff>",
    "test_stdout": "<pytest output>",
    "fail_to_pass_results": {"tests/test_foo.py::test_x": true},
    "pass_to_pass_results": {"tests/test_foo.py::test_y": true},
    "resolved": false,
    "failure_category": "wrong_file_edit",
    "elapsed_seconds": 12.3,
    "token_cost": {"prompt_tokens": 1200, "completion_tokens": 400},
    "localised_files": ["django/db/models/query.py"],
    "problem_statement": "<GitHub issue text>",
    "timestamp": "2025-05-01T14:23:01Z"
}

The JSONL dataset is filtered in Phase 7:
  - Keep: instances with known failure_category (not 'unknown')
  - Focus: syntax_error, hallucinated_api, wrong_file_edit — these are
    the most learnable patterns for fine-tuning
"""
from __future__ import annotations

import json
import logging
import threading
import time
from dataclasses import dataclass, asdict, field
from datetime import datetime, timezone
from pathlib import Path

logger = logging.getLogger(__name__)


@dataclass
class TrajectoryEntry:
    """
    One repair attempt, serialisable as a single JSONL record.

    Field order matters: it defines the positional constructor signature
    and the key order that ``asdict`` (and therefore the JSONL output)
    produces.
    """

    # Required fields.
    instance_id: str
    repo: str
    attempt: int
    patch: str
    test_stdout: str
    fail_to_pass_results: dict[str, bool]
    pass_to_pass_results: dict[str, bool]
    resolved: bool
    failure_category: str
    elapsed_seconds: float
    # Optional fields; mutable defaults go through ``default_factory`` so
    # instances never share the same dict/list object.
    token_cost: dict[str, int] = field(default_factory=dict)
    localised_files: list[str] = field(default_factory=list)
    problem_statement: str = ""
    # Defaults to the creation time as a timezone-aware UTC ISO-8601 string.
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_jsonl_line(self) -> str:
        """Serialise every field to one JSON object (no trailing newline)."""
        record = asdict(self)
        return json.dumps(record)

    def to_instruction_pair(self) -> dict:
        """
        Build an instruction-following sample for fine-tuning (Phase 7).

        Returns a dict with four keys:
          system:    fixed role description
          user:      issue text (first 800 chars), one "# File: <path>"
                     header per localised file, the failure category and
                     the last 1000 chars of the test output
          assistant: this entry's patch
          metadata:  instance_id, attempt, failure_category, resolved
        """
        # Only the file paths are listed; file contents are not included.
        file_headers = [f"# File: {path}" for path in self.localised_files]
        relevant_files = "\n\n".join(file_headers)

        # Keep just the tail of the output; falsy output becomes "".
        output_tail = (self.test_stdout or "")[-1000:]

        system_prompt = (
            "You are an expert Python software engineer. "
            "You fix bugs by generating minimal unified diffs."
        )
        user_prompt = "".join(
            [
                f"## GitHub Issue\n{self.problem_statement[:800]}\n\n",
                f"## Relevant Files\n{relevant_files}\n\n",
                "## Previous Attempt Failed\n",
                f"Category: {self.failure_category}\n",
                f"Test output:\n{output_tail}",
            ]
        )
        metadata = {
            "instance_id": self.instance_id,
            "attempt": self.attempt,
            "failure_category": self.failure_category,
            "resolved": self.resolved,
        }

        return {
            "system": system_prompt,
            "user": user_prompt,
            "assistant": self.patch,
            "metadata": metadata,
        }


class TrajectoryLogger:
    """
    Appends trajectory entries to a JSONL file, one JSON object per line.

    Appends issued from several threads of the same process are serialised
    by an in-process lock, so lines are never interleaved and the session
    counter stays consistent. No cross-process file locking is performed.
    """

    def __init__(self, output_path: Path):
        """
        Prepare the logger; the parent directory is created if missing.

        Args:
            output_path: JSONL file that ``log`` appends to. The file itself
                is only created on the first ``log`` call.
        """
        self.output_path = Path(output_path)
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self._count = 0
        # Guards the append and the counter update in ``log``.
        self._lock = threading.Lock()
        logger.info("TrajectoryLogger writing to %s", self.output_path)

    def log(self, entry: TrajectoryEntry) -> None:
        """
        Append one trajectory entry to the JSONL file.

        Raises:
            OSError: if the file cannot be opened or written.
        """
        # Serialise before taking the lock so the critical section is only
        # the file append and the counter bump.
        line = entry.to_jsonl_line() + "\n"
        with self._lock:
            with self.output_path.open("a", encoding="utf-8") as f:
                f.write(line)
            self._count += 1

    @property
    def total_logged(self) -> int:
        """Number of entries appended through this instance (not the file's line count)."""
        return self._count

    def load_all(self) -> list[TrajectoryEntry]:
        """
        Load all logged trajectories from file.

        Blank lines are ignored. Lines that are not valid JSON, or whose
        keys do not match the ``TrajectoryEntry`` fields, are skipped with
        a warning rather than aborting the load.

        Returns:
            Parsed entries in file order; an empty list if the file does
            not exist.
        """
        if not self.output_path.exists():
            return []
        entries = []
        with self.output_path.open(encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    entries.append(TrajectoryEntry(**data))
                except (json.JSONDecodeError, TypeError) as e:
                    logger.warning("Skipping malformed trajectory line: %s", e)
        return entries

    def stats(self) -> dict:
        """
        Summary statistics over all logged trajectories.

        Every figure is computed per entry (i.e. per attempt), not per
        instance:
          total:              number of entries
          resolved:           entries with ``resolved`` set
          resolved_rate:      resolved / total
          avg_attempts:       mean of the ``attempt`` field over all entries
          failure_categories: entry count per ``failure_category``
          unique_instances:   number of distinct ``instance_id`` values

        Returns:
            ``{"total": 0}`` when nothing has been logged, otherwise the
            dict described above.
        """
        entries = self.load_all()
        if not entries:
            return {"total": 0}

        total = len(entries)
        resolved_count = sum(1 for e in entries if e.resolved)
        categories: dict[str, int] = {}
        for e in entries:
            categories[e.failure_category] = categories.get(e.failure_category, 0) + 1

        return {
            "total": total,
            "resolved": resolved_count,
            "resolved_rate": resolved_count / total,
            "avg_attempts": sum(e.attempt for e in entries) / total,
            "failure_categories": categories,
            "unique_instances": len({e.instance_id for e in entries}),
        }

    def export_for_finetuning(
        self,
        output_path: Path,
        filter_categories: list[str] | None = None,
        resolved_only: bool = False,
    ) -> int:
        """
        Export trajectory entries as instruction-following pairs (Phase 7).

        The output file is overwritten. Entries with an empty
        ``problem_statement`` or an empty ``patch`` are skipped, since they
        cannot form a usable prompt/response pair.

        Args:
            output_path: where to write the fine-tuning JSONL
            filter_categories: only export entries with these categories;
                ``None`` or an empty list disables the filter
            resolved_only: only export successfully resolved instances

        Returns:
            Number of pairs exported
        """
        entries = self.load_all()

        if filter_categories:
            entries = [e for e in entries if e.failure_category in filter_categories]
        if resolved_only:
            entries = [e for e in entries if e.resolved]

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        count = 0
        with output_path.open("w", encoding="utf-8") as f:
            for entry in entries:
                if entry.problem_statement and entry.patch:
                    pair = entry.to_instruction_pair()
                    f.write(json.dumps(pair) + "\n")
                    count += 1

        logger.info("Exported %d fine-tuning pairs to %s", count, output_path)
        return count