Spaces:

SouravNath
/

repomind-api

Running

File size: 6,505 Bytes

dc71cad

"""
agent/trajectory_logger.py
────────────────────────────
Trajectory logger — records every attempt as JSONL.

Each line in the trajectory file is one attempt (one JSON object per line;
shown pretty-printed here for readability):
{
    "instance_id": "django__django-12345",
    "repo": "django/django",
    "attempt": 1,
    "patch": "<unified diff>",
    "test_stdout": "<pytest output>",
    "fail_to_pass_results": {"tests/test_foo.py::test_x": true},
    "pass_to_pass_results": {"tests/test_foo.py::test_y": true},
    "resolved": false,
    "failure_category": "wrong_file_edit",
    "elapsed_seconds": 12.3,
    "token_cost": {"prompt_tokens": 1200, "completion_tokens": 400},
    "localised_files": ["django/db/models/query.py"],
    "problem_statement": "<issue text>",
    "timestamp": "2025-05-01T14:23:01.123456+00:00"
}

The JSONL dataset is filtered in Phase 7:
  - Keep: instances with known failure_category (not 'unknown')
  - Focus: syntax_error, hallucinated_api, wrong_file_edit — these are
    the most learnable patterns for fine-tuning
"""
from __future__ import annotations

import json
import logging
import threading
import time
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


@dataclass
class TrajectoryEntry:
    """
    One recorded attempt, serialisable as a single JSONL line.

    ``token_cost``, ``localised_files`` and ``problem_statement`` default to
    empty values; ``timestamp`` defaults to the current UTC time in ISO-8601
    form at construction.
    """

    instance_id: str
    repo: str
    attempt: int
    patch: str
    test_stdout: str
    fail_to_pass_results: dict[str, bool]
    pass_to_pass_results: dict[str, bool]
    resolved: bool
    failure_category: str
    elapsed_seconds: float
    token_cost: dict[str, int] = field(default_factory=dict)
    localised_files: list[str] = field(default_factory=list)
    problem_statement: str = ""
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_jsonl_line(self) -> str:
        """Serialise every field of this entry as one JSON object string."""
        payload = asdict(self)
        return json.dumps(payload)

    def to_instruction_pair(self) -> dict:
        """
        Build an instruction-following record for fine-tuning (Phase 7).

        Returned keys:
          system:    fixed role description
          user:      issue text (first 800 chars), one "# File:" header per
                     localised file, the failure category and the last
                     1000 chars of the test output
          assistant: this entry's patch
          metadata:  instance_id, attempt, failure_category, resolved
        """
        file_headers = [f"# File: {path}" for path in self.localised_files]
        relevant_files = "\n\n".join(file_headers)

        # Only the tail of the test output is kept; missing output becomes "".
        stdout_tail = ""
        if self.test_stdout:
            stdout_tail = self.test_stdout[-1000:]

        issue_section = f"## GitHub Issue\n{self.problem_statement[:800]}\n\n"
        files_section = f"## Relevant Files\n{relevant_files}\n\n"
        failure_section = (
            f"## Previous Attempt Failed\n"
            f"Category: {self.failure_category}\n"
            f"Test output:\n{stdout_tail}"
        )

        system_prompt = (
            "You are an expert Python software engineer. "
            "You fix bugs by generating minimal unified diffs."
        )
        metadata = {
            "instance_id": self.instance_id,
            "attempt": self.attempt,
            "failure_category": self.failure_category,
            "resolved": self.resolved,
        }

        return {
            "system": system_prompt,
            "user": issue_section + files_section + failure_section,
            "assistant": self.patch,
            "metadata": metadata,
        }


class TrajectoryLogger:
    """
    Appends trajectory entries to a JSONL file.

    Thread-safe within a single process: appends and the entry counter are
    guarded by an in-process lock, so one instance may be shared between
    threads. No OS-level file lock is taken, so separate processes (or
    separate logger instances) writing to the same path are not coordinated.

    All files are read and written as UTF-8, independent of the platform's
    default encoding.
    """

    def __init__(self, output_path: Path):
        """
        Create the logger and make sure the output directory exists.

        Args:
            output_path: JSONL file that entries are appended to. The file
                itself is created on the first ``log`` call.
        """
        self.output_path = Path(output_path)
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self._count = 0
        # Serialises file appends and updates to ``_count`` across threads.
        self._lock = threading.Lock()
        logger.info("TrajectoryLogger writing to %s", self.output_path)

    def log(self, entry: TrajectoryEntry) -> None:
        """Append one trajectory entry to the JSONL file."""
        # Serialise outside the lock so only the file I/O is held under it.
        line = entry.to_jsonl_line() + "\n"
        with self._lock:
            with self.output_path.open("a", encoding="utf-8") as f:
                f.write(line)
            self._count += 1

    @property
    def total_logged(self) -> int:
        """Number of entries appended through this instance.

        Lines already present in the file before this instance was created
        are not counted.
        """
        return self._count

    def load_all(self) -> list[TrajectoryEntry]:
        """
        Load all logged trajectories from file.

        Blank lines are ignored. Lines that are not valid JSON, or whose
        content cannot be passed as keyword arguments to ``TrajectoryEntry``,
        are skipped with a warning.

        Returns:
            Entries in file order; an empty list if the file does not exist.
        """
        if not self.output_path.exists():
            return []
        entries = []
        with self.output_path.open(encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    entries.append(TrajectoryEntry(**data))
                except (json.JSONDecodeError, TypeError) as e:
                    logger.warning("Skipping malformed trajectory line: %s", e)
        return entries

    def stats(self) -> dict:
        """
        Summary statistics over all logged trajectories.

        All figures are computed per entry (one entry = one attempt), not per
        instance: ``resolved_rate`` is resolved entries / total entries and
        ``avg_attempts`` is the mean of the ``attempt`` field over all entries.

        Returns:
            ``{"total": 0}`` when nothing is logged, otherwise a dict with
            total, resolved, resolved_rate, avg_attempts, failure_categories
            (category -> entry count) and unique_instances.
        """
        entries = self.load_all()
        if not entries:
            return {"total": 0}

        resolved = [e for e in entries if e.resolved]
        categories: dict[str, int] = {}
        for e in entries:
            categories[e.failure_category] = categories.get(e.failure_category, 0) + 1

        return {
            "total": len(entries),
            "resolved": len(resolved),
            "resolved_rate": len(resolved) / len(entries),
            "avg_attempts": sum(e.attempt for e in entries) / len(entries),
            "failure_categories": categories,
            "unique_instances": len({e.instance_id for e in entries}),
        }

    def export_for_finetuning(
        self,
        output_path: Path,
        filter_categories: list[str] | None = None,
        resolved_only: bool = False,
    ) -> int:
        """
        Export trajectory entries as instruction-following pairs (Phase 7).

        Entries with an empty ``problem_statement`` or ``patch`` are skipped.
        The output file is overwritten if it already exists.

        Args:
            output_path: where to write the fine-tuning JSONL
            filter_categories: only export entries with these categories;
                ``None`` or an empty list disables the filter
            resolved_only: only export successfully resolved instances

        Returns:
            Number of pairs exported
        """
        entries = self.load_all()

        if filter_categories:
            entries = [e for e in entries if e.failure_category in filter_categories]
        if resolved_only:
            entries = [e for e in entries if e.resolved]

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        count = 0
        with output_path.open("w", encoding="utf-8") as f:
            for entry in entries:
                if entry.problem_statement and entry.patch:
                    pair = entry.to_instruction_pair()
                    f.write(json.dumps(pair) + "\n")
                    count += 1

        logger.info("Exported %d fine-tuning pairs to %s", count, output_path)
        return count