Spaces:

Prasham1710
/

ci-triage-training

Sleeping

File size: 7,785 Bytes

7a658b7

"""RealBugGenerator — scenario family: real_bug."""

from __future__ import annotations

import random

from ci_triage_env.data.clustering.archetypes import Archetype
from ci_triage_env.data.generators._helpers import (
    ArchetypedGenerator,
    build_base_outputs,
    fake_short_sha,
    fake_timestamp,
    fill_template,
    make_failure_summary,
    pick_test_name,
    scenario_id_for,
)
from ci_triage_env.schemas.diagnosis import DiagnosisLabel
from ci_triage_env.schemas.scenario import (
    GroundTruth,
    Scenario,
    ScenarioMetadata,
    TerminalActionSpec,
    ToolOutput,
)

_DEFAULT_LOG_TEMPLATE = (
    "FAILED {TEST_MODULE}::{TEST_FUNC} - AssertionError\n"
    "  assert result == expected\n"
    "  where result   = {ACTUAL}\n"
    "  and   expected = {EXPECTED}\n"
    "E AssertionError: assertion failed at line {LINENO}\n"
    "short test summary info\n"
    "FAILED {TEST_MODULE}::{TEST_FUNC}"
)

_DEFAULT_BUGGY_CODE = (
    "def {TEST_FUNC}(self):\n"
    "    result = self.service.compute({INPUT})\n"
    "    assert result == {EXPECTED}  # broke after {COMMIT_MSG}\n"
)


class RealBugGenerator(ArchetypedGenerator):
    """Scenario generator for the ``real_bug`` family.

    Every scenario produced here is labelled ``DiagnosisLabel.REAL_BUG``: the
    rerun fails again, the flake history is clean, and the first entry of the
    recent-commits payload is the commit presented as the culprit.
    """

    family_name = "real_bug"
    label = DiagnosisLabel.REAL_BUG

    def informative_tools(self) -> list[str]:
        """Return the tool names recorded as informative for this family."""
        return ["read_logs", "inspect_test_code", "recent_commits", "rerun_test"]

    def minimal_evidence_set(self) -> list[str]:
        """Return the tool names recorded as the minimal evidence set."""
        return ["recent_commits", "inspect_test_code"]

    def _default_archetypes(self) -> list[Archetype]:
        """Return the two built-in archetypes for this family.

        ``real_bug_001`` is an assertion failure (module-level default log
        template); ``real_bug_002`` is an ``AttributeError`` on ``None``.
        """
        return [
            Archetype(
                archetype_id="real_bug_001",
                family="real_bug",
                pattern_summary="AssertionError after recent commit changed return value",
                log_template=_DEFAULT_LOG_TEMPLATE,
                slot_distributions={
                    "TEST_MODULE": ["tests/unit/test_core", "tests/unit/test_api"],
                    "TEST_FUNC": ["test_compute", "test_process", "test_validate"],
                    "ACTUAL": ["None", "0", "-1", "[]"],
                    "EXPECTED": ["42", "True", "{'ok': True}"],
                    "LINENO": ["42", "87", "115", "203"],
                },
                informative_tools_hint=["read_logs:full", "inspect_test_code", "recent_commits"],
                minimal_evidence_hint=["recent_commits", "inspect_test_code"],
            ),
            Archetype(
                archetype_id="real_bug_002",
                family="real_bug",
                pattern_summary="AttributeError / NullPointerException in core logic",
                log_template=(
                    "AttributeError: 'NoneType' object has no attribute '{ATTR}'\n"
                    "  File \"{TEST_MODULE}.py\", line {LINENO}, in {TEST_FUNC}\n"
                    "    return obj.{ATTR}\n"
                    "FAILED {TEST_MODULE}::{TEST_FUNC}"
                ),
                slot_distributions={
                    "ATTR": ["name", "id", "value", "data", "result"],
                    "TEST_MODULE": ["tests/unit/test_models", "tests/unit/test_service"],
                    "TEST_FUNC": ["test_create", "test_update", "test_fetch"],
                    "LINENO": ["33", "67", "91", "144"],
                },
                informative_tools_hint=["read_logs:full", "inspect_test_code", "recent_commits"],
                minimal_evidence_hint=["inspect_test_code"],
            ),
        ]

    def generate(self, seed: int, source_log_hash: str | None = None) -> Scenario:
        """Build one ``real_bug`` scenario.

        All random choices are drawn from a ``random.Random`` seeded with
        ``seed``. The order of draws below is significant: reordering them
        would change the scenario produced for a given seed.

        Args:
            seed: Seed for the scenario's random generator; also used to
                derive the scenario id.
            source_log_hash: Optional value stored verbatim in the scenario
                metadata.

        Returns:
            A ``Scenario`` whose tool outputs include a failing rerun, a clean
            flake history, the test source, and a recent-commits list headed
            by the breaking commit.
        """
        rng = random.Random(seed)
        archetype = self._pick_archetype(rng)
        log_text = fill_template(archetype.log_template, archetype.slot_distributions, rng)
        test_name = pick_test_name(rng)

        summary = make_failure_summary(
            self.family_name, rng, test_name=test_name, log_excerpt=log_text
        )
        branch = summary.branch

        outputs = build_base_outputs(
            test_name, branch, rng,
            log_lines=log_text.splitlines(),
            rerun_passes=False,
        )

        # --- informative overrides ---
        breaking_author = rng.choice(["@alice", "@bob", "@carol"])
        breaking_sha = fake_short_sha(rng)

        # Pieces of the test id used in the fake commit. The indexed lookups
        # are guarded so a test name without "_" in its function part or
        # without "/" in its path cannot raise IndexError; for names that do
        # contain them the values are unchanged.
        func_name = test_name.split("::")[-1]
        func_words = func_name.split("_")
        api_subject = func_words[1] if len(func_words) > 1 else func_name
        module_path = test_name.rsplit("::", 1)[0]
        path_segments = test_name.split("/")
        # NOTE(review): this takes the *second* path segment, which for a name
        # like "tests/unit/test_x.py::test_y" is the directory "unit" rather
        # than the module — confirm against pick_test_name's output format.
        src_stem = path_segments[1] if len(path_segments) > 1 else module_path

        breaking_commit = {
            "sha": breaking_sha,
            "author": breaking_author,
            "msg": rng.choice([
                f"fix: update {func_name.replace('test_', '')} logic",
                "refactor: change return contract of compute()",
                f"feat: extend {api_subject} API",
            ]),
            "files": [
                f"src/{src_stem.replace('test_', '')}.py",
                module_path,
            ],
        }
        # Unrelated second commit, listed after the breaking one.
        unrelated_commit = {
            "sha": fake_short_sha(rng),
            "author": rng.choice(["@dave", "@eve"]),
            "msg": "chore: update lockfile",
            "files": ["pyproject.toml"],
        }
        outputs[f"recent_commits:{branch}"] = ToolOutput(
            tool_name="recent_commits",
            payload={"commits": [breaking_commit, unrelated_commit]},
            cost_units=0.002,
        )

        buggy_code = self._pick_buggy_code(rng)
        outputs[f"inspect_test_code:{test_name}"] = ToolOutput(
            tool_name="inspect_test_code",
            payload={"source": buggy_code, "fixtures": []},
            cost_units=0.002,
        )

        # Rerun also fails — it's a real bug, not a flake.
        # NOTE(review): this key has no ":<arg>" suffix, unlike the other
        # overrides — confirm it matches the key build_base_outputs uses.
        outputs["rerun_test"] = ToolOutput(
            tool_name="rerun_test",
            payload={"results": [{"passed": False, "duration_s": round(rng.uniform(5, 30), 2),
                                  "log_excerpt": log_text.splitlines()[:3]}]},
            cost_units=0.01,
        )

        # Flake history is clean (test was stable before the bad commit)
        outputs[f"query_flake_history:{test_name}"] = ToolOutput(
            tool_name="query_flake_history",
            payload={"failure_count": 0, "pass_count": 50, "recent_failures": []},
            cost_units=0.002,
        )

        difficulty = rng.choice(["easy", "medium", "hard"])
        rationale = (
            f"The commit {breaking_sha} by {breaking_author} changed the return contract "
            f"of the production code exercised by {test_name}. "
            f"inspect_test_code shows the assertion that now fails; "
            f"recent_commits:{branch} shows the introducing commit. "
            f"query_flake_history shows no prior failures — not a flake. "
            f"rerun_test fails again — confirms deterministic breakage."
        )

        return Scenario(
            schema_version="1.0",
            scenario_id=scenario_id_for(self.family_name, seed),
            family=self.family_name,
            seed=seed,
            ground_truth=GroundTruth(
                label=self.label,
                rationale=rationale,
                is_ambiguous=False,
                confidence_target=1.0,
            ),
            failure_summary=summary,
            tool_outputs=outputs,
            informative_tools=self.informative_tools(),
            minimal_evidence_set=self.minimal_evidence_set(),
            correct_terminal_action=TerminalActionSpec(
                primary="submit_diagnosis",
                args={
                    "diagnosis": self.label.value,
                    "confidence": 1.0,
                    # The bug is filed against the author of the breaking commit.
                    "secondary_actions": [{"name": "file_bug", "owner": breaking_author}],
                },
                acceptable_alternatives=[],
            ),
            metadata=ScenarioMetadata(
                generator_version="1.0",
                # Last RNG draw; kept here so the draw order is unchanged.
                generated_at=fake_timestamp(rng),
                source_log_hash=source_log_hash,
                difficulty=difficulty,
            ),
        )