Prasham.Jain
feat(data): Phase B4 β€” 7 ScenarioFamilyGenerators with archetype loading
7a658b7
"""RealBugGenerator β€” scenario family: real_bug."""
from __future__ import annotations
import random
from ci_triage_env.data.clustering.archetypes import Archetype
from ci_triage_env.data.generators._helpers import (
ArchetypedGenerator,
build_base_outputs,
fake_short_sha,
fake_timestamp,
fill_template,
make_failure_summary,
pick_test_name,
scenario_id_for,
)
from ci_triage_env.schemas.diagnosis import DiagnosisLabel
from ci_triage_env.schemas.scenario import (
GroundTruth,
Scenario,
ScenarioMetadata,
TerminalActionSpec,
ToolOutput,
)
_DEFAULT_LOG_TEMPLATE = (
"FAILED {TEST_MODULE}::{TEST_FUNC} - AssertionError\n"
" assert result == expected\n"
" where result = {ACTUAL}\n"
" and expected = {EXPECTED}\n"
"E AssertionError: assertion failed at line {LINENO}\n"
"short test summary info\n"
"FAILED {TEST_MODULE}::{TEST_FUNC}"
)
_DEFAULT_BUGGY_CODE = (
"def {TEST_FUNC}(self):\n"
" result = self.service.compute({INPUT})\n"
" assert result == {EXPECTED} # broke after {COMMIT_MSG}\n"
)
class RealBugGenerator(ArchetypedGenerator):
family_name = "real_bug"
label = DiagnosisLabel.REAL_BUG
def informative_tools(self) -> list[str]:
return ["read_logs", "inspect_test_code", "recent_commits", "rerun_test"]
def minimal_evidence_set(self) -> list[str]:
return ["recent_commits", "inspect_test_code"]
def _default_archetypes(self) -> list[Archetype]:
return [
Archetype(
archetype_id="real_bug_001",
family="real_bug",
pattern_summary="AssertionError after recent commit changed return value",
log_template=_DEFAULT_LOG_TEMPLATE,
slot_distributions={
"TEST_MODULE": ["tests/unit/test_core", "tests/unit/test_api"],
"TEST_FUNC": ["test_compute", "test_process", "test_validate"],
"ACTUAL": ["None", "0", "-1", "[]"],
"EXPECTED": ["42", "True", "{'ok': True}"],
"LINENO": ["42", "87", "115", "203"],
},
informative_tools_hint=["read_logs:full", "inspect_test_code", "recent_commits"],
minimal_evidence_hint=["recent_commits", "inspect_test_code"],
),
Archetype(
archetype_id="real_bug_002",
family="real_bug",
pattern_summary="AttributeError / NullPointerException in core logic",
log_template=(
"AttributeError: 'NoneType' object has no attribute '{ATTR}'\n"
" File \"{TEST_MODULE}.py\", line {LINENO}, in {TEST_FUNC}\n"
" return obj.{ATTR}\n"
"FAILED {TEST_MODULE}::{TEST_FUNC}"
),
slot_distributions={
"ATTR": ["name", "id", "value", "data", "result"],
"TEST_MODULE": ["tests/unit/test_models", "tests/unit/test_service"],
"TEST_FUNC": ["test_create", "test_update", "test_fetch"],
"LINENO": ["33", "67", "91", "144"],
},
informative_tools_hint=["read_logs:full", "inspect_test_code", "recent_commits"],
minimal_evidence_hint=["inspect_test_code"],
),
]
def generate(self, seed: int, source_log_hash: str | None = None) -> Scenario:
rng = random.Random(seed)
archetype = self._pick_archetype(rng)
log_text = fill_template(archetype.log_template, archetype.slot_distributions, rng)
test_name = pick_test_name(rng)
summary = make_failure_summary(
self.family_name, rng, test_name=test_name, log_excerpt=log_text
)
branch = summary.branch
outputs = build_base_outputs(
test_name, branch, rng,
log_lines=log_text.splitlines(),
rerun_passes=False,
)
# --- informative overrides ---
breaking_author = rng.choice(["@alice", "@bob", "@carol"])
breaking_sha = fake_short_sha(rng)
breaking_commit = {
"sha": breaking_sha,
"author": breaking_author,
"msg": rng.choice([
f"fix: update {test_name.split('::')[-1].replace('test_', '')} logic",
"refactor: change return contract of compute()",
f"feat: extend {test_name.split('::')[-1].split('_')[1]} API",
]),
"files": [
f"src/{test_name.split('/')[1].replace('test_', '')}.py",
test_name.rsplit("::", 1)[0],
],
}
outputs[f"recent_commits:{branch}"] = ToolOutput(
tool_name="recent_commits",
payload={"commits": [breaking_commit, {
"sha": fake_short_sha(rng),
"author": rng.choice(["@dave", "@eve"]),
"msg": "chore: update lockfile",
"files": ["pyproject.toml"],
}]},
cost_units=0.002,
)
buggy_code = self._pick_buggy_code(rng)
outputs[f"inspect_test_code:{test_name}"] = ToolOutput(
tool_name="inspect_test_code",
payload={"source": buggy_code, "fixtures": []},
cost_units=0.002,
)
# Rerun also fails β€” it's a real bug, not a flake
outputs["rerun_test"] = ToolOutput(
tool_name="rerun_test",
payload={"results": [{"passed": False, "duration_s": round(rng.uniform(5, 30), 2),
"log_excerpt": log_text.splitlines()[:3]}]},
cost_units=0.01,
)
# Flake history is clean (test was stable before the bad commit)
outputs[f"query_flake_history:{test_name}"] = ToolOutput(
tool_name="query_flake_history",
payload={"failure_count": 0, "pass_count": 50, "recent_failures": []},
cost_units=0.002,
)
difficulty = rng.choice(["easy", "medium", "hard"])
rationale = (
f"The commit {breaking_sha} by {breaking_author} changed the return contract "
f"of the production code exercised by {test_name}. "
f"inspect_test_code shows the assertion that now fails; "
f"recent_commits:{branch} shows the introducing commit. "
f"query_flake_history shows no prior failures β€” not a flake. "
f"rerun_test fails again β€” confirms deterministic breakage."
)
return Scenario(
schema_version="1.0",
scenario_id=scenario_id_for(self.family_name, seed),
family=self.family_name,
seed=seed,
ground_truth=GroundTruth(
label=self.label,
rationale=rationale,
is_ambiguous=False,
confidence_target=1.0,
),
failure_summary=summary,
tool_outputs=outputs,
informative_tools=self.informative_tools(),
minimal_evidence_set=self.minimal_evidence_set(),
correct_terminal_action=TerminalActionSpec(
primary="submit_diagnosis",
args={
"diagnosis": self.label.value,
"confidence": 1.0,
"secondary_actions": [{"name": "file_bug", "owner": breaking_author}],
},
acceptable_alternatives=[],
),
metadata=ScenarioMetadata(
generator_version="1.0",
generated_at=fake_timestamp(rng),
source_log_hash=source_log_hash,
difficulty=difficulty,
),
)