Spaces:

Prasham1710
/

ci-triage-env

Sleeping

ci-triage-env / src /ci_triage_env /data /generators /real_bug.py

Prasham.Jain

feat(data): Phase B4 — 7 ScenarioFamilyGenerators with archetype loading

7a658b7 13 days ago

7.79 kB

	"""RealBugGenerator — scenario family: real_bug."""

	from __future__ import annotations

	import random

	from ci_triage_env.data.clustering.archetypes import Archetype
	from ci_triage_env.data.generators._helpers import (
	ArchetypedGenerator,
	build_base_outputs,
	fake_short_sha,
	fake_timestamp,
	fill_template,
	make_failure_summary,
	pick_test_name,
	scenario_id_for,
	)
	from ci_triage_env.schemas.diagnosis import DiagnosisLabel
	from ci_triage_env.schemas.scenario import (
	GroundTruth,
	Scenario,
	ScenarioMetadata,
	TerminalActionSpec,
	ToolOutput,
	)

	# Log template used by the "real_bug_001" archetype below. The {UPPER_CASE}
	# markers are slot names; they are substituted by fill_template() using the
	# archetype's slot_distributions. Lines are joined with "\n" and the text
	# deliberately has no trailing newline.
	_DEFAULT_LOG_TEMPLATE = "\n".join(
	    (
	        "FAILED {TEST_MODULE}::{TEST_FUNC} - AssertionError",
	        " assert result == expected",
	        " where result = {ACTUAL}",
	        " and expected = {EXPECTED}",
	        "E AssertionError: assertion failed at line {LINENO}",
	        "short test summary info",
	        "FAILED {TEST_MODULE}::{TEST_FUNC}",
	    )
	)

	# Template for the test source shown by inspect_test_code. Every line,
	# including the last, ends with "\n". The {UPPER_CASE} markers are slots.
	# NOTE(review): nothing in the visible part of this file references this
	# constant; presumably `_pick_buggy_code` consumes it — confirm.
	_DEFAULT_BUGGY_CODE = "".join(
	    source_line + "\n"
	    for source_line in (
	        "def {TEST_FUNC}(self):",
	        " result = self.service.compute({INPUT})",
	        " assert result == {EXPECTED} # broke after {COMMIT_MSG}",
	    )
	)


	# Scenario generator for the "real_bug" family (see the module docstring).
	# Archetype selection (`_pick_archetype`) is inherited from
	# ArchetypedGenerator; this class supplies the default archetypes and
	# builds the Scenario in `generate`.
	class RealBugGenerator(ArchetypedGenerator):
	# Family identifier: passed to make_failure_summary() and scenario_id_for(),
	# and stored as Scenario.family by generate().
	family_name = "real_bug"
	# Ground-truth label for every scenario produced here; its `.value` is also
	# the expected "diagnosis" argument of the terminal action.
	label = DiagnosisLabel.REAL_BUG

	def informative_tools(self) -> list[str]:
	    """Return the tool names this generator reports as informative.

	    ``generate`` stores the result on the scenario's ``informative_tools``
	    field. A new list is built on every call.
	    """
	    tool_names = ("read_logs", "inspect_test_code", "recent_commits", "rerun_test")
	    return list(tool_names)

	def minimal_evidence_set(self) -> list[str]:
	    """Return the tool names recorded as the scenario's minimal evidence set.

	    ``generate`` stores the result on the scenario's ``minimal_evidence_set``
	    field. A new list is built on every call.
	    """
	    required_tools = ("recent_commits", "inspect_test_code")
	    return list(required_tools)

	def _default_archetypes(self) -> list[Archetype]:
	    """Return the two hard-coded ``real_bug`` archetypes.

	    The first models an ``AssertionError`` after a changed return value and
	    uses the module-level ``_DEFAULT_LOG_TEMPLATE``. The second models an
	    ``AttributeError`` on ``None`` and carries its own inline log template.

	    NOTE(review): presumably the fallback used by ``ArchetypedGenerator``
	    when no archetypes are loaded from elsewhere — confirm against the base
	    class.
	    """
	    # Both archetypes list the same informative-tool hints. Each gets its
	    # own list object so that mutating one cannot affect the other.
	    shared_tool_hints = ("read_logs:full", "inspect_test_code", "recent_commits")

	    # Slot order is kept exactly as declared, in case fill_template draws
	    # values in dict order.
	    assertion_slots = {
	        "TEST_MODULE": ["tests/unit/test_core", "tests/unit/test_api"],
	        "TEST_FUNC": ["test_compute", "test_process", "test_validate"],
	        "ACTUAL": ["None", "0", "-1", "[]"],
	        "EXPECTED": ["42", "True", "{'ok': True}"],
	        "LINENO": ["42", "87", "115", "203"],
	    }
	    assertion_archetype = Archetype(
	        archetype_id="real_bug_001",
	        family="real_bug",
	        pattern_summary="AssertionError after recent commit changed return value",
	        log_template=_DEFAULT_LOG_TEMPLATE,
	        slot_distributions=assertion_slots,
	        informative_tools_hint=list(shared_tool_hints),
	        minimal_evidence_hint=["recent_commits", "inspect_test_code"],
	    )

	    # Traceback-style template; joined with "\n", no trailing newline.
	    null_attr_template = "\n".join(
	        (
	            "AttributeError: 'NoneType' object has no attribute '{ATTR}'",
	            ' File "{TEST_MODULE}.py", line {LINENO}, in {TEST_FUNC}',
	            " return obj.{ATTR}",
	            "FAILED {TEST_MODULE}::{TEST_FUNC}",
	        )
	    )
	    null_attr_slots = {
	        "ATTR": ["name", "id", "value", "data", "result"],
	        "TEST_MODULE": ["tests/unit/test_models", "tests/unit/test_service"],
	        "TEST_FUNC": ["test_create", "test_update", "test_fetch"],
	        "LINENO": ["33", "67", "91", "144"],
	    }
	    null_attr_archetype = Archetype(
	        archetype_id="real_bug_002",
	        family="real_bug",
	        pattern_summary="AttributeError / NullPointerException in core logic",
	        log_template=null_attr_template,
	        slot_distributions=null_attr_slots,
	        informative_tools_hint=list(shared_tool_hints),
	        minimal_evidence_hint=["inspect_test_code"],
	    )

	    return [assertion_archetype, null_attr_archetype]

	def generate(self, seed: int, source_log_hash: str | None = None) -> Scenario:
	    """Build one ``real_bug`` scenario, driven entirely by ``seed``.

	    All randomness comes from a single ``random.Random(seed)`` instance that
	    is passed to every helper in turn. The order of the ``rng``-consuming
	    statements therefore determines the output; do not reorder them.

	    Args:
	        seed: Seed for the scenario's private random generator. Also passed
	            to ``scenario_id_for`` and stored on the scenario.
	        source_log_hash: Optional value stored unchanged in the scenario
	            metadata.

	    Returns:
	        A ``Scenario`` labelled ``self.label``. Its tool outputs include a
	        ``recent_commits`` entry whose first commit is the breaking one, an
	        ``inspect_test_code`` entry, a failing ``rerun_test`` and a
	        ``query_flake_history`` with zero failures.
	    """
	    rng = random.Random(seed)
	    archetype = self._pick_archetype(rng)
	    log_text = fill_template(archetype.log_template, archetype.slot_distributions, rng)
	    test_name = pick_test_name(rng)

	    summary = make_failure_summary(
	        self.family_name, rng, test_name=test_name, log_excerpt=log_text
	    )
	    branch = summary.branch

	    outputs = build_base_outputs(
	        test_name,
	        branch,
	        rng,
	        log_lines=log_text.splitlines(),
	        rerun_passes=False,
	    )

	    # Take the test id apart once, without assuming a fixed number of path
	    # components or underscores. The previous code indexed `split('/')[1]`
	    # and `split('_')[1]` directly, which picked the wrong path component
	    # and raised IndexError on short names.
	    # assumes ids look like "path/to/test_mod.py::test_func" — TODO confirm
	    # against pick_test_name.
	    test_file = test_name.rsplit("::", 1)[0]
	    test_func = test_name.split("::")[-1]
	    func_words = test_func.split("_")
	    # Second underscore-separated word when there is one, else the whole name.
	    api_name = func_words[1] if len(func_words) > 1 else test_func
	    # Source file touched by the commit: basename of the test file, without
	    # the ".py" suffix and the "test_" marker, placed under src/.
	    module_stem = test_file.rsplit("/", 1)[-1]
	    if module_stem.endswith(".py"):
	        module_stem = module_stem[: -len(".py")]
	    module_stem = module_stem.replace("test_", "")
	    source_file = f"src/{module_stem}.py"

	    # --- informative overrides ---
	    breaking_author = rng.choice(["@alice", "@bob", "@carol"])
	    breaking_sha = fake_short_sha(rng)
	    breaking_commit = {
	        "sha": breaking_sha,
	        "author": breaking_author,
	        "msg": rng.choice(
	            [
	                f"fix: update {test_func.replace('test_', '')} logic",
	                "refactor: change return contract of compute()",
	                f"feat: extend {api_name} API",
	            ]
	        ),
	        "files": [source_file, test_file],
	    }
	    # The breaking commit is listed first, followed by one unrelated chore
	    # commit as a distractor.
	    outputs[f"recent_commits:{branch}"] = ToolOutput(
	        tool_name="recent_commits",
	        payload={
	            "commits": [
	                breaking_commit,
	                {
	                    "sha": fake_short_sha(rng),
	                    "author": rng.choice(["@dave", "@eve"]),
	                    "msg": "chore: update lockfile",
	                    "files": ["pyproject.toml"],
	                },
	            ]
	        },
	        cost_units=0.002,
	    )

	    buggy_code = self._pick_buggy_code(rng)
	    outputs[f"inspect_test_code:{test_name}"] = ToolOutput(
	        tool_name="inspect_test_code",
	        payload={"source": buggy_code, "fixtures": []},
	        cost_units=0.002,
	    )

	    # Rerun also fails — it's a real bug, not a flake.
	    # NOTE(review): this key has no ":<arg>" suffix, unlike the other
	    # overrides; confirm it matches the key build_base_outputs uses.
	    outputs["rerun_test"] = ToolOutput(
	        tool_name="rerun_test",
	        payload={
	            "results": [
	                {
	                    "passed": False,
	                    "duration_s": round(rng.uniform(5, 30), 2),
	                    "log_excerpt": log_text.splitlines()[:3],
	                }
	            ]
	        },
	        cost_units=0.01,
	    )

	    # Flake history is clean (test was stable before the bad commit).
	    outputs[f"query_flake_history:{test_name}"] = ToolOutput(
	        tool_name="query_flake_history",
	        payload={"failure_count": 0, "pass_count": 50, "recent_failures": []},
	        cost_units=0.002,
	    )

	    difficulty = rng.choice(["easy", "medium", "hard"])
	    rationale = (
	        f"The commit {breaking_sha} by {breaking_author} changed the return contract "
	        f"of the production code exercised by {test_name}. "
	        f"inspect_test_code shows the assertion that now fails; "
	        f"recent_commits:{branch} shows the introducing commit. "
	        f"query_flake_history shows no prior failures — not a flake. "
	        f"rerun_test fails again — confirms deterministic breakage."
	    )

	    return Scenario(
	        schema_version="1.0",
	        scenario_id=scenario_id_for(self.family_name, seed),
	        family=self.family_name,
	        seed=seed,
	        ground_truth=GroundTruth(
	            label=self.label,
	            rationale=rationale,
	            is_ambiguous=False,
	            confidence_target=1.0,
	        ),
	        failure_summary=summary,
	        tool_outputs=outputs,
	        informative_tools=self.informative_tools(),
	        minimal_evidence_set=self.minimal_evidence_set(),
	        correct_terminal_action=TerminalActionSpec(
	            primary="submit_diagnosis",
	            args={
	                "diagnosis": self.label.value,
	                "confidence": 1.0,
	                # The bug is filed against the author of the breaking commit.
	                "secondary_actions": [{"name": "file_bug", "owner": breaking_author}],
	            },
	            acceptable_alternatives=[],
	        ),
	        metadata=ScenarioMetadata(
	            generator_version="1.0",
	            # Drawn from rng so the timestamp is reproducible per seed.
	            generated_at=fake_timestamp(rng),
	            source_log_hash=source_log_hash,
	            difficulty=difficulty,
	        ),
	    )