# chakravyuh/eval/agreement.py
"""Inter-annotator / rule-vs-expert agreement statistics.
v0 ships `rule_vs_expert_kappa` — Cohen's κ between the scripted analyzer's
binary is_scam predictions and the human-curated ground-truth labels. This
is NOT full inter-rater reliability (that requires two independent human
annotators). It IS a legitimate agreement measure between a rule-based
detector and the expert labels, and it provides a reproducible consistency
number for v0.2.
Full human IRR (Cohen's κ between two human labelers on a 30-scenario
sample) is deferred to v0.3.
"""
from __future__ import annotations

import argparse
import json
from pathlib import Path

from chakravyuh_env.agents.analyzer import ScriptedAnalyzer
from chakravyuh_env.schemas import ChatMessage, Observation


def cohens_kappa(a: list[int], b: list[int]) -> float:
    """Cohen's κ for two binary label sequences of equal length."""
    if len(a) != len(b):
        raise ValueError(f"length mismatch: {len(a)} vs {len(b)}")
    n = len(a)
    if n == 0:
        return 0.0
    agree = sum(1 for x, y in zip(a, b) if x == y)
    p_o = agree / n
    # Expected agreement by chance, from each rater's marginal positive rate.
    pa1 = sum(a) / n
    pb1 = sum(b) / n
    p_e = pa1 * pb1 + (1 - pa1) * (1 - pb1)
    if p_e == 1.0:
        # Degenerate case: both raters gave the same constant label, so
        # observed agreement is also perfect; define κ = 1 by convention.
        return 1.0
    return (p_o - p_e) / (1 - p_e)
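
# Hand-checkable example of the formula above (illustrative values, not from
# the benchmark): with a = [1, 1, 0, 0] and b = [1, 0, 0, 0], p_o = 3/4,
# pa1 = 0.5, pb1 = 0.25, so p_e = 0.5 * 0.25 + 0.5 * 0.75 = 0.5 and
# κ = (0.75 - 0.5) / (1 - 0.5) = 0.5:
#
#     >>> cohens_kappa([1, 1, 0, 0], [1, 0, 0, 0])
#     0.5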

def scripted_label(text: str, threshold: float = 0.5) -> int:
    """Run the scripted analyzer on a message; return 1 if flagged, else 0."""
    analyzer = ScriptedAnalyzer(flag_threshold=threshold)
    # Wrap the text as a one-turn chat so the analyzer sees a normal observation.
    obs = Observation(
        agent_role="analyzer",
        turn=1,
        chat_history=[ChatMessage(sender="scammer", turn=1, text=text)],
    )
    result = analyzer.act(obs)
    return 1 if result.score >= threshold else 0
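
# Example usage (illustrative message; the returned label depends entirely on
# ScriptedAnalyzer's scoring rules and the threshold):
#
#     scripted_label("URGENT: your account is blocked, share the OTP now")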

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        default="data/chakravyuh-bench-v0/scenarios.jsonl",
    )
    parser.add_argument("--threshold", type=float, default=0.5)
    args = parser.parse_args()
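
    # Each JSONL record is assumed to look roughly like this (field names
    # inferred from the accesses below; illustrative, not a complete record):
    #
    #     {"attack_sequence": [{"sender": "scammer", "text": "..."}],
    #      "ground_truth": {"is_scam": true}}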
    # read_text() avoids leaving a file handle open; skip blank JSONL lines.
    raw_lines = Path(args.dataset).read_text().splitlines()
    scenarios = [json.loads(line) for line in raw_lines if line.strip()]

    expert_labels: list[int] = []
    rule_labels: list[int] = []
    for s in scenarios:
        # Concatenate all scammer turns (for multi-turn, we give the rule
        # detector the full sequence — same privilege the expert had).
        scammer_text = " ".join(
            step["text"]
            for step in s["attack_sequence"]
            if step["sender"] == "scammer"
        )
        if not scammer_text:
            continue
        expert_labels.append(1 if s["ground_truth"]["is_scam"] else 0)
        rule_labels.append(scripted_label(scammer_text, args.threshold))

    n = len(expert_labels)
    if n == 0:
        raise SystemExit("no scenarios with scammer text found; nothing to compare")
    agree = sum(1 for x, y in zip(expert_labels, rule_labels) if x == y)
    kappa = cohens_kappa(expert_labels, rule_labels)

    print(f"Dataset: {args.dataset}")
    print(f"N compared: {n}")
    print(f"Raw agreement: {agree}/{n} = {agree/n:.3f}")
    print(f"Expert positive rate: {sum(expert_labels)/n:.3f}")
    print(f"Rule positive rate: {sum(rule_labels)/n:.3f}")
    print(f"Cohen's κ (rule vs expert): {kappa:.3f}")

    # Interpretation band
    if kappa < 0:
        interp = "worse than chance"
    elif kappa < 0.20:
        interp = "slight"
    elif kappa < 0.40:
        interp = "fair"
    elif kappa < 0.60:
        interp = "moderate"
    elif kappa < 0.80:
        interp = "substantial"
    else:
        interp = "almost perfect"
    print(f"Landis-Koch interpretation: {interp} agreement")


if __name__ == "__main__":
    main()