Spaces:

pratinavseth
/

cricket-captain-llm

Sleeping

App Files Files Community

cricket-captain-llm / server /coherence_grader.py

pratinavseth

sync: today's source updates (XML-only prompt, reward unclip, neg-reward on loss, pinned versions, configs reorg)

2fc50a9 verified 13 days ago

raw

history blame contribute delete

4.94 kB

	"""
	Coherence grader: measures alignment between declared strategy and executed shot.

	Three components (weighted sum per PRD v2.1):
	1. aggression_match (50%) — shot aggression vs declared aggression
	2. rationale_specificity (30%) — word count + domain keyword density
	3. phase_appropriate (20%) — declared aggression vs expected aggression for phase

	Non-gameability:
	- "I'll play well" → near-zero specificity → low score
	- Declaring aggression=0.8 then playing defensive → aggression mismatch penalty
	- Declaring aggression=0.8 in middle overs (expected 0.35) → phase_appropriate penalty
	- Only path to 1.0: specific + phase-calibrated declaration + consistent execution
	"""

	import string

	from server.markov_engine import SHOT_AGGRESSION

	# Word count at which the length half of the specificity score reaches 1.0.
	_SPECIFICITY_WORD_TARGET = 12

	# Cricket vocabulary that counts toward the keyword half of the specificity
	# score. Only membership matters; the grouping below is for readability.
	_DOMAIN_KEYWORDS = {
	    # scoring and match state
	    "run", "runs", "rate", "rr", "par", "target", "chase", "dls",
	    "wicket", "wickets", "wickets-in-hand", "over", "overs", "innings",
	    # phases
	    "powerplay", "middle", "death", "phase",
	    # bowling
	    "spin", "pace", "seam", "swing", "yorker", "bouncer", "bowler", "economy",
	    # conditions and field
	    "pitch", "field", "fielder", "boundary",
	    # batting intent
	    "consolidate", "attack", "rotate", "platform", "anchor", "pinch",
	    "slog", "tail", "batting", "acceleration", "preservation",
	}

	# Expected aggression level per phase (from match data).
	_PHASE_BASELINE = {
	    # Fielding restrictions → attack
	    "powerplay": 0.55,
	    # Build platform, preserve wickets
	    "middle": 0.35,
	    # Maximize scoring regardless of wicket risk
	    "death": 0.75,
	}


	def aggression_match(declared_aggression: float, shot_intent: str) -> float:
	    """Return how closely the executed shot matches the declared aggression.

	    Computed as ``1 - |declared_aggression - shot_aggression|`` and floored
	    at 0, so the result lies in [0, 1]. A ``shot_intent`` that is not a key
	    of ``SHOT_AGGRESSION`` is treated as having aggression 0.3.
	    """
	    executed = SHOT_AGGRESSION.get(shot_intent, 0.3)
	    gap = abs(declared_aggression - executed)
	    return max(0.0, 1.0 - gap)


	def rationale_specificity(rationale: str) -> float:
	    """Score rationale quality: word count + cricket domain keyword density.

	    The result is the mean of two sub-scores, each capped at 1.0:

	    * length   -- whitespace-separated word count / ``_SPECIFICITY_WORD_TARGET``
	    * keywords -- number of words found in ``_DOMAIN_KEYWORDS`` / 3

	    Words are lower-cased and stripped of leading and trailing ASCII
	    punctuation before the keyword lookup, so a keyword wrapped in quotes,
	    brackets or markdown emphasis still counts. A keyword that appears
	    several times is counted each time.

	    Args:
	        rationale: Free-text justification for a declared strategy.

	    Returns:
	        A score in [0, 1]. Missing, blank, or non-string input scores 0.0.
	    """
	    # A non-string value (e.g. a list or a number) cannot be scored as text;
	    # treat it like an empty rationale instead of raising AttributeError.
	    if not isinstance(rationale, str):
	        return 0.0

	    words = rationale.lower().split()
	    if not words:
	        # Empty or whitespace-only text.
	        return 0.0

	    word_score = min(len(words) / _SPECIFICITY_WORD_TARGET, 1.0)
	    # Strip punctuation on both sides: stripping only the right-hand side
	    # misses tokens such as "(powerplay" or "**yorker**".
	    domain_hits = sum(
	        1 for word in words if word.strip(string.punctuation) in _DOMAIN_KEYWORDS
	    )
	    keyword_score = min(domain_hits / 3.0, 1.0)
	    return (word_score + keyword_score) / 2.0


	def phase_appropriate(declared_aggression: float, phase: str) -> float:
	    """Return how close the declared aggression is to the phase's expected level.

	    Computed as ``1 - |declared_aggression - baseline|`` and floored at 0, so
	    the result lies in [0, 1]. ``baseline`` is looked up in
	    ``_PHASE_BASELINE``; a phase that is not listed there uses 0.45.
	    """
	    expected = _PHASE_BASELINE.get(phase, 0.45)
	    deviation = abs(declared_aggression - expected)
	    return max(0.0, 1.0 - deviation)


	def bowling_coherence_score(
	    bowling_strategy: dict,
	    field_setting: str,
	    phase: str = "middle",
	) -> float:
	    """
	    Score how well a bowling plan hangs together, rounded to 4 decimals.

	    Weighted sum (weights from game_knowledge.yaml):
	    40% rationale specificity + 30% line/length vs field setting
	    + 30% bowler type vs phase. An empty or missing strategy scores 0.0.

	    Line and length are compared against normalized tokens (the output of
	    normalize_line / normalize_length from field_model.py), e.g. "pads"
	    rather than "on pads" and "outside_off" rather than "outside off".
	    Missing keys default to line "outside_off", length "good" and
	    bowler_type "pace".
	    """
	    if not bowling_strategy:
	        return 0.0

	    rationale_term = rationale_specificity(bowling_strategy.get("rationale", ""))

	    line = bowling_strategy.get("line", "outside_off")
	    length = bowling_strategy.get("length", "good")

	    # Field logic: full marks when either the line or the length suits the
	    # field; half marks when neither does. Any setting other than
	    # "Aggressive" / "Defensive" is treated as Balanced.
	    if field_setting == "Aggressive":
	        # Attacking plan: target the stumps/pads, or use a threatening length.
	        suits_field = (
	            line in {"stumps", "pads"}
	            or length in {"bouncer", "short", "yorker"}
	        )
	        field_term = 1.0 if suits_field else 0.5
	    elif field_setting == "Defensive":
	        # Containing plan: bowl wide or full to restrict scoring.
	        suits_field = (
	            line in {"outside_off", "wide"}
	            or length in {"yorker", "full"}
	        )
	        field_term = 1.0 if suits_field else 0.5
	    else:
	        field_term = 0.8

	    # Phase fit: spin in the middle overs, pace in the powerplay and at the death.
	    bowler_type = bowling_strategy.get("bowler_type", "pace")
	    suits_phase = (phase == "middle" and bowler_type == "spin") or (
	        phase in {"powerplay", "death"} and bowler_type == "pace"
	    )
	    phase_term = 1.0 if suits_phase else 0.6

	    total = 0.40 * rationale_term + 0.30 * field_term + 0.30 * phase_term
	    return round(total, 4)


	def coherence_score(
	    declared_strategy: dict,
	    shot_intent: str,
	    phase: str = "middle",
	) -> float:
	    """
	    Combine the three coherence components into one score, rounded to 4 decimals.

	    Weights: 50% aggression_match + 30% rationale_specificity
	    + 20% phase_appropriate. An empty or missing strategy scores 0.0.

	    A missing "aggression" key defaults to 0.5 and a missing "rationale" key
	    to an empty string. The aggression value is passed through ``float()``,
	    which raises TypeError/ValueError if it cannot be converted.
	    """
	    if not declared_strategy:
	        return 0.0

	    declared = float(declared_strategy.get("aggression", 0.5))
	    explanation = declared_strategy.get("rationale", "")

	    weighted = (
	        0.50 * aggression_match(declared, shot_intent)
	        + 0.30 * rationale_specificity(explanation)
	        + 0.20 * phase_appropriate(declared, phase)
	    )
	    return round(weighted, 4)