cricket-captain-llm / server /coherence_grader.py
pratinavseth's picture
sync: today's source updates (XML-only prompt, reward unclip, neg-reward on loss, pinned versions, configs reorg)
2fc50a9 verified
"""
Coherence grader: measures alignment between declared strategy and executed shot.
Three components (weighted sum per PRD v2.1):
1. aggression_match (50%) — shot aggression vs declared aggression
2. rationale_specificity (30%) — word count + domain keyword density
3. phase_appropriate (20%) — declared aggression vs expected aggression for phase
Non-gameability:
- "I'll play well" → near-zero specificity → low score
- Declaring aggression=0.8 then playing defensive → aggression mismatch penalty
- Declaring aggression=0.8 in middle overs (expected 0.35) → phase_appropriate penalty
- Only path to 1.0: specific + phase-calibrated declaration + consistent execution
"""
from server.markov_engine import SHOT_AGGRESSION
# Number of words at which a rationale reaches the maximum length score.
_SPECIFICITY_WORD_TARGET = 12

# Cricket vocabulary; each matching token in a rationale counts as a domain hit.
_DOMAIN_KEYWORDS = {
    "wicket", "wickets", "over", "overs",
    "run", "runs", "rate", "rr",
    "powerplay", "middle", "death", "phase",
    "boundary", "spin", "pace",
    "seam", "swing", "yorker", "bouncer",
    "pitch", "field", "fielder",
    "consolidate", "attack", "rotate", "economy",
    "par", "target", "chase",
    "dls", "platform", "innings", "tail",
    "anchor", "pinch", "slog",
    "bowler", "batting", "acceleration", "preservation",
    "wickets-in-hand",
}

# Expected aggression level per phase (from match data).
_PHASE_BASELINE = dict(
    powerplay=0.55,  # fielding restrictions invite attack
    middle=0.35,  # build a platform and preserve wickets
    death=0.75,  # maximise scoring regardless of wicket risk
)
def aggression_match(declared_aggression: float, shot_intent: str) -> float:
    """Score how closely the executed shot tracks the declared aggression.

    The result is ``1 - |declared_aggression - shot_aggression|``, floored at
    0.0, so it always lies in [0, 1].

    Args:
        declared_aggression: Aggression level stated in the strategy.
        shot_intent: Key looked up in ``SHOT_AGGRESSION``; an unknown intent
            is treated as aggression 0.3.

    Returns:
        1.0 for a perfect match, decreasing linearly with the gap.
    """
    executed_aggression = SHOT_AGGRESSION.get(shot_intent, 0.3)
    gap = abs(declared_aggression - executed_aggression)
    return max(0.0, 1.0 - gap)
# Punctuation trimmed from BOTH ends of a token before the keyword lookup.
# Hyphens are deliberately absent so compound keywords such as
# "wickets-in-hand" survive intact.
_TOKEN_EDGE_PUNCTUATION = ".,!?;:\"'()[]{}"


def rationale_specificity(rationale: str) -> float:
    """Score rationale quality: word count + cricket domain keyword hits.

    The score is the mean of two components, each capped at 1.0:

    * length: ``word_count / _SPECIFICITY_WORD_TARGET``
    * keywords: ``domain_hits / 3`` where a hit is a whitespace-separated
      token that, lower-cased and stripped of surrounding punctuation, is in
      ``_DOMAIN_KEYWORDS``.

    Args:
        rationale: Free-text justification for the declared strategy.

    Returns:
        A value in [0, 1]. Empty, whitespace-only, or non-string rationales
        score 0.0 rather than raising, since there is no text to grade.
    """
    if not isinstance(rationale, str) or not rationale.strip():
        return 0.0

    tokens = rationale.lower().split()
    word_score = min(len(tokens) / _SPECIFICITY_WORD_TARGET, 1.0)

    # Strip punctuation on both sides so "(powerplay)" or '"death"' still
    # count; trimming only the trailing side silently missed such tokens.
    domain_hits = sum(
        1
        for token in tokens
        if token.strip(_TOKEN_EDGE_PUNCTUATION) in _DOMAIN_KEYWORDS
    )
    keyword_score = min(domain_hits / 3.0, 1.0)

    return (word_score + keyword_score) / 2.0
def phase_appropriate(declared_aggression: float, phase: str) -> float:
    """Score how well the declared aggression suits the match phase.

    The result is ``1 - |declared_aggression - phase_baseline|``, floored at
    0.0, so it always lies in [0, 1].

    Args:
        declared_aggression: Aggression level stated in the strategy.
        phase: Key looked up in ``_PHASE_BASELINE``; an unknown phase falls
            back to a baseline of 0.45.

    Returns:
        1.0 when the declaration equals the phase baseline, decreasing
        linearly with the distance from it.
    """
    expected_level = _PHASE_BASELINE.get(phase, 0.45)
    deviation = abs(declared_aggression - expected_level)
    return max(0.0, 1.0 - deviation)
def bowling_coherence_score(
    bowling_strategy: dict,
    field_setting: str,
    phase: str = "middle",
) -> float:
    """Grade how coherent a bowling plan is with its field and match phase.

    Weights (documented as coming from game_knowledge.yaml):
    40% rationale specificity + 30% field logic + 30% phase fit.

    Line/length values must already be normalized (normalize_line /
    normalize_length from field_model.py) — e.g. "pads" not "on pads",
    "outside_off" not "outside off".

    Args:
        bowling_strategy: Plan dict; reads "rationale", "line" (default
            "outside_off"), "length" (default "good") and "bowler_type"
            (default "pace").
        field_setting: "Aggressive" or "Defensive"; any other value is
            treated as a balanced field.
        phase: Match phase, e.g. "powerplay", "middle" or "death".

    Returns:
        The weighted score rounded to 4 decimal places, or 0.0 when
        ``bowling_strategy`` is empty.
    """
    if not bowling_strategy:
        return 0.0

    specificity = rationale_specificity(bowling_strategy.get("rationale", ""))

    chosen_line = bowling_strategy.get("line", "outside_off")
    chosen_length = bowling_strategy.get("length", "good")

    # Field logic: full marks when either the line or the length suits the
    # field; half marks when neither does. A balanced field scores a flat 0.8.
    if field_setting == "Aggressive":
        # Attacking plan: target the stumps/pads, or use a threatening length.
        suits_field = (
            chosen_line in {"stumps", "pads"}
            or chosen_length in {"bouncer", "short", "yorker"}
        )
        field_logic = 1.0 if suits_field else 0.5
    elif field_setting == "Defensive":
        # Containing plan: bowl wide or full to restrict scoring.
        suits_field = (
            chosen_line in {"outside_off", "wide"}
            or chosen_length in {"yorker", "full"}
        )
        field_logic = 1.0 if suits_field else 0.5
    else:
        field_logic = 0.8

    # Phase fit: spin in the middle overs, pace in the powerplay and at the death.
    bowler_kind = bowling_strategy.get("bowler_type", "pace")
    suits_phase = (phase == "middle" and bowler_kind == "spin") or (
        phase in {"powerplay", "death"} and bowler_kind == "pace"
    )
    phase_fit = 1.0 if suits_phase else 0.6

    return round(0.40 * specificity + 0.30 * field_logic + 0.30 * phase_fit, 4)
def coherence_score(
    declared_strategy: dict,
    shot_intent: str,
    phase: str = "middle",
) -> float:
    """Return the composite batting coherence score in [0, 1].

    Weights: 50% aggression_match + 30% rationale_specificity
    + 20% phase_appropriate.

    Args:
        declared_strategy: Strategy dict; reads "aggression" (default 0.5,
            converted with ``float``) and "rationale" (default "").
        shot_intent: The shot actually played, compared against the
            declared aggression.
        phase: Match phase used for the phase-appropriateness component.

    Returns:
        The weighted score rounded to 4 decimal places, or 0.0 when
        ``declared_strategy`` is empty.
    """
    if not declared_strategy:
        return 0.0

    declared_level = float(declared_strategy.get("aggression", 0.5))
    stated_reason = declared_strategy.get("rationale", "")

    execution_part = aggression_match(declared_level, shot_intent)
    reasoning_part = rationale_specificity(stated_reason)
    phase_part = phase_appropriate(declared_level, phase)

    return round(
        0.50 * execution_part + 0.30 * reasoning_part + 0.20 * phase_part,
        4,
    )