| """Build batter and bowler profiles from curated Cricsheet ball outcomes. |
| |
| Input: |
| data/processed/ball_outcomes_t20_v1.pkl |
| |
| Output: |
| data/processed/player_profiles_t20_v1.json |
| """ |
|
|
| import argparse |
| import json |
| import pickle |
| from collections import defaultdict |
| from pathlib import Path |
|
|
# Directory two levels above this file; every data path below hangs off it.
_ROOT = Path(__file__).parent.parent
# Folder that holds the default pickle input and JSON output of this script.
_PROCESSED = _ROOT.joinpath("data", "processed")
# Dream11 T20 scoring table; _load_dream11 tolerates the file being absent.
_DREAM11_PATH = _ROOT.joinpath("data", "dream11_t20_points.json")
|
|
|
|
def _load_dream11() -> dict:
    """Load the Dream11 scoring table from ``_DREAM11_PATH``.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist, so callers can fall back to their own default point values.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not _DREAM11_PATH.exists():
        return {}
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding (which is not UTF-8 on every system).
    with _DREAM11_PATH.open(encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
def _classify_batter(runs: int, balls: int, boundaries: int, death_runs: int) -> str:
    """Pick a batting style label from a batter's aggregate numbers.

    Rules are checked in order and the first one that holds wins:

    * ``"hitter"``   - strike rate >= 145 or boundaries per ball >= 0.22
    * ``"finisher"`` - >= 35% of runs scored at the death and strike rate >= 130
    * ``"anchor"``   - strike rate < 110 over at least 30 balls
    * ``"balanced"`` - none of the above

    Denominators are clamped to 1 so zero balls or zero runs never divide
    by zero.
    """
    faced = max(balls, 1)
    sr = 100 * runs / faced
    per_ball_boundaries = boundaries / faced
    share_at_death = death_runs / max(runs, 1)

    # Ordered (label, matched) pairs; earlier entries take precedence.
    rules = (
        ("hitter", sr >= 145 or per_ball_boundaries >= 0.22),
        ("finisher", share_at_death >= 0.35 and sr >= 130),
        ("anchor", sr < 110 and balls >= 30),
    )
    return next((label for label, matched in rules if matched), "balanced")
|
|
|
|
def _classify_bowler(balls: int, runs: int, wickets: int, death_balls: int, dots: int, bowler_type: str) -> str:
    """Pick a bowling style label from a bowler's aggregate numbers.

    Rules are checked in order and the first one that holds wins:

    * ``"death_specialist"`` - >= 30% of balls bowled at the death
    * ``"wicket_taker"``     - >= 10 wickets at <= 18 balls per wicket
    * ``"economy"``          - economy <= 7.0 or dot-ball rate >= 0.42

    When no rule matches, ``bowler_type`` is returned, or ``"pace"`` if it
    is empty. Denominators are clamped to 1 to avoid dividing by zero.
    """
    bowled = max(balls, 1)
    runs_per_over = 6 * runs / bowled
    balls_per_wicket = balls / max(wickets, 1)
    dot_share = dots / bowled
    death_fraction = death_balls / bowled

    # Ordered (label, matched) pairs; earlier entries take precedence.
    rules = (
        ("death_specialist", death_fraction >= 0.30),
        ("wicket_taker", wickets >= 10 and balls_per_wicket <= 18),
        ("economy", runs_per_over <= 7.0 or dot_share >= 0.42),
    )
    fallback = bowler_type or "pace"
    return next((label for label, matched in rules if matched), fallback)
|
|
|
|
def build_profiles(records: list[dict], *, min_balls: int = 12) -> dict:
    """Aggregate ball-by-ball records into batter and bowler profiles.

    Every record is read with ``dict.get``; the keys consulted are
    ``legal_delivery``, ``phase``, ``batter``, ``bowler``, ``runs_batter``,
    ``runs_total``, ``wicket``, ``dismissal_kind`` and ``bowler_type``.
    Records whose ``legal_delivery`` is falsy are skipped entirely. Missing
    or ``None`` values fall back to: phase ``"middle"``, bowler type
    ``"pace"``, ``runs_batter`` 0 and ``runs_total`` equal to ``runs_batter``.

    Bowlers are credited with a wicket only when the dismissal kind is not
    ``"run out"``; the same rule is applied to the overall total and to the
    per-phase wicket counts so the two always agree.

    Args:
        records: Iterable of ball-outcome dicts.
        min_balls: Players with fewer counted balls than this are left out
            of the result. Defaults to 12.

    Returns:
        A dict with ``"schema_version"``, ``"batters"`` and ``"bowlers"``;
        the latter two map player name to a JSON-serialisable profile.
    """
    phases = ("powerplay", "middle", "death")

    batters = defaultdict(lambda: {
        "runs": 0, "balls": 0, "boundaries": 0, "dismissals": 0,
        "phase_runs": defaultdict(int), "phase_balls": defaultdict(int),
    })
    bowlers = defaultdict(lambda: {
        "runs": 0, "balls": 0, "dots": 0, "wickets": 0,
        "type_counts": defaultdict(int),
        "phase_runs": defaultdict(int), "phase_balls": defaultdict(int),
        "phase_wickets": defaultdict(int),
    })

    for r in records:
        # NOTE(review): skipping non-legal deliveries also drops any runs
        # scored or conceded on them (e.g. wides / no-balls) - confirm that
        # this is the intended definition of economy and strike rate.
        if not r.get("legal_delivery", True):
            continue

        # ``or`` fallbacks also cover keys that are present but None, which
        # a plain ``get(key, default)`` would let through.
        phase = r.get("phase") or "middle"
        batter = r.get("batter")
        bowler = r.get("bowler")
        runs_batter = int(r.get("runs_batter") or 0)
        raw_total = r.get("runs_total")
        runs_total = runs_batter if raw_total is None else int(raw_total)
        wicket = bool(r.get("wicket"))
        # Run outs are not credited to the bowler.
        bowler_wicket = wicket and r.get("dismissal_kind") != "run out"

        if batter:
            b = batters[batter]
            b["runs"] += runs_batter
            b["balls"] += 1
            b["boundaries"] += int(runs_batter >= 4)
            # NOTE(review): any wicket on the ball is charged to the striker;
            # presumably the record does not identify a dismissed
            # non-striker - verify against the upstream schema.
            b["dismissals"] += int(wicket)
            b["phase_runs"][phase] += runs_batter
            b["phase_balls"][phase] += 1

        if bowler:
            w = bowlers[bowler]
            w["runs"] += runs_total
            w["balls"] += 1
            w["dots"] += int(runs_total == 0)
            w["wickets"] += int(bowler_wicket)
            w["type_counts"][r.get("bowler_type") or "pace"] += 1
            w["phase_runs"][phase] += runs_total
            w["phase_balls"][phase] += 1
            # Same crediting rule as the overall total above, so the phase
            # breakdown can never exceed the bowler's wicket count.
            w["phase_wickets"][phase] += int(bowler_wicket)

    # Point values come from the optional Dream11 table; 1 per dot ball and
    # 30 per wicket are used when the table or an entry is missing.
    bowling_items = (
        _load_dream11().get("categories", {}).get("bowling", {}).get("items", {})
    )
    dot_points = bowling_items.get("dot_ball", {}).get("points", 1)
    wicket_points = bowling_items.get("wicket_excluding_run_out", {}).get("points", 30)

    batter_profiles = {}
    for name, s in batters.items():
        balls = s["balls"]
        if balls < min_balls:
            continue
        safe_balls = max(balls, 1)
        death_runs = s["phase_runs"].get("death", 0)
        batter_profiles[name] = {
            "role": "batter",
            "style": _classify_batter(s["runs"], balls, s["boundaries"], death_runs),
            "runs": s["runs"],
            "balls": balls,
            "strike_rate": round(100 * s["runs"] / safe_balls, 2),
            "boundary_rate": round(s["boundaries"] / safe_balls, 4),
            "dismissal_rate": round(s["dismissals"] / safe_balls, 4),
            # Strike rate per phase, only for phases the batter faced a ball in.
            "phase_strengths": {
                phase: round(100 * s["phase_runs"][phase] / max(s["phase_balls"][phase], 1), 2)
                for phase in phases
                if s["phase_balls"].get(phase)
            },
        }

    bowler_profiles = {}
    for name, s in bowlers.items():
        balls = s["balls"]
        if balls < min_balls:
            continue
        safe_balls = max(balls, 1)
        type_counts = s["type_counts"]
        # Most frequently recorded type across this bowler's deliveries.
        bowler_type = max(type_counts, key=type_counts.get) if type_counts else "pace"
        style = _classify_bowler(
            balls, s["runs"], s["wickets"], s["phase_balls"].get("death", 0), s["dots"], bowler_type
        )
        bowler_profiles[name] = {
            "role": "bowler",
            "type": bowler_type,
            "style": style,
            "balls": balls,
            "runs_conceded": s["runs"],
            "wickets": s["wickets"],
            "economy": round(6 * s["runs"] / safe_balls, 2),
            "dot_rate": round(s["dots"] / safe_balls, 4),
            "strike_rate": round(balls / max(s["wickets"], 1), 2),
            "dream11_pressure_points": s["dots"] * dot_points + s["wickets"] * wicket_points,
            # Economy and wickets per phase, only for phases actually bowled in.
            "phase_strengths": {
                phase: {
                    "economy": round(6 * s["phase_runs"][phase] / max(s["phase_balls"][phase], 1), 2),
                    "wickets": s["phase_wickets"][phase],
                }
                for phase in phases
                if s["phase_balls"].get(phase)
            },
        }

    return {
        "schema_version": "player_profiles_t20_v1",
        "batters": batter_profiles,
        "bowlers": bowler_profiles,
    }
|
|
|
|
def main() -> None:
    """Command-line entry point: read ball outcomes, write player profiles.

    ``--input`` is a pickle file holding the ball-outcome records and
    ``--output`` is the JSON file to write; both default to files under
    ``_PROCESSED``. The output directory is created if it does not exist,
    and a one-line summary is printed when the file has been written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        default=str(_PROCESSED / "ball_outcomes_t20_v1.pkl"),
        help="pickle file with ball-outcome records",
    )
    parser.add_argument(
        "--output",
        default=str(_PROCESSED / "player_profiles_t20_v1.json"),
        help="destination JSON file for the player profiles",
    )
    args = parser.parse_args()

    # SECURITY: unpickling can execute arbitrary code. Only point --input at
    # files produced by this project's own pipeline, never at untrusted data.
    with open(args.input, "rb") as f:
        records = pickle.load(f)

    profiles = build_profiles(records)
    output = Path(args.output)
    output.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the artifact does not depend on the locale.
    with output.open("w", encoding="utf-8") as f:
        json.dump(profiles, f, indent=2)

    print(
        f"Wrote {output}: {len(profiles['batters'])} batters, "
        f"{len(profiles['bowlers'])} bowlers"
    )
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    main()
|
|