# Page header captured along with this file (not part of the module itself):
# pratinavseth's picture
# custom: in-process episode driver + CaptainRL rename (mirrors github c17c1ba)
# 8a86db4 verified
"""
Gradio demo UI for CricketCaptain-LLM.
Two modes:
1. Manual play — human picks tool + args, submits, sees result.
2. Auto-play — AI plays N balls using the built-in RandomAgent (no API key needed).
HF Space: opponent defaults to heuristic; no API key required for basic demo.
Set CRICKET_CAPTAIN_MODEL + HF_TOKEN secrets for live LLM captain/opponent.
"""
import json
import os
import random
import sys
from pathlib import Path
from typing import Any
sys.path.insert(0, str(Path(__file__).parent.parent))
import gradio as gr
from server.cricket_environment import CricketEnvironment
from server.captain_policy import (
OPPONENT_PRESETS,
captain_presets as _captain_presets,
pick_action as _pick_captain_action,
)
from models import CricketAction
# ------------------------------------------------------------------ #
# Constants #
# ------------------------------------------------------------------ #
# Every tool name this module knows about, in the order they are listed here.
ALL_TOOLS = [
    # Toss
    "call_toss",
    # Match plan
    "set_match_plan",
    "update_match_plan",
    # Batter selection
    "select_batter",
    # Tools used by the batting branch of the auto-play agent
    "set_strategy",
    "plan_shot",
    "play_delivery",
    # Bowler choice plus tools used by the bowling branch of the auto-play agent
    "choose_bowler",
    "set_bowling_strategy",
    "plan_delivery",
    "set_field_setting",
    "bowl_delivery",
    # Reflection / analysis
    "reflect_after_ball",
    "analyze_situation",
]

# Shot intents sampled by the auto-play agent; the weight list passed to
# random.choices there must stay the same length and order as this list.
SHOT_INTENTS = [
    "leave",
    "defensive",
    "single",
    "rotate",
    "boundary",
    "six",
]
# Captain + opponent presets are now defined in server.captain_policy so the
# /custom cockpit driver and the /web Gradio UI share the same lookup table.
# ------------------------------------------------------------------ #
# Scorecard / metrics renderers #
# ------------------------------------------------------------------ #
def _scorecard(obs) -> str:
if obs is None:
return "*Click **New Match** to begin.*"
ctx = obs.game_context
strat = obs.declared_strategy
bowl = obs.bowling_strategy
opp = obs.opponent_plan
lines = [
f"### {ctx.get('game_state','').upper()}{ctx.get('innings','first').upper()} INNINGS",
f"**Over** {ctx.get('over',0)}.{ctx.get('ball',0)}  |  "
f"**Score** {ctx.get('score',0)}/{ctx.get('wickets',0)}  |  "
f"**RR** {ctx.get('run_rate',0.0):.2f}",
]
if ctx.get("target"):
need = ctx["target"] - ctx.get("score", 0)
rrr = ctx.get("req_rate", 0.0)
lines.append(f"**Target** {ctx['target']}  |  **Need** {need}  |  **RRR** {rrr:.1f}")
lines.append(
f"**Phase** `{ctx.get('phase','?').upper()}`  |  "
f"**Bowler type** `{ctx.get('bowler_type','?').upper()}`  |  "
f"**Field** `{ctx.get('field_setting','Balanced')}`"
)
lines.append("")
if obs.game_state == "batting":
if strat:
lines.append(f"**Strategy** {strat.get('phase_intent','?')} (agg={strat.get('aggression',0):.2f})")
rat = strat.get("rationale", "")
if rat:
lines.append(f"*{rat[:100]}*")
else:
lines.append("*No batting strategy set yet.*")
elif obs.game_state == "bowling":
if bowl:
lines.append(
f"**Bowl plan** {bowl.get('delivery_type','?')} · "
f"{bowl.get('line','?')} · {bowl.get('length','?')}"
)
if opp and opp.get("shot_intent"):
lines.append(f"**Opponent intent** `{opp.get('shot_intent','?')}` (agg={opp.get('aggression',0):.2f})")
elif obs.game_state == "toss":
lines.append("*Waiting for toss call…*")
last = obs.last_ball_result or ""
if last:
lines.append(f"\n> 🏏 {last}")
available = obs.available_tools or []
lines.append(f"\n**Available tools:** " + " ".join(f"`{t}`" for t in available))
return "\n".join(lines)
def _metrics(env) -> str:
if env is None or not hasattr(env, "_state"):
return "No match started."
s = env._state
def _avg(lst): return sum(lst)/len(lst) if lst else 0.0
lines = [
f"**Coherence** {_avg(s.coherence_scores):.3f}  |  "
f"**Adaptation** {_avg(s.adaptation_scores):.3f}  |  "
f"**Opp-awareness** {_avg(s.opponent_awareness_scores):.3f}",
f"**Plan-commit** {_avg(s.plan_commitment_scores):.3f}  |  "
f"**Tool calls** {s.tool_calls_made}  |  "
f"**r_validity** {'1.0 ✅' if s.tool_calls_made > 0 else '—'}",
]
return "\n".join(lines)
# ------------------------------------------------------------------ #
# Random auto-play agent #
# ------------------------------------------------------------------ #
def _captain_action(obs, preset_key: str = "heuristic") -> CricketAction:
    """Return the captain's next action for *obs* under the named preset.

    This is a thin wrapper: the decision is made by ``pick_action`` from
    ``server.captain_policy``. ``_auto_action`` is handed over as the third
    positional argument (presumably a fallback picker — confirm against
    ``captain_policy``) and ``_scorecard`` as the ``prompt_render`` callable.
    """
    chosen = _pick_captain_action(
        obs,
        preset_key,
        _auto_action,
        prompt_render=_scorecard,
    )
    return chosen
def _auto_action(obs) -> CricketAction:
    """Pick a scripted, partly randomised action for auto-play.

    Reads ``available_tools``, ``game_state`` and ``strategic_phase`` from
    *obs* (plus ``declared_strategy`` while batting) and returns the first
    candidate whose tool is available and whose random gate, if it has one,
    passes. The toss is always called as heads / bat. When nothing else
    matches, the first available tool is used with empty arguments, or
    ``analyze_situation`` if no tools are available at all.
    """
    tools = obs.available_tools or []
    mode = obs.game_state
    stage = obs.strategic_phase

    def act(name, /, **arguments) -> CricketAction:
        # Keyword arguments become the action's argument dict, in order.
        return CricketAction(tool=name, arguments=arguments)

    def chance(probability: float) -> bool:
        # Always placed last in a condition so the random draw only happens
        # when the tool is actually a candidate.
        return random.random() < probability

    if "call_toss" in tools:
        return act("call_toss", call="heads", decision="bat")

    if mode == "bowling":
        if (
            "set_bowling_strategy" in tools
            and stage in ("pre_over", "pre_ball")
            and chance(0.3)
        ):
            return act(
                "set_bowling_strategy",
                bowler_type="pace",
                line="outside off",
                length="good length",
                delivery_type="stock",
                rationale="Target corridor of uncertainty.",
            )
        if "plan_delivery" in tools and stage == "pre_ball" and chance(0.35):
            return act(
                "plan_delivery",
                bowler_type="pace",
                line="outside off",
                length="full",
                delivery_type="outswinger",
                rationale="Test the outside edge.",
            )
        if "bowl_delivery" in tools:
            return act("bowl_delivery")
        if "reflect_after_ball" in tools and chance(0.4):
            return act("reflect_after_ball", reflection="Maintain pressure.")
        if "set_field_setting" in tools:
            return act(
                "set_field_setting",
                setting=random.choice(["Aggressive", "Balanced"]),
            )
        if tools:
            return act(tools[0])

    if mode == "batting":
        if "set_strategy" in tools and not obs.declared_strategy and chance(0.7):
            return act(
                "set_strategy",
                phase_intent="attack",
                aggression=0.6,
                rationale="Powerplay — push for boundaries while wickets are in hand.",
            )
        if "plan_shot" in tools and chance(0.25):
            return act(
                "plan_shot",
                shot_intent="boundary",
                target_area="cover",
                risk="medium",
                trajectory="ground",
                rationale="Drive through cover gap.",
            )
        if "play_delivery" in tools:
            # Weights line up positionally with SHOT_INTENTS.
            (shot,) = random.choices(SHOT_INTENTS, weights=[5, 15, 25, 20, 25, 10], k=1)
            return act(
                "play_delivery",
                shot_intent=shot,
                explanation=f"Going for {shot}.",
            )
        if "reflect_after_ball" in tools and chance(0.35):
            return act("reflect_after_ball", reflection="Adjust based on outcome.")
        if tools:
            return act(tools[0])

    # Neither batting nor bowling, or nothing was available in that state.
    fallback = tools[0] if tools else "analyze_situation"
    return act(fallback)
# ------------------------------------------------------------------ #
# Gradio UI #
# ------------------------------------------------------------------ #
def build_ui(
    web_manager: Any = None,
    action_fields: list | None = None,
    metadata: Any = None,
    is_chat_env: bool = False,
    title: str = "CaptainRL Demo",
    quick_start_md: str | None = None,
) -> gr.Blocks:
    """Build the Gradio app shown in the OpenEnv "Custom" tab.

    The cricket cockpit SPA (live field view, captain's mind, training
    metrics, Cartesia commentary audio) is served at `/custom`; this function
    only wraps that page in an iframe so the OpenEnv web interface gets the
    same experience without a second UI implementation in Gradio.

    None of the parameters are read by this function; the page title passed
    to ``gr.Blocks`` is the fixed string "CaptainRL".

    Returns:
        The ``gr.Blocks`` app containing a single HTML component.
    """
    cockpit_frame = """
<div style="width:100%; height:88vh; min-height:720px; border-radius:10px; overflow:hidden;">
<iframe
src="/custom"
title="Cricket Cockpit"
allow="autoplay; clipboard-read; clipboard-write"
style="width:100%; height:100%; border:0; background:#0b0d0e;"
></iframe>
</div>
"""
    soft_theme = gr.themes.Soft(primary_hue="teal", secondary_hue="blue")
    hide_footer_css = "footer { display: none !important; }"

    with gr.Blocks(title="CaptainRL", theme=soft_theme, css=hide_footer_css) as demo:
        gr.HTML(cockpit_frame)

    return demo
if __name__ == "__main__":
    # Script entry point: build the app and serve it on all interfaces at
    # port 7860, without creating a public share link.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)