Spaces:

pratinavseth
/

cricket-captain-llm

Sleeping

App Files Files Community

pratinavseth

pratinavseth committed on 29 days ago

Commit

3d096fc

1 Parent(s): 86a4911

fix: incremental log writes, openenv.yaml descriptions, openai dep

Browse files

- inference.py: write run_output.txt header immediately at run start,
then append each line as it's logged (no more batch write at end)
- train.py train-smoke: same — write header up front, append per step;
remove redundant batch write_text() at end
- openenv.yaml: update task descriptions to match actual 4-rubric reward
(stage2_full was still saying "tool efficiency + format", now correct)
- pyproject.toml: add openai>=1.0.0 to base deps (needed for HF Space
LLM captain mode and inference.py)

Co-Authored-By: Pratinav Seth <seth.pratinav@gmail.com>

Files changed (4) hide show

inference.py +9 -7
openenv.yaml +10 -7
pyproject.toml +1 -0
train.py +10 -3

inference.py CHANGED Viewed

@@ -380,7 +380,10 @@ async def evaluate(args):
         print(f"Using OpenAI-compatible agent: {args.model}")
     run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
-    log_lines: list[str] = [
         f"# Inference run: {run_dir.name}",
         f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
         f"model: {args.model}",
@@ -391,11 +394,13 @@ async def evaluate(args):
         f"task: {args.task}",
         f"eval_pack_id: {args.eval_pack_id}",
         "",
-    ]
     def _log(msg: str):
         print(msg)
-        log_lines.append(msg)
     results = []
     async with CricketCaptainEnv(args.env_url) as env:
@@ -428,10 +433,7 @@ async def evaluate(args):
         summary_lines.append(f"  {key:20s}: mean={statistics.mean(vals):.3f}  std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
         _log(summary_lines[-1])
-    # Write run_output.txt
-    (run_dir / "run_output.txt").write_text("\n".join(log_lines) + "\n")
-    # Write README
     (run_dir / "README.md").write_text(
         f"## Inference Run: {run_dir.name}\n\n"
         f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"

         print(f"Using OpenAI-compatible agent: {args.model}")
     run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
+    log_file = run_dir / "run_output.txt"
+    # Write header immediately so the file exists and is readable while running
+    header = "\n".join([
         f"# Inference run: {run_dir.name}",
         f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
         f"model: {args.model}",
         f"task: {args.task}",
         f"eval_pack_id: {args.eval_pack_id}",
         "",
+    ])
+    log_file.write_text(header)
     def _log(msg: str):
         print(msg)
+        with open(log_file, "a") as f:
+            f.write(msg + "\n")
     results = []
     async with CricketCaptainEnv(args.env_url) as env:
         summary_lines.append(f"  {key:20s}: mean={statistics.mean(vals):.3f}  std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
         _log(summary_lines[-1])
+    # Write README (always at end — has final summary)
     (run_dir / "README.md").write_text(
         f"## Inference Run: {run_dir.name}\n\n"
         f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"

openenv.yaml CHANGED Viewed

@@ -6,20 +6,23 @@ app: server.app:app
 port: 8000
 description: >
-  CricketCaptain-LLM trains strategic coherence in LLMs via a T20/ODI cricket
-  environment. The agent must set strategy, analyze situations, and play deliveries
-  in a way that is internally consistent — rewarding alignment between declared
-  intent and executed actions.
 tasks:
   - name: stage1_format
-    description: "5-over mini-match. Format reward only — trains valid JSON tool calls."
     difficulty: easy
   - name: stage2_full
-    description: "20-over match. Full 4-rubric reward: cricket outcome + coherence + tool efficiency + format."
     difficulty: medium
   - name: eval_50over
-    description: "Full 50-over ODI. Evaluation only — no training."
     difficulty: hard

 port: 8000
 description: >
+  CricketCaptain-LLM trains LLMs to exhibit strategic coherence — aligning
+  declared intentions with executed actions across 300 sequential decisions.
+  The agent uses 12 tools (toss, match-plan, batting, bowling, reflection)
+  and is scored on four rubrics: match outcome (55%), cricket contribution (25%),
+  behavioral coherence (15%), and tool-call validity (5%).
+  Two-sided: a live or heuristic LLM opponent plays the opposing team.
+  Two-stage ToolRL curriculum (format mastery → full strategic reward).
 tasks:
   - name: stage1_format
+    description: "5-over mini-match. r_validity only — teaches valid JSON tool-call structure (ToolRL Stage 1)."
     difficulty: easy
   - name: stage2_full
+    description: "20-over match. Full reward: r_result (55%) + r_cricket (25%) + r_behavior (15%) + r_validity (5%)."
     difficulty: medium
   - name: eval_50over
+    description: "Full 50-over ODI. Evaluation benchmark — measures trained coherence vs DLS par."
     difficulty: hard

pyproject.toml CHANGED Viewed

@@ -12,6 +12,7 @@ dependencies = [
     "numpy>=1.24.0",
     "pyyaml>=6.0.1",
     "gradio>=4.0.0",
 ]
 [project.optional-dependencies]

     "numpy>=1.24.0",
     "pyyaml>=6.0.1",
     "gradio>=4.0.0",
+    "openai>=1.0.0",
 ]
 [project.optional-dependencies]

train.py CHANGED Viewed

@@ -717,11 +717,19 @@ def train_smoke(args):
         run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
         output_path = run_dir / "run_output.txt"
-    lines: list[str] = []
     def log(msg: str):
         print(msg)
-        lines.append(msg)
     log("# Training smoke: direct CricketEnvironment rollout")
     log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
@@ -820,7 +828,6 @@ def train_smoke(args):
         log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
         log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
-    output_path.write_text("\n".join(lines) + "\n")
     print(f"\nwrote={output_path}")
     # Write README for the run

         run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
         output_path = run_dir / "run_output.txt"
+    # Write header immediately so the file exists while the run is in progress
+    header_lines = [
+        "# Training smoke: direct CricketEnvironment rollout",
+        f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}",
+        "purpose=verify one short training-style match rollout, prompt collection, tool stepping, and terminal reward",
+        "",
+    ]
+    output_path.write_text("\n".join(header_lines))
     def log(msg: str):
         print(msg)
+        with open(output_path, "a") as _f:
+            _f.write(msg + "\n")
     log("# Training smoke: direct CricketEnvironment rollout")
     log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
         log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
         log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
     print(f"\nwrote={output_path}")
     # Write README for the run