pratinavseth pratinavseth committed on
Commit
3d096fc
·
1 Parent(s): 86a4911

fix: incremental log writes, openenv.yaml descriptions, openai dep

Browse files

- inference.py: write run_output.txt header immediately at run start,
then append each line as it's logged (no more batch write at end)
- train.py train-smoke: same — write header up front, append per step;
remove redundant batch write_text() at end
- openenv.yaml: update task descriptions to match actual 4-rubric reward
(stage2_full was still saying "tool efficiency + format", now correct)
- pyproject.toml: add openai>=1.0.0 to base deps (needed for HF Space
LLM captain mode and inference.py)

Co-Authored-By: Pratinav Seth <seth.pratinav@gmail.com>

Files changed (4) hide show
  1. inference.py +9 -7
  2. openenv.yaml +10 -7
  3. pyproject.toml +1 -0
  4. train.py +10 -3
inference.py CHANGED
@@ -380,7 +380,10 @@ async def evaluate(args):
380
  print(f"Using OpenAI-compatible agent: {args.model}")
381
 
382
  run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
383
- log_lines: list[str] = [
 
 
 
384
  f"# Inference run: {run_dir.name}",
385
  f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
386
  f"model: {args.model}",
@@ -391,11 +394,13 @@ async def evaluate(args):
391
  f"task: {args.task}",
392
  f"eval_pack_id: {args.eval_pack_id}",
393
  "",
394
- ]
 
395
 
396
  def _log(msg: str):
397
  print(msg)
398
- log_lines.append(msg)
 
399
 
400
  results = []
401
  async with CricketCaptainEnv(args.env_url) as env:
@@ -428,10 +433,7 @@ async def evaluate(args):
428
  summary_lines.append(f" {key:20s}: mean={statistics.mean(vals):.3f} std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
429
  _log(summary_lines[-1])
430
 
431
- # Write run_output.txt
432
- (run_dir / "run_output.txt").write_text("\n".join(log_lines) + "\n")
433
-
434
- # Write README
435
  (run_dir / "README.md").write_text(
436
  f"## Inference Run: {run_dir.name}\n\n"
437
  f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
 
380
  print(f"Using OpenAI-compatible agent: {args.model}")
381
 
382
  run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
383
+ log_file = run_dir / "run_output.txt"
384
+
385
+ # Write header immediately so the file exists and is readable while running
386
+ header = "\n".join([
387
  f"# Inference run: {run_dir.name}",
388
  f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
389
  f"model: {args.model}",
 
394
  f"task: {args.task}",
395
  f"eval_pack_id: {args.eval_pack_id}",
396
  "",
397
+ ])
398
+ log_file.write_text(header)
399
 
400
  def _log(msg: str):
401
  print(msg)
402
+ with open(log_file, "a") as f:
403
+ f.write(msg + "\n")
404
 
405
  results = []
406
  async with CricketCaptainEnv(args.env_url) as env:
 
433
  summary_lines.append(f" {key:20s}: mean={statistics.mean(vals):.3f} std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
434
  _log(summary_lines[-1])
435
 
436
+ # Write README (always at end — has final summary)
 
 
 
437
  (run_dir / "README.md").write_text(
438
  f"## Inference Run: {run_dir.name}\n\n"
439
  f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
openenv.yaml CHANGED
@@ -6,20 +6,23 @@ app: server.app:app
6
  port: 8000
7
 
8
  description: >
9
- CricketCaptain-LLM trains strategic coherence in LLMs via a T20/ODI cricket
10
- environment. The agent must set strategy, analyze situations, and play deliveries
11
 in a way that is internally consistent — rewarding alignment between declared
12
- intent and executed actions.
 
 
 
13
 
14
  tasks:
15
  - name: stage1_format
16
- description: "5-over mini-match. Format reward only — trains valid JSON tool calls."
17
  difficulty: easy
18
 
19
  - name: stage2_full
20
- description: "20-over match. Full 4-rubric reward: cricket outcome + coherence + tool efficiency + format."
21
  difficulty: medium
22
 
23
  - name: eval_50over
24
- description: "Full 50-over ODI. Evaluation only — no training."
25
  difficulty: hard
 
6
  port: 8000
7
 
8
  description: >
9
+ CricketCaptain-LLM trains LLMs to exhibit strategic coherence — aligning
10
+ declared intentions with executed actions across 300 sequential decisions.
11
+ The agent uses 12 tools (toss, match-plan, batting, bowling, reflection)
12
+ and is scored on four rubrics: match outcome (55%), cricket contribution (25%),
13
+ behavioral coherence (15%), and tool-call validity (5%).
14
+ Two-sided: a live or heuristic LLM opponent plays the opposing team.
15
+ Two-stage ToolRL curriculum (format mastery → full strategic reward).
16
 
17
  tasks:
18
  - name: stage1_format
19
+ description: "5-over mini-match. r_validity only — teaches valid JSON tool-call structure (ToolRL Stage 1)."
20
  difficulty: easy
21
 
22
  - name: stage2_full
23
+ description: "20-over match. Full reward: r_result (55%) + r_cricket (25%) + r_behavior (15%) + r_validity (5%)."
24
  difficulty: medium
25
 
26
  - name: eval_50over
27
+ description: "Full 50-over ODI. Evaluation benchmark — measures trained coherence vs DLS par."
28
  difficulty: hard
pyproject.toml CHANGED
@@ -12,6 +12,7 @@ dependencies = [
12
  "numpy>=1.24.0",
13
  "pyyaml>=6.0.1",
14
  "gradio>=4.0.0",
 
15
  ]
16
 
17
  [project.optional-dependencies]
 
12
  "numpy>=1.24.0",
13
  "pyyaml>=6.0.1",
14
  "gradio>=4.0.0",
15
+ "openai>=1.0.0",
16
  ]
17
 
18
  [project.optional-dependencies]
train.py CHANGED
@@ -717,11 +717,19 @@ def train_smoke(args):
717
  run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
718
  output_path = run_dir / "run_output.txt"
719
 
720
- lines: list[str] = []
 
 
 
 
 
 
 
721
 
722
  def log(msg: str):
723
  print(msg)
724
- lines.append(msg)
 
725
 
726
  log("# Training smoke: direct CricketEnvironment rollout")
727
  log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
@@ -820,7 +828,6 @@ def train_smoke(args):
820
  log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
821
  log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
822
 
823
- output_path.write_text("\n".join(lines) + "\n")
824
  print(f"\nwrote={output_path}")
825
 
826
  # Write README for the run
 
717
  run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
718
  output_path = run_dir / "run_output.txt"
719
 
720
+ # Write header immediately so the file exists while the run is in progress
721
+ header_lines = [
722
+ "# Training smoke: direct CricketEnvironment rollout",
723
+ f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}",
724
+ "purpose=verify one short training-style match rollout, prompt collection, tool stepping, and terminal reward",
725
+ "",
726
+ ]
727
+ output_path.write_text("\n".join(header_lines))
728
 
729
  def log(msg: str):
730
  print(msg)
731
+ with open(output_path, "a") as _f:
732
+ _f.write(msg + "\n")
733
 
734
  log("# Training smoke: direct CricketEnvironment rollout")
735
  log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
 
828
  log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
829
  log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
830
 
 
831
  print(f"\nwrote={output_path}")
832
 
833
  # Write README for the run