Spaces:
Sleeping
Sleeping
Commit ·
3d096fc
1
Parent(s): 86a4911
fix: incremental log writes, openenv.yaml descriptions, openai dep
Browse files
- inference.py: write run_output.txt header immediately at run start,
then append each line as it's logged (no more batch write at end)
- train.py train-smoke: same — write header up front, append per step;
remove redundant batch write_text() at end
- openenv.yaml: update task descriptions to match the actual 4-rubric reward
(stage2_full still said "tool efficiency + format"; now corrected)
- pyproject.toml: add openai>=1.0.0 to base deps (needed for HF Space
LLM captain mode and inference.py)
Co-Authored-By: Pratinav Seth <seth.pratinav@gmail.com>
- inference.py +9 -7
- openenv.yaml +10 -7
- pyproject.toml +1 -0
- train.py +10 -3
inference.py
CHANGED
|
@@ -380,7 +380,10 @@ async def evaluate(args):
|
|
| 380 |
print(f"Using OpenAI-compatible agent: {args.model}")
|
| 381 |
|
| 382 |
run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
|
| 383 |
-
|
|
|
|
|
|
|
|
|
|
| 384 |
f"# Inference run: {run_dir.name}",
|
| 385 |
f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
|
| 386 |
f"model: {args.model}",
|
|
@@ -391,11 +394,13 @@ async def evaluate(args):
|
|
| 391 |
f"task: {args.task}",
|
| 392 |
f"eval_pack_id: {args.eval_pack_id}",
|
| 393 |
"",
|
| 394 |
-
]
|
|
|
|
| 395 |
|
| 396 |
def _log(msg: str):
|
| 397 |
print(msg)
|
| 398 |
-
|
|
|
|
| 399 |
|
| 400 |
results = []
|
| 401 |
async with CricketCaptainEnv(args.env_url) as env:
|
|
@@ -428,10 +433,7 @@ async def evaluate(args):
|
|
| 428 |
summary_lines.append(f" {key:20s}: mean={statistics.mean(vals):.3f} std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
|
| 429 |
_log(summary_lines[-1])
|
| 430 |
|
| 431 |
-
# Write
|
| 432 |
-
(run_dir / "run_output.txt").write_text("\n".join(log_lines) + "\n")
|
| 433 |
-
|
| 434 |
-
# Write README
|
| 435 |
(run_dir / "README.md").write_text(
|
| 436 |
f"## Inference Run: {run_dir.name}\n\n"
|
| 437 |
f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
|
|
|
|
| 380 |
print(f"Using OpenAI-compatible agent: {args.model}")
|
| 381 |
|
| 382 |
run_dir = _make_inference_run_folder(args.model, args.opponent_mode, args.max_overs)
|
| 383 |
+
log_file = run_dir / "run_output.txt"
|
| 384 |
+
|
| 385 |
+
# Write header immediately so the file exists and is readable while running
|
| 386 |
+
header = "\n".join([
|
| 387 |
f"# Inference run: {run_dir.name}",
|
| 388 |
f"timestamp_utc: {datetime.datetime.utcnow().isoformat()}",
|
| 389 |
f"model: {args.model}",
|
|
|
|
| 394 |
f"task: {args.task}",
|
| 395 |
f"eval_pack_id: {args.eval_pack_id}",
|
| 396 |
"",
|
| 397 |
+
])
|
| 398 |
+
log_file.write_text(header)
|
| 399 |
|
| 400 |
def _log(msg: str):
|
| 401 |
print(msg)
|
| 402 |
+
with open(log_file, "a") as f:
|
| 403 |
+
f.write(msg + "\n")
|
| 404 |
|
| 405 |
results = []
|
| 406 |
async with CricketCaptainEnv(args.env_url) as env:
|
|
|
|
| 433 |
summary_lines.append(f" {key:20s}: mean={statistics.mean(vals):.3f} std={statistics.stdev(vals) if len(vals)>1 else 0:.3f}")
|
| 434 |
_log(summary_lines[-1])
|
| 435 |
|
| 436 |
+
# Write README (always at end — has final summary)
|
|
|
|
|
|
|
|
|
|
| 437 |
(run_dir / "README.md").write_text(
|
| 438 |
f"## Inference Run: {run_dir.name}\n\n"
|
| 439 |
f"**Date**: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"
|
openenv.yaml
CHANGED
|
@@ -6,20 +6,23 @@ app: server.app:app
|
|
| 6 |
port: 8000
|
| 7 |
|
| 8 |
description: >
|
| 9 |
-
CricketCaptain-LLM trains
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
tasks:
|
| 15 |
- name: stage1_format
|
| 16 |
-
description: "5-over mini-match.
|
| 17 |
difficulty: easy
|
| 18 |
|
| 19 |
- name: stage2_full
|
| 20 |
-
description: "20-over match. Full
|
| 21 |
difficulty: medium
|
| 22 |
|
| 23 |
- name: eval_50over
|
| 24 |
-
description: "Full 50-over ODI. Evaluation
|
| 25 |
difficulty: hard
|
|
|
|
| 6 |
port: 8000
|
| 7 |
|
| 8 |
description: >
|
| 9 |
+
CricketCaptain-LLM trains LLMs to exhibit strategic coherence — aligning
|
| 10 |
+
declared intentions with executed actions across 300 sequential decisions.
|
| 11 |
+
The agent uses 12 tools (toss, match-plan, batting, bowling, reflection)
|
| 12 |
+
and is scored on four rubrics: match outcome (55%), cricket contribution (25%),
|
| 13 |
+
behavioral coherence (15%), and tool-call validity (5%).
|
| 14 |
+
Two-sided: a live or heuristic LLM opponent plays the opposing team.
|
| 15 |
+
Two-stage ToolRL curriculum (format mastery → full strategic reward).
|
| 16 |
|
| 17 |
tasks:
|
| 18 |
- name: stage1_format
|
| 19 |
+
description: "5-over mini-match. r_validity only — teaches valid JSON tool-call structure (ToolRL Stage 1)."
|
| 20 |
difficulty: easy
|
| 21 |
|
| 22 |
- name: stage2_full
|
| 23 |
+
description: "20-over match. Full reward: r_result (55%) + r_cricket (25%) + r_behavior (15%) + r_validity (5%)."
|
| 24 |
difficulty: medium
|
| 25 |
|
| 26 |
- name: eval_50over
|
| 27 |
+
description: "Full 50-over ODI. Evaluation benchmark — measures trained coherence vs DLS par."
|
| 28 |
difficulty: hard
|
pyproject.toml
CHANGED
|
@@ -12,6 +12,7 @@ dependencies = [
|
|
| 12 |
"numpy>=1.24.0",
|
| 13 |
"pyyaml>=6.0.1",
|
| 14 |
"gradio>=4.0.0",
|
|
|
|
| 15 |
]
|
| 16 |
|
| 17 |
[project.optional-dependencies]
|
|
|
|
| 12 |
"numpy>=1.24.0",
|
| 13 |
"pyyaml>=6.0.1",
|
| 14 |
"gradio>=4.0.0",
|
| 15 |
+
"openai>=1.0.0",
|
| 16 |
]
|
| 17 |
|
| 18 |
[project.optional-dependencies]
|
train.py
CHANGED
|
@@ -717,11 +717,19 @@ def train_smoke(args):
|
|
| 717 |
run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
|
| 718 |
output_path = run_dir / "run_output.txt"
|
| 719 |
|
| 720 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
|
| 722 |
def log(msg: str):
|
| 723 |
print(msg)
|
| 724 |
-
|
|
|
|
| 725 |
|
| 726 |
log("# Training smoke: direct CricketEnvironment rollout")
|
| 727 |
log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
|
|
@@ -820,7 +828,6 @@ def train_smoke(args):
|
|
| 820 |
log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
|
| 821 |
log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
|
| 822 |
|
| 823 |
-
output_path.write_text("\n".join(lines) + "\n")
|
| 824 |
print(f"\nwrote={output_path}")
|
| 825 |
|
| 826 |
# Write README for the run
|
|
|
|
| 717 |
run_dir = _make_run_folder("train_smoke", model_hint, args.opponent_mode, args.max_overs)
|
| 718 |
output_path = run_dir / "run_output.txt"
|
| 719 |
|
| 720 |
+
# Write header immediately so the file exists while the run is in progress
|
| 721 |
+
header_lines = [
|
| 722 |
+
"# Training smoke: direct CricketEnvironment rollout",
|
| 723 |
+
f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}",
|
| 724 |
+
"purpose=verify one short training-style match rollout, prompt collection, tool stepping, and terminal reward",
|
| 725 |
+
"",
|
| 726 |
+
]
|
| 727 |
+
output_path.write_text("\n".join(header_lines))
|
| 728 |
|
| 729 |
def log(msg: str):
|
| 730 |
print(msg)
|
| 731 |
+
with open(output_path, "a") as _f:
|
| 732 |
+
_f.write(msg + "\n")
|
| 733 |
|
| 734 |
log("# Training smoke: direct CricketEnvironment rollout")
|
| 735 |
log(f"matches={args.matches} max_overs={args.max_overs} opponent_mode={args.opponent_mode}")
|
|
|
|
| 828 |
log(f"mean_adaptation={(sum(state.adaptation_scores) / len(state.adaptation_scores)) if state.adaptation_scores else 0.0:.3f}")
|
| 829 |
log(f"mean_opponent_awareness={(sum(state.opponent_awareness_scores) / len(state.opponent_awareness_scores)) if state.opponent_awareness_scores else 0.0:.3f}")
|
| 830 |
|
|
|
|
| 831 |
print(f"\nwrote={output_path}")
|
| 832 |
|
| 833 |
# Write README for the run
|