AniketAsla committed on
Commit
319c96e
·
verified ·
1 Parent(s): c7f3175

deploy: update train/train_minimal.py

Browse files
Files changed (1) hide show
  1. train/train_minimal.py +2 -2
train/train_minimal.py CHANGED
@@ -674,8 +674,8 @@ def save_training_artifacts(trainer, result, before_components=None, after_compo
674
  "global_step": int(getattr(result, "global_step", 0) or 0),
675
  "training_loss": float(getattr(result, "training_loss", 0.0) or 0.0),
676
  "training_reward_curve": {
677
- "type": "unbounded_scalar",
678
- "note": "Direct training_reward() scalar. Not comparable to eval_reward.",
679
  "mean_start": round(float(train_rewards[0]), 4) if train_rewards else None,
680
  "mean_end": round(float(train_rewards[-1]), 4) if train_rewards else None,
681
  },
 
674
  "global_step": int(getattr(result, "global_step", 0) or 0),
675
  "training_loss": float(getattr(result, "training_loss", 0.0) or 0.0),
676
  "training_reward_curve": {
677
+ "type": "env_http_reward",
678
+ "note": "Reward from live environment via POST /reset + /step (MR-2 compliant). Not comparable to eval_reward which is clamped [0,1].",
679
  "mean_start": round(float(train_rewards[0]), 4) if train_rewards else None,
680
  "mean_end": round(float(train_rewards[-1]), 4) if train_rewards else None,
681
  },