Spaces:
Running
Running
deploy: update train/train_minimal.py
Browse files- train/train_minimal.py +2 -2
train/train_minimal.py
CHANGED
|
@@ -674,8 +674,8 @@ def save_training_artifacts(trainer, result, before_components=None, after_compo
|
|
| 674 |
"global_step": int(getattr(result, "global_step", 0) or 0),
|
| 675 |
"training_loss": float(getattr(result, "training_loss", 0.0) or 0.0),
|
| 676 |
"training_reward_curve": {
|
| 677 |
-
"type": "
|
| 678 |
-
"note": "
|
| 679 |
"mean_start": round(float(train_rewards[0]), 4) if train_rewards else None,
|
| 680 |
"mean_end": round(float(train_rewards[-1]), 4) if train_rewards else None,
|
| 681 |
},
|
|
|
|
| 674 |
"global_step": int(getattr(result, "global_step", 0) or 0),
|
| 675 |
"training_loss": float(getattr(result, "training_loss", 0.0) or 0.0),
|
| 676 |
"training_reward_curve": {
|
| 677 |
+
"type": "env_http_reward",
|
| 678 |
+
"note": "Reward from live environment via POST /reset + /step (MR-2 compliant). Not comparable to eval_reward which is clamped [0,1].",
|
| 679 |
"mean_start": round(float(train_rewards[0]), 4) if train_rewards else None,
|
| 680 |
"mean_end": round(float(train_rewards[-1]), 4) if train_rewards else None,
|
| 681 |
},
|