sync: docs, training page fixes, OpenEnv SFT demo notebook
Browse files
- dashboard/api.py +5 -0
dashboard/api.py
CHANGED
|
@@ -342,6 +342,7 @@ def _training_status_payload() -> dict[str, Any]:
|
|
| 342 |
eval_path = _RESULTS_DIR / "eval_results.json"
|
| 343 |
grpo: float | None = None
|
| 344 |
base: float | None = None
|
|
|
|
| 345 |
rnd: float | None = None
|
| 346 |
has_results = False
|
| 347 |
if eval_path.is_file():
|
|
@@ -350,11 +351,14 @@ def _training_status_payload() -> dict[str, Any]:
|
|
| 350 |
has_results = True
|
| 351 |
grpo = raw.get("grpo_mean_reward")
|
| 352 |
base = raw.get("base_mean_reward")
|
|
|
|
| 353 |
rnd = raw.get("random_mean_reward")
|
| 354 |
if grpo is not None:
|
| 355 |
grpo = float(grpo)
|
| 356 |
if base is not None:
|
| 357 |
base = float(base)
|
|
|
|
|
|
|
| 358 |
if rnd is not None:
|
| 359 |
rnd = float(rnd)
|
| 360 |
except Exception: # noqa: BLE001
|
|
@@ -407,6 +411,7 @@ def _training_status_payload() -> dict[str, Any]:
|
|
| 407 |
return {
|
| 408 |
"has_results": has_results,
|
| 409 |
"grpo_mean_reward": grpo,
|
|
|
|
| 410 |
"base_mean_reward": base,
|
| 411 |
"random_mean_reward": rnd,
|
| 412 |
"model_on_hub": bool(repo),
|
|
|
|
| 342 |
eval_path = _RESULTS_DIR / "eval_results.json"
|
| 343 |
grpo: float | None = None
|
| 344 |
base: float | None = None
|
| 345 |
+
sft: float | None = None
|
| 346 |
rnd: float | None = None
|
| 347 |
has_results = False
|
| 348 |
if eval_path.is_file():
|
|
|
|
| 351 |
has_results = True
|
| 352 |
grpo = raw.get("grpo_mean_reward")
|
| 353 |
base = raw.get("base_mean_reward")
|
| 354 |
+
sft = raw.get("sft_mean_reward")
|
| 355 |
rnd = raw.get("random_mean_reward")
|
| 356 |
if grpo is not None:
|
| 357 |
grpo = float(grpo)
|
| 358 |
if base is not None:
|
| 359 |
base = float(base)
|
| 360 |
+
if sft is not None:
|
| 361 |
+
sft = float(sft)
|
| 362 |
if rnd is not None:
|
| 363 |
rnd = float(rnd)
|
| 364 |
except Exception: # noqa: BLE001
|
|
|
|
| 411 |
return {
|
| 412 |
"has_results": has_results,
|
| 413 |
"grpo_mean_reward": grpo,
|
| 414 |
+
"sft_mean_reward": sft,
|
| 415 |
"base_mean_reward": base,
|
| 416 |
"random_mean_reward": rnd,
|
| 417 |
"model_on_hub": bool(repo),
|