sh4shv4t committed on
Commit
9ef99b8
·
verified ·
1 Parent(s): 90fedec

sync: docs, training page fixes, OpenEnv SFT demo notebook

Browse files
Files changed (1) hide show
  1. dashboard/api.py +5 -0
dashboard/api.py CHANGED
@@ -342,6 +342,7 @@ def _training_status_payload() -> dict[str, Any]:
342
  eval_path = _RESULTS_DIR / "eval_results.json"
343
  grpo: float | None = None
344
  base: float | None = None
 
345
  rnd: float | None = None
346
  has_results = False
347
  if eval_path.is_file():
@@ -350,11 +351,14 @@ def _training_status_payload() -> dict[str, Any]:
350
  has_results = True
351
  grpo = raw.get("grpo_mean_reward")
352
  base = raw.get("base_mean_reward")
 
353
  rnd = raw.get("random_mean_reward")
354
  if grpo is not None:
355
  grpo = float(grpo)
356
  if base is not None:
357
  base = float(base)
 
 
358
  if rnd is not None:
359
  rnd = float(rnd)
360
  except Exception: # noqa: BLE001
@@ -407,6 +411,7 @@ def _training_status_payload() -> dict[str, Any]:
407
  return {
408
  "has_results": has_results,
409
  "grpo_mean_reward": grpo,
 
410
  "base_mean_reward": base,
411
  "random_mean_reward": rnd,
412
  "model_on_hub": bool(repo),
 
342
  eval_path = _RESULTS_DIR / "eval_results.json"
343
  grpo: float | None = None
344
  base: float | None = None
345
+ sft: float | None = None
346
  rnd: float | None = None
347
  has_results = False
348
  if eval_path.is_file():
 
351
  has_results = True
352
  grpo = raw.get("grpo_mean_reward")
353
  base = raw.get("base_mean_reward")
354
+ sft = raw.get("sft_mean_reward")
355
  rnd = raw.get("random_mean_reward")
356
  if grpo is not None:
357
  grpo = float(grpo)
358
  if base is not None:
359
  base = float(base)
360
+ if sft is not None:
361
+ sft = float(sft)
362
  if rnd is not None:
363
  rnd = float(rnd)
364
  except Exception: # noqa: BLE001
 
411
  return {
412
  "has_results": has_results,
413
  "grpo_mean_reward": grpo,
414
+ "sft_mean_reward": sft,
415
  "base_mean_reward": base,
416
  "random_mean_reward": rnd,
417
  "model_on_hub": bool(repo),