final-iteration/run-output/plots/training_summary.json
ycwhencpp's picture
HF Job: train_grpo run output
17149c8 verified
{
"model": "Qwen/Qwen2.5-3B-Instruct",
"training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
"phases": [],
"rounds_per_phase": 3,
"episodes_per_round": 6,
"before": {
"monthly_engage": 1.0,
"monthly_strategic": 0.8357,
"monthly_competitive": 0.9414
},
"after": {
"monthly_engage": 0.999,
"monthly_strategic": 0.9321439559505211,
"monthly_competitive": 0.999
},
"smart_heuristic": {
"monthly_engage": 0.7519,
"monthly_strategic": 0.9101,
"monthly_competitive": 0.9141
},
"improvement": {
"monthly_engage": -0.0010000000000000009,
"monthly_strategic": 0.09644395595052113,
"monthly_competitive": 0.057599999999999985
},
"training_log": {
"phase": [],
"round": [],
"global_step": [],
"use_hint": [],
"avg_episode_reward": [],
"max_episode_reward": [],
"min_episode_reward": [],
"avg_grader": [],
"max_grader": [],
"n_training_samples": [],
"train_loss": []
}
}