final-iteration/run-output/plots/training_summary.json
vaibhavkhandare's picture
Upload folder using huggingface_hub
e52d302 verified
raw
history blame
1.91 kB
{
"model": "Qwen/Qwen2.5-3B-Instruct",
"training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
"phases": [
"phase1_timing",
"phase2_content"
],
"rounds_per_phase": 3,
"episodes_per_round": 6,
"before": {
"monthly_engage": 0.0,
"monthly_strategic": 0.175,
"monthly_competitive": 0.035
},
"after": {
"monthly_engage": 0.0,
"monthly_strategic": 0.175,
"monthly_competitive": 0.035
},
"smart_heuristic": {
"monthly_engage": 0.7519,
"monthly_strategic": 0.9101,
"monthly_competitive": 0.9141
},
"improvement": {
"monthly_engage": 0.0,
"monthly_strategic": 0.0,
"monthly_competitive": 0.0
},
"training_log": {
"phase": [
"phase1_timing",
"phase1_timing",
"phase1_timing",
"phase2_content",
"phase2_content",
"phase2_content"
],
"round": [
1,
2,
3,
1,
2,
3
],
"global_step": [
1,
2,
3,
4,
5,
6
],
"use_hint": [
true,
false,
false,
true,
false,
false
],
"avg_episode_reward": [
5.127,
3.04,
2.867,
3.538,
2.15,
1.924
],
"max_episode_reward": [
5.315,
3.303,
3.016,
3.837,
2.807,
2.609
],
"min_episode_reward": [
4.96,
2.6,
2.555,
3.338,
1.587,
1.375
],
"avg_grader": [
0.9498,
0.259,
0.2083,
0.8697,
0.3763,
0.2855
],
"max_grader": [
1.0,
0.3614,
0.3042,
1.0,
0.5979,
0.5027
],
"n_training_samples": [
81,
96,
102,
77,
90,
76
],
"train_loss": [
2.833,
3.1413,
3.1255,
2.8381,
2.9281,
2.9184
]
}
}