Spaces:

ycwhencpp
/

final-iteration

Paused

update

c3e9b69 12 days ago

997 Bytes

	{
		"model": "Qwen/Qwen2.5-1.5B-Instruct",
		"training": "LoRA SFT (real weight updates)",
		"rounds": 1,
		"episodes_per_round": 1,
		"before": {
			"monthly_engage": 0.3048,
			"monthly_strategic": 0.3456,
			"monthly_competitive": 0.4808
		},
		"after": {
			"monthly_engage": 0.0162,
			"monthly_strategic": 0.1749,
			"monthly_competitive": 0.3621
		},
		"smart_heuristic": {
			"monthly_engage": 0.6342,
			"monthly_strategic": 0.7218,
			"monthly_competitive": 0.8315
		},
		"improvement": {
			"monthly_engage": -0.2886,
			"monthly_strategic": -0.1707,
			"monthly_competitive": -0.1187
		},
		"training_log": {
			"round": [
				1
			],
			"avg_episode_reward": [
				1.593
			],
			"max_episode_reward": [
				1.593
			],
			"min_episode_reward": [
				1.593
			],
			"avg_grader": [
				0.0268
			],
			"max_grader": [
				0.0268
			],
			"n_training_samples": [
				4
			],
			"train_loss": [
				2.3314
			]
		}
	}