Spaces:

ycwhencpp
/

final-iteration

Paused

Upload folder using huggingface_hub

3419724 verified about 1 month ago

1.09 kB

	{
	"model": "Qwen/Qwen2.5-3B-Instruct",
	"training": "LoRA SFT (real weight updates)",
	"rounds": 2,
	"episodes_per_round": 6,
	"before": {
	"monthly_engage": 1.0,
	"monthly_strategic": 0.8426,
	"monthly_competitive": 0.9521
	},
	"after": {
	"monthly_engage": 1.0,
	"monthly_strategic": 0.8416,
	"monthly_competitive": 0.964
	},
	"smart_heuristic": {
	"monthly_engage": 0.7352,
	"monthly_strategic": 0.9043,
	"monthly_competitive": 0.9066
	},
	"improvement": {
	"monthly_engage": 0.0,
	"monthly_strategic": -0.0010000000000000009,
	"monthly_competitive": 0.011900000000000022
	},
	"training_log": {
	"round": [
	1,
	2
	],
	"avg_episode_reward": [
	3.904,
	4.215
	],
	"max_episode_reward": [
	4.514,
	4.658
	],
	"min_episode_reward": [
	3.287,
	3.566
	],
	"avg_grader": [
	0.6202,
	0.7325
	],
	"max_grader": [
	0.8268,
	0.8703
	],
	"n_training_samples": [
	101,
	102
	],
	"train_loss": [
	2.6723,
	2.5934
	]
	}
	}