RegTech-4B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-4B-Instruct
1b68735 verified
{
"model_base": "Qwen/Qwen3-4B-Instruct-2507",
"model_name": "RegTech-4B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.4B",
"train_samples": 2330,
"eval_samples": 258,
"params": {
"rank": 4,
"alpha": 8,
"dropout": 0.05,
"lr": 1e-05,
"scheduler": "cosine",
"epochs": 1,
"effective_batch": 8,
"max_seq_length": 4096,
"neftune_alpha": 0.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 292,
"final_train_loss": 1.5045,
"best_eval_loss": 1.601854681968689,
"best_eval_step": 240,
"best_token_accuracy": 0.6812,
"elapsed_minutes": 8.6
},
"loss_history": {
"train": [
[
10,
2.1906
],
[
20,
2.0417
],
[
30,
2.1217
],
[
40,
2.0513
],
[
50,
1.9839
],
[
60,
1.9423
],
[
70,
1.9321
],
[
80,
1.8047
],
[
90,
1.7045
],
[
100,
1.8603
],
[
110,
1.721
],
[
120,
1.6419
],
[
130,
1.5821
],
[
140,
1.5593
],
[
150,
1.4756
],
[
160,
1.4945
],
[
170,
1.5168
],
[
180,
1.5689
],
[
190,
1.3763
],
[
200,
1.5759
],
[
210,
1.477
],
[
220,
1.4889
],
[
230,
1.4514
],
[
240,
1.441
],
[
250,
1.427
],
[
260,
1.4423
],
[
270,
1.4199
],
[
280,
1.457
],
[
290,
1.5045
]
],
"eval": [
[
80,
2.036996841430664
],
[
160,
1.6603444814682007
],
[
240,
1.601854681968689
]
],
"token_accuracy": [
[
80,
0.661
],
[
160,
0.6759
],
[
240,
0.6812
]
]
}
}