| { | |
| "model_base": "Qwen/Qwen3-4B-Instruct-2507", | |
| "model_name": "RegTech-4B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.4B", | |
| "train_samples": 2330, | |
| "eval_samples": 258, | |
| "params": { | |
| "rank": 4, | |
| "alpha": 8, | |
| "dropout": 0.05, | |
| "lr": 1e-05, | |
| "scheduler": "cosine", | |
| "epochs": 1, | |
| "effective_batch": 8, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 0.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 292, | |
| "final_train_loss": 1.5045, | |
| "best_eval_loss": 1.601854681968689, | |
| "best_eval_step": 240, | |
| "best_token_accuracy": 0.6812, | |
| "elapsed_minutes": 8.6 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 10, | |
| 2.1906 | |
| ], | |
| [ | |
| 20, | |
| 2.0417 | |
| ], | |
| [ | |
| 30, | |
| 2.1217 | |
| ], | |
| [ | |
| 40, | |
| 2.0513 | |
| ], | |
| [ | |
| 50, | |
| 1.9839 | |
| ], | |
| [ | |
| 60, | |
| 1.9423 | |
| ], | |
| [ | |
| 70, | |
| 1.9321 | |
| ], | |
| [ | |
| 80, | |
| 1.8047 | |
| ], | |
| [ | |
| 90, | |
| 1.7045 | |
| ], | |
| [ | |
| 100, | |
| 1.8603 | |
| ], | |
| [ | |
| 110, | |
| 1.721 | |
| ], | |
| [ | |
| 120, | |
| 1.6419 | |
| ], | |
| [ | |
| 130, | |
| 1.5821 | |
| ], | |
| [ | |
| 140, | |
| 1.5593 | |
| ], | |
| [ | |
| 150, | |
| 1.4756 | |
| ], | |
| [ | |
| 160, | |
| 1.4945 | |
| ], | |
| [ | |
| 170, | |
| 1.5168 | |
| ], | |
| [ | |
| 180, | |
| 1.5689 | |
| ], | |
| [ | |
| 190, | |
| 1.3763 | |
| ], | |
| [ | |
| 200, | |
| 1.5759 | |
| ], | |
| [ | |
| 210, | |
| 1.477 | |
| ], | |
| [ | |
| 220, | |
| 1.4889 | |
| ], | |
| [ | |
| 230, | |
| 1.4514 | |
| ], | |
| [ | |
| 240, | |
| 1.441 | |
| ], | |
| [ | |
| 250, | |
| 1.427 | |
| ], | |
| [ | |
| 260, | |
| 1.4423 | |
| ], | |
| [ | |
| 270, | |
| 1.4199 | |
| ], | |
| [ | |
| 280, | |
| 1.457 | |
| ], | |
| [ | |
| 290, | |
| 1.5045 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 80, | |
| 2.036996841430664 | |
| ], | |
| [ | |
| 160, | |
| 1.6603444814682007 | |
| ], | |
| [ | |
| 240, | |
| 1.601854681968689 | |
| ] | |
| ], | |
| "token_accuracy": [ | |
| [ | |
| 80, | |
| 0.661 | |
| ], | |
| [ | |
| 160, | |
| 0.6759 | |
| ], | |
| [ | |
| 240, | |
| 0.6812 | |
| ] | |
| ] | |
| } | |
| } |