{
"base_model": "/workspaces/LLiMba/models/cpt-pretrain-qwen2.5-3b",
"dataset": "/workspaces/LLiMba/data/curated/sft/sft_dataset.jsonl",
"mode": "lora",
"rank": 256,
"alpha": 256,
"dropout": 0.05,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"lr": 2e-05,
"epochs": 2,
"batch_size": 1,
"grad_accum": 16,
"effective_batch": 16,
"max_length": 4096,
"warmup_steps": 50,
"attention": "flash_attention_2",
"eval_split": 0.05,
"train_loss": 0.867611675270807,
"eval_loss": null,
"train_samples": 13683,
"eval_samples": 721
}