retailops-instruct-qwen3.5-9b / training_config.json
kyLELEng's picture
Upload merged RetailOps-Instruct Qwen3.5-9B full model
a750b19 verified
{
"base_model_id": "Qwen/Qwen3.5-9B",
"dataset_repo_id": "kyLELEng/retailops-instruct-sft",
"model_repo_id": "kyLELEng/retailops-instruct-qwen3.5-9b-lora",
"output_dir": "/tmp/retailops-instruct-qwen35-9b-lora",
"smoke_test": false,
"force_rebuild_dataset": false,
"dataset_only": false,
"max_total_examples": 300000,
"max_ec_examples": 70000,
"max_shopify_examples": 40000,
"max_review_examples": 170000,
"max_c4_examples": 20000,
"review_scan_per_category": 180000,
"meta_scan_per_category": 120000,
"max_steps": 600,
"max_length": 2048,
"per_device_train_batch_size": 1,
"per_device_eval_batch_size": 1,
"gradient_accumulation_steps": 2,
"learning_rate": 0.0001,
"warmup_ratio": 0.03,
"weight_decay": 0.01,
"logging_steps": 10,
"eval_steps": 100,
"save_steps": 300,
"save_total_limit": 2,
"dataset_num_proc": 8,
"lora_r": 64,
"lora_alpha": 128,
"lora_dropout": 0.05,
"seed": 42,
"mixed_precision": "bf16",
"attn_implementation": "sdpa",
"packing": false,
"eval_generation_samples": 8,
"max_new_tokens": 800
}