{ "model_name": "llm-semantic-router/mmbert-32k-yarn", "output_dir": "/workspace/outputs/mmbert-rerank-32k-2d-matryoshka", "use_2d_matryoshka": true, "layer_indices": "3,6,11,22", "dim_indices": "768,512,256,128,64", "pooling_strategy": "cls", "train_data": "/workspace/data/bge-m3/bge-m3-data", "max_length": 32768, "max_samples": null, "negatives_per_query": 3, "use_quora": false, "use_fever": false, "max_quora_samples": 100000, "max_fever_samples": 100000, "epochs": 1, "batch_size": 16, "gradient_accumulation_steps": 2, "learning_rate": 2e-05, "weight_decay": 0.01, "warmup_ratio": 0.1, "max_grad_norm": 1.0, "use_flash_attn": true, "bf16": true, "gradient_checkpointing": true, "num_workers": 4, "logging_steps": 100, "save_steps": 5000, "seed": 42 }