elephant-rerank-v1-text-small / training_args.json
Xunzhuo's picture
Mirror agentic-intelligence-lab/elephant-rerank-v1-text-small from ModelScope
69eb4b7 verified
{
"model_name": "llm-semantic-router/mmbert-32k-yarn",
"output_dir": "/workspace/outputs/mmbert-rerank-32k-2d-matryoshka",
"use_2d_matryoshka": true,
"layer_indices": "3,6,11,22",
"dim_indices": "768,512,256,128,64",
"pooling_strategy": "cls",
"train_data": "/workspace/data/bge-m3/bge-m3-data",
"max_length": 32768,
"max_samples": null,
"negatives_per_query": 3,
"use_quora": false,
"use_fever": false,
"max_quora_samples": 100000,
"max_fever_samples": 100000,
"epochs": 1,
"batch_size": 16,
"gradient_accumulation_steps": 2,
"learning_rate": 2e-05,
"weight_decay": 0.01,
"warmup_ratio": 0.1,
"max_grad_norm": 1.0,
"use_flash_attn": true,
"bf16": true,
"gradient_checkpointing": true,
"num_workers": 4,
"logging_steps": 100,
"save_steps": 5000,
"seed": 42
}