File size: 814 Bytes
69eb4b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
{
  "model_name": "llm-semantic-router/mmbert-32k-yarn",
  "output_dir": "/workspace/outputs/mmbert-rerank-32k-2d-matryoshka",
  "use_2d_matryoshka": true,
  "layer_indices": "3,6,11,22",
  "dim_indices": "768,512,256,128,64",
  "pooling_strategy": "cls",
  "train_data": "/workspace/data/bge-m3/bge-m3-data",
  "max_length": 32768,
  "max_samples": null,
  "negatives_per_query": 3,
  "use_quora": false,
  "use_fever": false,
  "max_quora_samples": 100000,
  "max_fever_samples": 100000,
  "epochs": 1,
  "batch_size": 16,
  "gradient_accumulation_steps": 2,
  "learning_rate": 2e-05,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "max_grad_norm": 1.0,
  "use_flash_attn": true,
  "bf16": true,
  "gradient_checkpointing": true,
  "num_workers": 4,
  "logging_steps": 100,
  "save_steps": 5000,
  "seed": 42
}