File size: 1,189 Bytes
e21cde3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
  "seed": 42,
  "output_dir": "/scratch/hf_st_mm_outputs/server_datacenter_8gpu_tri_encoder",
  "model": {
    "text_encoder_name": "llm-semantic-router/mmbert-embed-32k-2d-matryoshka",
    "image_encoder_name": "google/siglip2-so400m-patch14-384",
    "audio_encoder_name": "openai/whisper-medium",
    "embedding_dim": 768,
    "max_text_length": 32768
  },
  "training": {
    "epochs": 10,
    "batch_size": 12,
    "grad_accum_steps": 8,
    "num_workers": 4,
    "prefetch_factor": 4,
    "shard_prefetch": 2,
    "shard_cache_limit": 4,
    "sequential_shard_loading": true,
    "shuffle": false,
    "modality_homogeneous_batches": false,
    "learning_rate": 1e-05,
    "weight_decay": 0.01,
    "warmup_ratio": 0.1,
    "max_grad_norm": 1.0,
    "mixed_precision": "bf16",
    "log_every": 10,
    "save_every": 2000,
    "hard_negative_ratio": 0.5
  },
  "loss": {
    "type": "cached_mnrl",
    "scale": 20.0
  },
  "data": {
    "cache_dir": "/scratch/2dmse-data/server_full_datacenter_cache/train"
  },
  "validation": {
    "cache_dir": "/scratch/2dmse-data/server_full_datacenter_cache/val",
    "num_workers": 2,
    "shard_prefetch": 1,
    "shard_cache_limit": 2
  }
}