File size: 1,985 Bytes
807bce8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
  "model_id": "google/siglip2-base-patch16-naflex",
  "dataset": "apartments",
  "apartments_jsonl": "data/apartments/question-image-dataset_labeled.jsonl",
  "apartments_images_dir": "data/apartments/images",
  "fmnist_root": "data",
  "output_root": "models",
  "run_name": null,
  "batch_size": 12,
  "gradient_accumulation_steps": 1,
  "stack_size": 1,
  "epochs": 10,
  "max_steps": null,
  "eval_interval": 1,
  "eval_batches": 2,
  "checkpoint_interval": 1000,
  "checkpoint_mode": "max_val_acc",
  "val_ratio": 0.1,
  "seed": 42,
  "apartments_train_group_limit": null,
  "apartments_train_question_limit": null,
  "apartments_hard_negative_strategy": "within_group_overlap",
  "apartments_hard_negative_topk": 0,
  "apartments_hard_negative_oversample_factor": 0,
  "num_workers_train": 4,
  "num_workers_val": 2,
  "text_max_length": 64,
  "image_max_num_patches": null,
  "lr_backbone": 1e-6,
  "lr_head": 0.0002,
  "weight_decay": 0.01,
  "freeze_backbone": false,
  "freeze_backbone_epochs": 0,
  "freeze_backbone_steps": null,
  "backbone_trainable_scope": "full",
  "attention_heads": 2,
  "attention_dropout": 0.3,
  "image_self_attention_layers": 0,
  "aggregation_mode": "attention",
  "head_feature_mode": "matching",
  "late_interaction_topk": 8,
  "head_hidden_dim": 768,
  "head_num_layers": 2,
  "head_dropout": 0.2,
  "head_activation": "swiglu",
  "label_smoothing": 0.05,
  "alignment_loss_weight": 0.05,
  "alignment_loss_logit_scale": 18.0,
  "alignment_loss_hard_negative_topk": 1,
  "compile_model": false,
  "compile_fallback_to_eager": true,
  "backbone_torch_dtype": "auto",
  "attn_implementation": null,
  "resume_path": null,
  "min_free_space_gb": 1.0,
  "use_wandb": true,
  "wandb_project": "flash-judge",
  "wandb_entity": null,
  "wandb_run_name": null,
  "save_artifacts": true,
  "push_to_hub": true,
  "hub_org": "FuncAI",
  "hub_repo_name": "FlashJudge3",
  "hub_private": false,
  "hub_token": null,
  "device": null,
  "use_amp": true
}