FlashJudge3 / training_config.json
KilianFt's picture
Upload FlashJudge artifacts from run_20260413_171923
807bce8 verified
{
"model_id": "google/siglip2-base-patch16-naflex",
"dataset": "apartments",
"apartments_jsonl": "data/apartments/question-image-dataset_labeled.jsonl",
"apartments_images_dir": "data/apartments/images",
"fmnist_root": "data",
"output_root": "models",
"run_name": null,
"batch_size": 12,
"gradient_accumulation_steps": 1,
"stack_size": 1,
"epochs": 10,
"max_steps": null,
"eval_interval": 1,
"eval_batches": 2,
"checkpoint_interval": 1000,
"checkpoint_mode": "max_val_acc",
"val_ratio": 0.1,
"seed": 42,
"apartments_train_group_limit": null,
"apartments_train_question_limit": null,
"apartments_hard_negative_strategy": "within_group_overlap",
"apartments_hard_negative_topk": 0,
"apartments_hard_negative_oversample_factor": 0,
"num_workers_train": 4,
"num_workers_val": 2,
"text_max_length": 64,
"image_max_num_patches": null,
"lr_backbone": 1e-6,
"lr_head": 0.0002,
"weight_decay": 0.01,
"freeze_backbone": false,
"freeze_backbone_epochs": 0,
"freeze_backbone_steps": null,
"backbone_trainable_scope": "full",
"attention_heads": 2,
"attention_dropout": 0.3,
"image_self_attention_layers": 0,
"aggregation_mode": "attention",
"head_feature_mode": "matching",
"late_interaction_topk": 8,
"head_hidden_dim": 768,
"head_num_layers": 2,
"head_dropout": 0.2,
"head_activation": "swiglu",
"label_smoothing": 0.05,
"alignment_loss_weight": 0.05,
"alignment_loss_logit_scale": 18.0,
"alignment_loss_hard_negative_topk": 1,
"compile_model": false,
"compile_fallback_to_eager": true,
"backbone_torch_dtype": "auto",
"attn_implementation": null,
"resume_path": null,
"min_free_space_gb": 1.0,
"use_wandb": true,
"wandb_project": "flash-judge",
"wandb_entity": null,
"wandb_run_name": null,
"save_artifacts": true,
"push_to_hub": true,
"hub_org": "FuncAI",
"hub_repo_name": "FlashJudge3",
"hub_private": false,
"hub_token": null,
"device": null,
"use_amp": true
}