---
# Cocoa pod 5-class — EVA-02-Large fine-tuning on MI300X
# Backbone: EVA-02-L ViT-L/14 pretrained MIM-CLIP, fine-tuned IN22k+IN1k @ 448
# Dataset : merged LatAm + Peru YOLO crops (~4.9k crops, 5 classes)
#
# Resolution is 448 — quadruples per-image memory vs Track 2's 224×224 DINOv2,
# so batch_size and num_workers are scaled down. Saturate later if VRAM allows.

seed: 123

data:
  # splits.json built by prepare_cocoa_data.py — paths inside it are absolute
  splits_file: splits.json
  num_workers: 4
  pin_memory: true

model:
  # timm name for EVA-02-Large pretrained MIM (LAION) + IN22k + IN1k FT @ 448
  name: eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
  num_classes: 5
  drop_path_rate: 0.1
  img_size: 448

train:
  # Single-phase fine-tune (matches the simplified track2/train.py behavior)
  lr: 1.0e-4
  epochs: 20  # ~5k crops × 20 epochs ≈ 100k samples → ≈1.5k optimizer steps at BS=64
  batch_size: 64  # safe at 448px on a 192GB MI300X; raise if you have headroom
  grad_accum_steps: 1
  weight_decay: 0.05
  betas: [0.9, 0.999]
  label_smoothing: 0.1
  mixup_alpha: 0.1  # matches the reduced setting that worked in Track 2
  cutmix_alpha: 0.5
  mixup_prob: 0.5
  grad_clip: 1.0
  amp_dtype: bfloat16  # MI300X has native bf16
  compile: false  # enable after the first stable run if you want speed
  grad_checkpointing: true  # 448px attn maps are big; checkpoint to free VRAM

augment:
  rand_augment_n: 2
  rand_augment_m: 9
  random_erasing_p: 0.25
  horizontal_flip: true
  vertical_flip: true  # cocoa pods don't have a strong up/down orientation

eval:
  batch_size: 64
  tta_rounds: 10

log:
  every_n_steps: 50
  checkpoint_dir: runs
  # rare classes (witches_broom, carmenta) → optimize macro F1
  save_best_metric: val_macro_f1