# Cocoa pod 5-class EVA-02-Large fine-tuning on MI300X
# Backbone: EVA-02-L (ViT-L/14), MIM-pretrained with a CLIP teacher, fine-tuned on IN22k+IN1k @ 448
# Dataset : merged LatAm + Peru YOLO crops (~4.9k crops, 5 classes)
#
# Resolution is 448, which quadruples per-image memory vs Track 2's 224×224 DINOv2,
# so batch_size and num_workers are scaled down. Raise them later if VRAM allows.
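#
# Quick arithmetic behind that claim: (448/224)^2 = 4x the pixels, and at
# patch size 14 the token count grows from (224/14)^2 = 256 to (448/14)^2 = 1024,
# so per-image activations scale roughly 4x (attention cost grows even faster).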
seed: 123
data:
  # splits.json built by prepare_cocoa_data.py; paths inside it are absolute
  splits_file: splits.json
  num_workers: 4
  pin_memory: true
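# A minimal sketch of how this block is consumed, assuming splits.json maps
# split names to lists of (image path, label) records; the real schema comes
# from prepare_cocoa_data.py, and CocoaCropDataset below is hypothetical:
#
#   import json
#   from torch.utils.data import DataLoader
#
#   with open("splits.json") as f:
#       splits = json.load(f)                  # paths inside are absolute
#   train_ds = CocoaCropDataset(splits["train"], transform=train_tf)
#   train_loader = DataLoader(train_ds, batch_size=64, shuffle=True,
#                             num_workers=4, pin_memory=True)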
model:
  # timm name for EVA-02-Large: MIM pretraining on Merged-38M, then IN22k + IN1k fine-tune @ 448
  name: eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
  num_classes: 5
  drop_path_rate: 0.1
  img_size: 448
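# A minimal sketch of how these fields map onto timm.create_model (all
# kwargs below are standard timm arguments; weights download on first use):
#
#   import timm
#   model = timm.create_model(
#       "eva02_large_patch14_448.mim_m38m_ft_in22k_in1k",
#       pretrained=True,
#       num_classes=5,        # swaps the pretrained head for a fresh 5-class one
#       drop_path_rate=0.1,
#       img_size=448,
#   )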
train:
  # Single-phase fine-tune (matches the simplified track2/train.py behavior)
  lr: 1.0e-4
  epochs: 20           # ~4.9k crops × 20 epochs ≈ 98k images seen, i.e. ~1.5k optimizer steps at BS=64
  batch_size: 64       # safe at 448px on a 192GB MI300X; raise if you have headroom
  grad_accum_steps: 1
  weight_decay: 0.05
  betas: [0.9, 0.999]
  label_smoothing: 0.1
  mixup_alpha: 0.1     # matches the reduced setting that worked in Track 2
  cutmix_alpha: 0.5
  mixup_prob: 0.5
  grad_clip: 1.0
  amp_dtype: bfloat16  # MI300X has native bf16
  compile: false       # enable after the first stable run if you want speed
  grad_checkpointing: true  # 448px attn maps are big; checkpoint to free VRAM
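# A minimal sketch of the optimizer/AMP loop these fields imply, assuming
# timm's Mixup handles mixup/cutmix/label smoothing (the actual
# track2/train.py may differ):
#
#   import torch
#   from timm.data import Mixup
#
#   optimizer = torch.optim.AdamW(model.parameters(), lr=1.0e-4,
#                                 betas=(0.9, 0.999), weight_decay=0.05)
#   mixup_fn = Mixup(mixup_alpha=0.1, cutmix_alpha=0.5, prob=0.5,
#                    label_smoothing=0.1, num_classes=5)
#   model.set_grad_checkpointing(True)   # timm API; trades compute for VRAM
#
#   for images, targets in train_loader:
#       images, targets = mixup_fn(images.cuda(), targets.cuda())
#       with torch.autocast("cuda", dtype=torch.bfloat16):
#           loss = torch.nn.functional.cross_entropy(model(images), targets)
#       loss.backward()
#       torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
#       optimizer.step()
#       optimizer.zero_grad()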
augment:
  rand_augment_n: 2
  rand_augment_m: 9
  random_erasing_p: 0.25
  horizontal_flip: true
  vertical_flip: true  # cocoa pods don't have a strong up/down orientation
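# A minimal sketch of the equivalent timm train-time transform
# (create_transform and the "rand-mX-nY" auto_augment string are standard
# timm; the exact pipeline in track2/train.py may differ):
#
#   from timm.data import create_transform
#   train_tf = create_transform(
#       input_size=448,
#       is_training=True,
#       auto_augment="rand-m9-n2",   # RandAugment: 2 ops per image at magnitude 9
#       re_prob=0.25,                # random erasing probability
#       hflip=0.5,
#       vflip=0.5,
#   )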
eval:
  batch_size: 64
  tta_rounds: 10
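# A minimal sketch of one plausible reading of tta_rounds (an assumption
# about track2/train.py: average logits over several stochastic-augmentation
# views of each image):
#
#   import torch
#   @torch.no_grad()
#   def predict_tta(model, pil_img, tta_tf, rounds=10):
#       views = torch.stack([tta_tf(pil_img) for _ in range(rounds)]).cuda()
#       return model(views).mean(dim=0)   # averaged logits for one image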
log:
  every_n_steps: 50
  checkpoint_dir: runs
  save_best_metric: val_macro_f1  # rare classes (witches_broom, carmenta), so optimize macro F1
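# A minimal sketch of the selection criterion: macro F1 (sklearn) weights
# all 5 classes equally, so the rare ones count as much as the common ones:
#
#   from sklearn.metrics import f1_score
#   val_macro_f1 = f1_score(y_true, y_pred, average="macro")
#   if val_macro_f1 > best_f1:
#       best_f1 = val_macro_f1
#       torch.save(model.state_dict(), "runs/best.pt")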