# Cocoa pod 5-class: EVA-02-Large fine-tuning on MI300X
# Backbone: EVA-02-L (ViT-L/14), MIM-pretrained with CLIP-feature targets, fine-tuned on IN-22k + IN-1k @ 448
# Dataset : merged LatAm + Peru YOLO crops (~4.9k crops, 5 classes)
#
# Resolution is 448, which quadruples the token count (and roughly the per-image activation
# memory) vs Track 2's 224×224 DINOv2, so batch_size and num_workers are scaled down;
# bump them back up later if VRAM allows.
seed: 123
data:
  # splits.json is built by prepare_cocoa_data.py; the paths inside it are absolute
  splits_file: splits.json
  num_workers: 4
  pin_memory: true
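  # num_workers / pin_memory presumably feed straight into
  # torch.utils.data.DataLoader(..., num_workers=4, pin_memory=True); raise num_workers
  # first if the 448px decode + augment pipeline can't keep the GPU fed.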
model:
  # timm name for EVA-02-Large: MIM-pretrained on Merged-38M, fine-tuned on IN-22k + IN-1k @ 448
  name: eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
  num_classes: 5
  drop_path_rate: 0.1
  img_size: 448
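  # The training script presumably instantiates this with something like:
  #   timm.create_model("eva02_large_patch14_448.mim_m38m_ft_in22k_in1k",
  #                     pretrained=True, num_classes=5, drop_path_rate=0.1, img_size=448)
  # (the exact call lives in the training code, not in this config)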
train:
  # Single-phase fine-tune (matches the simplified track2/train.py behavior)
  lr: 1.0e-4
  epochs: 20       # ~5k crops × 20 epochs ≈ 100k samples (~1.5k optimizer steps at BS=64)
  batch_size: 64   # safe at 448px on a 192GB MI300X; raise if you have headroom
  grad_accum_steps: 1
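  # Effective batch size = batch_size × grad_accum_steps = 64 × 1 = 64; if 448px still
  # overflows VRAM, raising grad_accum_steps keeps the effective batch while cutting peak memory.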
  weight_decay: 0.05
  betas: [0.9, 0.999]
  label_smoothing: 0.1
  mixup_alpha: 0.1   # matches the reduced setting that worked in Track 2
  cutmix_alpha: 0.5
  mixup_prob: 0.5
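  # These four knobs line up with timm.data.Mixup (assumed wiring, not the literal call):
  #   Mixup(mixup_alpha=0.1, cutmix_alpha=0.5, prob=0.5, label_smoothing=0.1, num_classes=5)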
  grad_clip: 1.0
  amp_dtype: bfloat16       # MI300X has native bf16
  compile: false            # enable after the first stable run if you want speed
  grad_checkpointing: true  # 448px attn maps are big; checkpoint to free VRAM
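  # Assumed PyTorch-side wiring (not spelled out in this file): lr / betas / weight_decay
  # read like AdamW hyperparameters, i.e. torch.optim.AdamW(model.parameters(), lr=1e-4,
  # betas=(0.9, 0.999), weight_decay=0.05); amp_dtype maps to
  # torch.autocast("cuda", dtype=torch.bfloat16), and bf16 needs no GradScaler;
  # compile would wrap the model in torch.compile(model); grad_checkpointing matches
  # timm's model.set_grad_checkpointing(True).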
augment:
  rand_augment_n: 2
  rand_augment_m: 9
  random_erasing_p: 0.25
  horizontal_flip: true
  vertical_flip: true   # cocoa pods don't have a strong up/down orientation
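  # In timm terms the block above roughly corresponds to (assumed, not the literal call):
  #   timm.data.create_transform(input_size=448, is_training=True,
  #                              auto_augment="rand-m9-n2", re_prob=0.25,
  #                              hflip=0.5, vflip=0.5)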
eval:
  batch_size: 64
  tta_rounds: 10
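  # tta_rounds is interpreted by the eval script; typically it means 10 augmented forward
  # passes per image with the logits (or softmax probs) averaged before the argmax.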
log:
  every_n_steps: 50
  checkpoint_dir: runs
  save_best_metric: val_macro_f1  # rare classes (witches_broom, carmenta) → optimize macro F1
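  # Macro F1 gives each of the 5 classes equal weight regardless of support, which is why
  # it is preferred here over accuracy; e.g. sklearn.metrics.f1_score(y_true, y_pred, average="macro").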