{ "model": { "dim": 384, "depth": 24, "patch_size": 16, "proj_factor": 2.0, "qkv_proj_blocksize": 4, "num_heads": 4, "conv_kernel": 4, "mlp_ratio": 4.0, "drop_path_rate": 0.05, "tmoe_blocks": 2, "num_experts": 4, "film_interval": 6, "feat_size": 16, "use_uncertainty": true }, "training": { "phase1": { "epochs": 300, "lr": 1e-4, "batch_size": 32, "weight_decay": 0.05, "warmup_epochs": 5, "grad_clip": 1.0, "backbone_lr_scale": 0.1, "acl_ramp_epochs": 50 }, "phase2": { "epochs": 100, "lr": 1e-5, "batch_size": 32, "backbone_lr_scale": 0.01, "warmup_epochs": 2 } }, "data": { "template_size": 128, "search_size": 256, "datasets": ["got10k", "lasot", "trackingnet", "coco", "visdrone", "uavdt", "webuav3m"] }, "inference": { "search_scale": 4.0, "confidence_threshold": 0.3, "template_update_threshold": 0.8 }, "constraints": { "max_params_M": 50, "max_latency_ms": 30, "max_gflops": 20, "max_size_mb": 500 } }