{
  "architecture": "IgorModel",
  "description": "",
  "version": 0,
  "config": {
    "resolution": 224,
    "patch_size": 14,
    "in_channels": 3,
    "d_t": 8,
    "augment_type": "resize_crop",
    "augment_level": "clip",
    "random_crop_scale": [
      0.8,
      1.0
    ],
    "random_crop_ratio": [
      0.9,
      1.1
    ],
    "mlp_ratio": 4.0,
    "use_xformers": null,
    "encoder_depth": 12,
    "encoder_embed_dim": 768,
    "encoder_n_heads": 12,
    "action_latent_dim": 32,
    "st_use_qk_norm": true,
    "num_learned_tokens": 2,
    "map_heads": 24,
    "decoder_depth": 12,
    "decoder_embed_dim": 768,
    "decoder_n_heads": 12,
    "use_qk_norm": true,
    "n_codes": 32,
    "grid_size": 16,
    "embed_tokens": 256
  }
}