{
  "models": {
    "property_encoder": {
      "name": "ElasticPropertyEncoder",
      "args": {
        "resolution": 64,
        "in_channels": 3072,
        "in_channels_phy": 14,
        "model_channels": 768,
        "latent_channels": 8,
        "num_blocks": 4,
        "num_heads": 12,
        "mlp_ratio": 4,
        "attn_mode": "swin",
        "window_size": 8,
        "use_fp16": true
      }
    },
    "property_decoder": {
      "name": "ElasticPropertyDecoder",
      "args": {
        "resolution": 64,
        "model_channels": 2048,
        "latent_channels": 8,
        "num_blocks": 4,
        "num_heads": 16,
        "mlp_ratio": 4,
        "attn_mode": "swin",
        "window_size": 8,
        "use_fp16": true,
        "representation_config": {
          "use_color": true
        }
      }
    },
    "property_output": {
      "name": "PropertyOutput",
      "args": {
        "model_channels": 32,
        "output_channels_lang": 3072,
        "output_channels_phy": 14,
        "use_fp16": true
      }
    },
    "decoder": {
      "name": "ElasticSLatMeshDecodernew",
      "args": {
        "resolution": 64,
        "model_channels": 768,
        "phy_channels": 2048,
        "latent_channels": 8,
        "num_blocks": 12,
        "num_heads": 12,
        "mlp_ratio": 4,
        "attn_mode": "swin",
        "window_size": 8,
        "use_fp16": true,
        "representation_config": {
          "use_color": true
        }
      }
    }
  },
  "dataset": {
    "name": "Slat2RenderGeomesh",
    "args": {
      "image_size": 384,
      "latent_model": "dinov2_vitl14_reg_slat_enc_swin8_B_64l8_fp16",
      "min_aesthetic_score": 4.5,
      "max_num_voxels": 28000
    }
  },
  "trainer": {
    "name": "SLatVaeMeshTrainer",
    "args": {
      "onlyphy_property": true,
      "max_steps": 1000000,
      "batch_size_per_gpu": 4,
      "batch_split": 4,
      "optimizer": {
        "name": "AdamW",
        "args": {
          "lr": 0.0001,
          "weight_decay": 0.0
        }
      },
      "ema_rate": [
        0.9999
      ],
      "fp16_mode": "inflat_all",
      "fp16_scale_growth": 0.001,
      "elastic": {
        "name": "LinearMemoryController",
        "args": {
          "target_ratio": 0.6,
          "max_mem_ratio_start": 0.5
        }
      },
      "grad_clip": {
        "name": "AdaptiveGradClipper",
        "args": {
          "max_norm": 1.0,
          "clip_percentile": 95
        }
      },
      "i_log": 10,
      "i_sample": 5000,
      "i_save": 10000,
      "lambda_ssim": 0.2,
      "lambda_lpips": 0.2,
      "lambda_tsdf": 0.01,
      "lambda_depth": 10.0,
      "lambda_color": 0.1,
      "lambda_kl": 1e-06,
      "depth_loss_type": "smooth_l1"
    }
  }
}