Caoza committed on
Commit
1c3643f
·
verified ·
1 Parent(s): 8663125

Upload folder using huggingface_hub

Browse files
diffusion/ckpts_new/decoder_step0100000.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatMeshDecodernew",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 768,
6
+ "phy_channels": 2048,
7
+ "latent_channels": 8,
8
+ "num_blocks": 12,
9
+ "num_heads": 12,
10
+ "mlp_ratio": 4,
11
+ "attn_mode": "swin",
12
+ "window_size": 8,
13
+ "use_fp16": true,
14
+ "representation_config": {
15
+ "use_color": true
16
+ }
17
+ }
18
+ }
diffusion/ckpts_new/decoder_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a460d8ea685f97c3d320b03aabd3c5ae55cf94ef4e3bd1136860521f1075dcd
3
+ size 1281392840
diffusion/ckpts_new/denoiser_phy_step0700000.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatFlowModelphy",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 14,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 2,
13
+ "num_io_res_blocks": 2,
14
+ "io_block_channels": [128],
15
+ "pe_mode": "ape",
16
+ "qk_rms_norm": true,
17
+ "use_fp16": true
18
+ }
19
+ }
diffusion/ckpts_new/denoiser_phy_step0700000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e9b76b5f4c6663f87539151eb6b40b209e9bb2b09ef5f18feb58e8372a7a60
3
+ size 1619037489
diffusion/ckpts_new/denoiser_step0700000.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SLatFlowModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1024,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 24,
10
+ "num_heads": 16,
11
+ "mlp_ratio": 4,
12
+ "patch_size": 2,
13
+ "num_io_res_blocks": 2,
14
+ "io_block_channels": [
15
+ 128
16
+ ],
17
+ "pe_mode": "ape",
18
+ "qk_rms_norm": true,
19
+ "use_fp16": true
20
+ }
21
+ }
diffusion/ckpts_new/denoiser_step0700000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf3356a382e13e85e18c282a0963e5d3633b6bee1f29e974258c04a1ff1ac9b
3
+ size 2822875445
diffusion/ckpts_new/property_decoder_step0100000.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "PropertyDecoder",
3
+ "args": {
4
+ "resolution": 64,
5
+ "model_channels": 2048,
6
+ "latent_channels": 8,
7
+ "num_blocks": 4,
8
+ "num_heads": 16,
9
+ "mlp_ratio": 4,
10
+ "attn_mode": "swin",
11
+ "window_size": 8,
12
+ "use_fp16": true,
13
+ "representation_config": {
14
+ "use_color": true
15
+ }
16
+ }
17
+ }
diffusion/ckpts_new/property_decoder_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab977e5c18afd73a60cee0a44a8c75bb93debabd041dbe1b914902f3dbab38d4
3
+ size 1099504133
diffusion/ckpts_new/property_output_step0100000.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "PropertyOutput",
3
+ "args": {
4
+ "model_channels": 32,
5
+ "output_channels_lang": 3072,
6
+ "output_channels_phy": 14,
7
+ "use_fp16": true
8
+ }
9
+ }
diffusion/ckpts_new/property_output_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ecaed44d9511e2b2dd800c4235e629567cba21053cd3b55cdb22008c32bbf2
3
+ size 1099495424
diffusion/pipeline.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "TrellisImageTo3DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "/mnt/petrelfs/caoziang.p/3D_generation/phyX/docker_test/new/TRELLIS/pretrain/ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "/mnt/petrelfs/caoziang.p/3D_generation/phyX/docker_test/new/TRELLIS/pretrain/ckpts/ss_flow_img_dit_L_16l8_fp16",
7
+ "slat_decoder_gs": "/mnt/petrelfs/caoziang.p/3D_generation/phyX/docker_test/new/TRELLIS/pretrain/ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16",
8
+ "slat_decoder_rf": "/mnt/petrelfs/caoziang.p/3D_generation/phyX/docker_test/new/TRELLIS/pretrain/ckpts/slat_dec_rf_swin8_B_64l8r16_fp16",
9
+
10
+ "slat_decoder_mesh": "ckpts_new/decoder_step0100000.pt",
11
+ "slat_decoder_phy": "ckpts_new/property_decoder_step0100000.pt",
12
+ "slat_decoder_output": "ckpts_new/property_output_step0100000.pt",
13
+ "slat_flow_model": "ckpts_new/denoiser_step0700000.pt",
14
+ "slat_flow_model_phy": "ckpts_new/denoiser_phy_step0700000.pt"
15
+ },
16
+ "sparse_structure_sampler": {
17
+ "name": "FlowEulerGuidanceIntervalSampler",
18
+ "args": {
19
+ "sigma_min": 1e-5
20
+ },
21
+ "params": {
22
+ "steps": 25,
23
+ "cfg_strength": 5.0,
24
+ "cfg_interval": [0.5, 1.0],
25
+ "rescale_t": 3.0
26
+ }
27
+ },
28
+ "slat_sampler": {
29
+ "name": "FlowEulerGuidanceIntervalSampler",
30
+ "args": {
31
+ "sigma_min": 1e-5
32
+ },
33
+ "params": {
34
+ "steps": 25,
35
+ "cfg_strength": 5.0,
36
+ "cfg_interval": [0.5, 1.0],
37
+ "rescale_t": 3.0
38
+ }
39
+ },
40
+ "slat_normalization": {
41
+ "mean": [
42
+ -2.1687545776367188,
43
+ -0.004347046371549368,
44
+ -0.13352349400520325,
45
+ -0.08418072760105133,
46
+ -0.5271206498146057,
47
+ 0.7238689064979553,
48
+ -1.1414450407028198,
49
+ 1.2039363384246826
50
+ ],
51
+ "std": [
52
+ 2.377650737762451,
53
+ 2.386378288269043,
54
+ 2.124418020248413,
55
+ 2.1748552322387695,
56
+ 2.663944721221924,
57
+ 2.371192216873169,
58
+ 2.6217446327209473,
59
+ 2.684523105621338
60
+ ],
61
+ "mean_phy": [
62
+ -2.1507165,
63
+ -0.9456348,
64
+ -2.0234883,
65
+ -0.5949867,
66
+ -3.608296 ,
67
+ -1.062877 ,
68
+ -3.288852 ,
69
+ -1.0749111
70
+ ],
71
+ "std_phy": [
72
+ 0.6931998 ,
73
+ 0.9221464 ,
74
+ 0.6542199 ,
75
+ 0.6594776 ,
76
+ 0.8451334 ,
77
+ 0.594917 ,
78
+ 0.69759405,
79
+ 1.1614994
80
+ ]
81
+ },
82
+ "image_cond_model": "dinov2_vitl14_reg"
83
+ }
84
+ }
vae/ckpts/decoder_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a460d8ea685f97c3d320b03aabd3c5ae55cf94ef4e3bd1136860521f1075dcd
3
+ size 1281392840
vae/ckpts/property_decoder_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab977e5c18afd73a60cee0a44a8c75bb93debabd041dbe1b914902f3dbab38d4
3
+ size 1099504133
vae/ckpts/property_encoder_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e5ae16b65d2f29e014a898a06d7e555a3ad7f18a67ebe336f2f9208c860bcc
3
+ size 1099500741
vae/ckpts/property_output_step0100000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ecaed44d9511e2b2dd800c4235e629567cba21053cd3b55cdb22008c32bbf2
3
+ size 1099495424
vae/config.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "models": {
3
+ "property_encoder": {
4
+ "name": "ElasticPropertyEncoder",
5
+ "args": {
6
+ "resolution": 64,
7
+ "in_channels": 3072,
8
+ "in_channels_phy": 14,
9
+ "model_channels": 768,
10
+ "latent_channels": 8,
11
+ "num_blocks": 4,
12
+ "num_heads": 12,
13
+ "mlp_ratio": 4,
14
+ "attn_mode": "swin",
15
+ "window_size": 8,
16
+ "use_fp16": true
17
+ }
18
+ },
19
+ "property_decoder": {
20
+ "name": "ElasticPropertyDecoder",
21
+ "args": {
22
+ "resolution": 64,
23
+ "model_channels": 2048,
24
+ "latent_channels": 8,
25
+ "num_blocks": 4,
26
+ "num_heads": 16,
27
+ "mlp_ratio": 4,
28
+ "attn_mode": "swin",
29
+ "window_size": 8,
30
+ "use_fp16": true,
31
+ "representation_config": {
32
+ "use_color": true
33
+ }
34
+ }
35
+ },
36
+ "property_output": {
37
+ "name": "PropertyOutput",
38
+ "args": {
39
+ "model_channels": 32,
40
+ "output_channels_lang": 3072,
41
+ "output_channels_phy": 14,
42
+ "use_fp16": true
43
+ }
44
+ },
45
+ "decoder": {
46
+ "name": "ElasticSLatMeshDecodernew",
47
+ "args": {
48
+ "resolution": 64,
49
+ "model_channels": 768,
50
+ "phy_channels": 2048,
51
+ "latent_channels": 8,
52
+ "num_blocks": 12,
53
+ "num_heads": 12,
54
+ "mlp_ratio": 4,
55
+ "attn_mode": "swin",
56
+ "window_size": 8,
57
+ "use_fp16": true,
58
+ "representation_config": {
59
+ "use_color": true
60
+ }
61
+ }
62
+ }
63
+ },
64
+ "dataset": {
65
+ "name": "Slat2RenderGeomesh",
66
+ "args": {
67
+ "image_size": 384,
68
+ "latent_model": "dinov2_vitl14_reg_slat_enc_swin8_B_64l8_fp16",
69
+ "min_aesthetic_score": 4.5,
70
+ "max_num_voxels": 28000
71
+ }
72
+ },
73
+ "trainer": {
74
+ "name": "SLatVaeMeshTrainer",
75
+ "args": {
76
+ "onlyphy_property": true,
77
+ "max_steps": 1000000,
78
+ "batch_size_per_gpu": 4,
79
+ "batch_split": 4,
80
+ "optimizer": {
81
+ "name": "AdamW",
82
+ "args": {
83
+ "lr": 0.0001,
84
+ "weight_decay": 0.0
85
+ }
86
+ },
87
+ "ema_rate": [
88
+ 0.9999
89
+ ],
90
+ "fp16_mode": "inflat_all",
91
+ "fp16_scale_growth": 0.001,
92
+ "elastic": {
93
+ "name": "LinearMemoryController",
94
+ "args": {
95
+ "target_ratio": 0.6,
96
+ "max_mem_ratio_start": 0.5
97
+ }
98
+ },
99
+ "grad_clip": {
100
+ "name": "AdaptiveGradClipper",
101
+ "args": {
102
+ "max_norm": 1.0,
103
+ "clip_percentile": 95
104
+ }
105
+ },
106
+ "i_log": 10,
107
+ "i_sample": 5000,
108
+ "i_save": 10000,
109
+ "lambda_ssim": 0.2,
110
+ "lambda_lpips": 0.2,
111
+ "lambda_tsdf": 0.01,
112
+ "lambda_depth": 10.0,
113
+ "lambda_color": 0.1,
114
+ "lambda_kl": 1e-06,
115
+ "depth_loss_type": "smooth_l1"
116
+ }
117
+ }
118
+ }