Yang2001 committed on
Commit
2bea032
·
verified ·
1 Parent(s): fcb8f0c

Upload Pixal3D-T model weights and pipeline config

Browse files
ckpts/shape_dec_next_dc_f16c32_fp16.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "FlexiDualGridVaeDecoder",
3
+ "args": {
4
+ "resolution": 256,
5
+ "model_channels": [1024, 512, 256, 128, 64],
6
+ "latent_channels": 32,
7
+ "num_blocks": [4, 16, 8, 4, 0],
8
+ "block_type": [
9
+ "SparseConvNeXtBlock3d",
10
+ "SparseConvNeXtBlock3d",
11
+ "SparseConvNeXtBlock3d",
12
+ "SparseConvNeXtBlock3d",
13
+ "SparseConvNeXtBlock3d"
14
+ ],
15
+ "up_block_type": [
16
+ "SparseResBlockC2S3d",
17
+ "SparseResBlockC2S3d",
18
+ "SparseResBlockC2S3d",
19
+ "SparseResBlockC2S3d"
20
+ ],
21
+ "block_args": [{}, {}, {}, {}, {}],
22
+ "use_fp16": true
23
+ }
24
+ }
ckpts/shape_dec_next_dc_f16c32_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b718d3e43e4f8780e9a24ac6fff231811a67e3b058e336e10fe654c911d581
3
+ size 948490494
ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ElasticSLatFlowModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 32,
6
+ "out_channels": 32,
7
+ "model_channels": 1536,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 30,
10
+ "num_heads": 12,
11
+ "mlp_ratio": 5.3334,
12
+ "pe_mode": "rope",
13
+ "share_mod": true,
14
+ "initialization": "scaled",
15
+ "qk_rms_norm": true,
16
+ "qk_rms_norm_cross": true,
17
+ "image_attn_mode": "proj",
18
+ "proj_in_channels": 2048
19
+ }
20
+ }
ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e6d1df0e62bc5bc863320d1d63d6b996235eef4abb8b851e878c88f316369f1
3
+ size 5546764048
ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ElasticSLatFlowModel",
3
+ "args": {
4
+ "resolution": 32,
5
+ "in_channels": 32,
6
+ "out_channels": 32,
7
+ "model_channels": 1536,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 30,
10
+ "num_heads": 12,
11
+ "mlp_ratio": 5.3334,
12
+ "pe_mode": "rope",
13
+ "share_mod": true,
14
+ "initialization": "scaled",
15
+ "qk_rms_norm": true,
16
+ "qk_rms_norm_cross": true,
17
+ "image_attn_mode": "proj",
18
+ "proj_in_channels": 2048
19
+ }
20
+ }
ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63569838c1da93f010e2fa805b202ecf802cc758830230abac202a81b70e5b45
3
+ size 5546764048
ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ElasticSLatFlowModel",
3
+ "args": {
4
+ "resolution": 64,
5
+ "in_channels": 64,
6
+ "out_channels": 32,
7
+ "model_channels": 1536,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 30,
10
+ "num_heads": 12,
11
+ "mlp_ratio": 5.3334,
12
+ "pe_mode": "rope",
13
+ "share_mod": true,
14
+ "initialization": "scaled",
15
+ "qk_rms_norm": true,
16
+ "qk_rms_norm_cross": true,
17
+ "image_attn_mode": "proj",
18
+ "proj_in_channels": 2048
19
+ }
20
+ }
ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65df179ad02f9e92bc18335d7861fadd95f268e6249217352468dd83cb79d1d0
3
+ size 5546960656
ckpts/ss_dec_conv3d_16l8_fp16.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "name": "SparseStructureDecoder",
4
+ "args": {
5
+ "out_channels": 1,
6
+ "latent_channels": 8,
7
+ "num_res_blocks": 2,
8
+ "num_res_blocks_middle": 2,
9
+ "channels": [512, 128, 32],
10
+ "use_fp16": true
11
+ }
12
+ }
ckpts/ss_dec_conv3d_16l8_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
3
+ size 147591972
ckpts/ss_flow_img_dit_1_3B_64_bf16.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseStructureFlowModel",
3
+ "args": {
4
+ "resolution": 16,
5
+ "in_channels": 8,
6
+ "out_channels": 8,
7
+ "model_channels": 1536,
8
+ "cond_channels": 1024,
9
+ "num_blocks": 30,
10
+ "num_heads": 12,
11
+ "mlp_ratio": 5.3334,
12
+ "pe_mode": "rope",
13
+ "share_mod": true,
14
+ "initialization": "scaled",
15
+ "qk_rms_norm": true,
16
+ "qk_rms_norm_cross": true,
17
+ "image_attn_mode": "proj"
18
+ }
19
+ }
ckpts/ss_flow_img_dit_1_3B_64_bf16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0147bb347eb61fa81c304cc68c7fd69d447e91870357a549b47e1d0fd77242
3
+ size 5359822584
ckpts/tex_dec_next_dc_f16c32_fp16.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "SparseUnetVaeDecoder",
3
+ "args": {
4
+ "out_channels": 6,
5
+ "model_channels": [1024, 512, 256, 128, 64],
6
+ "latent_channels": 32,
7
+ "num_blocks": [4, 16, 8, 4, 0],
8
+ "block_type": [
9
+ "SparseConvNeXtBlock3d",
10
+ "SparseConvNeXtBlock3d",
11
+ "SparseConvNeXtBlock3d",
12
+ "SparseConvNeXtBlock3d",
13
+ "SparseConvNeXtBlock3d"
14
+ ],
15
+ "up_block_type": [
16
+ "SparseResBlockC2S3d",
17
+ "SparseResBlockC2S3d",
18
+ "SparseResBlockC2S3d",
19
+ "SparseResBlockC2S3d"
20
+ ],
21
+ "block_args": [{}, {}, {}, {}, {}],
22
+ "pred_subdiv": false,
23
+ "use_fp16": true
24
+ }
25
+ }
ckpts/tex_dec_next_dc_f16c32_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ea69addea2ecd9312910f5f548234665eef51c088386180b7cd5b258645e3c
3
+ size 948458812
pipeline.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Trellis2ImageTo3DPipeline",
3
+ "args": {
4
+ "models": {
5
+ "sparse_structure_decoder": "ckpts/ss_dec_conv3d_16l8_fp16",
6
+ "sparse_structure_flow_model": "ckpts/ss_flow_img_dit_1_3B_64_bf16",
7
+ "shape_slat_decoder": "ckpts/shape_dec_next_dc_f16c32_fp16",
8
+ "shape_slat_flow_model_512": "ckpts/slat_flow_img2shape_dit_1_3B_512_bf16",
9
+ "shape_slat_flow_model_1024": "ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16",
10
+ "tex_slat_decoder": "ckpts/tex_dec_next_dc_f16c32_fp16",
11
+ "tex_slat_flow_model_1024": "ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16"
12
+ },
13
+ "sparse_structure_sampler": {
14
+ "name": "FlowEulerGuidanceIntervalSampler",
15
+ "args": {
16
+ "sigma_min": 1e-5
17
+ },
18
+ "params": {
19
+ "steps": 12,
20
+ "guidance_strength": 7.5,
21
+ "guidance_rescale": 0.7,
22
+ "guidance_interval": [0.6, 1.0],
23
+ "rescale_t": 5.0
24
+ }
25
+ },
26
+ "shape_slat_sampler": {
27
+ "name": "FlowEulerGuidanceIntervalSampler",
28
+ "args": {
29
+ "sigma_min": 1e-5
30
+ },
31
+ "params": {
32
+ "steps": 12,
33
+ "guidance_strength": 7.5,
34
+ "guidance_rescale": 0.5,
35
+ "guidance_interval": [0.6, 1.0],
36
+ "rescale_t": 3.0
37
+ }
38
+ },
39
+ "shape_slat_normalization": {
40
+ "mean": [
41
+ 0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
42
+ -0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
43
+ 0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
44
+ -0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
45
+ ],
46
+ "std": [
47
+ 5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
48
+ 5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
49
+ 4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
50
+ 5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
51
+ ]
52
+ },
53
+ "tex_slat_sampler": {
54
+ "name": "FlowEulerGuidanceIntervalSampler",
55
+ "args": {
56
+ "sigma_min": 1e-5
57
+ },
58
+ "params": {
59
+ "steps": 12,
60
+ "guidance_strength": 1.0,
61
+ "guidance_rescale": 0.0,
62
+ "guidance_interval": [0.6, 0.9],
63
+ "rescale_t": 3.0
64
+ }
65
+ },
66
+ "tex_slat_normalization": {
67
+ "mean": [
68
+ 3.501659, 2.212398, 2.226094, 0.251093, -0.026248, -0.687364, 0.439898, -0.928075,
69
+ 0.029398, -0.339596, -0.869527, 1.038479, -0.972385, 0.126042, -1.129303, 0.455149,
70
+ -1.209521, 2.069067, 0.544735, 2.569128, -0.323407, 2.293000, -1.925608, -1.217717,
71
+ 1.213905, 0.971588, -0.023631, 0.106750, 2.021786, 0.250524, -0.662387, -0.768862
72
+ ],
73
+ "std": [
74
+ 2.665652, 2.743913, 2.765121, 2.595319, 3.037293, 2.291316, 2.144656, 2.911822,
75
+ 2.969419, 2.501689, 2.154811, 3.163343, 2.621215, 2.381943, 3.186697, 3.021588,
76
+ 2.295916, 3.234985, 3.233086, 2.260140, 2.874801, 2.810596, 3.292720, 2.674999,
77
+ 2.680878, 2.372054, 2.451546, 2.353556, 2.995195, 2.379849, 2.786195, 2.775190
78
+ ]
79
+ },
80
+ "image_cond_model": {
81
+ "name": "DinoV3FeatureExtractor",
82
+ "args": {
83
+ "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m"
84
+ }
85
+ },
86
+ "rembg_model": {
87
+ "name": "BiRefNet",
88
+ "args": {
89
+ "model_name": "briaai/RMBG-2.0"
90
+ }
91
+ },
92
+ "default_pipeline_type": "1536_cascade"
93
+ }
94
+ }