Upload Pixal3D-T model weights and pipeline config
Browse files
- ckpts/shape_dec_next_dc_f16c32_fp16.json +24 -0
- ckpts/shape_dec_next_dc_f16c32_fp16.safetensors +3 -0
- ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.json +20 -0
- ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.safetensors +3 -0
- ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.json +20 -0
- ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.safetensors +3 -0
- ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.json +20 -0
- ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.safetensors +3 -0
- ckpts/ss_dec_conv3d_16l8_fp16.json +12 -0
- ckpts/ss_dec_conv3d_16l8_fp16.safetensors +3 -0
- ckpts/ss_flow_img_dit_1_3B_64_bf16.json +19 -0
- ckpts/ss_flow_img_dit_1_3B_64_bf16.safetensors +3 -0
- ckpts/tex_dec_next_dc_f16c32_fp16.json +25 -0
- ckpts/tex_dec_next_dc_f16c32_fp16.safetensors +3 -0
- pipeline.json +94 -0
ckpts/shape_dec_next_dc_f16c32_fp16.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "FlexiDualGridVaeDecoder",
|
| 3 |
+
"args": {
|
| 4 |
+
"resolution": 256,
|
| 5 |
+
"model_channels": [1024, 512, 256, 128, 64],
|
| 6 |
+
"latent_channels": 32,
|
| 7 |
+
"num_blocks": [4, 16, 8, 4, 0],
|
| 8 |
+
"block_type": [
|
| 9 |
+
"SparseConvNeXtBlock3d",
|
| 10 |
+
"SparseConvNeXtBlock3d",
|
| 11 |
+
"SparseConvNeXtBlock3d",
|
| 12 |
+
"SparseConvNeXtBlock3d",
|
| 13 |
+
"SparseConvNeXtBlock3d"
|
| 14 |
+
],
|
| 15 |
+
"up_block_type": [
|
| 16 |
+
"SparseResBlockC2S3d",
|
| 17 |
+
"SparseResBlockC2S3d",
|
| 18 |
+
"SparseResBlockC2S3d",
|
| 19 |
+
"SparseResBlockC2S3d"
|
| 20 |
+
],
|
| 21 |
+
"block_args": [{}, {}, {}, {}, {}],
|
| 22 |
+
"use_fp16": true
|
| 23 |
+
}
|
| 24 |
+
}
|
ckpts/shape_dec_next_dc_f16c32_fp16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b718d3e43e4f8780e9a24ac6fff231811a67e3b058e336e10fe654c911d581
|
| 3 |
+
size 948490494
|
ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "ElasticSLatFlowModel",
|
| 3 |
+
"args": {
|
| 4 |
+
"resolution": 64,
|
| 5 |
+
"in_channels": 32,
|
| 6 |
+
"out_channels": 32,
|
| 7 |
+
"model_channels": 1536,
|
| 8 |
+
"cond_channels": 1024,
|
| 9 |
+
"num_blocks": 30,
|
| 10 |
+
"num_heads": 12,
|
| 11 |
+
"mlp_ratio": 5.3334,
|
| 12 |
+
"pe_mode": "rope",
|
| 13 |
+
"share_mod": true,
|
| 14 |
+
"initialization": "scaled",
|
| 15 |
+
"qk_rms_norm": true,
|
| 16 |
+
"qk_rms_norm_cross": true,
|
| 17 |
+
"image_attn_mode": "proj",
|
| 18 |
+
"proj_in_channels": 2048
|
| 19 |
+
}
|
| 20 |
+
}
|
ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e6d1df0e62bc5bc863320d1d63d6b996235eef4abb8b851e878c88f316369f1
|
| 3 |
+
size 5546764048
|
ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "ElasticSLatFlowModel",
|
| 3 |
+
"args": {
|
| 4 |
+
"resolution": 32,
|
| 5 |
+
"in_channels": 32,
|
| 6 |
+
"out_channels": 32,
|
| 7 |
+
"model_channels": 1536,
|
| 8 |
+
"cond_channels": 1024,
|
| 9 |
+
"num_blocks": 30,
|
| 10 |
+
"num_heads": 12,
|
| 11 |
+
"mlp_ratio": 5.3334,
|
| 12 |
+
"pe_mode": "rope",
|
| 13 |
+
"share_mod": true,
|
| 14 |
+
"initialization": "scaled",
|
| 15 |
+
"qk_rms_norm": true,
|
| 16 |
+
"qk_rms_norm_cross": true,
|
| 17 |
+
"image_attn_mode": "proj",
|
| 18 |
+
"proj_in_channels": 2048
|
| 19 |
+
}
|
| 20 |
+
}
|
ckpts/slat_flow_img2shape_dit_1_3B_512_bf16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63569838c1da93f010e2fa805b202ecf802cc758830230abac202a81b70e5b45
|
| 3 |
+
size 5546764048
|
ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "ElasticSLatFlowModel",
|
| 3 |
+
"args": {
|
| 4 |
+
"resolution": 64,
|
| 5 |
+
"in_channels": 64,
|
| 6 |
+
"out_channels": 32,
|
| 7 |
+
"model_channels": 1536,
|
| 8 |
+
"cond_channels": 1024,
|
| 9 |
+
"num_blocks": 30,
|
| 10 |
+
"num_heads": 12,
|
| 11 |
+
"mlp_ratio": 5.3334,
|
| 12 |
+
"pe_mode": "rope",
|
| 13 |
+
"share_mod": true,
|
| 14 |
+
"initialization": "scaled",
|
| 15 |
+
"qk_rms_norm": true,
|
| 16 |
+
"qk_rms_norm_cross": true,
|
| 17 |
+
"image_attn_mode": "proj",
|
| 18 |
+
"proj_in_channels": 2048
|
| 19 |
+
}
|
| 20 |
+
}
|
ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65df179ad02f9e92bc18335d7861fadd95f268e6249217352468dd83cb79d1d0
|
| 3 |
+
size 5546960656
|
ckpts/ss_dec_conv3d_16l8_fp16.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
{
|
| 3 |
+
"name": "SparseStructureDecoder",
|
| 4 |
+
"args": {
|
| 5 |
+
"out_channels": 1,
|
| 6 |
+
"latent_channels": 8,
|
| 7 |
+
"num_res_blocks": 2,
|
| 8 |
+
"num_res_blocks_middle": 2,
|
| 9 |
+
"channels": [512, 128, 32],
|
| 10 |
+
"use_fp16": true
|
| 11 |
+
}
|
| 12 |
+
}
|
ckpts/ss_dec_conv3d_16l8_fp16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c76d4a40519aa2d711cc263a8404105231ac26db31d946bed48b84fee79009a
|
| 3 |
+
size 147591972
|
ckpts/ss_flow_img_dit_1_3B_64_bf16.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "SparseStructureFlowModel",
|
| 3 |
+
"args": {
|
| 4 |
+
"resolution": 16,
|
| 5 |
+
"in_channels": 8,
|
| 6 |
+
"out_channels": 8,
|
| 7 |
+
"model_channels": 1536,
|
| 8 |
+
"cond_channels": 1024,
|
| 9 |
+
"num_blocks": 30,
|
| 10 |
+
"num_heads": 12,
|
| 11 |
+
"mlp_ratio": 5.3334,
|
| 12 |
+
"pe_mode": "rope",
|
| 13 |
+
"share_mod": true,
|
| 14 |
+
"initialization": "scaled",
|
| 15 |
+
"qk_rms_norm": true,
|
| 16 |
+
"qk_rms_norm_cross": true,
|
| 17 |
+
"image_attn_mode": "proj"
|
| 18 |
+
}
|
| 19 |
+
}
|
ckpts/ss_flow_img_dit_1_3B_64_bf16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d0147bb347eb61fa81c304cc68c7fd69d447e91870357a549b47e1d0fd77242
|
| 3 |
+
size 5359822584
|
ckpts/tex_dec_next_dc_f16c32_fp16.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "SparseUnetVaeDecoder",
|
| 3 |
+
"args": {
|
| 4 |
+
"out_channels": 6,
|
| 5 |
+
"model_channels": [1024, 512, 256, 128, 64],
|
| 6 |
+
"latent_channels": 32,
|
| 7 |
+
"num_blocks": [4, 16, 8, 4, 0],
|
| 8 |
+
"block_type": [
|
| 9 |
+
"SparseConvNeXtBlock3d",
|
| 10 |
+
"SparseConvNeXtBlock3d",
|
| 11 |
+
"SparseConvNeXtBlock3d",
|
| 12 |
+
"SparseConvNeXtBlock3d",
|
| 13 |
+
"SparseConvNeXtBlock3d"
|
| 14 |
+
],
|
| 15 |
+
"up_block_type": [
|
| 16 |
+
"SparseResBlockC2S3d",
|
| 17 |
+
"SparseResBlockC2S3d",
|
| 18 |
+
"SparseResBlockC2S3d",
|
| 19 |
+
"SparseResBlockC2S3d"
|
| 20 |
+
],
|
| 21 |
+
"block_args": [{}, {}, {}, {}, {}],
|
| 22 |
+
"pred_subdiv": false,
|
| 23 |
+
"use_fp16": true
|
| 24 |
+
}
|
| 25 |
+
}
|
ckpts/tex_dec_next_dc_f16c32_fp16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97ea69addea2ecd9312910f5f548234665eef51c088386180b7cd5b258645e3c
|
| 3 |
+
size 948458812
|
pipeline.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "Trellis2ImageTo3DPipeline",
|
| 3 |
+
"args": {
|
| 4 |
+
"models": {
|
| 5 |
+
"sparse_structure_decoder": "ckpts/ss_dec_conv3d_16l8_fp16",
|
| 6 |
+
"sparse_structure_flow_model": "ckpts/ss_flow_img_dit_1_3B_64_bf16",
|
| 7 |
+
"shape_slat_decoder": "ckpts/shape_dec_next_dc_f16c32_fp16",
|
| 8 |
+
"shape_slat_flow_model_512": "ckpts/slat_flow_img2shape_dit_1_3B_512_bf16",
|
| 9 |
+
"shape_slat_flow_model_1024": "ckpts/slat_flow_img2shape_dit_1_3B_1024_bf16",
|
| 10 |
+
"tex_slat_decoder": "ckpts/tex_dec_next_dc_f16c32_fp16",
|
| 11 |
+
"tex_slat_flow_model_1024": "ckpts/slat_flow_imgshape2tex_dit_1_3B_1024_bf16"
|
| 12 |
+
},
|
| 13 |
+
"sparse_structure_sampler": {
|
| 14 |
+
"name": "FlowEulerGuidanceIntervalSampler",
|
| 15 |
+
"args": {
|
| 16 |
+
"sigma_min": 1e-5
|
| 17 |
+
},
|
| 18 |
+
"params": {
|
| 19 |
+
"steps": 12,
|
| 20 |
+
"guidance_strength": 7.5,
|
| 21 |
+
"guidance_rescale": 0.7,
|
| 22 |
+
"guidance_interval": [0.6, 1.0],
|
| 23 |
+
"rescale_t": 5.0
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"shape_slat_sampler": {
|
| 27 |
+
"name": "FlowEulerGuidanceIntervalSampler",
|
| 28 |
+
"args": {
|
| 29 |
+
"sigma_min": 1e-5
|
| 30 |
+
},
|
| 31 |
+
"params": {
|
| 32 |
+
"steps": 12,
|
| 33 |
+
"guidance_strength": 7.5,
|
| 34 |
+
"guidance_rescale": 0.5,
|
| 35 |
+
"guidance_interval": [0.6, 1.0],
|
| 36 |
+
"rescale_t": 3.0
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"shape_slat_normalization": {
|
| 40 |
+
"mean": [
|
| 41 |
+
0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
|
| 42 |
+
-0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
|
| 43 |
+
0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
|
| 44 |
+
-0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
|
| 45 |
+
],
|
| 46 |
+
"std": [
|
| 47 |
+
5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
|
| 48 |
+
5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
|
| 49 |
+
4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
|
| 50 |
+
5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
"tex_slat_sampler": {
|
| 54 |
+
"name": "FlowEulerGuidanceIntervalSampler",
|
| 55 |
+
"args": {
|
| 56 |
+
"sigma_min": 1e-5
|
| 57 |
+
},
|
| 58 |
+
"params": {
|
| 59 |
+
"steps": 12,
|
| 60 |
+
"guidance_strength": 1.0,
|
| 61 |
+
"guidance_rescale": 0.0,
|
| 62 |
+
"guidance_interval": [0.6, 0.9],
|
| 63 |
+
"rescale_t": 3.0
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
"tex_slat_normalization": {
|
| 67 |
+
"mean": [
|
| 68 |
+
3.501659, 2.212398, 2.226094, 0.251093, -0.026248, -0.687364, 0.439898, -0.928075,
|
| 69 |
+
0.029398, -0.339596, -0.869527, 1.038479, -0.972385, 0.126042, -1.129303, 0.455149,
|
| 70 |
+
-1.209521, 2.069067, 0.544735, 2.569128, -0.323407, 2.293000, -1.925608, -1.217717,
|
| 71 |
+
1.213905, 0.971588, -0.023631, 0.106750, 2.021786, 0.250524, -0.662387, -0.768862
|
| 72 |
+
],
|
| 73 |
+
"std": [
|
| 74 |
+
2.665652, 2.743913, 2.765121, 2.595319, 3.037293, 2.291316, 2.144656, 2.911822,
|
| 75 |
+
2.969419, 2.501689, 2.154811, 3.163343, 2.621215, 2.381943, 3.186697, 3.021588,
|
| 76 |
+
2.295916, 3.234985, 3.233086, 2.260140, 2.874801, 2.810596, 3.292720, 2.674999,
|
| 77 |
+
2.680878, 2.372054, 2.451546, 2.353556, 2.995195, 2.379849, 2.786195, 2.775190
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
"image_cond_model": {
|
| 81 |
+
"name": "DinoV3FeatureExtractor",
|
| 82 |
+
"args": {
|
| 83 |
+
"model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m"
|
| 84 |
+
}
|
| 85 |
+
},
|
| 86 |
+
"rembg_model": {
|
| 87 |
+
"name": "BiRefNet",
|
| 88 |
+
"args": {
|
| 89 |
+
"model_name": "briaai/RMBG-2.0"
|
| 90 |
+
}
|
| 91 |
+
},
|
| 92 |
+
"default_pipeline_type": "1536_cascade"
|
| 93 |
+
}
|
| 94 |
+
}
|