Spaces:
Running on Zero
Update ActionMesh space
Browse files
- .gitattributes +1 -0
- app.py +21 -25
- blender-3.5.1-linux-x64.tar.xz → assets/eagle.gif +2 -2
- assets/eagle/00.png +0 -0
- assets/eagle/01.png +0 -0
- assets/eagle/02.png +0 -0
- assets/eagle/03.png +0 -0
- assets/eagle/04.png +0 -0
- assets/eagle/05.png +0 -0
- assets/eagle/06.png +0 -0
- assets/eagle/07.png +0 -0
- assets/eagle/08.png +0 -0
- assets/eagle/09.png +0 -0
- assets/eagle/10.png +0 -0
- assets/eagle/11.png +0 -0
- assets/eagle/12.png +0 -0
- assets/eagle/13.png +0 -0
- assets/eagle/14.png +0 -0
- assets/eagle/15.png +0 -0
- gradio_pipeline.py +40 -26
.gitattributes
CHANGED
|
@@ -148,6 +148,7 @@ assets/davis_flamingo/10.png filter=lfs diff=lfs merge=lfs -text
|
|
| 148 |
assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 151 |
assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
|
| 152 |
assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 148 |
assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
|
| 149 |
assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
|
| 150 |
assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
assets/eagle.gif filter=lfs diff=lfs merge=lfs -text
|
| 152 |
assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
|
| 153 |
assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
|
| 154 |
assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -104,7 +104,6 @@ def setup_blender() -> Path:
|
|
| 104 |
shutil.rmtree(blender_dir)
|
| 105 |
raise RuntimeError(f"Failed to extract Blender: {e}")
|
| 106 |
|
| 107 |
-
|
| 108 |
# Verify installation
|
| 109 |
if not blender_executable.exists():
|
| 110 |
raise RuntimeError(f"Blender executable not found: {blender_executable}")
|
|
@@ -499,19 +498,14 @@ def create_demo() -> gr.Blocks:
|
|
| 499 |
# 🎬 ActionMesh: Video to Animated 3D Mesh
|
| 500 |
|
| 501 |
[**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
|
| 502 |
-
[Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](
|
| 503 |
**[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
|
| 504 |
|
| 505 |
Generate animated 3D meshes from video input using ActionMesh.
|
| 506 |
|
| 507 |
-
**Instructions:**
|
| 508 |
-
|
| 509 |
-
2. Click "Generate"
|
| 510 |
-
3. View the animated 4D mesh in the viewer
|
| 511 |
-
4. Download the animated GLB mesh (ready for Blender)
|
| 512 |
|
| 513 |
-
⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
|
| 514 |
-
We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
|
| 515 |
"""
|
| 516 |
)
|
| 517 |
|
|
@@ -567,22 +561,23 @@ def create_demo() -> gr.Blocks:
|
|
| 567 |
info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
|
| 568 |
)
|
| 569 |
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
|
|
|
| 578 |
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
|
| 587 |
generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
|
| 588 |
|
|
@@ -679,7 +674,8 @@ def create_demo() -> gr.Blocks:
|
|
| 679 |
gr.Markdown(
|
| 680 |
"""
|
| 681 |
---
|
| 682 |
-
**
|
|
|
|
| 683 |
"""
|
| 684 |
)
|
| 685 |
|
|
|
|
| 104 |
shutil.rmtree(blender_dir)
|
| 105 |
raise RuntimeError(f"Failed to extract Blender: {e}")
|
| 106 |
|
|
|
|
| 107 |
# Verify installation
|
| 108 |
if not blender_executable.exists():
|
| 109 |
raise RuntimeError(f"Blender executable not found: {blender_executable}")
|
|
|
|
| 498 |
# 🎬 ActionMesh: Video to Animated 3D Mesh
|
| 499 |
|
| 500 |
[**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
|
| 501 |
+
[Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](https://geometry.cs.ucl.ac.uk/), [Tom Monnier](https://tmonnier.com/)
|
| 502 |
**[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
|
| 503 |
|
| 504 |
Generate animated 3D meshes from video input using ActionMesh.
|
| 505 |
|
| 506 |
+
**Instructions:** Upload video/images → Click "Generate" → Download animated 4D mesh (.GLB, Blender-ready)
|
| 507 |
+
⚠️ *Input limited to 16 frames. Extra frames discarded.*
|
|
|
|
|
|
|
|
|
|
| 508 |
|
|
|
|
|
|
|
| 509 |
"""
|
| 510 |
)
|
| 511 |
|
|
|
|
| 561 |
info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
|
| 562 |
)
|
| 563 |
|
| 564 |
+
with gr.Accordion("More Settings", open=False):
|
| 565 |
+
reference_frame = gr.Slider(
|
| 566 |
+
minimum=1,
|
| 567 |
+
maximum=16,
|
| 568 |
+
value=1,
|
| 569 |
+
step=1,
|
| 570 |
+
label="Reference Frame",
|
| 571 |
+
info="Frame used as reference for 3D generation (1 recommended)",
|
| 572 |
+
)
|
| 573 |
|
| 574 |
+
seed = gr.Slider(
|
| 575 |
+
minimum=0,
|
| 576 |
+
maximum=100,
|
| 577 |
+
value=44,
|
| 578 |
+
step=1,
|
| 579 |
+
label="Random Seed",
|
| 580 |
+
)
|
| 581 |
|
| 582 |
generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
|
| 583 |
|
|
|
|
| 674 |
gr.Markdown(
|
| 675 |
"""
|
| 676 |
---
|
| 677 |
+
⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
|
| 678 |
+
We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
|
| 679 |
"""
|
| 680 |
)
|
| 681 |
|
blender-3.5.1-linux-x64.tar.xz → assets/eagle.gif
RENAMED
|
File without changes
|
assets/eagle/00.png
ADDED
|
assets/eagle/01.png
ADDED
|
assets/eagle/02.png
ADDED
|
assets/eagle/03.png
ADDED
|
assets/eagle/04.png
ADDED
|
assets/eagle/05.png
ADDED
|
assets/eagle/06.png
ADDED
|
assets/eagle/07.png
ADDED
|
assets/eagle/08.png
ADDED
|
assets/eagle/09.png
ADDED
|
assets/eagle/10.png
ADDED
|
assets/eagle/11.png
ADDED
|
assets/eagle/12.png
ADDED
|
assets/eagle/13.png
ADDED
|
assets/eagle/14.png
ADDED
|
assets/eagle/15.png
ADDED
|
gradio_pipeline.py
CHANGED
|
@@ -44,7 +44,6 @@ class GradioPipeline(ActionMeshPipeline):
|
|
| 44 |
progress_callback: Optional[ProgressCallback] = None,
|
| 45 |
) -> list[trimesh.Trimesh]:
|
| 46 |
"""Generate an animated mesh sequence with progress tracking."""
|
| 47 |
-
# Apply parameter overrides
|
| 48 |
if stage_0_steps is not None:
|
| 49 |
self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
|
| 50 |
if stage_1_steps is not None:
|
|
@@ -58,56 +57,71 @@ class GradioPipeline(ActionMeshPipeline):
|
|
| 58 |
if anchor_idx is not None:
|
| 59 |
self.cfg.anchor_idx = anchor_idx
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# -- Preprocessing: remove background
|
|
|
|
| 62 |
input.frames = self.background_removal.process_images(input.frames)
|
|
|
|
| 63 |
|
| 64 |
# -- Preprocessing: grouped cropping & padding
|
| 65 |
input.frames = self.image_process.process_images(input.frames)
|
| 66 |
|
| 67 |
with torch.inference_mode():
|
| 68 |
# -- Stage 0: generate anchor 3D mesh & latent from single frame
|
|
|
|
| 69 |
latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
|
|
|
|
| 70 |
|
| 71 |
if progress_callback is not None:
|
| 72 |
progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
f"Stage 1: step {step}/{total_steps} ",
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
# Stage 2 callback: 90% -> 100%
|
| 86 |
-
def stage2_callback(
|
| 87 |
-
step: int, total_steps: int, window_idx: int, total_windows: int
|
| 88 |
-
) -> None:
|
| 89 |
-
if progress_callback is not None:
|
| 90 |
-
window_progress = (window_idx + step / total_steps) / total_windows
|
| 91 |
-
progress_callback(
|
| 92 |
-
0.90 + 0.10 * window_progress,
|
| 93 |
-
f"Stage 2: step {step}/{total_steps} ",
|
| 94 |
-
)
|
| 95 |
-
|
| 96 |
-
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
|
| 97 |
-
# -- Stage I: denoise synchronized 3D latents
|
| 98 |
latent_bank = self.generate_3d_latents(
|
| 99 |
input,
|
|
|
|
| 100 |
latent_bank=latent_bank,
|
| 101 |
seed=seed,
|
| 102 |
step_callback=stage1_callback,
|
| 103 |
)
|
|
|
|
| 104 |
|
| 105 |
-
|
|
|
|
|
|
|
| 106 |
mesh_bank = self.generate_mesh_animation(
|
| 107 |
latent_bank=latent_bank,
|
| 108 |
mesh_bank=mesh_bank,
|
| 109 |
step_callback=stage2_callback,
|
| 110 |
)
|
|
|
|
| 111 |
|
| 112 |
if progress_callback is not None:
|
| 113 |
progress_callback(1.0, "Pipeline complete!")
|
|
|
|
| 44 |
progress_callback: Optional[ProgressCallback] = None,
|
| 45 |
) -> list[trimesh.Trimesh]:
|
| 46 |
"""Generate an animated mesh sequence with progress tracking."""
|
|
|
|
| 47 |
if stage_0_steps is not None:
|
| 48 |
self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
|
| 49 |
if stage_1_steps is not None:
|
|
|
|
| 57 |
if anchor_idx is not None:
|
| 58 |
self.cfg.anchor_idx = anchor_idx
|
| 59 |
|
| 60 |
+
# Stage 1 callback: 10% -> 90%
|
| 61 |
+
def stage1_callback(
|
| 62 |
+
step: int, total_steps: int, window_idx: int, total_windows: int
|
| 63 |
+
) -> None:
|
| 64 |
+
if progress_callback is not None:
|
| 65 |
+
window_progress = (window_idx + step / total_steps) / total_windows
|
| 66 |
+
progress_callback(
|
| 67 |
+
0.10 + 0.80 * window_progress,
|
| 68 |
+
f"Stage 1: step {step}/{total_steps} ",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
# Stage 2 callback: 90% -> 100%
|
| 72 |
+
def stage2_callback(
|
| 73 |
+
step: int, total_steps: int, window_idx: int, total_windows: int
|
| 74 |
+
) -> None:
|
| 75 |
+
if progress_callback is not None:
|
| 76 |
+
window_progress = (window_idx + step / total_steps) / total_windows
|
| 77 |
+
progress_callback(
|
| 78 |
+
0.90 + 0.10 * window_progress,
|
| 79 |
+
f"Stage 2: step {step}/{total_steps} ",
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
# -- Preprocessing: remove background
|
| 83 |
+
self._load_background_removal()
|
| 84 |
input.frames = self.background_removal.process_images(input.frames)
|
| 85 |
+
self._unload_model("background_removal")
|
| 86 |
|
| 87 |
# -- Preprocessing: grouped cropping & padding
|
| 88 |
input.frames = self.image_process.process_images(input.frames)
|
| 89 |
|
| 90 |
with torch.inference_mode():
|
| 91 |
# -- Stage 0: generate anchor 3D mesh & latent from single frame
|
| 92 |
+
self._load_image_to_3d()
|
| 93 |
latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
|
| 94 |
+
self._unload_model("image_to_3d_pipe")
|
| 95 |
|
| 96 |
if progress_callback is not None:
|
| 97 |
progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
|
| 98 |
|
| 99 |
+
# -- Pre-compute context embeddings for all frames
|
| 100 |
+
self._load_image_encoder()
|
| 101 |
+
context = self.encode_all_frames(input)
|
| 102 |
+
self._unload_model("image_encoder")
|
| 103 |
+
|
| 104 |
+
# -- Stage I: denoise synchronized 3D latents
|
| 105 |
+
self._load_temporal_denoiser()
|
| 106 |
+
with torch.autocast(device_type="cuda", dtype=self._dtype):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
latent_bank = self.generate_3d_latents(
|
| 108 |
input,
|
| 109 |
+
context=context,
|
| 110 |
latent_bank=latent_bank,
|
| 111 |
seed=seed,
|
| 112 |
step_callback=stage1_callback,
|
| 113 |
)
|
| 114 |
+
self._unload_model("temporal_3D_denoiser")
|
| 115 |
|
| 116 |
+
self._load_temporal_vae()
|
| 117 |
+
# -- Stage II: decode latents into mesh displacements
|
| 118 |
+
with torch.autocast(device_type="cuda", dtype=self._dtype):
|
| 119 |
mesh_bank = self.generate_mesh_animation(
|
| 120 |
latent_bank=latent_bank,
|
| 121 |
mesh_bank=mesh_bank,
|
| 122 |
step_callback=stage2_callback,
|
| 123 |
)
|
| 124 |
+
self._unload_model("temporal_3D_vae")
|
| 125 |
|
| 126 |
if progress_callback is not None:
|
| 127 |
progress_callback(1.0, "Pipeline complete!")
|