Remy committed on
Commit
b9913c3
·
verified ·
1 Parent(s): 74464c6

Update ActionMesh space

Browse files
.gitattributes CHANGED
@@ -148,6 +148,7 @@ assets/davis_flamingo/10.png filter=lfs diff=lfs merge=lfs -text
148
  assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
149
  assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
150
  assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
 
151
  assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
152
  assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
153
  assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
 
148
  assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
149
  assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
150
  assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
151
+ assets/eagle.gif filter=lfs diff=lfs merge=lfs -text
152
  assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
153
  assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
154
  assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -104,7 +104,6 @@ def setup_blender() -> Path:
104
  shutil.rmtree(blender_dir)
105
  raise RuntimeError(f"Failed to extract Blender: {e}")
106
 
107
-
108
  # Verify installation
109
  if not blender_executable.exists():
110
  raise RuntimeError(f"Blender executable not found: {blender_executable}")
@@ -499,19 +498,14 @@ def create_demo() -> gr.Blocks:
499
  # 🎬 ActionMesh: Video to Animated 3D Mesh
500
 
501
  [**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
502
- [Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](http://www0.cs.ucl.ac.uk/staff/n.mitra/), [Tom Monnier](https://tmonnier.com/)
503
  **[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
504
 
505
  Generate animated 3D meshes from video input using ActionMesh.
506
 
507
- **Instructions:**
508
- 1. Upload a video OR multiple images ⚠️ *Input is limited to exactly 16 frames. Extra frames will be discarded.*
509
- 2. Click "Generate"
510
- 3. View the animated 4D mesh in the viewer
511
- 4. Download the animated GLB mesh (ready for Blender)
512
 
513
- ⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
514
- We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
515
  """
516
  )
517
 
@@ -567,22 +561,23 @@ def create_demo() -> gr.Blocks:
567
  info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
568
  )
569
 
570
- reference_frame = gr.Slider(
571
- minimum=1,
572
- maximum=16,
573
- value=1,
574
- step=1,
575
- label="Reference Frame",
576
- info="Frame used as reference for 3D generation (1 recommended)",
577
- )
 
578
 
579
- seed = gr.Slider(
580
- minimum=0,
581
- maximum=100,
582
- value=44,
583
- step=1,
584
- label="Random Seed",
585
- )
586
 
587
  generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
588
 
@@ -679,7 +674,8 @@ def create_demo() -> gr.Blocks:
679
  gr.Markdown(
680
  """
681
  ---
682
- **Note:** This demo requires a GPU with sufficient VRAM.
 
683
  """
684
  )
685
 
 
104
  shutil.rmtree(blender_dir)
105
  raise RuntimeError(f"Failed to extract Blender: {e}")
106
 
 
107
  # Verify installation
108
  if not blender_executable.exists():
109
  raise RuntimeError(f"Blender executable not found: {blender_executable}")
 
498
  # 🎬 ActionMesh: Video to Animated 3D Mesh
499
 
500
  [**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
501
+ [Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](https://geometry.cs.ucl.ac.uk/), [Tom Monnier](https://tmonnier.com/)
502
  **[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
503
 
504
  Generate animated 3D meshes from video input using ActionMesh.
505
 
506
+ **Instructions:** Upload video/images → Click "Generate" → Download animated 4D mesh (.GLB, Blender-ready)
507
+ ⚠️ *Input limited to 16 frames. Extra frames discarded.*
 
 
 
508
 
 
 
509
  """
510
  )
511
 
 
561
  info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
562
  )
563
 
564
+ with gr.Accordion("More Settings", open=False):
565
+ reference_frame = gr.Slider(
566
+ minimum=1,
567
+ maximum=16,
568
+ value=1,
569
+ step=1,
570
+ label="Reference Frame",
571
+ info="Frame used as reference for 3D generation (1 recommended)",
572
+ )
573
 
574
+ seed = gr.Slider(
575
+ minimum=0,
576
+ maximum=100,
577
+ value=44,
578
+ step=1,
579
+ label="Random Seed",
580
+ )
581
 
582
  generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
583
 
 
674
  gr.Markdown(
675
  """
676
  ---
677
+ ⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
678
+ We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
679
  """
680
  )
681
 
blender-3.5.1-linux-x64.tar.xz → assets/eagle.gif RENAMED
File without changes
assets/eagle/00.png ADDED
assets/eagle/01.png ADDED
assets/eagle/02.png ADDED
assets/eagle/03.png ADDED
assets/eagle/04.png ADDED
assets/eagle/05.png ADDED
assets/eagle/06.png ADDED
assets/eagle/07.png ADDED
assets/eagle/08.png ADDED
assets/eagle/09.png ADDED
assets/eagle/10.png ADDED
assets/eagle/11.png ADDED
assets/eagle/12.png ADDED
assets/eagle/13.png ADDED
assets/eagle/14.png ADDED
assets/eagle/15.png ADDED
gradio_pipeline.py CHANGED
@@ -44,7 +44,6 @@ class GradioPipeline(ActionMeshPipeline):
44
  progress_callback: Optional[ProgressCallback] = None,
45
  ) -> list[trimesh.Trimesh]:
46
  """Generate an animated mesh sequence with progress tracking."""
47
- # Apply parameter overrides
48
  if stage_0_steps is not None:
49
  self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
50
  if stage_1_steps is not None:
@@ -58,56 +57,71 @@ class GradioPipeline(ActionMeshPipeline):
58
  if anchor_idx is not None:
59
  self.cfg.anchor_idx = anchor_idx
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # -- Preprocessing: remove background
 
62
  input.frames = self.background_removal.process_images(input.frames)
 
63
 
64
  # -- Preprocessing: grouped cropping & padding
65
  input.frames = self.image_process.process_images(input.frames)
66
 
67
  with torch.inference_mode():
68
  # -- Stage 0: generate anchor 3D mesh & latent from single frame
 
69
  latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
 
70
 
71
  if progress_callback is not None:
72
  progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
73
 
74
- # Stage 1 callback: 10% -> 90%
75
- def stage1_callback(
76
- step: int, total_steps: int, window_idx: int, total_windows: int
77
- ) -> None:
78
- if progress_callback is not None:
79
- window_progress = (window_idx + step / total_steps) / total_windows
80
- progress_callback(
81
- 0.10 + 0.80 * window_progress,
82
- f"Stage 1: step {step}/{total_steps} ",
83
- )
84
-
85
- # Stage 2 callback: 90% -> 100%
86
- def stage2_callback(
87
- step: int, total_steps: int, window_idx: int, total_windows: int
88
- ) -> None:
89
- if progress_callback is not None:
90
- window_progress = (window_idx + step / total_steps) / total_windows
91
- progress_callback(
92
- 0.90 + 0.10 * window_progress,
93
- f"Stage 2: step {step}/{total_steps} ",
94
- )
95
-
96
- with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
97
- # -- Stage I: denoise synchronized 3D latents
98
  latent_bank = self.generate_3d_latents(
99
  input,
 
100
  latent_bank=latent_bank,
101
  seed=seed,
102
  step_callback=stage1_callback,
103
  )
 
104
 
105
- # -- Stage II: decode latents into mesh displacements
 
 
106
  mesh_bank = self.generate_mesh_animation(
107
  latent_bank=latent_bank,
108
  mesh_bank=mesh_bank,
109
  step_callback=stage2_callback,
110
  )
 
111
 
112
  if progress_callback is not None:
113
  progress_callback(1.0, "Pipeline complete!")
 
44
  progress_callback: Optional[ProgressCallback] = None,
45
  ) -> list[trimesh.Trimesh]:
46
  """Generate an animated mesh sequence with progress tracking."""
 
47
  if stage_0_steps is not None:
48
  self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
49
  if stage_1_steps is not None:
 
57
  if anchor_idx is not None:
58
  self.cfg.anchor_idx = anchor_idx
59
 
60
+ # Stage 1 callback: 10% -> 90%
61
+ def stage1_callback(
62
+ step: int, total_steps: int, window_idx: int, total_windows: int
63
+ ) -> None:
64
+ if progress_callback is not None:
65
+ window_progress = (window_idx + step / total_steps) / total_windows
66
+ progress_callback(
67
+ 0.10 + 0.80 * window_progress,
68
+ f"Stage 1: step {step}/{total_steps} ",
69
+ )
70
+
71
+ # Stage 2 callback: 90% -> 100%
72
+ def stage2_callback(
73
+ step: int, total_steps: int, window_idx: int, total_windows: int
74
+ ) -> None:
75
+ if progress_callback is not None:
76
+ window_progress = (window_idx + step / total_steps) / total_windows
77
+ progress_callback(
78
+ 0.90 + 0.10 * window_progress,
79
+ f"Stage 2: step {step}/{total_steps} ",
80
+ )
81
+
82
  # -- Preprocessing: remove background
83
+ self._load_background_removal()
84
  input.frames = self.background_removal.process_images(input.frames)
85
+ self._unload_model("background_removal")
86
 
87
  # -- Preprocessing: grouped cropping & padding
88
  input.frames = self.image_process.process_images(input.frames)
89
 
90
  with torch.inference_mode():
91
  # -- Stage 0: generate anchor 3D mesh & latent from single frame
92
+ self._load_image_to_3d()
93
  latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
94
+ self._unload_model("image_to_3d_pipe")
95
 
96
  if progress_callback is not None:
97
  progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
98
 
99
+ # -- Pre-compute context embeddings for all frames
100
+ self._load_image_encoder()
101
+ context = self.encode_all_frames(input)
102
+ self._unload_model("image_encoder")
103
+
104
+ # -- Stage I: denoise synchronized 3D latents
105
+ self._load_temporal_denoiser()
106
+ with torch.autocast(device_type="cuda", dtype=self._dtype):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  latent_bank = self.generate_3d_latents(
108
  input,
109
+ context=context,
110
  latent_bank=latent_bank,
111
  seed=seed,
112
  step_callback=stage1_callback,
113
  )
114
+ self._unload_model("temporal_3D_denoiser")
115
 
116
+ self._load_temporal_vae()
117
+ # -- Stage II: decode latents into mesh displacements
118
+ with torch.autocast(device_type="cuda", dtype=self._dtype):
119
  mesh_bank = self.generate_mesh_animation(
120
  latent_bank=latent_bank,
121
  mesh_bank=mesh_bank,
122
  step_callback=stage2_callback,
123
  )
124
+ self._unload_model("temporal_3D_vae")
125
 
126
  if progress_callback is not None:
127
  progress_callback(1.0, "Pipeline complete!")