Remy committed on
Commit
b9913c3
·
verified ·
1 Parent(s): 74464c6

Update ActionMesh space

Browse files
.gitattributes CHANGED
@@ -148,6 +148,7 @@ assets/davis_flamingo/10.png filter=lfs diff=lfs merge=lfs -text
148
  assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
149
  assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
150
  assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
 
151
  assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
152
  assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
153
  assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
 
148
  assets/davis_flamingo/11.png filter=lfs diff=lfs merge=lfs -text
149
  assets/davis_flamingo/12.png filter=lfs diff=lfs merge=lfs -text
150
  assets/davis_flamingo.gif filter=lfs diff=lfs merge=lfs -text
151
+ assets/eagle.gif filter=lfs diff=lfs merge=lfs -text
152
  assets/kangaroo/00.png filter=lfs diff=lfs merge=lfs -text
153
  assets/kangaroo/01.png filter=lfs diff=lfs merge=lfs -text
154
  assets/kangaroo/02.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -104,7 +104,6 @@ def setup_blender() -> Path:
104
  shutil.rmtree(blender_dir)
105
  raise RuntimeError(f"Failed to extract Blender: {e}")
106
 
107
-
108
  # Verify installation
109
  if not blender_executable.exists():
110
  raise RuntimeError(f"Blender executable not found: {blender_executable}")
@@ -499,19 +498,14 @@ def create_demo() -> gr.Blocks:
499
  # 🎬 ActionMesh: Video to Animated 3D Mesh
500
 
501
  [**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
502
- [Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](http://www0.cs.ucl.ac.uk/staff/n.mitra/), [Tom Monnier](https://tmonnier.com/)
503
  **[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
504
 
505
  Generate animated 3D meshes from video input using ActionMesh.
506
 
507
- **Instructions:**
508
- 1. Upload a video OR multiple images ⚠️ *Input is limited to exactly 16 frames. Extra frames will be discarded.*
509
- 2. Click "Generate"
510
- 3. View the animated 4D mesh in the viewer
511
- 4. Download the animated GLB mesh (ready for Blender)
512
 
513
- ⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
514
- We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
515
  """
516
  )
517
 
@@ -567,22 +561,23 @@ def create_demo() -> gr.Blocks:
567
  info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
568
  )
569
 
570
- reference_frame = gr.Slider(
571
- minimum=1,
572
- maximum=16,
573
- value=1,
574
- step=1,
575
- label="Reference Frame",
576
- info="Frame used as reference for 3D generation (1 recommended)",
577
- )
 
578
 
579
- seed = gr.Slider(
580
- minimum=0,
581
- maximum=100,
582
- value=44,
583
- step=1,
584
- label="Random Seed",
585
- )
586
 
587
  generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
588
 
@@ -679,7 +674,8 @@ def create_demo() -> gr.Blocks:
679
  gr.Markdown(
680
  """
681
  ---
682
- **Note:** This demo requires a GPU with sufficient VRAM.
 
683
  """
684
  )
685
 
 
104
  shutil.rmtree(blender_dir)
105
  raise RuntimeError(f"Failed to extract Blender: {e}")
106
 
 
107
  # Verify installation
108
  if not blender_executable.exists():
109
  raise RuntimeError(f"Blender executable not found: {blender_executable}")
 
498
  # 🎬 ActionMesh: Video to Animated 3D Mesh
499
 
500
  [**Project Page**](https://remysabathier.github.io/actionmesh/) · [**GitHub**](https://github.com/facebookresearch/ActionMesh)
501
+ [Remy Sabathier](https://remysabathier.github.io/RemySabathier/), [David Novotny](https://d-novotny.github.io/), [Niloy J. Mitra](https://geometry.cs.ucl.ac.uk/), [Tom Monnier](https://tmonnier.com/)
502
  **[Meta Reality Labs](https://ai.facebook.com/research/)** · **[SpAItial](https://www.spaitial.ai/)** · **[University College London](https://geometry.cs.ucl.ac.uk/)**
503
 
504
  Generate animated 3D meshes from video input using ActionMesh.
505
 
506
+ **Instructions:** Upload video/images → Click "Generate" → Download animated 4D mesh (.GLB, Blender-ready)
507
+ ⚠️ *Input limited to 16 frames. Extra frames discarded.*
 
 
 
508
 
 
 
509
  """
510
  )
511
 
 
561
  info="⚡ Fast: ~90s, ✨ High Quality: ~3min30s",
562
  )
563
 
564
+ with gr.Accordion("More Settings", open=False):
565
+ reference_frame = gr.Slider(
566
+ minimum=1,
567
+ maximum=16,
568
+ value=1,
569
+ step=1,
570
+ label="Reference Frame",
571
+ info="Frame used as reference for 3D generation (1 recommended)",
572
+ )
573
 
574
+ seed = gr.Slider(
575
+ minimum=0,
576
+ maximum=100,
577
+ value=44,
578
+ step=1,
579
+ label="Random Seed",
580
+ )
581
 
582
  generate_btn = gr.Button("🎬 Generate", variant="primary", size="lg")
583
 
 
674
  gr.Markdown(
675
  """
676
  ---
677
+ ⏱️ **Performance:** Inference on HuggingFace Space (ZeroGPU) is 2x slower than running locally.
678
+ We recommend **Fast mode** (90s). For faster inference, run [locally via GitHub](https://github.com/facebookresearch/ActionMesh).
679
  """
680
  )
681
 
blender-3.5.1-linux-x64.tar.xz → assets/eagle.gif RENAMED
File without changes
assets/eagle/00.png ADDED
assets/eagle/01.png ADDED
assets/eagle/02.png ADDED
assets/eagle/03.png ADDED
assets/eagle/04.png ADDED
assets/eagle/05.png ADDED
assets/eagle/06.png ADDED
assets/eagle/07.png ADDED
assets/eagle/08.png ADDED
assets/eagle/09.png ADDED
assets/eagle/10.png ADDED
assets/eagle/11.png ADDED
assets/eagle/12.png ADDED
assets/eagle/13.png ADDED
assets/eagle/14.png ADDED
assets/eagle/15.png ADDED
gradio_pipeline.py CHANGED
@@ -44,7 +44,6 @@ class GradioPipeline(ActionMeshPipeline):
44
  progress_callback: Optional[ProgressCallback] = None,
45
  ) -> list[trimesh.Trimesh]:
46
  """Generate an animated mesh sequence with progress tracking."""
47
- # Apply parameter overrides
48
  if stage_0_steps is not None:
49
  self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
50
  if stage_1_steps is not None:
@@ -58,56 +57,71 @@ class GradioPipeline(ActionMeshPipeline):
58
  if anchor_idx is not None:
59
  self.cfg.anchor_idx = anchor_idx
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # -- Preprocessing: remove background
 
62
  input.frames = self.background_removal.process_images(input.frames)
 
63
 
64
  # -- Preprocessing: grouped cropping & padding
65
  input.frames = self.image_process.process_images(input.frames)
66
 
67
  with torch.inference_mode():
68
  # -- Stage 0: generate anchor 3D mesh & latent from single frame
 
69
  latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
 
70
 
71
  if progress_callback is not None:
72
  progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
73
 
74
- # Stage 1 callback: 10% -> 90%
75
- def stage1_callback(
76
- step: int, total_steps: int, window_idx: int, total_windows: int
77
- ) -> None:
78
- if progress_callback is not None:
79
- window_progress = (window_idx + step / total_steps) / total_windows
80
- progress_callback(
81
- 0.10 + 0.80 * window_progress,
82
- f"Stage 1: step {step}/{total_steps} ",
83
- )
84
-
85
- # Stage 2 callback: 90% -> 100%
86
- def stage2_callback(
87
- step: int, total_steps: int, window_idx: int, total_windows: int
88
- ) -> None:
89
- if progress_callback is not None:
90
- window_progress = (window_idx + step / total_steps) / total_windows
91
- progress_callback(
92
- 0.90 + 0.10 * window_progress,
93
- f"Stage 2: step {step}/{total_steps} ",
94
- )
95
-
96
- with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
97
- # -- Stage I: denoise synchronized 3D latents
98
  latent_bank = self.generate_3d_latents(
99
  input,
 
100
  latent_bank=latent_bank,
101
  seed=seed,
102
  step_callback=stage1_callback,
103
  )
 
104
 
105
- # -- Stage II: decode latents into mesh displacements
 
 
106
  mesh_bank = self.generate_mesh_animation(
107
  latent_bank=latent_bank,
108
  mesh_bank=mesh_bank,
109
  step_callback=stage2_callback,
110
  )
 
111
 
112
  if progress_callback is not None:
113
  progress_callback(1.0, "Pipeline complete!")
 
44
  progress_callback: Optional[ProgressCallback] = None,
45
  ) -> list[trimesh.Trimesh]:
46
  """Generate an animated mesh sequence with progress tracking."""
 
47
  if stage_0_steps is not None:
48
  self.cfg.model.image_to_3D_denoiser.num_inference_steps = stage_0_steps
49
  if stage_1_steps is not None:
 
57
  if anchor_idx is not None:
58
  self.cfg.anchor_idx = anchor_idx
59
 
60
+ # Stage 1 callback: 10% -> 90%
61
+ def stage1_callback(
62
+ step: int, total_steps: int, window_idx: int, total_windows: int
63
+ ) -> None:
64
+ if progress_callback is not None:
65
+ window_progress = (window_idx + step / total_steps) / total_windows
66
+ progress_callback(
67
+ 0.10 + 0.80 * window_progress,
68
+ f"Stage 1: step {step}/{total_steps} ",
69
+ )
70
+
71
+ # Stage 2 callback: 90% -> 100%
72
+ def stage2_callback(
73
+ step: int, total_steps: int, window_idx: int, total_windows: int
74
+ ) -> None:
75
+ if progress_callback is not None:
76
+ window_progress = (window_idx + step / total_steps) / total_windows
77
+ progress_callback(
78
+ 0.90 + 0.10 * window_progress,
79
+ f"Stage 2: step {step}/{total_steps} ",
80
+ )
81
+
82
  # -- Preprocessing: remove background
83
+ self._load_background_removal()
84
  input.frames = self.background_removal.process_images(input.frames)
85
+ self._unload_model("background_removal")
86
 
87
  # -- Preprocessing: grouped cropping & padding
88
  input.frames = self.image_process.process_images(input.frames)
89
 
90
  with torch.inference_mode():
91
  # -- Stage 0: generate anchor 3D mesh & latent from single frame
92
+ self._load_image_to_3d()
93
  latent_bank, mesh_bank = self.init_banks_from_anchor(input, seed)
94
+ self._unload_model("image_to_3d_pipe")
95
 
96
  if progress_callback is not None:
97
  progress_callback(0.10, "Anchor 3D generated, starting Stage 1...")
98
 
99
+ # -- Pre-compute context embeddings for all frames
100
+ self._load_image_encoder()
101
+ context = self.encode_all_frames(input)
102
+ self._unload_model("image_encoder")
103
+
104
+ # -- Stage I: denoise synchronized 3D latents
105
+ self._load_temporal_denoiser()
106
+ with torch.autocast(device_type="cuda", dtype=self._dtype):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  latent_bank = self.generate_3d_latents(
108
  input,
109
+ context=context,
110
  latent_bank=latent_bank,
111
  seed=seed,
112
  step_callback=stage1_callback,
113
  )
114
+ self._unload_model("temporal_3D_denoiser")
115
 
116
+ self._load_temporal_vae()
117
+ # -- Stage II: decode latents into mesh displacements
118
+ with torch.autocast(device_type="cuda", dtype=self._dtype):
119
  mesh_bank = self.generate_mesh_animation(
120
  latent_bank=latent_bank,
121
  mesh_bank=mesh_bank,
122
  step_callback=stage2_callback,
123
  )
124
+ self._unload_model("temporal_3D_vae")
125
 
126
  if progress_callback is not None:
127
  progress_callback(1.0, "Pipeline complete!")