dennny123 committed on
Commit
3e5df04
·
verified ·
1 Parent(s): bd30ee0

Make Space video-only and fix preview export shape handling

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +13 -50
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: LingBot 3D
3
- short_description: Streaming 3D scene reconstruction from short clips.
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
 
1
  ---
2
  title: LingBot 3D
3
+ short_description: Streaming 3D scene reconstruction from short videos.
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
app.py CHANGED
@@ -7,7 +7,7 @@ import threading
7
  import time
8
  import zipfile
9
  from pathlib import Path
10
- from typing import Any, Iterable
11
 
12
  import cv2
13
  import gradio as gr
@@ -142,21 +142,6 @@ def _eager_load_default_model() -> None:
142
  STARTUP_NOTES.append(f"Startup preload failed: {exc}")
143
 
144
 
145
- def _copy_image_inputs(image_files: Iterable[Any], input_dir: Path, max_frames: int) -> list[str]:
146
- paths = sorted(filter(None, (_resolve_path(item) for item in image_files)), key=lambda value: Path(value).name)
147
- if not paths:
148
- return []
149
-
150
- copied = []
151
- for idx, src_path in enumerate(paths[:max_frames]):
152
- src = Path(src_path)
153
- suffix = src.suffix.lower() or ".png"
154
- dest = input_dir / f"{idx:06d}{suffix}"
155
- shutil.copy2(src, dest)
156
- copied.append(str(dest))
157
- return copied
158
-
159
-
160
  def _extract_video_frames(video_file: str, frames_dir: Path, fps: int, max_frames: int) -> tuple[list[str], dict[str, Any]]:
161
  cap = cv2.VideoCapture(video_file)
162
  if not cap.isOpened():
@@ -187,27 +172,18 @@ def _extract_video_frames(video_file: str, frames_dir: Path, fps: int, max_frame
187
  }
188
 
189
 
190
- def _prepare_inputs(image_files: list[Any], video_file: Any, fps: int, max_frames: int) -> tuple[torch.Tensor, list[str], Path, dict[str, Any]]:
191
  _cleanup_old_runs()
192
  work_dir = Path(tempfile.mkdtemp(prefix="lingbot-map-", dir=OUTPUT_ROOT))
193
  input_dir = work_dir / "inputs"
194
  input_dir.mkdir(parents=True, exist_ok=True)
195
 
196
- image_paths = _copy_image_inputs(image_files or [], input_dir, max_frames=max_frames)
197
- input_summary = {"input_mode": None}
198
-
199
- if image_paths:
200
- input_summary["input_mode"] = "images"
201
- input_summary["source_fps"] = None
202
- input_summary["sample_interval"] = None
203
- input_summary["original_frame_count"] = len(image_paths)
204
- else:
205
- video_path = _resolve_path(video_file)
206
- if not video_path:
207
- raise ValueError("Upload either ordered images or a video.")
208
- image_paths, video_summary = _extract_video_frames(video_path, input_dir, fps=fps, max_frames=max_frames)
209
- input_summary["input_mode"] = "video"
210
- input_summary.update(video_summary)
211
 
212
  if len(image_paths) < 2:
213
  raise ValueError("Provide at least 2 frames. The Space is tuned for short multi-frame reconstructions.")
@@ -326,7 +302,7 @@ def _run_inference(images: torch.Tensor, num_scale_frames: int, keyframe_interva
326
 
327
 
328
  def _make_preview_strip(images: torch.Tensor, output_path: Path) -> str:
329
- frames = images.detach().cpu()
330
  count = frames.shape[0]
331
  indices = sorted({int(round(i)) for i in np.linspace(0, count - 1, num=min(4, count))})
332
 
@@ -449,7 +425,6 @@ def _format_status(summary: dict[str, Any]) -> str:
449
 
450
 
451
  def reconstruct_scene(
452
- image_files: list[Any],
453
  video_file: Any,
454
  fps: int,
455
  max_frames: int,
@@ -462,12 +437,7 @@ def reconstruct_scene(
462
  keyframe_interval = max(1, int(keyframe_interval))
463
  conf_percentile = float(conf_percentile)
464
 
465
- images, image_paths, work_dir, input_summary = _prepare_inputs(
466
- image_files=image_files or [],
467
- video_file=video_file,
468
- fps=int(fps),
469
- max_frames=max_frames,
470
- )
471
 
472
  num_scale_frames = min(num_scale_frames, int(images.shape[0]))
473
  predictions, images_cpu, runtime_summary = _run_inference(
@@ -496,7 +466,7 @@ def reconstruct_scene(
496
  def _build_startup_markdown() -> str:
497
  if not STARTUP_NOTES:
498
  return (
499
- "Build a 3D scene from a short image sequence or video. "
500
  "This app uses the upstream LingBot-Map checkpoint and exports a navigable GLB scene plus a downloadable results bundle."
501
  )
502
  return "\n".join([f"- {note}" for note in STARTUP_NOTES])
@@ -535,7 +505,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green", secondary_hue=
535
  """
536
  <div class="headline">
537
  <h1>LingBot 3D</h1>
538
- <p>Upload ordered images or a short video, sample a compact clip, and export a 3D scene you can inspect or download.</p>
539
  </div>
540
  """
541
  )
@@ -544,14 +514,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green", secondary_hue=
544
 
545
  with gr.Row():
546
  with gr.Column(scale=1):
547
- image_files = gr.File(
548
- label="Ordered images",
549
- file_count="multiple",
550
- file_types=["image"],
551
- type="filepath",
552
- )
553
  video_file = gr.File(
554
- label="Or upload one video",
555
  file_types=["video"],
556
  type="filepath",
557
  )
@@ -578,7 +542,6 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green", secondary_hue=
578
  run_button.click(
579
  fn=reconstruct_scene,
580
  inputs=[
581
- image_files,
582
  video_file,
583
  fps,
584
  max_frames,
 
7
  import time
8
  import zipfile
9
  from pathlib import Path
10
+ from typing import Any
11
 
12
  import cv2
13
  import gradio as gr
 
142
  STARTUP_NOTES.append(f"Startup preload failed: {exc}")
143
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  def _extract_video_frames(video_file: str, frames_dir: Path, fps: int, max_frames: int) -> tuple[list[str], dict[str, Any]]:
146
  cap = cv2.VideoCapture(video_file)
147
  if not cap.isOpened():
 
172
  }
173
 
174
 
175
+ def _prepare_inputs(video_file: Any, fps: int, max_frames: int) -> tuple[torch.Tensor, list[str], Path, dict[str, Any]]:
176
  _cleanup_old_runs()
177
  work_dir = Path(tempfile.mkdtemp(prefix="lingbot-map-", dir=OUTPUT_ROOT))
178
  input_dir = work_dir / "inputs"
179
  input_dir.mkdir(parents=True, exist_ok=True)
180
 
181
+ input_summary = {"input_mode": "video"}
182
+ video_path = _resolve_path(video_file)
183
+ if not video_path:
184
+ raise ValueError("Upload one short video.")
185
+ image_paths, video_summary = _extract_video_frames(video_path, input_dir, fps=fps, max_frames=max_frames)
186
+ input_summary.update(video_summary)
 
 
 
 
 
 
 
 
 
187
 
188
  if len(image_paths) < 2:
189
  raise ValueError("Provide at least 2 frames. The Space is tuned for short multi-frame reconstructions.")
 
302
 
303
 
304
  def _make_preview_strip(images: torch.Tensor, output_path: Path) -> str:
305
+ frames = _squeeze_single_batch("images", images.detach().cpu())
306
  count = frames.shape[0]
307
  indices = sorted({int(round(i)) for i in np.linspace(0, count - 1, num=min(4, count))})
308
 
 
425
 
426
 
427
  def reconstruct_scene(
 
428
  video_file: Any,
429
  fps: int,
430
  max_frames: int,
 
437
  keyframe_interval = max(1, int(keyframe_interval))
438
  conf_percentile = float(conf_percentile)
439
 
440
+ images, image_paths, work_dir, input_summary = _prepare_inputs(video_file=video_file, fps=int(fps), max_frames=max_frames)
 
 
 
 
 
441
 
442
  num_scale_frames = min(num_scale_frames, int(images.shape[0]))
443
  predictions, images_cpu, runtime_summary = _run_inference(
 
466
  def _build_startup_markdown() -> str:
467
  if not STARTUP_NOTES:
468
  return (
469
+ "Build a 3D scene from a short video. "
470
  "This app uses the upstream LingBot-Map checkpoint and exports a navigable GLB scene plus a downloadable results bundle."
471
  )
472
  return "\n".join([f"- {note}" for note in STARTUP_NOTES])
 
505
  """
506
  <div class="headline">
507
  <h1>LingBot 3D</h1>
508
+ <p>Upload one short video, sample a compact clip, and export a 3D scene you can inspect or download.</p>
509
  </div>
510
  """
511
  )
 
514
 
515
  with gr.Row():
516
  with gr.Column(scale=1):
 
 
 
 
 
 
517
  video_file = gr.File(
518
+ label="Upload one short video",
519
  file_types=["video"],
520
  type="filepath",
521
  )
 
542
  run_button.click(
543
  fn=reconstruct_scene,
544
  inputs=[
 
545
  video_file,
546
  fps,
547
  max_frames,