| """SHARP Gradio demo (minimal, responsive UI). |
| |
| This Space: |
| - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image. |
| - Exports a canonical `.ply` file for download. |
| - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only). |
| |
| Precompiled examples |
| Place precompiled examples under `assets/examples/`. |
| |
| Recommended structure (matching stem): |
| assets/examples/<name>.jpg|png|webp |
| assets/examples/<name>.mp4 |
| assets/examples/<name>.ply |
| |
| Optional manifest (assets/examples/manifest.json): |
| [ |
| {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"}, |
| ... |
| ] |
| """ |
|
|
| from __future__ import annotations |
|
|
| import json |
| from dataclasses import dataclass |
| from pathlib import Path |
| from typing import Final |
|
|
| import gradio as gr |
|
|
| from model_utils import TrajectoryType, predict_and_maybe_render_gpu |
|
|
| |
| |
| |
|
|
# Directory layout is anchored at this file's location so the Space works
# regardless of the current working directory.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
ASSETS_DIR: Final[Path] = APP_DIR / "assets"
EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"


# Image extensions recognized when discovering example bundles.
IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
# Maximum number of requests held in the Gradio queue.
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32


# Gradio theme for the whole app.
THEME: Final = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
)
|
|
# Custom stylesheet for the demo layout (responsive media, stable scrollbars).
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }

/* Use normal document flow (no fixed-height viewport shell) */
html, body { height: auto; }
body { overflow: auto; }

/* Comfortable max width; still fills small screens */
.gradio-container {
  max-width: 1400px;
  margin: 0 auto;
  padding: 0.75rem 1rem 1rem;
  box-sizing: border-box;
}

/* Make media components responsive without stretching */
#run-image, #run-video,
#examples-image, #examples-video {
  width: 100%;
}

/* Keep aspect ratio and prevent runaway vertical growth on tall viewports */
#run-image img, #examples-image img {
  width: 100%;
  height: auto;
  max-height: 70vh;
  object-fit: contain;
}
#run-video video, #examples-video video {
  width: 100%;
  height: auto;
  max-height: 70vh;
  object-fit: contain;
}

/* On very small screens, reduce max media height a bit */
@media (max-width: 640px) {
  #run-image img, #examples-image img,
  #run-video video, #examples-video video {
    max-height: 55vh;
  }
}

/* Reduce extra whitespace in markdown blocks */
.gr-markdown > :first-child { margin-top: 0 !important; }
.gr-markdown > :last-child { margin-bottom: 0 !important; }
"""
|
|
| |
| |
| |
|
|
|
|
| def _ensure_dir(path: Path) -> Path: |
| path.mkdir(parents=True, exist_ok=True) |
| return path |
|
|
|
|
@dataclass(frozen=True, slots=True)
class ExampleSpec:
    """A precompiled example bundle (image + optional mp4 + optional ply)."""

    # Human-readable name shown in the examples list.
    label: str
    # Path to the example's input image (always present).
    image: Path
    # Pre-rendered trajectory video, or None when no .mp4 accompanies the image.
    video: Path | None
    # Exported Gaussian scene, or None when no .ply accompanies the image.
    ply: Path | None
|
|
|
|
| def _normalize_key(path: str) -> str: |
| """Normalize a path-like string for stable dictionary keys.""" |
| try: |
| return str(Path(path).resolve()) |
| except Exception: |
| return path |
|
|
|
|
| def _load_manifest(manifest_path: Path) -> list[dict]: |
| """Load manifest.json if present; return an empty list on errors.""" |
| try: |
| data = json.loads(manifest_path.read_text(encoding="utf-8")) |
| if not isinstance(data, list): |
| raise ValueError("manifest.json must contain a JSON list.") |
| return [x for x in data if isinstance(x, dict)] |
| except FileNotFoundError: |
| return [] |
| except Exception as e: |
| |
| print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}") |
| return [] |
|
|
|
|
def discover_examples(examples_dir: Path) -> list[ExampleSpec]:
    """Discover example bundles under assets/examples/.

    When manifest.json is present and non-empty, its rows drive discovery;
    otherwise every image file is paired with same-stem .mp4/.ply siblings.
    """
    _ensure_dir(examples_dir)

    manifest_rows = _load_manifest(examples_dir / "manifest.json")
    if manifest_rows:
        # Manifest mode: each row names its files relative to examples_dir.
        bundles: list[ExampleSpec] = []
        for entry in manifest_rows:
            name = str(entry.get("label") or "Example").strip() or "Example"
            rel_image = entry.get("image")
            if not rel_image:
                continue

            image_path = (examples_dir / str(rel_image)).resolve()
            if not image_path.exists():
                continue

            video_path: Path | None = None
            ply_path: Path | None = None
            if entry.get("video"):
                candidate = (examples_dir / str(entry["video"])).resolve()
                if candidate.exists():
                    video_path = candidate
            if entry.get("ply"):
                candidate = (examples_dir / str(entry["ply"])).resolve()
                if candidate.exists():
                    ply_path = candidate

            bundles.append(
                ExampleSpec(label=name, image=image_path, video=video_path, ply=ply_path)
            )
        return bundles

    # Fallback mode: pair every image with same-stem .mp4/.ply siblings.
    image_files: list[Path] = []
    for suffix in IMAGE_EXTS:
        image_files.extend(sorted(examples_dir.glob(f"*{suffix}")))

    bundles = []
    for image_file in image_files:
        stem = image_file.stem
        mp4 = examples_dir / f"{stem}.mp4"
        ply = examples_dir / f"{stem}.ply"
        bundles.append(
            ExampleSpec(
                label=stem.replace("_", " ").strip() or stem,
                image=image_file.resolve(),
                video=mp4.resolve() if mp4.exists() else None,
                ply=ply.resolve() if ply.exists() else None,
            )
        )
    return bundles
|
|
|
|
# Create the outputs directory at import time.
_ensure_dir(OUTPUTS_DIR)


# Examples are discovered once at import time. Two lookup tables let the UI
# map a selected image back to its ExampleSpec: by resolved path, and by bare
# filename as a fallback.
EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR)
EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = {
    _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS
}
EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = {
    s.image.name: s for s in EXAMPLE_SPECS
}
|
|
|
|
def load_example_assets(
    image_path: str | None,
) -> tuple[str | None, str | None, str | None, str]:
    """Return (image, video, ply_path, status) for the selected example image."""
    if not image_path:
        return None, None, None, "No example selected."

    # Prefer an exact resolved-path match; fall back to matching by filename.
    bundle = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path))
    if bundle is None:
        bundle = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name)
    if bundle is None:
        return image_path, None, None, "No matching example bundle found."

    video_str = None if bundle.video is None else str(bundle.video)
    ply_str = None if bundle.ply is None else str(bundle.ply)

    # Report which precompiled artifacts are absent from the bundle.
    absent = [
        tag
        for tag, value in (("MP4", video_str), ("PLY", ply_str))
        if value is None
    ]
    status = f"Loaded example: **{bundle.label}**."
    if absent:
        status += f" Missing: {', '.join(absent)}."

    return str(bundle.image), video_str, ply_str, status
|
|
|
|
def _validate_image(image_path: str | None) -> None:
    """Raise a user-facing error when no input image has been provided."""
    if image_path:
        return
    raise gr.Error("Upload an image first.")
|
|
|
|
def run_sharp(
    image_path: str | None,
    trajectory_type: TrajectoryType,
    output_long_side: int,
    num_frames: int,
    fps: int,
    render_video: bool,
) -> tuple[str | None, str | None, str]:
    """Run SHARP inference and return (video_path, ply_path, status_markdown).

    Raises gr.Error for missing input or any inference failure so the UI
    shows a toast instead of a stack trace.
    """
    _validate_image(image_path)

    # A non-positive long side means "keep the input resolution".
    long_side = int(output_long_side)
    out_long_side: int | None = long_side if long_side > 0 else None

    try:
        video_path, ply_path = predict_and_maybe_render_gpu(
            image_path,
            trajectory_type=trajectory_type,
            num_frames=int(num_frames),
            fps=int(fps),
            output_long_side=out_long_side,
            render_video=bool(render_video),
        )

        status_lines = [f"**PLY:** `{ply_path.name}` (ready to download)"]
        if not render_video:
            status_lines.append("**Video:** disabled.")
        elif video_path is None:
            status_lines.append("**Video:** not rendered (CUDA unavailable).")
        else:
            status_lines.append(f"**Video:** `{video_path.name}`")

        return (
            None if video_path is None else str(video_path),
            str(ply_path),
            "\n".join(status_lines),
        )
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e
|
|
|
|
| |
| |
| |
|
|
|
|
def build_demo() -> gr.Blocks:
    """Construct the Gradio UI (Run / Examples / About tabs).

    The theme and CSS are attached here: `theme` and `css` are `gr.Blocks`
    constructor options, not `launch()` options, so passing them to
    `demo.launch()` would fail.

    Returns:
        A queued `gr.Blocks` app ready to launch.
    """
    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        theme=THEME,
        css=CSS,
        elem_id="sharp-root",
        fill_height=True,
    ) as demo:
        gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.")

        with gr.Column(elem_id="tabs-shell"):
            with gr.Tabs():
                with gr.Tab("Run", id="run"):
                    with gr.Column(elem_id="run-panel"):
                        with gr.Row(equal_height=True, elem_id="run-media-row"):
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-left-col"
                            ):
                                image_in = gr.Image(
                                    label="Input image",
                                    type="filepath",
                                    sources=["upload"],
                                    elem_id="run-image",
                                )

                                with gr.Row():
                                    trajectory = gr.Dropdown(
                                        label="Trajectory",
                                        choices=[
                                            "swipe",
                                            "shake",
                                            "rotate",
                                            "rotate_forward",
                                        ],
                                        value="rotate_forward",
                                    )
                                    output_res = gr.Dropdown(
                                        label="Output long side",
                                        info="0 = match input",
                                        choices=[
                                            ("Match input", 0),
                                            ("512", 512),
                                            ("768", 768),
                                            ("1024", 1024),
                                            ("1280", 1280),
                                            ("1536", 1536),
                                        ],
                                        value=0,
                                    )

                                with gr.Row():
                                    frames = gr.Slider(
                                        label="Frames",
                                        minimum=24,
                                        maximum=120,
                                        step=1,
                                        value=60,
                                    )
                                    fps_in = gr.Slider(
                                        label="FPS",
                                        minimum=8,
                                        maximum=60,
                                        step=1,
                                        value=30,
                                    )

                                render_toggle = gr.Checkbox(
                                    label="Render MP4 (CUDA / ZeroGPU only)",
                                    value=True,
                                )

                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-right-col"
                            ):
                                video_out = gr.Video(
                                    label="Trajectory video (MP4)",
                                    elem_id="run-video",
                                )
                                with gr.Row(elem_id="run-download-row"):
                                    ply_download = gr.DownloadButton(
                                        label="Download PLY (.ply)",
                                        value=None,
                                        visible=True,
                                        elem_id="run-ply-download",
                                    )
                                status_md = gr.Markdown("", elem_id="run-status")

                        with gr.Row(elem_id="run-actions-row"):
                            run_btn = gr.Button("Generate", variant="primary")
                            clear_btn = gr.ClearButton(
                                [image_in, video_out, ply_download, status_md],
                                value="Clear",
                            )

                        # ClearButton blanks the components; this extra handler
                        # also resets the DownloadButton's file value.
                        clear_btn.click(
                            fn=lambda: None,
                            outputs=[ply_download],
                            queue=False,
                        )

                        run_btn.click(
                            fn=run_sharp,
                            inputs=[
                                image_in,
                                trajectory,
                                output_res,
                                frames,
                                fps_in,
                                render_toggle,
                            ],
                            outputs=[video_out, ply_download, status_md],
                            api_visibility="public",
                        )

                with gr.Tab("Examples", id="examples"):
                    with gr.Column(elem_id="examples-panel"):
                        if EXAMPLE_SPECS:
                            gr.Markdown(
                                "Click an example to preview precompiled outputs. "
                                "The example image will also be loaded into the Run tab."
                            )

                            # Created with render=False so they can be placed in
                            # the right-hand column below while being usable as
                            # Examples outputs.
                            ex_img = gr.Image(
                                label="Example image",
                                type="filepath",
                                interactive=False,
                                render=False,
                                height=360,
                                elem_id="examples-image",
                            )
                            ex_vid = gr.Video(
                                label="Pre-rendered MP4",
                                render=False,
                                height=360,
                                elem_id="examples-video",
                            )
                            ex_ply = gr.DownloadButton(
                                label="Download PLY (.ply)",
                                value=None,
                                visible=True,
                                render=False,
                                elem_id="examples-ply-download",
                            )
                            ex_status = gr.Markdown(
                                render=False, elem_id="examples-status"
                            )

                            with gr.Row(equal_height=True):
                                with gr.Column(scale=4, min_width=320):
                                    gr.Examples(
                                        examples=[
                                            [str(s.image)] for s in EXAMPLE_SPECS
                                        ],
                                        example_labels=[s.label for s in EXAMPLE_SPECS],
                                        inputs=[image_in],
                                        outputs=[ex_img, ex_vid, ex_ply, ex_status],
                                        fn=load_example_assets,
                                        cache_examples=False,
                                        run_on_click=True,
                                        examples_per_page=10,
                                        label=None,
                                    )

                                with gr.Column(scale=6, min_width=360):
                                    ex_img.render()
                                    ex_vid.render()
                                    ex_ply.render()
                                    ex_status.render()

                            gr.Markdown(
                                "Add example bundles under `assets/examples/` "
                                "(image + mp4 + ply) or provide a `manifest.json`."
                            )
                        else:
                            gr.Markdown(
                                "No precompiled examples found.\n\n"
                                "Add files under `assets/examples/`:\n"
                                "- `example.jpg` (or png/webp)\n"
                                "- `example.mp4`\n"
                                "- `example.ply`\n\n"
                                "Optionally add `assets/examples/manifest.json` to define labels and filenames."
                            )

                with gr.Tab("About", id="about"):
                    with gr.Column(elem_id="about-panel"):
                        gr.Markdown(
                            """
*Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025)

```bibtex
@inproceedings{Sharp2025:arxiv,
  title = {Sharp Monocular View Synthesis in Less Than a Second},
  author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
  journal = {arXiv preprint arXiv:2512.10685},
  year = {2025},
  url = {https://arxiv.org/abs/2512.10685},
}
```
                            """.strip()
                        )

    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
|
|
|
|
# Built at import time so hosting runners (e.g. Spaces) can find `demo`.
demo = build_demo()


if __name__ == "__main__":
    # `theme` and `css` are gr.Blocks constructor options, not launch()
    # options — launch() rejects unknown keyword arguments.
    demo.launch()
|
|