SynLayers
/

synlayers

+from __future__ import annotations
+import os
+import sys
+from pathlib import Path
+try:
+    import spaces
+except ImportError:
+    class _SpacesCompat:
+        @staticmethod
+        def GPU(*decorator_args, **decorator_kwargs):
+            if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs:
+                return decorator_args[0]
+            def decorator(fn):
+                return fn
+            return decorator
+    spaces = _SpacesCompat()
+import gradio as gr
+import torch
+CURRENT_FILE = Path(__file__).resolve()
+PROJECT_ROOT = CURRENT_FILE.parents[1]
+for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
+    if (candidate / "infer").exists() and (candidate / "models").exists():
+        PROJECT_ROOT = candidate
+        break
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+from demo.real_world_pipeline import (  # noqa: E402
+    DEFAULT_BBOX_MODEL,
+    DEFAULT_REAL_CONFIG_PATH,
+    DEFAULT_RUN_NAME,
+    DEFAULT_WORK_DIR,
+    run_real_world_pipeline,
+)
+DEFAULT_EXAMPLE_DIR = Path(
+    os.environ.get(
+        "SYNLAYERS_EXAMPLE_DIR",
+        "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024",
+    )
+)
+def read_int_env(name: str, default: int) -> int:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return int(raw)
+    except ValueError:
+        return default
+ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
+ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))
+def list_example_images(limit: int = 6) -> list[list[str]]:
+    if not DEFAULT_EXAMPLE_DIR.exists():
+        return []
+    candidates = []
+    for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
+        candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
+    candidates = sorted(candidates)[:limit]
+    return [[str(path)] for path in candidates]
+def build_gallery(result: dict) -> list[tuple[str, str]]:
+    gallery: list[tuple[str, str]] = []
+    if result.get("whole_image_rgba"):
+        gallery.append((result["whole_image_rgba"], "Whole RGBA"))
+    if result.get("background_rgba"):
+        gallery.append((result["background_rgba"], "Background RGBA"))
+    for idx, path in enumerate(result.get("layer_images", [])):
+        gallery.append((path, f"Layer {idx}"))
+    return gallery
+def get_gpu_name() -> str:
+    if not torch.cuda.is_available():
+        return "None"
+    try:
+        return torch.cuda.get_device_name(torch.cuda.current_device())
+    except Exception as exc:  # pragma: no cover - defensive runtime reporting
+        return f"Unavailable ({exc})"
+def is_zero_gpu_space() -> bool:
+    accelerator = os.environ.get("ACCELERATOR", "").lower()
+    return (
+        os.environ.get("ZEROGPU_V2", "").lower() == "true"
+        or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
+        or accelerator == "zerogpu"
+        or accelerator.startswith("zero")
+    )
+def get_runtime_status_markdown() -> str:
+    accelerator = os.environ.get("ACCELERATOR", "unknown")
+    space_id = os.environ.get("SPACE_ID", "local")
+    bbox_repo = os.environ.get("SYNLAYERS_BBOX_MODEL_REPO") or os.environ.get("SYNLAYERS_BBOX_MODEL", "(unset)")
+    stage2_repo = os.environ.get("SYNLAYERS_STAGE2_MODEL_REPO") or os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)")
+    zero_gpu_enabled = is_zero_gpu_space()
+    lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]
+    if zero_gpu_enabled:
+        lines.extend(
+            [
+                f"- `ZeroGPU mode`: `True`",
+                f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
+                f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
+                f"- `Stage 1 bbox repo/path`: `{bbox_repo}`",
+                f"- `Stage 2 repo`: `{stage2_repo}`",
+                f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
+                "",
+                "This Space is configured for Hugging Face ZeroGPU.",
+                "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
+                "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
+            ]
+        )
+    else:
+        cuda_available = torch.cuda.is_available()
+        lines.extend(
+            [
+                f"- `CUDA available`: `{cuda_available}`",
+                f"- `GPU device`: `{get_gpu_name()}`",
+                f"- `Stage 1 bbox repo/path`: `{bbox_repo}`",
+                f"- `Stage 2 repo`: `{stage2_repo}`",
+                "",
+            ]
+        )
+        if accelerator == "none" or not cuda_available:
+            lines.extend(
+                [
+                    "This Space is not currently running with a usable CUDA GPU.",
+                    "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
+                    "Visitors cannot switch GPUs from inside the Gradio app.",
+                ]
+            )
+        else:
+            lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")
+    return "\n".join(lines)
+@spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE)
+def run_demo_inference(
+    image_path: str,
+    sample_name: str,
+    max_new_tokens: int,
+    seed_value: float,
+) -> dict:
+    seed = int(seed_value) if seed_value >= 0 else None
+    return run_real_world_pipeline(
+        image_path=image_path,
+        sample_name=sample_name or None,
+        work_dir=DEFAULT_WORK_DIR,
+        bbox_model=DEFAULT_BBOX_MODEL,
+        config_path=DEFAULT_REAL_CONFIG_PATH,
+        max_new_tokens=int(max_new_tokens),
+        seed=seed,
+        run_name=DEFAULT_RUN_NAME,
+    )
+def run_demo(
+    image_path: str,
+    sample_name: str,
+    max_new_tokens: int,
+    seed_value: float,
+):
+    if not image_path:
+        raise gr.Error("Please upload an input image first.")
+    try:
+        result = run_demo_inference(
+            image_path=image_path,
+            sample_name=sample_name,
+            max_new_tokens=max_new_tokens,
+            seed_value=seed_value,
+        )
+    except Exception as exc:
+        raise gr.Error(str(exc)) from exc
+    return (
+        result["bbox_visualization"],
+        result["merged_image"],
+        result["bbox_record"].get("whole_caption", ""),
+        result["bbox_record"],
+        result["metadata"],
+        build_gallery(result),
+        result["archive_path"],
+        result["case_dir"],
+    )
+# Build the Gradio interface: intro text, runtime status panel, input controls,
+# output widgets, optional example picker, and the button event wiring.
+with gr.Blocks(title="SynLayers Real-World Demo") as demo:
+    gr.Markdown(
+        """
+        # SynLayers Real-World Decomposition
+        Upload a single image and run the full pipeline in one step:
+        1. VLM for whole-caption + bounding-box detection
+        2. SynLayers real-image layer decomposition
+        This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
+        The first request may take time while model assets are loaded from Hugging Face.
+        In ZeroGPU mode, a shared GPU is requested only while inference is running.
+        """
+    )
+    # The status text is computed once here while the UI is being built; the
+    # button next to it recomputes it on demand (wired up further down).
+    runtime_status = gr.Markdown(get_runtime_status_markdown())
+    refresh_status_button = gr.Button("Refresh Runtime Status")
+    with gr.Row():
+        # Left column: the four inputs handed to run_demo plus the run button.
+        with gr.Column(scale=1):
+            image_input = gr.Image(type="filepath", label="Input Image")
+            sample_name_input = gr.Textbox(
+                label="Optional Sample Name",
+                placeholder="Leave empty to use the uploaded filename",
+            )
+            max_new_tokens_input = gr.Slider(
+                minimum=128,
+                maximum=2048,
+                value=1024,
+                step=64,
+                label="VLM Max New Tokens",
+            )
+            # Any negative seed makes run_demo_inference pass seed=None.
+            seed_input = gr.Number(
+                value=42,
+                precision=0,
+                label="Seed (-1 keeps config default)",
+            )
+            run_button = gr.Button("Run Full Pipeline", variant="primary")
+        # Right column: the two image outputs.
+        with gr.Column(scale=1):
+            bbox_vis_output = gr.Image(type="filepath", label="Detected Bounding Boxes")
+            merged_output = gr.Image(type="filepath", label="Merged Decomposition")
+    caption_output = gr.Textbox(label="Whole Caption", lines=6)
+    with gr.Row():
+        bbox_json_output = gr.JSON(label="BBox JSON")
+        meta_json_output = gr.JSON(label="Inference Metadata")
+    layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
+    with gr.Row():
+        archive_output = gr.File(label="Download Result Bundle")
+        case_dir_output = gr.Textbox(label="Saved Case Directory")
+    # The example picker is added only when list_example_images() found files.
+    examples = list_example_images()
+    if examples:
+        gr.Examples(examples=examples, inputs=[image_input], label="Example Images")
+    refresh_status_button.click(
+        fn=get_runtime_status_markdown,
+        outputs=runtime_status,
+    )
+    # Inputs follow run_demo's parameter order; outputs follow the order of
+    # the tuple that run_demo returns.
+    run_button.click(
+        fn=run_demo,
+        inputs=[
+            image_input,
+            sample_name_input,
+            max_new_tokens_input,
+            seed_input,
+        ],
+        outputs=[
+            bbox_vis_output,
+            merged_output,
+            caption_output,
+            bbox_json_output,
+            meta_json_output,
+            layer_gallery,
+            archive_output,
+            case_dir_output,
+        ],
+    )
+if __name__ == "__main__":
+    demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=int(os.environ.get("PORT", "7860")),
+    )