Image-Text-to-Text
Transformers
Diffusers
Safetensors
qwen3_vl
vision-language-model
image-decomposition
conversational
Instructions for using SynLayers/Bbox-caption-8b with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use SynLayers/Bbox-caption-8b with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="SynLayers/Bbox-caption-8b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("SynLayers/Bbox-caption-8b")
model = AutoModelForImageTextToText.from_pretrained("SynLayers/Bbox-caption-8b")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use SynLayers/Bbox-caption-8b with vLLM:
Install from pip and serve the model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "SynLayers/Bbox-caption-8b"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SynLayers/Bbox-caption-8b",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
```bash
docker model run hf.co/SynLayers/Bbox-caption-8b
```
- SGLang
How to use SynLayers/Bbox-caption-8b with SGLang:
Install from pip and serve the model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "SynLayers/Bbox-caption-8b" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SynLayers/Bbox-caption-8b",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "SynLayers/Bbox-caption-8b" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "SynLayers/Bbox-caption-8b",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use SynLayers/Bbox-caption-8b with Docker Model Runner:
```bash
docker model run hf.co/SynLayers/Bbox-caption-8b
```
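The vLLM and SGLang servers started above both expose the same OpenAI-compatible chat API, so a Python client can stand in for the curl calls. A minimal sketch using the `openai` package (an assumption of this example, installed with `pip install openai`; the base URL matches the vLLM command above, and switches to port 30000 for the SGLang server; the API key is a placeholder, since these local servers do not validate keys by default):

```python
# Query a locally served SynLayers/Bbox-caption-8b over the OpenAI-compatible API.
# Assumptions: `pip install openai`; vLLM running on port 8000 as started above
# (use base_url="http://localhost:30000/v1" for the SGLang server instead).
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",  # placeholder; the local servers do not check it by default
)

response = client.chat.completions.create(
    model="SynLayers/Bbox-caption-8b",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image in one sentence."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```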
Upload demo/app.py with huggingface_hub
demo/app.py CHANGED (+101 -31)
```diff
@@ -4,6 +4,22 @@ import os
 import sys
 from pathlib import Path
 
+try:
+    import spaces
+except ImportError:
+    class _SpacesCompat:
+        @staticmethod
+        def GPU(*decorator_args, **decorator_kwargs):
+            if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs:
+                return decorator_args[0]
+
+            def decorator(fn):
+                return fn
+
+            return decorator
+
+    spaces = _SpacesCompat()
+
 import gradio as gr
 import torch
 
@@ -32,6 +48,20 @@ DEFAULT_EXAMPLE_DIR = Path(
 )
 
 
+def read_int_env(name: str, default: int) -> int:
+    raw = os.environ.get(name)
+    if raw is None:
+        return default
+    try:
+        return int(raw)
+    except ValueError:
+        return default
+
+
+ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
+ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))
+
+
 def list_example_images(limit: int = 6) -> list[list[str]]:
     if not DEFAULT_EXAMPLE_DIR.exists():
         return []
@@ -63,36 +93,83 @@ def get_gpu_name() -> str:
     return f"Unavailable ({exc})"
 
 
+def is_zero_gpu_space() -> bool:
+    accelerator = os.environ.get("ACCELERATOR", "").lower()
+    return (
+        os.environ.get("ZEROGPU_V2", "").lower() == "true"
+        or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
+        or accelerator == "zerogpu"
+        or accelerator.startswith("zero")
+    )
+
+
 def get_runtime_status_markdown() -> str:
     accelerator = os.environ.get("ACCELERATOR", "unknown")
     space_id = os.environ.get("SPACE_ID", "local")
     model_repo = os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)")
-    cuda_available = torch.cuda.is_available()
-
-    lines = [
-        "## Runtime Status",
-        f"- `SPACE_ID`: `{space_id}`",
-        f"- `ACCELERATOR`: `{accelerator}`",
-        f"- `CUDA available`: `{cuda_available}`",
-        f"- `GPU device`: `{get_gpu_name()}`",
-        f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
-        "",
-    ]
-
-    if accelerator == "none" or not cuda_available:
+    zero_gpu_enabled = is_zero_gpu_space()
+
+    lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]
+
+    if zero_gpu_enabled:
         lines.extend(
             [
-                "This Space is not currently running with a usable CUDA GPU.",
-                "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
-                "Visitors cannot switch GPUs from inside the Gradio app.",
+                f"- `ZeroGPU mode`: `True`",
+                f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
+                f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
+                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
+                f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
+                "",
+                "This Space is configured for Hugging Face ZeroGPU.",
+                "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
+                "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
             ]
         )
     else:
-        lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")
+        cuda_available = torch.cuda.is_available()
+        lines.extend(
+            [
+                f"- `CUDA available`: `{cuda_available}`",
+                f"- `GPU device`: `{get_gpu_name()}`",
+                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
+                "",
+            ]
+        )
+
+        if accelerator == "none" or not cuda_available:
+            lines.extend(
+                [
+                    "This Space is not currently running with a usable CUDA GPU.",
+                    "The GPU type must be chosen by the Space owner in Hugging Face `Settings -> Hardware`.",
+                    "Visitors cannot switch GPUs from inside the Gradio app.",
+                ]
+            )
+        else:
+            lines.append("The CUDA runtime is available and the full SynLayers pipeline can run here.")
 
     return "\n".join(lines)
 
 
+@spaces.GPU(duration=ZERO_GPU_DURATION, size=ZERO_GPU_SIZE)
+def run_demo_inference(
+    image_path: str,
+    sample_name: str,
+    max_new_tokens: int,
+    seed_value: float,
+) -> dict:
+    seed = int(seed_value) if seed_value >= 0 else None
+    return run_real_world_pipeline(
+        image_path=image_path,
+        sample_name=sample_name or None,
+        work_dir=DEFAULT_WORK_DIR,
+        bbox_model=DEFAULT_BBOX_MODEL,
+        config_path=DEFAULT_REAL_CONFIG_PATH,
+        max_new_tokens=int(max_new_tokens),
+        seed=seed,
+        run_name=DEFAULT_RUN_NAME,
+    )
+
+
 def run_demo(
     image_path: str,
     sample_name: str,
@@ -102,18 +179,12 @@ def run_demo(
     if not image_path:
         raise gr.Error("Please upload an input image first.")
 
-    seed = int(seed_value) if seed_value >= 0 else None
-
     try:
-        result = run_real_world_pipeline(
+        result = run_demo_inference(
            image_path=image_path,
-            sample_name=sample_name or None,
-            work_dir=DEFAULT_WORK_DIR,
-            bbox_model=DEFAULT_BBOX_MODEL,
-            config_path=DEFAULT_REAL_CONFIG_PATH,
-            max_new_tokens=int(max_new_tokens),
-            seed=seed,
-            run_name=DEFAULT_RUN_NAME,
+            sample_name=sample_name,
+            max_new_tokens=max_new_tokens,
+            seed_value=seed_value,
         )
     except Exception as exc:
         raise gr.Error(str(exc)) from exc
@@ -139,11 +210,10 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
     1. VLM for whole-caption + bounding-box detection
     2. SynLayers real-image layer decomposition
 
-    This Space […]
-    […] while model assets are loaded from Hugging Face.
+    This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
+    The first request may take time while model assets are loaded from Hugging Face.
 
-    […]
-    […] not from inside this app.
+    In ZeroGPU mode, a shared GPU is requested only while inference is running.
     """
     )
     runtime_status = gr.Markdown(get_runtime_status_markdown())
```
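The core of this commit is the `try`/`except ImportError` shim at the top of the diff: on a Space the real `spaces` package is imported, while locally `_SpacesCompat` makes `spaces.GPU` a no-op that accepts both decorator forms. A standalone sketch of the same pattern (the `caption` and `decompose` functions are hypothetical, added only to show both forms):

```python
# Standalone sketch of the import shim from demo/app.py above. When the
# `spaces` package is unavailable, `spaces.GPU` degrades to a no-op decorator
# that works both bare (@spaces.GPU) and parameterized (@spaces.GPU(...)).
try:
    import spaces
except ImportError:
    class _SpacesCompat:
        @staticmethod
        def GPU(*decorator_args, **decorator_kwargs):
            # Bare form: the decorated function arrives as the only argument.
            if decorator_args and callable(decorator_args[0]) and len(decorator_args) == 1 and not decorator_kwargs:
                return decorator_args[0]

            # Parameterized form: return a decorator that leaves fn unchanged.
            def decorator(fn):
                return fn

            return decorator

    spaces = _SpacesCompat()


@spaces.GPU  # bare form: no GPU arguments
def caption() -> str:  # hypothetical function for illustration
    return "caption"


@spaces.GPU(duration=120)  # parameterized form, as used by run_demo_inference
def decompose() -> str:  # hypothetical function for illustration
    return "decompose"


if __name__ == "__main__":
    # Without `spaces` installed, both decorators are no-ops and this runs on CPU.
    print(caption(), decompose())
```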
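The new `read_int_env` helper and the two module-level settings mean the ZeroGPU request can be tuned through environment variables alone, with no code changes. A small sketch of how those values resolve (the helper is copied from the diff; the environment values set below are hypothetical test inputs):

```python
# Sketch: how SYNLAYERS_ZERO_GPU_SIZE / SYNLAYERS_ZERO_GPU_DURATION resolve.
# `read_int_env` is copied from the diff; the env values here are hypothetical.
import os


def read_int_env(name: str, default: int) -> int:
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        return default


os.environ["SYNLAYERS_ZERO_GPU_DURATION"] = "not-a-number"  # bad input
assert read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900) == 900  # falls back to default

os.environ["SYNLAYERS_ZERO_GPU_DURATION"] = "30"  # below the floor set in the app
duration = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))
assert duration == 60  # the max(60, ...) clamp keeps at least a 60-second request

size = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
assert size == "large"  # unset or blank resolves to the "large" default
```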