SynLayers committed on
Commit
b08a1a8
·
verified ·
1 Parent(s): a5975f0

Upload demo/app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. demo/app.py +194 -19
demo/app.py CHANGED
@@ -23,30 +23,46 @@ except ImportError:
23
  import gradio as gr
24
  import torch
25
 
 
 
 
 
 
 
26
  CURRENT_FILE = Path(__file__).resolve()
27
  PROJECT_ROOT = CURRENT_FILE.parents[1]
 
28
  for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
29
  if (candidate / "infer").exists() and (candidate / "models").exists():
30
  PROJECT_ROOT = candidate
31
  break
 
32
  if str(PROJECT_ROOT) not in sys.path:
33
  sys.path.insert(0, str(PROJECT_ROOT))
34
 
35
  from demo.real_world_pipeline import ( # noqa: E402
36
  DEFAULT_BBOX_MODEL,
 
37
  DEFAULT_REAL_CONFIG_PATH,
38
  DEFAULT_RUN_NAME,
39
  DEFAULT_WORK_DIR,
40
  run_real_world_pipeline,
41
  )
 
 
42
 
43
  DEFAULT_EXAMPLE_DIR = Path(
44
  os.environ.get(
45
  "SYNLAYERS_EXAMPLE_DIR",
46
- "/project/llmsvgen/share/data/kmw_layered_dataset/real_world_inference/layers_real_test_1024",
47
  )
48
  )
49
 
 
 
 
 
 
50
 
51
  def read_int_env(name: str, default: int) -> int:
52
  raw = os.environ.get(name)
@@ -58,8 +74,131 @@ def read_int_env(name: str, default: int) -> int:
58
  return default
59
 
60
 
61
- ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
62
- ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 900))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
65
  def list_example_images(limit: int = 6) -> list[list[str]]:
@@ -69,32 +208,39 @@ def list_example_images(limit: int = 6) -> list[list[str]]:
69
  candidates = []
70
  for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
71
  candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
 
72
  candidates = sorted(candidates)[:limit]
73
  return [[str(path)] for path in candidates]
74
 
75
 
76
  def build_gallery(result: dict) -> list[tuple[str, str]]:
77
  gallery: list[tuple[str, str]] = []
 
78
  if result.get("whole_image_rgba"):
79
  gallery.append((result["whole_image_rgba"], "Whole RGBA"))
 
80
  if result.get("background_rgba"):
81
  gallery.append((result["background_rgba"], "Background RGBA"))
 
82
  for idx, path in enumerate(result.get("layer_images", [])):
83
  gallery.append((path, f"Layer {idx}"))
 
84
  return gallery
85
 
86
 
87
  def get_gpu_name() -> str:
88
  if not torch.cuda.is_available():
89
  return "None"
 
90
  try:
91
  return torch.cuda.get_device_name(torch.cuda.current_device())
92
- except Exception as exc: # pragma: no cover - defensive runtime reporting
93
  return f"Unavailable ({exc})"
94
 
95
 
96
  def is_zero_gpu_space() -> bool:
97
  accelerator = os.environ.get("ACCELERATOR", "").lower()
 
98
  return (
99
  os.environ.get("ZEROGPU_V2", "").lower() == "true"
100
  or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
@@ -106,35 +252,58 @@ def is_zero_gpu_space() -> bool:
106
  def get_runtime_status_markdown() -> str:
107
  accelerator = os.environ.get("ACCELERATOR", "unknown")
108
  space_id = os.environ.get("SPACE_ID", "local")
109
- bbox_repo = os.environ.get("SYNLAYERS_BBOX_MODEL_REPO") or os.environ.get("SYNLAYERS_BBOX_MODEL", "(unset)")
110
- stage2_repo = os.environ.get("SYNLAYERS_STAGE2_MODEL_REPO") or os.environ.get("SYNLAYERS_MODEL_REPO", "(unset)")
111
  zero_gpu_enabled = is_zero_gpu_space()
112
 
113
- lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  if zero_gpu_enabled:
116
  lines.extend(
117
  [
 
118
  f"- `ZeroGPU mode`: `True`",
119
  f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
120
  f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
121
- f"- `Stage 1 bbox repo/path`: `{bbox_repo}`",
122
- f"- `Stage 2 repo`: `{stage2_repo}`",
123
  f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
124
  "",
125
  "This Space is configured for Hugging Face ZeroGPU.",
126
- "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
127
- "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
 
128
  ]
129
  )
130
  else:
131
  cuda_available = torch.cuda.is_available()
 
132
  lines.extend(
133
  [
 
134
  f"- `CUDA available`: `{cuda_available}`",
135
  f"- `GPU device`: `{get_gpu_name()}`",
136
- f"- `Stage 1 bbox repo/path`: `{bbox_repo}`",
137
- f"- `Stage 2 repo`: `{stage2_repo}`",
138
  "",
139
  ]
140
  )
@@ -161,6 +330,7 @@ def run_demo_inference(
161
  seed_value: float,
162
  ) -> dict:
163
  seed = int(seed_value) if seed_value >= 0 else None
 
164
  return run_real_world_pipeline(
165
  image_path=image_path,
166
  sample_name=sample_name or None,
@@ -208,27 +378,27 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
208
  gr.Markdown(
209
  """
210
  # SynLayers Real-World Decomposition
211
-
212
  Upload a single image and run the full pipeline in one step:
213
  1. VLM for whole-caption + bounding-box detection
214
  2. SynLayers real-image layer decomposition
215
-
216
  This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
217
- The first request may take time while model assets are loaded from Hugging Face.
218
-
219
- In ZeroGPU mode, a shared GPU is requested only while inference is running.
220
  """
221
  )
 
222
  runtime_status = gr.Markdown(get_runtime_status_markdown())
223
  refresh_status_button = gr.Button("Refresh Runtime Status")
224
 
225
  with gr.Row():
226
  with gr.Column(scale=1):
227
  image_input = gr.Image(type="filepath", label="Input Image")
 
228
  sample_name_input = gr.Textbox(
229
  label="Optional Sample Name",
230
  placeholder="Leave empty to use the uploaded filename",
231
  )
 
232
  max_new_tokens_input = gr.Slider(
233
  minimum=128,
234
  maximum=2048,
@@ -236,11 +406,13 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
236
  step=64,
237
  label="VLM Max New Tokens",
238
  )
 
239
  seed_input = gr.Number(
240
  value=42,
241
  precision=0,
242
  label="Seed (-1 keeps config default)",
243
  )
 
244
  run_button = gr.Button("Run Full Pipeline", variant="primary")
245
 
246
  with gr.Column(scale=1):
@@ -248,10 +420,13 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
248
  merged_output = gr.Image(type="filepath", label="Merged Decomposition")
249
 
250
  caption_output = gr.Textbox(label="Whole Caption", lines=6)
 
251
  with gr.Row():
252
  bbox_json_output = gr.JSON(label="BBox JSON")
253
  meta_json_output = gr.JSON(label="Inference Metadata")
 
254
  layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
 
255
  with gr.Row():
256
  archive_output = gr.File(label="Download Result Bundle")
257
  case_dir_output = gr.Textbox(label="Saved Case Directory")
@@ -290,4 +465,4 @@ if __name__ == "__main__":
290
  demo.queue().launch(
291
  server_name="0.0.0.0",
292
  server_port=int(os.environ.get("PORT", "7860")),
293
- )
 
23
  import gradio as gr
24
  import torch
25
 
26
+ try:
27
+ from huggingface_hub import snapshot_download
28
+ except Exception:
29
+ snapshot_download = None
30
+
31
+
32
  CURRENT_FILE = Path(__file__).resolve()
33
  PROJECT_ROOT = CURRENT_FILE.parents[1]
34
+
35
  for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
36
  if (candidate / "infer").exists() and (candidate / "models").exists():
37
  PROJECT_ROOT = candidate
38
  break
39
+
40
  if str(PROJECT_ROOT) not in sys.path:
41
  sys.path.insert(0, str(PROJECT_ROOT))
42
 
43
  from demo.real_world_pipeline import ( # noqa: E402
44
  DEFAULT_BBOX_MODEL,
45
+ DEFAULT_MODEL_REPO_ID,
46
  DEFAULT_REAL_CONFIG_PATH,
47
  DEFAULT_RUN_NAME,
48
  DEFAULT_WORK_DIR,
49
  run_real_world_pipeline,
50
  )
51
+ from demo.hf_repo_assets import ensure_repo_assets # noqa: E402
52
+
53
 
54
  DEFAULT_EXAMPLE_DIR = Path(
55
  os.environ.get(
56
  "SYNLAYERS_EXAMPLE_DIR",
57
+ str(PROJECT_ROOT / "demo" / "examples"),
58
  )
59
  )
60
 
61
+ HF_HOME = Path(os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")))
62
+ HF_HOME.mkdir(parents=True, exist_ok=True)
63
+ os.environ["HF_HOME"] = str(HF_HOME)
64
+ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
65
+
66
 
67
  def read_int_env(name: str, default: int) -> int:
68
  raw = os.environ.get(name)
 
74
  return default
75
 
76
 
77
def clamp(value: int, low: int, high: int) -> int:
    """Return *value* limited to the inclusive range ``[low, high]``.

    Args:
        value: The number to constrain.
        low: Smallest value that may be returned.
        high: Largest value that may be returned.

    Returns:
        ``low`` if ``value < low``, ``high`` if ``value > high``, otherwise
        ``value``. If the bounds are inverted (``low > high``) the result is
        ``low``, because the lower bound is applied last.
    """
    return max(low, min(value, high))
79
+
80
+
81
+ ZERO_GPU_SIZE = (
82
+ os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
83
+ ).lower()
84
+
85
+ # Keep this high enough for the full pipeline after model initialization.
86
+ ZERO_GPU_DURATION = clamp(
87
+ read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 500),
88
+ 60,
89
+ 500,
90
+ )
91
+
92
+ MODEL_PREFETCH_STATUS = {
93
+ "enabled": os.environ.get("SYNLAYERS_DISABLE_PREFETCH", "0") != "1",
94
+ "bbox_model": str(DEFAULT_BBOX_MODEL),
95
+ "main_model": str(os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID),
96
+ "bbox_done": False,
97
+ "main_done": False,
98
+ "error": "",
99
+ }
100
+
101
+
102
+ def is_hf_repo_id(path_or_repo: str | Path | None) -> bool:
103
+ if path_or_repo is None:
104
+ return False
105
+
106
+ value = str(path_or_repo)
107
+
108
+ if not value:
109
+ return False
110
+
111
+ # Local path.
112
+ if value.startswith("/") or value.startswith("./") or value.startswith("../"):
113
+ return False
114
+
115
+ # HF repo id usually looks like "namespace/repo".
116
+ return "/" in value and not Path(value).exists()
117
+
118
+
119
def prefetch_one_model(repo_id_or_path: str | Path | None, label: str) -> bool:
    """Download the files of one Hub model into the local Hugging Face cache.

    Nothing is raised: failures are appended to
    ``MODEL_PREFETCH_STATUS["error"]`` as Markdown bullet lines.

    Args:
        repo_id_or_path: Hub repo id to download, or a local path / ``None``
            when there is nothing to fetch.
        label: Human-readable model name used in recorded error messages.

    Returns:
        ``True`` when the files were downloaded or no download was needed,
        ``False`` when the download could not be performed.
    """
    # Local paths and empty values need no download, so they must not be
    # reported as failures even when huggingface_hub is not installed.
    if not is_hf_repo_id(repo_id_or_path):
        return True

    if snapshot_download is None:
        MODEL_PREFETCH_STATUS["error"] += (
            f"\n- Cannot prefetch {label}: huggingface_hub.snapshot_download is unavailable."
        )
        return False

    repo_id = str(repo_id_or_path)

    try:
        # The allow-list alone decides what is fetched. No ignore_patterns are
        # passed: an ignore entry such as "*.txt" would win over the allow-list
        # and silently drop "merges.txt". `resume_download` is omitted because
        # it is deprecated in huggingface_hub (downloads resume automatically).
        snapshot_download(
            repo_id=repo_id,
            local_files_only=False,
            allow_patterns=[
                "config.json",
                "generation_config.json",
                "preprocessor_config.json",
                "processor_config.json",
                "tokenizer.json",
                "tokenizer_config.json",
                "special_tokens_map.json",
                "merges.txt",
                "vocab.json",
                "*.py",
                "*.json",
                "*.safetensors",
                "*.safetensors.index.json",
                "*.bin",
                "*.pt",
            ],
        )
    except Exception as exc:
        # Best effort: a failed prefetch must not stop the app from starting.
        MODEL_PREFETCH_STATUS["error"] += f"\n- Failed to prefetch {label} `{repo_id}`: {exc}"
        return False

    return True
171
+
172
+
173
def prefetch_model_assets() -> None:
    """Download model files before the ZeroGPU function is called.

    This does not instantiate the models. It only ensures files are already in
    the Hugging Face cache, so download time is not counted inside @spaces.GPU.
    Model objects are cached in demo/real_world_pipeline.py after their first
    construction in the running process.

    Does nothing when ``MODEL_PREFETCH_STATUS["enabled"]`` is false. Otherwise
    the outcome is written to ``MODEL_PREFETCH_STATUS`` (``bbox_done``,
    ``main_done`` and, on failure, ``error``); no exception is propagated.
    """
    if not MODEL_PREFETCH_STATUS["enabled"]:
        return

    main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
    bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
    main_ok = prefetch_one_model(main_model, "main model")

    try:
        ensure_repo_assets(main_model)
    except Exception as exc:
        # A failure here marks the main model as not ready even when its
        # snapshot download succeeded.
        MODEL_PREFETCH_STATUS["error"] += (
            f"\n- Failed to prefetch runtime assets from `{main_model}`: {exc}"
        )
        main_ok = False

    MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
    MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
198
+
199
+
200
+ # Run prefetch during Space startup, outside the ZeroGPU-decorated function.
201
+ prefetch_model_assets()
202
 
203
 
204
  def list_example_images(limit: int = 6) -> list[list[str]]:
 
208
  candidates = []
209
  for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
210
  candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
211
+
212
  candidates = sorted(candidates)[:limit]
213
  return [[str(path)] for path in candidates]
214
 
215
 
216
def build_gallery(result: dict) -> list[tuple[str, str]]:
    """Build the ``(image, caption)`` pairs for the layer gallery from *result*.

    Args:
        result: Mapping that may contain ``"whole_image_rgba"``,
            ``"background_rgba"`` and ``"layer_images"`` (an iterable of
            images). Missing, empty or ``None`` entries are skipped.

    Returns:
        The whole image first, then the background, then every layer captioned
        ``"Layer <index>"`` in its original order.
    """
    gallery: list[tuple[str, str]] = []

    fixed_entries = (
        ("whole_image_rgba", "Whole RGBA"),
        ("background_rgba", "Background RGBA"),
    )
    for key, caption in fixed_entries:
        image = result.get(key)
        if image:
            gallery.append((image, caption))

    # `or []` also covers a key that is present but set to None, which a
    # plain .get(key, []) default would pass straight to enumerate().
    layer_images = result.get("layer_images") or []
    gallery.extend((path, f"Layer {idx}") for idx, path in enumerate(layer_images))

    return gallery
229
 
230
 
231
def get_gpu_name() -> str:
    """Return a display name for the current CUDA device.

    Returns:
        The string ``"None"`` when ``torch.cuda.is_available()`` is false,
        the device name reported by torch otherwise, or
        ``"Unavailable (<error>)"`` if querying the device raises.
    """
    if not torch.cuda.is_available():
        return "None"

    try:
        return torch.cuda.get_device_name(torch.cuda.current_device())
    except Exception as exc:
        # Status reporting is best effort: a failing probe must not break the
        # page that displays it, so the error is shown instead of raised.
        return f"Unavailable ({exc})"
239
 
240
 
241
  def is_zero_gpu_space() -> bool:
242
  accelerator = os.environ.get("ACCELERATOR", "").lower()
243
+
244
  return (
245
  os.environ.get("ZEROGPU_V2", "").lower() == "true"
246
  or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
 
252
  def get_runtime_status_markdown() -> str:
253
  accelerator = os.environ.get("ACCELERATOR", "unknown")
254
  space_id = os.environ.get("SPACE_ID", "local")
255
+ model_repo = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
 
256
  zero_gpu_enabled = is_zero_gpu_space()
257
 
258
+ lines = [
259
+ "## Runtime Status",
260
+ f"- `SPACE_ID`: `{space_id}`",
261
+ f"- `ACCELERATOR`: `{accelerator}`",
262
+ f"- `HF_HOME`: `{os.environ.get('HF_HOME', '')}`",
263
+ f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
264
+ "",
265
+ "## Model Asset Prefetch",
266
+ f"- `Prefetch enabled`: `{MODEL_PREFETCH_STATUS['enabled']}`",
267
+ f"- `BBox model`: `{MODEL_PREFETCH_STATUS['bbox_model']}`",
268
+ f"- `Main model`: `{MODEL_PREFETCH_STATUS['main_model']}`",
269
+ f"- `BBox model files prefetched`: `{MODEL_PREFETCH_STATUS['bbox_done']}`",
270
+ f"- `Main model files prefetched`: `{MODEL_PREFETCH_STATUS['main_done']}`",
271
+ ]
272
+
273
+ if MODEL_PREFETCH_STATUS["error"]:
274
+ lines.extend(
275
+ [
276
+ "",
277
+ "### Prefetch Warnings",
278
+ MODEL_PREFETCH_STATUS["error"],
279
+ ]
280
+ )
281
+
282
+ lines.append("")
283
 
284
  if zero_gpu_enabled:
285
  lines.extend(
286
  [
287
+ "## ZeroGPU",
288
  f"- `ZeroGPU mode`: `True`",
289
  f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
290
  f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
 
 
291
  f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
292
  "",
293
  "This Space is configured for Hugging Face ZeroGPU.",
294
+ "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
295
+ "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
296
+ "After the first successful request, model objects are reused while the Python process stays alive.",
297
  ]
298
  )
299
  else:
300
  cuda_available = torch.cuda.is_available()
301
+
302
  lines.extend(
303
  [
304
+ "## CUDA",
305
  f"- `CUDA available`: `{cuda_available}`",
306
  f"- `GPU device`: `{get_gpu_name()}`",
 
 
307
  "",
308
  ]
309
  )
 
330
  seed_value: float,
331
  ) -> dict:
332
  seed = int(seed_value) if seed_value >= 0 else None
333
+
334
  return run_real_world_pipeline(
335
  image_path=image_path,
336
  sample_name=sample_name or None,
 
378
  gr.Markdown(
379
  """
380
  # SynLayers Real-World Decomposition
 
381
  Upload a single image and run the full pipeline in one step:
382
  1. VLM for whole-caption + bounding-box detection
383
  2. SynLayers real-image layer decomposition
 
384
  This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
385
+ The first request may still take time while Python modules and model objects are initialized.
386
+ Model files are prefetched during Space startup, and initialized model objects are reused while the process stays alive.
 
387
  """
388
  )
389
+
390
  runtime_status = gr.Markdown(get_runtime_status_markdown())
391
  refresh_status_button = gr.Button("Refresh Runtime Status")
392
 
393
  with gr.Row():
394
  with gr.Column(scale=1):
395
  image_input = gr.Image(type="filepath", label="Input Image")
396
+
397
  sample_name_input = gr.Textbox(
398
  label="Optional Sample Name",
399
  placeholder="Leave empty to use the uploaded filename",
400
  )
401
+
402
  max_new_tokens_input = gr.Slider(
403
  minimum=128,
404
  maximum=2048,
 
406
  step=64,
407
  label="VLM Max New Tokens",
408
  )
409
+
410
  seed_input = gr.Number(
411
  value=42,
412
  precision=0,
413
  label="Seed (-1 keeps config default)",
414
  )
415
+
416
  run_button = gr.Button("Run Full Pipeline", variant="primary")
417
 
418
  with gr.Column(scale=1):
 
420
  merged_output = gr.Image(type="filepath", label="Merged Decomposition")
421
 
422
  caption_output = gr.Textbox(label="Whole Caption", lines=6)
423
+
424
  with gr.Row():
425
  bbox_json_output = gr.JSON(label="BBox JSON")
426
  meta_json_output = gr.JSON(label="Inference Metadata")
427
+
428
  layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
429
+
430
  with gr.Row():
431
  archive_output = gr.File(label="Download Result Bundle")
432
  case_dir_output = gr.Textbox(label="Saved Case Directory")
 
465
  demo.queue().launch(
466
  server_name="0.0.0.0",
467
  server_port=int(os.environ.get("PORT", "7860")),
468
+ )