SynLayers committed
Commit 3064720 · verified · 1 Parent(s): 0312c85

Update app.py

Files changed (1):
  1. app.py +187 -12
app.py CHANGED
@@ -1,4 +1,4 @@
-from __future__ import annotations
+from __future__ import annotations
 
 import os
 import sys
@@ -23,12 +23,20 @@ except ImportError:
 import gradio as gr
 import torch
 
+try:
+    from huggingface_hub import snapshot_download
+except Exception:
+    snapshot_download = None
+
+
 CURRENT_FILE = Path(__file__).resolve()
 PROJECT_ROOT = CURRENT_FILE.parents[1]
+
 for candidate in (CURRENT_FILE.parent, CURRENT_FILE.parents[1]):
     if (candidate / "infer").exists() and (candidate / "models").exists():
         PROJECT_ROOT = candidate
         break
+
 if str(PROJECT_ROOT) not in sys.path:
     sys.path.insert(0, str(PROJECT_ROOT))
 
@@ -41,6 +49,7 @@ from demo.real_world_pipeline import ( # noqa: E402
     run_real_world_pipeline,
 )
 
+
 DEFAULT_EXAMPLE_DIR = Path(
     os.environ.get(
         "SYNLAYERS_EXAMPLE_DIR",
@@ -48,6 +57,11 @@ DEFAULT_EXAMPLE_DIR = Path(
     )
 )
 
+HF_HOME = Path(os.environ.get("HF_HOME", "/data/.huggingface"))
+HF_HOME.mkdir(parents=True, exist_ok=True)
+os.environ.setdefault("HF_HOME", str(HF_HOME))
+os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
+
 
 def read_int_env(name: str, default: int) -> int:
     raw = os.environ.get(name)
@@ -59,8 +73,126 @@ def read_int_env(name: str, default: int) -> int:
     return default
 
 
-ZERO_GPU_SIZE = (os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large").lower()
-ZERO_GPU_DURATION = max(60, read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 120))
+def clamp(value: int, low: int, high: int) -> int:
+    return max(low, min(value, high))
+
+
+ZERO_GPU_SIZE = (
+    os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
+).lower()
+
+# ZeroGPU duration has a hard upper limit. 120s is usually the safe maximum.
+ZERO_GPU_DURATION = clamp(
+    read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 120),
+    60,
+    120,
+)
+
+MODEL_PREFETCH_STATUS = {
+    "enabled": os.environ.get("SYNLAYERS_DISABLE_PREFETCH", "0") != "1",
+    "bbox_model": str(DEFAULT_BBOX_MODEL),
+    "main_model": str(os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID),
+    "bbox_done": False,
+    "main_done": False,
+    "error": "",
+}
+
+
+def is_hf_repo_id(path_or_repo: str | Path | None) -> bool:
+    if path_or_repo is None:
+        return False
+
+    value = str(path_or_repo)
+
+    if not value:
+        return False
+
+    # Local path.
+    if value.startswith("/") or value.startswith("./") or value.startswith("../"):
+        return False
+
+    # HF repo id usually looks like "namespace/repo".
+    return "/" in value and not Path(value).exists()
+
+
+def prefetch_one_model(repo_id_or_path: str | Path | None, label: str) -> bool:
+    if snapshot_download is None:
+        MODEL_PREFETCH_STATUS["error"] += (
+            f"\n- Cannot prefetch {label}: huggingface_hub.snapshot_download is unavailable."
+        )
+        return False
+
+    if not is_hf_repo_id(repo_id_or_path):
+        return True
+
+    repo_id = str(repo_id_or_path)
+
+    try:
+        snapshot_download(
+            repo_id=repo_id,
+            local_files_only=False,
+            resume_download=True,
+            allow_patterns=[
+                "config.json",
+                "generation_config.json",
+                "preprocessor_config.json",
+                "processor_config.json",
+                "tokenizer.json",
+                "tokenizer_config.json",
+                "special_tokens_map.json",
+                "merges.txt",
+                "vocab.json",
+                "*.py",
+                "*.json",
+                "*.safetensors",
+                "*.safetensors.index.json",
+                "*.bin",
+                "*.pt",
+            ],
+            ignore_patterns=[
+                ".git/*",
+                "*.md",
+                "*.txt",
+                "*.png",
+                "*.jpg",
+                "*.jpeg",
+                "*.webp",
+                "*.mp4",
+                "*.zip",
+                "*.tar",
+                "*.tar.gz",
+            ],
+        )
+        return True
+    except Exception as exc:
+        MODEL_PREFETCH_STATUS["error"] += f"\n- Failed to prefetch {label} `{repo_id}`: {exc}"
+        return False
+
+
+def prefetch_model_assets() -> None:
+    """
+    Download model files before the ZeroGPU function is called.
+
+    This does not instantiate the models. It only ensures files are already in
+    the Hugging Face cache, so download time is not counted inside @spaces.GPU.
+
+    If the actual model construction in run_real_world_pipeline() is still slow,
+    the next step is to refactor demo/real_world_pipeline.py to cache model
+    objects globally.
+    """
+    if not MODEL_PREFETCH_STATUS["enabled"]:
+        return
+
+    bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
+    main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
+    main_ok = prefetch_one_model(main_model, "main model")
+
+    MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
+    MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
+
+
+# Run prefetch during Space startup, outside the ZeroGPU-decorated function.
+prefetch_model_assets()
 
 
 def list_example_images(limit: int = 6) -> list[list[str]]:
@@ -70,32 +202,39 @@ def list_example_images(limit: int = 6) -> list[list[str]]:
     candidates = []
     for ext in ("*.png", "*.jpg", "*.jpeg", "*.webp"):
         candidates.extend(DEFAULT_EXAMPLE_DIR.glob(ext))
+
     candidates = sorted(candidates)[:limit]
     return [[str(path)] for path in candidates]
 
 
 def build_gallery(result: dict) -> list[tuple[str, str]]:
     gallery: list[tuple[str, str]] = []
+
     if result.get("whole_image_rgba"):
         gallery.append((result["whole_image_rgba"], "Whole RGBA"))
+
     if result.get("background_rgba"):
         gallery.append((result["background_rgba"], "Background RGBA"))
+
     for idx, path in enumerate(result.get("layer_images", [])):
         gallery.append((path, f"Layer {idx}"))
+
     return gallery
 
 
 def get_gpu_name() -> str:
     if not torch.cuda.is_available():
         return "None"
+
     try:
         return torch.cuda.get_device_name(torch.cuda.current_device())
-    except Exception as exc:  # pragma: no cover - defensive runtime reporting
+    except Exception as exc:
         return f"Unavailable ({exc})"
 
 
 def is_zero_gpu_space() -> bool:
     accelerator = os.environ.get("ACCELERATOR", "").lower()
+
     return (
         os.environ.get("ZEROGPU_V2", "").lower() == "true"
         or os.environ.get("ZERO_GPU_PATCH_TORCH_DEVICE") == "1"
@@ -110,29 +249,55 @@ def get_runtime_status_markdown() -> str:
     model_repo = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
     zero_gpu_enabled = is_zero_gpu_space()
 
-    lines = ["## Runtime Status", f"- `SPACE_ID`: `{space_id}`", f"- `ACCELERATOR`: `{accelerator}`"]
+    lines = [
+        "## Runtime Status",
+        f"- `SPACE_ID`: `{space_id}`",
+        f"- `ACCELERATOR`: `{accelerator}`",
+        f"- `HF_HOME`: `{os.environ.get('HF_HOME', '')}`",
+        f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
+        "",
+        "## Model Asset Prefetch",
+        f"- `Prefetch enabled`: `{MODEL_PREFETCH_STATUS['enabled']}`",
+        f"- `BBox model`: `{MODEL_PREFETCH_STATUS['bbox_model']}`",
+        f"- `Main model`: `{MODEL_PREFETCH_STATUS['main_model']}`",
+        f"- `BBox model files prefetched`: `{MODEL_PREFETCH_STATUS['bbox_done']}`",
+        f"- `Main model files prefetched`: `{MODEL_PREFETCH_STATUS['main_done']}`",
+    ]
+
+    if MODEL_PREFETCH_STATUS["error"]:
+        lines.extend(
+            [
+                "",
+                "### Prefetch Warnings",
+                MODEL_PREFETCH_STATUS["error"],
+            ]
+        )
+
+    lines.append("")
 
     if zero_gpu_enabled:
         lines.extend(
             [
+                "## ZeroGPU",
                 f"- `ZeroGPU mode`: `True`",
                 f"- `Requested GPU size`: `{ZERO_GPU_SIZE}`",
                 f"- `Requested max duration`: `{ZERO_GPU_DURATION}` seconds",
-                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                 f"- `CUDA probe outside @spaces.GPU`: `{torch.cuda.is_available()}`",
                 "",
                 "This Space is configured for Hugging Face ZeroGPU.",
-                "A shared H200 GPU is requested on demand when you click `Run Full Pipeline`.",
-                "Queueing and quota are managed by Hugging Face ZeroGPU, not by an in-app GPU selector.",
+                "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
+                "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
+                "If the first request still times out, the remaining bottleneck is model construction inside `run_real_world_pipeline()`.",
            ]
         )
    else:
        cuda_available = torch.cuda.is_available()
+
        lines.extend(
            [
+                "## CUDA",
                f"- `CUDA available`: `{cuda_available}`",
                f"- `GPU device`: `{get_gpu_name()}`",
-                f"- `SYNLAYERS_MODEL_REPO`: `{model_repo}`",
                "",
            ]
        )
@@ -159,6 +324,7 @@ def run_demo_inference(
     seed_value: float,
 ) -> dict:
     seed = int(seed_value) if seed_value >= 0 else None
+
     return run_real_world_pipeline(
         image_path=image_path,
         sample_name=sample_name or None,
@@ -208,25 +374,29 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
     # SynLayers Real-World Decomposition
 
     Upload a single image and run the full pipeline in one step:
+
     1. VLM for whole-caption + bounding-box detection
     2. SynLayers real-image layer decomposition
 
     This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
-    The first request may take time while model assets are loaded from Hugging Face.
 
-    In ZeroGPU mode, a shared GPU is requested only while inference is running.
+    The first request may still take time while Python modules and model objects are initialized.
+    Model files are prefetched during Space startup to avoid downloading large weights inside the ZeroGPU function.
     """
     )
+
     runtime_status = gr.Markdown(get_runtime_status_markdown())
     refresh_status_button = gr.Button("Refresh Runtime Status")
 
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="filepath", label="Input Image")
+
             sample_name_input = gr.Textbox(
                 label="Optional Sample Name",
                 placeholder="Leave empty to use the uploaded filename",
            )
+
            max_new_tokens_input = gr.Slider(
                minimum=128,
                maximum=2048,
@@ -234,11 +404,13 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
                step=64,
                label="VLM Max New Tokens",
            )
+
            seed_input = gr.Number(
                value=42,
                precision=0,
                label="Seed (-1 keeps config default)",
            )
+
            run_button = gr.Button("Run Full Pipeline", variant="primary")
 
        with gr.Column(scale=1):
@@ -246,10 +418,13 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
            merged_output = gr.Image(type="filepath", label="Merged Decomposition")
 
            caption_output = gr.Textbox(label="Whole Caption", lines=6)
+
            with gr.Row():
                bbox_json_output = gr.JSON(label="BBox JSON")
                meta_json_output = gr.JSON(label="Inference Metadata")
+
            layer_gallery = gr.Gallery(label="Predicted Layers", columns=4, height="auto")
+
            with gr.Row():
                archive_output = gr.File(label="Download Result Bundle")
                case_dir_output = gr.Textbox(label="Saved Case Directory")
@@ -288,4 +463,4 @@ if __name__ == "__main__":
    demo.queue().launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
-    )
+    )
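
As a side note on the prefetch logic in this commit: a quick way to sanity-check it locally (not part of the commit itself) is to call `snapshot_download` again with `local_files_only=True`, which reads only the local cache and raises when the requested files are missing. A minimal sketch, assuming `huggingface_hub` is installed; the repo id is a placeholder:

    from huggingface_hub import snapshot_download

    def is_prefetched(repo_id: str) -> bool:
        # local_files_only=True never touches the network; huggingface_hub
        # raises LocalEntryNotFoundError when the files are not cached yet.
        try:
            snapshot_download(
                repo_id=repo_id,
                local_files_only=True,
                allow_patterns=["config.json"],
            )
            return True
        except Exception:
            return False

    # "namespace/repo" is a placeholder; use the Space's actual model repo id.
    print(is_prefetched("namespace/repo"))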
 
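
Note that the diff computes `ZERO_GPU_SIZE` and `ZERO_GPU_DURATION`, but the `@spaces.GPU`-decorated entry point sits outside these hunks. Assuming the standard ZeroGPU pattern, the duration would be consumed roughly as in this sketch; the wrapper name is hypothetical, and how `ZERO_GPU_SIZE` is applied is not visible in this commit:

    import spaces

    # Hypothetical wrapper around the app's run_demo_inference(): ZeroGPU
    # attaches a shared GPU only for the duration of this call and releases
    # it afterwards, which is why downloads are moved to Space startup.
    @spaces.GPU(duration=ZERO_GPU_DURATION)
    def run_demo_inference_gpu(*args, **kwargs):
        return run_demo_inference(*args, **kwargs)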