SynLayers committed on
Commit
3357579
·
1 Parent(s): 4274d32

update app

Browse files
Files changed (2) hide show
  1. app.py +17 -9
  2. demo/real_world_pipeline.py +3 -1
app.py CHANGED
@@ -48,6 +48,7 @@ from demo.real_world_pipeline import ( # noqa: E402
48
  DEFAULT_WORK_DIR,
49
  run_real_world_pipeline,
50
  )
 
51
 
52
 
53
  DEFAULT_EXAMPLE_DIR = Path(
@@ -81,11 +82,11 @@ ZERO_GPU_SIZE = (
81
  os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
82
  ).lower()
83
 
84
- # ZeroGPU duration has a hard upper limit. 120s is usually the safe maximum.
85
  ZERO_GPU_DURATION = clamp(
86
- read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 330),
87
  60,
88
- 360,
89
  )
90
 
91
  MODEL_PREFETCH_STATUS = {
@@ -176,17 +177,24 @@ def prefetch_model_assets() -> None:
176
  This does not instantiate the models. It only ensures files are already in
177
  the Hugging Face cache, so download time is not counted inside @spaces.GPU.
178
 
179
- If the actual model construction in run_real_world_pipeline() is still slow,
180
- the next step is to refactor demo/real_world_pipeline.py to cache model
181
- objects globally.
182
  """
183
  if not MODEL_PREFETCH_STATUS["enabled"]:
184
  return
185
 
186
- bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
187
  main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
 
188
  main_ok = prefetch_one_model(main_model, "main model")
189
 
 
 
 
 
 
 
 
 
190
  MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
191
  MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
192
 
@@ -287,7 +295,7 @@ def get_runtime_status_markdown() -> str:
287
  "This Space is configured for Hugging Face ZeroGPU.",
288
  "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
289
  "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
290
- "If the first request still times out, the remaining bottleneck is model construction inside `run_real_world_pipeline()`.",
291
  ]
292
  )
293
  else:
@@ -381,7 +389,7 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
381
  This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
382
 
383
  The first request may still take time while Python modules and model objects are initialized.
384
- Model files are prefetched during Space startup to avoid downloading large weights inside the ZeroGPU function.
385
  """
386
  )
387
 
 
48
  DEFAULT_WORK_DIR,
49
  run_real_world_pipeline,
50
  )
51
+ from demo.hf_repo_assets import ensure_repo_assets # noqa: E402
52
 
53
 
54
  DEFAULT_EXAMPLE_DIR = Path(
 
82
  os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
83
  ).lower()
84
 
85
+ # Keep this high enough for the full pipeline after model initialization.
86
  ZERO_GPU_DURATION = clamp(
87
+ read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 500),
88
  60,
89
+ 500,
90
  )
91
 
92
  MODEL_PREFETCH_STATUS = {
 
177
  This does not instantiate the models. It only ensures files are already in
178
  the Hugging Face cache, so download time is not counted inside @spaces.GPU.
179
 
180
+ Model objects are cached in demo/real_world_pipeline.py after their first
181
+ construction in the running process.
 
182
  """
183
  if not MODEL_PREFETCH_STATUS["enabled"]:
184
  return
185
 
 
186
  main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
187
+ bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
188
  main_ok = prefetch_one_model(main_model, "main model")
189
 
190
+ try:
191
+ ensure_repo_assets(main_model)
192
+ except Exception as exc:
193
+ MODEL_PREFETCH_STATUS["error"] += (
194
+ f"\n- Failed to prefetch runtime assets from `{main_model}`: {exc}"
195
+ )
196
+ main_ok = False
197
+
198
  MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
199
  MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
200
 
 
295
  "This Space is configured for Hugging Face ZeroGPU.",
296
  "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
297
  "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
298
+ "After the first successful request, model objects are reused while the Python process stays alive.",
299
  ]
300
  )
301
  else:
 
389
  This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
390
 
391
  The first request may still take time while Python modules and model objects are initialized.
392
+ Model files are prefetched during Space startup, and initialized model objects are reused while the process stays alive.
393
  """
394
  )
395
 
demo/real_world_pipeline.py CHANGED
@@ -39,6 +39,7 @@ DEFAULT_MODEL_REPO_ID = "SynLayers/Bbox-caption-8b"
39
 
40
  _BBOX_CACHE: dict[str, object] = {"model_path": None, "model": None, "processor": None}
41
  _REAL_CACHE: dict[str, object] = {"key": None, "pipeline": None, "transp_vae": None}
 
42
 
43
 
44
  def slugify(text: str) -> str:
@@ -406,7 +407,8 @@ def run_real_world_pipeline(
406
  bbox_jsonl = write_bbox_jsonl(record, run_dir / "caption_bbox_infer.jsonl")
407
  bbox_vis_path = run_dir / "bbox_vis" / f"{normalized_sample_name}_vis.png"
408
  draw_boxes(prepared_image_path, bboxes, bbox_vis_path)
409
- release_bbox_bundle()
 
410
 
411
  config = build_runtime_config(
412
  config_path=config_path,
 
39
 
40
  _BBOX_CACHE: dict[str, object] = {"model_path": None, "model": None, "processor": None}
41
  _REAL_CACHE: dict[str, object] = {"key": None, "pipeline": None, "transp_vae": None}
42
+ RELEASE_BBOX_AFTER_CAPTION = os.environ.get("SYNLAYERS_RELEASE_BBOX_AFTER_CAPTION", "0") == "1"
43
 
44
 
45
  def slugify(text: str) -> str:
 
407
  bbox_jsonl = write_bbox_jsonl(record, run_dir / "caption_bbox_infer.jsonl")
408
  bbox_vis_path = run_dir / "bbox_vis" / f"{normalized_sample_name}_vis.png"
409
  draw_boxes(prepared_image_path, bboxes, bbox_vis_path)
410
+ if RELEASE_BBOX_AFTER_CAPTION:
411
+ release_bbox_bundle()
412
 
413
  config = build_runtime_config(
414
  config_path=config_path,