Spaces:

SynLayers
/

synlayers

Running on Zero

App Files Community

SynLayers committed 8 days ago

Commit

3357579

1 Parent(s): 4274d32

update app

Browse files

Files changed (2) hide show

app.py +17 -9
demo/real_world_pipeline.py +3 -1

app.py CHANGED Viewed

@@ -48,6 +48,7 @@ from demo.real_world_pipeline import (  # noqa: E402
     DEFAULT_WORK_DIR,
     run_real_world_pipeline,
 )
 DEFAULT_EXAMPLE_DIR = Path(
@@ -81,11 +82,11 @@ ZERO_GPU_SIZE = (
     os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
 ).lower()
-# ZeroGPU duration has a hard upper limit. 120s is usually the safe maximum.
 ZERO_GPU_DURATION = clamp(
-    read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 330),
     60,
-    360,
 )
 MODEL_PREFETCH_STATUS = {
@@ -176,17 +177,24 @@ def prefetch_model_assets() -> None:
     This does not instantiate the models. It only ensures files are already in
     the Hugging Face cache, so download time is not counted inside @spaces.GPU.
-    If the actual model construction in run_real_world_pipeline() is still slow,
-    the next step is to refactor demo/real_world_pipeline.py to cache model
-    objects globally.
     """
     if not MODEL_PREFETCH_STATUS["enabled"]:
         return
-    bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
     main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
     main_ok = prefetch_one_model(main_model, "main model")
     MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
     MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
@@ -287,7 +295,7 @@ def get_runtime_status_markdown() -> str:
                 "This Space is configured for Hugging Face ZeroGPU.",
                 "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
                 "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
-                "If the first request still times out, the remaining bottleneck is model construction inside `run_real_world_pipeline()`.",
             ]
         )
     else:
@@ -381,7 +389,7 @@ with gr.Blocks(title="SynLayers Real-World Demo") as demo:
         This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
         The first request may still take time while Python modules and model objects are initialized.
-        Model files are prefetched during Space startup to avoid downloading large weights inside the ZeroGPU function.
         """
     )

     DEFAULT_WORK_DIR,
     run_real_world_pipeline,
 )
+from demo.hf_repo_assets import ensure_repo_assets  # noqa: E402
 DEFAULT_EXAMPLE_DIR = Path(
     os.environ.get("SYNLAYERS_ZERO_GPU_SIZE", "large").strip() or "large"
 ).lower()
+# Keep this high enough for the full pipeline after model initialization.
 ZERO_GPU_DURATION = clamp(
+    read_int_env("SYNLAYERS_ZERO_GPU_DURATION", 500),
     60,
+    500,
 )
 MODEL_PREFETCH_STATUS = {
     This does not instantiate the models. It only ensures files are already in
     the Hugging Face cache, so download time is not counted inside @spaces.GPU.
+    Model objects are cached in demo/real_world_pipeline.py after their first
+    construction in the running process.
     """
     if not MODEL_PREFETCH_STATUS["enabled"]:
         return
     main_model = os.environ.get("SYNLAYERS_MODEL_REPO") or DEFAULT_MODEL_REPO_ID
+    bbox_ok = prefetch_one_model(DEFAULT_BBOX_MODEL, "bbox model")
     main_ok = prefetch_one_model(main_model, "main model")
+    try:
+        ensure_repo_assets(main_model)
+    except Exception as exc:
+        MODEL_PREFETCH_STATUS["error"] += (
+            f"\n- Failed to prefetch runtime assets from `{main_model}`: {exc}"
+        )
+        main_ok = False
     MODEL_PREFETCH_STATUS["bbox_done"] = bool(bbox_ok)
     MODEL_PREFETCH_STATUS["main_done"] = bool(main_ok)
                 "This Space is configured for Hugging Face ZeroGPU.",
                 "A shared GPU is requested on demand when you click `Run Full Pipeline`.",
                 "Model files are prefetched during Space startup, before the ZeroGPU function is called.",
+                "After the first successful request, model objects are reused while the Python process stays alive.",
             ]
         )
     else:
         This Space can run either on a dedicated GPU Space or on Hugging Face ZeroGPU.
         The first request may still take time while Python modules and model objects are initialized.
+        Model files are prefetched during Space startup, and initialized model objects are reused while the process stays alive.
         """
     )

demo/real_world_pipeline.py CHANGED Viewed

@@ -39,6 +39,7 @@ DEFAULT_MODEL_REPO_ID = "SynLayers/Bbox-caption-8b"
 _BBOX_CACHE: dict[str, object] = {"model_path": None, "model": None, "processor": None}
 _REAL_CACHE: dict[str, object] = {"key": None, "pipeline": None, "transp_vae": None}
 def slugify(text: str) -> str:
@@ -406,7 +407,8 @@ def run_real_world_pipeline(
     bbox_jsonl = write_bbox_jsonl(record, run_dir / "caption_bbox_infer.jsonl")
     bbox_vis_path = run_dir / "bbox_vis" / f"{normalized_sample_name}_vis.png"
     draw_boxes(prepared_image_path, bboxes, bbox_vis_path)
-    release_bbox_bundle()
     config = build_runtime_config(
         config_path=config_path,

 _BBOX_CACHE: dict[str, object] = {"model_path": None, "model": None, "processor": None}
 _REAL_CACHE: dict[str, object] = {"key": None, "pipeline": None, "transp_vae": None}
+RELEASE_BBOX_AFTER_CAPTION = os.environ.get("SYNLAYERS_RELEASE_BBOX_AFTER_CAPTION", "0") == "1"
 def slugify(text: str) -> str:
     bbox_jsonl = write_bbox_jsonl(record, run_dir / "caption_bbox_infer.jsonl")
     bbox_vis_path = run_dir / "bbox_vis" / f"{normalized_sample_name}_vis.png"
     draw_boxes(prepared_image_path, bboxes, bbox_vis_path)
+    if RELEASE_BBOX_AFTER_CAPTION:
+        release_bbox_bundle()
     config = build_runtime_config(
         config_path=config_path,