Spaces:

numind
/

NuExtract3

Running on A100

App Files Files Community

Alexandre committed 25 days ago

Commit

8595613

1 Parent(s): 6c9dd13

init

Browse files

Files changed (5) hide show

DockerFile +45 -0
README.md +7 -7
app.py +1677 -0
assets/logo_numind_picto.svg +31 -0
start.sh +40 -0

DockerFile ADDED Viewed

	@@ -0,0 +1,45 @@

+FROM vllm/vllm-openai:latest
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV MODEL_NAME=NM-dev/NuExtract3.4_4B-RL-400
+ENV OPENAI_API_BASE=http://127.0.0.1:8000/v1
+ENV OPENAI_API_KEY=EMPTY
+ENV GRADIO_SERVER_NAME=0.0.0.0
+ENV GRADIO_SERVER_PORT=7860
+ENV VLLM_PORT=8000
+ENV MAX_MODEL_LEN=8192
+ENV GPU_MEMORY_UTILIZATION=0.90
+ENV NUEXTRACT_MAX_TOKENS=5000
+ENV NUEXTRACT_EXAMPLE_DIR=/home/user/app/examples
+ENV NUEXTRACT_ASSETS_DIR=/home/user/app/assets
+ENV HF_HOME=/data/.cache/huggingface
+ENV TRANSFORMERS_CACHE=/data/.cache/huggingface
+ENV VLLM_CACHE_ROOT=/data/.cache/vllm
+RUN pip install --no-cache-dir \
+    gradio \
+    openai \
+    pillow
+RUN useradd -m -u 1000 user || true
+WORKDIR /home/user/app
+COPY --chown=user:user app.py /home/user/app/app.py
+COPY --chown=user:user start.sh /home/user/app/start.sh
+COPY --chown=user:user examples /home/user/app/examples
+COPY --chown=user:user assets /home/user/app/assets
+# Mirror the assets to /home/azureuser/assets for any code that still hardcodes that path
+# (app.py itself locates its assets through NUEXTRACT_ASSETS_DIR).
+RUN mkdir -p /home/azureuser/assets && \
+    cp -r /home/user/app/assets/* /home/azureuser/assets/ && \
+    chown -R user:user /home/user /home/azureuser || true && \
+    chmod +x /home/user/app/start.sh
+USER user
+EXPOSE 7860
+CMD ["/home/user/app/start.sh"]

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
-title: NuExtract3 4B
-emoji: 📚
-colorFrom: indigo
-colorTo: pink
 sdk: docker
 pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: NuExtract 3
+emoji: 📄
+colorFrom: blue
+colorTo: orange
 sdk: docker
+app_port: 7860
 pinned: false
+license: mit
+---

app.py ADDED Viewed

	@@ -0,0 +1,1677 @@

+import argparse
+import base64
+import io
+import json
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+import gradio as gr
+from openai import OpenAI
+from PIL import Image
+# ---------------- Paths ----------------
+APP_DIR = Path(__file__).resolve().parent
+# ---------------- CLI / environment configuration ----------------
def parse_args() -> argparse.Namespace:
    """Build the demo's command-line interface and parse the known arguments.

    Each option's default comes from an environment variable, with a built-in
    fallback when the variable is unset, so the app can be configured either
    from the command line or from the environment. Arguments the parser does
    not recognise are ignored rather than rejected.

    Returns:
        The namespace holding the parsed options.
    """
    env = os.environ.get
    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_specs = [
        (
            "--model-name",
            {
                "default": env("MODEL_NAME", "NM-dev/NuExtract3.4_4B-RL-400"),
                "help": "Model name served by the OpenAI-compatible endpoint.",
            },
        ),
        (
            "--api-base",
            {
                "default": env("OPENAI_API_BASE", "http://127.0.0.1:8000/v1"),
                "help": "OpenAI-compatible base URL.",
            },
        ),
        (
            "--api-key",
            {
                "default": env("OPENAI_API_KEY", "EMPTY"),
                "help": "API key for the OpenAI-compatible endpoint.",
            },
        ),
        (
            "--server-name",
            {
                "default": env("GRADIO_SERVER_NAME", "0.0.0.0"),
                "help": "Gradio server host.",
            },
        ),
        (
            "--server-port",
            {
                "type": int,
                "default": int(env("GRADIO_SERVER_PORT", "7860")),
                "help": "Gradio server port.",
            },
        ),
        (
            "--share",
            {
                "action": "store_true",
                # The environment can switch sharing on; the flag itself can only enable it.
                "default": env("GRADIO_SHARE", "false").lower() in {"1", "true", "yes"},
                "help": "Create a public Gradio share link.",
            },
        ),
        (
            "--max-tokens",
            {
                "type": int,
                "default": int(env("NUEXTRACT_MAX_TOKENS", "5000")),
                "help": "Maximum tokens for model generation. Hidden from the UI.",
            },
        ),
        (
            "--example-dir",
            {
                "default": env("NUEXTRACT_EXAMPLE_DIR", str(APP_DIR / "examples")),
                "help": "Directory containing image examples.",
            },
        ),
        (
            "--assets-dir",
            {
                "default": env("NUEXTRACT_ASSETS_DIR", str(APP_DIR / "assets")),
                "help": "Directory containing static assets such as the NuExtract logo.",
            },
        ),
    ]
    cli = argparse.ArgumentParser(description="NuExtract Gradio demo")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    known, _unknown = cli.parse_known_args()
    return known
def resolve_dir(path_like: str) -> Path:
    """Turn a directory setting into an absolute, fully resolved path.

    ``~`` is expanded first. A relative path is interpreted relative to
    ``APP_DIR`` rather than the current working directory.
    """
    candidate = Path(path_like).expanduser()
    anchored = candidate if candidate.is_absolute() else APP_DIR / candidate
    return anchored.resolve()
# Configuration is resolved once, at import time; the rest of the module reads these constants.
ARGS = parse_args()
DEFAULT_MODEL = ARGS.model_name
DEFAULT_API_BASE = ARGS.api_base
DEFAULT_API_KEY = ARGS.api_key
DEFAULT_MAX_TOKENS = ARGS.max_tokens
EXAMPLE_DIR = resolve_dir(ARGS.example_dir)
ASSETS_DIR = resolve_dir(ARGS.assets_dir)
LOGO_PATH = ASSETS_DIR / "logo_numind_picto.svg"
# An empty LOGO_URL means "no logo available"; the UI code checks its truthiness.
if not LOGO_PATH.exists():
    LOGO_URL = ""
    print(f"[assets] Missing logo: {LOGO_PATH}", flush=True)
else:
    # Register the assets directory as a static path so the logo URL below can be served.
    gr.set_static_paths(paths=[ASSETS_DIR])
    LOGO_URL = f"/gradio_api/file={LOGO_PATH}"
# Default system message sent ahead of every user request.
SYSTEM_PROMPT_DEFAULT = (
    "You are a precise information extraction assistant. "
    "Return faithful, source-grounded results only."
)
+# ---------------- Structured extraction examples ----------------
+# Each example fills three inputs: the image, the JSON template, and the instructions.
+STRUCTURED_EXAMPLE_TEMPLATES: Dict[str, Dict[str, Any]] = {
+    "1.jpg": {
+        "game_name": "verbatim-string",
+        "game_company_issuer_name": "string",
+        "currency_code_iso4217": "string",
+        "game_price": "number",
+        "game_maximum_possible_gain": "number",
+        "matched_winning_numbers": ["integer"],
+        "matched_winning_symbols": ["string"],
+        "gain": "number",
+    },
+    "2.png": {
+        "number_of_bathrooms": "integer",
+        "number_of_toilets": "integer",
+        "number_of_fireplaces": "integer",
+        "number_of_closets": "integer",
+        "distance_unit": ["meter", "foot"],
+        "rooms_that_are_not_bedrooms_or_corridors_or_toilets": [
+            {
+                "room_name": "verbatim-string",
+                "surface_area": "number",
+                "number_of_windows": "integer",
+                "number_of_doors": "integer",
+            }
+        ],
+        "bedrooms": [
+            {
+                "bedroom_name": "verbatim-string",
+                "surface_area": "number",
+                "number_of_windows": "integer",
+                "has_closet": "boolean",
+                "has_private_bathroom": "boolean",
+            }
+        ],
+        "has_laundry_room": "boolean",
+        "has_terrace": "boolean",
+        "has_balcony": "boolean",
+        "number_of_parking_spaces_in_garage": "integer",
+        "number_of_parking_spaces_exterior": "integer",
+    },
+}
+STRUCTURED_EXAMPLE_INSTRUCTIONS: Dict[str, str] = {
+    "1.jpg": "",
+    "2.png": "",
+}
+# ---------------- Markdown/OCR examples ----------------
+# List the Markdown example images here (paths are resolved against the
+# example directory unless they are absolute). These examples populate only
+# the Image input and are meant for the “Convert to Markdown” button.
+MARKDOWN_EXAMPLE_IMAGE_PATHS: List[str] = [
+    "3.jpg",
+    "4.jpg",
+    "5.jpg",
+    "6.png",
+    "7.jpg",
+]
def resolve_example_path(path_like: str) -> Path:
    """Resolve an example image reference to an absolute path.

    ``~`` is expanded first. Relative references are looked up inside
    ``EXAMPLE_DIR``; absolute ones are only normalised.
    """
    candidate = Path(path_like).expanduser()
    anchored = candidate if candidate.is_absolute() else EXAMPLE_DIR / candidate
    return anchored.resolve()
def build_structured_examples() -> List[List[Any]]:
    """Assemble the example rows for structured extraction.

    Returns:
        One ``[image path, pretty-printed template JSON, instructions]`` row
        per entry of ``STRUCTURED_EXAMPLE_TEMPLATES`` whose image exists on
        disk. Missing images are reported on stdout and left out.
    """
    rows: List[List[Any]] = []
    for name, schema in STRUCTURED_EXAMPLE_TEMPLATES.items():
        location = resolve_example_path(name)
        if location.exists():
            rows.append(
                [
                    str(location),
                    json.dumps(schema, indent=4, ensure_ascii=False),
                    STRUCTURED_EXAMPLE_INSTRUCTIONS.get(name, ""),
                ]
            )
        else:
            print(f"[structured examples] Missing image: {location}", flush=True)
    return rows
def build_markdown_examples() -> List[List[Any]]:
    """Assemble the example rows for Markdown conversion.

    Returns:
        One single-element ``[image path]`` row per entry of
        ``MARKDOWN_EXAMPLE_IMAGE_PATHS`` whose image exists on disk. Missing
        images are reported on stdout and left out.
    """
    rows: List[List[Any]] = []
    for reference in MARKDOWN_EXAMPLE_IMAGE_PATHS:
        location = resolve_example_path(reference)
        if location.exists():
            rows.append([str(location)])
        else:
            print(f"[markdown examples] Missing image: {location}", flush=True)
    return rows
+STRUCTURED_EXAMPLES = build_structured_examples()
+MARKDOWN_EXAMPLES = build_markdown_examples()
+# ---------------- Utility helpers ----------------
def image_bytes_to_base64(b: bytes) -> str:
    """Return the standard Base64 encoding of ``b`` as text."""
    encoded = base64.b64encode(b)
    # Base64 output is pure ASCII, so decoding it as ASCII or UTF-8 gives the same text.
    return str(encoded, "ascii")
def ensure_rgb_image(image_bytes: bytes) -> Image.Image:
    """Decode raw image bytes and return the picture in RGB mode.

    Images that carry transparency are flattened onto a white background. A
    bare ``convert("RGB")`` just discards the alpha channel and exposes
    whatever colour is stored under the transparent pixels (typically black),
    which can make scanned or exported documents unreadable.

    Args:
        image_bytes: Encoded image data in any format Pillow can open.

    Returns:
        An RGB ``PIL.Image.Image``; the decoded image itself when it is
        already RGB.
    """
    img = Image.open(io.BytesIO(image_bytes))
    if img.mode == "RGB":
        return img
    # Alpha can live in the mode itself or, for palette/greyscale files, in the info dict.
    has_alpha = img.mode in {"RGBA", "LA", "PA"} or "transparency" in img.info
    if not has_alpha:
        return img.convert("RGB")
    rgba = img.convert("RGBA")
    canvas = Image.new("RGB", rgba.size, (255, 255, 255))
    # Using the alpha band as the paste mask blends the picture over the white canvas.
    canvas.paste(rgba, mask=rgba.getchannel("A"))
    return canvas
def file_path_to_bytes(path: str) -> bytes:
    """Read the file at ``path`` in binary mode and return its full contents."""
    with open(path, mode="rb") as stream:
        payload = stream.read()
    return payload
+# ---------------- Response parsing ----------------
def strip_code_fence(payload: str) -> str:
    """Remove a Markdown code fence that wraps the whole payload.

    Only a fence at the very start (optionally tagged ``json``, ``markdown``
    or ``text``, case-insensitively) and a fence at the very end are removed.
    Either may be absent, which happens while output is still streaming.
    Fences in the middle of the payload are left alone.

    Args:
        payload: Text that may be wrapped in a triple-backtick fence.

    Returns:
        The payload without the wrapping fence and surrounding whitespace.
    """
    # \A and \Z anchor to the whole string. The previous ^/$ with re.MULTILINE
    # matched at every line and so also stripped fences inside the payload.
    return re.sub(
        r"\A```(?:json|markdown|text)?\s*|\s*```\Z",
        "",
        payload.strip(),
        flags=re.IGNORECASE,
    ).strip()
def pretty_json_or_text(payload: str) -> str:
    """Pretty-print ``payload`` as JSON when possible, else return it cleaned.

    Any wrapping code fence is stripped first. If the remainder parses as
    JSON it is re-serialised with a four-space indent and non-ASCII
    characters kept as-is; otherwise the fence-free text is returned
    unchanged. An empty payload yields an empty string.
    """
    if not payload:
        return ""
    body = strip_code_fence(payload)
    try:
        rendered = json.dumps(json.loads(body), indent=4, ensure_ascii=False)
    except Exception:
        # Not valid JSON (or not serialisable): fall back to the plain text.
        rendered = body
    return rendered
def extract_answer_block(text: str) -> str:
    """Pull the final answer out of raw model output.

    Preference order:
      1. the content of the first ``<answer>...</answer>`` pair
         (case-insensitive, may span lines);
      2. the span from the first ``{`` to the last ``}``;
      3. the whole text, stripped.

    The first two are passed through ``pretty_json_or_text``. Empty input
    yields an empty string.
    """
    if not text:
        return ""
    tagged = re.search(
        r"<answer>\s*(.*?)\s*</answer>",
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )
    if tagged is not None:
        return pretty_json_or_text(tagged.group(1).strip())
    # The pattern is greedy, so the single possible match already spans from
    # the first opening brace to the last closing brace.
    braced = re.search(r"\{[\s\S]*\}", text)
    if braced is not None:
        return pretty_json_or_text(braced.group(0))
    return text.strip()
def split_reasoning_and_output(text: str, reasoning_enabled: bool) -> Tuple[str, str]:
    """Split model output into its reasoning trace and its final output.

    Args:
        text: Raw (possibly partial) model output.
        reasoning_enabled: Whether the model was asked to reason first.

    Returns:
        ``(reasoning, output)``, both stripped. With reasoning disabled the
        whole text is output. With reasoning enabled the text is split at the
        first ``</think>`` tag (matched case-insensitively); until that tag
        appears everything counts as reasoning and the output is empty.
    """
    if not text:
        return "", ""
    if not reasoning_enabled:
        return "", text.strip()
    # Search the original string directly. Looking the tag up in text.lower()
    # and reusing that index on text is wrong whenever lower-casing changes the
    # string length (e.g. U+0130 becomes two characters).
    marker = re.search(r"</think>", text, flags=re.IGNORECASE | re.ASCII)
    if marker is None:
        return text.strip(), ""
    return text[: marker.start()].strip(), text[marker.end():].strip()
+# ---------------- Message building ----------------
def make_text_content(text: str) -> List[Dict[str, Any]]:
    """Wrap ``text`` as a one-part chat message content list.

    A falsy ``text`` (``None`` or empty) is sent as an empty string.
    """
    body = text if text else ""
    text_part: Dict[str, Any] = {"type": "text", "text": body}
    return [text_part]
def make_image_content(
    image_bytes: bytes,
    extra_text: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Build chat message content holding an image and optional text.

    The image is converted to RGB, re-encoded as JPEG (quality 95) and
    embedded as a Base64 ``data:`` URL with ``detail`` set to ``"high"``.

    Args:
        image_bytes: Encoded source image.
        extra_text: Text to send after the image; ignored when blank.

    Returns:
        The content parts: the image part, then the stripped text part if any.
    """
    jpeg = io.BytesIO()
    ensure_rgb_image(image_bytes).save(jpeg, format="JPEG", quality=95)
    encoded = image_bytes_to_base64(jpeg.getvalue())
    image_part: Dict[str, Any] = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{encoded}",
            "detail": "high",
        },
    }
    parts: List[Dict[str, Any]] = [image_part]
    caption = (extra_text or "").strip()
    if caption:
        parts.append({"type": "text", "text": caption})
    return parts
def normalize_template(template: str) -> str:
    """Return the extraction template in canonical text form.

    A blank or missing template becomes ``"{}"``. A template that parses as
    JSON is re-serialised with a four-space indent; anything else is returned
    stripped but otherwise unchanged.
    """
    raw = (template or "").strip()
    if raw == "":
        return "{}"
    try:
        canonical = json.dumps(json.loads(raw), indent=4, ensure_ascii=False)
    except Exception:
        # Keep the user's text as typed when it is not valid JSON.
        canonical = raw
    return canonical
def collate_single_input(
    *,
    text_or_image: Any,
    template: str,
    system_prompt: Optional[str],
    instruction: Optional[str],
) -> Tuple[List[Dict[str, Any]], str]:
    """Build the chat messages for one extraction request.

    Args:
        text_or_image: Plain text, or a dict with a ``"bytes"`` key holding
            encoded image data.
        template: Extraction template as typed by the user.
        system_prompt: Optional system message; omitted when empty.
        instruction: Optional free-text instructions; ignored when blank.

    Returns:
        ``(messages, template_json)``, where ``template_json`` is the
        normalised template. The instructions and any non-empty template are
        also appended to the user message as text.
    """
    template_json = normalize_template(template)
    notes: List[str] = []
    guidance = (instruction or "").strip()
    if guidance:
        notes.append(f"Instructions:\n{guidance}")
    if template_json.strip() not in {"{}", ""}:
        notes.append(f"Extraction template:\n```json\n{template_json}\n```")
    addendum = "\n\n".join(notes) if notes else None
    messages: List[Dict[str, Any]] = (
        [{"role": "system", "content": system_prompt}] if system_prompt else []
    )
    if isinstance(text_or_image, dict) and "bytes" in text_or_image:
        user_content = make_image_content(
            image_bytes=text_or_image["bytes"],
            extra_text=addendum,
        )
    else:
        body = str(text_or_image or "")
        if addendum:
            body = f"{body}\n\n{addendum}".strip()
        user_content = make_text_content(body)
    messages.append({"role": "user", "content": user_content})
    return messages, template_json
def collate_for_template_generation(
    *,
    context_text: str,
    context_image_path: Optional[str],
    system_prompt: Optional[str],
) -> List[Dict[str, Any]]:
    """Build the chat messages asking the model to propose a template.

    Args:
        context_text: Document text, used only when no image path is given.
        context_image_path: Path of the document image; takes precedence.
        system_prompt: Optional system message; omitted when empty.

    Returns:
        The messages: the optional system message, then one user message
        holding the document and the fixed template-generation guidance.
    """
    guidance = (
        "Generate a concise JSON extraction template for this document. "
        "Use descriptive field names and simple type hints like string, number, YYYY-MM-DD, "
        "boolean, or arrays of objects. Return only the JSON template."
    )
    if context_image_path:
        user_content = make_image_content(
            image_bytes=file_path_to_bytes(context_image_path),
            extra_text=guidance,
        )
    else:
        document = (context_text or "").strip()
        user_content = make_text_content(f"{document}\n\n{guidance}".strip())
    messages: List[Dict[str, Any]] = (
        [{"role": "system", "content": system_prompt}] if system_prompt else []
    )
    messages.append({"role": "user", "content": user_content})
    return messages
def collate_markdown_image_only(
    *,
    image_bytes: bytes,
    system_prompt: Optional[str],
) -> List[Dict[str, Any]]:
    """Build the chat messages for Markdown conversion of one image.

    The user message carries the image alone, with no accompanying text.
    The system message is included only when ``system_prompt`` is non-empty.
    """
    user_message: Dict[str, Any] = {
        "role": "user",
        "content": make_image_content(image_bytes=image_bytes),
    }
    if system_prompt:
        return [{"role": "system", "content": system_prompt}, user_message]
    return [user_message]
+# ---------------- Model calls ----------------
def chunk_to_text(chunk: Any) -> str:
    """Extract the text delta carried by one streaming chunk.

    Reads ``chunk.choices[0].delta.content``. A string is returned as-is. A
    list is treated as content parts whose non-empty ``text`` values (dict key
    or attribute) are concatenated. Anything else, including a malformed
    chunk, gives an empty string; this function never raises ``Exception``.
    """
    try:
        choices = getattr(chunk, "choices", None) if chunk else None
        if not choices:
            return ""
        delta = getattr(chunk.choices[0], "delta", None)
        content = None if delta is None else getattr(delta, "content", None)
        if isinstance(content, str):
            return content
        if not isinstance(content, list):
            return ""
        fragments: List[str] = []
        for part in content:
            value = part.get("text") if isinstance(part, dict) else None
            if not value:
                # Fall back to attribute access for object-style parts.
                value = getattr(part, "text", None)
            if value:
                fragments.append(value)
        return "".join(fragments)
    except Exception:
        # Best effort: an unexpected chunk shape contributes no text.
        return ""
def build_chat_template_kwargs(
    *,
    template_json: str,
    reasoning: bool,
    instruction: Optional[str],
    markdown_mode: bool,
) -> Dict[str, Any]:
    """Build the ``chat_template_kwargs`` sent with a streaming request.

    Args:
        template_json: Normalised extraction template; blank or ``"{}"``
            means no template.
        reasoning: Whether thinking is enabled.
        instruction: Optional instructions; ignored when blank.
        markdown_mode: Whether the request is a Markdown conversion.

    Returns:
        In Markdown mode only ``mode`` and ``enable_thinking``. Otherwise
        ``mode`` is ``"structured"`` (with ``template``) when a template is
        present and ``"content"`` when not, plus the stripped
        ``instructions`` when given.
    """
    thinking = bool(reasoning)
    if markdown_mode:
        return {"mode": "markdown", "enable_thinking": thinking}
    has_template = (template_json or "").strip() not in {"", "{}"}
    options: Dict[str, Any] = {
        "mode": "structured" if has_template else "content",
        "enable_thinking": thinking,
    }
    if has_template:
        options["template"] = template_json
    guidance = (instruction or "").strip()
    if guidance:
        options["instructions"] = guidance
    return options
def call_model_stream(
    *,
    api_base: str,
    api_key: str,
    model_name: str,
    messages: List[Dict[str, Any]],
    template_json: str,
    temperature: float,
    max_tokens: int,
    reasoning: bool,
    instruction: Optional[str],
    markdown_mode: bool,
) -> Iterator[str]:
    """Stream a chat completion, yielding the text accumulated so far.

    A new client is created for each call. The template, instruction,
    reasoning and Markdown settings reach the server as
    ``chat_template_kwargs`` in the request's extra body.

    Yields:
        The full text received so far, once per chunk that adds text, so
        each value supersedes the previous one.
    """
    template_options = build_chat_template_kwargs(
        template_json=template_json,
        reasoning=reasoning,
        instruction=instruction,
        markdown_mode=markdown_mode,
    )
    client = OpenAI(base_url=api_base, api_key=api_key)
    response = client.chat.completions.create(
        model=model_name,
        temperature=float(temperature),
        max_tokens=int(max_tokens),
        messages=messages,
        stream=True,
        extra_body={"chat_template_kwargs": template_options},
    )
    so_far = ""
    for event in response:
        piece = chunk_to_text(event)
        if not piece:
            # Chunks without text add nothing and are not re-yielded.
            continue
        so_far += piece
        yield so_far
def call_model_once(
    *,
    api_base: str,
    api_key: str,
    model_name: str,
    messages: List[Dict[str, Any]],
    mode: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Run one non-streaming chat completion with thinking disabled.

    Args:
        api_base: Base URL of the OpenAI-compatible endpoint.
        api_key: API key for that endpoint.
        model_name: Model to query.
        messages: Chat messages to send.
        mode: Value passed as ``mode`` in ``chat_template_kwargs``.
        temperature: Sampling temperature.
        max_tokens: Generation limit.

    Returns:
        The content of the first choice, or an empty string when the
        response has no choices or the message has no content.
    """
    client = OpenAI(base_url=api_base, api_key=api_key)
    chat = client.chat.completions.create(
        model=model_name,
        temperature=float(temperature),
        max_tokens=int(max_tokens),
        messages=messages,
        extra_body={
            "chat_template_kwargs": {
                "mode": mode,
                "enable_thinking": False,
            }
        },
    )
    if not chat.choices:
        return ""
    # The message content is optional in the response schema; coerce a missing
    # value to "" so the declared ``str`` return type always holds.
    return chat.choices[0].message.content or ""
+# ---------------- Inference orchestration ----------------
def prepare_input(context_text: str, context_image_path: Optional[str]) -> Any:
    """Choose the request input: the image when given, otherwise the text.

    Returns:
        ``{"bytes": <file contents>}`` when an image path is provided, else
        the text (an empty string when the text is missing).
    """
    if not context_image_path:
        return context_text or ""
    return {"bytes": file_path_to_bytes(context_image_path)}
def infer_stream(
    *,
    api_key: str,
    api_base: str,
    system_prompt: str,
    template: str,
    instruction: str,
    context_text: str,
    context_image_path: Optional[str],
    temperature: float,
    reasoning: bool,
    markdown_mode: bool,
):
    """Run one streamed inference and yield display-ready updates.

    Uses ``DEFAULT_MODEL`` and ``DEFAULT_MAX_TOKENS``. The image, when
    provided, takes precedence over the text.

    Yields:
        Dicts with ``mode`` (``"markdown"`` or ``"structured"``), ``output``
        (text to display, or a placeholder while nothing is available) and
        ``think`` (the reasoning trace, empty when reasoning is off). In
        structured mode, output starting with ``{`` or ``[`` is pretty-printed
        and wrapped in a ``json`` code fence.

    Raises:
        ValueError: On first iteration, when Markdown mode is requested
            without an image.
    """
    payload = prepare_input(context_text, context_image_path)
    has_image = isinstance(payload, dict) and "bytes" in payload
    if not markdown_mode:
        messages, template_json = collate_single_input(
            text_or_image=payload,
            template=template,
            system_prompt=system_prompt,
            instruction=instruction,
        )
    elif has_image:
        messages = collate_markdown_image_only(
            image_bytes=payload["bytes"],
            system_prompt=system_prompt,
        )
        template_json = ""
    else:
        raise ValueError("Markdown conversion requires an image input.")
    waiting_note = "_(Waiting for output after `</think>`.)_"
    updates = call_model_stream(
        api_base=api_base,
        api_key=api_key,
        model_name=DEFAULT_MODEL,
        messages=messages,
        template_json=template_json,
        temperature=temperature,
        max_tokens=DEFAULT_MAX_TOKENS,
        reasoning=reasoning,
        instruction=instruction,
        markdown_mode=markdown_mode,
    )
    for partial_text in updates:
        trace, output_text = split_reasoning_and_output(
            partial_text,
            reasoning_enabled=reasoning,
        )
        think = trace if reasoning else ""
        if markdown_mode:
            placeholder = waiting_note if reasoning else "_(Empty output.)_"
            yield {
                "mode": "markdown",
                "output": output_text or placeholder,
                "think": think,
            }
            continue
        if not reasoning:
            # Without reasoning the raw text is the answer source.
            output_text = partial_text or ""
        placeholder = waiting_note if reasoning else "_(No output found yet.)_"
        shown = extract_answer_block(output_text) or placeholder
        if shown.strip().startswith(("{", "[")):
            shown = f"```json\n{pretty_json_or_text(shown)}\n```"
        else:
            # Turn literal backslash-n sequences into real line breaks for display.
            shown = shown.replace("\\n", "\n")
        yield {
            "mode": "structured",
            "output": shown,
            "think": think,
        }
def infer_template_generation(
    *,
    api_key: str,
    api_base: str,
    system_prompt: str,
    context_text: str,
    context_image_path: Optional[str],
    temperature: float,
) -> str:
    """Ask the model to propose an extraction template for a document.

    Sends one non-streaming request in ``"template-generation"`` mode using
    ``DEFAULT_MODEL`` and ``DEFAULT_MAX_TOKENS``.

    Returns:
        The model's reply, pretty-printed when it is valid JSON and returned
        as fence-free text otherwise.
    """
    request_messages = collate_for_template_generation(
        context_text=context_text,
        context_image_path=context_image_path,
        system_prompt=system_prompt,
    )
    reply = call_model_once(
        api_base=api_base,
        api_key=api_key,
        model_name=DEFAULT_MODEL,
        messages=request_messages,
        mode="template-generation",
        temperature=temperature,
        max_tokens=DEFAULT_MAX_TOKENS,
    )
    return pretty_json_or_text(reply)
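+# ---------------- UI styling ----------------
+# Stylesheet for the Gradio UI. Colours are defined as CSS variables for the
+# light and dark palettes; the __LOGO_URL__ placeholder is replaced with
+# LOGO_URL when the string is built.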
+CSS = """
+:root {
+  color-scheme: light;
+  --bg: #f6f2eb;
+  --panel: #ffffff;
+  --panel-rgb: 255, 255, 255;
+  --panel-strong-rgb: 255, 252, 246;
+  --input-rgb: 255, 255, 255;
+  --border-blue: rgba(67, 111, 148, 0.30);
+  --border-blue-soft: rgba(67, 111, 148, 0.18);
+  --border-input: rgba(67, 111, 148, 0.22);
+  --border-orange-soft: rgba(190, 103, 36, 0.26);
+  --text: #23252b;
+  --text-strong: #101318;
+  --text-on-accent: #101318;
+  --muted: #5f6673;
+  --muted-2: #7d8490;
+  --logo-blue: #5d9bcf;
+  --logo-orange: #d6742f;
+  --green: #178f66;
+  --card-alpha: 0.88;
+  --header-alpha: 0.82;
+  --input-alpha: 0.94;
+  --shadow: rgba(54, 46, 35, 0.14);
+  --inset-highlight: rgba(255, 255, 255, 0.85);
+  --logo-opacity: 0.18;
+  --focus-ring: rgba(67, 111, 148, 0.26);
+  --code-bg: #fdfaf5;
+  --dropzone-bg: #fbf8f2;
+}
+html.dark,
+body.dark,
+.dark,
+[data-theme="dark"] {
+  color-scheme: dark;
+  --bg: #242529;
+  --panel: #1d1f26;
+  --panel-rgb: 29, 31, 38;
+  --panel-strong-rgb: 21, 22, 26;
+  --input-rgb: 12, 14, 19;
+  --border-blue: rgba(135, 183, 224, 0.24);
+  --border-blue-soft: rgba(135, 183, 224, 0.16);
+  --border-input: rgba(135, 183, 224, 0.14);
+  --border-orange-soft: rgba(228, 132, 58, 0.22);
+  --text: #eef0f4;
+  --text-strong: #ffffff;
+  --text-on-accent: #101318;
+  --muted: #969baa;
+  --muted-2: #737988;
+  --logo-blue: #87b7e0;
+  --logo-orange: #e4843a;
+  --green: #31c48d;
+  --card-alpha: 0.66;
+  --header-alpha: 0.42;
+  --input-alpha: 0.78;
+  --shadow: rgba(0, 0, 0, 0.28);
+  --inset-highlight: rgba(255, 255, 255, 0.055);
+  --logo-opacity: 0.88;
+  --focus-ring: rgba(135, 183, 224, 0.32);
+  --code-bg: rgba(12, 14, 19, 0.78);
+  --dropzone-bg: rgba(12, 14, 19, 0.78);
+}
+@media (prefers-color-scheme: dark) {
+  :root:not([data-theme="light"]) {
+    color-scheme: dark;
+    --bg: #242529;
+    --panel: #1d1f26;
+    --panel-rgb: 29, 31, 38;
+    --panel-strong-rgb: 21, 22, 26;
+    --input-rgb: 12, 14, 19;
+    --border-blue: rgba(135, 183, 224, 0.24);
+    --border-blue-soft: rgba(135, 183, 224, 0.16);
+    --border-input: rgba(135, 183, 224, 0.14);
+    --border-orange-soft: rgba(228, 132, 58, 0.22);
+    --text: #eef0f4;
+    --text-strong: #ffffff;
+    --text-on-accent: #101318;
+    --muted: #969baa;
+    --muted-2: #737988;
+    --logo-blue: #87b7e0;
+    --logo-orange: #e4843a;
+    --green: #31c48d;
+    --card-alpha: 0.66;
+    --header-alpha: 0.42;
+    --input-alpha: 0.78;
+    --shadow: rgba(0, 0, 0, 0.28);
+    --inset-highlight: rgba(255, 255, 255, 0.055);
+    --logo-opacity: 0.88;
+    --focus-ring: rgba(135, 183, 224, 0.32);
+    --code-bg: rgba(12, 14, 19, 0.78);
+    --dropzone-bg: rgba(12, 14, 19, 0.78);
+  }
+}
+html,
+body,
+footer,
+.gradio-container {
+  color: var(--text) !important;
+}
+body {
+  background: var(--bg) !important;
+  background-attachment: fixed !important;
+}
+footer {
+  background: transparent !important;
+}
+.gradio-container {
+  position: relative !important;
+  isolation: isolate !important;
+  max-width: 1680px !important;
+  padding: 10px 18px 18px 18px !important;
+  background: transparent !important;
+}
+.gradio-container::before {
+  content: "";
+  position: fixed;
+  inset: 0;
+  z-index: -2;
+  pointer-events: none;
+  background-image: url("__LOGO_URL__");
+  background-repeat: no-repeat;
+  background-size: min(86vw, 980px) min(86vw, 980px);
+  background-position: calc(100% + 230px) 34px;
+  opacity: var(--logo-opacity);
+  filter: saturate(1.2) drop-shadow(0 0 28px rgba(135, 183, 224, 0.14));
+}
+.with-gap,
+.gradio-row {
+  gap: 18px !important;
+}
+.app-header {
+  position: relative;
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 16px;
+  padding: 10px 12px 14px 12px;
+  margin-bottom: 10px;
+  border-bottom: 1px solid var(--border-blue-soft);
+  background: rgba(var(--panel-strong-rgb), var(--header-alpha));
+  border-radius: 14px;
+  backdrop-filter: blur(8px);
+  box-shadow: 0 12px 42px var(--shadow), inset 0 1px 0 var(--inset-highlight);
+}
+.brand {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+}
+.brand-mark {
+  width: 28px;
+  height: 28px;
+  flex: 0 0 auto;
+  object-fit: contain;
+}
+.brand-title {
+  display: flex;
+  align-items: baseline;
+  gap: 8px;
+}
+.brand-name {
+  font-size: 23px;
+  line-height: 1;
+  font-weight: 750;
+  letter-spacing: -0.045em;
+  color: var(--text-strong) !important;
+}
+.brand-name span {
+  color: var(--muted) !important;
+}
+.model-chip {
+  display: inline-flex;
+  align-items: center;
+  max-width: 520px;
+  padding: 5px 9px;
+  border-radius: 999px;
+  background: rgba(var(--panel-rgb), 0.88);
+  border: 1px solid var(--border-blue-soft);
+  color: var(--muted) !important;
+  font-size: 12px;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+.model-chip code {
+  color: var(--text-strong) !important;
+  background: transparent !important;
+}
+.header-actions {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  color: var(--muted) !important;
+  font-size: 13px;
+}
+.status-dot {
+  width: 8px;
+  height: 8px;
+  border-radius: 99px;
+  background: var(--green);
+  box-shadow: 0 0 14px rgba(49, 196, 141, 0.65);
+}
+.intro-card {
+  margin: 0 0 16px 0;
+  padding: 14px 16px;
+  border-radius: 14px;
+  background: rgba(var(--panel-rgb), var(--card-alpha));
+  border: 1px solid var(--border-blue-soft);
+  box-shadow: 0 12px 42px var(--shadow), inset 0 1px 0 var(--inset-highlight);
+  backdrop-filter: blur(8px);
+}
+.intro-card p {
+  margin: 0 0 8px 0;
+  line-height: 1.5;
+}
+.section-title {
+  margin: 0 0 8px 0;
+  color: var(--text-strong) !important;
+  font-size: 13px;
+  font-weight: 750;
+  letter-spacing: 0.01em;
+}
+.main-card,
+.output-card,
+.gradio-group {
+  background: rgba(var(--panel-rgb), var(--card-alpha)) !important;
+  border: 1px solid var(--border-blue) !important;
+  border-radius: 14px !important;
+  box-shadow: 0 22px 70px var(--shadow), inset 0 1px 0 var(--inset-highlight) !important;
+  backdrop-filter: blur(10px) saturate(1.18);
+}
+.output-card {
+  min-height: 820px !important;
+  border-color: var(--border-orange-soft) !important;
+}
+label,
+.markdown,
+.prose,
+h1,
+h2,
+h3,
+h4,
+h5,
+h6,
+p,
+span,
+div {
+  color: var(--text) !important;
+}
+.section-title,
+label > span,
+.gradio-container label {
+  color: var(--text-strong) !important;
+}
+.secondary-note {
+  color: var(--muted) !important;
+  font-size: 12px;
+  line-height: 1.35;
+}
+textarea,
+input[type="text"],
+input[type="password"],
+input[type="number"],
+input[type="email"],
+.cm-editor {
+  background: rgba(var(--input-rgb), var(--input-alpha)) !important;
+  color: var(--text) !important;
+  border-color: var(--border-input) !important;
+}
+textarea::placeholder,
+input::placeholder {
+  color: var(--muted-2) !important;
+}
+textarea:focus,
+input:focus,
+.cm-editor.cm-focused {
+  border-color: var(--logo-blue) !important;
+  box-shadow: 0 0 0 3px var(--focus-ring) !important;
+}
+input[type="checkbox"] {
+  accent-color: var(--logo-blue) !important;
+}
+#schema-box .cm-editor {
+  min-height: 410px !important;
+  max-height: 480px !important;
+  background: var(--code-bg) !important;
+}
+.cm-editor,
+.cm-scroller,
+.cm-content,
+.cm-line,
+.cm-gutters,
+.cm-activeLine,
+.cm-activeLineGutter {
+  background: var(--code-bg) !important;
+  color: var(--text) !important;
+}
+.cm-gutters {
+  border-color: var(--border-blue-soft) !important;
+  color: var(--muted-2) !important;
+}
+.cm-cursor {
+  border-left-color: var(--text-strong) !important;
+}
+#image-box {
+  min-height: 335px !important;
+  background: var(--dropzone-bg) !important;
+  border-color: var(--border-blue-soft) !important;
+}
+#image-box,
+#image-box *,
+.upload-container,
+.upload-container *,
+.file-preview,
+.file-preview * {
+  color: var(--text) !important;
+}
+#image-box button,
+#image-box .icon-wrap,
+#image-box .wrap {
+  background: transparent !important;
+}
+#reasoning-box {
+  min-height: 250px;
+  max-height: 300px;
+  overflow: auto;
+  padding: 8px;
+  border-radius: 8px;
+  background: rgba(var(--input-rgb), var(--input-alpha)) !important;
+  border: 1px solid var(--border-blue-soft);
+  white-space: pre-wrap !important;
+  overflow-wrap: anywhere !important;
+  word-break: break-word !important;
+}
+#output-box {
+  min-height: 430px;
+  max-height: 520px;
+  overflow: auto;
+  padding: 8px;
+  border-radius: 8px;
+  background: rgba(var(--input-rgb), var(--input-alpha)) !important;
+  border: 1px solid var(--border-blue-soft);
+  white-space: pre-wrap !important;
+  overflow-wrap: anywhere !important;
+  word-break: break-word !important;
+}
+#reasoning-box pre,
+#reasoning-box code,
+#output-box pre,
+#output-box code {
+  white-space: pre-wrap !important;
+  overflow-wrap: anywhere !important;
+  word-break: break-word !important;
+  color: var(--text) !important;
+  background: transparent !important;
+}
+button {
+  border-radius: 9px !important;
+  min-height: 34px !important;
+}
+button.primary-button,
+.primary-button button,
+.primary-button {
+  background: var(--logo-blue) !important;
+  background-color: var(--logo-blue) !important;
+  color: var(--text-on-accent) !important;
+  border: none !important;
+  font-weight: 750 !important;
+}
+button.markdown-button,
+.markdown-button button,
+.markdown-button {
+  background: var(--logo-orange) !important;
+  background-color: var(--logo-orange) !important;
+  color: var(--text-on-accent) !important;
+  border: none !important;
+  font-weight: 750 !important;
+}
+.clear-button button,
+button.clear-button,
+.clear-button {
+  background: transparent !important;
+  background-color: transparent !important;
+  color: var(--muted) !important;
+  border: 1px solid var(--border-blue-soft) !important;
+}
+.gradio-container .wrap,
+.gradio-container .block,
+.gradio-container .form,
+.gradio-container .panel,
+.gradio-container .tabs,
+.gradio-container .tabitem {
+  background: transparent !important;
+  color: var(--text) !important;
+}
+.gradio-accordion {
+  border-color: var(--border-blue-soft) !important;
+}
+.gradio-container table,
+.gradio-container th,
+.gradio-container td {
+  color: var(--text) !important;
+}
+.gradio-container label,
+.gradio-container label span,
+.gradio-container .label-wrap,
+.gradio-container .label-wrap span {
+  color: var(--text-strong) !important;
+}
+@media (max-width: 1100px) {
+  .app-header {
+    align-items: flex-start;
+    flex-direction: column;
+  }
+  .brand-title {
+    align-items: flex-start;
+    flex-direction: column;
+  }
+  .model-chip {
+    max-width: 100%;
+  }
+  .output-card {
+    min-height: 520px !important;
+  }
+  #reasoning-box {
+    min-height: 180px;
+  }
+  #output-box {
+    min-height: 320px;
+  }
+}
+""".replace("__LOGO_URL__", LOGO_URL or "")
+# ---------------- Gradio app ----------------
+# Root Blocks context: every component and event handler below is declared inside it.
+with gr.Blocks(
+    title="NuExtract",
+    css=CSS,
+    theme=gr.themes.Base(
+        primary_hue="blue",
+        secondary_hue="orange",
+        neutral_hue="slate",
+    ),
+) as demo:
+    # Use the logo image when LOGO_URL is truthy; otherwise emit an empty
+    # placeholder element carrying the same "brand-mark" class.
+    logo_html = (
+        f'<img class="brand-mark" src="{LOGO_URL}" alt="NuExtract logo" />'
+        if LOGO_URL
+        else '<div class="brand-mark"></div>'
+    )
+    # Static page header: brand mark, product name, the DEFAULT_MODEL identifier,
+    # and a status line. LOGO_URL and DEFAULT_MODEL are interpolated verbatim.
+    gr.HTML(
+        f"""
+        <header class="app-header">
+          <div class="brand">
+            {logo_html}
+            <div class="brand-title">
+              <div class="brand-name">Nu<span>Extract</span></div>
+              <div class="model-chip">Model&nbsp;<code>{DEFAULT_MODEL}</code></div>
+            </div>
+          </div>
+          <div class="header-actions">
+            <span class="status-dot"></span>
+            <span>OpenAI-compatible endpoint</span>
+          </div>
+        </header>
+        """
+    )
+    # Introductory blurb, tagged with the "intro-card" class for styling.
+    gr.Markdown(
+        """
+        We introduce **NuExtract 3** — a 4B open-source **MIT License** VLM specialized in document extraction.
+        NuExtract 3 unifies structured extraction — document to JSON — and content extraction — document to Markdown,
+        a.k.a. OCR — into one model.
+        NuExtract 3 has been trained via Reinforcement Learning to have extraction-specific reasoning abilities, which can
+        be switched on/off on demand. We find that NuExtract 3 substantially outperforms similar-sized models for both
+        structured extraction and content extraction, making it the new reference model of open-source document extraction.
+        """,
+        elem_classes=["intro-card"],
+    )
+    # Two-column layout: inputs and controls on the left, results on the right.
+    with gr.Row(equal_height=True):
+        # Left: input, schema, controls
+        with gr.Column(scale=1, min_width=520):
+            # --- Document input: an image and/or pasted text ---
+            with gr.Group(elem_classes="main-card"):
+                gr.HTML("<div class='section-title'>Input</div>")
+                # type="filepath": the value handed to handlers is a path, which
+                # they forward as `context_image_path`.
+                context_image = gr.Image(
+                    label="Image",
+                    type="filepath",
+                    height=340,
+                    sources=["upload", "clipboard"],
+                    elem_id="image-box",
+                )
+                context_text = gr.Textbox(
+                    label="Text",
+                    placeholder="Optional: paste document text.",
+                    lines=3,
+                    max_lines=5,
+                )
+            # --- Extraction template and free-form instructions ---
+            with gr.Group(elem_classes="main-card"):
+                gr.HTML("<div class='section-title'>Schema & instructions</div>")
+                instruction = gr.Textbox(
+                    label="Instructions",
+                    placeholder="Optional extraction instructions.",
+                    lines=2,
+                    max_lines=3,
+                )
+                with gr.Row(equal_height=True):
+                    # JSON editor pre-filled with a small generic template; the
+                    # "Generate template" handler overwrites its value.
+                    template = gr.Code(
+                        label="Template",
+                        language="json",
+                        value=json.dumps(
+                            {
+                                "title": "string",
+                                "entities": ["string"],
+                                "dates": ["YYYY-MM-DD"],
+                                "amounts": [
+                                    {
+                                        "value": "number",
+                                        "currency": "string",
+                                    }
+                                ],
+                            },
+                            indent=4,
+                        ),
+                        lines=16,
+                        scale=5,
+                        elem_id="schema-box",
+                    )
+                    with gr.Column(scale=2, min_width=150):
+                        generate_template_btn = gr.Button(
+                            "Generate template",
+                            variant="secondary",
+                        )
+                        gr.HTML(
+                            "<div class='secondary-note'>"
+                            "Use Extract for JSON. Use Markdown to convert an image document."
+                            "</div>"
+                        )
+            # --- Run controls ---
+            with gr.Group(elem_classes="main-card"):
+                gr.HTML("<div class='section-title'>Run</div>")
+                with gr.Row():
+                    # Both run buttons are variant="secondary"; their accent
+                    # colours come from the primary-button / markdown-button
+                    # CSS classes.
+                    extract_btn = gr.Button(
+                        "Extract JSON",
+                        variant="secondary",
+                        elem_classes=["primary-button"],
+                    )
+                    markdown_btn = gr.Button(
+                        "Convert to Markdown",
+                        variant="secondary",
+                        elem_classes=["markdown-button"],
+                    )
+                with gr.Row():
+                    stop_btn = gr.Button("Stop", variant="stop")
+                    clear_btn = gr.Button(
+                        "Clear results",
+                        variant="secondary",
+                        elem_classes=["clear-button"],
+                    )
+                # Forwarded to the run handlers as the `reasoning` flag.
+                reasoning_checkbox = gr.Checkbox(
+                    label="Reasoning",
+                    value=True,
+                    interactive=True,
+                    info="If enabled, reasoning is everything before </think>.",
+                )
+                # Range 0.0-1.5 in steps of 0.05, defaulting to 0.2.
+                temperature = gr.Slider(
+                    0.0,
+                    1.5,
+                    value=0.2,
+                    step=0.05,
+                    label="Temperature",
+                    info="Lower values are best for extraction.",
+                )
+            # Example pickers are rendered only when the corresponding example
+            # list is non-empty; otherwise a setup hint is shown instead.
+            with gr.Accordion("Structured examples", open=False):
+                if STRUCTURED_EXAMPLES:
+                    # Each example fills the image, template and instruction fields.
+                    gr.Examples(
+                        examples=STRUCTURED_EXAMPLES,
+                        inputs=[context_image, template, instruction],
+                        label="Load structured example",
+                        examples_per_page=8,
+                        cache_examples=False,
+                    )
+                else:
+                    gr.Markdown(
+                        f"""
+                        No structured examples found.
+                        Add files referenced in `STRUCTURED_EXAMPLE_TEMPLATES`, for example:
+                        ```text
+                        {EXAMPLE_DIR}/1.jpg
+                        {EXAMPLE_DIR}/2.png
+                        ```
+                        """
+                    )
+            with gr.Accordion("Markdown examples", open=False):
+                if MARKDOWN_EXAMPLES:
+                    # Markdown examples only fill the image field.
+                    gr.Examples(
+                        examples=MARKDOWN_EXAMPLES,
+                        inputs=[context_image],
+                        label="Load Markdown example",
+                        examples_per_page=8,
+                        cache_examples=False,
+                    )
+                else:
+                    gr.Markdown(
+                        f"""
+                        No Markdown examples found.
+                        Add image paths to `MARKDOWN_EXAMPLE_IMAGE_PATHS`, for example:
+                        ```python
+                        MARKDOWN_EXAMPLE_IMAGE_PATHS = [
+                            "markdown_1.png",
+                            "markdown_2.jpg",
+                            "/home/user/app/examples/report.png",
+                        ]
+                        ```
+                        Relative paths are resolved from:
+                        ```text
+                        {EXAMPLE_DIR}
+                        ```
+                        """
+                    )
+            # Endpoint settings are intentionally hidden from the UI.
+            # They are held in gr.State so handlers still receive them as inputs.
+            api_base = gr.State(DEFAULT_API_BASE)
+            api_key = gr.State(DEFAULT_API_KEY)
+            system_prompt = gr.State(SYSTEM_PROMPT_DEFAULT)
+        # Right: reasoning + output
+        with gr.Column(scale=1, min_width=520):
+            with gr.Group(elem_classes="output-card"):
+                gr.HTML("<div class='section-title'>Reasoning</div>")
+                # Receives the model's reasoning text from the run handlers.
+                reasoning_md = gr.Markdown(
+                    label="Reasoning",
+                    elem_id="reasoning-box",
+                )
+                gr.HTML("<div class='section-title' style='margin-top: 12px;'>Output</div>")
+                # Receives the extraction result (JSON or Markdown).
+                output_md = gr.Markdown(
+                    label="Output",
+                    elem_id="output-box",
+                )
+                # Starts hidden; handlers make it visible when they report an error.
+                error_box = gr.Markdown(visible=False)
+    def run_model_click(
+        api_key_val,
+        api_base_val,
+        system_prompt_val,
+        instruction_val,
+        template_val,
+        context_text_val,
+        context_image_val,
+        temperature_val,
+        reasoning_val,
+        markdown_mode_val,
+    ):
+        mode_name = "Markdown" if markdown_mode_val else "Extract"
+        print(f"[button] {mode_name} clicked", flush=True)
+        print(f"[button] image={context_image_val}", flush=True)
+        print(f"[button] text_len={len(context_text_val or '')}", flush=True)
+        print(f"[button] reasoning={bool(reasoning_val)}", flush=True)
+        if markdown_mode_val and not context_image_val:
+            msg = "Markdown conversion requires a document image."
+            yield (
+                gr.update(value=""),
+                gr.update(value=""),
+                gr.update(visible=True, value=f"### Error\n{msg}"),
+            )
+            return
+        if not context_image_val and not (context_text_val or "").strip():
+            msg = "Please provide a document image or paste document text."
+            yield (
+                gr.update(value=""),
+                gr.update(value=""),
+                gr.update(visible=True, value=f"### Error\n{msg}"),
+            )
+            return
+        try:
+            yielded_anything = False
+            for res in infer_stream(
+                api_key=api_key_val,
+                api_base=api_base_val,
+                system_prompt=system_prompt_val,
+                template=template_val,
+                instruction=instruction_val,
+                context_text=context_text_val,
+                context_image_path=context_image_val,
+                temperature=temperature_val,
+                reasoning=bool(reasoning_val),
+                markdown_mode=bool(markdown_mode_val),
+            ):
+                yielded_anything = True
+                think = res.get("think") or ""
+                output = res.get("output") or "_(Empty output.)_"
+                yield (
+                    gr.update(value=f"```text\n{think}\n```" if think else ""),
+                    gr.update(value=output),
+                    gr.update(visible=False, value=""),
+                )
+            if not yielded_anything:
+                yield (
+                    gr.update(value=""),
+                    gr.update(value=""),
+                    gr.update(
+                        visible=True,
+                        value="### Error\nThe model returned no streamed output.",
+                    ),
+                )
+        except Exception:
+            import traceback
+            tb = traceback.format_exc()
+            print(tb, flush=True)
+            yield (
+                gr.update(value=""),
+                gr.update(value=""),
+                gr.update(visible=True, value=f"### Error\n```text\n{tb}\n```"),
+            )
+    def on_extract_click(
+        api_key_val,
+        api_base_val,
+        system_prompt_val,
+        instruction_val,
+        template_val,
+        context_text_val,
+        context_image_val,
+        temperature_val,
+        reasoning_val,
+    ):
+        yield from run_model_click(
+            api_key_val,
+            api_base_val,
+            system_prompt_val,
+            instruction_val,
+            template_val,
+            context_text_val,
+            context_image_val,
+            temperature_val,
+            reasoning_val,
+            False,
+        )
+    def on_markdown_click(
+        api_key_val,
+        api_base_val,
+        system_prompt_val,
+        instruction_val,
+        template_val,
+        context_text_val,
+        context_image_val,
+        temperature_val,
+        reasoning_val,
+    ):
+        yield from run_model_click(
+            api_key_val,
+            api_base_val,
+            system_prompt_val,
+            instruction_val,
+            template_val,
+            context_text_val,
+            context_image_val,
+            temperature_val,
+            reasoning_val,
+            True,
+        )
+    def on_click_generate_template(
+        api_key_val,
+        api_base_val,
+        system_prompt_val,
+        context_text_val,
+        context_image_val,
+        temperature_val,
+    ):
+        print("[button] Generate template clicked", flush=True)
+        if not context_image_val and not (context_text_val or "").strip():
+            return (
+                gr.update(),
+                gr.update(
+                    visible=True,
+                    value="### Error\nPlease provide a document image or paste document text.",
+                ),
+            )
+        try:
+            template_text = infer_template_generation(
+                api_key=api_key_val,
+                api_base=api_base_val,
+                system_prompt=system_prompt_val,
+                context_text=context_text_val,
+                context_image_path=context_image_val,
+                temperature=temperature_val,
+            )
+            return gr.update(value=template_text), gr.update(visible=False, value="")
+        except Exception:
+            import traceback
+            tb = traceback.format_exc()
+            print(tb, flush=True)
+            return (
+                gr.update(),
+                gr.update(visible=True, value=f"### Error\n```text\n{tb}\n```"),
+            )
+    def on_clear():
+        return (
+            gr.update(value=""),
+            gr.update(value=""),
+            gr.update(visible=False, value=""),
+        )
+    common_inputs = [
+        api_key,
+        api_base,
+        system_prompt,
+        instruction,
+        template,
+        context_text,
+        context_image,
+        temperature,
+        reasoning_checkbox,
+    ]
+    common_outputs = [
+        reasoning_md,
+        output_md,
+        error_box,
+    ]
+    extract_event = extract_btn.click(
+        fn=on_extract_click,
+        inputs=common_inputs,
+        outputs=common_outputs,
+        show_progress=True,
+    )
+    markdown_event = markdown_btn.click(
+        fn=on_markdown_click,
+        inputs=common_inputs,
+        outputs=common_outputs,
+        show_progress=True,
+    )
+    stop_btn.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[extract_event, markdown_event],
+    )
+    clear_btn.click(
+        fn=on_clear,
+        inputs=None,
+        outputs=common_outputs,
+    )
+    generate_template_btn.click(
+        fn=on_click_generate_template,
+        inputs=[
+            api_key,
+            api_base,
+            system_prompt,
+            context_text,
+            context_image,
+            temperature,
+        ],
+        outputs=[
+            template,
+            error_box,
+        ],
+        show_progress=True,
+    )
+if __name__ == "__main__":
+    allowed_paths = []
+    if ASSETS_DIR.exists():
+        allowed_paths.append(str(ASSETS_DIR))
+    if EXAMPLE_DIR.exists():
+        allowed_paths.append(str(EXAMPLE_DIR))
+    demo.queue().launch(
+        share=ARGS.share,
+        server_name=ARGS.server_name,
+        server_port=ARGS.server_port,
+        show_error=True,
+        allowed_paths=allowed_paths or None,
+    )

assets/logo_numind_picto.svg ADDED Viewed

start.sh ADDED Viewed

	@@ -0,0 +1,40 @@

+#!/usr/bin/env bash
+set -euo pipefail
+MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
+VLLM_PORT="${VLLM_PORT:-8000}"
+GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
+echo "Starting vLLM with model: ${MODEL_NAME}"
+python -m vllm.entrypoints.openai.api_server \
+  --model "${MODEL_NAME}" \
+  --served-model-name "${MODEL_NAME}" \
+  --host 127.0.0.1 \
+  --port "${VLLM_PORT}" \
+  --trust-remote-code \
+  --dtype auto \
+  --max-model-len "${MAX_MODEL_LEN:-8192}" \
+  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
+  --limit-mm-per-prompt image=1 \
+  --api-key "${OPENAI_API_KEY:-EMPTY}" &
+VLLM_PID=$!
+echo "Waiting for vLLM to become ready..."
+until curl -sf "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
+  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
+    echo "vLLM exited before becoming ready."
+    exit 1
+  fi
+  sleep 2
+done
+echo "vLLM is ready. Starting Gradio..."
+python /home/user/app/app.py \
+  --model-name "${MODEL_NAME}" \
+  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
+  --api-key "${OPENAI_API_KEY:-EMPTY}" \
+  --server-name "0.0.0.0" \
+  --server-port "${GRADIO_PORT}"