#!/usr/bin/env bash
#
# Launch a vLLM OpenAI-compatible API server in the background, wait until it
# answers on its loopback port, then run the Gradio app in the foreground.
#
# Environment (all optional):
#   MODEL_NAME              model to serve and advertise
#                           (default: NM-dev/NuExtract3.4_4B-RL-400)
#   VLLM_PORT               port vLLM binds on 127.0.0.1 (default: 8000)
#   GRADIO_SERVER_PORT      port passed to the Gradio app (default: 7860)
#   MAX_MODEL_LEN           value for --max-model-len (default: 8192)
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization (default: 0.90)
#   OPENAI_API_KEY          API key given to both vLLM and the app
#                           (default: EMPTY)
set -euo pipefail

MODEL_NAME="${MODEL_NAME:-NM-dev/NuExtract3.4_4B-RL-400}"
VLLM_PORT="${VLLM_PORT:-8000}"
GRADIO_PORT="${GRADIO_SERVER_PORT:-7860}"
# Resolved once so the server, the readiness probe and the app always agree.
API_KEY="${OPENAI_API_KEY:-EMPTY}"

VLLM_PID=""

# Stop the background vLLM server (if it is still running) on any exit path,
# so a failed start-up or a crashed Gradio app does not leave it orphaned.
cleanup() {
  if [[ -n "${VLLM_PID}" ]] && kill -0 "${VLLM_PID}" 2>/dev/null; then
    kill "${VLLM_PID}" 2>/dev/null || true
    wait "${VLLM_PID}" 2>/dev/null || true
  fi
}
trap cleanup EXIT

echo "Starting vLLM with model: ${MODEL_NAME}"
python -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_NAME}" \
  --served-model-name "${MODEL_NAME}" \
  --host 127.0.0.1 \
  --port "${VLLM_PORT}" \
  --trust-remote-code \
  --dtype auto \
  --max-model-len "${MAX_MODEL_LEN:-8192}" \
  --gpu-memory-utilization "${GPU_MEMORY_UTILIZATION:-0.90}" \
  --limit-mm-per-prompt image=1 \
  --api-key "${API_KEY}" &
VLLM_PID=$!

echo "Waiting for vLLM to become ready..."
# The server is started with --api-key, and vLLM answers unauthenticated
# /v1/* requests with 401 in that mode; without the Authorization header
# `curl -f` would fail forever and this loop would never finish.
# The key is already visible in the vLLM command line above, so sending it
# via curl's argv does not widen its exposure.
until curl -sf \
  -H "Authorization: Bearer ${API_KEY}" \
  "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; do
  # Bail out instead of polling forever if the server process has died.
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "vLLM exited before becoming ready." >&2
    exit 1
  fi
  sleep 2
done

echo "vLLM is ready. Starting Gradio..."
# Runs in the foreground (no exec) so the EXIT trap can still stop vLLM;
# as the last command, its exit status becomes the script's status.
python /home/user/app/app.py \
  --model-name "${MODEL_NAME}" \
  --api-base "http://127.0.0.1:${VLLM_PORT}/v1" \
  --api-key "${API_KEY}" \
  --server-name "0.0.0.0" \
  --server-port "${GRADIO_PORT}"