# AMD Developer Cloud vLLM server endpoint (OpenAI-compatible)
VLLM_API_URL=http://your-amd-vm-ip:8000

# Model served by vLLM (must match what the server has loaded)
MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct

# API key for your vLLM server (set on the server via --api-key)
VLLM_API_KEY=your-secret-key

# Set to true to skip vLLM calls and return realistic mock responses
MOCK_MODE=false

# Generation parameters
MAX_NEW_TOKENS=512
TEMPERATURE=0.2