# AMD Developer Cloud vLLM server endpoint (OpenAI-compatible)
VLLM_API_URL=http://your-amd-vm-ip:8000
# Model served by vLLM (must match what the server has loaded)
MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct
# API key for your vLLM server (set on the server via --api-key)
VLLM_API_KEY=your-secret-key
# Set to true to skip vLLM calls and return realistic mock responses
MOCK_MODE=false
# Generation parameters
MAX_NEW_TOKENS=512
TEMPERATURE=0.2