# AMD Developer Cloud vLLM server endpoint (OpenAI-compatible)
VLLM_API_URL=http://your-amd-vm-ip:8000

# Model served by vLLM (must match what the server has loaded)
MODEL_NAME=Qwen/Qwen2.5-VL-7B-Instruct

# API key for your vLLM server (set on the server via --api-key)
VLLM_API_KEY=your-secret-key

# Set to true to skip vLLM calls and return realistic mock responses
MOCK_MODE=false

# Generation parameters
MAX_NEW_TOKENS=512
TEMPERATURE=0.2