import os

# vLLM server on AMD Developer Cloud (OpenAI-compatible endpoint).
# Every setting below can be overridden via an environment variable of
# the same name; the second argument to os.environ.get is the fallback
# used when that variable is unset.
VLLM_API_URL = os.environ.get("VLLM_API_URL", "http://localhost:8000")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-VL-7B-Instruct")

# Generation settings
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "512"))  # upper bound on tokens generated per request
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.2"))  # low temperature -> mostly deterministic sampling