Spaces:
Runtime error
Runtime error
# Docker Compose stack with two GPU-backed services built on the NVIDIA CUDA
# runtime image:
#   * inference   - runs ./infer.py from the bind-mounted project directory on one GPU.
#   * vllm_server - installs vLLM at start-up and serves Qwen2.5-0.5B-Instruct
#                   on port 8000 with tensor parallelism across two GPUs.
#
# Requires the NVIDIA Container Toolkit on the host so that the `deploy`
# device reservations below can attach GPUs to the containers.

# Obsolete in the Compose Specification (Compose v2 ignores it with a warning);
# kept so older docker-compose releases still accept the file.
version: "3.9"

services:
  inference:
    image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    volumes:
      # Bind-mount the project directory so infer.py is visible in the container.
      - .:/workspace
    working_dir: /workspace
    # The CUDA runtime image ships without Python, so a bare `python infer.py`
    # fails with "executable file not found". Install the interpreter first and
    # call it by its real name (`python3`).
    # NOTE(review): infer.py's own package requirements are not visible from
    # this file - add a `python3 -m pip install ...` step here if it has any.
    command: >
      bash -c "export DEBIAN_FRONTEND=noninteractive &&
      apt-get update &&
      apt-get install -y python3 python3-pip &&
      python3 infer.py"

  vllm_server:
    image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
    environment:
      - NVIDIA_VISIBLE_DEVICES=0,1
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      - CUDA_VISIBLE_DEVICES=0,1
    # Environment variables alone do not attach GPUs unless the NVIDIA runtime
    # is the daemon default; reserve both devices explicitly so that
    # `--tensor-parallel-size 2` has two GPUs to shard across.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0", "1"]
              capabilities: [gpu]
    # Tensor-parallel workers exchange data through shared memory; Docker's
    # default 64 MB /dev/shm is too small, so share the host IPC namespace
    # (the vLLM Docker guide recommends this or a larger shm_size).
    ipc: host
    ports:
      - "8000:8000"
    # nvidia-smi runs first as a fail-fast check that the GPUs are visible.
    # pip is not present in the base image, so it is installed via apt; the
    # recommended packages (compiler toolchain, Python headers) are kept
    # because JIT-compiled GPU kernels may need them at runtime.
    # The bind address and port are spelled out to match the published port.
    command: >
      bash -c "nvidia-smi &&
      export DEBIAN_FRONTEND=noninteractive &&
      apt-get update &&
      apt-get install -y python3 python3-pip &&
      python3 -m pip install vllm &&
      vllm serve Qwen/Qwen2.5-0.5B-Instruct
      --host 0.0.0.0 --port 8000
      --tensor-parallel-size 2"