Spaces:
Sleeping
Sleeping
# Docker Compose stack: a vLLM inference server on an AMD (ROCm) GPU, the
# apohara backend that talks to it, and a Gradio demo UI.
#
# Start order: vllm (healthy) -> apohara (healthy) -> gradio.

services:
  # OpenAI-compatible inference server.
  vllm:
    # The command below runs `vllm serve`, so the image must ship vLLM built
    # for ROCm (an Ollama image does not provide the `vllm` executable).
    image: rocm/vllm:latest
    container_name: contextforge-vllm
    ports:
      - "8000:8000"
    environment:
      # When set, vLLM requires this key as a Bearer token on its /v1 routes.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
    # Folded scalar: the lines below are joined into a single command line.
    command: >
      vllm serve Qwen/Qwen3.6-35B-A3B
      --enable-prefix-caching
      --enable-chunked-prefill
      --tensor-parallel-size 1
      --reasoning-parser qwen3
      --trust-remote-code
      --host 0.0.0.0
      --port 8000
    volumes:
      # Persist downloaded weights so a container restart does not re-download
      # the model. NOTE(review): assumes the image runs as root with the
      # default Hugging Face cache location - confirm for the chosen image.
      - models:/root/.cache/huggingface
    # ROCm GPU passthrough: AMD GPUs are exposed through the kfd/dri device
    # nodes rather than a Compose `deploy.resources` device reservation.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
    # Shared host IPC namespace, as recommended for vLLM/PyTorch containers.
    ipc: host
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Model download/load takes far longer than retries * interval; failed
      # probes during this window do not count towards `retries`, so dependent
      # services are not aborted while the model is still loading.
      start_period: 10m

  # Backend service; built from the Dockerfile in this directory.
  apohara:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara
    ports:
      - "8001:8001"
    environment:
      - VLLM_BASE_URL=http://vllm:8000
      - VLLM_MODEL=Qwen/Qwen3.6-35B-A3B
      # Same key the vllm service enforces. NOTE(review): assumes the app
      # reads VLLM_API_KEY - confirm the variable name against the app config.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
      - CONTEXTFORGE_PORT=8001
    depends_on:
      vllm:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Gradio demo UI; same image as apohara with a different entry command.
  gradio:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara-ui
    ports:
      - "7860:7860"
    environment:
      - CONTEXTFORGE_PORT=8001
    depends_on:
      # Wait for the backend's healthcheck to pass, not merely for its
      # container to be created.
      apohara:
        condition: service_healthy
    command: python demo/app.py

volumes:
  # Named volume holding the model cache mounted by the vllm service.
  models: