---
# docker-compose stack: vLLM (ROCm) inference server + apohara API + Gradio UI.
# (Removed web-scrape artifacts that preceded this document: a file-size
# banner, commit SHAs, and copied gutter line numbers — they are not YAML.)
services:
  vllm:
    # FIX(review): was `ollama/rocm:latest`, which ships Ollama and does NOT
    # contain the `vllm` CLI invoked in `command` below — the container would
    # crash-loop. `rocm/vllm` is AMD's official vLLM ROCm image.
    image: rocm/vllm:latest
    container_name: contextforge-vllm
    ports:
      - "8000:8000"
    environment:
      # Falls back to a local-only key when VLLM_API_KEY is unset on the host.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
    volumes:
      # FIX(review): the top-level `models` volume was declared but never
      # mounted; cache Hugging Face weights so they survive recreation.
      - models:/root/.cache/huggingface
    # NOTE(review): "Qwen/Qwen3.6-35B-A3B" does not match a published Qwen
    # model id (the MoE release is Qwen3-30B-A3B) — confirm before deploying.
    # If changed, update VLLM_MODEL in the apohara service to match.
    command: >
      vllm serve Qwen/Qwen3.6-35B-A3B
      --enable-prefix-caching
      --enable-chunked-prefill
      --tensor-parallel-size 1
      --reasoning-parser qwen3
      --trust-remote-code
      --host 0.0.0.0
      --port 8000
    healthcheck:
      # NOTE(review): assumes `curl` exists in the image — verify.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # FIX(review): loading a 35B-class model takes minutes; without a grace
      # period the service is flagged unhealthy after 3x30s and `apohara`
      # (gated on service_healthy) never starts.
      start_period: 300s
    # FIX(review): Compose's deploy.resources.reservations.devices has no
    # `amd` driver (only nvidia/cdi). ROCm containers instead need the KFD
    # and DRI device nodes plus membership in the `video` group.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video

  apohara:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara
    ports:
      - "8001:8001"
    environment:
      # Reach vLLM via its compose service DNS name on the internal network.
      - VLLM_BASE_URL=http://vllm:8000
      # Must match the model id served by the vllm service above.
      - VLLM_MODEL=Qwen/Qwen3.6-35B-A3B
      - CONTEXTFORGE_PORT=8001
      # FIX(review): the vllm service enforces an API key; forward the same
      # default so apohara's requests authenticate.
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
    depends_on:
      vllm:
        condition: service_healthy
    healthcheck:
      # NOTE(review): assumes `curl` is installed in the built image — verify.
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  gradio:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara-ui
    ports:
      - "7860:7860"
    environment:
      # NOTE(review): inside the compose network the API host is `apohara`,
      # not localhost — confirm demo/app.py reads a host setting, or add
      # e.g. CONTEXTFORGE_HOST=apohara if it supports one.
      - CONTEXTFORGE_PORT=8001
    depends_on:
      - apohara
    command: python demo/app.py

volumes:
  # Named volume backing the Hugging Face model cache mounted by `vllm`.
  models: