raazkumar committed on
Commit
a82b7c7
·
verified ·
1 Parent(s): ff7cea4

Upload production/k8s/deployment-api.yml

Browse files
Files changed (1) hide show
  1. production/k8s/deployment-api.yml +170 -0
production/k8s/deployment-api.yml ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Deployment for the ml-intern API: one `api` container listening on port 8000,
# configured from the `ml-intern-config` ConfigMap and the `ml-intern-secrets`
# Secret, rolled out with zero unavailable pods.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ml-intern-api
  namespace: ml-intern
  labels:
    app: ml-intern-api
    version: v1
spec:
  # `replicas` is intentionally NOT set here. The HorizontalPodAutoscaler
  # `ml-intern-api-hpa` in this file (minReplicas: 2) owns the replica count;
  # a hard-coded value would reset the count on every `kubectl apply` and
  # fight the autoscaler.
  # NOTE(review): if a previous revision of this manifest was already applied
  # with `replicas` set, removing the field can briefly drop the Deployment to
  # the default of 1 replica until the HPA reconciles.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Bring up one extra pod at a time and never take a serving pod down
      # before its replacement is ready.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: ml-intern-api
  template:
    metadata:
      labels:
        app: ml-intern-api
        version: v1
      annotations:
        # Conventional scrape hints; they only take effect if the cluster's
        # Prometheus scrape config honours these annotations.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      affinity:
        podAntiAffinity:
          # Soft rule: prefer to place pods labelled app=ml-intern-api on
          # different nodes, but still schedule if that is not possible.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - ml-intern-api
                topologyKey: kubernetes.io/hostname
      containers:
        - name: api
          # NOTE(review): mutable `latest` tag with no registry prefix. Consider
          # pinning an immutable tag or digest so production rollouts and
          # rollbacks are reproducible.
          image: ml-intern:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 8000
              name: http
          # Non-secret settings: every key of the ConfigMap becomes an env var.
          envFrom:
            - configMapRef:
                name: ml-intern-config
          # Credentials are injected one key at a time from `ml-intern-secrets`.
          # None of the references is marked optional, so a missing key keeps
          # the container from starting.
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: DATABASE_URL
            - name: REDIS_URL
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: REDIS_URL
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: HF_TOKEN
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: OPENAI_API_KEY
            - name: GROQ_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: GROQ_API_KEY
            - name: NVIDIA_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: NVIDIA_API_KEY
          resources:
            # The HPA's Utilization targets are computed against these requests,
            # not against the limits below.
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
          # Restart the container after 3 consecutive failed /health checks
          # (first check 30s after start, then every 10s).
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Remove the pod from Service endpoints after 3 consecutive failed
          # /health checks (first check 5s after start, then every 5s).
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          lifecycle:
            preStop:
              exec:
                # Holds the container for 15s before it receives SIGTERM;
                # presumably to let in-flight traffic drain while endpoints
                # are updated (confirm). Requires /bin/sh in the image.
                command: ["/bin/sh", "-c", "sleep 15"]
      # Total shutdown budget; it includes the 15s preStop sleep above.
      terminationGracePeriodSeconds: 60
115
+ ---
# Cluster-internal Service: exposes port 8000 and forwards it to port 8000 on
# every pod labelled app=ml-intern-api.
apiVersion: v1
kind: Service
metadata:
  namespace: ml-intern
  name: ml-intern-api
  labels: {app: ml-intern-api}
spec:
  type: ClusterIP
  # Must match the pod template labels of the workload being fronted.
  selector:
    app: ml-intern-api
  ports:
    - name: http
      protocol: TCP
      port: 8000
      targetPort: 8000
132
+ ---
# Autoscaler for the ml-intern-api Deployment: keeps between 2 and 20 replicas,
# driven by average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  namespace: ml-intern
  name: ml-intern-api-hpa
spec:
  scaleTargetRef: {apiVersion: apps/v1, kind: Deployment, name: ml-intern-api}
  minReplicas: 2
  maxReplicas: 20
  # With several metrics the HPA uses the largest replica count any of them
  # proposes. Utilization is a percentage of the containers' resource
  # *requests*, not of their limits.
  metrics:
    - type: Resource
      resource:
        name: cpu
        target: {type: Utilization, averageUtilization: 70}
    # NOTE(review): the Deployment in this file requests 512Mi of memory, so
    # 80% is roughly 410Mi per pod - confirm that is the intended threshold
    # given the 2Gi limit.
    - type: Resource
      resource:
        name: memory
        target: {type: Utilization, averageUtilization: 80}
  behavior:
    # Scale up: look back 60s, then at most double the replica count per 60s.
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - {type: Percent, value: 100, periodSeconds: 60}
    # Scale down: look back 300s, then remove at most half the replicas per 120s.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - {type: Percent, value: 50, periodSeconds: 120}