---
# Deployment for the ml-intern API. Pods expose HTTP on container port 8000,
# take non-secret settings from the ml-intern-config ConfigMap and credentials
# from the ml-intern-secrets Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ml-intern-api
  namespace: ml-intern
  labels:
    app: ml-intern-api
    version: v1
spec:
  # spec.replicas is intentionally not set: the ml-intern-api-hpa
  # HorizontalPodAutoscaler owns the replica count (2-20). A fixed value here
  # would reset the scale on every apply and fight the autoscaler.
  # NOTE(review): when rolling this out to an existing Deployment with
  # client-side `kubectl apply`, follow the Kubernetes HPA migration guidance
  # to avoid a transient scale-down to the default of 1 replica.
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Start one extra pod first and never take a ready pod away, so serving
      # capacity does not dip during a rollout.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: ml-intern-api
  template:
    metadata:
      labels:
        app: ml-intern-api
        version: v1
      annotations:
        # Conventional scrape hints; they only take effect if the cluster's
        # Prometheus is configured for annotation-based discovery.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      affinity:
        podAntiAffinity:
          # Soft rule: prefer placing replicas on different nodes, but still
          # schedule when that is not possible.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - ml-intern-api
                topologyKey: kubernetes.io/hostname
      containers:
        - name: api
          # NOTE(review): `latest` is a mutable tag; combined with
          # imagePullPolicy Always, each pod start may run a different build.
          # Consider pinning a version tag or image digest.
          image: ml-intern:latest
          imagePullPolicy: Always
          ports:
            - containerPort: 8000
              name: http
          envFrom:
            - configMapRef:
                name: ml-intern-config
          # Credentials are injected from the ml-intern-secrets Secret; each
          # variable name matches its Secret key.
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: DATABASE_URL
            - name: REDIS_URL
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: REDIS_URL
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: HF_TOKEN
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: ANTHROPIC_API_KEY
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: OPENAI_API_KEY
            - name: GROQ_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: GROQ_API_KEY
            - name: NVIDIA_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ml-intern-secrets
                  key: NVIDIA_API_KEY
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "2000m"
          # Restart the container after 3 consecutive failed /health checks
          # (10s apart), starting 30s after container start.
          livenessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Remove the pod from Service endpoints after 3 consecutive failed
          # /health checks (5s apart).
          readinessProbe:
            httpGet:
              path: /health
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          lifecycle:
            preStop:
              exec:
                # Runs before the container is sent SIGTERM. Presumably gives
                # endpoint removal time to propagate so in-flight requests
                # can drain - confirm intent.
                command: ["/bin/sh", "-c", "sleep 15"]
      # Total shutdown budget for the pod; the 15s preStop sleep counts
      # against it.
      terminationGracePeriodSeconds: 60
---
# Cluster-internal Service for the ml-intern API. Routes TCP port 8000 to
# port 8000 on pods labelled app=ml-intern-api.
apiVersion: v1
kind: Service
metadata:
  name: ml-intern-api
  namespace: ml-intern
  labels:
    app: ml-intern-api
spec:
  type: ClusterIP
  ports:
    - port: 8000
      targetPort: 8000
      protocol: TCP
      name: http
  # Selects on `app` only, so pods of any `version` label receive traffic.
  selector:
    app: ml-intern-api
---
# Autoscaler for the ml-intern-api Deployment: keeps between 2 and 20
# replicas based on average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: ml-intern-api-hpa
  namespace: ml-intern
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: ml-intern-api
  minReplicas: 2
  maxReplicas: 20
  # With several metrics the HPA computes a desired replica count for each
  # and uses the largest.
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # NOTE(review): Utilization is measured against the container's resource
    # *requests*, not its limits. If the target pods request far less memory
    # than they normally use, this metric can hold the Deployment near
    # maxReplicas - verify against observed memory usage.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleUp:
      # Act on the lowest recommendation from the last 60s, then at most
      # double the replica count per 60s period.
      stabilizationWindowSeconds: 60
      policies:
        - type: Percent
          value: 100
          periodSeconds: 60
    scaleDown:
      # Act on the highest recommendation from the last 300s, then remove at
      # most half of the replicas per 120s period.
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 50
          periodSeconds: 120