File size: 2,810 Bytes
# Replica counts, keyed by component (api, worker).
# NOTE(review): autoscaling.enabled is true in this file; presumably the api
# count is then only a starting value — confirm in the chart templates.
replicaCount:
  api: 2
  worker: 2
# Container image reference.
# NOTE(review): a mutable tag such as 'latest' together with IfNotPresent
# means a node that already caches the image will not pull a newer build.
# Consider pinning a version or digest outside local development.
image:
  repository: ml-intern
  pullPolicy: IfNotPresent
  tag: 'latest'
# No image pull secrets are configured by default.
imagePullSecrets: []

# Name overrides; both are empty by default.
nameOverride: ''
fullnameOverride: ''
# Service account settings.
# NOTE(review): in the stock Helm scaffold an empty `name` means the name is
# derived from the fullname template — confirm this chart does the same.
serviceAccount:
  create: true
  annotations: {}
  name: ''
# Prometheus scrape annotations.
# The values are quoted on purpose: annotation values must be strings, and a
# bare true / 8000 would be parsed as a boolean and an integer.
podAnnotations:
  prometheus.io/scrape: 'true'
  prometheus.io/port: '8000'
  prometheus.io/path: '/metrics'
# Pod-level security context: fsGroup is set to GID 1000.
podSecurityContext:
  fsGroup: 1000
# Container-level security context: non-root UID 1000, read-only root
# filesystem, no privilege escalation, and every Linux capability dropped.
securityContext:
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  runAsNonRoot: true
  runAsUser: 1000
  capabilities:
    drop:
      - ALL
# Service settings: a ClusterIP service on port 8000 (the same number as
# config.port and the prometheus.io/port annotation in this file).
service:
  type: ClusterIP
  port: 8000
# Ingress settings; disabled by default.
ingress:
  enabled: false
  className: 'nginx'
  annotations:
    # ingress-nginx configures rate limiting through its limit-* annotations.
    # limit-rpm is the number of requests accepted per client IP per minute,
    # so 100 per minute is expressed as a single annotation.
    # NOTE(review): assumes the controller behind class 'nginx' is the
    # community ingress-nginx — confirm.
    nginx.ingress.kubernetes.io/limit-rpm: '100'
  hosts:
    - host: ml-intern.local
      paths:
        - path: /
          pathType: Prefix
  # No TLS entries by default.
  tls: []
# CPU and memory requests/limits, keyed by component (api, worker).
resources:
  api:
    requests:
      cpu: 250m
      memory: 512Mi
    limits:
      cpu: 2000m
      memory: 2Gi

  worker:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 1000m
      memory: 1Gi
# Autoscaling settings; enabled by default, with parameters for the api
# component only.
# NOTE(review): the original indentation was lost. The two stabilization
# window keys are placed under `api` because they directly follow its other
# keys — confirm against the template that reads them.
autoscaling:
  enabled: true
  api:
    minReplicas: 2
    maxReplicas: 20
    targetCPUUtilizationPercentage: 70
    targetMemoryUtilizationPercentage: 80
    scaleUpStabilizationWindowSeconds: 60
    scaleDownStabilizationWindowSeconds: 300
# No node selector constraints by default.
nodeSelector: {}

# No tolerations by default.
tolerations: []
# Affinity rules, keyed by component. For api: a preferred (soft) pod
# anti-affinity, weight 100, against pods labelled
# app.kubernetes.io/name=ml-intern, using the node hostname as topology key.
# NOTE(review): the label value is a literal; presumably it stops matching if
# nameOverride changes the chart name — verify against the label helper.
affinity:
  api:
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          podAffinityTerm:
            topologyKey: kubernetes.io/hostname
            labelSelector:
              matchExpressions:
                - key: app.kubernetes.io/name
                  operator: In
                  values:
                    - ml-intern
# Application configuration values.
# NOTE(review): the original indentation was lost. `logLevel` is placed
# directly under `config`, not under `circuitBreaker` — confirm.
config:
  # Same number as service.port and the prometheus.io/port annotation.
  port: 8000
  workers: 4
  maxConcurrentRequests: 200
  defaultRpmLimit: 40
  # Presumably seconds, like cacheTtlSeconds below — confirm with the app.
  requestTimeout: 120
  cacheTtlSeconds: 300
  budgetUsdPerSession: 10.0
  circuitBreaker:
    failureThreshold: 5
    recoveryTimeout: 60
  logLevel: INFO
# Credential values; all empty by default. Supply real values at install time
# (for example via --set or a private values file), not by committing them.
secrets:
  hfToken: ''
  anthropicApiKey: ''
  openaiApiKey: ''
  groqApiKey: ''
  nvidiaApiKey: ''
# Redis settings: standalone architecture with a 5Gi persistent volume.
# NOTE(review): key layout looks like the Bitnami redis subchart, and
# `resources` is placed under `master` on that assumption — confirm.
# NOTE(review): auth is disabled, so Redis accepts unauthenticated clients;
# confirm this is acceptable for the target cluster.
redis:
  enabled: true
  architecture: standalone
  auth:
    enabled: false
  master:
    persistence:
      enabled: true
      size: 5Gi
    resources:
      requests:
        cpu: 100m
        memory: 256Mi
      limits:
        cpu: 500m
        memory: 1Gi
# PostgreSQL settings: a 10Gi persistent volume on the primary.
# NOTE(review): key layout looks like the Bitnami postgresql subchart, and
# `resources` is placed under `primary` on that assumption — confirm.
postgresql:
  enabled: true
  auth:
    username: ml_intern
    # NOTE(review): default password equals the username and is stored in
    # plain text here; override it for any shared or production install.
    password: ml_intern
    database: ml_intern
  primary:
    persistence:
      enabled: true
      size: 10Gi
    resources:
      requests:
        cpu: 100m
        memory: 256Mi
      limits:
        cpu: 500m
        memory: 1Gi
# Monitoring toggles for Prometheus, Grafana and Jaeger; all on by default.
# NOTE(review): the original indentation was lost. The three components are
# nested under `monitoring` — confirm against the templates.
monitoring:
  enabled: true
  prometheus:
    enabled: true
    retention: '30d'
  grafana:
    enabled: true
    # NOTE(review): well-known default credential stored in plain text;
    # override it for any shared or production install.
    adminPassword: 'admin'
  jaeger:
    enabled: true