raazkumar committed on
Commit
a7404b0
·
verified ·
1 Parent(s): 618e907

Upload production/grafana/dashboards/ml_intern.json

Browse files
production/grafana/dashboards/ml_intern.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dashboard": {
3
+ "id": null,
4
+ "title": "ml-intern Production Dashboard",
5
+ "tags": ["ml-intern", "production"],
6
+ "timezone": "UTC",
7
+ "panels": [
8
+ {
9
+ "id": 1,
10
+ "title": "Request Rate (RPM)",
11
+ "type": "graph",
12
+ "targets": [
13
+ {
14
+ "expr": "sum(rate(ml_intern_requests_total[1m])) by (provider) * 60",
15
+ "legendFormat": "{{provider}}"
16
+ }
17
+ ],
18
+ "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
19
+ },
20
+ {
21
+ "id": 2,
22
+ "title": "Request Latency (P95)",
23
+ "type": "graph",
24
+ "targets": [
25
+ {
26
+ "expr": "histogram_quantile(0.95, sum(rate(ml_intern_request_duration_seconds_bucket[5m])) by (le))",
27
+ "legendFormat": "P95"
28
+ }
29
+ ],
30
+ "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
31
+ },
32
+ {
33
+ "id": 3,
34
+ "title": "Active Sessions",
35
+ "type": "singlestat",
36
+ "targets": [
37
+ {
38
+ "expr": "ml_intern_active_sessions"
39
+ }
40
+ ],
41
+ "gridPos": {"h": 4, "w": 6, "x": 0, "y": 8}
42
+ },
43
+ {
44
+ "id": 4,
45
+ "title": "Cache Hit Rate",
46
+ "type": "singlestat",
47
+ "targets": [
48
+ {
49
+ "expr": "sum(rate(ml_intern_cache_hits_total[5m])) / (sum(rate(ml_intern_cache_hits_total[5m])) + sum(rate(ml_intern_cache_misses_total[5m])))"
50
+ }
51
+ ],
52
+ "format": "percentunit",
53
+ "gridPos": {"h": 4, "w": 6, "x": 6, "y": 8}
54
+ },
55
+ {
56
+ "id": 5,
57
+ "title": "LLM Cost (USD/hour)",
58
+ "type": "graph",
59
+ "targets": [
60
+ {
61
+ "expr": "sum(rate(ml_intern_llm_cost_usd_total[1h])) by (provider, model) * 3600",
62
+ "legendFormat": "{{provider}}/{{model}}"
63
+ }
64
+ ],
65
+ "gridPos": {"h": 8, "w": 12, "x": 0, "y": 12}
66
+ },
67
+ {
68
+ "id": 6,
69
+ "title": "Circuit Breaker States",
70
+ "type": "graph",
71
+ "targets": [
72
+ {
73
+ "expr": "ml_intern_circuit_breaker_state",
74
+ "legendFormat": "{{provider}}"
75
+ }
76
+ ],
77
+ "gridPos": {"h": 8, "w": 12, "x": 12, "y": 12}
78
+ },
79
+ {
80
+ "id": 7,
81
+ "title": "Error Rate",
82
+ "type": "graph",
83
+ "targets": [
84
+ {
85
+ "expr": "sum(rate(ml_intern_requests_total{status!~\"2..\"}[5m])) / sum(rate(ml_intern_requests_total[5m]))",
86
+ "legendFormat": "Error Rate"
87
+ }
88
+ ],
89
+ "gridPos": {"h": 8, "w": 12, "x": 0, "y": 20}
90
+ }
91
+ ],
92
+ "time": {"from": "now-1h", "to": "now"},
93
+ "refresh": "10s"
94
+ }
95
+ }