Builder-Neekhil committed on
Commit
761a114
·
verified ·
1 Parent(s): cf22de4

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. india_all_districts_risk.csv +0 -0
  3. main.py +136 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Districtmaps.ai FastAPI service.
FROM python:3.11-slim

# Send print()/log output straight to the container log instead of letting
# Python buffer it (stdout is not a TTY inside a container).
ENV PYTHONUNBUFFERED=1

# Unprivileged account (uid 1000) so the server does not run as root.
RUN useradd --create-home --uid 1000 appuser

WORKDIR /app

# Install dependencies first so this layer is cached until requirements change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code and the district CSV, owned by the runtime user.
COPY --chown=appuser:appuser . .

USER appuser

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
india_all_districts_risk.csv ADDED
The diff for this file is too large to render. See raw diff
 
main.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Query
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import pandas as pd
4
+ import numpy as np
5
+ import os
6
+
7
# FastAPI application object together with its public API metadata.
app = FastAPI(
    version="1.0.0",
    title="Districtmaps.ai API",
    description="District-level health risk intelligence across 708 Indian districts. Powered by NFHS-5 data and validated ML models.",
)

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# Location of the district CSV; the DATA_PATH environment variable overrides it.
DATA_PATH = os.environ.get("DATA_PATH", "india_all_districts_risk.csv")

# Module-level table of districts; None until the startup hook has filled it.
df = None
23
+
24
@app.on_event("startup")
def load_data():
    """Load the district CSV into the module-level ``df`` when the app starts.

    Column headers are stripped, lower-cased and have spaces replaced by
    underscores. Two helper columns, ``district_lower`` and ``state_lower``,
    hold lower-cased, stripped copies of the names for case-insensitive lookup.
    """
    global df
    df = pd.read_csv(DATA_PATH)

    normalised_headers = []
    for header in df.columns:
        normalised_headers.append(header.strip().lower().replace(" ", "_"))
    df.columns = normalised_headers

    # Same derivation for both name columns, district first and then state.
    for name_column in ("district", "state"):
        df[name_column + "_lower"] = df[name_column].str.lower().str.strip()

    print(f"Loaded {len(df)} districts.")
32
+
33
def format_district(row):
    """Build the JSON-ready payload for a single district record.

    Args:
        row: Record exposing ``.get(key, default)`` (the endpoints pass the
            rows yielded by ``DataFrame.iterrows``).

    Returns:
        dict with the keys ``district``, ``state``, ``risk_scores`` (one entry
        per condition), ``composite_risk`` and ``risk_percentile``. Numeric
        fields go through ``safe_float``; missing names become ``""``.
    """

    def _text(value):
        # A blank CSV cell is read as float NaN, which is not valid JSON;
        # treat it like an absent key and fall back to the "" default.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return value

    return {
        "district": _text(row.get("district", "")),
        "state": _text(row.get("state", "")),
        "risk_scores": {
            "diabetes": safe_float(row.get("diabetes_risk")),
            "blood_pressure": safe_float(row.get("blood_pressure_risk")),
            "obesity": safe_float(row.get("obesity_risk")),
            "anaemia": safe_float(row.get("anaemia_risk")),
        },
        "composite_risk": safe_float(row.get("composite_risk")),
        # NOTE(review): the percentile is read from the diabetes-specific
        # "diabetes_risk_norm" column - confirm this is the intended source.
        "risk_percentile": safe_float(row.get("diabetes_risk_norm")),
    }
46
+
47
def safe_float(val):
    """Convert *val* to a float rounded to 4 decimals, or ``None`` if unusable.

    Args:
        val: Any value; typically a CSV cell or ``None`` for a missing column.

    Returns:
        The rounded float, or ``None`` when *val* cannot be converted or is
        NaN / +inf / -inf (none of which can be represented in JSON).
    """
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are expected here; anything else should surface.
        return None
    if np.isnan(number) or abs(number) == float("inf"):
        return None
    return round(number, 4)
53
+
54
@app.get("/", tags=["Info"])
def root():
    """Return a static description of the service plus the loaded row count."""
    # df is None until the startup hook has populated it.
    loaded_rows = 0 if df is None else len(df)

    validation_summary = {
        "cross_sectional_r2": 0.7477,
        "temporal_r2": 0.6279,
        "temporal_gap": "4 years (NFHS-4 2015-16 → NFHS-5 2019-21)",
        "districts_covered": 708,
    }
    endpoint_index = {
        "GET /risk": "Risk scores for a specific district",
        "GET /districts": "Full ranked list of all districts",
        "GET /top": "Top N highest risk districts",
        "GET /state/{state}": "All districts within a state",
    }

    return {
        "product": "Districtmaps.ai",
        "description": "District-level NCD risk intelligence for India",
        "districts": loaded_rows,
        "conditions": ["diabetes", "blood_pressure", "obesity", "anaemia"],
        "validation": validation_summary,
        "endpoints": endpoint_index,
    }
74
+
75
@app.get("/risk", tags=["Risk Scores"])
def get_district_risk(
    district: str = Query(..., description="District name e.g. Mumbai"),
    state: str = Query(None, description="Optional state filter to disambiguate")
):
    """Look up risk scores for a district by name.

    An exact, case-insensitive match on the district (and on the state, if
    given) is tried first. If nothing matches, a substring match is used
    as a fallback.

    Args:
        district: District name to search for.
        state: Optional state name used to narrow the match.

    Returns:
        dict with the original ``query`` and a ``matches`` list of formatted
        district payloads.

    Raises:
        HTTPException: 404 when neither the exact nor the substring search
            finds a district.
    """
    district_key = district.lower().strip()
    state_key = state.lower().strip() if state else ""

    mask = df["district_lower"] == district_key
    if state:
        mask &= df["state_lower"] == state_key
    results = df[mask]

    if results.empty:
        # Partial-match fallback. regex=False makes the user's text a literal
        # substring: otherwise "(" raises re.error and "." matches anything.
        partial = df["district_lower"].str.contains(district_key, na=False, regex=False)
        if state:
            partial &= df["state_lower"].str.contains(state_key, na=False, regex=False)
        results = df[partial]

    if results.empty:
        raise HTTPException(status_code=404, detail=f"District '{district}' not found.")

    return {
        "query": district,
        "matches": [format_district(row) for _, row in results.iterrows()]
    }
96
+
97
@app.get("/districts", tags=["Rankings"])
def get_all_districts(
    sort_by: str = Query("composite_risk", description="Field to sort by"),
    order: str = Query("desc", description="asc or desc"),
    limit: int = Query(708, ge=1, le=708)
):
    """Return districts ordered by a chosen column, truncated to ``limit`` rows.

    An unknown ``sort_by`` falls back to ``composite_risk``. Any ``order``
    other than ``"asc"`` sorts descending.
    """
    if sort_by in df.columns:
        col = sort_by
    else:
        col = "composite_risk"

    ranked = df.sort_values(col, ascending=(order == "asc")).head(limit)
    payload = [format_district(record) for _, record in ranked.iterrows()]

    return {
        "total": len(ranked),
        "sorted_by": col,
        "order": order,
        "districts": payload,
    }
112
+
113
@app.get("/top", tags=["Rankings"])
def get_top_districts(
    n: int = Query(10, ge=1, le=100, description="Number of districts"),
    condition: str = Query("composite_risk", description="diabetes_risk | blood_pressure_risk | obesity_risk | anaemia_risk | composite_risk")
):
    """Return the ``n`` districts with the highest value for ``condition``.

    Args:
        n: Number of districts to return (1-100).
        condition: Column to rank by. Unknown or non-numeric columns fall
            back to ``composite_risk``.

    Returns:
        dict with the column actually used (``condition``), the requested
        ``top_n`` and the formatted ``districts``.
    """
    col = condition if condition in df.columns else "composite_risk"
    # nlargest raises TypeError on text columns such as "district" or "state",
    # so anything that is not numeric gets the same fallback as an unknown name.
    if not pd.api.types.is_numeric_dtype(df[col]):
        col = "composite_risk"

    top = df.nlargest(n, col)
    return {
        "condition": col,
        "top_n": n,
        "districts": [format_district(row) for _, row in top.iterrows()]
    }
125
+
126
@app.get("/state/{state}", tags=["State"])
def get_state_districts(state: str):
    """List every district whose state name contains ``state``, highest risk first.

    Args:
        state: State name, or part of one, matched case-insensitively.

    Returns:
        dict with the requested ``state``, the number of matching
        ``districts`` and the ``ranked`` payloads ordered by descending
        ``composite_risk``.

    Raises:
        HTTPException: 404 when no state name contains the given text.
    """
    needle = state.lower().strip()
    # regex=False treats the path segment as a literal substring, so characters
    # such as "(" or "[" cannot raise re.error or act as wildcards.
    mask = df["state_lower"].str.contains(needle, na=False, regex=False)
    results = df[mask].sort_values("composite_risk", ascending=False)
    if results.empty:
        raise HTTPException(status_code=404, detail=f"State '{state}' not found.")
    return {
        "state": state,
        "districts": len(results),
        "ranked": [format_district(row) for _, row in results.iterrows()]
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn==0.29.0
3
+ pandas==2.2.2
4
+ numpy==1.26.4
5
+ python-multipart==0.0.9