digifreely committed on
Commit
1b95620
·
verified ·
1 Parent(s): a4134f9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +323 -0
  2. requirements.txt +20 -0
app.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py – Children's Learning Router Service
3
+ Runs on Hugging Face Spaces (CPU-only Docker) with uvicorn + FastAPI (ASGI native).
4
+ Validates serv_code header, uses Qwen2.5-3B-Instruct to decide routing,
5
+ then asynchronously forwards the full payload to the appropriate downstream URL.
6
+ """
7
+
8
import asyncio
import hmac
import ipaddress
import json
import logging
import os
import threading
from contextlib import asynccontextmanager

import httpx
import torch
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from transformers import AutoTokenizer, AutoModelForCausalLM
18
+
19
+ # ──────────────────────────────────────────────
20
+ # Logging
21
+ # ──────────────────────────────────────────────
22
# Root logging configuration: INFO and above, timestamp + level + message.
_LOG_FORMAT = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by every helper and route in this file.
logger = logging.getLogger(__name__)
27
+
28
+ # ──────────────────────────────────────────────
29
+ # Environment / Secrets
30
+ # (set in HF Spaces → Settings → Repository secrets)
31
+ # ──────────────────────────────────────────────
32
# Shared secret expected in the ``serv_code`` request header. Defaults to an
# empty string when unset.
SERV_CODE = os.getenv("SERV_CODE", "")

# Cloudflare credentials used by block_ip_cloudflare(); empty when unset.
CF_API_TOKEN = os.getenv("CF_API_TOKEN", "")
CF_ACCOUNT_ID = os.getenv("CF_ACCOUNT_ID", "")

# Downstream service endpoints, each overridable through the environment.
BLOCK_URL = os.getenv("BLOCK_URL", "https://blockchakalaka.onrender.com")
CHITCHAT_URL = os.getenv("CHITCHAT_URL", "https://chitchatchakalaka.onrender.com")
QUESTION_URL = os.getenv("QUESTION_URL", "https://questionchakalaka.onrender.com")
CURRICULUM_URL = os.getenv("CURRICULUM_URL", "https://currichakalaka.onrender.com")

# Routing decision word -> downstream URL that receives the payload.
DECISION_URL_MAP = dict(
    Block=BLOCK_URL,
    Chitchat=CHITCHAT_URL,
    Question=QUESTION_URL,
    Curriculum=CURRICULUM_URL,
)
47
+
48
+ # ──────────────────────────────────────────────
49
+ # Model globals
50
+ # ──────────────────────────────────────────────
51
# Hugging Face model id used for routing decisions.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Populated by load_model(); None until the model has been loaded.
tokenizer = None
model = None


def load_model() -> None:
    """Load the tokenizer and causal-LM weights for MODEL_NAME into the globals.

    Assigns the module-level ``tokenizer`` and ``model`` and switches the
    model to evaluation mode. Called from the FastAPI lifespan hook.
    """
    global tokenizer, model

    logger.info("Loading %s on CPU …", MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # float32 weights placed on the CPU device.
    load_options = {
        "torch_dtype": torch.float32,
        "device_map": "cpu",
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
    model.eval()

    logger.info("Model ready.")
69
+
70
+
71
+ # ──────────────────────────────────────────────
72
+ # FastAPI lifespan — loads model before first request
73
+ # ──────────────────────────────────────────────
74
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the model, then hand control to FastAPI.

    Everything before ``yield`` runs during startup; there is no shutdown work
    after it.
    """
    # Synchronous on purpose: the model must be in place before requests arrive.
    load_model()
    yield


# The ASGI application; the lifespan hook above loads the model at startup.
app = FastAPI(lifespan=lifespan)
82
+
83
+ # ──────────────────────────────────────────────
84
+ # System Prompt
85
+ # ──────────────────────────────────────────────
86
# System message sent to the model on every routing call. It must steer the
# model to answer with exactly one of the four decision words that
# get_decision() looks for.
SYSTEM_PROMPT = """You are a routing agent for a children's educational app (ages 5-12).

Read the student context and output EXACTLY ONE word:
Block | Curriculum | Question | Chitchat

RULES — evaluate in this strict order:

1. BLOCK
- request_message contains abusive, sexual, violent, hateful, or adult content.
- OR the child has been persistently abusive across multiple turns in chat_history.
→ Output: Block

2. CURRICULUM
- request_message is clearly related to current_learning goals.
→ Output: Curriculum

3. QUESTION
- request_message is educational / knowledge-based but NOT related to current_learning.
(Could be another subject, a past/future lesson, or general knowledge.)
→ Output: Question

4. CHITCHAT
- Everything else: greetings, jokes, feelings, random comments, playful chat.
→ Output: Chitchat

CRITICAL:
- Output the single decision word ONLY. No punctuation, no explanation.
- Follow the numbered order strictly.
- Prefer Curriculum over Question when current_learning is involved.
- Prefer Question over Chitchat when the message is educational.
"""
117
+
118
+
119
def build_user_content(payload: dict) -> str:
    """Serialise the full learning context into the user prompt for the model.

    Args:
        payload: Request body. Reads ``learning_path`` (``board``, ``class``,
            ``subject``, ``student_name``, ``teacher_persona``,
            ``curriculum_objectives``, ``assessment_stages.current_learning``,
            ``chat_history``, ``scratchpad``) and ``query.request_message``.

    Returns:
        A sectioned text block ending with the instruction to output a single
        decision word. Missing scalar fields render as ``N/A``; missing list
        sections render as ``[]``.

    A ``learning_path``, ``query`` or ``assessment_stages`` that is null or not
    a JSON object is treated as empty instead of raising ``AttributeError``.
    """

    def as_mapping(value):
        # JSON null, strings, lists, ... cannot be queried with .get().
        return value if isinstance(value, dict) else {}

    def as_json(value) -> str:
        # ensure_ascii=False keeps non-ASCII text readable for the model
        # instead of turning it into \uXXXX escapes; the request message below
        # is embedded verbatim, so this keeps the prompt consistent.
        return json.dumps(value, indent=2, ensure_ascii=False)

    lp = as_mapping(payload.get("learning_path"))
    query = as_mapping(payload.get("query"))
    stages = as_mapping(lp.get("assessment_stages"))
    current_learning = stages.get("current_learning", [])
    request_message = query.get("request_message", "")

    sections = [
        "=== STUDENT CONTEXT ===",
        f"Board: {lp.get('board', 'N/A')}",
        f"Class: {lp.get('class', 'N/A')}",
        f"Subject: {lp.get('subject', 'N/A')}",
        f"Student Name: {lp.get('student_name', 'N/A')}",
        f"Teacher Persona: {lp.get('teacher_persona', 'N/A')}",
        "",
        "=== CURRICULUM OBJECTIVES ===",
        as_json(lp.get("curriculum_objectives", [])),
        "",
        "=== CURRENT LEARNING (active topic) ===",
        as_json(current_learning),
        "",
        "=== CHAT HISTORY ===",
        as_json(lp.get("chat_history", [])),
        "",
        "=== SCRATCHPAD ===",
        as_json(lp.get("scratchpad", [])),
        "",
        "=== STUDENT'S CURRENT REQUEST ===",
        f'"{request_message}"',
        "",
        "Output your single decision word:",
    ]
    return "\n".join(sections)
148
+
149
+
150
def get_decision(payload: dict) -> str:
    """Ask the loaded model for a routing decision.

    Builds a system + user chat from SYSTEM_PROMPT and the serialised payload,
    generates at most five new tokens greedily, and maps the lower-cased
    completion onto a decision word.

    Returns:
        ``"Block"``, ``"Curriculum"`` or ``"Question"`` when the matching
        keyword occurs in the completion (checked in that order); otherwise
        ``"Chitchat"``.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_content(payload)},
    ]

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer([prompt], return_tensors="pt")
    prompt_length = encoded["input_ids"].shape[1]

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=5,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + completion; keep only the newly generated part.
    completion_ids = generated[0][prompt_length:]
    raw = tokenizer.decode(completion_ids, skip_special_tokens=True).strip().lower()
    logger.info("Raw model output: %r", raw)

    # Order matters: the first keyword found wins.
    keyword_to_decision = (
        ("block", "Block"),
        ("curriculum", "Curriculum"),
        ("question", "Question"),
    )
    for keyword, decision in keyword_to_decision:
        if keyword in raw:
            return decision
    return "Chitchat"
176
+
177
+
178
+ # ──────────────────────────────────────────────
179
+ # Cloudflare IP blocking helper
180
+ # ──────────────────────────────────────────────
181
async def block_ip_cloudflare(ip: str) -> None:
    """Ask Cloudflare to create a "block" IP Access Rule for *ip* (best effort).

    Args:
        ip: Caller address as text. Values that are not a valid IPv4/IPv6
            address (for example the ``"unknown"`` placeholder) are skipped
            instead of being sent to Cloudflare.

    Nothing is raised to the caller: a missing configuration, an invalid
    address, an HTTP error status or a transport failure is only logged.
    """
    if not CF_API_TOKEN or not CF_ACCOUNT_ID:
        logger.warning("Cloudflare secrets not configured – skipping IP block for %s", ip)
        return

    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        logger.warning("Not a valid IP address – skipping Cloudflare block for %r", ip)
        return

    url = f"https://api.cloudflare.com/client/v4/accounts/{CF_ACCOUNT_ID}/firewall/access_rules/rules"
    cf_headers = {
        "Authorization": f"Bearer {CF_API_TOKEN}",
        "Content-Type": "application/json",
    }
    body = {
        "mode": "block",
        "configuration": {
            # Cloudflare uses a separate target name for IPv6 addresses.
            "target": "ip6" if address.version == 6 else "ip",
            "value": str(address),
        },
        "notes": "Auto-blocked: invalid serv_code",
    }

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(url, headers=cf_headers, json=body)
    except Exception as exc:
        # Best effort: a failed block must never break the request being served.
        logger.error("Cloudflare block failed for %s: %s", ip, exc)
        return

    # A 4xx/5xx answer means the rule was not created; make that visible.
    log = logger.warning if resp.status_code >= 400 else logger.info
    log("Cloudflare block %s → HTTP %s", ip, resp.status_code)
202
+
203
+
204
+ # ──────────────────────────────────────────────
205
+ # Downstream forwarding helper
206
+ # ──────────────────────────────────────────────
207
async def forward_request(target_url: str, payload: dict, serv_code: str) -> tuple[dict, int]:
    """POST the full payload to the chosen downstream service.

    Args:
        target_url: Downstream endpoint to call.
        payload: JSON body forwarded unchanged.
        serv_code: Service code passed on in the ``serv_code`` header.

    Returns:
        ``(body, status_code)``. ``body`` is the decoded JSON response, or
        ``{"raw_response": <text>}`` when the response is not valid JSON. A
        timeout yields ``({"error": ...}, 504)``; any other failure yields
        ``({"error": ...}, 502)``.
    """
    fwd_headers = {
        "Content-Type": "application/json",
        "serv_code": serv_code,
    }
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(target_url, json=payload, headers=fwd_headers)
        logger.info("Downstream %s → HTTP %s", target_url, resp.status_code)
        try:
            return resp.json(), resp.status_code
        except ValueError:
            # Not JSON (decode errors are ValueError subclasses): pass the
            # text through instead of failing the whole request.
            return {"raw_response": resp.text}, resp.status_code
    except httpx.TimeoutException:
        logger.error("Timeout forwarding to %s", target_url)
        return {"error": f"Downstream timeout: {target_url}"}, 504
    except Exception as exc:
        logger.error("Error forwarding to %s: %s", target_url, exc)
        return {"error": str(exc)}, 502
227
+
228
+
229
+ # ──────────────────────────────────────────────
230
+ # Shared helpers
231
+ # ──────────────────────────────────────────────
232
def get_caller_ip(req: Request) -> str:
    """Return the caller's IP address as text.

    The first entry of the ``x-forwarded-for`` header wins when that header is
    present and non-empty; otherwise the socket peer address is used, or
    ``"unknown"`` when the request carries no client information.

    NOTE(review): ``x-forwarded-for`` is supplied by the client unless a
    trusted proxy overwrites it. Confirm the deployment's proxy behaviour,
    since this value is what gets blocked in Cloudflare.
    """
    forwarded = req.headers.get("x-forwarded-for", "")
    if not forwarded:
        peer = req.client
        return peer.host if peer else "unknown"

    first_hop, _, _ = forwarded.partition(",")
    return first_hop.strip()
237
+
238
+
239
+ # ──────────────────────────────────────────────
240
+ # Routes
241
+ # ──────────────────────────────────────────────
242
@app.get("/health")
async def health():
    """Public liveness probe; no authentication is required."""
    status_body = {"status": "ok"}
    return status_body
246
+
247
+
248
# get_decision() runs in a worker thread (see chat()); this lock keeps
# inference one-at-a-time, as it was when the call ran inline on the event loop.
_INFERENCE_LOCK = threading.Lock()

# Keys that must be present in payload["learning_path"].
_REQUIRED_LEARNING_PATH_FIELDS = ("board", "class", "subject", "student_name", "teacher_persona")


def _serv_code_matches(incoming_code: str) -> bool:
    """Return True when *incoming_code* equals the configured SERV_CODE.

    An empty incoming code or an unset SERV_CODE never matches. The comparison
    is constant-time so response timing does not reveal how much of the secret
    was guessed correctly.
    """
    if not incoming_code or not SERV_CODE:
        return False
    return hmac.compare_digest(incoming_code.encode("utf-8"), SERV_CODE.encode("utf-8"))


async def _reject_unauthorized(request: Request, log_template: str) -> JSONResponse:
    """Log the caller, request a Cloudflare block for its IP, and build the 401."""
    caller_ip = get_caller_ip(request)
    logger.warning(log_template, caller_ip)
    await block_ip_cloudflare(caller_ip)
    return JSONResponse(status_code=401, content={"error": "Unauthorized"})


def _payload_error(payload: object) -> str:
    """Return the first validation error message for *payload*, or "" if valid."""
    if not isinstance(payload, dict):
        return "Request body must be a JSON object"
    if "learning_path" not in payload:
        return "Missing required field: learning_path"
    if "query" not in payload:
        return "Missing required field: query"

    query = payload["query"]
    if not isinstance(query, dict):
        return "Field query must be a JSON object"
    if "request_message" not in query:
        return "Missing required field: query.request_message"

    learning_path = payload["learning_path"]
    if not isinstance(learning_path, dict):
        return "Field learning_path must be a JSON object"
    for field in _REQUIRED_LEARNING_PATH_FIELDS:
        if field not in learning_path:
            return f"Missing required field: learning_path.{field}"
    return ""


def _decide_serialised(payload: dict) -> str:
    """Run get_decision() while holding the inference lock."""
    with _INFERENCE_LOCK:
        return get_decision(payload)


@app.get("/ping")
async def ping(request: Request):
    """Authenticated liveness probe.

    Validates the ``serv_code`` header. Callers with a missing or wrong code
    get a 401 and their IP is submitted to Cloudflare for blocking.
    """
    incoming_code = request.headers.get("serv_code", "")
    if not _serv_code_matches(incoming_code):
        return await _reject_unauthorized(request, "Ping rejected – invalid serv_code from IP %s")

    return JSONResponse(content={
        "status": "alive",
        "service": "children-learning-router",
        "model": MODEL_NAME,
    })


@app.post("/chat")
async def chat(request: Request):
    """Main routing endpoint.

    1. Validate the ``serv_code`` header (401 + Cloudflare block on failure).
    2. Parse and validate the JSON body (400 on failure).
    3. Get the routing decision from the model (500 on failure).
    4. Forward the payload to the chosen downstream service.
    5. Return the decision, target URL and downstream response, using the
       downstream status code.
    """
    # 1. Auth
    incoming_code = request.headers.get("serv_code", "")
    if not _serv_code_matches(incoming_code):
        return await _reject_unauthorized(request, "Invalid serv_code from IP %s")

    # 2. Parse body
    try:
        payload = await request.json()
    except Exception:
        return JSONResponse(status_code=400, content={"error": "Request body must be valid JSON"})

    problem = _payload_error(payload)
    if problem:
        return JSONResponse(status_code=400, content={"error": problem})

    # 3. Decision. Inference is CPU-bound and slow; running it in a worker
    # thread keeps the event loop free to answer other requests (e.g. /health).
    try:
        decision = await asyncio.to_thread(_decide_serialised, payload)
    except Exception as exc:
        logger.exception("Model inference error: %s", exc)
        return JSONResponse(status_code=500, content={"error": "Model inference failed"})

    logger.info("Routing decision: %s", decision)
    target_url = DECISION_URL_MAP.get(decision, CHITCHAT_URL)

    # 4. Forward
    response_body, status_code = await forward_request(target_url, payload, incoming_code)

    # 5. Return
    return JSONResponse(status_code=status_code, content={
        "decision": decision,
        "forwarded": target_url,
        "response": response_body,
    })
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ──────────────────────────────────────────────────────────
2
+ # requirements.txt – Children's Learning Router Service
3
+ # CPU-only · Hugging Face Spaces (Docker SDK)
4
+ # ──────────────────────────────────────────────────────────
5
+
6
+ # Web framework — ASGI native, compatible with uvicorn
7
+ fastapi==0.115.0
8
+
9
+ # ASGI server
10
+ uvicorn[standard]==0.30.6
11
+
12
+ # Async HTTP client for forwarding requests to downstream services
13
+ httpx==0.27.2
14
+
15
+ # Qwen2.5-3B-Instruct inference
16
+ torch==2.3.1
17
+ transformers==4.46.3
18
+ accelerate==1.1.1
19
+ sentencepiece==0.2.0
20
+ protobuf==5.28.3