Spaces:

digifreely
/

brain

Sleeping

App Files Files Community

digifreely committed 12 days ago

Commit

1b95620

verified ·

1 Parent(s): a4134f9

Upload 2 files

Browse files

Files changed (2) hide show

app.py +323 -0
requirements.txt +20 -0

app.py ADDED Viewed

	@@ -0,0 +1,323 @@

+"""
+app.py – Children's Learning Router Service
+Runs on Hugging Face Spaces (CPU-only Docker) with uvicorn + FastAPI (ASGI native).
+Validates serv_code header, uses Qwen2.5-3B-Instruct to decide routing,
+then asynchronously forwards the full payload to the appropriate downstream URL.
+"""
+import os
+import json
+import logging
+from contextlib import asynccontextmanager
+import httpx
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# ──────────────────────────────────────────────
+# Logging
+# ──────────────────────────────────────────────
+# Configure the root logger once at import time: INFO level, with a
+# "timestamp  LEVEL  message" line format.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s  %(levelname)s  %(message)s",
+)
+# Module-level logger used by the helpers and routes in this file.
+logger = logging.getLogger(__name__)
+# ──────────────────────────────────────────────
+# Environment / Secrets
+# (set in HF Spaces → Settings → Repository secrets)
+# ──────────────────────────────────────────────
+SERV_CODE      = os.environ.get("SERV_CODE", "")
+CF_API_TOKEN   = os.environ.get("CF_API_TOKEN", "")
+CF_ACCOUNT_ID  = os.environ.get("CF_ACCOUNT_ID", "")
+BLOCK_URL      = os.environ.get("BLOCK_URL",      "https://blockchakalaka.onrender.com")
+CHITCHAT_URL   = os.environ.get("CHITCHAT_URL",   "https://chitchatchakalaka.onrender.com")
+QUESTION_URL   = os.environ.get("QUESTION_URL",   "https://questionchakalaka.onrender.com")
+CURRICULUM_URL = os.environ.get("CURRICULUM_URL", "https://currichakalaka.onrender.com")
+DECISION_URL_MAP = {
+    "Block":      BLOCK_URL,
+    "Chitchat":   CHITCHAT_URL,
+    "Question":   QUESTION_URL,
+    "Curriculum": CURRICULUM_URL,
+}
+# ──────────────────────────────────────────────
+# Model globals
+# ──────────────────────────────────────────────
+# Hugging Face Hub identifier of the routing model.
+MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
+# Both remain None until load_model() assigns them.
+tokenizer  = None
+model      = None
+def load_model() -> None:
+    """Load Qwen2.5-3B-Instruct once at startup."""
+    global tokenizer, model
+    logger.info("Loading %s on CPU …", MODEL_NAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,   # CPU-safe
+        device_map="cpu",
+        low_cpu_mem_usage=True,
+    )
+    model.eval()
+    logger.info("Model ready.")
+# ──────────────────────────────────────────────
+# FastAPI lifespan — loads model before first request
+# ──────────────────────────────────────────────
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan hook for the FastAPI app: load the model, then hand control to the app."""
+    # Code before the yield is the startup phase.
+    load_model()   # runs at startup, before any request is served
+    yield
+    # nothing to clean up on shutdown
+# The single application instance, wired to the lifespan hook above.
+app = FastAPI(lifespan=lifespan)
+# ──────────────────────────────────────────────
+# System Prompt
+# ──────────────────────────────────────────────
+# System message sent with every routing call in get_decision().
+# The four decision words it names match the keys of DECISION_URL_MAP.
+SYSTEM_PROMPT = """You are a routing agent for a children's educational app (ages 5-12).
+Read the student context and output EXACTLY ONE word:
+    Block | Curriculum | Question | Chitchat
+RULES — evaluate in this strict order:
+1. BLOCK
+   - request_message contains abusive, sexual, violent, hateful, or adult content.
+   - OR the child has been persistently abusive across multiple turns in chat_history.
+   → Output: Block
+2. CURRICULUM
+   - request_message is clearly related to current_learning goals.
+   → Output: Curriculum
+3. QUESTION
+   - request_message is educational / knowledge-based but NOT related to current_learning.
+   (Could be another subject, a past/future lesson, or general knowledge.)
+   → Output: Question
+4. CHITCHAT
+   - Everything else: greetings, jokes, feelings, random comments, playful chat.
+   → Output: Chitchat
+CRITICAL:
+- Output the single decision word ONLY. No punctuation, no explanation.
+- Follow the numbered order strictly.
+- Prefer Curriculum over Question when current_learning is involved.
+- Prefer Question over Chitchat when the message is educational.
+"""
+def build_user_content(payload: dict) -> str:
+    """Serialise the full learning context into a prompt for the model."""
+    lp               = payload.get("learning_path", {})
+    query            = payload.get("query", {})
+    current_learning = lp.get("assessment_stages", {}).get("current_learning", [])
+    return f"""=== STUDENT CONTEXT ===
+Board: {lp.get('board', 'N/A')}
+Class: {lp.get('class', 'N/A')}
+Subject: {lp.get('subject', 'N/A')}
+Student Name: {lp.get('student_name', 'N/A')}
+Teacher Persona: {lp.get('teacher_persona', 'N/A')}
+=== CURRICULUM OBJECTIVES ===
+{json.dumps(lp.get('curriculum_objectives', []), indent=2)}
+=== CURRENT LEARNING (active topic) ===
+{json.dumps(current_learning, indent=2)}
+=== CHAT HISTORY ===
+{json.dumps(lp.get('chat_history', []), indent=2)}
+=== SCRATCHPAD ===
+{json.dumps(lp.get('scratchpad', []), indent=2)}
+=== STUDENT'S CURRENT REQUEST ===
+"{query.get('request_message', '')}"
+Output your single decision word:"""
+def get_decision(payload: dict) -> str:
+    """Run Qwen inference and return one of: Block, Curriculum, Question, Chitchat."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user",   "content": build_user_content(payload)},
+    ]
+    text   = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer([text], return_tensors="pt")
+    with torch.no_grad():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=5,
+            do_sample=False,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
+    raw        = tokenizer.decode(new_tokens, skip_special_tokens=True).strip().lower()
+    logger.info("Raw model output: %r", raw)
+    if "block"      in raw: return "Block"
+    if "curriculum" in raw: return "Curriculum"
+    if "question"   in raw: return "Question"
+    return "Chitchat"
+# ──────────────────────────────────────────────
+# Cloudflare IP blocking helper
+# ──────────────────────────────────────────────
+async def block_ip_cloudflare(ip: str) -> None:
+    if not CF_API_TOKEN or not CF_ACCOUNT_ID:
+        logger.warning("Cloudflare secrets not configured – skipping IP block for %s", ip)
+        return
+    url = f"https://api.cloudflare.com/client/v4/accounts/{CF_ACCOUNT_ID}/firewall/access_rules/rules"
+    cf_headers = {
+        "Authorization": f"Bearer {CF_API_TOKEN}",
+        "Content-Type":  "application/json",
+    }
+    body = {
+        "mode": "block",
+        "configuration": {"target": "ip", "value": ip},
+        "notes": "Auto-blocked: invalid serv_code",
+    }
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            resp = await client.post(url, headers=cf_headers, json=body)
+            logger.info("Cloudflare block %s → HTTP %s", ip, resp.status_code)
+    except Exception as exc:
+        logger.error("Cloudflare block failed for %s: %s", ip, exc)
+# ──────────────────────────────────────────────
+# Downstream forwarding helper
+# ──────────────────────────────────────────────
+async def forward_request(target_url: str, payload: dict, serv_code: str) -> tuple[dict, int]:
+    """POST the full payload to the chosen downstream service."""
+    fwd_headers = {
+        "Content-Type": "application/json",
+        "serv_code":    serv_code,
+    }
+    try:
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            resp = await client.post(target_url, json=payload, headers=fwd_headers)
+            logger.info("Downstream %s → HTTP %s", target_url, resp.status_code)
+            try:
+                return resp.json(), resp.status_code
+            except Exception:
+                return {"raw_response": resp.text}, resp.status_code
+    except httpx.TimeoutException:
+        logger.error("Timeout forwarding to %s", target_url)
+        return {"error": f"Downstream timeout: {target_url}"}, 504
+    except Exception as exc:
+        logger.error("Error forwarding to %s: %s", target_url, exc)
+        return {"error": str(exc)}, 502
+# ──────────────────────────────────────────────
+# Shared helpers
+# ──────────────────────────────────────────────
+def get_caller_ip(req: Request) -> str:
+    forwarded = req.headers.get("x-forwarded-for", "")
+    if forwarded:
+        return forwarded.split(",")[0].strip()
+    return req.client.host if req.client else "unknown"
+# ──────────────────────────────────────────────
+# Routes
+# ──────────────────────────────────────────────
+@app.get("/health")
+async def health():
+    """Public liveness probe — no auth required."""
+    return {"status": "ok"}
+@app.get("/ping")
+async def ping(request: Request):
+    """
+    Authenticated liveness probe.
+    Validates serv_code header. Blocks invalid callers in Cloudflare.
+    """
+    incoming_code = request.headers.get("serv_code", "")
+    if not incoming_code or incoming_code != SERV_CODE:
+        caller_ip = get_caller_ip(request)
+        logger.warning("Ping rejected – invalid serv_code from IP %s", caller_ip)
+        await block_ip_cloudflare(caller_ip)
+        return JSONResponse(status_code=401, content={"error": "Unauthorized"})
+    return JSONResponse(content={
+        "status":  "alive",
+        "service": "children-learning-router",
+        "model":   MODEL_NAME,
+    })
+@app.post("/chat")
+async def chat(request: Request):
+    """
+    Main routing endpoint.
+    1. Validate serv_code header.
+    2. Parse and validate JSON body.
+    3. Get routing decision from Qwen.
+    4. Async-forward payload to chosen downstream service.
+    5. Return downstream response to caller.
+    """
+    # ── 1. Auth ──────────────────────────────
+    incoming_code = request.headers.get("serv_code", "")
+    if not incoming_code or incoming_code != SERV_CODE:
+        caller_ip = get_caller_ip(request)
+        logger.warning("Invalid serv_code from IP %s", caller_ip)
+        await block_ip_cloudflare(caller_ip)
+        return JSONResponse(status_code=401, content={"error": "Unauthorized"})
+    # ── 2. Parse body ────────────────────────
+    try:
+        payload = await request.json()
+    except Exception:
+        return JSONResponse(status_code=400, content={"error": "Request body must be valid JSON"})
+    if "learning_path" not in payload:
+        return JSONResponse(status_code=400, content={"error": "Missing required field: learning_path"})
+    if "query" not in payload:
+        return JSONResponse(status_code=400, content={"error": "Missing required field: query"})
+    if "request_message" not in payload.get("query", {}):
+        return JSONResponse(status_code=400, content={"error": "Missing required field: query.request_message"})
+    for field in ["board", "class", "subject", "student_name", "teacher_persona"]:
+        if field not in payload["learning_path"]:
+            return JSONResponse(status_code=400, content={"error": f"Missing required field: learning_path.{field}"})
+    # ── 3. Decision ──────────────────────────
+    try:
+        decision = get_decision(payload)
+    except Exception as exc:
+        logger.exception("Model inference error: %s", exc)
+        return JSONResponse(status_code=500, content={"error": "Model inference failed"})
+    logger.info("Routing decision: %s", decision)
+    target_url = DECISION_URL_MAP.get(decision, CHITCHAT_URL)
+    # ── 4. Forward ───────────────────────────
+    response_body, status_code = await forward_request(target_url, payload, incoming_code)
+    # ── 5. Return ────────────────────────────
+    return JSONResponse(status_code=status_code, content={
+        "decision":  decision,
+        "forwarded": target_url,
+        "response":  response_body,
+    })

requirements.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+# ──────────────────────────────────────────────────────────
+# requirements.txt  –  Children's Learning Router Service
+# CPU-only · Hugging Face Spaces (Docker SDK)
+# ──────────────────────────────────────────────────────────
+# Web framework — ASGI native, compatible with uvicorn
+fastapi==0.115.0
+# ASGI server
+uvicorn[standard]==0.30.6
+# Async HTTP client for forwarding requests to downstream services
+httpx==0.27.2
+# Qwen2.5-3B-Instruct inference
+torch==2.3.1
+transformers==4.46.3
+accelerate==1.1.1
+sentencepiece==0.2.0
+protobuf==5.28.3