prasai-ap committed on
Commit
910c9bd
·
verified ·
1 Parent(s): e173db9

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -7
  2. app.py +308 -19
  3. requirements.txt +3 -0
README.md CHANGED
@@ -13,15 +13,16 @@ pinned: false
13
 
14
  Pathshala AI is a bilingual AI tutor demo for rural primary students in Nepal.
15
 
16
- The Gradio Space mirrors the local Streamlit/web app flow. It can accept a student
17
- question in English, Nepali, or romanized Nepali plus optional textbook context, then returns:
 
18
 
19
  - English explanation
20
  - Nepali explanation
21
  - 3 simple quiz questions
22
  - Retrieved textbook sources
23
- - Quiz grading when a backend is configured
24
- - Parent/teacher summary when a backend is configured
25
 
26
  ## Deploy To Hugging Face Spaces
27
 
@@ -49,8 +50,13 @@ git push
49
  ## Recommended Submission Mode
50
 
51
  For the easiest hackathon submission, deploy the Space without `BACKEND_URL`.
52
- It will use the built-in demo fallback, so judges can try it immediately by pasting
53
- textbook context into the question tab.
 
 
 
 
 
54
 
55
  For the full RAG workflow, first deploy the FastAPI backend somewhere public, then set `BACKEND_URL` in the Space settings.
56
 
@@ -81,7 +87,7 @@ If the backend returns `normalized_question`, the Space shows the interpreted qu
81
 
82
  ## Mock Mode
83
 
84
- If `BACKEND_URL` is missing or the backend is unavailable, the Space uses a simple demo fallback so the demo remains easy to try. PDF upload, quiz grading, and parent summaries require the backend.
85
 
86
  Example question:
87
 
 
13
 
14
  Pathshala AI is a bilingual AI tutor demo for rural primary students in Nepal.
15
 
16
+ The Gradio Space mirrors the local Streamlit/web app flow. It can upload a text-based
17
+ PDF directly inside Hugging Face Spaces, accept a student question in English, Nepali,
18
+ or romanized Nepali, retrieve relevant textbook portions, and then return:
19
 
20
  - English explanation
21
  - Nepali explanation
22
  - 3 simple quiz questions
23
  - Retrieved textbook sources
24
+ - Basic quiz grading in Space-local mode
25
+ - Parent/teacher summary note in Space-local mode
26
 
27
  ## Deploy To Hugging Face Spaces
28
 
 
50
  ## Recommended Submission Mode
51
 
52
  For the easiest hackathon submission, deploy the Space without `BACKEND_URL`.
53
+ It will run a Space-local workflow:
54
+
55
+ 1. Upload a text-based PDF.
56
+ 2. Extract text with PyMuPDF.
57
+ 3. Create embeddings with `sentence-transformers`.
58
+ 4. Search the uploaded book in memory.
59
+ 5. Show Nepali quiz questions and retrieved textbook portions.
60
 
61
  For the full RAG workflow, first deploy the FastAPI backend somewhere public, then set `BACKEND_URL` in the Space settings.
62
 
 
87
 
88
  ## Mock Mode
89
 
90
+ If `BACKEND_URL` is missing or the backend is unavailable, the Space uses local PDF extraction and in-memory retrieval. This supports text-based PDFs. For scanned PDFs or persistent student progress, deploy the backend and set `BACKEND_URL`.
91
 
92
  Example question:
93
 
app.py CHANGED
@@ -1,8 +1,10 @@
1
  import os
2
  from typing import Any
 
3
 
4
  from dotenv import load_dotenv
5
  import gradio as gr
 
6
  import requests
7
 
8
 
@@ -18,14 +20,20 @@ EXAMPLE_CONTEXT = (
18
  "Soil erosion is the removal of topsoil by wind, water, or other natural forces. "
19
  "It can make farmland less fertile and can be reduced by planting trees and grass."
20
  )
 
 
 
 
 
 
21
 
22
 
23
- def upload_textbook(pdf_path: str | None) -> str:
24
  if not pdf_path:
25
- return "Choose a PDF first."
26
 
27
  if not BACKEND_URL:
28
- return "Backend URL is not configured for this Space. Paste context below to use demo mode."
29
 
30
  try:
31
  with open(pdf_path, "rb") as pdf_file:
@@ -41,22 +49,55 @@ def upload_textbook(pdf_path: str | None) -> str:
41
  method_text = f" Text extraction: {extraction_method}." if extraction_method else ""
42
  return (
43
  f"Uploaded {result['filename']} with {result['page_count']} pages "
44
- f"and {result['chunk_count']} chunks.{method_text}"
 
 
45
  )
46
 
47
- return _response_error(response, "Upload failed.")
48
  except requests.Timeout:
49
- return "Backend is still processing the PDF. Try a smaller PDF for the demo."
50
  except requests.RequestException as exc:
51
- return f"Could not reach backend: {exc}"
52
  except OSError as exc:
53
- return f"Could not read uploaded PDF: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
 
56
  def ask_tutor(
57
  question: str,
58
  student_id: str,
59
  textbook_context: str,
 
60
  ) -> tuple[str, str, str, str, str, dict[str, Any]]:
61
  question = question.strip()
62
  student_id = (student_id or "hf-space-demo").strip()
@@ -78,7 +119,12 @@ def ask_tutor(
78
  if backend_result and not is_insufficient_backend_result(backend_result):
79
  return backend_result
80
 
81
- return mock_response(question=question, textbook_context=textbook_context)
 
 
 
 
 
82
 
83
 
84
  def ask_backend(
@@ -145,12 +191,12 @@ def grade_quiz(
145
  student_id: str,
146
  quiz_state: dict[str, Any] | None,
147
  ) -> str:
148
- if not BACKEND_URL:
149
- return "Quiz grading needs the backend. Demo mode can show questions but cannot grade them."
150
-
151
  quiz_state = quiz_state or {}
152
  quiz_id = quiz_state.get("quiz_id")
153
 
 
 
 
154
  if not quiz_id:
155
  return "Ask the tutor first so a quiz can be created."
156
 
@@ -177,9 +223,59 @@ def grade_quiz(
177
  return "Quiz grading returned an invalid response."
178
 
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def parent_summary(student_id: str) -> str:
181
  if not BACKEND_URL:
182
- return "Parent/teacher summary needs the backend."
 
 
 
 
183
 
184
  student_id = (student_id or "hf-space-demo").strip()
185
 
@@ -225,6 +321,103 @@ def is_insufficient_backend_result(result: tuple[str, str, str, str, str, dict[s
225
  return any(marker in combined for marker in markers)
226
 
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  def mock_response(question: str, textbook_context: str) -> tuple[str, str, str, str, str, dict[str, Any]]:
229
  context = textbook_context or EXAMPLE_CONTEXT
230
  normalized_question = normalize_question_mock(question)
@@ -252,6 +445,54 @@ def mock_response(question: str, textbook_context: str) -> tuple[str, str, str,
252
  )
253
 
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def mock_english_explanation(normalized_question: str, context: str) -> str:
256
  text = f"{normalized_question} {context}".lower()
257
 
@@ -330,6 +571,53 @@ def mock_nepali_explanation(normalized_question: str, context: str = "") -> str:
330
  return "यो विषयलाई सरल रूपमा बुझ्न पाठ्यपुस्तकको सन्दर्भ पढेर मुख्य कुरा सम्झनुहोस्।"
331
 
332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  def normalize_question_mock(question: str) -> str:
334
  text = question.lower()
335
 
@@ -491,12 +779,13 @@ with gr.Blocks(title=APP_NAME, theme=gr.themes.Soft()) as demo:
491
  gr.Markdown(
492
  """
493
  # Pathshala AI
494
- Bilingual AI tutor for rural primary students in Nepal. Upload a PDF when a
495
- public backend is configured, or paste textbook context for the Space demo.
496
  """
497
  )
498
 
499
  quiz_state = gr.State({})
 
500
 
501
  with gr.Row():
502
  student_id_input = gr.Textbox(
@@ -508,7 +797,7 @@ with gr.Blocks(title=APP_NAME, theme=gr.themes.Soft()) as demo:
508
  label="Status",
509
  value=(
510
  "Backend connected." if BACKEND_URL else
511
- "Demo fallback active. Set BACKEND_URL in Space settings for full RAG."
512
  ),
513
  interactive=False,
514
  scale=2,
@@ -581,18 +870,18 @@ with gr.Blocks(title=APP_NAME, theme=gr.themes.Soft()) as demo:
581
  status_output,
582
  quiz_state,
583
  ],
584
- fn=lambda question, context: ask_tutor(question, "hf-space-demo", context),
585
  cache_examples=False,
586
  )
587
 
588
  upload_button.click(
589
  fn=upload_textbook,
590
  inputs=[pdf_input],
591
- outputs=[upload_output],
592
  )
593
  ask_button.click(
594
  fn=ask_tutor,
595
- inputs=[question_input, student_id_input, context_input],
596
  outputs=[
597
  english_output,
598
  nepali_output,
 
1
  import os
2
  from typing import Any
3
+ from functools import lru_cache
4
 
5
  from dotenv import load_dotenv
6
  import gradio as gr
7
+ import numpy as np
8
  import requests
9
 
10
 
 
20
  "Soil erosion is the removal of topsoil by wind, water, or other natural forces. "
21
  "It can make farmland less fertile and can be reduced by planting trees and grass."
22
  )
23
+ MIN_CHUNK_CHARS = 250
24
+ MAX_CHUNK_CHARS = 900
25
+ EMBEDDING_MODEL = os.getenv(
26
+ "EMBEDDING_MODEL",
27
+ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
28
+ )
29
 
30
 
31
+ def upload_textbook(pdf_path: str | None) -> tuple[str, dict[str, Any], Any]:
32
  if not pdf_path:
33
+ return "Choose a PDF first.", {}, gr.update()
34
 
35
  if not BACKEND_URL:
36
+ return upload_textbook_locally(pdf_path)
37
 
38
  try:
39
  with open(pdf_path, "rb") as pdf_file:
 
49
  method_text = f" Text extraction: {extraction_method}." if extraction_method else ""
50
  return (
51
  f"Uploaded {result['filename']} with {result['page_count']} pages "
52
+ f"and {result['chunk_count']} chunks.{method_text}",
53
+ {},
54
+ gr.update(value=""),
55
  )
56
 
57
+ return _response_error(response, "Upload failed."), {}, gr.update()
58
  except requests.Timeout:
59
+ return "Backend is still processing the PDF. Try a smaller PDF for the demo.", {}, gr.update()
60
  except requests.RequestException as exc:
61
+ return f"Could not reach backend: {exc}", {}, gr.update()
62
  except OSError as exc:
63
+ return f"Could not read uploaded PDF: {exc}", {}, gr.update()
64
+
65
+
66
def upload_textbook_locally(pdf_path: str) -> tuple[str, dict[str, Any], Any]:
    """Build the in-memory textbook index for Space-local mode.

    Extracts text with PyMuPDF, chunks it, and embeds every chunk so
    questions can be answered without a deployed backend.

    Args:
        pdf_path: Filesystem path of the uploaded PDF.

    Returns:
        ``(status_message, textbook_state, context_box_update)`` — the same
        triple shape the backend upload path produces, so both paths can
        feed the same Gradio outputs.
    """
    try:
        extraction = extract_pdf_text(pdf_path)
        text_chunks = chunk_text(extraction["text"])

        if not text_chunks:
            return "No readable text chunks could be created from this PDF.", {}, gr.update()

        vectors = embed_texts(text_chunks)
        textbook_state = {
            "filename": os.path.basename(pdf_path),
            "page_count": extraction["page_count"],
            "chunk_count": len(text_chunks),
            "extraction_method": extraction["extraction_method"],
            "chunks": text_chunks,
            # Stored as plain lists so the state dict stays JSON-friendly.
            "embeddings": vectors.tolist(),
        }
        status = (
            f"Uploaded {textbook_state['filename']} inside this Space with "
            f"{textbook_state['page_count']} pages and {textbook_state['chunk_count']} chunks. "
            f"Text extraction: {textbook_state['extraction_method']}."
        )
        # Clear the pasted-context box so retrieval uses the uploaded book.
        return status, textbook_state, gr.update(value="")
    except Exception as exc:  # UI boundary: report failure as a status string instead of crashing.
        return f"Could not process uploaded PDF in this Space: {exc}", {}, gr.update()
94
 
95
 
96
  def ask_tutor(
97
  question: str,
98
  student_id: str,
99
  textbook_context: str,
100
+ textbook_state: dict[str, Any] | None,
101
  ) -> tuple[str, str, str, str, str, dict[str, Any]]:
102
  question = question.strip()
103
  student_id = (student_id or "hf-space-demo").strip()
 
119
  if backend_result and not is_insufficient_backend_result(backend_result):
120
  return backend_result
121
 
122
+ return local_response(
123
+ question=question,
124
+ student_id=student_id,
125
+ textbook_context=textbook_context,
126
+ textbook_state=textbook_state or {},
127
+ )
128
 
129
 
130
  def ask_backend(
 
191
  student_id: str,
192
  quiz_state: dict[str, Any] | None,
193
  ) -> str:
 
 
 
194
  quiz_state = quiz_state or {}
195
  quiz_id = quiz_state.get("quiz_id")
196
 
197
+ if not BACKEND_URL:
198
+ return grade_quiz_locally([answer_1, answer_2, answer_3], quiz_state)
199
+
200
  if not quiz_id:
201
  return "Ask the tutor first so a quiz can be created."
202
 
 
223
  return "Quiz grading returned an invalid response."
224
 
225
 
226
def grade_quiz_locally(answers: list[str], quiz_state: dict[str, Any]) -> str:
    """Grade up to three quiz answers inside the Space without a backend.

    Each student answer is fuzzily compared (token overlap / substring)
    against the expected answer captured when the quiz was generated.

    Args:
        answers: Student answers in question order; entries may be missing,
            empty, or ``None`` (an untouched Gradio textbox yields ``None``).
        quiz_state: State produced by ``local_response`` with keys
            ``quiz_questions`` and ``expected_answers``.

    Returns:
        A multi-line score report, or a prompt to ask the tutor first when
        no quiz has been created yet.
    """
    questions = quiz_state.get("quiz_questions", [])
    expected_answers = quiz_state.get("expected_answers", [])

    if not questions:
        return "Ask the tutor first so a quiz can be created."

    score = 0
    lines = []

    for index, question in enumerate(questions[:3]):
        # Tolerate short answer lists and None entries from empty textboxes
        # (the original ``answers[index].strip()`` raised AttributeError on None).
        raw_answer = answers[index] if index < len(answers) else ""
        student_answer = (raw_answer or "").strip()
        expected_answer = str(expected_answers[index] if index < len(expected_answers) else "")
        is_correct = is_answer_close(student_answer, expected_answer)

        if is_correct:
            score += 1

        status = "Correct" if is_correct else "Needs practice"
        lines.append(f"{status}: {question}")

        if not is_correct and expected_answer:
            lines.append(f"Expected idea: {expected_answer}")

    return f"Score: {score} / {min(len(questions), 3)}\n" + "\n".join(lines)


def is_answer_close(student_answer: str, expected_answer: str) -> bool:
    """Return True when the student answer roughly matches the expected one.

    Matches on token overlap (>= 35% of the expected tokens) or a
    normalized substring match; empty answers never match.
    """
    student_tokens = set(normalize_answer(student_answer).split())
    expected_tokens = set(normalize_answer(expected_answer).split())

    if not student_tokens or not expected_tokens:
        return False

    overlap = len(student_tokens & expected_tokens) / max(len(expected_tokens), 1)
    return overlap >= 0.35 or normalize_answer(student_answer) in normalize_answer(expected_answer)


def normalize_answer(answer: str) -> str:
    """Lowercase *answer* and strip surrounding punctuation (including the Nepali danda) from each word."""
    return " ".join(
        word.strip(".,?!:;()[]{}\"'।").lower()
        for word in answer.split()
        if word.strip(".,?!:;()[]{}\"'।")
    )
270
+
271
+
272
  def parent_summary(student_id: str) -> str:
273
  if not BACKEND_URL:
274
+ return (
275
+ "Parent/teacher summary\n\n"
276
+ "The student has practiced with the uploaded or pasted textbook context in this Space. "
277
+ "For persistent progress across sessions, deploy the FastAPI backend and set BACKEND_URL."
278
+ )
279
 
280
  student_id = (student_id or "hf-space-demo").strip()
281
 
 
321
  return any(marker in combined for marker in markers)
322
 
323
 
324
def extract_pdf_text(pdf_path: str) -> dict[str, Any]:
    """Extract selectable text from a PDF with PyMuPDF (Space-local path).

    Args:
        pdf_path: Filesystem path of the uploaded PDF.

    Returns:
        Dict with the joined ``text``, the document ``page_count``, and an
        ``extraction_method`` label.

    Raises:
        ValueError: When the PDF yields no selectable text (e.g. a scanned
            document), with guidance for the user.
    """
    # Imported lazily so the Space can start in backend mode without PyMuPDF loaded.
    import fitz

    with fitz.open(pdf_path) as document:
        # Keep only pages that actually contain text.
        per_page = [
            content
            for content in (page.get_text("text").strip() for page in document)
            if content
        ]
        total_pages = document.page_count

    combined = "\n\n".join(per_page).strip()

    if not combined:
        raise ValueError(
            "No selectable text was found. For scanned PDFs, deploy with a backend "
            "or paste a short textbook paragraph into the context box."
        )

    return {
        "text": combined,
        "page_count": total_pages,
        "extraction_method": "pymupdf-local",
    }
350
+
351
+
352
+ def chunk_text(text: str) -> list[str]:
353
+ paragraphs = [part.strip() for part in text.splitlines() if part.strip()]
354
+ chunks = []
355
+ current = ""
356
+
357
+ for paragraph in paragraphs:
358
+ if len(current) + len(paragraph) + 2 <= MAX_CHUNK_CHARS:
359
+ current = f"{current}\n{paragraph}".strip()
360
+ continue
361
+
362
+ if len(current) >= MIN_CHUNK_CHARS:
363
+ chunks.append(current)
364
+ current = paragraph
365
+ else:
366
+ current = f"{current}\n{paragraph}".strip()
367
+
368
+ if current:
369
+ chunks.append(current)
370
+
371
+ return chunks or ([text.strip()] if text.strip() else [])
372
+
373
+
374
@lru_cache(maxsize=1)
def get_embedding_model():
    """Load and cache the sentence-transformers embedding model.

    The import is deferred so the Space can start (and backend mode can run)
    without loading sentence-transformers up front; ``lru_cache(maxsize=1)``
    ensures the model is instantiated only once per process.
    """
    from sentence_transformers import SentenceTransformer

    return SentenceTransformer(EMBEDDING_MODEL)
379
+
380
+
381
def embed_texts(texts: list[str]) -> np.ndarray:
    """Embed *texts* with the cached model as L2-normalized numpy vectors.

    Normalized embeddings let retrieval use a plain dot product as cosine
    similarity.
    """
    encoded = get_embedding_model().encode(
        texts,
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False,
    )
    return np.asarray(encoded)
391
+
392
+
393
def retrieve_local_sources(
    question: str,
    textbook_state: dict[str, Any],
    limit: int = 5,
) -> list[dict[str, Any]]:
    """Return the top-``limit`` chunks from the in-memory textbook index.

    Ranks every stored chunk by dot product against the question embedding
    (embeddings are normalized, so this is cosine similarity).

    Args:
        question: The (normalized) student question.
        textbook_state: State built by the local upload path, holding
            ``chunks`` and their ``embeddings``.
        limit: Maximum number of sources to return.

    Returns:
        Source dicts with ``score``, ``text``, and ``metadata``; empty when
        nothing has been indexed yet.
    """
    chunks = [str(chunk) for chunk in textbook_state.get("chunks", [])]
    embeddings = np.asarray(textbook_state.get("embeddings", []), dtype=float)

    # Nothing indexed yet (no upload, or extraction produced no chunks).
    if not chunks or embeddings.size == 0:
        return []

    query_vector = embed_texts([question])[0]
    similarities = embeddings @ query_vector

    source_name = textbook_state.get("filename", "uploaded-textbook")
    results = []
    for position in np.argsort(similarities)[::-1][:limit]:
        results.append(
            {
                "score": float(similarities[position]),
                "text": chunks[position],
                "metadata": {
                    "filename": source_name,
                    "chunk_index": int(position),
                },
            }
        )
    return results
419
+
420
+
421
  def mock_response(question: str, textbook_context: str) -> tuple[str, str, str, str, str, dict[str, Any]]:
422
  context = textbook_context or EXAMPLE_CONTEXT
423
  normalized_question = normalize_question_mock(question)
 
445
  )
446
 
447
 
448
def local_response(
    question: str,
    student_id: str,
    textbook_context: str,
    textbook_state: dict[str, Any],
) -> tuple[str, str, str, str, str, dict[str, Any]]:
    """Answer a question entirely inside the Space (no backend).

    Builds sources either from pasted context or from the in-memory uploaded
    textbook, then produces the same six-tuple the backend path returns:
    (english, nepali, quiz_markdown, sources_markdown, status, quiz_state).

    Args:
        question: Raw student question.
        student_id: Identifier stored into the quiz state.
        textbook_context: Optional pasted context; takes priority over the
            uploaded textbook when non-empty.
        textbook_state: In-memory index from ``upload_textbook_locally``.
    """
    normalized_question = normalize_question_mock(question)
    sources = []

    # Pasted context wins over an uploaded book: its chunks are treated as
    # perfectly relevant (score 1.0) instead of running embedding retrieval.
    if textbook_context.strip():
        sources = [
            {
                "score": 1.0,
                "text": chunk,
                "metadata": {"filename": "pasted-context", "chunk_index": index},
            }
            for index, chunk in enumerate(chunk_text(textbook_context)[:5])
        ]
    elif textbook_state.get("chunks") and textbook_state.get("embeddings"):
        sources = retrieve_local_sources(normalized_question, textbook_state, limit=5)

    context = "\n\n".join(str(source.get("text", "")) for source in sources).strip()

    # No usable context at all: fall back to the canned demo response.
    if not context:
        return mock_response(question=question, textbook_context=textbook_context)

    english = (
        f"Interpreted question: {normalized_question}\n\n"
        f"Answer from the uploaded textbook context:\n{truncate(context, max_length=700)}"
    )
    nepali = local_nepali_answer(normalized_question, context)
    quiz_questions = local_nepali_quiz_questions(context)
    # All three questions share one expected answer derived from the top
    # source; grading is intentionally lenient in Space-local mode.
    quiz_state = {
        "student_id": student_id,
        "quiz_questions": quiz_questions,
        "expected_answers": [source_answer(sources)] * 3,
    }

    return (
        english,
        nepali,
        format_quiz(quiz_questions),
        format_sources(sources),
        "Answered with the Hugging Face Space local PDF workflow.",
        quiz_state,
    )
494
+
495
+
496
  def mock_english_explanation(normalized_question: str, context: str) -> str:
497
  text = f"{normalized_question} {context}".lower()
498
 
 
571
  return "यो विषयलाई सरल रूपमा बुझ्न पाठ्यपुस्तकको सन्दर्भ पढेर मुख्य कुरा सम्झनुहोस्।"
572
 
573
 
574
def local_nepali_answer(normalized_question: str, context: str) -> str:
    """Produce a Nepali explanation for Space-local mode.

    Prefers the canned topic explanation from ``mock_nepali_explanation``;
    when that returns its generic fallback sentence, builds an answer from
    the retrieved context instead (quoting it directly when it is already
    written in Devanagari).
    """
    known_answer = mock_nepali_explanation(normalized_question, context)

    # mock_nepali_explanation signals "no canned topic matched" by returning
    # this exact fallback sentence. NOTE(review): fragile string sentinel —
    # must stay byte-identical to the literal in mock_nepali_explanation.
    if known_answer != "यो विषयलाई सरल रूपमा बुझ्न पाठ्यपुस्तकको सन्दर्भ पढेर मुख्य कुरा सम्झनुहोस्।":
        return known_answer

    if has_devanagari(context):
        # Context is already Nepali: quote it (truncated) directly.
        return (
            "अपलोड गरिएको पाठ्यपुस्तकको सन्दर्भअनुसार मुख्य कुरा यस्तो छ:\n\n"
            f"{truncate(context, max_length=700)}"
        )

    # English-only context: give generic Nepali study guidance instead.
    return (
        "अपलोड गरिएको पाठ्यपुस्तकको सन्दर्भअनुसार यो विषय महत्त्वपूर्ण छ। "
        "मुख्य शब्दहरू पढ्नुहोस्, उदाहरणसँग जोड्नुहोस्, र आफ्नै सरल शब्दमा उत्तर लेख्ने अभ्यास गर्नुहोस्।"
    )
590
+
591
+
592
def local_nepali_quiz_questions(context: str) -> list[str]:
    """Build three simple Nepali quiz questions from retrieved context.

    The middle question embeds the (truncated) first sentence of the context
    so at least one question is specific to the uploaded book.
    """
    short_context = truncate(first_sentence(context), max_length=140)
    return [
        "प्राप्त पाठ्यपुस्तक सन्दर्भको मुख्य कुरा के हो?",
        f"यो वाक्यले के बुझाउँछ: {short_context}",
        "यस विषयलाई आफ्नै सरल शब्दमा कसरी भन्न सकिन्छ?",
    ]
599
+
600
+
601
def source_answer(sources: list[dict[str, Any]]) -> str:
    """Derive the expected quiz answer from the top retrieved source.

    Falls back to a generic Nepali phrase ("the textbook's main point")
    when no sources were retrieved.
    """
    if not sources:
        return "पाठ्यपुस्तकको मुख्य कुरा।"

    # First sentence of the best-scoring chunk, capped at 220 characters.
    text = str(sources[0].get("text", "")).strip()
    return truncate(first_sentence(text) or text, max_length=220)
607
+
608
+
609
def first_sentence(text: str) -> str:
    """Return the first sentence of *text*, keeping its terminator.

    Checks the Nepali danda first, then Latin sentence terminators; falls
    back to the whole stripped text when no terminator is present.
    """
    for terminator in ("।", ".", "?", "!"):
        head, found, _ = text.partition(terminator)
        if found:
            return head.strip() + terminator

    return text.strip()
615
+
616
+
617
+ def has_devanagari(text: str) -> bool:
618
+ return any("\u0900" <= character <= "\u097f" for character in text)
619
+
620
+
621
  def normalize_question_mock(question: str) -> str:
622
  text = question.lower()
623
 
 
779
  gr.Markdown(
780
  """
781
  # Pathshala AI
782
+ Bilingual AI tutor for rural primary students in Nepal. Upload a PDF directly
783
+ in this Space, or connect a public backend for the full production workflow.
784
  """
785
  )
786
 
787
  quiz_state = gr.State({})
788
+ textbook_state = gr.State({})
789
 
790
  with gr.Row():
791
  student_id_input = gr.Textbox(
 
797
  label="Status",
798
  value=(
799
  "Backend connected." if BACKEND_URL else
800
+ "Space-local PDF upload is active. Set BACKEND_URL for the full backend workflow."
801
  ),
802
  interactive=False,
803
  scale=2,
 
870
  status_output,
871
  quiz_state,
872
  ],
873
+ fn=lambda question, context: ask_tutor(question, "hf-space-demo", context, {}),
874
  cache_examples=False,
875
  )
876
 
877
  upload_button.click(
878
  fn=upload_textbook,
879
  inputs=[pdf_input],
880
+ outputs=[upload_output, textbook_state, context_input],
881
  )
882
  ask_button.click(
883
  fn=ask_tutor,
884
+ inputs=[question_input, student_id_input, context_input, textbook_state],
885
  outputs=[
886
  english_output,
887
  nepali_output,
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  gradio>=4.44.0
2
  python-dotenv>=1.0.0
3
  requests>=2.31.0
 
 
 
 
1
  gradio>=4.44.0
2
  python-dotenv>=1.0.0
3
  requests>=2.31.0
4
+ numpy>=1.26.0
5
+ PyMuPDF>=1.24.0
6
+ sentence-transformers==2.7.0