Spaces:

Benny-Tang
/

exam-simulator

Runtime error

App Files Files Community

Benny-Tang committed on Sep 14, 2025

Commit

9f22a20

verified ·

1 Parent(s): a593d2d

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -240

app.py CHANGED Viewed

@@ -1,6 +1,4 @@
-# app.py
 import os
-import re
 import json
 import random
 import subprocess
@@ -9,263 +7,89 @@ import gradio as gr
 from agents import AnalyzerAgent, CoachAgent, PredictiveAgent
 from ocr_agent import OcrAgent
-# Constants
 DATA_DIR = "data"
 QUESTIONS_FILE = "questions.json"
-VALID_SUBJECTS = ["BM", "English", "Math", "History", "Science", "MoralStudies",
-                  "Accounting", "Economics", "Business"]
 os.makedirs(DATA_DIR, exist_ok=True)
-# Agents and OCR
-analyzer = AnalyzerAgent()
-coach_agent = CoachAgent()
-predictor = PredictiveAgent()
-ocr_agent = OcrAgent()
-# Load question bank safely
 def load_question_bank():
     if not os.path.exists(QUESTIONS_FILE):
         return []
     try:
         with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
-            content = f.read().strip()
-            return json.loads(content) if content else []
-    except Exception:
         return []
 QUESTION_BANK = load_question_bank()
-# Merge runner
-def run_merge():
-    """Run merge_questions.py (rebuilds questions.json) and reload QUESTION_BANK."""
-    try:
-        subprocess.run(["python", "merge_questions.py"], check=True)
-        global QUESTION_BANK
-        QUESTION_BANK = load_question_bank()
-        return True, "Merge successful."
-    except subprocess.CalledProcessError as e:
-        return False, f"Merge failed: {e}"
-# Utility: normalize subject token and display
-def subject_token_from_display(display):
-    if not display:
-        return "bm"
-    return display.strip().lower()
-def normalize_display_subject(token):
-    if not token:
-        return "BM"
-    t = token.strip().lower()
-    mapping = {
-        "bm": "BM",
-        "bahasa": "BM",
-        "bahasamelayu": "BM",
-        "english": "English",
-        "math": "Math",
-        "mathematics": "Math",
-        "history": "History",
-        "sejarah": "History",
-        "science": "Science",
-        "physics": "Science",
-        "moral": "MoralStudies",
-        "moralstudies": "MoralStudies",
-    }
-    return mapping.get(t, token.capitalize())
-def autodetect_from_filename(path):
-    """Detect year and subject token from filename like spm_2018_bm.pdf"""
-    if not path:
-        return None, None
-    fname = os.path.basename(path)
-    m = re.search(r"spm[_\-]?(\d{4})[_\-]?([A-Za-z]+)", fname, re.IGNORECASE)
-    if not m:
-        return None, None
-    year = m.group(1)
-    subj = m.group(2).lower()
-    return year, subj
-# ===== OCR upload + auto-merge =====
-def process_pdf_and_merge(file_path, display_subject, year):
-    """
-    file_path: local filepath (gr.File type='filepath')
-    display_subject: e.g. "BM"
-    year: "2018"
-    """
-    if not file_path:
         return "No file uploaded."
-    subj_token = subject_token_from_display(display_subject)
-    try:
-        qfile, scheme_file = ocr_agent.extract_questions_to_files(
-            pdf_path=file_path, year=str(year), subject_token=subj_token, out_dir=DATA_DIR
-        )
-    except Exception as e:
-        return f"❌ OCR failed: {e}"
-    ok, msg = run_merge()
-    if ok:
-        return f"✅ OCR saved: {os.path.basename(qfile)} & {os.path.basename(scheme_file)}. Merge: {msg}"
-    else:
-        return f"⚠️ OCR saved: {os.path.basename(qfile)} & {os.path.basename(scheme_file)}. Merge: {msg}"
-# ===== Exam generation =====
-def generate_exam(display_subject, num_questions, include_predicted):
-    """
-    display_subject: "BM" etc.
-    returns exam_data (list) and status text and exam_data (for state)
-    """
-    subj_key = f"Form5_{display_subject}"
-    pool = [q for q in QUESTION_BANK if q.get("subject") == subj_key]
-    predicted_questions = []
-    if include_predicted:
-        predicted_questions = predictor.generate_predictions(level="Form5",
-                                                            subject=display_subject,
-                                                            n=min(10, max(1, num_questions // 2)),
-                                                            question_bank=QUESTION_BANK)
-    combined = pool + predicted_questions
-    if not combined:
-        return [], f"No questions found for {display_subject}. Upload papers (2018–2024).", []
-    random.shuffle(combined)
-    selected = combined[:min(num_questions, len(combined))]
-    # For safety, return minimal exam objects
-    exam_data = []
-    for q in selected:
-        # if predicted questions include correct_answer, it can be included (but they are in-memory)
-        exam_data.append({
-            "id": q.get("id"),
-            "text": q.get("text"),
-            "choices": q.get("choices", []),
-            "topics": q.get("topics", []),
-            "source": q.get("source", "pastpaper")
-        })
-    return exam_data, f"Prepared {len(exam_data)} questions ({len(predicted_questions)} predicted)", exam_data
-# ===== Submit & grade =====
-def submit_exam(answers_json, exam_state, display_subject):
-    """
-    answers_json: dict where keys are stringified ids -> answer text (or choice text)
-    exam_state: the exam_data (list) saved in gr.State
-    """
-    exam_data = exam_state or []
-    if not exam_data:
-        return "No exam data found.", {}, {}, {}, gr.update(visible=False), gr.update(visible=True)
-    correct = 0
-    graded = 0
-    per_question = {}
-    for q in exam_data:
-        qid = q.get("id")
-        key = str(qid)
-        user_ans = answers_json.get(key) if isinstance(answers_json, dict) else None
-        # determine correct answer
-        correct_ans = None
-        if q.get("source") == "predicted":
-            # predicted question may have correct_answer inside QUESTION_BANK? predictor sets it when generating.
-            # We didn't include correct_answer in exam state by default; attempt to find inside QUESTION_BANK (unlikely)
-            correct_ans = q.get("correct_answer")
-        else:
-            orig = next((item for item in QUESTION_BANK if item.get("id") == qid), None)
-            if orig:
-                correct_ans = orig.get("correct_answer")
-        per_question[str(qid)] = {"user": user_ans, "correct": correct_ans, "topics": q.get("topics", [])}
-        # grade only when correct_answer is not None
-        if correct_ans is not None:
-            graded += 1
-            if user_ans is not None and str(user_ans).strip() == str(correct_ans).strip():
-                correct += 1
-    score = round(100 * correct / graded, 2) if graded > 0 else "N/A (no answer keys)"
-    analysis = analyzer.analyze(per_question)
-    coach = coach_agent.coach(analysis, "Form5", display_subject)
-    pred_summary = predictor.summary(level="Form5", subject=display_subject, question_bank=QUESTION_BANK)
-    return (
-        f"Your Score: {score}%",
-        analysis,
-        coach,
-        pred_summary,
-        gr.update(visible=True),
-        gr.update(visible=True)
-    )
-# ===== Prefill handler for upload UI =====
-def prefill_subject_year_from_file(file_path):
-    if not file_path:
-        return "BM", "2018"
-    year, subj_token = autodetect_from_filename(file_path)
-    subj_display = normalize_display_subject(subj_token) if subj_token else "BM"
-    return subj_display, year if year else "2018"
-# ===== Gradio UI =====
 with gr.Blocks() as demo:
-    gr.Markdown("## SPM Exam Simulator — Form 5 (Past papers 2018–2024)")
-    with gr.Tab("Upload (OCR → JSON → Merge)"):
-        pdf_file = gr.File(label="Upload SPM PDF (e.g., spm_2018_bm.pdf)", type="filepath")
-        subject_dropdown = gr.Dropdown(choices=VALID_SUBJECTS, value="BM", label="Subject (override)")
-        year_dropdown = gr.Dropdown(choices=[str(y) for y in range(2018, 2025)], value="2018", label="Year")
-        process_btn = gr.Button("Process PDF → JSON + Merge")
-        ocr_status = gr.Textbox(label="Status", interactive=False)
-        pdf_file.change(prefill_subject_year_from_file, inputs=[pdf_file], outputs=[subject_dropdown, year_dropdown])
-        process_btn.click(process_pdf_and_merge, inputs=[pdf_file, subject_dropdown, year_dropdown], outputs=[ocr_status])
-    with gr.Tab("Exam Simulator"):
-        subject_sel = gr.Dropdown(choices=["BM", "English", "Math", "History", "Science", "MoralStudies"],
-                                  value="Math", label="Subject")
-        num_q = gr.Slider(minimum=5, maximum=50, step=5, value=10, label="Number of Questions")
-        include_pred = gr.Checkbox(value=True, label="Include AI-predicted questions (in-memory only)")
-        start_btn = gr.Button("Start Exam")
-        exam_state = gr.State()
-        exam_display = gr.JSON(label="Exam Questions")
-        status_display = gr.Textbox(label="Status", interactive=False)
-        start_btn.click(generate_exam,
-                        inputs=[subject_sel, num_q, include_pred],
-                        outputs=[exam_display, status_display, exam_state])
-    with gr.Tab("Submit & Results"):
-        answers_input = gr.JSON(label='Submit Your Answers as JSON (e.g. {"1001":"Seronok", "900000":"4"})')
-        submit_btn = gr.Button("Submit Answers")
-        score_out = gr.Textbox(label="Score")
-        analysis_out = gr.JSON(label="Weakness Analysis")
-        coach_out = gr.JSON(label="Personalized Coaching")
-        pred_out = gr.JSON(label="Prediction Summary")
-        back_btn = gr.Button("← Back to Exam", visible=False)
-        retry_btn = gr.Button("Retry", visible=False)
-        # submit takes (answers_input, exam_state, subject_sel)
-        submit_btn.click(
-            submit_exam,
-            inputs=[answers_input, exam_state, subject_sel],
-            outputs=[score_out, analysis_out, coach_out, pred_out, back_btn, retry_btn]
-        )
-# Launch
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import json
 import random
 import subprocess
 from agents import AnalyzerAgent, CoachAgent, PredictiveAgent
 from ocr_agent import OcrAgent
+# Paths
 DATA_DIR = "data"
 QUESTIONS_FILE = "questions.json"
 os.makedirs(DATA_DIR, exist_ok=True)
+# Load questions
 def load_question_bank():
+    """Load the merged question bank from QUESTIONS_FILE.
+
+    Returns:
+        The parsed JSON content when it is a list; otherwise an empty list
+        (file missing, unreadable, not valid JSON, or not a JSON array).
+    """
     if not os.path.exists(QUESTIONS_FILE):
         return []
     try:
         with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; a
+        # corrupt or unreadable bank must not crash the app at import time.
         return []
+    # Callers iterate the bank and call .get() on each item, so anything
+    # other than a list is treated as "no questions".
+    return data if isinstance(data, list) else []
 QUESTION_BANK = load_question_bank()
+# Agents
+analyzer = AnalyzerAgent()
+coach = CoachAgent()
+predictive = PredictiveAgent()
+ocr_agent = OcrAgent(data_dir=DATA_DIR)
+# Merge questions.json after new OCR upload
+def merge_questions():
+    """Run merge_questions.py and refresh the in-memory QUESTION_BANK.
+
+    The script is run with the interpreter that is running this app, so it
+    uses the same environment. The bank is reloaded only when the script
+    exits with status 0; on failure the previous bank is kept.
+
+    Returns:
+        True when the merge script succeeded, False otherwise. Callers that
+        ignore the return value keep working as before.
+    """
+    global QUESTION_BANK
+    import sys  # local import: only this function needs the interpreter path
+
+    result = subprocess.run(
+        [sys.executable or "python", "merge_questions.py"], check=False
+    )
+    if result.returncode != 0:
+        return False
+    # Without this reload, newly merged questions stay invisible to the
+    # exam tab until the app restarts.
+    QUESTION_BANK = load_question_bank()
+    return True
+# Gradio Functions
+def upload_and_extract(pdf_file, subject, year):
+    """OCR an uploaded PDF, save the result as JSON in DATA_DIR, then merge.
+
+    Args:
+        pdf_file: Path of the uploaded file, or a falsy value if none.
+        subject: Subject chosen in the dropdown (None when unselected).
+        year: Year chosen in the dropdown (None when unselected).
+
+    Returns:
+        A status message for the UI textbox.
+    """
+    if not pdf_file:
         return "No file uploaded."
+    # An unselected dropdown yields None, which would crash subject.lower().
+    if not subject or not year:
+        return "Please select both a subject and a year."
+    try:
+        extracted = ocr_agent.extract_questions(pdf_file, subject, year)
+    except Exception as e:  # UI boundary: report the failure, don't crash
+        return f"❌ OCR failed: {e}"
+    json_path = os.path.join(DATA_DIR, f"spm_{year}_{subject.lower()}.json")
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(extracted, f, ensure_ascii=False, indent=2)
+    merge_questions()
+    # NOTE(review): presumably `extracted` is a list of question dicts;
+    # confirm against OcrAgent.extract_questions.
+    return f"✅ Extracted {len(extracted)} questions from {subject} {year}."
+def simulate_exam(subject, year, num_questions, include_pred):
+    """Build an HTML exam for the chosen subject from QUESTION_BANK.
+
+    Args:
+        subject: Subject chosen in the dropdown (None when unselected).
+        year: Selected year; currently not used for filtering.
+        num_questions: Requested number of questions (slider value).
+        include_pred: When true, append the predictive agent's output.
+
+    Returns:
+        An HTML string for the gr.HTML output.
+    """
+    import html  # local import: only needed for escaping question text
+
+    if not subject:
+        return "Please select a subject."
+    token = subject.lower()
+    # NOTE(review): the earlier app version tagged questions with a
+    # "subject" field (e.g. "Form5_BM"); match on it as well as on the
+    # question text. Confirm the schema merge_questions.py produces.
+    qs = [
+        q for q in QUESTION_BANK
+        if isinstance(q, dict)
+        and (token in str(q.get("subject") or "").lower()
+             or token in str(q.get("text") or "").lower())
+    ]
+    # Slider values may arrive as floats; random.sample needs an int count.
+    count = max(0, min(int(num_questions), len(qs)))
+    selected = random.sample(qs, count)
+    # str.join only accepts strings, so render each question dict to HTML.
+    output_blocks = []
+    for index, q in enumerate(selected, start=1):
+        block = f"<b>Q{index}.</b> {html.escape(str(q.get('text') or ''))}"
+        choices = q.get("choices")
+        if isinstance(choices, list) and choices:
+            items = "".join(f"<li>{html.escape(str(c))}</li>" for c in choices)
+            block += f'<ol type="A">{items}</ol>'
+        output_blocks.append(block)
+    if include_pred:
+        pred_qs = predictive.predict_questions(json.dumps(selected), subject)
+        output_blocks.append(f"<b>Predicted:</b><br>{pred_qs}")
+    if not output_blocks:
+        return f"No questions found for {html.escape(subject)}."
+    return "<br><br>".join(output_blocks)
+def submit_exam_answers(user_answers):
+    """Acknowledge a submission; no grading is performed yet.
+
+    Args:
+        user_answers: Text from the answers textbox; currently ignored.
+
+    Returns:
+        A fixed confirmation message.
+    """
+    # Placeholder scoring until marking scheme integration
+    return "✅ Answers submitted. Scoring will be added when scheme JSONs are ready."
+# Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# 📘 SPM Exam Simulator (Form 5)")
+    gr.Markdown("Practice with real SPM past papers (2018–2024).")
+    # Tab 1: OCR a past-paper PDF into JSON and merge it into the bank.
+    with gr.Tab("Upload Past Paper"):
+        pdf_in = gr.File(label="Upload SPM PDF", type="filepath")
+        # Dropdowns get default values so handlers never receive None.
+        subject_in = gr.Dropdown(["BM","English","Math","History","Science","MoralStudies"],
+                                 value="BM", label="Subject")
+        year_in = gr.Dropdown([str(y) for y in range(2018,2025)], value="2018", label="Year")
+        upload_btn = gr.Button("Extract Questions")
+        upload_out = gr.Textbox()
+        upload_btn.click(upload_and_extract, inputs=[pdf_in, subject_in, year_in], outputs=upload_out)
+    # Tab 2: draw random questions for a subject and render them as HTML.
+    with gr.Tab("Exam Simulation"):
+        subject_sim = gr.Dropdown(["BM","English","Math","History","Science","MoralStudies"],
+                                  value="Math", label="Subject")
+        year_sim = gr.Dropdown([str(y) for y in range(2018,2025)], value="2018", label="Year")
+        num_qs = gr.Slider(1, 20, value=5, step=1, label="Number of Questions")
+        include_pred = gr.Checkbox(label="Include AI-Predicted Questions")
+        start_btn = gr.Button("Start Simulation")
+        exam_out = gr.HTML()
+        start_btn.click(simulate_exam, inputs=[subject_sim, year_sim, num_qs, include_pred], outputs=exam_out)
+    # Tab 3: free-text answer submission (scoring is a placeholder).
+    with gr.Tab("Submit Answers"):
+        ans_in = gr.Textbox(label="Enter your answers (e.g., Q1:A, Q2:C)")
+        submit_btn = gr.Button("Submit")
+        submit_out = gr.Textbox()
+        submit_btn.click(submit_exam_answers, inputs=ans_in, outputs=submit_out)
+# Launch only when run as a script, so importing this module does not
+# start a blocking server.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)