Benny-Tang committed on
Commit
9f22a20
·
verified ·
1 Parent(s): a593d2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -240
app.py CHANGED
@@ -1,6 +1,4 @@
1
- # app.py
2
  import os
3
- import re
4
  import json
5
  import random
6
  import subprocess
@@ -9,263 +7,89 @@ import gradio as gr
9
  from agents import AnalyzerAgent, CoachAgent, PredictiveAgent
10
  from ocr_agent import OcrAgent
11
 
12
- # Constants
13
  DATA_DIR = "data"
14
  QUESTIONS_FILE = "questions.json"
15
- VALID_SUBJECTS = ["BM", "English", "Math", "History", "Science", "MoralStudies",
16
- "Accounting", "Economics", "Business"]
17
 
18
  os.makedirs(DATA_DIR, exist_ok=True)
19
 
20
- # Agents and OCR
21
- analyzer = AnalyzerAgent()
22
- coach_agent = CoachAgent()
23
- predictor = PredictiveAgent()
24
- ocr_agent = OcrAgent()
25
-
26
- # Load question bank safely
27
  def load_question_bank():
28
  if not os.path.exists(QUESTIONS_FILE):
29
  return []
30
  try:
31
  with open(QUESTIONS_FILE, "r", encoding="utf-8") as f:
32
- content = f.read().strip()
33
- return json.loads(content) if content else []
34
- except Exception:
35
  return []
36
 
37
  QUESTION_BANK = load_question_bank()
38
 
 
 
 
 
 
39
 
40
- # Merge runner
41
- def run_merge():
42
- """Run merge_questions.py (rebuilds questions.json) and reload QUESTION_BANK."""
43
- try:
44
- subprocess.run(["python", "merge_questions.py"], check=True)
45
- global QUESTION_BANK
46
- QUESTION_BANK = load_question_bank()
47
- return True, "Merge successful."
48
- except subprocess.CalledProcessError as e:
49
- return False, f"Merge failed: {e}"
50
-
51
-
52
- # Utility: normalize subject token and display
53
- def subject_token_from_display(display):
54
- if not display:
55
- return "bm"
56
- return display.strip().lower()
57
-
58
-
59
- def normalize_display_subject(token):
60
- if not token:
61
- return "BM"
62
- t = token.strip().lower()
63
- mapping = {
64
- "bm": "BM",
65
- "bahasa": "BM",
66
- "bahasamelayu": "BM",
67
- "english": "English",
68
- "math": "Math",
69
- "mathematics": "Math",
70
- "history": "History",
71
- "sejarah": "History",
72
- "science": "Science",
73
- "physics": "Science",
74
- "moral": "MoralStudies",
75
- "moralstudies": "MoralStudies",
76
- }
77
- return mapping.get(t, token.capitalize())
78
-
79
-
80
- def autodetect_from_filename(path):
81
- """Detect year and subject token from filename like spm_2018_bm.pdf"""
82
- if not path:
83
- return None, None
84
- fname = os.path.basename(path)
85
- m = re.search(r"spm[_\-]?(\d{4})[_\-]?([A-Za-z]+)", fname, re.IGNORECASE)
86
- if not m:
87
- return None, None
88
- year = m.group(1)
89
- subj = m.group(2).lower()
90
- return year, subj
91
-
92
 
93
- # ===== OCR upload + auto-merge =====
94
- def process_pdf_and_merge(file_path, display_subject, year):
95
- """
96
- file_path: local filepath (gr.File type='filepath')
97
- display_subject: e.g. "BM"
98
- year: "2018"
99
- """
100
- if not file_path:
101
  return "No file uploaded."
102
-
103
- subj_token = subject_token_from_display(display_subject)
104
- try:
105
- qfile, scheme_file = ocr_agent.extract_questions_to_files(
106
- pdf_path=file_path, year=str(year), subject_token=subj_token, out_dir=DATA_DIR
107
- )
108
- except Exception as e:
109
- return f"❌ OCR failed: {e}"
110
-
111
- ok, msg = run_merge()
112
- if ok:
113
- return f"✅ OCR saved: {os.path.basename(qfile)} & {os.path.basename(scheme_file)}. Merge: {msg}"
114
- else:
115
- return f"⚠️ OCR saved: {os.path.basename(qfile)} & {os.path.basename(scheme_file)}. Merge: {msg}"
116
-
117
-
118
- # ===== Exam generation =====
119
- def generate_exam(display_subject, num_questions, include_predicted):
120
- """
121
- display_subject: "BM" etc.
122
- returns exam_data (list) and status text and exam_data (for state)
123
- """
124
- subj_key = f"Form5_{display_subject}"
125
- pool = [q for q in QUESTION_BANK if q.get("subject") == subj_key]
126
-
127
- predicted_questions = []
128
- if include_predicted:
129
- predicted_questions = predictor.generate_predictions(level="Form5",
130
- subject=display_subject,
131
- n=min(10, max(1, num_questions // 2)),
132
- question_bank=QUESTION_BANK)
133
-
134
- combined = pool + predicted_questions
135
- if not combined:
136
- return [], f"No questions found for {display_subject}. Upload papers (2018–2024).", []
137
-
138
- random.shuffle(combined)
139
- selected = combined[:min(num_questions, len(combined))]
140
-
141
- # For safety, return minimal exam objects
142
- exam_data = []
143
- for q in selected:
144
- # if predicted questions include correct_answer, it can be included (but they are in-memory)
145
- exam_data.append({
146
- "id": q.get("id"),
147
- "text": q.get("text"),
148
- "choices": q.get("choices", []),
149
- "topics": q.get("topics", []),
150
- "source": q.get("source", "pastpaper")
151
- })
152
- return exam_data, f"Prepared {len(exam_data)} questions ({len(predicted_questions)} predicted)", exam_data
153
-
154
-
155
- # ===== Submit & grade =====
156
- def submit_exam(answers_json, exam_state, display_subject):
157
- """
158
- answers_json: dict where keys are stringified ids -> answer text (or choice text)
159
- exam_state: the exam_data (list) saved in gr.State
160
- """
161
- exam_data = exam_state or []
162
- if not exam_data:
163
- return "No exam data found.", {}, {}, {}, gr.update(visible=False), gr.update(visible=True)
164
-
165
- correct = 0
166
- graded = 0
167
- per_question = {}
168
-
169
- for q in exam_data:
170
- qid = q.get("id")
171
- key = str(qid)
172
- user_ans = answers_json.get(key) if isinstance(answers_json, dict) else None
173
-
174
- # determine correct answer
175
- correct_ans = None
176
- if q.get("source") == "predicted":
177
- # predicted question may have correct_answer inside QUESTION_BANK? predictor sets it when generating.
178
- # We didn't include correct_answer in exam state by default; attempt to find inside QUESTION_BANK (unlikely)
179
- correct_ans = q.get("correct_answer")
180
- else:
181
- orig = next((item for item in QUESTION_BANK if item.get("id") == qid), None)
182
- if orig:
183
- correct_ans = orig.get("correct_answer")
184
-
185
- per_question[str(qid)] = {"user": user_ans, "correct": correct_ans, "topics": q.get("topics", [])}
186
-
187
- # grade only when correct_answer is not None
188
- if correct_ans is not None:
189
- graded += 1
190
- if user_ans is not None and str(user_ans).strip() == str(correct_ans).strip():
191
- correct += 1
192
-
193
- score = round(100 * correct / graded, 2) if graded > 0 else "N/A (no answer keys)"
194
-
195
- analysis = analyzer.analyze(per_question)
196
- coach = coach_agent.coach(analysis, "Form5", display_subject)
197
- pred_summary = predictor.summary(level="Form5", subject=display_subject, question_bank=QUESTION_BANK)
198
-
199
- return (
200
- f"Your Score: {score}%",
201
- analysis,
202
- coach,
203
- pred_summary,
204
- gr.update(visible=True),
205
- gr.update(visible=True)
206
- )
207
-
208
-
209
- # ===== Prefill handler for upload UI =====
210
- def prefill_subject_year_from_file(file_path):
211
- if not file_path:
212
- return "BM", "2018"
213
- year, subj_token = autodetect_from_filename(file_path)
214
- subj_display = normalize_display_subject(subj_token) if subj_token else "BM"
215
- return subj_display, year if year else "2018"
216
-
217
-
218
- # ===== Gradio UI =====
219
  with gr.Blocks() as demo:
220
- gr.Markdown("## SPM Exam Simulator Form 5 (Past papers 2018–2024)")
221
-
222
- with gr.Tab("Upload (OCR → JSON → Merge)"):
223
- pdf_file = gr.File(label="Upload SPM PDF (e.g., spm_2018_bm.pdf)", type="filepath")
224
- subject_dropdown = gr.Dropdown(choices=VALID_SUBJECTS, value="BM", label="Subject (override)")
225
- year_dropdown = gr.Dropdown(choices=[str(y) for y in range(2018, 2025)], value="2018", label="Year")
226
- process_btn = gr.Button("Process PDF → JSON + Merge")
227
- ocr_status = gr.Textbox(label="Status", interactive=False)
228
-
229
- pdf_file.change(prefill_subject_year_from_file, inputs=[pdf_file], outputs=[subject_dropdown, year_dropdown])
230
- process_btn.click(process_pdf_and_merge, inputs=[pdf_file, subject_dropdown, year_dropdown], outputs=[ocr_status])
231
-
232
- with gr.Tab("Exam Simulator"):
233
- subject_sel = gr.Dropdown(choices=["BM", "English", "Math", "History", "Science", "MoralStudies"],
234
- value="Math", label="Subject")
235
- num_q = gr.Slider(minimum=5, maximum=50, step=5, value=10, label="Number of Questions")
236
- include_pred = gr.Checkbox(value=True, label="Include AI-predicted questions (in-memory only)")
237
- start_btn = gr.Button("Start Exam")
238
- exam_state = gr.State()
239
-
240
- exam_display = gr.JSON(label="Exam Questions")
241
- status_display = gr.Textbox(label="Status", interactive=False)
242
-
243
- start_btn.click(generate_exam,
244
- inputs=[subject_sel, num_q, include_pred],
245
- outputs=[exam_display, status_display, exam_state])
246
-
247
- with gr.Tab("Submit & Results"):
248
- answers_input = gr.JSON(label='Submit Your Answers as JSON (e.g. {"1001":"Seronok", "900000":"4"})')
249
- submit_btn = gr.Button("Submit Answers")
250
-
251
- score_out = gr.Textbox(label="Score")
252
- analysis_out = gr.JSON(label="Weakness Analysis")
253
- coach_out = gr.JSON(label="Personalized Coaching")
254
- pred_out = gr.JSON(label="Prediction Summary")
255
-
256
- back_btn = gr.Button("← Back to Exam", visible=False)
257
- retry_btn = gr.Button("Retry", visible=False)
258
-
259
- # submit takes (answers_input, exam_state, subject_sel)
260
- submit_btn.click(
261
- submit_exam,
262
- inputs=[answers_input, exam_state, subject_sel],
263
- outputs=[score_out, analysis_out, coach_out, pred_out, back_btn, retry_btn]
264
- )
265
 
266
- # Launch
267
- if __name__ == "__main__":
268
- demo.launch(server_name="0.0.0.0", server_port=7860)
269
 
270
 
271
 
 
 
1
  import os
 
2
  import json
3
  import random
4
  import subprocess
 
7
  from agents import AnalyzerAgent, CoachAgent, PredictiveAgent
8
  from ocr_agent import OcrAgent
9
 
10
+ # Paths
11
  DATA_DIR = "data"
12
  QUESTIONS_FILE = "questions.json"
 
 
13
 
14
  os.makedirs(DATA_DIR, exist_ok=True)
15
 
16
+ # Load questions
 
 
 
 
 
 
def load_question_bank(path=None):
    """Load the merged question bank from disk.

    Args:
        path: Optional path of the JSON file to read. Defaults to the
            module-level QUESTIONS_FILE when omitted.

    Returns:
        The parsed list of questions, or an empty list when the file is
        missing, unreadable, not valid JSON, or does not contain a JSON list.
    """
    bank_path = QUESTIONS_FILE if path is None else path
    try:
        with open(bank_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError (empty or malformed file) and UnicodeDecodeError.
        return []
    # The rest of the app iterates the bank and calls .get() on each item,
    # so anything other than a list is treated as "no questions".
    return data if isinstance(data, list) else []
25
 
26
  QUESTION_BANK = load_question_bank()
27
 
28
+ # Agents
29
+ analyzer = AnalyzerAgent()
30
+ coach = CoachAgent()
31
+ predictive = PredictiveAgent()
32
+ ocr_agent = OcrAgent(data_dir=DATA_DIR)
33
 
34
+ # Merge questions.json after new OCR upload
35
def merge_questions():
    """Run merge_questions.py and reload the in-memory question bank.

    The script is launched with the interpreter that is running this app
    (rather than whatever "python" resolves to on PATH), and a failing run is
    reported instead of being silently ignored.

    Returns:
        True when the script exited successfully and QUESTION_BANK was
        reloaded; False when the script could not be started or exited with a
        non-zero status (QUESTION_BANK is left unchanged in that case).
    """
    import sys

    global QUESTION_BANK
    try:
        subprocess.run([sys.executable, "merge_questions.py"], check=True)
    except (OSError, subprocess.CalledProcessError) as exc:
        print(f"merge_questions.py failed: {exc}", file=sys.stderr)
        return False
    # Without this reload, freshly merged questions stay invisible until restart.
    QUESTION_BANK = load_question_bank()
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # Gradio Functions
39
def upload_and_extract(pdf_file, subject, year):
    """Run OCR on an uploaded past paper, save the result as JSON, then merge.

    Args:
        pdf_file: Path of the uploaded PDF; falsy when nothing was uploaded.
        subject: Subject label selected in the UI, e.g. "BM".
        year: Exam year selected in the UI, e.g. "2018".

    Returns:
        A human-readable status string describing success or the failure
        that occurred.
    """
    if not pdf_file:
        return "No file uploaded."
    # Dropdown values arrive as None when nothing has been selected.
    if not subject or not year:
        return "Please select a subject and a year."

    try:
        extracted = ocr_agent.extract_questions(pdf_file, subject, year)
    except Exception as exc:  # UI boundary: report the failure instead of crashing
        return f"❌ OCR failed: {exc}"

    json_path = os.path.join(DATA_DIR, f"spm_{year}_{str(subject).lower()}.json")
    try:
        # Serialize first so a non-serializable result cannot leave a
        # truncated JSON file behind.
        payload = json.dumps(extracted, ensure_ascii=False, indent=2)
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError) as exc:
        return f"❌ Could not save extracted questions: {exc}"

    merge_questions()
    return f"✅ Extracted {len(extracted)} questions from {subject} {year}."
48
+
49
def _format_question_html(number, question):
    """Render one question dict as an HTML-escaped snippet."""
    import html

    text = html.escape(str(question.get("text", "")))
    lines = [f"<b>Q{number}.</b> {text}"]
    choices = question.get("choices")
    if isinstance(choices, (list, tuple)):
        lines.extend(html.escape(str(choice)) for choice in choices)
    return "<br>".join(lines)


def simulate_exam(subject, year, num_questions, include_pred, question_bank=None):
    """Build the HTML for a practice exam drawn at random from the question bank.

    Args:
        subject: Subject label selected in the UI, e.g. "Math".
        year: Selected exam year. Currently not used for filtering.
        num_questions: Requested number of questions; may arrive as a float.
        include_pred: When truthy, append the predictive agent's output.
        question_bank: Optional list of question dicts to draw from. Defaults
            to the module-level QUESTION_BANK.

    Returns:
        An HTML string with one block per question, or a short notice when no
        subject is selected or nothing matches.
    """
    if not subject:
        return "Please select a subject."

    bank = QUESTION_BANK if question_bank is None else question_bank
    needle = str(subject).lower()
    # NOTE(review): the subject name is matched against the question text (as
    # before) and additionally against a "subject" field, presumably shaped
    # like "Form5_Math" — confirm against what merge_questions.py writes.
    qs = [
        q
        for q in bank
        if isinstance(q, dict)
        and (
            needle in str(q.get("text", "")).lower()
            or needle in str(q.get("subject", "")).lower()
        )
    ]

    # random.sample needs a non-negative int no larger than the population.
    count = min(max(int(num_questions or 0), 0), len(qs))
    selected = random.sample(qs, count)

    # Questions are dicts, so each one is rendered to a string before joining.
    output_blocks = [
        _format_question_html(number, q) for number, q in enumerate(selected, start=1)
    ]
    if include_pred:
        pred_qs = predictive.predict_questions(json.dumps(selected), subject)
        # NOTE(review): predictor output is inserted unescaped, as before —
        # confirm whether it is plain text or already HTML.
        output_blocks.append(f"<b>Predicted:</b><br>{pred_qs}")

    if not output_blocks:
        return f"No questions found for {subject}."
    return "<br><br>".join(output_blocks)
57
+
58
def submit_exam_answers(user_answers):
    """Acknowledge a submission; the answers themselves are not graded yet.

    Args:
        user_answers: The raw answer text typed by the user (currently ignored).

    Returns:
        A fixed confirmation message.
    """
    # Stub: no scoring is performed here yet, only an acknowledgement.
    acknowledgement = "✅ Answers submitted. Scoring will be added when scheme JSONs are ready."
    return acknowledgement
61
+
62
+ # Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Choices shared by the upload and simulation tabs, defined once so the two
# tabs cannot drift apart.
SUBJECT_CHOICES = ["BM", "English", "Math", "History", "Science", "MoralStudies"]
YEAR_CHOICES = [str(y) for y in range(2018, 2025)]

with gr.Blocks() as demo:
    gr.Markdown("# 📘 SPM Exam Simulator (Form 5)")
    gr.Markdown("Practice with real SPM past papers (2018–2024).")

    with gr.Tab("Upload Past Paper"):
        pdf_in = gr.File(label="Upload SPM PDF", type="filepath")
        # Explicit defaults keep the handlers from receiving None when the
        # user never touches a dropdown.
        subject_in = gr.Dropdown(SUBJECT_CHOICES, value=SUBJECT_CHOICES[0], label="Subject")
        year_in = gr.Dropdown(YEAR_CHOICES, value=YEAR_CHOICES[0], label="Year")
        upload_btn = gr.Button("Extract Questions")
        upload_out = gr.Textbox()
        upload_btn.click(upload_and_extract, inputs=[pdf_in, subject_in, year_in], outputs=upload_out)

    with gr.Tab("Exam Simulation"):
        subject_sim = gr.Dropdown(SUBJECT_CHOICES, value=SUBJECT_CHOICES[0], label="Subject")
        year_sim = gr.Dropdown(YEAR_CHOICES, value=YEAR_CHOICES[0], label="Year")
        num_qs = gr.Slider(1, 20, value=5, step=1, label="Number of Questions")
        include_pred = gr.Checkbox(label="Include AI-Predicted Questions")
        start_btn = gr.Button("Start Simulation")
        exam_out = gr.HTML()
        start_btn.click(simulate_exam, inputs=[subject_sim, year_sim, num_qs, include_pred], outputs=exam_out)

    with gr.Tab("Submit Answers"):
        ans_in = gr.Textbox(label="Enter your answers (e.g., Q1:A, Q2:C)")
        submit_btn = gr.Button("Submit")
        submit_out = gr.Textbox()
        submit_btn.click(submit_exam_answers, inputs=ans_in, outputs=submit_out)

# Only start the server when run as a script, so importing this module
# (e.g. from a test or another tool) does not bind a port.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
 
 
 
93
 
94
 
95