Benny-Tang committed on
Commit
bf6ca70
·
verified ·
1 Parent(s): 7a7dad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -2
app.py CHANGED
@@ -94,9 +94,31 @@ def auto_detect(file_path):
94
  m = re.match(r"spm_(\d{4})_(\w+)\.pdf", fname, re.IGNORECASE)
95
  if m:
96
  year, subject = m.groups()
97
- return year, subject.capitalize()
98
  return None, None
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def process_pdf(file, subject, year):
101
  raw = ocr_agent.extract_from_pdf(file)
102
  cleaned = ocr_agent.clean_text(raw)
@@ -115,7 +137,8 @@ def prefill_subject_year(file):
115
  if not file:
116
  return "BM", "2018"
117
  year, subject = auto_detect(file)
118
- return subject if subject else "BM", year if year else "2018"
 
119
 
120
  # ----------------- Gradio UI -----------------
121
  with gr.Blocks() as demo:
@@ -189,3 +212,4 @@ if __name__ == "__main__":
189
 
190
 
191
 
 
 
94
  m = re.match(r"spm_(\d{4})_(\w+)\.pdf", fname, re.IGNORECASE)
95
  if m:
96
  year, subject = m.groups()
97
+ return year, subject
98
  return None, None
99
 
100
+ def normalize_subject(subject):
101
+ """Normalize subject names to match dropdown choices."""
102
+ valid_subjects = ["BM", "English", "Math", "History", "Science", "MoralStudies"]
103
+ if not subject:
104
+ return "BM"
105
+ subject = subject.strip().lower()
106
+ mapping = {
107
+ "bm": "BM",
108
+ "bahasa": "BM",
109
+ "bahasamelayu": "BM",
110
+ "b.m": "BM",
111
+ "english": "English",
112
+ "math": "Math",
113
+ "mathematics": "Math",
114
+ "history": "History",
115
+ "sejarah": "History",
116
+ "science": "Science",
117
+ "moral": "MoralStudies",
118
+ "moralstudies": "MoralStudies",
119
+ }
120
+ return mapping.get(subject, "BM") if subject in mapping else subject.capitalize()
121
+
122
  def process_pdf(file, subject, year):
123
  raw = ocr_agent.extract_from_pdf(file)
124
  cleaned = ocr_agent.clean_text(raw)
 
137
  if not file:
138
  return "BM", "2018"
139
  year, subject = auto_detect(file)
140
+ subject = normalize_subject(subject)
141
+ return subject, year if year else "2018"
142
 
143
  # ----------------- Gradio UI -----------------
144
  with gr.Blocks() as demo:
 
212
 
213
 
214
 
215
+