WebashalarForML committed on
Commit
8574192
·
verified ·
1 Parent(s): a18b376

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -111
app.py CHANGED
@@ -80,148 +80,88 @@ def upload_file():
80
  logging.info(f"Files successfully uploaded: {uploaded_files}")
81
  return process_file()
82
 
83
- @app.route('/remove_file',methods=['POST'])
84
- def remove_file():
85
- uploaded_files = session.get('uploaded_files', [])
86
- if uploaded_file:
87
- for filename in uploaded_files:
88
- file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
89
- if os.path.exists(file_path):
90
- os.remove(file_path)
91
- logging.info(f"Removed file: {file_path}")
92
- else:
93
- logging.warning(f"File not found for removal: {file_path}") # More specific log
94
-
95
- session.pop('uploaded_files', None)
96
- flash('Files successfully removed')
97
- logging.info("All uploaded files removed")
98
- else:
99
- flash('No file to remove.')
100
- logging.warning("File not found for removal")
101
- return redirect(url_for('index'))
102
-
103
  @app.route('/reset_upload')
104
  def reset_upload():
105
- """Reset the uploaded file and the processed data."""
106
  uploaded_files = session.get('uploaded_files', [])
107
- if uploaded_file:
108
- for filename in uploaded_files:
109
- file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
110
- if os.path.exists(file_path):
111
- os.remove(file_path)
112
- logging.info(f"Removed file: {file_path}")
113
- else:
114
- logging.warning(f"File not found for removal: {file_path}") # More specific log
115
-
116
- session.pop('uploaded_files', None)
117
- flash('Files successfully removed')
118
- logging.info("All uploaded files removed")
119
- else:
120
- flash('No file to remove.')
121
- logging.warning("File not found for removal")
122
  return redirect(url_for('index'))
123
 
124
- # @app.route('/process', methods=['GET','POST'])
125
- # def process_file():
126
- # uploaded_files = session.get('uploaded_files', [])
127
- # if not uploaded_files:
128
- # flash('No files selected for processing')
129
- # logging.warning("No files selected for processing")
130
- # return redirect(url_for('index'))
131
-
132
- # file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
133
- # logging.info(f"Processing files: {file_paths}")
134
-
135
- # extracted_text = {}
136
- # processed_Img = {}
137
-
138
- # try:
139
- # extracted_text, processed_Img = extract_text_from_images(file_paths)
140
- # logging.info(f"Extracted text: {extracted_text}")
141
- # logging.info(f"Processed images: {processed_Img}")
142
-
143
- # llmText = json_to_llm_str(extracted_text)
144
- # logging.info(f"LLM text: {llmText}")
145
-
146
- # LLMdata = Data_Extractor(llmText)
147
- # print("llm data--------->",llmText)
148
- # logging.info(f"LLM data: {LLMdata}")
149
-
150
- # except Exception as e:
151
- # logging.error(f"Error during LLM processing: {e}")
152
- # logging.info("Running backup model...")
153
  @app.route('/process', methods=['GET', 'POST'])
154
  def process_file():
155
  uploaded_files = session.get('uploaded_files', [])
156
  if not uploaded_files:
157
  flash('No files selected for processing')
158
- logging.warning("No files selected for processing")
159
  return redirect(url_for('index'))
160
 
161
  file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
162
  logging.info(f"Processing files: {file_paths}")
163
 
 
 
 
 
164
  try:
165
- # Single Groq VLM pass on each image
166
  LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
167
- LLMdata['meta'] = "Primary: Groq VLM Extraction"
168
 
169
- logging.info(f"Groq VLM structured data: {LLMdata}")
170
- logging.info(f"Extracted text blobs: {extracted_text}")
171
- logging.info(f"Processed images: {processed_Img}")
172
-
173
- # If LLMdata is essentially empty (all values are empty lists), we might want to try backup
174
- is_empty = all(len(v) == 0 for k, v in LLMdata.items() if k != 'extracted_text')
175
 
176
  if is_empty:
177
- logging.info("Groq VLM returned empty data. Trying backup model...")
178
  raise ValueError("Empty data from Groq VLM")
179
 
180
- # Regex fallback / augmentation from model text
181
- cont_data = process_extracted_text(extracted_text)
182
- logging.info(f"Contextual data: {cont_data}")
183
-
184
- processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
185
- logging.info(f"Processed data: {processed_data}")
186
-
187
- session['processed_data'] = processed_data
188
- session['processed_Img'] = processed_Img
189
-
190
- flash('Data processed and analyzed successfully')
191
- return redirect(url_for('result'))
192
-
193
  except Exception as e:
194
- logging.exception(f"Error during primary processing: {e}")
195
- flash('Primary processing failed, attempting backup model...')
196
 
197
- # We don't call extract_text_from_images AGAIN because it already ran and produced its results
198
- # in the variables assigned at line 162. We just need to ensure they are available here.
199
- # If extraction completely failed (raised before return), then we have nothing to do.
200
- if 'extracted_text' not in locals() or not extracted_text:
201
- flash('Critical failure: Could not extract text from image.')
202
  return redirect(url_for('index'))
203
 
204
- LLMdata = {}
205
  try:
206
- text = json_to_llm_str(extracted_text)
207
- LLMdata = NER_Model(text)
208
- LLMdata['meta'] = "Backup: PaddleOCR + Local NER"
 
 
 
 
209
  logging.info(f"NER model data: {LLMdata}")
210
  except Exception as backup_e:
211
- logging.exception(f"Error during backup processing: {backup_e}")
212
- flash('Backup processing also failed')
213
  return redirect(url_for('index'))
214
 
215
- # Final merge using backup data if we reached here
216
- cont_data = process_extracted_text(extracted_text)
217
- processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
218
- logging.info(f"Final merged data: {processed_data}")
219
-
220
- session['processed_data'] = processed_data
221
- session['processed_Img'] = processed_Img
222
- flash('Data processed using backup model')
223
- logging.info("Data processed using backup model")
224
- return redirect(url_for('result'))
 
 
 
 
225
 
226
 
227
  @app.route('/result')
 
80
  logging.info(f"Files successfully uploaded: {uploaded_files}")
81
  return process_file()
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  @app.route('/reset_upload')
84
  def reset_upload():
85
+ """Reset the uploaded files and processed data in the session and filesystem."""
86
  uploaded_files = session.get('uploaded_files', [])
87
+ for filename in uploaded_files:
88
+ file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
89
+ if os.path.exists(file_path):
90
+ os.remove(file_path)
91
+ logging.info(f"Removed file: {file_path}")
92
+ else:
93
+ logging.warning(f"File not found for removal: {file_path}")
94
+
95
+ session.pop('uploaded_files', None)
96
+ session.pop('processed_data', None)
97
+ session.pop('processed_Img', None)
98
+ flash('Reset successful. All files removed.')
99
+ logging.info("Session and upload folder reset.")
 
 
100
  return redirect(url_for('index'))
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  @app.route('/process', methods=['GET', 'POST'])
103
  def process_file():
104
  uploaded_files = session.get('uploaded_files', [])
105
  if not uploaded_files:
106
  flash('No files selected for processing')
 
107
  return redirect(url_for('index'))
108
 
109
  file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
110
  logging.info(f"Processing files: {file_paths}")
111
 
112
+ extracted_text = {}
113
+ LLMdata = {}
114
+ processed_Img = {}
115
+
116
  try:
117
+ # Primary: Groq VLM Single Pass
118
  LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
119
+ LLMdata['meta'] = "Primary: Groq VLM"
120
 
121
+ # Check if extracted data is essentially empty
122
+ is_empty = all(len(v) == 0 for k, v in LLMdata.items() if k != 'meta')
 
 
 
 
123
 
124
  if is_empty:
125
+ logging.info("Groq VLM returned empty data. Attempting backup...")
126
  raise ValueError("Empty data from Groq VLM")
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  except Exception as e:
129
+ logging.exception(f"Primary processing failed or returned empty: {e}")
130
+ flash('Primary processing failed, using backup model...')
131
 
132
+ # If extraction failed but we have some text/images, use them
133
+ if not extracted_text:
134
+ flash('Critical failure: Could not extract text from images.')
 
 
135
  return redirect(url_for('index'))
136
 
 
137
  try:
138
+ # Backup: NER Model on extracted text
139
+ text_for_ner = ""
140
+ for path, text in extracted_text.items():
141
+ text_for_ner += str(text) + " "
142
+
143
+ LLMdata = NER_Model(text_for_ner)
144
+ LLMdata['meta'] = "Backup: Local NER"
145
  logging.info(f"NER model data: {LLMdata}")
146
  except Exception as backup_e:
147
+ logging.exception(f"Backup processing failed: {backup_e}")
148
+ flash('Processing failed completely.')
149
  return redirect(url_for('index'))
150
 
151
+ # Common merge and finalization step
152
+ try:
153
+ cont_data = process_extracted_text(extracted_text)
154
+ processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
155
+
156
+ session['processed_data'] = processed_data
157
+ session['processed_Img'] = processed_Img
158
+
159
+ logging.info(f"Final processed data: {processed_data}")
160
+ return redirect(url_for('result'))
161
+ except Exception as merge_e:
162
+ logging.exception(f"Error during data merging: {merge_e}")
163
+ flash('Error finalizing data extraction.')
164
+ return redirect(url_for('index'))
165
 
166
 
167
  @app.route('/result')