Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -80,148 +80,88 @@ def upload_file():
|
|
| 80 |
logging.info(f"Files successfully uploaded: {uploaded_files}")
|
| 81 |
return process_file()
|
| 82 |
|
| 83 |
-
@app.route('/remove_file', methods=['POST'])
def remove_file():
    """Delete the files listed in the session and forget them.

    Reads the ``uploaded_files`` list from the session, removes each named
    file from ``app.config['UPLOAD_FOLDER']``, drops the session entry and
    flashes the outcome to the user.

    Returns:
        A redirect to the ``index`` endpoint in every case.
    """
    uploaded_files = session.get('uploaded_files', [])

    # The original guard tested the undefined name ``uploaded_file`` and
    # raised NameError on every request; the list just read is what matters.
    if not uploaded_files:
        flash('No file to remove.')
        logging.warning("File not found for removal")
        return redirect(url_for('index'))

    for filename in uploaded_files:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        # EAFP: attempt the delete directly so a file that vanishes between
        # a check and the removal cannot crash the request.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            logging.warning(f"File not found for removal: {file_path}")
        else:
            logging.info(f"Removed file: {filename}")

    session.pop('uploaded_files', None)
    flash('Files successfully removed')
    logging.info("All uploaded files removed")
    return redirect(url_for('index'))
|
| 102 |
-
|
| 103 |
@app.route('/reset_upload')
|
| 104 |
def reset_upload():
|
| 105 |
-
"""Reset the uploaded
|
| 106 |
uploaded_files = session.get('uploaded_files', [])
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
flash('No file to remove.')
|
| 121 |
-
logging.warning("File not found for removal")
|
| 122 |
return redirect(url_for('index'))
|
| 123 |
|
| 124 |
-
# @app.route('/process', methods=['GET','POST'])
|
| 125 |
-
# def process_file():
|
| 126 |
-
# uploaded_files = session.get('uploaded_files', [])
|
| 127 |
-
# if not uploaded_files:
|
| 128 |
-
# flash('No files selected for processing')
|
| 129 |
-
# logging.warning("No files selected for processing")
|
| 130 |
-
# return redirect(url_for('index'))
|
| 131 |
-
|
| 132 |
-
# file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
| 133 |
-
# logging.info(f"Processing files: {file_paths}")
|
| 134 |
-
|
| 135 |
-
# extracted_text = {}
|
| 136 |
-
# processed_Img = {}
|
| 137 |
-
|
| 138 |
-
# try:
|
| 139 |
-
# extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 140 |
-
# logging.info(f"Extracted text: {extracted_text}")
|
| 141 |
-
# logging.info(f"Processed images: {processed_Img}")
|
| 142 |
-
|
| 143 |
-
# llmText = json_to_llm_str(extracted_text)
|
| 144 |
-
# logging.info(f"LLM text: {llmText}")
|
| 145 |
-
|
| 146 |
-
# LLMdata = Data_Extractor(llmText)
|
| 147 |
-
# print("llm data--------->",llmText)
|
| 148 |
-
# logging.info(f"LLM data: {LLMdata}")
|
| 149 |
-
|
| 150 |
-
# except Exception as e:
|
| 151 |
-
# logging.error(f"Error during LLM processing: {e}")
|
| 152 |
-
# logging.info("Running backup model...")
|
| 153 |
@app.route('/process', methods=['GET', 'POST'])
|
| 154 |
def process_file():
|
| 155 |
uploaded_files = session.get('uploaded_files', [])
|
| 156 |
if not uploaded_files:
|
| 157 |
flash('No files selected for processing')
|
| 158 |
-
logging.warning("No files selected for processing")
|
| 159 |
return redirect(url_for('index'))
|
| 160 |
|
| 161 |
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
| 162 |
logging.info(f"Processing files: {file_paths}")
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
try:
|
| 165 |
-
#
|
| 166 |
LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 167 |
-
LLMdata['meta'] = "Primary: Groq VLM
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
logging.info(f"Processed images: {processed_Img}")
|
| 172 |
-
|
| 173 |
-
# If LLMdata is essentially empty (all values are empty lists), we might want to try backup
|
| 174 |
-
is_empty = all(len(v) == 0 for k, v in LLMdata.items() if k != 'extracted_text')
|
| 175 |
|
| 176 |
if is_empty:
|
| 177 |
-
logging.info("Groq VLM returned empty data.
|
| 178 |
raise ValueError("Empty data from Groq VLM")
|
| 179 |
|
| 180 |
-
# Regex fallback / augmentation from model text
|
| 181 |
-
cont_data = process_extracted_text(extracted_text)
|
| 182 |
-
logging.info(f"Contextual data: {cont_data}")
|
| 183 |
-
|
| 184 |
-
processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
|
| 185 |
-
logging.info(f"Processed data: {processed_data}")
|
| 186 |
-
|
| 187 |
-
session['processed_data'] = processed_data
|
| 188 |
-
session['processed_Img'] = processed_Img
|
| 189 |
-
|
| 190 |
-
flash('Data processed and analyzed successfully')
|
| 191 |
-
return redirect(url_for('result'))
|
| 192 |
-
|
| 193 |
except Exception as e:
|
| 194 |
-
logging.exception(f"
|
| 195 |
-
flash('Primary processing failed,
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
if 'extracted_text' not in locals() or not extracted_text:
|
| 201 |
-
flash('Critical failure: Could not extract text from image.')
|
| 202 |
return redirect(url_for('index'))
|
| 203 |
|
| 204 |
-
LLMdata = {}
|
| 205 |
try:
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
logging.info(f"NER model data: {LLMdata}")
|
| 210 |
except Exception as backup_e:
|
| 211 |
-
logging.exception(f"
|
| 212 |
-
flash('
|
| 213 |
return redirect(url_for('index'))
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
|
| 227 |
@app.route('/result')
|
|
|
|
| 80 |
logging.info(f"Files successfully uploaded: {uploaded_files}")
|
| 81 |
return process_file()
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
@app.route('/reset_upload')
def reset_upload():
    """Reset the uploaded files and processed data in the session and filesystem.

    Every file named in the session's ``uploaded_files`` list is deleted from
    ``app.config['UPLOAD_FOLDER']``; then the ``uploaded_files``,
    ``processed_data`` and ``processed_Img`` session keys are dropped.
    Deletion is best-effort: a file that cannot be removed is logged and
    skipped so the session is always cleared.

    Returns:
        A redirect to the ``index`` endpoint.
    """
    uploaded_files = session.get('uploaded_files', [])
    all_removed = True

    for filename in uploaded_files:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        # EAFP instead of exists()+remove(): avoids the race between the
        # check and the delete, and lets other OS errors be handled too.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            logging.warning(f"File not found for removal: {file_path}")
        except OSError:
            # e.g. permission denied or the path is a directory; keep going
            # so one bad file does not leave stale session state behind.
            all_removed = False
            logging.exception(f"Could not remove file: {file_path}")
        else:
            logging.info(f"Removed file: {filename}")

    session.pop('uploaded_files', None)
    session.pop('processed_data', None)
    session.pop('processed_Img', None)

    if all_removed:
        flash('Reset successful. All files removed.')
    else:
        # Do not claim every file is gone when a removal failed.
        flash('Reset completed, but some files could not be removed.')
    logging.info("Session and upload folder reset.")
    return redirect(url_for('index'))
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
@app.route('/process', methods=['GET', 'POST'])
def process_file():
    """Extract structured data from the uploaded files and store it in the session.

    Flow:
      1. Primary pass: ``extract_text_from_images`` returns the model data,
         the extracted text and the processed images in one call.
      2. If that call raises, or every field of the model data (other than
         ``meta``) is empty, fall back to ``NER_Model`` run over the
         extracted text. With no extracted text there is nothing to fall
         back on and the request is abandoned.
      3. Merge the model data with the output of ``process_extracted_text``
         via ``process_resume_data`` and save the result, together with the
         processed images, in the session.

    Returns:
        A redirect to ``result`` on success, or to ``index`` when no files
        are in the session or any stage fails irrecoverably.
    """
    uploaded_files = session.get('uploaded_files', [])
    if not uploaded_files:
        flash('No files selected for processing')
        return redirect(url_for('index'))

    file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
    logging.info(f"Processing files: {file_paths}")

    # Bound before the try so the except branch can inspect them even when
    # the primary call raises before assigning anything.
    extracted_text = {}
    LLMdata = {}
    processed_Img = {}

    try:
        # Primary: Groq VLM Single Pass
        LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
        LLMdata['meta'] = "Primary: Groq VLM"

        # Truthiness instead of len(): a None or scalar field value must not
        # raise TypeError and be misreported as a primary-model failure.
        is_empty = all(not value for key, value in LLMdata.items() if key != 'meta')

        if is_empty:
            logging.info("Groq VLM returned empty data. Attempting backup...")
            raise ValueError("Empty data from Groq VLM")

    except Exception as e:
        logging.exception(f"Primary processing failed or returned empty: {e}")
        flash('Primary processing failed, using backup model...')

        # The backup model works on extracted text only; without it there is
        # nothing left to try.
        if not extracted_text:
            flash('Critical failure: Could not extract text from images.')
            return redirect(url_for('index'))

        try:
            # Backup: NER Model on extracted text. Each text is followed by a
            # single space, matching the original concatenation exactly.
            text_for_ner = "".join(str(text) + " " for text in extracted_text.values())

            LLMdata = NER_Model(text_for_ner)
            LLMdata['meta'] = "Backup: Local NER"
            logging.info(f"NER model data: {LLMdata}")
        except Exception as backup_e:
            logging.exception(f"Backup processing failed: {backup_e}")
            flash('Processing failed completely.')
            return redirect(url_for('index'))

    # Common merge and finalization step, shared by the primary and backup paths.
    try:
        cont_data = process_extracted_text(extracted_text)
        processed_data = process_resume_data(LLMdata, cont_data, extracted_text)

        session['processed_data'] = processed_data
        session['processed_Img'] = processed_Img

        logging.info(f"Final processed data: {processed_data}")
        return redirect(url_for('result'))
    except Exception as merge_e:
        logging.exception(f"Error during data merging: {merge_e}")
        flash('Error finalizing data extraction.')
        return redirect(url_for('index'))
|
| 165 |
|
| 166 |
|
| 167 |
@app.route('/result')
|