| import os |
| import uuid |
| import json |
| from flask import Flask, request, jsonify, Response |
| import pytesseract |
| from pdf2image import convert_from_bytes |
| from flask_cors import CORS |
| from lib import ocr_2 as ocr |
| from lib import llm_3_deepinfra as llm |
|
|
# Point TESSDATA_PREFIX at the Tesseract 5 data directory.
os.environ.update(TESSDATA_PREFIX='/usr/share/tesseract-ocr/5/tessdata')

# Flask application; CORS is applied to it with flask_cors' default settings.
app = Flask(__name__)
CORS(app)

# Folder that uploaded PDFs are written to while they are being processed.
UPLOAD_FOLDER = './tmp'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
|
|
| |
@app.route('/recognize', methods=['POST'])
def upload_file():
    """Run OCR on an uploaded PDF and return the result as JSON.

    Expects a multipart upload with the PDF under the ``file`` form field.
    The upload is written to ``app.config['UPLOAD_FOLDER']`` under a random
    UUID-based name, passed to ``ocr.processSingleFile``, and removed again
    afterwards, whether or not processing succeeded.

    Returns:
        A UTF-8 JSON response holding whatever ``ocr.processSingleFile``
        returned, or a JSON object with an ``error`` key when the ``file``
        field is missing, has an empty filename, or is not a ``.pdf`` file.
    """
    # NOTE(review): the error responses below go out with jsonify's default
    # status code; kept unchanged because clients may depend on it.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'})

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'})
    # Case-insensitive extension check so that e.g. "scan.PDF" is accepted.
    if not (file and file.filename.lower().endswith('.pdf')):
        return jsonify({'error': 'File must be a PDF'})

    upload_dir = app.config['UPLOAD_FOLDER']
    # The folder may not exist yet (fresh checkout); saving would then fail.
    os.makedirs(upload_dir, exist_ok=True)

    # Store under a generated name rather than the client-supplied one.
    temp_path = os.path.join(upload_dir, str(uuid.uuid4()) + '.pdf')
    try:
        file.save(temp_path)
        docs_info = ocr.processSingleFile(temp_path)
    finally:
        # Always clean up the temporary file, even when saving or OCR raised.
        if os.path.exists(temp_path):
            os.remove(temp_path)

    # ensure_ascii=False keeps non-ASCII text readable in the response body.
    return Response(
        json.dumps(docs_info, sort_keys=False, ensure_ascii=False),
        content_type='application/json; charset=utf-8',
    )
|
|
| |
@app.route('/analize', methods=['POST'])
async def analize():
    """Extract application info from posted text via the LLM helper.

    Reads the ``text`` field of the JSON request body, awaits
    ``llm.getApplicationInfo`` on it, and returns the outcome under the
    ``application`` key together with an empty ``debug`` object.

    Returns:
        A UTF-8 JSON response of the form
        ``{"application": <llm result>, "debug": {}}``.
    """
    payload = request.json
    application = await llm.getApplicationInfo(payload.get('text'))

    # ensure_ascii=False keeps non-ASCII text readable in the response body.
    body = json.dumps(
        {"application": application, "debug": {}},
        sort_keys=False,
        ensure_ascii=False,
    )
    return Response(body, content_type='application/json; charset=utf-8')
| |
# Script entry point: start Flask's built-in server with debug mode disabled.
if __name__ == "__main__":
    app.run(debug=False)
|
|