Spaces:

muryshev
/

cb-api

Sleeping

App Files Files Community

cb-api / app.py

muryshev

async fixes

c01b75e about 1 year ago

raw

history blame contribute delete

2.26 kB

	import os
	import uuid
	import json
	from flask import Flask, request, jsonify, Response
	import pytesseract
	from pdf2image import convert_from_bytes
	from flask_cors import CORS
	from lib import ocr_2 as ocr
	from lib import llm_3_deepinfra as llm

	# TESSDATA_PREFIX is the variable Tesseract reads to locate its tessdata dir
	os.environ.update(TESSDATA_PREFIX='/usr/share/tesseract-ocr/5/tessdata')

	# Directory where uploaded files are written while they are processed
	UPLOAD_FOLDER = './tmp'

	# Flask application object; CORS(app) applies flask_cors to it
	app = Flask(__name__)
	app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
	CORS(app)

	# Endpoint for uploading a PDF and extracting its text
	@app.route('/recognize', methods=['POST'])
	def upload_file():
	    """Run OCR on an uploaded PDF and return the result as JSON.

	    Expects a POST request carrying the PDF in the ``file`` upload field.
	    The upload is written under a random name inside
	    ``app.config['UPLOAD_FOLDER']`` for the duration of the OCR call and is
	    removed afterwards, even when OCR raises.

	    Returns:
	        A UTF-8 JSON response with whatever ``ocr.processSingleFile``
	        returned, or a JSON object with an ``error`` key when the upload
	        is missing, has an empty filename, or is not a ``.pdf`` file.
	    """
	    # Check if the post request has the file part
	    if 'file' not in request.files:
	        return jsonify({'error': 'No file part'})

	    file = request.files['file']

	    # An empty filename means the form was submitted without a file
	    if file.filename == '':
	        return jsonify({'error': 'No selected file'})

	    # Accept the extension in any letter case (".pdf", ".PDF", ...)
	    if not file or not file.filename.lower().endswith('.pdf'):
	        return jsonify({'error': 'File must be a PDF'})

	    upload_dir = app.config['UPLOAD_FOLDER']
	    # The directory may not exist yet on a fresh deployment
	    os.makedirs(upload_dir, exist_ok=True)

	    # Use a random server-side name instead of the client-supplied one
	    temp_path = os.path.join(upload_dir, str(uuid.uuid4()) + '.pdf')
	    try:
	        file.save(temp_path)
	        docs_info = ocr.processSingleFile(temp_path)
	    finally:
	        # Always clean up so a failed OCR run does not leak the upload
	        try:
	            os.remove(temp_path)
	        except FileNotFoundError:
	            # Saving failed before the file was created; nothing to remove
	            pass

	    return Response(
	        json.dumps(docs_info, sort_keys=False, ensure_ascii=False),
	        content_type='application/json; charset=utf-8',
	    )

	# Endpoint for analyzing submitted text with the LLM helper
	@app.route('/analize', methods=['POST'])
	async def analize():
	    """Pass the posted text to ``llm.getApplicationInfo`` and return JSON.

	    Expects a JSON object body with a ``text`` field.

	    Returns:
	        A UTF-8 JSON response of the form
	        ``{"application": <helper result>, "debug": {}}``, or a 400
	        response with an ``error`` key when the body is not a JSON object
	        or carries no ``text`` value.
	    """
	    payload = request.json

	    # request.json may be any JSON value (list, string, null, ...); only an
	    # object can carry the "text" field, so reject everything else instead
	    # of failing with AttributeError or handing None to the LLM helper.
	    if not isinstance(payload, dict) or payload.get('text') is None:
	        return jsonify({'error': 'Request body must be a JSON object with a "text" field'}), 400

	    app_info = await llm.getApplicationInfo(payload['text'])
	    result = {
	        "application": app_info,
	        "debug": {},
	    }
	    return Response(
	        json.dumps(result, sort_keys=False, ensure_ascii=False),
	        content_type='application/json; charset=utf-8',
	    )

	# Start the Flask server only when this file is run as a script,
	# with debug mode explicitly turned off.
	if __name__ == '__main__':
	    app.run(debug=False)