"""FastAPI service that parses tasks out of uploaded files and exports them.

The ``/parse-batch`` endpoint accepts a multipart form with one or more
``files`` entries, runs each through ``TaskParser``, optionally summarizes the
task descriptions, optionally pushes the result to Google Sheets and Google
Calendar, and always returns the tasks together with a base64-encoded Excel
workbook.
"""
import base64
import json
import os
import sys
import tempfile
from io import BytesIO

import pandas as pd
import uvicorn
from fastapi import FastAPI, UploadFile, File, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles

# The project-local ``src`` package is imported relative to this file, so the
# file's directory has to be on sys.path before the imports below run.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from src.parser import TaskParser
from src.excel_exporter import ExcelExporter
from src.summarizer import TaskSummarizer
from src.google_sheets import GoogleSheetsExporter
from src.google_calendar import GoogleCalendarExporter

# When the credentials are supplied through the environment, materialize them
# as a file under ./credentials at import time.
GOOGLE_CREDS = os.environ.get("GOOGLE_CREDENTIALS")
if GOOGLE_CREDS:
    os.makedirs("credentials", exist_ok=True)
    with open("credentials/google-credentials.json", "w") as f:
        f.write(GOOGLE_CREDS)
    print("✅ Google-ключ загружен из секретов")

app = FastAPI(title="PDF Task Parser API")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

app.mount("/web", StaticFiles(directory="web"), name="web")

TEMP_DIR = "temp_uploads"
os.makedirs(TEMP_DIR, exist_ok=True)

# Lazily created summarizer: None = not tried yet, False = loading failed,
# anything else = the loaded TaskSummarizer instance.
summarizer = None


def get_summarizer():
    """Return the shared ``TaskSummarizer``, loading it on first use.

    Returns:
        The summarizer instance, or ``None`` when it could not be loaded.
        A failed load is remembered, so it is not retried on later calls.
    """
    global summarizer
    if summarizer is None:
        try:
            print("🔄 Загрузка суммаризатора...")
            summarizer = TaskSummarizer()
            print("✅ Суммаризатор загружен")
        except Exception as e:
            print(f"⚠️ Суммаризатор не загружен: {e}")
            summarizer = False
    return summarizer if summarizer is not False else None


@app.get("/")
async def root():
    """Return a small status payload showing the API is up."""
    return {"message": "PDF Task Parser API", "status": "running"}


@app.get("/app")
async def get_app():
    """Serve ``web/index.html``, or a 404 page when the file is missing."""
    html_path = os.path.join("web", "index.html")
    if os.path.exists(html_path):
        with open(html_path, "r", encoding="utf-8") as f:
            return HTMLResponse(content=f.read())
    return HTMLResponse(content="\n\nindex.html not found\n\n", status_code=404)


def _sheet_name(filename):
    """Derive a worksheet name from an uploaded file name.

    The extension is dropped, the stem is cut to 30 characters, and spaces,
    slashes and backslashes are replaced with underscores. The same rule is
    used for both the Google Sheets and the Excel export.
    """
    stem = os.path.splitext(filename)[0][:30]
    return stem.replace(' ', '_').replace('/', '_').replace('\\', '_')


def _task_row(task):
    """Map one parsed task dict to a row of the per-file export table."""
    return {
        '№': task['number'],
        'Краткое описание': task.get('summary', ''),
        'Описание': task['full_description'],
        'Ответственный': task.get('responsible', ''),
        'Срок': task.get('due_date_str', ''),
    }


def _annotate_tasks(tasks, source):
    """Add ``source`` and ``summary`` keys to every task, in place.

    The summary comes from the summarizer when one is available; if it is
    missing or raises, the first 100 characters of the description followed
    by ``"..."`` are used instead.
    """
    active_summarizer = get_summarizer()
    for task in tasks:
        task['source'] = source
        if active_summarizer:
            try:
                task['summary'] = active_summarizer.summarize(task['full_description'])
            except Exception:
                task['summary'] = task['full_description'][:100] + "..."
        else:
            task['summary'] = task['full_description'][:100] + "..."


async def _extract_tasks(upload):
    """Write an upload to a temporary file and parse tasks out of it.

    The temporary file keeps the upload's (lower-cased) extension and is
    removed again whether or not parsing succeeds.
    """
    suffix = os.path.splitext(upload.filename)[1].lower()
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    tmp_path = tmp.name
    try:
        # Close the handle before the parser opens the path.
        with tmp:
            tmp.write(await upload.read())
        parser = TaskParser(tmp_path)
        text = parser.extract_text()
        return parser.parse_tasks(text)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


async def _process_upload(upload):
    """Parse one upload and build its export table.

    Returns:
        ``(tasks, dataframe)`` when tasks were found, otherwise ``None``.
    """
    tasks = await _extract_tasks(upload)
    if not tasks:
        return None
    _annotate_tasks(tasks, upload.filename)
    frame = pd.DataFrame([_task_row(task) for task in tasks])
    return tasks, frame


def _export_to_sheets(all_dfs, sheets_url):
    """Push every per-file table to the spreadsheet at ``sheets_url``.

    Returns:
        ``"success"``, or a string starting with ``"error: "`` when
        ``use_existing_spreadsheet`` returns a falsy value or any step raises.
    """
    try:
        print("📊 Экспорт в Google Sheets...")
        sheets_exporter = GoogleSheetsExporter()
        if sheets_exporter.use_existing_spreadsheet(sheets_url):
            for item in all_dfs:
                sheets_exporter.export_dataframe(item['df'], _sheet_name(item['filename']))
            print("✅ Экспорт в Google Sheets выполнен")
            return "success"
        print("❌ Таблица не найдена")
        return "error: таблица не найдена"
    except Exception as e:
        print(f"❌ Ошибка Sheets: {e}")
        return f"error: {str(e)}"


def _export_to_calendar(all_tasks_data, calendar_id):
    """Create calendar events for all tasks in the given calendar.

    Returns:
        ``"success"``, or a string starting with ``"error: "`` when the
        exporter raises.
    """
    try:
        print(f"📅 Экспорт в Google Calendar...")
        print(f" ID календаря: {calendar_id}")
        print(f" Количество задач: {len(all_tasks_data)}")
        calendar_exporter = GoogleCalendarExporter(calendar_id=calendar_id)
        calendar_exporter.create_events_from_tasks(all_tasks_data)
        print("✅ Экспорт в Google Calendar выполнен")
        return "success"
    except Exception as e:
        print(f"❌ Ошибка Calendar: {e}")
        return f"error: {str(e)}"


def _build_excel_base64(all_dfs, all_tasks_data):
    """Build the workbook (one sheet per file plus a combined sheet).

    Returns:
        The workbook bytes encoded as an ASCII base64 string.
    """
    exporter = ExcelExporter()
    for item in all_dfs:
        exporter.add_sheet(item['df'], _sheet_name(item['filename']))

    # The combined sheet has the same columns, preceded by the source file.
    all_df = pd.DataFrame([
        {'Источник': task.get('source', ''), **_task_row(task)}
        for task in all_tasks_data
    ])
    exporter.add_sheet(all_df, "Все задачи")

    excel_buffer = BytesIO()
    exporter.save_to_buffer(excel_buffer)
    return base64.b64encode(excel_buffer.getvalue()).decode('ascii')


@app.post("/parse-batch")
async def parse_batch(request: Request):
    """Parse all uploaded files and return their tasks plus an Excel export.

    Form fields read from the request:
        files: one or more uploaded files.
        export_to_sheets / export_to_calendar: ``"true"`` to enable the
            corresponding export (case-insensitive).
        sheets_url: target spreadsheet, required for the Sheets export.
        calendar_id: target calendar, required for the Calendar export.

    Returns:
        A dict with ``success``, ``tasks``, ``statistics``, ``excel_base64``,
        ``files``, ``sheets_export`` and ``calendar_export``; or a
        ``JSONResponse`` with ``success: False`` when no file yielded tasks.
        A file that fails to process is logged and skipped.
    """
    print("\n" + "="*60)
    print("🔍 ПОЛУЧЕН ЗАПРОС НА ПАРСИНГ")
    print("="*60)

    form = await request.form()
    # A "files" field sent without a file part arrives as a plain string and
    # has no .filename/.read(); ignore those instead of failing the request.
    files = [entry for entry in form.getlist("files") if not isinstance(entry, str)]
    export_to_sheets = form.get("export_to_sheets", "false").lower() == "true"
    export_to_calendar = form.get("export_to_calendar", "false").lower() == "true"
    sheets_url = form.get("sheets_url", "")
    calendar_id = form.get("calendar_id", "")

    print(f"📄 Файлов: {len(files)}")
    print(f"📊 Экспорт в Sheets: {export_to_sheets}")
    print(f"📅 Экспорт в Calendar: {export_to_calendar}")
    print(f"🔗 URL Sheets: {sheets_url}")
    print(f"📆 ID Calendar: {calendar_id}")
    print("="*60 + "\n")

    all_results = []
    all_tasks_data = []
    sheets_export_status = None
    calendar_export_status = None
    all_dfs = []

    for file in files:
        try:
            processed = await _process_upload(file)
        except Exception as e:
            print(f"Ошибка в {file.filename}: {e}")
            continue
        if processed is None:
            continue

        # Only record a file once it was processed completely, so the task
        # list, the per-file sheets and the file summary stay in step.
        tasks, df = processed
        all_tasks_data.extend(tasks)
        all_dfs.append({
            "df": df,
            "filename": file.filename,
            "tasks": tasks
        })
        all_results.append({
            "filename": file.filename,
            "tasks": tasks,
            "count": len(tasks)
        })

    if not all_tasks_data:
        return JSONResponse({
            "success": False,
            "error": "Задачи не найдены ни в одном файле"
        })

    # ===== EXPORT TO GOOGLE SHEETS =====
    if export_to_sheets and sheets_url:
        sheets_export_status = _export_to_sheets(all_dfs, sheets_url)

    # ===== EXPORT TO GOOGLE CALENDAR =====
    if export_to_calendar and calendar_id:
        calendar_export_status = _export_to_calendar(all_tasks_data, calendar_id)
    else:
        print(f"⚠️ Экспорт в Calendar пропущен: export_to_calendar={export_to_calendar}, \ncalendar_id={calendar_id}")

    # ===== BUILD THE EXCEL WORKBOOK =====
    excel_base64 = _build_excel_base64(all_dfs, all_tasks_data)

    total_stats = {
        "total": len(all_tasks_data),
        "with_responsible": sum(1 for t in all_tasks_data if t.get('responsible')),
        "with_date": sum(1 for t in all_tasks_data if t.get('due_date_str')),
        "files_count": len(all_results)
    }

    return {
        "success": True,
        "tasks": all_tasks_data,
        "statistics": total_stats,
        "excel_base64": excel_base64,
        "files": [{"name": r["filename"], "count": r["count"]} for r in all_results],
        "sheets_export": sheets_export_status,
        "calendar_export": calendar_export_status
    }


if __name__ == "__main__":
    print("🚀 Запуск PDF Task Parser API")
    uvicorn.run(app, host="0.0.0.0", port=8000)