| import io | |
| from fastapi import APIRouter, File, UploadFile | |
| from pypdf import PdfReader | |
| router = APIRouter() | |
async def upload_document(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and return it with basic metadata.

    Args:
        file: The uploaded file. Only names ending in ``.pdf``
            (case-insensitive) are accepted.

    Returns:
        On success, a dict with ``filename``, ``text`` (the extracted text,
        truncated to 100,000 characters) and ``length`` (the length of the
        full, untruncated text). On failure, a dict with a single ``error``
        key holding a user-facing message; errors raised while reading or
        parsing the PDF are reported this way rather than propagated.
    """
    # Cap on the returned text so the payload stays usable as context.
    max_text_chars = 100000

    # ``filename`` can be absent on an upload; treat that as "not a PDF"
    # instead of failing on ``None.lower()``.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        return {"error": "Solo se admiten archivos PDF por ahora."}

    try:
        content = await file.read()
        reader = PdfReader(io.BytesIO(content))
        # One newline after every page, assembled in a single pass. A page
        # yielding no text (or None) contributes just the newline instead of
        # aborting the whole extraction.
        extracted_text = "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
    except Exception as e:
        # Endpoint boundary: any read/parse failure is reported to the client
        # in the response body rather than raised.
        return {"error": f"Error al procesar el PDF: {str(e)}"}

    return {
        "filename": file.filename,
        "text": extracted_text[:max_text_chars],
        "length": len(extracted_text),
    }