"""Document management API endpoints."""

from typing import List, Optional

from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession

from src.db.postgres.connection import get_db
from src.document.document_service import document_service
from src.middlewares.logging import get_logger, log_execution
from src.middlewares.rate_limit import limiter
from src.pipeline.document_pipeline.document_pipeline import document_pipeline

logger = get_logger("document_api")

router = APIRouter(prefix="/api/v1", tags=["Documents"])


class DocumentResponse(BaseModel):
    """Serialized view of one stored document, as returned by the list endpoint."""

    id: str
    filename: str
    status: str
    file_size: int
    file_type: str
    created_at: str  # ISO-8601 string produced via ``datetime.isoformat()``


# NOTE: Keep in sync with SUPPORTED_FILE_TYPES in src/pipeline/document_pipeline/document_pipeline.py
# NOTE(review): the unit of ``max_size`` is not stated here (presumably MB) - confirm.
_DOC_TYPES = [
    {"doc_type": "pdf", "max_size": 10, "status": "active", "message": None},
    {"doc_type": "docx", "max_size": 10, "status": "active", "message": None},
    {"doc_type": "txt", "max_size": 10, "status": "active", "message": None},
    {"doc_type": "csv", "max_size": 10, "status": "active", "message": None},
    {"doc_type": "xlsx", "max_size": 10, "status": "active", "message": None},
]


# This static route must stay registered BEFORE "/documents/{user_id}":
# FastAPI matches paths in declaration order, so otherwise "doctypes" would be
# captured as a ``user_id``.
@router.get(
    "/documents/doctypes",
    summary="List supported document types",
    response_description="All document types supported by DataEyond with their size limits and status.",
)
@log_execution(logger)
async def get_document_types():
    """Return every document type DataEyond can process, with max file size and active/inactive status."""
    return {"status": "success", "data": _DOC_TYPES}


@router.get("/documents/{user_id}", response_model=List[DocumentResponse])
@log_execution(logger)
async def list_documents(
    user_id: str,
    db: AsyncSession = Depends(get_db),
):
    """List all documents for a user.

    Args:
        user_id: Path parameter identifying the owner of the documents.
        db: Async database session injected by ``get_db``.

    Returns:
        One ``DocumentResponse`` per document returned by
        ``document_service.get_user_documents``.
    """
    documents = await document_service.get_user_documents(db, user_id)
    return [
        DocumentResponse(
            id=doc.id,
            filename=doc.filename,
            status=doc.status,
            # A falsy size (``None`` or 0) is reported as 0.
            file_size=doc.file_size or 0,
            file_type=doc.file_type,
            created_at=doc.created_at.isoformat(),
        )
        for doc in documents
    ]


@router.post("/document/upload")
@limiter.limit("10/minute")
@log_execution(logger)
async def upload_document(
    request: Request,
    file: UploadFile = File(...),
    user_id: Optional[str] = None,
    db: AsyncSession = Depends(get_db),
):
    """Upload a document.

    Args:
        request: Incoming request; not read in this function body
            (NOTE(review): presumably consumed by the rate limiter - confirm).
        file: The uploaded file (required form field).
        user_id: Owner of the document. Declared optional so that a missing
            or empty value yields the explicit 400 below.
        db: Async database session injected by ``get_db``.

    Returns:
        A success envelope whose ``data`` is the result of
        ``document_pipeline.upload``.

    Raises:
        HTTPException: 400 when ``user_id`` is missing or empty.
    """
    if not user_id:
        raise HTTPException(status_code=400, detail="user_id is required")

    data = await document_pipeline.upload(file, user_id, db)
    return {"status": "success", "message": "Document uploaded successfully", "data": data}


@router.delete("/document/delete")
@log_execution(logger)
async def delete_document(
    document_id: str,
    user_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Delete a document.

    Args:
        document_id: Identifier of the document to delete.
        user_id: Identifier of the user, forwarded to the pipeline.
        db: Async database session injected by ``get_db``.

    Returns:
        A success envelope with a confirmation message.
    """
    await document_pipeline.delete(document_id, user_id, db)
    return {"status": "success", "message": "Document deleted successfully"}


@router.post("/document/process")
@log_execution(logger)
async def process_document(
    document_id: str,
    user_id: str,
    db: AsyncSession = Depends(get_db),
):
    """Process document and ingest to vector index.

    Args:
        document_id: Identifier of the document to process.
        user_id: Identifier of the user, forwarded to the pipeline.
        db: Async database session injected by ``get_db``.

    Returns:
        A success envelope whose ``data`` is the result of
        ``document_pipeline.process``.
    """
    data = await document_pipeline.process(document_id, user_id, db)
    return {"status": "success", "message": "Document processed successfully", "data": data}