/**
 * PDF parsing result types.
 *
 * Extended to support advanced features from providers like MinerU.
 */

/**
 * Parsed PDF content with text and images.
 *
 * `text` and `images` are always present. The table, formula and layout
 * collections are optional and are described in this file as MinerU
 * features, so consumers must handle their absence.
 */
export interface ParsedPdfContent {
  /** Extracted text content from the PDF. */
  text: string;

  /** Array of images as base64 data URLs. */
  images: string[];

  /** Extracted tables (MinerU feature). */
  tables?: Array<{
    /** Page the table was found on (NOTE(review): 0- vs 1-based is not specified here). */
    page: number;
    /** Table cells as a two-dimensional array of strings. */
    data: string[][];
    /** Optional caption associated with the table. */
    caption?: string;
  }>;

  /** Extracted formulas (MinerU feature). */
  formulas?: Array<{
    /** Page the formula was found on. */
    page: number;
    /** LaTeX source of the formula. */
    latex: string;
    /** Optional bounding box (units / coordinate origin are not specified here). */
    position?: { x: number; y: number; width: number; height: number };
  }>;

  /** Layout analysis (MinerU feature). */
  layout?: Array<{
    /** Page the layout element was found on. */
    page: number;
    /** Kind of layout element. */
    type: 'title' | 'text' | 'image' | 'table' | 'formula';
    /** Content of the layout element. */
    content: string;
    /** Optional bounding box (units / coordinate origin are not specified here). */
    position?: { x: number; y: number; width: number; height: number };
  }>;

  /** Metadata about the PDF. */
  metadata?: {
    /** Name of the source file, when known. */
    fileName?: string;
    /** Size of the source file (presumably bytes — TODO confirm). */
    fileSize?: number;
    /** Number of pages; the only required metadata field. */
    pageCount: number;
    /** Identifier of the parser that produced the result, e.g. 'unpdf' | 'mineru'. */
    parser?: string;
    /** Time spent processing (unit is not specified here — TODO confirm). */
    processingTime?: number;
    /** MinerU task ID. */
    taskId?: string;
    /**
     * Image ID to base64 URL mapping (used in generation pipeline),
     * e.g. `{ "img_1": "data:image/png;base64,..." }`.
     */
    imageMapping?: Record<string, string>;
    /** PdfImage array with page numbers (used in generation pipeline). */
    pdfImages?: Array<{
      id: string;
      src: string;
      pageNumber: number;
      description?: string;
      width?: number;
      height?: number;
    }>;
    /** Allows additional, untyped metadata keys; values must be narrowed before use. */
    [key: string]: unknown;
  };
}

/**
 * Request parameters for PDF parsing.
 */
export interface ParsePdfRequest {
  /** PDF file to parse. */
  pdf: File;
}

/**
 * Response from PDF parsing API.
 *
 * `data` and `error` are both optional at the type level, so callers should
 * check for their presence rather than relying on `success` alone.
 */
export interface ParsePdfResponse {
  /** Whether parsing succeeded. */
  success: boolean;
  /** Parsed content, when available. */
  data?: ParsedPdfContent;
  /** Error message, when available. */
  error?: string;
}