/**
 * PDF parsing result types.
 * Extended to support advanced features from providers such as MinerU.
 */
/**
 * Rectangle describing where an element sits on a page (x/y origin plus
 * width/height). Units are not specified by this type.
 *
 * The helper shapes below are `type` aliases rather than interfaces so that
 * values remain assignable to index-signature types such as
 * `Record<string, unknown>`, which object-literal types allow and
 * interfaces do not.
 */
type PdfRegion = { x: number; y: number; width: number; height: number };

/** One extracted table (MinerU feature). */
type PdfTableEntry = {
  /** Page the table was found on. */
  page: number;
  /** Table cells as a two-dimensional array of strings. */
  data: string[][];
  /** Caption associated with the table, when one is available. */
  caption?: string;
};

/** One extracted formula (MinerU feature). */
type PdfFormulaEntry = {
  /** Page the formula was found on. */
  page: number;
  /** LaTeX source of the formula. */
  latex: string;
  /** Location of the formula on the page, when known. */
  position?: PdfRegion;
};

/** One element produced by layout analysis (MinerU feature). */
type PdfLayoutEntry = {
  /** Page the element was found on. */
  page: number;
  /** Category assigned to the element. */
  type: 'title' | 'text' | 'image' | 'table' | 'formula';
  /** Content of the element, as a string. */
  content: string;
  /** Location of the element on the page, when known. */
  position?: PdfRegion;
};

/** An image taken from the PDF together with the page it came from. */
type PdfImageEntry = {
  /** Identifier of the image. */
  id: string;
  /** Image source string. */
  src: string;
  /** Page the image appears on. */
  pageNumber: number;
  /** Optional description of the image. */
  description?: string;
  /** Optional image width. */
  width?: number;
  /** Optional image height. */
  height?: number;
};

/** Descriptive information about the PDF and about the parse run. */
type PdfContentMetadata = {
  /** Name of the source file. */
  fileName?: string;
  /** Size of the source file. */
  fileSize?: number;
  /** Number of pages in the document; the only required metadata field. */
  pageCount: number;
  /** Name of the parser that produced the result, e.g. 'unpdf' | 'mineru'. */
  parser?: string;
  /** Time spent processing the document. */
  processingTime?: number;
  /** MinerU task ID. */
  taskId?: string;
  /**
   * Image ID to base64 URL mapping (used in the generation pipeline),
   * e.g. { "img_1": "data:image/png;base64,..." }.
   */
  imageMapping?: Record<string, string>;
  /** Images with their page numbers (used in the generation pipeline). */
  pdfImages?: PdfImageEntry[];
  /** Any further, untyped metadata keys are permitted. */
  [key: string]: unknown;
};

/**
 * Content obtained by parsing a PDF: its text and images, plus optional
 * structured extras (tables, formulas, layout, metadata).
 */
export interface ParsedPdfContent {
  /** Text extracted from the PDF. */
  text: string;
  /** Images as base64 data URLs. */
  images: string[];
  /** Extracted tables (MinerU feature). */
  tables?: PdfTableEntry[];
  /** Extracted formulas (MinerU feature). */
  formulas?: PdfFormulaEntry[];
  /** Layout analysis results (MinerU feature). */
  layout?: PdfLayoutEntry[];
  /** Metadata about the PDF. */
  metadata?: PdfContentMetadata;
}
/**
 * Parameters supplied when requesting that a PDF be parsed.
 */
export interface ParsePdfRequest {
  /** The PDF to parse, provided as a `File` object. */
  pdf: File;
}
/**
 * Envelope returned by the PDF parsing API.
 *
 * NOTE(review): `data` and `error` are both optional and the type does not
 * tie either of them to `success`; presumably `data` accompanies a successful
 * parse and `error` a failed one — confirm against the API handler.
 */
export interface ParsePdfResponse {
  /** Whether the parse request succeeded. */
  success: boolean;
  /** Parsed PDF content, when provided. */
  data?: ParsedPdfContent;
  /** Error message, when provided. */
  error?: string;
}
|