| import { Providers } from '@librechat/agents'; |
| import { isOpenAILikeProvider, isDocumentSupportedProvider } from 'librechat-data-provider'; |
| import type { IMongoFile } from '@librechat/data-schemas'; |
| import type { |
| AnthropicDocumentBlock, |
| StrategyFunctions, |
| DocumentResult, |
| ServerRequest, |
| } from '~/types'; |
| import { getFileStream, getConfiguredFileSizeLimit } from './utils'; |
| import { validatePdf } from '~/files/validation'; |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| export async function encodeAndFormatDocuments( |
| req: ServerRequest, |
| files: IMongoFile[], |
| params: { provider: Providers; endpoint?: string; useResponsesApi?: boolean }, |
| getStrategyFunctions: (source: string) => StrategyFunctions, |
| ): Promise<DocumentResult> { |
| const { provider, endpoint, useResponsesApi } = params; |
| if (!files?.length) { |
| return { documents: [], files: [] }; |
| } |
|
|
| const encodingMethods: Record<string, StrategyFunctions> = {}; |
| const result: DocumentResult = { documents: [], files: [] }; |
|
|
| const documentFiles = files.filter( |
| (file) => file.type === 'application/pdf' || file.type?.startsWith('application/'), |
| ); |
|
|
| if (!documentFiles.length) { |
| return result; |
| } |
|
|
| const results = await Promise.allSettled( |
| documentFiles.map((file) => { |
| if (file.type !== 'application/pdf' || !isDocumentSupportedProvider(provider)) { |
| return Promise.resolve(null); |
| } |
| return getFileStream(req, file, encodingMethods, getStrategyFunctions); |
| }), |
| ); |
|
|
| for (const settledResult of results) { |
| if (settledResult.status === 'rejected') { |
| console.error('Document processing failed:', settledResult.reason); |
| continue; |
| } |
|
|
| const processed = settledResult.value; |
| if (!processed) continue; |
|
|
| const { file, content, metadata } = processed; |
|
|
| if (!content || !file) { |
| if (metadata) result.files.push(metadata); |
| continue; |
| } |
|
|
| if (file.type === 'application/pdf' && isDocumentSupportedProvider(provider)) { |
| const pdfBuffer = Buffer.from(content, 'base64'); |
|
|
| |
| const configuredFileSizeLimit = getConfiguredFileSizeLimit(req, { |
| provider, |
| endpoint, |
| }); |
|
|
| const validation = await validatePdf( |
| pdfBuffer, |
| pdfBuffer.length, |
| provider, |
| configuredFileSizeLimit, |
| ); |
|
|
| if (!validation.isValid) { |
| throw new Error(`PDF validation failed: ${validation.error}`); |
| } |
|
|
| if (provider === Providers.ANTHROPIC) { |
| const document: AnthropicDocumentBlock = { |
| type: 'document', |
| source: { |
| type: 'base64', |
| media_type: 'application/pdf', |
| data: content, |
| }, |
| citations: { enabled: true }, |
| }; |
|
|
| if (file.filename) { |
| document.context = `File: "${file.filename}"`; |
| } |
|
|
| result.documents.push(document); |
| } else if (useResponsesApi) { |
| result.documents.push({ |
| type: 'input_file', |
| filename: file.filename, |
| file_data: `data:application/pdf;base64,${content}`, |
| }); |
| } else if (provider === Providers.GOOGLE || provider === Providers.VERTEXAI) { |
| result.documents.push({ |
| type: 'media', |
| mimeType: 'application/pdf', |
| data: content, |
| }); |
| } else if (isOpenAILikeProvider(provider) && provider != Providers.AZURE) { |
| result.documents.push({ |
| type: 'file', |
| file: { |
| filename: file.filename, |
| file_data: `data:application/pdf;base64,${content}`, |
| }, |
| }); |
| } |
| result.files.push(metadata); |
| } |
| } |
|
|
| return result; |
| } |
|
|