| import { useState, useCallback, useRef } from 'react'; |
| import { FileProcessingState, ProcessingFile, ProcessedFile, FileMetadata } from '../types'; |
|
|
/**
 * Optional settings accepted by `useFileProcessing`.
 * Every field may be omitted; the hook then applies its own default.
 */
interface FileConfig {
  /** Largest accepted file size in bytes (the hook falls back to 50 * 1024 * 1024). */
  maxFileSize?: number;
  /** Extensions that pass validation without an "unsupported" warning. */
  supportedExtensions?: string[];
  /** Number of queued files at which the oldest entry is dropped (the hook falls back to 100). */
  processingQueueSize?: number;
}
|
|
/** Outcome of validating a single file before it is queued. */
interface FileValidationResult {
  /** `true` when `errors` is empty; warnings do not affect this flag. */
  isValid: boolean;
  /** Problems that make the file unacceptable. */
  errors: string[];
  /** Advisory messages that do not invalidate the file. */
  warnings: string[];
}
|
|
/**
 * Feature switches for individual processing stages.
 * NOTE(review): nothing in the visible part of this file reads this type —
 * confirm whether it is still needed.
 */
interface ProcessingOptions {
  /** Whether file metadata should be extracted. */
  extractMetadata?: boolean;
  /** Whether content analysis should be performed. */
  performContentAnalysis?: boolean;
  /** Whether semantic tags should be generated. */
  generateSemanticTags?: boolean;
  /** Whether security scanning should be enabled. */
  enableSecurityScanning?: boolean;
}
|
|
| export const useFileProcessing = (config: FileConfig = {}) => { |
// Hook state: the pending queue, the processed files, per-category counters
// and two lookup maps. The initial value is produced by an initializer
// function so the literal below (six Dates, two Maps, the extension arrays)
// is built only when React initializes the state, not on every re-render.
const [fileProcessing, setFileProcessing] = useState<FileProcessingState>(() => ({
  queue: [],
  processed: [],
  // Each category starts empty; `types` lists the extensions filed under it.
  categories: {
    code: { count: 0, types: ['tsx', 'ts', 'jsx', 'js', 'py', 'cpp', 'html', 'css'], totalSize: 0, lastUpdated: new Date() },
    documents: { count: 0, types: ['md', 'pdf', 'docx', 'txt', 'rtf'], totalSize: 0, lastUpdated: new Date() },
    data: { count: 0, types: ['json', 'csv', 'xml', 'yaml', 'sql'], totalSize: 0, lastUpdated: new Date() },
    multimedia: { count: 0, types: ['png', 'jpg', 'gif', 'mp4', 'wav', 'mp3'], totalSize: 0, lastUpdated: new Date() },
    archives: { count: 0, types: ['zip', 'tar', 'gz', 'rar'], totalSize: 0, lastUpdated: new Date() },
    executables: { count: 0, types: ['exe', 'dll', 'so', 'dylib'], totalSize: 0, lastUpdated: new Date() }
  },
  locations: new Map(),
  encoding: new Map()
}));
|
|
// Divisor applied to the simulated processing delay; written by adjustProcessingRate.
const processingRateRef = useRef<number>(1.0);
// Guard flag: true while processNextFile is busy with a file.
const isProcessingRef = useRef<boolean>(false);
|
|
| |
/**
 * Validates a browser `File` against the hook configuration.
 *
 * Only the size check can produce an error (and therefore `isValid: false`);
 * the extension, risk and MIME checks produce warnings.
 *
 * @param file - The file to inspect; only `name`, `size` and `type` are read.
 * @returns The collected errors and warnings, with `isValid` true when there are no errors.
 */
const validateFile = useCallback(async (file: File): Promise<FileValidationResult> => {
  const errors: string[] = [];
  const warnings: string[] = [];

  // Size limit. A missing (or zero) configured limit falls back to 50 MiB.
  const maxSize = config.maxFileSize || 50 * 1024 * 1024;
  if (file.size > maxSize) {
    errors.push(`File size (${formatFileSize(file.size)}) exceeds maximum allowed size (${formatFileSize(maxSize)})`);
  }

  const extension = getFileExtension(file.name);
  const defaultExtensions = [
    'tsx', 'ts', 'jsx', 'js', 'py', 'cpp', 'html', 'css', 'md', 'pdf',
    'json', 'csv', 'xml', 'yaml', 'png', 'jpg', 'mp4', 'wav'
  ];
  // getFileExtension yields a lower-cased extension without the dot, so
  // caller-supplied entries such as '.PDF' are normalised to the same shape
  // before comparing; otherwise they could never match.
  const supportedExtensions = (config.supportedExtensions || defaultExtensions).map(ext =>
    ext.trim().replace(/^\.+/, '').toLowerCase()
  );

  if (!supportedExtensions.includes(extension)) {
    warnings.push(`File extension '${extension}' is not in the supported list`);
  }

  // Extensions flagged as potentially risky; advisory only.
  const dangerousExtensions = ['exe', 'bat', 'cmd', 'scr', 'vbs', 'js', 'jar'];
  if (dangerousExtensions.includes(extension)) {
    warnings.push(`File type '${extension}' may pose security risks`);
  }

  // The MIME check is skipped when the file reports no type.
  if (file.type && !isValidMimeType(file.type, extension)) {
    warnings.push(`MIME type '${file.type}' doesn't match file extension '${extension}'`);
  }

  return {
    isValid: errors.length === 0,
    errors,
    warnings
  };
  // Depend on the individual settings rather than on `config`: the hook's
  // default `{}` argument is a fresh object on every render, which would
  // recreate this callback each time.
}, [config.maxFileSize, config.supportedExtensions]);
|
|
| |
/**
 * Appends a file to the end of the processing queue.
 *
 * When the queue has reached the configured size (default 100), the oldest
 * entries are dropped so that the queue holds at most that many files after
 * the new one is added.
 *
 * @param file - The file to enqueue.
 */
const addFileToQueue = useCallback(async (file: ProcessingFile) => {
  // A missing (or zero) limit falls back to 100; the limit is never treated as below 1.
  const queueSize = Math.max(1, config.processingQueueSize || 100);

  setFileProcessing(prev => {
    if (prev.queue.length >= queueSize) {
      console.warn('Processing queue is full, removing oldest item');
      // Keep the newest (queueSize - 1) entries. When the queue is exactly
      // full this drops one item; if it is already over the limit, it is
      // trimmed back instead of staying oversized.
      const firstKept = prev.queue.length - queueSize + 1;
      return {
        ...prev,
        queue: [...prev.queue.slice(firstKept), file]
      };
    }

    return {
      ...prev,
      queue: [...prev.queue, file]
    };
  });
  // Depend on the single setting used, not on `config`, whose default `{}`
  // is a new object on every render.
}, [config.processingQueueSize]);
|
|
| |
/**
 * Processes the file at the head of the queue.
 *
 * Marks it as 'processing', waits for a simulated delay, builds the
 * `ProcessedFile`, then moves it from `queue` to `processed` and updates the
 * counters of its category.
 *
 * @returns The processed file, or `null` when another call is still running,
 *          the queue is empty, or processing failed (the file is then marked 'error').
 */
const processNextFile = useCallback(async (): Promise<ProcessedFile | null> => {
  // Only one file is handled at a time; overlapping calls are ignored.
  if (isProcessingRef.current) return null;

  // NOTE(review): the head is taken regardless of its status, so a file left
  // in 'error' state is picked again on the next call — confirm this is intended.
  const nextFile = fileProcessing.queue[0];
  if (!nextFile) return null;

  isProcessingRef.current = true;

  try {
    setFileProcessing(prev => ({
      ...prev,
      queue: prev.queue.map(f =>
        f.id === nextFile.id ? { ...f, status: 'processing' } : f
      )
    }));

    // Simulated work: size / 1000 ms, divided by the current rate, capped at 5 s.
    const processingTime = Math.min(5000, nextFile.size / 1000 / processingRateRef.current);
    await new Promise(resolve => setTimeout(resolve, processingTime));

    const processedFile: ProcessedFile = {
      ...nextFile,
      status: 'completed',
      processedAt: new Date(),
      metadata: await generateFileMetadata(nextFile),
      content: await extractFileContent(nextFile)
    };

    // Resolved outside the updater so the updater below is a pure function of `prev`.
    const category = determineFileCategory(nextFile.type);
    const categoryUpdatedAt = new Date();

    setFileProcessing(prev => {
      const previousStats = category ? prev.categories[category] : undefined;

      // Copy the category table and the touched entry instead of mutating
      // them: `{ ...prev }` is a shallow copy, so incrementing in place would
      // modify the previous state object and count the file twice whenever
      // React runs the updater more than once.
      let categories = prev.categories;
      if (category && previousStats) {
        categories = { ...prev.categories };
        categories[category] = {
          ...previousStats,
          count: previousStats.count + 1,
          totalSize: previousStats.totalSize + nextFile.size,
          lastUpdated: categoryUpdatedAt
        };
      }

      return {
        ...prev,
        queue: prev.queue.filter(f => f.id !== nextFile.id),
        processed: [...prev.processed, processedFile],
        categories
      };
    });

    return processedFile;

  } catch (error) {
    console.error('File processing failed:', error);

    // Leave the file in the queue, flagged as failed.
    setFileProcessing(prev => ({
      ...prev,
      queue: prev.queue.map(f =>
        f.id === nextFile.id
          ? { ...f, status: 'error' }
          : f
      )
    }));

    return null;
  } finally {
    // Always release the guard, on success and on failure.
    isProcessingRef.current = false;
  }
}, [fileProcessing.queue]);
|
|
| |
/**
 * Assembles the metadata record for a queued file.
 *
 * `createdAt` and `modifiedAt` are both set to the moment of the call; they
 * are not read from the file. Encoding and checksum are derived from the
 * file name via `detectEncoding` and `calculateChecksum`.
 *
 * @param file - The queued file to describe.
 * @returns The populated metadata object.
 */
const generateFileMetadata = async (file: ProcessingFile): Promise<FileMetadata> => {
  const timestamp = new Date();

  // Resolve the derived fields one after another, in the same order the
  // resulting object lists them.
  const encoding = detectEncoding(file.name);
  const checksum = await calculateChecksum(file.name);
  const extractedText = await extractTextContent(file);
  const semanticTags = await generateSemanticTags(file);

  const metadata: FileMetadata = {
    size: file.size,
    createdAt: timestamp,
    modifiedAt: timestamp,
    encoding,
    checksum,
    contentType: file.type,
    extractedText,
    semanticTags
  };
  return metadata;
};
|
|
| |
/**
 * Produces the text extracted from a file, for text-like extensions only.
 *
 * The returned string is a fixed placeholder that mentions the file name;
 * no file content is read here.
 *
 * @param file - The queued file.
 * @returns The placeholder text, or `undefined` when the extension is not text-like.
 */
const extractTextContent = async (file: ProcessingFile): Promise<string | undefined> => {
  const textLike = new Set(['txt', 'md', 'js', 'ts', 'tsx', 'jsx', 'py', 'html', 'css', 'json', 'xml', 'yaml']);

  if (!textLike.has(getFileExtension(file.name))) {
    return undefined;
  }

  return `Extracted text content from ${file.name}. This would contain the actual file content in a real implementation.`;
};
|
|
| |
/**
 * Derives descriptive tags for a file from its category, extension, size and name.
 *
 * Tag order: category (if any), extension (if any), size bucket, name-based
 * keywords, then 'source-code' and 'programming' for code extensions.
 *
 * @param file - The queued file.
 * @returns The tags in the order described above.
 */
const generateSemanticTags = async (file: ProcessingFile): Promise<string[]> => {
  const tags: string[] = [];
  const extension = getFileExtension(file.name);
  const fileName = file.name.toLowerCase();

  const category = determineFileCategory(file.type);
  if (category) {
    tags.push(category);
  }

  // An empty extension carries no information; do not emit an empty-string tag.
  if (extension) {
    tags.push(extension);
  }

  // Size bucket: below 1 KiB, below 1 MiB, otherwise large.
  if (file.size < 1024) tags.push('small');
  else if (file.size < 1024 * 1024) tags.push('medium');
  else tags.push('large');

  // Substring of the lower-cased file name -> tag, checked in this order.
  const keywordTags: ReadonlyArray<readonly [string, string]> = [
    ['test', 'testing'],
    ['config', 'configuration'],
    ['api', 'api'],
    ['component', 'component'],
    ['service', 'service'],
    ['util', 'utility'],
    ['doc', 'documentation']
  ];
  for (const [keyword, tag] of keywordTags) {
    if (fileName.includes(keyword)) tags.push(tag);
  }

  const codeExtensions = ['js', 'ts', 'tsx', 'jsx', 'py', 'cpp', 'java', 'go', 'rs'];
  if (codeExtensions.includes(extension)) {
    tags.push('source-code', 'programming');
  }

  return tags;
};
|
|
| |
/**
 * Builds the `content` payload stored on a processed file.
 *
 * The payload is a summary derived from the file's name, type and size plus
 * an ISO timestamp; no file bytes are read here.
 *
 * @param file - The queued file.
 * @returns The summary object.
 */
const extractFileContent = async (file: ProcessingFile): Promise<any> => {
  const { name, type, size } = file;

  const summary = {
    fileName: name,
    fileType: type,
    size,
    extension: getFileExtension(name),
    category: determineFileCategory(type),
    processedAt: new Date().toISOString()
  };
  return summary;
};
|
|
| |
/**
 * Returns the lower-cased extension of a file name, without the leading dot.
 *
 * Returns '' when the name has no extension: no dot at all ('README'), a
 * leading-dot name ('.gitignore'), or a trailing dot ('file.'). Dots in
 * directory segments are ignored ('src.v2/notes' has no extension).
 *
 * @param fileName - A file name, optionally prefixed with a path.
 * @returns The extension such as 'ts' or 'gz', or '' when there is none.
 */
const getFileExtension = (fileName: string): string => {
  // Look only at the last path segment so dots in directory names do not count.
  const lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
  const baseName = fileName.slice(lastSeparator + 1);

  const dotIndex = baseName.lastIndexOf('.');
  // -1: no dot, so there is no extension (splitting on '.' would return the whole name).
  //  0: dot-file such as '.gitignore', treated as having no extension.
  if (dotIndex <= 0) return '';

  return baseName.slice(dotIndex + 1).toLowerCase();
};
|
|
/**
 * Formats a byte count as a human-readable string with one decimal place.
 *
 * The value is divided by 1024 until it drops below 1024 or the largest
 * unit (GB) is reached, so very large values stay in GB.
 *
 * @param bytes - The size in bytes.
 * @returns For example '1.5 KB' or '50.0 MB'.
 */
const formatFileSize = (bytes: number): string => {
  const unitLabels = ['B', 'KB', 'MB', 'GB'];
  const largestUnit = unitLabels.length - 1;

  let scaled = bytes;
  let unit = 0;
  for (; unit < largestUnit && scaled >= 1024; unit += 1) {
    scaled = scaled / 1024;
  }

  return `${scaled.toFixed(1)} ${unitLabels[unit]}`;
};
|
|
/**
 * Tells whether a reported MIME type is consistent with a file extension.
 *
 * Only MIME types listed in the table below can be judged. For any other
 * type there is nothing to contradict the extension, so the pair is accepted
 * rather than reported as a mismatch.
 *
 * @param mimeType - The MIME type; parameters after ';' and letter case are ignored.
 * @param extension - The file extension without the dot; letter case is ignored.
 * @returns `false` only when the MIME type is known and does not list the extension.
 */
const isValidMimeType = (mimeType: string, extension: string): boolean => {
  // A Map rather than a plain object: keys such as 'constructor' must not
  // resolve to inherited Object.prototype members.
  const extensionsByMime = new Map<string, readonly string[]>([
    ['text/plain', ['txt', 'md']],
    ['text/markdown', ['md', 'markdown']],
    ['application/json', ['json']],
    ['text/html', ['html', 'htm']],
    ['text/css', ['css']],
    ['application/javascript', ['js']],
    // Same extensions under the other JavaScript media type.
    ['text/javascript', ['js', 'mjs']],
    ['image/png', ['png']],
    ['image/jpeg', ['jpg', 'jpeg']],
    ['application/pdf', ['pdf']]
  ]);

  // Drop parameters such as '; charset=utf-8' before the lookup.
  const essence = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
  const allowedExtensions = extensionsByMime.get(essence);

  // Unknown MIME type: cannot be judged, so it is not a mismatch.
  if (!allowedExtensions) return true;

  return allowedExtensions.includes(extension.toLowerCase());
};
|
|
/**
 * Guesses a file's encoding from its extension alone; no content is inspected.
 *
 * The text list covers every extension that `extractTextContent` treats as
 * text, so a file that yields extracted text is never labelled 'binary'.
 *
 * @param fileName - The file name whose extension is examined.
 * @returns 'utf-8' for text-like extensions, otherwise 'binary'.
 */
const detectEncoding = (fileName: string): string => {
  const extension = getFileExtension(fileName);
  // 'py' and 'yaml' are included to match extractTextContent's list.
  const textExtensions = ['txt', 'md', 'js', 'ts', 'tsx', 'jsx', 'py', 'html', 'css', 'json', 'xml', 'yaml'];

  return textExtensions.includes(extension) ? 'utf-8' : 'binary';
};
|
|
/**
 * Produces the checksum string stored in a file's metadata.
 *
 * NOTE(review): this is not a content hash. The value is built from the
 * length of the file name and the current time, so it differs between calls
 * for the same file.
 *
 * @param fileName - The file name; only its length is used.
 * @returns A string of the form `checksum_<nameLength>_<epochMillis>`.
 */
const calculateChecksum = async (fileName: string): Promise<string> => {
  const nameLength = fileName.length;
  const timestamp = Date.now();
  return ['checksum', nameLength, timestamp].join('_');
};
|
|
/**
 * Maps a MIME type to one of the hook's file categories.
 *
 * Rules are checked from most to least specific so the result agrees with
 * the extensions listed per category in the hook state:
 *  1. image/, video/, audio/            -> 'multimedia' (before the 'xml' rule, for image/svg+xml)
 *  2. office documents, PDF, RTF, plain text and markdown -> 'documents'
 *  3. json, xml, csv, yaml, sql         -> 'data' (before the generic text/ rule, for text/csv)
 *  4. remaining text/ types, javascript, typescript -> 'code'
 *  5. zip, tar, compressed              -> 'archives'
 *  6. executable, application/x-        -> 'executables'
 *
 * @param mimeType - The MIME type; parameters after ';' and letter case are ignored.
 * @returns The category key, or `null` when no rule matches (including an empty type).
 */
const determineFileCategory = (
  mimeType: string
): 'code' | 'documents' | 'data' | 'multimedia' | 'archives' | 'executables' | null => {
  const mime = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
  if (!mime) return null;

  const containsAny = (...fragments: string[]): boolean =>
    fragments.some(fragment => mime.includes(fragment));
  const isTextFamily = mime.startsWith('text/');

  if (mime.startsWith('image/') || mime.startsWith('video/') || mime.startsWith('audio/')) {
    return 'multimedia';
  }
  if (
    containsAny('document', 'pdf', 'msword', 'rtf') ||
    mime === 'text/plain' ||
    mime === 'text/markdown' ||
    // Non-text/ types that merely mention 'text' keep counting as documents.
    (!isTextFamily && containsAny('text'))
  ) {
    return 'documents';
  }
  if (containsAny('json', 'xml', 'csv', 'yaml', 'sql')) {
    return 'data';
  }
  if (isTextFamily || containsAny('javascript', 'typescript')) {
    return 'code';
  }
  if (containsAny('zip', 'tar', 'compressed')) {
    return 'archives';
  }
  if (containsAny('executable', 'application/x-')) {
    return 'executables';
  }

  return null;
};
|
|
| |
/**
 * Summarises the current state of the hook.
 *
 * @returns Queue length, number and formatted total size of processed files,
 *          the average processing time, the category counters and the current rate.
 */
const getProcessingStats = useCallback(() => {
  const { queue, processed, categories } = fileProcessing;
  const totalProcessed = processed.length;

  let processedBytes = 0;
  let elapsedTotal = 0;
  for (const entry of processed) {
    processedBytes += entry.size;
    // NOTE(review): this subtracts processedAt from a copy of itself, so each
    // term is 0 and the average below is always 0. A start timestamp would be
    // needed for a real duration; none is visible in this file.
    elapsedTotal += entry.processedAt.getTime() - new Date(entry.processedAt).getTime();
  }

  let averageProcessingTime = 0;
  if (totalProcessed > 0) {
    averageProcessingTime = elapsedTotal / totalProcessed;
  }

  return {
    queueLength: queue.length,
    totalProcessed,
    totalSize: formatFileSize(processedBytes),
    averageProcessingTime,
    categories,
    processingRate: processingRateRef.current
  };
}, [fileProcessing]);
|
|
| |
/** Empties the list of processed files; the queue and all other state are kept. */
const clearProcessedFiles = useCallback(() => {
  setFileProcessing(current => {
    return { ...current, processed: [] };
  });
}, []);
|
|
| |
/**
 * Sets the rate that divides the simulated processing delay.
 *
 * The value is clamped to the range 0.1 to 2.0. A `NaN` argument is ignored
 * and the previous rate is kept.
 *
 * @param rate - The requested rate.
 */
const adjustProcessingRate = useCallback((rate: number) => {
  // NaN passes through Math.min/Math.max unchanged and would turn the
  // simulated delay in processNextFile into NaN.
  if (Number.isNaN(rate)) return;

  processingRateRef.current = Math.max(0.1, Math.min(2.0, rate));
}, []);
|
|
// Public surface of the hook: the current state followed by the actions that operate on it.
const hookApi = {
  fileProcessing,
  validateFile,
  addFileToQueue,
  processNextFile,
  getProcessingStats,
  clearProcessedFiles,
  adjustProcessingRate
};
return hookApi;
| }; |
|
|
|
|