import io
import os

import pandas as pd

try:
    from PyPDF2 import PdfReader
except ImportError:
    # PDF support is optional: keep the CSV/TXT readers importable and let
    # read_pdf report the missing backend as an error string instead.
    PdfReader = None


def read_pdf(file_bytes: bytes) -> str:
    """Extract the text of every page of a PDF supplied as raw bytes.

    Args:
        file_bytes: Complete contents of the PDF file.

    Returns:
        The concatenated text of all pages with leading/trailing whitespace
        stripped, or a string starting with ``[ERROR]`` when the PDF backend
        is unavailable or the data cannot be parsed.
    """
    if PdfReader is None:
        return "[ERROR] Failed to read PDF: PyPDF2 is not installed"
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
        # `or ""` guards against pages whose extraction yields nothing.
        pages = (page.extract_text() or "" for page in reader.pages)
        return "".join(pages).strip()
    except Exception as e:
        return f"[ERROR] Failed to read PDF: {e}"


def read_csv(file_bytes: bytes) -> str:
    """Render the first rows of a CSV supplied as raw bytes.

    Args:
        file_bytes: Complete contents of the CSV file.

    Returns:
        A plain-text table of at most the first 10 data rows (without the
        index column), or a string starting with ``[ERROR]`` when pandas
        cannot parse the data.
    """
    try:
        df = pd.read_csv(io.BytesIO(file_bytes))
        return df.head(10).to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to read CSV: {e}"


def read_txt(file_bytes: bytes) -> str:
    """Decode a plain-text file supplied as raw bytes.

    UTF-8 is tried first; a leading byte-order mark is dropped so it does not
    leak into the returned text. Data that is not valid UTF-8 is decoded as
    Latin-1 instead.

    Args:
        file_bytes: Complete contents of the text file.

    Returns:
        The decoded text with leading/trailing whitespace stripped, or a
        string starting with ``[ERROR]`` on any other failure.
    """
    try:
        # "utf-8-sig" behaves like "utf-8" but also consumes a leading BOM,
        # which str.strip() would otherwise leave in place.
        return file_bytes.decode("utf-8-sig").strip()
    except UnicodeDecodeError:
        return file_bytes.decode('latin1', errors='ignore').strip()
    except Exception as e:
        return f"[ERROR] Failed to read TXT: {e}"


def load_file_if_any(file_path: str) -> str:
    """Load a file and return its content as text, chosen by file extension.

    Supports ``.pdf``, ``.csv`` and ``.txt`` files; the extension is matched
    case-insensitively.

    Args:
        file_path: Path of the file to load. May be empty or ``None``.

    Returns:
        ``""`` when no path is given or the path does not exist; the reader's
        output for a supported extension; a ``[WARNING]`` string for any other
        extension; or an ``[ERROR]`` string when the file cannot be loaded.
    """
    if not file_path or not os.path.exists(file_path):
        return ""

    try:
        with open(file_path, "rb") as f:
            file_bytes = f.read()

        # Normalise case so "REPORT.PDF" is dispatched like "report.pdf";
        # os.fspath also lets os.PathLike objects through.
        lowered = os.fspath(file_path).lower()
        if lowered.endswith(".pdf"):
            return read_pdf(file_bytes)
        if lowered.endswith(".csv"):
            return read_csv(file_bytes)
        if lowered.endswith(".txt"):
            return read_txt(file_bytes)
        return "[WARNING] Unsupported file format."
    except Exception as e:
        return f"[ERROR] Failed to load file: {e}"