Spaces:
Sleeping
Sleeping
| import io | |
| import os | |
| import pandas as pd | |
| from PyPDF2 import PdfReader | |
def read_pdf(file_bytes: bytes) -> str:
    """
    Extract the text of a PDF file provided as bytes.

    Args:
        file_bytes: Raw contents of the PDF file.

    Returns:
        The text of all pages, stripped of leading/trailing whitespace, or a
        string starting with "[ERROR] Failed to read PDF:" if anything goes
        wrong (errors are reported in-band rather than raised).
    """
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
        # extract_text() may yield nothing for a page; skip those pages.
        page_texts = (page.extract_text() for page in reader.pages)
        # Join pages with a newline so the last word of one page is not
        # fused with the first word of the next.
        return "\n".join(chunk for chunk in page_texts if chunk).strip()
    except Exception as e:
        return f"[ERROR] Failed to read PDF: {e}"
def read_csv(file_bytes: bytes, max_rows: int = 10) -> str:
    """
    Render the first rows of a CSV file provided as bytes as a text table.

    Args:
        file_bytes: Raw contents of the CSV file.
        max_rows: Maximum number of data rows to include (default 10).

    Returns:
        The header plus up to ``max_rows`` rows formatted with
        ``DataFrame.to_string(index=False)``, or a string starting with
        "[ERROR] Failed to read CSV:" if parsing fails (errors are reported
        in-band rather than raised).
    """
    try:
        # nrows limits parsing to the preview rows, so the rest of the file
        # is not loaded and cannot influence the dtypes shown in the preview.
        preview = pd.read_csv(io.BytesIO(file_bytes), nrows=max_rows)
        return preview.to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to read CSV: {e}"
def read_txt(file_bytes: bytes) -> str:
    """
    Decode a plain text file provided as bytes.

    Args:
        file_bytes: Raw contents of the text file.

    Returns:
        The decoded text with leading/trailing whitespace removed. UTF-8 is
        tried first (a leading byte-order mark is dropped); if the bytes are
        not valid UTF-8 they are decoded as Latin-1 instead. Any other
        failure yields a string starting with "[ERROR] Failed to read TXT:".
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 and also removes a leading BOM,
        # which str.strip() would otherwise leave in place.
        return file_bytes.decode("utf-8-sig").strip()
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a character, so this fallback cannot
        # raise a decode error.
        return file_bytes.decode("latin1", errors="ignore").strip()
    except Exception as e:
        return f"[ERROR] Failed to read TXT: {e}"
def load_file_if_any(file_path: str) -> str:
    """
    Load a file and return its content as text, based on the file extension.

    Supports .pdf, .csv and .txt files; the extension is matched
    case-insensitively.

    Args:
        file_path: Path of the file to load. May be empty or None.

    Returns:
        "" if no path is given or the path does not exist;
        "[WARNING] Unsupported file format." for any other extension;
        otherwise whatever the matching reader returns. Failures while
        opening or dispatching are reported as a string starting with
        "[ERROR] Failed to load file:" rather than raised.
    """
    if not file_path or not os.path.exists(file_path):
        return ""
    try:
        # Compare against the lowercased path so "REPORT.PDF" is accepted.
        lowered = os.fspath(file_path).lower()
        # Decide on the format before reading, so an unsupported file is
        # never loaded into memory.
        if not lowered.endswith((".pdf", ".csv", ".txt")):
            return "[WARNING] Unsupported file format."
        with open(file_path, "rb") as handle:
            file_bytes = handle.read()
        if lowered.endswith(".pdf"):
            return read_pdf(file_bytes)
        if lowered.endswith(".csv"):
            return read_csv(file_bytes)
        return read_txt(file_bytes)
    except Exception as e:
        return f"[ERROR] Failed to load file: {e}"