Spaces:
Sleeping
Sleeping
File size: 1,824 Bytes
import io
import os
import pandas as pd
from PyPDF2 import PdfReader
def read_pdf(file_bytes: bytes) -> str:
    """
    Return the text of every page of an in-memory PDF, concatenated.

    The bytes are wrapped in a BytesIO and handed to PdfReader. Page texts
    are joined back to back (no separator) and the result is stripped of
    leading/trailing whitespace.

    Any exception is caught and reported as a string starting with
    "[ERROR] Failed to read PDF:" instead of being raised.
    """
    try:
        document = PdfReader(io.BytesIO(file_bytes))
        # A falsy extract_text() result is treated as an empty page.
        page_texts = [page.extract_text() or "" for page in document.pages]
        return "".join(page_texts).strip()
    except Exception as e:
        return f"[ERROR] Failed to read PDF: {e}"
def read_csv(file_bytes: bytes, max_rows: int = 10) -> str:
    """
    Render a plain-text preview of the first rows of an in-memory CSV.

    Only the header and the first `max_rows` data rows are parsed, so the
    cost does not grow with the size of the file and rows beyond the
    preview cannot influence (or break) the result. Column types are
    therefore inferred from the previewed rows only.

    Args:
        file_bytes: Raw contents of the CSV file.
        max_rows: Maximum number of data rows to include (default 10).

    Returns:
        The preview table as text, without the index column. On any
        failure, a string starting with "[ERROR] Failed to read CSV:"
        is returned instead of raising.
    """
    try:
        # nrows stops the parser early instead of loading the whole file
        # and discarding everything after the first rows.
        preview = pd.read_csv(io.BytesIO(file_bytes), nrows=max_rows)
        return preview.to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to read CSV: {e}"
def read_txt(file_bytes: bytes) -> str:
    """
    Decode a plain text file provided as bytes and strip outer whitespace.

    Decoding is attempted as UTF-8 first (a leading byte-order mark, if
    present, is dropped). If the bytes are not valid UTF-8 they are
    decoded as Latin-1, which accepts every byte value.

    Returns:
        The decoded, stripped text. On any other failure (for example a
        non-bytes argument), a string starting with
        "[ERROR] Failed to read TXT:" is returned instead of raising.
    """
    try:
        try:
            # "utf-8-sig" behaves like UTF-8 but removes a leading BOM,
            # which str.strip() would otherwise leave in place as U+FEFF.
            text = file_bytes.decode("utf-8-sig")
        except UnicodeDecodeError:
            # Latin-1 maps all 256 byte values, so this cannot fail on bytes.
            text = file_bytes.decode("latin1")
        return text.strip()
    except Exception as e:
        return f"[ERROR] Failed to read TXT: {e}"
def load_file_if_any(file_path: str) -> str:
    """
    Load a file and return its content as text, chosen by file extension.

    Supports .pdf, .csv and .txt files; the extension is matched
    case-insensitively, so "REPORT.PDF" is handled like "report.pdf".
    `file_path` may be a string or an os.PathLike object.

    Returns:
        - "" if `file_path` is empty/None or does not exist.
        - "[WARNING] Unsupported file format." for any other extension
          (the file is not read in that case).
        - The output of read_pdf / read_csv / read_txt for supported files.
        - A string starting with "[ERROR] Failed to load file:" if reading
          or dispatching raises.
    """
    if not file_path or not os.path.exists(file_path):
        return ""

    try:
        # Normalise once so the extension check ignores letter case.
        lowered = os.fspath(file_path).lower()

        # Reject unknown formats before reading, so an unsupported (and
        # possibly large) file is never loaded into memory.
        if not lowered.endswith((".pdf", ".csv", ".txt")):
            return "[WARNING] Unsupported file format."

        with open(file_path, "rb") as f:
            file_bytes = f.read()

        if lowered.endswith(".pdf"):
            return read_pdf(file_bytes)
        if lowered.endswith(".csv"):
            return read_csv(file_bytes)
        return read_txt(file_bytes)
    except Exception as e:
        return f"[ERROR] Failed to load file: {e}"