Final_Assignment_Template / tools /file_loader.py
FD900's picture
Update tools/file_loader.py
5fc87ee verified
raw
history blame
1.82 kB
import io
import os
import pandas as pd
from PyPDF2 import PdfReader
def read_pdf(file_bytes: bytes) -> str:
    """
    Return the text of an in-memory PDF, with all pages concatenated in order.

    Args:
        file_bytes: Raw contents of the PDF file.

    Returns:
        The extracted text with leading/trailing whitespace removed. Page
        texts are joined directly, with no separator between pages. If
        anything goes wrong while parsing or extracting, a string starting
        with "[ERROR] Failed to read PDF:" is returned instead of raising.
    """
    try:
        pdf = PdfReader(io.BytesIO(file_bytes))
        # A page whose extraction yields a falsy result (None or "") simply
        # contributes nothing to the output.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
        return "".join(page_texts).strip()
    except Exception as err:
        return f"[ERROR] Failed to read PDF: {err}"
def read_csv(file_bytes: bytes) -> str:
    """
    Render a preview (the first 10 rows) of a CSV file provided as bytes.

    The data is parsed as UTF-8 first. If that raises a decoding error the
    parse is retried as Latin-1, which maps every byte to a character, so
    CSV files exported in a legacy single-byte encoding still load instead
    of being reported as unreadable.

    Args:
        file_bytes: Raw contents of the CSV file.

    Returns:
        The first 10 rows formatted as a plain-text table without the index
        column, or a string starting with "[ERROR] Failed to read CSV:" if
        the data cannot be parsed.
    """
    try:
        try:
            df = pd.read_csv(io.BytesIO(file_bytes))
        except UnicodeDecodeError:
            # Use a fresh buffer: the first attempt has already consumed
            # part of the previous one.
            df = pd.read_csv(io.BytesIO(file_bytes), encoding="latin1")
        return df.head(10).to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to read CSV: {e}"
def read_txt(file_bytes: bytes) -> str:
    """
    Decode a plain text file provided as bytes.

    Decoding is attempted as UTF-8 first, dropping a leading byte-order mark
    if one is present. Bytes that are not valid UTF-8 are decoded as Latin-1
    instead, which accepts every byte value.

    Args:
        file_bytes: Raw contents of the text file.

    Returns:
        The decoded text with leading/trailing whitespace removed, or a
        string starting with "[ERROR] Failed to read TXT:" if the input
        cannot be decoded at all (for example, it is not a bytes object).
    """
    try:
        # "utf-8-sig" behaves like "utf-8" but strips a leading BOM; with
        # plain "utf-8" the BOM survives as U+FEFF, which strip() does not
        # treat as whitespace.
        return file_bytes.decode("utf-8-sig").strip()
    except UnicodeDecodeError:
        # Latin-1 maps all 256 byte values, so this decode cannot fail.
        return file_bytes.decode("latin1").strip()
    except Exception as e:
        return f"[ERROR] Failed to read TXT: {e}"
def load_file_if_any(file_path: str) -> str:
    """
    Load a file and return its content as text, chosen by file extension.

    Supports .pdf, .csv and .txt files. The extension is matched
    case-insensitively, so "REPORT.PDF" is handled like "report.pdf".

    Args:
        file_path: Path of the file to load. May be empty or None.

    Returns:
        - "" if no path is given or the path does not exist.
        - "[WARNING] Unsupported file format." for any other extension; the
          file is not read in that case.
        - Otherwise whatever the matching reader (read_pdf, read_csv or
          read_txt) returns for the file's bytes.
        - A string starting with "[ERROR] Failed to load file:" if the file
          cannot be opened or read.
    """
    if not file_path or not os.path.exists(file_path):
        return ""
    try:
        lowered = os.fspath(file_path).lower()
        # Decide on the format before touching the file so that large
        # unsupported files are never pulled into memory.
        if not lowered.endswith((".pdf", ".csv", ".txt")):
            return "[WARNING] Unsupported file format."
        with open(file_path, "rb") as f:
            file_bytes = f.read()
        if lowered.endswith(".pdf"):
            return read_pdf(file_bytes)
        if lowered.endswith(".csv"):
            return read_csv(file_bytes)
        return read_txt(file_bytes)
    except Exception as e:
        return f"[ERROR] Failed to load file: {e}"