Final_Assignment_Template

Sleeping

Update tools/file_loader.py

5fc87ee verified 10 months ago

1.82 kB

	import io
	import os
	import pandas as pd
	from PyPDF2 import PdfReader

	def read_pdf(file_bytes: bytes) -> str:
	    """Extract the text of every page of a PDF supplied as raw bytes.

	    Args:
	        file_bytes: Complete contents of the PDF file.

	    Returns:
	        The text of all pages that produced text, one page per block
	        separated by a newline, with surrounding whitespace stripped.
	        If anything goes wrong while parsing, a string of the form
	        ``"[ERROR] Failed to read PDF: <reason>"`` is returned instead of
	        raising.
	    """
	    try:
	        reader = PdfReader(io.BytesIO(file_bytes))
	        # extract_text() may give back an empty/None value; normalise it to ""
	        # so such pages can be dropped below.
	        page_texts = [page.extract_text() or "" for page in reader.pages]
	        # Join with a newline so the last word of one page does not fuse
	        # with the first word of the next.
	        return "\n".join(text for text in page_texts if text).strip()
	    except Exception as e:
	        # Best-effort loader: report the failure as text rather than raising.
	        return f"[ERROR] Failed to read PDF: {e}"

	def read_csv(file_bytes: bytes) -> str:
	    """Render the first ten rows of a CSV supplied as raw bytes.

	    The data is parsed as UTF-8 first. If that fails with a decoding error
	    it is parsed again as Latin-1, so files exported with a legacy
	    single-byte encoding still produce a preview instead of an error.

	    Args:
	        file_bytes: Complete contents of the CSV file.

	    Returns:
	        A plain-text table (no index column) of at most ten rows. If the
	        data cannot be parsed, a string of the form
	        ``"[ERROR] Failed to read CSV: <reason>"`` is returned instead of
	        raising.
	    """
	    try:
	        try:
	            df = pd.read_csv(io.BytesIO(file_bytes))
	        except UnicodeDecodeError:
	            # Latin-1 maps every byte to a character, so the retry cannot
	            # fail on decoding; a fresh buffer is needed because the first
	            # attempt consumed the old one.
	            df = pd.read_csv(io.BytesIO(file_bytes), encoding="latin1")
	        return df.head(10).to_string(index=False)
	    except Exception as e:
	        # Best-effort loader: report the failure as text rather than raising.
	        return f"[ERROR] Failed to read CSV: {e}"

	def read_txt(file_bytes: bytes) -> str:
	    """Decode a plain-text file supplied as raw bytes.

	    Args:
	        file_bytes: Complete contents of the text file.

	    Returns:
	        The decoded text with surrounding whitespace stripped. UTF-8 is
	        tried first (a leading byte-order mark is discarded); bytes that
	        are not valid UTF-8 are decoded as Latin-1 instead. Any other
	        failure yields a string of the form
	        ``"[ERROR] Failed to read TXT: <reason>"`` instead of raising.
	    """
	    try:
	        # "utf-8-sig" decodes ordinary UTF-8 and also drops a leading BOM,
	        # which str.strip() would otherwise leave in place as "\ufeff".
	        return file_bytes.decode("utf-8-sig").strip()
	    except UnicodeDecodeError:
	        # Latin-1 assigns a character to every byte value, so this cannot fail.
	        return file_bytes.decode("latin1").strip()
	    except Exception as e:
	        # Reached when the argument is not bytes-like (e.g. None).
	        return f"[ERROR] Failed to read TXT: {e}"

	def load_file_if_any(file_path: str) -> str:
	    """Load a file and return its contents as text, chosen by extension.

	    Supports ``.pdf``, ``.csv`` and ``.txt``; the extension is matched
	    without regard to letter case.

	    Args:
	        file_path: Path of the file to load. May be empty or falsy.

	    Returns:
	        ``""`` when no path is given or the path does not exist; the text
	        produced by the matching reader for a supported extension;
	        ``"[WARNING] Unsupported file format."`` for any other extension;
	        or ``"[ERROR] Failed to load file: <reason>"`` if reading fails.
	    """
	    if not file_path or not os.path.exists(file_path):
	        return ""

	    try:
	        with open(file_path, "rb") as f:
	            file_bytes = f.read()

	        # Lower-case once so "REPORT.PDF" is routed like "report.pdf".
	        lowered_path = file_path.lower()
	        if lowered_path.endswith(".pdf"):
	            return read_pdf(file_bytes)
	        if lowered_path.endswith(".csv"):
	            return read_csv(file_bytes)
	        if lowered_path.endswith(".txt"):
	            return read_txt(file_bytes)
	        return "[WARNING] Unsupported file format."
	    except Exception as e:
	        # Covers unreadable paths (e.g. a directory or a permission problem).
	        return f"[ERROR] Failed to load file: {e}"