FD900 committed on
Commit
daa3bd9
·
verified ·
1 Parent(s): d31057a

Create app/tools/file_loader.py

Browse files
Files changed (1) hide show
  1. app/tools/file_loader.py +37 -0
app/tools/file_loader.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import pandas as pd
3
+ from PyPDF2 import PdfReader
4
+
5
def read_pdf(file_bytes: bytes) -> str:
    """Extract the text of a PDF supplied as raw bytes.

    Pages are processed in order and their text is concatenated with no
    separator; a page whose ``extract_text()`` returns a falsy value
    contributes an empty string. Leading and trailing whitespace is
    stripped from the combined result.

    Args:
        file_bytes: The raw content of the PDF file.

    Returns:
        The extracted text, or a string beginning with
        ``"[ERROR] Failed to read PDF:"`` if any exception is raised while
        reading. Errors are reported in-band rather than raised.
    """
    try:
        reader = PdfReader(io.BytesIO(file_bytes))
        # Join once instead of growing a string with ``+=`` for every page.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        return text.strip()
    except Exception as e:
        return f"[ERROR] Failed to read PDF: {e}"
17
+
18
def read_csv(file_bytes: bytes, max_rows: int = 10) -> str:
    """Return a plain-text preview of the first rows of a CSV file.

    The bytes are parsed with pandas using its default (UTF-8) encoding.
    If that fails with a ``UnicodeDecodeError`` the parse is retried as
    latin1, mirroring the fallback used by ``read_txt`` in this module.

    Args:
        file_bytes: The raw content of the CSV file.
        max_rows: Number of leading rows to include in the preview
            (passed to ``DataFrame.head``). Defaults to 10.

    Returns:
        The preview rendered by ``DataFrame.to_string(index=False)``, or a
        string beginning with ``"[ERROR] Failed to read CSV:"`` if any
        exception is raised. Errors are reported in-band rather than raised.
    """
    try:
        try:
            df = pd.read_csv(io.BytesIO(file_bytes))
        except UnicodeDecodeError:
            # Not valid UTF-8: latin1 maps every byte, so this retry cannot
            # fail on decoding (it may still fail on CSV structure).
            df = pd.read_csv(io.BytesIO(file_bytes), encoding="latin1")
        return df.head(max_rows).to_string(index=False)
    except Exception as e:
        return f"[ERROR] Failed to read CSV: {e}"
27
+
28
def read_txt(file_bytes: bytes) -> str:
    """Decode a plain-text file supplied as raw bytes.

    The bytes are decoded as UTF-8 first; if they are not valid UTF-8 they
    are decoded as latin1 instead. Leading and trailing whitespace is
    stripped from the result.

    Args:
        file_bytes: The raw content of the text file.

    Returns:
        The decoded text, or a string beginning with
        ``"[ERROR] Failed to read TXT:"`` if an unexpected exception is
        raised (for example when ``file_bytes`` is not a bytes object).
    """
    try:
        # ``utf-8-sig`` decodes ordinary UTF-8 and also drops a leading BOM,
        # which plain ``utf-8`` would leave in the text as U+FEFF
        # (``str.strip`` does not remove it).
        return file_bytes.decode("utf-8-sig").strip()
    except UnicodeDecodeError:
        # latin1 maps every possible byte to a character, so this decode
        # cannot fail and needs no ``errors=`` handler.
        return file_bytes.decode("latin1").strip()
    except Exception as e:
        return f"[ERROR] Failed to read TXT: {e}"