FD900 committed on
Commit
8b1008c
·
verified ·
1 Parent(s): 0b7542f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -230
app.py CHANGED
@@ -1,236 +1,76 @@
 
1
  import os
2
- import requests
3
- import pandas as pd
4
- import gradio as gr
5
- import openai
6
- from langchain.embeddings import OpenAIEmbeddings
7
- from langchain.vectorstores import FAISS
8
- from langchain.text_splitter import CharacterTextSplitter
9
- from langchain.chains import RetrievalQA
10
- from langchain.llms import OpenAI
11
- from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader
12
- from langchain.tools import DuckDuckGoSearchRun
13
- from langchain.agents import initialize_agent, Tool
14
- from langchain.agents.agent_types import AgentType
15
- from langchain.schema import Document
16
- from PIL import Image
17
- import pytesseract
18
-
19
- # --- Constants ---
20
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
-
22
- # --- Basic Agent Definition with RAG + Tools ---
23
- class RAGAgent:
24
- def _init_(self):
25
- self.api_key = os.getenv("OPENAI_API_KEY")
26
- if not self.api_key:
27
- raise ValueError("OPENAI_API_KEY is not set in environment variables.")
28
- openai.api_key = self.api_key
29
- print("GPT-4o RAG Agent with tools initialized.")
30
- self.vectorstore = None
31
- self.tools = [
32
- Tool(
33
- name="Search News",
34
- func=DuckDuckGoSearchRun().run,
35
- description="Useful for finding recent news articles about a topic."
36
- ),
37
- Tool(
38
- name="Company Profile",
39
- func=DuckDuckGoSearchRun().run,
40
- description="Retrieve basic profile information about a company."
41
- ),
42
- Tool(
43
- name="Search Wikipedia",
44
- func=DuckDuckGoSearchRun().run,
45
- description="Good for general encyclopedic knowledge."
46
- )
47
- ]
48
-
49
- def build_vectorstore(self, file_path):
50
- print(f"Building vectorstore from file: {file_path}")
51
- ext = os.path.splitext(file_path)[-1].lower()
52
- if ext == ".txt":
53
- loader = TextLoader(file_path)
54
- elif ext == ".pdf":
55
- loader = PyPDFLoader(file_path)
56
- elif ext == ".csv":
57
- loader = CSVLoader(file_path)
58
- elif ext in [".png", ".jpg", ".jpeg"]:
59
- def ocr_image(file_path):
60
- text = pytesseract.image_to_string(Image.open(file_path))
61
- return [Document(page_content=text)]
62
-
63
- class OCRImageLoader:
64
- def _init_(self, path):
65
- self.path = path
66
-
67
- def load(self):
68
- return ocr_image(self.path)
69
-
70
- loader = OCRImageLoader(file_path)
71
- else:
72
- raise ValueError(f"Unsupported file type: {ext}")
73
 
74
- documents = loader.load()
75
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
76
- texts = text_splitter.split_documents(documents)
77
- embeddings = OpenAIEmbeddings()
78
- self.vectorstore = FAISS.from_documents(texts, embeddings)
 
79
 
80
- def classify_task_level(self, question: str) -> int:
81
- if any(kw in question.lower() for kw in ["in the image", "clockwise", "based on", "served in", "multi-step"]):
82
- return 3
83
- elif len(question.split()) > 40 or any(kw in question.lower() for kw in ["using the tool", "summarize and compare"]):
84
- return 2
85
- else:
86
- return 1
87
-
88
- def simple_answer(self, question, file_path):
89
- if file_path and os.path.isfile(file_path):
90
- self.build_vectorstore(file_path)
91
- retriever = self.vectorstore.as_retriever()
92
- qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(model_name="gpt-4o", temperature=0.3), retriever=retriever)
93
- return qa_chain.run(question)
94
- else:
95
- return OpenAI(model_name="gpt-4o", temperature=0.3)(question)
96
 
97
- def coordinated_tool_reasoning(self, question, file_path):
98
- if file_path and os.path.isfile(file_path):
99
- self.build_vectorstore(file_path)
100
- retriever = self.vectorstore.as_retriever()
 
 
 
 
 
 
 
 
 
 
101
  else:
102
- retriever = None
103
-
104
- agent_executor = initialize_agent(
105
- self.tools,
106
- OpenAI(model_name="gpt-4o", temperature=0.3),
107
- agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
108
- verbose=True
109
- )
110
-
111
- context = retriever.get_relevant_documents(question) if retriever else []
112
- augmented_question = f"{question}\n\nContext:\n{''.join([doc.page_content for doc in context])}" if context else question
113
- return agent_executor.run(augmented_question)
114
-
115
- def complex_multihop_chain(self, question, file_path):
116
- return self.coordinated_tool_reasoning(question, file_path)
117
-
118
- def solve_question(self, question: str, file_path: str = None, level: int = None) -> str:
119
- print(f"Received question (first 50 chars): {question[:50]}...")
120
- if level is None:
121
- level = self.classify_task_level(question)
122
- print(f"Classified task as Level {level}")
123
-
124
- try:
125
- if level == 1:
126
- return self.simple_answer(question, file_path)
127
- elif level == 2:
128
- return self.coordinated_tool_reasoning(question, file_path)
129
- elif level == 3:
130
- return self.complex_multihop_chain(question, file_path)
131
- else:
132
- raise ValueError("Unsupported level.")
133
- except Exception as e:
134
- print(f"Error during reasoning: {e}")
135
- return f"Error: {e}"
136
-
137
- def _call_(self, question: str, file_path: str = None, level: int = None) -> str:
138
- return self.solve_question(question, file_path, level)
139
-
140
- # --- Evaluation & Submission Code ---
141
-
142
- def run_and_submit_all(profile: gr.OAuthProfile | None):
143
- space_id = os.getenv("SPACE_ID")
144
-
145
- if profile:
146
- username = f"{profile.username}"
147
- print(f"User logged in: {username}")
148
- else:
149
- print("User not logged in.")
150
- return "Please Login to Hugging Face with the button.", None
151
-
152
- api_url = DEFAULT_API_URL
153
- questions_url = f"{api_url}/questions"
154
- submit_url = f"{api_url}/submit"
155
-
156
- try:
157
- agent = RAGAgent()
158
- except Exception as e:
159
- return f"Error initializing agent: {e}", None
160
-
161
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
162
-
163
- try:
164
- response = requests.get(questions_url, timeout=15)
165
- response.raise_for_status()
166
- questions_data = response.json()
167
- except Exception as e:
168
- return f"Error fetching questions: {e}", None
169
-
170
- results_log = []
171
- answers_payload = []
172
- for item in questions_data:
173
- task_id = item.get("task_id")
174
- question_text = item.get("question")
175
- file_path = item.get("file_path")
176
- level = item.get("level")
177
- if not task_id or question_text is None:
178
  continue
179
- try:
180
- submitted_answer = agent(question_text, file_path, level)
181
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
182
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
183
- except Exception as e:
184
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
185
-
186
- if not answers_payload:
187
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
188
-
189
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
190
- try:
191
- response = requests.post(submit_url, json=submission_data, timeout=60)
192
- response.raise_for_status()
193
- result_data = response.json()
194
- final_status = (
195
- f"Submission Successful!\n"
196
- f"User: {result_data.get('username')}\n"
197
- f"Overall Score: {result_data.get('score', 'N/A')}% "
198
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
199
- f"Message: {result_data.get('message', 'No message received.')}"
200
- )
201
- return final_status, pd.DataFrame(results_log)
202
- except Exception as e:
203
- return f"Submission Failed: {e}", pd.DataFrame(results_log)
204
-
205
- with gr.Blocks() as demo:
206
- gr.Markdown("# Basic Agent Evaluation Runner")
207
- gr.Markdown(
208
- """
209
- *Instructions:*
210
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
211
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
212
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
213
-
214
- This agent is designed for tasks that require:
215
- - Structured responses
216
- - Multimodal reasoning (e.g., analyzing images)
217
- - Multi-hop retrieval of interdependent facts (e.g., identify fruit in an image, lookup ship history, fetch historical menus)
218
- - Correct sequencing and planning over multiple steps
219
-
220
- These capabilities are critical to solving complex GAIA tasks that go beyond what standalone LLMs can handle.
221
- """
222
- )
223
-
224
- gr.LoginButton()
225
-
226
- run_button = gr.Button("Run Evaluation & Submit All Answers")
227
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
228
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
229
-
230
- run_button.click(
231
- fn=run_and_submit_all,
232
- outputs=[status_output, results_table]
233
- )
234
-
235
- if _name_ == "_main_":
236
- demo.launch(debug=True, share=False)
 
1
+ # Standard imports
2
  import os
3
+ import sys
4
+ import warnings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ # LangChain community imports (Updated for v0.2+)
7
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader
8
+ from langchain_community.embeddings import OpenAIEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_community.llms import OpenAI
11
+ from langchain_community.tools import DuckDuckGoSearchRun
12
 
13
+ # Other imports (you may have these depending on use)
14
+ from langchain.chains import RetrievalQA
15
+ from langchain.text_splitter import CharacterTextSplitter
16
+ from langchain.schema import Document
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
# Guarantee that OPENAI_API_KEY exists in the process environment.
# A key already exported by the caller is left untouched. When none is set,
# the placeholder is installed exactly as before, but a warning is emitted so
# the missing configuration is visible immediately rather than only when the
# first OpenAI request is attempted.
if "OPENAI_API_KEY" not in os.environ:
    warnings.warn(
        "OPENAI_API_KEY is not set; falling back to the placeholder "
        "'your-api-key-here'. Set a real key before querying OpenAI.",
        RuntimeWarning,
    )
    os.environ["OPENAI_API_KEY"] = "your-api-key-here"
20
+
21
def load_documents(directory: str) -> list:
    """Load every supported document found directly inside *directory*.

    Files are dispatched on their extension, compared case-insensitively:
    ``.txt`` -> ``TextLoader``, ``.pdf`` -> ``PyPDFLoader``,
    ``.csv`` -> ``CSVLoader``. Anything else, including sub-directories,
    is skipped. Entries are visited in sorted name order so repeated runs
    yield the documents in the same order.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A flat list holding whatever each loader's ``load()`` returned, in
        file-name order. Empty when nothing supported is present.

    Raises:
        OSError: Propagated from ``os.listdir`` when *directory* cannot be
            read (for example, it does not exist).
    """
    docs = []
    # os.listdir() makes no ordering promise; sort for reproducible output.
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        # A directory named e.g. "archive.txt" must never reach a file loader.
        if not os.path.isfile(filepath):
            continue
        # Lower-case once so "REPORT.PDF" is handled like "report.pdf".
        lowered = filename.lower()
        if lowered.endswith(".txt"):
            loader = TextLoader(filepath)
        elif lowered.endswith(".pdf"):
            loader = PyPDFLoader(filepath)
        elif lowered.endswith(".csv"):
            loader = CSVLoader(filepath)
        else:
            continue
        docs.extend(loader.load())
    return docs
36
+
37
def build_vector_store(docs):
    """Build a FAISS index over *docs* using OpenAI embeddings.

    Args:
        docs: Non-empty sequence of documents (typically the chunks produced
            by a text splitter) to embed and index.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If *docs* is empty. The check runs before any embeddings
            client is created, so no OpenAI setup happens for a request that
            cannot succeed.
    """
    # An empty batch gives FAISS nothing to index; fail here with a clear
    # message instead of letting the error surface inside the library.
    if not docs:
        raise ValueError("Cannot build a vector store from an empty document list.")
    return FAISS.from_documents(docs, OpenAIEmbeddings())
41
+
42
def build_qa_chain(vectorstore):
    """Wire *vectorstore* into a ``RetrievalQA`` chain.

    The store is exposed through ``as_retriever()`` and paired with an
    ``OpenAI`` LLM created with ``temperature=0``.

    Args:
        vectorstore: Object providing ``as_retriever()``.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type``.
    """
    # Keyword order keeps the retriever created before the LLM, as before.
    return RetrievalQA.from_chain_type(
        retriever=vectorstore.as_retriever(),
        llm=OpenAI(temperature=0),
    )
47
+
48
def main():
    """Run an interactive question-answering prompt over the ``data/`` folder.

    Steps: load the supported documents, split them into 1000-character
    chunks with 100 characters of overlap, index the chunks in a vector
    store, build the QA chain, then answer questions read from standard
    input until the user types ``exit`` or ``quit`` or closes the input
    stream (EOF / Ctrl-C).

    Returns:
        None. Progress, errors and answers are printed to standard output.
    """
    data_path = "data/"  # Change to your actual directory

    # Fail with a readable message instead of a traceback when there is
    # nothing to index.
    if not os.path.isdir(data_path):
        print(f"[ERROR] Data directory not found: {data_path}")
        return

    print("[INFO] Loading documents...")
    documents = load_documents(data_path)
    if not documents:
        print(f"[ERROR] No supported documents (.txt, .pdf, .csv) found in {data_path}")
        return

    print("[INFO] Splitting text...")
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    split_docs = splitter.split_documents(documents)

    print("[INFO] Creating vector store...")
    vectorstore = build_vector_store(split_docs)

    print("[INFO] Building QA chain...")
    qa_chain = build_qa_chain(vectorstore)

    print("\n[READY] Ask questions (type 'exit' to quit):\n")
    while True:
        try:
            question = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C is a normal way to leave the prompt.
            print("\nGoodbye!")
            break
        if not question:
            # Do not spend an LLM call on a blank line.
            continue
        if question.lower() in ("exit", "quit"):
            print("Goodbye!")
            break
        # NOTE(review): newer LangChain releases deprecate Chain.run in favour
        # of invoke - confirm against the pinned version before switching.
        answer = qa_chain.run(question)
        print("A:", answer)
72
+
73
# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Hide DeprecationWarning output for the rest of the session so it does
    # not clutter the interactive prompt.
    warnings.simplefilter("ignore", DeprecationWarning)
    main()