chevisli committed on
Commit
71ddb0d
·
1 Parent(s): df88b35

Support downloading associated files

Browse files
Files changed (2) hide show
  1. app.py +138 -11
  2. tools.py +61 -6
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
  import inspect
@@ -20,18 +22,94 @@ class BasicAgent:
20
  print("BasicAgent initialized with LangChain tools.")
21
  self.agent_executor = agent_executor
22
 
23
- def __call__(self, question: str) -> str:
24
- print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
 
 
25
  try:
26
- # Use the LangChain agent executor to process the question
27
- response = self.agent_executor.invoke({"input": question})
28
- answer = response.get('output', 'No answer generated.')
29
- print(f"Agent generated answer: {answer}")
30
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
  error_msg = f"Agent error: {str(e)}"
33
  print(f"Error in agent execution: {e}")
34
  return error_msg
 
 
 
 
 
 
 
 
35
 
36
  def run_and_submit_all( profile: gr.OAuthProfile | None):
37
  """
@@ -51,6 +129,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
51
  api_url = DEFAULT_API_URL
52
  questions_url = f"{api_url}/questions"
53
  submit_url = f"{api_url}/submit"
 
54
 
55
  # 1. Instantiate Agent ( modify this part to create your agent)
56
  try:
@@ -90,16 +169,64 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
90
  for item in questions_data:
91
  task_id = item.get("task_id")
92
  question_text = item.get("question")
 
 
93
  if not task_id or question_text is None:
94
  print(f"Skipping item with missing task_id or question: {item}")
95
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  try:
97
- submitted_answer = agent(question_text)
98
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
99
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
- print(f"Error running agent on task {task_id}: {e}")
102
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
103
 
104
  if not answers_payload:
105
  print("Agent did not produce any answers to submit.")
 
1
  import os
2
+ import base64
3
+ import mimetypes
4
  import gradio as gr
5
  import requests
6
  import inspect
 
22
  print("BasicAgent initialized with LangChain tools.")
23
  self.agent_executor = agent_executor
24
 
25
+ def __call__(self, question_data: dict) -> str:
26
+ """
27
+ Process question with optional file attachment
28
+ question_data = {
29
+ 'text': str,
30
+ 'file_path': str (optional),
31
+ 'file_name': str (optional)
32
+ }
33
+ """
34
+ question_text = question_data.get('text', '')
35
+ file_path = question_data.get('file_path')
36
+
37
+ print(f"Agent received question (first 50 chars): {question_text[:50]}...")
38
+
39
  try:
40
+ # Prepare input for the agent
41
+ if file_path and os.path.exists(file_path):
42
+ # Handle file input - check if it's an image or other file type
43
+ file_extension = os.path.splitext(file_path)[1].lower()
44
+
45
+ if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
46
+ # Handle image files - use LLM directly for vision, bypass ReAct agent
47
+ with open(file_path, 'rb') as img_file:
48
+ img_base64 = base64.b64encode(img_file.read()).decode()
49
+
50
+ print(f"Processing question with image: {file_path}")
51
+
52
+ # Use the LLM directly for vision tasks
53
+ from langchain_core.messages import HumanMessage
54
+
55
+ # Create image message for direct LLM call
56
+ message = HumanMessage(
57
+ content=[
58
+ {"type": "text", "text": f"{question_text}\n\nPlease analyze this image and provide your answer in the format: FINAL ANSWER: [YOUR ANSWER]"},
59
+ {
60
+ "type": "image_url",
61
+ "image_url": {"url": f"data:image/{file_extension[1:]};base64,{img_base64}"}
62
+ }
63
+ ]
64
+ )
65
+
66
+ # Call LLM directly for vision tasks
67
+ response = self.agent_executor.llm.invoke([message])
68
+ answer = response.content if hasattr(response, 'content') else str(response)
69
+ print(f"Agent generated answer: {answer}")
70
+ return answer
71
+
72
+ else:
73
+ # Handle other file types (text, CSV, etc.) - use ReAct agent
74
+ try:
75
+ with open(file_path, 'r', encoding='utf-8') as f:
76
+ file_content = f.read()
77
+ combined_input = f"{question_text}\n\nFile content ({os.path.basename(file_path)}):\n{file_content}"
78
+ agent_input = {"input": combined_input}
79
+ print(f"Processing question with text file: {file_path}")
80
+ except UnicodeDecodeError:
81
+ # Binary file - provide file info only
82
+ file_info = f"Binary file: {os.path.basename(file_path)} ({os.path.getsize(file_path)} bytes)"
83
+ combined_input = f"{question_text}\n\nAttached file: {file_info}"
84
+ agent_input = {"input": combined_input}
85
+ print(f"Processing question with binary file: {file_path}")
86
+
87
+ # Use ReAct agent for non-image files
88
+ response = self.agent_executor.invoke(agent_input)
89
+ answer = response.get('output', 'No answer generated.')
90
+ print(f"Agent generated answer: {answer}")
91
+ return answer
92
+ else:
93
+ # Text-only question - use ReAct agent
94
+ agent_input = {"input": question_text}
95
+ print("Processing text-only question")
96
+ response = self.agent_executor.invoke(agent_input)
97
+ answer = response.get('output', 'No answer generated.')
98
+ print(f"Agent generated answer: {answer}")
99
+ return answer
100
+
101
  except Exception as e:
102
  error_msg = f"Agent error: {str(e)}"
103
  print(f"Error in agent execution: {e}")
104
  return error_msg
105
+ finally:
106
+ # Clean up downloaded file
107
+ if file_path and os.path.exists(file_path):
108
+ try:
109
+ os.remove(file_path)
110
+ print(f"Cleaned up file: {file_path}")
111
+ except Exception as e:
112
+ print(f"Warning: Could not remove file {file_path}: {e}")
113
 
114
  def run_and_submit_all( profile: gr.OAuthProfile | None):
115
  """
 
129
  api_url = DEFAULT_API_URL
130
  questions_url = f"{api_url}/questions"
131
  submit_url = f"{api_url}/submit"
132
+ file_url = f"{api_url}/files"
133
 
134
  # 1. Instantiate Agent ( modify this part to create your agent)
135
  try:
 
169
  for item in questions_data:
170
  task_id = item.get("task_id")
171
  question_text = item.get("question")
172
+ file_name = item.get("file_name")
173
+
174
  if not task_id or question_text is None:
175
  print(f"Skipping item with missing task_id or question: {item}")
176
  continue
177
+
178
+ # Prepare question data for the agent
179
+ question_data = {
180
+ 'text': question_text,
181
+ 'file_path': None,
182
+ 'file_name': file_name
183
+ }
184
+
185
+ # Download file if present
186
+ if file_name:
187
+ try:
188
+ print(f"Downloading file for task {task_id}: {file_name}")
189
+ file_response = requests.get(f"{file_url}/{task_id}", timeout=30) # Increased timeout
190
+ file_response.raise_for_status()
191
+
192
+ # Save file with a safe path
193
+ safe_file_path = os.path.join(os.getcwd(), f"temp_{task_id}_{file_name}")
194
+ with open(safe_file_path, "wb") as f:
195
+ f.write(file_response.content)
196
+
197
+ question_data['file_path'] = safe_file_path
198
+ print(f"File downloaded successfully: {safe_file_path}")
199
+
200
+ except requests.exceptions.Timeout:
201
+ print(f"Timeout downloading file for task {task_id}: {file_name}")
202
+ question_data['file_path'] = None
203
+ except Exception as e:
204
+ print(f"Error downloading file for task {task_id}: {e}")
205
+ # Continue processing without file
206
+ question_data['file_path'] = None
207
+
208
+ # Process question with agent
209
  try:
210
+ submitted_answer = agent(question_data)
211
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
212
+
213
+ # Log result with file info
214
+ file_info = f" (with file: {file_name})" if file_name else ""
215
+ results_log.append({
216
+ "Task ID": task_id,
217
+ "Question": question_text + file_info,
218
+ "Submitted Answer": submitted_answer
219
+ })
220
+ print(f"Task {task_id} completed successfully{file_info}")
221
+
222
  except Exception as e:
223
+ print(f"Error running agent on task {task_id}: {e}")
224
+ file_info = f" (with file: {file_name})" if file_name else ""
225
+ results_log.append({
226
+ "Task ID": task_id,
227
+ "Question": question_text + file_info,
228
+ "Submitted Answer": f"AGENT ERROR: {e}"
229
+ })
230
 
231
  if not answers_payload:
232
  print("Agent did not produce any answers to submit.")
tools.py CHANGED
@@ -3,16 +3,61 @@ from langchain.agents import AgentExecutor, create_react_agent
3
  from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
4
  from langchain_core.prompts import PromptTemplate
5
  from langchain_openai import ChatOpenAI # Or any other LangChain compatible LLM
 
6
  from dotenv import load_dotenv
 
 
7
 
8
  load_dotenv()
9
 
10
- # 1. Initialize the Tool with API wrapper
11
- # Create the Google Search API wrapper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  search_wrapper = GoogleSearchAPIWrapper()
13
  search_tool = GoogleSearchRun(api_wrapper=search_wrapper)
14
 
15
- tools = [search_tool]
 
 
 
 
 
 
 
16
 
17
  # 2. Create a simple prompt template for an agent
18
  template = """
@@ -43,12 +88,22 @@ prompt = PromptTemplate.from_template(template)
43
 
44
  # 3. Set up the LLM and Agent
45
  llm = ChatOpenAI(
46
- model="gpt-4o",
47
- temperature=0
 
 
 
48
  )
49
 
50
  agent = create_react_agent(llm, tools, prompt)
51
- agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
 
 
 
 
 
 
 
52
 
53
  # 4. Run the agent with a question
54
  # response = agent_executor.invoke({
 
3
  from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
4
  from langchain_core.prompts import PromptTemplate
5
  from langchain_openai import ChatOpenAI # Or any other LangChain compatible LLM
6
+ from langchain.tools import Tool
7
  from dotenv import load_dotenv
8
+ import pandas as pd
9
+ import json
10
 
11
  load_dotenv()
12
 
13
def analyze_file_content(file_path: str) -> str:
    """Summarize a local file for the agent.

    Returns a short human-readable description: CSV shape plus head, JSON
    top-level keys, a text preview, or size/type info for images and unknown
    binaries. Never raises — every failure is reported as a string so the
    ReAct loop can keep reasoning.

    Args:
        file_path: path of the file to inspect.
    """
    # BUG FIX: this module does not import `os` at the top level, so the
    # previous version raised NameError on first use. Import locally to keep
    # the fix self-contained.
    import os

    if not os.path.exists(file_path):
        return f"File not found: {file_path}"

    try:
        file_size = os.path.getsize(file_path)
        file_extension = os.path.splitext(file_path)[1].lower()

        # Dispatch on extension; content-based sniffing is out of scope here.
        if file_extension == '.csv':
            df = pd.read_csv(file_path)
            return f"CSV file with {len(df)} rows and {len(df.columns)} columns. Columns: {list(df.columns)[:10]}. First few rows:\n{df.head().to_string()}"

        elif file_extension == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return f"JSON file. Keys: {list(data.keys()) if isinstance(data, dict) else 'Array with ' + str(len(data)) + ' items'}"

        elif file_extension in ['.txt', '.md', '.py', '.js', '.html', '.css']:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Preview only the first 500 characters to keep prompts small.
            return f"Text file ({file_extension}) with {len(content)} characters. Content preview:\n{content[:500]}..."

        elif file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
            return f"Image file ({file_extension}) - {file_size} bytes. Use vision capabilities to analyze this image."

        else:
            return f"File: {file_path} ({file_extension}) - {file_size} bytes. Binary or unknown format."

    except Exception as e:
        # Best-effort tool: report the problem instead of crashing the agent.
        return f"Error analyzing file {file_path}: {str(e)}"
47
+
48
# 1. Tool definitions handed to the ReAct agent.

# Wraps analyze_file_content so the agent can inspect downloaded attachments.
file_analysis_tool = Tool(
    func=analyze_file_content,
    name="file_analyzer",
    description="Analyze the content of files including CSV, JSON, text files, and images. Input should be a file path.",
)

# Google web search; credentials are read from the environment (load_dotenv above).
search_wrapper = GoogleSearchAPIWrapper()
search_tool = GoogleSearchRun(api_wrapper=search_wrapper)

# Complete tool set for the agent executor defined below.
tools = [search_tool, file_analysis_tool]
61
 
62
  # 2. Create a simple prompt template for an agent
63
  template = """
 
88
 
89
# 3. Set up the LLM and Agent
llm = ChatOpenAI(
    model="gpt-4o",  # vision-capable model; images are sent to it directly
    temperature=0,   # deterministic answers for benchmark scoring
    # BUG FIX: the previous config passed both timeout=60 and
    # request_timeout=120, but `request_timeout` is only the deprecated alias
    # of `timeout` — setting both is contradictory. Keep a single value.
    timeout=60,
    max_retries=2,   # retry transient API failures
)

agent = create_react_agent(llm, tools, prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    max_execution_time=60,  # wall-clock cap for one full agent run
    max_iterations=10,      # prevent infinite thought/action loops
    # BUG FIX: early_stopping_method="generate" is not supported for agents
    # built with create_react_agent (AgentExecutor raises ValueError when the
    # iteration limit is hit); "force" returns a canned stopped response
    # instead of crashing.
    early_stopping_method="force",
)
107
 
108
  # 4. Run the agent with a question
109
  # response = agent_executor.invoke({