# app.py — CSV-Backed AI Agent with Function Calling (Streamlit)
# Originally published by Seth0330 (revision fb5cdfa, 6.01 kB).
import streamlit as st
import pandas as pd
import numpy as np
import os
import requests
import json
import re
# --- Page configuration ---------------------------------------------------
st.set_page_config(
    page_title="CSV-Backed AI Agent with Function Calling",
    layout="wide",
)

# --- Title & banner image -------------------------------------------------
st.title("CSV-Backed AI Agent with Function Calling")
st.image("./nadi-lok-image.png")

# --- OpenAI credentials ---------------------------------------------------
# The key must come from the environment (Settings → Secrets on Spaces);
# without it no API call can succeed, so halt the script immediately.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.error("❌ OPENAI_API_KEY not set in Settings → Secrets.")
    st.stop()

# Shared headers for every OpenAI REST call made below.
HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}
# --- Sidebar: CSV upload & preview ---------------------------------------
st.sidebar.header("Upload CSV File")
uploaded_file = st.sidebar.file_uploader("Choose a CSV file", type="csv")

# `df` stays None until a CSV parses cleanly; the rest of the app checks it.
df = None
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
        st.sidebar.success("File uploaded successfully!")
        st.sidebar.write("Preview of the uploaded file:")
        st.sidebar.dataframe(df.head())
    except Exception as e:
        # Parse/display failure: report it and fall back to the no-data state.
        st.sidebar.error(f"Error reading file: {e}")
        df = None
@st.cache_data(show_spinner=False)
def build_row_embeddings(
    df: pd.DataFrame,
    batch_size: int = 100,
    model: str = "text-embedding-ada-002",
):
    """Embed every row of *df* via the OpenAI embeddings endpoint.

    Each row is serialized to a compact JSON string; the strings are sent in
    batches of *batch_size* to keep the number of HTTP round-trips low.
    Results are cached by st.cache_data, so re-runs on the same CSV are free.

    Parameters
    ----------
    df : pd.DataFrame
        The uploaded CSV as a DataFrame.
    batch_size : int, optional
        Rows per embeddings request (default 100, the original hard-coded value).
    model : str, optional
        OpenAI embedding model name (default "text-embedding-ada-002").

    Returns
    -------
    tuple[np.ndarray, list[str]]
        A (n_rows, dim) embedding matrix and the row texts in the same order.

    Raises
    ------
    requests.HTTPError
        If any embeddings request returns a non-2xx status.
    """
    # 1) Convert each row to a compact JSON string.
    texts = df.apply(lambda r: r.to_json(), axis=1).tolist()

    # 2) Batch-call the embeddings endpoint.
    embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        resp = requests.post(
            "https://api.openai.com/v1/embeddings",
            headers=HEADERS,
            json={"model": model, "input": batch},
            timeout=60,
        )
        resp.raise_for_status()
        embeddings.extend(d["embedding"] for d in resp.json()["data"])
    return np.array(embeddings), texts


if df is not None:
    st.markdown(f"**Loaded CSV:** {df.shape[0]} rows × {df.shape[1]} columns")
    # NOTE(review): `embeddings`/`row_texts` are never read again anywhere in
    # this file — confirm they are needed; otherwise this spends API credits
    # on every newly uploaded CSV for no visible benefit.
    embeddings, row_texts = build_row_embeddings(df)
# --- Prompt input ---------------------------------------------------------
# Free-form natural-language question; the Run Agent handler rejects blanks.
prompt = st.text_area(
    label="Enter your prompt for the agent",
    placeholder="e.g. Which products have price > 100?",
    height=150,
)
# — Define function for OpenAI function calling
def search_csv(query: str):
# Run a Pandas query safely
try:
result_df = df.query(query)
return result_df.to_dict(orient="records")
except Exception as e:
return {"error": f"Query error: {str(e)}"}
# JSON schema advertised to the model for function calling: exactly one
# function, `search_csv`, taking a single required pandas-query string.
_QUERY_PARAM = {
    "type": "string",
    "description": "A Pandas query string, e.g. 'price > 100 and category == \"A\"'",
}
function_schema = [
    {
        "name": "search_csv",
        "description": "Filter the CSV rows by a Pandas query. Example: price > 100",
        "parameters": {
            "type": "object",
            "properties": {"query": _QUERY_PARAM},
            "required": ["query"],
        },
    },
]
# --- Run Agent ------------------------------------------------------------
OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions"
CHAT_MODEL = "gpt-3.5-turbo-1106"  # or "gpt-4-1106-preview" if available


def _chat_completion(payload: dict) -> dict:
    """POST *payload* to the chat-completions endpoint and return parsed JSON.

    Raises requests.HTTPError on a non-2xx response.
    """
    resp = requests.post(OPENAI_CHAT_URL, headers=HEADERS, json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()


if st.button("Run Agent"):
    if df is None:
        st.error("Please upload a CSV file first.")
    elif not prompt.strip():
        st.error("Please enter a prompt.")
    else:
        # 1) First call: let the model decide whether to call search_csv.
        messages = [
            {
                "role": "system",
                "content": (
                    "You are an AI agent helping users analyze a CSV file. "
                    "If you need to search or filter the CSV, call the 'search_csv' function. "
                    "Only use the function when you need data from the CSV."
                ),
            },
            {"role": "user", "content": prompt},
        ]
        response_json = _chat_completion(
            {
                "model": CHAT_MODEL,
                "messages": messages,
                "functions": function_schema,
                "function_call": "auto",
                "temperature": 0,
                "max_tokens": 1000,
            }
        )
        msg = response_json["choices"][0]["message"]

        # 2) If the model requested a function call, execute it locally.
        if msg.get("function_call"):
            func_name = msg["function_call"]["name"]
            try:
                args = json.loads(msg["function_call"]["arguments"])
            except json.JSONDecodeError:
                # Models occasionally emit malformed JSON arguments; fail
                # with a friendly message instead of a traceback.
                st.error("Model returned malformed function arguments; please try again.")
                st.stop()
            # Only one function is registered: search_csv.
            search_result = search_csv(args["query"])

            # 3) Send the function result back for the final answer. The
            # assistant message carrying `function_call` (msg) must be echoed
            # back immediately before the "function"-role result — the chat
            # API expects the result to answer a preceding assistant turn.
            followup_messages = messages + [
                msg,
                {
                    "role": "function",
                    "name": func_name,
                    "content": json.dumps(search_result),
                },
            ]
            final_json = _chat_completion(
                {
                    "model": CHAT_MODEL,
                    "messages": followup_messages,
                    "temperature": 0,
                    "max_tokens": 1500,
                }
            )
            answer = final_json["choices"][0]["message"]["content"]
            st.subheader("✅ Agent Answer")
            st.markdown(answer)
            st.subheader("📊 Filtered CSV Data")
            st.json(search_result)
        else:
            # No function call: the model answered directly.
            st.subheader("✅ Agent Answer")
            st.markdown(msg["content"])