Final_Assignment_Template

Sleeping

App Files Files Community

FD900 committed on Jun 28, 2025

Commit

8b1008c

verified ·

1 Parent(s): 0b7542f

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -230

app.py CHANGED Viewed

@@ -1,236 +1,76 @@
 import os
-import requests
-import pandas as pd
-import gradio as gr
-import openai
-from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.chains import RetrievalQA
-from langchain.llms import OpenAI
-from langchain.document_loaders import TextLoader, PyPDFLoader, CSVLoader
-from langchain.tools import DuckDuckGoSearchRun
-from langchain.agents import initialize_agent, Tool
-from langchain.agents.agent_types import AgentType
-from langchain.schema import Document
-from PIL import Image
-import pytesseract
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition with RAG + Tools ---
-class RAGAgent:
-    def _init_(self):
-        self.api_key = os.getenv("OPENAI_API_KEY")
-        if not self.api_key:
-            raise ValueError("OPENAI_API_KEY is not set in environment variables.")
-        openai.api_key = self.api_key
-        print("GPT-4o RAG Agent with tools initialized.")
-        self.vectorstore = None
-        self.tools = [
-            Tool(
-                name="Search News",
-                func=DuckDuckGoSearchRun().run,
-                description="Useful for finding recent news articles about a topic."
-            ),
-            Tool(
-                name="Company Profile",
-                func=DuckDuckGoSearchRun().run,
-                description="Retrieve basic profile information about a company."
-            ),
-            Tool(
-                name="Search Wikipedia",
-                func=DuckDuckGoSearchRun().run,
-                description="Good for general encyclopedic knowledge."
-            )
-        ]
-    def build_vectorstore(self, file_path):
-        print(f"Building vectorstore from file: {file_path}")
-        ext = os.path.splitext(file_path)[-1].lower()
-        if ext == ".txt":
-            loader = TextLoader(file_path)
-        elif ext == ".pdf":
-            loader = PyPDFLoader(file_path)
-        elif ext == ".csv":
-            loader = CSVLoader(file_path)
-        elif ext in [".png", ".jpg", ".jpeg"]:
-            def ocr_image(file_path):
-                text = pytesseract.image_to_string(Image.open(file_path))
-                return [Document(page_content=text)]
-            class OCRImageLoader:
-                def _init_(self, path):
-                    self.path = path
-                def load(self):
-                    return ocr_image(self.path)
-            loader = OCRImageLoader(file_path)
-        else:
-            raise ValueError(f"Unsupported file type: {ext}")
-        documents = loader.load()
-        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-        texts = text_splitter.split_documents(documents)
-        embeddings = OpenAIEmbeddings()
-        self.vectorstore = FAISS.from_documents(texts, embeddings)
-    def classify_task_level(self, question: str) -> int:
-        if any(kw in question.lower() for kw in ["in the image", "clockwise", "based on", "served in", "multi-step"]):
-            return 3
-        elif len(question.split()) > 40 or any(kw in question.lower() for kw in ["using the tool", "summarize and compare"]):
-            return 2
-        else:
-            return 1
-    def simple_answer(self, question, file_path):
-        if file_path and os.path.isfile(file_path):
-            self.build_vectorstore(file_path)
-            retriever = self.vectorstore.as_retriever()
-            qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(model_name="gpt-4o", temperature=0.3), retriever=retriever)
-            return qa_chain.run(question)
-        else:
-            return OpenAI(model_name="gpt-4o", temperature=0.3)(question)
-    def coordinated_tool_reasoning(self, question, file_path):
-        if file_path and os.path.isfile(file_path):
-            self.build_vectorstore(file_path)
-            retriever = self.vectorstore.as_retriever()
         else:
-            retriever = None
-        agent_executor = initialize_agent(
-            self.tools,
-            OpenAI(model_name="gpt-4o", temperature=0.3),
-            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-            verbose=True
-        )
-        context = retriever.get_relevant_documents(question) if retriever else []
-        augmented_question = f"{question}\n\nContext:\n{''.join([doc.page_content for doc in context])}" if context else question
-        return agent_executor.run(augmented_question)
-    def complex_multihop_chain(self, question, file_path):
-        return self.coordinated_tool_reasoning(question, file_path)
-    def solve_question(self, question: str, file_path: str = None, level: int = None) -> str:
-        print(f"Received question (first 50 chars): {question[:50]}...")
-        if level is None:
-            level = self.classify_task_level(question)
-        print(f"Classified task as Level {level}")
-        try:
-            if level == 1:
-                return self.simple_answer(question, file_path)
-            elif level == 2:
-                return self.coordinated_tool_reasoning(question, file_path)
-            elif level == 3:
-                return self.complex_multihop_chain(question, file_path)
-            else:
-                raise ValueError("Unsupported level.")
-        except Exception as e:
-            print(f"Error during reasoning: {e}")
-            return f"Error: {e}"
-    def _call_(self, question: str, file_path: str = None, level: int = None) -> str:
-        return self.solve_question(question, file_path, level)
-# --- Evaluation & Submission Code ---
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    try:
-        agent = RAGAgent()
-    except Exception as e:
-        return f"Error initializing agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-    except Exception as e:
-        return f"Error fetching questions: {e}", None
-    results_log = []
-    answers_payload = []
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        file_path = item.get("file_path")
-        level = item.get("level")
-        if not task_id or question_text is None:
             continue
-        try:
-            submitted_answer = agent(question_text, file_path, level)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-    if not answers_payload:
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        return final_status, pd.DataFrame(results_log)
-    except Exception as e:
-        return f"Submission Failed: {e}", pd.DataFrame(results_log)
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        *Instructions:*
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        This agent is designed for tasks that require:
-        - Structured responses
-        - Multimodal reasoning (e.g., analyzing images)
-        - Multi-hop retrieval of interdependent facts (e.g., identify fruit in an image, lookup ship history, fetch historical menus)
-        - Correct sequencing and planning over multiple steps
-        These capabilities are critical to solving complex GAIA tasks that go beyond what standalone LLMs can handle.
-        """
-    )
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
-if _name_ == "_main_":
-    demo.launch(debug=True, share=False)

+# Standard imports
 import os
+import sys
+import warnings
+# LangChain community imports (Updated for v0.2+)
+from langchain_community.document_loaders import TextLoader, PyPDFLoader, CSVLoader
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.llms import OpenAI
+from langchain_community.tools import DuckDuckGoSearchRun
+# Other imports (you may have these depending on use)
+from langchain.chains import RetrievalQA
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.schema import Document
+# The OpenAI key must come from the real environment. Writing a placeholder
+# value here would only hide a missing key until the first API call fails
+# with an authentication error, so warn up front instead.
+if not os.getenv("OPENAI_API_KEY"):
+    print(
+        "[WARN] OPENAI_API_KEY is not set; export it before asking questions.",
+        file=sys.stderr,
+    )
+def load_documents(directory: str):
+    """Load every supported file found directly inside *directory*.
+
+    Files ending in ``.txt``, ``.pdf`` or ``.csv`` (matched case-insensitively)
+    are handed to ``TextLoader``, ``PyPDFLoader`` and ``CSVLoader`` respectively.
+    Anything else, including sub-directories, is skipped.
+
+    Args:
+        directory: Path of the folder to scan (not scanned recursively).
+
+    Returns:
+        A list with the documents produced by each loader, in file-name order.
+    """
+    docs = []
+    # Sorted so the result does not depend on the order the OS lists entries in.
+    for filename in sorted(os.listdir(directory)):
+        filepath = os.path.join(directory, filename)
+        if not os.path.isfile(filepath):
+            # A folder named e.g. "archive.txt" must not be passed to a file loader.
+            continue
+        # Lower-case once so "REPORT.PDF" is treated like "report.pdf".
+        suffix = filename.lower()
+        if suffix.endswith(".txt"):
+            loader = TextLoader(filepath)
+        elif suffix.endswith(".pdf"):
+            loader = PyPDFLoader(filepath)
+        elif suffix.endswith(".csv"):
+            loader = CSVLoader(filepath)
         else:
             continue
+        docs.extend(loader.load())
+    return docs
+def build_vector_store(docs):
+    """Build a FAISS index from *docs* using OpenAI embeddings.
+
+    Args:
+        docs: Non-empty list of documents to embed and index.
+
+    Returns:
+        The vector store returned by ``FAISS.from_documents``.
+
+    Raises:
+        ValueError: If *docs* is empty.
+    """
+    if not docs:
+        # NOTE(review): an empty list is expected to fail inside the index
+        # construction with a much less helpful error; confirm for the
+        # installed LangChain version. Rejecting it here keeps the message clear.
+        raise ValueError("Cannot build a vector store from an empty document list.")
+    embeddings = OpenAIEmbeddings()
+    return FAISS.from_documents(docs, embeddings)
+def build_qa_chain(vectorstore):
+    """Return a RetrievalQA chain that answers from *vectorstore*.
+
+    The chain pairs the store's retriever with an ``OpenAI`` LLM created
+    with ``temperature=0``.
+    """
+    return RetrievalQA.from_chain_type(
+        retriever=vectorstore.as_retriever(),
+        llm=OpenAI(temperature=0),
+    )
+def main(data_path: str = "data/"):
+    """Index the documents under *data_path* and answer questions interactively.
+
+    Args:
+        data_path: Directory passed to ``load_documents``; defaults to ``"data/"``.
+
+    The question loop ends when the user types ``exit`` or ``quit``, closes
+    stdin, or presses Ctrl+C. Blank input is ignored.
+    """
+    # Load and process data
+    print("[INFO] Loading documents...")
+    documents = load_documents(data_path)
+    if not documents:
+        # Nothing to index: stop here rather than failing while building the index.
+        print(f"[ERROR] No .txt, .pdf or .csv documents found in {data_path!r}.")
+        return
+    print("[INFO] Splitting text...")
+    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    split_docs = splitter.split_documents(documents)
+    print("[INFO] Creating vector store...")
+    vectorstore = build_vector_store(split_docs)
+    print("[INFO] Building QA chain...")
+    qa_chain = build_qa_chain(vectorstore)
+    print("\n[READY] Ask questions (type 'exit' to quit):\n")
+    while True:
+        try:
+            # Strip so " exit " and trailing newlines behave like "exit".
+            question = input("Q: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            # Closed stdin or Ctrl+C ends the session like an explicit "exit".
+            print("\nGoodbye!")
+            break
+        if not question:
+            # Do not send empty prompts to the chain.
+            continue
+        if question.lower() in ["exit", "quit"]:
+            print("Goodbye!")
+            break
+        answer = qa_chain.run(question)
+        print("A:", answer)
+# Script entry point: mute deprecation warnings, then start the interactive session.
+if __name__ == "__main__":
+    warnings.simplefilter("ignore", category=DeprecationWarning)
+    main()