# agent.py (Updated for Hugging Face inference with FLAN-T5)

from typing import Optional

from transformers import pipeline

# Load the Hugging Face text2text-generation pipeline once at import time
# (downloads google/flan-t5-base on first run).
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")


def answer_question(question: str, file_context: Optional[str] = None, do_search: bool = False) -> str:
    """
    Answer the question with a Hugging Face model, using file context if provided.

    Args:
        question (str): The question to be answered.
        file_context (str, optional): Optional context extracted from a file.
        do_search (bool): Ignored in local mode.

    Returns:
        str: The generated answer, or an error message if generation fails.
    """
    # Prepend the file context to the question when one is supplied.
    prompt = question if not file_context else f"Context: {file_context}\nQuestion: {question}"
    try:
        result = qa_pipeline(prompt, max_length=256, do_sample=False)
        return result[0]["generated_text"].strip()
    except Exception as e:
        return f"[ERROR] Hugging Face pipeline failed: {e}"
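

# Example usage: a minimal sketch, not part of the original agent. The
# sample question and context below are hypothetical, chosen only to
# exercise answer_question() end to end.
if __name__ == "__main__":
    sample_context = "FLAN-T5 is an instruction-tuned encoder-decoder model from Google."
    print(answer_question("What kind of model is FLAN-T5?", file_context=sample_context))
    # Without context, the model answers from its own knowledge.
    print(answer_question("What is the capital of France?"))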