import os

from huggingface_hub import InferenceClient


def llm_node(question, search_result):
    # Initialize the client (no repo_id here!)
    client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

    # Define the model you want to use
    model_id = "HuggingFaceH4/zephyr-7b-beta"  # You can swap this with e.g. mistralai/Mistral-7B-Instruct-v0.2

    # Craft the prompt carefully
    prompt = f"""You are solving a GAIA benchmark evaluation question.

Here's the question:
{question}

Here's retrieved information:
{search_result}

⚠️ VERY IMPORTANT:
- ONLY return the final answer, exactly as required.
- Do NOT include explanations, prefixes, or notes.
- If the question asks for a list, give only the list, in the requested format.

Your answer:"""

    # Call the model (pass the model ID here, not at client construction)
    response = client.text_generation(
        prompt=prompt,
        model=model_id,
        max_new_tokens=500,
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.1,
    )

    # Clean up the response text before returning it
    return response.strip()
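
# A minimal usage sketch: the question/search_result values below are
# made-up examples, and it assumes the HUGGINGFACEHUB_API_TOKEN
# environment variable is already set to a valid Hugging Face token.
if __name__ == "__main__":
    sample_question = "What is the capital of France?"
    sample_context = "Paris is the capital and most populous city of France."
    print(llm_node(sample_question, sample_context))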