Spaces:
Sleeping
Sleeping
| import os | |
| from huggingface_hub import InferenceClient | |
def llm_node(
    question: str,
    search_result: str,
    model_id: str = "HuggingFaceH4/zephyr-7b-beta",
) -> str:
    """Answer a GAIA benchmark question with a hosted instruct model.

    Builds a strict answer-only prompt from the question plus retrieved
    context and sends it to the Hugging Face Inference API.

    Args:
        question: The GAIA benchmark question text.
        search_result: Retrieved information used to ground the answer.
        model_id: Hub model to query. Defaults to zephyr-7b-beta; can be
            swapped for e.g. mistralai/Mistral-7B-Instruct-v0.2.

    Returns:
        The model's generated answer with surrounding whitespace stripped.
    """
    # Initialize the client once per call; the model is selected per request,
    # not at client construction time (no repo_id here!).
    client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

    # Craft the prompt carefully: GAIA scoring is exact-match, so the model
    # is told to emit only the final answer with no extra text.
    prompt = f"""You are solving a GAIA benchmark evaluation question.
Here’s the question:
{question}
Here’s retrieved information:
{search_result}
⚠️ VERY IMPORTANT:
- ONLY return the final answer, exactly as required.
- Do NOT include explanations, prefixes, or notes.
- If the question asks for a list, give only the list, in the requested format.
Your answer:"""

    # Call the model (pass the model ID here). Low temperature keeps the
    # output deterministic-ish; repetition_penalty discourages rambling.
    response = client.text_generation(
        model=model_id,
        prompt=prompt,
        max_new_tokens=500,
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.1,
    )

    # Clean up response text before exact-match evaluation.
    return response.strip()