import os

from huggingface_hub import InferenceClient


def llm_node(question, search_result):
    # Initialize the client (no repo_id here!)
    client = InferenceClient(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))

    # Define the model you want to use
    model_id = "HuggingFaceH4/zephyr-7b-beta"  # You can swap this with e.g. mistralai/Mistral-7B-Instruct-v0.2

    # Craft the prompt carefully
    prompt = f"""You are solving a GAIA benchmark evaluation question.

Here's the question:
{question}

Here's retrieved information:
{search_result}

⚠️ VERY IMPORTANT:
- ONLY return the final answer, exactly as required.
- Do NOT include explanations, prefixes, or notes.
- If the question asks for a list, give only the list, in the requested format.

Your answer:"""

    # Call the model (pass the model ID here, not at client construction)
    response = client.text_generation(
        prompt=prompt,
        model=model_id,
        max_new_tokens=500,
        temperature=0.1,
        top_p=0.95,
        repetition_penalty=1.1,
    )

    # Clean up the response text before returning it
    return response.strip()
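
# A minimal usage sketch: the question/search_result values below are
# made-up examples, and it assumes the HUGGINGFACEHUB_API_TOKEN
# environment variable is already set to a valid Hugging Face token.
if __name__ == "__main__":
    sample_question = "What is the capital of France?"
    sample_context = "Paris is the capital and most populous city of France."
    print(llm_node(sample_question, sample_context))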