"""Gradio chat app serving a fine-tuned Llama math-tutor model."""
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Authenticate with Hugging Face (only if a token is provided in the env)
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if token:
    login(token=token)

# Load the model config and normalize rope_scaling: some checkpoints ship
# the newer {"rope_type": ...} schema that older transformers versions
# reject, so rewrite it to the legacy {"type", "factor"} form.
model_name = "Avinash17/llama-math-tutor"
config = AutoConfig.from_pretrained(model_name)
if config.rope_scaling and "rope_type" in config.rope_scaling:
    config.rope_scaling = {
        "type": "linear",
        "factor": config.rope_scaling.get("factor", 32.0),
    }

tokenizer = AutoTokenizer.from_pretrained(model_name)
# float16 only makes sense on GPU; several ops lack fp16 CPU kernels, so
# fall back to float32 when no CUDA device is available.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else "cpu",
)
model.eval()  # inference only: disable dropout etc.


def respond(
    message: str,
    history: list[dict[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message.
        history: Prior turns as {"role", "content"} dicts (messages format).
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The model's reply text, with the prompt stripped.
    """
    # Flatten system prompt + history + new message into a plain-text prompt.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    prompt = "\n".join(
        f"{m['role'].capitalize()}: {m['content']}" for m in messages
    )

    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=2048
    ).to(model.device)

    # inference_mode avoids building autograd state during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt by *token* count, not character count: decoding the
    # full sequence does not round-trip to the exact prompt string, so
    # `full[len(prompt):]` could cut mid-word or leak prompt text into the
    # reply. Decode only the newly generated token ids instead.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()


with gr.Blocks() as demo:
    gr.Markdown("# 🧮 Llama Math Tutor")
    # Start the Chatbot with an empty list of {role, content} dicts
    # (messages format), not the legacy tuple format.
    chatbot = gr.Chatbot(
        label="Math Tutor Chat",
        placeholder="Type your math question here…",
        type="messages",
        value=[],
    )
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,  # reuse the Chatbot instance above
        type="messages",  # enforce messages format
        additional_inputs=[
            gr.Textbox(
                value="You are an expert math tutor. Provide clear, step-by-step solutions.",
                label="System message",
            ),
            gr.Slider(1, 2048, value=512, label="Max new tokens"),
            gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
        ],
        title="Math Problem Solver",
        description="Ask any math question & get step-by-step answers.",
    )

demo.launch(server_name="0.0.0.0", server_port=7860)