"""Gradio chat app serving a fine-tuned Llama math-tutor model."""
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Authenticate with Hugging Face (only if a token is provided in the env)
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if token:
    login(token=token)

# Load the model config and normalize rope_scaling: some checkpoints ship
# the newer {"rope_type": ...} schema that older transformers versions
# reject, so rewrite it to the legacy {"type", "factor"} form.
model_name = "Avinash17/llama-math-tutor"
config = AutoConfig.from_pretrained(model_name)
if config.rope_scaling and "rope_type" in config.rope_scaling:
    config.rope_scaling = {
        "type": "linear",
        "factor": config.rope_scaling.get("factor", 32.0),
    }

tokenizer = AutoTokenizer.from_pretrained(model_name)
# float16 only makes sense on GPU; several ops lack fp16 CPU kernels, so
# fall back to float32 when no CUDA device is available.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else "cpu",
)
model.eval()  # inference only: disable dropout etc.


def respond(
    message: str,
    history: list[dict[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message.
        history: Prior turns as {"role", "content"} dicts (messages format).
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The model's reply text, with the prompt stripped.
    """
    # Flatten system prompt + history + new message into a plain-text prompt.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
    prompt = "\n".join(
        f"{m['role'].capitalize()}: {m['content']}" for m in messages
    )

    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=2048
    ).to(model.device)

    # inference_mode avoids building autograd state during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt by *token* count, not character count: decoding the
    # full sequence does not round-trip to the exact prompt string, so
    # `full[len(prompt):]` could cut mid-word or leak prompt text into the
    # reply. Decode only the newly generated token ids instead.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()


with gr.Blocks() as demo:
    gr.Markdown("# 🧮 Llama Math Tutor")
    # Start the Chatbot with an empty list of {role, content} dicts
    # (messages format), not the legacy tuple format.
    chatbot = gr.Chatbot(
        label="Math Tutor Chat",
        placeholder="Type your math question here…",
        type="messages",
        value=[],
    )
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,  # reuse the Chatbot instance above
        type="messages",  # enforce messages format
        additional_inputs=[
            gr.Textbox(
                value="You are an expert math tutor. Provide clear, step-by-step solutions.",
                label="System message",
            ),
            gr.Slider(1, 2048, value=512, label="Max new tokens"),
            gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
        ],
        title="Math Problem Solver",
        description="Ask any math question & get step-by-step answers.",
    )

demo.launch(server_name="0.0.0.0", server_port=7860)