import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import os

# Gemma 3n E2B instruction-tuned checkpoint.
# NOTE(review): the original id "google/gemma-4-E2B-it" does not exist on the
# Hugging Face Hub and fails at from_pretrained; the "-E2B-it" suffix matches
# the Gemma 3n family — confirm this is the intended model.
model_name = "google/gemma-3n-E2B-it"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Reuse the already-loaded model object.  The original passed
# `model=model_name` (a string), which made the pipeline load the weights a
# second time in full precision, ignoring the float16/device_map settings
# applied above.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1500,
    temperature=0.7,
)

SYSTEM_PROMPT = (
    "You are a brain map model, an outstanding medical assistant, "
    "and a talented university professor. "
)


def generate_response(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior chat turns supplied by Gradio.  Currently unused — every call
        starts a fresh two-turn conversation.  TODO: fold `history` into
        `messages` if multi-turn context is desired.

    Returns
    -------
    str
        The text of the model's assistant turn.
    """
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": message}],
        },
    ]
    response = pipe(messages)
    # `generated_text` echoes the whole conversation (system, user,
    # assistant); the assistant turn is the last entry.  Indexing with -1
    # instead of the original hard-coded [2] stays correct if the number of
    # input turns ever changes (e.g. once history is included).
    return response[0]["generated_text"][-1]["content"]


demo = gr.ChatInterface(
    generate_response,
    title="Brain map(com LLM)",
    description="You are a Gemma 4 model, a trusted speaker and medical assistant. ",
)

demo.launch()