import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import os

# Gemma 3n E2B instruction-tuned checkpoint.
# NOTE(review): the original id "google/gemma-4-E2B-it" does not exist on the
# Hugging Face Hub and fails at from_pretrained; the "-E2B-it" suffix matches
# the Gemma 3n family — confirm this is the intended model.
model_name = "google/gemma-3n-E2B-it"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Reuse the already-loaded model object.  The original passed
# `model=model_name` (a string), which made the pipeline load the weights a
# second time in full precision, ignoring the float16/device_map settings
# applied above.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1500,
    temperature=0.7,
)

SYSTEM_PROMPT = (
    "You are a brain map model, an outstanding medical assistant, "
    "and a talented university professor. "
)


def generate_response(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior chat turns supplied by Gradio.  Currently unused — every call
        starts a fresh two-turn conversation.  TODO: fold `history` into
        `messages` if multi-turn context is desired.

    Returns
    -------
    str
        The text of the model's assistant turn.
    """
    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": SYSTEM_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": message}],
        },
    ]
    response = pipe(messages)
    # `generated_text` echoes the whole conversation (system, user,
    # assistant); the assistant turn is the last entry.  Indexing with -1
    # instead of the original hard-coded [2] stays correct if the number of
    # input turns ever changes (e.g. once history is included).
    return response[0]["generated_text"][-1]["content"]


demo = gr.ChatInterface(
    generate_response,
    title="Brain map(com LLM)",
    description="You are a Gemma 4 model, a trusted speaker and medical assistant. ",
)

demo.launch()