| import gradio as gr |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
| import torch |
| import os |
|
|
# NOTE(review): "google/gemma-4-E2B-it" does not look like a valid Hugging Face
# repo id (there is no "Gemma 4" — likely meant "google/gemma-3n-E2B-it").
# Confirm against the Hub before deploying.
model_name = "google/gemma-4-E2B-it"

# Tokenizer matching the chat model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights once, in half precision, letting accelerate place them
# across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# BUG FIX: the pipeline previously received `model=model_name` (a string),
# which made transformers load the checkpoint a SECOND time and silently
# ignore the fp16 / device-mapped model built above. Pass the loaded model.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1500,
    do_sample=True,  # BUG FIX: without sampling, `temperature` is ignored
    temperature=0.7,
)
|
|
def generate_response(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns supplied by gr.ChatInterface. Currently UNUSED — every
        call is answered without conversational context.
        # TODO(review): fold `history` into `messages` for multi-turn chat
        # (its exact shape depends on the Gradio version — confirm first).

    Returns
    -------
    str
        The assistant's reply text.
    """
    # A single conversation: the pipeline accepts one list of chat messages
    # directly, so the extra batch-of-one nesting the original used (and the
    # resulting double index `response[0][0]`) is unnecessary.
    messages = [
        {
            "role": "system",
            "content": [{"type": "text",
                         "text": "You are a brain map model, an outstanding medical assistant, and a talented university professor. "}],
        },
        {
            "role": "user",
            "content": [{"type": "text",
                         "text": message}],
        },
    ]

    response = pipe(messages)
    # `generated_text` holds the whole conversation (system, user, assistant,
    # ...). Take the LAST message instead of the hard-coded index [2] so this
    # keeps working if the prompt ever gains or loses a turn.
    return response[0]['generated_text'][-1]['content']
|
|
# Wire the responder into a simple chat UI and start the local server.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Brain map(com LLM)",
    description="You are a Gemma 4 model, a trusted speaker and medical assistant. ",
)

demo.launch()