YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

import torch
from transformers import AutoProcessor, AutoModelForCausalLM
import os

# 1. Path to the fully merged model directory. It must be self-contained:
#    both the model weights and the processor/tokenizer files live here.
model_directory = "medgemma-4b-it-merged"

# --- Verification ---
if not os.path.isdir(model_directory):
    print(f"❌ Error: The directory '{model_directory}' was not found.")
else:
    # 2. Load both the model AND the processor from the same directory.
    #    device_map="auto" lets accelerate place layers on available devices;
    #    bfloat16 halves memory versus fp32 with minimal quality loss.
    print("Loading model and processor from the same self-contained directory...")
    model = AutoModelForCausalLM.from_pretrained(
        model_directory,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    processor = AutoProcessor.from_pretrained(model_directory)
    # Right padding matches how the model was fine-tuned (left padding would
    # only matter for batched generation; batch size here is 1).
    processor.tokenizer.padding_side = "right"
    print("✅ Model and processor loaded successfully.")

    # 3. Prepare data for inference.
    #    NOTE(review): values are strings as expected by the prompt template;
    #    the "İMM" key intentionally uses the Turkish dotted capital İ.
    patient_age = "58"
    patient_sex = "female"
    new_results = {
        "WBC": "18.9", "RBC": "3.8", "HGB": "105", "HCT": "33", "PLT": "420",
        "MCV": "87", "MCH": "28", "MPV": "11.0", "Ne %": "78", "LYM": "1.5",
        "MON": "0.6", "EO": "0.3", "BA": "0.1", "İMM": "0.5", "ATL": "0",
        "ESR": "55", "HGB/RBC": "27.6"
    }

    # Render the lab results as a bulleted list for the prompt.
    results_str = "\n".join(f"- {key}: {value}" for key, value in new_results.items())
    # Azerbaijani prompt: "Please give a clinical opinion based on the
    # following patient data and blood test results."
    user_prompt = (
        "Zəhmət olmasa, aşağıdakı pasiyent məlumatlarına və qan analizi nəticələrinə əsasən klinik rəy bildir.\n\n"
        "### Pasiyent məlumatları\n"
        f"- Pasiyentin yaşı: {patient_age}\n"
        f"- Pasiyentin cinsi: {patient_sex}\n\n"
        "### Qan Analizi nəticələri\n"
        f"{results_str}"
    )
    messages = [{"role": "user", "content": [{"type": "text", "text": user_prompt}]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

    # 4. Run inference. Greedy decoding (do_sample=False) keeps the clinical
    #    feedback deterministic.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    generation_kwargs = {"max_new_tokens": 512, "do_sample": False}

    print("\nGenerating feedback...")
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # FIX: the previous code split the decoded text on '<|assistant|>', but
    # Gemma chat templates delimit the model turn with '<start_of_turn>model',
    # so the split never matched and the full prompt was echoed back.
    # Decoding only the tokens generated AFTER the prompt is template-agnostic
    # and returns exactly the model's answer.
    input_length = inputs["input_ids"].shape[-1]
    final_response = processor.decode(outputs[0][input_length:], skip_special_tokens=True)

    print("\n--- Generated Clinical Feedback ---")
    print(final_response.strip())
Downloads last month
1
Safetensors
Model size
5B params
Tensor type
F16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support