Spaces:

Tushar9802
/

sakhi

Sleeping

File size: 5,380 Bytes

745f62a

"""
MedScribe v2 — Export to Ollama

Creates the Ollama model from the GGUF exported by 05_train_unsloth.py.
Also tests the model via Ollama API with a sample extraction.

Usage:
  python scripts/07_export_ollama.py --create    # Create Ollama model
  python scripts/07_export_ollama.py --test      # Test the model
  python scripts/07_export_ollama.py --create --test  # Both
"""
import argparse
import json
import os
import subprocess
import sys


MODELFILE_PATH = "configs/Modelfile"
MODEL_NAME = "medscribe-v2"
GGUF_DIR = "models/exported"

SAMPLE_TRANSCRIPT = """ASHA: नमस्ते बहन जी, कैसी तबीयत है?
Patient: ठीक हूँ दीदी, बस थोड़ी कमज़ोरी लग रही है।
ASHA: आखिरी बार पीरियड कब आया था?
Patient: करीब 7 महीने पहले, अब बच्चा होने वाला है।
ASHA: चलिए बी.पी. देखते हैं... 120/80 है, बिल्कुल नॉर्मल। वज़न 55 किलो।
Patient: आयरन की गोली खा रही हूँ रोज़।
ASHA: बहुत अच्छा। TT का टीका लगवाया?
Patient: हाँ, पहला लगवा लिया, दूसरा अगले महीने है।
ASHA: बच्चा हिल रहा है ठीक से?
Patient: हाँ दीदी, खूब हिलता है।
ASHA: अगली बार अस्पताल जाकर खून की जाँच करवा लेना। अगली विज़िट 2 हफ्ते बाद।"""


def create_model():
    """Create Ollama model from Modelfile."""
    # Check GGUF exists
    gguf_files = [f for f in os.listdir(GGUF_DIR) if f.endswith(".gguf")] if os.path.exists(GGUF_DIR) else []
    if not gguf_files:
        print(f"ABORT: No GGUF files found in {GGUF_DIR}")
        print("Run scripts/05_train_unsloth.py first.")
        sys.exit(1)

    print(f"Found GGUF: {gguf_files}")
    print(f"Creating Ollama model '{MODEL_NAME}' from {MODELFILE_PATH}...")

    result = subprocess.run(
        ["ollama", "create", MODEL_NAME, "-f", MODELFILE_PATH],
        capture_output=True, text=True, timeout=300,
    )

    if result.returncode == 0:
        print(f"Model '{MODEL_NAME}' created successfully")
        print(result.stdout)
    else:
        print(f"Failed to create model:")
        print(result.stderr)
        sys.exit(1)


def test_model():
    """Test the model with a sample ASHA transcript."""
    try:
        import ollama
    except ImportError:
        print("ollama package not installed")
        sys.exit(1)

    print(f"\nTesting '{MODEL_NAME}' with sample ANC transcript...")

    # Load ANC schema
    with open("configs/schemas/anc_visit.json", "r", encoding="utf-8") as f:
        schema = json.load(f)

    system_prompt = (
        "You are a clinical data extraction system for India's ASHA health worker program. "
        "Extract structured data from the Hindi/Hinglish conversation into JSON. "
        "ONLY extract what is explicitly stated. Use null for unmentioned fields."
    )

    user_prompt = (
        f"Extract data from this ASHA visit:\n\n{SAMPLE_TRANSCRIPT}\n\n"
        f"Schema:\n{json.dumps(schema, ensure_ascii=False)}"
    )

    import time
    t0 = time.time()
    response = ollama.chat(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    elapsed = time.time() - t0

    text = response.message.content
    print(f"\nResponse ({elapsed:.1f}s):\n{text[:2000]}")

    # Try to parse
    try:
        data = json.loads(text)
        print(f"\nValid JSON. Non-null fields: {_count_non_null(data)}")

        # Quick sanity checks
        checks = []
        vitals = data.get("vitals", {})
        if vitals.get("bp_systolic") == 120 and vitals.get("bp_diastolic") == 80:
            checks.append("BP extracted correctly")
        if vitals.get("weight_kg") == 55:
            checks.append("Weight extracted correctly")

        preg = data.get("pregnancy", {})
        if preg.get("gestational_weeks") and 28 <= preg["gestational_weeks"] <= 32:
            checks.append("Gestational weeks reasonable")

        for c in checks:
            print(f"  PASS: {c}")

    except json.JSONDecodeError:
        print("\nOutput is not valid JSON — model may need more fine-tuning")


def _count_non_null(d, count=0):
    if isinstance(d, dict):
        for v in d.values():
            count = _count_non_null(v, count)
    elif isinstance(d, list):
        count += len(d)
    elif d is not None:
        count += 1
    return count


def main():
    parser = argparse.ArgumentParser(description="MedScribe v2 — Ollama Export")
    parser.add_argument("--create", action="store_true", help="Create Ollama model")
    parser.add_argument("--test", action="store_true", help="Test model with sample")
    args = parser.parse_args()

    if not args.create and not args.test:
        print("Specify --create, --test, or both")
        sys.exit(1)

    if args.create:
        create_model()
    if args.test:
        test_model()


if __name__ == "__main__":
    main()