Spaces:
Sleeping
Sleeping
| """ | |
| MedScribe v2 — Export to Ollama | |
| Creates the Ollama model from the GGUF exported by 05_train_unsloth.py. | |
| Also tests the model via Ollama API with a sample extraction. | |
| Usage: | |
| python scripts/07_export_ollama.py --create # Create Ollama model | |
| python scripts/07_export_ollama.py --test # Test the model | |
| python scripts/07_export_ollama.py --create --test # Both | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import subprocess | |
| import sys | |
| MODELFILE_PATH = "configs/Modelfile" | |
| MODEL_NAME = "medscribe-v2" | |
| GGUF_DIR = "models/exported" | |
| SAMPLE_TRANSCRIPT = """ASHA: नमस्ते बहन जी, कैसी तबीयत है? | |
| Patient: ठीक हूँ दीदी, बस थोड़ी कमज़ोरी लग रही है। | |
| ASHA: आखिरी बार पीरियड कब आया था? | |
| Patient: करीब 7 महीने पहले, अब बच्चा होने वाला है। | |
| ASHA: चलिए बी.पी. देखते हैं... 120/80 है, बिल्कुल नॉर्मल। वज़न 55 किलो। | |
| Patient: आयरन की गोली खा रही हूँ रोज़। | |
| ASHA: बहुत अच्छा। TT का टीका लगवाया? | |
| Patient: हाँ, पहला लगवा लिया, दूसरा अगले महीने है। | |
| ASHA: बच्चा हिल रहा है ठीक से? | |
| Patient: हाँ दीदी, खूब हिलता है। | |
| ASHA: अगली बार अस्पताल जाकर खून की जाँच करवा लेना। अगली विज़िट 2 हफ्ते बाद।""" | |
| def create_model(): | |
| """Create Ollama model from Modelfile.""" | |
| # Check GGUF exists | |
| gguf_files = [f for f in os.listdir(GGUF_DIR) if f.endswith(".gguf")] if os.path.exists(GGUF_DIR) else [] | |
| if not gguf_files: | |
| print(f"ABORT: No GGUF files found in {GGUF_DIR}") | |
| print("Run scripts/05_train_unsloth.py first.") | |
| sys.exit(1) | |
| print(f"Found GGUF: {gguf_files}") | |
| print(f"Creating Ollama model '{MODEL_NAME}' from {MODELFILE_PATH}...") | |
| result = subprocess.run( | |
| ["ollama", "create", MODEL_NAME, "-f", MODELFILE_PATH], | |
| capture_output=True, text=True, timeout=300, | |
| ) | |
| if result.returncode == 0: | |
| print(f"Model '{MODEL_NAME}' created successfully") | |
| print(result.stdout) | |
| else: | |
| print(f"Failed to create model:") | |
| print(result.stderr) | |
| sys.exit(1) | |
| def test_model(): | |
| """Test the model with a sample ASHA transcript.""" | |
| try: | |
| import ollama | |
| except ImportError: | |
| print("ollama package not installed") | |
| sys.exit(1) | |
| print(f"\nTesting '{MODEL_NAME}' with sample ANC transcript...") | |
| # Load ANC schema | |
| with open("configs/schemas/anc_visit.json", "r", encoding="utf-8") as f: | |
| schema = json.load(f) | |
| system_prompt = ( | |
| "You are a clinical data extraction system for India's ASHA health worker program. " | |
| "Extract structured data from the Hindi/Hinglish conversation into JSON. " | |
| "ONLY extract what is explicitly stated. Use null for unmentioned fields." | |
| ) | |
| user_prompt = ( | |
| f"Extract data from this ASHA visit:\n\n{SAMPLE_TRANSCRIPT}\n\n" | |
| f"Schema:\n{json.dumps(schema, ensure_ascii=False)}" | |
| ) | |
| import time | |
| t0 = time.time() | |
| response = ollama.chat( | |
| model=MODEL_NAME, | |
| messages=[ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": user_prompt}, | |
| ], | |
| ) | |
| elapsed = time.time() - t0 | |
| text = response.message.content | |
| print(f"\nResponse ({elapsed:.1f}s):\n{text[:2000]}") | |
| # Try to parse | |
| try: | |
| data = json.loads(text) | |
| print(f"\nValid JSON. Non-null fields: {_count_non_null(data)}") | |
| # Quick sanity checks | |
| checks = [] | |
| vitals = data.get("vitals", {}) | |
| if vitals.get("bp_systolic") == 120 and vitals.get("bp_diastolic") == 80: | |
| checks.append("BP extracted correctly") | |
| if vitals.get("weight_kg") == 55: | |
| checks.append("Weight extracted correctly") | |
| preg = data.get("pregnancy", {}) | |
| if preg.get("gestational_weeks") and 28 <= preg["gestational_weeks"] <= 32: | |
| checks.append("Gestational weeks reasonable") | |
| for c in checks: | |
| print(f" PASS: {c}") | |
| except json.JSONDecodeError: | |
| print("\nOutput is not valid JSON — model may need more fine-tuning") | |
| def _count_non_null(d, count=0): | |
| if isinstance(d, dict): | |
| for v in d.values(): | |
| count = _count_non_null(v, count) | |
| elif isinstance(d, list): | |
| count += len(d) | |
| elif d is not None: | |
| count += 1 | |
| return count | |
| def main(): | |
| parser = argparse.ArgumentParser(description="MedScribe v2 — Ollama Export") | |
| parser.add_argument("--create", action="store_true", help="Create Ollama model") | |
| parser.add_argument("--test", action="store_true", help="Test model with sample") | |
| args = parser.parse_args() | |
| if not args.create and not args.test: | |
| print("Specify --create, --test, or both") | |
| sys.exit(1) | |
| if args.create: | |
| create_model() | |
| if args.test: | |
| test_model() | |
| if __name__ == "__main__": | |
| main() | |