Add production readiness test suite — validates all 6 capabilities end-to-end

Browse files

Files changed (1) hide show

test_production_readiness.py +212 -0

test_production_readiness.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+Tinman-SmolOmni-MLA: Production Readiness Test Suite
+======================================================
+Tests everything a new user needs to verify:
+1. pip install works
+2. Load checkpoint from HuggingFace Hub
+3. Text understanding inference
+4. Image generation pipeline
+5. KV cache verification
+6. Moonshine audio integration
+Run:
+    python test_production_readiness.py
+Requires:
+    pip install git+https://huggingface.co/TinmanLabSL/SmolOmni-MLA-Toolkit
+    pip install transformers pillow soundfile librosa
+"""
+import torch
+import warnings
+import sys
+# Global imports
+try:
+    import smolomni
+    from smolomni import SmolOmni, get_model_config
+    from smolomni.config import SmolOmniConfig
+    IMPORTS_OK = True
+except Exception as e:
+    IMPORTS_OK = False
+    IMPORT_ERROR = e
+    SmolOmni = None
+def main():
+    print("=" * 60)
+    print("Tinman-SmolOmni-MLA: Production Readiness Test Suite")
+    print("=" * 60)
+    print(f"PyTorch: {torch.__version__}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+    results = {}
+    # Test 1: Imports
+    print("\n" + "=" * 60)
+    print("TEST 1: Package Import")
+    print("=" * 60)
+    if IMPORTS_OK:
+        print(f"  ✅ Package: smolomni v{smolomni.__version__}")
+        print(f"  ✅ get_model_config: {get_model_config('mla-hybrid-ar-flow-500M').hidden_size} hidden")
+        results['imports'] = True
+    else:
+        print(f"  ❌ FAILED: {IMPORT_ERROR}")
+        results['imports'] = False
+        return results
+    # Test 2: Load 500M checkpoint
+    print("\n" + "=" * 60)
+    print("TEST 2: Load 500M Checkpoint from Hub")
+    print("=" * 60)
+    print("  Downloading 1.1GB checkpoint... (may take 2-3 minutes)")
+    try:
+        model = SmolOmni.from_hub(
+            'TinmanLabSL/SmolOmni-MLA-500M',
+            checkpoint='stage2_final/model.pt',
+            config='mla-hybrid-ar-flow-500M',
+            device='cpu',
+            dtype=torch.float32,
+            strict=False,
+        )
+        n_params = sum(p.numel() for p in model.parameters())
+        print(f"  ✅ Model loaded: {n_params/1e6:.1f}M parameters")
+        print(f"  ✅ Config variant: {model.config.model_variant}")
+        print(f"  ✅ Layers: {model.config.num_hidden_layers}")
+        gqa_count = sum(1 for l in model.layers if not l.is_mla)
+        mla_count = sum(1 for l in model.layers if l.is_mla)
+        print(f"  ✅ GQA layers: {gqa_count}, MLA layers: {mla_count}")
+        results['load_checkpoint'] = True
+    except Exception as e:
+        print(f"  ❌ FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        results['load_checkpoint'] = False
+        return results
+    # Test 3: Text understanding
+    print("\n" + "=" * 60)
+    print("TEST 3: Text Understanding Inference")
+    print("=" * 60)
+    try:
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained('HuggingFaceTB/SmolVLM-500M-Instruct')
+        prompt = "The capital of France is"
+        inputs = tokenizer(prompt, return_tensors='pt')
+        with torch.no_grad():
+            result = model.forward_understanding(input_ids=inputs['input_ids'])
+            logits = result['logits']
+        next_token = logits[0, -1, :].argmax()
+        prediction = tokenizer.decode([next_token])
+        print(f"  ✅ Input: '{prompt}'")
+        print(f"  ✅ Logits shape: {logits.shape}")
+        print(f"  ✅ Next token: '{prediction}'")
+        results['text_understanding'] = True
+    except Exception as e:
+        print(f"  ❌ FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        results['text_understanding'] = False
+    # Test 4: Image generation
+    print("\n" + "=" * 60)
+    print("TEST 4: Image Generation Pipeline")
+    print("=" * 60)
+    try:
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained('HuggingFaceTB/SmolVLM-500M-Instruct')
+        prompt = "a red apple"
+        inputs = tokenizer(prompt, return_tensors='pt')
+        with torch.no_grad():
+            latents = model.generate_image(
+                input_ids=inputs['input_ids'],
+                num_steps=5,
+                latent_shape=(1, 4, 32, 32),
+            )
+        print(f"  ✅ Prompt: '{prompt}'")
+        print(f"  ✅ Latents shape: {latents.shape}")
+        print(f"  ✅ Latents mean: {latents.mean().item():.4f}, std: {latents.std().item():.4f}")
+        print(f"  ⚠️  Run through VAE decoder for actual image")
+        results['image_generation'] = True
+    except Exception as e:
+        print(f"  ❌ FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        results['image_generation'] = False
+    # Test 5: KV cache
+    print("\n" + "=" * 60)
+    print("TEST 5: KV Cache Verification")
+    print("=" * 60)
+    try:
+        kv = model.kv_cache_info()
+        print(f"  ✅ Original GQA: {kv['original_gqa']} floats/token")
+        print(f"  ✅ Hybrid cache: {kv['hybrid']} floats/token")
+        print(f"  ✅ Reduction: {kv['hybrid_reduction_pct']}%")
+        results['kv_cache'] = True
+    except Exception as e:
+        print(f"  ❌ FAILED: {e}")
+        results['kv_cache'] = False
+    # Test 6: Moonshine audio
+    print("\n" + "=" * 60)
+    print("TEST 6: Moonshine Audio Integration")
+    print("=" * 60)
+    try:
+        import numpy as np
+        from moonshine_integration import SmolOmniAudio
+        audio_model = SmolOmniAudio(device='cpu')
+        sr = 16000
+        t = np.linspace(0, 1, sr)
+        audio = 0.3 * np.sin(2 * np.pi * 440 * t).astype(np.float32)
+        result = audio_model.transcribe(audio)
+        print(f"  ✅ ASR model: {audio_model.asr_params:.1f}M params")
+        print(f"  ✅ Transcription: '{result}'")
+        chat = audio_model.chat(audio=audio, question="What is this?")
+        print(f"  ✅ Chat pipeline: {len(chat['full_prompt'])} chars")
+        results['moonshine_audio'] = True
+    except Exception as e:
+        print(f"  ❌ FAILED: {e}")
+        import traceback
+        traceback.print_exc()
+        results['moonshine_audio'] = False
+    # Summary
+    print("\n" + "=" * 60)
+    print("SUMMARY")
+    print("=" * 60)
+    passed = sum(1 for v in results.values() if v)
+    total = len(results)
+    for test_name, passed_test in results.items():
+        status = "✅ PASS" if passed_test else "❌ FAIL"
+        print(f"  {test_name:25s} {status}")
+    print(f"\n{passed}/{total} tests passed")
+    if passed == total:
+        print("\n🎉 Production ready!")
+    elif passed >= 4:
+        print("\n⚠️  Mostly ready — investigate failures")
+    else:
+        print("\n❌ Not ready — multiple critical failures")
+    return results
+if __name__ == "__main__":
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        results = main()
+        sys.exit(0 if all(results.values()) else 1)