Upload benchmark.py
benchmark.py (+2 -2)
@@ -1,5 +1,5 @@
 """
-SmolOmni-MLA Benchmark Suite
+Tinman-SmolOmni-MLA Benchmark Suite
 Measures: VRAM usage, KV cache size, throughput, generation quality
 Compares against SmolVLM baseline.
 """
@@ -115,7 +115,7 @@ def benchmark_vram(model, batch_size: int = 1, seq_len: int = 512):
 def run_all_benchmarks(model_variant: str = "256M"):
     """Run full benchmark suite."""
     print(f"="*70)
-    print(f"SmolOmni-MLA Benchmark: {model_variant}")
+    print(f"Tinman-SmolOmni-MLA Benchmark: {model_variant}")
     print(f"="*70)

     device = "cuda" if torch.cuda.is_available() else "cpu"
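
The second hunk header references benchmark_vram(model, batch_size: int = 1, seq_len: int = 512), whose body is not part of this diff. A minimal sketch of what such a peak-VRAM measurement might look like with PyTorch's CUDA memory statistics is below; the function name, dummy inputs, and return convention are illustrative assumptions, not the repository's actual code.

import torch

def benchmark_vram_sketch(model, batch_size: int = 1, seq_len: int = 512):
    """Hypothetical sketch of a peak-VRAM measurement (the real
    benchmark_vram body is not shown in this diff)."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device != "cuda":
        return None  # peak-VRAM stats are only meaningful on a GPU
    # Assumes the model has already been moved to the GPU.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    # Dummy token ids stand in for the suite's real multimodal inputs (assumption).
    input_ids = torch.randint(0, 1000, (batch_size, seq_len), device=device)
    with torch.no_grad():
        model(input_ids)
    # Peak bytes allocated since the reset, reported in GiB.
    return torch.cuda.max_memory_allocated() / 1024**3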