| """Benchmark: Python vs Rust address converter.""" |
|
|
import json
import os
import subprocess
import time
from itertools import zip_longest
from pathlib import Path
|
|
# JSON file the test addresses are generated from; its path is also handed to
# the Rust binary through the MAPPING_JSON environment variable.
DATA_PATH = Path(__file__).parent.joinpath("data", "mapping.json")

# Rust converter executable, resolved relative to this script (the location
# `cargo build --release` writes to under ./rust).
RUST_BIN = Path(__file__).parent.joinpath(
    "rust", "target", "release", "address-converter"
)
|
|
|
|
| def load_test_addresses(n: int | None = None) -> list[str]: |
| """Generate test addresses from mapping.json ward records.""" |
| with open(DATA_PATH, encoding="utf-8") as f: |
| data = json.load(f) |
|
|
| addresses = [] |
| for rec in data["ward_mapping"]: |
| addr = f"{rec['old_ward']}, {rec['old_district']}, {rec['old_province']}" |
| addresses.append(addr) |
|
|
| if n is not None: |
| addresses = addresses[:n] |
| return addresses |
|
|
|
|
def bench_python(addresses: list[str]) -> tuple[list[str], float]:
    """Time the Python converter over ``addresses``.

    Args:
        addresses: Address strings, passed one at a time to
            ``convert_address``. May be empty.

    Returns:
        ``(results, elapsed_seconds)``: ``results`` holds the ``converted``
        attribute of each conversion in input order, and ``elapsed_seconds``
        is the ``time.perf_counter`` duration of the conversion loop only.
    """
    # Function-scope import: importing this module alone does not pull in
    # src.converter.
    from src.converter import convert_address

    # Untimed call whose result is discarded -- presumably a warm-up so that
    # one-time initialisation is not measured. Skipped for an empty list,
    # where indexing addresses[0] would raise IndexError.
    if addresses:
        convert_address(addresses[0])

    start = time.perf_counter()
    results = [convert_address(addr).converted for addr in addresses]
    elapsed = time.perf_counter() - start
    return results, elapsed
|
|
|
|
def bench_rust(addresses: list[str]) -> tuple[list[str], float]:
    """Run the Rust converter's ``bench`` subcommand over ``addresses``.

    The addresses are written to the binary's stdin, one per line. Converted
    addresses are read back from stdout (one per line), and the elapsed time
    is taken from the first stderr line that starts with ``BENCH:``.

    Args:
        addresses: Address strings to convert. May be empty.

    Returns:
        ``(results, elapsed_seconds)``. ``elapsed_seconds`` is ``0.0`` when
        stderr contains no ``BENCH:`` line.

    Raises:
        FileNotFoundError: If no file exists at ``RUST_BIN``.
        RuntimeError: If the binary exits with a non-zero status.
    """
    if not RUST_BIN.exists():
        raise FileNotFoundError(
            f"Rust binary not found at {RUST_BIN}. Run: cd rust && cargo build --release"
        )

    # One address per line; an empty list sends no input at all rather than a
    # single blank line.
    input_data = "".join(f"{addr}\n" for addr in addresses)

    # Extend the parent environment instead of replacing it, so the child
    # still sees PATH, SYSTEMROOT, loader paths, etc.
    env = {**os.environ, "MAPPING_JSON": str(DATA_PATH)}

    proc = subprocess.run(
        [str(RUST_BIN), "bench"],
        input=input_data,
        capture_output=True,
        text=True,
        # Explicit UTF-8, matching how mapping.json is read, so non-ASCII
        # addresses do not depend on the locale's default encoding.
        encoding="utf-8",
        env=env,
    )

    if proc.returncode != 0:
        raise RuntimeError(f"Rust bench failed: {proc.stderr}")

    # strip() also removes blank lines at either end of the output.
    output = proc.stdout.strip()
    results = output.split("\n") if output else []

    elapsed = 0.0
    for line in proc.stderr.strip().split("\n"):
        if line.startswith("BENCH:"):
            # The seconds value is the fifth whitespace-separated token --
            # presumably "BENCH: <n> addresses in <secs> s"; confirm against
            # the Rust binary's actual output format.
            elapsed = float(line.split()[4])
            break

    return results, elapsed
|
|
|
|
def verify_correctness(
    py_results: list[str], rs_results: list[str], addresses: list[str]
) -> bool:
    """Compare Rust output against Python output and print a report.

    Results are compared position by position. A position that only one of
    the two lists contains counts as a mismatch, so a truncated or over-long
    output is never reported as fully matching. Details are printed for the
    first 10 mismatches only; the rest are summarised as a count.

    Args:
        py_results: Converted addresses from the Python implementation.
        rs_results: Converted addresses from the Rust implementation.
        addresses: The input addresses; used only to label mismatches.

    Returns:
        ``True`` when both lists have the same length and every pair is
        equal (including when both are empty), ``False`` otherwise.
    """
    max_shown = 10
    # Unique filler for positions one side did not produce; it can never
    # compare equal to a real result string.
    missing = object()

    def show(value):
        return "<missing>" if value is missing else value

    mismatches = 0
    pairs = zip_longest(py_results, rs_results, fillvalue=missing)
    for i, (py, rs) in enumerate(pairs):
        if py == rs:
            continue
        mismatches += 1
        if mismatches <= max_shown:
            # Guard the lookup: results may outnumber the supplied inputs.
            source = addresses[i] if i < len(addresses) else "<unknown>"
            print(f" MISMATCH [{i}] input: {source}")
            print(f" Python: {show(py)}")
            print(f" Rust: {show(rs)}")

    total = max(len(py_results), len(rs_results))
    matched = total - mismatches
    # With nothing to compare, report 100% instead of dividing by zero.
    percent = 100 * matched / total if total else 100.0
    print(f"\nCorrectness: {matched}/{total} match ({percent:.1f}%)")
    if mismatches > max_shown:
        print(f" ... and {mismatches - max_shown} more mismatches")
    return mismatches == 0
|
|
|
|
def main() -> None:
    """Benchmark both converters on the mapping.json addresses and report.

    Loads every test address, times the Python and the Rust implementation on
    the same inputs, prints a correctness comparison of their outputs, and
    finishes with a summary table of total time, per-address time and the
    Python/Rust speedup. Returns early when no addresses were loaded.
    """
    print("Loading test addresses from mapping.json ...")
    addresses = load_test_addresses()
    n = len(addresses)
    print(f" {n} addresses loaded\n")

    if n == 0:
        # Every per-address figure below divides by n.
        print("No addresses to benchmark.")
        return

    print("Running Python benchmark ...")
    py_results, py_time = bench_python(addresses)
    py_us = py_time / n * 1e6
    print(f" Python: {py_time:.4f} s ({py_us:.1f} us/addr)\n")

    print("Running Rust benchmark ...")
    rs_results, rs_time = bench_rust(addresses)
    rs_us = rs_time / n * 1e6
    print(f" Rust: {rs_time:.4f} s ({rs_us:.1f} us/addr)\n")

    print("Verifying correctness ...")
    verify_correctness(py_results, rs_results, addresses)

    # bench_rust reports 0.0 when it found no timing line; avoid dividing by it.
    speedup = py_time / rs_time if rs_time > 0 else float("inf")
    rule = "=" * 55
    print(f"\n{rule}")
    print(f" {'':20s} {'Total (s)':>10s} {'Per-addr (us)':>14s}")
    print(f" {'Python':20s} {py_time:>10.4f} {py_us:>14.1f}")
    print(f" {'Rust':20s} {rs_time:>10.4f} {rs_us:>14.1f}")
    print(f" {'Speedup':20s} {speedup:>10.1f}x")
    print(rule)
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|