"""
Script to validate that experimental results meet the acceptance criteria
specified in make_it_real.md
"""
|
|
import argparse
import csv
import json
from pathlib import Path
|
|
def validate_quantum_criteria(csv_file):
    """
    Validate quantum acceptance criteria.

    - Hardware backends: p_success >= 0.55 at k=k* with >= 2000 shots.
    - Simulator ("aer"): clear peak near k* with p_success >= 0.90.

    Args:
        csv_file: Path to a CSV with columns ``k``, ``k_opt``, ``p_success``,
            ``backend`` and ``shots`` (one row per tested k).

    Returns:
        dict with ``passed`` (bool), ``details`` (dict), ``criteria`` (str,
        present when thresholds were evaluated) and ``error`` (str, present
        when the file could not be read or parsed).
    """
    results = {"passed": False, "details": {}}

    try:
        # newline='' is the csv module's documented way to open input files.
        with open(csv_file, 'r', newline='') as f:
            rows = list(csv.DictReader(f))

        # Fix: the original called max() over an empty sequence when the CSV
        # had a header but no data rows, producing an opaque ValueError
        # message instead of a clear diagnostic.
        if not rows:
            results["error"] = f"no data rows in {csv_file}"
            return results

        k_star = int(rows[0]['k_opt'])
        optimal_row = max(rows, key=lambda r: float(r['p_success']))
        max_p = float(optimal_row['p_success'])
        backend = rows[0]['backend']
        shots = int(rows[0]['shots'])

        results["details"] = {
            "backend": backend,
            "k_star": k_star,
            "max_p_success": max_p,
            "optimal_k": int(optimal_row['k']),
            "shots": shots
        }

        if backend == "aer":
            # Simulator threshold: only the peak success probability matters.
            results["passed"] = max_p >= 0.90
            results["criteria"] = "Simulator: p_success ≥ 0.90"
        else:
            # Hardware threshold: also requires enough shots for statistics.
            results["passed"] = max_p >= 0.55 and shots >= 2000
            results["criteria"] = "Hardware: p_success ≥ 0.55 with ≥2000 shots"

    except Exception as e:
        # Broad by design: any I/O or parse failure is reported, not raised.
        results["error"] = str(e)

    return results
|
|
def validate_energy_criteria(baseline_file, quantized_file):
    """
    Validate energy/compression acceptance criteria.

    Targets:
    - >= 40% reduction in J per 1M tokens
    - <= 3% quality drift (PPL/accuracy)
    - P95 latency >= 20% better
    - >= 4x storage reduction

    NOTE(review): the quality-drift target is listed but not evaluated here
    (the metric key in the JSON payloads is not visible) -- confirm and add.

    Args:
        baseline_file: JSON file with baseline metrics.
        quantized_file: JSON file with post-quantization metrics.

    Returns:
        dict with ``passed``, ``details`` and either ``criteria_met`` (per-
        threshold booleans) or ``error`` (when reading/parsing failed).
    """
    outcome = {"passed": False, "details": {}}

    try:
        with open(baseline_file, 'r') as bf:
            base = json.load(bf)
        with open(quantized_file, 'r') as qf:
            quant = json.load(qf)

        base_energy = base["J_per_1M_tokens"]
        quant_energy = quant["J_per_1M_tokens"]
        base_latency = base["latency_ms_p95"]
        quant_latency = quant["latency_ms_p95"]

        # Relative improvements (positive = quantized model is better).
        energy_gain = (base_energy - quant_energy) / base_energy
        latency_gain = (base_latency - quant_latency) / base_latency
        shrink_factor = base["size_bytes"] / quant["size_bytes"]

        outcome["details"] = {
            "energy_reduction_pct": energy_gain * 100,
            "latency_improvement_pct": latency_gain * 100,
            "size_reduction_factor": shrink_factor,
            "baseline_J_per_1M": base_energy,
            "quantized_J_per_1M": quant_energy,
            "baseline_latency_p95": base_latency,
            "quantized_latency_p95": quant_latency
        }

        checks = {
            "energy_reduction_40pct": energy_gain >= 0.40,
            "latency_improvement_20pct": latency_gain >= 0.20,
            "size_reduction_4x": shrink_factor >= 4.0
        }
        outcome["passed"] = all(checks.values())
        outcome["criteria_met"] = checks

    except Exception as exc:
        outcome["error"] = str(exc)

    return outcome
|
|
def validate_training_criteria(sgd_evo_file):
    """
    Validate training cost criteria.

    Checks that the SGD-vs-Evolution comparison file contains usable
    cost-to-quality data: both accuracies are positive and agree to within
    0.1 absolute.

    Args:
        sgd_evo_file: JSON file with top-level ``sgd`` and ``evo`` records,
            each carrying ``acc``, ``wall_s`` and optionally ``energy_J``.

    Returns:
        dict with ``passed``, ``details`` and optionally ``error``.
    """
    report = {"passed": False, "details": {}}

    try:
        with open(sgd_evo_file, 'r') as fh:
            comparison = json.load(fh)

        sgd, evo = comparison["sgd"], comparison["evo"]
        acc_gap = abs(sgd["acc"] - evo["acc"])

        def _kilojoules(record):
            # Falsy energy (missing key or 0 J) is reported as unknown.
            joules = record.get("energy_J")
            return joules / 1000 if joules else None

        report["details"] = {
            "sgd_accuracy": sgd["acc"],
            "evo_accuracy": evo["acc"],
            "accuracy_difference": acc_gap,
            "sgd_energy_kJ": _kilojoules(sgd),
            "evo_energy_kJ": _kilojoules(evo),
            "sgd_time_s": sgd["wall_s"],
            "evo_time_s": evo["wall_s"]
        }

        # Both runs must have produced real accuracy numbers that agree.
        report["passed"] = sgd["acc"] > 0 and evo["acc"] > 0 and acc_gap < 0.1

    except Exception as exc:
        report["error"] = str(exc)

    return report
|
|
def main():
    """
    CLI entry point: run the selected validations and print a summary.

    Returns:
        0 if every selected criterion passed, 1 otherwise — including the
        case where no validation was selected at all.
    """
    parser = argparse.ArgumentParser(description='Validate Phase 4 acceptance criteria')
    parser.add_argument('--quantum_csv', help='Path to quantum results CSV')
    parser.add_argument('--baseline_json', help='Path to baseline energy JSON')
    parser.add_argument('--quantized_json', help='Path to quantized energy JSON')
    parser.add_argument('--sgd_evo_json', help='Path to SGD vs Evolution JSON')
    parser.add_argument('--all', action='store_true', help='Test all criteria with default paths')

    args = parser.parse_args()

    results = {}

    if args.all or args.quantum_csv:
        csv_path = args.quantum_csv or "quantum/qiskit/results/sample_grover_qiskit_results.csv"
        print("\n=== QUANTUM CRITERIA ===")
        print(f"Testing: {csv_path}")
        quantum_results = validate_quantum_criteria(csv_path)
        results["quantum"] = quantum_results
        print(f"PASSED: {quantum_results['passed']}")
        print(f"Details: {json.dumps(quantum_results['details'], indent=2)}")

    if args.all or (args.baseline_json and args.quantized_json):
        baseline_path = args.baseline_json or "phase4_outputs/llm_eval_baseline.json"
        quantized_path = args.quantized_json or "phase4_outputs/llm_eval_post_quant.json"
        print("\n=== ENERGY/COMPRESSION CRITERIA ===")
        print(f"Testing: {baseline_path} vs {quantized_path}")
        energy_results = validate_energy_criteria(baseline_path, quantized_path)
        results["energy"] = energy_results
        print(f"PASSED: {energy_results['passed']}")
        print(f"Details: {json.dumps(energy_results['details'], indent=2)}")
        if 'criteria_met' in energy_results:
            print(f"Criteria met: {json.dumps(energy_results['criteria_met'], indent=2)}")

    if args.all or args.sgd_evo_json:
        sgd_evo_path = args.sgd_evo_json or "phase4_outputs/sgd_vs_evo.json"
        print("\n=== TRAINING COST CRITERIA ===")
        print(f"Testing: {sgd_evo_path}")
        training_results = validate_training_criteria(sgd_evo_path)
        results["training"] = training_results
        print(f"PASSED: {training_results['passed']}")
        print(f"Details: {json.dumps(training_results['details'], indent=2)}")

    # Fix: with no flags the original fell through with an empty `results`
    # dict, and all() over an empty sequence is vacuously True, so the
    # script reported "ALL CRITERIA MET: True" and exited 0 without
    # validating anything.
    if not results:
        parser.print_help()
        print("\nNo validation selected; nothing was checked.")
        return 1

    print("\n=== OVERALL SUMMARY ===")
    passed_count = sum(1 for r in results.values() if r['passed'])
    total_count = len(results)
    print(f"Passed: {passed_count}/{total_count} criteria")

    all_passed = all(r['passed'] for r in results.values())
    print(f"ALL CRITERIA MET: {all_passed}")

    return 0 if all_passed else 1
|
|
if __name__ == '__main__':
    # Propagate the validator's exit status to the shell.
    raise SystemExit(main())