# NOTE(review): the original file began with three non-code lines ("Spaces:",
# "Sleeping", "Sleeping") that look like an extraction/transcription artifact.
# Preserved here as a comment so the module remains valid Python — confirm and remove.
| from __future__ import annotations | |
| import csv | |
| import json | |
| from dataclasses import asdict, dataclass | |
| from pathlib import Path | |
| from statistics import mean | |
| from typing import List, Optional | |
| try: | |
| from crisis_logistics_env.graders import EpisodeMetrics, grade_episode | |
| from crisis_logistics_env.models import CrisisLogisticsAction | |
| from crisis_logistics_env.server.crisis_logistics_env_environment import ( | |
| CrisisLogisticsEnvironment, | |
| choose_network_action, | |
| choose_resilient_action, | |
| ) | |
| from crisis_logistics_env.tasks import list_tasks | |
| except ImportError: | |
| from graders import EpisodeMetrics, grade_episode | |
| from models import CrisisLogisticsAction | |
| from server.crisis_logistics_env_environment import ( | |
| CrisisLogisticsEnvironment, | |
| choose_network_action, | |
| choose_resilient_action, | |
| ) | |
| from tasks import list_tasks | |
@dataclass
class EpisodeSummary:
    """Per-episode benchmark record for one (task, policy) run.

    Bug fix: the ``@dataclass`` decorator was missing. The rest of the file
    requires it — ``run_policy`` constructs this class with keyword arguments
    and ``export_artifacts`` serializes instances via ``dataclasses.asdict``,
    neither of which works on a plain class with only annotations.
    """

    task_id: str                 # task identifier the episode ran on
    policy: str                  # "round_robin" | "heuristic" | "resilient"
    total_reward: float          # episode reward sum (rounded to 3 places)
    average_reward: float        # total reward / steps (rounded to 3 places)
    score: float                 # grader score from grade_episode
    bottlenecks: int             # bottleneck count reported by the environment
    retail_delivered: float      # delivered retail volume (rounded to 2 places)
    sla_success_rate: float      # env-reported SLA success rate
    priority_service_rate: float # env-reported priority service rate
    average_pressure: float      # mean dynamic pressure over the episode
    invalid_actions: int         # count of rejected/invalid actions
    reward_curve: List[float]    # per-step rewards (each rounded to 3 places)
def run_policy(task_id: str, policy: str) -> EpisodeSummary:
    """Run one complete episode of *policy* on *task_id* and summarize it.

    Args:
        task_id: Identifier accepted by ``CrisisLogisticsEnvironment.reset``.
        policy: One of ``"round_robin"``, ``"heuristic"``, ``"resilient"``.

    Returns:
        An ``EpisodeSummary`` combining environment counters, grader metrics,
        and the per-step reward curve.

    Raises:
        ValueError: If *policy* is not a recognized name.
    """
    env = CrisisLogisticsEnvironment()
    obs = env.reset(task_id=task_id)

    rr_counter = 0
    rewards: List[float] = []
    pressures: List[float] = []

    while not obs.done:
        if policy == "round_robin":
            # Blindly cycle over the three hubs, ignoring observation state.
            action = CrisisLogisticsAction(target_hub=rr_counter % 3)
            rr_counter += 1
        elif policy == "heuristic":
            action = choose_network_action(obs)
        elif policy == "resilient":
            action = choose_resilient_action(obs)
        else:
            raise ValueError(f"Unknown policy: {policy}")

        obs = env.step(action)
        rewards.append(float(obs.reward or 0.0))
        pressures.append(float(obs.dynamic_pressure))

    # max(..., 1) guards against division by zero on degenerate episodes.
    gap_history = env.balance_gap_history
    recovery_history = env.recovery_history
    metrics = EpisodeMetrics(
        total_reward=env.total_reward,
        average_reward=env.total_reward / max(env.step_count, 1),
        bottlenecks=env.bottlenecks,
        optimal_steps=env.optimal_steps,
        average_balance_gap=sum(gap_history) / max(len(gap_history), 1),
        throughput_served=env.throughput_served,
        steps_completed=env.step_count,
        retail_delivered=env.retail_delivered,
        sla_success_rate=env._sla_success_rate(),
        disruption_recovery_score=sum(recovery_history) / max(len(recovery_history), 1),
        invalid_actions=env.invalid_actions,
    )
    episode_score = grade_episode(env.task, metrics)

    return EpisodeSummary(
        task_id=task_id,
        policy=policy,
        total_reward=round(env.total_reward, 3),
        average_reward=round(metrics.average_reward, 3),
        score=episode_score,
        bottlenecks=env.bottlenecks,
        retail_delivered=round(env.retail_delivered, 2),
        sla_success_rate=env._sla_success_rate(),
        priority_service_rate=env._priority_service_rate(),
        average_pressure=round(mean(pressures) if pressures else 0.0, 3),
        invalid_actions=env.invalid_actions,
        reward_curve=[round(v, 3) for v in rewards],
    )
def export_artifacts(summaries: List[EpisodeSummary]) -> tuple[Path, Path, Optional[Path]]:
    """Write benchmark artifacts next to this script and return their paths.

    Produces, inside an ``artifacts/`` directory:
      - ``benchmark_summary.json``: per-policy aggregate stats plus all runs
        (reward curves excluded from the per-run payload);
      - ``reward_curves.csv``: one row per (task, policy, step) reward;
      - ``reward_curves.png`` (best effort): mean reward curve per policy,
        drawn with matplotlib if available, otherwise with a hand-rolled PIL
        renderer; ``None`` is returned for the plot path if both fail.

    Args:
        summaries: Completed episode summaries to aggregate and export.

    Returns:
        ``(summary_json_path, curves_csv_path, plot_path_or_None)``.
    """
    artifacts_dir = Path(__file__).resolve().parent / "artifacts"
    artifacts_dir.mkdir(parents=True, exist_ok=True)
    summary_path = artifacts_dir / "benchmark_summary.json"
    curves_path = artifacts_dir / "reward_curves.csv"

    # Aggregate mean statistics per policy across all tasks.
    per_policy: dict[str, dict[str, float]] = {}
    for policy in sorted({summary.policy for summary in summaries}):
        rows = [summary for summary in summaries if summary.policy == policy]
        per_policy[policy] = {
            "avg_score": round(mean([row.score for row in rows]), 3),
            "avg_reward": round(mean([row.average_reward for row in rows]), 3),
            "avg_sla_success_rate": round(mean([row.sla_success_rate for row in rows]), 3),
            "avg_priority_service_rate": round(mean([row.priority_service_rate for row in rows]), 3),
            "avg_invalid_actions": round(mean([row.invalid_actions for row in rows]), 3),
        }

    # Per-run payload drops the (potentially long) reward_curve; curves go to CSV.
    payload = {
        "policies": per_policy,
        "runs": [{k: v for k, v in asdict(summary).items() if k != "reward_curve"} for summary in summaries],
    }
    summary_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    # Long-form CSV: one row per step of every episode.
    with curves_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(["task_id", "policy", "step", "reward"])
        for summary in summaries:
            for step, reward in enumerate(summary.reward_curve, start=1):
                writer.writerow([summary.task_id, summary.policy, step, reward])

    plot_path: Optional[Path] = None
    try:
        # Preferred renderer: matplotlib (optional dependency, imported lazily).
        import matplotlib.pyplot as plt
        plot_path = artifacts_dir / "reward_curves.png"
        plt.figure(figsize=(10, 5))
        for policy in sorted({summary.policy for summary in summaries}):
            curves = [summary.reward_curve for summary in summaries if summary.policy == policy]
            max_len = max((len(curve) for curve in curves), default=0)
            if max_len == 0:
                continue
            # Mean over whichever curves are long enough at each step
            # (episodes may have different lengths).
            mean_curve = [
                mean([curve[step] for curve in curves if step < len(curve)])
                for step in range(max_len)
            ]
            plt.plot(range(1, max_len + 1), mean_curve, linewidth=2, label=policy)
        plt.xlabel("Step")
        plt.ylabel("Reward")
        plt.title("LogiFlow-RL Baseline Reward Curves")
        plt.legend()
        plt.grid(alpha=0.25)
        plt.savefig(plot_path, dpi=160, bbox_inches="tight")
        plt.close()
    except Exception as exc:
        # Any matplotlib failure (missing package, backend error, save error)
        # falls through to a minimal PIL line-chart renderer.
        print(f"Warning: matplotlib plot unavailable ({exc}); trying PIL fallback.")
        try:
            from PIL import Image, ImageDraw
            plot_path = artifacts_dir / "reward_curves.png"
            width, height = 1100, 560
            margin_left, margin_right = 70, 30
            margin_top, margin_bottom = 40, 60
            plot_w = width - margin_left - margin_right
            plot_h = height - margin_top - margin_bottom
            image = Image.new("RGB", (width, height), "white")
            draw = ImageDraw.Draw(image)

            # Same mean-curve computation as the matplotlib branch above.
            per_policy_curves: dict[str, List[float]] = {}
            for policy in sorted({summary.policy for summary in summaries}):
                curves = [summary.reward_curve for summary in summaries if summary.policy == policy]
                max_len = max((len(curve) for curve in curves), default=0)
                if max_len == 0:
                    continue
                per_policy_curves[policy] = [
                    mean([curve[step] for curve in curves if step < len(curve)])
                    for step in range(max_len)
                ]

            if per_policy_curves:
                # Global y-range across all policies so curves share one scale.
                all_values = [value for curve in per_policy_curves.values() for value in curve]
                y_min = min(all_values)
                y_max = max(all_values)
                if abs(y_max - y_min) < 1e-6:
                    # Avoid division by zero in map_y when the data is flat.
                    y_max = y_min + 1.0

                def map_x(step: int, max_len: int) -> int:
                    # Map step index [0, max_len-1] onto the plot's x pixels.
                    if max_len <= 1:
                        return margin_left
                    return int(margin_left + (step / (max_len - 1)) * plot_w)

                def map_y(value: float) -> int:
                    # Map a reward value onto y pixels (pixel y grows downward).
                    return int(margin_top + (1 - (value - y_min) / (y_max - y_min)) * plot_h)

                # Plot frame and horizontal grid lines.
                draw.rectangle(
                    [margin_left, margin_top, margin_left + plot_w, margin_top + plot_h],
                    outline="#333333",
                    width=1,
                )
                grid_steps = 5
                for i in range(grid_steps + 1):
                    y = margin_top + int(i * plot_h / grid_steps)
                    draw.line([(margin_left, y), (margin_left + plot_w, y)], fill="#E6E6E6", width=1)

                # Fixed palette for known policies; grey for anything else.
                colors = {
                    "round_robin": "#A84B4B",
                    "heuristic": "#2E7D32",
                    "resilient": "#1565C0",
                }
                legend_y = margin_top + 8
                legend_x = margin_left + 8
                for policy, curve in per_policy_curves.items():
                    color = colors.get(policy, "#444444")
                    points = [
                        (map_x(step, len(curve)), map_y(value))
                        for step, value in enumerate(curve)
                    ]
                    if len(points) >= 2:
                        draw.line(points, fill=color, width=3)
                    elif len(points) == 1:
                        # A single-step curve degenerates to a dot.
                        x, y = points[0]
                        draw.ellipse((x - 2, y - 2, x + 2, y + 2), fill=color)
                    # Legend swatch + label, stacked vertically.
                    draw.rectangle((legend_x, legend_y, legend_x + 14, legend_y + 10), fill=color)
                    draw.text((legend_x + 20, legend_y - 1), policy, fill="#111111")
                    legend_y += 18

                draw.text((margin_left, height - 45), "Step", fill="#111111")
                draw.text((10, margin_top), "Reward", fill="#111111")
                draw.text((margin_left, 10), "LogiFlow-RL Baseline Reward Curves", fill="#111111")
                image.save(plot_path)
            else:
                # No non-empty curves: report "no plot" rather than an empty image.
                plot_path = None
        except Exception as fallback_exc:
            print(f"Warning: could not generate reward_curves.png with PIL fallback ({fallback_exc})")
            plot_path = None
    return summary_path, curves_path, plot_path
def _print_table(summaries: List[EpisodeSummary]) -> None:
    """Print all episode summaries as a fixed-width text table on stdout."""
    print("task | policy | score | avg_reward | sla | priority | bottlenecks | invalid")
    print("--------+------------+-------+------------+------+----------+-------------+--------")
    row_template = (
        "{s.task_id:7} | {s.policy:10} | {s.score:0.3f} | "
        "{s.average_reward:0.3f} | {s.sla_success_rate:0.3f} | "
        "{s.priority_service_rate:0.3f} | {s.bottlenecks:11} | {s.invalid_actions:7}"
    )
    for entry in summaries:
        print(row_template.format(s=entry))
def main() -> None:
    """Benchmark every baseline policy on every task, then export artifacts.

    Runs the three non-LLM baselines over all tasks, prints a summary table,
    writes JSON/CSV/PNG artifacts, and reports the resilient policy's score
    improvement over the round-robin baseline.
    """
    print("LogiFlow-RL Benchmarks (Hackathon Evidence)")
    print("-------------------------------------------")
    print("Note: this script benchmarks non-LLM baselines only.")
    print("Run train_grpo.py for GRPO LLM training and before/after model evaluation.\n")

    # One episode per (policy, task) pair, grouped by policy.
    summaries: List[EpisodeSummary] = [
        run_policy(task.task_id, policy)
        for policy in ("round_robin", "heuristic", "resilient")
        for task in list_tasks()
    ]

    _print_table(summaries)

    summary_path, curves_path, plot_path = export_artifacts(summaries)
    print("\nArtifacts")
    print(f"- Summary JSON: {summary_path}")
    print(f"- Reward curves: {curves_path}")
    if plot_path:
        print(f"- Reward curves plot: {plot_path}")

    # Headline number: resilient vs. round-robin average score.
    rr_scores = [entry.score for entry in summaries if entry.policy == "round_robin"]
    res_scores = [entry.score for entry in summaries if entry.policy == "resilient"]
    if rr_scores and res_scores:
        rr_avg = mean(rr_scores)
        res_avg = mean(res_scores)
        gain = res_avg - rr_avg
        # Percentage only meaningful for a positive baseline denominator.
        pct = (gain / rr_avg * 100.0) if rr_avg > 0 else 0.0
        print(
            f"\nResilient policy improvement vs round_robin: "
            f"{gain:+0.3f} score points ({pct:+0.1f}%)."
        )
# Script entry point: run the full baseline benchmark suite when executed directly.
if __name__ == "__main__":
    main()