Spaces:
Running
Running
| # -*- coding: utf-8 -*- | |
| """Training metrics: diversity, productive signal, coverage, and zero-gradient detection. | |
| Extracted from train.py to keep the training pipeline modular. | |
| """ | |
| from __future__ import annotations | |
| import math | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import numpy as np | |
| # --------------------------------------------------------------------------- | |
| # Thresholds (mirrored from train.py config; imported at call sites) | |
| # --------------------------------------------------------------------------- | |
# Rewards at or below this value are counted as "zero signal" by
# productive_signal_metrics and aggregate_batch_metrics.
ZERO_SIGNAL_REWARD_THRESHOLD = 0.05
# Rewards at or above this value are counted as "trivially solved" by the
# same functions. Both values can be replaced at runtime via set_thresholds.
TRIVIAL_REWARD_THRESHOLD = 0.95
def set_thresholds(zero: float, trivial: float) -> None:
    """Replace the module-level reward thresholds.

    Args:
        zero: New value for ``ZERO_SIGNAL_REWARD_THRESHOLD``.
        trivial: New value for ``TRIVIAL_REWARD_THRESHOLD``.

    The values are stored as given; no validation or ordering check is done.
    """
    global ZERO_SIGNAL_REWARD_THRESHOLD, TRIVIAL_REWARD_THRESHOLD
    ZERO_SIGNAL_REWARD_THRESHOLD, TRIVIAL_REWARD_THRESHOLD = zero, trivial
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
def safe_ratio(numerator: float, denominator: float) -> float:
    """Divide ``numerator`` by ``denominator`` without raising on zero.

    Returns:
        ``0.0`` when ``denominator`` is zero or negative, otherwise the
        floating-point quotient.
    """
    return 0.0 if denominator <= 0 else float(numerator) / float(denominator)
| def _increment_counter(counter: Dict[str, int], key: Any) -> None: | |
| label = str(key or "unknown") | |
| counter[label] = counter.get(label, 0) + 1 | |
| def _normalize_completion_text(text: str) -> str: | |
| return " ".join(str(text or "").strip().split()) | |
def _extract_completion_choice(text: str) -> str:
    """Return the upper-cased decision label parsed from a completion.

    The completion is handed to ``training.episodes.parse_action``; the first
    truthy value among the ``decision``, ``action`` and ``action_type`` fields
    of its result is used. An empty string is returned when nothing usable is
    found.
    """
    # Imported lazily, as in the original, so importing this module does not
    # import training.episodes.
    from training.episodes import parse_action

    parsed = parse_action(str(text or ""))
    if not parsed:
        return ""
    for field in ("decision", "action", "action_type"):
        candidate = parsed.get(field)
        if candidate:
            return str(candidate).upper()
    return ""
| def _shannon_entropy_from_labels(labels: List[str]) -> float: | |
| usable = [label for label in labels if label] | |
| if not usable: | |
| return 0.0 | |
| total = float(len(usable)) | |
| counts: Dict[str, int] = {} | |
| for label in usable: | |
| counts[label] = counts.get(label, 0) + 1 | |
| entropy = 0.0 | |
| for count in counts.values(): | |
| p = count / total | |
| entropy -= p * math.log(p, 2) | |
| return float(entropy) | |
| # --------------------------------------------------------------------------- | |
| # Completion diversity | |
| # --------------------------------------------------------------------------- | |
def completion_diversity_metrics(completions: Optional[List[str]]) -> Dict[str, Any]:
    """Measure how varied a batch of completions is.

    Args:
        completions: Raw completion strings; ``None`` or an empty list yields
            an all-zero result.

    Returns:
        A dict with:
          * ``unique_completion_ratio`` - distinct whitespace-normalized
            completions divided by the number of completions.
          * ``decision_entropy`` - entropy in bits over the parsed decision
            labels (completions with no parsed label are left out).
          * ``decision_variety`` - number of distinct labels, where
            completions with no parsed label count as ``"UNPARSED"``.
          * ``decision_distribution`` - share of each label, keys sorted.
    """
    if not completions:
        return {
            "unique_completion_ratio": 0.0,
            "decision_entropy": 0.0,
            "decision_variety": 0,
            "decision_distribution": {},
        }

    distinct_texts = {_normalize_completion_text(item) for item in completions}
    uniqueness = safe_ratio(len(distinct_texts), len(completions))

    parsed_choices = [_extract_completion_choice(item) for item in completions]
    tally: Dict[str, int] = {}
    for parsed in parsed_choices:
        label = parsed if parsed else "UNPARSED"
        tally[label] = tally.get(label, 0) + 1

    # Every completion contributes exactly one label, so this is the batch size.
    denominator = float(len(parsed_choices) or 1.0)
    distribution: Dict[str, float] = {}
    for label in sorted(tally):
        distribution[label] = round(tally[label] / denominator, 4)

    return {
        "unique_completion_ratio": round(uniqueness, 4),
        "decision_entropy": round(_shannon_entropy_from_labels(parsed_choices), 4),
        "decision_variety": len(tally),
        "decision_distribution": distribution,
    }
| # --------------------------------------------------------------------------- | |
| # Frontier scenarios | |
| # --------------------------------------------------------------------------- | |
def frontier_scenario_keys(curriculum_summary: Optional[Dict[str, Any]]) -> set[Tuple[str, int]]:
    """Collect the ``(task_id, variant_seed)`` pairs marked as frontier scenarios.

    Args:
        curriculum_summary: Curriculum summary dict. The pairs are read from
            ``curriculum_summary["adaptive_difficulty"]["frontier_scenarios"]``.
            ``None``, an empty dict, or missing keys yield an empty set.

    Returns:
        A set of ``(str(task_id), int(variant_seed))`` tuples. A missing
        ``variant_seed`` defaults to ``0``. Malformed entries are skipped.
    """
    if not curriculum_summary:
        return set()
    adaptive = curriculum_summary.get("adaptive_difficulty") or {}
    frontier_scenarios = adaptive.get("frontier_scenarios") or []
    resolved: set = set()
    for item in frontier_scenarios:
        try:
            resolved.add((str(item.get("task_id")), int(item.get("variant_seed", 0))))
        except (AttributeError, TypeError, ValueError):
            # AttributeError: the entry is not a mapping (e.g. a str or None).
            # TypeError / ValueError: variant_seed cannot be converted to int.
            continue
    return resolved
| # --------------------------------------------------------------------------- | |
| # Productive signal metrics | |
| # --------------------------------------------------------------------------- | |
def productive_signal_metrics(
    rewards: List[float],
    task_ids: List[str],
    variant_seeds: List[int],
    curriculum_summary: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Classify rewards as zero-signal, trivial, or productive.

    Args:
        rewards: One reward per sample.
        task_ids: Task id per sample.
        variant_seeds: Variant seed per sample; paired with ``task_ids`` to
            look up frontier scenarios.
        curriculum_summary: Optional curriculum summary. It supplies the
            frontier scenarios, ``active_task_ids`` and, when no frontier
            scenarios are listed, a fallback ``frontier_hit_rate``.

    Returns:
        A dict of fractions rounded to four places, plus the integer
        ``frontier_hit_count``. ``productive_fraction`` and
        ``effective_prompt_ratio`` carry the same value.
    """
    values = [float(item) for item in rewards]
    batch_size = len(values)
    frontier = frontier_scenario_keys(curriculum_summary)

    zero_count = len([v for v in values if v <= ZERO_SIGNAL_REWARD_THRESHOLD])
    trivial_count = len([v for v in values if v >= TRIVIAL_REWARD_THRESHOLD])
    # Clamp at zero in case the two thresholds overlap and a reward is counted twice.
    productive_count = max(0, batch_size - zero_count - trivial_count)

    hit_count = 0
    for tid, seed in zip(task_ids, variant_seeds):
        if (str(tid), int(seed)) in frontier:
            hit_count += 1

    summary = curriculum_summary or {}
    active = list(summary.get("active_task_ids") or [])
    distinct_tasks = len(set(task_ids))
    diversity = safe_ratio(distinct_tasks, len(active) or distinct_tasks or 1)

    productive_share = round(safe_ratio(productive_count, batch_size), 4)
    result = {
        "zero_reward_fraction": round(safe_ratio(zero_count, batch_size), 4),
        "trivially_solved_fraction": round(safe_ratio(trivial_count, batch_size), 4),
        "productive_fraction": productive_share,
        "effective_prompt_ratio": productive_share,
        "frontier_hit_rate": round(safe_ratio(hit_count, batch_size), 4),
        "task_diversity_ratio": round(diversity, 4),
        "frontier_hit_count": hit_count,
    }

    # With no frontier scenarios listed, prefer a rate the summary already carries.
    if frontier or not curriculum_summary:
        return result
    if curriculum_summary.get("frontier_hit_rate") is not None:
        result["frontier_hit_rate"] = float(curriculum_summary.get("frontier_hit_rate", 0.0))
    return result
| # --------------------------------------------------------------------------- | |
| # Training coverage | |
| # --------------------------------------------------------------------------- | |
def training_coverage_metrics(
    histories: List[List[Dict[str, Any]]],
    task_ids: List[str],
    variant_seeds: List[int],
    adversarial_cases: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Count what a batch exercised: tasks, scenarios, workers, decisions.

    Args:
        histories: One list of step dicts per episode. Each step may carry
            ``audit``, ``info``, ``decision``, ``worker_revision`` and
            ``proposal`` sub-dicts.
        task_ids: Task id per sample.
        variant_seeds: Variant seed per sample; missing positions count as 0.
        adversarial_cases: Optional per-sample case text; entries that are
            non-blank after stripping count as adversarial.

    Returns:
        A dict of key-sorted counter dicts, the corrective-loop counts, the
        adversarial case count and its fraction (rounded to four places).
    """
    tasks: Dict[str, int] = {}
    scenarios: Dict[str, int] = {}
    workers: Dict[str, int] = {}
    roles: Dict[str, int] = {}
    misbehaviors: Dict[str, int] = {}
    decisions: Dict[str, int] = {}
    corrective: Dict[str, int] = {"attempted": 0, "approved": 0}

    seed_total = len(variant_seeds)
    for position, task_id in enumerate(task_ids):
        seed = int(variant_seeds[position]) if position < seed_total else 0
        _increment_counter(tasks, task_id)
        _increment_counter(scenarios, f"{task_id}:seed{seed}")

    for step in (entry for episode in histories for entry in episode):
        audit = step.get("audit") or {}
        info = step.get("info") or {}
        decision = step.get("decision") or {}
        revision = step.get("worker_revision") or {}

        worker = audit.get("worker_id") or (step.get("proposal") or {}).get("worker_id")
        if worker:
            _increment_counter(workers, worker)

        role = audit.get("worker_role") or info.get("worker_role")
        if role:
            _increment_counter(roles, role)

        if audit.get("was_misbehavior") or info.get("is_misbehavior"):
            reason = audit.get("reason") or info.get("mb_type") or "unknown"
            _increment_counter(misbehaviors, reason)

        # Every step records a decision; missing ones are filed as "unknown".
        verdict = (
            audit.get("sentinel_decision")
            or decision.get("decision")
            or decision.get("action")
            or "unknown"
        )
        _increment_counter(decisions, verdict)

        if revision.get("attempted"):
            corrective["attempted"] += 1
        if revision.get("revision_approved"):
            corrective["approved"] += 1

    cases = adversarial_cases or []
    adversarial_total = len([case for case in cases if str(case or "").strip()])
    denominator = len(cases) or len(task_ids) or 1

    return {
        "task_counts": dict(sorted(tasks.items())),
        "scenario_counts": dict(sorted(scenarios.items())),
        "worker_counts": dict(sorted(workers.items())),
        "worker_role_counts": dict(sorted(roles.items())),
        "misbehavior_counts": dict(sorted(misbehaviors.items())),
        "oversight_decision_counts": dict(sorted(decisions.items())),
        "corrective_loop_counts": corrective,
        "adversarial_case_count": adversarial_total,
        "adversarial_case_fraction": round(safe_ratio(adversarial_total, denominator), 4),
    }
| # --------------------------------------------------------------------------- | |
| # Zero-gradient group detection | |
| # --------------------------------------------------------------------------- | |
def zero_gradient_group_metrics(
    rewards: List[float],
    task_ids: List[str],
    variant_seeds: List[int],
    prompts: Optional[List[str]] = None,
    adversarial_cases: Optional[List[str]] = None,
    tolerance: float = 1e-9,
) -> Dict[str, Any]:
    """Find reward groups whose samples all received the same reward.

    Samples are grouped by prompt text when a prompt exists for that
    position. Otherwise they are grouped by task id, variant seed and
    whether the adversarial case text is non-blank.

    Args:
        rewards: One reward per sample.
        task_ids: Task id per sample; ``"unknown"`` when missing.
        variant_seeds: Variant seed per sample; ``0`` when missing.
        prompts: Optional prompt per sample, used as the grouping key.
        adversarial_cases: Optional case text per sample.
        tolerance: Largest max-minus-min spread still treated as "no spread".

    Returns:
        Counts of multi-sample groups and zero-spread groups, the fraction of
        the latter, and the mean per-group standard deviation. Groups with a
        single sample are ignored throughout.
    """
    buckets: Dict[str, List[float]] = {}
    for position, reward in enumerate(rewards):
        if prompts and position < len(prompts):
            group_key = str(prompts[position])
        else:
            task = task_ids[position] if position < len(task_ids) else "unknown"
            seed = int(variant_seeds[position]) if position < len(variant_seeds) else 0
            case_text = ""
            if adversarial_cases and position < len(adversarial_cases):
                case_text = str(adversarial_cases[position] or "")
            is_adversarial = bool(case_text.strip())
            group_key = f"{task}:seed{seed}:adv{is_adversarial}"
        if group_key not in buckets:
            buckets[group_key] = []
        buckets[group_key].append(float(reward))

    comparable = [samples for samples in buckets.values() if len(samples) > 1]
    flat_count = 0
    spreads: List[float] = []
    for samples in comparable:
        if max(samples) - min(samples) <= tolerance:
            flat_count += 1
        spreads.append(float(np.std(samples)))

    mean_spread = round(float(np.mean(spreads)), 4) if spreads else 0.0
    return {
        "reward_group_count": len(comparable),
        "zero_gradient_group_count": flat_count,
        "zero_gradient_group_fraction": round(safe_ratio(flat_count, len(comparable)), 4),
        "mean_reward_group_std": mean_spread,
    }
| # --------------------------------------------------------------------------- | |
| # SENTINEL history summarization | |
| # --------------------------------------------------------------------------- | |
def summarize_sentinel_history(history: List[Dict[str, Any]]) -> Dict[str, float]:
    """Roll one episode's steps up into oversight statistics.

    Args:
        history: Step dicts for one episode. Steps with a truthy ``audit``
            feed the detection and damage figures; ``worker_revision`` and
            ``reward_breakdown`` are read from every step.

    Returns:
        Counts (as floats), damage totals and rates rounded to four places.
        Any decision other than ``"APPROVE"`` counts as an intervention.
        ``false_positive_rate`` is taken over audited steps that were not
        misbehaviors.
    """
    audits = [step["audit"] for step in history if step.get("audit")]

    misbehavior_total = 0
    caught_total = 0
    false_positive_total = 0
    false_negative_total = 0
    for audit in audits:
        approved = audit.get("sentinel_decision") == "APPROVE"
        if audit.get("was_misbehavior"):
            misbehavior_total += 1
            if approved:
                false_negative_total += 1
            else:
                caught_total += 1
        elif not approved:
            false_positive_total += 1

    attempts = 0
    successes = 0
    coaching_scores: List[float] = []
    for step in history:
        revision = step.get("worker_revision") or {}
        if revision.get("attempted"):
            attempts += 1
        if revision.get("revision_approved"):
            successes += 1
        score = (step.get("reward_breakdown") or {}).get("coaching_quality")
        if score is not None:
            coaching_scores.append(float(score))

    prevented = sum(float(audit.get("prevented_damage_score") or 0.0) for audit in audits)
    allowed = sum(float(audit.get("allowed_damage_score") or 0.0) for audit in audits)
    # Damage without oversight is taken as what was prevented plus what got through.
    unsupervised = prevented + allowed
    benign_actions = max(0, len(audits) - misbehavior_total)
    mean_coaching = round(float(np.mean(coaching_scores)), 4) if coaching_scores else 0.0

    return {
        "steps": float(len(history)),
        "misbehaviors": float(misbehavior_total),
        "caught": float(caught_total),
        "false_positives": float(false_positive_total),
        "false_negatives": float(false_negative_total),
        "revision_attempts": float(attempts),
        "revision_successes": float(successes),
        "prevented_damage_total": round(prevented, 4),
        "allowed_damage_total": round(allowed, 4),
        "twin_without_sentinel_damage_total": round(unsupervised, 4),
        "twin_with_sentinel_damage_total": round(allowed, 4),
        "twin_prevented_damage_total": round(prevented, 4),
        "twin_damage_reduction_rate": round(safe_ratio(prevented, unsupervised), 4),
        "coaching_quality": mean_coaching,
        "detection_rate": round(safe_ratio(caught_total, misbehavior_total), 4),
        "false_positive_rate": round(safe_ratio(false_positive_total, benign_actions), 4),
        "risk_reduction_rate": round(safe_ratio(prevented, prevented + allowed), 4),
        "worker_rehabilitation_rate": round(safe_ratio(successes, attempts), 4),
    }
| # --------------------------------------------------------------------------- | |
| # Aggregate batch metrics | |
| # --------------------------------------------------------------------------- | |
# Rollup fields that are whole-number counts (reported as ints).
_SENTINEL_COUNT_KEYS: Tuple[str, ...] = (
    "misbehaviors",
    "caught",
    "false_positives",
    "false_negatives",
    "revision_attempts",
    "revision_successes",
)
# Rollup fields that are damage totals (reported rounded to four places).
_SENTINEL_DAMAGE_KEYS: Tuple[str, ...] = (
    "prevented_damage_total",
    "allowed_damage_total",
    "twin_without_sentinel_damage_total",
    "twin_with_sentinel_damage_total",
    "twin_prevented_damage_total",
)
# Every field summed across histories, in reporting order.
_SENTINEL_ROLLUP_KEYS: Tuple[str, ...] = _SENTINEL_COUNT_KEYS + _SENTINEL_DAMAGE_KEYS


def _new_task_bucket() -> Dict[str, Any]:
    """Return an empty per-task accumulator for aggregate_batch_metrics."""
    return {
        "count": 0,
        "reward_values": [],
        "step_values": [],
        "variant_seeds": set(),
        "sentinel_totals": dict.fromkeys(_SENTINEL_ROLLUP_KEYS, 0.0),
        "coaching_quality_values": [],
        "zero_reward_count": 0,
        "trivial_reward_count": 0,
        "productive_count": 0,
        "frontier_hits": 0,
    }


def _sentinel_rate_summary(
    totals: Dict[str, float],
    coaching_quality: float,
    safe_actions: float,
) -> Dict[str, Any]:
    """Turn summed sentinel rollup totals into reported counts, totals and rates."""
    summary: Dict[str, Any] = {key: int(totals[key]) for key in _SENTINEL_COUNT_KEYS}
    for key in _SENTINEL_DAMAGE_KEYS:
        summary[key] = round(totals[key], 4)
    prevented = totals["prevented_damage_total"]
    allowed = totals["allowed_damage_total"]
    summary.update(
        {
            "twin_damage_reduction_rate": round(
                safe_ratio(
                    totals["twin_prevented_damage_total"],
                    totals["twin_without_sentinel_damage_total"],
                ),
                4,
            ),
            "coaching_quality": round(coaching_quality, 4),
            "detection_rate": round(safe_ratio(totals["caught"], totals["misbehaviors"]), 4),
            "false_positive_rate": round(safe_ratio(totals["false_positives"], safe_actions), 4),
            "risk_reduction_rate": round(safe_ratio(prevented, prevented + allowed), 4),
            "worker_rehabilitation_rate": round(
                safe_ratio(totals["revision_successes"], totals["revision_attempts"]),
                4,
            ),
        }
    )
    return summary


def aggregate_batch_metrics(
    rewards: List[float],
    histories: List[List[Dict[str, Any]]],
    task_ids: List[str],
    variant_seeds: List[int],
    sentinel_task_ids: Optional[List[str]] = None,
    completions: Optional[List[str]] = None,
    prompts: Optional[List[str]] = None,
    adversarial_cases: Optional[List[str]] = None,
    curriculum_summary: Optional[Dict[str, Any]] = None,
    prompt_refreshes: int = 0,
) -> Dict[str, Any]:
    """Build the full metrics payload for one training batch.

    Args:
        rewards: One reward per sample.
        histories: One list of step dicts per sample.
        task_ids: Task id per sample.
        variant_seeds: Variant seed per sample; missing positions count as 0.
        sentinel_task_ids: Task ids that mark a batch as a sentinel batch.
            Defaults to the four built-in sentinel task ids.
        completions: Optional raw completions, used for diversity metrics.
        prompts: Optional prompts, used to group samples for zero-gradient
            detection.
        adversarial_cases: Optional per-sample adversarial case text.
        curriculum_summary: Optional curriculum summary; echoed under
            ``"curriculum"`` and used for frontier lookups.
        prompt_refreshes: Echoed into the payload unchanged.

    Returns:
        A dict with reward statistics, a ``per_task`` breakdown, and the
        outputs of completion_diversity_metrics, productive_signal_metrics,
        training_coverage_metrics and zero_gradient_group_metrics merged in.
        When any task id is a sentinel task id, oversight counts and rates
        are added per task and for the whole batch.
    """
    if sentinel_task_ids is None:
        sentinel_task_ids = [
            "basic_oversight",
            "fleet_monitoring_conflict",
            "adversarial_worker",
            "multi_crisis_command",
        ]
    is_sentinel_batch = any(task_id in sentinel_task_ids for task_id in task_ids)
    safe_rewards = [float(r) for r in rewards]
    prod_metrics = productive_signal_metrics(
        rewards=safe_rewards,
        task_ids=task_ids,
        variant_seeds=variant_seeds,
        curriculum_summary=curriculum_summary,
    )
    fkeys = frontier_scenario_keys(curriculum_summary)

    reward_mean = float(np.mean(safe_rewards)) if safe_rewards else 0.0
    reward_min = float(np.min(safe_rewards)) if safe_rewards else 0.0
    reward_max = float(np.max(safe_rewards)) if safe_rewards else 0.0
    reward_std = float(np.std(safe_rewards)) if safe_rewards else 0.0
    avg_steps = float(np.mean([len(history) for history in histories])) if histories else 0.0

    # Summarize each history once; both the per-task and batch totals reuse it.
    rollups = (
        [summarize_sentinel_history(history) for history in histories]
        if is_sentinel_batch
        else []
    )

    # Task id for rewards that have no matching task_ids entry. Falls back to
    # "unknown" when the pool is empty instead of raising IndexError.
    fallback_pool = sentinel_task_ids if is_sentinel_batch else task_ids
    fallback_task_id = fallback_pool[0] if fallback_pool else "unknown"

    buckets: Dict[str, Dict[str, Any]] = {}
    for idx, reward in enumerate(safe_rewards):
        task_id = task_ids[idx] if idx < len(task_ids) else fallback_task_id
        variant_seed = int(variant_seeds[idx]) if idx < len(variant_seeds) else 0
        history = histories[idx] if idx < len(histories) else []

        bucket = buckets.get(task_id)
        if bucket is None:
            bucket = buckets[task_id] = _new_task_bucket()

        bucket["count"] += 1
        bucket["reward_values"].append(float(reward))
        bucket["step_values"].append(len(history))
        bucket["variant_seeds"].add(variant_seed)
        if reward <= ZERO_SIGNAL_REWARD_THRESHOLD:
            bucket["zero_reward_count"] += 1
        elif reward >= TRIVIAL_REWARD_THRESHOLD:
            bucket["trivial_reward_count"] += 1
        else:
            bucket["productive_count"] += 1
        if (str(task_id), int(variant_seed)) in fkeys:
            bucket["frontier_hits"] += 1

        if is_sentinel_batch:
            # A reward without a history is rolled up as an empty episode.
            rollup = rollups[idx] if idx < len(rollups) else summarize_sentinel_history([])
            totals = bucket["sentinel_totals"]
            for key in _SENTINEL_ROLLUP_KEYS:
                totals[key] += float(rollup[key])
            bucket["coaching_quality_values"].append(float(rollup.get("coaching_quality", 0.0)))

    per_task: Dict[str, Dict[str, Any]] = {}
    for task_id, bucket in buckets.items():
        count = bucket["count"]
        task_summary: Dict[str, Any] = {
            "count": count,
            "reward_mean": round(float(np.mean(bucket["reward_values"])), 4) if bucket["reward_values"] else 0.0,
            "avg_steps": round(float(np.mean(bucket["step_values"])), 4) if bucket["step_values"] else 0.0,
            "variant_seeds": sorted(bucket["variant_seeds"]),
            "zero_reward_fraction": round(safe_ratio(bucket["zero_reward_count"], count), 4),
            "trivially_solved_fraction": round(safe_ratio(bucket["trivial_reward_count"], count), 4),
            "productive_fraction": round(safe_ratio(bucket["productive_count"], count), 4),
            "frontier_hit_rate": round(safe_ratio(bucket["frontier_hits"], count), 4),
        }
        if is_sentinel_batch:
            totals = bucket["sentinel_totals"]
            coaching_values = bucket["coaching_quality_values"]
            # NOTE(review): safe actions here are total steps minus
            # misbehaviors, whereas summarize_sentinel_history uses audited
            # steps minus misbehaviors - confirm the difference is intended.
            task_safe_actions = max(0.0, float(sum(bucket["step_values"])) - totals["misbehaviors"])
            task_summary.update(
                _sentinel_rate_summary(
                    totals,
                    float(np.mean(coaching_values)) if coaching_values else 0.0,
                    task_safe_actions,
                )
            )
        per_task[task_id] = task_summary

    payload: Dict[str, Any] = {
        "reward_mean": round(reward_mean, 4),
        "reward_min": round(reward_min, 4),
        "reward_max": round(reward_max, 4),
        "reward_std": round(reward_std, 4),
        "avg_steps": round(avg_steps, 4),
        "batch_size": len(safe_rewards),
        "prompt_refreshes": prompt_refreshes,
        "per_task": per_task,
        "curriculum": curriculum_summary or {},
    }
    payload.update(completion_diversity_metrics(completions))
    payload.update(prod_metrics)
    payload.update(training_coverage_metrics(histories, task_ids, variant_seeds, adversarial_cases))
    payload.update(
        zero_gradient_group_metrics(
            rewards=safe_rewards,
            task_ids=task_ids,
            variant_seeds=variant_seeds,
            prompts=prompts,
            adversarial_cases=adversarial_cases,
        )
    )

    if is_sentinel_batch:
        overall: Dict[str, float] = dict.fromkeys(_SENTINEL_ROLLUP_KEYS, 0.0)
        coaching_quality_sum = 0.0
        for rollup in rollups:
            for key in _SENTINEL_ROLLUP_KEYS:
                overall[key] += float(rollup[key])
            coaching_quality_sum += float(rollup.get("coaching_quality", 0.0))
        total_steps = float(sum(len(history) for history in histories))
        safe_actions = max(0.0, total_steps - overall["misbehaviors"])
        payload.update(
            _sentinel_rate_summary(
                overall,
                # Batch coaching quality is the mean of the per-history values.
                safe_ratio(coaching_quality_sum, float(len(rollups))),
                safe_actions,
            )
        )
    return payload