"""Tests for CyberSecurity_OWASP reward configuration loading, hashing, and ablations."""
from pathlib import Path
import pytest
from CyberSecurity_OWASP.reward_config import (
compute_token_penalty,
flatten_reward_config,
load_reward_settings,
reward_config_hash,
reward_config_run_config,
reward_config_summary,
)
def test_default_reward_config_has_descriptions(monkeypatch):
    """Default settings expose documented values and every dict entry has a description.

    Clears reward-related environment overrides first so the defaults are
    actually exercised: the loader reads these variables (see the env-override
    test below), and a value leaked from the surrounding shell would otherwise
    make this test flaky.
    """
    for var in (
        "CYBERSECURITY_OWASP_REWARD_MODE",
        "CYBERSECURITY_OWASP_REWARD_STAGE",
        "CYBERSECURITY_OWASP_SHAPING_WEIGHT",
        "CYBERSECURITY_OWASP_REWARD_CONFIG",
    ):
        monkeypatch.delenv(var, raising=False)
    settings = load_reward_settings()
    assert settings.mode == "sparse_eval"
    assert settings.training_mode == "dense_train"
    assert settings.value("terminal_cap") == 15.0
    # Every structured (dict-valued) entry must be self-documenting.
    for key, value in settings.raw.items():
        if isinstance(value, dict):
            assert value.get("description")
def test_reward_config_env_overrides(monkeypatch):
    """Environment variables override reward mode, stage, and shaping weight."""
    overrides = {
        "CYBERSECURITY_OWASP_REWARD_MODE": "dense_train",
        "CYBERSECURITY_OWASP_REWARD_STAGE": "late",
        "CYBERSECURITY_OWASP_SHAPING_WEIGHT": "0.25",
    }
    for name, raw_value in overrides.items():
        monkeypatch.setenv(name, raw_value)

    settings = load_reward_settings()

    assert settings.mode == "dense_train"
    assert settings.stage == "late"
    assert settings.shaping_weight == 0.25
    # Under these overrides an 850-token response lands in the penalised band.
    assert compute_token_penalty(850, settings) == -0.5
def test_reward_config_hash_and_flattened_values_are_deterministic(monkeypatch):
    """Hashing, summary, run-config, and flattening agree and are reproducible."""
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_MODE", "dense_train")
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_STAGE", "middle")

    loaded = load_reward_settings()
    digest = reward_config_hash(loaded)

    # Loading the same config twice must yield the identical 64-hex-char digest.
    assert reward_config_hash(load_reward_settings()) == digest
    assert len(digest) == 64

    summary = reward_config_summary(loaded)
    assert summary["reward_config_hash"] == digest
    assert summary["reward_config_id"].endswith(digest[:12])

    run_config = reward_config_run_config(loaded)
    assert run_config["reward_config_hash"] == digest
    assert run_config["reward_mode"] == "dense_train"
    assert run_config["reward_stage"] == "middle"
    # Values surface both nested and as flattened double-underscore keys.
    assert run_config["reward_config_values"]["policy_inspected"]["value"] == 0.30
    assert run_config["reward_config_values"]["shaping_weight"]["stage_value"] == 0.7
    assert run_config["reward_config__policy_inspected__value"] == 0.30
    assert run_config["reward_config__shaping_weight__stage_value"] == 0.7
    assert "policy_inspected" in run_config["reward_config_values_json"]

    flattened = {entry["key"]: entry for entry in flatten_reward_config(loaded)}
    assert flattened["policy_inspected"]["value"] == 0.30
    assert flattened["shaping_weight"]["stage_value"] == 0.7
    assert flattened["shaping_weight"]["resolved"] == 0.7
    assert flattened["step_penalty"]["stage_value"] == -0.01
    assert flattened["oversized_patch"]["threshold"] == 80
    assert flattened["oversized_patch"]["severe_threshold"] == 180
    assert flattened["hidden_file_probe"]["terminate"] is True
def test_reward_ablation_configs_extend_default_and_have_unique_hashes(monkeypatch):
    """Each ablation YAML overrides the defaults and hashes to a distinct digest."""
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_MODE", "dense_train")
    ablation_dir = Path("training/configs/reward_ablations")
    names = [
        "A0_sparse_terminal_only.yaml",
        "A2_reduced_shaping.yaml",
        "A6_visible_gate.yaml",
        "A7_evidence045.yaml",
        "A3_no_speed_token.yaml",
    ]
    loaded = {name: load_reward_settings(ablation_dir / name) for name in names}

    # Distinct ablations must never collide on their config hash.
    assert len({reward_config_hash(s) for s in loaded.values()}) == len(names)

    sparse = loaded["A0_sparse_terminal_only.yaml"]
    assert sparse.shaping_weight == 0.0
    assert sparse.value("progressive_cap") == 0.0
    assert sparse.value("terminal_cap") == 12.0

    reduced = loaded["A2_reduced_shaping.yaml"]
    assert reduced.shaping_weight == 0.35
    assert reduced.value("progressive_cap") == 2.5

    gated = loaded["A6_visible_gate.yaml"]
    assert gated.value("visible_tests_improved") == 0.0
    assert gated.value("app_boots_after_patch") == 0.10

    assert loaded["A7_evidence045.yaml"].value("local_evidence_found") == 0.45

    no_speed = loaded["A3_no_speed_token.yaml"]
    assert no_speed.value("speed_bonus") == 0.0
    assert compute_token_penalty(850, no_speed) == 0.0
def test_reward_config_run_config_includes_variant(monkeypatch):
    """The run config records the variant tag and the source config filename."""
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_MODE", "dense_train")
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_VARIANT", "abl-a2-shape035")

    settings = load_reward_settings(
        "training/configs/reward_ablations/A2_reduced_shaping.yaml"
    )
    run_config = reward_config_run_config(settings)

    assert run_config["reward_variant"] == "abl-a2-shape035"
    assert run_config["reward_config_source_name"] == "A2_reduced_shaping.yaml"
    assert run_config["reward_config__shaping_weight__stage_value"] == 0.35
def test_reward_config_rejects_missing_descriptions(monkeypatch, tmp_path):
    """A config whose dict entries lack a 'description' is rejected with ValueError.

    The malformed YAML is written under pytest's per-test ``tmp_path`` instead
    of a shared ``outputs/`` directory: parallel test runs cannot collide on
    the file, and no manual mkdir/try/finally cleanup is needed.
    """
    config_path = tmp_path / "test_reward_config_bad.yaml"
    # YAML payload kept byte-identical to the original fixture; note the
    # single-space indentation is what the loader is expected to parse —
    # presumably it normalises or tolerates it; verify against the loader.
    config_path.write_text(
        "reward:\n mode: sparse_eval\n policy_inspected:\n value: 0.3\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("CYBERSECURITY_OWASP_REWARD_CONFIG", str(config_path))
    with pytest.raises(ValueError, match="description"):
        load_reward_settings()