Spaces:
Sleeping
Sleeping
File size: 1,774 Bytes
"""Server-side verifier aggregation for terminal scoring."""
from __future__ import annotations
try:
from ..models import CyberSecurityOWASPAction, CyberSecurityOWASPState
from ..rewards import compute_reward
from ..validators import (
patch_quality,
run_hidden_regression_tests,
run_hidden_security_tests,
run_public_route_tests,
run_visible_tests,
verify_finding,
)
except ImportError: # pragma: no cover
from models import CyberSecurityOWASPAction, CyberSecurityOWASPState
from rewards import compute_reward
from validators import (
patch_quality,
run_hidden_regression_tests,
run_hidden_security_tests,
run_public_route_tests,
run_visible_tests,
verify_finding,
)
def evaluate_action(
    state: CyberSecurityOWASPState,
    action: CyberSecurityOWASPAction,
    anti_cheat_flags: list[str] | None = None,
) -> tuple[dict, dict[str, float]]:
    """Run the verifiers selected by ``action.tool_name`` and score the action.

    Args:
        state: Environment state handed to every validator and to
            ``compute_reward``.
        action: The action being evaluated; its ``tool_name`` picks which
            validators run, and ``arguments`` is forwarded to
            ``verify_finding`` for ``"submit_finding"``.
        anti_cheat_flags: Flags recorded under ``"anti_cheat_flags"`` in the
            result. ``None`` (or an empty list) is stored as a new empty
            list; a non-empty list is stored as the same object, not a copy.

    Returns:
        A ``(verifier_result, reward)`` pair, where ``reward`` is whatever
        ``compute_reward(state, action, verifier_result)`` returns.
        Tool names other than ``"submit_finding"``, ``"run_visible_tests"``
        and ``"submit_fix"`` yield a result holding only the flags entry.
    """
    flags = anti_cheat_flags if anti_cheat_flags else []
    outcome: dict = {"anti_cheat_flags": flags}
    tool = action.tool_name

    if tool == "submit_fix":
        # A fix submission runs the full suite; the tuple order fixes both
        # the execution order and the key order of the result dict.
        fix_checks = (
            ("visible", run_visible_tests),
            ("security", run_hidden_security_tests),
            ("regression", run_hidden_regression_tests),
            ("public_routes", run_public_route_tests),
            ("patch_quality", patch_quality),
        )
        for key, check in fix_checks:
            outcome[key] = check(state)
    elif tool == "run_visible_tests":
        outcome["visible"] = run_visible_tests(state)
    elif tool == "submit_finding":
        outcome["finding"] = verify_finding(state, action.arguments)

    reward = compute_reward(state, action, outcome)
    return outcome, reward
|