| from app.training.reward_functions import ( |
| abstention_quality_score, |
| anti_cheat_score, |
| candidate_alignment_score, |
| disease_stability_score, |
| dosing_quality_score, |
| efficiency_score, |
| explanation_grounding_score, |
| format_compliance_score, |
| legality_score, |
| process_fidelity_score, |
| safety_delta_score, |
| uncertainty_calibration_score, |
| ) |
|
|
|
|
| def _in_range(v: float) -> bool: |
| return 0.001 <= v <= 0.999 |
|
|
|
|
def test_all_reward_functions_range() -> None:
    """Check that each reward function returns a value accepted by `_in_range`.

    Every reward function is called once with a single representative
    argument. The results are keyed by function name so that a failure
    reports exactly which scores fell outside the accepted range, rather
    than a bare ``assert all(...)`` failure with no context.
    """
    scores = {
        "format_compliance_score": format_compliance_score(True),
        "candidate_alignment_score": candidate_alignment_score(True),
        "legality_score": legality_score(True),
        "safety_delta_score": safety_delta_score(0.2),
        "disease_stability_score": disease_stability_score(0.8),
        "dosing_quality_score": dosing_quality_score(0.7),
        "abstention_quality_score": abstention_quality_score(True),
        "efficiency_score": efficiency_score(0.4),
        "process_fidelity_score": process_fidelity_score(0.9),
        "explanation_grounding_score": explanation_grounding_score(0.75),
        "anti_cheat_score": anti_cheat_score(False),
        "uncertainty_calibration_score": uncertainty_calibration_score(0.65),
    }

    # Collect every offender so a single run shows all out-of-range scores.
    out_of_range = {
        name: value for name, value in scores.items() if not _in_range(value)
    }
    assert not out_of_range, f"reward scores outside accepted range: {out_of_range}"
|
|