Spaces:
Sleeping
Sleeping
File size: 1,282 Bytes
3807ea3 be8eade | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | """Reward functions exposed for TRL/GRPO logging."""
def _values(name: str, completions, kwargs):
return [float(x) for x in kwargs.get(name, [0.0] * len(completions))]
def reward_total(completions, **kwargs):
    """Return the ``reward_total`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_total"
    return _values(key, completions, kwargs)
def reward_security(completions, **kwargs):
    """Return the ``reward_security`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_security"
    return _values(key, completions, kwargs)
def reward_regression(completions, **kwargs):
    """Return the ``reward_regression`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_regression"
    return _values(key, completions, kwargs)
def reward_patch_quality(completions, **kwargs):
    """Return the ``reward_patch_quality`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_patch_quality"
    return _values(key, completions, kwargs)
def reward_anti_cheat(completions, **kwargs):
    """Return the ``reward_anti_cheat`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_anti_cheat"
    return _values(key, completions, kwargs)
def reward_terminal_15(completions, **kwargs):
    """Return the ``reward_terminal_15`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_terminal_15"
    return _values(key, completions, kwargs)
def reward_progressive_5(completions, **kwargs):
    """Return the ``reward_progressive_5`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_progressive_5"
    return _values(key, completions, kwargs)
def reward_step_penalty(completions, **kwargs):
    """Return the ``reward_step_penalty`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_step_penalty"
    return _values(key, completions, kwargs)
def reward_speed_bonus(completions, **kwargs):
    """Return the ``reward_speed_bonus`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_speed_bonus"
    return _values(key, completions, kwargs)
def reward_behavior_penalty(completions, **kwargs):
    """Return the ``reward_behavior_penalty`` entry of ``kwargs`` as floats.

    Delegates to ``_values``, which yields one ``0.0`` per completion when
    the entry is missing.
    """
    key = "reward_behavior_penalty"
    return _values(key, completions, kwargs)
|