Upload environment/env.py with huggingface_hub
Browse files
- environment/env.py +8 -3
environment/env.py
CHANGED
|
@@ -138,7 +138,11 @@ class ContentModerationEnv:
|
|
| 138 |
|
| 139 |
final_score = None
|
| 140 |
if self._done:
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
info: Dict[str, Any] = {
|
| 144 |
"post_id": post_id,
|
|
@@ -154,7 +158,7 @@ class ContentModerationEnv:
|
|
| 154 |
|
| 155 |
return StepResult(
|
| 156 |
observation=obs,
|
| 157 |
-
reward=
|
| 158 |
done=self._done,
|
| 159 |
info=info,
|
| 160 |
)
|
|
@@ -163,7 +167,8 @@ class ContentModerationEnv:
|
|
| 163 |
"""Return a full snapshot of current internal state."""
|
| 164 |
final_score = None
|
| 165 |
if self._done and self._decisions:
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
return EnvironmentState(
|
| 169 |
session_id=self._session_id,
|
|
|
|
| 138 |
|
| 139 |
final_score = None
|
| 140 |
if self._done:
|
| 141 |
+
raw = self._cumulative_reward / max(len(self._decisions), 1)
|
| 142 |
+
final_score = max(0.01, min(0.99, round(raw, 4)))
|
| 143 |
+
|
| 144 |
+
# Clamp reward to strictly (0, 1)
|
| 145 |
+
clamped_reward = max(0.01, min(0.99, reward_value))
|
| 146 |
|
| 147 |
info: Dict[str, Any] = {
|
| 148 |
"post_id": post_id,
|
|
|
|
| 158 |
|
| 159 |
return StepResult(
|
| 160 |
observation=obs,
|
| 161 |
+
reward=clamped_reward,
|
| 162 |
done=self._done,
|
| 163 |
info=info,
|
| 164 |
)
|
|
|
|
| 167 |
"""Return a full snapshot of current internal state."""
|
| 168 |
final_score = None
|
| 169 |
if self._done and self._decisions:
|
| 170 |
+
raw = self._cumulative_reward / len(self._decisions)
|
| 171 |
+
final_score = max(0.01, min(0.99, round(raw, 4)))
|
| 172 |
|
| 173 |
return EnvironmentState(
|
| 174 |
session_id=self._session_id,
|