Spaces:

sniki28
/

content-moderation-queue

Running

sniki28 committed 9 days ago

Commit

4cb377e

verified ·

1 Parent(s): 7f504b5

Upload environment/env.py with huggingface_hub

Files changed (1) hide show

environment/env.py CHANGED Viewed

@@ -138,7 +138,11 @@ class ContentModerationEnv:
         final_score = None
         if self._done:
-            final_score = round(self._cumulative_reward / max(len(self._decisions), 1), 4)
         info: Dict[str, Any] = {
             "post_id": post_id,
@@ -154,7 +158,7 @@ class ContentModerationEnv:
         return StepResult(
             observation=obs,
-            reward=reward_value,
             done=self._done,
             info=info,
         )
@@ -163,7 +167,8 @@ class ContentModerationEnv:
         """Return a full snapshot of current internal state."""
         final_score = None
         if self._done and self._decisions:
-            final_score = round(self._cumulative_reward / len(self._decisions), 4)
         return EnvironmentState(
             session_id=self._session_id,

         final_score = None
         if self._done:
+            raw = self._cumulative_reward / max(len(self._decisions), 1)
+            final_score = max(0.01, min(0.99, round(raw, 4)))
+        # Clamp reward to strictly (0, 1)
+        clamped_reward = max(0.01, min(0.99, reward_value))
         info: Dict[str, Any] = {
             "post_id": post_id,
         return StepResult(
             observation=obs,
+            reward=clamped_reward,
             done=self._done,
             info=info,
         )
         """Return a full snapshot of current internal state."""
         final_score = None
         if self._done and self._decisions:
+            raw = self._cumulative_reward / len(self._decisions)
+            final_score = max(0.01, min(0.99, round(raw, 4)))
         return EnvironmentState(
             session_id=self._session_id,