sniki28 committed on
Commit
4cb377e
·
verified ·
1 Parent(s): 7f504b5

Upload environment/env.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. environment/env.py +8 -3
environment/env.py CHANGED
@@ -138,7 +138,11 @@ class ContentModerationEnv:
138
 
139
  final_score = None
140
  if self._done:
141
- final_score = round(self._cumulative_reward / max(len(self._decisions), 1), 4)
 
 
 
 
142
 
143
  info: Dict[str, Any] = {
144
  "post_id": post_id,
@@ -154,7 +158,7 @@ class ContentModerationEnv:
154
 
155
  return StepResult(
156
  observation=obs,
157
- reward=reward_value,
158
  done=self._done,
159
  info=info,
160
  )
@@ -163,7 +167,8 @@ class ContentModerationEnv:
163
  """Return a full snapshot of current internal state."""
164
  final_score = None
165
  if self._done and self._decisions:
166
- final_score = round(self._cumulative_reward / len(self._decisions), 4)
 
167
 
168
  return EnvironmentState(
169
  session_id=self._session_id,
 
138
 
139
  final_score = None
140
  if self._done:
141
+ raw = self._cumulative_reward / max(len(self._decisions), 1)
142
+ final_score = max(0.01, min(0.99, round(raw, 4)))
143
+
144
+ # Clamp reward to strictly (0, 1)
145
+ clamped_reward = max(0.01, min(0.99, reward_value))
146
 
147
  info: Dict[str, Any] = {
148
  "post_id": post_id,
 
158
 
159
  return StepResult(
160
  observation=obs,
161
+ reward=clamped_reward,
162
  done=self._done,
163
  info=info,
164
  )
 
167
  """Return a full snapshot of current internal state."""
168
  final_score = None
169
  if self._done and self._decisions:
170
+ raw = self._cumulative_reward / len(self._decisions)
171
+ final_score = max(0.01, min(0.99, round(raw, 4)))
172
 
173
  return EnvironmentState(
174
  session_id=self._session_id,