Pratap-K committed on
Commit
e8dc38e
·
1 Parent(s): f5607fa

Fix state endpoint and score range clamping

Browse files
Files changed (6) hide show
  1. create_app_sig.txt +1 -0
  2. final_inference.log +18 -0
  3. output.log +18 -0
  4. routes.txt +1 -0
  5. server/app.py +11 -0
  6. server/env.py +11 -3
create_app_sig.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ (env: 'Callable[[], Environment]', action_cls: 'Type[Action]', observation_cls: 'Type[Observation]', env_name: 'Optional[str]' = None, max_concurrent_envs: 'Optional[int]' = None, concurrency_config: 'Optional[ConcurrencyConfig]' = None, gradio_builder: 'Optional[Callable[..., Any]]' = None) -> 'FastAPI'
final_inference.log ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [START] task=single-label-classify env=meta-content-moderation model=meta-llama/Llama-3.3-70B-Instruct
2
+ [STEP] step=1 action={"content_id":"unknown","labels":["clean"],"action":"approve","confidence":0.0,"reasoning":"No content provided.","policy_citations":[]} reward=0.80 done=false error=null
3
+ [STEP] step=2 action={"content_id":"post_004","labels":["hate_speech"],"action":"remove","confidence":0.95,"reasoning":"Direct call for violence against a group based on protected characteristics.","policy_citations":["Content that directly attacks people based on protected characteristics"]} reward=0.93 done=false error=null
4
+ [STEP] step=3 action={"content_id":"post_011","labels":["clean"],"action":"approve","confidence":0.99,"reasoning":"The post expresses a positive sentiment and does not contain any harmful or violating content.","policy_citations":[]} reward=1.00 done=false error=null
5
+ [STEP] step=4 action={"content_id":"post_027","labels":["misinformation"],"action":"remove","confidence":0.9,"reasoning":"The post spreads debunked conspiracy theories about vaccines, posing a risk to public health.","policy_citations":[]} reward=1.00 done=false error=null
6
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
7
+ [STEP] step=5 action={"content_id":"post_038","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.13 done=false error=null
8
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
9
+ [STEP] step=6 action={"content_id":"post_019","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
10
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
11
+ [STEP] step=7 action={"content_id":"post_040","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
12
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
13
+ [STEP] step=8 action={"content_id":"img_020","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
14
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
15
+ [STEP] step=9 action={"content_id":"img_006","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.95 done=false error=null
16
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
17
+ [STEP] step=10 action={"content_id":"img_015","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=true error=null
18
+ [END] success=true steps=10 score=0.748 rewards=0.80,0.93,1.00,1.00,0.13,0.04,0.04,0.04,0.95,0.04
output.log ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [START] task=single-label-classify env=meta-content-moderation model=meta-llama/Llama-3.3-70B-Instruct
2
+ [STEP] step=1 action={"content_id":"unknown","labels":["clean"],"action":"approve","confidence":1.0,"reasoning":"No content provided to moderate.","policy_citations":[]} reward=1.00 done=false error=null
3
+ [STEP] step=2 action={"content_id":"post_004","labels":["hate_speech","violence"],"action":"remove","confidence":0.95,"reasoning":"The post directly incites violence against a group, violating policies against hate speech and violence.","policy_citations":["Content that directly attacks people based on protected characteristics"]} reward=1.00 done=false error=null
4
+ [STEP] step=3 action={"content_id":"post_011","labels":["clean"],"action":"approve","confidence":0.99,"reasoning":"The post expresses a positive sentiment and does not contain any harmful or violating content.","policy_citations":[]} reward=1.00 done=false error=null
5
+ [STEP] step=4 action={"content_id":"post_027","labels":["misinformation"],"action":"remove","confidence":0.9,"reasoning":"The post spreads debunked conspiracy theories about vaccines, posing a risk to public health.","policy_citations":[]} reward=1.00 done=false error=null
6
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
7
+ [STEP] step=5 action={"content_id":"post_038","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.13 done=false error=null
8
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
9
+ [STEP] step=6 action={"content_id":"post_019","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
10
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
11
+ [STEP] step=7 action={"content_id":"post_040","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
12
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
13
+ [STEP] step=8 action={"content_id":"img_020","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
14
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
15
+ [STEP] step=9 action={"content_id":"img_006","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.95 done=false error=null
16
+ [DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
17
+ [STEP] step=10 action={"content_id":"img_015","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=true error=null
18
+ [END] success=false steps=10 score=0.000 rewards=1.00,1.00,1.00,1.00,0.13,0.04,0.04,0.04,0.95,0.04
routes.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ['/openapi.json', '/docs', '/docs/oauth2-redirect', '/redoc', '/mcp', '/reset', '/step', '/state', '/metadata', '/health', '/schema', '/mcp', '/ws', '/state', '/', '/tasks', '/openenv.yaml']
server/app.py CHANGED
@@ -29,6 +29,17 @@ app = create_app(
29
  max_concurrent_envs=1
30
  )
31
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Custom extra routes specific to this environment
33
  @app.get("/", include_in_schema=False)
34
  def root():
 
29
  max_concurrent_envs=1
30
  )
31
 
32
+ # Remove the default /state route added by create_app so our custom one below is used
33
+ app.router.routes[:] = [r for r in app.router.routes if getattr(r, "path", None) != "/state"]
34
+
35
+ # Force the state endpoint to return our full ModerationState (including 'score')
36
+ @app.get("/state")
37
+ def get_state():
38
+ """Returns the full environment state including custom fields like score."""
39
+ # We use the singleton instance
40
+ env = MetaContentModerationEnv()
41
+ return env.state
42
+
43
  # Custom extra routes specific to this environment
44
  @app.get("/", include_in_schema=False)
45
  def root():
server/env.py CHANGED
@@ -228,8 +228,16 @@ class MetaContentModerationEnv(Environment[ModerationDecision, ModerationObserva
228
 
229
  def _compute_score(self) -> float:
230
  if not self._decisions_log:
231
- return 0.0
232
  max_possible = self._max_steps * 1.0
233
- raw = self._cumulative_reward / max_possible if max_possible > 0 else 0.0
234
- return round(min(max(raw, 0.0), 1.0), 4)
 
 
 
 
 
 
 
 
235
 
 
228
 
229
  def _compute_score(self) -> float:
230
  if not self._decisions_log:
231
+ return 0.01
232
  max_possible = self._max_steps * 1.0
233
+ if max_possible <= 0:
234
+ return 0.01
235
+
236
+ avg_reward = self._cumulative_reward / max_possible
237
+ # Map avg_reward from [-1.0, 1.0] to [0.0, 1.0]
238
+ normalized = (avg_reward + 1.0) / 2.0
239
+
240
+ # Clamp to the inclusive range [0.01, 0.99] so the score stays strictly inside (0, 1) for OpenEnv
241
+ score = min(max(normalized, 0.01), 0.99)
242
+ return round(score, 4)
243