Fix state endpoint and score range clamping
Browse files- create_app_sig.txt +1 -0
- final_inference.log +18 -0
- output.log +18 -0
- routes.txt +1 -0
- server/app.py +11 -0
- server/env.py +11 -3
create_app_sig.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
(env: 'Callable[[], Environment]', action_cls: 'Type[Action]', observation_cls: 'Type[Observation]', env_name: 'Optional[str]' = None, max_concurrent_envs: 'Optional[int]' = None, concurrency_config: 'Optional[ConcurrencyConfig]' = None, gradio_builder: 'Optional[Callable[..., Any]]' = None) -> 'FastAPI'
|
final_inference.log
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[START] task=single-label-classify env=meta-content-moderation model=meta-llama/Llama-3.3-70B-Instruct
|
| 2 |
+
[STEP] step=1 action={"content_id":"unknown","labels":["clean"],"action":"approve","confidence":0.0,"reasoning":"No content provided.","policy_citations":[]} reward=0.80 done=false error=null
|
| 3 |
+
[STEP] step=2 action={"content_id":"post_004","labels":["hate_speech"],"action":"remove","confidence":0.95,"reasoning":"Direct call for violence against a group based on protected characteristics.","policy_citations":["Content that directly attacks people based on protected characteristics"]} reward=0.93 done=false error=null
|
| 4 |
+
[STEP] step=3 action={"content_id":"post_011","labels":["clean"],"action":"approve","confidence":0.99,"reasoning":"The post expresses a positive sentiment and does not contain any harmful or violating content.","policy_citations":[]} reward=1.00 done=false error=null
|
| 5 |
+
[STEP] step=4 action={"content_id":"post_027","labels":["misinformation"],"action":"remove","confidence":0.9,"reasoning":"The post spreads debunked conspiracy theories about vaccines, posing a risk to public health.","policy_citations":[]} reward=1.00 done=false error=null
|
| 6 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 7 |
+
[STEP] step=5 action={"content_id":"post_038","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.13 done=false error=null
|
| 8 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 9 |
+
[STEP] step=6 action={"content_id":"post_019","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 10 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 11 |
+
[STEP] step=7 action={"content_id":"post_040","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 12 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 13 |
+
[STEP] step=8 action={"content_id":"img_020","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 14 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 15 |
+
[STEP] step=9 action={"content_id":"img_006","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.95 done=false error=null
|
| 16 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 17 |
+
[STEP] step=10 action={"content_id":"img_015","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=true error=null
|
| 18 |
+
[END] success=true steps=10 score=0.748 rewards=0.80,0.93,1.00,1.00,0.13,0.04,0.04,0.04,0.95,0.04
|
output.log
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[START] task=single-label-classify env=meta-content-moderation model=meta-llama/Llama-3.3-70B-Instruct
|
| 2 |
+
[STEP] step=1 action={"content_id":"unknown","labels":["clean"],"action":"approve","confidence":1.0,"reasoning":"No content provided to moderate.","policy_citations":[]} reward=1.00 done=false error=null
|
| 3 |
+
[STEP] step=2 action={"content_id":"post_004","labels":["hate_speech","violence"],"action":"remove","confidence":0.95,"reasoning":"The post directly incites violence against a group, violating policies against hate speech and violence.","policy_citations":["Content that directly attacks people based on protected characteristics"]} reward=1.00 done=false error=null
|
| 4 |
+
[STEP] step=3 action={"content_id":"post_011","labels":["clean"],"action":"approve","confidence":0.99,"reasoning":"The post expresses a positive sentiment and does not contain any harmful or violating content.","policy_citations":[]} reward=1.00 done=false error=null
|
| 5 |
+
[STEP] step=4 action={"content_id":"post_027","labels":["misinformation"],"action":"remove","confidence":0.9,"reasoning":"The post spreads debunked conspiracy theories about vaccines, posing a risk to public health.","policy_citations":[]} reward=1.00 done=false error=null
|
| 6 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 7 |
+
[STEP] step=5 action={"content_id":"post_038","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.13 done=false error=null
|
| 8 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 9 |
+
[STEP] step=6 action={"content_id":"post_019","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 10 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 11 |
+
[STEP] step=7 action={"content_id":"post_040","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 12 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 13 |
+
[STEP] step=8 action={"content_id":"img_020","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=false error=null
|
| 14 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 15 |
+
[STEP] step=9 action={"content_id":"img_006","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.95 done=false error=null
|
| 16 |
+
[DEBUG] Model call failed: Error code: 402 - {'error': 'You have depleted your monthly included credits. Purchase pre-paid credits to continue using Inference Providers. Alternatively, subscribe to PRO to get 20x more included usage.'}
|
| 17 |
+
[STEP] step=10 action={"content_id":"img_015","labels":["clean"],"action":"approve","confidence":0.5,"reasoning":"Model fallback \u2014 defaulting to approve","policy_citations":[]} reward=0.04 done=true error=null
|
| 18 |
+
[END] success=false steps=10 score=0.000 rewards=1.00,1.00,1.00,1.00,0.13,0.04,0.04,0.04,0.95,0.04
|
routes.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
['/openapi.json', '/docs', '/docs/oauth2-redirect', '/redoc', '/mcp', '/reset', '/step', '/state', '/metadata', '/health', '/schema', '/mcp', '/ws', '/state', '/', '/tasks', '/openenv.yaml']
|
server/app.py
CHANGED
|
@@ -29,6 +29,17 @@ app = create_app(
|
|
| 29 |
max_concurrent_envs=1
|
| 30 |
)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
# Custom extra routes specific to this environment
|
| 33 |
@app.get("/", include_in_schema=False)
|
| 34 |
def root():
|
|
|
|
| 29 |
max_concurrent_envs=1
|
| 30 |
)
|
| 31 |
|
| 32 |
+
# Remove the default /state route added by create_app so our custom one below is used
|
| 33 |
+
app.router.routes[:] = [r for r in app.router.routes if getattr(r, "path", None) != "/state"]
|
| 34 |
+
|
| 35 |
+
# Force the state endpoint to return our full ModerationState (including 'score')
|
| 36 |
+
@app.get("/state")
|
| 37 |
+
def get_state():
|
| 38 |
+
"""Returns the full environment state including custom fields like score."""
|
| 39 |
+
# We use the singleton instance
|
| 40 |
+
env = MetaContentModerationEnv()
|
| 41 |
+
return env.state
|
| 42 |
+
|
| 43 |
# Custom extra routes specific to this environment
|
| 44 |
@app.get("/", include_in_schema=False)
|
| 45 |
def root():
|
server/env.py
CHANGED
|
@@ -228,8 +228,16 @@ class MetaContentModerationEnv(Environment[ModerationDecision, ModerationObserva
|
|
| 228 |
|
| 229 |
def _compute_score(self) -> float:
|
| 230 |
if not self._decisions_log:
|
| 231 |
-
return 0.
|
| 232 |
max_possible = self._max_steps * 1.0
|
| 233 |
-
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
|
|
|
| 228 |
|
| 229 |
def _compute_score(self) -> float:
|
| 230 |
if not self._decisions_log:
|
| 231 |
+
return 0.01
|
| 232 |
max_possible = self._max_steps * 1.0
|
| 233 |
+
if max_possible <= 0:
|
| 234 |
+
return 0.01
|
| 235 |
+
|
| 236 |
+
avg_reward = self._cumulative_reward / max_possible
|
| 237 |
+
# Map avg_reward from [-1.0, 1.0] to [0.0, 1.0]
|
| 238 |
+
normalized = (avg_reward + 1.0) / 2.0
|
| 239 |
+
|
| 240 |
+
# Clamp strictly between 0.01 and 0.99 for OpenEnv
|
| 241 |
+
score = min(max(normalized, 0.01), 0.99)
|
| 242 |
+
return round(score, 4)
|
| 243 |
|