minor fix
Browse files- openenv.yaml +4 -0
- requirements.txt +2 -1
- scripts/validate_graders.py +83 -0
- server/app.py +20 -0
openenv.yaml
CHANGED
|
@@ -29,18 +29,21 @@ tasks:
|
|
| 29 |
difficulty: easy
|
| 30 |
description: Classify a single content item into one violation category or CLEAN
|
| 31 |
grader: "server.graders:single_label_entry"
|
|
|
|
| 32 |
|
| 33 |
- id: multi-label-classify
|
| 34 |
name: multi-label-classify
|
| 35 |
difficulty: medium
|
| 36 |
description: Assign all applicable violation labels to content that may violate multiple policies
|
| 37 |
grader: "server.graders:multi_label_entry"
|
|
|
|
| 38 |
|
| 39 |
- id: ad-policy-compliance
|
| 40 |
name: ad-policy-compliance
|
| 41 |
difficulty: medium_hard
|
| 42 |
description: Review ad copy against ad policies, identify violations, and cite specific rule IDs
|
| 43 |
grader: "server.graders:ad_policy_entry"
|
|
|
|
| 44 |
|
| 45 |
- id: thread-moderation-hard
|
| 46 |
name: thread-moderation-hard
|
|
@@ -49,6 +52,7 @@ tasks:
|
|
| 49 |
Moderate a full WhatsApp conversation thread with growing context window.
|
| 50 |
Handles cultural nuance, multi-label violations, and conflicting policy resolution.
|
| 51 |
grader: "server.graders:thread_hard_entry"
|
|
|
|
| 52 |
|
| 53 |
observation_space:
|
| 54 |
type: object
|
|
|
|
| 29 |
difficulty: easy
|
| 30 |
description: Classify a single content item into one violation category or CLEAN
|
| 31 |
grader: "server.graders:single_label_entry"
|
| 32 |
+
grader_import: "server.graders.single_label_entry"
|
| 33 |
|
| 34 |
- id: multi-label-classify
|
| 35 |
name: multi-label-classify
|
| 36 |
difficulty: medium
|
| 37 |
description: Assign all applicable violation labels to content that may violate multiple policies
|
| 38 |
grader: "server.graders:multi_label_entry"
|
| 39 |
+
grader_import: "server.graders.multi_label_entry"
|
| 40 |
|
| 41 |
- id: ad-policy-compliance
|
| 42 |
name: ad-policy-compliance
|
| 43 |
difficulty: medium_hard
|
| 44 |
description: Review ad copy against ad policies, identify violations, and cite specific rule IDs
|
| 45 |
grader: "server.graders:ad_policy_entry"
|
| 46 |
+
grader_import: "server.graders.ad_policy_entry"
|
| 47 |
|
| 48 |
- id: thread-moderation-hard
|
| 49 |
name: thread-moderation-hard
|
|
|
|
| 52 |
Moderate a full WhatsApp conversation thread with growing context window.
|
| 53 |
Handles cultural nuance, multi-label violations, and conflicting policy resolution.
|
| 54 |
grader: "server.graders:thread_hard_entry"
|
| 55 |
+
grader_import: "server.graders.thread_hard_entry"
|
| 56 |
|
| 57 |
observation_space:
|
| 58 |
type: object
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
# uv export --format requirements-txt --no-hashes -o requirements.txt
|
| 3 |
-
#
|
|
|
|
| 4 |
aiofile==3.9.0
|
| 5 |
# via py-key-value-aio
|
| 6 |
aiofiles==24.1.0
|
|
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
# uv export --format requirements-txt --no-hashes -o requirements.txt
|
| 3 |
+
# editable install of this package so external checkers can import `server`
|
| 4 |
+
-e .
|
| 5 |
aiofile==3.9.0
|
| 6 |
# via py-key-value-aio
|
| 7 |
aiofiles==24.1.0
|
scripts/validate_graders.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import yaml
|
| 5 |
+
|
| 6 |
+
ROOT = Path(__file__).parent.parent
|
| 7 |
+
OPENENV = ROOT / "openenv.yaml"
|
| 8 |
+
|
| 9 |
+
# Ensure repo root is on sys.path so imports like `server.graders` work when run from scripts/
|
| 10 |
+
sys.path.insert(0, str(ROOT))
|
| 11 |
+
|
| 12 |
+
if not OPENENV.exists():
|
| 13 |
+
print(f"openenv.yaml not found at {OPENENV}")
|
| 14 |
+
sys.exit(2)
|
| 15 |
+
|
| 16 |
+
with OPENENV.open("r", encoding="utf-8") as f:
|
| 17 |
+
cfg = yaml.safe_load(f)
|
| 18 |
+
|
| 19 |
+
tasks = cfg.get("tasks", [])
|
| 20 |
+
if not tasks:
|
| 21 |
+
print("No tasks found in openenv.yaml")
|
| 22 |
+
sys.exit(2)
|
| 23 |
+
|
| 24 |
+
print(f"Found {len(tasks)} tasks. Validating grader importability...")
|
| 25 |
+
|
| 26 |
+
failed = []
|
| 27 |
+
for t in tasks:
|
| 28 |
+
tid = t.get("id") or t.get("name") or "<unknown>"
|
| 29 |
+
grader_spec = t.get("grader")
|
| 30 |
+
grader_dot = t.get("grader_import")
|
| 31 |
+
print(f"\n- Task: {tid}")
|
| 32 |
+
|
| 33 |
+
tried = []
|
| 34 |
+
ok = False
|
| 35 |
+
|
| 36 |
+
for spec in (grader_spec, grader_dot):
|
| 37 |
+
if not spec:
|
| 38 |
+
continue
|
| 39 |
+
tried.append(spec)
|
| 40 |
+
# Support both "module:callable" and "module.callable"
|
| 41 |
+
module = None
|
| 42 |
+
attr = None
|
| 43 |
+
if ":" in spec:
|
| 44 |
+
module, attr = spec.split(":", 1)
|
| 45 |
+
elif "." in spec:
|
| 46 |
+
parts = spec.rsplit(".", 1)
|
| 47 |
+
if len(parts) == 2:
|
| 48 |
+
module, attr = parts
|
| 49 |
+
else:
|
| 50 |
+
module = spec
|
| 51 |
+
attr = None
|
| 52 |
+
else:
|
| 53 |
+
module = spec
|
| 54 |
+
attr = None
|
| 55 |
+
|
| 56 |
+
try:
|
| 57 |
+
m = importlib.import_module(module)
|
| 58 |
+
if attr:
|
| 59 |
+
if hasattr(m, attr):
|
| 60 |
+
print(f" OK: imported {module} and found attribute '{attr}'")
|
| 61 |
+
ok = True
|
| 62 |
+
break
|
| 63 |
+
else:
|
| 64 |
+
print(f" ERROR: imported {module} but attribute '{attr}' not found")
|
| 65 |
+
else:
|
| 66 |
+
print(f" OK: imported module {module}")
|
| 67 |
+
ok = True
|
| 68 |
+
break
|
| 69 |
+
except Exception as e:
|
| 70 |
+
print(f" ERROR importing {module}: {e}")
|
| 71 |
+
|
| 72 |
+
if not ok:
|
| 73 |
+
failed.append((tid, tried))
|
| 74 |
+
|
| 75 |
+
print("\nSummary:")
|
| 76 |
+
if not failed:
|
| 77 |
+
print("All graders importable ✅")
|
| 78 |
+
sys.exit(0)
|
| 79 |
+
else:
|
| 80 |
+
print(f"{len(failed)} task(s) failed to import graders:")
|
| 81 |
+
for tid, tried in failed:
|
| 82 |
+
print(f" - {tid}: tried {tried}")
|
| 83 |
+
sys.exit(1)
|
server/app.py
CHANGED
|
@@ -27,6 +27,26 @@ from server.models import (
|
|
| 27 |
StepResult,
|
| 28 |
)
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# ─── App Init ─────────────────────────────────────────────────────────────────
|
| 31 |
|
| 32 |
app = FastAPI(
|
|
|
|
| 27 |
StepResult,
|
| 28 |
)
|
| 29 |
|
| 30 |
+
# Try to import OpenEnv core package at runtime and log availability.
|
| 31 |
+
# Prefer the new `openenv.core` import; fall back to legacy `openenv_core` if needed.
|
| 32 |
+
_oe_imported = False
|
| 33 |
+
for _mod in ("openenv.core", "openenv_core"):
|
| 34 |
+
try:
|
| 35 |
+
_openenv_core = __import__(_mod, fromlist=["*"])
|
| 36 |
+
_oe_ver = getattr(_openenv_core, "__version__", None)
|
| 37 |
+
if _oe_ver:
|
| 38 |
+
print(f"[startup] {_mod} available, version={_oe_ver}")
|
| 39 |
+
else:
|
| 40 |
+
print(f"[startup] {_mod} imported (version unknown)")
|
| 41 |
+
_oe_imported = True
|
| 42 |
+
break
|
| 43 |
+
except Exception as _e:
|
| 44 |
+
# continue to next candidate
|
| 45 |
+
_oe_last_exc = _e
|
| 46 |
+
|
| 47 |
+
if not _oe_imported:
|
| 48 |
+
print(f"[startup] openenv package not importable: {_oe_last_exc}")
|
| 49 |
+
|
| 50 |
# ─── App Init ─────────────────────────────────────────────────────────────────
|
| 51 |
|
| 52 |
app = FastAPI(
|