File size: 1,595 Bytes
0ee3210 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | # app/dataset.py
"""
Dataset loader for MetaContentModerationEnv.
All data is loaded from local JSON files under data/.
"""
from __future__ import annotations
import json
import random
from pathlib import Path
from typing import Any
DATA_DIR = Path(__file__).parent.parent / "data"
def load_json(path: Path) -> Any:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def get_posts(seed: int = 42) -> list[dict]:
items = load_json(DATA_DIR / "posts.json")
rng = random.Random(seed)
rng.shuffle(items)
return items
def get_image_descriptions(seed: int = 42) -> list[dict]:
items = load_json(DATA_DIR / "image_descriptions.json")
rng = random.Random(seed)
rng.shuffle(items)
return items
def get_ad_copies(seed: int = 42) -> list[dict]:
items = load_json(DATA_DIR / "ad_copies.json")
rng = random.Random(seed)
rng.shuffle(items)
return items
def get_whatsapp_threads(seed: int = 42) -> list[dict]:
items = load_json(DATA_DIR / "whatsapp_threads.json")
rng = random.Random(seed)
rng.shuffle(items)
return items
def get_community_standards() -> dict:
return load_json(DATA_DIR / "policies" / "community_standards.json")
def get_ad_policies() -> dict:
return load_json(DATA_DIR / "policies" / "ad_policies.json")
def get_policy_excerpt(content_type: str, policies: dict) -> str:
"""Return a short relevant policy excerpt for the given content type."""
relevant = [
p["description"]
for p in policies.get("policies", [])
]
return " | ".join(relevant[:3])
|