File size: 1,595 Bytes
0ee3210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# app/dataset.py
"""
Dataset loader for MetaContentModerationEnv.
All data is loaded from local JSON files under data/.
"""
from __future__ import annotations
import json
import random
from pathlib import Path
from typing import Any

# Directory holding the JSON fixtures: the "data" folder located two
# directory levels above this source file.
DATA_DIR = Path(__file__).parent.parent / "data"


def load_json(path: Path) -> Any:
    """Read the UTF-8 text file at *path* and return its parsed JSON content.

    Args:
        path: Location of the JSON document to read.

    Returns:
        Whatever Python object the document decodes to.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def get_posts(seed: int = 42) -> list[dict]:
    """Load ``posts.json`` from ``DATA_DIR`` and return it shuffled.

    The shuffle uses a private ``random.Random(seed)`` instance, so the
    module-level random state is not touched and the resulting order is
    determined by the seed and the file contents.

    Args:
        seed: Seed for the private random generator.

    Returns:
        The loaded object after shuffling it in place.
    """
    posts = load_json(DATA_DIR / "posts.json")
    random.Random(seed).shuffle(posts)
    return posts


def get_image_descriptions(seed: int = 42) -> list[dict]:
    """Load ``image_descriptions.json`` from ``DATA_DIR`` and return it shuffled.

    The shuffle uses a private ``random.Random(seed)`` instance, so the
    module-level random state is not touched and the resulting order is
    determined by the seed and the file contents.

    Args:
        seed: Seed for the private random generator.

    Returns:
        The loaded object after shuffling it in place.
    """
    source_file = DATA_DIR / "image_descriptions.json"
    descriptions = load_json(source_file)
    random.Random(seed).shuffle(descriptions)
    return descriptions


def get_ad_copies(seed: int = 42) -> list[dict]:
    """Load ``ad_copies.json`` from ``DATA_DIR`` and return it shuffled.

    The shuffle uses a private ``random.Random(seed)`` instance, so the
    module-level random state is not touched and the resulting order is
    determined by the seed and the file contents.

    Args:
        seed: Seed for the private random generator.

    Returns:
        The loaded object after shuffling it in place.
    """
    ad_copies = load_json(DATA_DIR / "ad_copies.json")
    shuffler = random.Random(seed)
    shuffler.shuffle(ad_copies)
    return ad_copies


def get_whatsapp_threads(seed: int = 42) -> list[dict]:
    """Load ``whatsapp_threads.json`` from ``DATA_DIR`` and return it shuffled.

    The shuffle uses a private ``random.Random(seed)`` instance, so the
    module-level random state is not touched and the resulting order is
    determined by the seed and the file contents.

    Args:
        seed: Seed for the private random generator.

    Returns:
        The loaded object after shuffling it in place.
    """
    source_file = DATA_DIR / "whatsapp_threads.json"
    threads = load_json(source_file)
    random.Random(seed).shuffle(threads)
    return threads


def get_community_standards() -> dict:
    """Load and return ``policies/community_standards.json`` from ``DATA_DIR``."""
    standards_file = DATA_DIR / "policies" / "community_standards.json"
    return load_json(standards_file)


def get_ad_policies() -> dict:
    """Load and return ``policies/ad_policies.json`` from ``DATA_DIR``."""
    policies_file = DATA_DIR / "policies" / "ad_policies.json"
    return load_json(policies_file)


def get_policy_excerpt(content_type: str, policies: dict) -> str:
    """Build a short excerpt from the first three policy descriptions.

    Args:
        content_type: Accepted for interface compatibility; this
            implementation does not consult it when picking policies.
        policies: Mapping whose optional ``"policies"`` entry is an iterable
            of mappings, each carrying a ``"description"`` key.

    Returns:
        Up to three descriptions joined with ``" | "``, or an empty string
        when there are no policies.

    Raises:
        KeyError: If any listed policy has no ``"description"`` key, even
            one beyond the first three.
    """
    descriptions = []
    # Every entry is read (not just the first three), so a malformed entry
    # anywhere in the list surfaces as a KeyError.
    for entry in policies.get("policies", []):
        descriptions.append(entry["description"])
    leading = descriptions[:3]
    return " | ".join(leading)