# openenv / tasks.py
# Uploaded by: jeromerichard (taken from the hosting page header)
# Trust & Safety RL Environment - OpenEnv Hackathon
# Commit: 74e3b5e
# Static catalogue of content-moderation scenarios.
#
# Every entry is a plain dict with the same set of keys:
#   task_id               - unique identifier string ("T-001", ...).
#   difficulty            - "easy" / "medium" / "hard".
#   ambiguity_level       - "low" / "medium" / "high".
#   risk_level            - "low" / "high" in the entries below.
#   post_text             - text of the post under review.
#   image_description     - textual description of the attached image.
#   tool_responses        - canned response text keyed by tool name
#                           (read_comments, check_user_history,
#                           check_entity_status, view_policy).
#   required_tools        - tool names drawn from the tool_responses keys;
#                           presumably the tools an agent is expected to call
#                           before deciding (confirm against the grader).
#   ground_truth          - expected decision: "REMOVE", "ALLOW" or
#                           "ALLOW_WITH_WARNING".
#   ground_truth_signals  - reference labels (target, is_protected_class,
#                           toxicity_level, is_direct_attack, context_type,
#                           intent, abusive_language_present, content_flags).
#
# Emoji, em dashes and the multiplication sign are stored as real UTF-8
# characters; an earlier revision carried them as mis-decoded byte sequences.
TASKS = [
    # T-001: phishing post impersonating a platform security team.
    {
        "task_id": "T-001",
        "difficulty": "easy",
        "ambiguity_level": "low",
        "risk_level": "low",
        "post_text": (
            "🚨 URGENT: Your account has been COMPROMISED! "
            "Click here immediately: http://meta-security-verify-now.xyz/login?token=abc123 "
            "Act within 24 hours or your account will be PERMANENTLY BANNED. — Meta Security Team"
        ),
        "image_description": (
            "Poorly formatted alert page with mismatched Meta logos, "
            "red urgent warning banners, and a suspicious external URL."
        ),
        "tool_responses": {
            "read_comments": "Comments disabled. Zero engagement.",
            "check_user_history": "Account created 2 days ago. Zero posts. Flagged by spam filter 3×.",
            "check_entity_status": "Domain 'meta-security-verify-now.xyz' registered 1 day ago — confirmed phishing domain.",
            "view_policy": "Policy 3.2 — Phishing: Impersonating Meta to harvest credentials = immediate REMOVE.",
        },
        "required_tools": [],
        "ground_truth": "REMOVE",
        "ground_truth_signals": {
            "target": "individual",
            "is_protected_class": False,
            "toxicity_level": 0.90,
            "is_direct_attack": True,
            "context_type": "spam",
            "intent": "harmful",
            "abusive_language_present": False,
            "content_flags": ["phishing", "spam", "impersonation"],
        },
    },
    # T-002: hyperbolic gaming trash talk between friends.
    {
        "task_id": "T-002",
        "difficulty": "medium",
        "ambiguity_level": "medium",
        "risk_level": "low",
        "post_text": (
            "LMAO I will literally SNIPE you the second the game starts, "
            "no mercy, you're getting absolutely destroyed 💀🎮 "
            "1v1 me and I'll end your whole career bro. Your team is DONE."
        ),
        "image_description": (
            "Call of Duty: Warzone — squad wipe in progress. "
            "Kill notifications, sniper rifle, enemy team eliminated. Classic gaming screenshot."
        ),
        "tool_responses": {
            "read_comments": (
                "47 comments: 'GG bro!', 'You always trash talk before CoD 😂', "
                "'rematch tonight??', 'your aim is mid bro'. "
                "All mutual gaming friends. Clear competitive banter context."
            ),
            "check_user_history": (
                "Account 3 years active. 500+ posts, 90% gaming. "
                "Zero harassment reports. Regular CoD tournament player."
            ),
            "check_entity_status": "Private individual. Standard gaming community member.",
            "view_policy": (
                "Policy 7.1 — Harassment: Hyperbolic language in established gaming communities "
                "is NOT harassment. 'Snipe', 'destroy', 'end your career' are non-literal expressions."
            ),
        },
        "required_tools": ["read_comments"],
        "ground_truth": "ALLOW",
        "ground_truth_signals": {
            "target": "individual",
            "is_protected_class": False,
            "toxicity_level": 0.30,
            "is_direct_attack": False,
            "context_type": "meme",
            "intent": "ambiguous",
            "abusive_language_present": False,
            "content_flags": ["competitive_language"],
        },
    },
    # T-003: satirical editorial cartoon about an elected official.
    {
        "task_id": "T-003",
        "difficulty": "hard",
        "ambiguity_level": "high",
        "risk_level": "high",
        "post_text": (
            "Sharing this political cartoon I drew. "
            "Mayor Davidson cutting the homeless shelter budget while sitting on a throne of gold coins. "
            "Caption: 'Priorities.' #LocalPolitics #HoldThemAccountable #SatiricalArt #MayorDavidson"
        ),
        "image_description": (
            "Hand-drawn political cartoon. Exaggerated caricature of a public official on a coin throne, "
            "homeless families pushed away. Deliberately satirical editorial style. Artist signature visible."
        ),
        "tool_responses": {
            "read_comments": (
                "23 comments: 'Powerful commentary', 'Free speech!', 'Great art', 'Defamation!'. "
                "No threats or coordinated harassment detected."
            ),
            "check_user_history": (
                "Account 5 years. Verified local journalist and artist. "
                "Regular political commentary. No violations. Posts cited in local news."
            ),
            "check_entity_status": (
                "Mayor Davidson: CONFIRMED PUBLIC FIGURE. "
                "Elected Mayor of Springfield. Active public interest in recent social services budget cuts. "
                "Multiple credible news articles confirm the cuts are real and newsworthy."
            ),
            "view_policy": (
                "Policy 12.4 — Political Satire: Editorial cartoons targeting PUBLIC OFFICIALS "
                "on PUBLIC INTEREST matters are PROTECTED. Caricature is a recognized technique. "
                "Not realistic impersonation. Correct action: ALLOW_WITH_WARNING."
            ),
        },
        "required_tools": ["check_entity_status", "view_policy"],
        "ground_truth": "ALLOW_WITH_WARNING",
        "ground_truth_signals": {
            "target": "political",
            "is_protected_class": False,
            "toxicity_level": 0.20,
            "is_direct_attack": False,
            "context_type": "satire",
            "intent": "satirical",
            "abusive_language_present": False,
            "content_flags": ["political_content", "public_figure"],
        },
    },
    # ── Additional scenarios (T-004 onwards) ──────────────────────────────
    # T-004: pseudoscientific hate content framed as "education".
    {
        "task_id": "T-004",
        "difficulty": "medium",
        "ambiguity_level": "medium",
        "risk_level": "high",
        "post_text": (
            "Sharing this 'educational' video about why [ethnic group] people "
            "are genetically predisposed to criminal behaviour. "
            "The science doesn't lie. Share before it gets deleted. 👀 "
            "#HiddenTruths #CensoredFacts #JustAsking"
        ),
        "image_description": (
            "Screenshot of a YouTube-style video thumbnail with a white lab coat figure "
            "and a graph. Title: 'The Uncomfortable Biology of Crime Rates'. "
            "Uploader has 0 verified credentials. Comments disabled."
        ),
        "tool_responses": {
            "read_comments": (
                "Comments disabled by uploader. "
                "Zero engagement from credible academic or scientific accounts."
            ),
            "check_user_history": (
                "Account 6 months old. 48 posts — all promoting pseudoscientific race content. "
                "Previously received 2 policy warnings for hate content. "
                "No verified credentials. Frequently uses 'just asking questions' framing."
            ),
            "check_entity_status": (
                "Ethnic group mentioned: confirmed PROTECTED CLASS under platform policy. "
                "Claim about genetic criminality: DEBUNKED by mainstream biology and criminology. "
                "This framing is a known pseudoscience hate vector (scientific racism)."
            ),
            "view_policy": (
                "Policy 4.3 — Hate Speech: Content attributing criminal or negative traits "
                "to a protected class based on race or ethnicity = REMOVE. "
                "Disguising hate speech as 'educational content' or 'science' does not "
                "change the violation. Action: REMOVE."
            ),
        },
        "required_tools": ["check_entity_status", "view_policy"],
        "ground_truth": "REMOVE",
        "ground_truth_signals": {
            "target": "group",
            "is_protected_class": True,
            "toxicity_level": 0.85,
            "is_direct_attack": True,
            "context_type": "statement",
            "intent": "harmful",
            "abusive_language_present": False,
            # NOTE(review): "slur" is flagged although the post text contains
            # no slur and abusive_language_present is False - confirm the
            # intended flag against the grader's vocabulary.
            "content_flags": ["slur", "misinformation", "hate_speech_disguised_as_education"],
        },
    },
    # T-005: factual protest coverage from a verified news outlet.
    {
        "task_id": "T-005",
        "difficulty": "hard",
        "ambiguity_level": "high",
        "risk_level": "high",
        "post_text": (
            "The Prime Minister announced new economic reforms today. "
            "Opposition leaders called it 'economic warfare on the poor.' "
            "Protests erupted in 3 cities. Security forces used tear gas. "
            "Government says the crackdown was 'necessary to restore order.' "
            "At least 12 protesters arrested. #Politics #Protest #Reform"
        ),
        "image_description": (
            "News-style post with a photo of a large protest crowd holding signs. "
            "Tear gas clouds visible in background. "
            "Caption: 'Thousands take to streets against new austerity measures.' "
            "Shared by a verified news outlet handle. No graphic violence."
        ),
        "tool_responses": {
            "read_comments": (
                "340 comments split sharply: "
                "'State violence against citizens!', 'Finally someone fixing the economy', "
                "'This is propaganda!', 'Biased reporting!', 'Necessary action'. "
                "High engagement, deep political polarization. No incitement to violence detected."
            ),
            "check_user_history": (
                "Posted by @NationalNewsNetwork — verified news organization. "
                "12 years active. Consistently neutral coverage. "
                "No policy violations. Registered press entity."
            ),
            "check_entity_status": (
                "Prime Minister: CONFIRMED PUBLIC FIGURE, head of government. "
                "Economic reforms: CONFIRMED real policy, actively debated in parliament. "
                "Protest: CONFIRMED real event, covered by 6+ independent news agencies. "
                "Political sensitivity: VERY HIGH — active governance dispute in a democracy."
            ),
            "view_policy": (
                "Policy 12.1 — News & Political Content: Factual reporting on real government "
                "actions, protests, and police responses by VERIFIED NEWS entities is PROTECTED. "
                "Even politically contentious factual reporting is protected expression. "
                "Suppressing legitimate news = censorship risk. "
                "Action: ALLOW_WITH_WARNING (politically sensitive content label)."
            ),
        },
        "required_tools": ["check_user_history", "check_entity_status", "view_policy"],
        "ground_truth": "ALLOW_WITH_WARNING",
        "ground_truth_signals": {
            "target": "political",
            "is_protected_class": False,
            "toxicity_level": 0.20,
            "is_direct_attack": False,
            "context_type": "news",
            "intent": "descriptive",
            "abusive_language_present": False,
            "content_flags": ["political_content", "public_figure", "protest"],
        },
    },
    # T-006: benign religious greeting; the automated flag came from a comment.
    {
        "task_id": "T-006",
        "difficulty": "hard",
        "ambiguity_level": "high",
        "risk_level": "high",
        "post_text": (
            "Sending Eid Mubarak wishes to my community 🌙✨ "
            "During this holy month let us remember those suffering in conflict zones. "
            "Our brothers and sisters deserve peace and dignity. "
            "May Allah guide us all. #EidMubarak #Ramadan #Peace #Solidarity"
        ),
        "image_description": (
            "Warm celebratory post with a crescent moon and stars graphic. "
            "Soft gold and green colors. "
            "Comment section shows replies in Arabic, Urdu, and English. "
            "Emojis: praying hands, crescent moon, dove."
        ),
        "tool_responses": {
            "read_comments": (
                "89 comments: 'Eid Mubarak!', 'JazakAllah Khair', "
                "'prayers for peace', 'beautiful message', "
                "1 comment flagged by automated system: 'why is this allowed, "
                "these people support terrorism' — that comment is from a third party, "
                "not the original poster. "
                "No incitement, no threats, no extremist content in original post."
            ),
            "check_user_history": (
                "Account 4 years active. 200+ posts — community announcements, "
                "religious holiday greetings, charity drives. "
                "Zero prior violations. Community leader in local Muslim cultural centre."
            ),
            "check_entity_status": (
                "Islam / Muslim community: CONFIRMED PROTECTED CLASS (religion). "
                "Eid al-Fitr: globally recognized Islamic religious holiday. "
                "Content: religious greeting + humanitarian solidarity message. "
                "No extremist symbols, no prohibited organisations mentioned."
            ),
            "view_policy": (
                "Policy 8.1 — Religious Expression: Greetings and solidarity messages "
                "for recognized religious holidays are PROTECTED. "
                "Vague 'conflict zone' solidarity without naming a prohibited group "
                "is NOT a violation. "
                "Automated flag was triggered by a COMMENT, not the original post. "
                "Action: ALLOW — original content is clearly safe religious expression."
            ),
        },
        "required_tools": ["read_comments", "check_entity_status", "view_policy"],
        "ground_truth": "ALLOW",
        "ground_truth_signals": {
            "target": "religion",
            "is_protected_class": True,
            "toxicity_level": 0.05,
            "is_direct_attack": False,
            "context_type": "statement",
            # NOTE(review): "educational" intent and a "political_content" flag
            # look odd for a religious holiday greeting - confirm these labels
            # are what the grader expects.
            "intent": "educational",
            "abusive_language_present": False,
            "content_flags": ["political_content"],
        },
    },
]