Spaces:

Jeromerich
/

openenv

Configuration error

App Files Files Community

openenv / tasks.py

jeromerichard

Trust & Safety RL Environment - OpenEnv Hackathon

74e3b5e 11 days ago

raw

history blame contribute delete

14.3 kB

	# Scenario catalogue for the Trust & Safety content-moderation tasks.
	#
	# Every entry is a dict with the same keys:
	#   task_id, difficulty, ambiguity_level, risk_level
	#       Identifier plus coarse labels for the scenario.
	#   post_text, image_description
	#       The content under review (text and a textual description of the image).
	#   tool_responses
	#       Canned reply for each investigation tool: read_comments,
	#       check_user_history, check_entity_status, view_policy.
	#   required_tools
	#       Tool names, always a subset of the tool_responses keys.
	#       NOTE(review): presumably the tools an agent must call before deciding;
	#       confirm against the grader.
	#   ground_truth
	#       Expected decision. Values used below: "REMOVE", "ALLOW",
	#       "ALLOW_WITH_WARNING".
	#   ground_truth_signals
	#       Expected structured labels for the post (target, toxicity, intent, ...).
	TASKS = [
	    # T-001: phishing post impersonating a platform security team.
	    {
	        "task_id": "T-001",
	        "difficulty": "easy",
	        "ambiguity_level": "low",
	        "risk_level": "low",
	        "post_text": (
	            "🚨 URGENT: Your account has been COMPROMISED! "
	            "Click here immediately: http://meta-security-verify-now.xyz/login?token=abc123 "
	            "Act within 24 hours or your account will be PERMANENTLY BANNED. — Meta Security Team"
	        ),
	        "image_description": (
	            "Poorly formatted alert page with mismatched Meta logos, "
	            "red urgent warning banners, and a suspicious external URL."
	        ),
	        "tool_responses": {
	            "read_comments": "Comments disabled. Zero engagement.",
	            "check_user_history": "Account created 2 days ago. Zero posts. Flagged by spam filter 3×.",
	            "check_entity_status": "Domain 'meta-security-verify-now.xyz' registered 1 day ago — confirmed phishing domain.",
	            "view_policy": "Policy 3.2 — Phishing: Impersonating Meta to harvest credentials = immediate REMOVE.",
	        },
	        "required_tools": [],
	        "ground_truth": "REMOVE",
	        "ground_truth_signals": {
	            "target": "individual",
	            "is_protected_class": False,
	            "toxicity_level": 0.90,
	            "is_direct_attack": True,
	            "context_type": "spam",
	            "intent": "harmful",
	            "abusive_language_present": False,
	            "content_flags": ["phishing", "spam", "impersonation"],
	        },
	    },
	    # T-002: hyperbolic gaming trash talk between friends.
	    {
	        "task_id": "T-002",
	        "difficulty": "medium",
	        "ambiguity_level": "medium",
	        "risk_level": "low",
	        "post_text": (
	            "LMAO I will literally SNIPE you the second the game starts, "
	            "no mercy, you're getting absolutely destroyed 💀🎮 "
	            "1v1 me and I'll end your whole career bro. Your team is DONE."
	        ),
	        "image_description": (
	            "Call of Duty: Warzone — squad wipe in progress. "
	            "Kill notifications, sniper rifle, enemy team eliminated. Classic gaming screenshot."
	        ),
	        "tool_responses": {
	            "read_comments": (
	                "47 comments: 'GG bro!', 'You always trash talk before CoD 😂', "
	                "'rematch tonight??', 'your aim is mid bro'. "
	                "All mutual gaming friends. Clear competitive banter context."
	            ),
	            "check_user_history": (
	                "Account 3 years active. 500+ posts, 90% gaming. "
	                "Zero harassment reports. Regular CoD tournament player."
	            ),
	            "check_entity_status": "Private individual. Standard gaming community member.",
	            "view_policy": (
	                "Policy 7.1 — Harassment: Hyperbolic language in established gaming communities "
	                "is NOT harassment. 'Snipe', 'destroy', 'end your career' are non-literal expressions."
	            ),
	        },
	        "required_tools": ["read_comments"],
	        "ground_truth": "ALLOW",
	        "ground_truth_signals": {
	            "target": "individual",
	            "is_protected_class": False,
	            "toxicity_level": 0.30,
	            "is_direct_attack": False,
	            "context_type": "meme",
	            "intent": "ambiguous",
	            "abusive_language_present": False,
	            "content_flags": ["competitive_language"],
	        },
	    },
	    # T-003: satirical editorial cartoon about an elected official.
	    {
	        "task_id": "T-003",
	        "difficulty": "hard",
	        "ambiguity_level": "high",
	        "risk_level": "high",
	        "post_text": (
	            "Sharing this political cartoon I drew. "
	            "Mayor Davidson cutting the homeless shelter budget while sitting on a throne of gold coins. "
	            "Caption: 'Priorities.' #LocalPolitics #HoldThemAccountable #SatiricalArt #MayorDavidson"
	        ),
	        "image_description": (
	            "Hand-drawn political cartoon. Exaggerated caricature of a public official on a coin throne, "
	            "homeless families pushed away. Deliberately satirical editorial style. Artist signature visible."
	        ),
	        "tool_responses": {
	            "read_comments": (
	                "23 comments: 'Powerful commentary', 'Free speech!', 'Great art', 'Defamation!'. "
	                "No threats or coordinated harassment detected."
	            ),
	            "check_user_history": (
	                "Account 5 years. Verified local journalist and artist. "
	                "Regular political commentary. No violations. Posts cited in local news."
	            ),
	            "check_entity_status": (
	                "Mayor Davidson: CONFIRMED PUBLIC FIGURE. "
	                "Elected Mayor of Springfield. Active public interest in recent social services budget cuts. "
	                "Multiple credible news articles confirm the cuts are real and newsworthy."
	            ),
	            "view_policy": (
	                "Policy 12.4 — Political Satire: Editorial cartoons targeting PUBLIC OFFICIALS "
	                "on PUBLIC INTEREST matters are PROTECTED. Caricature is a recognized technique. "
	                "Not realistic impersonation. Correct action: ALLOW_WITH_WARNING."
	            ),
	        },
	        "required_tools": ["check_entity_status", "view_policy"],
	        "ground_truth": "ALLOW_WITH_WARNING",
	        "ground_truth_signals": {
	            "target": "political",
	            "is_protected_class": False,
	            "toxicity_level": 0.20,
	            "is_direct_attack": False,
	            "context_type": "satire",
	            "intent": "satirical",
	            "abusive_language_present": False,
	            "content_flags": ["political_content", "public_figure"],
	        },
	    },
	    # T-004: pseudoscientific hate content framed as "education".
	    {
	        "task_id": "T-004",
	        "difficulty": "medium",
	        "ambiguity_level": "medium",
	        "risk_level": "high",
	        "post_text": (
	            "Sharing this 'educational' video about why [ethnic group] people "
	            "are genetically predisposed to criminal behaviour. "
	            "The science doesn't lie. Share before it gets deleted. 👀 "
	            "#HiddenTruths #CensoredFacts #JustAsking"
	        ),
	        "image_description": (
	            "Screenshot of a YouTube-style video thumbnail with a white lab coat figure "
	            "and a graph. Title: 'The Uncomfortable Biology of Crime Rates'. "
	            "Uploader has 0 verified credentials. Comments disabled."
	        ),
	        "tool_responses": {
	            "read_comments": (
	                "Comments disabled by uploader. "
	                "Zero engagement from credible academic or scientific accounts."
	            ),
	            "check_user_history": (
	                "Account 6 months old. 48 posts — all promoting pseudoscientific race content. "
	                "Previously received 2 policy warnings for hate content. "
	                "No verified credentials. Frequently uses 'just asking questions' framing."
	            ),
	            "check_entity_status": (
	                "Ethnic group mentioned: confirmed PROTECTED CLASS under platform policy. "
	                "Claim about genetic criminality: DEBUNKED by mainstream biology and criminology. "
	                "This framing is a known pseudoscience hate vector (scientific racism)."
	            ),
	            "view_policy": (
	                "Policy 4.3 — Hate Speech: Content attributing criminal or negative traits "
	                "to a protected class based on race or ethnicity = REMOVE. "
	                "Disguising hate speech as 'educational content' or 'science' does not "
	                "change the violation. Action: REMOVE."
	            ),
	        },
	        "required_tools": ["check_entity_status", "view_policy"],
	        "ground_truth": "REMOVE",
	        "ground_truth_signals": {
	            "target": "group",
	            "is_protected_class": True,
	            "toxicity_level": 0.85,
	            "is_direct_attack": True,
	            "context_type": "statement",
	            "intent": "harmful",
	            "abusive_language_present": False,
	            # The post text contains no slur, so the flags describe only the
	            # pseudoscientific framing; this keeps them consistent with
	            # abusive_language_present above.
	            "content_flags": ["misinformation", "hate_speech_disguised_as_education"],
	        },
	    },
	    # T-005: factual protest coverage from a verified news outlet.
	    {
	        "task_id": "T-005",
	        "difficulty": "hard",
	        "ambiguity_level": "high",
	        "risk_level": "high",
	        "post_text": (
	            "The Prime Minister announced new economic reforms today. "
	            "Opposition leaders called it 'economic warfare on the poor.' "
	            "Protests erupted in 3 cities. Security forces used tear gas. "
	            "Government says the crackdown was 'necessary to restore order.' "
	            "At least 12 protesters arrested. #Politics #Protest #Reform"
	        ),
	        "image_description": (
	            "News-style post with a photo of a large protest crowd holding signs. "
	            "Tear gas clouds visible in background. "
	            "Caption: 'Thousands take to streets against new austerity measures.' "
	            "Shared by a verified news outlet handle. No graphic violence."
	        ),
	        "tool_responses": {
	            "read_comments": (
	                "340 comments split sharply: "
	                "'State violence against citizens!', 'Finally someone fixing the economy', "
	                "'This is propaganda!', 'Biased reporting!', 'Necessary action'. "
	                "High engagement, deep political polarization. No incitement to violence detected."
	            ),
	            "check_user_history": (
	                "Posted by @NationalNewsNetwork — verified news organization. "
	                "12 years active. Consistently neutral coverage. "
	                "No policy violations. Registered press entity."
	            ),
	            "check_entity_status": (
	                "Prime Minister: CONFIRMED PUBLIC FIGURE, head of government. "
	                "Economic reforms: CONFIRMED real policy, actively debated in parliament. "
	                "Protest: CONFIRMED real event, covered by 6+ independent news agencies. "
	                "Political sensitivity: VERY HIGH — active governance dispute in a democracy."
	            ),
	            "view_policy": (
	                "Policy 12.1 — News & Political Content: Factual reporting on real government "
	                "actions, protests, and police responses by VERIFIED NEWS entities is PROTECTED. "
	                "Even politically contentious factual reporting is protected expression. "
	                "Suppressing legitimate news = censorship risk. "
	                "Action: ALLOW_WITH_WARNING (politically sensitive content label)."
	            ),
	        },
	        "required_tools": ["check_user_history", "check_entity_status", "view_policy"],
	        "ground_truth": "ALLOW_WITH_WARNING",
	        "ground_truth_signals": {
	            "target": "political",
	            "is_protected_class": False,
	            "toxicity_level": 0.20,
	            "is_direct_attack": False,
	            "context_type": "news",
	            "intent": "descriptive",
	            "abusive_language_present": False,
	            "content_flags": ["political_content", "public_figure", "protest"],
	        },
	    },
	    # T-006: religious holiday greeting; the automated flag came from a
	    # third-party comment, not from the post itself.
	    {
	        "task_id": "T-006",
	        "difficulty": "hard",
	        "ambiguity_level": "high",
	        "risk_level": "high",
	        "post_text": (
	            "Sending Eid Mubarak wishes to my community 🌙✨ "
	            "During this holy month let us remember those suffering in conflict zones. "
	            "Our brothers and sisters deserve peace and dignity. "
	            "May Allah guide us all. #EidMubarak #Ramadan #Peace #Solidarity"
	        ),
	        "image_description": (
	            "Warm celebratory post with a crescent moon and stars graphic. "
	            "Soft gold and green colors. "
	            "Comment section shows replies in Arabic, Urdu, and English. "
	            "Emojis: praying hands, crescent moon, dove."
	        ),
	        "tool_responses": {
	            "read_comments": (
	                "89 comments: 'Eid Mubarak!', 'JazakAllah Khair', "
	                "'prayers for peace', 'beautiful message', "
	                "1 comment flagged by automated system: 'why is this allowed, "
	                "these people support terrorism' — that comment is from a third party, "
	                "not the original poster. "
	                "No incitement, no threats, no extremist content in original post."
	            ),
	            "check_user_history": (
	                "Account 4 years active. 200+ posts — community announcements, "
	                "religious holiday greetings, charity drives. "
	                "Zero prior violations. Community leader in local Muslim cultural centre."
	            ),
	            "check_entity_status": (
	                "Islam / Muslim community: CONFIRMED PROTECTED CLASS (religion). "
	                "Eid al-Fitr: globally recognized Islamic religious holiday. "
	                "Content: religious greeting + humanitarian solidarity message. "
	                "No extremist symbols, no prohibited organisations mentioned."
	            ),
	            "view_policy": (
	                "Policy 8.1 — Religious Expression: Greetings and solidarity messages "
	                "for recognized religious holidays are PROTECTED. "
	                "Vague 'conflict zone' solidarity without naming a prohibited group "
	                "is NOT a violation. "
	                "Automated flag was triggered by a COMMENT, not the original post. "
	                "Action: ALLOW — original content is clearly safe religious expression."
	            ),
	        },
	        "required_tools": ["read_comments", "check_entity_status", "view_policy"],
	        "ground_truth": "ALLOW",
	        "ground_truth_signals": {
	            "target": "religion",
	            "is_protected_class": True,
	            "toxicity_level": 0.05,
	            "is_direct_attack": False,
	            "context_type": "statement",
	            "intent": "educational",
	            "abusive_language_present": False,
	            "content_flags": ["political_content"],
	        },
	    },
	]