# prompt_golf_env/server/tasks_policy.py
# Don Rishabh
# tasks_policy: long-context policy-compression tasks
# e8ef5c3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Long-policy task bank for Prompt Golf.
These tasks exist to stress-test prompt compression on a real-world
pattern: an organization has a multi-page policy (ad standards, content
moderation, financial disclosures) and needs an LLM-based classifier
that decides whether a piece of content complies. The naive prompt is
the *entire policy verbatim* — easily 1000-2000 tokens. The trained
prompt golf agent must compress that into a tight classifier prompt
that preserves the load-bearing rules.
Why these tasks are valuable:
- Long policy text = aggressive compression target (10×+ ratio).
- Multi-class decisions test that the agent identifies the
*hierarchy* of rules, not just keywords.
- Real businesses pay for policy compression today — every byte of
system prompt costs $$ at scale.
Each task ships with:
- description: the full multi-clause policy (1000+ tokens)
- train_examples: 3 (content_description, decision) pairs
- test_examples: 6 hidden (content_description, decision) pairs
- scorer: exact_label (decision is from a closed vocabulary)
- budget_tokens: 250 (aggressive — verbose policy is ~1200 tokens)
"""
from __future__ import annotations
try:
from .tasks import TaskSpec
except ImportError:
from server.tasks import TaskSpec
# Registry of the policy-compression tasks defined in this module, keyed by
# each task's ``task_id``. Filled at import time through ``_add``.
TASKS_POLICY: dict[str, TaskSpec] = {}
def _add(task: TaskSpec) -> None:
    """Store ``task`` in ``TASKS_POLICY`` under its ``task_id``.

    An entry already present under the same id is replaced.
    """
    registry_key = task.task_id
    TASKS_POLICY[registry_key] = task
def list_task_ids_policy() -> list[str]:
    """Return a new list holding the id of every task in ``TASKS_POLICY``.

    Ids come back in the registry's iteration order.
    """
    return [task_id for task_id in TASKS_POLICY]
# ============================================================================
# 1. Ad creative policy compliance (MSN/Bing-style)
# ============================================================================
# Verbatim ad-creative policy text; passed as ``description`` of the
# ``policy_msn_ad_creative`` task below.
# Layout note: a trailing backslash inside the literal is a line continuation,
# so wrapped source lines join into one paragraph at runtime; only lines
# without a trailing backslash end in a real newline. The backslash right
# after the opening quotes and the one before the closing quotes suppress the
# leading and trailing newline.
_MSN_AD_POLICY = """\
MSN AD CREATIVE POLICY (effective 2025)
This document describes the standards every ad creative must meet \
before serving on the MSN advertising network. Each submitted creative \
must be classified into exactly one of three buckets: ALLOW (compliant, \
serve as-is), DISALLOW (violates a hard prohibition, do not serve), or \
REVIEW (potentially compliant but requires manual editorial review or \
the addition of mandatory disclosures before serving).
SECTION A — HARD PROHIBITIONS (always DISALLOW)
A.1 Illegal goods or services. Any creative promoting goods or \
services that are illegal under United States federal law or in the \
country where the creative will serve is prohibited. This includes \
unlicensed pharmaceuticals, controlled substances, illegal weapons, \
counterfeit goods, and pirated content.
A.2 Tobacco, e-cigarettes, vaping products. Promotion of any tobacco \
or nicotine-delivery product is prohibited in the United States, \
European Union, Australia, and Singapore. Cessation aids prescribed by \
licensed clinicians are exempt.
A.3 Adult content. Sexually explicit material, escort services, and \
adult entertainment are prohibited.
A.4 Misleading medical claims. Any creative that claims to cure, \
prevent, or guarantee treatment outcomes for a serious medical \
condition (cancer, diabetes, HIV/AIDS) is prohibited unless backed by \
FDA approval cited in the creative.
A.5 Cryptocurrency speculation. Promotion of unregistered initial coin \
offerings, leveraged crypto trading without risk disclosure, or any \
'guaranteed returns' framing in the digital-asset category.
SECTION B — RESTRICTED CONTENT (REVIEW unless conditions met)
B.1 Alcohol. Permitted only when (a) creative includes 'Drink \
Responsibly' or jurisdiction-equivalent disclaimer, (b) campaign is \
restricted to ages 21+ in the US (18+ in countries with lower legal \
drinking age), and (c) target country permits alcohol advertising. If \
all three conditions are explicitly stated, ALLOW; otherwise REVIEW.
B.2 Gambling and games of chance. Permitted only with (a) state or \
country-level operator license number visible in the creative, and (b) \
problem-gambling helpline displayed. Both required for ALLOW.
B.3 Financial services and investment products. Stock trading, \
brokerage, and investment-management ads require an explicit risk \
disclosure clause ('investments may lose value' or equivalent). Loans \
and credit cards require APR ranges. If disclosure is present, ALLOW; \
otherwise REVIEW for editorial sign-off.
B.4 Political and issue advertising. Any creative referencing a \
political candidate, ballot measure, or contested social issue must \
have advertiser identity verified through MSN's political verification \
program. Without verification: REVIEW.
B.5 Health and wellness (non-medical). Supplements, fitness programs, \
and weight-loss products are permitted only when claims are factual and \
do not promise specific outcomes ('lose 20 lbs guaranteed' is \
prohibited; 'supports healthy weight management' is permitted).
SECTION C — FORMAT AND PRESENTATION STANDARDS
C.1 Creative must clearly identify the advertiser. Anonymous or \
deceptively branded creatives are REVIEW.
C.2 No fake-system-UI elements. Creatives that imitate browser \
warnings, OS notifications, or user-interface chrome to induce clicks \
are DISALLOW.
C.3 No autoplay audio with sound on by default.
C.4 All claims of superiority ('#1', 'best in class') must be \
independently verifiable; otherwise REVIEW.
DECISION FORMAT
For each input creative description, output exactly one label, \
lowercase, no punctuation, no explanation: 'allow', 'disallow', or \
'review'. The label must be on the first line of the output and \
nothing else.\
"""
# Register the ad-creative compliance task. Every example is a
# (creative description, expected label) pair; the labels used here are the
# three named in the policy's DECISION FORMAT section: allow / disallow / review.
_add(TaskSpec(
    task_id="policy_msn_ad_creative",
    category="policy_compression",
    description=_MSN_AD_POLICY,
    scorer="exact_label",
    # Train set: one example for each of the three labels.
    train_examples=[
        ("Banner ad for Heineken beer, copy includes 'Drink Responsibly', "
         "campaign restricted to ages 21+, serving in the United States.",
         "allow"),
        ("Banner ad promoting an unregistered ICO promising 'guaranteed "
         "300% returns in 90 days'.",
         "disallow"),
        ("Banner ad for Robinhood stock trading app, copy mentions "
         "'commission-free trading' but no risk disclosure visible.",
         "review"),
    ],
    # Test set: 3 allow, 2 disallow, 1 review.
    test_examples=[
        ("Banner ad for a vape pen brand, campaign serving in the US.",
         "disallow"),
        ("Native ad for an online casino, includes operator license "
         "number 'NV-2189' and 1-800-GAMBLER helpline.",
         "allow"),
        ("Display ad for Nike running shoes, no special claims.",
         "allow"),
        ("Sponsored post for a weight-loss supplement, copy says 'lose 20 "
         "pounds in 30 days, guaranteed!'.",
         "disallow"),
        ("Banner ad endorsing Senator Park's re-election campaign, no "
         "advertiser-verification badge present.",
         "review"),
        ("Display ad for Wells Fargo personal loans, copy mentions APR "
         "range '6.99%-19.99%' clearly.",
         "allow"),
    ],
    budget_tokens=250,
    difficulty="hard",
    tags=["policy", "compression", "ad-tech", "long-context"],
))
# ============================================================================
# 2. Content moderation policy (social-platform style)
# ============================================================================
# Verbatim community-content policy text; passed as ``description`` of the
# ``policy_content_moderation`` task below.
# Layout note: a trailing backslash inside the literal is a line continuation,
# so wrapped source lines join into one paragraph at runtime; only lines
# without a trailing backslash end in a real newline.
_CONTENT_MOD_POLICY = """\
COMMUNITY CONTENT POLICY (v3.2)
Every user-submitted post on this platform is reviewed against the \
following rules. Each post is classified into exactly one of four \
buckets: KEEP (post complies, do not act), WARN (post is borderline; \
attach a sensitive-content interstitial but leave it visible), REMOVE \
(post violates a hard rule; delete it and notify the author), or \
ESCALATE (post may involve real-world harm or legal risk; route to a \
human reviewer immediately).
PART 1 — HARD REMOVAL CRITERIA (always REMOVE)
1.1 Doxxing. Posts that publish another user's private contact \
information (home address, personal phone, employer in a hostile \
context) without that user's consent.
1.2 CSAM and child exploitation. Any sexualized content involving \
minors, or content that grooms, recruits, or trafficks minors. These \
posts also trigger ESCALATE simultaneously.
1.3 Direct credible threats of violence. Posts that name a specific \
person, place, or scheduled event AND describe an intended violent act.
1.4 Coordinated harassment. Posts directing multiple users to attack a \
named target (pile-on, brigade, raid).
1.5 Illegal goods marketplace. Selling, buying, or arranging the \
exchange of regulated firearms without a license, controlled substances \
without a prescription, or services rendered for criminal purposes.
PART 2 — ESCALATE TO HUMAN REVIEWER (always ESCALATE)
2.1 First-person suicidal ideation or imminent self-harm. Route to \
crisis-resource workflow within 15 minutes.
2.2 Reports of ongoing real-world emergencies (active shooter, \
kidnapping in progress, hostage situation) — route to law enforcement \
liaison.
2.3 Coordinated influence operations or election-interference patterns \
flagged by the platform-integrity model.
2.4 Legal compliance gray zones (DMCA, GDPR right-to-erasure, court \
preservation orders).
PART 3 — WARN (interstitial but keep)
3.1 Graphic but newsworthy content. Photojournalism from conflict \
zones, accident scenes, or medical procedures shown for educational \
purposes. Apply sensitive-content overlay; allow click-through.
3.2 Suicide / self-harm references in the third person, news, fiction, \
or recovery contexts (NOT first-person ideation — that's ESCALATE).
3.3 Extremely strong language directed at a public figure (politicians, \
celebrities) but stopping short of credible threat or doxxing.
3.4 Adult nudity in clearly artistic, educational, or medical contexts \
(NOT pornographic).
PART 4 — KEEP DEFAULTS
Posts that don't trigger Part 1, 2, or 3 are KEEP. This includes \
ordinary disagreement, strong political opinion, profanity not directed \
at a specific user, satire and parody, and historical or scientific \
discussion of sensitive topics.
OUTPUT FORMAT
For each post description, output exactly one label, lowercase, no \
punctuation, no explanation: 'keep', 'warn', 'remove', or 'escalate'. \
First line only.\
"""
# Register the content-moderation task. Every example is a
# (post description, expected label) pair; labels come from the four named in
# the policy's OUTPUT FORMAT section: keep / warn / remove / escalate.
_add(TaskSpec(
    task_id="policy_content_moderation",
    category="policy_compression",
    description=_CONTENT_MOD_POLICY,
    scorer="exact_label",
    # Train set covers remove, warn and keep; there is no 'escalate' example
    # here -- that label appears only in the test set.
    train_examples=[
        ("Post that names a specific journalist by full name and posts "
         "their home address, calling for 'accountability'.",
         "remove"),
        ("News photograph from a conflict zone showing wounded civilians, "
         "captioned with factual reporting context.",
         "warn"),
        ("Post containing strong profanity directed at a politician's "
         "policy stance, no doxxing or threats.",
         "keep"),
    ],
    # Test set: 2 keep, 2 remove, 1 warn, 1 escalate.
    test_examples=[
        ("First-person post saying 'I have a plan and pills, tonight is "
         "the night' with no recovery framing.",
         "escalate"),
        ("Satirical post mocking a CEO's recent quarterly earnings call, "
         "uses sarcasm but no slurs or threats.",
         "keep"),
        ("Post offering to sell an unregistered AR-15 to whoever sends "
         "the highest bid via DM.",
         "remove"),
        ("Documentary-style post about historical lynchings in the 20th "
         "century US, includes one period photograph.",
         "warn"),
        ("Post coordinating dozens of accounts to flood a single user's "
         "mentions with insults over a public dispute.",
         "remove"),
        ("Post sharing a recipe for homemade lemon cake with no other "
         "context.",
         "keep"),
    ],
    budget_tokens=250,
    difficulty="hard",
    tags=["policy", "compression", "moderation", "long-context"],
))
# ============================================================================
# 3. Financial-disclosure compliance (broker/dealer style)
# ============================================================================
# Verbatim broker-dealer communication-review policy text; passed as
# ``description`` of the ``policy_finreg_communication_review`` task below.
# Layout note: a trailing backslash inside the literal is a line continuation,
# so wrapped source lines join into one paragraph at runtime; only lines
# without a trailing backslash end in a real newline.
_FINREG_POLICY = """\
FINRA-STYLE COMMUNICATION REVIEW POLICY (excerpted, simplified for \
classifier training)
A registered representative or firm-affiliated person at a broker-dealer \
must have all written communications with retail customers reviewed \
before they are sent. Each communication is classified into one of four \
buckets: APPROVED (compliant, send), HOLD (return to author for \
correction of a fixable issue), REJECT (substantive violation, do not \
send under any revision), or ESCALATE (requires compliance officer \
sign-off because of complexity, novel product, or potential customer \
harm).
PART I — REJECT CATEGORIES (always REJECT)
1.1 Performance guarantees. Any statement that the customer's \
investment 'cannot lose' or will 'guarantee' a specific return over a \
specific time horizon. Phrases like 'risk-free' applied to anything \
other than US Treasuries are REJECT.
1.2 Selective performance highlighting (cherry-picking). Citing only \
the best historical years of a fund without stating the comparable \
benchmark return AND the worst year in the same period.
1.3 Promissory language about future appreciation. 'Will' double, \
'must' rise, 'has to' recover.
1.4 Unsuitable product recommendations to vulnerable customers. \
Recommending leveraged ETFs, options strategies, or crypto futures to \
customers under 25 or over 75, or with stated 'capital preservation' \
objectives, in a public-facing communication.
PART II — HOLD (revisable)
2.1 Missing standard disclosures. Mutual fund pitches missing the \
'past performance does not guarantee future results' line, or annuity \
pitches missing the surrender-charge schedule reference.
2.2 Missing FINRA Rule 2210 footer (firm name, registration status, \
and contact info).
2.3 Fee disclosures present but not 'clear and prominent' (font size \
or contrast inadequate).
PART III — ESCALATE (compliance officer required)
3.1 New-issue securities communications.
3.2 Communications mentioning private placements, structured products, \
or non-traded REITs.
3.3 References to options strategies more complex than basic covered \
calls.
3.4 Communications targeting accounts opened in the past 90 days \
(new-customer suitability review).
PART IV — APPROVED DEFAULTS
Communications that include all required disclosures, do not promise \
returns, do not cherry-pick performance data, and concern only \
plain-vanilla products (broad-market index funds, money market funds, \
US Treasuries) for customers in the firm's standard suitability bands. \
APPROVED.
OUTPUT FORMAT
Output exactly one lowercase label, no punctuation, no explanation: \
'approved', 'hold', 'reject', or 'escalate'. First line only.\
"""
# Register the financial-communication review task. Every example is a
# (communication description, expected label) pair; labels come from the four
# named in the policy's OUTPUT FORMAT section: approved / hold / reject /
# escalate.
_add(TaskSpec(
    task_id="policy_finreg_communication_review",
    category="policy_compression",
    description=_FINREG_POLICY,
    scorer="exact_label",
    # Train set covers reject, approved and escalate; there is no 'hold'
    # example here -- that label appears only in the test set.
    train_examples=[
        ("Email pitch promising clients that the firm's growth fund "
         "'will deliver double-digit returns over the next decade'.",
         "reject"),
        ("Brochure for an S&P 500 index fund, includes the standard "
         "'past performance does not guarantee future results' "
         "disclosure and FINRA footer.",
         "approved"),
        ("Newsletter about a non-traded REIT being added to the firm's "
         "platform, includes no obvious violations but is a new "
         "structured-product offering.",
         "escalate"),
    ],
    # Test set: 2 reject, 2 escalate, 1 hold, 1 approved.
    test_examples=[
        ("Mailer for a money-market fund, omits the 'past performance' "
         "line but otherwise compliant; FINRA footer present.",
         "hold"),
        ("Email to a 78-year-old retiree with stated 'capital "
         "preservation' goal, recommending a 3x leveraged tech ETF.",
         "reject"),
        ("Email pitching a new private placement to a sophisticated "
         "investor, with all standard risk disclosures present.",
         "escalate"),
        ("Quarterly note about US Treasury yields, no recommendations, "
         "all firm disclosures present.",
         "approved"),
        ("Brochure highlighting only the fund's three best years out of "
         "ten, omitting the worst year and benchmark return.",
         "reject"),
        ("Email pitching a covered-call strategy to a customer who "
         "opened the account 30 days ago.",
         "escalate"),
    ],
    budget_tokens=250,
    difficulty="hard",
    tags=["policy", "compression", "finance", "long-context"],
))
if __name__ == "__main__":
    # Smoke check when the module is run as a script: print how many tasks
    # were registered, then one summary line per task.
    # (The unused ``collections.Counter`` import was dropped.)
    print(f"tasks_policy: {len(TASKS_POLICY)} tasks")
    for tid, spec in TASKS_POLICY.items():
        # Whitespace-split word count of the policy text: a rough size
        # indicator, not a tokenizer-accurate token count.
        desc_words = len(spec.description.split())
        print(f" {tid:42s} category={spec.category:22s} "
              f"budget={spec.budget_tokens} desc_words≈{desc_words}")