anuragredbus committed on
Commit
1a2a407
·
1 Parent(s): f0a8734

added more scenarios

Browse files
__init__.py CHANGED
@@ -9,6 +9,7 @@
9
  from .client import ViraltestEnv
10
  from .models import (
11
  CollabProposal,
 
12
  EngagementSignals,
13
  ScheduledAction,
14
  ToolCall,
@@ -19,6 +20,7 @@ from .models import (
19
 
20
  __all__ = [
21
  "CollabProposal",
 
22
  "EngagementSignals",
23
  "ScheduledAction",
24
  "ToolCall",
 
9
  from .client import ViraltestEnv
10
  from .models import (
11
  CollabProposal,
12
+ DailyInteractions,
13
  EngagementSignals,
14
  ScheduledAction,
15
  ToolCall,
 
20
 
21
  __all__ = [
22
  "CollabProposal",
23
+ "DailyInteractions",
24
  "EngagementSignals",
25
  "ScheduledAction",
26
  "ToolCall",
eval_env.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ End-to-end evaluation of the viraltest environment after the collab + interaction expansion.
3
+
4
+ Sections
5
+ --------
6
+ A) Collab tier diagnostics
7
+ - Per-tier expected multipliers from `_collab_evaluation`
8
+ - Episode runs with varying collab cadence (1, 5, 15 collabs/episode) to show that
9
+ the score spread between tiers GROWS with cadence, proving the multiplier is doing
10
+ real work and the small diffs in the 2-collab test are just dilution.
11
+ B) Interaction diagnostics
12
+ - Each penalty path (spam, ignoring_own, off_niche, low_quality, energy_drain) fires
13
+ the expected violation.
14
+ - Healthy band lifts reach_modifier > 1.0.
15
+ C) Cross-cutting sanity
16
+ - Every scenario completes without errors, energy non-negative, judge_report present.
17
+
18
+ Run: .venv/bin/python eval_env.py
19
+ """
20
+
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ from models import (
24
+ CollabProposal,
25
+ DailyInteractions,
26
+ ScheduledAction,
27
+ ViraltestAction,
28
+ )
29
+ from server.viraltest_environment import ViraltestEnvironment
30
+
31
+
32
+ SEED = 42
33
+ HORIZON = 15 # TASK_HORIZON in the env
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Helpers
37
+ # ---------------------------------------------------------------------------
38
+
39
def _post_only(content_type: str = "reel", topic: str = "AI tools",
               tags: Optional[List[str]] = None, intent: str = "watch_bait") -> ScheduledAction:
    """Build a single noon post with sensible defaults for the eval plans."""
    # Falsy tags (None or []) fall back to the default tag list, as before.
    tag_list = tags or ["ai"]
    return ScheduledAction(
        hour=12,
        action_type="post",
        content_type=content_type,
        topic=topic,
        tags=tag_list,
        intent=intent,
    )
45
+
46
+
47
def _run_episode(
    plan_fn,
    user_niche: Optional[str] = None,
    task: str = "monthly_competitive",
) -> Dict[str, Any]:
    """Run one full episode driven by ``plan_fn(obs_dict, day)`` and summarize it.

    Collects judge and interaction violations across all days, tracks the
    minimum creator energy seen, and pulls the final grader score out of the
    last observation's metadata.
    """
    env = ViraltestEnvironment()
    kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
    if user_niche:
        kwargs["user_niche"] = user_niche
    observation = env.reset(**kwargs)
    obs_payload = observation.model_dump()
    final_obs = observation
    judge_viols: List[str] = []
    inter_viols: List[str] = []
    lowest_energy = 1.0
    # One extra step beyond the horizon so the env can emit its terminal obs.
    day = 1
    while day <= HORIZON + 1:
        observation = env.step(plan_fn(obs_payload, day))
        obs_payload = observation.model_dump()
        final_obs = observation
        if observation.creator_energy < lowest_energy:
            lowest_energy = observation.creator_energy
        if observation.judge_report:
            judge_viols.extend(observation.judge_report.violations)
        if observation.interaction_metrics:
            extra = observation.interaction_metrics.get("violations", []) or []
            inter_viols.extend(extra)
        if observation.done:
            break
        day += 1
    final_score = (final_obs.metadata or {}).get("grader_score", 0.0)
    return {
        "score": float(final_score),
        "followers": int(final_obs.follower_count),
        "min_energy": float(lowest_energy),
        "energy": float(final_obs.creator_energy),
        "engagement_rate": float(final_obs.engagement_rate),
        "judge_violations": judge_viols,
        "interaction_violations": inter_viols,
        "error": final_obs.error,
        "done": final_obs.done,
    }
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # A) COLLAB TIER DIAGNOSTICS
90
+ # ---------------------------------------------------------------------------
91
+
92
def section_a_collab_evaluator() -> None:
    """Print the raw `_collab_evaluation` output for each partner archetype.

    Diagnostic table only — no assertions.  Shows same-niche flag, overlap,
    partner followers, follower gap, the two multipliers, and whether the
    guardrail recommends or blocks the partner.
    """
    print("=" * 78)
    print("A1. _collab_evaluation snapshot (user_niche=tech)")
    print("=" * 78)
    env = ViraltestEnvironment()
    env.reset(task="monthly_competitive", seed=SEED, user_niche="tech")
    fmt = "{:<22} {:>5} {:>7} {:>5} {:>5} {:>10} {:>10} {:<28}"
    print(fmt.format("partner", "same?", "overlap", "fol", "gap%", "eng_mult", "growth", "reason/recommended"))
    print("-" * 105)
    for pid in [
        "niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader",
        "food_creator", "fitness_coach", "travel_creator",
    ]:
        ev = env._collab_evaluation(pid)
        # Fix: "OK" was an f-string with no placeholder (ruff F541) — plain literal.
        rec_str = "OK" if ev["recommended"] else f"BLOCK:{ev['reason']}"
        print(fmt.format(
            pid,
            "Y" if ev["same_niche"] else "N",
            f"{ev['overlap']:.2f}",
            ev["partner_followers"],
            f"{ev['follower_gap_pct']*100:.0f}%",
            f"{ev['eng_mult']:.3f}",
            f"{ev['growth_mult']:.3f}",
            rec_str,
        ))
    print()
118
+
119
+
120
def make_collab_plan(partner_id: str, collab_days: List[int]):
    """Daily plan: single post + collab proposed on collab_days."""
    # Set membership for O(1) day lookups inside the closure.
    collab_day_set = set(collab_days)

    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        proposal = (
            CollabProposal(partner_id=partner_id, content_type="reel", hour=12)
            if day in collab_day_set
            else None
        )
        return ViraltestAction(scheduled_actions=[_post_only()], collab=proposal)

    return plan
129
+
130
+
131
def section_a_collab_cadence() -> None:
    """Compare per-tier scores at three collab cadences to expose multiplier stacking."""
    print("=" * 78)
    print("A2. Score spread vs collab cadence (1, 5, 15 collabs in 15-day horizon)")
    print(" Hypothesis: more collab days -> larger gap between tiers")
    print("=" * 78)

    # Map each tier to (partner_id, user_niche) — chosen so the partner clears the
    # follower-size guardrail (peer-tier mocked followers in the data file).
    tiers = [
        ("Same-Niche Low", "niche_expert", "tech"),
        ("Same-Niche High", "viral_chaser", "lifestyle"),  # overlap=0.55 (high)
        ("Diff-Niche Low", "food_creator", "tech"),  # overlap=0.25 (mid-low)
        ("Diff-Niche High", "lifestyle_blogger", "tech"),  # overlap=0.40 (boundary high)
        ("Guardrail Block", "b2b_thought_leader", "tech"),  # overlap=0.08 (<10%)
    ]
    cadences = {
        "1 collab": [5],
        "5 collabs": [3, 5, 7, 9, 11],
        "15 collabs": list(range(1, 16)),
    }

    fmt = "{:<22} {:>10} {:>10} {:>10}"
    print(fmt.format("Tier", *cadences.keys()))
    print("-" * 56)
    for tier_name, pid, niche in tiers:
        cells = [
            "{:.4f}".format(
                _run_episode(make_collab_plan(pid, days), user_niche=niche)["score"]
            )
            for days in cadences.values()
        ]
        print(fmt.format(tier_name, *cells))
    print()
    print(" -> Same-Niche Low score should DROP slowly as you add collabs.")
    print(" -> Same-Niche High and Diff-Niche High should DROP quickly (penalty stacks).")
    print(" -> Spread between top and bottom should GROW with cadence.")
    print()
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # B) INTERACTION DIAGNOSTICS
170
+ # ---------------------------------------------------------------------------
171
+
172
def make_interaction_plan(interactions: DailyInteractions):
    """Plan factory: one default post per day plus a fixed interaction bundle."""
    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        daily_posts = [_post_only()]
        return ViraltestAction(scheduled_actions=daily_posts, interactions=interactions)

    return plan
176
+
177
+
178
def section_b_interactions() -> None:
    """Exercise each interaction penalty path and check the expected violation fires.

    Each case is (label, interactions, expected) where ``expected`` is the
    violation substring that must appear at least once during the episode, or
    ``None`` for the healthy band, which must produce NO interaction violations
    at all.  (The previous version used a fake pattern "interaction_*" for the
    healthy row, whose stripped form "*" made the generic check meaningless and
    forced a second special-case override; this keeps one decision point.)
    """
    print("=" * 78)
    print("B. Interaction penalty-path matrix")
    print("=" * 78)

    cases = [
        ("healthy", DailyInteractions(
            likes_on_others=12, comments_on_others=5, replies_to_audience=3,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
        ), None),
        ("spam", DailyInteractions(
            likes_on_others=80, comments_on_others=40, replies_to_audience=0,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
        ), "spam"),
        ("ignoring_own", DailyInteractions(
            likes_on_others=8, comments_on_others=4, replies_to_audience=0,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.6,
        ), "ignoring_own"),
        ("off_niche", DailyInteractions(
            likes_on_others=10, comments_on_others=5, replies_to_audience=2,
            target_partner_ids=["food_creator", "fitness_coach", "travel_creator", "lifestyle_blogger"],
            avg_reply_quality=0.7,
        ), "off_niche"),
        ("low_quality", DailyInteractions(
            likes_on_others=10, comments_on_others=5, replies_to_audience=8,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.05,
        ), "low_quality"),
        ("energy_drain", DailyInteractions(
            likes_on_others=200, comments_on_others=100, replies_to_audience=100,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.5,
        ), "energy_drain"),
    ]

    fmt = "{:<14} {:>7} {:>9} {:>10} {:>10} {:>11} {:<12}"
    print(fmt.format("case", "score", "followers", "min_energy", "engRate", "violations", "expect"))
    print("-" * 80)
    for label, interactions, expected in cases:
        r = _run_episode(make_interaction_plan(interactions), user_niche="tech")
        viols = r["interaction_violations"]
        if expected is None:
            # Healthy band: any interaction violation at all is a failure.
            ok = "OK" if not viols else "FAIL"
        else:
            ok = "OK" if any(expected in v for v in viols) else "FAIL"
        print(fmt.format(
            label,
            f"{r['score']:.3f}",
            r["followers"],
            f"{r['min_energy']:.2f}",
            f"{r['engagement_rate']:.3f}",
            len(viols),
            ok,
        ))
    print()
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # C) CROSS-CUTTING SANITY
236
+ # ---------------------------------------------------------------------------
237
+
238
def section_c_sanity() -> None:
    """Smoke-test rest/1-post/2-post baselines plus the two query tools."""
    # Local import kept local, as in the original module; hoisted to the top
    # of the function so both tool calls below can use it.
    from models import ToolCall

    print("=" * 78)
    print("C. Cross-cutting sanity (rest, post-only, smart, query_interaction_norms)")
    print("=" * 78)

    # Baselines for visual sanity
    def plan_rest(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[])

    def plan_post1(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[_post_only()])

    def plan_post2(obs: Dict[str, Any], day: int) -> ViraltestAction:
        evening_post = ScheduledAction(hour=19, action_type="post", content_type="carousel",
                                       topic="AI tools", tags=["coding"], intent="save_bait")
        return ViraltestAction(scheduled_actions=[
            _post_only(content_type="reel", topic="AI tools"),
            evening_post,
        ])

    fmt = "{:<14} {:>7} {:>9} {:>8} {:>8} {:>6}"
    print(fmt.format("baseline", "score", "followers", "energy", "engRate", "errs"))
    print("-" * 60)
    baselines = (("rest", plan_rest), ("1-post", plan_post1), ("2-post", plan_post2))
    for name, fn in baselines:
        result = _run_episode(fn, user_niche="tech")
        err_cell = result["error"][:12] if result["error"] else "0"
        print(fmt.format(name, f"{result['score']:.3f}", result["followers"],
                         f"{result['energy']:.2f}", f"{result['engagement_rate']:.3f}", err_cell))
    print()

    # Verify query_interaction_norms surfaces sensible values.
    env = ViraltestEnvironment()
    env.reset(task="monthly_engage", seed=SEED, user_niche="tech")
    res = env._dispatch_tool(ToolCall(name="query_interaction_norms", arguments={}))
    print("query_interaction_norms tool ->")
    print(f" success={res.success}, data={res.data}")
    print()

    # Verify query_creator_pool returns the recommendation surface.
    res = env._dispatch_tool(ToolCall(name="query_creator_pool", arguments={}))
    print("query_creator_pool tool ->")
    print(f" user_niche={res.data['user_niche']}, user_followers={res.data['user_followers']}")
    for p in res.data["pool"]:
        print(f" {p['id']:<22} same_niche={p['same_niche']!s:<5} overlap={p['audience_overlap']:>4} "
              f"recommended={p['recommended']!s:<5} reason={p['reason']}")
    print()
284
+
285
+
286
+ # ---------------------------------------------------------------------------
287
+ # Main
288
+ # ---------------------------------------------------------------------------
289
+
290
if __name__ == "__main__":
    # Run every diagnostic section in order, then confirm completion.
    for section in (
        section_a_collab_evaluator,
        section_a_collab_cadence,
        section_b_interactions,
        section_c_sanity,
    ):
        section()
    print("Evaluation complete.")
models.py CHANGED
@@ -64,6 +64,28 @@ class CollabProposal(BaseModel):
64
  hour: int = Field(default=12, ge=0, le=23)
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  class ViraltestAction(Action):
68
  """Daily plan: tool calls for discovery, then scheduled actions to commit."""
69
 
@@ -79,6 +101,10 @@ class ViraltestAction(Action):
79
  default=None,
80
  description="Optional collaboration proposal (max 2 per month)",
81
  )
 
 
 
 
82
  notes: Optional[str] = Field(
83
  default=None,
84
  max_length=2000,
@@ -191,6 +217,10 @@ class ViraltestObservation(Observation):
191
  tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
192
  agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
193
  api_budget_remaining: int = Field(default=100, ge=0)
 
 
 
 
194
 
195
  grader_score: Optional[float] = Field(default=None)
196
  error: Optional[str] = Field(default=None)
 
64
  hour: int = Field(default=12, ge=0, le=23)
65
 
66
 
67
class DailyInteractions(BaseModel):
    """Daily aggregate of creator interactions: likes, comments on others' content, and replies to own audience.

    Models the comment/like/reply economy. Healthy interaction in moderation rewards reach;
    spam, audience-ignoring, off-niche, and low-quality patterns are penalized.
    """

    # Bounded counts; the env-side caps (le=200/le=100) match the spam/energy
    # thresholds exercised by eval_env.py's penalty-path cases.
    likes_on_others: int = Field(default=0, ge=0, le=200, description="Likes given on other creators' posts today")
    comments_on_others: int = Field(default=0, ge=0, le=100, description="Comments left on other creators' posts today")
    replies_to_audience: int = Field(default=0, ge=0, le=100, description="Replies to incoming comments on your own posts")
    # Which archetypes were targeted; compared against the user's niche by the
    # environment to detect off-niche interaction patterns.
    target_partner_ids: List[str] = Field(
        default_factory=list,
        description="Competitor archetype ids you interacted with today (used for off-niche detection)",
    )
    # 0.0..1.0 self-rating; low values drive the low_quality penalty path.
    avg_reply_quality: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Self-rated effort/depth of replies (0=one-word, 1=substantive)",
    )
87
+
88
+
89
  class ViraltestAction(Action):
90
  """Daily plan: tool calls for discovery, then scheduled actions to commit."""
91
 
 
101
  default=None,
102
  description="Optional collaboration proposal (max 2 per month)",
103
  )
104
+ interactions: Optional[DailyInteractions] = Field(
105
+ default=None,
106
+ description="Daily likes/comments/replies activity (community engagement layer)",
107
+ )
108
  notes: Optional[str] = Field(
109
  default=None,
110
  max_length=2000,
 
217
  tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
218
  agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
219
  api_budget_remaining: int = Field(default=100, ge=0)
220
+ interaction_metrics: Optional[Dict[str, Any]] = Field(
221
+ default=None,
222
+ description="Daily interaction summary: reach modifier, shadowban_risk, and a one-line reason",
223
+ )
224
 
225
  grader_score: Optional[float] = Field(default=None)
226
  error: Optional[str] = Field(default=None)
server/app.py CHANGED
@@ -29,10 +29,22 @@ if "ENABLE_WEB_INTERFACE" not in os.environ:
29
  os.environ["ENABLE_WEB_INTERFACE"] = "true"
30
 
31
  try:
32
- from ..models import ScheduledAction, ViraltestAction, ViraltestObservation
 
 
 
 
 
 
33
  from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
34
  except ImportError:
35
- from models import ScheduledAction, ViraltestAction, ViraltestObservation
 
 
 
 
 
 
36
  from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
37
 
38
  try:
@@ -174,10 +186,17 @@ _CONTENT_TYPES = ["reel", "carousel", "story", "text_post"]
174
  _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
175
 
176
 
177
- def _make_daily_plan(actions: list, notes: Optional[str] = None) -> ViraltestAction:
 
 
 
 
 
178
  return ViraltestAction(
179
  scheduled_actions=[ScheduledAction(**a) for a in actions],
180
  notes=notes,
 
 
181
  )
182
 
183
 
@@ -236,12 +255,96 @@ def _plan_minimal(obs: dict, day: int) -> ViraltestAction:
236
  ])
237
 
238
 
239
- SCENARIOS = {
240
- "always_rest": ("Always Rest", "Never posts. Tests follower decay.", _plan_always_rest),
241
- "spam": ("Spam Post", "Same reel every hour. Burns out fast.", _plan_spam),
242
- "smart": ("Smart Agent", "Optimal: peak hours, trending, varied types+intents.", _plan_smart),
243
- "minimal": ("Minimal Poster", "1 carousel per day at noon.", _plan_minimal),
244
- "random": ("Random Actor", "Random actions. Baseline test.", _plan_random),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  }
246
 
247
 
@@ -265,9 +368,14 @@ async def dashboard_simulate(body: Dict[str, Any] = Body(...)):
265
  if scenario_id not in SCENARIOS:
266
  return {"error": f"Unknown scenario: {scenario_id}"}
267
 
268
- label, desc, plan_fn = SCENARIOS[scenario_id]
 
 
269
  env = ViraltestEnvironment()
270
- obs = env.reset(task=task, seed=42)
 
 
 
271
  obs_dict = obs.model_dump()
272
 
273
  steps: List[Dict[str, Any]] = []
@@ -347,11 +455,16 @@ async def training_evidence():
347
  global _SIM_RNG
348
 
349
  results = []
350
- for scenario_id, (label, desc, plan_fn) in SCENARIOS.items():
 
 
351
  for task in _TRAINING_TASKS:
352
  _SIM_RNG = stdlib_random.Random(99)
353
  env = ViraltestEnvironment()
354
- obs = env.reset(task=task, seed=42)
 
 
 
355
  obs_dict = obs.model_dump()
356
 
357
  rewards: List[float] = []
 
29
  os.environ["ENABLE_WEB_INTERFACE"] = "true"
30
 
31
  try:
32
+ from ..models import (
33
+ CollabProposal,
34
+ DailyInteractions,
35
+ ScheduledAction,
36
+ ViraltestAction,
37
+ ViraltestObservation,
38
+ )
39
  from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
40
  except ImportError:
41
+ from models import (
42
+ CollabProposal,
43
+ DailyInteractions,
44
+ ScheduledAction,
45
+ ViraltestAction,
46
+ ViraltestObservation,
47
+ )
48
  from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
49
 
50
  try:
 
186
  _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
187
 
188
 
189
def _make_daily_plan(
    actions: list,
    notes: Optional[str] = None,
    collab: Optional[CollabProposal] = None,
    interactions: Optional[DailyInteractions] = None,
) -> ViraltestAction:
    """Assemble a ViraltestAction from raw action dicts plus optional extras."""
    scheduled = [ScheduledAction(**spec) for spec in actions]
    return ViraltestAction(
        scheduled_actions=scheduled,
        notes=notes,
        collab=collab,
        interactions=interactions,
    )
201
 
202
 
 
255
  ])
256
 
257
 
258
def _plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
    """Same-niche, low-overlap collab on day 5+15 — best-case reward path."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    tag_list = list((obs.get("trending_tags") or [])[:2]) + ["ai"]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": tag_list, "intent": "watch_bait"}
    proposal = (
        CollabProposal(partner_id="niche_expert", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Same-niche low-overlap collab demo.", collab=proposal)
270
+
271
+
272
def _plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
    """Diff-niche, high-overlap collab — penalty path (mismatch)."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    tag_list = list((obs.get("trending_tags") or [])[:2]) + ["ai"]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": tag_list, "intent": "watch_bait"}
    proposal = (
        CollabProposal(partner_id="lifestyle_blogger", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Diff-niche high-overlap collab demo.", collab=proposal)
284
+
285
+
286
def _plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
    """Healthy daily interaction — likes/comments on-niche, replies to audience."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": ["ai"], "intent": "watch_bait"}
    healthy = DailyInteractions(
        likes_on_others=12, comments_on_others=5, replies_to_audience=3,
        target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
    )
    return _make_daily_plan([post], notes="Healthy interaction demo.", interactions=healthy)
299
+
300
+
301
def _plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
    """Spam interaction — triggers shadowban_risk + reach penalty."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": ["ai"], "intent": "watch_bait"}
    spammy = DailyInteractions(
        likes_on_others=80, comments_on_others=40, replies_to_audience=0,
        target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
    )
    return _make_daily_plan([post], notes="Interaction spam demo.", interactions=spammy)
314
+
315
+
316
# Scenario tuple: (label, description, plan_fn, optional user_niche).
# user_niche is honored by dashboard_simulate / training_evidence; defaults to "generic" when None.
SCENARIOS: Dict[str, tuple] = {
    "always_rest": ("Always Rest", "Never posts. Tests follower decay.", _plan_always_rest, None),
    "spam": ("Spam Post", "Same reel every hour. Burns out fast.", _plan_spam, None),
    "smart": ("Smart Agent", "Optimal: peak hours, trending, varied types+intents.", _plan_smart, None),
    "minimal": ("Minimal Poster", "1 carousel per day at noon.", _plan_minimal, None),
    "random": ("Random Actor", "Random actions. Baseline test.", _plan_random, None),
    "collab_same_low": ("Collab Same-Niche Low Overlap",
                        "Same-niche partner with <20% overlap. Best-case collab reward path.",
                        _plan_collab_same_low, "tech"),
    "collab_diff_high": ("Collab Diff-Niche High Overlap",
                         "Diff-niche partner with >40% overlap. Penalty path (audience mismatch).",
                         _plan_collab_diff_high, "tech"),
    "interact_balanced": ("Interact Balanced",
                          "Healthy on-niche likes/comments and audience replies.",
                          _plan_interact_balanced, "tech"),
    "interact_spam": ("Interact Spam",
                      "80 likes + 40 comments — spam path triggers shadowban_risk.",
                      _plan_interact_spam, "tech"),
}
349
 
350
 
 
368
  if scenario_id not in SCENARIOS:
369
  return {"error": f"Unknown scenario: {scenario_id}"}
370
 
371
+ entry = SCENARIOS[scenario_id]
372
+ label, desc, plan_fn = entry[0], entry[1], entry[2]
373
+ user_niche = entry[3] if len(entry) > 3 else None
374
  env = ViraltestEnvironment()
375
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
376
+ if user_niche:
377
+ reset_kwargs["user_niche"] = user_niche
378
+ obs = env.reset(**reset_kwargs)
379
  obs_dict = obs.model_dump()
380
 
381
  steps: List[Dict[str, Any]] = []
 
455
  global _SIM_RNG
456
 
457
  results = []
458
+ for scenario_id, entry in SCENARIOS.items():
459
+ label, desc, plan_fn = entry[0], entry[1], entry[2]
460
+ user_niche = entry[3] if len(entry) > 3 else None
461
  for task in _TRAINING_TASKS:
462
  _SIM_RNG = stdlib_random.Random(99)
463
  env = ViraltestEnvironment()
464
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
465
+ if user_niche:
466
+ reset_kwargs["user_niche"] = user_niche
467
+ obs = env.reset(**reset_kwargs)
468
  obs_dict = obs.model_dump()
469
 
470
  rewards: List[float] = []
server/data/audience_overlap_matrix.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "_meta": {
3
- "description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience. Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
4
- "source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest."
 
5
  },
6
  "archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
7
  "matrix": [
@@ -13,5 +14,25 @@
13
  [0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
14
  [0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
15
  [0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
16
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
 
1
  {
2
  "_meta": {
3
+ "description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience (Jaccard intersection fraction). Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
4
+ "source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest.",
5
+ "mock_followers_note": "Mocked follower counts span tiers from micro (10k user) to mid (250k viral_chaser). Used to derive intersection size via Jaccard inversion: |A intersect B| = overlap * (|A| + |B|) / (1 + overlap)."
6
  },
7
  "archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
8
  "matrix": [
 
14
  [0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
15
  [0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
16
  [0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
17
+ ],
18
+ "niche_by_archetype": {
19
+ "niche_expert": "tech",
20
+ "viral_chaser": "lifestyle",
21
+ "lifestyle_blogger": "lifestyle",
22
+ "b2b_thought_leader": "business",
23
+ "food_creator": "food",
24
+ "fitness_coach": "fitness",
25
+ "travel_creator": "travel",
26
+ "user_creator": "generic"
27
+ },
28
+ "mock_followers_by_archetype": {
29
+ "niche_expert": 12000,
30
+ "viral_chaser": 250000,
31
+ "lifestyle_blogger": 11000,
32
+ "b2b_thought_leader": 9000,
33
+ "food_creator": 12000,
34
+ "fitness_coach": 8000,
35
+ "travel_creator": 11000,
36
+ "user_creator": 10000
37
+ }
38
  }
server/viraltest_environment.py CHANGED
@@ -26,6 +26,7 @@ from openenv.core.env_server.types import State
26
  try:
27
  from ..models import (
28
  CollabProposal,
 
29
  EngagementSignals,
30
  HeadlineMetrics,
31
  JudgeReport,
@@ -38,6 +39,7 @@ try:
38
  except ImportError:
39
  from models import (
40
  CollabProposal,
 
41
  EngagementSignals,
42
  HeadlineMetrics,
43
  JudgeReport,
@@ -88,6 +90,13 @@ _HEATMAP_GRID: Dict[int, List[float]] = {
88
  int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
89
  }
90
 
 
 
 
 
 
 
 
91
  # ---------------------------------------------------------------------------
92
  # Constants (research-backed, Tier 1-3 sources)
93
  # ---------------------------------------------------------------------------
@@ -166,12 +175,56 @@ TREND_DEFAULT_HALFLIFE_HOURS = 60
166
  TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
167
  # Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
168
  # Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
169
- COLLAB_REACH_K = 0.60 # cross-audience exposure: capped reach uplift when overlap is 0
170
- COLLAB_AFFINITY_K = 0.30 # same-audience affinity: per-impression engagement uplift when overlap is 1
171
- COLLAB_GROWTH_K = 1.50 # cross-pollination follower spillover, scales (1 - overlap)
172
  COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when partner reused this brand
173
  COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
176
 
177
  # Heuristic baselines for headline metric `vs_baseline_pct`.
@@ -251,17 +304,21 @@ TOOL_CATALOG = {
251
  "parameters": {"scheduled_actions": {"type": "array"}},
252
  },
253
  "query_creator_pool": {
254
- "description": "List available competitor archetypes for potential collaboration, with audience overlap %.",
255
  "parameters": {},
256
  },
257
  "propose_collab": {
258
- "description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored with the partner.",
259
  "parameters": {
260
  "partner_id": {"type": "string"},
261
  "content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
262
  "hour": {"type": "integer", "minimum": 0, "maximum": 23},
263
  },
264
  },
 
 
 
 
265
  }
266
 
267
 
@@ -305,6 +362,15 @@ class ViraltestEnvironment(Environment):
305
  self._collabs_this_month = 0
306
  self._collab_history: List[str] = []
307
  self._active_collab: Optional[CollabProposal] = None
 
 
 
 
 
 
 
 
 
308
  self._low_energy_days = 0
309
  self._total_posts_this_week = 0
310
  self._week_start_day = 0
@@ -486,7 +552,7 @@ class ViraltestEnvironment(Environment):
486
 
487
  return daily_fatigue * weekly_mult
488
 
489
- # ----- collab multipliers (overlap-driven) -----
490
 
491
  def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
492
  ids = _OVERLAP_DATA.get("archetype_ids", [])
@@ -496,21 +562,297 @@ class ViraltestEnvironment(Environment):
496
  p = ids.index(partner_id)
497
  return _OVERLAP_DATA["matrix"][u][p]
498
 
499
- def _collab_multipliers(self, partner_id: str) -> Tuple[float, float]:
500
- """Returns (engagement_multiplier, follower_growth_multiplier)."""
501
- o = self._user_partner_overlap(partner_id)
502
- if o is None:
503
- return 1.0, 1.0
504
- reach = 1.0 + (1.0 - o) * COLLAB_REACH_K
505
- affinity = 1.0 + o * COLLAB_AFFINITY_K
506
- growth = 1.0 + (1.0 - o) * COLLAB_GROWTH_K
507
- eng_boost = reach * affinity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  if partner_id in self._collab_history[:-1]:
509
- eng_boost *= COLLAB_PARTNER_REPEAT_PENALTY
510
- growth *= COLLAB_PARTNER_REPEAT_PENALTY
 
 
511
  prior = max(0, self._collabs_this_month - 1)
512
  fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
513
- return eng_boost * fatigue, growth * fatigue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  # ----- engagement signals (Mosseri-aligned) -----
516
 
@@ -597,18 +939,68 @@ class ViraltestEnvironment(Environment):
597
  elif tool.name == "query_creator_pool":
598
  pool = []
599
  for comp in self._competitors:
600
- overlap = self._user_partner_overlap(comp.id)
601
  pool.append({
602
- "id": comp.id, "name": comp.name, "niche": comp.niche,
603
- "audience_overlap": round(overlap, 2) if overlap is not None else None,
 
 
 
 
 
 
 
 
 
 
604
  })
605
- return ToolResult(name=tool.name, data=pool, budget_remaining=self._api_budget)
 
 
 
 
 
 
 
 
606
 
607
  elif tool.name == "propose_collab":
608
  partner_id = tool.arguments.get("partner_id", "")
609
  if partner_id not in [c.id for c in self._competitors]:
610
  return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
611
- return ToolResult(name=tool.name, data={"status": "proposal_accepted", "partner_id": partner_id}, budget_remaining=self._api_budget)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
 
613
  return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
614
 
@@ -665,6 +1057,14 @@ class ViraltestEnvironment(Environment):
665
  if self._hours_since_sleep > 22:
666
  violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
667
  pc -= 0.10
 
 
 
 
 
 
 
 
668
 
669
  burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
670
  sustainability_risk = max(0.0, min(1.0, burnout_pressure))
@@ -729,6 +1129,11 @@ class ViraltestEnvironment(Environment):
729
  self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
730
  self._init_state()
731
 
 
 
 
 
 
732
  self._shift_label = kwargs.get("shift_label")
733
  self._chain_id = kwargs.get("episode_chain_id")
734
 
@@ -766,11 +1171,16 @@ class ViraltestEnvironment(Environment):
766
 
767
  # Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
768
  self._active_collab = None
 
769
  if action.collab:
770
  self._collabs_this_month += 1
771
  self._collab_history.append(action.collab.partner_id)
772
  self._active_collab = action.collab
773
 
 
 
 
 
774
  # Validate scheduled actions
775
  schedule: Dict[int, ScheduledAction] = {}
776
  errors: List[str] = []
@@ -837,9 +1247,14 @@ class ViraltestEnvironment(Environment):
837
  if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
838
  self._days_with_good_posts.add(prev_day)
839
 
840
- avg_reward = daily_reward / 24.0
 
 
841
  error_str = "; ".join(errors) if errors else None
842
 
 
 
 
843
  done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
844
  coach = self._compute_coach_feedback(daily_engagement)
845
  judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
@@ -864,6 +1279,7 @@ class ViraltestEnvironment(Environment):
864
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
865
  tool_results=tool_results, engagement_signals=daily_signals,
866
  coach_feedback=coach, judge_report=judge, headline_metrics=headline,
 
867
  )
868
  return self._final_observation
869
 
@@ -873,6 +1289,7 @@ class ViraltestEnvironment(Environment):
873
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
874
  tool_results=tool_results, engagement_signals=daily_signals,
875
  coach_feedback=coach, judge_report=judge,
 
876
  )
877
 
878
  def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
@@ -918,6 +1335,10 @@ class ViraltestEnvironment(Environment):
918
  * niche_mult * saturation_factor
919
  )
920
 
 
 
 
 
921
  if self._active_collab is not None and self._active_collab.hour == sa.hour:
922
  eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
923
  engagement *= eng_m
@@ -1101,9 +1522,11 @@ class ViraltestEnvironment(Environment):
1101
  coach_feedback: Optional[Dict[str, Any]] = None,
1102
  judge_report: Optional[JudgeReport] = None,
1103
  headline_metrics: Optional[HeadlineMetrics] = None,
 
1104
  ) -> ViraltestObservation:
1105
  recent_eng = self._engagement_history[-10:] if self._engagement_history else []
1106
  eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
 
1107
 
1108
  meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
1109
  if grader_score is not None:
@@ -1153,6 +1576,7 @@ class ViraltestEnvironment(Environment):
1153
  done=done,
1154
  reward=round(reward, 4),
1155
  metadata=meta,
 
1156
  )
1157
 
1158
  # ----- graders (monthly) -----
 
26
  try:
27
  from ..models import (
28
  CollabProposal,
29
+ DailyInteractions,
30
  EngagementSignals,
31
  HeadlineMetrics,
32
  JudgeReport,
 
39
  except ImportError:
40
  from models import (
41
  CollabProposal,
42
+ DailyInteractions,
43
  EngagementSignals,
44
  HeadlineMetrics,
45
  JudgeReport,
 
90
  int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
91
  }
92
 
93
# Mocked niche + follower-count lookups for the collab system. Live in the overlap matrix file
# so the same source-of-truth carries (a) Jaccard overlap, (b) niche label, (c) follower size.
_NICHE_BY_ARCHETYPE: Dict[str, str] = dict(_OVERLAP_DATA.get("niche_by_archetype", {}))
# Follower counts are coerced to int here so downstream gap/intersection math never sees
# stringly-typed JSON values. Missing keys simply yield an empty mapping (lookup falls back to 0).
_FOLLOWERS_BY_ARCHETYPE: Dict[str, int] = {
    k: int(v) for k, v in _OVERLAP_DATA.get("mock_followers_by_archetype", {}).items()
}
99
+
100
  # ---------------------------------------------------------------------------
101
  # Constants (research-backed, Tier 1-3 sources)
102
  # ---------------------------------------------------------------------------
 
175
  TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
176
  # Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
177
  # Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
 
 
 
178
  COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when partner reused this brand
179
  COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
180
 
181
# Niche-aware tiered shaping (overlap = Jaccard intersection fraction).
# Hard rule: any diff-niche multiplier must be < the minimum same-niche-low multiplier
# so the env never recommends a diff-niche collab over an equal-overlap same-niche one.
COLLAB_LOW_OVERLAP_THRESHOLD = 0.20  # < this counts as "low intersection"
COLLAB_HIGH_OVERLAP_THRESHOLD = 0.40  # >= this counts as "high intersection"
COLLAB_GUARDRAIL_OVERLAP_MIN = 0.10  # below this -> recommended=False (intersection-too-low guardrail)
COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX = 0.25  # |partner - user| / max > this -> follower-size mismatch
COLLAB_FORCED_PENALTY_ENG = 0.7  # eng_mult applied if agent ignores guardrail
COLLAB_FORCED_PENALTY_GROWTH = 0.6  # growth_mult applied if agent ignores guardrail

# Same niche, LOW overlap -> HIGH reward (best case). Smoothly interpolated by overlap (low->high uplift as overlap->0).
# Tuple convention for all *_LOW spans: (value at the LOW threshold, value at overlap=0).
COLLAB_SAME_LOW_ENG = (1.50, 1.80)
COLLAB_SAME_LOW_GROWTH = (1.60, 2.00)
# Same niche, HIGH overlap -> LOW reward (no point, audience already shared).
COLLAB_SAME_HIGH_ENG = 0.85
COLLAB_SAME_HIGH_GROWTH = 0.90
# Diff niche, LOW overlap -> MED reward (cross-pollination, capped < SAME_LOW min).
COLLAB_DIFF_LOW_ENG = (1.20, 1.40)
COLLAB_DIFF_LOW_GROWTH = (1.30, 1.55)
# Diff niche, HIGH overlap -> LOW reward (mismatch).
COLLAB_DIFF_HIGH_ENG = 0.75
COLLAB_DIFF_HIGH_GROWTH = 0.80

# Interaction (likes/comments/replies) tunables
INTERACT_ENERGY_LIKE = 0.005  # energy cost per like given to another creator
INTERACT_ENERGY_COMMENT = 0.012  # energy cost per comment on another creator
INTERACT_ENERGY_REPLY = 0.018  # energy cost per reply to own audience
INTERACT_HEALTHY_LIKES = (5, 20)  # inclusive daily band that earns the like reach buff
INTERACT_HEALTHY_COMMENTS = (3, 10)  # inclusive daily band that earns the comment reach buff
INTERACT_LIKE_REACH_BUFF = 0.04  # reach multiplier uplift when likes are in the healthy band
INTERACT_COMMENT_REACH_BUFF = 0.08  # reach multiplier uplift when comments are in the healthy band
INTERACT_REPLY_REWARD_PER = 0.01  # base reward per audience reply (scaled by quality)
INTERACT_REPLY_REWARD_CAP = 0.15  # ceiling on the per-day reply reward
INTERACT_DAILY_REWARD_CAP = 0.15  # clamp on the total interaction reward_delta per day
INTERACT_SPAM_LIKES = 30  # likes above this count as spam
INTERACT_SPAM_COMMENTS = 20  # comments above this count as spam
INTERACT_SPAM_REACH_PENALTY = 0.85  # reach multiplier applied on a spam day
INTERACT_SPAM_SHADOWBAN_BUMP = 0.20  # shadowban_risk increase on a spam day
INTERACT_IGNORE_THRESHOLD_K = 0.05  # expected replies per unit of yesterday's engagement proxy
INTERACT_IGNORE_LOYALTY_DECAY = 0.97  # compounding loyalty multiplier when audience is ignored
INTERACT_OFFNICHE_THRESHOLD = 0.60  # off-niche target share at/above which the penalty fires
INTERACT_OFFNICHE_REACH_PENALTY = 0.90  # reach multiplier applied for off-niche-heavy days
INTERACT_LOWQ_THRESHOLD = 0.30  # reply quality below this down-weights the reply reward
INTERACT_LOWQ_WEIGHT = 0.4  # weight applied to reply reward when quality is low
INTERACT_VERY_LOWQ_THRESHOLD = 0.10  # reply quality below this triggers an extra penalty
INTERACT_VERY_LOWQ_PENALTY = -0.03  # flat reward penalty for very-low-quality replies
227
+
228
  API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
229
 
230
  # Heuristic baselines for headline metric `vs_baseline_pct`.
 
304
  "parameters": {"scheduled_actions": {"type": "array"}},
305
  },
306
  "query_creator_pool": {
307
+ "description": "List available competitor archetypes for potential collaboration with audience overlap %, niche match, mocked follower counts, intersection size, and a recommendation flag (recommended=False when guardrails block: zero followers, intersection<10%, or follower-size gap>25%).",
308
  "parameters": {},
309
  },
310
  "propose_collab": {
311
+ "description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored. Reward shaping: same-niche + low overlap = HIGH; same-niche + high overlap = LOW; diff-niche always capped below same-niche-low. Guardrail violations apply a 0.7x engagement / 0.6x growth penalty AND surface in the JudgeReport.",
312
  "parameters": {
313
  "partner_id": {"type": "string"},
314
  "content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
315
  "hour": {"type": "integer", "minimum": 0, "maximum": 23},
316
  },
317
  },
318
+ "query_interaction_norms": {
319
+ "description": "Discover healthy daily ranges for likes/comments/replies and the current shadowban_risk. Use before submitting ViraltestAction.interactions.",
320
+ "parameters": {},
321
+ },
322
  }
323
 
324
 
 
362
  self._collabs_this_month = 0
363
  self._collab_history: List[str] = []
364
  self._active_collab: Optional[CollabProposal] = None
365
+ self._collab_violations: List[str] = [] # collab guardrail breaches this step
366
+ self._user_niche: str = _NICHE_BY_ARCHETYPE.get("user_creator", "generic")
367
+
368
+ # Interaction state
369
+ self._pending_reach_mult: float = 1.0 # applied to next day's posts (one-shot)
370
+ self._shadowban_risk: float = 0.0
371
+ self._engagement_rate_loyalty_mult: float = 1.0 # compounding loyalty drop from ignoring audience
372
+ self._interaction_violations: List[str] = []
373
+ self._last_interaction_summary: Optional[Dict[str, Any]] = None
374
  self._low_energy_days = 0
375
  self._total_posts_this_week = 0
376
  self._week_start_day = 0
 
552
 
553
  return daily_fatigue * weekly_mult
554
 
555
+ # ----- collab evaluation (niche-aware, overlap-tiered) -----
556
 
557
  def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
558
  ids = _OVERLAP_DATA.get("archetype_ids", [])
 
562
  p = ids.index(partner_id)
563
  return _OVERLAP_DATA["matrix"][u][p]
564
 
565
+ def _partner_niche(self, partner_id: str) -> str:
566
+ return _NICHE_BY_ARCHETYPE.get(partner_id, "generic")
567
+
568
+ def _partner_followers(self, partner_id: str) -> int:
569
+ return _FOLLOWERS_BY_ARCHETYPE.get(partner_id, 0)
570
+
571
+ @staticmethod
572
+ def _interp(span: Tuple[float, float], t: float) -> float:
573
+ """Linear interp from span[0] (t=0) to span[1] (t=1)."""
574
+ t = max(0.0, min(1.0, t))
575
+ return span[0] + (span[1] - span[0]) * t
576
+
577
+ def _collab_tier_multipliers(self, same_niche: bool, overlap: float) -> Tuple[float, float]:
578
+ """Pure 2x2 tier shaping (no fatigue/repeat/guardrail effects yet)."""
579
+ # Smooth interp factor: how "low" is this overlap on the [0, LOW_THRESHOLD] scale.
580
+ low_t = 1.0 - min(1.0, overlap / COLLAB_LOW_OVERLAP_THRESHOLD) # 1 at overlap=0, 0 at threshold
581
+ if same_niche:
582
+ if overlap < COLLAB_LOW_OVERLAP_THRESHOLD:
583
+ eng = self._interp(COLLAB_SAME_LOW_ENG, low_t)
584
+ growth = self._interp(COLLAB_SAME_LOW_GROWTH, low_t)
585
+ elif overlap >= COLLAB_HIGH_OVERLAP_THRESHOLD:
586
+ eng = COLLAB_SAME_HIGH_ENG
587
+ growth = COLLAB_SAME_HIGH_GROWTH
588
+ else:
589
+ # Mid-band linear interpolation between LOW endpoint (overlap=LOW_TH) and HIGH endpoint (overlap=HIGH_TH).
590
+ mid_t = (overlap - COLLAB_LOW_OVERLAP_THRESHOLD) / (COLLAB_HIGH_OVERLAP_THRESHOLD - COLLAB_LOW_OVERLAP_THRESHOLD)
591
+ eng = self._interp((COLLAB_SAME_LOW_ENG[0], COLLAB_SAME_HIGH_ENG), mid_t)
592
+ growth = self._interp((COLLAB_SAME_LOW_GROWTH[0], COLLAB_SAME_HIGH_GROWTH), mid_t)
593
+ else:
594
+ if overlap < COLLAB_LOW_OVERLAP_THRESHOLD:
595
+ eng = self._interp(COLLAB_DIFF_LOW_ENG, low_t)
596
+ growth = self._interp(COLLAB_DIFF_LOW_GROWTH, low_t)
597
+ elif overlap >= COLLAB_HIGH_OVERLAP_THRESHOLD:
598
+ eng = COLLAB_DIFF_HIGH_ENG
599
+ growth = COLLAB_DIFF_HIGH_GROWTH
600
+ else:
601
+ mid_t = (overlap - COLLAB_LOW_OVERLAP_THRESHOLD) / (COLLAB_HIGH_OVERLAP_THRESHOLD - COLLAB_LOW_OVERLAP_THRESHOLD)
602
+ eng = self._interp((COLLAB_DIFF_LOW_ENG[0], COLLAB_DIFF_HIGH_ENG), mid_t)
603
+ growth = self._interp((COLLAB_DIFF_LOW_GROWTH[0], COLLAB_DIFF_HIGH_GROWTH), mid_t)
604
+ # Hard rule: diff-niche must always be < same-niche-low minimum (cap just below).
605
+ eng = min(eng, COLLAB_SAME_LOW_ENG[0] - 0.01)
606
+ growth = min(growth, COLLAB_SAME_LOW_GROWTH[0] - 0.01)
607
+ return eng, growth
608
+
609
    def _collab_evaluation(self, partner_id: str) -> Dict[str, Any]:
        """Single source of truth: tier reward + guardrails + final multipliers (after fatigue/repeat).

        Returns a dict consumable by both query_creator_pool (for recommendation surface)
        and _process_hour_action (for applied multipliers).

        Keys of note:
          - recommended / reason: guardrail verdict, checked in priority order
            (zero followers, overlap below 10%, follower-size gap above 25%).
          - tier_eng_mult / tier_growth_mult: pure 2x2 niche/overlap shaping.
          - eng_mult / growth_mult: tier values after the repeat-partner discount
            and the per-episode collab-fatigue decay.
        """
        overlap = self._user_partner_overlap(partner_id)
        if overlap is None:
            # Unknown partner: neutral multipliers, explicitly not recommended.
            return {
                "partner_id": partner_id,
                "overlap": None,
                "same_niche": False,
                "partner_followers": 0,
                "user_followers": self._followers,
                "follower_gap_pct": 1.0,
                "intersection_size": 0,
                "recommended": False,
                "reason": "unknown_partner",
                "tier_eng_mult": 1.0,
                "tier_growth_mult": 1.0,
                "eng_mult": 1.0,
                "growth_mult": 1.0,
            }

        partner_niche = self._partner_niche(partner_id)
        same_niche = partner_niche == self._user_niche
        partner_followers = self._partner_followers(partner_id)
        user_followers = max(0, int(self._followers))
        # Relative follower gap: |partner - user| / larger of the two (denominator floored at 1).
        denom = max(1, max(partner_followers, user_followers))
        gap_pct = abs(partner_followers - user_followers) / denom if denom else 1.0

        # Mock intersection size via Jaccard inversion: union ≈ (|A|+|B|)/(1+overlap), intersection = overlap*union.
        union_approx = (partner_followers + user_followers) / (1.0 + overlap) if overlap >= 0 else 0.0
        intersection_size = int(round(overlap * union_approx))

        # Guardrails (in priority order); only the first failing check sets `reason`.
        recommended = True
        reason: Optional[str] = None
        if partner_followers <= 0:
            recommended = False
            reason = "partner_zero_followers"
        elif overlap < COLLAB_GUARDRAIL_OVERLAP_MIN:
            recommended = False
            reason = "intersection_below_10pct"
        elif gap_pct > COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX:
            recommended = False
            reason = "follower_size_mismatch"

        tier_eng, tier_growth = self._collab_tier_multipliers(same_niche, overlap)

        eng_mult = tier_eng
        growth_mult = tier_growth

        # Repeat-partner discount (existing behavior preserved). `[:-1]` excludes the
        # just-appended entry so the first collab with a partner is not penalized.
        if partner_id in self._collab_history[:-1]:
            eng_mult *= COLLAB_PARTNER_REPEAT_PENALTY
            growth_mult *= COLLAB_PARTNER_REPEAT_PENALTY

        # Diminishing returns across the episode (Cen 2024).
        prior = max(0, self._collabs_this_month - 1)
        fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
        eng_mult *= fatigue
        growth_mult *= fatigue

        return {
            "partner_id": partner_id,
            "overlap": round(overlap, 3),
            "same_niche": same_niche,
            "partner_niche": partner_niche,
            "user_niche": self._user_niche,
            "partner_followers": partner_followers,
            "user_followers": user_followers,
            "follower_gap_pct": round(gap_pct, 3),
            "intersection_size": intersection_size,
            "recommended": recommended,
            "reason": reason,
            "tier_eng_mult": round(tier_eng, 3),
            "tier_growth_mult": round(tier_growth, 3),
            "eng_mult": round(eng_mult, 3),
            "growth_mult": round(growth_mult, 3),
        }
690
+
691
+ def _collab_multipliers(self, partner_id: str) -> Tuple[float, float]:
692
+ """Returns (engagement_multiplier, follower_growth_multiplier).
693
+
694
+ Applies guardrail penalties when the agent forces a non-recommended collab.
695
+ Side effect: appends to self._collab_violations for the JudgeReport.
696
+ """
697
+ ev = self._collab_evaluation(partner_id)
698
+ eng = ev["eng_mult"]
699
+ growth = ev["growth_mult"]
700
+ if not ev["recommended"]:
701
+ eng *= COLLAB_FORCED_PENALTY_ENG
702
+ growth *= COLLAB_FORCED_PENALTY_GROWTH
703
+ self._collab_violations.append(
704
+ f"collab_guardrail:{ev.get('reason', 'blocked')}@{partner_id}"
705
+ )
706
+ return eng, growth
707
+
708
+ # ----- interactions (likes/comments/replies) -----
709
+
710
    def _process_interactions(
        self, interactions: Optional[DailyInteractions]
    ) -> Tuple[float, Dict[str, Any]]:
        """Apply daily interaction effects: energy cost, reach buffs (next post), and 5 penalty paths.

        Returns (reward_delta, summary_dict). The reward_delta is added to today's averaged reward;
        reach effects propagate via self._pending_reach_mult (consumed at next _process_hour_action).
        Loyalty effects propagate via self._engagement_rate_loyalty_mult (compounding).

        Penalty paths (each appends to self._interaction_violations):
        spam volume, off-niche-heavy targets, ignoring own audience,
        very-low-quality replies, and energy drain below 0.2.
        """
        # Reset reach mult for the day (default neutral); we accumulate per-day, then it's consumed
        # by today's posts and any leftover carries over by simply staying at 1.0 next step.
        self._pending_reach_mult = 1.0
        self._interaction_violations = []

        # Default summary for the "no interactions submitted" case.
        summary: Dict[str, Any] = {
            "likes_on_others": 0,
            "comments_on_others": 0,
            "replies_to_audience": 0,
            "energy_cost": 0.0,
            "reach_modifier": 1.0,
            "shadowban_risk": round(self._shadowban_risk, 3),
            "loyalty_mult": round(self._engagement_rate_loyalty_mult, 3),
            "reward_delta": 0.0,
            "violations": [],
            "summary": "no_interactions",
        }

        if interactions is None:
            return 0.0, summary

        likes = int(interactions.likes_on_others)
        comments = int(interactions.comments_on_others)
        replies = int(interactions.replies_to_audience)
        targets = list(interactions.target_partner_ids or [])
        quality = float(interactions.avg_reply_quality)

        # 1) Energy cost (paid up front; can push creator below 0.2 -> burnout track).
        energy_cost = (
            INTERACT_ENERGY_LIKE * likes
            + INTERACT_ENERGY_COMMENT * comments
            + INTERACT_ENERGY_REPLY * replies
        )
        self._energy = max(0.0, self._energy - energy_cost)

        # Determine off-niche share among interaction targets.
        off_niche_share = 0.0
        if targets:
            off = 0
            for tid in targets:
                if self._partner_niche(tid) != self._user_niche:
                    off += 1
            off_niche_share = off / len(targets)

        # 2) Reach buffs (next post engagement multiplier) — only when on-niche and within healthy band.
        on_niche_share = 1.0 - off_niche_share
        reach_mult = 1.0
        if on_niche_share > 0:
            if INTERACT_HEALTHY_LIKES[0] <= likes <= INTERACT_HEALTHY_LIKES[1]:
                reach_mult *= 1.0 + INTERACT_LIKE_REACH_BUFF * on_niche_share
            if INTERACT_HEALTHY_COMMENTS[0] <= comments <= INTERACT_HEALTHY_COMMENTS[1]:
                reach_mult *= 1.0 + INTERACT_COMMENT_REACH_BUFF * on_niche_share

        reward_delta = 0.0

        # 3) Reply reward (audience loyalty), scaled by quality; low quality is down-weighted.
        reply_weight = INTERACT_LOWQ_WEIGHT if quality < INTERACT_LOWQ_THRESHOLD else 1.0
        reply_reward = min(
            INTERACT_REPLY_REWARD_CAP,
            INTERACT_REPLY_REWARD_PER * replies * quality * reply_weight,
        )
        reward_delta += reply_reward

        # 4) Penalties — each surfaces a violation string.
        # 4a) Spam volume.
        if likes > INTERACT_SPAM_LIKES or comments > INTERACT_SPAM_COMMENTS:
            reach_mult *= INTERACT_SPAM_REACH_PENALTY
            self._shadowban_risk = min(1.0, self._shadowban_risk + INTERACT_SPAM_SHADOWBAN_BUMP)
            self._interaction_violations.append(
                f"interaction_spam:likes={likes},comments={comments}"
            )

        # 4b) Off-niche heavy interaction (only fires with at least 3 targets).
        if off_niche_share >= INTERACT_OFFNICHE_THRESHOLD and len(targets) >= 3:
            reach_mult *= INTERACT_OFFNICHE_REACH_PENALTY
            self._interaction_violations.append(
                f"interaction_off_niche:share={off_niche_share:.2f}"
            )

        # 4c) Ignoring own audience: expected_replies = K * recent_engagement_proxy (use last day's posts)
        prev_day = max(0, self._day - 1)
        expected_signal = self._posts_per_day.get(prev_day, 0)  # # posts yesterday as a proxy
        # Multiply by a small constant so 1 post = 1 expected reply unit floor.
        expected_replies = expected_signal * 1.0
        if expected_replies > 0 and replies < INTERACT_IGNORE_THRESHOLD_K * expected_replies * 20:
            # Compounding loyalty drop on engagement_rate, capped at 0.5x floor.
            self._engagement_rate_loyalty_mult = max(
                0.5, self._engagement_rate_loyalty_mult * INTERACT_IGNORE_LOYALTY_DECAY
            )
            self._interaction_violations.append(
                f"interaction_ignoring_own:replies={replies}"
            )

        # 4d) Low quality replies — already weighted; if extremely low quality, additional penalty.
        if replies > 0 and quality < INTERACT_VERY_LOWQ_THRESHOLD:
            reward_delta += INTERACT_VERY_LOWQ_PENALTY
            self._interaction_violations.append(
                f"interaction_low_quality:q={quality:.2f}"
            )

        # 4e) Energy: covered upstream; just record if it pushed creator into low-energy zone.
        if energy_cost > 0 and self._energy < 0.2:
            self._interaction_violations.append(
                f"interaction_energy_drain:residual_energy={self._energy:.2f}"
            )

        # Cap daily reward_delta to avoid blowing past the per-step [0,1] reward envelope.
        reward_delta = max(-INTERACT_DAILY_REWARD_CAP, min(INTERACT_DAILY_REWARD_CAP, reward_delta))

        # Persist computed reach_mult so today's hourly posts pick it up (floored at 0.5x).
        self._pending_reach_mult = max(0.5, reach_mult)

        # Decay shadowban_risk slightly on quiet days (0 likes & 0 comments).
        if likes == 0 and comments == 0:
            self._shadowban_risk = max(0.0, self._shadowban_risk - 0.05)

        # The "summary" label mirrors the penalty precedence above:
        # spam > off_niche > low_quality > ignoring_own > healthy > neutral.
        summary.update({
            "likes_on_others": likes,
            "comments_on_others": comments,
            "replies_to_audience": replies,
            "energy_cost": round(energy_cost, 4),
            "reach_modifier": round(self._pending_reach_mult, 3),
            "shadowban_risk": round(self._shadowban_risk, 3),
            "loyalty_mult": round(self._engagement_rate_loyalty_mult, 3),
            "off_niche_share": round(off_niche_share, 2),
            "reward_delta": round(reward_delta, 4),
            "violations": list(self._interaction_violations),
            "summary": (
                "spam" if likes > INTERACT_SPAM_LIKES or comments > INTERACT_SPAM_COMMENTS
                else "off_niche" if off_niche_share >= INTERACT_OFFNICHE_THRESHOLD and len(targets) >= 3
                else "low_quality" if replies > 0 and quality < INTERACT_VERY_LOWQ_THRESHOLD
                else "ignoring_own" if expected_replies > 0 and replies < INTERACT_IGNORE_THRESHOLD_K * expected_replies * 20
                else "healthy" if reward_delta > 0 or reach_mult > 1.0
                else "neutral"
            ),
        })
        return reward_delta, summary
856
 
857
  # ----- engagement signals (Mosseri-aligned) -----
858
 
 
939
  elif tool.name == "query_creator_pool":
940
  pool = []
941
  for comp in self._competitors:
942
+ ev = self._collab_evaluation(comp.id)
943
  pool.append({
944
+ "id": comp.id,
945
+ "name": comp.name,
946
+ "niche": comp.niche,
947
+ "audience_overlap": ev.get("overlap"),
948
+ "mock_followers": ev.get("partner_followers"),
949
+ "intersection_size": ev.get("intersection_size"),
950
+ "same_niche": ev.get("same_niche"),
951
+ "follower_gap_pct": ev.get("follower_gap_pct"),
952
+ "recommended": ev.get("recommended"),
953
+ "reason": ev.get("reason"),
954
+ "expected_eng_mult": ev.get("eng_mult"),
955
+ "expected_growth_mult": ev.get("growth_mult"),
956
  })
957
+ return ToolResult(
958
+ name=tool.name,
959
+ data={
960
+ "user_niche": self._user_niche,
961
+ "user_followers": int(self._followers),
962
+ "pool": pool,
963
+ },
964
+ budget_remaining=self._api_budget,
965
+ )
966
 
967
  elif tool.name == "propose_collab":
968
  partner_id = tool.arguments.get("partner_id", "")
969
  if partner_id not in [c.id for c in self._competitors]:
970
  return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
971
+ ev = self._collab_evaluation(partner_id)
972
+ return ToolResult(
973
+ name=tool.name,
974
+ data={
975
+ "status": "proposal_accepted" if ev["recommended"] else "proposal_accepted_with_warning",
976
+ "partner_id": partner_id,
977
+ "recommended": ev["recommended"],
978
+ "reason": ev["reason"],
979
+ "same_niche": ev["same_niche"],
980
+ "audience_overlap": ev["overlap"],
981
+ "intersection_size": ev["intersection_size"],
982
+ "expected_eng_mult": ev["eng_mult"],
983
+ "expected_growth_mult": ev["growth_mult"],
984
+ },
985
+ budget_remaining=self._api_budget,
986
+ )
987
+
988
+ elif tool.name == "query_interaction_norms":
989
+ return ToolResult(
990
+ name=tool.name,
991
+ data={
992
+ "healthy_likes_per_day": list(INTERACT_HEALTHY_LIKES),
993
+ "healthy_comments_per_day": list(INTERACT_HEALTHY_COMMENTS),
994
+ "spam_threshold_likes": INTERACT_SPAM_LIKES,
995
+ "spam_threshold_comments": INTERACT_SPAM_COMMENTS,
996
+ "off_niche_share_max": INTERACT_OFFNICHE_THRESHOLD,
997
+ "min_reply_quality": INTERACT_LOWQ_THRESHOLD,
998
+ "current_shadowban_risk": round(self._shadowban_risk, 3),
999
+ "user_niche": self._user_niche,
1000
+ "expected_replies_per_unit_engagement": INTERACT_IGNORE_THRESHOLD_K,
1001
+ },
1002
+ budget_remaining=self._api_budget,
1003
+ )
1004
 
1005
  return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
1006
 
 
1057
  if self._hours_since_sleep > 22:
1058
  violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
1059
  pc -= 0.10
1060
+ # Collab guardrail breaches surfaced by _collab_multipliers (forced past block).
1061
+ for v in self._collab_violations:
1062
+ violations.append(v)
1063
+ pc -= 0.10
1064
+ # Interaction system violations (spam/off-niche/ignoring/low-quality/energy-drain).
1065
+ for v in self._interaction_violations:
1066
+ violations.append(v)
1067
+ pc -= 0.10
1068
 
1069
  burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
1070
  sustainability_risk = max(0.0, min(1.0, burnout_pressure))
 
1129
  self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
1130
  self._init_state()
1131
 
1132
+ # Optional user-niche override (for collab same/diff niche scenarios).
1133
+ user_niche_override = kwargs.get("user_niche")
1134
+ if user_niche_override:
1135
+ self._user_niche = str(user_niche_override)
1136
+
1137
  self._shift_label = kwargs.get("shift_label")
1138
  self._chain_id = kwargs.get("episode_chain_id")
1139
 
 
1171
 
1172
  # Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
1173
  self._active_collab = None
1174
+ self._collab_violations = []
1175
  if action.collab:
1176
  self._collabs_this_month += 1
1177
  self._collab_history.append(action.collab.partner_id)
1178
  self._active_collab = action.collab
1179
 
1180
+ # Process interactions BEFORE the day's hourly loop so energy cost and reach buffs/penalties
1181
+ # influence the same day's posts.
1182
+ interaction_reward, interaction_summary = self._process_interactions(action.interactions)
1183
+
1184
  # Validate scheduled actions
1185
  schedule: Dict[int, ScheduledAction] = {}
1186
  errors: List[str] = []
 
1247
  if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
1248
  self._days_with_good_posts.add(prev_day)
1249
 
1250
+ # Apply ignored-audience compounding loyalty multiplier into the per-day reward.
1251
+ avg_reward = (daily_reward / 24.0) + interaction_reward
1252
+ avg_reward = max(0.0, min(1.0, avg_reward))
1253
  error_str = "; ".join(errors) if errors else None
1254
 
1255
+ # Finalize this step's interaction summary on the obs.
1256
+ self._last_interaction_summary = interaction_summary
1257
+
1258
  done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
1259
  coach = self._compute_coach_feedback(daily_engagement)
1260
  judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
 
1279
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
1280
  tool_results=tool_results, engagement_signals=daily_signals,
1281
  coach_feedback=coach, judge_report=judge, headline_metrics=headline,
1282
+ interaction_metrics=interaction_summary,
1283
  )
1284
  return self._final_observation
1285
 
 
1289
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
1290
  tool_results=tool_results, engagement_signals=daily_signals,
1291
  coach_feedback=coach, judge_report=judge,
1292
+ interaction_metrics=interaction_summary,
1293
  )
1294
 
1295
  def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
 
1335
  * niche_mult * saturation_factor
1336
  )
1337
 
1338
+ # Interaction-driven reach modifier (set by _process_interactions earlier this step).
1339
+ # Multiplicative on engagement; capped at 0.5 floor inside _process_interactions.
1340
+ engagement *= getattr(self, "_pending_reach_mult", 1.0)
1341
+
1342
  if self._active_collab is not None and self._active_collab.hour == sa.hour:
1343
  eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
1344
  engagement *= eng_m
 
1522
  coach_feedback: Optional[Dict[str, Any]] = None,
1523
  judge_report: Optional[JudgeReport] = None,
1524
  headline_metrics: Optional[HeadlineMetrics] = None,
1525
+ interaction_metrics: Optional[Dict[str, Any]] = None,
1526
  ) -> ViraltestObservation:
1527
  recent_eng = self._engagement_history[-10:] if self._engagement_history else []
1528
  eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
1529
+ eng_rate *= getattr(self, "_engagement_rate_loyalty_mult", 1.0)
1530
 
1531
  meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
1532
  if grader_score is not None:
 
1576
  done=done,
1577
  reward=round(reward, 4),
1578
  metadata=meta,
1579
+ interaction_metrics=interaction_metrics,
1580
  )
1581
 
1582
  # ----- graders (monthly) -----
test_scenarios.py CHANGED
@@ -5,9 +5,14 @@ Each step = one full day. Agent submits a sparse daily plan.
5
  """
6
 
7
  import random as stdlib_random
8
- from typing import Callable, Dict, List, Tuple
9
 
10
- from models import ScheduledAction, ViraltestAction
 
 
 
 
 
11
  from server.viraltest_environment import (
12
  TAG_POOL,
13
  ViraltestEnvironment,
@@ -22,17 +27,29 @@ _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food
22
  _rng = stdlib_random.Random(99)
23
 
24
 
25
- def _plan(actions: list) -> ViraltestAction:
26
- return ViraltestAction(scheduled_actions=[ScheduledAction(**a) for a in actions])
 
 
 
 
 
 
 
 
27
 
28
 
29
  def run_episode(
30
  task: str,
31
  plan_fn: Callable[[Dict, int], ViraltestAction],
32
  label: str,
 
33
  ) -> float:
34
  env = ViraltestEnvironment()
35
- obs = env.reset(task=task, seed=SEED)
 
 
 
36
  obs_dict = obs.model_dump()
37
  rewards: List[float] = []
38
  min_energy = 1.0
@@ -159,16 +176,139 @@ def plan_random(obs: dict, day: int) -> ViraltestAction:
159
  return _plan(actions)
160
 
161
 
162
- SCENARIOS: List[Tuple[str, Callable, str]] = [
163
- ("Always Rest", plan_always_rest, "Zero engagement, no growth, energy stays max"),
164
- ("Spam Post", plan_spam, "Post every hour, burns out instantly"),
165
- ("Smart Agent", plan_smart, "Peak hours, trending, varied types, energy management"),
166
- ("No Rest", plan_no_rest, "Post every hour, never rests, burns out"),
167
- ("Minimal Poster", plan_minimal, "1 carousel at noon per day"),
168
- ("Tag Explorer", plan_tag_explorer, "Rotates through tag pool for max discovery"),
169
- ("Queue Optimizer", plan_queue_optimizer, "Creates content first, posts from queue"),
170
- ("Double Peak", plan_double_peak, "Posts at 9am and 3pm"),
171
- ("Random Actor", plan_random, "Random sparse actions each day"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  ]
173
 
174
 
@@ -178,16 +318,18 @@ if __name__ == "__main__":
178
  print("=" * 70)
179
  print()
180
 
181
- for scenario_name, plan_fn, description in SCENARIOS:
182
  print("=" * 70)
183
  print(f"{scenario_name}")
184
  print(f" {description}")
 
 
185
  print("=" * 70)
186
  print()
187
 
188
  for task in TASKS:
189
  _rng = stdlib_random.Random(99)
190
- run_episode(task, plan_fn, scenario_name)
191
 
192
  print()
193
 
@@ -195,15 +337,18 @@ if __name__ == "__main__":
195
  print("SUMMARY TABLE")
196
  print("=" * 70)
197
  print()
198
- print(f"{'Scenario':<30} {'Engage':>8} {'Strategic':>10} {'Competitive':>12}")
199
- print("-" * 62)
200
 
201
- for scenario_name, plan_fn, _ in SCENARIOS:
202
  scores = []
203
  for task in TASKS:
204
  _rng = stdlib_random.Random(99)
205
  env = ViraltestEnvironment()
206
- obs = env.reset(task=task, seed=SEED)
 
 
 
207
  obs_dict = obs.model_dump()
208
  for day in range(1, 31):
209
  action = plan_fn(obs_dict, day)
@@ -212,8 +357,10 @@ if __name__ == "__main__":
212
  if obs.done:
213
  break
214
  scores.append((obs.metadata or {}).get("grader_score", 0.0))
215
- print(f"{scenario_name:<30} {scores[0]:>8.4f} {scores[1]:>10.4f} {scores[2]:>12.4f}")
216
 
217
  print()
218
  print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
219
  print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
 
 
 
5
  """
6
 
7
  import random as stdlib_random
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple
9
 
10
+ from models import (
11
+ CollabProposal,
12
+ DailyInteractions,
13
+ ScheduledAction,
14
+ ViraltestAction,
15
+ )
16
  from server.viraltest_environment import (
17
  TAG_POOL,
18
  ViraltestEnvironment,
 
27
  _rng = stdlib_random.Random(99)
28
 
29
 
30
+ def _plan(
31
+ actions: list,
32
+ collab: Optional[CollabProposal] = None,
33
+ interactions: Optional[DailyInteractions] = None,
34
+ ) -> ViraltestAction:
35
+ return ViraltestAction(
36
+ scheduled_actions=[ScheduledAction(**a) for a in actions],
37
+ collab=collab,
38
+ interactions=interactions,
39
+ )
40
 
41
 
42
  def run_episode(
43
  task: str,
44
  plan_fn: Callable[[Dict, int], ViraltestAction],
45
  label: str,
46
+ user_niche: Optional[str] = None,
47
  ) -> float:
48
  env = ViraltestEnvironment()
49
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
50
+ if user_niche:
51
+ reset_kwargs["user_niche"] = user_niche
52
+ obs = env.reset(**reset_kwargs)
53
  obs_dict = obs.model_dump()
54
  rewards: List[float] = []
55
  min_energy = 1.0
 
176
  return _plan(actions)
177
 
178
 
179
+ # ---------------------------------------------------------------------------
180
+ # Collab grid scenarios user_niche set on env.reset(...) by run_episode.
181
+ # Each picks a partner_id intended to land in a specific (same/diff x low/high) tier
182
+ # and proposes the collab on day 5.
183
+ # ---------------------------------------------------------------------------
184
+
185
+ def _collab_plan(day: int, partner_id: str, hour: int = 12) -> ViraltestAction:
186
+ """Daily plan that posts once and proposes a collab on days 5 and 15.
187
+
188
+ Single-post per day keeps engagement below the theoretical_max cap so collab
189
+ multipliers visibly bend the final grader score and follower count.
190
+ """
191
+ actions = [
192
+ {"hour": hour, "action_type": "post", "content_type": "reel",
193
+ "topic": "AI tools", "tags": ["ai"], "intent": "watch_bait"},
194
+ ]
195
+ collab = None
196
+ if day in (5, 15):
197
+ collab = CollabProposal(partner_id=partner_id, content_type="reel", hour=hour)
198
+ return _plan(actions, collab=collab)
199
+
200
+
201
+ def plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
202
+ # user_niche=tech, partner=b2b_thought_leader (NICHE differs but matrix overlap=0.08)
203
+ # Use niche_expert (tech) which has overlap=0.10 with user_creator => same niche, low overlap.
204
+ return _collab_plan(day, partner_id="niche_expert")
205
+
206
+
207
+ def plan_collab_same_high(obs: dict, day: int) -> ViraltestAction:
208
+ # Force same niche + high overlap by setting user_niche=lifestyle and pairing with viral_chaser (overlap=0.55).
209
+ return _collab_plan(day, partner_id="viral_chaser")
210
+
211
+
212
+ def plan_collab_diff_low(obs: dict, day: int) -> ViraltestAction:
213
+ # user_niche=tech, partner=lifestyle_blogger (overlap=0.40 — actually high), pick travel_creator overlap=0.30 instead.
214
+ return _collab_plan(day, partner_id="travel_creator")
215
+
216
+
217
+ def plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
218
+ # user_niche=tech, partner=lifestyle_blogger (overlap=0.40, diff niche).
219
+ return _collab_plan(day, partner_id="lifestyle_blogger")
220
+
221
+
222
+ def plan_collab_blocked_zero(obs: dict, day: int) -> ViraltestAction:
223
+ # b2b_thought_leader has overlap=0.08 with user_creator -> intersection_below_10pct guardrail.
224
+ return _collab_plan(day, partner_id="b2b_thought_leader")
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Interaction scenarios — exercise the 5 penalty paths and the healthy band.
229
+ # ---------------------------------------------------------------------------
230
+
231
+ def _post_only_actions() -> list:
232
+ return [
233
+ {"hour": 12, "action_type": "post", "content_type": "reel",
234
+ "topic": "AI tools", "tags": ["ai"], "intent": "watch_bait"},
235
+ ]
236
+
237
+
238
+ def plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
239
+ interactions = DailyInteractions(
240
+ likes_on_others=12, comments_on_others=5, replies_to_audience=3,
241
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
242
+ )
243
+ return _plan(_post_only_actions(), interactions=interactions)
244
+
245
+
246
+ def plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
247
+ interactions = DailyInteractions(
248
+ likes_on_others=80, comments_on_others=40, replies_to_audience=0,
249
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
250
+ )
251
+ return _plan(_post_only_actions(), interactions=interactions)
252
+
253
+
254
+ def plan_interact_ignoring_own(obs: dict, day: int) -> ViraltestAction:
255
+ interactions = DailyInteractions(
256
+ likes_on_others=8, comments_on_others=4, replies_to_audience=0,
257
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.6,
258
+ )
259
+ return _plan(_post_only_actions(), interactions=interactions)
260
+
261
+
262
+ def plan_interact_off_niche(obs: dict, day: int) -> ViraltestAction:
263
+ interactions = DailyInteractions(
264
+ likes_on_others=10, comments_on_others=5, replies_to_audience=2,
265
+ target_partner_ids=["food_creator", "fitness_coach", "travel_creator", "lifestyle_blogger"],
266
+ avg_reply_quality=0.7,
267
+ )
268
+ return _plan(_post_only_actions(), interactions=interactions)
269
+
270
+
271
+ def plan_interact_low_quality(obs: dict, day: int) -> ViraltestAction:
272
+ interactions = DailyInteractions(
273
+ likes_on_others=10, comments_on_others=5, replies_to_audience=8,
274
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.05,
275
+ )
276
+ return _plan(_post_only_actions(), interactions=interactions)
277
+
278
+
279
+ # Scenario tuple: (label, plan_fn, description, user_niche)
280
+ SCENARIOS: List[Tuple[str, Callable, str, Optional[str]]] = [
281
+ ("Always Rest", plan_always_rest, "Zero engagement, no growth, energy stays max", None),
282
+ ("Spam Post", plan_spam, "Post every hour, burns out instantly", None),
283
+ ("Smart Agent", plan_smart, "Peak hours, trending, varied types, energy management", None),
284
+ ("No Rest", plan_no_rest, "Post every hour, never rests, burns out", None),
285
+ ("Minimal Poster", plan_minimal, "1 carousel at noon per day", None),
286
+ ("Tag Explorer", plan_tag_explorer, "Rotates through tag pool for max discovery", None),
287
+ ("Queue Optimizer", plan_queue_optimizer, "Creates content first, posts from queue", None),
288
+ ("Double Peak", plan_double_peak, "Posts at 9am and 3pm", None),
289
+ ("Random Actor", plan_random, "Random sparse actions each day", None),
290
+ # Collab grid: 2x2 same/diff niche x low/high overlap + zero-guardrail.
291
+ ("Collab Same-Niche Low Overlap", plan_collab_same_low,
292
+ "user_niche=tech + niche_expert (same niche, overlap 0.10) — should yield HIGH boost.", "tech"),
293
+ ("Collab Same-Niche High Overlap", plan_collab_same_high,
294
+ "user_niche=lifestyle + viral_chaser (same niche, overlap 0.55) — penalty path: redundant audience.", "lifestyle"),
295
+ ("Collab Diff-Niche Low Overlap", plan_collab_diff_low,
296
+ "user_niche=tech + travel_creator (diff niche, overlap 0.30) — capped below same-niche-low.", "tech"),
297
+ ("Collab Diff-Niche High Overlap", plan_collab_diff_high,
298
+ "user_niche=tech + lifestyle_blogger (diff niche, overlap 0.40) — LOW reward (mismatch).", "tech"),
299
+ ("Collab Guardrail Block", plan_collab_blocked_zero,
300
+ "user_niche=tech + b2b_thought_leader (overlap 0.08 < 10%) — guardrail trips, forced penalty applied.", "tech"),
301
+ # Interaction grid: healthy + 4 penalty paths.
302
+ ("Interact Balanced", plan_interact_balanced,
303
+ "Healthy daily likes/comments/replies on-niche.", "tech"),
304
+ ("Interact Spam", plan_interact_spam,
305
+ "80 likes + 40 comments — spam path, shadowban_risk + reach penalty.", "tech"),
306
+ ("Interact Ignoring Own", plan_interact_ignoring_own,
307
+ "Zero replies to own audience — compounding loyalty drop.", "tech"),
308
+ ("Interact Off-Niche", plan_interact_off_niche,
309
+ "All interactions targeted at non-tech creators — reach penalty.", "tech"),
310
+ ("Interact Low-Quality", plan_interact_low_quality,
311
+ "Replies with quality=0.05 — replies discounted + extra reward penalty.", "tech"),
312
  ]
313
 
314
 
 
318
  print("=" * 70)
319
  print()
320
 
321
+ for scenario_name, plan_fn, description, user_niche in SCENARIOS:
322
  print("=" * 70)
323
  print(f"{scenario_name}")
324
  print(f" {description}")
325
+ if user_niche:
326
+ print(f" user_niche={user_niche}")
327
  print("=" * 70)
328
  print()
329
 
330
  for task in TASKS:
331
  _rng = stdlib_random.Random(99)
332
+ run_episode(task, plan_fn, scenario_name, user_niche=user_niche)
333
 
334
  print()
335
 
 
337
  print("SUMMARY TABLE")
338
  print("=" * 70)
339
  print()
340
+ print(f"{'Scenario':<35} {'Engage':>8} {'Strategic':>10} {'Competitive':>12}")
341
+ print("-" * 67)
342
 
343
+ for scenario_name, plan_fn, _, user_niche in SCENARIOS:
344
  scores = []
345
  for task in TASKS:
346
  _rng = stdlib_random.Random(99)
347
  env = ViraltestEnvironment()
348
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
349
+ if user_niche:
350
+ reset_kwargs["user_niche"] = user_niche
351
+ obs = env.reset(**reset_kwargs)
352
  obs_dict = obs.model_dump()
353
  for day in range(1, 31):
354
  action = plan_fn(obs_dict, day)
 
357
  if obs.done:
358
  break
359
  scores.append((obs.metadata or {}).get("grader_score", 0.0))
360
+ print(f"{scenario_name:<35} {scores[0]:>8.4f} {scores[1]:>10.4f} {scores[2]:>12.4f}")
361
 
362
  print()
363
  print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
364
  print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
365
+ print("Collab Same-Niche Low Overlap should outperform any Diff-Niche collab.")
366
+ print("Interact Spam/Off-Niche/Ignoring/Low-Quality should underperform Balanced.")