anuragredbus committed on
Commit
1a2a407
·
1 Parent(s): f0a8734

added more scenarios

Browse files
__init__.py CHANGED
@@ -9,6 +9,7 @@
9
  from .client import ViraltestEnv
10
  from .models import (
11
  CollabProposal,
 
12
  EngagementSignals,
13
  ScheduledAction,
14
  ToolCall,
@@ -19,6 +20,7 @@ from .models import (
19
 
20
  __all__ = [
21
  "CollabProposal",
 
22
  "EngagementSignals",
23
  "ScheduledAction",
24
  "ToolCall",
 
9
  from .client import ViraltestEnv
10
  from .models import (
11
  CollabProposal,
12
+ DailyInteractions,
13
  EngagementSignals,
14
  ScheduledAction,
15
  ToolCall,
 
20
 
21
  __all__ = [
22
  "CollabProposal",
23
+ "DailyInteractions",
24
  "EngagementSignals",
25
  "ScheduledAction",
26
  "ToolCall",
eval_env.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ End-to-end evaluation of the viraltest environment after the collab + interaction expansion.
3
+
4
+ Sections
5
+ --------
6
+ A) Collab tier diagnostics
7
+ - Per-tier expected multipliers from `_collab_evaluation`
8
+ - Episode runs with varying collab cadence (1, 5, 15 collabs/episode) to show that
9
+ the score spread between tiers GROWS with cadence, proving the multiplier is doing
10
+ real work and the small diffs in the 2-collab test are just dilution.
11
+ B) Interaction diagnostics
12
+ - Each penalty path (spam, ignoring_own, off_niche, low_quality, energy_drain) fires
13
+ the expected violation.
14
+ - Healthy band lifts reach_modifier > 1.0.
15
+ C) Cross-cutting sanity
16
+ - Every scenario completes without errors, energy non-negative, judge_report present.
17
+
18
+ Run: .venv/bin/python eval_env.py
19
+ """
20
+
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ from models import (
24
+ CollabProposal,
25
+ DailyInteractions,
26
+ ScheduledAction,
27
+ ViraltestAction,
28
+ )
29
+ from server.viraltest_environment import ViraltestEnvironment
30
+
31
+
32
+ SEED = 42
33
+ HORIZON = 15 # TASK_HORIZON in the env
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Helpers
37
+ # ---------------------------------------------------------------------------
38
+
39
def _post_only(content_type: str = "reel", topic: str = "AI tools",
               tags: Optional[List[str]] = None, intent: str = "watch_bait") -> ScheduledAction:
    """Build a single noon post with sensible defaults for the eval plans."""
    # Falsy tags (None or []) fall back to the default tag list, as before.
    tag_list = tags or ["ai"]
    return ScheduledAction(
        hour=12,
        action_type="post",
        content_type=content_type,
        topic=topic,
        tags=tag_list,
        intent=intent,
    )
45
+
46
+
47
def _run_episode(
    plan_fn,
    user_niche: Optional[str] = None,
    task: str = "monthly_competitive",
) -> Dict[str, Any]:
    """Run one full episode driven by ``plan_fn(obs_dict, day)`` and summarize it.

    Collects judge and interaction violations across all days, tracks the
    minimum creator energy seen, and pulls the final grader score out of the
    last observation's metadata.
    """
    env = ViraltestEnvironment()
    kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
    if user_niche:
        kwargs["user_niche"] = user_niche
    observation = env.reset(**kwargs)
    obs_payload = observation.model_dump()
    final_obs = observation
    judge_viols: List[str] = []
    inter_viols: List[str] = []
    lowest_energy = 1.0
    # One extra step beyond the horizon so the env can emit its terminal obs.
    day = 1
    while day <= HORIZON + 1:
        observation = env.step(plan_fn(obs_payload, day))
        obs_payload = observation.model_dump()
        final_obs = observation
        if observation.creator_energy < lowest_energy:
            lowest_energy = observation.creator_energy
        if observation.judge_report:
            judge_viols.extend(observation.judge_report.violations)
        if observation.interaction_metrics:
            extra = observation.interaction_metrics.get("violations", []) or []
            inter_viols.extend(extra)
        if observation.done:
            break
        day += 1
    final_score = (final_obs.metadata or {}).get("grader_score", 0.0)
    return {
        "score": float(final_score),
        "followers": int(final_obs.follower_count),
        "min_energy": float(lowest_energy),
        "energy": float(final_obs.creator_energy),
        "engagement_rate": float(final_obs.engagement_rate),
        "judge_violations": judge_viols,
        "interaction_violations": inter_viols,
        "error": final_obs.error,
        "done": final_obs.done,
    }
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # A) COLLAB TIER DIAGNOSTICS
90
+ # ---------------------------------------------------------------------------
91
+
92
def section_a_collab_evaluator() -> None:
    """Print the raw `_collab_evaluation` output for each partner archetype.

    Diagnostic table only — no assertions.  Shows same-niche flag, overlap,
    partner followers, follower gap, the two multipliers, and whether the
    guardrail recommends or blocks the partner.
    """
    print("=" * 78)
    print("A1. _collab_evaluation snapshot (user_niche=tech)")
    print("=" * 78)
    env = ViraltestEnvironment()
    env.reset(task="monthly_competitive", seed=SEED, user_niche="tech")
    fmt = "{:<22} {:>5} {:>7} {:>5} {:>5} {:>10} {:>10} {:<28}"
    print(fmt.format("partner", "same?", "overlap", "fol", "gap%", "eng_mult", "growth", "reason/recommended"))
    print("-" * 105)
    for pid in [
        "niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader",
        "food_creator", "fitness_coach", "travel_creator",
    ]:
        ev = env._collab_evaluation(pid)
        # Fix: "OK" was an f-string with no placeholder (ruff F541) — plain literal.
        rec_str = "OK" if ev["recommended"] else f"BLOCK:{ev['reason']}"
        print(fmt.format(
            pid,
            "Y" if ev["same_niche"] else "N",
            f"{ev['overlap']:.2f}",
            ev["partner_followers"],
            f"{ev['follower_gap_pct']*100:.0f}%",
            f"{ev['eng_mult']:.3f}",
            f"{ev['growth_mult']:.3f}",
            rec_str,
        ))
    print()
118
+
119
+
120
def make_collab_plan(partner_id: str, collab_days: List[int]):
    """Daily plan: single post + collab proposed on collab_days."""
    # Set membership for O(1) day lookups inside the closure.
    collab_day_set = set(collab_days)

    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        proposal = (
            CollabProposal(partner_id=partner_id, content_type="reel", hour=12)
            if day in collab_day_set
            else None
        )
        return ViraltestAction(scheduled_actions=[_post_only()], collab=proposal)

    return plan
129
+
130
+
131
def section_a_collab_cadence() -> None:
    """Compare per-tier scores at three collab cadences to expose multiplier stacking."""
    print("=" * 78)
    print("A2. Score spread vs collab cadence (1, 5, 15 collabs in 15-day horizon)")
    print(" Hypothesis: more collab days -> larger gap between tiers")
    print("=" * 78)

    # Map each tier to (partner_id, user_niche) — chosen so the partner clears the
    # follower-size guardrail (peer-tier mocked followers in the data file).
    tiers = [
        ("Same-Niche Low", "niche_expert", "tech"),
        ("Same-Niche High", "viral_chaser", "lifestyle"),  # overlap=0.55 (high)
        ("Diff-Niche Low", "food_creator", "tech"),  # overlap=0.25 (mid-low)
        ("Diff-Niche High", "lifestyle_blogger", "tech"),  # overlap=0.40 (boundary high)
        ("Guardrail Block", "b2b_thought_leader", "tech"),  # overlap=0.08 (<10%)
    ]
    cadences = {
        "1 collab": [5],
        "5 collabs": [3, 5, 7, 9, 11],
        "15 collabs": list(range(1, 16)),
    }

    fmt = "{:<22} {:>10} {:>10} {:>10}"
    print(fmt.format("Tier", *cadences.keys()))
    print("-" * 56)
    for tier_name, pid, niche in tiers:
        cells = [
            "{:.4f}".format(
                _run_episode(make_collab_plan(pid, days), user_niche=niche)["score"]
            )
            for days in cadences.values()
        ]
        print(fmt.format(tier_name, *cells))
    print()
    print(" -> Same-Niche Low score should DROP slowly as you add collabs.")
    print(" -> Same-Niche High and Diff-Niche High should DROP quickly (penalty stacks).")
    print(" -> Spread between top and bottom should GROW with cadence.")
    print()
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # B) INTERACTION DIAGNOSTICS
170
+ # ---------------------------------------------------------------------------
171
+
172
def make_interaction_plan(interactions: DailyInteractions):
    """Plan factory: one default post per day plus a fixed interaction bundle."""
    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        daily_posts = [_post_only()]
        return ViraltestAction(scheduled_actions=daily_posts, interactions=interactions)

    return plan
176
+
177
+
178
def section_b_interactions() -> None:
    """Exercise each interaction penalty path and check the expected violation fires.

    Each case is (label, interactions, expected) where ``expected`` is the
    violation substring that must appear at least once during the episode, or
    ``None`` for the healthy band, which must produce NO interaction violations
    at all.  (The previous version used a fake pattern "interaction_*" for the
    healthy row, whose stripped form "*" made the generic check meaningless and
    forced a second special-case override; this keeps one decision point.)
    """
    print("=" * 78)
    print("B. Interaction penalty-path matrix")
    print("=" * 78)

    cases = [
        ("healthy", DailyInteractions(
            likes_on_others=12, comments_on_others=5, replies_to_audience=3,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
        ), None),
        ("spam", DailyInteractions(
            likes_on_others=80, comments_on_others=40, replies_to_audience=0,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
        ), "spam"),
        ("ignoring_own", DailyInteractions(
            likes_on_others=8, comments_on_others=4, replies_to_audience=0,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.6,
        ), "ignoring_own"),
        ("off_niche", DailyInteractions(
            likes_on_others=10, comments_on_others=5, replies_to_audience=2,
            target_partner_ids=["food_creator", "fitness_coach", "travel_creator", "lifestyle_blogger"],
            avg_reply_quality=0.7,
        ), "off_niche"),
        ("low_quality", DailyInteractions(
            likes_on_others=10, comments_on_others=5, replies_to_audience=8,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.05,
        ), "low_quality"),
        ("energy_drain", DailyInteractions(
            likes_on_others=200, comments_on_others=100, replies_to_audience=100,
            target_partner_ids=["niche_expert"], avg_reply_quality=0.5,
        ), "energy_drain"),
    ]

    fmt = "{:<14} {:>7} {:>9} {:>10} {:>10} {:>11} {:<12}"
    print(fmt.format("case", "score", "followers", "min_energy", "engRate", "violations", "expect"))
    print("-" * 80)
    for label, interactions, expected in cases:
        r = _run_episode(make_interaction_plan(interactions), user_niche="tech")
        viols = r["interaction_violations"]
        if expected is None:
            # Healthy band: any interaction violation at all is a failure.
            ok = "OK" if not viols else "FAIL"
        else:
            ok = "OK" if any(expected in v for v in viols) else "FAIL"
        print(fmt.format(
            label,
            f"{r['score']:.3f}",
            r["followers"],
            f"{r['min_energy']:.2f}",
            f"{r['engagement_rate']:.3f}",
            len(viols),
            ok,
        ))
    print()
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # C) CROSS-CUTTING SANITY
236
+ # ---------------------------------------------------------------------------
237
+
238
def section_c_sanity() -> None:
    """Smoke-test rest/1-post/2-post baselines plus the two query tools."""
    # Local import kept local, as in the original module; hoisted to the top
    # of the function so both tool calls below can use it.
    from models import ToolCall

    print("=" * 78)
    print("C. Cross-cutting sanity (rest, post-only, smart, query_interaction_norms)")
    print("=" * 78)

    # Baselines for visual sanity
    def plan_rest(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[])

    def plan_post1(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[_post_only()])

    def plan_post2(obs: Dict[str, Any], day: int) -> ViraltestAction:
        evening_post = ScheduledAction(hour=19, action_type="post", content_type="carousel",
                                       topic="AI tools", tags=["coding"], intent="save_bait")
        return ViraltestAction(scheduled_actions=[
            _post_only(content_type="reel", topic="AI tools"),
            evening_post,
        ])

    fmt = "{:<14} {:>7} {:>9} {:>8} {:>8} {:>6}"
    print(fmt.format("baseline", "score", "followers", "energy", "engRate", "errs"))
    print("-" * 60)
    baselines = (("rest", plan_rest), ("1-post", plan_post1), ("2-post", plan_post2))
    for name, fn in baselines:
        result = _run_episode(fn, user_niche="tech")
        err_cell = result["error"][:12] if result["error"] else "0"
        print(fmt.format(name, f"{result['score']:.3f}", result["followers"],
                         f"{result['energy']:.2f}", f"{result['engagement_rate']:.3f}", err_cell))
    print()

    # Verify query_interaction_norms surfaces sensible values.
    env = ViraltestEnvironment()
    env.reset(task="monthly_engage", seed=SEED, user_niche="tech")
    res = env._dispatch_tool(ToolCall(name="query_interaction_norms", arguments={}))
    print("query_interaction_norms tool ->")
    print(f" success={res.success}, data={res.data}")
    print()

    # Verify query_creator_pool returns the recommendation surface.
    res = env._dispatch_tool(ToolCall(name="query_creator_pool", arguments={}))
    print("query_creator_pool tool ->")
    print(f" user_niche={res.data['user_niche']}, user_followers={res.data['user_followers']}")
    for p in res.data["pool"]:
        print(f" {p['id']:<22} same_niche={p['same_niche']!s:<5} overlap={p['audience_overlap']:>4} "
              f"recommended={p['recommended']!s:<5} reason={p['reason']}")
    print()
284
+
285
+
286
+ # ---------------------------------------------------------------------------
287
+ # Main
288
+ # ---------------------------------------------------------------------------
289
+
290
if __name__ == "__main__":
    # Run every diagnostic section in order, then confirm completion.
    for section in (
        section_a_collab_evaluator,
        section_a_collab_cadence,
        section_b_interactions,
        section_c_sanity,
    ):
        section()
    print("Evaluation complete.")
models.py CHANGED
@@ -64,6 +64,28 @@ class CollabProposal(BaseModel):
64
  hour: int = Field(default=12, ge=0, le=23)
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  class ViraltestAction(Action):
68
  """Daily plan: tool calls for discovery, then scheduled actions to commit."""
69
 
@@ -79,6 +101,10 @@ class ViraltestAction(Action):
79
  default=None,
80
  description="Optional collaboration proposal (max 2 per month)",
81
  )
 
 
 
 
82
  notes: Optional[str] = Field(
83
  default=None,
84
  max_length=2000,
@@ -191,6 +217,10 @@ class ViraltestObservation(Observation):
191
  tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
192
  agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
193
  api_budget_remaining: int = Field(default=100, ge=0)
 
 
 
 
194
 
195
  grader_score: Optional[float] = Field(default=None)
196
  error: Optional[str] = Field(default=None)
 
64
  hour: int = Field(default=12, ge=0, le=23)
65
 
66
 
67
class DailyInteractions(BaseModel):
    """Daily aggregate of creator interactions: likes, comments on others' content, and replies to own audience.

    Models the comment/like/reply economy. Healthy interaction in moderation rewards reach;
    spam, audience-ignoring, off-niche, and low-quality patterns are penalized.
    """

    # Bounded counts; the env-side caps (le=200/le=100) match the spam/energy
    # thresholds exercised by eval_env.py's penalty-path cases.
    likes_on_others: int = Field(default=0, ge=0, le=200, description="Likes given on other creators' posts today")
    comments_on_others: int = Field(default=0, ge=0, le=100, description="Comments left on other creators' posts today")
    replies_to_audience: int = Field(default=0, ge=0, le=100, description="Replies to incoming comments on your own posts")
    # Which archetypes were targeted; compared against the user's niche by the
    # environment to detect off-niche interaction patterns.
    target_partner_ids: List[str] = Field(
        default_factory=list,
        description="Competitor archetype ids you interacted with today (used for off-niche detection)",
    )
    # 0.0..1.0 self-rating; low values drive the low_quality penalty path.
    avg_reply_quality: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Self-rated effort/depth of replies (0=one-word, 1=substantive)",
    )
87
+
88
+
89
  class ViraltestAction(Action):
90
  """Daily plan: tool calls for discovery, then scheduled actions to commit."""
91
 
 
101
  default=None,
102
  description="Optional collaboration proposal (max 2 per month)",
103
  )
104
+ interactions: Optional[DailyInteractions] = Field(
105
+ default=None,
106
+ description="Daily likes/comments/replies activity (community engagement layer)",
107
+ )
108
  notes: Optional[str] = Field(
109
  default=None,
110
  max_length=2000,
 
217
  tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
218
  agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
219
  api_budget_remaining: int = Field(default=100, ge=0)
220
+ interaction_metrics: Optional[Dict[str, Any]] = Field(
221
+ default=None,
222
+ description="Daily interaction summary: reach modifier, shadowban_risk, and a one-line reason",
223
+ )
224
 
225
  grader_score: Optional[float] = Field(default=None)
226
  error: Optional[str] = Field(default=None)
server/app.py CHANGED
@@ -29,10 +29,22 @@ if "ENABLE_WEB_INTERFACE" not in os.environ:
29
  os.environ["ENABLE_WEB_INTERFACE"] = "true"
30
 
31
  try:
32
- from ..models import ScheduledAction, ViraltestAction, ViraltestObservation
 
 
 
 
 
 
33
  from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
34
  except ImportError:
35
- from models import ScheduledAction, ViraltestAction, ViraltestObservation
 
 
 
 
 
 
36
  from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
37
 
38
  try:
@@ -174,10 +186,17 @@ _CONTENT_TYPES = ["reel", "carousel", "story", "text_post"]
174
  _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
175
 
176
 
177
- def _make_daily_plan(actions: list, notes: Optional[str] = None) -> ViraltestAction:
 
 
 
 
 
178
  return ViraltestAction(
179
  scheduled_actions=[ScheduledAction(**a) for a in actions],
180
  notes=notes,
 
 
181
  )
182
 
183
 
@@ -236,12 +255,96 @@ def _plan_minimal(obs: dict, day: int) -> ViraltestAction:
236
  ])
237
 
238
 
239
- SCENARIOS = {
240
- "always_rest": ("Always Rest", "Never posts. Tests follower decay.", _plan_always_rest),
241
- "spam": ("Spam Post", "Same reel every hour. Burns out fast.", _plan_spam),
242
- "smart": ("Smart Agent", "Optimal: peak hours, trending, varied types+intents.", _plan_smart),
243
- "minimal": ("Minimal Poster", "1 carousel per day at noon.", _plan_minimal),
244
- "random": ("Random Actor", "Random actions. Baseline test.", _plan_random),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  }
246
 
247
 
@@ -265,9 +368,14 @@ async def dashboard_simulate(body: Dict[str, Any] = Body(...)):
265
  if scenario_id not in SCENARIOS:
266
  return {"error": f"Unknown scenario: {scenario_id}"}
267
 
268
- label, desc, plan_fn = SCENARIOS[scenario_id]
 
 
269
  env = ViraltestEnvironment()
270
- obs = env.reset(task=task, seed=42)
 
 
 
271
  obs_dict = obs.model_dump()
272
 
273
  steps: List[Dict[str, Any]] = []
@@ -347,11 +455,16 @@ async def training_evidence():
347
  global _SIM_RNG
348
 
349
  results = []
350
- for scenario_id, (label, desc, plan_fn) in SCENARIOS.items():
 
 
351
  for task in _TRAINING_TASKS:
352
  _SIM_RNG = stdlib_random.Random(99)
353
  env = ViraltestEnvironment()
354
- obs = env.reset(task=task, seed=42)
 
 
 
355
  obs_dict = obs.model_dump()
356
 
357
  rewards: List[float] = []
 
29
  os.environ["ENABLE_WEB_INTERFACE"] = "true"
30
 
31
  try:
32
+ from ..models import (
33
+ CollabProposal,
34
+ DailyInteractions,
35
+ ScheduledAction,
36
+ ViraltestAction,
37
+ ViraltestObservation,
38
+ )
39
  from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
40
  except ImportError:
41
+ from models import (
42
+ CollabProposal,
43
+ DailyInteractions,
44
+ ScheduledAction,
45
+ ViraltestAction,
46
+ ViraltestObservation,
47
+ )
48
  from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
49
 
50
  try:
 
186
  _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
187
 
188
 
189
def _make_daily_plan(
    actions: list,
    notes: Optional[str] = None,
    collab: Optional[CollabProposal] = None,
    interactions: Optional[DailyInteractions] = None,
) -> ViraltestAction:
    """Assemble a ViraltestAction from raw action dicts plus optional extras."""
    scheduled = [ScheduledAction(**spec) for spec in actions]
    return ViraltestAction(
        scheduled_actions=scheduled,
        notes=notes,
        collab=collab,
        interactions=interactions,
    )
201
 
202
 
 
255
  ])
256
 
257
 
258
def _plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
    """Same-niche, low-overlap collab on day 5+15 — best-case reward path."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    tag_list = list((obs.get("trending_tags") or [])[:2]) + ["ai"]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": tag_list, "intent": "watch_bait"}
    proposal = (
        CollabProposal(partner_id="niche_expert", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Same-niche low-overlap collab demo.", collab=proposal)
270
+
271
+
272
def _plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
    """Diff-niche, high-overlap collab — penalty path (mismatch)."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    tag_list = list((obs.get("trending_tags") or [])[:2]) + ["ai"]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": tag_list, "intent": "watch_bait"}
    proposal = (
        CollabProposal(partner_id="lifestyle_blogger", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Diff-niche high-overlap collab demo.", collab=proposal)
284
+
285
+
286
def _plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
    """Healthy daily interaction — likes/comments on-niche, replies to audience."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": ["ai"], "intent": "watch_bait"}
    healthy = DailyInteractions(
        likes_on_others=12, comments_on_others=5, replies_to_audience=3,
        target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
    )
    return _make_daily_plan([post], notes="Healthy interaction demo.", interactions=healthy)
299
+
300
+
301
def _plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
    """Spam interaction — triggers shadowban_risk + reach penalty."""
    topic = (obs.get("trending_topics") or ["AI tools"])[0]
    post = {"hour": 12, "action_type": "post", "content_type": "reel",
            "topic": topic, "tags": ["ai"], "intent": "watch_bait"}
    spammy = DailyInteractions(
        likes_on_others=80, comments_on_others=40, replies_to_audience=0,
        target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
    )
    return _make_daily_plan([post], notes="Interaction spam demo.", interactions=spammy)
314
+
315
+
316
# Scenario tuple: (label, description, plan_fn, optional user_niche).
# user_niche is honored by dashboard_simulate / training_evidence; defaults to "generic" when None.
SCENARIOS: Dict[str, tuple] = {
    "always_rest": ("Always Rest", "Never posts. Tests follower decay.", _plan_always_rest, None),
    "spam": ("Spam Post", "Same reel every hour. Burns out fast.", _plan_spam, None),
    "smart": ("Smart Agent", "Optimal: peak hours, trending, varied types+intents.", _plan_smart, None),
    "minimal": ("Minimal Poster", "1 carousel per day at noon.", _plan_minimal, None),
    "random": ("Random Actor", "Random actions. Baseline test.", _plan_random, None),
    "collab_same_low": ("Collab Same-Niche Low Overlap",
                        "Same-niche partner with <20% overlap. Best-case collab reward path.",
                        _plan_collab_same_low, "tech"),
    "collab_diff_high": ("Collab Diff-Niche High Overlap",
                         "Diff-niche partner with >40% overlap. Penalty path (audience mismatch).",
                         _plan_collab_diff_high, "tech"),
    "interact_balanced": ("Interact Balanced",
                          "Healthy on-niche likes/comments and audience replies.",
                          _plan_interact_balanced, "tech"),
    "interact_spam": ("Interact Spam",
                      "80 likes + 40 comments — spam path triggers shadowban_risk.",
                      _plan_interact_spam, "tech"),
}
349
 
350
 
 
368
  if scenario_id not in SCENARIOS:
369
  return {"error": f"Unknown scenario: {scenario_id}"}
370
 
371
+ entry = SCENARIOS[scenario_id]
372
+ label, desc, plan_fn = entry[0], entry[1], entry[2]
373
+ user_niche = entry[3] if len(entry) > 3 else None
374
  env = ViraltestEnvironment()
375
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
376
+ if user_niche:
377
+ reset_kwargs["user_niche"] = user_niche
378
+ obs = env.reset(**reset_kwargs)
379
  obs_dict = obs.model_dump()
380
 
381
  steps: List[Dict[str, Any]] = []
 
455
  global _SIM_RNG
456
 
457
  results = []
458
+ for scenario_id, entry in SCENARIOS.items():
459
+ label, desc, plan_fn = entry[0], entry[1], entry[2]
460
+ user_niche = entry[3] if len(entry) > 3 else None
461
  for task in _TRAINING_TASKS:
462
  _SIM_RNG = stdlib_random.Random(99)
463
  env = ViraltestEnvironment()
464
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
465
+ if user_niche:
466
+ reset_kwargs["user_niche"] = user_niche
467
+ obs = env.reset(**reset_kwargs)
468
  obs_dict = obs.model_dump()
469
 
470
  rewards: List[float] = []
server/data/audience_overlap_matrix.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "_meta": {
3
- "description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience. Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
4
- "source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest."
 
5
  },
6
  "archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
7
  "matrix": [
@@ -13,5 +14,25 @@
13
  [0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
14
  [0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
15
  [0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
16
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
 
1
  {
2
  "_meta": {
3
+ "description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience (Jaccard intersection fraction). Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
4
+ "source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest.",
5
+ "mock_followers_note": "Mocked follower counts span tiers from micro (10k user) to mid (250k viral_chaser). Used to derive intersection size via Jaccard inversion: |A intersect B| = overlap * (|A| + |B|) / (1 + overlap)."
6
  },
7
  "archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
8
  "matrix": [
 
14
  [0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
15
  [0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
16
  [0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
17
+ ],
18
+ "niche_by_archetype": {
19
+ "niche_expert": "tech",
20
+ "viral_chaser": "lifestyle",
21
+ "lifestyle_blogger": "lifestyle",
22
+ "b2b_thought_leader": "business",
23
+ "food_creator": "food",
24
+ "fitness_coach": "fitness",
25
+ "travel_creator": "travel",
26
+ "user_creator": "generic"
27
+ },
28
+ "mock_followers_by_archetype": {
29
+ "niche_expert": 12000,
30
+ "viral_chaser": 250000,
31
+ "lifestyle_blogger": 11000,
32
+ "b2b_thought_leader": 9000,
33
+ "food_creator": 12000,
34
+ "fitness_coach": 8000,
35
+ "travel_creator": 11000,
36
+ "user_creator": 10000
37
+ }
38
  }
server/viraltest_environment.py CHANGED
@@ -26,6 +26,7 @@ from openenv.core.env_server.types import State
26
  try:
27
  from ..models import (
28
  CollabProposal,
 
29
  EngagementSignals,
30
  HeadlineMetrics,
31
  JudgeReport,
@@ -38,6 +39,7 @@ try:
38
  except ImportError:
39
  from models import (
40
  CollabProposal,
 
41
  EngagementSignals,
42
  HeadlineMetrics,
43
  JudgeReport,
@@ -88,6 +90,13 @@ _HEATMAP_GRID: Dict[int, List[float]] = {
88
  int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
89
  }
90
 
 
 
 
 
 
 
 
91
  # ---------------------------------------------------------------------------
92
  # Constants (research-backed, Tier 1-3 sources)
93
  # ---------------------------------------------------------------------------
@@ -166,12 +175,56 @@ TREND_DEFAULT_HALFLIFE_HOURS = 60
166
  TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
167
  # Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
168
  # Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
169
- COLLAB_REACH_K = 0.60 # cross-audience exposure: capped reach uplift when overlap is 0
170
- COLLAB_AFFINITY_K = 0.30 # same-audience affinity: per-impression engagement uplift when overlap is 1
171
- COLLAB_GROWTH_K = 1.50 # cross-pollination follower spillover, scales (1 - overlap)
172
  COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when partner reused this brand
173
  COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
176
 
177
  # Heuristic baselines for headline metric `vs_baseline_pct`.
@@ -251,17 +304,21 @@ TOOL_CATALOG = {
251
  "parameters": {"scheduled_actions": {"type": "array"}},
252
  },
253
  "query_creator_pool": {
254
- "description": "List available competitor archetypes for potential collaboration, with audience overlap %.",
255
  "parameters": {},
256
  },
257
  "propose_collab": {
258
- "description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored with the partner.",
259
  "parameters": {
260
  "partner_id": {"type": "string"},
261
  "content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
262
  "hour": {"type": "integer", "minimum": 0, "maximum": 23},
263
  },
264
  },
 
 
 
 
265
  }
266
 
267
 
@@ -305,6 +362,15 @@ class ViraltestEnvironment(Environment):
305
  self._collabs_this_month = 0
306
  self._collab_history: List[str] = []
307
  self._active_collab: Optional[CollabProposal] = None
 
 
 
 
 
 
 
 
 
308
  self._low_energy_days = 0
309
  self._total_posts_this_week = 0
310
  self._week_start_day = 0
@@ -486,7 +552,7 @@ class ViraltestEnvironment(Environment):
486
 
487
  return daily_fatigue * weekly_mult
488
 
489
- # ----- collab multipliers (overlap-driven) -----
490
 
491
  def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
492
  ids = _OVERLAP_DATA.get("archetype_ids", [])
@@ -496,21 +562,297 @@ class ViraltestEnvironment(Environment):
496
  p = ids.index(partner_id)
497
  return _OVERLAP_DATA["matrix"][u][p]
498
 
499
- def _collab_multipliers(self, partner_id: str) -> Tuple[float, float]:
500
- """Returns (engagement_multiplier, follower_growth_multiplier)."""
501
- o = self._user_partner_overlap(partner_id)
502
- if o is None:
503
- return 1.0, 1.0
504
- reach = 1.0 + (1.0 - o) * COLLAB_REACH_K
505
- affinity = 1.0 + o * COLLAB_AFFINITY_K
506
- growth = 1.0 + (1.0 - o) * COLLAB_GROWTH_K
507
- eng_boost = reach * affinity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  if partner_id in self._collab_history[:-1]:
509
- eng_boost *= COLLAB_PARTNER_REPEAT_PENALTY
510
- growth *= COLLAB_PARTNER_REPEAT_PENALTY
 
 
511
  prior = max(0, self._collabs_this_month - 1)
512
  fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
513
- return eng_boost * fatigue, growth * fatigue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  # ----- engagement signals (Mosseri-aligned) -----
516
 
@@ -597,18 +939,68 @@ class ViraltestEnvironment(Environment):
597
  elif tool.name == "query_creator_pool":
598
  pool = []
599
  for comp in self._competitors:
600
- overlap = self._user_partner_overlap(comp.id)
601
  pool.append({
602
- "id": comp.id, "name": comp.name, "niche": comp.niche,
603
- "audience_overlap": round(overlap, 2) if overlap is not None else None,
 
 
 
 
 
 
 
 
 
 
604
  })
605
- return ToolResult(name=tool.name, data=pool, budget_remaining=self._api_budget)
 
 
 
 
 
 
 
 
606
 
607
  elif tool.name == "propose_collab":
608
  partner_id = tool.arguments.get("partner_id", "")
609
  if partner_id not in [c.id for c in self._competitors]:
610
  return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
611
- return ToolResult(name=tool.name, data={"status": "proposal_accepted", "partner_id": partner_id}, budget_remaining=self._api_budget)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
 
613
  return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
614
 
@@ -665,6 +1057,14 @@ class ViraltestEnvironment(Environment):
665
  if self._hours_since_sleep > 22:
666
  violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
667
  pc -= 0.10
 
 
 
 
 
 
 
 
668
 
669
  burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
670
  sustainability_risk = max(0.0, min(1.0, burnout_pressure))
@@ -729,6 +1129,11 @@ class ViraltestEnvironment(Environment):
729
  self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
730
  self._init_state()
731
 
 
 
 
 
 
732
  self._shift_label = kwargs.get("shift_label")
733
  self._chain_id = kwargs.get("episode_chain_id")
734
 
@@ -766,11 +1171,16 @@ class ViraltestEnvironment(Environment):
766
 
767
  # Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
768
  self._active_collab = None
 
769
  if action.collab:
770
  self._collabs_this_month += 1
771
  self._collab_history.append(action.collab.partner_id)
772
  self._active_collab = action.collab
773
 
 
 
 
 
774
  # Validate scheduled actions
775
  schedule: Dict[int, ScheduledAction] = {}
776
  errors: List[str] = []
@@ -837,9 +1247,14 @@ class ViraltestEnvironment(Environment):
837
  if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
838
  self._days_with_good_posts.add(prev_day)
839
 
840
- avg_reward = daily_reward / 24.0
 
 
841
  error_str = "; ".join(errors) if errors else None
842
 
 
 
 
843
  done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
844
  coach = self._compute_coach_feedback(daily_engagement)
845
  judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
@@ -864,6 +1279,7 @@ class ViraltestEnvironment(Environment):
864
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
865
  tool_results=tool_results, engagement_signals=daily_signals,
866
  coach_feedback=coach, judge_report=judge, headline_metrics=headline,
 
867
  )
868
  return self._final_observation
869
 
@@ -873,6 +1289,7 @@ class ViraltestEnvironment(Environment):
873
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
874
  tool_results=tool_results, engagement_signals=daily_signals,
875
  coach_feedback=coach, judge_report=judge,
 
876
  )
877
 
878
  def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
@@ -918,6 +1335,10 @@ class ViraltestEnvironment(Environment):
918
  * niche_mult * saturation_factor
919
  )
920
 
 
 
 
 
921
  if self._active_collab is not None and self._active_collab.hour == sa.hour:
922
  eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
923
  engagement *= eng_m
@@ -1101,9 +1522,11 @@ class ViraltestEnvironment(Environment):
1101
  coach_feedback: Optional[Dict[str, Any]] = None,
1102
  judge_report: Optional[JudgeReport] = None,
1103
  headline_metrics: Optional[HeadlineMetrics] = None,
 
1104
  ) -> ViraltestObservation:
1105
  recent_eng = self._engagement_history[-10:] if self._engagement_history else []
1106
  eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
 
1107
 
1108
  meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
1109
  if grader_score is not None:
@@ -1153,6 +1576,7 @@ class ViraltestEnvironment(Environment):
1153
  done=done,
1154
  reward=round(reward, 4),
1155
  metadata=meta,
 
1156
  )
1157
 
1158
  # ----- graders (monthly) -----
 
26
  try:
27
  from ..models import (
28
  CollabProposal,
29
+ DailyInteractions,
30
  EngagementSignals,
31
  HeadlineMetrics,
32
  JudgeReport,
 
39
  except ImportError:
40
  from models import (
41
  CollabProposal,
42
+ DailyInteractions,
43
  EngagementSignals,
44
  HeadlineMetrics,
45
  JudgeReport,
 
90
  int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
91
  }
92
 
93
# Mocked niche + follower-count lookups for the collab system. Live in the overlap matrix file
# so the same source-of-truth carries (a) Jaccard overlap, (b) niche label, (c) follower size.
_NICHE_BY_ARCHETYPE: Dict[str, str] = dict(_OVERLAP_DATA.get("niche_by_archetype", {}))
# Follower counts are coerced to int here so downstream gap/intersection math never sees
# stringly-typed JSON values. Missing keys simply yield an empty mapping (lookup falls back to 0).
_FOLLOWERS_BY_ARCHETYPE: Dict[str, int] = {
    k: int(v) for k, v in _OVERLAP_DATA.get("mock_followers_by_archetype", {}).items()
}
99
+
100
  # ---------------------------------------------------------------------------
101
  # Constants (research-backed, Tier 1-3 sources)
102
  # ---------------------------------------------------------------------------
 
175
  TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
176
  # Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
177
  # Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
 
 
 
178
  COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when partner reused this brand
179
  COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
180
 
181
# Niche-aware tiered shaping (overlap = Jaccard intersection fraction).
# Hard rule: any diff-niche multiplier must be < the minimum same-niche-low multiplier
# so the env never recommends a diff-niche collab over an equal-overlap same-niche one.
COLLAB_LOW_OVERLAP_THRESHOLD = 0.20  # < this counts as "low intersection"
COLLAB_HIGH_OVERLAP_THRESHOLD = 0.40  # >= this counts as "high intersection"
COLLAB_GUARDRAIL_OVERLAP_MIN = 0.10  # below this -> recommended=False (intersection-too-low guardrail)
COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX = 0.25  # |partner - user| / max > this -> follower-size mismatch
COLLAB_FORCED_PENALTY_ENG = 0.7  # eng_mult applied if agent ignores guardrail
COLLAB_FORCED_PENALTY_GROWTH = 0.6  # growth_mult applied if agent ignores guardrail

# Same niche, LOW overlap -> HIGH reward (best case). Smoothly interpolated by overlap (low->high uplift as overlap->0).
# Tuple convention for all *_LOW spans: (value at the LOW threshold, value at overlap=0).
COLLAB_SAME_LOW_ENG = (1.50, 1.80)
COLLAB_SAME_LOW_GROWTH = (1.60, 2.00)
# Same niche, HIGH overlap -> LOW reward (no point, audience already shared).
COLLAB_SAME_HIGH_ENG = 0.85
COLLAB_SAME_HIGH_GROWTH = 0.90
# Diff niche, LOW overlap -> MED reward (cross-pollination, capped < SAME_LOW min).
COLLAB_DIFF_LOW_ENG = (1.20, 1.40)
COLLAB_DIFF_LOW_GROWTH = (1.30, 1.55)
# Diff niche, HIGH overlap -> LOW reward (mismatch).
COLLAB_DIFF_HIGH_ENG = 0.75
COLLAB_DIFF_HIGH_GROWTH = 0.80

# Interaction (likes/comments/replies) tunables
INTERACT_ENERGY_LIKE = 0.005  # energy cost per like given to another creator
INTERACT_ENERGY_COMMENT = 0.012  # energy cost per comment on another creator
INTERACT_ENERGY_REPLY = 0.018  # energy cost per reply to own audience
INTERACT_HEALTHY_LIKES = (5, 20)  # inclusive daily band that earns the like reach buff
INTERACT_HEALTHY_COMMENTS = (3, 10)  # inclusive daily band that earns the comment reach buff
INTERACT_LIKE_REACH_BUFF = 0.04  # reach multiplier uplift when likes are in the healthy band
INTERACT_COMMENT_REACH_BUFF = 0.08  # reach multiplier uplift when comments are in the healthy band
INTERACT_REPLY_REWARD_PER = 0.01  # base reward per audience reply (scaled by quality)
INTERACT_REPLY_REWARD_CAP = 0.15  # ceiling on the per-day reply reward
INTERACT_DAILY_REWARD_CAP = 0.15  # clamp on the total interaction reward_delta per day
INTERACT_SPAM_LIKES = 30  # likes above this count as spam
INTERACT_SPAM_COMMENTS = 20  # comments above this count as spam
INTERACT_SPAM_REACH_PENALTY = 0.85  # reach multiplier applied on a spam day
INTERACT_SPAM_SHADOWBAN_BUMP = 0.20  # shadowban_risk increase on a spam day
INTERACT_IGNORE_THRESHOLD_K = 0.05  # expected replies per unit of yesterday's engagement proxy
INTERACT_IGNORE_LOYALTY_DECAY = 0.97  # compounding loyalty multiplier when audience is ignored
INTERACT_OFFNICHE_THRESHOLD = 0.60  # off-niche target share at/above which the penalty fires
INTERACT_OFFNICHE_REACH_PENALTY = 0.90  # reach multiplier applied for off-niche-heavy days
INTERACT_LOWQ_THRESHOLD = 0.30  # reply quality below this down-weights the reply reward
INTERACT_LOWQ_WEIGHT = 0.4  # weight applied to reply reward when quality is low
INTERACT_VERY_LOWQ_THRESHOLD = 0.10  # reply quality below this triggers an extra penalty
INTERACT_VERY_LOWQ_PENALTY = -0.03  # flat reward penalty for very-low-quality replies
227
+
228
  API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
229
 
230
  # Heuristic baselines for headline metric `vs_baseline_pct`.
 
304
  "parameters": {"scheduled_actions": {"type": "array"}},
305
  },
306
  "query_creator_pool": {
307
+ "description": "List available competitor archetypes for potential collaboration with audience overlap %, niche match, mocked follower counts, intersection size, and a recommendation flag (recommended=False when guardrails block: zero followers, intersection<10%, or follower-size gap>25%).",
308
  "parameters": {},
309
  },
310
  "propose_collab": {
311
+ "description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored. Reward shaping: same-niche + low overlap = HIGH; same-niche + high overlap = LOW; diff-niche always capped below same-niche-low. Guardrail violations apply a 0.7x engagement / 0.6x growth penalty AND surface in the JudgeReport.",
312
  "parameters": {
313
  "partner_id": {"type": "string"},
314
  "content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
315
  "hour": {"type": "integer", "minimum": 0, "maximum": 23},
316
  },
317
  },
318
+ "query_interaction_norms": {
319
+ "description": "Discover healthy daily ranges for likes/comments/replies and the current shadowban_risk. Use before submitting ViraltestAction.interactions.",
320
+ "parameters": {},
321
+ },
322
  }
323
 
324
 
 
362
  self._collabs_this_month = 0
363
  self._collab_history: List[str] = []
364
  self._active_collab: Optional[CollabProposal] = None
365
+ self._collab_violations: List[str] = [] # collab guardrail breaches this step
366
+ self._user_niche: str = _NICHE_BY_ARCHETYPE.get("user_creator", "generic")
367
+
368
+ # Interaction state
369
+ self._pending_reach_mult: float = 1.0 # applied to next day's posts (one-shot)
370
+ self._shadowban_risk: float = 0.0
371
+ self._engagement_rate_loyalty_mult: float = 1.0 # compounding loyalty drop from ignoring audience
372
+ self._interaction_violations: List[str] = []
373
+ self._last_interaction_summary: Optional[Dict[str, Any]] = None
374
  self._low_energy_days = 0
375
  self._total_posts_this_week = 0
376
  self._week_start_day = 0
 
552
 
553
  return daily_fatigue * weekly_mult
554
 
555
+ # ----- collab evaluation (niche-aware, overlap-tiered) -----
556
 
557
  def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
558
  ids = _OVERLAP_DATA.get("archetype_ids", [])
 
562
  p = ids.index(partner_id)
563
  return _OVERLAP_DATA["matrix"][u][p]
564
 
565
+ def _partner_niche(self, partner_id: str) -> str:
566
+ return _NICHE_BY_ARCHETYPE.get(partner_id, "generic")
567
+
568
+ def _partner_followers(self, partner_id: str) -> int:
569
+ return _FOLLOWERS_BY_ARCHETYPE.get(partner_id, 0)
570
+
571
+ @staticmethod
572
+ def _interp(span: Tuple[float, float], t: float) -> float:
573
+ """Linear interp from span[0] (t=0) to span[1] (t=1)."""
574
+ t = max(0.0, min(1.0, t))
575
+ return span[0] + (span[1] - span[0]) * t
576
+
577
+ def _collab_tier_multipliers(self, same_niche: bool, overlap: float) -> Tuple[float, float]:
578
+ """Pure 2x2 tier shaping (no fatigue/repeat/guardrail effects yet)."""
579
+ # Smooth interp factor: how "low" is this overlap on the [0, LOW_THRESHOLD] scale.
580
+ low_t = 1.0 - min(1.0, overlap / COLLAB_LOW_OVERLAP_THRESHOLD) # 1 at overlap=0, 0 at threshold
581
+ if same_niche:
582
+ if overlap < COLLAB_LOW_OVERLAP_THRESHOLD:
583
+ eng = self._interp(COLLAB_SAME_LOW_ENG, low_t)
584
+ growth = self._interp(COLLAB_SAME_LOW_GROWTH, low_t)
585
+ elif overlap >= COLLAB_HIGH_OVERLAP_THRESHOLD:
586
+ eng = COLLAB_SAME_HIGH_ENG
587
+ growth = COLLAB_SAME_HIGH_GROWTH
588
+ else:
589
+ # Mid-band linear interpolation between LOW endpoint (overlap=LOW_TH) and HIGH endpoint (overlap=HIGH_TH).
590
+ mid_t = (overlap - COLLAB_LOW_OVERLAP_THRESHOLD) / (COLLAB_HIGH_OVERLAP_THRESHOLD - COLLAB_LOW_OVERLAP_THRESHOLD)
591
+ eng = self._interp((COLLAB_SAME_LOW_ENG[0], COLLAB_SAME_HIGH_ENG), mid_t)
592
+ growth = self._interp((COLLAB_SAME_LOW_GROWTH[0], COLLAB_SAME_HIGH_GROWTH), mid_t)
593
+ else:
594
+ if overlap < COLLAB_LOW_OVERLAP_THRESHOLD:
595
+ eng = self._interp(COLLAB_DIFF_LOW_ENG, low_t)
596
+ growth = self._interp(COLLAB_DIFF_LOW_GROWTH, low_t)
597
+ elif overlap >= COLLAB_HIGH_OVERLAP_THRESHOLD:
598
+ eng = COLLAB_DIFF_HIGH_ENG
599
+ growth = COLLAB_DIFF_HIGH_GROWTH
600
+ else:
601
+ mid_t = (overlap - COLLAB_LOW_OVERLAP_THRESHOLD) / (COLLAB_HIGH_OVERLAP_THRESHOLD - COLLAB_LOW_OVERLAP_THRESHOLD)
602
+ eng = self._interp((COLLAB_DIFF_LOW_ENG[0], COLLAB_DIFF_HIGH_ENG), mid_t)
603
+ growth = self._interp((COLLAB_DIFF_LOW_GROWTH[0], COLLAB_DIFF_HIGH_GROWTH), mid_t)
604
+ # Hard rule: diff-niche must always be < same-niche-low minimum (cap just below).
605
+ eng = min(eng, COLLAB_SAME_LOW_ENG[0] - 0.01)
606
+ growth = min(growth, COLLAB_SAME_LOW_GROWTH[0] - 0.01)
607
+ return eng, growth
608
+
609
    def _collab_evaluation(self, partner_id: str) -> Dict[str, Any]:
        """Single source of truth: tier reward + guardrails + final multipliers (after fatigue/repeat).

        Returns a dict consumable by both query_creator_pool (for recommendation surface)
        and _process_hour_action (for applied multipliers).

        Keys of note:
          - recommended / reason: guardrail verdict, checked in priority order
            (zero followers, overlap below 10%, follower-size gap above 25%).
          - tier_eng_mult / tier_growth_mult: pure 2x2 niche/overlap shaping.
          - eng_mult / growth_mult: tier values after the repeat-partner discount
            and the per-episode collab-fatigue decay.
        """
        overlap = self._user_partner_overlap(partner_id)
        if overlap is None:
            # Unknown partner: neutral multipliers, explicitly not recommended.
            return {
                "partner_id": partner_id,
                "overlap": None,
                "same_niche": False,
                "partner_followers": 0,
                "user_followers": self._followers,
                "follower_gap_pct": 1.0,
                "intersection_size": 0,
                "recommended": False,
                "reason": "unknown_partner",
                "tier_eng_mult": 1.0,
                "tier_growth_mult": 1.0,
                "eng_mult": 1.0,
                "growth_mult": 1.0,
            }

        partner_niche = self._partner_niche(partner_id)
        same_niche = partner_niche == self._user_niche
        partner_followers = self._partner_followers(partner_id)
        user_followers = max(0, int(self._followers))
        # Relative follower gap: |partner - user| / larger of the two (denominator floored at 1).
        denom = max(1, max(partner_followers, user_followers))
        gap_pct = abs(partner_followers - user_followers) / denom if denom else 1.0

        # Mock intersection size via Jaccard inversion: union ≈ (|A|+|B|)/(1+overlap), intersection = overlap*union.
        union_approx = (partner_followers + user_followers) / (1.0 + overlap) if overlap >= 0 else 0.0
        intersection_size = int(round(overlap * union_approx))

        # Guardrails (in priority order); only the first failing check sets `reason`.
        recommended = True
        reason: Optional[str] = None
        if partner_followers <= 0:
            recommended = False
            reason = "partner_zero_followers"
        elif overlap < COLLAB_GUARDRAIL_OVERLAP_MIN:
            recommended = False
            reason = "intersection_below_10pct"
        elif gap_pct > COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX:
            recommended = False
            reason = "follower_size_mismatch"

        tier_eng, tier_growth = self._collab_tier_multipliers(same_niche, overlap)

        eng_mult = tier_eng
        growth_mult = tier_growth

        # Repeat-partner discount (existing behavior preserved). `[:-1]` excludes the
        # just-appended entry so the first collab with a partner is not penalized.
        if partner_id in self._collab_history[:-1]:
            eng_mult *= COLLAB_PARTNER_REPEAT_PENALTY
            growth_mult *= COLLAB_PARTNER_REPEAT_PENALTY

        # Diminishing returns across the episode (Cen 2024).
        prior = max(0, self._collabs_this_month - 1)
        fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
        eng_mult *= fatigue
        growth_mult *= fatigue

        return {
            "partner_id": partner_id,
            "overlap": round(overlap, 3),
            "same_niche": same_niche,
            "partner_niche": partner_niche,
            "user_niche": self._user_niche,
            "partner_followers": partner_followers,
            "user_followers": user_followers,
            "follower_gap_pct": round(gap_pct, 3),
            "intersection_size": intersection_size,
            "recommended": recommended,
            "reason": reason,
            "tier_eng_mult": round(tier_eng, 3),
            "tier_growth_mult": round(tier_growth, 3),
            "eng_mult": round(eng_mult, 3),
            "growth_mult": round(growth_mult, 3),
        }
690
+
691
+ def _collab_multipliers(self, partner_id: str) -> Tuple[float, float]:
692
+ """Returns (engagement_multiplier, follower_growth_multiplier).
693
+
694
+ Applies guardrail penalties when the agent forces a non-recommended collab.
695
+ Side effect: appends to self._collab_violations for the JudgeReport.
696
+ """
697
+ ev = self._collab_evaluation(partner_id)
698
+ eng = ev["eng_mult"]
699
+ growth = ev["growth_mult"]
700
+ if not ev["recommended"]:
701
+ eng *= COLLAB_FORCED_PENALTY_ENG
702
+ growth *= COLLAB_FORCED_PENALTY_GROWTH
703
+ self._collab_violations.append(
704
+ f"collab_guardrail:{ev.get('reason', 'blocked')}@{partner_id}"
705
+ )
706
+ return eng, growth
707
+
708
+ # ----- interactions (likes/comments/replies) -----
709
+
710
    def _process_interactions(
        self, interactions: Optional[DailyInteractions]
    ) -> Tuple[float, Dict[str, Any]]:
        """Apply daily interaction effects: energy cost, reach buffs (next post), and 5 penalty paths.

        Returns (reward_delta, summary_dict). The reward_delta is added to today's averaged reward;
        reach effects propagate via self._pending_reach_mult (consumed at next _process_hour_action).
        Loyalty effects propagate via self._engagement_rate_loyalty_mult (compounding).

        Penalty paths (each appends to self._interaction_violations):
        spam volume, off-niche-heavy targets, ignoring own audience,
        very-low-quality replies, and energy drain below 0.2.
        """
        # Reset reach mult for the day (default neutral); we accumulate per-day, then it's consumed
        # by today's posts and any leftover carries over by simply staying at 1.0 next step.
        self._pending_reach_mult = 1.0
        self._interaction_violations = []

        # Default summary for the "no interactions submitted" case.
        summary: Dict[str, Any] = {
            "likes_on_others": 0,
            "comments_on_others": 0,
            "replies_to_audience": 0,
            "energy_cost": 0.0,
            "reach_modifier": 1.0,
            "shadowban_risk": round(self._shadowban_risk, 3),
            "loyalty_mult": round(self._engagement_rate_loyalty_mult, 3),
            "reward_delta": 0.0,
            "violations": [],
            "summary": "no_interactions",
        }

        if interactions is None:
            return 0.0, summary

        likes = int(interactions.likes_on_others)
        comments = int(interactions.comments_on_others)
        replies = int(interactions.replies_to_audience)
        targets = list(interactions.target_partner_ids or [])
        quality = float(interactions.avg_reply_quality)

        # 1) Energy cost (paid up front; can push creator below 0.2 -> burnout track).
        energy_cost = (
            INTERACT_ENERGY_LIKE * likes
            + INTERACT_ENERGY_COMMENT * comments
            + INTERACT_ENERGY_REPLY * replies
        )
        self._energy = max(0.0, self._energy - energy_cost)

        # Determine off-niche share among interaction targets.
        off_niche_share = 0.0
        if targets:
            off = 0
            for tid in targets:
                if self._partner_niche(tid) != self._user_niche:
                    off += 1
            off_niche_share = off / len(targets)

        # 2) Reach buffs (next post engagement multiplier) — only when on-niche and within healthy band.
        on_niche_share = 1.0 - off_niche_share
        reach_mult = 1.0
        if on_niche_share > 0:
            if INTERACT_HEALTHY_LIKES[0] <= likes <= INTERACT_HEALTHY_LIKES[1]:
                reach_mult *= 1.0 + INTERACT_LIKE_REACH_BUFF * on_niche_share
            if INTERACT_HEALTHY_COMMENTS[0] <= comments <= INTERACT_HEALTHY_COMMENTS[1]:
                reach_mult *= 1.0 + INTERACT_COMMENT_REACH_BUFF * on_niche_share

        reward_delta = 0.0

        # 3) Reply reward (audience loyalty), scaled by quality; low quality is down-weighted.
        reply_weight = INTERACT_LOWQ_WEIGHT if quality < INTERACT_LOWQ_THRESHOLD else 1.0
        reply_reward = min(
            INTERACT_REPLY_REWARD_CAP,
            INTERACT_REPLY_REWARD_PER * replies * quality * reply_weight,
        )
        reward_delta += reply_reward

        # 4) Penalties — each surfaces a violation string.
        # 4a) Spam volume.
        if likes > INTERACT_SPAM_LIKES or comments > INTERACT_SPAM_COMMENTS:
            reach_mult *= INTERACT_SPAM_REACH_PENALTY
            self._shadowban_risk = min(1.0, self._shadowban_risk + INTERACT_SPAM_SHADOWBAN_BUMP)
            self._interaction_violations.append(
                f"interaction_spam:likes={likes},comments={comments}"
            )

        # 4b) Off-niche heavy interaction (only fires with at least 3 targets).
        if off_niche_share >= INTERACT_OFFNICHE_THRESHOLD and len(targets) >= 3:
            reach_mult *= INTERACT_OFFNICHE_REACH_PENALTY
            self._interaction_violations.append(
                f"interaction_off_niche:share={off_niche_share:.2f}"
            )

        # 4c) Ignoring own audience: expected_replies = K * recent_engagement_proxy (use last day's posts)
        prev_day = max(0, self._day - 1)
        expected_signal = self._posts_per_day.get(prev_day, 0)  # # posts yesterday as a proxy
        # Multiply by a small constant so 1 post = 1 expected reply unit floor.
        expected_replies = expected_signal * 1.0
        if expected_replies > 0 and replies < INTERACT_IGNORE_THRESHOLD_K * expected_replies * 20:
            # Compounding loyalty drop on engagement_rate, capped at 0.5x floor.
            self._engagement_rate_loyalty_mult = max(
                0.5, self._engagement_rate_loyalty_mult * INTERACT_IGNORE_LOYALTY_DECAY
            )
            self._interaction_violations.append(
                f"interaction_ignoring_own:replies={replies}"
            )

        # 4d) Low quality replies — already weighted; if extremely low quality, additional penalty.
        if replies > 0 and quality < INTERACT_VERY_LOWQ_THRESHOLD:
            reward_delta += INTERACT_VERY_LOWQ_PENALTY
            self._interaction_violations.append(
                f"interaction_low_quality:q={quality:.2f}"
            )

        # 4e) Energy: covered upstream; just record if it pushed creator into low-energy zone.
        if energy_cost > 0 and self._energy < 0.2:
            self._interaction_violations.append(
                f"interaction_energy_drain:residual_energy={self._energy:.2f}"
            )

        # Cap daily reward_delta to avoid blowing past the per-step [0,1] reward envelope.
        reward_delta = max(-INTERACT_DAILY_REWARD_CAP, min(INTERACT_DAILY_REWARD_CAP, reward_delta))

        # Persist computed reach_mult so today's hourly posts pick it up (floored at 0.5x).
        self._pending_reach_mult = max(0.5, reach_mult)

        # Decay shadowban_risk slightly on quiet days (0 likes & 0 comments).
        if likes == 0 and comments == 0:
            self._shadowban_risk = max(0.0, self._shadowban_risk - 0.05)

        # The "summary" label mirrors the penalty precedence above:
        # spam > off_niche > low_quality > ignoring_own > healthy > neutral.
        summary.update({
            "likes_on_others": likes,
            "comments_on_others": comments,
            "replies_to_audience": replies,
            "energy_cost": round(energy_cost, 4),
            "reach_modifier": round(self._pending_reach_mult, 3),
            "shadowban_risk": round(self._shadowban_risk, 3),
            "loyalty_mult": round(self._engagement_rate_loyalty_mult, 3),
            "off_niche_share": round(off_niche_share, 2),
            "reward_delta": round(reward_delta, 4),
            "violations": list(self._interaction_violations),
            "summary": (
                "spam" if likes > INTERACT_SPAM_LIKES or comments > INTERACT_SPAM_COMMENTS
                else "off_niche" if off_niche_share >= INTERACT_OFFNICHE_THRESHOLD and len(targets) >= 3
                else "low_quality" if replies > 0 and quality < INTERACT_VERY_LOWQ_THRESHOLD
                else "ignoring_own" if expected_replies > 0 and replies < INTERACT_IGNORE_THRESHOLD_K * expected_replies * 20
                else "healthy" if reward_delta > 0 or reach_mult > 1.0
                else "neutral"
            ),
        })
        return reward_delta, summary
856
 
857
  # ----- engagement signals (Mosseri-aligned) -----
858
 
 
939
  elif tool.name == "query_creator_pool":
940
  pool = []
941
  for comp in self._competitors:
942
+ ev = self._collab_evaluation(comp.id)
943
  pool.append({
944
+ "id": comp.id,
945
+ "name": comp.name,
946
+ "niche": comp.niche,
947
+ "audience_overlap": ev.get("overlap"),
948
+ "mock_followers": ev.get("partner_followers"),
949
+ "intersection_size": ev.get("intersection_size"),
950
+ "same_niche": ev.get("same_niche"),
951
+ "follower_gap_pct": ev.get("follower_gap_pct"),
952
+ "recommended": ev.get("recommended"),
953
+ "reason": ev.get("reason"),
954
+ "expected_eng_mult": ev.get("eng_mult"),
955
+ "expected_growth_mult": ev.get("growth_mult"),
956
  })
957
+ return ToolResult(
958
+ name=tool.name,
959
+ data={
960
+ "user_niche": self._user_niche,
961
+ "user_followers": int(self._followers),
962
+ "pool": pool,
963
+ },
964
+ budget_remaining=self._api_budget,
965
+ )
966
 
967
  elif tool.name == "propose_collab":
968
  partner_id = tool.arguments.get("partner_id", "")
969
  if partner_id not in [c.id for c in self._competitors]:
970
  return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
971
+ ev = self._collab_evaluation(partner_id)
972
+ return ToolResult(
973
+ name=tool.name,
974
+ data={
975
+ "status": "proposal_accepted" if ev["recommended"] else "proposal_accepted_with_warning",
976
+ "partner_id": partner_id,
977
+ "recommended": ev["recommended"],
978
+ "reason": ev["reason"],
979
+ "same_niche": ev["same_niche"],
980
+ "audience_overlap": ev["overlap"],
981
+ "intersection_size": ev["intersection_size"],
982
+ "expected_eng_mult": ev["eng_mult"],
983
+ "expected_growth_mult": ev["growth_mult"],
984
+ },
985
+ budget_remaining=self._api_budget,
986
+ )
987
+
988
+ elif tool.name == "query_interaction_norms":
989
+ return ToolResult(
990
+ name=tool.name,
991
+ data={
992
+ "healthy_likes_per_day": list(INTERACT_HEALTHY_LIKES),
993
+ "healthy_comments_per_day": list(INTERACT_HEALTHY_COMMENTS),
994
+ "spam_threshold_likes": INTERACT_SPAM_LIKES,
995
+ "spam_threshold_comments": INTERACT_SPAM_COMMENTS,
996
+ "off_niche_share_max": INTERACT_OFFNICHE_THRESHOLD,
997
+ "min_reply_quality": INTERACT_LOWQ_THRESHOLD,
998
+ "current_shadowban_risk": round(self._shadowban_risk, 3),
999
+ "user_niche": self._user_niche,
1000
+ "expected_replies_per_unit_engagement": INTERACT_IGNORE_THRESHOLD_K,
1001
+ },
1002
+ budget_remaining=self._api_budget,
1003
+ )
1004
 
1005
  return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
1006
 
 
1057
  if self._hours_since_sleep > 22:
1058
  violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
1059
  pc -= 0.10
1060
+ # Collab guardrail breaches surfaced by _collab_multipliers (forced past block).
1061
+ for v in self._collab_violations:
1062
+ violations.append(v)
1063
+ pc -= 0.10
1064
+ # Interaction system violations (spam/off-niche/ignoring/low-quality/energy-drain).
1065
+ for v in self._interaction_violations:
1066
+ violations.append(v)
1067
+ pc -= 0.10
1068
 
1069
  burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
1070
  sustainability_risk = max(0.0, min(1.0, burnout_pressure))
 
1129
  self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
1130
  self._init_state()
1131
 
1132
+ # Optional user-niche override (for collab same/diff niche scenarios).
1133
+ user_niche_override = kwargs.get("user_niche")
1134
+ if user_niche_override:
1135
+ self._user_niche = str(user_niche_override)
1136
+
1137
  self._shift_label = kwargs.get("shift_label")
1138
  self._chain_id = kwargs.get("episode_chain_id")
1139
 
 
1171
 
1172
  # Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
1173
  self._active_collab = None
1174
+ self._collab_violations = []
1175
  if action.collab:
1176
  self._collabs_this_month += 1
1177
  self._collab_history.append(action.collab.partner_id)
1178
  self._active_collab = action.collab
1179
 
1180
+ # Process interactions BEFORE the day's hourly loop so energy cost and reach buffs/penalties
1181
+ # influence the same day's posts.
1182
+ interaction_reward, interaction_summary = self._process_interactions(action.interactions)
1183
+
1184
  # Validate scheduled actions
1185
  schedule: Dict[int, ScheduledAction] = {}
1186
  errors: List[str] = []
 
1247
  if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
1248
  self._days_with_good_posts.add(prev_day)
1249
 
1250
+ # Apply ignored-audience compounding loyalty multiplier into the per-day reward.
1251
+ avg_reward = (daily_reward / 24.0) + interaction_reward
1252
+ avg_reward = max(0.0, min(1.0, avg_reward))
1253
  error_str = "; ".join(errors) if errors else None
1254
 
1255
+ # Finalize this step's interaction summary on the obs.
1256
+ self._last_interaction_summary = interaction_summary
1257
+
1258
  done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
1259
  coach = self._compute_coach_feedback(daily_engagement)
1260
  judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
 
1279
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
1280
  tool_results=tool_results, engagement_signals=daily_signals,
1281
  coach_feedback=coach, judge_report=judge, headline_metrics=headline,
1282
+ interaction_metrics=interaction_summary,
1283
  )
1284
  return self._final_observation
1285
 
 
1289
  daily_posts_made=daily_posts, daily_energy_min=energy_min,
1290
  tool_results=tool_results, engagement_signals=daily_signals,
1291
  coach_feedback=coach, judge_report=judge,
1292
+ interaction_metrics=interaction_summary,
1293
  )
1294
 
1295
  def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
 
1335
  * niche_mult * saturation_factor
1336
  )
1337
 
1338
+ # Interaction-driven reach modifier (set by _process_interactions earlier this step).
1339
+ # Multiplicative on engagement; capped at 0.5 floor inside _process_interactions.
1340
+ engagement *= getattr(self, "_pending_reach_mult", 1.0)
1341
+
1342
  if self._active_collab is not None and self._active_collab.hour == sa.hour:
1343
  eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
1344
  engagement *= eng_m
 
1522
  coach_feedback: Optional[Dict[str, Any]] = None,
1523
  judge_report: Optional[JudgeReport] = None,
1524
  headline_metrics: Optional[HeadlineMetrics] = None,
1525
+ interaction_metrics: Optional[Dict[str, Any]] = None,
1526
  ) -> ViraltestObservation:
1527
  recent_eng = self._engagement_history[-10:] if self._engagement_history else []
1528
  eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
1529
+ eng_rate *= getattr(self, "_engagement_rate_loyalty_mult", 1.0)
1530
 
1531
  meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
1532
  if grader_score is not None:
 
1576
  done=done,
1577
  reward=round(reward, 4),
1578
  metadata=meta,
1579
+ interaction_metrics=interaction_metrics,
1580
  )
1581
 
1582
  # ----- graders (monthly) -----
test_scenarios.py CHANGED
@@ -5,9 +5,14 @@ Each step = one full day. Agent submits a sparse daily plan.
5
  """
6
 
7
  import random as stdlib_random
8
- from typing import Callable, Dict, List, Tuple
9
 
10
- from models import ScheduledAction, ViraltestAction
 
 
 
 
 
11
  from server.viraltest_environment import (
12
  TAG_POOL,
13
  ViraltestEnvironment,
@@ -22,17 +27,29 @@ _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food
22
  _rng = stdlib_random.Random(99)
23
 
24
 
25
- def _plan(actions: list) -> ViraltestAction:
26
- return ViraltestAction(scheduled_actions=[ScheduledAction(**a) for a in actions])
 
 
 
 
 
 
 
 
27
 
28
 
29
  def run_episode(
30
  task: str,
31
  plan_fn: Callable[[Dict, int], ViraltestAction],
32
  label: str,
 
33
  ) -> float:
34
  env = ViraltestEnvironment()
35
- obs = env.reset(task=task, seed=SEED)
 
 
 
36
  obs_dict = obs.model_dump()
37
  rewards: List[float] = []
38
  min_energy = 1.0
@@ -159,16 +176,139 @@ def plan_random(obs: dict, day: int) -> ViraltestAction:
159
  return _plan(actions)
160
 
161
 
162
- SCENARIOS: List[Tuple[str, Callable, str]] = [
163
- ("Always Rest", plan_always_rest, "Zero engagement, no growth, energy stays max"),
164
- ("Spam Post", plan_spam, "Post every hour, burns out instantly"),
165
- ("Smart Agent", plan_smart, "Peak hours, trending, varied types, energy management"),
166
- ("No Rest", plan_no_rest, "Post every hour, never rests, burns out"),
167
- ("Minimal Poster", plan_minimal, "1 carousel at noon per day"),
168
- ("Tag Explorer", plan_tag_explorer, "Rotates through tag pool for max discovery"),
169
- ("Queue Optimizer", plan_queue_optimizer, "Creates content first, posts from queue"),
170
- ("Double Peak", plan_double_peak, "Posts at 9am and 3pm"),
171
- ("Random Actor", plan_random, "Random sparse actions each day"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  ]
173
 
174
 
@@ -178,16 +318,18 @@ if __name__ == "__main__":
178
  print("=" * 70)
179
  print()
180
 
181
- for scenario_name, plan_fn, description in SCENARIOS:
182
  print("=" * 70)
183
  print(f"{scenario_name}")
184
  print(f" {description}")
 
 
185
  print("=" * 70)
186
  print()
187
 
188
  for task in TASKS:
189
  _rng = stdlib_random.Random(99)
190
- run_episode(task, plan_fn, scenario_name)
191
 
192
  print()
193
 
@@ -195,15 +337,18 @@ if __name__ == "__main__":
195
  print("SUMMARY TABLE")
196
  print("=" * 70)
197
  print()
198
- print(f"{'Scenario':<30} {'Engage':>8} {'Strategic':>10} {'Competitive':>12}")
199
- print("-" * 62)
200
 
201
- for scenario_name, plan_fn, _ in SCENARIOS:
202
  scores = []
203
  for task in TASKS:
204
  _rng = stdlib_random.Random(99)
205
  env = ViraltestEnvironment()
206
- obs = env.reset(task=task, seed=SEED)
 
 
 
207
  obs_dict = obs.model_dump()
208
  for day in range(1, 31):
209
  action = plan_fn(obs_dict, day)
@@ -212,8 +357,10 @@ if __name__ == "__main__":
212
  if obs.done:
213
  break
214
  scores.append((obs.metadata or {}).get("grader_score", 0.0))
215
- print(f"{scenario_name:<30} {scores[0]:>8.4f} {scores[1]:>10.4f} {scores[2]:>12.4f}")
216
 
217
  print()
218
  print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
219
  print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
 
 
 
5
  """
6
 
7
  import random as stdlib_random
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple
9
 
10
+ from models import (
11
+ CollabProposal,
12
+ DailyInteractions,
13
+ ScheduledAction,
14
+ ViraltestAction,
15
+ )
16
  from server.viraltest_environment import (
17
  TAG_POOL,
18
  ViraltestEnvironment,
 
27
  _rng = stdlib_random.Random(99)
28
 
29
 
30
+ def _plan(
31
+ actions: list,
32
+ collab: Optional[CollabProposal] = None,
33
+ interactions: Optional[DailyInteractions] = None,
34
+ ) -> ViraltestAction:
35
+ return ViraltestAction(
36
+ scheduled_actions=[ScheduledAction(**a) for a in actions],
37
+ collab=collab,
38
+ interactions=interactions,
39
+ )
40
 
41
 
42
  def run_episode(
43
  task: str,
44
  plan_fn: Callable[[Dict, int], ViraltestAction],
45
  label: str,
46
+ user_niche: Optional[str] = None,
47
  ) -> float:
48
  env = ViraltestEnvironment()
49
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
50
+ if user_niche:
51
+ reset_kwargs["user_niche"] = user_niche
52
+ obs = env.reset(**reset_kwargs)
53
  obs_dict = obs.model_dump()
54
  rewards: List[float] = []
55
  min_energy = 1.0
 
176
  return _plan(actions)
177
 
178
 
179
+ # ---------------------------------------------------------------------------
180
+ # Collab grid scenarios user_niche set on env.reset(...) by run_episode.
181
+ # Each picks a partner_id intended to land in a specific (same/diff x low/high) tier
182
+ # and proposes the collab on day 5.
183
+ # ---------------------------------------------------------------------------
184
+
185
+ def _collab_plan(day: int, partner_id: str, hour: int = 12) -> ViraltestAction:
186
+ """Daily plan that posts once and proposes a collab on days 5 and 15.
187
+
188
+ Single-post per day keeps engagement below the theoretical_max cap so collab
189
+ multipliers visibly bend the final grader score and follower count.
190
+ """
191
+ actions = [
192
+ {"hour": hour, "action_type": "post", "content_type": "reel",
193
+ "topic": "AI tools", "tags": ["ai"], "intent": "watch_bait"},
194
+ ]
195
+ collab = None
196
+ if day in (5, 15):
197
+ collab = CollabProposal(partner_id=partner_id, content_type="reel", hour=hour)
198
+ return _plan(actions, collab=collab)
199
+
200
+
201
+ def plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
202
+ # user_niche=tech, partner=b2b_thought_leader (NICHE differs but matrix overlap=0.08)
203
+ # Use niche_expert (tech) which has overlap=0.10 with user_creator => same niche, low overlap.
204
+ return _collab_plan(day, partner_id="niche_expert")
205
+
206
+
207
+ def plan_collab_same_high(obs: dict, day: int) -> ViraltestAction:
208
+ # Force same niche + high overlap by setting user_niche=lifestyle and pairing with viral_chaser (overlap=0.55).
209
+ return _collab_plan(day, partner_id="viral_chaser")
210
+
211
+
212
+ def plan_collab_diff_low(obs: dict, day: int) -> ViraltestAction:
213
+ # user_niche=tech, partner=lifestyle_blogger (overlap=0.40 — actually high), pick travel_creator overlap=0.30 instead.
214
+ return _collab_plan(day, partner_id="travel_creator")
215
+
216
+
217
+ def plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
218
+ # user_niche=tech, partner=lifestyle_blogger (overlap=0.40, diff niche).
219
+ return _collab_plan(day, partner_id="lifestyle_blogger")
220
+
221
+
222
+ def plan_collab_blocked_zero(obs: dict, day: int) -> ViraltestAction:
223
+ # b2b_thought_leader has overlap=0.08 with user_creator -> intersection_below_10pct guardrail.
224
+ return _collab_plan(day, partner_id="b2b_thought_leader")
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Interaction scenarios — exercise the 5 penalty paths and the healthy band.
229
+ # ---------------------------------------------------------------------------
230
+
231
+ def _post_only_actions() -> list:
232
+ return [
233
+ {"hour": 12, "action_type": "post", "content_type": "reel",
234
+ "topic": "AI tools", "tags": ["ai"], "intent": "watch_bait"},
235
+ ]
236
+
237
+
238
+ def plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
239
+ interactions = DailyInteractions(
240
+ likes_on_others=12, comments_on_others=5, replies_to_audience=3,
241
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
242
+ )
243
+ return _plan(_post_only_actions(), interactions=interactions)
244
+
245
+
246
+ def plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
247
+ interactions = DailyInteractions(
248
+ likes_on_others=80, comments_on_others=40, replies_to_audience=0,
249
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
250
+ )
251
+ return _plan(_post_only_actions(), interactions=interactions)
252
+
253
+
254
+ def plan_interact_ignoring_own(obs: dict, day: int) -> ViraltestAction:
255
+ interactions = DailyInteractions(
256
+ likes_on_others=8, comments_on_others=4, replies_to_audience=0,
257
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.6,
258
+ )
259
+ return _plan(_post_only_actions(), interactions=interactions)
260
+
261
+
262
+ def plan_interact_off_niche(obs: dict, day: int) -> ViraltestAction:
263
+ interactions = DailyInteractions(
264
+ likes_on_others=10, comments_on_others=5, replies_to_audience=2,
265
+ target_partner_ids=["food_creator", "fitness_coach", "travel_creator", "lifestyle_blogger"],
266
+ avg_reply_quality=0.7,
267
+ )
268
+ return _plan(_post_only_actions(), interactions=interactions)
269
+
270
+
271
+ def plan_interact_low_quality(obs: dict, day: int) -> ViraltestAction:
272
+ interactions = DailyInteractions(
273
+ likes_on_others=10, comments_on_others=5, replies_to_audience=8,
274
+ target_partner_ids=["niche_expert"], avg_reply_quality=0.05,
275
+ )
276
+ return _plan(_post_only_actions(), interactions=interactions)
277
+
278
+
279
+ # Scenario tuple: (label, plan_fn, description, user_niche)
280
+ SCENARIOS: List[Tuple[str, Callable, str, Optional[str]]] = [
281
+ ("Always Rest", plan_always_rest, "Zero engagement, no growth, energy stays max", None),
282
+ ("Spam Post", plan_spam, "Post every hour, burns out instantly", None),
283
+ ("Smart Agent", plan_smart, "Peak hours, trending, varied types, energy management", None),
284
+ ("No Rest", plan_no_rest, "Post every hour, never rests, burns out", None),
285
+ ("Minimal Poster", plan_minimal, "1 carousel at noon per day", None),
286
+ ("Tag Explorer", plan_tag_explorer, "Rotates through tag pool for max discovery", None),
287
+ ("Queue Optimizer", plan_queue_optimizer, "Creates content first, posts from queue", None),
288
+ ("Double Peak", plan_double_peak, "Posts at 9am and 3pm", None),
289
+ ("Random Actor", plan_random, "Random sparse actions each day", None),
290
+ # Collab grid: 2x2 same/diff niche x low/high overlap + zero-guardrail.
291
+ ("Collab Same-Niche Low Overlap", plan_collab_same_low,
292
+ "user_niche=tech + niche_expert (same niche, overlap 0.10) — should yield HIGH boost.", "tech"),
293
+ ("Collab Same-Niche High Overlap", plan_collab_same_high,
294
+ "user_niche=lifestyle + viral_chaser (same niche, overlap 0.55) — penalty path: redundant audience.", "lifestyle"),
295
+ ("Collab Diff-Niche Low Overlap", plan_collab_diff_low,
296
+ "user_niche=tech + travel_creator (diff niche, overlap 0.30) — capped below same-niche-low.", "tech"),
297
+ ("Collab Diff-Niche High Overlap", plan_collab_diff_high,
298
+ "user_niche=tech + lifestyle_blogger (diff niche, overlap 0.40) — LOW reward (mismatch).", "tech"),
299
+ ("Collab Guardrail Block", plan_collab_blocked_zero,
300
+ "user_niche=tech + b2b_thought_leader (overlap 0.08 < 10%) — guardrail trips, forced penalty applied.", "tech"),
301
+ # Interaction grid: healthy + 4 penalty paths.
302
+ ("Interact Balanced", plan_interact_balanced,
303
+ "Healthy daily likes/comments/replies on-niche.", "tech"),
304
+ ("Interact Spam", plan_interact_spam,
305
+ "80 likes + 40 comments — spam path, shadowban_risk + reach penalty.", "tech"),
306
+ ("Interact Ignoring Own", plan_interact_ignoring_own,
307
+ "Zero replies to own audience — compounding loyalty drop.", "tech"),
308
+ ("Interact Off-Niche", plan_interact_off_niche,
309
+ "All interactions targeted at non-tech creators — reach penalty.", "tech"),
310
+ ("Interact Low-Quality", plan_interact_low_quality,
311
+ "Replies with quality=0.05 — replies discounted + extra reward penalty.", "tech"),
312
  ]
313
 
314
 
 
318
  print("=" * 70)
319
  print()
320
 
321
+ for scenario_name, plan_fn, description, user_niche in SCENARIOS:
322
  print("=" * 70)
323
  print(f"{scenario_name}")
324
  print(f" {description}")
325
+ if user_niche:
326
+ print(f" user_niche={user_niche}")
327
  print("=" * 70)
328
  print()
329
 
330
  for task in TASKS:
331
  _rng = stdlib_random.Random(99)
332
+ run_episode(task, plan_fn, scenario_name, user_niche=user_niche)
333
 
334
  print()
335
 
 
337
  print("SUMMARY TABLE")
338
  print("=" * 70)
339
  print()
340
+ print(f"{'Scenario':<35} {'Engage':>8} {'Strategic':>10} {'Competitive':>12}")
341
+ print("-" * 67)
342
 
343
+ for scenario_name, plan_fn, _, user_niche in SCENARIOS:
344
  scores = []
345
  for task in TASKS:
346
  _rng = stdlib_random.Random(99)
347
  env = ViraltestEnvironment()
348
+ reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
349
+ if user_niche:
350
+ reset_kwargs["user_niche"] = user_niche
351
+ obs = env.reset(**reset_kwargs)
352
  obs_dict = obs.model_dump()
353
  for day in range(1, 31):
354
  action = plan_fn(obs_dict, day)
 
357
  if obs.done:
358
  break
359
  scores.append((obs.metadata or {}).get("grader_score", 0.0))
360
+ print(f"{scenario_name:<35} {scores[0]:>8.4f} {scores[1]:>10.4f} {scores[2]:>12.4f}")
361
 
362
  print()
363
  print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
364
  print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
365
+ print("Collab Same-Niche Low Overlap should outperform any Diff-Niche collab.")
366
+ print("Interact Spam/Off-Niche/Ignoring/Low-Quality should underperform Balanced.")