Spaces:
Paused
Paused
Commit ·
1a2a407
1
Parent(s): f0a8734
added more scenarios
Browse files- __init__.py +2 -0
- eval_env.py +295 -0
- models.py +30 -0
- server/app.py +126 -13
- server/data/audience_overlap_matrix.json +24 -3
- server/viraltest_environment.py +448 -24
- test_scenarios.py +169 -22
__init__.py
CHANGED
|
@@ -9,6 +9,7 @@
|
|
| 9 |
from .client import ViraltestEnv
|
| 10 |
from .models import (
|
| 11 |
CollabProposal,
|
|
|
|
| 12 |
EngagementSignals,
|
| 13 |
ScheduledAction,
|
| 14 |
ToolCall,
|
|
@@ -19,6 +20,7 @@ from .models import (
|
|
| 19 |
|
| 20 |
__all__ = [
|
| 21 |
"CollabProposal",
|
|
|
|
| 22 |
"EngagementSignals",
|
| 23 |
"ScheduledAction",
|
| 24 |
"ToolCall",
|
|
|
|
| 9 |
from .client import ViraltestEnv
|
| 10 |
from .models import (
|
| 11 |
CollabProposal,
|
| 12 |
+
DailyInteractions,
|
| 13 |
EngagementSignals,
|
| 14 |
ScheduledAction,
|
| 15 |
ToolCall,
|
|
|
|
| 20 |
|
| 21 |
__all__ = [
|
| 22 |
"CollabProposal",
|
| 23 |
+
"DailyInteractions",
|
| 24 |
"EngagementSignals",
|
| 25 |
"ScheduledAction",
|
| 26 |
"ToolCall",
|
eval_env.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
End-to-end evaluation of the viraltest environment after the collab + interaction expansion.
|
| 3 |
+
|
| 4 |
+
Sections
|
| 5 |
+
--------
|
| 6 |
+
A) Collab tier diagnostics
|
| 7 |
+
- Per-tier expected multipliers from `_collab_evaluation`
|
| 8 |
+
- Episode runs with varying collab cadence (1, 5, 15 collabs/episode) to show that
|
| 9 |
+
the score spread between tiers GROWS with cadence, proving the multiplier is doing
|
| 10 |
+
real work and the small diffs in the 2-collab test are just dilution.
|
| 11 |
+
B) Interaction diagnostics
|
| 12 |
+
- Each penalty path (spam, ignoring_own, off_niche, low_quality, energy_drain) fires
|
| 13 |
+
the expected violation.
|
| 14 |
+
- Healthy band lifts reach_modifier > 1.0.
|
| 15 |
+
C) Cross-cutting sanity
|
| 16 |
+
- Every scenario completes without errors, energy non-negative, judge_report present.
|
| 17 |
+
|
| 18 |
+
Run: .venv/bin/python eval_env.py
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from typing import Any, Dict, List, Optional
|
| 22 |
+
|
| 23 |
+
from models import (
|
| 24 |
+
CollabProposal,
|
| 25 |
+
DailyInteractions,
|
| 26 |
+
ScheduledAction,
|
| 27 |
+
ViraltestAction,
|
| 28 |
+
)
|
| 29 |
+
from server.viraltest_environment import ViraltestEnvironment
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
SEED = 42
|
| 33 |
+
HORIZON = 15 # TASK_HORIZON in the env
|
| 34 |
+
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# Helpers
|
| 37 |
+
# ---------------------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
def _post_only(
    content_type: str = "reel",
    topic: str = "AI tools",
    tags: Optional[List[str]] = None,
    intent: str = "watch_bait",
) -> ScheduledAction:
    """Build a single "post" action scheduled at hour 12.

    Args:
        content_type: Content format passed through to the action.
        topic: Topic passed through to the action.
        tags: Tags for the post; ``None`` or an empty list falls back to ``["ai"]``.
        intent: Intent label passed through to the action.

    Returns:
        The populated ``ScheduledAction``.
    """
    effective_tags = tags if tags else ["ai"]
    return ScheduledAction(
        hour=12,
        action_type="post",
        content_type=content_type,
        topic=topic,
        tags=effective_tags,
        intent=intent,
    )
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _run_episode(
    plan_fn,
    user_niche: Optional[str] = None,
    task: str = "monthly_competitive",
) -> Dict[str, Any]:
    """Run one seeded episode driven by ``plan_fn`` and summarise the outcome.

    Args:
        plan_fn: Callable ``(obs_dict, day) -> action``. It receives the most
            recent observation dumped to a dict and the 1-based day number.
        user_niche: Forwarded to ``env.reset`` only when truthy.
        task: Task name forwarded to ``env.reset``.

    Returns:
        A dict with the final ``score`` (read from the last observation's
        metadata, 0.0 when absent), ``followers``, final ``energy``, the
        lowest energy seen (``min_energy``, starting from 1.0),
        ``engagement_rate``, the accumulated judge and interaction
        violations, and the last observation's ``error`` and ``done`` fields.
    """
    reset_args: Dict[str, Any] = {"task": task, "seed": SEED}
    if user_niche:
        reset_args["user_niche"] = user_niche

    env = ViraltestEnvironment()
    obs = env.reset(**reset_args)

    judge_log: List[str] = []
    interaction_log: List[str] = []
    lowest_energy = 1.0

    # One extra iteration beyond HORIZON; the loop normally exits via `done`.
    for day in range(1, HORIZON + 2):
        obs = env.step(plan_fn(obs.model_dump(), day))
        lowest_energy = min(lowest_energy, obs.creator_energy)

        report = obs.judge_report
        if report:
            judge_log.extend(report.violations)

        metrics = obs.interaction_metrics
        if metrics:
            # A missing or None "violations" entry contributes nothing.
            interaction_log.extend(metrics.get("violations", []) or [])

        if obs.done:
            break

    metadata = obs.metadata or {}
    return {
        "score": float(metadata.get("grader_score", 0.0)),
        "followers": int(obs.follower_count),
        "min_energy": float(lowest_energy),
        "energy": float(obs.creator_energy),
        "engagement_rate": float(obs.engagement_rate),
        "judge_violations": judge_log,
        "interaction_violations": interaction_log,
        "error": obs.error,
        "done": obs.done,
    }
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ---------------------------------------------------------------------------
|
| 89 |
+
# A) COLLAB TIER DIAGNOSTICS
|
| 90 |
+
# ---------------------------------------------------------------------------
|
| 91 |
+
|
| 92 |
+
def section_a_collab_evaluator() -> None:
    """Print a table of ``_collab_evaluation`` results for each partner archetype.

    The environment is reset with ``user_niche="tech"`` and the module seed,
    then every listed partner id is evaluated and printed as one row.
    """
    banner = "=" * 78
    print(banner)
    print("A1. _collab_evaluation snapshot (user_niche=tech)")
    print(banner)

    env = ViraltestEnvironment()
    env.reset(task="monthly_competitive", seed=SEED, user_niche="tech")

    row = "{:<22} {:>5} {:>7} {:>5} {:>5} {:>10} {:>10} {:<28}"
    print(row.format("partner", "same?", "overlap", "fol", "gap%", "eng_mult", "growth", "reason/recommended"))
    print("-" * 105)

    partner_ids = (
        "niche_expert",
        "viral_chaser",
        "lifestyle_blogger",
        "b2b_thought_leader",
        "food_creator",
        "fitness_coach",
        "travel_creator",
    )
    for partner_id in partner_ids:
        ev = env._collab_evaluation(partner_id)
        # The block reason is only read when the partner is not recommended.
        if ev["recommended"]:
            verdict = "OK"
        else:
            verdict = f"BLOCK:{ev['reason']}"
        same_flag = "Y" if ev["same_niche"] else "N"
        print(row.format(
            partner_id,
            same_flag,
            f"{ev['overlap']:.2f}",
            ev["partner_followers"],
            f"{ev['follower_gap_pct']*100:.0f}%",
            f"{ev['eng_mult']:.3f}",
            f"{ev['growth_mult']:.3f}",
            verdict,
        ))
    print()
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def make_collab_plan(partner_id: str, collab_days: List[int]):
    """Return a plan function: one default post daily, plus a collab on ``collab_days``.

    Args:
        partner_id: Partner to propose the collab to.
        collab_days: Day numbers on which a collab proposal is attached.

    Returns:
        A ``plan(obs, day)`` callable producing a ``ViraltestAction``.
    """
    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        posts = [_post_only()]
        proposal = (
            CollabProposal(partner_id=partner_id, content_type="reel", hour=12)
            if day in collab_days
            else None
        )
        return ViraltestAction(scheduled_actions=posts, collab=proposal)

    return plan
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def section_a_collab_cadence() -> None:
    """Print the final score of each collab tier at three collab cadences.

    For every (tier, cadence) pair one full episode is run with a plan that
    posts once a day and proposes a collab on the cadence's days. Results are
    printed as a table with one row per tier and one column per cadence.
    """
    print("=" * 78)
    print("A2. Score spread vs collab cadence (1, 5, 15 collabs in 15-day horizon)")
    print(" Hypothesis: more collab days -> larger gap between tiers")
    print("=" * 78)

    # Map each tier to (partner_id, user_niche) — chosen so the partner clears the
    # follower-size guardrail (peer-tier mocked followers in the data file).
    tiers = [
        ("Same-Niche Low", "niche_expert", "tech"),
        ("Same-Niche High", "viral_chaser", "lifestyle"),  # overlap=0.55 (high)
        ("Diff-Niche Low", "food_creator", "tech"),  # overlap=0.25 (mid-low)
        ("Diff-Niche High", "lifestyle_blogger", "tech"),  # overlap=0.40 (boundary high)
        ("Guardrail Block", "b2b_thought_leader", "tech"),  # overlap=0.08 (<10%)
    ]
    # Column label -> days on which a collab is proposed.
    cadences = {
        "1 collab": [5],
        "5 collabs": [3, 5, 7, 9, 11],
        "15 collabs": list(range(1, 16)),
    }

    fmt = "{:<22} {:>10} {:>10} {:>10}"
    print(fmt.format("Tier", *cadences))
    print("-" * 56)
    for label, partner_id, user_niche in tiers:
        # Only the day lists are needed here; the labels are used for the header alone.
        results = (
            _run_episode(make_collab_plan(partner_id, days), user_niche=user_niche)
            for days in cadences.values()
        )
        scores = [f"{r['score']:.4f}" for r in results]
        print(fmt.format(label, *scores))
    print()
    print(" -> Same-Niche Low score should DROP slowly as you add collabs.")
    print(" -> Same-Niche High and Diff-Niche High should DROP quickly (penalty stacks).")
    print(" -> Spread between top and bottom should GROW with cadence.")
    print()
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# ---------------------------------------------------------------------------
|
| 169 |
+
# B) INTERACTION DIAGNOSTICS
|
| 170 |
+
# ---------------------------------------------------------------------------
|
| 171 |
+
|
| 172 |
+
def make_interaction_plan(interactions: DailyInteractions):
    """Return a plan function that posts once a day with the same ``interactions`` attached."""
    def plan(obs: Dict[str, Any], day: int) -> ViraltestAction:
        daily_posts = [_post_only()]
        return ViraltestAction(scheduled_actions=daily_posts, interactions=interactions)

    return plan
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def section_b_interactions() -> None:
    """Run one episode per interaction pattern and print whether the expected violation fired.

    Each case is ``(label, interactions, expected_violation, must_fire)``. A
    case is reported "OK" when the presence of the expected violation (matched
    by substring, with the ``interaction_`` prefix removed) equals
    ``must_fire``. The "healthy" case is instead "OK" only when no interaction
    violations were recorded at all.
    """
    print("=" * 78)
    print("B. Interaction penalty-path matrix")
    print("=" * 78)

    cases = [
        (
            "healthy",
            DailyInteractions(
                likes_on_others=12, comments_on_others=5, replies_to_audience=3,
                target_partner_ids=["niche_expert"], avg_reply_quality=0.8,
            ),
            "interaction_*",
            False,
        ),
        (
            "spam",
            DailyInteractions(
                likes_on_others=80, comments_on_others=40, replies_to_audience=0,
                target_partner_ids=["niche_expert"], avg_reply_quality=0.4,
            ),
            "interaction_spam",
            True,
        ),
        (
            "ignoring_own",
            DailyInteractions(
                likes_on_others=8, comments_on_others=4, replies_to_audience=0,
                target_partner_ids=["niche_expert"], avg_reply_quality=0.6,
            ),
            "interaction_ignoring_own",
            True,
        ),
        (
            "off_niche",
            DailyInteractions(
                likes_on_others=10, comments_on_others=5, replies_to_audience=2,
                target_partner_ids=["food_creator", "fitness_coach", "travel_creator", "lifestyle_blogger"],
                avg_reply_quality=0.7,
            ),
            "interaction_off_niche",
            True,
        ),
        (
            "low_quality",
            DailyInteractions(
                likes_on_others=10, comments_on_others=5, replies_to_audience=8,
                target_partner_ids=["niche_expert"], avg_reply_quality=0.05,
            ),
            "interaction_low_quality",
            True,
        ),
        (
            "energy_drain",
            DailyInteractions(
                likes_on_others=200, comments_on_others=100, replies_to_audience=100,
                target_partner_ids=["niche_expert"], avg_reply_quality=0.5,
            ),
            "interaction_energy_drain",
            True,
        ),
    ]

    fmt = "{:<14} {:>7} {:>9} {:>10} {:>10} {:>11} {:<12}"
    print(fmt.format("case", "score", "followers", "min_energy", "engRate", "violations", "expect"))
    print("-" * 80)
    for label, interactions, expected_violation, must_fire in cases:
        result = _run_episode(make_interaction_plan(interactions), user_niche="tech")
        viols = result["interaction_violations"]

        needle = expected_violation.replace("interaction_", "")
        fired = any(needle in v for v in viols)
        # For "healthy" we expect NO interaction violations.
        passed = (not viols) if label == "healthy" else (fired == must_fire)
        verdict = "OK" if passed else "FAIL"

        print(fmt.format(
            label,
            f"{result['score']:.3f}",
            result["followers"],
            f"{result['min_energy']:.2f}",
            f"{result['engagement_rate']:.3f}",
            len(viols),
            verdict,
        ))
    print()
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
# ---------------------------------------------------------------------------
|
| 235 |
+
# C) CROSS-CUTTING SANITY
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
|
| 238 |
+
def section_c_sanity() -> None:
    """Print baseline episode results and the output of two discovery tools.

    First runs three simple baselines (rest, one post a day, two posts a day)
    and prints their score, followers, final energy, engagement rate and any
    error (truncated to 12 characters). Then dispatches
    ``query_interaction_norms`` and ``query_creator_pool`` directly on a fresh
    environment and prints what they return. A failed ``query_creator_pool``
    call is reported instead of being indexed into.
    """
    # Kept function-local, as in the original script.
    from models import ToolCall

    print("=" * 78)
    print("C. Cross-cutting sanity (rest, post-only, smart, query_interaction_norms)")
    print("=" * 78)

    # Baselines for visual sanity
    def plan_rest(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[])

    def plan_post1(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[_post_only()])

    def plan_post2(obs: Dict[str, Any], day: int) -> ViraltestAction:
        return ViraltestAction(scheduled_actions=[
            _post_only(content_type="reel", topic="AI tools"),
            ScheduledAction(hour=19, action_type="post", content_type="carousel",
                            topic="AI tools", tags=["coding"], intent="save_bait"),
        ])

    fmt = "{:<14} {:>7} {:>9} {:>8} {:>8} {:>6}"
    print(fmt.format("baseline", "score", "followers", "energy", "engRate", "errs"))
    print("-" * 60)
    for label, plan_fn in [("rest", plan_rest), ("1-post", plan_post1), ("2-post", plan_post2)]:
        r = _run_episode(plan_fn, user_niche="tech")
        errs = "0" if not r["error"] else r["error"][:12]
        print(fmt.format(label, f"{r['score']:.3f}", r["followers"],
                         f"{r['energy']:.2f}", f"{r['engagement_rate']:.3f}", errs))
    print()

    # Verify query_interaction_norms surfaces sensible values.
    env = ViraltestEnvironment()
    env.reset(task="monthly_engage", seed=SEED, user_niche="tech")
    norms_res = env._dispatch_tool(ToolCall(name="query_interaction_norms", arguments={}))
    print("query_interaction_norms tool ->")
    print(f" success={norms_res.success}, data={norms_res.data}")
    print()

    # Verify query_creator_pool returns the recommendation surface.
    pool_res = env._dispatch_tool(ToolCall(name="query_creator_pool", arguments={}))
    print("query_creator_pool tool ->")
    if not pool_res.success:
        # Report the failure in the same shape as above rather than crashing
        # on a missing key while indexing into the payload.
        print(f" success={pool_res.success}, data={pool_res.data}")
        print()
        return

    pool_data = pool_res.data
    print(f" user_niche={pool_data['user_niche']}, user_followers={pool_data['user_followers']}")
    for p in pool_data["pool"]:
        print(f" {p['id']:<22} same_niche={p['same_niche']!s:<5} overlap={p['audience_overlap']:>4} "
              f"recommended={p['recommended']!s:<5} reason={p['reason']}")
    print()
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# ---------------------------------------------------------------------------
|
| 287 |
+
# Main
|
| 288 |
+
# ---------------------------------------------------------------------------
|
| 289 |
+
|
| 290 |
+
if __name__ == "__main__":
    # Run every diagnostic section in report order, then print the closing line.
    for run_section in (
        section_a_collab_evaluator,
        section_a_collab_cadence,
        section_b_interactions,
        section_c_sanity,
    ):
        run_section()
    print("Evaluation complete.")
|
models.py
CHANGED
|
@@ -64,6 +64,28 @@ class CollabProposal(BaseModel):
|
|
| 64 |
hour: int = Field(default=12, ge=0, le=23)
|
| 65 |
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
class ViraltestAction(Action):
|
| 68 |
"""Daily plan: tool calls for discovery, then scheduled actions to commit."""
|
| 69 |
|
|
@@ -79,6 +101,10 @@ class ViraltestAction(Action):
|
|
| 79 |
default=None,
|
| 80 |
description="Optional collaboration proposal (max 2 per month)",
|
| 81 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
notes: Optional[str] = Field(
|
| 83 |
default=None,
|
| 84 |
max_length=2000,
|
|
@@ -191,6 +217,10 @@ class ViraltestObservation(Observation):
|
|
| 191 |
tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
|
| 192 |
agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
|
| 193 |
api_budget_remaining: int = Field(default=100, ge=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
grader_score: Optional[float] = Field(default=None)
|
| 196 |
error: Optional[str] = Field(default=None)
|
|
|
|
| 64 |
hour: int = Field(default=12, ge=0, le=23)
|
| 65 |
|
| 66 |
|
| 67 |
+
class DailyInteractions(BaseModel):
    """Daily aggregate of creator interactions: likes, comments on others' content, and replies to own audience.

    Models the comment/like/reply economy. Healthy interaction in moderation rewards reach;
    spam, audience-ignoring, off-niche, and low-quality patterns are penalized.
    """

    # Outbound activity on other creators' content (validated to 0..200 and 0..100).
    likes_on_others: int = Field(
        default=0,
        ge=0,
        le=200,
        description="Likes given on other creators' posts today",
    )
    comments_on_others: int = Field(
        default=0,
        ge=0,
        le=100,
        description="Comments left on other creators' posts today",
    )

    # Inbound activity: replies on the creator's own posts (validated to 0..100).
    replies_to_audience: int = Field(
        default=0,
        ge=0,
        le=100,
        description="Replies to incoming comments on your own posts",
    )

    # Ids of the creators interacted with; defaults to a fresh empty list per instance.
    target_partner_ids: List[str] = Field(
        default_factory=list,
        description="Competitor archetype ids you interacted with today (used for off-niche detection)",
    )

    # Self-reported reply quality, validated to the closed range 0.0..1.0.
    avg_reply_quality: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Self-rated effort/depth of replies (0=one-word, 1=substantive)",
    )
|
| 87 |
+
|
| 88 |
+
|
| 89 |
class ViraltestAction(Action):
|
| 90 |
"""Daily plan: tool calls for discovery, then scheduled actions to commit."""
|
| 91 |
|
|
|
|
| 101 |
default=None,
|
| 102 |
description="Optional collaboration proposal (max 2 per month)",
|
| 103 |
)
|
| 104 |
+
interactions: Optional[DailyInteractions] = Field(
|
| 105 |
+
default=None,
|
| 106 |
+
description="Daily likes/comments/replies activity (community engagement layer)",
|
| 107 |
+
)
|
| 108 |
notes: Optional[str] = Field(
|
| 109 |
default=None,
|
| 110 |
max_length=2000,
|
|
|
|
| 217 |
tool_results: List[ToolResult] = Field(default_factory=list, description="Results from tool_calls this step")
|
| 218 |
agent_notes: Optional[str] = Field(default=None, description="Echo of agent's notes from previous step")
|
| 219 |
api_budget_remaining: int = Field(default=100, ge=0)
|
| 220 |
+
interaction_metrics: Optional[Dict[str, Any]] = Field(
|
| 221 |
+
default=None,
|
| 222 |
+
description="Daily interaction summary: reach modifier, shadowban_risk, and a one-line reason",
|
| 223 |
+
)
|
| 224 |
|
| 225 |
grader_score: Optional[float] = Field(default=None)
|
| 226 |
error: Optional[str] = Field(default=None)
|
server/app.py
CHANGED
|
@@ -29,10 +29,22 @@ if "ENABLE_WEB_INTERFACE" not in os.environ:
|
|
| 29 |
os.environ["ENABLE_WEB_INTERFACE"] = "true"
|
| 30 |
|
| 31 |
try:
|
| 32 |
-
from ..models import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
|
| 34 |
except ImportError:
|
| 35 |
-
from models import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
|
| 37 |
|
| 38 |
try:
|
|
@@ -174,10 +186,17 @@ _CONTENT_TYPES = ["reel", "carousel", "story", "text_post"]
|
|
| 174 |
_TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
|
| 175 |
|
| 176 |
|
| 177 |
-
def _make_daily_plan(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
return ViraltestAction(
|
| 179 |
scheduled_actions=[ScheduledAction(**a) for a in actions],
|
| 180 |
notes=notes,
|
|
|
|
|
|
|
| 181 |
)
|
| 182 |
|
| 183 |
|
|
@@ -236,12 +255,96 @@ def _plan_minimal(obs: dict, day: int) -> ViraltestAction:
|
|
| 236 |
])
|
| 237 |
|
| 238 |
|
| 239 |
-
|
| 240 |
-
"
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
}
|
| 246 |
|
| 247 |
|
|
@@ -265,9 +368,14 @@ async def dashboard_simulate(body: Dict[str, Any] = Body(...)):
|
|
| 265 |
if scenario_id not in SCENARIOS:
|
| 266 |
return {"error": f"Unknown scenario: {scenario_id}"}
|
| 267 |
|
| 268 |
-
|
|
|
|
|
|
|
| 269 |
env = ViraltestEnvironment()
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
| 271 |
obs_dict = obs.model_dump()
|
| 272 |
|
| 273 |
steps: List[Dict[str, Any]] = []
|
|
@@ -347,11 +455,16 @@ async def training_evidence():
|
|
| 347 |
global _SIM_RNG
|
| 348 |
|
| 349 |
results = []
|
| 350 |
-
for scenario_id,
|
|
|
|
|
|
|
| 351 |
for task in _TRAINING_TASKS:
|
| 352 |
_SIM_RNG = stdlib_random.Random(99)
|
| 353 |
env = ViraltestEnvironment()
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
| 355 |
obs_dict = obs.model_dump()
|
| 356 |
|
| 357 |
rewards: List[float] = []
|
|
|
|
| 29 |
os.environ["ENABLE_WEB_INTERFACE"] = "true"
|
| 30 |
|
| 31 |
try:
|
| 32 |
+
from ..models import (
|
| 33 |
+
CollabProposal,
|
| 34 |
+
DailyInteractions,
|
| 35 |
+
ScheduledAction,
|
| 36 |
+
ViraltestAction,
|
| 37 |
+
ViraltestObservation,
|
| 38 |
+
)
|
| 39 |
from .viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
|
| 40 |
except ImportError:
|
| 41 |
+
from models import (
|
| 42 |
+
CollabProposal,
|
| 43 |
+
DailyInteractions,
|
| 44 |
+
ScheduledAction,
|
| 45 |
+
ViraltestAction,
|
| 46 |
+
ViraltestObservation,
|
| 47 |
+
)
|
| 48 |
from server.viraltest_environment import TOOL_CATALOG, ViraltestEnvironment
|
| 49 |
|
| 50 |
try:
|
|
|
|
| 186 |
_TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food recipe", "wellness tips"]
|
| 187 |
|
| 188 |
|
| 189 |
+
def _make_daily_plan(
    actions: list,
    notes: Optional[str] = None,
    collab: Optional[CollabProposal] = None,
    interactions: Optional[DailyInteractions] = None,
) -> ViraltestAction:
    """Assemble a ``ViraltestAction`` from raw action dicts and optional extras.

    Args:
        actions: Iterable of dicts, each expanded as keyword arguments to
            ``ScheduledAction``.
        notes: Optional notes forwarded unchanged.
        collab: Optional collab proposal forwarded unchanged.
        interactions: Optional daily interactions forwarded unchanged.

    Returns:
        The assembled ``ViraltestAction``.
    """
    scheduled = [ScheduledAction(**spec) for spec in actions]
    return ViraltestAction(
        scheduled_actions=scheduled,
        notes=notes,
        collab=collab,
        interactions=interactions,
    )
|
| 201 |
|
| 202 |
|
|
|
|
| 255 |
])
|
| 256 |
|
| 257 |
|
| 258 |
+
def _plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
    """Post one reel at noon daily; propose a collab with ``niche_expert`` on days 5 and 15.

    The post uses the first trending topic from ``obs`` (falling back to
    "AI tools") and up to two trending tags followed by "ai".
    """
    topics = obs.get("trending_topics") or ["AI tools"]
    hot_tags = obs.get("trending_tags") or []
    post = {
        "hour": 12,
        "action_type": "post",
        "content_type": "reel",
        "topic": topics[0],
        "tags": [*hot_tags[:2], "ai"],
        "intent": "watch_bait",
    }
    proposal = (
        CollabProposal(partner_id="niche_expert", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Same-niche low-overlap collab demo.", collab=proposal)
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
    """Post one reel at noon daily; propose a collab with ``lifestyle_blogger`` on days 5 and 15.

    The post uses the first trending topic from ``obs`` (falling back to
    "AI tools") and up to two trending tags followed by "ai".
    """
    topics = obs.get("trending_topics") or ["AI tools"]
    hot_tags = obs.get("trending_tags") or []
    post = {
        "hour": 12,
        "action_type": "post",
        "content_type": "reel",
        "topic": topics[0],
        "tags": [*hot_tags[:2], "ai"],
        "intent": "watch_bait",
    }
    proposal = (
        CollabProposal(partner_id="lifestyle_blogger", content_type="reel", hour=12)
        if day in (5, 15)
        else None
    )
    return _make_daily_plan([post], notes="Diff-niche high-overlap collab demo.", collab=proposal)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
def _plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
    """Post one reel at noon with a moderate interaction profile attached.

    The profile is 12 likes, 5 comments and 3 audience replies aimed at
    ``niche_expert`` with reply quality 0.8. The topic is the first trending
    topic from ``obs``, falling back to "AI tools".
    """
    topics = obs.get("trending_topics") or ["AI tools"]
    engagement = DailyInteractions(
        likes_on_others=12,
        comments_on_others=5,
        replies_to_audience=3,
        target_partner_ids=["niche_expert"],
        avg_reply_quality=0.8,
    )
    post = {
        "hour": 12,
        "action_type": "post",
        "content_type": "reel",
        "topic": topics[0],
        "tags": ["ai"],
        "intent": "watch_bait",
    }
    return _make_daily_plan([post], notes="Healthy interaction demo.", interactions=engagement)
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def _plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
    """Post one reel at noon with a high-volume interaction profile attached.

    The profile is 80 likes, 40 comments and no audience replies aimed at
    ``niche_expert`` with reply quality 0.4. The topic is the first trending
    topic from ``obs``, falling back to "AI tools".
    """
    topics = obs.get("trending_topics") or ["AI tools"]
    engagement = DailyInteractions(
        likes_on_others=80,
        comments_on_others=40,
        replies_to_audience=0,
        target_partner_ids=["niche_expert"],
        avg_reply_quality=0.4,
    )
    post = {
        "hour": 12,
        "action_type": "post",
        "content_type": "reel",
        "topic": topics[0],
        "tags": ["ai"],
        "intent": "watch_bait",
    }
    return _make_daily_plan([post], notes="Interaction spam demo.", interactions=engagement)
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# Scenario tuple: (label, description, plan_fn, optional user_niche).
|
| 317 |
+
# user_niche is honored by dashboard_simulate / training_evidence; defaults to "generic" when None.
|
| 318 |
+
SCENARIOS: Dict[str, tuple] = {
    # --- Posting-only baselines (no fixed user niche) ---
    "always_rest": (
        "Always Rest",
        "Never posts. Tests follower decay.",
        _plan_always_rest,
        None,
    ),
    "spam": (
        "Spam Post",
        "Same reel every hour. Burns out fast.",
        _plan_spam,
        None,
    ),
    "smart": (
        "Smart Agent",
        "Optimal: peak hours, trending, varied types+intents.",
        _plan_smart,
        None,
    ),
    "minimal": (
        "Minimal Poster",
        "1 carousel per day at noon.",
        _plan_minimal,
        None,
    ),
    "random": (
        "Random Actor",
        "Random actions. Baseline test.",
        _plan_random,
        None,
    ),
    # --- Collab scenarios (user niche pinned to "tech") ---
    "collab_same_low": (
        "Collab Same-Niche Low Overlap",
        "Same-niche partner with <20% overlap. Best-case collab reward path.",
        _plan_collab_same_low,
        "tech",
    ),
    "collab_diff_high": (
        "Collab Diff-Niche High Overlap",
        "Diff-niche partner with >40% overlap. Penalty path (audience mismatch).",
        _plan_collab_diff_high,
        "tech",
    ),
    # --- Interaction scenarios (user niche pinned to "tech") ---
    "interact_balanced": (
        "Interact Balanced",
        "Healthy on-niche likes/comments and audience replies.",
        _plan_interact_balanced,
        "tech",
    ),
    "interact_spam": (
        "Interact Spam",
        "80 likes + 40 comments — spam path triggers shadowban_risk.",
        _plan_interact_spam,
        "tech",
    ),
}
|
| 349 |
|
| 350 |
|
|
|
|
| 368 |
if scenario_id not in SCENARIOS:
|
| 369 |
return {"error": f"Unknown scenario: {scenario_id}"}
|
| 370 |
|
| 371 |
+
entry = SCENARIOS[scenario_id]
|
| 372 |
+
label, desc, plan_fn = entry[0], entry[1], entry[2]
|
| 373 |
+
user_niche = entry[3] if len(entry) > 3 else None
|
| 374 |
env = ViraltestEnvironment()
|
| 375 |
+
reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
|
| 376 |
+
if user_niche:
|
| 377 |
+
reset_kwargs["user_niche"] = user_niche
|
| 378 |
+
obs = env.reset(**reset_kwargs)
|
| 379 |
obs_dict = obs.model_dump()
|
| 380 |
|
| 381 |
steps: List[Dict[str, Any]] = []
|
|
|
|
| 455 |
global _SIM_RNG
|
| 456 |
|
| 457 |
results = []
|
| 458 |
+
for scenario_id, entry in SCENARIOS.items():
|
| 459 |
+
label, desc, plan_fn = entry[0], entry[1], entry[2]
|
| 460 |
+
user_niche = entry[3] if len(entry) > 3 else None
|
| 461 |
for task in _TRAINING_TASKS:
|
| 462 |
_SIM_RNG = stdlib_random.Random(99)
|
| 463 |
env = ViraltestEnvironment()
|
| 464 |
+
reset_kwargs: Dict[str, Any] = {"task": task, "seed": 42}
|
| 465 |
+
if user_niche:
|
| 466 |
+
reset_kwargs["user_niche"] = user_niche
|
| 467 |
+
obs = env.reset(**reset_kwargs)
|
| 468 |
obs_dict = obs.model_dump()
|
| 469 |
|
| 470 |
rewards: List[float] = []
|
server/data/audience_overlap_matrix.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"_meta": {
|
| 3 |
-
"description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience. Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
|
| 4 |
-
"source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest."
|
|
|
|
| 5 |
},
|
| 6 |
"archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
|
| 7 |
"matrix": [
|
|
@@ -13,5 +14,25 @@
|
|
| 13 |
[0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
|
| 14 |
[0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
|
| 15 |
[0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
|
| 16 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"_meta": {
|
| 3 |
+
"description": "8x8 symmetric audience overlap matrix between competitor archetypes and the user creator. Values 0.0-1.0 represent fraction of shared audience (Jaccard intersection fraction). Used by propose_collab to compute collab reward multipliers and by query_creator_pool to expose overlap to the agent. Same-niche pairs ~0.4-0.65, cross-niche ~0.05-0.20.",
|
| 4 |
+
"source": "Competitor pairs estimated from Rival IQ 2025 cross-industry overlap patterns + niche proximity heuristic. user_creator row tuned to a generic micro-creator (no locked niche): broad mass-market partners (lifestyle_blogger, viral_chaser) score highest; specialist partners (b2b_thought_leader, niche_expert) score lowest.",
|
| 5 |
+
"mock_followers_note": "Mocked follower counts span tiers from micro (10k user) to mid (250k viral_chaser). Used to derive intersection size via Jaccard inversion: |A intersect B| = overlap * (|A| + |B|) / (1 + overlap)."
|
| 6 |
},
|
| 7 |
"archetype_ids": ["niche_expert", "viral_chaser", "lifestyle_blogger", "b2b_thought_leader", "food_creator", "fitness_coach", "travel_creator", "user_creator"],
|
| 8 |
"matrix": [
|
|
|
|
| 14 |
[0.10, 0.25, 0.35, 0.10, 0.45, 1.00, 0.30, 0.28],
|
| 15 |
[0.15, 0.30, 0.40, 0.12, 0.35, 0.30, 1.00, 0.30],
|
| 16 |
[0.10, 0.35, 0.40, 0.08, 0.25, 0.28, 0.30, 1.00]
|
| 17 |
+
],
|
| 18 |
+
"niche_by_archetype": {
|
| 19 |
+
"niche_expert": "tech",
|
| 20 |
+
"viral_chaser": "lifestyle",
|
| 21 |
+
"lifestyle_blogger": "lifestyle",
|
| 22 |
+
"b2b_thought_leader": "business",
|
| 23 |
+
"food_creator": "food",
|
| 24 |
+
"fitness_coach": "fitness",
|
| 25 |
+
"travel_creator": "travel",
|
| 26 |
+
"user_creator": "generic"
|
| 27 |
+
},
|
| 28 |
+
"mock_followers_by_archetype": {
|
| 29 |
+
"niche_expert": 12000,
|
| 30 |
+
"viral_chaser": 250000,
|
| 31 |
+
"lifestyle_blogger": 11000,
|
| 32 |
+
"b2b_thought_leader": 9000,
|
| 33 |
+
"food_creator": 12000,
|
| 34 |
+
"fitness_coach": 8000,
|
| 35 |
+
"travel_creator": 11000,
|
| 36 |
+
"user_creator": 10000
|
| 37 |
+
}
|
| 38 |
}
|
server/viraltest_environment.py
CHANGED
|
@@ -26,6 +26,7 @@ from openenv.core.env_server.types import State
|
|
| 26 |
try:
|
| 27 |
from ..models import (
|
| 28 |
CollabProposal,
|
|
|
|
| 29 |
EngagementSignals,
|
| 30 |
HeadlineMetrics,
|
| 31 |
JudgeReport,
|
|
@@ -38,6 +39,7 @@ try:
|
|
| 38 |
except ImportError:
|
| 39 |
from models import (
|
| 40 |
CollabProposal,
|
|
|
|
| 41 |
EngagementSignals,
|
| 42 |
HeadlineMetrics,
|
| 43 |
JudgeReport,
|
|
@@ -88,6 +90,13 @@ _HEATMAP_GRID: Dict[int, List[float]] = {
|
|
| 88 |
int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
|
| 89 |
}
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# ---------------------------------------------------------------------------
|
| 92 |
# Constants (research-backed, Tier 1-3 sources)
|
| 93 |
# ---------------------------------------------------------------------------
|
|
@@ -166,12 +175,56 @@ TREND_DEFAULT_HALFLIFE_HOURS = 60
|
|
| 166 |
TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
|
| 167 |
# Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
|
| 168 |
# Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
|
| 169 |
-
COLLAB_REACH_K = 0.60 # cross-audience exposure: capped reach uplift when overlap is 0
|
| 170 |
-
COLLAB_AFFINITY_K = 0.30 # same-audience affinity: per-impression engagement uplift when overlap is 1
|
| 171 |
-
COLLAB_GROWTH_K = 1.50 # cross-pollination follower spillover, scales (1 - overlap)
|
| 172 |
COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when partner reused this brand
|
| 173 |
COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
|
| 176 |
|
| 177 |
# Heuristic baselines for headline metric `vs_baseline_pct`.
|
|
@@ -251,17 +304,21 @@ TOOL_CATALOG = {
|
|
| 251 |
"parameters": {"scheduled_actions": {"type": "array"}},
|
| 252 |
},
|
| 253 |
"query_creator_pool": {
|
| 254 |
-
"description": "List available competitor archetypes for potential collaboration
|
| 255 |
"parameters": {},
|
| 256 |
},
|
| 257 |
"propose_collab": {
|
| 258 |
-
"description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored
|
| 259 |
"parameters": {
|
| 260 |
"partner_id": {"type": "string"},
|
| 261 |
"content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
|
| 262 |
"hour": {"type": "integer", "minimum": 0, "maximum": 23},
|
| 263 |
},
|
| 264 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
}
|
| 266 |
|
| 267 |
|
|
@@ -305,6 +362,15 @@ class ViraltestEnvironment(Environment):
|
|
| 305 |
self._collabs_this_month = 0
|
| 306 |
self._collab_history: List[str] = []
|
| 307 |
self._active_collab: Optional[CollabProposal] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
self._low_energy_days = 0
|
| 309 |
self._total_posts_this_week = 0
|
| 310 |
self._week_start_day = 0
|
|
@@ -486,7 +552,7 @@ class ViraltestEnvironment(Environment):
|
|
| 486 |
|
| 487 |
return daily_fatigue * weekly_mult
|
| 488 |
|
| 489 |
-
# ----- collab
|
| 490 |
|
| 491 |
def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
|
| 492 |
ids = _OVERLAP_DATA.get("archetype_ids", [])
|
|
@@ -496,21 +562,297 @@ class ViraltestEnvironment(Environment):
|
|
| 496 |
p = ids.index(partner_id)
|
| 497 |
return _OVERLAP_DATA["matrix"][u][p]
|
| 498 |
|
| 499 |
-
def
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
if partner_id in self._collab_history[:-1]:
|
| 509 |
-
|
| 510 |
-
|
|
|
|
|
|
|
| 511 |
prior = max(0, self._collabs_this_month - 1)
|
| 512 |
fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
# ----- engagement signals (Mosseri-aligned) -----
|
| 516 |
|
|
@@ -597,18 +939,68 @@ class ViraltestEnvironment(Environment):
|
|
| 597 |
elif tool.name == "query_creator_pool":
|
| 598 |
pool = []
|
| 599 |
for comp in self._competitors:
|
| 600 |
-
|
| 601 |
pool.append({
|
| 602 |
-
"id": comp.id,
|
| 603 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
})
|
| 605 |
-
return ToolResult(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
|
| 607 |
elif tool.name == "propose_collab":
|
| 608 |
partner_id = tool.arguments.get("partner_id", "")
|
| 609 |
if partner_id not in [c.id for c in self._competitors]:
|
| 610 |
return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
|
| 611 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
|
| 613 |
return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
|
| 614 |
|
|
@@ -665,6 +1057,14 @@ class ViraltestEnvironment(Environment):
|
|
| 665 |
if self._hours_since_sleep > 22:
|
| 666 |
violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
|
| 667 |
pc -= 0.10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
|
| 669 |
burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
|
| 670 |
sustainability_risk = max(0.0, min(1.0, burnout_pressure))
|
|
@@ -729,6 +1129,11 @@ class ViraltestEnvironment(Environment):
|
|
| 729 |
self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
|
| 730 |
self._init_state()
|
| 731 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 732 |
self._shift_label = kwargs.get("shift_label")
|
| 733 |
self._chain_id = kwargs.get("episode_chain_id")
|
| 734 |
|
|
@@ -766,11 +1171,16 @@ class ViraltestEnvironment(Environment):
|
|
| 766 |
|
| 767 |
# Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
|
| 768 |
self._active_collab = None
|
|
|
|
| 769 |
if action.collab:
|
| 770 |
self._collabs_this_month += 1
|
| 771 |
self._collab_history.append(action.collab.partner_id)
|
| 772 |
self._active_collab = action.collab
|
| 773 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 774 |
# Validate scheduled actions
|
| 775 |
schedule: Dict[int, ScheduledAction] = {}
|
| 776 |
errors: List[str] = []
|
|
@@ -837,9 +1247,14 @@ class ViraltestEnvironment(Environment):
|
|
| 837 |
if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
|
| 838 |
self._days_with_good_posts.add(prev_day)
|
| 839 |
|
| 840 |
-
|
|
|
|
|
|
|
| 841 |
error_str = "; ".join(errors) if errors else None
|
| 842 |
|
|
|
|
|
|
|
|
|
|
| 843 |
done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
|
| 844 |
coach = self._compute_coach_feedback(daily_engagement)
|
| 845 |
judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
|
|
@@ -864,6 +1279,7 @@ class ViraltestEnvironment(Environment):
|
|
| 864 |
daily_posts_made=daily_posts, daily_energy_min=energy_min,
|
| 865 |
tool_results=tool_results, engagement_signals=daily_signals,
|
| 866 |
coach_feedback=coach, judge_report=judge, headline_metrics=headline,
|
|
|
|
| 867 |
)
|
| 868 |
return self._final_observation
|
| 869 |
|
|
@@ -873,6 +1289,7 @@ class ViraltestEnvironment(Environment):
|
|
| 873 |
daily_posts_made=daily_posts, daily_energy_min=energy_min,
|
| 874 |
tool_results=tool_results, engagement_signals=daily_signals,
|
| 875 |
coach_feedback=coach, judge_report=judge,
|
|
|
|
| 876 |
)
|
| 877 |
|
| 878 |
def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
|
|
@@ -918,6 +1335,10 @@ class ViraltestEnvironment(Environment):
|
|
| 918 |
* niche_mult * saturation_factor
|
| 919 |
)
|
| 920 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
if self._active_collab is not None and self._active_collab.hour == sa.hour:
|
| 922 |
eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
|
| 923 |
engagement *= eng_m
|
|
@@ -1101,9 +1522,11 @@ class ViraltestEnvironment(Environment):
|
|
| 1101 |
coach_feedback: Optional[Dict[str, Any]] = None,
|
| 1102 |
judge_report: Optional[JudgeReport] = None,
|
| 1103 |
headline_metrics: Optional[HeadlineMetrics] = None,
|
|
|
|
| 1104 |
) -> ViraltestObservation:
|
| 1105 |
recent_eng = self._engagement_history[-10:] if self._engagement_history else []
|
| 1106 |
eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
|
|
|
|
| 1107 |
|
| 1108 |
meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
|
| 1109 |
if grader_score is not None:
|
|
@@ -1153,6 +1576,7 @@ class ViraltestEnvironment(Environment):
|
|
| 1153 |
done=done,
|
| 1154 |
reward=round(reward, 4),
|
| 1155 |
metadata=meta,
|
|
|
|
| 1156 |
)
|
| 1157 |
|
| 1158 |
# ----- graders (monthly) -----
|
|
|
|
| 26 |
try:
|
| 27 |
from ..models import (
|
| 28 |
CollabProposal,
|
| 29 |
+
DailyInteractions,
|
| 30 |
EngagementSignals,
|
| 31 |
HeadlineMetrics,
|
| 32 |
JudgeReport,
|
|
|
|
| 39 |
except ImportError:
|
| 40 |
from models import (
|
| 41 |
CollabProposal,
|
| 42 |
+
DailyInteractions,
|
| 43 |
EngagementSignals,
|
| 44 |
HeadlineMetrics,
|
| 45 |
JudgeReport,
|
|
|
|
| 90 |
int(k): v for k, v in _HEATMAP_DATA.get("grid", {}).items()
|
| 91 |
}
|
| 92 |
|
| 93 |
+
# Mocked niche + follower-count lookups for the collab system. Live in the overlap matrix file
|
| 94 |
+
# so the same source-of-truth carries (a) Jaccard overlap, (b) niche label, (c) follower size.
|
| 95 |
+
_NICHE_BY_ARCHETYPE: Dict[str, str] = dict(_OVERLAP_DATA.get("niche_by_archetype", {}))
|
| 96 |
+
_FOLLOWERS_BY_ARCHETYPE: Dict[str, int] = {
|
| 97 |
+
k: int(v) for k, v in _OVERLAP_DATA.get("mock_followers_by_archetype", {}).items()
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
# ---------------------------------------------------------------------------
|
| 101 |
# Constants (research-backed, Tier 1-3 sources)
|
| 102 |
# ---------------------------------------------------------------------------
|
|
|
|
| 175 |
TREND_MATCH_STOPWORDS = {"tips", "guide", "review", "routine", "ideas", "hacks", "tutorial", "the", "a", "an", "and", "of", "for", "to"}
|
| 176 |
# Collab reward shaping (Later 2023 reach study, HypeAuditor 2024 niche affinity, Rival IQ 2025 overlap patterns,
|
| 177 |
# Cen et al. 2024 disengagement model for diminishing returns instead of a hard cap).
|
|
|
|
|
|
|
|
|
|
| 178 |
COLLAB_PARTNER_REPEAT_PENALTY = 0.7 # discount on multipliers when the same partner was already used earlier in the collab history
|
| 179 |
COLLAB_FATIGUE_K = 0.3 # per-collab diminishing-returns factor: 1/(1+K*prior_collabs_this_episode)
|
| 180 |
|
| 181 |
+
# Niche-aware tiered shaping (overlap = Jaccard intersection fraction).
|
| 182 |
+
# Hard rule: any diff-niche multiplier must be < the minimum same-niche-low multiplier
|
| 183 |
+
# so the env never recommends a diff-niche collab over an equal-overlap same-niche one.
|
| 184 |
+
COLLAB_LOW_OVERLAP_THRESHOLD = 0.20 # < this counts as "low intersection"
|
| 185 |
+
COLLAB_HIGH_OVERLAP_THRESHOLD = 0.40 # >= this counts as "high intersection"
|
| 186 |
+
COLLAB_GUARDRAIL_OVERLAP_MIN = 0.10 # below this -> recommended=False (intersection-too-low guardrail)
|
| 187 |
+
COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX = 0.25 # |partner - user| / max > this -> follower-size mismatch
|
| 188 |
+
COLLAB_FORCED_PENALTY_ENG = 0.7 # eng_mult applied if agent ignores guardrail
|
| 189 |
+
COLLAB_FORCED_PENALTY_GROWTH = 0.6 # growth_mult applied if agent ignores guardrail
|
| 190 |
+
|
| 191 |
+
# Same niche, LOW overlap -> HIGH reward (best case). Smoothly interpolated by overlap (low->high uplift as overlap->0).
|
| 192 |
+
COLLAB_SAME_LOW_ENG = (1.50, 1.80)
|
| 193 |
+
COLLAB_SAME_LOW_GROWTH = (1.60, 2.00)
|
| 194 |
+
# Same niche, HIGH overlap -> LOW reward (no point, audience already shared).
|
| 195 |
+
COLLAB_SAME_HIGH_ENG = 0.85
|
| 196 |
+
COLLAB_SAME_HIGH_GROWTH = 0.90
|
| 197 |
+
# Diff niche, LOW overlap -> MED reward (cross-pollination, capped < SAME_LOW min).
|
| 198 |
+
COLLAB_DIFF_LOW_ENG = (1.20, 1.40)
|
| 199 |
+
COLLAB_DIFF_LOW_GROWTH = (1.30, 1.55)
|
| 200 |
+
# Diff niche, HIGH overlap -> LOW reward (mismatch).
|
| 201 |
+
COLLAB_DIFF_HIGH_ENG = 0.75
|
| 202 |
+
COLLAB_DIFF_HIGH_GROWTH = 0.80
|
| 203 |
+
|
| 204 |
+
# Interaction (likes/comments/replies) tunables
|
| 205 |
+
INTERACT_ENERGY_LIKE = 0.005
|
| 206 |
+
INTERACT_ENERGY_COMMENT = 0.012
|
| 207 |
+
INTERACT_ENERGY_REPLY = 0.018
|
| 208 |
+
INTERACT_HEALTHY_LIKES = (5, 20)
|
| 209 |
+
INTERACT_HEALTHY_COMMENTS = (3, 10)
|
| 210 |
+
INTERACT_LIKE_REACH_BUFF = 0.04
|
| 211 |
+
INTERACT_COMMENT_REACH_BUFF = 0.08
|
| 212 |
+
INTERACT_REPLY_REWARD_PER = 0.01
|
| 213 |
+
INTERACT_REPLY_REWARD_CAP = 0.15
|
| 214 |
+
INTERACT_DAILY_REWARD_CAP = 0.15
|
| 215 |
+
INTERACT_SPAM_LIKES = 30
|
| 216 |
+
INTERACT_SPAM_COMMENTS = 20
|
| 217 |
+
INTERACT_SPAM_REACH_PENALTY = 0.85
|
| 218 |
+
INTERACT_SPAM_SHADOWBAN_BUMP = 0.20
|
| 219 |
+
INTERACT_IGNORE_THRESHOLD_K = 0.05
|
| 220 |
+
INTERACT_IGNORE_LOYALTY_DECAY = 0.97
|
| 221 |
+
INTERACT_OFFNICHE_THRESHOLD = 0.60
|
| 222 |
+
INTERACT_OFFNICHE_REACH_PENALTY = 0.90
|
| 223 |
+
INTERACT_LOWQ_THRESHOLD = 0.30
|
| 224 |
+
INTERACT_LOWQ_WEIGHT = 0.4
|
| 225 |
+
INTERACT_VERY_LOWQ_THRESHOLD = 0.10
|
| 226 |
+
INTERACT_VERY_LOWQ_PENALTY = -0.03
|
| 227 |
+
|
| 228 |
API_BUDGET_INITIAL = 10**9 # effectively unlimited; rate-limit removed
|
| 229 |
|
| 230 |
# Heuristic baselines for headline metric `vs_baseline_pct`.
|
|
|
|
| 304 |
"parameters": {"scheduled_actions": {"type": "array"}},
|
| 305 |
},
|
| 306 |
"query_creator_pool": {
|
| 307 |
+
"description": "List available competitor archetypes for potential collaboration with audience overlap %, niche match, mocked follower counts, intersection size, and a recommendation flag (recommended=False when guardrails block: zero followers, intersection<10%, or follower-size gap>25%).",
|
| 308 |
"parameters": {},
|
| 309 |
},
|
| 310 |
"propose_collab": {
|
| 311 |
+
"description": "Propose a collab post with a competitor at a specific hour. The post you schedule at that hour will be co-authored. Reward shaping: same-niche + low overlap = HIGH; same-niche + high overlap = LOW; diff-niche always capped below same-niche-low. Guardrail violations apply a 0.7x engagement / 0.6x growth penalty AND surface in the JudgeReport.",
|
| 312 |
"parameters": {
|
| 313 |
"partner_id": {"type": "string"},
|
| 314 |
"content_type": {"type": "string", "enum": ["reel", "story", "carousel", "text_post"]},
|
| 315 |
"hour": {"type": "integer", "minimum": 0, "maximum": 23},
|
| 316 |
},
|
| 317 |
},
|
| 318 |
+
"query_interaction_norms": {
|
| 319 |
+
"description": "Discover healthy daily ranges for likes/comments/replies and the current shadowban_risk. Use before submitting ViraltestAction.interactions.",
|
| 320 |
+
"parameters": {},
|
| 321 |
+
},
|
| 322 |
}
|
| 323 |
|
| 324 |
|
|
|
|
| 362 |
self._collabs_this_month = 0
|
| 363 |
self._collab_history: List[str] = []
|
| 364 |
self._active_collab: Optional[CollabProposal] = None
|
| 365 |
+
self._collab_violations: List[str] = [] # collab guardrail breaches this step
|
| 366 |
+
self._user_niche: str = _NICHE_BY_ARCHETYPE.get("user_creator", "generic")
|
| 367 |
+
|
| 368 |
+
# Interaction state
|
| 369 |
+
self._pending_reach_mult: float = 1.0 # applied to next day's posts (one-shot)
|
| 370 |
+
self._shadowban_risk: float = 0.0
|
| 371 |
+
self._engagement_rate_loyalty_mult: float = 1.0 # compounding loyalty drop from ignoring audience
|
| 372 |
+
self._interaction_violations: List[str] = []
|
| 373 |
+
self._last_interaction_summary: Optional[Dict[str, Any]] = None
|
| 374 |
self._low_energy_days = 0
|
| 375 |
self._total_posts_this_week = 0
|
| 376 |
self._week_start_day = 0
|
|
|
|
| 552 |
|
| 553 |
return daily_fatigue * weekly_mult
|
| 554 |
|
| 555 |
+
# ----- collab evaluation (niche-aware, overlap-tiered) -----
|
| 556 |
|
| 557 |
def _user_partner_overlap(self, partner_id: str) -> Optional[float]:
|
| 558 |
ids = _OVERLAP_DATA.get("archetype_ids", [])
|
|
|
|
| 562 |
p = ids.index(partner_id)
|
| 563 |
return _OVERLAP_DATA["matrix"][u][p]
|
| 564 |
|
| 565 |
+
def _partner_niche(self, partner_id: str) -> str:
    """Return the mocked niche label for ``partner_id``.

    Archetypes missing from the niche lookup table are reported as "generic".
    """
    if partner_id in _NICHE_BY_ARCHETYPE:
        return _NICHE_BY_ARCHETYPE[partner_id]
    return "generic"
|
| 567 |
+
|
| 568 |
+
def _partner_followers(self, partner_id: str) -> int:
    """Return the mocked follower count for ``partner_id``.

    Archetypes missing from the follower lookup table are reported as 0.
    """
    if partner_id in _FOLLOWERS_BY_ARCHETYPE:
        return _FOLLOWERS_BY_ARCHETYPE[partner_id]
    return 0
|
| 570 |
+
|
| 571 |
+
@staticmethod
|
| 572 |
+
def _interp(span: Tuple[float, float], t: float) -> float:
|
| 573 |
+
"""Linear interp from span[0] (t=0) to span[1] (t=1)."""
|
| 574 |
+
t = max(0.0, min(1.0, t))
|
| 575 |
+
return span[0] + (span[1] - span[0]) * t
|
| 576 |
+
|
| 577 |
+
def _collab_tier_multipliers(self, same_niche: bool, overlap: float) -> Tuple[float, float]:
    """Compute the raw 2x2 (niche match x overlap band) collab multipliers.

    Returns ``(engagement_multiplier, growth_multiplier)`` before any
    fatigue, repeat-partner or guardrail adjustments are applied.
    """
    # Position inside the low band: 1.0 at overlap=0, falling to 0.0 at the low threshold.
    closeness_to_zero = 1.0 - min(1.0, overlap / COLLAB_LOW_OVERLAP_THRESHOLD)

    # Pick the constant set for this niche relationship once; the band logic below is shared.
    if same_niche:
        low_eng_span, low_growth_span = COLLAB_SAME_LOW_ENG, COLLAB_SAME_LOW_GROWTH
        high_eng, high_growth = COLLAB_SAME_HIGH_ENG, COLLAB_SAME_HIGH_GROWTH
    else:
        low_eng_span, low_growth_span = COLLAB_DIFF_LOW_ENG, COLLAB_DIFF_LOW_GROWTH
        high_eng, high_growth = COLLAB_DIFF_HIGH_ENG, COLLAB_DIFF_HIGH_GROWTH

    if overlap < COLLAB_LOW_OVERLAP_THRESHOLD:
        # Low band: uplift grows as overlap approaches 0.
        eng = self._interp(low_eng_span, closeness_to_zero)
        growth = self._interp(low_growth_span, closeness_to_zero)
    elif overlap >= COLLAB_HIGH_OVERLAP_THRESHOLD:
        # High band: flat multipliers.
        eng = high_eng
        growth = high_growth
    else:
        # Mid band: straight line from the low-band floor (at the low threshold)
        # down to the high-band value (at the high threshold).
        band_pos = (overlap - COLLAB_LOW_OVERLAP_THRESHOLD) / (COLLAB_HIGH_OVERLAP_THRESHOLD - COLLAB_LOW_OVERLAP_THRESHOLD)
        eng = self._interp((low_eng_span[0], high_eng), band_pos)
        growth = self._interp((low_growth_span[0], high_growth), band_pos)

    if not same_niche:
        # Hard rule: a diff-niche result must stay strictly below the same-niche low-band minimum.
        eng = min(eng, COLLAB_SAME_LOW_ENG[0] - 0.01)
        growth = min(growth, COLLAB_SAME_LOW_GROWTH[0] - 0.01)
    return eng, growth
|
| 608 |
+
|
| 609 |
+
def _collab_evaluation(self, partner_id: str) -> Dict[str, Any]:
|
| 610 |
+
"""Single source of truth: tier reward + guardrails + final multipliers (after fatigue/repeat).
|
| 611 |
+
|
| 612 |
+
Returns a dict consumable by both query_creator_pool (for recommendation surface)
|
| 613 |
+
and _process_hour_action (for applied multipliers).
|
| 614 |
+
"""
|
| 615 |
+
overlap = self._user_partner_overlap(partner_id)
|
| 616 |
+
if overlap is None:
|
| 617 |
+
return {
|
| 618 |
+
"partner_id": partner_id,
|
| 619 |
+
"overlap": None,
|
| 620 |
+
"same_niche": False,
|
| 621 |
+
"partner_followers": 0,
|
| 622 |
+
"user_followers": self._followers,
|
| 623 |
+
"follower_gap_pct": 1.0,
|
| 624 |
+
"intersection_size": 0,
|
| 625 |
+
"recommended": False,
|
| 626 |
+
"reason": "unknown_partner",
|
| 627 |
+
"tier_eng_mult": 1.0,
|
| 628 |
+
"tier_growth_mult": 1.0,
|
| 629 |
+
"eng_mult": 1.0,
|
| 630 |
+
"growth_mult": 1.0,
|
| 631 |
+
}
|
| 632 |
+
|
| 633 |
+
partner_niche = self._partner_niche(partner_id)
|
| 634 |
+
same_niche = partner_niche == self._user_niche
|
| 635 |
+
partner_followers = self._partner_followers(partner_id)
|
| 636 |
+
user_followers = max(0, int(self._followers))
|
| 637 |
+
denom = max(1, max(partner_followers, user_followers))
|
| 638 |
+
gap_pct = abs(partner_followers - user_followers) / denom if denom else 1.0
|
| 639 |
+
|
| 640 |
+
# Mock intersection size via Jaccard inversion: union ≈ (|A|+|B|)/(1+overlap), intersection = overlap*union.
|
| 641 |
+
union_approx = (partner_followers + user_followers) / (1.0 + overlap) if overlap >= 0 else 0.0
|
| 642 |
+
intersection_size = int(round(overlap * union_approx))
|
| 643 |
+
|
| 644 |
+
# Guardrails (in priority order)
|
| 645 |
+
recommended = True
|
| 646 |
+
reason: Optional[str] = None
|
| 647 |
+
if partner_followers <= 0:
|
| 648 |
+
recommended = False
|
| 649 |
+
reason = "partner_zero_followers"
|
| 650 |
+
elif overlap < COLLAB_GUARDRAIL_OVERLAP_MIN:
|
| 651 |
+
recommended = False
|
| 652 |
+
reason = "intersection_below_10pct"
|
| 653 |
+
elif gap_pct > COLLAB_GUARDRAIL_FOLLOWER_GAP_MAX:
|
| 654 |
+
recommended = False
|
| 655 |
+
reason = "follower_size_mismatch"
|
| 656 |
+
|
| 657 |
+
tier_eng, tier_growth = self._collab_tier_multipliers(same_niche, overlap)
|
| 658 |
+
|
| 659 |
+
eng_mult = tier_eng
|
| 660 |
+
growth_mult = tier_growth
|
| 661 |
+
|
| 662 |
+
# Repeat-partner discount (existing behavior preserved).
|
| 663 |
if partner_id in self._collab_history[:-1]:
|
| 664 |
+
eng_mult *= COLLAB_PARTNER_REPEAT_PENALTY
|
| 665 |
+
growth_mult *= COLLAB_PARTNER_REPEAT_PENALTY
|
| 666 |
+
|
| 667 |
+
# Diminishing returns across the episode (Cen 2024).
|
| 668 |
prior = max(0, self._collabs_this_month - 1)
|
| 669 |
fatigue = 1.0 / (1.0 + COLLAB_FATIGUE_K * prior)
|
| 670 |
+
eng_mult *= fatigue
|
| 671 |
+
growth_mult *= fatigue
|
| 672 |
+
|
| 673 |
+
return {
|
| 674 |
+
"partner_id": partner_id,
|
| 675 |
+
"overlap": round(overlap, 3),
|
| 676 |
+
"same_niche": same_niche,
|
| 677 |
+
"partner_niche": partner_niche,
|
| 678 |
+
"user_niche": self._user_niche,
|
| 679 |
+
"partner_followers": partner_followers,
|
| 680 |
+
"user_followers": user_followers,
|
| 681 |
+
"follower_gap_pct": round(gap_pct, 3),
|
| 682 |
+
"intersection_size": intersection_size,
|
| 683 |
+
"recommended": recommended,
|
| 684 |
+
"reason": reason,
|
| 685 |
+
"tier_eng_mult": round(tier_eng, 3),
|
| 686 |
+
"tier_growth_mult": round(tier_growth, 3),
|
| 687 |
+
"eng_mult": round(eng_mult, 3),
|
| 688 |
+
"growth_mult": round(growth_mult, 3),
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
def _collab_multipliers(self, partner_id: str) -> Tuple[float, float]:
|
| 692 |
+
"""Returns (engagement_multiplier, follower_growth_multiplier).
|
| 693 |
+
|
| 694 |
+
Applies guardrail penalties when the agent forces a non-recommended collab.
|
| 695 |
+
Side effect: appends to self._collab_violations for the JudgeReport.
|
| 696 |
+
"""
|
| 697 |
+
ev = self._collab_evaluation(partner_id)
|
| 698 |
+
eng = ev["eng_mult"]
|
| 699 |
+
growth = ev["growth_mult"]
|
| 700 |
+
if not ev["recommended"]:
|
| 701 |
+
eng *= COLLAB_FORCED_PENALTY_ENG
|
| 702 |
+
growth *= COLLAB_FORCED_PENALTY_GROWTH
|
| 703 |
+
self._collab_violations.append(
|
| 704 |
+
f"collab_guardrail:{ev.get('reason', 'blocked')}@{partner_id}"
|
| 705 |
+
)
|
| 706 |
+
return eng, growth
|
| 707 |
+
|
| 708 |
+
# ----- interactions (likes/comments/replies) -----
|
| 709 |
+
|
| 710 |
+
def _process_interactions(
    self, interactions: Optional[DailyInteractions]
) -> Tuple[float, Dict[str, Any]]:
    """Apply one day's interaction plan (likes, comments, replies) to the creator.

    ``self._pending_reach_mult`` and ``self._interaction_violations`` are
    reset on every call. When ``interactions`` is provided the method also
    charges ``self._energy`` and may adjust ``self._shadowban_risk`` and
    ``self._engagement_rate_loyalty_mult``.

    Args:
        interactions: The day's interaction counts, target partner ids and
            average reply quality, or ``None`` when none were submitted.

    Returns:
        ``(reward_delta, summary)``. ``reward_delta`` is clamped to
        ``+/-INTERACT_DAILY_REWARD_CAP`` and is meant to be added to the
        day's averaged reward. ``summary`` is a plain dict holding the
        counts, energy cost, resulting reach modifier, risk and loyalty
        values, the violation strings and a one-word label.
    """
    # Each day starts from a neutral reach modifier and an empty violation
    # list; the reach modifier is overwritten below once it is known.
    self._pending_reach_mult = 1.0
    self._interaction_violations = []

    summary: Dict[str, Any] = {
        "likes_on_others": 0,
        "comments_on_others": 0,
        "replies_to_audience": 0,
        "energy_cost": 0.0,
        "reach_modifier": 1.0,
        "shadowban_risk": round(self._shadowban_risk, 3),
        "loyalty_mult": round(self._engagement_rate_loyalty_mult, 3),
        "reward_delta": 0.0,
        "violations": [],
        "summary": "no_interactions",
    }
    if interactions is None:
        return 0.0, summary

    likes = int(interactions.likes_on_others)
    comments = int(interactions.comments_on_others)
    replies = int(interactions.replies_to_audience)
    targets = list(interactions.target_partner_ids or [])
    quality = float(interactions.avg_reply_quality)
    flagged = self._interaction_violations  # same list object, shorter name

    # 1) Energy is paid up front and floored at zero.
    energy_cost = (
        INTERACT_ENERGY_LIKE * likes
        + INTERACT_ENERGY_COMMENT * comments
        + INTERACT_ENERGY_REPLY * replies
    )
    self._energy = max(0.0, self._energy - energy_cost)

    # Fraction of interaction targets whose niche differs from the user's.
    if targets:
        off_count = sum(
            1
            for target_id in targets
            if self._partner_niche(target_id) != self._user_niche
        )
        off_niche_share = off_count / len(targets)
    else:
        off_niche_share = 0.0
    on_niche_share = 1.0 - off_niche_share

    # 2) Reach buffs: only for counts inside the healthy bands, scaled by
    #    the on-niche share of the targets.
    reach_mult = 1.0
    if on_niche_share > 0:
        if INTERACT_HEALTHY_LIKES[0] <= likes <= INTERACT_HEALTHY_LIKES[1]:
            reach_mult = reach_mult * (
                1.0 + INTERACT_LIKE_REACH_BUFF * on_niche_share
            )
        if INTERACT_HEALTHY_COMMENTS[0] <= comments <= INTERACT_HEALTHY_COMMENTS[1]:
            reach_mult = reach_mult * (
                1.0 + INTERACT_COMMENT_REACH_BUFF * on_niche_share
            )

    # 3) Reply reward, scaled by quality and discounted for low quality.
    if quality < INTERACT_LOWQ_THRESHOLD:
        reply_weight = INTERACT_LOWQ_WEIGHT
    else:
        reply_weight = 1.0
    reward_delta = 0.0
    reward_delta += min(
        INTERACT_REPLY_REWARD_CAP,
        INTERACT_REPLY_REWARD_PER * replies * quality * reply_weight,
    )

    # 4) Penalties. Each condition is evaluated once and reused for the
    #    summary label at the end.

    # 4a) Spam volume.
    is_spam = likes > INTERACT_SPAM_LIKES or comments > INTERACT_SPAM_COMMENTS
    if is_spam:
        reach_mult *= INTERACT_SPAM_REACH_PENALTY
        self._shadowban_risk = min(
            1.0, self._shadowban_risk + INTERACT_SPAM_SHADOWBAN_BUMP
        )
        flagged.append(f"interaction_spam:likes={likes},comments={comments}")

    # 4b) Mostly off-niche targets (needs at least three targets).
    is_off_niche = (
        off_niche_share >= INTERACT_OFFNICHE_THRESHOLD and len(targets) >= 3
    )
    if is_off_niche:
        reach_mult *= INTERACT_OFFNICHE_REACH_PENALTY
        flagged.append(f"interaction_off_niche:share={off_niche_share:.2f}")

    # 4c) Ignoring own audience: yesterday's post count is the proxy for
    #     how many replies are expected.
    prev_day = max(0, self._day - 1)
    expected_replies = self._posts_per_day.get(prev_day, 0) * 1.0
    is_ignoring_own = (
        expected_replies > 0
        and replies < INTERACT_IGNORE_THRESHOLD_K * expected_replies * 20
    )
    if is_ignoring_own:
        # Compounding loyalty drop, floored at 0.5x.
        self._engagement_rate_loyalty_mult = max(
            0.5, self._engagement_rate_loyalty_mult * INTERACT_IGNORE_LOYALTY_DECAY
        )
        flagged.append(f"interaction_ignoring_own:replies={replies}")

    # 4d) Very low reply quality: extra reward adjustment on top of the
    #     discount already applied in step 3.
    is_low_quality = replies > 0 and quality < INTERACT_VERY_LOWQ_THRESHOLD
    if is_low_quality:
        reward_delta += INTERACT_VERY_LOWQ_PENALTY
        flagged.append(f"interaction_low_quality:q={quality:.2f}")

    # 4e) Energy drain: record when the cost left energy below 0.2.
    if energy_cost > 0 and self._energy < 0.2:
        flagged.append(
            f"interaction_energy_drain:residual_energy={self._energy:.2f}"
        )

    # Keep the daily delta inside the cap.
    reward_delta = max(
        -INTERACT_DAILY_REWARD_CAP, min(INTERACT_DAILY_REWARD_CAP, reward_delta)
    )

    # Publish the reach modifier for this step's hourly posts (0.5 floor).
    self._pending_reach_mult = max(0.5, reach_mult)

    # Quiet day (no likes and no comments): shadowban risk decays slightly.
    if not (likes or comments):
        self._shadowban_risk = max(0.0, self._shadowban_risk - 0.05)

    # Label priority: spam > off_niche > low_quality > ignoring_own.
    if is_spam:
        label = "spam"
    elif is_off_niche:
        label = "off_niche"
    elif is_low_quality:
        label = "low_quality"
    elif is_ignoring_own:
        label = "ignoring_own"
    elif reward_delta > 0 or reach_mult > 1.0:
        label = "healthy"
    else:
        label = "neutral"

    summary.update(
        likes_on_others=likes,
        comments_on_others=comments,
        replies_to_audience=replies,
        energy_cost=round(energy_cost, 4),
        reach_modifier=round(self._pending_reach_mult, 3),
        shadowban_risk=round(self._shadowban_risk, 3),
        loyalty_mult=round(self._engagement_rate_loyalty_mult, 3),
        off_niche_share=round(off_niche_share, 2),
        reward_delta=round(reward_delta, 4),
        violations=list(flagged),
        summary=label,
    )
    return reward_delta, summary
|
| 856 |
|
| 857 |
# ----- engagement signals (Mosseri-aligned) -----
|
| 858 |
|
|
|
|
| 939 |
elif tool.name == "query_creator_pool":
|
| 940 |
pool = []
|
| 941 |
for comp in self._competitors:
|
| 942 |
+
ev = self._collab_evaluation(comp.id)
|
| 943 |
pool.append({
|
| 944 |
+
"id": comp.id,
|
| 945 |
+
"name": comp.name,
|
| 946 |
+
"niche": comp.niche,
|
| 947 |
+
"audience_overlap": ev.get("overlap"),
|
| 948 |
+
"mock_followers": ev.get("partner_followers"),
|
| 949 |
+
"intersection_size": ev.get("intersection_size"),
|
| 950 |
+
"same_niche": ev.get("same_niche"),
|
| 951 |
+
"follower_gap_pct": ev.get("follower_gap_pct"),
|
| 952 |
+
"recommended": ev.get("recommended"),
|
| 953 |
+
"reason": ev.get("reason"),
|
| 954 |
+
"expected_eng_mult": ev.get("eng_mult"),
|
| 955 |
+
"expected_growth_mult": ev.get("growth_mult"),
|
| 956 |
})
|
| 957 |
+
return ToolResult(
|
| 958 |
+
name=tool.name,
|
| 959 |
+
data={
|
| 960 |
+
"user_niche": self._user_niche,
|
| 961 |
+
"user_followers": int(self._followers),
|
| 962 |
+
"pool": pool,
|
| 963 |
+
},
|
| 964 |
+
budget_remaining=self._api_budget,
|
| 965 |
+
)
|
| 966 |
|
| 967 |
elif tool.name == "propose_collab":
|
| 968 |
partner_id = tool.arguments.get("partner_id", "")
|
| 969 |
if partner_id not in [c.id for c in self._competitors]:
|
| 970 |
return ToolResult(name=tool.name, success=False, error=f"unknown partner: {partner_id}", budget_remaining=self._api_budget)
|
| 971 |
+
ev = self._collab_evaluation(partner_id)
|
| 972 |
+
return ToolResult(
|
| 973 |
+
name=tool.name,
|
| 974 |
+
data={
|
| 975 |
+
"status": "proposal_accepted" if ev["recommended"] else "proposal_accepted_with_warning",
|
| 976 |
+
"partner_id": partner_id,
|
| 977 |
+
"recommended": ev["recommended"],
|
| 978 |
+
"reason": ev["reason"],
|
| 979 |
+
"same_niche": ev["same_niche"],
|
| 980 |
+
"audience_overlap": ev["overlap"],
|
| 981 |
+
"intersection_size": ev["intersection_size"],
|
| 982 |
+
"expected_eng_mult": ev["eng_mult"],
|
| 983 |
+
"expected_growth_mult": ev["growth_mult"],
|
| 984 |
+
},
|
| 985 |
+
budget_remaining=self._api_budget,
|
| 986 |
+
)
|
| 987 |
+
|
| 988 |
+
elif tool.name == "query_interaction_norms":
|
| 989 |
+
return ToolResult(
|
| 990 |
+
name=tool.name,
|
| 991 |
+
data={
|
| 992 |
+
"healthy_likes_per_day": list(INTERACT_HEALTHY_LIKES),
|
| 993 |
+
"healthy_comments_per_day": list(INTERACT_HEALTHY_COMMENTS),
|
| 994 |
+
"spam_threshold_likes": INTERACT_SPAM_LIKES,
|
| 995 |
+
"spam_threshold_comments": INTERACT_SPAM_COMMENTS,
|
| 996 |
+
"off_niche_share_max": INTERACT_OFFNICHE_THRESHOLD,
|
| 997 |
+
"min_reply_quality": INTERACT_LOWQ_THRESHOLD,
|
| 998 |
+
"current_shadowban_risk": round(self._shadowban_risk, 3),
|
| 999 |
+
"user_niche": self._user_niche,
|
| 1000 |
+
"expected_replies_per_unit_engagement": INTERACT_IGNORE_THRESHOLD_K,
|
| 1001 |
+
},
|
| 1002 |
+
budget_remaining=self._api_budget,
|
| 1003 |
+
)
|
| 1004 |
|
| 1005 |
return ToolResult(name=tool.name, success=False, error=f"unknown tool: {tool.name}", budget_remaining=self._api_budget)
|
| 1006 |
|
|
|
|
| 1057 |
if self._hours_since_sleep > 22:
|
| 1058 |
violations.append(f"sleep_debt: {self._hours_since_sleep}h awake (Van Dongen 2003)")
|
| 1059 |
pc -= 0.10
|
| 1060 |
+
# Collab guardrail breaches surfaced by _collab_multipliers (forced past block).
|
| 1061 |
+
for v in self._collab_violations:
|
| 1062 |
+
violations.append(v)
|
| 1063 |
+
pc -= 0.10
|
| 1064 |
+
# Interaction system violations (spam/off-niche/ignoring/low-quality/energy-drain).
|
| 1065 |
+
for v in self._interaction_violations:
|
| 1066 |
+
violations.append(v)
|
| 1067 |
+
pc -= 0.10
|
| 1068 |
|
| 1069 |
burnout_pressure = (1.0 - energy_min) * 0.4 + self._sleep_debt * 0.3 + (self._low_energy_days / 5.0) * 0.3
|
| 1070 |
sustainability_risk = max(0.0, min(1.0, burnout_pressure))
|
|
|
|
| 1129 |
self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
|
| 1130 |
self._init_state()
|
| 1131 |
|
| 1132 |
+
# Optional user-niche override (for collab same/diff niche scenarios).
|
| 1133 |
+
user_niche_override = kwargs.get("user_niche")
|
| 1134 |
+
if user_niche_override:
|
| 1135 |
+
self._user_niche = str(user_niche_override)
|
| 1136 |
+
|
| 1137 |
self._shift_label = kwargs.get("shift_label")
|
| 1138 |
self._chain_id = kwargs.get("episode_chain_id")
|
| 1139 |
|
|
|
|
| 1171 |
|
| 1172 |
# Process collab proposal (no hard cap; diminishing returns enforced via _collab_multipliers)
|
| 1173 |
self._active_collab = None
|
| 1174 |
+
self._collab_violations = []
|
| 1175 |
if action.collab:
|
| 1176 |
self._collabs_this_month += 1
|
| 1177 |
self._collab_history.append(action.collab.partner_id)
|
| 1178 |
self._active_collab = action.collab
|
| 1179 |
|
| 1180 |
+
# Process interactions BEFORE the day's hourly loop so energy cost and reach buffs/penalties
|
| 1181 |
+
# influence the same day's posts.
|
| 1182 |
+
interaction_reward, interaction_summary = self._process_interactions(action.interactions)
|
| 1183 |
+
|
| 1184 |
# Validate scheduled actions
|
| 1185 |
schedule: Dict[int, ScheduledAction] = {}
|
| 1186 |
errors: List[str] = []
|
|
|
|
| 1247 |
if 1 <= self._posts_per_day.get(prev_day, 0) <= 2:
|
| 1248 |
self._days_with_good_posts.add(prev_day)
|
| 1249 |
|
| 1250 |
+
# Apply ignored-audience compounding loyalty multiplier into the per-day reward.
|
| 1251 |
+
avg_reward = (daily_reward / 24.0) + interaction_reward
|
| 1252 |
+
avg_reward = max(0.0, min(1.0, avg_reward))
|
| 1253 |
error_str = "; ".join(errors) if errors else None
|
| 1254 |
|
| 1255 |
+
# Finalize this step's interaction summary on the obs.
|
| 1256 |
+
self._last_interaction_summary = interaction_summary
|
| 1257 |
+
|
| 1258 |
done = self._state.step_count >= TASK_HORIZON or self._energy <= 0.0
|
| 1259 |
coach = self._compute_coach_feedback(daily_engagement)
|
| 1260 |
judge = self._compute_judge_report(action, daily_engagement, daily_posts, energy_min, errors)
|
|
|
|
| 1279 |
daily_posts_made=daily_posts, daily_energy_min=energy_min,
|
| 1280 |
tool_results=tool_results, engagement_signals=daily_signals,
|
| 1281 |
coach_feedback=coach, judge_report=judge, headline_metrics=headline,
|
| 1282 |
+
interaction_metrics=interaction_summary,
|
| 1283 |
)
|
| 1284 |
return self._final_observation
|
| 1285 |
|
|
|
|
| 1289 |
daily_posts_made=daily_posts, daily_energy_min=energy_min,
|
| 1290 |
tool_results=tool_results, engagement_signals=daily_signals,
|
| 1291 |
coach_feedback=coach, judge_report=judge,
|
| 1292 |
+
interaction_metrics=interaction_summary,
|
| 1293 |
)
|
| 1294 |
|
| 1295 |
def _process_hour_action(self, sa: ScheduledAction) -> Tuple[float, float, Optional[EngagementSignals]]:
|
|
|
|
| 1335 |
* niche_mult * saturation_factor
|
| 1336 |
)
|
| 1337 |
|
| 1338 |
+
# Interaction-driven reach modifier (set by _process_interactions earlier this step).
|
| 1339 |
+
# Multiplicative on engagement; capped at 0.5 floor inside _process_interactions.
|
| 1340 |
+
engagement *= getattr(self, "_pending_reach_mult", 1.0)
|
| 1341 |
+
|
| 1342 |
if self._active_collab is not None and self._active_collab.hour == sa.hour:
|
| 1343 |
eng_m, growth_m = self._collab_multipliers(self._active_collab.partner_id)
|
| 1344 |
engagement *= eng_m
|
|
|
|
| 1522 |
coach_feedback: Optional[Dict[str, Any]] = None,
|
| 1523 |
judge_report: Optional[JudgeReport] = None,
|
| 1524 |
headline_metrics: Optional[HeadlineMetrics] = None,
|
| 1525 |
+
interaction_metrics: Optional[Dict[str, Any]] = None,
|
| 1526 |
) -> ViraltestObservation:
|
| 1527 |
recent_eng = self._engagement_history[-10:] if self._engagement_history else []
|
| 1528 |
eng_rate = sum(recent_eng) / len(recent_eng) if recent_eng else 0.0
|
| 1529 |
+
eng_rate *= getattr(self, "_engagement_rate_loyalty_mult", 1.0)
|
| 1530 |
|
| 1531 |
meta: Dict[str, Any] = {"step": self._state.step_count, "task": self._task}
|
| 1532 |
if grader_score is not None:
|
|
|
|
| 1576 |
done=done,
|
| 1577 |
reward=round(reward, 4),
|
| 1578 |
metadata=meta,
|
| 1579 |
+
interaction_metrics=interaction_metrics,
|
| 1580 |
)
|
| 1581 |
|
| 1582 |
# ----- graders (monthly) -----
|
test_scenarios.py
CHANGED
|
@@ -5,9 +5,14 @@ Each step = one full day. Agent submits a sparse daily plan.
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import random as stdlib_random
|
| 8 |
-
from typing import Callable, Dict, List, Tuple
|
| 9 |
|
| 10 |
-
from models import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from server.viraltest_environment import (
|
| 12 |
TAG_POOL,
|
| 13 |
ViraltestEnvironment,
|
|
@@ -22,17 +27,29 @@ _TOPICS = ["AI tools", "fitness routine", "growth hacks", "travel guide", "food
|
|
| 22 |
_rng = stdlib_random.Random(99)
|
| 23 |
|
| 24 |
|
| 25 |
-
def _plan(
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def run_episode(
|
| 30 |
task: str,
|
| 31 |
plan_fn: Callable[[Dict, int], ViraltestAction],
|
| 32 |
label: str,
|
|
|
|
| 33 |
) -> float:
|
| 34 |
env = ViraltestEnvironment()
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
obs_dict = obs.model_dump()
|
| 37 |
rewards: List[float] = []
|
| 38 |
min_energy = 1.0
|
|
@@ -159,16 +176,139 @@ def plan_random(obs: dict, day: int) -> ViraltestAction:
|
|
| 159 |
return _plan(actions)
|
| 160 |
|
| 161 |
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
]
|
| 173 |
|
| 174 |
|
|
@@ -178,16 +318,18 @@ if __name__ == "__main__":
|
|
| 178 |
print("=" * 70)
|
| 179 |
print()
|
| 180 |
|
| 181 |
-
for scenario_name, plan_fn, description in SCENARIOS:
|
| 182 |
print("=" * 70)
|
| 183 |
print(f"{scenario_name}")
|
| 184 |
print(f" {description}")
|
|
|
|
|
|
|
| 185 |
print("=" * 70)
|
| 186 |
print()
|
| 187 |
|
| 188 |
for task in TASKS:
|
| 189 |
_rng = stdlib_random.Random(99)
|
| 190 |
-
run_episode(task, plan_fn, scenario_name)
|
| 191 |
|
| 192 |
print()
|
| 193 |
|
|
@@ -195,15 +337,18 @@ if __name__ == "__main__":
|
|
| 195 |
print("SUMMARY TABLE")
|
| 196 |
print("=" * 70)
|
| 197 |
print()
|
| 198 |
-
print(f"{'Scenario':<
|
| 199 |
-
print("-" *
|
| 200 |
|
| 201 |
-
for scenario_name, plan_fn, _ in SCENARIOS:
|
| 202 |
scores = []
|
| 203 |
for task in TASKS:
|
| 204 |
_rng = stdlib_random.Random(99)
|
| 205 |
env = ViraltestEnvironment()
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
| 207 |
obs_dict = obs.model_dump()
|
| 208 |
for day in range(1, 31):
|
| 209 |
action = plan_fn(obs_dict, day)
|
|
@@ -212,8 +357,10 @@ if __name__ == "__main__":
|
|
| 212 |
if obs.done:
|
| 213 |
break
|
| 214 |
scores.append((obs.metadata or {}).get("grader_score", 0.0))
|
| 215 |
-
print(f"{scenario_name:<
|
| 216 |
|
| 217 |
print()
|
| 218 |
print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
|
| 219 |
print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import random as stdlib_random
|
| 8 |
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
| 9 |
|
| 10 |
+
from models import (
|
| 11 |
+
CollabProposal,
|
| 12 |
+
DailyInteractions,
|
| 13 |
+
ScheduledAction,
|
| 14 |
+
ViraltestAction,
|
| 15 |
+
)
|
| 16 |
from server.viraltest_environment import (
|
| 17 |
TAG_POOL,
|
| 18 |
ViraltestEnvironment,
|
|
|
|
| 27 |
_rng = stdlib_random.Random(99)
|
| 28 |
|
| 29 |
|
| 30 |
+
def _plan(
    actions: list,
    collab: Optional[CollabProposal] = None,
    interactions: Optional[DailyInteractions] = None,
) -> ViraltestAction:
    """Wrap raw action dicts into a single ``ViraltestAction`` for one day.

    Each dict in ``actions`` is expanded as keyword arguments into a
    ``ScheduledAction``; ``collab`` and ``interactions`` are passed through
    unchanged (``None`` means "not submitted").
    """
    scheduled = []
    for spec in actions:
        scheduled.append(ScheduledAction(**spec))
    return ViraltestAction(
        scheduled_actions=scheduled,
        collab=collab,
        interactions=interactions,
    )
|
| 40 |
|
| 41 |
|
| 42 |
def run_episode(
|
| 43 |
task: str,
|
| 44 |
plan_fn: Callable[[Dict, int], ViraltestAction],
|
| 45 |
label: str,
|
| 46 |
+
user_niche: Optional[str] = None,
|
| 47 |
) -> float:
|
| 48 |
env = ViraltestEnvironment()
|
| 49 |
+
reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
|
| 50 |
+
if user_niche:
|
| 51 |
+
reset_kwargs["user_niche"] = user_niche
|
| 52 |
+
obs = env.reset(**reset_kwargs)
|
| 53 |
obs_dict = obs.model_dump()
|
| 54 |
rewards: List[float] = []
|
| 55 |
min_energy = 1.0
|
|
|
|
| 176 |
return _plan(actions)
|
| 177 |
|
| 178 |
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
+
# Collab grid scenarios — user_niche set on env.reset(...) by run_episode.
|
| 181 |
+
# Each picks a partner_id intended to land in a specific (same/diff x low/high) tier
|
| 182 |
+
# and proposes the collab on days 5 and 15.
|
| 183 |
+
# ---------------------------------------------------------------------------
|
| 184 |
+
|
| 185 |
+
def _collab_plan(day: int, partner_id: str, hour: int = 12) -> ViraltestAction:
    """Build a one-post daily plan that proposes a collab on days 5 and 15.

    The plan always contains a single reel post at ``hour``. Posting once a
    day is intended to keep engagement below the theoretical-max cap so the
    collab multipliers remain visible in the final score and follower count.
    On days 5 and 15 a ``CollabProposal`` with ``partner_id`` is attached
    for the same hour; on every other day no collab is proposed.
    """
    daily_post = {
        "hour": hour,
        "action_type": "post",
        "content_type": "reel",
        "topic": "AI tools",
        "tags": ["ai"],
        "intent": "watch_bait",
    }
    proposal = (
        CollabProposal(partner_id=partner_id, content_type="reel", hour=hour)
        if day in (5, 15)
        else None
    )
    return _plan([daily_post], collab=proposal)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def plan_collab_same_low(obs: dict, day: int) -> ViraltestAction:
    """Collab scenario: same niche, low audience overlap.

    Pairs with ``niche_expert``. Per the scenario notes this partner is a
    tech creator with overlap 0.10 against the user, and the scenario runs
    with ``user_niche=tech``. ``obs`` is unused.
    """
    partner = "niche_expert"
    return _collab_plan(day, partner_id=partner)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def plan_collab_same_high(obs: dict, day: int) -> ViraltestAction:
    """Collab scenario: same niche, high audience overlap.

    Pairs with ``viral_chaser`` (overlap 0.55 per the scenario notes). The
    scenario runs with ``user_niche=lifestyle`` so that the partner counts
    as same-niche. ``obs`` is unused.
    """
    partner = "viral_chaser"
    return _collab_plan(day, partner_id=partner)
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def plan_collab_diff_low(obs: dict, day: int) -> ViraltestAction:
    """Collab scenario: different niche, lower audience overlap.

    Pairs with ``travel_creator`` (overlap 0.30 per the scenario notes),
    chosen over ``lifestyle_blogger`` whose 0.40 overlap is used for the
    high-overlap case. The scenario runs with ``user_niche=tech``. ``obs``
    is unused.
    """
    partner = "travel_creator"
    return _collab_plan(day, partner_id=partner)
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def plan_collab_diff_high(obs: dict, day: int) -> ViraltestAction:
    """Collab scenario: different niche, high audience overlap.

    Pairs with ``lifestyle_blogger`` (overlap 0.40 per the scenario notes)
    while the scenario runs with ``user_niche=tech``. ``obs`` is unused.
    """
    partner = "lifestyle_blogger"
    return _collab_plan(day, partner_id=partner)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def plan_collab_blocked_zero(obs: dict, day: int) -> ViraltestAction:
    """Collab scenario: partner expected to trip the low-intersection guardrail.

    Pairs with ``b2b_thought_leader``, whose overlap with the user is 0.08
    per the scenario notes, i.e. below the 10% intersection guardrail.
    ``obs`` is unused.
    """
    partner = "b2b_thought_leader"
    return _collab_plan(day, partner_id=partner)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# ---------------------------------------------------------------------------
|
| 228 |
+
# Interaction scenarios — exercise the 5 penalty paths and the healthy band.
|
| 229 |
+
# ---------------------------------------------------------------------------
|
| 230 |
+
|
| 231 |
+
def _post_only_actions() -> list:
    """Return a fresh one-element action list: a single reel post at noon."""
    noon_reel = dict(
        hour=12,
        action_type="post",
        content_type="reel",
        topic="AI tools",
        tags=["ai"],
        intent="watch_bait",
    )
    return [noon_reel]
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def plan_interact_balanced(obs: dict, day: int) -> ViraltestAction:
    """Interaction scenario intended as the healthy baseline.

    Submits 12 likes, 5 comments and 3 replies at reply quality 0.8, all
    aimed at ``niche_expert``, alongside the single daily post. ``obs`` and
    ``day`` are unused.
    """
    settings = {
        "likes_on_others": 12,
        "comments_on_others": 5,
        "replies_to_audience": 3,
        "target_partner_ids": ["niche_expert"],
        "avg_reply_quality": 0.8,
    }
    daily = DailyInteractions(**settings)
    return _plan(_post_only_actions(), interactions=daily)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def plan_interact_spam(obs: dict, day: int) -> ViraltestAction:
    """Interaction scenario intended to hit the spam-volume path.

    Submits 80 likes and 40 comments with no replies (reply quality 0.4),
    aimed at ``niche_expert``, alongside the single daily post. ``obs`` and
    ``day`` are unused.
    """
    settings = {
        "likes_on_others": 80,
        "comments_on_others": 40,
        "replies_to_audience": 0,
        "target_partner_ids": ["niche_expert"],
        "avg_reply_quality": 0.4,
    }
    daily = DailyInteractions(**settings)
    return _plan(_post_only_actions(), interactions=daily)
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def plan_interact_ignoring_own(obs: dict, day: int) -> ViraltestAction:
    """Interaction scenario intended to hit the ignoring-own-audience path.

    Submits 8 likes and 4 comments on others but zero replies to the
    creator's own audience (reply quality 0.6), aimed at ``niche_expert``,
    alongside the single daily post. ``obs`` and ``day`` are unused.
    """
    settings = {
        "likes_on_others": 8,
        "comments_on_others": 4,
        "replies_to_audience": 0,
        "target_partner_ids": ["niche_expert"],
        "avg_reply_quality": 0.6,
    }
    daily = DailyInteractions(**settings)
    return _plan(_post_only_actions(), interactions=daily)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def plan_interact_off_niche(obs: dict, day: int) -> ViraltestAction:
    """Interaction scenario intended to hit the off-niche path.

    Submits 10 likes, 5 comments and 2 replies at reply quality 0.7, with
    four targets (food, fitness, travel, lifestyle creators) that the
    scenario table describes as non-tech, alongside the single daily post.
    ``obs`` and ``day`` are unused.
    """
    off_niche_targets = [
        "food_creator",
        "fitness_coach",
        "travel_creator",
        "lifestyle_blogger",
    ]
    settings = {
        "likes_on_others": 10,
        "comments_on_others": 5,
        "replies_to_audience": 2,
        "target_partner_ids": off_niche_targets,
        "avg_reply_quality": 0.7,
    }
    daily = DailyInteractions(**settings)
    return _plan(_post_only_actions(), interactions=daily)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def plan_interact_low_quality(obs: dict, day: int) -> ViraltestAction:
    """Interaction scenario intended to hit the low-quality-reply path.

    Submits 10 likes, 5 comments and 8 replies at reply quality 0.05, aimed
    at ``niche_expert``, alongside the single daily post. ``obs`` and
    ``day`` are unused.
    """
    settings = {
        "likes_on_others": 10,
        "comments_on_others": 5,
        "replies_to_audience": 8,
        "target_partner_ids": ["niche_expert"],
        "avg_reply_quality": 0.05,
    }
    daily = DailyInteractions(**settings)
    return _plan(_post_only_actions(), interactions=daily)
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
# Scenario tuple: (label, plan_fn, description, user_niche)
|
| 280 |
+
SCENARIOS: List[Tuple[str, Callable, str, Optional[str]]] = [
    # --- Baseline posting strategies (no user_niche override) ---
    ("Always Rest", plan_always_rest,
     "Zero engagement, no growth, energy stays max", None),
    ("Spam Post", plan_spam,
     "Post every hour, burns out instantly", None),
    ("Smart Agent", plan_smart,
     "Peak hours, trending, varied types, energy management", None),
    ("No Rest", plan_no_rest,
     "Post every hour, never rests, burns out", None),
    ("Minimal Poster", plan_minimal,
     "1 carousel at noon per day", None),
    ("Tag Explorer", plan_tag_explorer,
     "Rotates through tag pool for max discovery", None),
    ("Queue Optimizer", plan_queue_optimizer,
     "Creates content first, posts from queue", None),
    ("Double Peak", plan_double_peak,
     "Posts at 9am and 3pm", None),
    ("Random Actor", plan_random,
     "Random sparse actions each day", None),

    # --- Collab grid: same/diff niche x low/high overlap, plus the guardrail case ---
    ("Collab Same-Niche Low Overlap", plan_collab_same_low,
     "user_niche=tech + niche_expert (same niche, overlap 0.10) — should yield HIGH boost.",
     "tech"),
    ("Collab Same-Niche High Overlap", plan_collab_same_high,
     "user_niche=lifestyle + viral_chaser (same niche, overlap 0.55) — penalty path: redundant audience.",
     "lifestyle"),
    ("Collab Diff-Niche Low Overlap", plan_collab_diff_low,
     "user_niche=tech + travel_creator (diff niche, overlap 0.30) — capped below same-niche-low.",
     "tech"),
    ("Collab Diff-Niche High Overlap", plan_collab_diff_high,
     "user_niche=tech + lifestyle_blogger (diff niche, overlap 0.40) — LOW reward (mismatch).",
     "tech"),
    ("Collab Guardrail Block", plan_collab_blocked_zero,
     "user_niche=tech + b2b_thought_leader (overlap 0.08 < 10%) — guardrail trips, forced penalty applied.",
     "tech"),

    # --- Interaction grid: the healthy baseline plus four penalty paths ---
    ("Interact Balanced", plan_interact_balanced,
     "Healthy daily likes/comments/replies on-niche.",
     "tech"),
    ("Interact Spam", plan_interact_spam,
     "80 likes + 40 comments — spam path, shadowban_risk + reach penalty.",
     "tech"),
    ("Interact Ignoring Own", plan_interact_ignoring_own,
     "Zero replies to own audience — compounding loyalty drop.",
     "tech"),
    ("Interact Off-Niche", plan_interact_off_niche,
     "All interactions targeted at non-tech creators — reach penalty.",
     "tech"),
    ("Interact Low-Quality", plan_interact_low_quality,
     "Replies with quality=0.05 — replies discounted + extra reward penalty.",
     "tech"),
]
|
| 313 |
|
| 314 |
|
|
|
|
| 318 |
print("=" * 70)
|
| 319 |
print()
|
| 320 |
|
| 321 |
+
for scenario_name, plan_fn, description, user_niche in SCENARIOS:
|
| 322 |
print("=" * 70)
|
| 323 |
print(f"{scenario_name}")
|
| 324 |
print(f" {description}")
|
| 325 |
+
if user_niche:
|
| 326 |
+
print(f" user_niche={user_niche}")
|
| 327 |
print("=" * 70)
|
| 328 |
print()
|
| 329 |
|
| 330 |
for task in TASKS:
|
| 331 |
_rng = stdlib_random.Random(99)
|
| 332 |
+
run_episode(task, plan_fn, scenario_name, user_niche=user_niche)
|
| 333 |
|
| 334 |
print()
|
| 335 |
|
|
|
|
| 337 |
print("SUMMARY TABLE")
|
| 338 |
print("=" * 70)
|
| 339 |
print()
|
| 340 |
+
print(f"{'Scenario':<35} {'Engage':>8} {'Strategic':>10} {'Competitive':>12}")
|
| 341 |
+
print("-" * 67)
|
| 342 |
|
| 343 |
+
for scenario_name, plan_fn, _, user_niche in SCENARIOS:
|
| 344 |
scores = []
|
| 345 |
for task in TASKS:
|
| 346 |
_rng = stdlib_random.Random(99)
|
| 347 |
env = ViraltestEnvironment()
|
| 348 |
+
reset_kwargs: Dict[str, Any] = {"task": task, "seed": SEED}
|
| 349 |
+
if user_niche:
|
| 350 |
+
reset_kwargs["user_niche"] = user_niche
|
| 351 |
+
obs = env.reset(**reset_kwargs)
|
| 352 |
obs_dict = obs.model_dump()
|
| 353 |
for day in range(1, 31):
|
| 354 |
action = plan_fn(obs_dict, day)
|
|
|
|
| 357 |
if obs.done:
|
| 358 |
break
|
| 359 |
scores.append((obs.metadata or {}).get("grader_score", 0.0))
|
| 360 |
+
print(f"{scenario_name:<35} {scores[0]:>8.4f} {scores[1]:>10.4f} {scores[2]:>12.4f}")
|
| 361 |
|
| 362 |
print()
|
| 363 |
print("EXPECTED: Smart/Queue/Tag Explorer should score highest.")
|
| 364 |
print("Burnout agents (spam, no_rest) should score near 0 on strategic/competitive.")
|
| 365 |
+
print("Collab Same-Niche Low Overlap should outperform any Diff-Niche collab.")
|
| 366 |
+
print("Interact Spam/Off-Niche/Ignoring/Low-Quality should underperform Balanced.")
|