vaibhav12332112312 committed on
Commit
afbf541
·
1 Parent(s): 4299c91

Strip leaked peak-hour info from observation, force tool discovery

Browse files

- Remove audience_active_hours and competitor_recent_post_hours from
format_obs: the model was reading them and skipping the query_* tools entirely
- Rewrite TOOL POLICY: state that tools are free, encourage aggressive discovery,
and show multiple segment queries on day 1 instead of one
- An empty `Tool results` section now reads "(none — call query_* tools to discover)"
to nudge the model on its first turn

Made-with: Cursor

Files changed (1) hide show
  1. training/train_grpo.ipynb +27 -29
training/train_grpo.ipynb CHANGED
@@ -441,18 +441,21 @@
441
  "- topic: free-form string\n",
442
  "- empty scheduled_actions = full day rest\"\"\")\n",
443
  "\n",
444
- "SYSTEM_PROMPT = _SYSTEM_BASE + textwrap.dedent(\"\"\"\n",
445
- "\n",
446
- "TOOL POLICY (MANDATORY — empty tool_calls on day 1 = wasted day):\n",
447
- "- days_elapsed == 0 -> call AT LEAST these in tool_calls:\n",
448
- " {\"name\": \"query_trends\", \"arguments\": {\"niche\": \"<one of TOPIC_CATEGORIES keys>\"}}\n",
449
- " {\"name\": \"query_audience\", \"arguments\": {\"segment_id\": \"young_professionals\"}}\n",
450
- " {\"name\": \"query_creator_pool\", \"arguments\": {}}\n",
451
- " {\"name\": \"query_competitor\", \"arguments\": {\"competitor_id\": \"niche_expert\", \"window_days\": 7}}\n",
452
- "- days_elapsed >= 1 -> before scheduling, call:\n",
453
- " {\"name\": \"predict_engagement\", \"arguments\": {\"scheduled_actions\": [...]}}\n",
454
- " and at least one query_* tool whose result you don't already have in Tool results.\n",
455
- "- audience_active_hours in the observation is a coarse hint; query_audience returns ranked topic affinities you cannot get otherwise.\"\"\")\n",
 
 
 
456
  "SYSTEM_PROMPT_EVAL = SYSTEM_PROMPT\n",
457
  "SYSTEM_PROMPT_TRAIN = SYSTEM_PROMPT\n",
458
  "\n",
@@ -466,23 +469,18 @@
466
  " signals_str = (f\"Signals: watch={signals.watch_time:.3f} \"\n",
467
  " f\"sends={signals.sends_per_reach:.3f} \"\n",
468
  " f\"saves={signals.saves:.3f}\\n\")\n",
469
- " meta = getattr(obs, \"metadata\", None) or {}\n",
470
- " aud = meta.get(\"audience_active_hours\") or []\n",
471
- " comp = meta.get(\"competitor_recent_post_hours\") or []\n",
472
- " tool_str = \"\"\n",
473
- " for tr in getattr(obs, \"tool_results\", []):\n",
474
- " if tr.success:\n",
475
- " tool_str += f\" {tr.name}: {json.dumps(tr.data)}\\n\"\n",
476
- " if not tool_str:\n",
477
- " tool_str = \" (none)\\n\"\n",
478
- " return (f\"Day: {day_name} | days_elapsed={obs.days_elapsed}\\n\"\n",
479
- " f\"Energy: {obs.creator_energy:.2f} | Followers: {obs.follower_count}\\n\"\n",
480
- " f\"Engagement: {obs.engagement_rate:.3f} | Queue: {obs.content_queue_size}\\n\"\n",
481
- " f\"{signals_str}\"\n",
482
- " f\"audience_active_hours: {aud}\\n\"\n",
483
- " f\"competitor_recent_post_hours: {comp}\\n\"\n",
484
- " f\"Tool results:\\n{tool_str}\"\n",
485
- " f\"Plan today's actions (JSON only):\")\n",
486
  "\n",
487
  "\n",
488
  "def is_well_formed_response(text):\n",
 
441
  "- topic: free-form string\n",
442
  "- empty scheduled_actions = full day rest\"\"\")\n",
443
  "\n",
444
+ "SYSTEM_PROMPT = _SYSTEM_BASE + textwrap.dedent(\"\"\"\n",
445
+ "\n",
446
+ "TOOL POLICY (tool_calls cost nothing call them aggressively):\n",
447
+ "- The observation tells you ONLY your account stats. Audience peak hours, segment\n",
448
+ " affinities, trending topics/tags and competitor schedules are NOT given. You must\n",
449
+ " discover them via tool_calls and read them from `Tool results` next turn.\n",
450
+ "- days_elapsed == 0 -> call EVERY discovery tool you might need, e.g.:\n",
451
+ " {\"name\": \"query_trends\", \"arguments\": {\"niche\": \"<TOPIC_CATEGORIES key>\"}}\n",
452
+ " {\"name\": \"query_audience\", \"arguments\": {\"segment_id\": \"young_professionals\"}}\n",
453
+ " {\"name\": \"query_audience\", \"arguments\": {\"segment_id\": \"students\"}}\n",
454
+ " {\"name\": \"query_creator_pool\", \"arguments\": {}}\n",
455
+ " {\"name\": \"query_competitor\", \"arguments\": {\"competitor_id\": \"niche_expert\", \"window_days\": 7}}\n",
456
+ "- days_elapsed >= 1 -> before scheduling posts, call:\n",
457
+ " {\"name\": \"predict_engagement\", \"arguments\": {\"scheduled_actions\": [...]}}\n",
458
+ " and any query_* whose result is missing from `Tool results`.\"\"\")\n",
459
  "SYSTEM_PROMPT_EVAL = SYSTEM_PROMPT\n",
460
  "SYSTEM_PROMPT_TRAIN = SYSTEM_PROMPT\n",
461
  "\n",
 
469
  " signals_str = (f\"Signals: watch={signals.watch_time:.3f} \"\n",
470
  " f\"sends={signals.sends_per_reach:.3f} \"\n",
471
  " f\"saves={signals.saves:.3f}\\n\")\n",
472
+ " tool_str = \"\"\n",
473
+ " for tr in getattr(obs, \"tool_results\", []):\n",
474
+ " if tr.success:\n",
475
+ " tool_str += f\" {tr.name}: {json.dumps(tr.data)}\\n\"\n",
476
+ " if not tool_str:\n",
477
+ " tool_str = \" (none — call query_* tools to discover)\\n\"\n",
478
+ " return (f\"Day: {day_name} | days_elapsed={obs.days_elapsed}\\n\"\n",
479
+ " f\"Energy: {obs.creator_energy:.2f} | Followers: {obs.follower_count}\\n\"\n",
480
+ " f\"Engagement: {obs.engagement_rate:.3f} | Queue: {obs.content_queue_size}\\n\"\n",
481
+ " f\"{signals_str}\"\n",
482
+ " f\"Tool results:\\n{tool_str}\"\n",
483
+ " f\"Plan today's actions (JSON only):\")\n",
 
 
 
 
 
484
  "\n",
485
  "\n",
486
  "def is_well_formed_response(text):\n",