"""Curated few-shot trajectories for GRPO system prompt guidance. Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL eliminates training collapse for 1.5B models. These examples show the model the complete tool-calling loop with real environment output. """ # Example 1: single-table integer question (describe → query → answer) EXAMPLE_INTEGER = { "question": "How many cars have a larger accelerate than the car " "with the largest horsepower?", "trajectory": [ { "role": "assistant", "action": 'describe(table_name="cars_data")', }, { "role": "tool", "content": ( "Table 'cars_data' columns:\n" "- Id: INTEGER\n" "- Horsepower: TEXT\n" "- Accelerate: REAL\n" "- Year: INTEGER\n" "Row count: 406" ), }, { "role": "assistant", "action": ( 'query(sql="SELECT COUNT(*) FROM cars_data ' "WHERE Accelerate > (SELECT Accelerate FROM " 'cars_data ORDER BY Horsepower DESC LIMIT 1)")' ), }, {"role": "tool", "content": "1. 39"}, { "role": "assistant", "action": 'answer(value="39")', }, {"role": "tool", "content": "Answer submitted: correct."}, ], } # Example 2: empty result (describe → query → answer with []) EXAMPLE_EMPTY = { "question": "Show location and name for all stadiums with a " "capacity between 5000 and 10000.", "trajectory": [ { "role": "assistant", "action": 'describe(table_name="stadium")', }, { "role": "tool", "content": ( "Table 'stadium' columns:\n" "- Stadium_ID: INT\n" "- Location: TEXT\n" "- Name: TEXT\n" "- Capacity: INT\n" "Row count: 9" ), }, { "role": "assistant", "action": ( 'query(sql="SELECT Location, Name FROM stadium ' 'WHERE Capacity BETWEEN 5000 AND 10000")' ), }, {"role": "tool", "content": "No rows returned."}, { "role": "assistant", "action": 'answer(value="[]")', }, {"role": "tool", "content": "Answer submitted: correct."}, ], } ALL_EXAMPLES = [EXAMPLE_INTEGER, EXAMPLE_EMPTY] def format_examples_for_prompt( examples: list[dict] | None = None, ) -> str: """Render few-shot examples as a compact text block for the system prompt. Each example shows: question, then alternating assistant actions and tool responses. Compact enough to fit in the system prompt without dominating the token budget (~200 tokens total). """ if examples is None: examples = ALL_EXAMPLES lines = ["Examples:"] for i, ex in enumerate(examples, 1): lines.append(f"\n{i}. Q: {ex['question']}") for step in ex["trajectory"]: if step["role"] == "assistant": lines.append(f" → {step['action']}") else: content = step["content"].replace("\n", " | ") lines.append(f" ← {content}") return "\n".join(lines)