"""Curated few-shot trajectories for GRPO system prompt guidance.

Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL
eliminates training collapse for 1.5B models. These examples show the
model the complete tool-calling loop with real environment output.
"""
|
|
# Few-shot example whose final answer is a single integer.
# The trajectory alternates assistant actions (describe -> query -> answer)
# with the tool output that followed each action.
EXAMPLE_INTEGER = {
    "question": "How many cars have a larger accelerate than the car "
    "with the largest horsepower?",
    "trajectory": [
        {
            "role": "assistant",
            "action": 'describe(table_name="cars_data")',
        },
        {
            "role": "tool",
            "content": (
                "Table 'cars_data' columns:\n"
                "- Id: INTEGER\n"
                "- Horsepower: TEXT\n"
                "- Accelerate: REAL\n"
                "- Year: INTEGER\n"
                "Row count: 406"
            ),
        },
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT COUNT(*) FROM cars_data '
                "WHERE Accelerate > (SELECT Accelerate FROM "
                'cars_data ORDER BY Horsepower DESC LIMIT 1)")'
            ),
        },
        {"role": "tool", "content": "1. 39"},
        {
            "role": "assistant",
            "action": 'answer(value="39")',
        },
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
|
|
# Few-shot example whose query returns no rows, so the submitted answer is
# the empty list "[]".
EXAMPLE_EMPTY = {
    "question": "Show location and name for all stadiums with a "
    "capacity between 5000 and 10000.",
    "trajectory": [
        {
            "role": "assistant",
            "action": 'describe(table_name="stadium")',
        },
        {
            "role": "tool",
            "content": (
                "Table 'stadium' columns:\n"
                "- Stadium_ID: INT\n"
                "- Location: TEXT\n"
                "- Name: TEXT\n"
                "- Capacity: INT\n"
                "Row count: 9"
            ),
        },
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT Location, Name FROM stadium '
                'WHERE Capacity BETWEEN 5000 AND 10000")'
            ),
        },
        {"role": "tool", "content": "No rows returned."},
        {
            "role": "assistant",
            "action": 'answer(value="[]")',
        },
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
|
|
# Default example set, in the order it is rendered into the prompt.
ALL_EXAMPLES: list[dict] = [EXAMPLE_INTEGER, EXAMPLE_EMPTY]
|
|
|
|
| def format_examples_for_prompt( |
| examples: list[dict] | None = None, |
| ) -> str: |
| """Render few-shot examples as a compact text block for the system prompt. |
| |
| Each example shows: question, then alternating assistant actions and |
| tool responses. Compact enough to fit in the system prompt without |
| dominating the token budget (~200 tokens total). |
| """ |
| if examples is None: |
| examples = ALL_EXAMPLES |
|
|
| lines = ["Examples:"] |
| for i, ex in enumerate(examples, 1): |
| lines.append(f"\n{i}. Q: {ex['question']}") |
| for step in ex["trajectory"]: |
| if step["role"] == "assistant": |
| lines.append(f" β {step['action']}") |
| else: |
| content = step["content"].replace("\n", " | ") |
| lines.append(f" β {content}") |
|
|
| return "\n".join(lines) |
|
|