sql_env / training /few_shot_examples.py
hjerpe's picture
Upload folder using huggingface_hub
9e64e71 verified
"""Curated few-shot trajectories for GRPO system prompt guidance.
Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL
eliminates training collapse for 1.5B models. These examples show the
model the complete tool-calling loop with real environment output.
"""
# Example 1: single-table integer question (describe → query → answer)
EXAMPLE_INTEGER = {
    "question": (
        "How many cars have a larger accelerate than "
        "the car with the largest horsepower?"
    ),
    "trajectory": [
        # Step 1: look at the table schema.
        {"role": "assistant", "action": 'describe(table_name="cars_data")'},
        {
            "role": "tool",
            # One schema line per list entry; joined with newlines.
            "content": "\n".join(
                [
                    "Table 'cars_data' columns:",
                    "- Id: INTEGER",
                    "- Horsepower: TEXT",
                    "- Accelerate: REAL",
                    "- Year: INTEGER",
                    "Row count: 406",
                ]
            ),
        },
        # Step 2: run the counting query with a scalar subquery.
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT COUNT(*) FROM cars_data WHERE Accelerate > '
                "(SELECT Accelerate FROM cars_data "
                'ORDER BY Horsepower DESC LIMIT 1)")'
            ),
        },
        {"role": "tool", "content": "1. 39"},
        # Step 3: submit the single integer result.
        {"role": "assistant", "action": 'answer(value="39")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
# Example 2: empty result (describe → query → answer with [])
EXAMPLE_EMPTY = {
    "question": (
        "Show location and name for all stadiums "
        "with a capacity between 5000 and 10000."
    ),
    "trajectory": [
        # Step 1: look at the table schema.
        {"role": "assistant", "action": 'describe(table_name="stadium")'},
        {
            "role": "tool",
            # One schema line per list entry; joined with newlines.
            "content": "\n".join(
                [
                    "Table 'stadium' columns:",
                    "- Stadium_ID: INT",
                    "- Location: TEXT",
                    "- Name: TEXT",
                    "- Capacity: INT",
                    "Row count: 9",
                ]
            ),
        },
        # Step 2: run the range filter; the tool reports no matching rows.
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT Location, Name FROM stadium WHERE '
                'Capacity BETWEEN 5000 AND 10000")'
            ),
        },
        {"role": "tool", "content": "No rows returned."},
        # Step 3: an empty result is submitted as "[]".
        {"role": "assistant", "action": 'answer(value="[]")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
# Default example set, in rendering order; format_examples_for_prompt
# falls back to this list when it is called without an argument.
ALL_EXAMPLES = [
    EXAMPLE_INTEGER,
    EXAMPLE_EMPTY,
]
def format_examples_for_prompt(
examples: list[dict] | None = None,
) -> str:
"""Render few-shot examples as a compact text block for the system prompt.
Each example shows: question, then alternating assistant actions and
tool responses. Compact enough to fit in the system prompt without
dominating the token budget (~200 tokens total).
"""
if examples is None:
examples = ALL_EXAMPLES
lines = ["Examples:"]
for i, ex in enumerate(examples, 1):
lines.append(f"\n{i}. Q: {ex['question']}")
for step in ex["trajectory"]:
if step["role"] == "assistant":
lines.append(f" β†’ {step['action']}")
else:
content = step["content"].replace("\n", " | ")
lines.append(f" ← {content}")
return "\n".join(lines)