Spaces:

hjerpe
/

sql_env

Running

App Files Files Community

sql_env / training /few_shot_examples.py

hjerpe

Upload folder using huggingface_hub

9e64e71 verified 6 days ago

raw

history blame contribute delete

3.4 kB

	"""Curated few-shot trajectories for GRPO system prompt guidance.

	Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL
	eliminates training collapse for 1.5B models. These examples show the
	model the complete tool-calling loop with real environment output.
	"""

	# Example 1: aggregate over a single table, answered with one integer.
	# Flow: describe the table, run one COUNT(*) query, submit the number.
	EXAMPLE_INTEGER = {
	    "question": (
	        "How many cars have a larger accelerate than the car with the "
	        "largest horsepower?"
	    ),
	    "trajectory": [
	        {"role": "assistant", "action": 'describe(table_name="cars_data")'},
	        {
	            "role": "tool",
	            # One schema line per entry; joined with newlines at import time.
	            "content": "\n".join(
	                [
	                    "Table 'cars_data' columns:",
	                    "- Id: INTEGER",
	                    "- Horsepower: TEXT",
	                    "- Accelerate: REAL",
	                    "- Year: INTEGER",
	                    "Row count: 406",
	                ]
	            ),
	        },
	        {
	            "role": "assistant",
	            "action": (
	                'query(sql="SELECT COUNT(*) FROM cars_data WHERE Accelerate > '
	                "(SELECT Accelerate FROM cars_data "
	                'ORDER BY Horsepower DESC LIMIT 1)")'
	            ),
	        },
	        {"role": "tool", "content": "1. 39"},
	        {"role": "assistant", "action": 'answer(value="39")'},
	        {"role": "tool", "content": "Answer submitted: correct."},
	    ],
	}

	# Example 2: a filter query that matches no rows, answered with "[]".
	# Flow: describe the table, run the filter query, submit the empty list.
	EXAMPLE_EMPTY = {
	    "question": (
	        "Show location and name for all stadiums with a capacity between "
	        "5000 and 10000."
	    ),
	    "trajectory": [
	        {"role": "assistant", "action": 'describe(table_name="stadium")'},
	        {
	            "role": "tool",
	            # One schema line per entry; joined with newlines at import time.
	            "content": "\n".join(
	                [
	                    "Table 'stadium' columns:",
	                    "- Stadium_ID: INT",
	                    "- Location: TEXT",
	                    "- Name: TEXT",
	                    "- Capacity: INT",
	                    "Row count: 9",
	                ]
	            ),
	        },
	        {
	            "role": "assistant",
	            "action": (
	                'query(sql="SELECT Location, Name FROM stadium WHERE Capacity '
	                'BETWEEN 5000 AND 10000")'
	            ),
	        },
	        {"role": "tool", "content": "No rows returned."},
	        {"role": "assistant", "action": 'answer(value="[]")'},
	        {"role": "tool", "content": "Answer submitted: correct."},
	    ],
	}

	# Default example set, listed in the order the examples are rendered;
	# used when format_examples_for_prompt() is called without a list.
	ALL_EXAMPLES = [
	    EXAMPLE_INTEGER,
	    EXAMPLE_EMPTY,
	]


	def format_examples_for_prompt(
	examples: list[dict] \| None = None,
	) -> str:
	"""Render few-shot examples as a compact text block for the system prompt.

	Each example shows: question, then alternating assistant actions and
	tool responses. Compact enough to fit in the system prompt without
	dominating the token budget (~200 tokens total).
	"""
	if examples is None:
	examples = ALL_EXAMPLES

	lines = ["Examples:"]
	for i, ex in enumerate(examples, 1):
	lines.append(f"\n{i}. Q: {ex['question']}")
	for step in ex["trajectory"]:
	if step["role"] == "assistant":
	lines.append(f" → {step['action']}")
	else:
	content = step["content"].replace("\n", " \| ")
	lines.append(f" ← {content}")

	return "\n".join(lines)