{ "$schema": "autocode-verification-input-v1", "feature_id": "F003", "spec_path": "specs/F003-IMPLEMENTATION_SPEC.md", "generated": "2026-03-27T12:00:00Z", "verification_mode": "mvp", "overview": { "summary": "Dense 3-layer reward system for SQLEnv. Layer 1 provides operational signals (exec_ok, new_info, repeat penalty, step_cost). Layer 2 computes progress-to-target for QUERY actions using fixed weighted average of cardinality (0.25), value overlap (0.50), and numeric range proximity (0.25), binned to 5 levels with improvement-only gating. Layer 3 is the existing terminal correctness signal. Total step rewards clamped to [-0.2, +0.5].", "goal": "Agents get meaningful per-step feedback during exploration so GRPO training converges. Random exploration yields ~0.1 cumulative reward, targeted queries ~0.3, correct answer ~1.3." }, "interfaces": { "types": [ { "name": "EpisodeContext", "fields": [ {"name": "gold_rows", "type": "list[tuple]", "optional": false, "description": "Gold SQL result rows cached at reset(), used by Layer 2 progress metrics"}, {"name": "query_hashes", "type": "set[str]", "optional": false, "description": "Set of hashes of previously executed SQL strings for repeat detection"}, {"name": "best_progress", "type": "float", "optional": false, "description": "Best binned progress score seen so far (improvement-only gating)"}, {"name": "cumulative_step_reward", "type": "float", "optional": false, "description": "Running total of step rewards for clamping to [-0.2, +0.5]"}, {"name": "cumulative_new_info_reward", "type": "float", "optional": false, "description": "Running total of new_info rewards for capping at 0.10"} ], "description": "Per-episode server-side state extended with reward-tracking fields" } ], "functions": [ { "name": "compute_step_reward", "params": [ {"name": "ctx", "type": "EpisodeContext", "description": "Episode context (mutated: updates tracking fields)"}, {"name": "action_type", "type": "str", "description": "One of DESCRIBE, SAMPLE, QUERY"},
{"name": "sql", "type": "str", "description": "SQL string executed (for repeat detection)"}, {"name": "rows", "type": "list[tuple] | None", "description": "Result rows from query, or None if error"}, {"name": "error", "type": "str | None", "description": "Error message if action failed, else None"} ], "returns": "float", "description": "Main entry point. Combines Layer 1 + Layer 2 signals, clamps running total to [-0.2, +0.5]." }, { "name": "_layer1_operational", "params": [ {"name": "ctx", "type": "EpisodeContext", "description": "Episode context"}, {"name": "action_type", "type": "str", "description": "Action type string"}, {"name": "sql", "type": "str", "description": "SQL string for repeat detection"}, {"name": "rows", "type": "list[tuple] | None", "description": "Result rows"}, {"name": "error", "type": "str | None", "description": "Error message if failed"} ], "returns": "float", "description": "Layer 1 operational signals: exec_ok(+0.02), new_info(+0.01 capped 0.10), repeat(-0.01), step_cost(-0.005)." }, { "name": "_layer2_progress", "params": [ {"name": "ctx", "type": "EpisodeContext", "description": "Episode context with gold_rows"}, {"name": "rows", "type": "list[tuple]", "description": "Query result rows"} ], "returns": "float", "description": "Layer 2 progress-to-target for QUERY only. Weighted avg of sub-metrics, binned to 5 levels, improvement-only, scaled by 0.15." }, { "name": "_cardinality_score", "params": [ {"name": "pred_rows", "type": "list[tuple]", "description": "Predicted result rows"}, {"name": "gold_rows", "type": "list[tuple]", "description": "Gold result rows"} ], "returns": "float", "description": "Row count similarity: 1 - |len(pred) - len(gold)| / max(len(pred), len(gold), 1). Returns [0.0, 1.0]."
}, { "name": "_value_overlap_score", "params": [ {"name": "pred_rows", "type": "list[tuple]", "description": "Predicted result rows"}, {"name": "gold_rows", "type": "list[tuple]", "description": "Gold result rows"} ], "returns": "float", "description": "Jaccard overlap of flattened cell values as strings. Returns [0.0, 1.0]." }, { "name": "_numeric_range_score", "params": [ {"name": "pred_rows", "type": "list[tuple]", "description": "Predicted result rows"}, {"name": "gold_rows", "type": "list[tuple]", "description": "Gold result rows"} ], "returns": "float", "description": "Log-distance proximity for numeric cells. mean(1/(1+log(1+|pred-gold|))). Returns 1.0 if no numerics in gold. Returns [0.0, 1.0]." }, { "name": "_bin_progress", "params": [ {"name": "raw_score", "type": "float", "description": "Raw progress score in [0.0, 1.0]"} ], "returns": "float", "description": "Bin to {0, 0.25, 0.5, 0.75, 1.0}. Thresholds at 0.125, 0.375, 0.625, 0.875." } ], "api_endpoints": [] }, "data_flow": { "primary_flow": [ "step() receives SQLAction with action_type and argument", "step() dispatches to handler (_handle_query, _handle_describe, _handle_sample)", "For non-terminal actions, step() calls compute_step_reward(ctx, action_type, sql, rows, error)", "compute_step_reward calls _layer1_operational for all action types", "compute_step_reward calls _layer2_progress for QUERY actions only (when rows is not None and gold_rows is not empty)", "_layer2_progress computes weighted average of _cardinality_score(0.25), _value_overlap_score(0.50), _numeric_range_score(0.25)", "_layer2_progress bins result via _bin_progress, rewards only improvement over best_progress, scales by 0.15", "compute_step_reward sums Layer 1 + Layer 2, clamps cumulative to [-0.2, +0.5], returns step reward" ], "alternative_flows": [ { "name": "SQL error on QUERY", "trigger": "Query execution raises sqlite3.Error", "steps": [ "step() catches error, sets error string",
"compute_step_reward called with error set and rows=None", "Layer 1 returns step_cost only (-0.005)", "Layer 2 skipped" ] }, { "name": "Empty gold_rows", "trigger": "Gold SQL returned no rows at reset()", "steps": [ "gold_rows stored as empty list in EpisodeContext", "Layer 2 returns 0.0 (skipped)", "Layer 1 operates normally" ] }, { "name": "Repeated query", "trigger": "SQL hash already in ctx.query_hashes", "steps": [ "Layer 1 applies repeat penalty (-0.01) in addition to step_cost", "No exec_ok bonus for repeated query", "Layer 2 still computes progress (may still show improvement)" ] } ] }, "error_handling": { "error_types": [ { "name": "SQL execution error", "when": "Invalid query syntax or runtime SQL error during QUERY action", "message_template": "Layer 1 returns step_cost only; Layer 2 skipped" }, { "name": "Empty gold rows", "when": "Gold SQL returns no rows at episode reset", "message_template": "Layer 2 returns 0.0; Layer 1 operates normally" } ], "retry_strategy": null }, "dependencies": { "external": [], "internal": [ "models.py (EpisodeContext dataclass)", "server/sql_environment.py (step() and reset() integration)", "tests/test_smoke.py (existing tests need assertion updates)" ] } }