File size: 2,646 Bytes
d103a0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Identity of this environment definition.
name: sql-data-analyst
# Kept quoted so the version is always read as a string.
version: '1.0.0'
# Folded scalar: the wrapped lines below are joined with single spaces.
description: >
  An RL environment where an AI agent answers real business intelligence
  questions by iteratively writing and executing SQL queries against a live
  SQLite database. Simulates the day-to-day workflow of a data analyst.

# Labels attached to this environment.
# NOTE(review): presumably used for search/discovery by the hosting
# registry — confirm with the consumer.
tags:
  - openenv
  - sql
  - data-analysis
  - business-intelligence
  - real-world

# Attribution and source location for this environment.
author: sql-data-analyst
# NOTE(review): Hugging Face Space URLs normally take the form
# /spaces/<owner>/<space-name>; this one has a single path segment after
# /spaces/ — confirm the link resolves.
repository: https://huggingface.co/spaces/sql-data-analyst

# Shape of the observation handed to the agent: a dict with the fields
# listed below. Three fields (last_query, last_result, last_error) are
# declared nullable; the rest carry no nullable flag.
observation_space:
  type: dict
  fields:
    # Static context for the episode: the database schema and the question.
    schema_summary:
      type: string
      description: Compact one-line-per-table schema of the database
    question:
      type: string
      description: Natural language business question to answer
    # Feedback from the most recent query.
    # NOTE(review): presumably null before any query has run — confirm.
    last_query:
      type: string
      nullable: true
      description: The last SQL query executed by the agent
    # NOTE(review): the description lists an `error` entry inside this object
    # while `last_error` is also a separate field — confirm which one
    # consumers should rely on.
    last_result:
      type: object
      nullable: true
      description: Result of the last query (columns, rows, error)
    last_error:
      type: string
      nullable: true
      description: SQL error message if last query failed
    # Step accounting; each task entry under `tasks` sets its own max_steps.
    step:
      type: integer
      description: Current step number
    max_steps:
      type: integer
      description: Maximum steps allowed for this task
    # List of strings; per the description, more hints appear as the step
    # count grows.
    hints:
      type: array
      items: string
      description: Progressive hints revealed as steps increase
    # Episode-termination flag.
    done:
      type: boolean
      description: Whether the episode is complete

# Actions available to the agent: a union of two string-valued options, of
# which exactly one must be provided (see `description`).
action_space:
  type: union
  description: Agent must provide exactly one of the following
  options:
    # Exploration action: run a query against the database.
    # NOTE(review): the description names SELECT and WITH only; whether other
    # statements are rejected is not visible in this file — confirm in the
    # environment implementation.
    sql_query:
      type: string
      description: A SELECT or WITH SQL query to execute
    # Terminal action: per the description, submitting ends the episode.
    submit_answer:
      type: string
      description: Final answer to the question. Ends the episode.

# Task catalogue. Entries are listed easy -> medium -> hard, and the step
# budget (max_steps) grows with difficulty: 10, 15, 20.
tasks:
  # Easy: a single filtered count.
  - id: monthly_signups
    difficulty: easy
    max_steps: 10
    description: 'Count the number of users who signed up in the last 30 days'
    skills_required:
      - COUNT
      - WHERE with date filter

  # Medium: multi-table aggregation over a date range.
  - id: top_revenue_category
    difficulty: medium
    max_steps: 15
    description: 'Find which product category generated the most revenue in Q3'
    skills_required:
      - JOIN (3 tables)
      - GROUP BY
      - SUM aggregation
      - Date range filtering

  # Hard: per-user order counting combined with date logic.
  - id: churn_analysis
    difficulty: hard
    max_steps: 20
    # Folded scalar: the wrapped lines below are joined with a single space.
    description: >
      Find email addresses of users who placed exactly 3 orders
      and then never ordered again (churned after their 3rd purchase)
    skills_required:
      - Subqueries
      - HAVING clause
      - Date logic
      - Window functions (optional)

# Reference scores, keyed by the task ids defined under `tasks`.
# NOTE(review): scores look like fractions in [0, 1] — confirm the scale.
# NOTE(review): `average` and `model` share this mapping with the per-task
# keys, so any consumer iterating it as "task id -> score" must skip them —
# confirm.
baseline_scores:
  monthly_signups: 0.85
  top_revenue_category: 0.65
  churn_analysis: 0.40
  # Mean of the three task scores above: (0.85 + 0.65 + 0.40) / 3 ≈ 0.633.
  average: 0.63
  # NOTE(review): presumably the model that produced these scores — confirm.
  model: gpt-4o-mini