hannan2859r committed on
Commit
fcd6ec6
·
verified ·
1 Parent(s): 2be28d6

Update openenv.yaml

Browse files
Files changed (1) hide show
  1. openenv.yaml +79 -19
openenv.yaml CHANGED
@@ -1,20 +1,22 @@
1
  name: focusflow-env
 
2
  description: >
3
- An RL environment where an AI agent learns to manage a student's focus session.
4
- The agent blocks distracting apps, times breaks correctly, and maximises
5
- deep-focus time using a Pomodoro-style framework.
6
- Built on Meta's OpenEnv framework for the Meta x Scaler Hackathon 2026.
7
 
8
- version: "1.0.0"
9
  author: Abdul Hannan
 
 
10
  license: MIT
11
 
12
  environment:
13
- base_url: https://hannan2859r-focusflow-env.hf.space
14
  framework: openenv
15
  language: python
16
  python_version: "3.11"
17
 
 
18
  api:
19
  reset:
20
  method: POST
@@ -27,38 +29,96 @@ api:
27
  - name: seed
28
  type: integer
29
  default: 42
 
 
 
 
30
  step:
31
  method: POST
32
  path: /step
 
 
 
 
33
  body: FocusAction
34
  state:
35
  method: GET
36
  path: /state
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
 
38
  tasks:
39
  - id: task_1
40
- description: Complete one 25-min focus session without checking any distracting app.
41
  max_steps: 60
42
- success_reward: 1.0
43
-
44
  - id: task_2
45
- description: Complete two sessions with strategically timed breaks.
46
  max_steps: 120
47
- success_reward: 1.0
48
-
49
  - id: task_3
50
- description: Block all 5 distracting apps within 10 steps then complete a session.
51
- max_steps: 80
52
- success_reward: 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- reward_range: [-0.5, 0.5]
55
- action_space: discrete (5 action types)
56
- observation_space: structured JSON (FocusObservation)
 
 
 
57
 
58
  tags:
59
  - productivity
60
  - student
61
- - anti-distraction
 
62
  - pomodoro
63
  - llm-agent
64
  - openenv
 
1
  name: focusflow-env
2
+ version: "2.0.0"
3
  description: >
4
+ LLM-hard RL environment for student focus and distraction management.
5
+ Agent must handle natural language distraction events, manage cognitive load,
6
+ track multi-day deadlines, and justify every decision with graded reasoning.
 
7
 
 
8
  author: Abdul Hannan
9
+ theme: "Theme 3.2 - Personalized Tasks"
10
+ hackathon: "Meta x Scaler OpenEnv Hackathon 2026"
11
  license: MIT
12
 
13
  environment:
14
+ base_url: https://YOUR-HF-SPACE-NAME.hf.space
15
  framework: openenv
16
  language: python
17
  python_version: "3.11"
18
 
19
+ # OpenEnv HTTP API endpoints
20
  api:
21
  reset:
22
  method: POST
 
29
  - name: seed
30
  type: integer
31
  default: 42
32
+ - name: session_id
33
+ type: string
34
+ default: default
35
+ description: Unique ID for multi-agent parallel training
36
  step:
37
  method: POST
38
  path: /step
39
+ params:
40
+ - name: session_id
41
+ type: string
42
+ default: default
43
  body: FocusAction
44
  state:
45
  method: GET
46
  path: /state
47
+ params:
48
+ - name: session_id
49
+ type: string
50
+ default: default
51
+ health:
52
+ method: GET
53
+ path: /health
54
+ tasks:
55
+ method: GET
56
+ path: /tasks
57
+ metrics:
58
+ method: GET
59
+ path: /metrics
60
 
61
+ # Tasks
62
  tasks:
63
  - id: task_1
64
+ description: Single focused session. Complete one 25-min Pomodoro with zero app checks and handle NL events correctly.
65
  max_steps: 60
66
+ days: 1
 
67
  - id: task_2
68
+ description: Multi-session day. Manage cognitive load and defer low-urgency events across 2 sessions.
69
  max_steps: 120
70
+ days: 1
 
71
  - id: task_3
72
+ description: Week planner. Plan a 3-day schedule, handle shifting deadlines, and maintain energy levels.
73
+ max_steps: 240
74
+ days: 3
75
+
76
+ # Action space
77
+ actions:
78
+ - focus
79
+ - block_app
80
+ - take_break
81
+ - defer_event
82
+ - respond_to_event
83
+ - plan_day
84
+ - adjust_energy
85
+ - check_app
86
+ - quit_session
87
+
88
+ # Observation fields
89
+ observation:
90
+ - time_remaining_seconds
91
+ - current_phase
92
+ - sessions_completed
93
+ - focus_score
94
+ - active_distractions
95
+ - blocked_apps
96
+ - pending_event
97
+ - day_context
98
+ - cognitive_load
99
+ - deadline_pressure
100
+ - last_action_feedback
101
+ - reasoning_quality_score
102
+
103
+ # Reward range
104
+ reward:
105
+ min: -0.60
106
+ max: 0.60
107
+ shaped: true
108
+ reasoning_graded: true
109
 
110
+ # Training
111
+ training:
112
+ frameworks: [trl, unsloth]
113
+ algorithm: GRPO
114
+ model: unsloth/Llama-3.2-1B-Instruct
115
+ colab_notebook: training_colab.py
116
 
117
  tags:
118
  - productivity
119
  - student
120
+ - llm-hard
121
+ - natural-language-rl
122
  - pomodoro
123
  - llm-agent
124
  - openenv