File size: 2,669 Bytes
fdd45f1
fcd6ec6
fdd45f1
fcd6ec6
 
 
fdd45f1
 
fcd6ec6
 
fdd45f1
 
 
fcd6ec6
fdd45f1
 
 
 
fcd6ec6
fdd45f1
 
 
 
 
 
 
 
 
 
 
 
fcd6ec6
 
 
 
fdd45f1
 
 
fcd6ec6
 
 
 
fdd45f1
 
 
 
fcd6ec6
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
fcd6ec6
fdd45f1
 
fcd6ec6
fdd45f1
fcd6ec6
fdd45f1
fcd6ec6
fdd45f1
fcd6ec6
fdd45f1
fcd6ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
fcd6ec6
 
 
 
 
 
fdd45f1
 
 
 
fcd6ec6
 
fdd45f1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Environment identity: name, version string, and a folded summary paragraph.
name: focusflow-env
version: '2.0.0'
description: >
  LLM-hard RL environment for student focus and distraction management. Agent
  must handle natural language distraction events, manage cognitive load, track
  multi-day deadlines, and justify every decision with graded reasoning.

# Attribution and submission context.
author: Abdul Hannan
theme: 'Theme 3.2 - Personalized Tasks'
hackathon: 'Meta x Scaler OpenEnv Hackathon 2026'
license: MIT

# Hosting URL plus the framework/language the environment declares.
environment:
  # NOTE(review): the host still contains the YOUR-HF-SPACE-NAME placeholder;
  # confirm it is replaced with the real Space URL before publishing.
  base_url: 'https://YOUR-HF-SPACE-NAME.hf.space'
  framework: openenv
  language: python
  # Kept quoted so the version is read as a string, not a float.
  python_version: '3.11'

# OpenEnv HTTP API: one entry per endpoint, giving its HTTP method, its path,
# and (where declared) the parameters and body it accepts.
api:
  # reset: takes a task id, a seed, and a session id; each has a default.
  reset:
    method: POST
    path: '/reset'
    params:
      - name: task_id
        type: string
        default: task_1
        description: 'Which task to load (task_1, task_2, task_3)'
      - name: seed
        type: integer
        default: 42
      - name: session_id
        type: string
        default: 'default'
        description: 'Unique ID for multi-agent parallel training'
  # step: takes a session id and a FocusAction body.
  step:
    method: POST
    path: '/step'
    params:
      - name: session_id
        type: string
        default: 'default'
    body: FocusAction
  # state: takes a session id.
  state:
    method: GET
    path: '/state'
    params:
      - name: session_id
        type: string
        default: 'default'
  # The remaining endpoints declare no parameters.
  health:
    method: GET
    path: '/health'
  tasks:
    method: GET
    path: '/tasks'
  metrics:
    method: GET
    path: '/metrics'

# Tasks: each entry has an id, a one-line description, a max_steps limit, and
# a day count. Descriptions use folded-strip scalars (>-) so they stay single
# strings without exceeding the line-length limit.
tasks:
  - id: task_1
    description: >-
      Single focused session. Complete one 25-min Pomodoro with zero app
      checks and handle NL events correctly.
    max_steps: 60
    days: 1
  - id: task_2
    description: >-
      Multi-session day. Manage cognitive load and defer low-urgency events
      across 2 sessions.
    max_steps: 120
    days: 1
  - id: task_3
    description: >-
      Week planner. Plan a 3-day schedule, handle shifting deadlines, and
      maintain energy levels.
    max_steps: 240
    days: 3

# Action space: the nine action names this environment declares.
actions:
  - focus
  - block_app
  - take_break
  - defer_event
  - respond_to_event
  - plan_day
  - adjust_energy
  - check_app
  - quit_session

# Observation fields: the twelve field names listed for an observation.
# NOTE(review): field types and units are not declared here; confirm them
# against the environment implementation.
observation:
  - time_remaining_seconds
  - current_phase
  - sessions_completed
  - focus_score
  - active_distractions
  - blocked_apps
  - pending_event
  - day_context
  - cognitive_load
  - deadline_pressure
  - last_action_feedback
  - reasoning_quality_score

# Reward range: numeric lower/upper bounds followed by two boolean flags.
reward:
  min: -0.60
  max: 0.60
  shaped: true
  reasoning_graded: true

# Training: declared frameworks, algorithm name, model identifier, and the
# file referenced as the Colab notebook.
training:
  frameworks:
    - trl
    - unsloth
  algorithm: GRPO
  model: 'unsloth/Llama-3.2-1B-Instruct'
  colab_notebook: training_colab.py

# Free-form tags attached to this environment.
# NOTE(review): presumably used for search/discovery; confirm with the consumer.
tags:
  - productivity
  - student
  - llm-hard
  - natural-language-rl
  - pomodoro
  - llm-agent
  - openenv
  - meta-hackathon-2026