# focusflow_env / openenv.yaml
# Hosting-page residue kept for provenance (not part of the manifest):
#   hannan2859r's picture / Update openenv.yaml / fcd6ec6 verified
# Project metadata.
name: focusflow-env
version: "2.0.0"  # quoted so it stays a string
# Folded scalar: the body must be indented deeper than the key, otherwise the
# scalar is empty and the text lines below become stray top-level content.
description: >
  LLM-hard RL environment for student focus and distraction management.
  Agent must handle natural language distraction events, manage cognitive load,
  track multi-day deadlines, and justify every decision with graded reasoning.
author: Abdul Hannan
theme: "Theme 3.2 - Personalized Tasks"
hackathon: "Meta x Scaler OpenEnv Hackathon 2026"
license: MIT
# Environment settings.
# NOTE(review): nesting reconstructed from a copy that lost its indentation;
# confirm that all four keys belong under `environment`.
environment:
  # TODO: placeholder host - replace YOUR-HF-SPACE-NAME before use.
  base_url: https://YOUR-HF-SPACE-NAME.hf.space
  framework: openenv
  language: python
  python_version: "3.11"  # quoted so it is not parsed as a float
# OpenEnv HTTP API endpoints
# Each endpoint is a mapping of method/path plus an optional list of params.
api:
  reset:
    method: POST
    path: /reset
    params:
      - name: task_id
        type: string
        default: task_1
        description: Which task to load (task_1, task_2, task_3)
      - name: seed
        type: integer
        default: 42
      - name: session_id
        type: string
        default: default
        description: Unique ID for multi-agent parallel training
  step:
    method: POST
    path: /step
    params:
      - name: session_id
        type: string
        default: default
    # NOTE(review): placed at endpoint level rather than inside the
    # session_id param - confirm against the consumer's schema.
    body: FocusAction
  state:
    method: GET
    path: /state
    params:
      - name: session_id
        type: string
        default: default
  health:
    method: GET
    path: /health
  tasks:
    method: GET
    path: /tasks
  metrics:
    method: GET
    path: /metrics
# Tasks
# One list item per task; the ids match the values listed for the reset
# endpoint's task_id parameter (task_1, task_2, task_3).
tasks:
  - id: task_1
    # Folded with ">-" so the value is the same single line, without a
    # trailing newline, as the original plain scalar.
    description: >-
      Single focused session. Complete one 25-min Pomodoro with zero app
      checks and handle NL events correctly.
    max_steps: 60
    days: 1
  - id: task_2
    description: >-
      Multi-session day. Manage cognitive load and defer low-urgency events
      across 2 sessions.
    max_steps: 120
    days: 1
  - id: task_3
    description: >-
      Week planner. Plan a 3-day schedule, handle shifting deadlines, and
      maintain energy levels.
    max_steps: 240
    days: 3
# Action space
# Names of the actions an agent may choose from.
actions:
  - focus
  - block_app
  - take_break
  - defer_event
  - respond_to_event
  - plan_day
  - adjust_energy
  - check_app
  - quit_session
# Observation fields
# Field names only; types and units are not declared in this manifest.
observation:
  - time_remaining_seconds
  - current_phase
  - sessions_completed
  - focus_score
  - active_distractions
  - blocked_apps
  - pending_event
  - day_context
  - cognitive_load
  - deadline_pressure
  - last_action_feedback
  - reasoning_quality_score
- reasoning_quality_score
# Reward range
# Bounds and flags are nested under `reward`; left at column 0 they would
# become unrelated top-level keys and `reward` itself would be null.
reward:
  min: -0.60
  max: 0.60
  shaped: true
  reasoning_graded: true
# Training
# Settings are nested under `training`; left at column 0 they would become
# top-level keys and `training` itself would be null.
training:
  frameworks: [trl, unsloth]
  algorithm: GRPO
  model: unsloth/Llama-3.2-1B-Instruct
  # NOTE(review): key says "notebook" but the value is a .py file - confirm.
  colab_notebook: training_colab.py
# Tags
tags:
  - productivity
  - student
  - llm-hard
  - natural-language-rl
  - pomodoro
  - llm-agent
  - openenv
  - meta-hackathon-2026