# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """ Data models for the Self-Healing DevOps Sandbox Environment. Defines the Action and Observation types used by the RL agent to interact with a broken Node.js backend. The agent acts as a DevOps engineer, diagnosing and fixing production-like bugs using bash commands. """ from typing import Any, Dict, List, Optional from pydantic import Field from openenv.core.env_server.types import Action, Observation class BashAction(Action): """Action: a bash command to execute inside the sandbox. The agent sends shell commands (ls, cat, sed, grep, node, npm, etc.) to diagnose and repair the broken Node.js application. """ command: str = Field( ..., description=( "The bash command to execute in the sandbox terminal " "(e.g., 'ls -la', 'cat server.js', " "'sed -i s/old/new/ file.js')." ), ) class TerminalObservation(Observation): """Observation returned after executing a bash command. Includes stdout/stderr from the command, working directory context, the current task identifier, grader's partial score, and episode metadata. """ stdout: str = Field( default="", description="Standard output from the executed command.", ) stderr: str = Field( default="", description="Standard error from the executed command, if any.", ) current_dir: str = Field( default="/app", description="The current working directory inside the container.", ) task_id: str = Field( default="devops_sandbox", description="Identifier for the current task scenario (easy/medium/hard).", ) grader_score: float = Field( default=0.01, ge=0.0, le=1.0, description="The grader's partial reward strictly within (0, 1).", ) grader_feedback: str = Field( default="", description="Human-readable feedback from the grader.", ) done: bool = Field( default=False, description="Whether the episode is complete (all bugs fixed or max steps reached).", ) reward: Optional[float] = Field( default=None, description="Incremental reward for this step (score delta).", ) metadata: Dict[str, Any] = Field( default_factory=dict, description="Additional metadata: files_modified, commands_count, bugs_found, etc.", ) __all__ = ["BashAction", "TerminalObservation"]