Spaces:
Running
Running
File size: 4,976 Bytes
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Test that PythonCodeActEnv.reset() properly resets executor state."""
import os
import sys
from pathlib import Path
import pytest
# Make the project importable when this file is run from a source checkout.
# Both entries are prepended (index 0) so they take priority over any other
# entries on sys.path:
#   1. the directory two levels above this file's directory (the project root)
#   2. the "src" directory located directly under that same root
# This has to happen before the `envs.coding_env` imports below.
# NOTE(review): presumably `envs` lives under one of these two directories —
# confirm against the repository layout.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
# smolagents is an optional dependency: if it cannot be imported, skip every
# test in this module (with the given reason) instead of failing at import time.
pytest.importorskip("smolagents", reason="smolagents is not installed")
from envs.coding_env.models import CodeAction
from envs.coding_env.server.python_codeact_env import PythonCodeActEnv
def test_reset_clears_executor_state():
    """Check that a function defined before reset() is unavailable afterwards.

    The test defines ``my_function`` in the executor, confirms it can be
    called, resets the environment, and then expects the very same call to
    fail with exit code 1 and an error mentioning the missing name.
    """
    environment = PythonCodeActEnv()

    # A first reset must succeed and start the step counter at zero.
    first_reset = environment.reset()
    assert first_reset.exit_code == 0
    assert environment.state.step_count == 0

    # Step 1: define the function inside the executor.
    definition_result = environment.step(
        CodeAction(code="def my_function():\n return 'Hello from function'\n")
    )
    assert definition_result.exit_code == 0

    # Step 2: calling it proves the definition persisted between steps.
    call_snippet = "result = my_function()\nprint(result)"
    call_result = environment.step(CodeAction(code=call_snippet))
    assert call_result.exit_code == 0
    assert "Hello from function" in call_result.stdout

    # Reset again; it must succeed and zero the step counter once more.
    second_reset = environment.reset()
    assert second_reset.exit_code == 0
    assert environment.state.step_count == 0

    # Step 3: the identical call must now fail, since the executor state
    # from before the reset should no longer exist.
    post_reset_result = environment.step(CodeAction(code=call_snippet))
    assert post_reset_result.exit_code == 1  # Error exit code
    error_output = post_reset_result.stderr
    assert any(marker in error_output for marker in ("my_function", "NameError"))
def test_reset_clears_variables():
    """Check that a variable assigned before reset() is unavailable afterwards.

    ``my_variable`` is assigned and printed successfully, the environment is
    reset, and the same print is then expected to fail with exit code 1 and
    an error mentioning the missing name.
    """
    environment = PythonCodeActEnv()
    environment.reset()

    # Assign the variable inside the executor.
    assign_result = environment.step(CodeAction(code="my_variable = 42\n"))
    assert assign_result.exit_code == 0

    # Reading it back proves the assignment persisted between steps.
    read_snippet = "print(my_variable)"
    read_result = environment.step(CodeAction(code=read_snippet))
    assert read_result.exit_code == 0
    assert "42" in read_result.stdout

    environment.reset()

    # The identical read must now fail because the name should be gone.
    post_reset_result = environment.step(CodeAction(code=read_snippet))
    assert post_reset_result.exit_code == 1
    error_output = post_reset_result.stderr
    assert any(marker in error_output for marker in ("my_variable", "NameError"))
def test_reset_clears_imports():
    """Check that an import alias bound before reset() is unavailable afterwards.

    ``math`` is imported under the alias ``m`` and used successfully, the
    environment is reset, and using ``m`` again is then expected to fail
    with exit code 1 and one of the accepted "undefined name" messages.
    """
    environment = PythonCodeActEnv()
    environment.reset()

    # Bind the alias inside the executor.
    import_result = environment.step(CodeAction(code="import math as m\n"))
    assert import_result.exit_code == 0

    # Using the alias proves the import persisted between steps.
    use_snippet = "print(m.pi)"
    use_result = environment.step(CodeAction(code=use_snippet))
    assert use_result.exit_code == 0
    assert "3.14" in use_result.stdout

    environment.reset()

    # The identical use must now fail because 'm' should no longer be bound.
    post_reset_result = environment.step(CodeAction(code=use_snippet))
    assert post_reset_result.exit_code == 1

    # Any one of these fragments in stderr counts as the expected error.
    accepted_fragments = (
        "NameError",
        "'m'",
        "variable `m` is not defined",
    )
    error_output = post_reset_result.stderr
    assert any(fragment in error_output for fragment in accepted_fragments)
def test_reset_preserves_step_count_reset():
    """Check that reset() returns the step counter to zero.

    The counter is expected to be 0 after a reset, to equal the number of
    steps executed since then, and to start counting from 0 again after a
    subsequent reset.
    """
    environment = PythonCodeActEnv()

    environment.reset()
    assert environment.state.step_count == 0

    # Run a handful of trivial steps and confirm each one was counted.
    steps_to_run = 5
    for value in range(steps_to_run):
        environment.step(CodeAction(code=f"print({value})"))
    assert environment.state.step_count == 5

    # A reset must wipe the counter ...
    environment.reset()
    assert environment.state.step_count == 0

    # ... and the next step must count as the first one again.
    environment.step(CodeAction(code="print('test')"))
    assert environment.state.step_count == 1
def test_reset_changes_episode_id():
    """Check that consecutive reset() calls yield different episode IDs."""
    environment = PythonCodeActEnv()

    # Record the episode ID assigned by the first reset.
    environment.reset()
    id_before = environment.state.episode_id

    # Run one step in that episode before resetting again.
    environment.step(CodeAction(code="print('test')"))

    # Record the episode ID assigned by the second reset.
    environment.reset()
    id_after = environment.state.episode_id

    # The two episodes must not share an ID.
    assert id_before != id_after
|