File size: 4,976 Bytes
b4ac377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Test that PythonCodeActEnv.reset() properly resets executor state."""

import os
import sys
from pathlib import Path

import pytest


# Make the project importable when this file is run directly from the tests
# tree. Both expressions resolve to the directory two levels above this
# file's directory: the first insert adds that directory itself, the second
# adds its "src" subdirectory. Because "src" is inserted last at index 0, it
# is searched first.
# NOTE(review): presumably required so the `envs` imports below resolve
# without an installed package - confirm against the project layout.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))

# smolagents is an optional dependency: skip the entire module at collection
# time when it cannot be imported. This must run before the `envs` imports
# below so that a missing smolagents produces a skip rather than an error.
pytest.importorskip("smolagents", reason="smolagents is not installed")

from envs.coding_env.models import CodeAction
from envs.coding_env.server.python_codeact_env import PythonCodeActEnv


def test_reset_clears_executor_state():
    """Verify that reset() discards functions defined in earlier steps."""
    env = PythonCodeActEnv()

    # The first reset should succeed and leave the step counter at zero.
    initial_obs = env.reset()
    assert initial_obs.exit_code == 0
    assert env.state.step_count == 0

    # Register a function in the executor.
    define_obs = env.step(
        CodeAction(code="def my_function():\n    return 'Hello from function'\n")
    )
    assert define_obs.exit_code == 0

    # Before the reset the function is callable and its output is captured.
    call_before = env.step(CodeAction(code="result = my_function()\nprint(result)"))
    assert call_before.exit_code == 0
    assert "Hello from function" in call_before.stdout

    # Start a new episode; the step counter must return to zero.
    reset_obs = env.reset()
    assert reset_obs.exit_code == 0
    assert env.state.step_count == 0

    # The identical call must now fail: the function should no longer exist.
    call_after = env.step(CodeAction(code="result = my_function()\nprint(result)"))
    assert call_after.exit_code == 1  # error exit code
    err_text = call_after.stderr
    assert "my_function" in err_text or "NameError" in err_text


def test_reset_clears_variables():
    """Verify that a variable assigned before reset() is gone afterwards."""
    env = PythonCodeActEnv()
    env.reset()

    # Bind a name in the executor.
    assign_obs = env.step(CodeAction(code="my_variable = 42\n"))
    assert assign_obs.exit_code == 0

    # Reading the name back works while the episode is still live.
    read_before = env.step(CodeAction(code="print(my_variable)"))
    assert read_before.exit_code == 0
    assert "42" in read_before.stdout

    # Begin a new episode.
    env.reset()

    # The same read must now error out because the name is undefined.
    read_after = env.step(CodeAction(code="print(my_variable)"))
    assert read_after.exit_code == 1
    err_text = read_after.stderr
    assert "my_variable" in err_text or "NameError" in err_text


def test_reset_clears_imports():
    """Verify that an import alias created before reset() is gone afterwards."""
    env = PythonCodeActEnv()
    env.reset()

    # Import a module under a short alias.
    import_obs = env.step(CodeAction(code="import math as m\n"))
    assert import_obs.exit_code == 0

    # The alias is usable within the same episode.
    use_before = env.step(CodeAction(code="print(m.pi)"))
    assert use_before.exit_code == 0
    assert "3.14" in use_before.stdout

    # Begin a new episode.
    env.reset()

    # Using the alias again must fail because 'm' is no longer bound.
    use_after = env.step(CodeAction(code="print(m.pi)"))
    assert use_after.exit_code == 1

    # Accept any of the error wordings the executor may produce.
    accepted_markers = ("NameError", "'m'", "variable `m` is not defined")
    assert any(marker in use_after.stderr for marker in accepted_markers)


def test_reset_preserves_step_count_reset():
    """Verify that reset() returns the step counter to zero."""
    env = PythonCodeActEnv()

    env.reset()
    assert env.state.step_count == 0

    # Run five trivial steps; the counter should track each one.
    for i in range(5):
        env.step(CodeAction(code=f"print({i})"))
    assert env.state.step_count == 5

    # A reset zeroes the counter again.
    env.reset()
    assert env.state.step_count == 0

    # Counting restarts from one in the new episode.
    env.step(CodeAction(code="print('test')"))
    assert env.state.step_count == 1


def test_reset_changes_episode_id():
    """Verify that each reset() yields a different episode ID."""
    env = PythonCodeActEnv()

    # Record the ID assigned by the first reset.
    env.reset()
    first_id = env.state.episode_id

    # Do some work in the first episode.
    env.step(CodeAction(code="print('test')"))

    # Record the ID assigned by the second reset.
    env.reset()
    second_id = env.state.episode_id

    # The two episodes must not share an ID.
    assert first_id != second_id