KnightBlade committed on
Commit
9e89374
·
1 Parent(s): 3b1dc2a

Optimize env concurrency, dataframe ops, and inference loop

Browse files
inference.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import sys
3
  import asyncio
 
 
4
  from openai import AsyncOpenAI
5
 
6
  # OpenEnv V5 specific client components
@@ -19,6 +21,7 @@ BENCHMARK = "data_wrangler"
19
  MAX_STEPS = 15
20
  MAX_TOTAL_REWARD = 1.0
21
  SUCCESS_SCORE_THRESHOLD = 0.5
 
22
 
23
  system_prompt = """\
24
  SYSTEM INSTRUCTIONS: ELITE DATA ENGINEER AGENT
@@ -63,7 +66,11 @@ Select Action: Which action type and parameters will execute this fix?
63
 
64
  async def get_model_message(client, step, obs_dict, last_reward, history, max_retries=3):
65
  obs_text = str(obs_dict)
66
- prompt = f"Step {step}.\nObservation: {obs_text}\nLast Reward: {last_reward}\nHistory: {history}\nChoose your next action (JSON matching schema)."
 
 
 
 
67
 
68
  # Priority 3: Error Reflection. Pass previous feedback directly to LLM if there was an error.
69
  if "Error" in obs_dict.get("last_action_feedback", "") or "Exception" in obs_dict.get("last_action_feedback", ""):
@@ -77,13 +84,12 @@ async def get_model_message(client, step, obs_dict, last_reward, history, max_re
77
  {"role": "system", "content": system_prompt},
78
  {"role": "user", "content": prompt}
79
  ],
80
- temperature=0.0
 
81
  )
82
  content = response.choices[0].message.content
83
- import json
84
- import re
85
-
86
- match = re.search(r'(\{.*\})', content, re.DOTALL)
87
  if match:
88
  return json.loads(match.group(1))
89
  else:
 
1
  import os
2
  import sys
3
  import asyncio
4
+ import json
5
+ import re
6
  from openai import AsyncOpenAI
7
 
8
  # OpenEnv V5 specific client components
 
21
  MAX_STEPS = 15
22
  MAX_TOTAL_REWARD = 1.0
23
  SUCCESS_SCORE_THRESHOLD = 0.5
24
+ MAX_HISTORY_ITEMS = int(os.environ.get("MAX_HISTORY_ITEMS", "6"))
25
 
26
  system_prompt = """\
27
  SYSTEM INSTRUCTIONS: ELITE DATA ENGINEER AGENT
 
66
 
67
  async def get_model_message(client, step, obs_dict, last_reward, history, max_retries=3):
68
  obs_text = str(obs_dict)
69
+ trimmed_history = history[-MAX_HISTORY_ITEMS:] if history else []
70
+ prompt = (
71
+ f"Step {step}.\nObservation: {obs_text}\nLast Reward: {last_reward}\n"
72
+ f"History: {trimmed_history}\nChoose your next action (JSON matching schema)."
73
+ )
74
 
75
  # Priority 3: Error Reflection. Pass previous feedback directly to LLM if there was an error.
76
  if "Error" in obs_dict.get("last_action_feedback", "") or "Exception" in obs_dict.get("last_action_feedback", ""):
 
84
  {"role": "system", "content": system_prompt},
85
  {"role": "user", "content": prompt}
86
  ],
87
+ temperature=0.0,
88
+ max_tokens=220,
89
  )
90
  content = response.choices[0].message.content
91
+
92
+ match = re.search(r'(\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\})', content or "", re.DOTALL)
 
 
93
  if match:
94
  return json.loads(match.group(1))
95
  else:
server/app.py CHANGED
@@ -28,6 +28,8 @@ Usage:
28
  python -m server.app
29
  """
30
 
 
 
31
  try:
32
  from openenv.core.env_server.http_server import create_app
33
  except Exception as e: # pragma: no cover
@@ -52,7 +54,7 @@ app = create_app(
52
  DataWranglerAction,
53
  DataWranglerObservation,
54
  env_name="data_wrangler",
55
- max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
56
  )
57
 
58
 
 
28
  python -m server.app
29
  """
30
 
31
+ import os
32
+
33
  try:
34
  from openenv.core.env_server.http_server import create_app
35
  except Exception as e: # pragma: no cover
 
54
  DataWranglerAction,
55
  DataWranglerObservation,
56
  env_name="data_wrangler",
57
+ max_concurrent_envs=int(os.getenv("MAX_CONCURRENT_ENVS", "4")),
58
  )
59
 
60
 
server/data_wrangler_environment.py CHANGED
@@ -109,10 +109,11 @@ class DataWranglerEnvironment(Environment):
109
  def _get_obs(self, feedback: str = "Environment initialized.", done: bool = False, reward: float = 0.0) -> DataWranglerObservation:
110
  stats = {}
111
  for col in self.df.columns:
 
112
  stats[col] = {
113
  "dtype": str(self.df[col].dtype),
114
  "missing_count": int(self.df[col].isna().sum()),
115
- "sample_values": self.df[col].dropna().astype(str).tolist()[:3]
116
  }
117
 
118
  return DataWranglerObservation(
@@ -132,6 +133,12 @@ class DataWranglerEnvironment(Environment):
132
  self._initialize_task()
133
  return self._get_obs()
134
 
 
 
 
 
 
 
135
  def step(self, action: DataWranglerAction) -> DataWranglerObservation: # type: ignore
136
  self._state.step_count += 1
137
  feedback = "Action executed successfully."
@@ -141,7 +148,8 @@ class DataWranglerEnvironment(Environment):
141
  try:
142
  if action.action_type == "drop_column":
143
  col = action.target_column
144
- if col in self.df.columns:
 
145
  self.df.drop(columns=[col], inplace=True)
146
  if col not in self.target_df.columns:
147
  reward = 0.2
@@ -149,72 +157,81 @@ class DataWranglerEnvironment(Environment):
149
  reward = -0.5
150
  feedback = f"Warning: dropped targeting column {col}"
151
  else:
152
- feedback = f"Error: Column '{col}' not found."
153
 
154
  elif action.action_type == "rename_column":
155
  col = action.target_column
156
  new_col = action.new_name
157
- if col in self.df.columns:
 
158
  self.df.rename(columns={col: new_col}, inplace=True)
159
  if new_col in self.target_df.columns:
160
  reward = 0.2
161
  else:
162
- feedback = f"Error: Column '{col}' not found."
163
 
164
  elif action.action_type == "fill_missing":
165
  col = action.target_column
166
- if col in self.df.columns:
167
- self.df[col].fillna(action.fill_value, inplace=True)
 
168
  reward = 0.1
169
  else:
170
- feedback = f"Error: Column '{col}' not found."
171
 
172
  elif action.action_type == "cast_type":
173
  col = action.target_column
174
- to_type = action.cast_to
175
- if col in self.df.columns:
176
- if to_type == 'int':
177
- self.df = self.df.astype({col: int})
178
- elif to_type == 'float':
179
- self.df = self.df.astype({col: float})
180
- elif to_type == 'datetime':
181
- self.df[col] = pd.to_datetime(self.df[col])
182
- elif to_type == 'string':
183
- self.df = self.df.astype({col: str})
 
 
 
 
184
  reward = 0.2
185
  else:
186
- feedback = f"Error: Column '{col}' not found."
187
 
188
  elif action.action_type == "extract_regex":
189
  col = action.target_column
190
  new_col = action.new_name
191
  pattern = action.regex_pattern
192
- if col in self.df.columns:
 
193
  # extract the first capture group
194
  extracted = self.df[col].astype(str).str.extract(pattern)[0]
195
  self.df[new_col] = extracted
196
  reward = 0.1
197
  else:
198
- feedback = f"Error: Column '{col}' not found."
199
 
200
  elif action.action_type == "datetime_parse":
201
  col = action.target_column
202
  fmt = action.format_string
203
- if col in self.df.columns:
204
- self.df[col] = pd.to_datetime(self.df[col], format=fmt)
 
205
  reward = 0.1
206
  else:
207
- feedback = f"Error: Column '{col}' not found."
208
 
209
  elif action.action_type == "group_by_aggregate":
210
  group_col = action.target_column
211
  agg_col = action.agg_column
212
  func = action.agg_func
213
- if group_col in self.df.columns and agg_col in self.df.columns:
214
- self.df = self.df.groupby(group_col, as_index=False).agg({agg_col: func})
 
215
  reward = 0.2
216
  else:
217
- feedback = f"Error: Columns '{group_col}' or '{agg_col}' not found."
218
 
219
  elif action.action_type == "submit":
220
  score = self._grade()
 
109
  def _get_obs(self, feedback: str = "Environment initialized.", done: bool = False, reward: float = 0.0) -> DataWranglerObservation:
110
  stats = {}
111
  for col in self.df.columns:
112
+ non_null = self.df[col].dropna()
113
  stats[col] = {
114
  "dtype": str(self.df[col].dtype),
115
  "missing_count": int(self.df[col].isna().sum()),
116
+ "sample_values": non_null.astype(str).head(3).tolist(),
117
  }
118
 
119
  return DataWranglerObservation(
 
133
  self._initialize_task()
134
  return self._get_obs()
135
 
136
+ def _require_columns(self, *columns: str) -> str | None:
137
+ missing = [col for col in columns if not col or col not in self.df.columns]
138
+ if missing:
139
+ return f"Error: Column(s) not found: {', '.join(missing)}"
140
+ return None
141
+
142
  def step(self, action: DataWranglerAction) -> DataWranglerObservation: # type: ignore
143
  self._state.step_count += 1
144
  feedback = "Action executed successfully."
 
148
  try:
149
  if action.action_type == "drop_column":
150
  col = action.target_column
151
+ err = self._require_columns(col)
152
+ if not err:
153
  self.df.drop(columns=[col], inplace=True)
154
  if col not in self.target_df.columns:
155
  reward = 0.2
 
157
  reward = -0.5
158
  feedback = f"Warning: dropped targeting column {col}"
159
  else:
160
+ feedback = err
161
 
162
  elif action.action_type == "rename_column":
163
  col = action.target_column
164
  new_col = action.new_name
165
+ err = self._require_columns(col)
166
+ if not err:
167
  self.df.rename(columns={col: new_col}, inplace=True)
168
  if new_col in self.target_df.columns:
169
  reward = 0.2
170
  else:
171
+ feedback = err
172
 
173
  elif action.action_type == "fill_missing":
174
  col = action.target_column
175
+ err = self._require_columns(col)
176
+ if not err:
177
+ self.df[col] = self.df[col].fillna(action.fill_value)
178
  reward = 0.1
179
  else:
180
+ feedback = err
181
 
182
  elif action.action_type == "cast_type":
183
  col = action.target_column
184
+ to_type = (action.cast_to or "").lower()
185
+ err = self._require_columns(col)
186
+ if not err:
187
+ if to_type == "int":
188
+ self.df[col] = pd.to_numeric(self.df[col], errors="coerce").astype("Int64")
189
+ elif to_type == "float":
190
+ self.df[col] = pd.to_numeric(self.df[col], errors="coerce").astype(float)
191
+ elif to_type == "datetime":
192
+ self.df[col] = pd.to_datetime(self.df[col], errors="coerce")
193
+ elif to_type == "string":
194
+ self.df[col] = self.df[col].astype(str)
195
+ else:
196
+ feedback = f"Error: Unsupported cast type '{action.cast_to}'."
197
+ return self._get_obs(feedback=feedback, done=done, reward=reward)
198
  reward = 0.2
199
  else:
200
+ feedback = err
201
 
202
  elif action.action_type == "extract_regex":
203
  col = action.target_column
204
  new_col = action.new_name
205
  pattern = action.regex_pattern
206
+ err = self._require_columns(col)
207
+ if not err and new_col and pattern:
208
  # extract the first capture group
209
  extracted = self.df[col].astype(str).str.extract(pattern)[0]
210
  self.df[new_col] = extracted
211
  reward = 0.1
212
  else:
213
+ feedback = err or "Error: 'new_name' and 'regex_pattern' are required."
214
 
215
  elif action.action_type == "datetime_parse":
216
  col = action.target_column
217
  fmt = action.format_string
218
+ err = self._require_columns(col)
219
+ if not err:
220
+ self.df[col] = pd.to_datetime(self.df[col], format=fmt, errors="coerce")
221
  reward = 0.1
222
  else:
223
+ feedback = err
224
 
225
  elif action.action_type == "group_by_aggregate":
226
  group_col = action.target_column
227
  agg_col = action.agg_column
228
  func = action.agg_func
229
+ err = self._require_columns(group_col, agg_col)
230
+ if not err and func:
231
+ self.df = self.df.groupby(group_col, as_index=False, observed=True).agg({agg_col: func})
232
  reward = 0.2
233
  else:
234
+ feedback = err or "Error: 'agg_func' is required."
235
 
236
  elif action.action_type == "submit":
237
  score = self._grade()