kush5699 committed on
Commit
42757ca
·
verified ·
1 Parent(s): 0e5c7a8

Upload folder using huggingface_hub

Browse files
Files changed (8) hide show
  1. .gitignore +12 -0
  2. Dockerfile +1 -9
  3. env/__init__.py +0 -1
  4. env/environment.py +19 -55
  5. env/models.py +21 -42
  6. env/tasks.py +43 -104
  7. inference.py +1 -40
  8. server.py +7 -22
.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .vscode/
9
+ .idea/
10
+ test_space.py
11
+ test_all_tasks.py
12
+ *.pdf
Dockerfile CHANGED
@@ -2,24 +2,16 @@ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies
6
- RUN apt-get update && apt-get install -y --no-install-recommends \
7
- curl \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
- # Copy requirements first for caching
11
  COPY requirements.txt .
12
  RUN pip install --no-cache-dir -r requirements.txt
13
 
14
- # Copy application code
15
  COPY . .
16
 
17
- # Expose port
18
  EXPOSE 8000
19
 
20
- # Health check
21
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
22
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
23
 
24
- # Run the server
25
  CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
 
2
 
3
  WORKDIR /app
4
 
5
+ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
 
 
 
6
 
 
7
  COPY requirements.txt .
8
  RUN pip install --no-cache-dir -r requirements.txt
9
 
 
10
  COPY . .
11
 
 
12
  EXPOSE 8000
13
 
 
14
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
15
  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
16
 
 
17
  CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
env/__init__.py CHANGED
@@ -1,2 +1 @@
1
- """Data Validation Pipeline - OpenEnv Environment."""
2
  from env.models import DataCleanAction, DataCleanObservation, DataCleanState
 
 
1
  from env.models import DataCleanAction, DataCleanObservation, DataCleanState
env/environment.py CHANGED
@@ -1,5 +1,3 @@
1
- """Core Environment implementation for the Data Validation Pipeline."""
2
-
3
  import uuid
4
  from typing import Any, Dict, List, Optional
5
 
@@ -8,44 +6,25 @@ from env.tasks import generate_task, get_task_names, grade_action
8
 
9
 
10
  class DataValidationEnvironment:
11
- """
12
- Data Validation Pipeline Environment.
13
-
14
- An RL environment where the agent must clean and validate structured datasets
15
- by identifying and fixing errors (missing values, type mismatches, format violations,
16
- range errors, and duplicates).
17
-
18
- Follows OpenEnv Environment interface: reset(), step(), state().
19
- """
20
-
21
  def __init__(self):
22
  self._state = DataCleanState()
23
  self._ground_truth: List[Dict[str, Any]] = []
24
  self._errors: List[Dict[str, Any]] = []
25
  self._task_info: Dict[str, Any] = {}
26
  self._field_names: List[str] = []
27
-
28
  def reset(self, task_name: Optional[str] = None, seed: int = 42, **kwargs) -> DataCleanObservation:
29
- """
30
- Reset the environment with a new task.
31
-
32
- Args:
33
- task_name: Task to load ('easy_missing_values', 'medium_mixed_errors', 'hard_multi_constraint')
34
- seed: Random seed for reproducibility
35
-
36
- Returns:
37
- Initial observation
38
- """
39
  if task_name is None:
40
  task_name = "easy_missing_values"
41
-
42
  task = generate_task(task_name, seed)
43
-
44
  self._ground_truth = task["ground_truth"]
45
  self._errors = task["errors"]
46
  self._task_info = task
47
  self._field_names = task["field_names"]
48
-
49
  self._state = DataCleanState(
50
  episode_id=str(uuid.uuid4()),
51
  task_name=task_name,
@@ -61,7 +40,7 @@ class DataValidationEnvironment:
61
  total_errors=len(self._errors),
62
  last_actions=[],
63
  )
64
-
65
  return DataCleanObservation(
66
  task_name=task_name,
67
  task_description=task["description"],
@@ -80,27 +59,17 @@ class DataValidationEnvironment:
80
  progress_pct=0.0,
81
  field_names=self._field_names,
82
  )
83
-
84
  def step(self, action: DataCleanAction) -> DataCleanObservation:
85
- """
86
- Execute an action to fix a data error.
87
-
88
- Args:
89
- action: The action to take
90
-
91
- Returns:
92
- Updated observation with reward
93
- """
94
  if self._state.done:
95
  return self._make_observation(0.0, "Episode already done. Call reset().")
96
-
97
  self._state.step_count += 1
98
-
99
- # Check for repeated identical action
100
  action_key = f"{action.action_type}:{action.target_field}:{action.target_row}:{action.new_value}"
101
  is_repeat = action_key in self._state.last_actions
102
  self._state.last_actions.append(action_key)
103
-
104
  if is_repeat:
105
  reward = -0.1
106
  message = "Penalty: repeated identical action"
@@ -116,39 +85,34 @@ class DataValidationEnvironment:
116
  )
117
  if fixed:
118
  self._state.errors_fixed += 1
119
-
120
  self._state.cumulative_reward += reward
121
  self._state.reward_history.append(reward)
122
-
123
- # Check termination conditions
124
  errors_remaining = sum(1 for e in self._errors if not e.get("fixed", False))
125
-
126
  if errors_remaining == 0:
127
  self._state.done = True
128
  message += " | All errors fixed! Episode complete."
129
  elif self._state.step_count >= self._state.max_steps:
130
  self._state.done = True
131
  message += f" | Max steps reached. {errors_remaining} errors remaining."
132
-
133
  return self._make_observation(reward, message)
134
-
135
  def state(self) -> DataCleanState:
136
- """Return the current environment state."""
137
  return self._state
138
-
139
  def get_task_names(self) -> List[str]:
140
- """Return available task names."""
141
  return get_task_names()
142
-
143
  def _make_observation(self, reward: float, message: str) -> DataCleanObservation:
144
- """Create an observation from current state."""
145
  errors_remaining = sum(1 for e in self._errors if not e.get("fixed", False))
146
  total = self._state.total_errors if self._state.total_errors > 0 else 1
147
  progress = (self._state.errors_fixed / total) * 100
148
-
149
- # Only show unfixed errors
150
  unfixed_errors = [e for e in self._errors if not e.get("fixed", False)]
151
-
152
  return DataCleanObservation(
153
  task_name=self._state.task_name,
154
  task_description=self._task_info.get("description", ""),
 
 
 
1
  import uuid
2
  from typing import Any, Dict, List, Optional
3
 
 
6
 
7
 
8
  class DataValidationEnvironment:
9
+
 
 
 
 
 
 
 
 
 
10
  def __init__(self):
11
  self._state = DataCleanState()
12
  self._ground_truth: List[Dict[str, Any]] = []
13
  self._errors: List[Dict[str, Any]] = []
14
  self._task_info: Dict[str, Any] = {}
15
  self._field_names: List[str] = []
16
+
17
  def reset(self, task_name: Optional[str] = None, seed: int = 42, **kwargs) -> DataCleanObservation:
 
 
 
 
 
 
 
 
 
 
18
  if task_name is None:
19
  task_name = "easy_missing_values"
20
+
21
  task = generate_task(task_name, seed)
22
+
23
  self._ground_truth = task["ground_truth"]
24
  self._errors = task["errors"]
25
  self._task_info = task
26
  self._field_names = task["field_names"]
27
+
28
  self._state = DataCleanState(
29
  episode_id=str(uuid.uuid4()),
30
  task_name=task_name,
 
40
  total_errors=len(self._errors),
41
  last_actions=[],
42
  )
43
+
44
  return DataCleanObservation(
45
  task_name=task_name,
46
  task_description=task["description"],
 
59
  progress_pct=0.0,
60
  field_names=self._field_names,
61
  )
62
+
63
  def step(self, action: DataCleanAction) -> DataCleanObservation:
 
 
 
 
 
 
 
 
 
64
  if self._state.done:
65
  return self._make_observation(0.0, "Episode already done. Call reset().")
66
+
67
  self._state.step_count += 1
68
+
 
69
  action_key = f"{action.action_type}:{action.target_field}:{action.target_row}:{action.new_value}"
70
  is_repeat = action_key in self._state.last_actions
71
  self._state.last_actions.append(action_key)
72
+
73
  if is_repeat:
74
  reward = -0.1
75
  message = "Penalty: repeated identical action"
 
85
  )
86
  if fixed:
87
  self._state.errors_fixed += 1
88
+
89
  self._state.cumulative_reward += reward
90
  self._state.reward_history.append(reward)
91
+
 
92
  errors_remaining = sum(1 for e in self._errors if not e.get("fixed", False))
93
+
94
  if errors_remaining == 0:
95
  self._state.done = True
96
  message += " | All errors fixed! Episode complete."
97
  elif self._state.step_count >= self._state.max_steps:
98
  self._state.done = True
99
  message += f" | Max steps reached. {errors_remaining} errors remaining."
100
+
101
  return self._make_observation(reward, message)
102
+
103
  def state(self) -> DataCleanState:
 
104
  return self._state
105
+
106
  def get_task_names(self) -> List[str]:
 
107
  return get_task_names()
108
+
109
  def _make_observation(self, reward: float, message: str) -> DataCleanObservation:
 
110
  errors_remaining = sum(1 for e in self._errors if not e.get("fixed", False))
111
  total = self._state.total_errors if self._state.total_errors > 0 else 1
112
  progress = (self._state.errors_fixed / total) * 100
113
+
 
114
  unfixed_errors = [e for e in self._errors if not e.get("fixed", False)]
115
+
116
  return DataCleanObservation(
117
  task_name=self._state.task_name,
118
  task_description=self._task_info.get("description", ""),
env/models.py CHANGED
@@ -1,61 +1,40 @@
1
- """Pydantic models for the Data Validation Pipeline environment."""
2
-
3
  from typing import Any, Dict, List, Optional
4
  from pydantic import BaseModel, Field
5
 
6
 
7
  class DataCleanAction(BaseModel):
8
- """Action that the agent can take to clean/fix data."""
9
- action_type: str = Field(
10
- ...,
11
- description="Type of action: fix_missing, fix_type, fix_range, fix_format, fix_duplicate, validate, skip"
12
- )
13
- target_field: str = Field(
14
- default="",
15
- description="The field/column name to apply the action to"
16
- )
17
- target_row: int = Field(
18
- default=0,
19
- description="The row index to apply the action to"
20
- )
21
- new_value: str = Field(
22
- default="",
23
- description="The new/corrected value to set"
24
- )
25
 
26
 
27
  class DataCleanObservation(BaseModel):
28
- """Observation returned by the environment."""
29
- task_name: str = Field(default="", description="Name of the current task")
30
- task_description: str = Field(default="", description="Description of what to do")
31
- dataset: List[Dict[str, Any]] = Field(default_factory=list, description="Current state of the dataset")
32
- errors_found: List[Dict[str, Any]] = Field(
33
- default_factory=list,
34
- description="List of errors detected in the dataset"
35
- )
36
- errors_remaining: int = Field(default=0, description="Number of errors left to fix")
37
- errors_total: int = Field(default=0, description="Total errors at start")
38
- errors_fixed: int = Field(default=0, description="Number of errors successfully fixed")
39
- step_count: int = Field(default=0, description="Current step number")
40
- max_steps: int = Field(default=20, description="Max steps allowed")
41
- reward: float = Field(default=0.0, description="Reward from last action")
42
- cumulative_reward: float = Field(default=0.0, description="Total reward accumulated")
43
- done: bool = Field(default=False, description="Whether episode is finished")
44
- last_action_result: str = Field(default="", description="Result of the last action")
45
- task_hint: str = Field(default="", description="Hint for solving the task")
46
  available_actions: List[str] = Field(
47
  default_factory=lambda: [
48
  "fix_missing", "fix_type", "fix_range", "fix_format",
49
  "fix_duplicate", "validate", "skip"
50
- ],
51
- description="Available action types"
52
  )
53
- progress_pct: float = Field(default=0.0, description="Progress percentage (0-100)")
54
- field_names: List[str] = Field(default_factory=list, description="Column names in the dataset")
55
 
56
 
57
  class DataCleanState(BaseModel):
58
- """Full internal state of the environment."""
59
  episode_id: str = Field(default="")
60
  task_name: str = Field(default="")
61
  step_count: int = Field(default=0)
 
 
 
1
  from typing import Any, Dict, List, Optional
2
  from pydantic import BaseModel, Field
3
 
4
 
5
  class DataCleanAction(BaseModel):
6
+ action_type: str = Field(...)
7
+ target_field: str = Field(default="")
8
+ target_row: int = Field(default=0)
9
+ new_value: str = Field(default="")
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  class DataCleanObservation(BaseModel):
13
+ task_name: str = Field(default="")
14
+ task_description: str = Field(default="")
15
+ dataset: List[Dict[str, Any]] = Field(default_factory=list)
16
+ errors_found: List[Dict[str, Any]] = Field(default_factory=list)
17
+ errors_remaining: int = Field(default=0)
18
+ errors_total: int = Field(default=0)
19
+ errors_fixed: int = Field(default=0)
20
+ step_count: int = Field(default=0)
21
+ max_steps: int = Field(default=20)
22
+ reward: float = Field(default=0.0)
23
+ cumulative_reward: float = Field(default=0.0)
24
+ done: bool = Field(default=False)
25
+ last_action_result: str = Field(default="")
26
+ task_hint: str = Field(default="")
 
 
 
 
27
  available_actions: List[str] = Field(
28
  default_factory=lambda: [
29
  "fix_missing", "fix_type", "fix_range", "fix_format",
30
  "fix_duplicate", "validate", "skip"
31
+ ]
 
32
  )
33
+ progress_pct: float = Field(default=0.0)
34
+ field_names: List[str] = Field(default_factory=list)
35
 
36
 
37
  class DataCleanState(BaseModel):
 
38
  episode_id: str = Field(default="")
39
  task_name: str = Field(default="")
40
  step_count: int = Field(default=0)
env/tasks.py CHANGED
@@ -1,24 +1,11 @@
1
- """Task registry and graders for the Data Validation Pipeline environment.
2
-
3
- Each task provides:
4
- - A dirty dataset with injected errors
5
- - A ground truth clean dataset
6
- - A grader that scores partial progress
7
- """
8
-
9
  import copy
10
  import random
11
  from typing import Any, Dict, List, Tuple
12
 
13
 
14
- # ──────────────────────────────────────────────────────────────────────
15
- # TASK 1 (Easy): Fix Missing Values — solvable in ≤5 steps
16
- # ──────────────────────────────────────────────────────────────────────
17
-
18
  def _generate_task_easy(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
19
- """Generate a small employee dataset with missing values only."""
20
  rng = random.Random(seed)
21
-
22
  ground_truth = [
23
  {"id": 1, "name": "Alice Johnson", "email": "alice@example.com", "age": 30, "department": "Engineering"},
24
  {"id": 2, "name": "Bob Smith", "email": "bob@example.com", "age": 25, "department": "Marketing"},
@@ -26,17 +13,16 @@ def _generate_task_easy(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Di
26
  {"id": 4, "name": "David Brown", "email": "david@example.com", "age": 28, "department": "Sales"},
27
  {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "age": 32, "department": "Marketing"},
28
  ]
29
-
30
  dirty = copy.deepcopy(ground_truth)
31
  errors = []
32
-
33
- # Inject 3 missing value errors
34
  missing_configs = [
35
  (1, "email", ""),
36
  (2, "department", ""),
37
  (4, "name", ""),
38
  ]
39
-
40
  for row_idx, field, replacement in missing_configs:
41
  dirty[row_idx][field] = replacement
42
  errors.append({
@@ -46,18 +32,13 @@ def _generate_task_easy(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Di
46
  "current_value": replacement,
47
  "description": f"Row {row_idx}: '{field}' is missing/empty"
48
  })
49
-
50
- return dirty, ground_truth, errors
51
 
 
52
 
53
- # ──────────────────────────────────────────────────────────────────────
54
- # TASK 2 (Medium): Fix Types & Formats — requires 2-3 stage reasoning
55
- # ──────────────────────────────────────────────────────────────────────
56
 
57
  def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
58
- """Generate a product dataset with type, format, and missing errors."""
59
  rng = random.Random(seed)
60
-
61
  ground_truth = [
62
  {"id": 1, "product": "Laptop Pro", "price": 999.99, "quantity": 50, "sku": "LP-001", "category": "Electronics"},
63
  {"id": 2, "product": "Wireless Mouse", "price": 29.99, "quantity": 200, "sku": "WM-002", "category": "Accessories"},
@@ -67,11 +48,10 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
67
  {"id": 6, "product": "Headphones", "price": 149.99, "quantity": 80, "sku": "HP-006", "category": "Audio"},
68
  {"id": 7, "product": "Webcam HD", "price": 59.99, "quantity": 120, "sku": "WC-007", "category": "Electronics"},
69
  ]
70
-
71
  dirty = copy.deepcopy(ground_truth)
72
  errors = []
73
-
74
- # Error 1: price stored as string
75
  dirty[0]["price"] = "999.99"
76
  errors.append({
77
  "error_type": "type",
@@ -81,8 +61,7 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
81
  "expected_type": "float",
82
  "description": "Row 0: 'price' should be float, got string '999.99'"
83
  })
84
-
85
- # Error 2: quantity stored as string
86
  dirty[2]["quantity"] = "five hundred"
87
  errors.append({
88
  "error_type": "type",
@@ -92,8 +71,7 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
92
  "expected_type": "int",
93
  "description": "Row 2: 'quantity' should be int, got string 'five hundred'"
94
  })
95
-
96
- # Error 3: SKU wrong format
97
  dirty[3]["sku"] = "mn004"
98
  errors.append({
99
  "error_type": "format",
@@ -103,8 +81,7 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
103
  "expected_format": "XX-NNN",
104
  "description": "Row 3: 'sku' should match format 'XX-NNN', got 'mn004'"
105
  })
106
-
107
- # Error 4: missing category
108
  dirty[5]["category"] = ""
109
  errors.append({
110
  "error_type": "missing",
@@ -113,8 +90,7 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
113
  "current_value": "",
114
  "description": "Row 5: 'category' is missing/empty"
115
  })
116
-
117
- # Error 5: negative price (range error)
118
  dirty[4]["price"] = -79.99
119
  errors.append({
120
  "error_type": "range",
@@ -123,8 +99,7 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
123
  "current_value": -79.99,
124
  "description": "Row 4: 'price' is negative (-79.99), should be positive"
125
  })
126
-
127
- # Error 6: duplicate SKU
128
  dirty[6]["sku"] = "WM-002"
129
  errors.append({
130
  "error_type": "duplicate",
@@ -133,18 +108,13 @@ def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[
133
  "current_value": "WM-002",
134
  "description": "Row 6: 'sku' value 'WM-002' duplicates row 1"
135
  })
136
-
137
- return dirty, ground_truth, errors
138
 
 
139
 
140
- # ──────────────────────────────────────────────────────────────────────
141
- # TASK 3 (Hard): Multi-constraint Optimization — requires planning
142
- # ──────────────────────────────────────────────────────────────────────
143
 
144
  def _generate_task_hard(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
145
- """Generate a complex customer orders dataset with multiple interrelated errors."""
146
  rng = random.Random(seed)
147
-
148
  ground_truth = [
149
  {"id": 1, "customer": "Acme Corp", "email": "orders@acme.com", "amount": 1500.00, "currency": "USD", "status": "completed", "date": "2024-03-15", "region": "North America", "priority": "high"},
150
  {"id": 2, "customer": "GlobalTech", "email": "sales@globaltech.io", "amount": 2300.50, "currency": "EUR", "status": "pending", "date": "2024-03-16", "region": "Europe", "priority": "medium"},
@@ -157,57 +127,43 @@ def _generate_task_hard(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Di
157
  {"id": 9, "customer": "EcoSmart", "email": "green@ecosmart.co", "amount": 1200.00, "currency": "AUD", "status": "shipped", "date": "2024-03-23", "region": "Asia Pacific", "priority": "medium"},
158
  {"id": 10, "customer": "BlueOcean", "email": "info@blueocean.net", "amount": 980.75, "currency": "USD", "status": "pending", "date": "2024-03-24", "region": "North America", "priority": "low"},
159
  ]
160
-
161
  dirty = copy.deepcopy(ground_truth)
162
  errors = []
163
-
164
- # Error 1: missing email
165
  dirty[0]["email"] = ""
166
  errors.append({"error_type": "missing", "row": 0, "field": "email", "current_value": "", "description": "Row 0: 'email' is missing"})
167
-
168
- # Error 2: negative amount
169
  dirty[1]["amount"] = -2300.50
170
  errors.append({"error_type": "range", "row": 1, "field": "amount", "current_value": -2300.50, "description": "Row 1: 'amount' is negative"})
171
-
172
- # Error 3: invalid date format
173
  dirty[2]["date"] = "03/17/2024"
174
  errors.append({"error_type": "format", "row": 2, "field": "date", "current_value": "03/17/2024", "expected_format": "YYYY-MM-DD", "description": "Row 2: 'date' wrong format, expected YYYY-MM-DD"})
175
-
176
- # Error 4: amount as string
177
  dirty[3]["amount"] = "4200"
178
  errors.append({"error_type": "type", "row": 3, "field": "amount", "current_value": "4200", "expected_type": "float", "description": "Row 3: 'amount' should be float, got string"})
179
-
180
- # Error 5: invalid status
181
  dirty[4]["status"] = "in-progress"
182
  errors.append({"error_type": "format", "row": 4, "field": "status", "current_value": "in-progress", "expected_format": "one of: completed, pending, shipped, cancelled", "description": "Row 4: 'status' invalid value 'in-progress'"})
183
-
184
- # Error 6: missing region
185
  dirty[5]["region"] = ""
186
  errors.append({"error_type": "missing", "row": 5, "field": "region", "current_value": "", "description": "Row 5: 'region' is missing"})
187
-
188
- # Error 7: duplicate customer
189
  dirty[6]["customer"] = "Acme Corp"
190
  dirty[6]["email"] = "orders@acme.com"
191
  errors.append({"error_type": "duplicate", "row": 6, "field": "customer", "current_value": "Acme Corp", "description": "Row 6: 'customer' duplicates row 0"})
192
-
193
- # Error 8: amount out of range (too high)
194
  dirty[7]["amount"] = 99999.99
195
  errors.append({"error_type": "range", "row": 7, "field": "amount", "current_value": 99999.99, "description": "Row 7: 'amount' exceeds maximum threshold (should be 6750.00)"})
196
-
197
- # Error 9: invalid currency
198
  dirty[8]["currency"] = "AUSD"
199
  errors.append({"error_type": "format", "row": 8, "field": "currency", "current_value": "AUSD", "expected_format": "3-letter ISO code", "description": "Row 8: 'currency' invalid code 'AUSD'"})
200
-
201
- # Error 10: missing priority + wrong type
202
  dirty[9]["priority"] = ""
203
  errors.append({"error_type": "missing", "row": 9, "field": "priority", "current_value": "", "description": "Row 9: 'priority' is missing"})
204
-
205
- return dirty, ground_truth, errors
206
 
 
207
 
208
- # ──────────────────────────────────────────────────────────────────────
209
- # Task Registry
210
- # ──────────────────────────────────────────────────────────────────────
211
 
212
  TASK_REGISTRY = {
213
  "easy_missing_values": {
@@ -238,18 +194,16 @@ TASK_REGISTRY = {
238
 
239
 
240
  def get_task_names() -> List[str]:
241
- """Return all registered task names."""
242
  return list(TASK_REGISTRY.keys())
243
 
244
 
245
  def generate_task(task_name: str, seed: int = 42) -> Dict[str, Any]:
246
- """Generate a task by name."""
247
  if task_name not in TASK_REGISTRY:
248
  raise ValueError(f"Unknown task: {task_name}. Available: {get_task_names()}")
249
-
250
  task_info = TASK_REGISTRY[task_name]
251
  dirty, ground_truth, errors = task_info["generator"](seed)
252
-
253
  return {
254
  "name": task_info["name"],
255
  "description": task_info["description"],
@@ -263,29 +217,18 @@ def generate_task(task_name: str, seed: int = 42) -> Dict[str, Any]:
263
  }
264
 
265
 
266
- def grade_action(action_type: str, target_field: str, target_row: int,
267
- new_value: str, dirty_dataset: List[Dict],
268
  ground_truth: List[Dict], errors: List[Dict]) -> Tuple[float, str, bool]:
269
- """
270
- Grade a single action. Returns (reward, message, error_fixed).
271
-
272
- Reward strategy:
273
- - Correct fix: +1.0 / total_errors (proportional)
274
- - Wrong fix: -0.05
275
- - Skip: 0.0
276
- - Validate (check progress): 0.0
277
- - Repeated identical action: -0.1
278
- """
279
  total_errors = len(errors) if errors else 1
280
-
281
  if action_type == "validate":
282
  fixed = sum(1 for e in errors if e.get("fixed", False))
283
  return 0.0, f"Validation: {fixed}/{total_errors} errors fixed ({fixed/total_errors*100:.0f}%)", False
284
-
285
  if action_type == "skip":
286
  return 0.0, "Skipped current action", False
287
-
288
- # Find matching error
289
  matching_error = None
290
  for e in errors:
291
  if e.get("fixed", False):
@@ -293,11 +236,10 @@ def grade_action(action_type: str, target_field: str, target_row: int,
293
  if e["row"] == target_row and e["field"] == target_field:
294
  matching_error = e
295
  break
296
-
297
  if matching_error is None:
298
  return -0.05, f"No unfixed error at row {target_row}, field '{target_field}'", False
299
-
300
- # Check if the action type matches the error type
301
  action_to_error_map = {
302
  "fix_missing": "missing",
303
  "fix_type": "type",
@@ -305,15 +247,13 @@ def grade_action(action_type: str, target_field: str, target_row: int,
305
  "fix_format": "format",
306
  "fix_duplicate": "duplicate",
307
  }
308
-
309
  expected_error_type = action_to_error_map.get(action_type, "")
310
  if expected_error_type != matching_error["error_type"]:
311
  return -0.05, f"Wrong action type '{action_type}' for error type '{matching_error['error_type']}'", False
312
-
313
- # Check the new value against ground truth
314
  gt_value = ground_truth[target_row][target_field]
315
-
316
- # Flexible value comparison
317
  is_correct = False
318
  try:
319
  if isinstance(gt_value, float):
@@ -324,18 +264,17 @@ def grade_action(action_type: str, target_field: str, target_row: int,
324
  is_correct = str(new_value).strip() == str(gt_value).strip()
325
  except (ValueError, TypeError):
326
  is_correct = str(new_value).strip() == str(gt_value).strip()
327
-
328
  if is_correct:
329
  matching_error["fixed"] = True
330
- # Update the dirty dataset
331
  if isinstance(gt_value, float):
332
  dirty_dataset[target_row][target_field] = float(new_value)
333
  elif isinstance(gt_value, int):
334
  dirty_dataset[target_row][target_field] = int(float(new_value))
335
  else:
336
  dirty_dataset[target_row][target_field] = new_value
337
-
338
  reward = 1.0 / total_errors
339
- return reward, f"Fixed: row {target_row}, field '{target_field}' '{new_value}'", True
340
  else:
341
- return -0.05, f"Wrong value for row {target_row}, field '{target_field}'. Got '{new_value}', expected something else.", False
 
 
 
 
 
 
 
 
 
1
  import copy
2
  import random
3
  from typing import Any, Dict, List, Tuple
4
 
5
 
 
 
 
 
6
  def _generate_task_easy(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
 
7
  rng = random.Random(seed)
8
+
9
  ground_truth = [
10
  {"id": 1, "name": "Alice Johnson", "email": "alice@example.com", "age": 30, "department": "Engineering"},
11
  {"id": 2, "name": "Bob Smith", "email": "bob@example.com", "age": 25, "department": "Marketing"},
 
13
  {"id": 4, "name": "David Brown", "email": "david@example.com", "age": 28, "department": "Sales"},
14
  {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "age": 32, "department": "Marketing"},
15
  ]
16
+
17
  dirty = copy.deepcopy(ground_truth)
18
  errors = []
19
+
 
20
  missing_configs = [
21
  (1, "email", ""),
22
  (2, "department", ""),
23
  (4, "name", ""),
24
  ]
25
+
26
  for row_idx, field, replacement in missing_configs:
27
  dirty[row_idx][field] = replacement
28
  errors.append({
 
32
  "current_value": replacement,
33
  "description": f"Row {row_idx}: '{field}' is missing/empty"
34
  })
 
 
35
 
36
+ return dirty, ground_truth, errors
37
 
 
 
 
38
 
39
  def _generate_task_medium(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
 
40
  rng = random.Random(seed)
41
+
42
  ground_truth = [
43
  {"id": 1, "product": "Laptop Pro", "price": 999.99, "quantity": 50, "sku": "LP-001", "category": "Electronics"},
44
  {"id": 2, "product": "Wireless Mouse", "price": 29.99, "quantity": 200, "sku": "WM-002", "category": "Accessories"},
 
48
  {"id": 6, "product": "Headphones", "price": 149.99, "quantity": 80, "sku": "HP-006", "category": "Audio"},
49
  {"id": 7, "product": "Webcam HD", "price": 59.99, "quantity": 120, "sku": "WC-007", "category": "Electronics"},
50
  ]
51
+
52
  dirty = copy.deepcopy(ground_truth)
53
  errors = []
54
+
 
55
  dirty[0]["price"] = "999.99"
56
  errors.append({
57
  "error_type": "type",
 
61
  "expected_type": "float",
62
  "description": "Row 0: 'price' should be float, got string '999.99'"
63
  })
64
+
 
65
  dirty[2]["quantity"] = "five hundred"
66
  errors.append({
67
  "error_type": "type",
 
71
  "expected_type": "int",
72
  "description": "Row 2: 'quantity' should be int, got string 'five hundred'"
73
  })
74
+
 
75
  dirty[3]["sku"] = "mn004"
76
  errors.append({
77
  "error_type": "format",
 
81
  "expected_format": "XX-NNN",
82
  "description": "Row 3: 'sku' should match format 'XX-NNN', got 'mn004'"
83
  })
84
+
 
85
  dirty[5]["category"] = ""
86
  errors.append({
87
  "error_type": "missing",
 
90
  "current_value": "",
91
  "description": "Row 5: 'category' is missing/empty"
92
  })
93
+
 
94
  dirty[4]["price"] = -79.99
95
  errors.append({
96
  "error_type": "range",
 
99
  "current_value": -79.99,
100
  "description": "Row 4: 'price' is negative (-79.99), should be positive"
101
  })
102
+
 
103
  dirty[6]["sku"] = "WM-002"
104
  errors.append({
105
  "error_type": "duplicate",
 
108
  "current_value": "WM-002",
109
  "description": "Row 6: 'sku' value 'WM-002' duplicates row 1"
110
  })
 
 
111
 
112
+ return dirty, ground_truth, errors
113
 
 
 
 
114
 
115
  def _generate_task_hard(seed: int = 42) -> Tuple[List[Dict], List[Dict], List[Dict]]:
 
116
  rng = random.Random(seed)
117
+
118
  ground_truth = [
119
  {"id": 1, "customer": "Acme Corp", "email": "orders@acme.com", "amount": 1500.00, "currency": "USD", "status": "completed", "date": "2024-03-15", "region": "North America", "priority": "high"},
120
  {"id": 2, "customer": "GlobalTech", "email": "sales@globaltech.io", "amount": 2300.50, "currency": "EUR", "status": "pending", "date": "2024-03-16", "region": "Europe", "priority": "medium"},
 
127
  {"id": 9, "customer": "EcoSmart", "email": "green@ecosmart.co", "amount": 1200.00, "currency": "AUD", "status": "shipped", "date": "2024-03-23", "region": "Asia Pacific", "priority": "medium"},
128
  {"id": 10, "customer": "BlueOcean", "email": "info@blueocean.net", "amount": 980.75, "currency": "USD", "status": "pending", "date": "2024-03-24", "region": "North America", "priority": "low"},
129
  ]
130
+
131
  dirty = copy.deepcopy(ground_truth)
132
  errors = []
133
+
 
134
  dirty[0]["email"] = ""
135
  errors.append({"error_type": "missing", "row": 0, "field": "email", "current_value": "", "description": "Row 0: 'email' is missing"})
136
+
 
137
  dirty[1]["amount"] = -2300.50
138
  errors.append({"error_type": "range", "row": 1, "field": "amount", "current_value": -2300.50, "description": "Row 1: 'amount' is negative"})
139
+
 
140
  dirty[2]["date"] = "03/17/2024"
141
  errors.append({"error_type": "format", "row": 2, "field": "date", "current_value": "03/17/2024", "expected_format": "YYYY-MM-DD", "description": "Row 2: 'date' wrong format, expected YYYY-MM-DD"})
142
+
 
143
  dirty[3]["amount"] = "4200"
144
  errors.append({"error_type": "type", "row": 3, "field": "amount", "current_value": "4200", "expected_type": "float", "description": "Row 3: 'amount' should be float, got string"})
145
+
 
146
  dirty[4]["status"] = "in-progress"
147
  errors.append({"error_type": "format", "row": 4, "field": "status", "current_value": "in-progress", "expected_format": "one of: completed, pending, shipped, cancelled", "description": "Row 4: 'status' invalid value 'in-progress'"})
148
+
 
149
  dirty[5]["region"] = ""
150
  errors.append({"error_type": "missing", "row": 5, "field": "region", "current_value": "", "description": "Row 5: 'region' is missing"})
151
+
 
152
  dirty[6]["customer"] = "Acme Corp"
153
  dirty[6]["email"] = "orders@acme.com"
154
  errors.append({"error_type": "duplicate", "row": 6, "field": "customer", "current_value": "Acme Corp", "description": "Row 6: 'customer' duplicates row 0"})
155
+
 
156
  dirty[7]["amount"] = 99999.99
157
  errors.append({"error_type": "range", "row": 7, "field": "amount", "current_value": 99999.99, "description": "Row 7: 'amount' exceeds maximum threshold (should be 6750.00)"})
158
+
 
159
  dirty[8]["currency"] = "AUSD"
160
  errors.append({"error_type": "format", "row": 8, "field": "currency", "current_value": "AUSD", "expected_format": "3-letter ISO code", "description": "Row 8: 'currency' invalid code 'AUSD'"})
161
+
 
162
  dirty[9]["priority"] = ""
163
  errors.append({"error_type": "missing", "row": 9, "field": "priority", "current_value": "", "description": "Row 9: 'priority' is missing"})
 
 
164
 
165
+ return dirty, ground_truth, errors
166
 
 
 
 
167
 
168
  TASK_REGISTRY = {
169
  "easy_missing_values": {
 
194
 
195
 
196
def get_task_names() -> List[str]:
    """Return the names of every registered task, in registry order."""
    return [task_name for task_name in TASK_REGISTRY]
198
 
199
 
200
  def generate_task(task_name: str, seed: int = 42) -> Dict[str, Any]:
 
201
  if task_name not in TASK_REGISTRY:
202
  raise ValueError(f"Unknown task: {task_name}. Available: {get_task_names()}")
203
+
204
  task_info = TASK_REGISTRY[task_name]
205
  dirty, ground_truth, errors = task_info["generator"](seed)
206
+
207
  return {
208
  "name": task_info["name"],
209
  "description": task_info["description"],
 
217
  }
218
 
219
 
220
+ def grade_action(action_type: str, target_field: str, target_row: int,
221
+ new_value: str, dirty_dataset: List[Dict],
222
  ground_truth: List[Dict], errors: List[Dict]) -> Tuple[float, str, bool]:
 
 
 
 
 
 
 
 
 
 
223
  total_errors = len(errors) if errors else 1
224
+
225
  if action_type == "validate":
226
  fixed = sum(1 for e in errors if e.get("fixed", False))
227
  return 0.0, f"Validation: {fixed}/{total_errors} errors fixed ({fixed/total_errors*100:.0f}%)", False
228
+
229
  if action_type == "skip":
230
  return 0.0, "Skipped current action", False
231
+
 
232
  matching_error = None
233
  for e in errors:
234
  if e.get("fixed", False):
 
236
  if e["row"] == target_row and e["field"] == target_field:
237
  matching_error = e
238
  break
239
+
240
  if matching_error is None:
241
  return -0.05, f"No unfixed error at row {target_row}, field '{target_field}'", False
242
+
 
243
  action_to_error_map = {
244
  "fix_missing": "missing",
245
  "fix_type": "type",
 
247
  "fix_format": "format",
248
  "fix_duplicate": "duplicate",
249
  }
250
+
251
  expected_error_type = action_to_error_map.get(action_type, "")
252
  if expected_error_type != matching_error["error_type"]:
253
  return -0.05, f"Wrong action type '{action_type}' for error type '{matching_error['error_type']}'", False
254
+
 
255
  gt_value = ground_truth[target_row][target_field]
256
+
 
257
  is_correct = False
258
  try:
259
  if isinstance(gt_value, float):
 
264
  is_correct = str(new_value).strip() == str(gt_value).strip()
265
  except (ValueError, TypeError):
266
  is_correct = str(new_value).strip() == str(gt_value).strip()
267
+
268
  if is_correct:
269
  matching_error["fixed"] = True
 
270
  if isinstance(gt_value, float):
271
  dirty_dataset[target_row][target_field] = float(new_value)
272
  elif isinstance(gt_value, int):
273
  dirty_dataset[target_row][target_field] = int(float(new_value))
274
  else:
275
  dirty_dataset[target_row][target_field] = new_value
276
+
277
  reward = 1.0 / total_errors
278
+ return reward, f"Fixed: row {target_row}, field '{target_field}' -> '{new_value}'", True
279
  else:
280
+ return -0.05, f"Wrong value for row {target_row}, field '{target_field}'. Got '{new_value}', expected something else.", False
inference.py CHANGED
@@ -1,18 +1,3 @@
1
- """
2
- Inference agent for the Data Validation Pipeline environment.
3
-
4
- Uses OpenAI-compatible API to solve data cleaning tasks.
5
- Reads environment variables:
6
- - API_BASE_URL: Base URL for the OpenAI-compatible API (default: https://api.openai.com/v1)
7
- - MODEL_NAME: Model to use (default: gpt-4.1-mini)
8
- - HF_TOKEN: HuggingFace token (REQUIRED, no default)
9
-
10
- Output format strictly follows OpenEnv spec:
11
- [START] task=<name> env=<benchmark> model=<model_name>
12
- [STEP] step=<n> action=<str> reward=<0.00> done=<true|false> error=<msg|null>
13
- [END] success=<true|false> steps=<n> rewards=<r1,r2,...,rn>
14
- """
15
-
16
  import json
17
  import os
18
  import re
@@ -21,7 +6,6 @@ import time
21
  import requests
22
  from openai import OpenAI
23
 
24
- # Read environment variables with defaults where required
25
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
26
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
27
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -29,16 +13,13 @@ HF_TOKEN = os.getenv("HF_TOKEN")
29
  if HF_TOKEN is None:
30
  raise ValueError("HF_TOKEN environment variable is required")
31
 
32
- # Initialize OpenAI client
33
  client = OpenAI(
34
  base_url=API_BASE_URL,
35
  api_key=HF_TOKEN,
36
  )
37
 
38
- # The HF Space URL where the environment is running
39
  ENV_BASE_URL = os.getenv("ENV_BASE_URL", "https://kush5699-data-validation-env.hf.space")
40
 
41
- # All 3 tasks to run sequentially
42
  TASKS = [
43
  {"task_name": "easy_missing_values", "seed": 42},
44
  {"task_name": "medium_mixed_errors", "seed": 42},
@@ -49,7 +30,6 @@ BENCHMARK_NAME = "data_validation_env"
49
 
50
 
51
  def call_llm(messages: list) -> str:
52
- """Call the LLM via OpenAI-compatible API."""
53
  try:
54
  response = client.chat.completions.create(
55
  model=MODEL_NAME,
@@ -68,7 +48,6 @@ def call_llm(messages: list) -> str:
68
 
69
 
70
  def env_reset(task_name: str, seed: int = 42) -> dict:
71
- """Reset the environment."""
72
  resp = requests.post(
73
  f"{ENV_BASE_URL}/reset",
74
  json={"task_name": task_name, "seed": seed},
@@ -79,7 +58,6 @@ def env_reset(task_name: str, seed: int = 42) -> dict:
79
 
80
 
81
  def env_step(action: dict) -> dict:
82
- """Take a step in the environment."""
83
  resp = requests.post(
84
  f"{ENV_BASE_URL}/step",
85
  json=action,
@@ -90,7 +68,6 @@ def env_step(action: dict) -> dict:
90
 
91
 
92
  def build_system_prompt(obs: dict) -> str:
93
- """Build a system prompt for the LLM based on current observation."""
94
  return f"""You are a data validation agent. Your task is to fix errors in a dataset.
95
 
96
  TASK: {obs.get('task_description', '')}
@@ -116,13 +93,9 @@ RULES:
116
 
117
 
118
  def build_user_prompt(obs: dict) -> str:
119
- """Build a user prompt showing current state."""
120
  errors = obs.get("errors_found", [])
121
  dataset = obs.get("dataset", [])
122
-
123
  errors_text = json.dumps(errors, indent=2) if errors else "No errors remaining!"
124
-
125
- # Show a compact view of dataset
126
  dataset_compact = []
127
  for i, row in enumerate(dataset):
128
  dataset_compact.append(f"Row {i}: {json.dumps(row)}")
@@ -146,8 +119,6 @@ Respond with ONLY a JSON action object to fix the next error."""
146
 
147
 
148
  def parse_llm_response(response: str) -> dict:
149
- """Parse the LLM response into a valid action."""
150
- # Try to extract JSON from the response
151
  try:
152
  action = json.loads(response)
153
  return {
@@ -159,7 +130,6 @@ def parse_llm_response(response: str) -> dict:
159
  except json.JSONDecodeError:
160
  pass
161
 
162
- # Try to find JSON in the response
163
  json_match = re.search(r'\{[^}]+\}', response)
164
  if json_match:
165
  try:
@@ -173,12 +143,10 @@ def parse_llm_response(response: str) -> dict:
173
  except (json.JSONDecodeError, ValueError):
174
  pass
175
 
176
- # Fallback: skip
177
  return {"action_type": "skip", "target_field": "", "target_row": 0, "new_value": ""}
178
 
179
 
180
  def run_episode(task_config: dict) -> None:
181
- """Run a single episode for a task."""
182
  task_name = task_config["task_name"]
183
  seed = task_config.get("seed", 42)
184
  rewards = []
@@ -188,7 +156,6 @@ def run_episode(task_config: dict) -> None:
188
  print(f"[START] task={task_name} env={BENCHMARK_NAME} model={MODEL_NAME}")
189
 
190
  try:
191
- # Reset environment
192
  obs = env_reset(task_name, seed)
193
  max_steps = obs.get("max_steps", 20)
194
 
@@ -197,18 +164,14 @@ def run_episode(task_config: dict) -> None:
197
  ]
198
 
199
  while not obs.get("done", False) and steps < max_steps:
200
- # Build user prompt
201
  user_msg = build_user_prompt(obs)
202
  messages_for_call = messages + [{"role": "user", "content": user_msg}]
203
 
204
- # Get LLM response
205
  llm_response = call_llm(messages_for_call)
206
 
207
- # Parse into action
208
  action = parse_llm_response(llm_response)
209
  action_str = json.dumps(action)
210
 
211
- # Take step
212
  error_msg = None
213
  try:
214
  obs = env_step(action)
@@ -227,7 +190,6 @@ def run_episode(task_config: dict) -> None:
227
  if done:
228
  break
229
 
230
- # Calculate success based on cumulative reward
231
  total_reward = sum(rewards)
232
  success = total_reward > 0.5
233
 
@@ -243,10 +205,9 @@ def run_episode(task_config: dict) -> None:
243
 
244
 
245
  def main():
246
- """Run all 3 tasks sequentially."""
247
  for task_config in TASKS:
248
  run_episode(task_config)
249
- time.sleep(1) # Small delay between tasks
250
 
251
 
252
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  import os
3
  import re
 
6
  import requests
7
  from openai import OpenAI
8
 
 
9
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
10
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
11
  HF_TOKEN = os.getenv("HF_TOKEN")
 
13
  if HF_TOKEN is None:
14
  raise ValueError("HF_TOKEN environment variable is required")
15
 
 
16
  client = OpenAI(
17
  base_url=API_BASE_URL,
18
  api_key=HF_TOKEN,
19
  )
20
 
 
21
  ENV_BASE_URL = os.getenv("ENV_BASE_URL", "https://kush5699-data-validation-env.hf.space")
22
 
 
23
  TASKS = [
24
  {"task_name": "easy_missing_values", "seed": 42},
25
  {"task_name": "medium_mixed_errors", "seed": 42},
 
30
 
31
 
32
  def call_llm(messages: list) -> str:
 
33
  try:
34
  response = client.chat.completions.create(
35
  model=MODEL_NAME,
 
48
 
49
 
50
  def env_reset(task_name: str, seed: int = 42) -> dict:
 
51
  resp = requests.post(
52
  f"{ENV_BASE_URL}/reset",
53
  json={"task_name": task_name, "seed": seed},
 
58
 
59
 
60
  def env_step(action: dict) -> dict:
 
61
  resp = requests.post(
62
  f"{ENV_BASE_URL}/step",
63
  json=action,
 
68
 
69
 
70
  def build_system_prompt(obs: dict) -> str:
 
71
  return f"""You are a data validation agent. Your task is to fix errors in a dataset.
72
 
73
  TASK: {obs.get('task_description', '')}
 
93
 
94
 
95
  def build_user_prompt(obs: dict) -> str:
 
96
  errors = obs.get("errors_found", [])
97
  dataset = obs.get("dataset", [])
 
98
  errors_text = json.dumps(errors, indent=2) if errors else "No errors remaining!"
 
 
99
  dataset_compact = []
100
  for i, row in enumerate(dataset):
101
  dataset_compact.append(f"Row {i}: {json.dumps(row)}")
 
119
 
120
 
121
  def parse_llm_response(response: str) -> dict:
 
 
122
  try:
123
  action = json.loads(response)
124
  return {
 
130
  except json.JSONDecodeError:
131
  pass
132
 
 
133
  json_match = re.search(r'\{[^}]+\}', response)
134
  if json_match:
135
  try:
 
143
  except (json.JSONDecodeError, ValueError):
144
  pass
145
 
 
146
  return {"action_type": "skip", "target_field": "", "target_row": 0, "new_value": ""}
147
 
148
 
149
  def run_episode(task_config: dict) -> None:
 
150
  task_name = task_config["task_name"]
151
  seed = task_config.get("seed", 42)
152
  rewards = []
 
156
  print(f"[START] task={task_name} env={BENCHMARK_NAME} model={MODEL_NAME}")
157
 
158
  try:
 
159
  obs = env_reset(task_name, seed)
160
  max_steps = obs.get("max_steps", 20)
161
 
 
164
  ]
165
 
166
  while not obs.get("done", False) and steps < max_steps:
 
167
  user_msg = build_user_prompt(obs)
168
  messages_for_call = messages + [{"role": "user", "content": user_msg}]
169
 
 
170
  llm_response = call_llm(messages_for_call)
171
 
 
172
  action = parse_llm_response(llm_response)
173
  action_str = json.dumps(action)
174
 
 
175
  error_msg = None
176
  try:
177
  obs = env_step(action)
 
190
  if done:
191
  break
192
 
 
193
  total_reward = sum(rewards)
194
  success = total_reward > 0.5
195
 
 
205
 
206
 
207
def main():
    """Run each configured task episode in sequence, pausing briefly between runs."""
    for cfg in TASKS:
        run_episode(cfg)
        time.sleep(1)
211
 
212
 
213
  if __name__ == "__main__":
server.py CHANGED
@@ -1,27 +1,19 @@
1
- """FastAPI server for the Data Validation Pipeline environment.
2
-
3
- Exposes OpenEnv-compatible HTTP endpoints: /reset, /step, /state, /health
4
- """
5
-
6
  import json
7
  import traceback
8
- from typing import Any, Dict, Optional
9
 
10
- from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect, Request
11
- from fastapi.responses import JSONResponse
12
  from pydantic import BaseModel
13
 
14
  from env.environment import DataValidationEnvironment
15
- from env.models import DataCleanAction, DataCleanObservation, DataCleanState
16
  from env.tasks import get_task_names
17
 
18
  app = FastAPI(
19
  title="Data Validation Pipeline - OpenEnv Environment",
20
- description="An RL environment for training agents to clean and validate structured data",
21
  version="1.0.0",
22
  )
23
 
24
- # Single shared environment instance
25
  env = DataValidationEnvironment()
26
 
27
 
@@ -39,13 +31,11 @@ class StepRequest(BaseModel):
39
 
40
  @app.get("/health")
41
  async def health():
42
- """Health check endpoint."""
43
  return {"status": "healthy", "service": "data-validation-env"}
44
 
45
 
46
  @app.post("/reset")
47
  async def reset(request: ResetRequest = None):
48
- """Reset the environment with a new task."""
49
  if request is None:
50
  request = ResetRequest()
51
  try:
@@ -57,7 +47,6 @@ async def reset(request: ResetRequest = None):
57
 
58
  @app.post("/step")
59
  async def step(request: StepRequest):
60
- """Execute an action in the environment."""
61
  try:
62
  action = DataCleanAction(
63
  action_type=request.action_type,
@@ -73,7 +62,6 @@ async def step(request: StepRequest):
73
 
74
  @app.get("/state")
75
  async def state():
76
- """Get the current environment state."""
77
  try:
78
  s = env.state()
79
  return s.model_dump()
@@ -83,25 +71,22 @@ async def state():
83
 
84
  @app.get("/tasks")
85
  async def tasks():
86
- """List available tasks."""
87
  return {"tasks": get_task_names()}
88
 
89
 
90
- # WebSocket support for OpenEnv clients
91
  @app.websocket("/ws")
92
  async def websocket_endpoint(websocket: WebSocket):
93
- """WebSocket endpoint for persistent sessions."""
94
  await websocket.accept()
95
  ws_env = DataValidationEnvironment()
96
-
97
  try:
98
  while True:
99
  data = await websocket.receive_text()
100
  msg = json.loads(data)
101
-
102
  method = msg.get("method", "")
103
  params = msg.get("params", {})
104
-
105
  try:
106
  if method == "reset":
107
  obs = ws_env.reset(
@@ -131,7 +116,7 @@ async def websocket_endpoint(websocket: WebSocket):
131
  }
132
  else:
133
  response = {"error": f"Unknown method: {method}"}
134
-
135
  await websocket.send_text(json.dumps(response))
136
  except Exception as e:
137
  await websocket.send_text(json.dumps({
 
 
 
 
 
 
1
  import json
2
  import traceback
3
+ from typing import Optional
4
 
5
+ from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
 
6
  from pydantic import BaseModel
7
 
8
  from env.environment import DataValidationEnvironment
9
+ from env.models import DataCleanAction
10
  from env.tasks import get_task_names
11
 
12
  app = FastAPI(
13
  title="Data Validation Pipeline - OpenEnv Environment",
 
14
  version="1.0.0",
15
  )
16
 
 
17
  env = DataValidationEnvironment()
18
 
19
 
 
31
 
32
@app.get("/health")
async def health():
    """Liveness probe; the Dockerfile HEALTHCHECK polls this endpoint."""
    payload = {"status": "healthy", "service": "data-validation-env"}
    return payload
35
 
36
 
37
  @app.post("/reset")
38
  async def reset(request: ResetRequest = None):
 
39
  if request is None:
40
  request = ResetRequest()
41
  try:
 
47
 
48
  @app.post("/step")
49
  async def step(request: StepRequest):
 
50
  try:
51
  action = DataCleanAction(
52
  action_type=request.action_type,
 
62
 
63
  @app.get("/state")
64
  async def state():
 
65
  try:
66
  s = env.state()
67
  return s.model_dump()
 
71
 
72
@app.get("/tasks")
async def tasks():
    """Expose the list of task names this environment can run."""
    available = get_task_names()
    return {"tasks": available}
75
 
76
 
 
77
  @app.websocket("/ws")
78
  async def websocket_endpoint(websocket: WebSocket):
 
79
  await websocket.accept()
80
  ws_env = DataValidationEnvironment()
81
+
82
  try:
83
  while True:
84
  data = await websocket.receive_text()
85
  msg = json.loads(data)
86
+
87
  method = msg.get("method", "")
88
  params = msg.get("params", {})
89
+
90
  try:
91
  if method == "reset":
92
  obs = ws_env.reset(
 
116
  }
117
  else:
118
  response = {"error": f"Unknown method: {method}"}
119
+
120
  await websocket.send_text(json.dumps(response))
121
  except Exception as e:
122
  await websocket.send_text(json.dumps({