# Hugging Face Space (status: sleeping) - file size: 1,254 bytes
d2d30e9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | from typing import Any, Dict, List, Optional
from pydantic import BaseModel
class DataCleaningAction(BaseModel):
    """Describe one cleaning step to run against the current dirty DataFrame.

    Documented ``operation`` choices:
        fill_missing    - fill NaN values in a column
        drop_duplicates - remove duplicate rows
        fix_format      - standardise string formats (phone, date, text)
        replace_value   - replace a specific value with another
        drop_outliers   - remove rows where column value is a statistical outlier
        fix_dtype       - cast a column to the correct dtype
    """

    # Operation name. Declared as a plain ``str``, so the choices listed in
    # the docstring are not enforced by this model itself.
    operation: str

    # Column the operation targets; optional and None when omitted.
    column: Optional[str] = None

    # Extra operation-specific arguments; an empty mapping when omitted.
    # NOTE(review): the expected keys per operation are not visible here.
    params: Dict[str, Any] = {}
class DataCleaningObservation(BaseModel):
    """Snapshot of the data and episode progress.

    NOTE(review): presumably produced by the environment after each action;
    confirm against the code that constructs it.
    """

    # Episode bookkeeping.
    done: bool
    reward: float

    # First 10 rows as CSV string.
    data_preview: str

    # Two-element list: [rows, cols].
    data_shape: List[int]

    # Data-quality summaries.
    # NOTE(review): the mappings are presumably keyed by column name - verify.
    missing_counts: Dict[str, int]
    duplicate_count: int
    dtype_issues: Dict[str, str]

    # Human-readable context.
    task_description: str
    message: str

    step_count: int

    # Running grader score 0.0–1.0.
    current_score: float
class DataCleaningState(BaseModel):
    """Bookkeeping record for one data-cleaning episode.

    All fields are required; no defaults are declared.
    """

    # Identifiers for the episode and the task it runs.
    episode_id: str
    task_id: int

    # Progress counters: steps used so far and the step limit.
    step_count: int
    max_steps: int

    # Error counters.
    # NOTE(review): presumably errors injected vs. still unfixed - confirm
    # against the code that updates this state.
    total_errors: int
    errors_remaining: int
|