Spaces:
Sleeping
Sleeping
Commit ·
6b248b4
1
Parent(s): d2d30e9
fix: resolve date parsing warning and task3 dataset generation bug
Browse files
- Fix ValueError in generate_task3_datasets where the scrambled date list
length mismatched the target index when NaN values were present
- Fix UserWarning in _fix_date by using explicit format strings before
falling back to flexible parsing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- server/environment.py +11 -5
server/environment.py
CHANGED
|
@@ -252,16 +252,22 @@ class DataCleaningEnvironment:
|
|
| 252 |
return f"Fixed {fixed} phone numbers in '{col}' to NNN-NNN-NNNN format.", True
|
| 253 |
|
| 254 |
def _fix_date(self, col) -> Tuple[str, bool]:
|
|
|
|
|
|
|
| 255 |
def normalise(val):
|
| 256 |
if pd.isna(val):
|
| 257 |
return val
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
except Exception:
|
| 261 |
try:
|
| 262 |
-
return pd.to_datetime(
|
| 263 |
except Exception:
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
before = (~self._df[col].apply(
|
| 267 |
lambda x: bool(DATE_RE.match(str(x))) if pd.notna(x) else False
|
|
|
|
| 252 |
return f"Fixed {fixed} phone numbers in '{col}' to NNN-NNN-NNNN format.", True
|
| 253 |
|
| 254 |
def _fix_date(self, col) -> Tuple[str, bool]:
|
| 255 |
+
_DATE_FORMATS = ["%Y-%m-%d", "%b %d %Y", "%d/%m/%Y", "%m/%d/%Y", "%Y/%m/%d"]
|
| 256 |
+
|
| 257 |
def normalise(val):
|
| 258 |
if pd.isna(val):
|
| 259 |
return val
|
| 260 |
+
s = str(val).strip()
|
| 261 |
+
for fmt in _DATE_FORMATS:
|
|
|
|
| 262 |
try:
|
| 263 |
+
return pd.to_datetime(s, format=fmt).strftime("%Y-%m-%d")
|
| 264 |
except Exception:
|
| 265 |
+
pass
|
| 266 |
+
# last-resort flexible parse
|
| 267 |
+
try:
|
| 268 |
+
return pd.to_datetime(s).strftime("%Y-%m-%d")
|
| 269 |
+
except Exception:
|
| 270 |
+
return val
|
| 271 |
|
| 272 |
before = (~self._df[col].apply(
|
| 273 |
lambda x: bool(DATE_RE.match(str(x))) if pd.notna(x) else False
|