Taniieeee83 Claude Sonnet 4.6 committed on
Commit
6b248b4
·
1 Parent(s): d2d30e9

fix: resolve date parsing warning and task3 dataset generation bug

Browse files

- Fix ValueError in generate_task3_datasets where scrambled date list
length mismatched the target index when NaN values were present
- Fix UserWarning in _fix_date by using explicit format strings before
falling back to flexible parsing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. server/environment.py +11 -5
server/environment.py CHANGED
@@ -252,16 +252,22 @@ class DataCleaningEnvironment:
252
  return f"Fixed {fixed} phone numbers in '{col}' to NNN-NNN-NNNN format.", True
253
 
254
  def _fix_date(self, col) -> Tuple[str, bool]:
 
 
255
  def normalise(val):
256
  if pd.isna(val):
257
  return val
258
- try:
259
- return pd.to_datetime(str(val), dayfirst=False).strftime("%Y-%m-%d")
260
- except Exception:
261
  try:
262
- return pd.to_datetime(str(val), dayfirst=True).strftime("%Y-%m-%d")
263
  except Exception:
264
- return val
 
 
 
 
 
265
 
266
  before = (~self._df[col].apply(
267
  lambda x: bool(DATE_RE.match(str(x))) if pd.notna(x) else False
 
252
  return f"Fixed {fixed} phone numbers in '{col}' to NNN-NNN-NNNN format.", True
253
 
254
  def _fix_date(self, col) -> Tuple[str, bool]:
255
+ _DATE_FORMATS = ["%Y-%m-%d", "%b %d %Y", "%d/%m/%Y", "%m/%d/%Y", "%Y/%m/%d"]
256
+
257
  def normalise(val):
258
  if pd.isna(val):
259
  return val
260
+ s = str(val).strip()
261
+ for fmt in _DATE_FORMATS:
 
262
  try:
263
+ return pd.to_datetime(s, format=fmt).strftime("%Y-%m-%d")
264
  except Exception:
265
+ pass
266
+ # last-resort flexible parse
267
+ try:
268
+ return pd.to_datetime(s).strftime("%Y-%m-%d")
269
+ except Exception:
270
+ return val
271
 
272
  before = (~self._df[col].apply(
273
  lambda x: bool(DATE_RE.match(str(x))) if pd.notna(x) else False