avanigupta Claude Opus 4.6 (1M context) committed on
Commit
f1b7439
·
1 Parent(s): c3f32c9

replace ambiguous salary issue with date format fix

Browse files

990000 could be a real salary — not a clear typo.
Replaced with: date "11-03-2022" (MM-DD-YYYY) → "2022-11-03" (YYYY-MM-DD)
Fully deterministic format conversion.

128 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

dataqa_env/server/gradio_ui.py CHANGED
@@ -40,7 +40,7 @@ AGENT_TRAJECTORIES = {
40
  "row:7,col:salary,issue:wrong_type",
41
  "row:11,col:department,issue:format_violation",
42
  "row:15,col:email,issue:inconsistent_value",
43
- "row:18,col:salary,issue:out_of_range",
44
  "row:21,col:employee_id,issue:duplicate_row",
45
  ],
46
  "fixes": [
@@ -49,7 +49,7 @@ AGENT_TRAJECTORIES = {
49
  "row:7,col:salary,fix:75000", # "seventy-five thousand" → 75000
50
  "row:11,col:department,fix:Engineering", # "Engneering" → "Engineering"
51
  "row:15,col:email,fix:oscar.rivera@company.com", # from name Oscar Rivera
52
- "row:18,col:salary,fix:99000", # 990000 → remove extra digit
53
  ],
54
  },
55
  ],
 
40
  "row:7,col:salary,issue:wrong_type",
41
  "row:11,col:department,issue:format_violation",
42
  "row:15,col:email,issue:inconsistent_value",
43
+ "row:12,col:start_date,issue:format_violation",
44
  "row:21,col:employee_id,issue:duplicate_row",
45
  ],
46
  "fixes": [
 
49
  "row:7,col:salary,fix:75000", # "seventy-five thousand" → 75000
50
  "row:11,col:department,fix:Engineering", # "Engneering" → "Engineering"
51
  "row:15,col:email,fix:oscar.rivera@company.com", # from name Oscar Rivera
52
+ "row:12,col:start_date,fix:2022-11-03", # MM-DD-YYYY → YYYY-MM-DD
53
  ],
54
  },
55
  ],
dataqa_env/server/tasks.py CHANGED
@@ -158,11 +158,11 @@ def create_task_easy(seed: int = 42) -> Task:
158
  description="Email john.doe@company.com doesn't match name Oscar Rivera",
159
  difficulty=1.5))
160
 
161
- # Issue 6: Salary with extra digit — typo (deterministic fix: "950000" → "95000")
162
- r = 17 # Rosa Diaz, original salary is 99000
163
- data[r][4] = "990000" # extra zero
164
- issues.append(PlantedIssue(row=r + 1, col="salary", issue_type="out_of_range",
165
- description="Salary 990000 exceeds maximum 150000 — likely extra digit typo (should be 99000)",
166
  difficulty=1.5))
167
 
168
  corrupted = _rows_to_csv([header] + data)
 
158
  description="Email john.doe@company.com doesn't match name Oscar Rivera",
159
  difficulty=1.5))
160
 
161
+ # Issue 6: Date in wrong format (deterministic fix: "03-15-2022" → "2022-03-15")
162
+ r = 11 # Laura Adams, start_date should be 2022-11-03
163
+ data[r][5] = "11-03-2022" # MM-DD-YYYY instead of YYYY-MM-DD
164
+ issues.append(PlantedIssue(row=r + 1, col="start_date", issue_type="format_violation",
165
+ description="Start date '11-03-2022' is in MM-DD-YYYY format instead of required YYYY-MM-DD (should be 2022-11-03)",
166
  difficulty=1.5))
167
 
168
  corrupted = _rows_to_csv([header] + data)
tests/test_environment.py CHANGED
@@ -233,7 +233,7 @@ class TestGradeFixes:
233
  (7, "salary", "75000"), # type conversion
234
  (11, "department", "Engineering"), # spelling fix
235
  (15, "email", "oscar.rivera@company.com"), # pattern match
236
- (18, "salary", "99000"), # remove extra digit
237
  ]
238
  result = grade_fixes(fixes, easy_task)
239
  assert result["fix_score"] > 0.7
 
233
  (7, "salary", "75000"), # type conversion
234
  (11, "department", "Engineering"), # spelling fix
235
  (15, "email", "oscar.rivera@company.com"), # pattern match
236
+ (12, "start_date", "2022-11-03"), # date format fix
237
  ]
238
  result = grade_fixes(fixes, easy_task)
239
  assert result["fix_score"] > 0.7