Spaces:
Sleeping
Sleeping
Commit ·
f1b7439
1
Parent(s): c3f32c9
replace ambiguous salary issue with date format fix
Browse files
990000 could be a real salary — not a clear typo.
Replaced with: date "11-03-2022" (MM-DD-YYYY) → "2022-11-03" (YYYY-MM-DD)
Fully deterministic format conversion.
128 tests passing.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
dataqa_env/server/gradio_ui.py
CHANGED
|
@@ -40,7 +40,7 @@ AGENT_TRAJECTORIES = {
|
|
| 40 |
"row:7,col:salary,issue:wrong_type",
|
| 41 |
"row:11,col:department,issue:format_violation",
|
| 42 |
"row:15,col:email,issue:inconsistent_value",
|
| 43 |
-
"row:
|
| 44 |
"row:21,col:employee_id,issue:duplicate_row",
|
| 45 |
],
|
| 46 |
"fixes": [
|
|
@@ -49,7 +49,7 @@ AGENT_TRAJECTORIES = {
|
|
| 49 |
"row:7,col:salary,fix:75000", # "seventy-five thousand" → 75000
|
| 50 |
"row:11,col:department,fix:Engineering", # "Engneering" → "Engineering"
|
| 51 |
"row:15,col:email,fix:oscar.rivera@company.com", # from name Oscar Rivera
|
| 52 |
-
"row:
|
| 53 |
],
|
| 54 |
},
|
| 55 |
],
|
|
|
|
| 40 |
"row:7,col:salary,issue:wrong_type",
|
| 41 |
"row:11,col:department,issue:format_violation",
|
| 42 |
"row:15,col:email,issue:inconsistent_value",
|
| 43 |
+
"row:12,col:start_date,issue:format_violation",
|
| 44 |
"row:21,col:employee_id,issue:duplicate_row",
|
| 45 |
],
|
| 46 |
"fixes": [
|
|
|
|
| 49 |
"row:7,col:salary,fix:75000", # "seventy-five thousand" → 75000
|
| 50 |
"row:11,col:department,fix:Engineering", # "Engneering" → "Engineering"
|
| 51 |
"row:15,col:email,fix:oscar.rivera@company.com", # from name Oscar Rivera
|
| 52 |
+
"row:12,col:start_date,fix:2022-11-03", # MM-DD-YYYY → YYYY-MM-DD
|
| 53 |
],
|
| 54 |
},
|
| 55 |
],
|
dataqa_env/server/tasks.py
CHANGED
|
@@ -158,11 +158,11 @@ def create_task_easy(seed: int = 42) -> Task:
|
|
| 158 |
description="Email john.doe@company.com doesn't match name Oscar Rivera",
|
| 159 |
difficulty=1.5))
|
| 160 |
|
| 161 |
-
# Issue 6:
|
| 162 |
-
r =
|
| 163 |
-
data[r][
|
| 164 |
-
issues.append(PlantedIssue(row=r + 1, col="
|
| 165 |
-
description="
|
| 166 |
difficulty=1.5))
|
| 167 |
|
| 168 |
corrupted = _rows_to_csv([header] + data)
|
|
|
|
| 158 |
description="Email john.doe@company.com doesn't match name Oscar Rivera",
|
| 159 |
difficulty=1.5))
|
| 160 |
|
| 161 |
+
# Issue 6: Date in wrong format (deterministic fix: "11-03-2022" → "2022-11-03")
|
| 162 |
+
r = 11 # Laura Adams, start_date should be 2022-11-03
|
| 163 |
+
data[r][5] = "11-03-2022" # MM-DD-YYYY instead of YYYY-MM-DD
|
| 164 |
+
issues.append(PlantedIssue(row=r + 1, col="start_date", issue_type="format_violation",
|
| 165 |
+
description="Start date '11-03-2022' is in MM-DD-YYYY format instead of required YYYY-MM-DD (should be 2022-11-03)",
|
| 166 |
difficulty=1.5))
|
| 167 |
|
| 168 |
corrupted = _rows_to_csv([header] + data)
|
tests/test_environment.py
CHANGED
|
@@ -233,7 +233,7 @@ class TestGradeFixes:
|
|
| 233 |
(7, "salary", "75000"), # type conversion
|
| 234 |
(11, "department", "Engineering"), # spelling fix
|
| 235 |
(15, "email", "oscar.rivera@company.com"), # pattern match
|
| 236 |
-
(
|
| 237 |
]
|
| 238 |
result = grade_fixes(fixes, easy_task)
|
| 239 |
assert result["fix_score"] > 0.7
|
|
|
|
| 233 |
(7, "salary", "75000"), # type conversion
|
| 234 |
(11, "department", "Engineering"), # spelling fix
|
| 235 |
(15, "email", "oscar.rivera@company.com"), # pattern match
|
| 236 |
+
(12, "start_date", "2022-11-03"), # date format fix
|
| 237 |
]
|
| 238 |
result = grade_fixes(fixes, easy_task)
|
| 239 |
assert result["fix_score"] > 0.7
|