Fix image digitization: robust axis tick disambiguation and mapping
Three fixes for image-based CV analysis:
1. Improved tick disambiguation: when OCR detections appear in both
x-axis and y-axis candidate lists (bottom-left corner), assign
each to the axis whose alignment coordinate it matches better,
instead of filtering by position distance from median.
2. Robust tick-to-pixel mapping: when OCR misses intermediate tick
labels (e.g. only detecting [0.5, -1.0, -1.5] instead of
[1.0, 0.5, 0.0, -0.5, -1.0, -1.5]), the simple polyfit produces
a wrong slope. New _robust_tick_fit uses the minimum |Δvalue/Δpixel|
ratio among adjacent pairs as the true scale.
3. Outlier tick removal and decimal misread correction: detects OCR
misreads like "1.0" → "10" using leave-one-out residuals and
decimal-point-drop heuristics.
Also widened left margin crop (8% → 12%) for y-axis unit detection
and added "HA" as OCR misread pattern for "µA".
Made-with: Cursor
- digitizer.py +209 -6
|
@@ -70,6 +70,55 @@ def auto_detect_axis_bounds(image_array):
|
|
| 70 |
x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
|
| 71 |
y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
x_ticks = _extract_axis_ticks(x_candidates, axis="x")
|
| 74 |
y_ticks = _extract_axis_ticks(y_candidates, axis="y")
|
| 75 |
|
|
@@ -115,7 +164,7 @@ def _detect_current_unit(image_array, reader, all_texts):
|
|
| 115 |
return unit
|
| 116 |
|
| 117 |
H, W = image_array.shape[:2]
|
| 118 |
-
left_strip = image_array[:, : int(W * 0.08), :]
|
| 119 |
rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
|
| 120 |
|
| 121 |
try:
|
|
@@ -130,6 +179,7 @@ def _detect_current_unit(image_array, reader, all_texts):
|
|
| 130 |
for pattern, unit in [
|
| 131 |
("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
|
| 132 |
("MA", "µA"), # OCR often misreads µ as M
|
|
|
|
| 133 |
("mA", "mA"),
|
| 134 |
("nA", "nA"),
|
| 135 |
]:
|
|
@@ -181,9 +231,119 @@ def _extract_axis_ticks(candidates, axis="x"):
|
|
| 181 |
# y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
|
| 182 |
ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
return ticks
|
| 185 |
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
def _fix_missing_negatives(ticks, increasing=True):
|
| 188 |
"""Fix OCR-dropped minus signs using spatial monotonicity.
|
| 189 |
|
|
@@ -330,6 +490,52 @@ def _detect_plot_region(gray):
|
|
| 330 |
return px_left, px_right, py_top, py_bottom
|
| 331 |
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
| 334 |
threshold=0, min_contour_length=50,
|
| 335 |
x_ticks=None, y_ticks=None):
|
|
@@ -377,17 +583,14 @@ def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
|
| 377 |
# Build pixel-to-data mapping from tick positions if available.
|
| 378 |
# This allows correct extrapolation for data beyond the last tick.
|
| 379 |
if x_ticks and len(x_ticks) >= 2:
|
| 380 |
-
|
| 381 |
-
x_slope = np.polyfit(x_px, x_val, 1)
|
| 382 |
-
# Recompute effective x_min/x_max at the plot region boundaries
|
| 383 |
eff_x_min = float(np.polyval(x_slope, px_left))
|
| 384 |
eff_x_max = float(np.polyval(x_slope, px_right))
|
| 385 |
else:
|
| 386 |
eff_x_min, eff_x_max = x_min, x_max
|
| 387 |
|
| 388 |
if y_ticks and len(y_ticks) >= 2:
|
| 389 |
-
|
| 390 |
-
y_slope = np.polyfit(y_px, y_val, 1)
|
| 391 |
eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
|
| 392 |
eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
|
| 393 |
else:
|
|
|
|
| 70 |
x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
|
| 71 |
y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
|
| 72 |
|
| 73 |
+
# Disambiguate detections that appear in both candidate lists.
|
| 74 |
+
# Y-axis labels in the bottom-left corner satisfy both cy > 0.65*H
|
| 75 |
+
# and cx < 0.30*W. Assign each ambiguous detection to the axis
|
| 76 |
+
# whose alignment coordinate (cy for x-axis, cx for y-axis) it
|
| 77 |
+
# matches better.
|
| 78 |
+
if x_candidates and y_candidates:
|
| 79 |
+
ambiguous_indices = set()
|
| 80 |
+
for i, xc in enumerate(x_candidates):
|
| 81 |
+
for yc in y_candidates:
|
| 82 |
+
if abs(xc[0] - yc[0]) < 10 and abs(xc[1] - yc[1]) < 10:
|
| 83 |
+
ambiguous_indices.add(i)
|
| 84 |
+
|
| 85 |
+
if ambiguous_indices:
|
| 86 |
+
# X-axis ticks share a common cy (alignment); compute the
|
| 87 |
+
# median cy of non-ambiguous x-candidates as reference.
|
| 88 |
+
clean_x_cys = [x_candidates[i][1] for i in range(len(x_candidates))
|
| 89 |
+
if i not in ambiguous_indices]
|
| 90 |
+
clean_y_cxs = [c[0] for c in y_candidates
|
| 91 |
+
if not any(abs(c[0] - x_candidates[j][0]) < 10
|
| 92 |
+
and abs(c[1] - x_candidates[j][1]) < 10
|
| 93 |
+
for j in ambiguous_indices)]
|
| 94 |
+
|
| 95 |
+
ref_x_cy = np.median(clean_x_cys) if clean_x_cys else np.median([c[1] for c in x_candidates])
|
| 96 |
+
ref_y_cx = np.median(clean_y_cxs) if clean_y_cxs else np.median([c[0] for c in y_candidates])
|
| 97 |
+
|
| 98 |
+
x_candidates_filtered = []
|
| 99 |
+
for i, c in enumerate(x_candidates):
|
| 100 |
+
if i in ambiguous_indices:
|
| 101 |
+
dist_to_x_row = abs(c[1] - ref_x_cy)
|
| 102 |
+
dist_to_y_col = abs(c[0] - ref_y_cx)
|
| 103 |
+
if dist_to_x_row > dist_to_y_col:
|
| 104 |
+
continue
|
| 105 |
+
x_candidates_filtered.append(c)
|
| 106 |
+
|
| 107 |
+
y_candidates_filtered = []
|
| 108 |
+
for c in y_candidates:
|
| 109 |
+
is_amb = any(abs(c[0] - x_candidates[j][0]) < 10
|
| 110 |
+
and abs(c[1] - x_candidates[j][1]) < 10
|
| 111 |
+
for j in ambiguous_indices)
|
| 112 |
+
if is_amb:
|
| 113 |
+
dist_to_x_row = abs(c[1] - ref_x_cy)
|
| 114 |
+
dist_to_y_col = abs(c[0] - ref_y_cx)
|
| 115 |
+
if dist_to_y_col > dist_to_x_row:
|
| 116 |
+
continue
|
| 117 |
+
y_candidates_filtered.append(c)
|
| 118 |
+
|
| 119 |
+
x_candidates = x_candidates_filtered
|
| 120 |
+
y_candidates = y_candidates_filtered
|
| 121 |
+
|
| 122 |
x_ticks = _extract_axis_ticks(x_candidates, axis="x")
|
| 123 |
y_ticks = _extract_axis_ticks(y_candidates, axis="y")
|
| 124 |
|
|
|
|
| 164 |
return unit
|
| 165 |
|
| 166 |
H, W = image_array.shape[:2]
|
| 167 |
+
left_strip = image_array[:, : int(W * 0.12), :]
|
| 168 |
rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
|
| 169 |
|
| 170 |
try:
|
|
|
|
| 179 |
for pattern, unit in [
|
| 180 |
("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
|
| 181 |
("MA", "µA"), # OCR often misreads µ as M
|
| 182 |
+
("HA", "µA"), # OCR sometimes misreads µ as H
|
| 183 |
("mA", "mA"),
|
| 184 |
("nA", "nA"),
|
| 185 |
]:
|
|
|
|
| 231 |
# y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
|
| 232 |
ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
|
| 233 |
|
| 234 |
+
# Remove outlier ticks whose values break the expected linear
|
| 235 |
+
# position-to-value mapping (e.g. OCR reading "1.0" as "10").
|
| 236 |
+
ticks = _remove_tick_outliers(ticks)
|
| 237 |
+
|
| 238 |
return ticks
|
| 239 |
|
| 240 |
|
| 241 |
+
def _remove_tick_outliers(ticks):
    """Drop ticks that break the expected linear position-to-value mapping.

    Each tick is held out in turn and a line is fitted to the rest; a
    large residual on the held-out tick marks it as an outlier.  This is
    robust even when a single bad value (e.g. OCR reading "1.0" as "10")
    skews the overall least-squares fit.  Decimal-point-dropped misreads
    are repaired first via _fix_decimal_misreads so fixable ticks are
    not discarded.

    Args:
        ticks: sequence of (pixel_position, value) pairs.

    Returns:
        List of (pixel_position, value) pairs with outliers removed;
        inputs with fewer than 3 ticks are returned unchanged.
    """
    if len(ticks) < 3:
        return ticks

    count = len(ticks)
    px = np.asarray([t[0] for t in ticks], dtype=float)
    val = np.asarray([t[1] for t in ticks], dtype=float)

    # Pass 1: repair "decimal point dropped" OCR misreads before judging
    # outliers, so a correctable tick is fixed rather than removed.
    ticks = _fix_decimal_misreads(list(zip(px, val)))
    px = np.asarray([t[0] for t in ticks], dtype=float)
    val = np.asarray([t[1] for t in ticks], dtype=float)

    # Pass 2: leave-one-out residual for every tick.
    residuals = np.empty(count)
    for i in range(count):
        rest_px = np.delete(px, i)
        rest_val = np.delete(val, i)
        line = np.polyfit(rest_px, rest_val, 1)
        residuals[i] = abs(val[i] - np.polyval(line, px[i]))

    # Tolerance: roughly one tick-spacing worth of value change,
    # estimated from the overall fit's slope and the median pixel gap.
    slope_all = np.polyfit(px, val, 1)[0]
    spacing = abs(slope_all) * np.median(np.diff(px))
    if spacing < 1e-12:
        # Degenerate slope: fall back to the median value step.
        spacing = np.median(np.abs(np.diff(val))) + 1e-12

    keep = residuals < 2 * spacing
    if keep.sum() < 2:
        # Too aggressive — keep the (decimal-fixed) ticks instead.
        return ticks

    return [(p, v) for p, v, ok in zip(px, val, keep) if ok]
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _fix_decimal_misreads(ticks):
|
| 293 |
+
"""Fix OCR misreads where the decimal point is dropped.
|
| 294 |
+
|
| 295 |
+
E.g. "1.0" read as "10", "0.5" read as "5". Detects these by
|
| 296 |
+
checking if the value/pixel ratio between adjacent ticks is
|
| 297 |
+
inconsistent, and whether dividing a value by 10 fixes it.
|
| 298 |
+
"""
|
| 299 |
+
if len(ticks) < 3:
|
| 300 |
+
return ticks
|
| 301 |
+
|
| 302 |
+
positions = np.array([t[0] for t in ticks], dtype=float)
|
| 303 |
+
values = np.array([t[1] for t in ticks], dtype=float)
|
| 304 |
+
n = len(ticks)
|
| 305 |
+
|
| 306 |
+
# Compute the value step per pixel step for each adjacent pair
|
| 307 |
+
dv = np.diff(values)
|
| 308 |
+
dp = np.diff(positions)
|
| 309 |
+
ratios = dv / (dp + 1e-12)
|
| 310 |
+
|
| 311 |
+
# The median ratio represents the "true" scale
|
| 312 |
+
med_ratio = np.median(ratios)
|
| 313 |
+
if abs(med_ratio) < 1e-12:
|
| 314 |
+
return ticks
|
| 315 |
+
|
| 316 |
+
# For each tick, check if replacing its value with value/10
|
| 317 |
+
# produces a more consistent set of ratios
|
| 318 |
+
improved = True
|
| 319 |
+
max_iters = 5
|
| 320 |
+
while improved and max_iters > 0:
|
| 321 |
+
improved = False
|
| 322 |
+
max_iters -= 1
|
| 323 |
+
for i in range(n):
|
| 324 |
+
# Compute current residual from linear fit
|
| 325 |
+
coeffs = np.polyfit(positions, values, 1)
|
| 326 |
+
predicted = np.polyval(coeffs, positions[i])
|
| 327 |
+
current_res = abs(values[i] - predicted)
|
| 328 |
+
|
| 329 |
+
# Try value / 10
|
| 330 |
+
test_values = values.copy()
|
| 331 |
+
test_values[i] = values[i] / 10.0
|
| 332 |
+
coeffs_test = np.polyfit(positions, test_values, 1)
|
| 333 |
+
predicted_test = np.polyval(coeffs_test, positions[i])
|
| 334 |
+
test_res = abs(test_values[i] - predicted_test)
|
| 335 |
+
|
| 336 |
+
# Also compute overall fit quality
|
| 337 |
+
current_total = np.sum((values - np.polyval(coeffs, positions)) ** 2)
|
| 338 |
+
test_total = np.sum((test_values - np.polyval(coeffs_test, positions)) ** 2)
|
| 339 |
+
|
| 340 |
+
if test_total < current_total * 0.3:
|
| 341 |
+
values[i] = test_values[i]
|
| 342 |
+
improved = True
|
| 343 |
+
|
| 344 |
+
return list(zip(positions, values))
|
| 345 |
+
|
| 346 |
+
|
| 347 |
def _fix_missing_negatives(ticks, increasing=True):
|
| 348 |
"""Fix OCR-dropped minus signs using spatial monotonicity.
|
| 349 |
|
|
|
|
| 490 |
return px_left, px_right, py_top, py_bottom
|
| 491 |
|
| 492 |
|
| 493 |
+
def _robust_tick_fit(ticks):
|
| 494 |
+
"""Fit a linear pixel→value mapping that handles missing intermediate ticks.
|
| 495 |
+
|
| 496 |
+
A simple polyfit fails when OCR misses some tick labels, because the
|
| 497 |
+
value-gap between detected ticks no longer matches the pixel-gap.
|
| 498 |
+
For example, ticks at values [0.5, -1.0, -1.5] with equal pixel
|
| 499 |
+
spacing means OCR missed 0.0 and -0.5 between 0.5 and -1.0.
|
| 500 |
+
|
| 501 |
+
Strategy: find the minimum |Δvalue/Δpixel| ratio among adjacent
|
| 502 |
+
tick pairs — this corresponds to the pair where no ticks are missing.
|
| 503 |
+
Use that ratio as the true scale, then anchor the mapping at the
|
| 504 |
+
tick pair that defines it.
|
| 505 |
+
"""
|
| 506 |
+
if len(ticks) < 2:
|
| 507 |
+
return np.array([0.0, 0.0])
|
| 508 |
+
|
| 509 |
+
positions = np.array([t[0] for t in ticks], dtype=float)
|
| 510 |
+
values = np.array([t[1] for t in ticks], dtype=float)
|
| 511 |
+
|
| 512 |
+
if len(ticks) == 2:
|
| 513 |
+
return np.polyfit(positions, values, 1)
|
| 514 |
+
|
| 515 |
+
# Compute |Δvalue / Δpixel| for each adjacent pair
|
| 516 |
+
dp = np.diff(positions)
|
| 517 |
+
dv = np.diff(values)
|
| 518 |
+
ratios = dv / (dp + 1e-12)
|
| 519 |
+
abs_ratios = np.abs(ratios)
|
| 520 |
+
|
| 521 |
+
# The minimum absolute ratio corresponds to the pair with no missing
|
| 522 |
+
# ticks between them (smallest value change per pixel step).
|
| 523 |
+
min_idx = np.argmin(abs_ratios)
|
| 524 |
+
true_ratio = ratios[min_idx]
|
| 525 |
+
|
| 526 |
+
# Check if all ratios are consistent (within 50% of each other).
|
| 527 |
+
# If so, just use polyfit — no missing ticks.
|
| 528 |
+
if abs_ratios.max() < abs_ratios.min() * 1.8:
|
| 529 |
+
return np.polyfit(positions, values, 1)
|
| 530 |
+
|
| 531 |
+
# Use the true ratio and anchor at the midpoint of the best pair
|
| 532 |
+
anchor_px = (positions[min_idx] + positions[min_idx + 1]) / 2
|
| 533 |
+
anchor_val = (values[min_idx] + values[min_idx + 1]) / 2
|
| 534 |
+
intercept = anchor_val - true_ratio * anchor_px
|
| 535 |
+
|
| 536 |
+
return np.array([true_ratio, intercept])
|
| 537 |
+
|
| 538 |
+
|
| 539 |
def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
| 540 |
threshold=0, min_contour_length=50,
|
| 541 |
x_ticks=None, y_ticks=None):
|
|
|
|
| 583 |
# Build pixel-to-data mapping from tick positions if available.
|
| 584 |
# This allows correct extrapolation for data beyond the last tick.
|
| 585 |
if x_ticks and len(x_ticks) >= 2:
|
| 586 |
+
x_slope = _robust_tick_fit(x_ticks)
|
|
|
|
|
|
|
| 587 |
eff_x_min = float(np.polyval(x_slope, px_left))
|
| 588 |
eff_x_max = float(np.polyval(x_slope, px_right))
|
| 589 |
else:
|
| 590 |
eff_x_min, eff_x_max = x_min, x_max
|
| 591 |
|
| 592 |
if y_ticks and len(y_ticks) >= 2:
|
| 593 |
+
y_slope = _robust_tick_fit(y_ticks)
|
|
|
|
| 594 |
eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
|
| 595 |
eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
|
| 596 |
else:
|