Bing Yan committed on
Commit
58cb8ee
·
1 Parent(s): 41896af

Fix image digitization: robust axis tick disambiguation and mapping

Browse files

Three fixes for image-based CV analysis:

1. Improved tick disambiguation: when OCR detections appear in both
x-axis and y-axis candidate lists (bottom-left corner), assign
each to the axis whose alignment coordinate it matches better,
instead of filtering by position distance from median.

2. Robust tick-to-pixel mapping: when OCR misses intermediate tick
labels (e.g. only detecting [0.5, -1.0, -1.5] instead of
[1.0, 0.5, 0.0, -0.5, -1.0, -1.5]), the simple polyfit produces
a wrong slope. New _robust_tick_fit uses the minimum |Δvalue/Δpixel|
ratio among adjacent pairs as the true scale.

3. Outlier tick removal and decimal misread correction: detects OCR
misreads like "1.0" → "10" using leave-one-out residuals and
decimal-point-drop heuristics.

Also widened left margin crop (8% → 12%) for y-axis unit detection
and added "HA" as OCR misread pattern for "µA".

Made-with: Cursor

Files changed (1) hide show
  1. digitizer.py +209 -6
digitizer.py CHANGED
@@ -70,6 +70,55 @@ def auto_detect_axis_bounds(image_array):
70
  x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
71
  y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  x_ticks = _extract_axis_ticks(x_candidates, axis="x")
74
  y_ticks = _extract_axis_ticks(y_candidates, axis="y")
75
 
@@ -115,7 +164,7 @@ def _detect_current_unit(image_array, reader, all_texts):
115
  return unit
116
 
117
  H, W = image_array.shape[:2]
118
- left_strip = image_array[:, : int(W * 0.08), :]
119
  rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
120
 
121
  try:
@@ -130,6 +179,7 @@ def _detect_current_unit(image_array, reader, all_texts):
130
  for pattern, unit in [
131
  ("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
132
  ("MA", "µA"), # OCR often misreads µ as M
 
133
  ("mA", "mA"),
134
  ("nA", "nA"),
135
  ]:
@@ -181,9 +231,119 @@ def _extract_axis_ticks(candidates, axis="x"):
181
  # y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
182
  ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
183
 
 
 
 
 
184
  return ticks
185
 
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  def _fix_missing_negatives(ticks, increasing=True):
188
  """Fix OCR-dropped minus signs using spatial monotonicity.
189
 
@@ -330,6 +490,52 @@ def _detect_plot_region(gray):
330
  return px_left, px_right, py_top, py_bottom
331
 
332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  def digitize_plot(image_array, x_min, x_max, y_min, y_max,
334
  threshold=0, min_contour_length=50,
335
  x_ticks=None, y_ticks=None):
@@ -377,17 +583,14 @@ def digitize_plot(image_array, x_min, x_max, y_min, y_max,
377
  # Build pixel-to-data mapping from tick positions if available.
378
  # This allows correct extrapolation for data beyond the last tick.
379
  if x_ticks and len(x_ticks) >= 2:
380
- x_px, x_val = zip(*x_ticks)
381
- x_slope = np.polyfit(x_px, x_val, 1)
382
- # Recompute effective x_min/x_max at the plot region boundaries
383
  eff_x_min = float(np.polyval(x_slope, px_left))
384
  eff_x_max = float(np.polyval(x_slope, px_right))
385
  else:
386
  eff_x_min, eff_x_max = x_min, x_max
387
 
388
  if y_ticks and len(y_ticks) >= 2:
389
- y_px, y_val = zip(*y_ticks)
390
- y_slope = np.polyfit(y_px, y_val, 1)
391
  eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
392
  eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
393
  else:
 
70
  x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
71
  y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
72
 
73
+ # Disambiguate detections that appear in both candidate lists.
74
+ # Y-axis labels in the bottom-left corner satisfy both cy > 0.65*H
75
+ # and cx < 0.30*W. Assign each ambiguous detection to the axis
76
+ # whose alignment coordinate (cy for x-axis, cx for y-axis) it
77
+ # matches better.
78
+ if x_candidates and y_candidates:
79
+ ambiguous_indices = set()
80
+ for i, xc in enumerate(x_candidates):
81
+ for yc in y_candidates:
82
+ if abs(xc[0] - yc[0]) < 10 and abs(xc[1] - yc[1]) < 10:
83
+ ambiguous_indices.add(i)
84
+
85
+ if ambiguous_indices:
86
+ # X-axis ticks share a common cy (alignment); compute the
87
+ # median cy of non-ambiguous x-candidates as reference.
88
+ clean_x_cys = [x_candidates[i][1] for i in range(len(x_candidates))
89
+ if i not in ambiguous_indices]
90
+ clean_y_cxs = [c[0] for c in y_candidates
91
+ if not any(abs(c[0] - x_candidates[j][0]) < 10
92
+ and abs(c[1] - x_candidates[j][1]) < 10
93
+ for j in ambiguous_indices)]
94
+
95
+ ref_x_cy = np.median(clean_x_cys) if clean_x_cys else np.median([c[1] for c in x_candidates])
96
+ ref_y_cx = np.median(clean_y_cxs) if clean_y_cxs else np.median([c[0] for c in y_candidates])
97
+
98
+ x_candidates_filtered = []
99
+ for i, c in enumerate(x_candidates):
100
+ if i in ambiguous_indices:
101
+ dist_to_x_row = abs(c[1] - ref_x_cy)
102
+ dist_to_y_col = abs(c[0] - ref_y_cx)
103
+ if dist_to_x_row > dist_to_y_col:
104
+ continue
105
+ x_candidates_filtered.append(c)
106
+
107
+ y_candidates_filtered = []
108
+ for c in y_candidates:
109
+ is_amb = any(abs(c[0] - x_candidates[j][0]) < 10
110
+ and abs(c[1] - x_candidates[j][1]) < 10
111
+ for j in ambiguous_indices)
112
+ if is_amb:
113
+ dist_to_x_row = abs(c[1] - ref_x_cy)
114
+ dist_to_y_col = abs(c[0] - ref_y_cx)
115
+ if dist_to_y_col > dist_to_x_row:
116
+ continue
117
+ y_candidates_filtered.append(c)
118
+
119
+ x_candidates = x_candidates_filtered
120
+ y_candidates = y_candidates_filtered
121
+
122
  x_ticks = _extract_axis_ticks(x_candidates, axis="x")
123
  y_ticks = _extract_axis_ticks(y_candidates, axis="y")
124
 
 
164
  return unit
165
 
166
  H, W = image_array.shape[:2]
167
+ left_strip = image_array[:, : int(W * 0.12), :]
168
  rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
169
 
170
  try:
 
179
  for pattern, unit in [
180
  ("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
181
  ("MA", "µA"), # OCR often misreads µ as M
182
+ ("HA", "µA"), # OCR sometimes misreads µ as H
183
  ("mA", "mA"),
184
  ("nA", "nA"),
185
  ]:
 
231
  # y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
232
  ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
233
 
234
+ # Remove outlier ticks whose values break the expected linear
235
+ # position-to-value mapping (e.g. OCR reading "1.0" as "10").
236
+ ticks = _remove_tick_outliers(ticks)
237
+
238
  return ticks
239
 
240
 
241
def _remove_tick_outliers(ticks):
    """Remove ticks whose values deviate from the expected linear mapping.

    Uses a leave-one-out approach: for each tick, fit a line to the
    remaining ticks and check if the held-out tick's residual is large.
    This is robust even when a single outlier distorts the overall fit
    (e.g. OCR reading "1.0" as "10").

    Also detects and corrects OCR misreads where a decimal point is
    dropped (e.g. "1.0" -> "10") by checking tick spacing consistency.

    Parameters
    ----------
    ticks : list of (pixel_position, value) pairs, assumed roughly
        collinear in (position, value) space.

    Returns
    -------
    list of (pixel_position, value) pairs with outliers removed and
    decimal misreads corrected. Fewer than 3 ticks are returned as-is.
    """
    if len(ticks) < 3:
        return ticks

    # First pass: repair decimal-point-dropped misreads before judging
    # outliers, so a fixable "10" isn't simply discarded.
    ticks = _fix_decimal_misreads([(float(p), float(v)) for p, v in ticks])
    positions = np.array([t[0] for t in ticks], dtype=float)
    values = np.array([t[1] for t in ticks], dtype=float)
    n = len(ticks)

    # Second pass: leave-one-out residuals. Fitting without tick i keeps
    # a single bad tick from dragging the reference line toward itself.
    loo_residuals = np.zeros(n)
    for i in range(n):
        mask = np.ones(n, dtype=bool)
        mask[i] = False
        if mask.sum() < 2:
            # Not enough remaining points to define a line.
            loo_residuals[i] = 0.0
            continue
        coeffs = np.polyfit(positions[mask], values[mask], 1)
        loo_residuals[i] = abs(values[i] - np.polyval(coeffs, positions[i]))

    # Expected value gap between adjacent ticks under the global fit;
    # used as the tolerance scale for residuals.
    coeffs_all = np.polyfit(positions, values, 1)
    expected_spacing = abs(coeffs_all[0]) * np.median(np.diff(positions))
    if expected_spacing < 1e-12:
        # Degenerate (near-flat) fit: fall back to the median value step.
        expected_spacing = np.median(np.abs(np.diff(values))) + 1e-12

    keep = loo_residuals < expected_spacing * 2
    if keep.sum() < 2:
        # Refusing to drop almost everything; return the (decimal-fixed)
        # ticks unchanged rather than an unusable mapping.
        return ticks

    return [(p, v) for p, v, k in zip(positions, values, keep) if k]
290
+
291
+
292
+ def _fix_decimal_misreads(ticks):
293
+ """Fix OCR misreads where the decimal point is dropped.
294
+
295
+ E.g. "1.0" read as "10", "0.5" read as "5". Detects these by
296
+ checking if the value/pixel ratio between adjacent ticks is
297
+ inconsistent, and whether dividing a value by 10 fixes it.
298
+ """
299
+ if len(ticks) < 3:
300
+ return ticks
301
+
302
+ positions = np.array([t[0] for t in ticks], dtype=float)
303
+ values = np.array([t[1] for t in ticks], dtype=float)
304
+ n = len(ticks)
305
+
306
+ # Compute the value step per pixel step for each adjacent pair
307
+ dv = np.diff(values)
308
+ dp = np.diff(positions)
309
+ ratios = dv / (dp + 1e-12)
310
+
311
+ # The median ratio represents the "true" scale
312
+ med_ratio = np.median(ratios)
313
+ if abs(med_ratio) < 1e-12:
314
+ return ticks
315
+
316
+ # For each tick, check if replacing its value with value/10
317
+ # produces a more consistent set of ratios
318
+ improved = True
319
+ max_iters = 5
320
+ while improved and max_iters > 0:
321
+ improved = False
322
+ max_iters -= 1
323
+ for i in range(n):
324
+ # Compute current residual from linear fit
325
+ coeffs = np.polyfit(positions, values, 1)
326
+ predicted = np.polyval(coeffs, positions[i])
327
+ current_res = abs(values[i] - predicted)
328
+
329
+ # Try value / 10
330
+ test_values = values.copy()
331
+ test_values[i] = values[i] / 10.0
332
+ coeffs_test = np.polyfit(positions, test_values, 1)
333
+ predicted_test = np.polyval(coeffs_test, positions[i])
334
+ test_res = abs(test_values[i] - predicted_test)
335
+
336
+ # Also compute overall fit quality
337
+ current_total = np.sum((values - np.polyval(coeffs, positions)) ** 2)
338
+ test_total = np.sum((test_values - np.polyval(coeffs_test, positions)) ** 2)
339
+
340
+ if test_total < current_total * 0.3:
341
+ values[i] = test_values[i]
342
+ improved = True
343
+
344
+ return list(zip(positions, values))
345
+
346
+
347
  def _fix_missing_negatives(ticks, increasing=True):
348
  """Fix OCR-dropped minus signs using spatial monotonicity.
349
 
 
490
  return px_left, px_right, py_top, py_bottom
491
 
492
 
493
+ def _robust_tick_fit(ticks):
494
+ """Fit a linear pixel→value mapping that handles missing intermediate ticks.
495
+
496
+ A simple polyfit fails when OCR misses some tick labels, because the
497
+ value-gap between detected ticks no longer matches the pixel-gap.
498
+ For example, ticks at values [0.5, -1.0, -1.5] with equal pixel
499
+ spacing means OCR missed 0.0 and -0.5 between 0.5 and -1.0.
500
+
501
+ Strategy: find the minimum |Δvalue/Δpixel| ratio among adjacent
502
+ tick pairs — this corresponds to the pair where no ticks are missing.
503
+ Use that ratio as the true scale, then anchor the mapping at the
504
+ tick pair that defines it.
505
+ """
506
+ if len(ticks) < 2:
507
+ return np.array([0.0, 0.0])
508
+
509
+ positions = np.array([t[0] for t in ticks], dtype=float)
510
+ values = np.array([t[1] for t in ticks], dtype=float)
511
+
512
+ if len(ticks) == 2:
513
+ return np.polyfit(positions, values, 1)
514
+
515
+ # Compute |Δvalue / Δpixel| for each adjacent pair
516
+ dp = np.diff(positions)
517
+ dv = np.diff(values)
518
+ ratios = dv / (dp + 1e-12)
519
+ abs_ratios = np.abs(ratios)
520
+
521
+ # The minimum absolute ratio corresponds to the pair with no missing
522
+ # ticks between them (smallest value change per pixel step).
523
+ min_idx = np.argmin(abs_ratios)
524
+ true_ratio = ratios[min_idx]
525
+
526
+ # Check if all ratios are consistent (within 50% of each other).
527
+ # If so, just use polyfit — no missing ticks.
528
+ if abs_ratios.max() < abs_ratios.min() * 1.8:
529
+ return np.polyfit(positions, values, 1)
530
+
531
+ # Use the true ratio and anchor at the midpoint of the best pair
532
+ anchor_px = (positions[min_idx] + positions[min_idx + 1]) / 2
533
+ anchor_val = (values[min_idx] + values[min_idx + 1]) / 2
534
+ intercept = anchor_val - true_ratio * anchor_px
535
+
536
+ return np.array([true_ratio, intercept])
537
+
538
+
539
  def digitize_plot(image_array, x_min, x_max, y_min, y_max,
540
  threshold=0, min_contour_length=50,
541
  x_ticks=None, y_ticks=None):
 
583
  # Build pixel-to-data mapping from tick positions if available.
584
  # This allows correct extrapolation for data beyond the last tick.
585
  if x_ticks and len(x_ticks) >= 2:
586
+ x_slope = _robust_tick_fit(x_ticks)
 
 
587
  eff_x_min = float(np.polyval(x_slope, px_left))
588
  eff_x_max = float(np.polyval(x_slope, px_right))
589
  else:
590
  eff_x_min, eff_x_max = x_min, x_max
591
 
592
  if y_ticks and len(y_ticks) >= 2:
593
+ y_slope = _robust_tick_fit(y_ticks)
 
594
  eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
595
  eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
596
  else: