Fix image digitization: robust axis tick disambiguation and mapping
Three fixes for image-based CV analysis:
1. Improved tick disambiguation: when OCR detections appear in both
x-axis and y-axis candidate lists (bottom-left corner), assign
each to the axis whose alignment coordinate it matches better,
instead of filtering by position distance from median.
2. Robust tick-to-pixel mapping: when OCR misses intermediate tick
labels (e.g. only detecting [0.5, -1.0, -1.5] instead of
[1.0, 0.5, 0.0, -0.5, -1.0, -1.5]), the simple polyfit produces
a wrong slope. New _robust_tick_fit uses the minimum |Δvalue/Δpixel|
ratio among adjacent pairs as the true scale.
3. Outlier tick removal and decimal misread correction: detects OCR
misreads like "1.0" → "10" using leave-one-out residuals and
decimal-point-drop heuristics.
Also widened left margin crop (8% → 12%) for y-axis unit detection
and added "HA" as OCR misread pattern for "µA".
Made-with: Cursor
- digitizer.py +209 -6
|
@@ -70,6 +70,55 @@ def auto_detect_axis_bounds(image_array):
|
|
| 70 |
x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
|
| 71 |
y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
x_ticks = _extract_axis_ticks(x_candidates, axis="x")
|
| 74 |
y_ticks = _extract_axis_ticks(y_candidates, axis="y")
|
| 75 |
|
|
@@ -115,7 +164,7 @@ def _detect_current_unit(image_array, reader, all_texts):
|
|
| 115 |
return unit
|
| 116 |
|
| 117 |
H, W = image_array.shape[:2]
|
| 118 |
-
left_strip = image_array[:, : int(W * 0.08), :]
|
| 119 |
rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
|
| 120 |
|
| 121 |
try:
|
|
@@ -130,6 +179,7 @@ def _detect_current_unit(image_array, reader, all_texts):
|
|
| 130 |
for pattern, unit in [
|
| 131 |
("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
|
| 132 |
("MA", "µA"), # OCR often misreads µ as M
|
|
|
|
| 133 |
("mA", "mA"),
|
| 134 |
("nA", "nA"),
|
| 135 |
]:
|
|
@@ -181,9 +231,119 @@ def _extract_axis_ticks(candidates, axis="x"):
|
|
| 181 |
# y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
|
| 182 |
ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
return ticks
|
| 185 |
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
def _fix_missing_negatives(ticks, increasing=True):
|
| 188 |
"""Fix OCR-dropped minus signs using spatial monotonicity.
|
| 189 |
|
|
@@ -330,6 +490,52 @@ def _detect_plot_region(gray):
|
|
| 330 |
return px_left, px_right, py_top, py_bottom
|
| 331 |
|
| 332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
| 334 |
threshold=0, min_contour_length=50,
|
| 335 |
x_ticks=None, y_ticks=None):
|
|
@@ -377,17 +583,14 @@ def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
|
| 377 |
# Build pixel-to-data mapping from tick positions if available.
|
| 378 |
# This allows correct extrapolation for data beyond the last tick.
|
| 379 |
if x_ticks and len(x_ticks) >= 2:
|
| 380 |
-
|
| 381 |
-
x_slope = np.polyfit(x_px, x_val, 1)
|
| 382 |
-
# Recompute effective x_min/x_max at the plot region boundaries
|
| 383 |
eff_x_min = float(np.polyval(x_slope, px_left))
|
| 384 |
eff_x_max = float(np.polyval(x_slope, px_right))
|
| 385 |
else:
|
| 386 |
eff_x_min, eff_x_max = x_min, x_max
|
| 387 |
|
| 388 |
if y_ticks and len(y_ticks) >= 2:
|
| 389 |
-
|
| 390 |
-
y_slope = np.polyfit(y_px, y_val, 1)
|
| 391 |
eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
|
| 392 |
eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
|
| 393 |
else:
|
|
|
|
| 70 |
x_candidates = [(cx, cy, v) for cx, cy, v in detections if cy > H * 0.65]
|
| 71 |
y_candidates = [(cx, cy, v) for cx, cy, v in detections if cx < W * 0.30]
|
| 72 |
|
| 73 |
+
# Disambiguate detections that appear in both candidate lists.
|
| 74 |
+
# Y-axis labels in the bottom-left corner satisfy both cy > 0.65*H
|
| 75 |
+
# and cx < 0.30*W. Assign each ambiguous detection to the axis
|
| 76 |
+
# whose alignment coordinate (cy for x-axis, cx for y-axis) it
|
| 77 |
+
# matches better.
|
| 78 |
+
if x_candidates and y_candidates:
|
| 79 |
+
ambiguous_indices = set()
|
| 80 |
+
for i, xc in enumerate(x_candidates):
|
| 81 |
+
for yc in y_candidates:
|
| 82 |
+
if abs(xc[0] - yc[0]) < 10 and abs(xc[1] - yc[1]) < 10:
|
| 83 |
+
ambiguous_indices.add(i)
|
| 84 |
+
|
| 85 |
+
if ambiguous_indices:
|
| 86 |
+
# X-axis ticks share a common cy (alignment); compute the
|
| 87 |
+
# median cy of non-ambiguous x-candidates as reference.
|
| 88 |
+
clean_x_cys = [x_candidates[i][1] for i in range(len(x_candidates))
|
| 89 |
+
if i not in ambiguous_indices]
|
| 90 |
+
clean_y_cxs = [c[0] for c in y_candidates
|
| 91 |
+
if not any(abs(c[0] - x_candidates[j][0]) < 10
|
| 92 |
+
and abs(c[1] - x_candidates[j][1]) < 10
|
| 93 |
+
for j in ambiguous_indices)]
|
| 94 |
+
|
| 95 |
+
ref_x_cy = np.median(clean_x_cys) if clean_x_cys else np.median([c[1] for c in x_candidates])
|
| 96 |
+
ref_y_cx = np.median(clean_y_cxs) if clean_y_cxs else np.median([c[0] for c in y_candidates])
|
| 97 |
+
|
| 98 |
+
x_candidates_filtered = []
|
| 99 |
+
for i, c in enumerate(x_candidates):
|
| 100 |
+
if i in ambiguous_indices:
|
| 101 |
+
dist_to_x_row = abs(c[1] - ref_x_cy)
|
| 102 |
+
dist_to_y_col = abs(c[0] - ref_y_cx)
|
| 103 |
+
if dist_to_x_row > dist_to_y_col:
|
| 104 |
+
continue
|
| 105 |
+
x_candidates_filtered.append(c)
|
| 106 |
+
|
| 107 |
+
y_candidates_filtered = []
|
| 108 |
+
for c in y_candidates:
|
| 109 |
+
is_amb = any(abs(c[0] - x_candidates[j][0]) < 10
|
| 110 |
+
and abs(c[1] - x_candidates[j][1]) < 10
|
| 111 |
+
for j in ambiguous_indices)
|
| 112 |
+
if is_amb:
|
| 113 |
+
dist_to_x_row = abs(c[1] - ref_x_cy)
|
| 114 |
+
dist_to_y_col = abs(c[0] - ref_y_cx)
|
| 115 |
+
if dist_to_y_col > dist_to_x_row:
|
| 116 |
+
continue
|
| 117 |
+
y_candidates_filtered.append(c)
|
| 118 |
+
|
| 119 |
+
x_candidates = x_candidates_filtered
|
| 120 |
+
y_candidates = y_candidates_filtered
|
| 121 |
+
|
| 122 |
x_ticks = _extract_axis_ticks(x_candidates, axis="x")
|
| 123 |
y_ticks = _extract_axis_ticks(y_candidates, axis="y")
|
| 124 |
|
|
|
|
| 164 |
return unit
|
| 165 |
|
| 166 |
H, W = image_array.shape[:2]
|
| 167 |
+
left_strip = image_array[:, : int(W * 0.12), :]
|
| 168 |
rotated = cv2.rotate(left_strip, cv2.ROTATE_90_CLOCKWISE)
|
| 169 |
|
| 170 |
try:
|
|
|
|
| 179 |
for pattern, unit in [
|
| 180 |
("µA", "µA"), ("µa", "µA"), ("uA", "µA"), ("μA", "µA"),
|
| 181 |
("MA", "µA"), # OCR often misreads µ as M
|
| 182 |
+
("HA", "µA"), # OCR sometimes misreads µ as H
|
| 183 |
("mA", "mA"),
|
| 184 |
("nA", "nA"),
|
| 185 |
]:
|
|
|
|
| 231 |
# y-axis: value DECREASES with cy (top to bottom, since cy increases downward)
|
| 232 |
ticks = _fix_missing_negatives(ticks, increasing=(axis == "x"))
|
| 233 |
|
| 234 |
+
# Remove outlier ticks whose values break the expected linear
|
| 235 |
+
# position-to-value mapping (e.g. OCR reading "1.0" as "10").
|
| 236 |
+
ticks = _remove_tick_outliers(ticks)
|
| 237 |
+
|
| 238 |
return ticks
|
| 239 |
|
| 240 |
|
| 241 |
+
def _remove_tick_outliers(ticks):
    """Drop ticks that break the expected linear position-to-value mapping.

    Each tick is held out in turn and a line is fitted to the rest; a
    large residual on the held-out tick marks it as an outlier.  This is
    robust even when a single bad value (e.g. OCR reading "1.0" as "10")
    skews the overall least-squares fit.  Decimal-point-dropped misreads
    are repaired first via _fix_decimal_misreads so fixable ticks are
    not discarded.

    Args:
        ticks: sequence of (pixel_position, value) pairs.

    Returns:
        List of (pixel_position, value) pairs with outliers removed;
        inputs with fewer than 3 ticks are returned unchanged.
    """
    if len(ticks) < 3:
        return ticks

    count = len(ticks)
    px = np.asarray([t[0] for t in ticks], dtype=float)
    val = np.asarray([t[1] for t in ticks], dtype=float)

    # Pass 1: repair "decimal point dropped" OCR misreads before judging
    # outliers, so a correctable tick is fixed rather than removed.
    ticks = _fix_decimal_misreads(list(zip(px, val)))
    px = np.asarray([t[0] for t in ticks], dtype=float)
    val = np.asarray([t[1] for t in ticks], dtype=float)

    # Pass 2: leave-one-out residual for every tick.
    residuals = np.empty(count)
    for i in range(count):
        rest_px = np.delete(px, i)
        rest_val = np.delete(val, i)
        line = np.polyfit(rest_px, rest_val, 1)
        residuals[i] = abs(val[i] - np.polyval(line, px[i]))

    # Tolerance: roughly one tick-spacing worth of value change,
    # estimated from the overall fit's slope and the median pixel gap.
    slope_all = np.polyfit(px, val, 1)[0]
    spacing = abs(slope_all) * np.median(np.diff(px))
    if spacing < 1e-12:
        # Degenerate slope: fall back to the median value step.
        spacing = np.median(np.abs(np.diff(val))) + 1e-12

    keep = residuals < 2 * spacing
    if keep.sum() < 2:
        # Too aggressive — keep the (decimal-fixed) ticks instead.
        return ticks

    return [(p, v) for p, v, ok in zip(px, val, keep) if ok]
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _fix_decimal_misreads(ticks):
|
| 293 |
+
"""Fix OCR misreads where the decimal point is dropped.
|
| 294 |
+
|
| 295 |
+
E.g. "1.0" read as "10", "0.5" read as "5". Detects these by
|
| 296 |
+
checking if the value/pixel ratio between adjacent ticks is
|
| 297 |
+
inconsistent, and whether dividing a value by 10 fixes it.
|
| 298 |
+
"""
|
| 299 |
+
if len(ticks) < 3:
|
| 300 |
+
return ticks
|
| 301 |
+
|
| 302 |
+
positions = np.array([t[0] for t in ticks], dtype=float)
|
| 303 |
+
values = np.array([t[1] for t in ticks], dtype=float)
|
| 304 |
+
n = len(ticks)
|
| 305 |
+
|
| 306 |
+
# Compute the value step per pixel step for each adjacent pair
|
| 307 |
+
dv = np.diff(values)
|
| 308 |
+
dp = np.diff(positions)
|
| 309 |
+
ratios = dv / (dp + 1e-12)
|
| 310 |
+
|
| 311 |
+
# The median ratio represents the "true" scale
|
| 312 |
+
med_ratio = np.median(ratios)
|
| 313 |
+
if abs(med_ratio) < 1e-12:
|
| 314 |
+
return ticks
|
| 315 |
+
|
| 316 |
+
# For each tick, check if replacing its value with value/10
|
| 317 |
+
# produces a more consistent set of ratios
|
| 318 |
+
improved = True
|
| 319 |
+
max_iters = 5
|
| 320 |
+
while improved and max_iters > 0:
|
| 321 |
+
improved = False
|
| 322 |
+
max_iters -= 1
|
| 323 |
+
for i in range(n):
|
| 324 |
+
# Compute current residual from linear fit
|
| 325 |
+
coeffs = np.polyfit(positions, values, 1)
|
| 326 |
+
predicted = np.polyval(coeffs, positions[i])
|
| 327 |
+
current_res = abs(values[i] - predicted)
|
| 328 |
+
|
| 329 |
+
# Try value / 10
|
| 330 |
+
test_values = values.copy()
|
| 331 |
+
test_values[i] = values[i] / 10.0
|
| 332 |
+
coeffs_test = np.polyfit(positions, test_values, 1)
|
| 333 |
+
predicted_test = np.polyval(coeffs_test, positions[i])
|
| 334 |
+
test_res = abs(test_values[i] - predicted_test)
|
| 335 |
+
|
| 336 |
+
# Also compute overall fit quality
|
| 337 |
+
current_total = np.sum((values - np.polyval(coeffs, positions)) ** 2)
|
| 338 |
+
test_total = np.sum((test_values - np.polyval(coeffs_test, positions)) ** 2)
|
| 339 |
+
|
| 340 |
+
if test_total < current_total * 0.3:
|
| 341 |
+
values[i] = test_values[i]
|
| 342 |
+
improved = True
|
| 343 |
+
|
| 344 |
+
return list(zip(positions, values))
|
| 345 |
+
|
| 346 |
+
|
| 347 |
def _fix_missing_negatives(ticks, increasing=True):
|
| 348 |
"""Fix OCR-dropped minus signs using spatial monotonicity.
|
| 349 |
|
|
|
|
| 490 |
return px_left, px_right, py_top, py_bottom
|
| 491 |
|
| 492 |
|
| 493 |
+
def _robust_tick_fit(ticks):
|
| 494 |
+
"""Fit a linear pixel→value mapping that handles missing intermediate ticks.
|
| 495 |
+
|
| 496 |
+
A simple polyfit fails when OCR misses some tick labels, because the
|
| 497 |
+
value-gap between detected ticks no longer matches the pixel-gap.
|
| 498 |
+
For example, ticks at values [0.5, -1.0, -1.5] with equal pixel
|
| 499 |
+
spacing means OCR missed 0.0 and -0.5 between 0.5 and -1.0.
|
| 500 |
+
|
| 501 |
+
Strategy: find the minimum |Δvalue/Δpixel| ratio among adjacent
|
| 502 |
+
tick pairs — this corresponds to the pair where no ticks are missing.
|
| 503 |
+
Use that ratio as the true scale, then anchor the mapping at the
|
| 504 |
+
tick pair that defines it.
|
| 505 |
+
"""
|
| 506 |
+
if len(ticks) < 2:
|
| 507 |
+
return np.array([0.0, 0.0])
|
| 508 |
+
|
| 509 |
+
positions = np.array([t[0] for t in ticks], dtype=float)
|
| 510 |
+
values = np.array([t[1] for t in ticks], dtype=float)
|
| 511 |
+
|
| 512 |
+
if len(ticks) == 2:
|
| 513 |
+
return np.polyfit(positions, values, 1)
|
| 514 |
+
|
| 515 |
+
# Compute |Δvalue / Δpixel| for each adjacent pair
|
| 516 |
+
dp = np.diff(positions)
|
| 517 |
+
dv = np.diff(values)
|
| 518 |
+
ratios = dv / (dp + 1e-12)
|
| 519 |
+
abs_ratios = np.abs(ratios)
|
| 520 |
+
|
| 521 |
+
# The minimum absolute ratio corresponds to the pair with no missing
|
| 522 |
+
# ticks between them (smallest value change per pixel step).
|
| 523 |
+
min_idx = np.argmin(abs_ratios)
|
| 524 |
+
true_ratio = ratios[min_idx]
|
| 525 |
+
|
| 526 |
+
# Check if all ratios are consistent (within 50% of each other).
|
| 527 |
+
# If so, just use polyfit — no missing ticks.
|
| 528 |
+
if abs_ratios.max() < abs_ratios.min() * 1.8:
|
| 529 |
+
return np.polyfit(positions, values, 1)
|
| 530 |
+
|
| 531 |
+
# Use the true ratio and anchor at the midpoint of the best pair
|
| 532 |
+
anchor_px = (positions[min_idx] + positions[min_idx + 1]) / 2
|
| 533 |
+
anchor_val = (values[min_idx] + values[min_idx + 1]) / 2
|
| 534 |
+
intercept = anchor_val - true_ratio * anchor_px
|
| 535 |
+
|
| 536 |
+
return np.array([true_ratio, intercept])
|
| 537 |
+
|
| 538 |
+
|
| 539 |
def digitize_plot(image_array, x_min, x_max, y_min, y_max,
|
| 540 |
threshold=0, min_contour_length=50,
|
| 541 |
x_ticks=None, y_ticks=None):
|
|
|
|
| 583 |
# Build pixel-to-data mapping from tick positions if available.
|
| 584 |
# This allows correct extrapolation for data beyond the last tick.
|
| 585 |
if x_ticks and len(x_ticks) >= 2:
|
| 586 |
+
x_slope = _robust_tick_fit(x_ticks)
|
|
|
|
|
|
|
| 587 |
eff_x_min = float(np.polyval(x_slope, px_left))
|
| 588 |
eff_x_max = float(np.polyval(x_slope, px_right))
|
| 589 |
else:
|
| 590 |
eff_x_min, eff_x_max = x_min, x_max
|
| 591 |
|
| 592 |
if y_ticks and len(y_ticks) >= 2:
|
| 593 |
+
y_slope = _robust_tick_fit(y_ticks)
|
|
|
|
| 594 |
eff_y_min = float(np.polyval(y_slope, py_bottom)) # bottom = y_min
|
| 595 |
eff_y_max = float(np.polyval(y_slope, py_top)) # top = y_max
|
| 596 |
else:
|