Bing Yan committed on
Commit
8e295bb
·
1 Parent(s): 7e3de92

Support multiple image uploads for CV and TPD analysis

Browse files

Both image tabs now accept multiple files (one per scan rate / heating
rate) via gr.File with file_count="multiple", matching the CSV tab
pattern. Each image is independently digitized with per-image OCR axis
detection, then all curves are fed to the model together.

This enables multi-scan-rate image analysis, which dramatically
improves classification accuracy (e.g. BV vs MHC disambiguation).

Made-with: Cursor

Files changed (2) hide show
  1. app.py +159 -111
  2. requirements.txt +1 -0
app.py CHANGED
@@ -145,100 +145,119 @@ def analyze_cv(files, scan_rates_text, E0_V, T_K, A_cm2,
145
  return _run_ec_analysis(potentials, fluxes, sigmas_list, n_samples, preproc_text=preproc_info)
146
 
147
 
148
- def analyze_cv_image(image, scan_rate_text, E0_V, threshold, n_samples,
149
  x_min, x_max, y_min, y_max):
150
- """Analyze CV from an uploaded plot image.
151
 
152
- Extracts the CV curve via image digitization, then nondimensionalizes
153
  and runs inference identically to the CSV path.
154
- Axis bounds are auto-detected via OCR if not provided.
155
  """
156
- if image is None:
157
- return _ec_error("Please upload an image.")
158
 
159
  try:
160
  from digitizer import digitize_plot, auto_detect_axis_bounds
 
161
  except ImportError:
162
- return _ec_error("OpenCV not available for image digitization.")
163
 
164
  scan_rate_text = scan_rate_text.strip() if scan_rate_text else ""
165
  if not scan_rate_text:
166
- return _ec_error("Please enter the scan rate (V/s).")
167
  try:
168
- v_Vs = float(scan_rate_text)
169
  except ValueError:
170
- return _ec_error("Invalid scan rate. Enter a number in V/s.")
171
 
172
- img_arr = np.array(image)
 
 
 
173
 
174
- # Determine axis bounds: user overrides take priority, else auto-detect
175
  has_user_bounds = all(
176
  v is not None and v != 0 for v in [x_min, x_max, y_min, y_max]
177
  )
178
- if has_user_bounds:
179
- bounds = {
180
- "x_min": float(x_min), "x_max": float(x_max),
181
- "y_min": float(y_min), "y_max": float(y_max),
182
- }
183
- bounds_source = "user"
184
- else:
185
- bounds = auto_detect_axis_bounds(img_arr)
186
- if bounds is None:
187
- return _ec_error(
188
- "Could not auto-detect axis bounds from the image. "
189
- "Please enter E min, E max, I min, I max under "
190
- "'Advanced: axis overrides'.")
191
- bounds_source = "auto"
192
-
193
- try:
194
- E_V, I_raw = digitize_plot(
195
- img_arr, bounds["x_min"], bounds["x_max"],
196
- bounds["y_min"], bounds["y_max"],
197
- threshold=int(threshold),
198
- )
199
- except Exception as e:
200
- return _ec_error(f"Digitization failed: {e}")
201
-
202
- # Convert current units: OCR reads axis labels so I_raw is in the
203
- # display unit (µA, mA, A). Assume A unless values are large.
204
- i_max = np.max(np.abs(I_raw))
205
- if i_max > 100:
206
- i_A = I_raw * 1e-6 # likely µA
207
- i_unit_guess = "µA"
208
- elif i_max > 0.1:
209
- i_A = I_raw * 1e-3 # likely mA
210
- i_unit_guess = "mA"
211
- else:
212
- i_A = I_raw
213
- i_unit_guess = "A"
214
 
215
- if E0_V is not None and E0_V != 0:
216
- e0 = float(E0_V)
217
- e0_source = "user"
218
- else:
219
- e0 = float(estimate_E0(E_V, i_A))
220
- e0_source = "auto"
221
 
222
  D = 1e-5
223
  T = 298.15
224
  A = 0.0707
225
  C_molcm3 = 1e-6
226
  n = 1
 
 
227
 
228
- theta, flux, sigma = nondimensionalize_cv(
229
- E_V, i_A, v_Vs, e0, T, A, C_molcm3, D, n
230
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
- preproc_info = "**Preprocessing (image):** "
233
- preproc_info += f"Scan rate: {v_Vs*1000:.1f} mV/s (σ={sigma:.2f}). "
234
- preproc_info += f"Axis bounds ({bounds_source}): E=[{bounds['x_min']:.3f}, {bounds['x_max']:.3f}] V, "
235
- preproc_info += f"I=[{bounds['y_min']:.2f}, {bounds['y_max']:.2f}] {i_unit_guess}. "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  if e0_source == "auto":
237
  preproc_info += f"E₀ auto-estimated as {e0:.4f} V."
238
  else:
239
  preproc_info += f"E₀ = {e0:.4f} V (user-provided)."
240
 
241
- return _run_ec_analysis([theta], [flux], [sigma], n_samples, preproc_text=preproc_info)
242
 
243
 
244
  def _ec_error(msg=""):
@@ -364,59 +383,74 @@ def analyze_tpd(files, heating_rates_text, n_samples):
364
  return _run_tpd_analysis(temperatures, rates, betas, n_samples, preproc_text=preproc_info)
365
 
366
 
367
- def analyze_tpd_image(image, heating_rates_text, threshold, n_samples,
368
  x_min, x_max, y_min, y_max):
369
- """Analyze TPD from an uploaded plot image.
370
 
371
- Axis bounds are auto-detected via OCR if not provided.
372
  """
373
- if image is None:
374
- return _tpd_error("Please upload an image.")
375
 
376
  try:
377
  from digitizer import digitize_plot, auto_detect_axis_bounds
 
378
  except ImportError:
379
- return _tpd_error("OpenCV not available for image digitization.")
380
 
381
  heating_rates_text = heating_rates_text.strip() if heating_rates_text else ""
382
  if not heating_rates_text:
383
  return _tpd_error(
384
- "Please enter the heating rate (β in K/s). "
385
  "This value is critical for correct inference.")
386
  try:
387
  betas = [float(s.strip()) for s in heating_rates_text.split(",")]
388
  except ValueError:
389
- return _tpd_error("Invalid heating rates.")
390
 
391
- img_arr = np.array(image)
 
 
 
392
 
393
  has_user_bounds = all(
394
  v is not None and v != 0 for v in [x_min, x_max, y_min, y_max]
395
  )
396
- if has_user_bounds:
397
- bounds = {
398
- "x_min": float(x_min), "x_max": float(x_max),
399
- "y_min": float(y_min), "y_max": float(y_max),
400
- }
401
- else:
402
- bounds = auto_detect_axis_bounds(img_arr)
403
- if bounds is None:
404
- return _tpd_error(
405
- "Could not auto-detect axis bounds from the image. "
406
- "Please enter T min, T max, Signal min, Signal max "
407
- "under 'Advanced: axis overrides'.")
408
 
409
- try:
410
- x_data, y_data = digitize_plot(
411
- img_arr, bounds["x_min"], bounds["x_max"],
412
- bounds["y_min"], bounds["y_max"],
413
- threshold=int(threshold),
414
- )
415
- except Exception as e:
416
- return _tpd_error(f"Digitization failed: {e}")
417
 
418
- preproc_info = f"**Preprocessing (image):** Heating rate β = {betas[0]:.2f} K/s."
419
- return _run_tpd_analysis([x_data], [y_data], betas[:1], n_samples, preproc_text=preproc_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
 
422
  def _tpd_error(msg=""):
@@ -694,14 +728,20 @@ def build_app():
694
  # --- Image mode ---
695
  with gr.Tab("From Image"):
696
  gr.Markdown(
697
- "Upload an image of a CV plot (potential in V on x-axis, "
698
- "current in A/mA/µA on y-axis). Axis bounds are "
699
- "**auto-detected** via OCR override in Advanced if needed."
 
 
 
 
 
 
 
700
  )
701
- cv_img = gr.Image(label="Plot image", type="numpy")
702
  cv_img_scan_rate = gr.Textbox(
703
- label="Scan rate (V/s)",
704
- placeholder="e.g., 0.1",
705
  value="",
706
  )
707
  with gr.Accordion("Advanced parameters", open=False):
@@ -717,8 +757,9 @@ def build_app():
717
  )
718
  with gr.Accordion("Axis overrides", open=False):
719
  gr.Markdown(
720
- "Leave at 0 to auto-detect from the image. "
721
- "Override if OCR detection is inaccurate."
 
722
  )
723
  with gr.Row():
724
  cv_img_xmin = gr.Number(label="E min (V)", value=None)
@@ -744,7 +785,7 @@ def build_app():
744
  cv_img_btn.click(
745
  analyze_cv_image,
746
  inputs=[
747
- cv_img, cv_img_scan_rate, cv_img_E0,
748
  cv_img_threshold, cv_img_nsamples,
749
  cv_img_xmin, cv_img_xmax,
750
  cv_img_ymin, cv_img_ymax,
@@ -811,14 +852,20 @@ def build_app():
811
  # --- Image mode ---
812
  with gr.Tab("From Image"):
813
  gr.Markdown(
814
- "Upload an image of a TPD plot (temperature in K on "
815
- "x-axis, signal on y-axis). Axis bounds are "
816
- "**auto-detected** via OCR override in Advanced if needed."
 
 
 
 
 
 
 
817
  )
818
- tpd_img = gr.Image(label="Plot image", type="numpy")
819
  tpd_img_betas = gr.Textbox(
820
- label="Heating rate β (K/s)",
821
- placeholder="e.g., 5.0",
822
  value="",
823
  )
824
  with gr.Accordion("Advanced parameters", open=False):
@@ -829,8 +876,9 @@ def build_app():
829
  )
830
  with gr.Accordion("Axis overrides", open=False):
831
  gr.Markdown(
832
- "Leave at 0 to auto-detect from the image. "
833
- "Override if OCR detection is inaccurate."
 
834
  )
835
  with gr.Row():
836
  tpd_img_xmin = gr.Number(label="T min (K)", value=None)
@@ -855,7 +903,7 @@ def build_app():
855
  tpd_img_btn.click(
856
  analyze_tpd_image,
857
  inputs=[
858
- tpd_img, tpd_img_betas,
859
  tpd_img_threshold, tpd_img_nsamples,
860
  tpd_img_xmin, tpd_img_xmax,
861
  tpd_img_ymin, tpd_img_ymax,
 
145
  return _run_ec_analysis(potentials, fluxes, sigmas_list, n_samples, preproc_text=preproc_info)
146
 
147
 
148
+ def analyze_cv_image(files, scan_rate_text, E0_V, threshold, n_samples,
149
  x_min, x_max, y_min, y_max):
150
+ """Analyze CV from uploaded plot images (one per scan rate).
151
 
152
+ Extracts CV curves via image digitization, then nondimensionalizes
153
  and runs inference identically to the CSV path.
154
+ Axis bounds are auto-detected via OCR; override them under 'Axis overrides' if needed.
155
  """
156
+ if not files:
157
+ return _ec_error("Please upload at least one image.")
158
 
159
  try:
160
  from digitizer import digitize_plot, auto_detect_axis_bounds
161
+ from PIL import Image as PILImage
162
  except ImportError:
163
+ return _ec_error("Required libraries not available for image digitization.")
164
 
165
  scan_rate_text = scan_rate_text.strip() if scan_rate_text else ""
166
  if not scan_rate_text:
167
+ return _ec_error("Please enter the scan rate(s) (V/s), comma-separated.")
168
  try:
169
+ scan_rates = [float(s.strip()) for s in scan_rate_text.split(",")]
170
  except ValueError:
171
+ return _ec_error("Invalid scan rates. Enter comma-separated numbers in V/s.")
172
 
173
+ if len(files) != len(scan_rates):
174
+ return _ec_error(
175
+ f"Number of images ({len(files)}) must match number of "
176
+ f"scan rates ({len(scan_rates)}).")
177
 
 
178
  has_user_bounds = all(
179
  v is not None and v != 0 for v in [x_min, x_max, y_min, y_max]
180
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
+ potentials, fluxes, sigmas_list = [], [], []
183
+ preproc_parts = []
 
 
 
 
184
 
185
  D = 1e-5
186
  T = 298.15
187
  A = 0.0707
188
  C_molcm3 = 1e-6
189
  n = 1
190
+ e0 = None
191
+ e0_source = None
192
 
193
+ if E0_V is not None and E0_V != 0:
194
+ e0 = float(E0_V)
195
+ e0_source = "user"
196
+
197
+ for idx, f in enumerate(files):
198
+ img_arr = np.array(PILImage.open(f.name).convert("RGB"))
199
+ v_Vs = scan_rates[idx]
200
+
201
+ if has_user_bounds:
202
+ bounds = {
203
+ "x_min": float(x_min), "x_max": float(x_max),
204
+ "y_min": float(y_min), "y_max": float(y_max),
205
+ }
206
+ bounds_source = "user"
207
+ else:
208
+ bounds = auto_detect_axis_bounds(img_arr)
209
+ if bounds is None:
210
+ return _ec_error(
211
+ f"Could not auto-detect axis bounds for image {idx + 1}. "
212
+ "Please enter E min, E max, I min, I max under "
213
+ "'Axis overrides'.")
214
+ bounds_source = "auto"
215
+
216
+ try:
217
+ E_V, I_raw = digitize_plot(
218
+ img_arr, bounds["x_min"], bounds["x_max"],
219
+ bounds["y_min"], bounds["y_max"],
220
+ threshold=int(threshold),
221
+ )
222
+ except Exception as exc:
223
+ return _ec_error(f"Digitization failed for image {idx + 1}: {exc}")
224
+
225
+ i_max = np.max(np.abs(I_raw))
226
+ if i_max > 100:
227
+ i_A = I_raw * 1e-6
228
+ i_unit = "µA"
229
+ elif i_max > 0.1:
230
+ i_A = I_raw * 1e-3
231
+ i_unit = "mA"
232
+ else:
233
+ i_A = I_raw
234
+ i_unit = "A"
235
 
236
+ if e0 is None:
237
+ e0 = float(estimate_E0(E_V, i_A))
238
+ e0_source = "auto"
239
+
240
+ theta, flux, sigma = nondimensionalize_cv(
241
+ E_V, i_A, v_Vs, e0, T, A, C_molcm3, D, n
242
+ )
243
+ potentials.append(theta)
244
+ fluxes.append(flux)
245
+ sigmas_list.append(sigma)
246
+
247
+ preproc_parts.append(
248
+ f"{v_Vs*1000:.1f} mV/s (σ={sigma:.2f}, "
249
+ f"bounds {bounds_source}: E=[{bounds['x_min']:.3f}, {bounds['x_max']:.3f}] V, "
250
+ f"I=[{bounds['y_min']:.2f}, {bounds['y_max']:.2f}] {i_unit})"
251
+ )
252
+
253
+ preproc_info = f"**Preprocessing ({len(files)} image{'s' if len(files) > 1 else ''}):** "
254
+ preproc_info += "; ".join(preproc_parts) + ". "
255
  if e0_source == "auto":
256
  preproc_info += f"E₀ auto-estimated as {e0:.4f} V."
257
  else:
258
  preproc_info += f"E₀ = {e0:.4f} V (user-provided)."
259
 
260
+ return _run_ec_analysis(potentials, fluxes, sigmas_list, n_samples, preproc_text=preproc_info)
261
 
262
 
263
  def _ec_error(msg=""):
 
383
  return _run_tpd_analysis(temperatures, rates, betas, n_samples, preproc_text=preproc_info)
384
 
385
 
386
+ def analyze_tpd_image(files, heating_rates_text, threshold, n_samples,
387
  x_min, x_max, y_min, y_max):
388
+ """Analyze TPD from uploaded plot images (one per heating rate).
389
 
390
+ Axis bounds are auto-detected via OCR; override them under 'Axis overrides' if needed.
391
  """
392
+ if not files:
393
+ return _tpd_error("Please upload at least one image.")
394
 
395
  try:
396
  from digitizer import digitize_plot, auto_detect_axis_bounds
397
+ from PIL import Image as PILImage
398
  except ImportError:
399
+ return _tpd_error("Required libraries not available for image digitization.")
400
 
401
  heating_rates_text = heating_rates_text.strip() if heating_rates_text else ""
402
  if not heating_rates_text:
403
  return _tpd_error(
404
+ "Please enter the heating rate(s) (β in K/s), comma-separated. "
405
  "This value is critical for correct inference.")
406
  try:
407
  betas = [float(s.strip()) for s in heating_rates_text.split(",")]
408
  except ValueError:
409
+ return _tpd_error("Invalid heating rates. Enter comma-separated numbers in K/s.")
410
 
411
+ if len(files) != len(betas):
412
+ return _tpd_error(
413
+ f"Number of images ({len(files)}) must match number of "
414
+ f"heating rates ({len(betas)}).")
415
 
416
  has_user_bounds = all(
417
  v is not None and v != 0 for v in [x_min, x_max, y_min, y_max]
418
  )
 
 
 
 
 
 
 
 
 
 
 
 
419
 
420
+ temperatures, rates = [], []
421
+ for idx, f in enumerate(files):
422
+ img_arr = np.array(PILImage.open(f.name).convert("RGB"))
 
 
 
 
 
423
 
424
+ if has_user_bounds:
425
+ bounds = {
426
+ "x_min": float(x_min), "x_max": float(x_max),
427
+ "y_min": float(y_min), "y_max": float(y_max),
428
+ }
429
+ else:
430
+ bounds = auto_detect_axis_bounds(img_arr)
431
+ if bounds is None:
432
+ return _tpd_error(
433
+ f"Could not auto-detect axis bounds for image {idx + 1}. "
434
+ "Please enter T min, T max, Signal min, Signal max "
435
+ "under 'Axis overrides'.")
436
+
437
+ try:
438
+ x_data, y_data = digitize_plot(
439
+ img_arr, bounds["x_min"], bounds["x_max"],
440
+ bounds["y_min"], bounds["y_max"],
441
+ threshold=int(threshold),
442
+ )
443
+ except Exception as exc:
444
+ return _tpd_error(f"Digitization failed for image {idx + 1}: {exc}")
445
+
446
+ temperatures.append(x_data)
447
+ rates.append(y_data)
448
+
449
+ beta_strs = [f"β = {b:.2f} K/s" for b in betas]
450
+ preproc_info = f"**Preprocessing ({len(files)} image{'s' if len(files) > 1 else ''}):** "
451
+ preproc_info += f"Heating rates: {', '.join(beta_strs)}."
452
+
453
+ return _run_tpd_analysis(temperatures, rates, betas, n_samples, preproc_text=preproc_info)
454
 
455
 
456
  def _tpd_error(msg=""):
 
728
  # --- Image mode ---
729
  with gr.Tab("From Image"):
730
  gr.Markdown(
731
+ "Upload plot images of CVs (potential in V on x-axis, "
732
+ "current in A/mA/µA on y-axis). **One image per scan rate.** "
733
+ "For best accuracy, upload multiple scan rates.\n\n"
734
+ "Axis bounds are **auto-detected** via OCR — "
735
+ "override in Advanced if needed."
736
+ )
737
+ cv_img_files = gr.File(
738
+ label="Plot images (one per scan rate)",
739
+ file_count="multiple",
740
+ file_types=["image"],
741
  )
 
742
  cv_img_scan_rate = gr.Textbox(
743
+ label="Scan rates (V/s), comma-separated",
744
+ placeholder="e.g., 0.01, 0.1, 1.0",
745
  value="",
746
  )
747
  with gr.Accordion("Advanced parameters", open=False):
 
757
  )
758
  with gr.Accordion("Axis overrides", open=False):
759
  gr.Markdown(
760
+ "Leave at 0 to auto-detect from each image. "
761
+ "Override if OCR detection is inaccurate. "
762
+ "Overrides apply to **all** images."
763
  )
764
  with gr.Row():
765
  cv_img_xmin = gr.Number(label="E min (V)", value=None)
 
785
  cv_img_btn.click(
786
  analyze_cv_image,
787
  inputs=[
788
+ cv_img_files, cv_img_scan_rate, cv_img_E0,
789
  cv_img_threshold, cv_img_nsamples,
790
  cv_img_xmin, cv_img_xmax,
791
  cv_img_ymin, cv_img_ymax,
 
852
  # --- Image mode ---
853
  with gr.Tab("From Image"):
854
  gr.Markdown(
855
+ "Upload plot images of TPD curves (temperature in K on "
856
+ "x-axis, signal on y-axis). **One image per heating rate.** "
857
+ "For best accuracy, upload multiple heating rates.\n\n"
858
+ "Axis bounds are **auto-detected** via OCR — "
859
+ "override in Advanced if needed."
860
+ )
861
+ tpd_img_files = gr.File(
862
+ label="Plot images (one per heating rate)",
863
+ file_count="multiple",
864
+ file_types=["image"],
865
  )
 
866
  tpd_img_betas = gr.Textbox(
867
+ label="Heating rates β (K/s), comma-separated",
868
+ placeholder="e.g., 0.3, 2.6, 22.1",
869
  value="",
870
  )
871
  with gr.Accordion("Advanced parameters", open=False):
 
876
  )
877
  with gr.Accordion("Axis overrides", open=False):
878
  gr.Markdown(
879
+ "Leave at 0 to auto-detect from each image. "
880
+ "Override if OCR detection is inaccurate. "
881
+ "Overrides apply to **all** images."
882
  )
883
  with gr.Row():
884
  tpd_img_xmin = gr.Number(label="T min (K)", value=None)
 
903
  tpd_img_btn.click(
904
  analyze_tpd_image,
905
  inputs=[
906
+ tpd_img_files, tpd_img_betas,
907
  tpd_img_threshold, tpd_img_nsamples,
908
  tpd_img_xmin, tpd_img_xmax,
909
  tpd_img_ymin, tpd_img_ymax,
requirements.txt CHANGED
@@ -6,3 +6,4 @@ gradio==5.29.0
6
  opencv-python-headless>=4.8
7
  easyocr>=1.7
8
  tqdm>=4.65
 
 
6
  opencv-python-headless>=4.8
7
  easyocr>=1.7
8
  tqdm>=4.65
9
+ Pillow>=9.0