mm committed on
Commit
6e513f7
·
1 Parent(s): 39f456e

Fix PaddlePaddle 3.3.0 PIR crash, cache models, add --max-inpaint-size

Browse files

- Exclude paddlepaddle 3.3.0 (PIR regression, Paddle#77340)
- Disable PIR API as safety net
- Cache PaddleOCR and LAMA models in memory across calls
- Add max_inpaint_size option to downscale large images before LAMA

app.py CHANGED
@@ -5,6 +5,7 @@ import tempfile
5
  from pathlib import Path
6
 
7
  os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
 
8
 
9
  import gradio as gr
10
  from PIL import Image
 
5
  from pathlib import Path
6
 
7
  os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
8
+ os.environ["FLAGS_enable_pir_api"] = "0"
9
 
10
  import gradio as gr
11
  from PIL import Image
px_image2pptx/cli.py CHANGED
@@ -56,6 +56,9 @@ examples:
56
  help="Maximum font size in points (default: 72)")
57
  parser.add_argument("--skip-inpaint", action="store_true",
58
  help="Skip LAMA inpainting (use original or solid bg)")
 
 
 
59
  parser.add_argument("--work-dir", default=None,
60
  help="Directory for intermediate files")
61
  return parser.parse_args(argv)
@@ -77,6 +80,7 @@ def main(argv=None):
77
  min_font=args.min_font,
78
  max_font=args.max_font,
79
  skip_inpaint=args.skip_inpaint,
 
80
  work_dir=args.work_dir,
81
  )
82
  elapsed = time.time() - t0
 
56
  help="Maximum font size in points (default: 72)")
57
  parser.add_argument("--skip-inpaint", action="store_true",
58
  help="Skip LAMA inpainting (use original or solid bg)")
59
+ parser.add_argument("--max-inpaint-size", type=int, default=None,
60
+ help="Downscale longer edge to N px before inpainting "
61
+ "(e.g. 2048). Reduces time for large images.")
62
  parser.add_argument("--work-dir", default=None,
63
  help="Directory for intermediate files")
64
  return parser.parse_args(argv)
 
80
  min_font=args.min_font,
81
  max_font=args.max_font,
82
  skip_inpaint=args.skip_inpaint,
83
+ max_inpaint_size=args.max_inpaint_size,
84
  work_dir=args.work_dir,
85
  )
86
  elapsed = time.time() - t0
px_image2pptx/inpaint.py CHANGED
@@ -24,20 +24,17 @@ def _ensure_lama():
24
  ) from None
25
 
26
 
27
- def inpaint(
28
- image: np.ndarray,
29
- mask: np.ndarray,
30
- ) -> np.ndarray:
31
- """Inpaint masked regions of an image using LAMA.
32
 
33
- Args:
34
- image: RGB numpy array (H, W, 3), uint8.
35
- mask: Grayscale numpy array (H, W), uint8. 255 = inpaint.
36
 
37
- Returns:
38
- Inpainted RGB numpy array (H, W, 3), uint8.
39
- """
40
- torch, download_model, LAMA_MODEL_URL, prepare_img_and_mask = _ensure_lama()
 
 
 
41
 
42
  if torch.backends.mps.is_available():
43
  device = torch.device("mps")
@@ -51,8 +48,47 @@ def inpaint(
51
  model.eval()
52
  model.to(device)
53
 
54
- pil_image = Image.fromarray(image)
55
- pil_mask = Image.fromarray(mask)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  img_t, mask_t = prepare_img_and_mask(pil_image, pil_mask, device)
57
 
58
  with torch.inference_mode():
@@ -60,6 +96,11 @@ def inpaint(
60
  result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
61
  result = np.clip(result * 255, 0, 255).astype(np.uint8)
62
 
 
 
 
 
 
63
  return result
64
 
65
 
 
24
  ) from None
25
 
26
 
27
+ _cached_model = None
28
+ _cached_device = None
 
 
 
29
 
 
 
 
30
 
31
+ def _get_model():
32
+ """Return the cached LAMA model, loading it on first call."""
33
+ global _cached_model, _cached_device
34
+ if _cached_model is not None:
35
+ return _cached_model, _cached_device
36
+
37
+ torch, download_model, LAMA_MODEL_URL, _ = _ensure_lama()
38
 
39
  if torch.backends.mps.is_available():
40
  device = torch.device("mps")
 
48
  model.eval()
49
  model.to(device)
50
 
51
+ _cached_model = model
52
+ _cached_device = device
53
+ return model, device
54
+
55
+
56
+ def inpaint(
57
+ image: np.ndarray,
58
+ mask: np.ndarray,
59
+ max_size: int | None = None,
60
+ ) -> np.ndarray:
61
+ """Inpaint masked regions of an image using LAMA.
62
+
63
+ Args:
64
+ image: RGB numpy array (H, W, 3), uint8.
65
+ mask: Grayscale numpy array (H, W), uint8. 255 = inpaint.
66
+ max_size: If set, downscale the longer edge to this many pixels
67
+ before LAMA inference, then upscale the result back.
68
+ Reduces memory and compute for large images.
69
+
70
+ Returns:
71
+ Inpainted RGB numpy array (H, W, 3), uint8, same size as input.
72
+ """
73
+ _, _, _, prepare_img_and_mask = _ensure_lama()
74
+ import torch
75
+
76
+ model, device = _get_model()
77
+
78
+ orig_h, orig_w = image.shape[:2]
79
+ scaled = False
80
+
81
+ if max_size and max(orig_h, orig_w) > max_size:
82
+ scale = max_size / max(orig_h, orig_w)
83
+ new_w = round(orig_w * scale)
84
+ new_h = round(orig_h * scale)
85
+ pil_image = Image.fromarray(image).resize((new_w, new_h), Image.LANCZOS)
86
+ pil_mask = Image.fromarray(mask).resize((new_w, new_h), Image.NEAREST)
87
+ scaled = True
88
+ else:
89
+ pil_image = Image.fromarray(image)
90
+ pil_mask = Image.fromarray(mask)
91
+
92
  img_t, mask_t = prepare_img_and_mask(pil_image, pil_mask, device)
93
 
94
  with torch.inference_mode():
 
96
  result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
97
  result = np.clip(result * 255, 0, 255).astype(np.uint8)
98
 
99
+ if scaled:
100
+ result = np.array(
101
+ Image.fromarray(result).resize((orig_w, orig_h), Image.LANCZOS)
102
+ )
103
+
104
  return result
105
 
106
 
px_image2pptx/ocr.py CHANGED
@@ -27,6 +27,22 @@ def _ensure_paddleocr():
27
  ) from None
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
31
  """Run PaddleOCR on an image and return structured text regions.
32
 
@@ -41,17 +57,7 @@ def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
41
  - confidence: float
42
  - bbox: {"x1": int, "y1": int, "x2": int, "y2": int}
43
  """
44
- import os
45
- os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
46
-
47
- PaddleOCR = _ensure_paddleocr()
48
-
49
- ocr = PaddleOCR(
50
- lang=lang,
51
- use_textline_orientation=False,
52
- use_doc_orientation_classify=False,
53
- use_doc_unwarping=False,
54
- )
55
  results = list(ocr.predict(str(image_path)))
56
 
57
  regions = []
 
27
  ) from None
28
 
29
 
30
+ _ocr_cache: dict[str, Any] = {}
31
+
32
+
33
+ def _get_ocr(lang: str):
34
+ """Return a cached PaddleOCR instance for the given language."""
35
+ if lang not in _ocr_cache:
36
+ PaddleOCR = _ensure_paddleocr()
37
+ _ocr_cache[lang] = PaddleOCR(
38
+ lang=lang,
39
+ use_textline_orientation=False,
40
+ use_doc_orientation_classify=False,
41
+ use_doc_unwarping=False,
42
+ )
43
+ return _ocr_cache[lang]
44
+
45
+
46
  def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
47
  """Run PaddleOCR on an image and return structured text regions.
48
 
 
57
  - confidence: float
58
  - bbox: {"x1": int, "y1": int, "x2": int, "y2": int}
59
  """
60
+ ocr = _get_ocr(lang)
 
 
 
 
 
 
 
 
 
 
61
  results = list(ocr.predict(str(image_path)))
62
 
63
  regions = []
px_image2pptx/pipeline.py CHANGED
@@ -29,6 +29,7 @@ def image_to_pptx(
29
  min_font: int = 8,
30
  max_font: int = 72,
31
  skip_inpaint: bool = False,
 
32
  work_dir: str | Path | None = None,
33
  ) -> dict:
34
  """Convert a static image to an editable PPTX.
@@ -44,6 +45,8 @@ def image_to_pptx(
44
  min_font: Minimum font size in points.
45
  max_font: Maximum font size in points.
46
  skip_inpaint: If True, skip inpainting (use original as background).
 
 
47
  work_dir: Directory for intermediate files (default: temp dir).
48
 
49
  Returns:
@@ -96,7 +99,7 @@ def image_to_pptx(
96
  from px_image2pptx.inpaint import inpaint
97
 
98
  image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
99
- result = inpaint(image_rgb, dilated_mask)
100
 
101
  if save_intermediates:
102
  bg_path = str(wdir / "background.png")
 
29
  min_font: int = 8,
30
  max_font: int = 72,
31
  skip_inpaint: bool = False,
32
+ max_inpaint_size: int | None = None,
33
  work_dir: str | Path | None = None,
34
  ) -> dict:
35
  """Convert a static image to an editable PPTX.
 
45
  min_font: Minimum font size in points.
46
  max_font: Maximum font size in points.
47
  skip_inpaint: If True, skip inpainting (use original as background).
48
+ max_inpaint_size: If set, downscale the longer edge to this many
49
+ pixels before LAMA inpainting. Reduces time for large images.
50
  work_dir: Directory for intermediate files (default: temp dir).
51
 
52
  Returns:
 
99
  from px_image2pptx.inpaint import inpaint
100
 
101
  image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
102
+ result = inpaint(image_rgb, dilated_mask, max_size=max_inpaint_size)
103
 
104
  if save_intermediates:
105
  bg_path = str(wdir / "background.png")
requirements.txt CHANGED
@@ -3,7 +3,7 @@ numpy>=1.24
3
  opencv-python-headless>=4.8
4
  python-pptx>=0.6.21
5
  paddleocr>=3.0
6
- paddlepaddle>=3.0
7
  simple-lama-inpainting>=0.1.0
8
  --extra-index-url https://download.pytorch.org/whl/cpu
9
  torch>=2.0
 
3
  opencv-python-headless>=4.8
4
  python-pptx>=0.6.21
5
  paddleocr>=3.0
6
+ paddlepaddle>=3.0,!=3.3.0
7
  simple-lama-inpainting>=0.1.0
8
  --extra-index-url https://download.pytorch.org/whl/cpu
9
  torch>=2.0