Spaces:
Running
Running
mm committed on
Commit ·
6e513f7
1
Parent(s): 39f456e
Fix PaddlePaddle 3.3.0 PIR crash, cache models, add --max-inpaint-size
Browse files
- Exclude paddlepaddle 3.3.0 (PIR regression, Paddle#77340)
- Disable PIR API as safety net
- Cache PaddleOCR and LAMA models in memory across calls
- Add max_inpaint_size option to downscale large images before LAMA
- app.py +1 -0
- px_image2pptx/cli.py +4 -0
- px_image2pptx/inpaint.py +55 -14
- px_image2pptx/ocr.py +17 -11
- px_image2pptx/pipeline.py +4 -1
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import tempfile
|
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
|
|
|
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
from PIL import Image
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
|
| 8 |
+
os.environ["FLAGS_enable_pir_api"] = "0"
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
from PIL import Image
|
px_image2pptx/cli.py
CHANGED
|
@@ -56,6 +56,9 @@ examples:
|
|
| 56 |
help="Maximum font size in points (default: 72)")
|
| 57 |
parser.add_argument("--skip-inpaint", action="store_true",
|
| 58 |
help="Skip LAMA inpainting (use original or solid bg)")
|
|
|
|
|
|
|
|
|
|
| 59 |
parser.add_argument("--work-dir", default=None,
|
| 60 |
help="Directory for intermediate files")
|
| 61 |
return parser.parse_args(argv)
|
|
@@ -77,6 +80,7 @@ def main(argv=None):
|
|
| 77 |
min_font=args.min_font,
|
| 78 |
max_font=args.max_font,
|
| 79 |
skip_inpaint=args.skip_inpaint,
|
|
|
|
| 80 |
work_dir=args.work_dir,
|
| 81 |
)
|
| 82 |
elapsed = time.time() - t0
|
|
|
|
| 56 |
help="Maximum font size in points (default: 72)")
|
| 57 |
parser.add_argument("--skip-inpaint", action="store_true",
|
| 58 |
help="Skip LAMA inpainting (use original or solid bg)")
|
| 59 |
+
parser.add_argument("--max-inpaint-size", type=int, default=None,
|
| 60 |
+
help="Downscale longer edge to N px before inpainting "
|
| 61 |
+
"(e.g. 2048). Reduces time for large images.")
|
| 62 |
parser.add_argument("--work-dir", default=None,
|
| 63 |
help="Directory for intermediate files")
|
| 64 |
return parser.parse_args(argv)
|
|
|
|
| 80 |
min_font=args.min_font,
|
| 81 |
max_font=args.max_font,
|
| 82 |
skip_inpaint=args.skip_inpaint,
|
| 83 |
+
max_inpaint_size=args.max_inpaint_size,
|
| 84 |
work_dir=args.work_dir,
|
| 85 |
)
|
| 86 |
elapsed = time.time() - t0
|
px_image2pptx/inpaint.py
CHANGED
|
@@ -24,20 +24,17 @@ def _ensure_lama():
|
|
| 24 |
) from None
|
| 25 |
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
mask: np.ndarray,
|
| 30 |
-
) -> np.ndarray:
|
| 31 |
-
"""Inpaint masked regions of an image using LAMA.
|
| 32 |
|
| 33 |
-
Args:
|
| 34 |
-
image: RGB numpy array (H, W, 3), uint8.
|
| 35 |
-
mask: Grayscale numpy array (H, W), uint8. 255 = inpaint.
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
if torch.backends.mps.is_available():
|
| 43 |
device = torch.device("mps")
|
|
@@ -51,8 +48,47 @@ def inpaint(
|
|
| 51 |
model.eval()
|
| 52 |
model.to(device)
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
img_t, mask_t = prepare_img_and_mask(pil_image, pil_mask, device)
|
| 57 |
|
| 58 |
with torch.inference_mode():
|
|
@@ -60,6 +96,11 @@ def inpaint(
|
|
| 60 |
result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
|
| 61 |
result = np.clip(result * 255, 0, 255).astype(np.uint8)
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
return result
|
| 64 |
|
| 65 |
|
|
|
|
| 24 |
) from None
|
| 25 |
|
| 26 |
|
| 27 |
+
_cached_model = None
|
| 28 |
+
_cached_device = None
|
|
|
|
|
|
|
|
|
|
| 29 |
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
def _get_model():
|
| 32 |
+
"""Return the cached LAMA model, loading it on first call."""
|
| 33 |
+
global _cached_model, _cached_device
|
| 34 |
+
if _cached_model is not None:
|
| 35 |
+
return _cached_model, _cached_device
|
| 36 |
+
|
| 37 |
+
torch, download_model, LAMA_MODEL_URL, _ = _ensure_lama()
|
| 38 |
|
| 39 |
if torch.backends.mps.is_available():
|
| 40 |
device = torch.device("mps")
|
|
|
|
| 48 |
model.eval()
|
| 49 |
model.to(device)
|
| 50 |
|
| 51 |
+
_cached_model = model
|
| 52 |
+
_cached_device = device
|
| 53 |
+
return model, device
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def inpaint(
|
| 57 |
+
image: np.ndarray,
|
| 58 |
+
mask: np.ndarray,
|
| 59 |
+
max_size: int | None = None,
|
| 60 |
+
) -> np.ndarray:
|
| 61 |
+
"""Inpaint masked regions of an image using LAMA.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
image: RGB numpy array (H, W, 3), uint8.
|
| 65 |
+
mask: Grayscale numpy array (H, W), uint8. 255 = inpaint.
|
| 66 |
+
max_size: If set, downscale the longer edge to this many pixels
|
| 67 |
+
before LAMA inference, then upscale the result back.
|
| 68 |
+
Reduces memory and compute for large images.
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
Inpainted RGB numpy array (H, W, 3), uint8, same size as input.
|
| 72 |
+
"""
|
| 73 |
+
_, _, _, prepare_img_and_mask = _ensure_lama()
|
| 74 |
+
import torch
|
| 75 |
+
|
| 76 |
+
model, device = _get_model()
|
| 77 |
+
|
| 78 |
+
orig_h, orig_w = image.shape[:2]
|
| 79 |
+
scaled = False
|
| 80 |
+
|
| 81 |
+
if max_size and max(orig_h, orig_w) > max_size:
|
| 82 |
+
scale = max_size / max(orig_h, orig_w)
|
| 83 |
+
new_w = round(orig_w * scale)
|
| 84 |
+
new_h = round(orig_h * scale)
|
| 85 |
+
pil_image = Image.fromarray(image).resize((new_w, new_h), Image.LANCZOS)
|
| 86 |
+
pil_mask = Image.fromarray(mask).resize((new_w, new_h), Image.NEAREST)
|
| 87 |
+
scaled = True
|
| 88 |
+
else:
|
| 89 |
+
pil_image = Image.fromarray(image)
|
| 90 |
+
pil_mask = Image.fromarray(mask)
|
| 91 |
+
|
| 92 |
img_t, mask_t = prepare_img_and_mask(pil_image, pil_mask, device)
|
| 93 |
|
| 94 |
with torch.inference_mode():
|
|
|
|
| 96 |
result = inpainted[0].permute(1, 2, 0).detach().cpu().numpy()
|
| 97 |
result = np.clip(result * 255, 0, 255).astype(np.uint8)
|
| 98 |
|
| 99 |
+
if scaled:
|
| 100 |
+
result = np.array(
|
| 101 |
+
Image.fromarray(result).resize((orig_w, orig_h), Image.LANCZOS)
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
return result
|
| 105 |
|
| 106 |
|
px_image2pptx/ocr.py
CHANGED
|
@@ -27,6 +27,22 @@ def _ensure_paddleocr():
|
|
| 27 |
) from None
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
|
| 31 |
"""Run PaddleOCR on an image and return structured text regions.
|
| 32 |
|
|
@@ -41,17 +57,7 @@ def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
|
|
| 41 |
- confidence: float
|
| 42 |
- bbox: {"x1": int, "y1": int, "x2": int, "y2": int}
|
| 43 |
"""
|
| 44 |
-
|
| 45 |
-
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
|
| 46 |
-
|
| 47 |
-
PaddleOCR = _ensure_paddleocr()
|
| 48 |
-
|
| 49 |
-
ocr = PaddleOCR(
|
| 50 |
-
lang=lang,
|
| 51 |
-
use_textline_orientation=False,
|
| 52 |
-
use_doc_orientation_classify=False,
|
| 53 |
-
use_doc_unwarping=False,
|
| 54 |
-
)
|
| 55 |
results = list(ocr.predict(str(image_path)))
|
| 56 |
|
| 57 |
regions = []
|
|
|
|
| 27 |
) from None
|
| 28 |
|
| 29 |
|
| 30 |
+
_ocr_cache: dict[str, Any] = {}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _get_ocr(lang: str):
|
| 34 |
+
"""Return a cached PaddleOCR instance for the given language."""
|
| 35 |
+
if lang not in _ocr_cache:
|
| 36 |
+
PaddleOCR = _ensure_paddleocr()
|
| 37 |
+
_ocr_cache[lang] = PaddleOCR(
|
| 38 |
+
lang=lang,
|
| 39 |
+
use_textline_orientation=False,
|
| 40 |
+
use_doc_orientation_classify=False,
|
| 41 |
+
use_doc_unwarping=False,
|
| 42 |
+
)
|
| 43 |
+
return _ocr_cache[lang]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
def run_ocr(image_path: str | Path, lang: str = "ch") -> list[dict]:
|
| 47 |
"""Run PaddleOCR on an image and return structured text regions.
|
| 48 |
|
|
|
|
| 57 |
- confidence: float
|
| 58 |
- bbox: {"x1": int, "y1": int, "x2": int, "y2": int}
|
| 59 |
"""
|
| 60 |
+
ocr = _get_ocr(lang)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
results = list(ocr.predict(str(image_path)))
|
| 62 |
|
| 63 |
regions = []
|
px_image2pptx/pipeline.py
CHANGED
|
@@ -29,6 +29,7 @@ def image_to_pptx(
|
|
| 29 |
min_font: int = 8,
|
| 30 |
max_font: int = 72,
|
| 31 |
skip_inpaint: bool = False,
|
|
|
|
| 32 |
work_dir: str | Path | None = None,
|
| 33 |
) -> dict:
|
| 34 |
"""Convert a static image to an editable PPTX.
|
|
@@ -44,6 +45,8 @@ def image_to_pptx(
|
|
| 44 |
min_font: Minimum font size in points.
|
| 45 |
max_font: Maximum font size in points.
|
| 46 |
skip_inpaint: If True, skip inpainting (use original as background).
|
|
|
|
|
|
|
| 47 |
work_dir: Directory for intermediate files (default: temp dir).
|
| 48 |
|
| 49 |
Returns:
|
|
@@ -96,7 +99,7 @@ def image_to_pptx(
|
|
| 96 |
from px_image2pptx.inpaint import inpaint
|
| 97 |
|
| 98 |
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
| 99 |
-
result = inpaint(image_rgb, dilated_mask)
|
| 100 |
|
| 101 |
if save_intermediates:
|
| 102 |
bg_path = str(wdir / "background.png")
|
|
|
|
| 29 |
min_font: int = 8,
|
| 30 |
max_font: int = 72,
|
| 31 |
skip_inpaint: bool = False,
|
| 32 |
+
max_inpaint_size: int | None = None,
|
| 33 |
work_dir: str | Path | None = None,
|
| 34 |
) -> dict:
|
| 35 |
"""Convert a static image to an editable PPTX.
|
|
|
|
| 45 |
min_font: Minimum font size in points.
|
| 46 |
max_font: Maximum font size in points.
|
| 47 |
skip_inpaint: If True, skip inpainting (use original as background).
|
| 48 |
+
max_inpaint_size: If set, downscale the longer edge to this many
|
| 49 |
+
pixels before LAMA inpainting. Reduces time for large images.
|
| 50 |
work_dir: Directory for intermediate files (default: temp dir).
|
| 51 |
|
| 52 |
Returns:
|
|
|
|
| 99 |
from px_image2pptx.inpaint import inpaint
|
| 100 |
|
| 101 |
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
|
| 102 |
+
result = inpaint(image_rgb, dilated_mask, max_size=max_inpaint_size)
|
| 103 |
|
| 104 |
if save_intermediates:
|
| 105 |
bg_path = str(wdir / "background.png")
|
requirements.txt
CHANGED
|
@@ -3,7 +3,7 @@ numpy>=1.24
|
|
| 3 |
opencv-python-headless>=4.8
|
| 4 |
python-pptx>=0.6.21
|
| 5 |
paddleocr>=3.0
|
| 6 |
-
paddlepaddle>=3.0
|
| 7 |
simple-lama-inpainting>=0.1.0
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 9 |
torch>=2.0
|
|
|
|
| 3 |
opencv-python-headless>=4.8
|
| 4 |
python-pptx>=0.6.21
|
| 5 |
paddleocr>=3.0
|
| 6 |
+
paddlepaddle>=3.0,!=3.3.0
|
| 7 |
simple-lama-inpainting>=0.1.0
|
| 8 |
--extra-index-url https://download.pytorch.org/whl/cpu
|
| 9 |
torch>=2.0
|