import os
import yaml
import json
import copy
import shutil
import paddle
import paddle.nn as nn
from paddle.jit import to_static

from collections import OrderedDict
from packaging import version
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from ppocr.modeling.architectures import build_model
from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import load_model
from ppocr.utils.logging import get_logger


def represent_dictionary_order(self, dict_data):
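    """Represent an ``OrderedDict`` as a plain YAML mapping, preserving key order."""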
    return self.represent_mapping("tag:yaml.org,2002:map", dict_data.items())


def setup_orderdict():
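    """Register the representer above so ``yaml.dump`` keeps OrderedDict key order."""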
    yaml.add_representer(OrderedDict, represent_dictionary_order)


def dump_infer_config(config, path, logger):
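    """Write the inference-time configuration to a YAML file at ``path``.

    The dump contains the model name, the per-backend dynamic shapes (when
    available), the Eval pre-processing pipeline, and the post-process
    parameters including the character dictionary or tokenizer files.
    """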
    setup_orderdict()
    infer_cfg = OrderedDict()
    if not os.path.exists(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))
    model_name = None
    if config["Global"].get("model_name", None):
        model_name = config["Global"]["model_name"]
    infer_cfg["Global"] = {"model_name": model_name}
    if config["Global"].get("uniform_output_enabled", True):
        arch_config = config["Architecture"]
        if arch_config["algorithm"] in ["SVTR_LCNet", "SVTR_HGNet"]:
            common_dynamic_shapes = {
                "x": [[1, 3, 48, 160], [1, 3, 48, 320], [8, 3, 48, 3200]]
            }
        elif arch_config["model_type"] == "det":
            common_dynamic_shapes = {
                "x": [[1, 3, 32, 32], [1, 3, 736, 736], [1, 3, 4000, 4000]]
            }
        elif arch_config["algorithm"] == "SLANet":
            if model_name == "SLANet_plus":
                common_dynamic_shapes = {
                    "x": [[1, 3, 32, 32], [1, 3, 64, 448], [1, 3, 488, 488]]
                }
            else:
                common_dynamic_shapes = {
                    "x": [[1, 3, 32, 32], [1, 3, 64, 448], [8, 3, 488, 488]]
                }
        elif arch_config["algorithm"] == "SLANeXt":
            common_dynamic_shapes = {
                "x": [[1, 3, 512, 512], [1, 3, 512, 512], [1, 3, 512, 512]]
            }
        elif arch_config["algorithm"] == "LaTeXOCR":
            common_dynamic_shapes = {
                "x": [[1, 1, 32, 32], [1, 1, 64, 448], [1, 1, 192, 672]]
            }
        elif arch_config["algorithm"] == "UniMERNet":
            common_dynamic_shapes = {
                "x": [[1, 1, 192, 672], [1, 1, 192, 672], [8, 1, 192, 672]]
            }
        elif arch_config["algorithm"] in ["PP-FormulaNet-L", "PP-FormulaNet_plus-L"]:
            common_dynamic_shapes = {
                "x": [[1, 1, 768, 768], [1, 1, 768, 768], [8, 1, 768, 768]]
            }
        elif arch_config["algorithm"] in [
            "PP-FormulaNet-S",
            "PP-FormulaNet_plus-S",
            "PP-FormulaNet_plus-M",
        ]:
            common_dynamic_shapes = {
                "x": [[1, 1, 384, 384], [1, 1, 384, 384], [8, 1, 384, 384]]
            }
        else:
            common_dynamic_shapes = None

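        # Record the min/opt/max dynamic shapes for both high-performance inference
        # backends; the TensorRT backend uses the key "dynamic_shapes", while
        # Paddle Inference uses "trt_dynamic_shapes".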
        backend_keys = ["paddle_infer", "tensorrt"]
        hpi_config = {
            "backend_configs": {
                key: {
                    (
                        "dynamic_shapes" if key == "tensorrt" else "trt_dynamic_shapes"
                    ): common_dynamic_shapes
                }
                for key in backend_keys
            }
        }
        if common_dynamic_shapes:
            infer_cfg["Hpi"] = hpi_config

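    # The Eval transforms double as the inference-time pre-processing pipeline.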
| infer_cfg["PreProcess"] = {"transform_ops": config["Eval"]["dataset"]["transforms"]} |
| postprocess = OrderedDict() |
| for k, v in config["PostProcess"].items(): |
| if config["Architecture"].get("algorithm") in [ |
| "LaTeXOCR", |
| "UniMERNet", |
| "PP-FormulaNet-L", |
| "PP-FormulaNet-S", |
| "PP-FormulaNet_plus-L", |
| "PP-FormulaNet_plus-M", |
| "PP-FormulaNet_plus-S", |
| ]: |
| if k != "rec_char_dict_path": |
| postprocess[k] = v |
| else: |
| postprocess[k] = v |
|
|
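    # Embed the character dictionary (or the tokenizer files, for formula models)
    # so inference does not depend on the original dictionary path.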
| if config["Architecture"].get("algorithm") in ["LaTeXOCR"]: |
| tokenizer_file = config["Global"].get("rec_char_dict_path") |
| if tokenizer_file is not None: |
| with open(tokenizer_file, encoding="utf-8") as tokenizer_config_handle: |
| character_dict = json.load(tokenizer_config_handle) |
| postprocess["character_dict"] = character_dict |
| elif config["Architecture"].get("algorithm") in [ |
| "UniMERNet", |
| "PP-FormulaNet-L", |
| "PP-FormulaNet-S", |
| "PP-FormulaNet_plus-L", |
| "PP-FormulaNet_plus-M", |
| "PP-FormulaNet_plus-S", |
| ]: |
| tokenizer_file = config["Global"].get("rec_char_dict_path") |
| fast_tokenizer_file = os.path.join(tokenizer_file, "tokenizer.json") |
| tokenizer_config_file = os.path.join(tokenizer_file, "tokenizer_config.json") |
| postprocess["character_dict"] = {} |
        # os.path.join never returns None, so guard on the files actually existing.
        if os.path.exists(fast_tokenizer_file):
            with open(fast_tokenizer_file, encoding="utf-8") as tokenizer_config_handle:
                character_dict = json.load(tokenizer_config_handle)
                postprocess["character_dict"]["fast_tokenizer_file"] = character_dict
        if os.path.exists(tokenizer_config_file):
            with open(
                tokenizer_config_file, encoding="utf-8"
            ) as tokenizer_config_handle:
                character_dict = json.load(tokenizer_config_handle)
                postprocess["character_dict"]["tokenizer_config_file"] = character_dict
    else:
        if config["Global"].get("character_dict_path") is not None:
            with open(config["Global"]["character_dict_path"], encoding="utf-8") as f:
                lines = f.readlines()
                character_dict = [line.strip("\n") for line in lines]
                postprocess["character_dict"] = character_dict

    infer_cfg["PostProcess"] = postprocess

    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(infer_cfg, f, default_flow_style=False, allow_unicode=True)
    logger.info("Exported inference config file to {}".format(path))


def dynamic_to_static(model, arch_config, logger, input_shape=None):
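    """Trace ``model`` into a static graph with ``paddle.jit.to_static``.

    The input spec (shape, dtype and any extra inputs) is chosen according to the
    algorithm and model type in ``arch_config``; ``input_shape`` overrides the
    default spatial size for algorithms such as SVTR, CPPD and ABINet.
    """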
| if arch_config["algorithm"] == "SRN": |
| max_text_length = arch_config["Head"]["max_text_length"] |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 1, 64, 256], dtype="float32"), |
| [ |
| paddle.static.InputSpec(shape=[None, 256, 1], dtype="int64"), |
| paddle.static.InputSpec( |
| shape=[None, max_text_length, 1], dtype="int64" |
| ), |
| paddle.static.InputSpec( |
| shape=[None, 8, max_text_length, max_text_length], dtype="int64" |
| ), |
| paddle.static.InputSpec( |
| shape=[None, 8, max_text_length, max_text_length], dtype="int64" |
| ), |
| ], |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "SAR": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"), |
| [paddle.static.InputSpec(shape=[None], dtype="float32")], |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] in ["SVTR_LCNet", "SVTR_HGNet"]: |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 48, -1], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] in ["SVTR", "CPPD"]: |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "PREN": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["model_type"] == "sr": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 16, 64], dtype="float32") |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "ViTSTR": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 1, 224, 224], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "ABINet": |
| if not input_shape: |
| input_shape = [3, 32, 128] |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] in ["NRTR", "SPIN", "RFL"]: |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 1, 32, 100], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] in ["SATRN"]: |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 32, 100], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "VisionLAN": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "RobustScanner": |
| max_text_length = arch_config["Head"]["max_text_length"] |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"), |
| [ |
| paddle.static.InputSpec( |
| shape=[ |
| None, |
| ], |
| dtype="float32", |
| ), |
| paddle.static.InputSpec(shape=[None, max_text_length], dtype="int64"), |
| ], |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "CAN": |
| other_shape = [ |
| [ |
| paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"), |
| paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"), |
| paddle.static.InputSpec( |
| shape=[None, arch_config["Head"]["max_text_length"]], dtype="int64" |
| ), |
| ] |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "LaTeXOCR": |
| other_shape = [ |
| paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"), |
| ] |
| model = to_static(model, input_spec=other_shape) |
| elif arch_config["algorithm"] == "UniMERNet": |
| model = paddle.jit.to_static( |
| model, |
| input_spec=[ |
| paddle.static.InputSpec(shape=[-1, 1, 192, 672], dtype="float32") |
| ], |
| full_graph=True, |
| ) |
| elif arch_config["algorithm"] == "SLANeXt": |
| model = paddle.jit.to_static( |
| model, |
| input_spec=[ |
| paddle.static.InputSpec(shape=[-1, 3, 512, 512], dtype="float32") |
| ], |
| full_graph=True, |
| ) |
| elif arch_config["algorithm"] in ["PP-FormulaNet-L", "PP-FormulaNet_plus-L"]: |
| model = paddle.jit.to_static( |
| model, |
| input_spec=[ |
| paddle.static.InputSpec(shape=[-1, 1, 768, 768], dtype="float32") |
| ], |
| full_graph=True, |
| ) |
| elif arch_config["algorithm"] in [ |
| "PP-FormulaNet-S", |
| "PP-FormulaNet_plus-S", |
| "PP-FormulaNet_plus-M", |
| ]: |
| model = paddle.jit.to_static( |
| model, |
| input_spec=[ |
| paddle.static.InputSpec(shape=[-1, 1, 384, 384], dtype="float32") |
| ], |
| full_graph=True, |
| ) |
|
|
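    # KIE models (LayoutLM family) take token ids, bounding boxes and mask tensors,
    # plus an image tensor unless the visual backbone is disabled.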
| elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]: |
| input_spec = [ |
| paddle.static.InputSpec(shape=[None, 512], dtype="int64"), |
| paddle.static.InputSpec(shape=[None, 512, 4], dtype="int64"), |
| paddle.static.InputSpec(shape=[None, 512], dtype="int64"), |
| paddle.static.InputSpec(shape=[None, 512], dtype="int64"), |
| paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype="int64"), |
| ] |
| if "Re" in arch_config["Backbone"]["name"]: |
| input_spec.extend( |
| [ |
| paddle.static.InputSpec( |
| shape=[None, 512, 3], dtype="int64" |
| ), |
| paddle.static.InputSpec( |
| shape=[None, None, 2], dtype="int64" |
| ), |
| ] |
| ) |
| if model.backbone.use_visual_backbone is False: |
| input_spec.pop(4) |
| model = to_static(model, input_spec=[input_spec]) |
| else: |
| infer_shape = [3, -1, -1] |
| if arch_config["model_type"] == "rec": |
| infer_shape = [3, 32, -1] |
| if ( |
| "Transform" in arch_config |
| and arch_config["Transform"] is not None |
| and arch_config["Transform"]["name"] == "TPS" |
| ): |
| logger.info( |
| "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training" |
| ) |
| infer_shape[-1] = 100 |
| elif arch_config["model_type"] == "table": |
| infer_shape = [3, 488, 488] |
| if arch_config["algorithm"] == "TableMaster": |
| infer_shape = [3, 480, 480] |
| if arch_config["algorithm"] == "SLANet": |
| infer_shape = [3, -1, -1] |
| model = to_static( |
| model, |
| input_spec=[ |
| paddle.static.InputSpec(shape=[None] + infer_shape, dtype="float32") |
| ], |
| ) |
|
|
| if ( |
| arch_config["model_type"] != "sr" |
| and arch_config["Backbone"]["name"] == "PPLCNetV3" |
| ): |
| |
| for layer in model.sublayers(): |
| if hasattr(layer, "rep") and not getattr(layer, "is_repped"): |
| layer.rep() |
| return model |
|


def export_single_model(
    model,
    arch_config,
    save_path,
    logger,
    yaml_path,
    config,
    input_shape=None,
    quanter=None,
):
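    """Convert ``model`` to a static graph and save the inference model to ``save_path``.

    When ``quanter`` is provided the quantized model is saved instead; otherwise
    the export path depends on ``Global.export_with_pir`` and the installed
    Paddle version.
    """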
    model = dynamic_to_static(model, arch_config, logger, input_shape)

    if quanter is None:
        try:
            # The encryption module is optional; its absence is not an error.
            import encryption
        except ModuleNotFoundError:
            print("Skipping import of the encryption module")
        paddle_version = version.parse(paddle.__version__)
        if config["Global"].get("export_with_pir", True):
            assert (
                paddle_version >= version.parse("3.0.0b2")
                or paddle_version == version.parse("0.0.0")
            ) and os.environ.get("FLAGS_enable_pir_api", None) not in ["0", "False"]
            paddle.jit.save(model, save_path)
        else:
            if paddle_version >= version.parse(
                "3.0.0b2"
            ) or paddle_version == version.parse("0.0.0"):
                model.forward.rollback()
                with paddle.pir_utils.OldIrGuard():
                    model = dynamic_to_static(model, arch_config, logger, input_shape)
                    paddle.jit.save(model, save_path)
            else:
                paddle.jit.save(model, save_path)
    else:
        quanter.save_quantized_model(model, save_path)
    logger.info("inference model is saved to {}".format(save_path))
    return


def convert_bn(model):
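    """Recursively replace ``nn.SyncBatchNorm`` sublayers with ``nn.BatchNorm2D``.

    The replacement copies the original parameters and statistics, so the exported
    model no longer depends on multi-card synchronization.
    """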
    for n, m in model.named_children():
        if isinstance(m, nn.SyncBatchNorm):
            bn = nn.BatchNorm2D(
                m._num_features, m._momentum, m._epsilon, m._weight_attr, m._bias_attr
            )
            bn.set_dict(m.state_dict())
            setattr(model, n, bn)
        else:
            convert_bn(m)


def export(config, base_model=None, save_path=None):
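    """Build the model (or reuse ``base_model``), load trained weights when needed,
    and export it for inference.

    Only rank 0 exports. The inference model (or each distillation sub-model) is
    saved under ``save_path`` or ``Global.save_inference_dir`` together with an
    ``inference.yml`` produced by ``dump_infer_config``.
    """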
    if paddle.distributed.get_rank() != 0:
        return
    logger = get_logger()
    # build the post-process class first so the character dictionary size is known
    post_process_class = build_post_process(config["PostProcess"], config["Global"])

    # adjust the head output channels of recognition models from the character dict
    if hasattr(post_process_class, "character"):
        char_num = len(getattr(post_process_class, "character"))
        if config["Architecture"]["algorithm"] in [
            "Distillation",
        ]:
            for key in config["Architecture"]["Models"]:
                if (
                    config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead"
                ):
                    out_channels_list = {}
                    if config["PostProcess"]["name"] == "DistillationSARLabelDecode":
                        char_num = char_num - 2
                    if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode":
                        char_num = char_num - 3
                    out_channels_list["CTCLabelDecode"] = char_num
                    out_channels_list["SARLabelDecode"] = char_num + 2
                    out_channels_list["NRTRLabelDecode"] = char_num + 3
                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels_list"
                    ] = out_channels_list
                else:
                    config["Architecture"]["Models"][key]["Head"][
                        "out_channels"
                    ] = char_num
                # only the final tensor needs to be exported for inference
                config["Architecture"]["Models"][key]["return_all_feats"] = False
        elif config["Architecture"]["Head"]["name"] == "MultiHead":
            out_channels_list = {}
            char_num = len(getattr(post_process_class, "character"))
            if config["PostProcess"]["name"] == "SARLabelDecode":
                char_num = char_num - 2
            if config["PostProcess"]["name"] == "NRTRLabelDecode":
                char_num = char_num - 3
            out_channels_list["CTCLabelDecode"] = char_num
            out_channels_list["SARLabelDecode"] = char_num + 2
            out_channels_list["NRTRLabelDecode"] = char_num + 3
            config["Architecture"]["Head"]["out_channels_list"] = out_channels_list
        else:
            config["Architecture"]["Head"]["out_channels"] = char_num

    # sr models run their transform in inference mode
    if config["Architecture"]["model_type"] == "sr":
        config["Architecture"]["Transform"]["infer_mode"] = True

    # formula recognition models need explicit export flags
    if config["Architecture"].get("algorithm") in ["LaTeXOCR"]:
        config["Architecture"]["Backbone"]["is_predict"] = True
        config["Architecture"]["Backbone"]["is_export"] = True
        config["Architecture"]["Head"]["is_export"] = True
    if config["Architecture"].get("algorithm") in ["UniMERNet"]:
        config["Architecture"]["Backbone"]["is_export"] = True
        config["Architecture"]["Head"]["is_export"] = True
    if config["Architecture"].get("algorithm") in [
        "PP-FormulaNet-S",
        "PP-FormulaNet-L",
        "PP-FormulaNet_plus-S",
        "PP-FormulaNet_plus-M",
        "PP-FormulaNet_plus-L",
    ]:
        config["Architecture"]["Head"]["is_export"] = True
    if base_model is not None:
        model = base_model
        if isinstance(model, paddle.DataParallel):
            model = copy.deepcopy(model._layers)
        else:
            model = copy.deepcopy(model)
    else:
        model = build_model(config["Architecture"])
        load_model(config, model, model_type=config["Architecture"]["model_type"])
    convert_bn(model)
    model.eval()

    if not save_path:
        save_path = config["Global"]["save_inference_dir"]
    yaml_path = os.path.join(save_path, "inference.yml")

    arch_config = config["Architecture"]

    if (
        arch_config["algorithm"] in ["SVTR", "CPPD"]
        and arch_config["Head"]["name"] != "MultiHead"
    ):
        input_shape = config["Eval"]["dataset"]["transforms"][-2]["SVTRRecResizeImg"][
            "image_shape"
        ]
    elif arch_config["algorithm"].lower() == "ABINet".lower():
        rec_rs = [
            c
            for c in config["Eval"]["dataset"]["transforms"]
            if "ABINetRecResizeImg" in c
        ]
        input_shape = rec_rs[0]["ABINetRecResizeImg"]["image_shape"] if rec_rs else None
    else:
        input_shape = None
    dump_infer_config(config, yaml_path, logger)
    if arch_config["algorithm"] in [
        "Distillation",
    ]:
        archs = list(arch_config["Models"].values())
        for idx, name in enumerate(model.model_name_list):
            sub_model_save_path = os.path.join(save_path, name, "inference")
            export_single_model(
                model.model_list[idx],
                archs[idx],
                sub_model_save_path,
                logger,
                yaml_path,
                config,
            )
    else:
        save_path = os.path.join(save_path, "inference")
        export_single_model(
            model,
            arch_config,
            save_path,
            logger,
            yaml_path,
            config,
            input_shape=input_shape,
        )