Source code for layoutparser.models.detectron2.layoutmodel

# Copyright 2021 The Layout Parser team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union
from PIL import Image
import numpy as np
import warnings

from .catalog import MODEL_CATALOG, PathManager, LABEL_MAP_CATALOG
from ..base_layoutmodel import BaseLayoutModel
from ...elements import Rectangle, TextBlock, Layout
from ...file_utils import is_torch_cuda_available, is_detectron2_available

if is_detectron2_available():
    import detectron2.engine
    import detectron2.config


__all__ = ["Detectron2LayoutModel"]


[docs]class Detectron2LayoutModel(BaseLayoutModel): """Create a Detectron2-based Layout Detection Model Args: config_path (:obj:`str`): The path to the configuration file. model_path (:obj:`str`, None): The path to the saved weights of the model. If set, overwrite the weights in the configuration file. Defaults to `None`. label_map (:obj:`dict`, optional): The map from the model prediction (ids) to real word labels (strings). If the config is from one of the supported datasets, Layout Parser will automatically initialize the label_map. Defaults to `None`. device(:obj:`str`, optional): Whether to use cuda or cpu devices. If not set, LayoutParser will automatically determine the device to initialize the models on. extra_config (:obj:`list`, optional): Extra configuration passed to the Detectron2 model configuration. The argument will be used in the `merge_from_list <https://detectron2.readthedocs.io/modules/config.html #detectron2.config.CfgNode.merge_from_list>`_ function. Defaults to `[]`. Examples:: >>> import layoutparser as lp >>> model = lp.Detectron2LayoutModel('lp://HJDataset/faster_rcnn_R_50_FPN_3x/config') >>> model.detect(image) """ DEPENDENCIES = ["detectron2"] DETECTOR_NAME = "detectron2" MODEL_CATALOG = MODEL_CATALOG def __init__( self, config_path, model_path=None, label_map=None, extra_config=None, enforce_cpu=None, device=None, ): if enforce_cpu is not None: warnings.warn( "Setting enforce_cpu is deprecated. Please set `device` instead.", DeprecationWarning, ) if extra_config is None: extra_config = [] config_path, model_path = self.config_parser( config_path, model_path, allow_empty_path=True ) config_path = PathManager.get_local_path(config_path) if label_map is None: if config_path.startswith("lp://"): dataset_name = config_path.lstrip("lp://").split("/")[1] label_map = LABEL_MAP_CATALOG[dataset_name] else: label_map = {} cfg = detectron2.config.get_cfg() cfg.merge_from_file(config_path) cfg.merge_from_list(extra_config) if model_path is not None: model_path = PathManager.get_local_path(model_path) # Because it will be forwarded to the detectron2 paths cfg.MODEL.WEIGHTS = model_path if is_torch_cuda_available(): if device is None: device = "cuda" else: device = "cpu" cfg.MODEL.DEVICE = device self.cfg = cfg self.label_map = label_map self._create_model() def _create_model(self): self.model = detectron2.engine.DefaultPredictor(self.cfg)
[docs] def gather_output(self, outputs): instance_pred = outputs["instances"].to("cpu") layout = Layout() scores = instance_pred.scores.tolist() boxes = instance_pred.pred_boxes.tensor.tolist() labels = instance_pred.pred_classes.tolist() for score, box, label in zip(scores, boxes, labels): x_1, y_1, x_2, y_2 = box label = self.label_map.get(label, label) cur_block = TextBlock( Rectangle(x_1, y_1, x_2, y_2), type=label, score=score ) layout.append(cur_block) return layout
[docs] def detect(self, image): """Detect the layout of a given image. Args: image (:obj:`np.ndarray` or `PIL.Image`): The input image to detect. Returns: :obj:`~layoutparser.Layout`: The detected layout of the input image """ image = self.image_loader(image) outputs = self.model(image) layout = self.gather_output(outputs) return layout
[docs] def image_loader(self, image: Union["np.ndarray", "Image.Image"]): # Convert PIL Image Input if isinstance(image, Image.Image): if image.mode != "RGB": image = image.convert("RGB") image = np.array(image) return image