| | import torch |
| | import detectron2.data.transforms as T |
| | from detectron2.checkpoint import DetectionCheckpointer |
| | from detectron2.data import ( |
| | MetadataCatalog, |
| | ) |
| | from detectron2.modeling import build_model |
| |
|
| |
|
# Public names exported by a star-import of this module.
__all__ = ["DefaultPredictor"]
| |
|
| |
|
class DefaultPredictor:
    """
    Simple end-to-end predictor built from a config, meant to run on a single
    device for one input image (plus a task string) at a time.

    On top of calling the model directly, this class:

    1. Loads the checkpoint given by `cfg.MODEL.WEIGHTS`.
    2. Always takes a BGR image as input and applies the conversion defined
       by `cfg.INPUT.FORMAT`.
    3. Applies the resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`.
    4. Takes one input image and returns a single output instead of a batch.

    It exists for simple demo purposes and performs the steps above
    automatically. It is not meant for benchmarks or for complicated
    inference logic; for anything more involved, use this source code as an
    example of how to build and call the model manually.

    Attributes:
        metadata (Metadata): the metadata of the underlying dataset, obtained
            from the first entry of `cfg.DATASETS.TEST`. The attribute is only
            set when `cfg.DATASETS.TEST` is non-empty.

    Examples:
    ::
        pred = DefaultPredictor(cfg)
        inputs = cv2.imread("input.jpg")
        outputs = pred(inputs, task)  # `task` is formatted into "The task is {task}"
    """

    def __init__(self, cfg):
        """
        Args:
            cfg: the config used to build the model; a clone is kept as
                `self.cfg` and the model is built from that clone.
        """
        self.cfg = cfg.clone()

        # Build the network and switch it to evaluation mode.
        self.model = build_model(self.cfg)
        self.model.eval()

        # Metadata is looked up only if at least one test dataset is configured.
        test_datasets = cfg.DATASETS.TEST
        if len(test_datasets):
            self.metadata = MetadataCatalog.get(test_datasets[0])

        # Restore the weights referenced by the config into the built model.
        DetectionCheckpointer(self.model).load(cfg.MODEL.WEIGHTS)

        # Test-time resize: the same value is passed for both ends of the
        # short-edge list, together with the configured maximum size.
        short_edge = cfg.INPUT.MIN_SIZE_TEST
        self.aug = T.ResizeShortestEdge(
            [short_edge, short_edge],
            cfg.INPUT.MAX_SIZE_TEST,
        )

        # Channel order the model expects; only these two values are accepted.
        self.input_format = cfg.INPUT.FORMAT
        assert self.input_format in ("RGB", "BGR"), self.input_format

    def __call__(self, original_image, task):
        """
        Args:
            original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
            task: value formatted into the string "The task is {task}", which
                is passed to the model under the "task" key.

        Returns:
            predictions (dict):
                the output of the model for one image only.
                See :doc:`/tutorials/models` for details about the format.
        """
        with torch.no_grad():
            # The input is BGR; flip the channel axis when the model wants RGB.
            if self.input_format == "RGB":
                original_image = original_image[:, :, ::-1]

            # Height and width are taken before resizing and handed to the model.
            height, width = original_image.shape[:2]

            # Resize, then convert HWC -> CHW float32 for the tensor.
            transform = self.aug.get_transform(original_image)
            resized = transform.apply_image(original_image)
            chw = resized.astype("float32").transpose(2, 0, 1)

            sample = {
                "image": torch.as_tensor(chw),
                "height": height,
                "width": width,
                "task": f"The task is {task}",
            }

            # The model takes a batch (list); unwrap the single result.
            return self.model([sample])[0]