Source code for ezflow.models.predictor

import torch
from torchvision import io
from torchvision.transforms import Normalize

from ..utils import InputPadder
from .build import build_model


class Predictor:
    """
    A class that uses an instance of an optical flow estimation model to predict flow between two images

    The model is built once at construction time, moved to ``device`` and put
    in evaluation mode. Calling the instance runs a single forward pass.

    The config source is chosen in this order: ``model_cfg_path`` if it is
    given, otherwise the default config when ``default`` is True, otherwise
    the ``model_cfg`` object.

    Parameters
    ----------
    model_name : str
        The name of the optical flow estimation model to use
    mean : tuple of float
        Sequence of means for normalizing each image channel
    std : tuple of float
        Sequence of standard deviations for normalizing each image channel
    model_cfg_path : str, optional
        The path to the config file for the optical flow estimation model, by default None in which case the default config is used
    model_cfg : CfgNode object, optional
        The config object for the optical flow estimation model, by default None
    model_weights_path : str, optional
        The path to the weights file for the optical flow estimation model
    custom_cfg_file : bool, optional
        Whether the config file is a custom config file or one of the configs included in EzFlow, by default False
    default : bool, optional
        Whether to use the default config for the model
    device : str, optional
        The device to use for the model, by default "cpu"
    data_transform : torchvision.transforms object, optional
        The data transform to apply to the images before passing them to the model, by default None
    flow_scale : float, optional
        The scale to apply to the predicted flow, by default 1.0
    pad_divisor : int, optional
        The divisor to make the image dimensions evenly divisible by using padding, by default 1

    Raises
    ------
    AssertionError
        If none of ``model_cfg_path``, ``default`` or ``model_cfg`` selects a config
    """

    def __init__(
        self,
        model_name,
        mean,
        std,
        model_cfg_path=None,
        model_cfg=None,
        model_weights_path=None,
        custom_cfg_file=False,
        default=False,
        device="cpu",
        data_transform=None,
        flow_scale=1.0,
        pad_divisor=1,
    ):

        self.flow_scale = flow_scale
        self.pad_divisor = pad_divisor

        if model_cfg_path is not None:
            self.model = build_model(
                model_name,
                cfg_path=model_cfg_path,
                custom_cfg=custom_cfg_file,
                default=default,
                weights_path=model_weights_path,
            )

        elif default:
            self.model = build_model(
                model_name, default=True, weights_path=model_weights_path
            )

        else:
            # Raised explicitly (rather than with an ``assert`` statement) so
            # the check also runs under ``python -O``; the exception type is
            # kept as AssertionError for callers that already handle it.
            if model_cfg is None:
                raise AssertionError(
                    "Must provide either a path to a config file or a config object"
                )
            self.model = build_model(
                model_name, cfg=model_cfg, weights_path=model_weights_path
            )

        self.device = torch.device(device)
        # Actually place the model on the requested device; previously the
        # ``device`` argument was stored but never applied.
        self.model = self.model.to(self.device).eval()

        self.norm = Normalize(mean=mean, std=std)
        self.data_transform = data_transform

    def _model_device(self):
        """
        Returns the device the inputs should be moved to

        Uses the device of the model's first parameter when one exists, so the
        inputs follow the model even if it was moved after construction, and
        falls back to ``self.device`` otherwise.

        Returns
        -------
        torch.device
            The device on which to run the forward pass
        """

        parameters = getattr(self.model, "parameters", None)
        if parameters is not None:
            first_param = next(iter(parameters()), None)
            if first_param is not None:
                return first_param.device

        return self.device

    def __call__(self, img1, img2):
        """
        Runs the prediction on the two images

        Parameters
        ----------
        img1 : torch.Tensor or str
            The first image to predict flow from. A str is treated as a file
            path: the image is read, cast to float and given a leading batch
            dimension. A tensor is used as given (presumably already batched
            -- confirm against the model's expected input).
        img2 : torch.Tensor or str
            The second image to predict flow to, handled like ``img1``

        Returns
        -------
        torch.Tensor
            The predicted flow, unpadded and multiplied by ``flow_scale``
        """

        if isinstance(img1, str):
            img1 = io.read_image(img1).float()
            img1 = img1.unsqueeze(dim=0)

        if isinstance(img2, str):
            img2 = io.read_image(img2).float()
            img2 = img2.unsqueeze(dim=0)

        if self.data_transform:
            img1 = self.data_transform(img1)
            img2 = self.data_transform(img2)

        img1 = self.norm(img1)
        img2 = self.norm(img2)

        # Inputs must live on the same device as the model weights.
        target_device = self._model_device()
        img1 = img1.to(target_device)
        img2 = img2.to(target_device)

        padder = InputPadder(img1.shape, divisor=self.pad_divisor)
        img1, img2 = padder.pad(img1, img2)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output = self.model(img1, img2)

        flow_pred = padder.unpad(output["flow_upsampled"])
        flow_pred = flow_pred * self.flow_scale

        return flow_pred