Source code for ezflow.encoder.conv_encoder

import torch
import torch.nn as nn

from ..config import configurable
from ..modules import conv
from .build import ENCODER_REGISTRY


@ENCODER_REGISTRY.register()
class BasicConvEncoder(nn.Module):
    """
    A basic convolutional encoder built from layers with a fixed kernel size = 3
    and padding = 1 (dilation is left at the ``conv`` helper's default).
    Layers alternate between stride = 1 (even index) and stride = 2 (odd index).

    Parameters
    ----------
    in_channels : int
        Number of input channels
    config : list or tuple of int
        Number of output channels of each successive layer in the encoder
    norm : str
        Type of normalization to use. Can be None, 'batch', 'group', 'instance'
    """

    @configurable
    def __init__(
        self,
        in_channels=3,
        config=[64, 128, 256, 512],
        norm=None,
    ):
        super(BasicConvEncoder, self).__init__()

        # ``config`` is only read here, so the shared list default is never
        # mutated; ``list(...)`` copies it and also accepts tuples or any
        # other sequence of ints.
        channels = [in_channels] + list(config)

        # Single source of truth for the kernel size and the padding derived
        # from it.
        kernel_size = 3
        padding = (kernel_size - 1) // 2

        self.encoder = nn.ModuleList()

        # Walk over consecutive (input, output) channel pairs.
        for i, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:])):

            # Even-indexed layers use stride 1, odd-indexed layers use stride 2.
            stride = 1 if i % 2 == 0 else 2

            self.encoder.append(
                conv(
                    c_in,
                    c_out,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    norm=norm,
                )
            )

    @classmethod
    def from_config(cls, cfg):
        """
        Maps a config node to the keyword arguments of ``__init__``.

        Parameters
        ----------
        cfg : object
            Config node exposing ``IN_CHANNELS``, ``CONFIG`` and ``NORM``

        Returns
        -------
        dict
            Keyword arguments ``in_channels``, ``config`` and ``norm``
        """
        return {
            "in_channels": cfg.IN_CHANNELS,
            "config": cfg.CONFIG,
            "norm": cfg.NORM,
        }

    def forward(self, x):
        """
        Performs forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor

        Returns
        -------
        List[torch.Tensor]
            Outputs of the encoder layers in order. When a layer's output has
            the same shape (ignoring the first dimension) as the most recently
            stored output, it replaces that entry instead of being appended.
        """

        outputs = []

        for layer in self.encoder:
            x = layer(x)

            # Collapse consecutive outputs whose non-batch shape is unchanged:
            # only the latest tensor of such a run is kept.
            if outputs and outputs[-1].shape[1:] == x.shape[1:]:
                outputs[-1] = x
            else:
                outputs.append(x)

        return outputs


@ENCODER_REGISTRY.register()
class FlowNetConvEncoder(BasicConvEncoder):
    """
    Convolutional encoder based on the FlowNet architecture
    Used in **FlowNet: Learning Optical Flow with Convolutional Networks** (https://arxiv.org/abs/1504.06852)

    The first three layers use kernel sizes 7, 5 and 5 with stride = 2. Any
    further entries in ``config`` add layers with kernel size = 3 whose stride
    alternates between 1 and 2. The forward pass is inherited from
    ``BasicConvEncoder``.

    Parameters
    ----------
    in_channels : int
        Number of input channels
    config : list or tuple of int
        Number of output channels of each successive layer in the encoder.
        Must contain at least 3 entries.
    norm : str
        Type of normalization to use. Can be None, 'batch', 'group', 'instance'
    """

    @configurable
    def __init__(
        self,
        in_channels=3,
        config=[64, 128, 256, 512],
        norm=None,
    ):
        # NOTE(review): this runs BasicConvEncoder.__init__ with its default
        # arguments (assuming @configurable forwards a no-argument call), so a
        # default encoder is built and then replaced below. Kept as-is so that
        # module construction order and RNG consumption stay unchanged.
        super(FlowNetConvEncoder, self).__init__()

        # Copy so the shared list default is never aliased; this also accepts
        # tuples or any other sequence of ints.
        config = list(config)

        # The three fixed layers below read channels[1..3], i.e. config[0..2],
        # so three entries are required. The previous check allowed two and
        # then failed with an IndexError.
        assert (
            len(config) >= 3
        ), "FlowNetConvEncoder expects at least 3 output channels in config."

        channels = [in_channels] + config

        self.encoder = nn.ModuleList()

        # Fixed stem of (kernel_size, layer index) pairs, all with stride 2.
        # NOTE(review): ``norm`` is not passed to these three layers, so they
        # use the ``conv`` helper's default -- confirm this is intended.
        for kernel_size, i in ((7, 0), (5, 1), (5, 2)):
            self.encoder.append(
                conv(
                    channels[i],
                    channels[i + 1],
                    kernel_size=kernel_size,
                    stride=2,
                    padding=(kernel_size - 1) // 2,
                )
            )

        # Remaining layers start from the output channels of the stem.
        channels = channels[3:]

        kernel_size = 3
        padding = (kernel_size - 1) // 2

        for i, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:])):

            # Even-indexed layers use stride 1, odd-indexed layers use stride 2.
            stride = 1 if i % 2 == 0 else 2

            self.encoder.append(
                conv(
                    c_in,
                    c_out,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    norm=norm,
                )
            )

    @classmethod
    def from_config(cls, cfg):
        """
        Maps a config node to the keyword arguments of ``__init__``.

        Parameters
        ----------
        cfg : object
            Config node exposing ``IN_CHANNELS``, ``CONFIG`` and ``NORM``

        Returns
        -------
        dict
            Keyword arguments ``in_channels``, ``config`` and ``norm``
        """
        return {
            "in_channels": cfg.IN_CHANNELS,
            "config": cfg.CONFIG,
            "norm": cfg.NORM,
        }