# sifid.py

import os

import numpy as np
import scipy
import torch
from torchvision.models import inception_v3

from utils import load_image


def _load_inception_block1(device):
    """
    Build the feature extractor used for SIFID from a pre-trained InceptionV3.

    Args:
        device: device the inception network is moved to

    Returns:
        torch.nn.Sequential made of the network's first three convolution blocks
        (the layers that output 64 feature maps), frozen and in eval mode
    """
    # pre-trained weights are required for FID; freeze everything and switch to
    # inference behaviour before picking out the layers we keep
    network = inception_v3(pretrained=True).to(device).requires_grad_(False).eval()

    # the first layers that return 64 features (found by printing the network)
    stem_layers = (
        network.Conv2d_1a_3x3,
        network.Conv2d_2a_3x3,
        network.Conv2d_2b_3x3,
    )
    return torch.nn.Sequential(*stem_layers)


def _get_mu_sigma(features):
    """
    Fit a gaussian to a set of feature vectors.

    Args:
        features: numpy array with one observation per row and one feature per column,
                  like the output of _get_activation_features()

    Returns:
        mu: numpy array, per-feature mean taken over the rows
        sigma: numpy array, covariance matrix of the features ((64x64) if the input
               features is the output of _get_activation_features())
    """
    # rowvar=False: columns are the variables, rows are the observations
    return np.mean(features, axis=0), np.cov(features, rowvar=False)


def calculate_fid(mu1, sigma1, mu2, sigma2):
    """
    Calculate Frechet Inception Distance given mu's and sigma's of 2 gaussian distributions
    FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2(sigma1 * sigma2)^(1/2))

    Args:
        mu1: numpy array, mean of the first gaussian
        sigma1: numpy array, covariance matrix of the first gaussian
        mu2: numpy array, mean of the second gaussian
        sigma2: numpy array, covariance matrix of the second gaussian

    Returns:
        fid: real-valued scalar, Frechet Inception Distance of the two gaussian distributions
    """
    # Imported locally: a bare `import scipy` is not guaranteed to make the
    # `scipy.linalg` submodule available on every SciPy version.
    from scipy.linalg import sqrtm

    mu_diff = mu1 - mu2
    mu_diff_squared = np.dot(mu_diff, mu_diff)
    sigma_mult_sqrt = sqrtm(np.dot(sigma1, sigma2))
    # sqrtm can return a complex matrix (tiny imaginary parts caused by numerical
    # error); the distance is a real quantity, so only the real part of the trace
    # of the square root is kept.
    sqrt_trace = np.real(np.trace(sigma_mult_sqrt))
    return mu_diff_squared + np.trace(sigma1 + sigma2) - 2 * sqrt_trace


class SIFIDCalculator:
    """ A class that wraps an inception model and can then be used to calculate SIFID """
    def __init__(self, device='cpu'):
        """
        Args:
            device: device the inception layers are loaded on; images are moved to this
                device before features are extracted
        """
        self.inception = _load_inception_block1(device)
        self.device = device

    def _get_activation_features(self, image):
        """
        Returns features of the image generated by the wrapped inception layers.

        Args:
            image: float image tensor that squeezes to (C, H, W)

        Returns:
            features (H'*W', C') numpy array with one row per spatial location of the
            network output; (n, 64) for the layers loaded in __init__ according to
            the loader's documentation
        """

        # properly shape the image
        image = image.squeeze()  # collapse extra dims to 3D
        image = image.unsqueeze(dim=0)  # expand to 4D (batch of one)

        # The network lives on self.device, so the input has to be there as well.
        # Gradients are never needed here; no_grad() avoids building an autograd
        # graph on top of inputs that still carry one (e.g. generator samples).
        with torch.no_grad():
            activations = self.inception(image.to(self.device))

        # get the (1, C', H', W') features and transform to (H'*W', C')
        features = activations.detach().cpu().numpy()
        height, width = features.shape[2], features.shape[3]
        return features.transpose(0, 2, 3, 1).reshape(height * width, -1)

    def calculate_sifid(self, img1, img2):
        """
        Calculates Single Image Frechet Inception Distances (SIFID) of the images as explained in the SinGAN paper.
        A gaussian is fit to the inception features of each image and the Frechet distance between the two
        gaussians is returned. Since SIFID is symmetric, order of the input images does not matter.

        Args:
            img1, img2: torch tensor that squeezes to (C, H, W). The pixel values are passed to the
                network as given; no rescaling is performed in this method.
                NOTE(review): callers presumably provide values in (-1, 1) - confirm.
        Returns:
            sifid: SIFID of the two input images
        """

        img1_features = self._get_activation_features(img1)
        img2_features = self._get_activation_features(img2)
        mu1, sigma1 = _get_mu_sigma(img1_features)
        mu2, sigma2 = _get_mu_sigma(img2_features)
        return calculate_fid(mu1, sigma1, mu2, sigma2)

    def calculate_average_sifid(self, fixed_sggen, original_image, num_samples=50):
        """
        Calculate the average SIFID between samples from a FixedSGGenView model and the original image

        Args:
            fixed_sggen: callable that takes no arguments and returns one generated sample
            original_image: image tensor every sample is compared against
            num_samples: number of samples to generate and average over

        Returns:
            mean of the num_samples SIFID values
        """
        sifids = np.zeros(num_samples)
        for i in range(num_samples):
            sample = fixed_sggen()  # generate one by one, too much memory use otherwise
            sifids[i] = self.calculate_sifid(original_image, sample)
        return sifids.mean()

    def calculate_average_sifid_folders(self, folder1, folder2):
        """
        Calculate the average SIFID over pairs of images stored in two folders.

        Every file in folder1 is paired with the file of the same name in folder2.

        Args:
            folder1: path of the folder whose files are iterated
            folder2: path of the folder holding the same-named counterpart files

        Returns:
            mean SIFID over all pairs (nan if folder1 contains no files)
        """
        sifids = []
        # the context manager closes the directory iterator even if loading fails
        with os.scandir(folder1) as entries:
            for img1_entry in entries:
                if not img1_entry.is_file():
                    continue  # sub-directories cannot be loaded as images
                img2_path = os.path.join(folder2, img1_entry.name)
                img1 = load_image(img1_entry.path, device=self.device)
                img2 = load_image(img2_path, device=self.device)
                sifids.append(self.calculate_sifid(img1, img2))
        return np.array(sifids).mean()