Showing 10 changed files with 12,478 additions and 0 deletions.
Empty file.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
@@ -0,0 +1,64 @@
import numpy as np
import os
from PIL import Image
import torch
from torch.utils.data import Dataset


def load_image(file):
    return Image.open(file)


def image_path(root, basename, extension):
    return os.path.join(root, basename + extension)


def image_basename(filename):
    return os.path.basename(os.path.splitext(filename)[0])


def read_img_list(filename):
    with open(filename) as f:
        img_list = []
        for line in f:
            img_list.append(line[:-1])
    return img_list[:50]


def extract_class_mask(label, c):
    # Get one-hot encoding for class c
    if c == 0:
        # Background channel: treat background (0) and ignore (255) pixels as 1
        encoded_label = Image.eval(label, lambda p: 0 if (p != 0 and p != 255) else 1)
    else:
        encoded_label = Image.eval(label, lambda p: 0 if p != c else 1)

    return encoded_label


class PascalVOC(Dataset):

    TRAIN_LIST = "lists/train.txt"
    VAL_LIST = "lists/val.txt"

    def __init__(self, root, data_root, transform=None, co_transform=None, train_phase=True, numClasses=20):
        self.root = root
        self.data_root = data_root
        self.images_root = os.path.join(self.data_root, 'img')
        self.labels_root = os.path.join(self.data_root, 'cls')
        self.img_list = (read_img_list(os.path.join(self.root, 'datasets', self.TRAIN_LIST)) if train_phase
                         else read_img_list(os.path.join(self.root, 'datasets', self.VAL_LIST)))

        self.transform = transform
        self.co_transform = co_transform

    def __getitem__(self, index):
        filename = self.img_list[index]

        with open(os.path.join(self.images_root, filename + '.jpg'), 'rb') as f:
            image = load_image(f).convert('RGB')
        with open(os.path.join(self.labels_root, filename + '.png'), 'rb') as f:
            label = load_image(f).convert('P')

        # Apply random crop and resize to both label and image
        image, label = self.co_transform((image, label))
        # TODO: Add this as a transform for the label
        # Map the ignore index (255) to background (0)
        label = Image.eval(label, lambda p: 0 if p == 255 else p)
        image = self.transform(image)
        # PIL reports size as (width, height); converting the image itself
        # gives an (H, W) array aligned with the transformed image tensor
        label = torch.from_numpy(np.array(label, dtype=np.int64))
        return image, label

    def __len__(self):
        return len(self.img_list)
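A minimal usage sketch (not part of this commit) for the dataset above, mirroring the pipeline in the training script further down; the two root paths are placeholders, and the expected shapes assume RandomSizedCrop produces 321x321 image/label pairs:

from torchvision import transforms
from torch.utils.data import DataLoader
from datasets.pascalvoc import PascalVOC
from utils.transforms import RandomSizedCrop

REPO_ROOT = '/path/to/adversarial_segmentation'  # placeholder
VOC_ROOT = '/path/to/PASCALVOC'                  # placeholder

transform = transforms.Compose([transforms.ToTensor()])
co_transform = transforms.Compose([RandomSizedCrop((321, 321))])
trainset = PascalVOC(REPO_ROOT, VOC_ROOT, transform=transform, co_transform=co_transform)

image, label = trainset[0]
print(image.size())  # expected: (3, 321, 321)
print(label.size())  # expected: (321, 321)
print(label.max())   # VOC class indices should stay within [0, 20]

trainloader = DataLoader(trainset, batch_size=10, shuffle=True, num_workers=2)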
Empty file.
@@ -0,0 +1,210 @@
import torch.nn as nn
import math
import torch
import numpy as np

affine_par = True


def outS(i):
    # Output size of the DeepLab score map for a given input size
    # (roughly input/8), e.g. outS(321) == 41
    i = int(i)
    i = (i + 1) // 2
    i = int(np.ceil((i + 1) / 2.0))
    i = (i + 1) // 2
    return i


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, affine=affine_par)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, affine=affine_par)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, dilation_=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)  # change
        self.bn1 = nn.BatchNorm2d(planes, affine=affine_par)
        for i in self.bn1.parameters():
            i.requires_grad = False
        padding = 1
        if dilation_ == 2:
            padding = 2
        elif dilation_ == 4:
            padding = 4
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,  # change
                               padding=padding, bias=False, dilation=dilation_)
        self.bn2 = nn.BatchNorm2d(planes, affine=affine_par)
        for i in self.bn2.parameters():
            i.requires_grad = False
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4, affine=affine_par)
        for i in self.bn3.parameters():
            i.requires_grad = False
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Classifier_Module(nn.Module):

    def __init__(self, dilation_series, padding_series, NoLabels):
        super(Classifier_Module, self).__init__()
        self.conv2d_list = nn.ModuleList()
        for dilation, padding in zip(dilation_series, padding_series):
            self.conv2d_list.append(nn.Conv2d(2048, NoLabels, kernel_size=3, stride=1,
                                              padding=padding, dilation=dilation, bias=True))

        for m in self.conv2d_list:
            m.weight.data.normal_(0, 0.01)

    def forward(self, x):
        # Sum the score maps of the parallel atrous branches (ASPP fusion)
        out = self.conv2d_list[0](x)
        for i in range(len(self.conv2d_list) - 1):
            out += self.conv2d_list[i + 1](x)
        return out


class ResNet(nn.Module):
    def __init__(self, block, layers, NoLabels):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, affine=affine_par)
        for i in self.bn1.parameters():
            i.requires_grad = False
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)  # change
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation__=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation__=4)
        self.layer5 = self._make_pred_layer(Classifier_Module, [6, 12, 18, 24], [6, 12, 18, 24], NoLabels)
        # Bring the 1/8-resolution score map back to the input resolution:
        # upsample by 8, then trim the border with an 8x8 valid convolution
        # (e.g. 41 -> 328 -> 321 for a 321x321 input)
        self.up = nn.UpsamplingBilinear2d(scale_factor=8)
        self.down = nn.Conv2d(NoLabels, NoLabels, kernel_size=8)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, 0.01)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
                # for i in m.parameters():
                #     i.requires_grad = False

    def _make_layer(self, block, planes, blocks, stride=1, dilation__=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, affine=affine_par),
            )
            for i in downsample._modules['1'].parameters():
                i.requires_grad = False
        layers = []
        layers.append(block(self.inplanes, planes, stride, dilation_=dilation__, downsample=downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, dilation_=dilation__))

        return nn.Sequential(*layers)

    def _make_pred_layer(self, block, dilation_series, padding_series, NoLabels):
        return block(dilation_series, padding_series, NoLabels)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.up(x)
        x = self.down(x)
        return x


class MS_Deeplab(nn.Module):
    def __init__(self, block, NoLabels):
        super(MS_Deeplab, self).__init__()
        self.Scale = ResNet(block, [3, 4, 23, 3], NoLabels)  # changed to fix #4

    def forward(self, x):
        input_size = x.size()[2]
        # self.interp1 = nn.UpsamplingBilinear2d(size=(int(input_size*0.75)+1, int(input_size*0.75)+1))
        # self.interp2 = nn.UpsamplingBilinear2d(size=(int(input_size*0.5)+1, int(input_size*0.5)+1))
        # self.interp3 = nn.UpsamplingBilinear2d(size=(outS(input_size), outS(input_size)))
        out = self.Scale(x)  # for original scale
        # out.append(self.interp3(self.Scale(x2)))  # for 0.75x scale
        # out.append(self.Scale(x3))  # for 0.5x scale
        #
        # x2Out_interp = out[1]
        # x3Out_interp = self.interp3(out[2])
        # temp1 = torch.max(out[0], x2Out_interp)
        # out.append(torch.max(temp1, x3Out_interp))

        # out = self.Scale(x)
        return out


def Res_Deeplab(NoLabels=21):
    model = MS_Deeplab(Bottleneck, NoLabels)
    return model
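As a quick shape check for the generator (a sketch under assumptions, not code from this commit): Res_Deeplab is built with randomly initialised weights and fed a dummy 321x321 batch; with the stride and dilation settings above, the score map is upsampled by 8 and trimmed by the 8x8 convolution, so the output should come back at the input resolution with one channel per label. Batch size and input size are arbitrary illustrative choices.

import torch
from torch.autograd import Variable
import generators.deeplabv2 as deeplabv2

model = deeplabv2.Res_Deeplab(NoLabels=21)
model.eval()  # keep BatchNorm in inference mode for the shape check

x = Variable(torch.randn(1, 3, 321, 321))  # dummy image batch
out = model(x)
print(out.size())  # expected: (1, 21, 321, 321)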
@@ -0,0 +1,59 @@
from __future__ import unicode_literals

import torch
from datasets.pascalvoc import PascalVOC
import generators.deeplabv2 as deeplabv2
import discriminators.discriminator as dis
from torchvision import transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader
from utils.transforms import RandomSizedCrop
import torch.nn.functional as F
import torch.nn as nn
from functools import reduce
import torch.optim as optim
import os

MAX_ITR = 1000


def main():
    home_dir = os.path.join(os.environ["HOME"], "adversarial_segmentation")
    pascal_base_dir = os.path.join(os.environ["RCAC_SCRATCH"], "PASCALVOC")
    transform = transforms.Compose([transforms.ToTensor()])
    co_transform = transforms.Compose([RandomSizedCrop((321, 321))])
    trainset = PascalVOC(home_dir, pascal_base_dir, transform=transform, co_transform=co_transform)
    print("Trainset created.")
    trainloader = DataLoader(trainset, batch_size=10, shuffle=True, num_workers=2)
    print('TrainLoader created')

    generator = deeplabv2.Res_Deeplab().cuda()

    saved_net = torch.load(os.path.join(home_dir, 'data', 'MS_DeepLab_resnet_pretrained_COCO_init.pth'))
    new_state = generator.state_dict()
    new_state.update(saved_net)
    generator.load_state_dict(new_state)

    print('Generator Net created')

    # Setup the optimizer
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, generator.parameters()),
                          lr=0.00025, momentum=0.9, weight_decay=0.0001, nesterov=True)
    optimizer.zero_grad()

    print('Training Going to Start')
    for iteration in range(0, MAX_ITR):

        for batch_id, (img, mask) in enumerate(trainloader):
            optimizer.zero_grad()
            img, mask = Variable(img.cuda()), Variable(mask.cuda())
            out_img_map = generator(img)
            out_img_map = nn.LogSoftmax()(out_img_map)
            L_ce = nn.NLLLoss2d()
            loss = L_ce(out_img_map, mask.long())
            loss.backward()
            optimizer.step()
            print("Iteration: ", iteration, "Loss: ", loss.data)
        # if iteration % SNAPSHOT_ITER == 0:
        #     Take a snapshot of the network


if __name__ == '__main__':
    main()
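The snapshot step in the loop above is left as a commented TODO. A plausible way to fill it in, sketched here rather than taken from the commit (SNAPSHOT_ITER, the snapshots directory, and the filename pattern are all assumptions):

import os
import torch

SNAPSHOT_ITER = 100  # assumed snapshot frequency

def save_snapshot(generator, home_dir, iteration):
    # Persist only the weights; restore later with generator.load_state_dict()
    snapshot_dir = os.path.join(home_dir, 'snapshots')  # assumed location
    if not os.path.exists(snapshot_dir):
        os.makedirs(snapshot_dir)
    torch.save(generator.state_dict(),
               os.path.join(snapshot_dir, 'generator_iter_{}.pth'.format(iteration)))

# inside main(), after the inner batch loop:
#     if iteration % SNAPSHOT_ITER == 0:
#         save_snapshot(generator, home_dir, iteration)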
Empty file.
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# Martin Kersner, [email protected]
# 2016/03/17

from __future__ import print_function
import os
import sys
import glob
from PIL import Image as PILImage

from utils import mat2png_hariharan


def main():
    input_path, output_path = process_arguments(sys.argv)

    if os.path.isdir(input_path) and os.path.isdir(output_path):
        mat_files = glob.glob(os.path.join(input_path, '*.mat'))
        convert_mat2png(mat_files, output_path)
    else:
        help('Input or output path does not exist!\n')


def process_arguments(argv):
    num_args = len(argv)

    input_path = None
    output_path = None

    if num_args == 3:
        input_path = argv[1]
        output_path = argv[2]
    else:
        help()

    return input_path, output_path


def convert_mat2png(mat_files, output_path):
    if not mat_files:
        help('Input directory does not contain any Matlab files!\n')

    for mat in mat_files:
        numpy_img = mat2png_hariharan(mat)
        pil_img = PILImage.fromarray(numpy_img)
        pil_img.save(os.path.join(output_path, modify_image_name(mat, 'png')))


# Extract name of image from given path, replace its extension with specified one
# and return new name only, not path.
def modify_image_name(path, ext):
    return os.path.basename(path).split('.')[0] + '.' + ext


def help(msg=''):
    print(msg +
          'Usage: python mat2png.py INPUT_PATH OUTPUT_PATH\n'
          'INPUT_PATH denotes path containing Matlab files for conversion.\n'
          'OUTPUT_PATH denotes path where converted PNG files are going to be saved.',
          file=sys.stderr)
    exit()


if __name__ == '__main__':
    main()
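The script imports mat2png_hariharan from the repository's utils module, which this diff does not show. For context only, here is a sketch of what such a helper typically looks like for the SBD (Hariharan et al.) annotations; it assumes the .mat files keep the label image under GTcls.Segmentation, the standard SBD layout, and is not the code from this commit:

import scipy.io

def mat2png_hariharan(mat_file, key='GTcls'):
    # SBD ground truth stores a struct whose 'Segmentation' field is a
    # 2-D array of per-pixel class indices
    mat = scipy.io.loadmat(mat_file, mat_dtype=True,
                           squeeze_me=True, struct_as_record=False)
    return mat[key].Segmentation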