diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..48d7e1c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,57 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# ignore saved weights files
+weights/
diff --git a/model/PieAPPv0pt1_PT.py b/model/PieAPPv0pt1_PT.py
index 5f7b095..c4d736d 100644
--- a/model/PieAPPv0pt1_PT.py
+++ b/model/PieAPPv0pt1_PT.py
@@ -8,64 +8,65 @@ import skimage.io as io
 class PieAPP(nn.Module):
     # How to ensure that everything goes on a GPU? do I need to fetch?
-    def __init__(self,batch_size,num_patches):
-        super(PieAPP, self).__init__()
-        self.conv1 = nn.Conv2d(3,64,3,padding=1)
-        self.conv2 = nn.Conv2d(64,64,3,padding=1)
-        self.pool2 = nn.MaxPool2d(2,2)
-        self.conv3 = nn.Conv2d(64,64,3,padding=1)
-        self.conv4 = nn.Conv2d(64,128,3,padding=1)
-        self.pool4 = nn.MaxPool2d(2,2)
-        self.conv5 = nn.Conv2d(128,128,3,padding=1)
-        self.conv6 = nn.Conv2d(128,128,3,padding=1)
-        self.pool6 = nn.MaxPool2d(2,2)
-        self.conv7 = nn.Conv2d(128,256,3,padding=1)
-        self.conv8 = nn.Conv2d(256,256,3,padding=1)
-        self.pool8 = nn.MaxPool2d(2,2)
-        self.conv9 = nn.Conv2d(256,256,3,padding=1)
-        self.conv10 = nn.Conv2d(256,512,3,padding=1)
-        self.pool10 = nn.MaxPool2d(2,2)
-        self.conv11 = nn.Conv2d(512,512,3,padding=1)
-        self.fc1_score = nn.Linear(120832, 512)
-        self.fc2_score = nn.Linear(512,1)
-        self.fc1_weight = nn.Linear(2048,512)
-        self.fc2_weight = nn.Linear(512,1)
-        self.ref_score_subtract = nn.Linear(1,1)
-        self.batch_size = batch_size
-        self.num_patches = num_patches
+    def __init__(self,batch_size,num_patches):
+        super(PieAPP, self).__init__()
+        self.conv1 = nn.Conv2d(3,64,3,padding=1)
+        self.conv2 = nn.Conv2d(64,64,3,padding=1)
+        self.pool2 = nn.MaxPool2d(2,2)
+        self.conv3 = nn.Conv2d(64,64,3,padding=1)
+        self.conv4 = nn.Conv2d(64,128,3,padding=1)
+        self.pool4 = nn.MaxPool2d(2,2)
+        self.conv5 = nn.Conv2d(128,128,3,padding=1)
+        self.conv6 = nn.Conv2d(128,128,3,padding=1)
+        self.pool6 = nn.MaxPool2d(2,2)
+        self.conv7 = nn.Conv2d(128,256,3,padding=1)
+        self.conv8 = nn.Conv2d(256,256,3,padding=1)
+        self.pool8 = nn.MaxPool2d(2,2)
+        self.conv9 = nn.Conv2d(256,256,3,padding=1)
+        self.conv10 = nn.Conv2d(256,512,3,padding=1)
+        self.pool10 = nn.MaxPool2d(2,2)
+        self.conv11 = nn.Conv2d(512,512,3,padding=1)
+        self.fc1_score = nn.Linear(120832, 512)
+        self.fc2_score = nn.Linear(512,1)
+        self.fc1_weight = nn.Linear(2048,512)
+        self.fc2_weight = nn.Linear(512,1)
+        self.ref_score_subtract = nn.Linear(1,1)
+        self.batch_size = int(batch_size)
+        self.num_patches = int(num_patches)
 
-    def flatten(self,matrix): # takes NxCxHxW input and outputs NxHWC
-        return matrix.view((self.batch_size*self.num_patches,-1))
-
-    def compute_features(self,input):
-        #conv1 -> relu -> conv2 -> relu -> pool2 -> conv3 -> relu
-        x3 = F.relu(self.conv3(self.pool2(F.relu(self.conv2(F.relu(self.conv1(input)))))))
-        # conv4 -> relu -> pool4 -> conv5 -> relu
-        x5 = F.relu(self.conv5(self.pool4(F.relu(self.conv4(x3)))))
-        # conv6 -> relu -> pool6 -> conv7 -> relu
-        x7 = F.relu(self.conv7(self.pool6(F.relu(self.conv6(x5)))))
-        # conv8 -> relu -> pool8 -> conv9 -> relu
-        x9 = F.relu(self.conv9(self.pool8(F.relu(self.conv8(x7)))))
-        # conv10 -> relu -> pool10 -> conv11 -> relU
-        x11 = self.flatten(F.relu(self.conv11(self.pool10(F.relu(self.conv10(x9))))))
-        # flatten and concatenate
-        feature_ms = torch.cat((self.flatten(x3),self.flatten(x5),self.flatten(x7),self.flatten(x9),x11),1)
-        return feature_ms, x11
-
-    def compute_score(self,image_A_patches, image_ref_patches):
-        A_multi_scale, A_coarse = self.compute_features(image_A_patches)
-        ref_multi_scale, ref_coarse = self.compute_features(image_ref_patches)
-        diff_ms = ref_multi_scale - A_multi_scale
-        diff_coarse = ref_coarse - A_coarse
-        # per patch score: fc1_score -> relu -> fc2_score
-        per_patch_score = self.ref_score_subtract(0.01*self.fc2_score(F.relu(self.fc1_score(diff_ms))))
-        per_patch_score.view((-1,self.num_patches))
-        # per patch weight: fc1_weight -> relu -> fc2_weight
-        const = Variable(torch.from_numpy(0.000001*np.ones((1,))).float(), requires_grad=False)
-        const_cuda = const.cuda()
-        per_patch_weight = self.fc2_weight(F.relu(self.fc1_weight(diff_coarse)))+const_cuda
-        per_patch_weight.view((-1,self.num_patches))
-        product_val = torch.mul(per_patch_weight,per_patch_score)
-        dot_product_val = torch.sum(product_val)
-        norm_factor = torch.sum(per_patch_weight)
-        return torch.div(dot_product_val, norm_factor), per_patch_score, per_patch_weight
+    def flatten(self,matrix): # takes NxCxHxW input and outputs NxHWC
+        return matrix.view((int(self.batch_size*self.num_patches),-1))
+
+    def compute_features(self,input):
+        #conv1 -> relu -> conv2 -> relu -> pool2 -> conv3 -> relu
+        x3 = F.relu(self.conv3(self.pool2(F.relu(self.conv2(F.relu(self.conv1(input)))))))
+        # conv4 -> relu -> pool4 -> conv5 -> relu
+        x5 = F.relu(self.conv5(self.pool4(F.relu(self.conv4(x3)))))
+        # conv6 -> relu -> pool6 -> conv7 -> relu
+        x7 = F.relu(self.conv7(self.pool6(F.relu(self.conv6(x5)))))
+        # conv8 -> relu -> pool8 -> conv9 -> relu
+        x9 = F.relu(self.conv9(self.pool8(F.relu(self.conv8(x7)))))
+        # conv10 -> relu -> pool10 -> conv11 -> relU
+        x11 = self.flatten(F.relu(self.conv11(self.pool10(F.relu(self.conv10(x9))))))
+        # flatten and concatenate
+        feature_ms = torch.cat((self.flatten(x3),self.flatten(x5),self.flatten(x7),self.flatten(x9),x11),1)
+        return feature_ms, x11
+
+    def compute_score(self,image_A_patches, image_ref_patches):
+        A_multi_scale, A_coarse = self.compute_features(image_A_patches)
+        ref_multi_scale, ref_coarse = self.compute_features(image_ref_patches)
+        diff_ms = ref_multi_scale - A_multi_scale
+        diff_coarse = ref_coarse - A_coarse
+        # per patch score: fc1_score -> relu -> fc2_score
+        per_patch_score = self.ref_score_subtract(0.01*self.fc2_score(F.relu(self.fc1_score(diff_ms))))
+        per_patch_score.view((-1,int(self.num_patches)))
+        # per patch weight: fc1_weight -> relu -> fc2_weight
+        const = Variable(torch.from_numpy(0.000001*np.ones((1,))).float(), requires_grad=False)
+        if next(self.fc1_weight.parameters()).is_cuda:
+            const = const.cuda()
+        per_patch_weight = self.fc2_weight(F.relu(self.fc1_weight(diff_coarse)))+const
+        per_patch_weight.view((-1,int(self.num_patches)))
+        product_val = torch.mul(per_patch_weight,per_patch_score)
+        dot_product_val = torch.sum(product_val)
+        norm_factor = torch.sum(per_patch_weight)
+        return torch.div(dot_product_val, norm_factor), per_patch_score, per_patch_weight
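Note on the model change above: the hard-coded const.cuda() call is replaced by a check on the device of the network's own parameters, so the small stabilizing constant follows the model and CPU-only inference no longer crashes. The sketch below (illustrative only, not part of this diff) exercises the updated module on the CPU with randomly generated 64x64 patches; the patch size matches patch_size = 64 in test_PieAPP_PT.py, while the 36-patch count and the random data are made up for the example.

# Standalone smoke test for model/PieAPPv0pt1_PT.py (assumes the repo layout above
# and the Variable-style PyTorch API used elsewhere in this PR).
import numpy as np
import torch
from torch.autograd import Variable
from model.PieAPPv0pt1_PT import PieAPP

num_patches = 36  # arbitrary number of 64x64 patches for this sketch
net = PieAPP(batch_size=1, num_patches=num_patches)
net.eval()

# Optionally load the released weights, applying the same reshape fix as test_PieAPP_PT.py:
# state_dict = torch.load('weights/PieAPPv0.1.pth')
# state_dict['ref_score_subtract.weight'] = state_dict['ref_score_subtract.weight'].unsqueeze(1)
# net.load_state_dict(state_dict)

# Random NCHW patches stand in for patches sampled from a distorted/reference image pair.
A = Variable(torch.from_numpy(np.random.rand(num_patches, 3, 64, 64).astype(np.float32)), requires_grad=False)
ref = Variable(torch.from_numpy(np.random.rand(num_patches, 3, 64, 64).astype(np.float32)), requires_grad=False)

# Runs on CPU because the epsilon constant is only moved to CUDA when fc1_weight lives there.
score, per_patch_err, per_patch_w = net.compute_score(A, ref)
print(score.cpu().data.numpy())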
diff --git a/test_PieAPP_PT.py b/test_PieAPP_PT.py
index 00cc917..8d670a1 100644
--- a/test_PieAPP_PT.py
+++ b/test_PieAPP_PT.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+
 import numpy as np
 import cv2
 import sys
@@ -10,13 +12,14 @@ import argparse
 import os
 
+
 ######## check for model and download if not present
 if not os.path.isfile('weights/PieAPPv0.1.pth'):
-    print "downloading dataset"
-    os.system("bash scripts/download_PieAPPv0.1_PT_weights.sh")
-    if not os.path.isfile('weights/PieAPPv0.1.pth'):
-        print "PieAPPv0.1.pth not downloaded"
-        sys.exit()
+    print("downloading dataset")
+    os.system("bash scripts/download_PieAPPv0.1_PT_weights.sh")
+    if not os.path.isfile('weights/PieAPPv0.1.pth'):
+        print("PieAPPv0.1.pth not downloaded")
+        sys.exit()
 
 ######## variables
 patch_size = 64
@@ -38,9 +41,9 @@ _,rows,cols,ch = imagesRef.shape
 
 if args.sampling_mode == 'sparse':
-    stride_val = 27
+    stride_val = 27
 else:
-    stride_val = 6
+    stride_val = 6
 
 try:
     gpu_num = float(args.gpu_id)
@@ -58,46 +61,48 @@ ######## initialize the model
 PieAPP_net = PieAPP(batch_size,num_patches_per_dim)
-PieAPP_net.load_state_dict(torch.load('weights/PieAPPv0.1.pth'))
+state_dict = torch.load('weights/PieAPPv0.1.pth')
+state_dict['ref_score_subtract.weight'] = state_dict['ref_score_subtract.weight'].unsqueeze(1)
+PieAPP_net.load_state_dict(state_dict)
 if use_gpu == 1:
-    PieAPP_net.cuda()
+    PieAPP_net.cuda()
 
 score_accum = 0.0
 weight_accum = 0.0
 # iterate through smaller size sub-images (to prevent memory overload)
-for x_iter in range(0,num_x/num_patches_per_dim+1):
-    for y_iter in range(0,num_y/num_patches_per_dim+1):
-        # compute the size of the subimage
-        if (num_patches_per_dim*(x_iter + 1) > num_x):
-            size_slice_cols = cols - x_loc[num_patches_per_dim*x_iter]
-        else:
-            size_slice_cols = x_loc[num_patches_per_dim*(x_iter + 1)] - x_loc[num_patches_per_dim*x_iter] + patch_size - stride_val
-        if (num_patches_per_dim*(y_iter + 1) > num_y):
-            size_slice_rows = cols - y_loc[num_patches_per_dim*y_iter]
-        else:
-            size_slice_rows = y_loc[num_patches_per_dim*(y_iter + 1)] - y_loc[num_patches_per_dim*y_iter] + patch_size - stride_val
-        # obtain the subimage and samples patches
-        A_sub_im = imagesA[:, y_loc[num_patches_per_dim*y_iter]:y_loc[num_patches_per_dim*y_iter]+size_slice_rows, x_loc[num_patches_per_dim*x_iter]:x_loc[num_patches_per_dim*x_iter]+size_slice_cols,:]
-        ref_sub_im = imagesRef[:, y_loc[num_patches_per_dim*y_iter]:y_loc[num_patches_per_dim*y_iter]+size_slice_rows, x_loc[num_patches_per_dim*x_iter]:x_loc[num_patches_per_dim*x_iter]+size_slice_cols,:]
-        A_patches, ref_patches = sample_patches(A_sub_im, ref_sub_im, patch_size=64, strideval=stride_val, random_selection=False, uniform_grid_mode = 'strided')
-        num_patches_curr = A_patches.shape[0]/batch_size
-
-        PieAPP_net.num_patches = num_patches_curr
-
-        # initialize variable to be fed to PieAPP_net
-        A_patches_var = Variable(torch.from_numpy(np.transpose(A_patches,(0,3,1,2))), requires_grad=False)
-        ref_patches_var = Variable(torch.from_numpy(np.transpose(ref_patches,(0,3,1,2))), requires_grad=False)
-        if use_gpu == 1:
-            A_patches_var = A_patches_var.cuda()
-            ref_patches_var = ref_patches_var.cuda()
-
-        # forward pass
-        _, PieAPP_patchwise_errors, PieAPP_patchwise_weights = PieAPP_net.compute_score(A_patches_var.float(), ref_patches_var.float())
-        curr_err = PieAPP_patchwise_errors.cpu().data.numpy()
-        curr_weights = PieAPP_patchwise_weights.cpu().data.numpy()
-        score_accum += np.sum(np.multiply(curr_err, curr_weights))
-        weight_accum += np.sum(curr_weights)
-
-print 'PieAPP value of '+args.A_path+ ' with respect to: '+str(score_accum/weight_accum)
\ No newline at end of file
+for x_iter in range(0,num_x//num_patches_per_dim+1):
+    for y_iter in range(0,num_y//num_patches_per_dim+1):
+        # compute the size of the subimage
+        if (num_patches_per_dim*(x_iter + 1) > num_x):
+            size_slice_cols = cols - x_loc[num_patches_per_dim*x_iter]
+        else:
+            size_slice_cols = x_loc[num_patches_per_dim*(x_iter + 1)] - x_loc[num_patches_per_dim*x_iter] + patch_size - stride_val
+        if (num_patches_per_dim*(y_iter + 1) > num_y):
+            size_slice_rows = cols - y_loc[num_patches_per_dim*y_iter]
+        else:
+            size_slice_rows = y_loc[num_patches_per_dim*(y_iter + 1)] - y_loc[num_patches_per_dim*y_iter] + patch_size - stride_val
+        # obtain the subimage and samples patches
+        A_sub_im = imagesA[:, y_loc[num_patches_per_dim*y_iter]:y_loc[num_patches_per_dim*y_iter]+size_slice_rows, x_loc[num_patches_per_dim*x_iter]:x_loc[num_patches_per_dim*x_iter]+size_slice_cols,:]
+        ref_sub_im = imagesRef[:, y_loc[num_patches_per_dim*y_iter]:y_loc[num_patches_per_dim*y_iter]+size_slice_rows, x_loc[num_patches_per_dim*x_iter]:x_loc[num_patches_per_dim*x_iter]+size_slice_cols,:]
+        A_patches, ref_patches = sample_patches(A_sub_im, ref_sub_im, patch_size=64, strideval=stride_val, random_selection=False, uniform_grid_mode = 'strided')
+        num_patches_curr = A_patches.shape[0]/batch_size
+
+        PieAPP_net.num_patches = num_patches_curr
+
+        # initialize variable to be fed to PieAPP_net
+        A_patches_var = Variable(torch.from_numpy(np.transpose(A_patches,(0,3,1,2))), requires_grad=False)
+        ref_patches_var = Variable(torch.from_numpy(np.transpose(ref_patches,(0,3,1,2))), requires_grad=False)
+        if use_gpu == 1:
+            A_patches_var = A_patches_var.cuda()
+            ref_patches_var = ref_patches_var.cuda()
+
+        # forward pass
+        _, PieAPP_patchwise_errors, PieAPP_patchwise_weights = PieAPP_net.compute_score(A_patches_var.float(), ref_patches_var.float())
+        curr_err = PieAPP_patchwise_errors.cpu().data.numpy()
+        curr_weights = PieAPP_patchwise_weights.cpu().data.numpy()
+        score_accum += np.sum(np.multiply(curr_err, curr_weights))
+        weight_accum += np.sum(curr_weights)
+
+print('PieAPP value of '+args.A_path+ ' with respect to: '+str(score_accum/weight_accum))
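The accumulation pattern in the rewritten loop above is a weighted mean of the per-patch errors, computed incrementally so that only one sub-image's worth of patches is in memory at a time. A small self-contained check (illustrative only; the array sizes are made up) shows that accumulating sum(err*w) and sum(w) per batch and dividing at the end matches a single weighted average over all patches:

# Sanity check of the score_accum / weight_accum aggregation used in test_PieAPP_PT.py.
import numpy as np

errs = [np.random.rand(36) for _ in range(4)]     # hypothetical per-patch errors, 4 sub-images
weights = [np.random.rand(36) for _ in range(4)]  # hypothetical per-patch weights

score_accum = sum(np.sum(e * w) for e, w in zip(errs, weights))
weight_accum = sum(np.sum(w) for w in weights)

all_e, all_w = np.concatenate(errs), np.concatenate(weights)
assert np.isclose(score_accum / weight_accum, np.sum(all_e * all_w) / np.sum(all_w))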