klarEDA · Shankhanil · Mar 19, 2021 · Mar 19, 2021 · Mar 18, 2021 · Mar 20, 2021
diff --git a/docsource/image_visualize.rst b/docsource/image_visualize.rst
@@ -0,0 +1,6 @@
+Image Visualize
+=========================
+
+.. automodule:: klar_eda.visualize.image_visualize
+   :members:
+   :undoc-members:
diff --git a/docsource/index.rst b/docsource/index.rst
@@ -12,6 +12,7 @@ klar-eda's documentation!
 
    preprocess
    visualize
+   image_visualize
 
 
 Indices and tables

diff --git a/klar_eda/preprocess/__init__.py b/klar_eda/preprocess/__init__.py
@@ -2,5 +2,7 @@
 from . import csv_preprocess
 from . import image_preprocess
 from . import preprocess
+# To import morphological preprocessor
+from .image_preprocess import morphological  
 import pkg_resources
 pkg_resources.declare_namespace(__name__)
diff --git a/klar_eda/preprocess/csv_preprocess.py b/klar_eda/preprocess/csv_preprocess.py
@@ -72,8 +72,9 @@ def fill_numerical_na(self, ret = False):
                         self.df[col] = y
             except Exception as e:
                 pass
-            if ret == True:
-                return self.df
+        # Dedented so the return happens after the enclosing block, not inside it.
+        if ret == True:
+            return self.df
 
     def fill_categorical_na(self, ret = False):
         self.df = self.df.fillna("Unknown")
@@ -84,6 +89,25 @@ def normalize_numerical(self):
         for col in self.numerical_column_list:
             if col != self.target_column:
                 self.df[col]=(self.df[col]-self.df[col].min())/(self.df[col].max()-self.df[col].min())
+    def standardize(self):
+        """Standardize every column of ``self.df`` in place (z-score).
+
+        Applies z = (x - mean) / std per column, giving zero mean and unit
+        variance. Useful when features have very different scales (e.g. age
+        vs. salary) and for gradient-descent based optimizers.
+        NOTE(review): unlike normalize_numerical this touches every column,
+        including the target and non-numeric ones -- confirm that is intended.
+        """
+        # Use the instance's DataFrame; a bare `df` is undefined in this scope.
+        for col in self.df.columns:
+            self.df[col] = (self.df[col] - self.df[col].mean()) / self.df[col].std()
+
+    def mean_normalization(self):
+        """Rescale each column of ``self.df`` in place to
+        x' = (x - mean(x)) / (max(x) - min(x)).
+        """
+        for col in self.df.columns:
+            self.df[col] = (self.df[col] - self.df[col].mean()) / (self.df[col].max() - self.df[col].min())
 
     def encode_categorical(self):
         enc = OneHotEncoder(handle_unknown='ignore')
@@ -160,4 +184,4 @@ def convert_date_format(self, input_date, output_date_format = 'DD/MM/YYYY'):
 
         parsed_date = dateutil.parser.parse(input_date, dayfirst=True)
         self.converted_date = parsed_date.strftime(output_date_formats[output_date_format])
-        return self.converted_date
+        return self.converted_date
diff --git a/klar_eda/preprocess/image_preprocess.py b/klar_eda/preprocess/image_preprocess.py
@@ -96,94 +96,9 @@ def contrast_control(self, alpha = 1.25, beta = 0, save=True, show=False):
                 print('Error while changing contast for image ',image_index, e)
         self.cv2_image_list = contrast_image_list
 
-    def thresholding(self, technique = 'mean', threshold = cv2.THRESH_BINARY, save=True, show=False):
-        binarized_image_list = []
-        image_index = 0
-        #study the parameters
-        for image in self.cv2_image_list:
-            try:
-                if technique == 'simple':
-                    res , img = cv2.threshold(image, 120, 255, threshold)
-                    binarized_image_list.append(img)
-                    self.save_or_show_image(img,image_index,'threshold',save=save,show=show)
-                    image_index += 1
-                elif technique == 'mean':
-                    img = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, threshold, 199, 5)
-                    binarized_image_list.append(img)
-                    self.save_or_show_image(img,image_index,'threshold',save=save,show=show)
-                    image_index += 1
-                else:
-                    img = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, threshold, 199, 5)
-                    binarized_image_list.append(img)
-                    self.save_or_show_image(img,image_index,'threshold',save=save,show=show)
-                    image_index += 1
-            except Exception as e:
-                print('Error during binarization of image ', image_index, e)
-        self.cv2_image_list = binarized_image_list
-
-    def denoise(self, is_gray = True, save=True, show=False):
-        denoised_image_list = []
-        image_index = 0
-        for image in self.cv2_image_list:
-            try:
-                if not is_gray:
-                    img = cv2.fastNlMeansDenoisingColored(image,None,10,10,7,21)
-                else:
-                    img = cv2.fastNlMeansDenoising(image,None,3,7,21)
-                denoised_image_list.append(img)
-                self.save_or_show_image(img,image_index,'denoise',save=save,show=show)
-                image_index += 1
-            except Exception as e:
-                print('Error during denoising image ', image_index, e)
-        self.cv2_image_list = denoised_image_list
-
-    def erode(self, dim = None, save=True, show=False):
-        eroded_image_list = []
-        image_index = 0
-        if dim == None:
-            dim = (2,2)
-        for image in self.cv2_image_list:
-            try:
-                kernel = np.ones(dim,np.uint8)
-                img = cv2.erode(image,kernel,iterations = 1)
-                self.save_or_show_image(img,image_index,'erode',save=save,show=show)
-                image_index += 1
-                eroded_image_list.append(img)
-            except Exception as e:
-                print('Error during eroding image ', image_index, e)
-        self.cv2_image_list = eroded_image_list
-
-    def dilation(self, dim = None, save=True, show=False):
-        dilated_image_list = []
-        image_index = 0
-        if dim == None:
-            dim = (2,2)
-        for image in self.cv2_image_list:
-            try:
-                kernel = np.ones(dim,np.uint8)
-                img = cv2.dilate(image,kernel,iterations = 1)
-                self.save_or_show_image(img,image_index,'dilation',save=save,show=show)
-                image_index += 1
-                dilated_image_list.append(img)
-            except Exception as e:
-                print('Error while dilating image ', image_index, e)
-        self.cv2_image_list = dilated_image_list
-
-    def normalize(self, dim = None, save=True, show=False):
-        normalized_image_list = []
-        image_index = 0
-        if dim == None:
-            dim = (512,512)
-        for image in self.cv2_image_list:
-            try:
-                kernel = np.zeros(dim)
-                img = cv2.normalize(image,kernel,0,255,cv2.NORM_MINMAX)
-                normalized_image_list.append(img)
-                self.save_or_show_image(img,image_index,'normalize',save=save,show=show)
-                image_index += 1
-            except Exception as e:
-                print('Error while normalizing image ', image_index, e)
-
+    # thresholding(), denoise(), erode(), dilation() and normalize() have been
+    # moved to the image_preprocess.morphological module.
+
     def print_variables(self):
         for img in self.cv2_image_list:
             cv2.imshow('img',img)
@@ -193,35 +194,9 @@ def get_cascade(self, cascade_type='face'):
         #if cascade_type == 'face':
         return cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
 
-    def detect_face_and_crop(self, crop = False, save=True, show=False):
-        face_image_list = []
-        image_index = -1
-        face_cascade = self.get_cascade('face')
-        for image in self.cv2_image_list:
-            try:
-                image_index += 1
-                img = image.copy()
-                faces = face_cascade.detectMultiScale(img, 1.3, 5)
-                if faces is None:
-                    print('Unable to find face ')
-                    continue
-                for (x,y,w,h) in faces:
-                    padding = 10
-                    ih, iw = img.shape[:2]
-                    lx = max( 0, x - padding )
-                    ly = max( 0, x - padding )
-                    ux = min( iw, x + w + padding )
-                    uy = min( ih, y + h + padding )
-                    img = cv2.rectangle(img,(lx,ly),(ux,uy),(255,0,0),2)
-                    roi_color = img[y:y+h, x:x+w]
-                    if crop == True:
-                        self.save_or_show_image(roi_color, image_index, 'haarcascade_faces',save=save,show=show)
-                self.save_or_show_image(img, image_index, 'haarcascade',save=save,show=show)
-                face_image_list.append(img)
-            except Exception as e:
-                print('Error while detecing')
-        self.cv2_image_list = face_image_list
-
+    # detect_face_and_crop() has been moved to IntelligentImagePreprocess in
+    # klar_eda/preprocess/image_preprocess/intelligent.py.
+
     def adaptive_histogram_equalization(self, save=True, show=False):
         refined_image_list = []
         image_index = 0

diff --git a/klar_eda/preprocess/image_preprocess/__init__.py b/klar_eda/preprocess/image_preprocess/__init__.py
@@ -0,0 +1,4 @@
+from . import morphological
+from . import intelligent
+import pkg_resources
+pkg_resources.declare_namespace(__name__)
diff --git a/klar_eda/preprocess/image_preprocess/intelligent.py b/klar_eda/preprocess/image_preprocess/intelligent.py
@@ -0,0 +1,56 @@
+import os
+from os import makedirs
+from os.path import join, exists
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+from ..image_preprocess import ImagePreprocess
+
+class IntelligentImagePreprocess:
+    """Face-detection based preprocessing for a list of images.
+
+    NOTE(review): read_images, get_cascade and save_or_show_image are used
+    below but are not defined on this class, and the imported ImagePreprocess
+    is never inherited from -- confirm the intended base class.
+    """
+    def __init__(self,input,labels = None):
+        """Read images from directory `input` (str), else use `input` as the image list."""
+        self.suffixes = ('.jpeg', '.jpg', '.png')
+        self.labels = labels
+        if isinstance(input, str):
+            self.path = input
+            self.image_list = sorted([ file for file in os.listdir(input) if (file.endswith(self.suffixes))])
+            self.cv2_image_list = [ self.read_images(os.path.join(self.path,image_name)) for image_name in  self.image_list ]
+        else:
+            self.path = None
+            self.image_list = None
+            self.cv2_image_list = input
+
+    def detect_face_and_crop(self, crop = False, save=True, show=False):
+        """Draw a padded rectangle around every detected face in each image.
+        The annotated copies replace self.cv2_image_list.
+        """
+        face_image_list = []
+        face_cascade = self.get_cascade('face')
+        for image_index, image in enumerate(self.cv2_image_list):
+            try:
+                img = image.copy()
+                faces = face_cascade.detectMultiScale(img, 1.3, 5)
+                if faces is None:
+                    print('Unable to find face ')
+                    continue
+                ih, iw = img.shape[:2]
+                padding = 10
+                for (x,y,w,h) in faces:
+                    # Clamp the padded box to the image; the top edge comes from y.
+                    lx, ly = max(0, x - padding), max(0, y - padding)
+                    ux, uy = min(iw, x + w + padding), min(ih, y + h + padding)
+                    img = cv2.rectangle(img,(lx,ly),(ux,uy),(255,0,0),2)
+                    if crop:
+                        self.save_or_show_image(img[y:y+h, x:x+w], image_index, 'haarcascade_faces',save=save,show=show)
+                self.save_or_show_image(img, image_index, 'haarcascade',save=save,show=show)
+                face_image_list.append(img)
+            except Exception as e:
+                print('Error while detecting faces in image ', image_index, e)
+        self.cv2_image_list = face_image_list