Revert the changes with normalized scores, this was completely wrong

florianneukirchen · Mar 23, 2024 · 898fa9b · 898fa9b
1 parent 1e8b9ef
commit 898fa9b
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 98 deletions.
diff --git a/algs/scipy_pca_algorithm.py b/algs/scipy_pca_algorithm.py
@@ -60,7 +60,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
 
     OUTPUT = 'OUTPUT'
     INPUT = 'INPUT'
-    NORMALIZED = 'NORMALIZED'
     NCOMPONENTS = 'NCOMPONENTS'
     PERCENTVARIANCE = 'PERCENTVARIANCE'
     DTYPE = 'DTYPE'
@@ -81,8 +80,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
             <i>number of components</i> to keep or the <i>percentage of variance</i> \
             explained by the kept components can be set. 
 
-            <b>Normalize</b> Compute normalized version of PCA scores, by dividing \
-            by sqrt(n_samples - 1). Number of samples means number of pixels in this case. \
             
             <b>Number of components</b> is only used if the value is greater than 0 and \
             smaller than the count of original bands and if percentage of variance is \
@@ -93,7 +90,7 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
 
             <b>Output</b> The output raster contains \
             the data projected into the principal components \
-            (i.e. the normalized or unnormalized scores).
+            (i.e. the PCA scores).
 
             <b>Output data type</b> Float32 or Float64
 
@@ -112,24 +109,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
             <li>Band Mean</li>
             </ul>
 
-            <b>Note on Normalization</b> The normalized version of the PCA \
-            can be calculated in two ways: 
-            
-            <ul>
-            <li>By first dividing the data by \
-            sqrt(n_samples - 1); then performing SVD and calculating \
-            scores as dot productuct of normalized data and V (of the SVD, \
-            not transposed).</li>
-            <li>By performing SVD with unnormalized data, then calculating \
-            loadings by dividing the eigenvectors by sqrt(n_samples - 1) \
-            and calculating scores as dot product of unnormalized data \
-            and loadings.
-            </ul>
-
-            Unnmormalized scores are calculated as dot product of \
-            unnormalized data and the (unnormalized) eigenvectors \
-            of the SVD. (The result is the same as PCA using the \
-            python module sklearn without normalizing the data).              )
             """
 
     # Init Algorithm
@@ -147,12 +126,6 @@ def initAlgorithm(self, config):
             )
         )
 
-        self.addParameter(QgsProcessingParameterBoolean(
-            self.NORMALIZED,
-            self.tr('Normalize'),
-            optional=True,
-            defaultValue=False,
-        ))
 
         self.addParameter(QgsProcessingParameterNumber(
             self.NCOMPONENTS,
@@ -202,8 +175,6 @@ def processAlgorithm(self, parameters, context, feedback):
         self.inputlayer = self.parameterAsRasterLayer(parameters, self.INPUT, context)
         self.output_raster = self.parameterAsOutputLayer(parameters, self.OUTPUT,context)
 
-        self.normalized = self.parameterAsBool(parameters, self.NORMALIZED, context)
-
         self.ncomponents = self.parameterAsInt(parameters, self.NCOMPONENTS,context)
         self.percentvariance = self.parameterAsDouble(parameters, self.PERCENTVARIANCE,context)
 
@@ -259,9 +230,6 @@ def processAlgorithm(self, parameters, context, feedback):
         # The constant used for normalization in PCA is: 1 / sqrt(n_samples) 
         # or 1 / sqrt(n_samples - 1)
 
-        # Normalizing afterwards means: 
-        # Normalized scores are data @ loadings
-        # Unnormalized scores are data @ VT.T
 
         U, S, VT = linalg.svd(centered,full_matrices=False)
 
@@ -308,10 +276,7 @@ def processAlgorithm(self, parameters, context, feedback):
             return {}
 
         # Get the scores, i.e. the data in principal components
-        if self.normalized: 
-            new_array = centered @ loadings
-        else:       
-            new_array = centered @ VT.T
+        new_array = centered @ VT.T
 
 
         # Reshape to original shape
@@ -365,16 +330,11 @@ def processAlgorithm(self, parameters, context, feedback):
                 'variance_ratio': variance_ratio.tolist(),
                 'variance explained cumsum': variance_explained_cumsum.tolist(),
                 'band mean': col_mean.tolist(),
-                'is normalized': self.normalized,
                 })
 
         # Save loadings etc as json in the metadata abstract of the layer
-        if self.normalized:
-            layername = "PCA (normalized)"
-        else:
-            layername = "PCA (not normalized)"
         global updatemetadata
-        updatemetadata = self.UpdateMetadata(encoded, layername)
+        updatemetadata = self.UpdateMetadata(encoded)
         context.layerToLoadOnCompletionDetails(self.output_raster).setPostProcessor(updatemetadata)
 
         return {self.OUTPUT: self.output_raster,
@@ -385,7 +345,6 @@ def processAlgorithm(self, parameters, context, feedback):
                 'variance explained cumsum': variance_explained_cumsum,
                 'band mean': col_mean,
                 'eigenvectors': VT.T,
-                'is normalized': self.normalized,
                 'json': encoded}
 
 
@@ -401,13 +360,11 @@ class UpdateMetadata(QgsProcessingLayerPostProcessorInterface):
         """
         To add metadata in the postprocessing step.
         """
-        def __init__(self, abstract, layername):
+        def __init__(self, abstract):
             self.abstract = abstract
-            self.layername = layername
             super().__init__()
 
         def postProcessLayer(self, layer, context, feedback):
-            layer.setName(self.layername)
             meta = layer.metadata()
             meta.setAbstract(self.abstract)
             layer.setMetadata(meta)

diff --git a/algs/scipy_pca_helper_algorithms.py b/algs/scipy_pca_helper_algorithms.py
@@ -83,7 +83,6 @@ class SciPyTransformPcBaseclass(QgsProcessingAlgorithm):
     _inverse = False
     _keepbands = 0
     falsemean = False
-    msg = ""
 
     _bandmean = None
     V = None
@@ -289,8 +288,6 @@ def processAlgorithm(self, parameters, context, feedback):
         """
         self.get_parameters(parameters, context)
 
-        if self.msg != "":
-            feedback.reportError(self.tr(self.msg), fatalError=False)
 
         self.ds = gdal.Open(self.inputlayer.source())
 
@@ -414,34 +411,14 @@ def json_to_parameters(self, s):
             decoded = json.loads(s)
         except (json.decoder.JSONDecodeError, ValueError, TypeError):
             return None, None
-        is_normalized = decoded.get("is normalized", None)
-
-        if is_normalized is None:
-            self.msg = "Metadata does not tell if these are normalized scores. Calculating assuming unnormalized scores."
-            is_normalized = False
-        print("is normalized", is_normalized)
-        if is_normalized:
-            eigenvectors = decoded.get("loadings", None)
-        else:
-            eigenvectors = decoded.get("eigenvectors", None)
-
-        print(is_normalized)
-        print(eigenvectors)
+
+        eigenvectors = decoded.get("eigenvectors", None)
 
         try:
             eigenvectors = np.array(eigenvectors)
         except (ValueError, TypeError):
             eigenvectors = None
 
-        if is_normalized:
-            eigenvals = decoded.get("variance explained", None)
-            if not eigenvals is None:
-                try:
-                    eigenvals = np.array(eigenvals)
-                    eigenvectors = (eigenvectors / eigenvals)
-                except (ValueError, TypeError):
-                    msg = "Could not read eigenvalues from metadata"
-
         means = decoded.get("band mean", 0)
         try:
             means = np.array(means)
@@ -500,29 +477,21 @@ class SciPyTransformToPCAlgorithm(SciPyTransformPcBaseclass):
 
     _help = """
         Transform data into given principal components \
-        with a matrix of weights (eigenvectors or loadings) by taking the \
+        with a matrix of eigenvectors by taking the \
         dot product with a matrix of weights (after centering the data). \
 
 
-        The eigenvectors / loadings can also be read from the metadata of an \
-        existing PCA layer. Normalized PCA scores are only partially supported.
+        The eigenvectors can also be read from the metadata of an \
+        existing PCA layer. 
 
-        <b>Eigenvectors</b> Matrix of eigenvectors or loadings (as string). \
+        <b>Eigenvectors</b> Matrix of eigenvectors (as string). \
         Optional if the next parameter is set. \
-        The matrix can be taken from the output of the PCA algorith of this plugin. \
-        Using eigenvectors, the result will be unnormalized PCA scores. \
-        Using loadings, the result will be normalized PCA scores, \
-        but the metadata of the layer will be incorrect (and automatic \
-        inverse transform from PC does not work). 
+        
 
-        <b>Read eigenvectors / loadings from PCA layer metadata</b> \
+        <b>Read eigenvectors from PCA layer metadata</b> \
         Reads the weights for the transformation from the metadata \
         of a layer that was generated using the PCA algorithm of this plugin. \
-        Ignored if the parameter <i>eigenvectors</i> is used. \
-        The eigenvectors are used, if the layer contains unnormalized scores. \
-        The loadings are used, if the layer contains normalized scores; however, \
-        in this case, the metadata of the result will not be correct \
-        (and automatic inverse transform from PC does not work).
+        Ignored if the parameter <i>eigenvectors</i> is used. 
 
         <b>Number of components</b> is only used if the value is greater than 0 and \
         smaller than the count of original bands.
@@ -554,7 +523,7 @@ def initAlgorithm(self, config):
         self.addParameter(
             QgsProcessingParameterRasterLayer(
                 self.PARAMETERLAYER,
-                self.tr('Read eigenvectors/loadings from PCA layer metadata'),
+                self.tr('Read eigenvectors from PCA layer metadata'),
                 optional=True,
             )
         )
@@ -610,21 +579,12 @@ class SciPyTransformFromPCAlgorithm(SciPyTransformPcBaseclass):
         dot product of the scores the with the transpose of the matrix of eigenvectors \
         and adding the original means to the result.
 
-        Normalized PCA scores are only partially supported, see below. \
-        The eigenvectors / loadings can also be read from the metadata \
+        The eigenvectors can also be read from the metadata \
         of the input layer, as long as they exist and are complete. \
-        If the layer contains the PCA generated with the PCA \
-        algorithm of this plugin (i.e. the meta data is complete), \
-        the transform works for both normalized and unnormalized scores \
-        without changing any parameters.
 
         <b>Eigenvectors</b> Matrix of eigenvectors (as string). \
         Optional if the next parameter is set. \
         The matrix can be taken from the output of the PCA algorith of this plugin. \
-        Assumes that the input contains unnormalized PCA scores. \
-        For normalized PCA scores, divide the loadings matrix by \
-        the eigenvalues ("variance explained") and enter the result \
-        into the <i>eigenvectors</i> text field.
 
         <b>Mean of original bands</b> As first step of PCA, the data of each \
         band is centered by subtracting the means. These must be added \