diff --git a/algs/scipy_pca_algorithm.py b/algs/scipy_pca_algorithm.py index 9b9f965..49922d6 100644 --- a/algs/scipy_pca_algorithm.py +++ b/algs/scipy_pca_algorithm.py @@ -60,7 +60,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm): OUTPUT = 'OUTPUT' INPUT = 'INPUT' - NORMALIZED = 'NORMALIZED' NCOMPONENTS = 'NCOMPONENTS' PERCENTVARIANCE = 'PERCENTVARIANCE' DTYPE = 'DTYPE' @@ -81,8 +80,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm): number of components to keep or the percentage of variance \ explained by the kept components can be set. - Normalize Compute normalized version of PCA scores, by dividing \ - by sqrt(n_samples - 1). Number of samples means number of pixels in this case. \ Number of components is only used if the value is greater than 0 and \ smaller than the count of original bands and if percentage of variance is \ @@ -93,7 +90,7 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm): Output The output raster contains \ the data projected into the principal components \ - (i.e. the normalized or unnormalized scores). + (i.e. the PCA scores). Output data type Float32 or Float64 @@ -112,24 +109,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
  • Band Mean
  • - Note on Normalization The normalized version of the PCA \ - can be calculated in two ways: - - - - Unnmormalized scores are calculated as dot product of \ - unnormalized data and the (unnormalized) eigenvectors \ - of the SVD. (The result is the same as PCA using the \ - python module sklearn without normalizing the data). ) """ # Init Algorithm @@ -147,12 +126,6 @@ def initAlgorithm(self, config): ) ) - self.addParameter(QgsProcessingParameterBoolean( - self.NORMALIZED, - self.tr('Normalize'), - optional=True, - defaultValue=False, - )) self.addParameter(QgsProcessingParameterNumber( self.NCOMPONENTS, @@ -202,8 +175,6 @@ def processAlgorithm(self, parameters, context, feedback): self.inputlayer = self.parameterAsRasterLayer(parameters, self.INPUT, context) self.output_raster = self.parameterAsOutputLayer(parameters, self.OUTPUT,context) - self.normalized = self.parameterAsBool(parameters, self.NORMALIZED, context) - self.ncomponents = self.parameterAsInt(parameters, self.NCOMPONENTS,context) self.percentvariance = self.parameterAsDouble(parameters, self.PERCENTVARIANCE,context) @@ -259,9 +230,6 @@ def processAlgorithm(self, parameters, context, feedback): # The constant used for normalization in PCA is: 1 / sqrt(n_samples) # or 1 / sqrt(n_samples - 1) - # Normalizing afterwards means: - # Normalized scores are data @ loadings - # Unnormalized scores are data @ VT.T U, S, VT = linalg.svd(centered,full_matrices=False) @@ -308,10 +276,7 @@ def processAlgorithm(self, parameters, context, feedback): return {} # Get the scores, i.e. the data in principal components - if self.normalized: - new_array = centered @ loadings - else: - new_array = centered @ VT.T + new_array = centered @ VT.T # Reshape to original shape @@ -365,16 +330,11 @@ def processAlgorithm(self, parameters, context, feedback): 'variance_ratio': variance_ratio.tolist(), 'variance explained cumsum': variance_explained_cumsum.tolist(), 'band mean': col_mean.tolist(), - 'is normalized': self.normalized, }) # Save loadings etc as json in the metadata abstract of the layer - if self.normalized: - layername = "PCA (normalized)" - else: - layername = "PCA (not normalized)" global updatemetadata - updatemetadata = self.UpdateMetadata(encoded, layername) + updatemetadata = self.UpdateMetadata(encoded) context.layerToLoadOnCompletionDetails(self.output_raster).setPostProcessor(updatemetadata) return {self.OUTPUT: self.output_raster, @@ -385,7 +345,6 @@ def processAlgorithm(self, parameters, context, feedback): 'variance explained cumsum': variance_explained_cumsum, 'band mean': col_mean, 'eigenvectors': VT.T, - 'is normalized': self.normalized, 'json': encoded} @@ -401,13 +360,11 @@ class UpdateMetadata(QgsProcessingLayerPostProcessorInterface): """ To add metadata in the postprocessing step. """ - def __init__(self, abstract, layername): + def __init__(self, abstract): self.abstract = abstract - self.layername = layername super().__init__() def postProcessLayer(self, layer, context, feedback): - layer.setName(self.layername) meta = layer.metadata() meta.setAbstract(self.abstract) layer.setMetadata(meta) diff --git a/algs/scipy_pca_helper_algorithms.py b/algs/scipy_pca_helper_algorithms.py index 21f013c..65ffd06 100644 --- a/algs/scipy_pca_helper_algorithms.py +++ b/algs/scipy_pca_helper_algorithms.py @@ -83,7 +83,6 @@ class SciPyTransformPcBaseclass(QgsProcessingAlgorithm): _inverse = False _keepbands = 0 falsemean = False - msg = "" _bandmean = None V = None @@ -289,8 +288,6 @@ def processAlgorithm(self, parameters, context, feedback): """ self.get_parameters(parameters, context) - if self.msg != "": - feedback.reportError(self.tr(self.msg), fatalError=False) self.ds = gdal.Open(self.inputlayer.source()) @@ -414,34 +411,14 @@ def json_to_parameters(self, s): decoded = json.loads(s) except (json.decoder.JSONDecodeError, ValueError, TypeError): return None, None - is_normalized = decoded.get("is normalized", None) - - if is_normalized is None: - self.msg = "Metadata does not tell if these are normalized scores. Calculating assuming unnormalized scores." - is_normalized = False - print("is normalized", is_normalized) - if is_normalized: - eigenvectors = decoded.get("loadings", None) - else: - eigenvectors = decoded.get("eigenvectors", None) - - print(is_normalized) - print(eigenvectors) + + eigenvectors = decoded.get("eigenvectors", None) try: eigenvectors = np.array(eigenvectors) except (ValueError, TypeError): eigenvectors = None - if is_normalized: - eigenvals = decoded.get("variance explained", None) - if not eigenvals is None: - try: - eigenvals = np.array(eigenvals) - eigenvectors = (eigenvectors / eigenvals) - except (ValueError, TypeError): - msg = "Could not read eigenvalues from metadata" - means = decoded.get("band mean", 0) try: means = np.array(means) @@ -500,29 +477,21 @@ class SciPyTransformToPCAlgorithm(SciPyTransformPcBaseclass): _help = """ Transform data into given principal components \ - with a matrix of weights (eigenvectors or loadings) by taking the \ + with a matrix of eigenvectors by taking the \ dot product with a matrix of weights (after centering the data). \ - The eigenvectors / loadings can also be read from the metadata of an \ - existing PCA layer. Normalized PCA scores are only partially supported. + The eigenvectors can also be read from the metadata of an \ + existing PCA layer. - Eigenvectors Matrix of eigenvectors or loadings (as string). \ + Eigenvectors Matrix of eigenvectors (as string). \ Optional if the next parameter is set. \ - The matrix can be taken from the output of the PCA algorith of this plugin. \ - Using eigenvectors, the result will be unnormalized PCA scores. \ - Using loadings, the result will be normalized PCA scores, \ - but the metadata of the layer will be incorrect (and automatic \ - inverse transform from PC does not work). + - Read eigenvectors / loadings from PCA layer metadata \ + Read eigenvectors from PCA layer metadata \ Reads the weights for the transformation from the metadata \ of a layer that was generated using the PCA algorithm of this plugin. \ - Ignored if the parameter eigenvectors is used. \ - The eigenvectors are used, if the layer contains unnormalized scores. \ - The loadings are used, if the layer contains normalized scores; however, \ - in this case, the metadata of the result will not be correct \ - (and automatic inverse transform from PC does not work). + Ignored if the parameter eigenvectors is used. Number of components is only used if the value is greater than 0 and \ smaller than the count of original bands. @@ -554,7 +523,7 @@ def initAlgorithm(self, config): self.addParameter( QgsProcessingParameterRasterLayer( self.PARAMETERLAYER, - self.tr('Read eigenvectors/loadings from PCA layer metadata'), + self.tr('Read eigenvectors from PCA layer metadata'), optional=True, ) ) @@ -610,21 +579,12 @@ class SciPyTransformFromPCAlgorithm(SciPyTransformPcBaseclass): dot product of the scores the with the transpose of the matrix of eigenvectors \ and adding the original means to the result. - Normalized PCA scores are only partially supported, see below. \ - The eigenvectors / loadings can also be read from the metadata \ + The eigenvectors can also be read from the metadata \ of the input layer, as long as they exist and are complete. \ - If the layer contains the PCA generated with the PCA \ - algorithm of this plugin (i.e. the meta data is complete), \ - the transform works for both normalized and unnormalized scores \ - without changing any parameters. Eigenvectors Matrix of eigenvectors (as string). \ Optional if the next parameter is set. \ The matrix can be taken from the output of the PCA algorith of this plugin. \ - Assumes that the input contains unnormalized PCA scores. \ - For normalized PCA scores, divide the loadings matrix by \ - the eigenvalues ("variance explained") and enter the result \ - into the eigenvectors text field. Mean of original bands As first step of PCA, the data of each \ band is centered by subtracting the means. These must be added \