diff --git a/algs/scipy_pca_algorithm.py b/algs/scipy_pca_algorithm.py
index 9b9f965..49922d6 100644
--- a/algs/scipy_pca_algorithm.py
+++ b/algs/scipy_pca_algorithm.py
@@ -60,7 +60,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
OUTPUT = 'OUTPUT'
INPUT = 'INPUT'
- NORMALIZED = 'NORMALIZED'
NCOMPONENTS = 'NCOMPONENTS'
PERCENTVARIANCE = 'PERCENTVARIANCE'
DTYPE = 'DTYPE'
@@ -81,8 +80,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
number of components to keep or the percentage of variance \
explained by the kept components can be set.
- Normalize Compute normalized version of PCA scores, by dividing \
- by sqrt(n_samples - 1). Number of samples means number of pixels in this case. \
Number of components is only used if the value is greater than 0 and \
smaller than the count of original bands and if percentage of variance is \
@@ -93,7 +90,7 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
Output The output raster contains \
the data projected into the principal components \
- (i.e. the normalized or unnormalized scores).
+ (i.e. the PCA scores).
Output data type Float32 or Float64
@@ -112,24 +109,6 @@ class SciPyPCAAlgorithm(QgsProcessingAlgorithm):
Band Mean
- Note on Normalization The normalized version of the PCA \
- can be calculated in two ways:
-
-
- - By first dividing the data by \
- sqrt(n_samples - 1); then performing SVD and calculating \
- scores as dot productuct of normalized data and V (of the SVD, \
- not transposed).
- - By performing SVD with unnormalized data, then calculating \
- loadings by dividing the eigenvectors by sqrt(n_samples - 1) \
- and calculating scores as dot product of unnormalized data \
- and loadings.
-
-
- Unnmormalized scores are calculated as dot product of \
- unnormalized data and the (unnormalized) eigenvectors \
- of the SVD. (The result is the same as PCA using the \
- python module sklearn without normalizing the data). )
"""
# Init Algorithm
@@ -147,12 +126,6 @@ def initAlgorithm(self, config):
)
)
- self.addParameter(QgsProcessingParameterBoolean(
- self.NORMALIZED,
- self.tr('Normalize'),
- optional=True,
- defaultValue=False,
- ))
self.addParameter(QgsProcessingParameterNumber(
self.NCOMPONENTS,
@@ -202,8 +175,6 @@ def processAlgorithm(self, parameters, context, feedback):
self.inputlayer = self.parameterAsRasterLayer(parameters, self.INPUT, context)
self.output_raster = self.parameterAsOutputLayer(parameters, self.OUTPUT,context)
- self.normalized = self.parameterAsBool(parameters, self.NORMALIZED, context)
-
self.ncomponents = self.parameterAsInt(parameters, self.NCOMPONENTS,context)
self.percentvariance = self.parameterAsDouble(parameters, self.PERCENTVARIANCE,context)
@@ -259,9 +230,6 @@ def processAlgorithm(self, parameters, context, feedback):
# The constant used for normalization in PCA is: 1 / sqrt(n_samples)
# or 1 / sqrt(n_samples - 1)
- # Normalizing afterwards means:
- # Normalized scores are data @ loadings
- # Unnormalized scores are data @ VT.T
U, S, VT = linalg.svd(centered,full_matrices=False)
@@ -308,10 +276,7 @@ def processAlgorithm(self, parameters, context, feedback):
return {}
# Get the scores, i.e. the data in principal components
- if self.normalized:
- new_array = centered @ loadings
- else:
- new_array = centered @ VT.T
+ new_array = centered @ VT.T
# Reshape to original shape
@@ -365,16 +330,11 @@ def processAlgorithm(self, parameters, context, feedback):
'variance_ratio': variance_ratio.tolist(),
'variance explained cumsum': variance_explained_cumsum.tolist(),
'band mean': col_mean.tolist(),
- 'is normalized': self.normalized,
})
# Save loadings etc as json in the metadata abstract of the layer
- if self.normalized:
- layername = "PCA (normalized)"
- else:
- layername = "PCA (not normalized)"
global updatemetadata
- updatemetadata = self.UpdateMetadata(encoded, layername)
+ updatemetadata = self.UpdateMetadata(encoded)
context.layerToLoadOnCompletionDetails(self.output_raster).setPostProcessor(updatemetadata)
return {self.OUTPUT: self.output_raster,
@@ -385,7 +345,6 @@ def processAlgorithm(self, parameters, context, feedback):
'variance explained cumsum': variance_explained_cumsum,
'band mean': col_mean,
'eigenvectors': VT.T,
- 'is normalized': self.normalized,
'json': encoded}
@@ -401,13 +360,11 @@ class UpdateMetadata(QgsProcessingLayerPostProcessorInterface):
"""
To add metadata in the postprocessing step.
"""
- def __init__(self, abstract, layername):
+ def __init__(self, abstract):
self.abstract = abstract
- self.layername = layername
super().__init__()
def postProcessLayer(self, layer, context, feedback):
- layer.setName(self.layername)
meta = layer.metadata()
meta.setAbstract(self.abstract)
layer.setMetadata(meta)
diff --git a/algs/scipy_pca_helper_algorithms.py b/algs/scipy_pca_helper_algorithms.py
index 21f013c..65ffd06 100644
--- a/algs/scipy_pca_helper_algorithms.py
+++ b/algs/scipy_pca_helper_algorithms.py
@@ -83,7 +83,6 @@ class SciPyTransformPcBaseclass(QgsProcessingAlgorithm):
_inverse = False
_keepbands = 0
falsemean = False
- msg = ""
_bandmean = None
V = None
@@ -289,8 +288,6 @@ def processAlgorithm(self, parameters, context, feedback):
"""
self.get_parameters(parameters, context)
- if self.msg != "":
- feedback.reportError(self.tr(self.msg), fatalError=False)
self.ds = gdal.Open(self.inputlayer.source())
@@ -414,34 +411,14 @@ def json_to_parameters(self, s):
decoded = json.loads(s)
except (json.decoder.JSONDecodeError, ValueError, TypeError):
return None, None
- is_normalized = decoded.get("is normalized", None)
-
- if is_normalized is None:
- self.msg = "Metadata does not tell if these are normalized scores. Calculating assuming unnormalized scores."
- is_normalized = False
- print("is normalized", is_normalized)
- if is_normalized:
- eigenvectors = decoded.get("loadings", None)
- else:
- eigenvectors = decoded.get("eigenvectors", None)
-
- print(is_normalized)
- print(eigenvectors)
+
+ eigenvectors = decoded.get("eigenvectors", None)
try:
eigenvectors = np.array(eigenvectors)
except (ValueError, TypeError):
eigenvectors = None
- if is_normalized:
- eigenvals = decoded.get("variance explained", None)
- if not eigenvals is None:
- try:
- eigenvals = np.array(eigenvals)
- eigenvectors = (eigenvectors / eigenvals)
- except (ValueError, TypeError):
- msg = "Could not read eigenvalues from metadata"
-
means = decoded.get("band mean", 0)
try:
means = np.array(means)
@@ -500,29 +477,21 @@ class SciPyTransformToPCAlgorithm(SciPyTransformPcBaseclass):
_help = """
Transform data into given principal components \
- with a matrix of weights (eigenvectors or loadings) by taking the \
+ with a matrix of eigenvectors by taking the \
dot product with a matrix of weights (after centering the data). \
- The eigenvectors / loadings can also be read from the metadata of an \
- existing PCA layer. Normalized PCA scores are only partially supported.
+ The eigenvectors can also be read from the metadata of an \
+ existing PCA layer.
- Eigenvectors Matrix of eigenvectors or loadings (as string). \
+ Eigenvectors Matrix of eigenvectors (as string). \
Optional if the next parameter is set. \
- The matrix can be taken from the output of the PCA algorith of this plugin. \
- Using eigenvectors, the result will be unnormalized PCA scores. \
- Using loadings, the result will be normalized PCA scores, \
- but the metadata of the layer will be incorrect (and automatic \
- inverse transform from PC does not work).
+
- Read eigenvectors / loadings from PCA layer metadata \
+ Read eigenvectors from PCA layer metadata \
Reads the weights for the transformation from the metadata \
of a layer that was generated using the PCA algorithm of this plugin. \
- Ignored if the parameter eigenvectors is used. \
- The eigenvectors are used, if the layer contains unnormalized scores. \
- The loadings are used, if the layer contains normalized scores; however, \
- in this case, the metadata of the result will not be correct \
- (and automatic inverse transform from PC does not work).
+ Ignored if the parameter eigenvectors is used.
Number of components is only used if the value is greater than 0 and \
smaller than the count of original bands.
@@ -554,7 +523,7 @@ def initAlgorithm(self, config):
self.addParameter(
QgsProcessingParameterRasterLayer(
self.PARAMETERLAYER,
- self.tr('Read eigenvectors/loadings from PCA layer metadata'),
+ self.tr('Read eigenvectors from PCA layer metadata'),
optional=True,
)
)
@@ -610,21 +579,12 @@ class SciPyTransformFromPCAlgorithm(SciPyTransformPcBaseclass):
dot product of the scores the with the transpose of the matrix of eigenvectors \
and adding the original means to the result.
- Normalized PCA scores are only partially supported, see below. \
- The eigenvectors / loadings can also be read from the metadata \
+ The eigenvectors can also be read from the metadata \
of the input layer, as long as they exist and are complete. \
- If the layer contains the PCA generated with the PCA \
- algorithm of this plugin (i.e. the meta data is complete), \
- the transform works for both normalized and unnormalized scores \
- without changing any parameters.
Eigenvectors Matrix of eigenvectors (as string). \
Optional if the next parameter is set. \
The matrix can be taken from the output of the PCA algorith of this plugin. \
- Assumes that the input contains unnormalized PCA scores. \
- For normalized PCA scores, divide the loadings matrix by \
- the eigenvalues ("variance explained") and enter the result \
- into the eigenvectors text field.
Mean of original bands As first step of PCA, the data of each \
band is centered by subtracting the means. These must be added \