chore: initial refactoring of incremental spmd algos #2248

Open · wants to merge 12 commits into base: main
onedal/basic_statistics/incremental_basic_statistics.py — 14 changes: 10 additions & 4 deletions

@@ -71,8 +71,9 @@ def __init__(self, result_options="all"):

     def _reset(self):
         self._need_to_finalize = False
-        self._partial_result = self._get_backend(
-            "basic_statistics", None, "partial_compute_result"
+        # Not supported with spmd policy so IncrementalBasicStatistics must be specified
+        self._partial_result = IncrementalBasicStatistics._get_backend(
+            IncrementalBasicStatistics, "basic_statistics", None, "partial_compute_result"
         )

     def __getstate__(self):

Review comment (Contributor) on the added code comment: Love the comment
(Conversation marked as resolved by ethanglaser.)

Review comment (Contributor) on the new _get_backend call: @ahuber21 these changes are likely going to interact with #2168 (just as a heads up).
@@ -105,7 +106,10 @@ def partial_fit(self, X, weights=None, queue=None):
             Returns the instance itself.
         """
         self._queue = queue
-        policy = self._get_policy(queue, X)
+        # Not supported with spmd policy so IncrementalBasicStatistics must be specified
+        policy = IncrementalBasicStatistics._get_policy(
+            IncrementalBasicStatistics, queue, X
+        )

         X = _check_array(
             X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
@@ -123,7 +127,9 @@
             self._onedal_params = self._get_onedal_params(False, dtype=dtype)

         X_table, weights_table = to_table(X, weights, queue=queue)
-        self._partial_result = self._get_backend(
+        # Not supported with spmd policy so IncrementalBasicStatistics must be specified
+        self._partial_result = IncrementalBasicStatistics._get_backend(
+            IncrementalBasicStatistics,
             "basic_statistics",
             None,
             "partial_compute",
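Note: every file in this PR applies the same change — calls that used to go through self._get_backend(...) / self._get_policy(...) are rewritten as explicit, class-qualified calls such as IncrementalBasicStatistics._get_backend(IncrementalBasicStatistics, ...). Below is a minimal sketch of why that matters once an SPMD mixin is in the picture; the class names are illustrative stand-ins, not the real onedal base classes, and passing the class in place of self is assumed to work only because the lookup does not rely on instance state.

# Toy model of the dispatch problem; HostBase, SpmdBase, IncrementalThing and
# SpmdIncrementalThing are illustrative stand-ins, not the onedal classes.
class HostBase:
    def _get_backend(self, module, submodule, func):
        return f"host backend: {module}.{func}"


class SpmdBase(HostBase):
    def _get_backend(self, module, submodule, func):
        return f"spmd backend: {module}.{func}"


class IncrementalThing(HostBase):
    def _reset(self):
        # self._get_backend(...) would dispatch through the instance's MRO and pick
        # up the SPMD override when self is an SPMD subclass instance.  Naming the
        # class pins the host implementation; the class itself stands in for self
        # because the method ignores instance state in this toy example.
        return IncrementalThing._get_backend(
            IncrementalThing, "basic_statistics", None, "partial_compute_result"
        )


class SpmdIncrementalThing(SpmdBase, IncrementalThing):
    pass


print(SpmdIncrementalThing()._reset())  # host backend, even on an SPMD instance
print(IncrementalThing()._reset())      # host backend on a plain instance too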
onedal/covariance/incremental_covariance.py — 14 changes: 10 additions & 4 deletions

@@ -58,8 +58,9 @@ def __init__(self, method="dense", bias=False, assume_centered=False):

     def _reset(self):
         self._need_to_finalize = False
-        self._partial_result = self._get_backend(
-            "covariance", None, "partial_compute_result"
+        # Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
+        self._partial_result = IncrementalEmpiricalCovariance._get_backend(
+            IncrementalEmpiricalCovariance, "covariance", None, "partial_compute_result"
         )

     def __getstate__(self):
@@ -99,15 +100,20 @@ def partial_fit(self, X, y=None, queue=None):

         self._queue = queue

-        policy = self._get_policy(queue, X)
+        # Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
+        policy = IncrementalEmpiricalCovariance._get_policy(
+            IncrementalEmpiricalCovariance, queue, X
+        )

         X_table = to_table(X, queue=queue)

         if not hasattr(self, "_dtype"):
             self._dtype = X_table.dtype

         params = self._get_onedal_params(self._dtype)
-        self._partial_result = self._get_backend(
+        # Not supported with spmd policy so IncrementalEmpiricalCovariance must be specified
+        self._partial_result = IncrementalEmpiricalCovariance._get_backend(
+            IncrementalEmpiricalCovariance,
             "covariance",
             None,
             "partial_compute",
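Note: for readers unfamiliar with these estimators, the partial_fit shown above is meant to be called once per data block, with a single finalize step at the end. A hedged usage sketch follows; the import path and the partial_fit signature are taken from this diff, while finalize_fit and its no-argument call are an assumption, since only partial_fit and _reset appear here.

# Hedged driver loop for the incremental flow these classes implement.
import numpy as np

from onedal.covariance import IncrementalEmpiricalCovariance  # import path as in the diff

X = np.random.default_rng(0).standard_normal((1000, 5))

est = IncrementalEmpiricalCovariance()      # constructor signature shown in the hunk above
for batch in np.array_split(X, 4):
    est.partial_fit(batch, queue=None)      # accumulates into est._partial_result
est.finalize_fit()                          # assumed: merges the accumulated partial results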
onedal/decomposition/incremental_pca.py — 12 changes: 9 additions & 3 deletions

@@ -100,7 +100,10 @@ def __init__(

     def _reset(self):
         self._need_to_finalize = False
-        module = self._get_backend("decomposition", "dim_reduction")
+        # Not supported with spmd policy so IncrementalPCA must be specified
+        module = IncrementalPCA._get_backend(
+            IncrementalPCA, "decomposition", "dim_reduction"
+        )
         if hasattr(self, "components_"):
             del self.components_
         self._partial_result = module.partial_train_result()
@@ -154,14 +157,17 @@ def partial_fit(self, X, queue):

         self._queue = queue

-        policy = self._get_policy(queue, X)
+        # Not supported with spmd policy so IncrementalPCA must be specified
+        policy = IncrementalPCA._get_policy(IncrementalPCA, queue, X)
         X_table = to_table(X, queue=queue)

         if not hasattr(self, "_dtype"):
             self._dtype = X_table.dtype
             self._params = self._get_onedal_params(X_table)

-        self._partial_result = self._get_backend(
+        # Not supported with spmd policy so IncrementalPCA must be specified
+        self._partial_result = IncrementalPCA._get_backend(
+            IncrementalPCA,
             "decomposition",
             "dim_reduction",
             "partial_train",
onedal/decomposition/pca.py — 13 changes: 9 additions & 4 deletions

@@ -119,7 +119,8 @@ def _compute_noise_variance(self, n_components, n_sf_min):
         return 0.0

     def _create_model(self):
-        m = self._get_backend("decomposition", "dim_reduction", "model")
+        # Not supported with spmd policy so BasePCA must be specified
+        m = BasePCA._get_backend(BasePCA, "decomposition", "dim_reduction", "model")
         m.eigenvectors = to_table(self.components_)
         m.means = to_table(self.mean_)
         if self.whiten:
@@ -128,15 +129,19 @@ def _create_model(self):
         return m

     def predict(self, X, queue=None):
-        policy = self._get_policy(queue, X)
+        # Not supported with spmd policy so BasePCA must be specified
+        policy = BasePCA._get_policy(BasePCA, queue, X)
         model = self._create_model()
         X_table = to_table(X, queue=queue)
         params = self._get_onedal_params(X_table, stage="predict")

-        result = self._get_backend(
-            "decomposition", "dim_reduction", "infer", policy, params, model, X_table
+        # Not supported with spmd policy so BasePCA must be specified
+        result = BasePCA._get_backend(
+            BasePCA, "decomposition", "dim_reduction", "infer", policy, params, model, X_table
         )
         return from_table(result.transformed_data)

     transform = predict


 class PCA(BasePCA):
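Note: the transform = predict line in the context above is a plain class-level alias; both names are bound to the same function object when the class body executes. A small self-contained illustration (DemoPCA is a toy class, not the onedal one):

# Toy illustration of class-level method aliasing; DemoPCA is not the onedal class.
class DemoPCA:
    def predict(self, X):
        # pretend projection onto the first two components
        return [row[:2] for row in X]

    transform = predict  # second name for the same function object


p = DemoPCA()
X = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
assert p.transform(X) == p.predict(X)
assert DemoPCA.transform is DemoPCA.predict

Because the alias is created at class-definition time, a subclass that overrides predict would also need to re-alias transform. The test changes below switch to calling transform, which is the only one of the two names that exists on scikit-learn's PCA estimators.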
onedal/decomposition/tests/test_incremental_pca.py — 4 changes: 2 additions & 2 deletions

@@ -40,7 +40,7 @@ def test_on_gold_data(queue, is_deterministic, whiten, num_blocks, dtype):

     result = incpca.finalize_fit()

-    transformed_data = incpca.predict(X, queue=queue)
+    transformed_data = incpca.transform(X, queue=queue)

     expected_n_components_ = 2
     expected_components_ = np.array([[0.83849224, 0.54491354], [-0.54491354, 0.83849224]])

Review comment (Contributor): Just for conformance purposes to sklearn (though not strictly necessary in the onedal folder)?

Reply (Contributor, Author): exactly - pca predict does not exist in sklearn so would prefer to not use this convention if possible
@@ -128,7 +128,7 @@ def test_on_random_data(

     incpca.finalize_fit()

-    transformed_data = incpca.predict(X, queue=queue)
+    transformed_data = incpca.transform(X, queue=queue)
     tol = 3e-3 if transformed_data.dtype == np.float32 else 2e-6

     n_components = incpca.n_components_
onedal/linear_model/incremental_linear_model.py — 18 changes: 14 additions & 4 deletions

@@ -48,8 +48,12 @@ def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"):

     def _reset(self):
         self._need_to_finalize = False
-        self._partial_result = self._get_backend(
-            "linear_model", "regression", "partial_train_result"
+        # Not supported with spmd policy so IncrementalLinearRegression must be specified
+        self._partial_result = IncrementalLinearRegression._get_backend(
+            IncrementalLinearRegression,
+            "linear_model",
+            "regression",
+            "partial_train_result",
         )

     def __getstate__(self):
@@ -84,10 +88,16 @@ def partial_fit(self, X, y, queue=None):
         self : object
             Returns the instance itself.
         """
-        module = self._get_backend("linear_model", "regression")
+        # Not supported with spmd policy so IncrementalLinearRegression must be specified
+        module = IncrementalLinearRegression._get_backend(
+            IncrementalLinearRegression, "linear_model", "regression"
+        )

         self._queue = queue
-        policy = self._get_policy(queue, X)
+        # Not supported with spmd policy so IncrementalLinearRegression must be specified
+        policy = IncrementalLinearRegression._get_policy(
+            IncrementalLinearRegression, queue, X
+        )

         X, y = _check_X_y(
             X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
onedal/spmd/basic_statistics/incremental_basic_statistics.py — 49 changes: 1 addition & 48 deletions

@@ -14,58 +14,11 @@
 # limitations under the License.
 # ==============================================================================

-from daal4py.sklearn._utils import get_dtype
-
 from ...basic_statistics import (
     IncrementalBasicStatistics as base_IncrementalBasicStatistics,
 )
-from ...datatypes import to_table
 from .._base import BaseEstimatorSPMD


 class IncrementalBasicStatistics(BaseEstimatorSPMD, base_IncrementalBasicStatistics):
-    def _reset(self):
-        self._need_to_finalize = False
-        self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend(
-            "basic_statistics", None, "partial_compute_result"
-        )
-
-    def partial_fit(self, X, weights=None, queue=None):
-        """
-        Computes partial data for basic statistics
-        from data batch X and saves it to `_partial_result`.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Training data batch, where `n_samples` is the number of samples
-            in the batch, and `n_features` is the number of features.
-
-        queue : dpctl.SyclQueue
-            If not None, use this queue for computations.
-
-        Returns
-        -------
-        self : object
-            Returns the instance itself.
-        """
-        self._queue = queue
-        policy = super(base_IncrementalBasicStatistics, self)._get_policy(queue, X)
-        X_table, weights_table = to_table(X, weights, queue=queue)
-
-        if not hasattr(self, "_onedal_params"):
-            self._onedal_params = self._get_onedal_params(False, dtype=X_table.dtype)
-
-        self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend(
-            "basic_statistics",
-            None,
-            "partial_compute",
-            policy,
-            self._onedal_params,
-            self._partial_result,
-            X_table,
-            weights_table,
-        )
-
-        self._need_to_finalize = True
-        return self
+    pass
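Note: the deleted override above reached the non-SPMD backend by starting the super() search past the base class (super(base_IncrementalBasicStatistics, self)._get_backend(...)), which also skips BaseEstimatorSPMD in the MRO. The refactored base classes now pin the host class directly, so the SPMD subclass has nothing left to override and collapses to pass. A toy sketch of why the two spellings resolve to the same method — again with stand-in class names, not the onedal ones:

class Host:                      # stand-in for the non-SPMD estimator machinery
    def _get_backend(self, *args):
        return "host backend"


class Spmd(Host):                # stand-in for BaseEstimatorSPMD
    def _get_backend(self, *args):
        return "spmd backend"


class Incremental(Host):         # stand-in for the onedal base estimator
    def partial_compute(self):
        # new style (this PR): pin the host class explicitly
        return Incremental._get_backend(Incremental)


class SpmdIncremental(Spmd, Incremental):
    def old_partial_compute(self):
        # old style (deleted above): start the MRO search after Incremental,
        # which skips Spmd as well and therefore also reaches Host._get_backend
        return super(Incremental, self)._get_backend()


est = SpmdIncremental()
print(est.partial_compute())      # "host backend"
print(est.old_partial_compute())  # "host backend" as well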
onedal/spmd/covariance/incremental_covariance.py — 59 changes: 1 addition & 58 deletions

@@ -14,70 +14,13 @@
 # limitations under the License.
 # ==============================================================================

-import numpy as np
-
-from daal4py.sklearn._utils import get_dtype
-
 from ...covariance import (
     IncrementalEmpiricalCovariance as base_IncrementalEmpiricalCovariance,
 )
-from ...datatypes import to_table
-from ...utils import _check_array
 from .._base import BaseEstimatorSPMD


 class IncrementalEmpiricalCovariance(
     BaseEstimatorSPMD, base_IncrementalEmpiricalCovariance
 ):
-    def _reset(self):
-        self._need_to_finalize = False
-        self._partial_result = super(
-            base_IncrementalEmpiricalCovariance, self
-        )._get_backend("covariance", None, "partial_compute_result")
-
-    def partial_fit(self, X, y=None, queue=None):
-        """
-        Computes partial data for the covariance matrix
-        from data batch X and saves it to `_partial_result`.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Training data batch, where `n_samples` is the number of samples
-            in the batch, and `n_features` is the number of features.
-
-        y : Ignored
-            Not used, present for API consistency by convention.
-
-        queue : dpctl.SyclQueue
-            If not None, use this queue for computations.
-
-        Returns
-        -------
-        self : object
-            Returns the instance itself.
-        """
-        X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True)
-
-        self._queue = queue
-
-        policy = super(base_IncrementalEmpiricalCovariance, self)._get_policy(queue, X)
-
-        X_table = to_table(X, queue=queue)
-
-        if not hasattr(self, "_dtype"):
-            self._dtype = X_table.dtype
-
-        params = self._get_onedal_params(self._dtype)
-        self._partial_result = super(
-            base_IncrementalEmpiricalCovariance, self
-        )._get_backend(
-            "covariance",
-            None,
-            "partial_compute",
-            policy,
-            params,
-            self._partial_result,
-            X_table,
-        )
-        self._need_to_finalize = True
+    pass