normalize in log scale #25
base: develop
@@ -5,6 +5,8 @@
 from joblib import Parallel, delayed
 from numpy import i0  # modified Bessel function of first kind order 0, I_0
 from scipy.special import iv  # modified Bessel function of first kind, I_v
+from scipy.special import i0e  # exp-scaled modified Bessel function of first kind order 0, I_0
+from scipy.special import ive  # exp-scaled modified Bessel function of first kind, I_v
 from scipy.special import logsumexp

 from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
@@ -56,24 +58,44 @@ def _vmf_log(X, kappa, mu):
     """Computs log(vMF(X, kappa, mu)) using built-in numpy/scipy Bessel
     approximations.

-    Works well on small kappa and mu.
+    Works well on small kappa and mu. (see comment below about working in log scale)
     """
     n_examples, n_features = X.shape
-    return np.log(_vmf_normalize(kappa, n_features) * np.exp(kappa * X.dot(mu).T))
+
+    #--- calculate log directly to avoid over-flow, so it works also with large kappa and mu
+    log_density = kappa * X.dot(mu).T
+    log_norm = _vmf_normalize_log(kappa, n_features)
+
+    return log_norm + log_density
+
+
+def _vmf_normalize_log(kappa, dim):
+    """ Compute normalization constant in log scale, using built-in exp-scaled numpy/scipy Bessel """
+    log_num = (dim / 2. - 1.) * np.log(kappa)
+    if dim / 2. - 1. < 1e-15:
+        log_denom = dim / 2. * np.log(2. * np.pi) + np.log(i0e(kappa)) + kappa
+    else:
+        log_denom = dim / 2. * np.log(2. * np.pi) + np.log(ive(dim / 2. - 1., kappa)) + kappa
+
+    if np.isinf(log_num):
+        raise ValueError("VMF scaling numerator was inf.")
+
+    if np.isinf(log_denom):
+        raise ValueError("VMF scaling denominator was inf.")
+
+    return log_num - log_denom
+
+
 def _vmf_normalize(kappa, dim):
     """Compute normalization constant using built-in numpy/scipy Bessel
     approximations.

     Works well on small kappa and mu.
     """
-    num = np.power(kappa, dim / 2.0 - 1.0)
+    num = np.power(kappa, dim / 2. - 1.)

-    if dim / 2.0 - 1.0 < 1e-15:
-        denom = np.power(2.0 * np.pi, dim / 2.0) * i0(kappa)
+    if dim / 2. - 1. < 1e-15:
+        denom = np.power(2. * np.pi, dim / 2.) * i0(kappa)
     else:
-        denom = np.power(2.0 * np.pi, dim / 2.0) * iv(dim / 2.0 - 1.0, kappa)
+        denom = np.power(2. * np.pi, dim / 2.) * iv(dim / 2. - 1., kappa)

     if np.isinf(num):
         raise ValueError("VMF scaling numerator was inf.")
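The substantive change in this hunk is computing the vMF normalization constant in log scale with the exponentially scaled Bessel functions (ive, i0e) instead of iv/i0, since iv(nu, kappa) overflows for large kappa and poisons the density with inf/nan. Below is a minimal standalone sketch of the same idea; the function names and the example kappa/dim values are illustrative (not the repository's own), and the dim == 2 special case handled with i0/i0e in the library is omitted here.

```python
import numpy as np
from scipy.special import iv, ive

# vMF normalization constant: C_d(kappa) = kappa^(d/2 - 1) / ((2*pi)^(d/2) * I_{d/2 - 1}(kappa))

def vmf_normalize_linear(kappa, dim):
    # Direct evaluation: iv() overflows for large kappa, which breaks everything downstream.
    num = np.power(kappa, dim / 2.0 - 1.0)
    denom = np.power(2.0 * np.pi, dim / 2.0) * iv(dim / 2.0 - 1.0, kappa)
    return num / denom

def vmf_normalize_log(kappa, dim):
    # ive(v, x) = iv(v, x) * exp(-x), so log(iv(v, x)) = log(ive(v, x)) + x stays representable.
    log_num = (dim / 2.0 - 1.0) * np.log(kappa)
    log_denom = dim / 2.0 * np.log(2.0 * np.pi) + np.log(ive(dim / 2.0 - 1.0, kappa)) + kappa
    return log_num - log_denom

kappa, dim = 5.0, 3
print(np.log(vmf_normalize_linear(kappa, dim)))  # finite, agrees with the log-scale value below
print(vmf_normalize_log(kappa, dim))

kappa = 5000.0
print(vmf_normalize_linear(kappa, dim))  # iv overflows -> denominator is inf, result collapses to 0.0
print(vmf_normalize_log(kappa, dim))     # still a finite log-value
```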
@@ -95,9 +117,9 @@ def _log_H_asymptotic(nu, kappa):
     from https://cran.r-project.org/web/packages/movMF/index.html
     """
     beta = np.sqrt((nu + 0.5) ** 2)
-    kappa_l = np.min([kappa, np.sqrt((3.0 * nu + 11.0 / 2.0) * (nu + 3.0 / 2.0))])
+    kappa_l = np.min([kappa, np.sqrt((3. * nu + 11. / 2.) * (nu + 3. / 2.))])
     return _S(kappa, nu + 0.5, beta) + (
-        _S(kappa_l, nu, nu + 2.0) - _S(kappa_l, nu + 0.5, beta)
+        _S(kappa_l, nu, nu + 2.) - _S(kappa_l, nu + 0.5, beta)
     )
@@ -110,9 +132,9 @@ def _S(kappa, alpha, beta):
     See "S <-" in movMF.R and utility function implementation notes from
     https://cran.r-project.org/web/packages/movMF/index.html
     """
-    kappa = 1.0 * np.abs(kappa)
-    alpha = 1.0 * alpha
-    beta = 1.0 * np.abs(beta)
+    kappa = 1. * np.abs(kappa)
+    alpha = 1. * alpha
+    beta = 1. * np.abs(beta)
     a_plus_b = alpha + beta
     u = np.sqrt(kappa ** 2 + beta ** 2)
     if alpha == 0:
@@ -137,7 +159,7 @@ def _vmf_log_asymptotic(X, kappa, mu):
     https://cran.r-project.org/web/packages/movMF/index.html
     """
     n_examples, n_features = X.shape
-    log_vfm = kappa * X.dot(mu).T + -_log_H_asymptotic(n_features / 2.0 - 1.0, kappa)
+    log_vfm = kappa * X.dot(mu).T + -_log_H_asymptotic(n_features / 2. - 1., kappa)

     return log_vfm
@@ -341,11 +363,11 @@ def _maximization(X, posterior, force_weights=None):

        # update concentration (kappa) [TODO: add other kappa approximations]
        rbar = center_norm / (n_examples * weights[cc])
-       concentrations[cc] = rbar * n_features - np.power(rbar, 3.0)
+       concentrations[cc] = rbar * n_features - np.power(rbar, 3.)
        if np.abs(rbar - 1.0) < 1e-10:
            concentrations[cc] = MAX_CONTENTRATION
        else:
-           concentrations[cc] /= 1.0 - np.power(rbar, 2.0)
+           concentrations[cc] /= 1. - np.power(rbar, 2.)

        # let python know we can free this (good for large dense X)
        del X_scaled
@@ -784,13 +806,13 @@ def _check_fit_data(self, X):
            else:
                n = np.linalg.norm(X[ee, :])

-            if np.abs(n - 1.0) > 1e-4:
+            if np.abs(n - 1.) > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(n))

        return X

    def _check_test_data(self, X):
-        X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES)
+        X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, warn_on_dtype=True)
        n_samples, n_features = X.shape
        expected_n_features = self.cluster_centers_.shape[1]
        if not n_features == expected_n_features:
@@ -805,7 +827,7 @@ def _check_test_data(self, X):
            else:
                n = np.linalg.norm(X[ee, :])

-            if np.abs(n - 1.0) > 1e-4:
+            if np.abs(n - 1.) > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(n))

        return X
@@ -884,7 +906,7 @@ def transform(self, X, y=None):
        if self.normalize:
            X = normalize(X)

-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
Review comment: Could you remove these changes? Alternatively, make sure you are working off of the most recent develop commit (I made this change a few days ago).

Reply: Yes, I'll pull the latest changes.

Reply: I updated sklearn to 0.22, and now one of the tests fails.

Reply: You'll need 0.22 and the latest develop branch (which updates things for 0.22).
        X = self._check_test_data(X)
        return self._transform(X)
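For context on the check_is_fitted discussion above: as far as I recall, scikit-learn 0.22 made the attributes argument optional, so check_is_fitted(estimator) simply looks for fitted attributes ending in "_", which is why the updated develop branch drops the explicit "cluster_centers_". A small sketch of the two call styles, using a generic KMeans estimator purely for illustration (assumes scikit-learn >= 0.22):

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted

X = np.random.RandomState(0).rand(20, 3)
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)

check_is_fitted(km)                      # 0.22+ style: passes if any fitted attribute (trailing "_") exists
check_is_fitted(km, "cluster_centers_")  # older style: name the fitted attribute explicitly

try:
    check_is_fitted(KMeans(n_clusters=2))  # unfitted estimator
except NotFittedError as err:
    print("not fitted:", err)
```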
@@ -913,7 +935,7 @@ def predict(self, X):
        if self.normalize:
            X = normalize(X)

-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")

        X = self._check_test_data(X)
        return _labels_inertia(X, self.cluster_centers_)[0]
@@ -934,12 +956,12 @@ def score(self, X, y=None):
        if self.normalize:
            X = normalize(X)

-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")
        X = self._check_test_data(X)
        return -_labels_inertia(X, self.cluster_centers_)[1]

    def log_likelihood(self, X):
-        check_is_fitted(self)
+        check_is_fitted(self, "cluster_centers_")

        return _log_likelihood(
            X, self.cluster_centers_, self.weights_, self.concentrations_
Review comment: Curious about the change in style for each of these floats. Since this PR is mostly focused on adding the log normalization factor, can we change these back so the diff reflects just the salient changes?

Reply: I noticed these changes when I ran git diff, and I am not sure how they happened; I definitely don't remember making them. Besides being unrelated to the log normalization, I don't change style in other people's code.

Reply: Once I pulled the changes from your latest develop branch, these unintended style changes went away and the floats are back to including the ".0". I also updated the new log-scale method to that style.

Reply: Sorry, I probably messed up my local fork somehow. Let me check and I'll update soon.

Reply: See my checklist in the main PR comment area. If you run black on the file when you are done, it should take care of everything for you. The final diff for this file should be pretty minimal (just the new function, the imports, and the change at the call site).

Reply: Yes, I already fixed all the style issues, but adding a test method will take some time (I'm pretty busy at the moment for side work), so I'll add the test method in a few days. Btw, in my fork, does it matter whether I add the changes to the develop branch or another (my own) branch for the PR?

Reply: I don't think the branch matters (more of a decision on your end) since they are separate develop branches. Your branch can be named anything like feature/normalize_log_scale, or leaving it as develop is fine if that's simpler for you. However, I don't see the changes reflected in the PR, so you'll need to figure out how to pull them into your existing develop branch or open a new PR.
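As a rough illustration of the test method mentioned above (purely a sketch: the import path and the parameter grid are assumptions, and the real test should follow the project's existing test layout), a consistency check between the linear-scale and log-scale normalizers could look something like this:

```python
import numpy as np
import pytest

# Assumed import path; adjust to wherever these helpers actually live in the package.
from spherecluster.von_mises_fisher_mixture import _vmf_normalize, _vmf_normalize_log


@pytest.mark.parametrize("dim", [2, 3, 10, 100])
@pytest.mark.parametrize("kappa", [0.1, 1.0, 10.0, 50.0])
def test_vmf_normalize_log_matches_linear(kappa, dim):
    # For moderate kappa/dim the linear-scale constant is finite, so its log
    # should agree with the value computed directly in log scale.
    expected = np.log(_vmf_normalize(kappa, dim))
    assert np.isclose(_vmf_normalize_log(kappa, dim), expected, rtol=1e-10)


def test_vmf_normalize_log_finite_for_large_kappa():
    # The point of the log-scale version: no overflow for large kappa.
    assert np.isfinite(_vmf_normalize_log(5000.0, 300))
```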