From 593625380440713f62eb038116ca2850f523d1dc Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Fri, 16 Mar 2018 22:44:39 +0900 Subject: [PATCH 1/2] Added group-lasso --- decomp/group_lasso.py | 195 +++++++++++++++++++++++++++++++++++++ decomp/math_utils/eigen.py | 2 +- tests/test_group_lasso.py | 43 ++++++++ 3 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 decomp/group_lasso.py create mode 100644 tests/test_group_lasso.py diff --git a/decomp/group_lasso.py b/decomp/group_lasso.py new file mode 100644 index 0000000..2720fa3 --- /dev/null +++ b/decomp/group_lasso.py @@ -0,0 +1,195 @@ +import numpy as np +from .utils.cp_compat import get_array_module +from .utils import assertion, dtype +from .math_utils import eigen + + +AVAILABLE_METHODS = ['ista'] +_JITTER = 1.0e-15 + + +def predict(A, x): + return np.tensordot(x, A, axes=2) + + +def loss(y, A, alpha, x, mask=None): + """ + Returns a loss of the grouped lasso + + Parameters + ---------- + y: nd-array, size [..., n] + A: 3d sparse array, size [m, g, n] + x: nd-array, size [..., m, g] + + Returns + ------- + loss: float + """ + n_samples = y.shape[-1] + if mask is not None: + n_samples = np.sum(mask, axis=-1, keepdims=True) + + xA = np.tensordot(x, A, axes=2) + axes = tuple(np.arange(x.ndim - 1)) + x_abs = np.sqrt(np.real(np.sum(x * np.conj(x), axis=axes, keepdims=True))) + + fit_loss = np.sum((y - xA) * np.conj(y - xA)) / (2.0 * n_samples) + reg_loss = np.sum(alpha * x_abs) + return fit_loss + reg_loss + + +def solve(y, A, alpha, x=None, method='ista', tol=1.0e-5, maxiter=1000, + mask=None, **kwargs): + """ + Solve a group lasso problem. + + argmin_x {1 / (2 * n) * |y - xA|^2 + alpha \sum_p \sqrt{\sum_j x^2}} + + Parameters + ---------- + y: nd-array + Data to be fitted, size [..., n] + A: 3d-array + Design matrix, sized [g, m, n]. g: group size. m: feature size. + x: 2d-array or None. + Initial guess of the solution, sized [..., g, m] + alpha: float or nd-array + Regularization parameter. + axes: integer or a tuple of integers. + Which axis should be grouped. + """ + # Check all the class are numpy or cupy + xp = get_array_module(y, A, x, mask) + + if x is None: + x = xp.zeros(y.shape[:-1] + A.shape[:-1], dtype=y.dtype) + + assertion.assert_dtypes(y=y, A=A, x=x) + assertion.assert_dtypes(mask=mask, dtypes='f') + assertion.assert_nonnegative(mask) + assertion.assert_ndim('A', A, ndim=3) + assertion.assert_shapes('x', x, 'A', A, axes=2) + assertion.assert_shapes('y', y, 'x', x, + axes=np.arange(x.ndim - 2).tolist()) + assertion.assert_shapes('y', y, 'A', A, axes=[-1]) + if mask is not None and mask.ndim == 1: + assertion.assert_shapes('y', y, 'mask', mask, axes=[-1]) + else: + assertion.assert_shapes('y', y, 'mask', mask) + if method not in AVAILABLE_METHODS: + raise ValueError('Available methods are {0:s}. Given {1:s}'.format( + str(AVAILABLE_METHODS), method)) + + assert A.dtype.kind != 'c' or method[-4:] != '_pos' + return solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=mask, + **kwargs) + + +def solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=None, + **kwargs): + """ fast path for group lasso, without default value setting and + shape/dtype assertions. + + In this method, some correction takes place, + + alpha scaling: + We changed the model from + argmin_x {1 / (2 * n) * |y - xA|^2 - alpha |x|} + to + argmin_x {1 / 2 * |y - xA|^2 - alpha |x|} + by scaling alpha by n. + (Make sure with mask case, n is the number of valid entries) + + A scaling + We also scale A, so that [AAt]_i,i is 1. + """ + positive = False + if method[-4:] == '_pos': + method = method[:-4] + positive = True + + if mask is not None and mask.ndim == 1: + y = y * mask + A = A * mask + + # A scaling + if A.dtype.kind != 'c': + AAt_diag_sqrt = xp.sqrt(xp.sum(xp.square(A), axis=-1)) # size [g, m] + else: + AAt_diag_sqrt = xp.sqrt(xp.sum(xp.real(xp.conj(A) * A), axis=-1)) + A = A / xp.expand_dims(AAt_diag_sqrt, axis=-1) + alpha = alpha / AAt_diag_sqrt # size [g, m] + tol = tol * AAt_diag_sqrt + x = x * AAt_diag_sqrt + + if mask is None or mask.ndim == 1: + # alpha scaling + if mask is not None: # mask.ndim == 1 + alpha = alpha * xp.sum(mask, axis=-1) + else: + alpha = alpha * A.shape[-1] + if method == 'ista': + it, x = _solve_ista(y, A, alpha, x, tol=tol, maxiter=maxiter, + xp=xp, positive=positive) + else: + raise NotImplementedError('Method ' + method + ' is not yet ' + 'implemented.') + else: + raise NotImplementedError('Method ' + method + ' is not yet ' + 'implemented with mask.') + # not forget to restore x value. + return it, x / AAt_diag_sqrt + + +def soft_threshold(x, y, xp): + """ + soft-threasholding function + + x: nd array. + y: positive float (array like) + + Returns + ------- + if x is float + x - y if x > y + x + y if x < -y + 0 otherwise + """ + axes = tuple(xp.arange(x.ndim - 1)) + x_abs = np.sqrt(np.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True))) + sign = x / xp.maximum(x_abs, _JITTER) + return xp.maximum(x_abs - y, 0.0) * sign + + +def _update(yAt, AAt, x0, Lalpha_inv, L_inv, xp): + dx = xp.swapaxes(yAt - np.tensordot(x0, AAt, axes=2), -1, -2) + return soft_threshold(x0 + Lalpha_inv * dx, L_inv, xp) + + +def _update_positive(yAt, AAt, x0, Lalpha_inv, L_inv): + raise NotImplementedError + + +def _solve_ista(y, A, alpha, x, tol, maxiter, positive, xp): + """ Fast path to solve lasso by ista method """ + updator = _update_positive if positive else _update + + At = xp.transpose(A, axes=(2, 1, 0)) # [n, g, m] + if A.dtype.kind == 'c': + At = xp.conj(At) + AAt = xp.tensordot(A, At, axes=1) # [m, g, g, m] + AAt_flat = AAt.reshape(A.shape[0] * A.shape[1], -1) # [m*g, g*m] + radius = eigen.spectral_radius_Gershgorin(AAt_flat, xp, keepdims=False) + Lalpha_inv = 1.0 / radius + L_inv = Lalpha_inv * alpha + + yAt = np.tensordot(y, At, axes=1) # [..., g, m] + + for i in range(maxiter): + x_new = updator(yAt, AAt, x, Lalpha_inv, L_inv, xp) + if np.max(xp.abs(x_new - x) - tol) < 0.0: + return i, x_new + x = x_new + + return maxiter - 1, x diff --git a/decomp/math_utils/eigen.py b/decomp/math_utils/eigen.py index 7d9e675..e1df19f 100644 --- a/decomp/math_utils/eigen.py +++ b/decomp/math_utils/eigen.py @@ -17,4 +17,4 @@ def spectral_radius_Gershgorin(X, xp, keepdims=False): X should be a matrix or batch of matrices, shape [..., n, n]. The return shape is [..., 1] """ - return xp.max(xp.sum(xp.abs(X), axis=-2), axis=-1, keepdims=True) + return xp.max(xp.sum(xp.abs(X), axis=-2), axis=-1, keepdims=keepdims) diff --git a/tests/test_group_lasso.py b/tests/test_group_lasso.py new file mode 100644 index 0000000..037297a --- /dev/null +++ b/tests/test_group_lasso.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest +from decomp import group_lasso + + +def construct_data(shape, n_feature, n_group, is_complex, seed=0): + """ + Construct an example data + """ + rng = np.random.RandomState(seed) + + A = rng.randn(n_feature, n_group, shape[-1]) + x = rng.randn(*(list(shape[:-1]) + [n_feature, n_group])) + + ind = rng.choice(np.arange(n_group), int(n_group / 2)) + x[ind] = 0.0 + + if not is_complex: + y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1 + return y, A, x + + A = A + 1.0j * rng.randn(n_feature, n_group, shape[-1]) + x = x + 1.0j * rng.randn(*(list(shape[:-1]) + [n_feature, n_group])) + x[ind] = 0.0 + y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1 + return y, A, x + + +@pytest.mark.parametrize('shape', [[10, ], [10, 15]]) +@pytest.mark.parametrize('alpha', [1.0, 0.1]) +@pytest.mark.parametrize('is_complex', [False, True]) +def test_decrease_loss(shape, alpha, is_complex): + y, A, x_true = construct_data(shape, 10, 3, is_complex) + + it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1) + loss = group_lasso.loss(y, A, alpha, x) + for _ in range(100): + it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10) + new_loss = group_lasso.loss(y, A, alpha, x) + + assert new_loss <= loss + 1.0e-4 + loss = new_loss + assert not (x == 0).all() From eb5718ebef4ddcb5e7ca0e0029c797f48906596b Mon Sep 17 00:00:00 2001 From: keisukefujii Date: Mon, 19 Mar 2018 09:16:54 +0900 Subject: [PATCH 2/2] gpu support for group lasso --- decomp/__init__.py | 2 +- decomp/group_lasso.py | 56 ++++++++++++++++++++++++++++++--------- tests/test_group_lasso.py | 12 +++++---- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/decomp/__init__.py b/decomp/__init__.py index 147aee9..47be5e0 100644 --- a/decomp/__init__.py +++ b/decomp/__init__.py @@ -1,4 +1,4 @@ -from . import lasso +from . import lasso, group_lasso from . import nnls from . import dictionary_learning, nmf, template_matching from . import utils, math_utils, nmf_methods diff --git a/decomp/group_lasso.py b/decomp/group_lasso.py index 2720fa3..0de4fa1 100644 --- a/decomp/group_lasso.py +++ b/decomp/group_lasso.py @@ -4,12 +4,13 @@ from .math_utils import eigen -AVAILABLE_METHODS = ['ista'] +AVAILABLE_METHODS = ['ista', 'acc_ista'] _JITTER = 1.0e-15 def predict(A, x): - return np.tensordot(x, A, axes=2) + xp = get_array_module(A, x) + return xp.tensordot(x, A, axes=2) def loss(y, A, alpha, x, mask=None): @@ -26,16 +27,17 @@ def loss(y, A, alpha, x, mask=None): ------- loss: float """ + xp = get_array_module(A, x) n_samples = y.shape[-1] if mask is not None: - n_samples = np.sum(mask, axis=-1, keepdims=True) + n_samples = xp.sum(mask, axis=-1, keepdims=True) - xA = np.tensordot(x, A, axes=2) + xA = xp.tensordot(x, A, axes=2) axes = tuple(np.arange(x.ndim - 1)) - x_abs = np.sqrt(np.real(np.sum(x * np.conj(x), axis=axes, keepdims=True))) + x_abs = xp.sqrt(xp.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True))) - fit_loss = np.sum((y - xA) * np.conj(y - xA)) / (2.0 * n_samples) - reg_loss = np.sum(alpha * x_abs) + fit_loss = xp.real(xp.sum((y - xA) * xp.conj(y - xA)) / (2.0 * n_samples)) + reg_loss = xp.sum(alpha * x_abs) return fit_loss + reg_loss @@ -132,6 +134,9 @@ def solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=None, if method == 'ista': it, x = _solve_ista(y, A, alpha, x, tol=tol, maxiter=maxiter, xp=xp, positive=positive) + elif method == 'acc_ista': + it, x = _solve_acc_ista(y, A, alpha, x, tol=tol, maxiter=maxiter, + xp=xp, positive=positive) else: raise NotImplementedError('Method ' + method + ' is not yet ' 'implemented.') @@ -156,14 +161,14 @@ def soft_threshold(x, y, xp): x + y if x < -y 0 otherwise """ - axes = tuple(xp.arange(x.ndim - 1)) - x_abs = np.sqrt(np.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True))) + axes = tuple(np.arange(x.ndim - 1)) + x_abs = xp.sqrt(xp.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True))) sign = x / xp.maximum(x_abs, _JITTER) return xp.maximum(x_abs - y, 0.0) * sign def _update(yAt, AAt, x0, Lalpha_inv, L_inv, xp): - dx = xp.swapaxes(yAt - np.tensordot(x0, AAt, axes=2), -1, -2) + dx = xp.swapaxes(yAt - xp.tensordot(x0, AAt, axes=2), -1, -2) return soft_threshold(x0 + Lalpha_inv * dx, L_inv, xp) @@ -184,12 +189,39 @@ def _solve_ista(y, A, alpha, x, tol, maxiter, positive, xp): Lalpha_inv = 1.0 / radius L_inv = Lalpha_inv * alpha - yAt = np.tensordot(y, At, axes=1) # [..., g, m] + yAt = xp.tensordot(y, At, axes=1) # [..., g, m] for i in range(maxiter): x_new = updator(yAt, AAt, x, Lalpha_inv, L_inv, xp) - if np.max(xp.abs(x_new - x) - tol) < 0.0: + if xp.max(xp.abs(x_new - x) - tol) < 0.0: return i, x_new x = x_new return maxiter - 1, x + + +def _solve_acc_ista(y, A, alpha, x0, tol, maxiter, positive, xp): + """ Nesterovs' Accelerated Proximal Gradient """ + updator = _update_positive if positive else _update + + At = xp.transpose(A, axes=(2, 1, 0)) # [n, g, m] + if A.dtype.kind == 'c': + At = xp.conj(At) + AAt = xp.tensordot(A, At, axes=1) # [m, g, g, m] + AAt_flat = AAt.reshape(A.shape[0] * A.shape[1], -1) # [m*g, g*m] + radius = eigen.spectral_radius_Gershgorin(AAt_flat, xp, keepdims=False) + Lalpha_inv = 1.0 / radius + L_inv = Lalpha_inv * alpha + + yAt = xp.tensordot(y, At, axes=1) # [..., g, m] + + v = x0 + x0_new = x0 + for i in range(maxiter): + x0 = x0_new + x0_new = updator(yAt, AAt, v, Lalpha_inv, L_inv, xp=xp) + v = x0_new + i / (i + 3) * (x0_new - x0) + + if i % 10 == 0 and xp.max(xp.abs(x0_new - x0) - tol) < 0.0: + return i, x0_new + return maxiter - 1, x0 diff --git a/tests/test_group_lasso.py b/tests/test_group_lasso.py index 037297a..3687009 100644 --- a/tests/test_group_lasso.py +++ b/tests/test_group_lasso.py @@ -1,4 +1,5 @@ import numpy as np +from decomp.utils.cp_compat import numpy_or_cupy as xp import pytest from decomp import group_lasso @@ -17,25 +18,26 @@ def construct_data(shape, n_feature, n_group, is_complex, seed=0): if not is_complex: y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1 - return y, A, x + return xp.array(y), xp.array(A), xp.array(x) A = A + 1.0j * rng.randn(n_feature, n_group, shape[-1]) x = x + 1.0j * rng.randn(*(list(shape[:-1]) + [n_feature, n_group])) x[ind] = 0.0 y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1 - return y, A, x + return xp.array(y), xp.array(A), xp.array(x) +@pytest.mark.parametrize('method', ['ista', 'acc_ista']) @pytest.mark.parametrize('shape', [[10, ], [10, 15]]) @pytest.mark.parametrize('alpha', [1.0, 0.1]) @pytest.mark.parametrize('is_complex', [False, True]) -def test_decrease_loss(shape, alpha, is_complex): +def test_decrease_loss(method, shape, alpha, is_complex): y, A, x_true = construct_data(shape, 10, 3, is_complex) - it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1) + it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1, method=method) loss = group_lasso.loss(y, A, alpha, x) for _ in range(100): - it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10) + it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10, method=method) new_loss = group_lasso.loss(y, A, alpha, x) assert new_loss <= loss + 1.0e-4