From 593625380440713f62eb038116ca2850f523d1dc Mon Sep 17 00:00:00 2001
From: Keisuke Fujii <fujiisoup@gmail.com>
Date: Fri, 16 Mar 2018 22:44:39 +0900
Subject: [PATCH 1/2] Added group-lasso

---
 decomp/group_lasso.py      | 195 +++++++++++++++++++++++++++++++++++++
 decomp/math_utils/eigen.py |   2 +-
 tests/test_group_lasso.py  |  43 ++++++++
 3 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 decomp/group_lasso.py
 create mode 100644 tests/test_group_lasso.py

diff --git a/decomp/group_lasso.py b/decomp/group_lasso.py
new file mode 100644
index 0000000..2720fa3
--- /dev/null
+++ b/decomp/group_lasso.py
@@ -0,0 +1,195 @@
+import numpy as np
+from .utils.cp_compat import get_array_module
+from .utils import assertion, dtype
+from .math_utils import eigen
+
+
+AVAILABLE_METHODS = ['ista']
+_JITTER = 1.0e-15
+
+
+def predict(A, x):
+    return np.tensordot(x, A, axes=2)
+
+
+def loss(y, A, alpha, x, mask=None):
+    """
+    Returns a loss of the grouped lasso
+
+    Parameters
+    ----------
+    y: nd-array, size [..., n]
+    A: 3d sparse array, size [m, g, n]
+    x: nd-array, size [..., m, g]
+
+    Returns
+    -------
+    loss: float
+    """
+    n_samples = y.shape[-1]
+    if mask is not None:
+        n_samples = np.sum(mask, axis=-1, keepdims=True)
+
+    xA = np.tensordot(x, A, axes=2)
+    axes = tuple(np.arange(x.ndim - 1))
+    x_abs = np.sqrt(np.real(np.sum(x * np.conj(x), axis=axes, keepdims=True)))
+
+    fit_loss = np.sum((y - xA) * np.conj(y - xA)) / (2.0 * n_samples)
+    reg_loss = np.sum(alpha * x_abs)
+    return fit_loss + reg_loss
+
+
+def solve(y, A, alpha, x=None, method='ista', tol=1.0e-5, maxiter=1000,
+          mask=None, **kwargs):
+    """
+    Solve a group lasso problem.
+
+    argmin_x {1 / (2 * n) * |y - xA|^2 + alpha \sum_p \sqrt{\sum_j x^2}}
+
+    Parameters
+    ----------
+    y: nd-array
+        Data to be fitted, size [..., n]
+    A: 3d-array
+        Design matrix, sized [g, m, n]. g: group size. m: feature size.
+    x: 2d-array or None.
+        Initial guess of the solution, sized [..., g, m]
+    alpha: float or nd-array
+        Regularization parameter.
+    axes: integer or a tuple of integers.
+        Which axis should be grouped.
+    """
+    # Check all the class are numpy or cupy
+    xp = get_array_module(y, A, x, mask)
+
+    if x is None:
+        x = xp.zeros(y.shape[:-1] + A.shape[:-1], dtype=y.dtype)
+
+    assertion.assert_dtypes(y=y, A=A, x=x)
+    assertion.assert_dtypes(mask=mask, dtypes='f')
+    assertion.assert_nonnegative(mask)
+    assertion.assert_ndim('A', A, ndim=3)
+    assertion.assert_shapes('x', x, 'A', A, axes=2)
+    assertion.assert_shapes('y', y, 'x', x,
+                            axes=np.arange(x.ndim - 2).tolist())
+    assertion.assert_shapes('y', y, 'A', A, axes=[-1])
+    if mask is not None and mask.ndim == 1:
+        assertion.assert_shapes('y', y, 'mask', mask, axes=[-1])
+    else:
+        assertion.assert_shapes('y', y, 'mask', mask)
+    if method not in AVAILABLE_METHODS:
+        raise ValueError('Available methods are {0:s}. Given {1:s}'.format(
+                            str(AVAILABLE_METHODS), method))
+
+    assert A.dtype.kind != 'c' or method[-4:] != '_pos'
+    return solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=mask,
+                          **kwargs)
+
+
+def solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=None,
+                   **kwargs):
+    """ fast path for group lasso, without default value setting and
+    shape/dtype assertions.
+
+    In this method, some correction takes place,
+
+    alpha scaling:
+        We changed the model from
+            argmin_x {1 / (2 * n) * |y - xA|^2 - alpha |x|}
+        to
+            argmin_x {1 / 2 * |y - xA|^2 - alpha |x|}
+        by scaling alpha by n.
+        (Make sure with mask case, n is the number of valid entries)
+
+    A scaling
+        We also scale A, so that [AAt]_i,i is 1.
+    """
+    positive = False
+    if method[-4:] == '_pos':
+        method = method[:-4]
+        positive = True
+
+    if mask is not None and mask.ndim == 1:
+        y = y * mask
+        A = A * mask
+
+    # A scaling
+    if A.dtype.kind != 'c':
+        AAt_diag_sqrt = xp.sqrt(xp.sum(xp.square(A), axis=-1))  # size [g, m]
+    else:
+        AAt_diag_sqrt = xp.sqrt(xp.sum(xp.real(xp.conj(A) * A), axis=-1))
+    A = A / xp.expand_dims(AAt_diag_sqrt, axis=-1)
+    alpha = alpha / AAt_diag_sqrt  # size [g, m]
+    tol = tol * AAt_diag_sqrt
+    x = x * AAt_diag_sqrt
+
+    if mask is None or mask.ndim == 1:
+        # alpha scaling
+        if mask is not None:  # mask.ndim == 1
+            alpha = alpha * xp.sum(mask, axis=-1)
+        else:
+            alpha = alpha * A.shape[-1]
+        if method == 'ista':
+            it, x = _solve_ista(y, A, alpha, x, tol=tol, maxiter=maxiter,
+                                xp=xp, positive=positive)
+        else:
+            raise NotImplementedError('Method ' + method + ' is not yet '
+                                      'implemented.')
+    else:
+        raise NotImplementedError('Method ' + method + ' is not yet '
+                                  'implemented with mask.')
+    # not forget to restore x value.
+    return it, x / AAt_diag_sqrt
+
+
+def soft_threshold(x, y, xp):
+    """
+    soft-threasholding function
+
+    x: nd array.
+    y: positive float (array like)
+
+    Returns
+    -------
+    if x is float
+        x - y if x > y
+        x + y if x < -y
+        0 otherwise
+    """
+    axes = tuple(xp.arange(x.ndim - 1))
+    x_abs = np.sqrt(np.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True)))
+    sign = x / xp.maximum(x_abs, _JITTER)
+    return xp.maximum(x_abs - y, 0.0) * sign
+
+
+def _update(yAt, AAt, x0, Lalpha_inv, L_inv, xp):
+    dx = xp.swapaxes(yAt - np.tensordot(x0, AAt, axes=2), -1, -2)
+    return soft_threshold(x0 + Lalpha_inv * dx, L_inv, xp)
+
+
+def _update_positive(yAt, AAt, x0, Lalpha_inv, L_inv):
+    raise NotImplementedError
+
+
+def _solve_ista(y, A, alpha, x, tol, maxiter, positive, xp):
+    """ Fast path to solve lasso by ista method """
+    updator = _update_positive if positive else _update
+
+    At = xp.transpose(A, axes=(2, 1, 0))  # [n, g, m]
+    if A.dtype.kind == 'c':
+        At = xp.conj(At)
+    AAt = xp.tensordot(A, At, axes=1)  # [m, g, g, m]
+    AAt_flat = AAt.reshape(A.shape[0] * A.shape[1], -1)  # [m*g, g*m]
+    radius = eigen.spectral_radius_Gershgorin(AAt_flat, xp, keepdims=False)
+    Lalpha_inv = 1.0 / radius
+    L_inv = Lalpha_inv * alpha
+
+    yAt = np.tensordot(y, At, axes=1)  # [..., g, m]
+
+    for i in range(maxiter):
+        x_new = updator(yAt, AAt, x, Lalpha_inv, L_inv, xp)
+        if np.max(xp.abs(x_new - x) - tol) < 0.0:
+            return i, x_new
+        x = x_new
+
+    return maxiter - 1, x
diff --git a/decomp/math_utils/eigen.py b/decomp/math_utils/eigen.py
index 7d9e675..e1df19f 100644
--- a/decomp/math_utils/eigen.py
+++ b/decomp/math_utils/eigen.py
@@ -17,4 +17,4 @@ def spectral_radius_Gershgorin(X, xp, keepdims=False):
     X should be a matrix or batch of matrices, shape [..., n, n].
     The return shape is [..., 1]
     """
-    return xp.max(xp.sum(xp.abs(X), axis=-2), axis=-1, keepdims=True)
+    return xp.max(xp.sum(xp.abs(X), axis=-2), axis=-1, keepdims=keepdims)
diff --git a/tests/test_group_lasso.py b/tests/test_group_lasso.py
new file mode 100644
index 0000000..037297a
--- /dev/null
+++ b/tests/test_group_lasso.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pytest
+from decomp import group_lasso
+
+
+def construct_data(shape, n_feature, n_group, is_complex, seed=0):
+    """
+    Construct an example data
+    """
+    rng = np.random.RandomState(seed)
+
+    A = rng.randn(n_feature, n_group, shape[-1])
+    x = rng.randn(*(list(shape[:-1]) + [n_feature, n_group]))
+
+    ind = rng.choice(np.arange(n_group), int(n_group / 2))
+    x[ind] = 0.0
+
+    if not is_complex:
+        y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1
+        return y, A, x
+
+    A = A + 1.0j * rng.randn(n_feature, n_group, shape[-1])
+    x = x + 1.0j * rng.randn(*(list(shape[:-1]) + [n_feature, n_group]))
+    x[ind] = 0.0
+    y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1
+    return y, A, x
+
+
+@pytest.mark.parametrize('shape', [[10, ], [10, 15]])
+@pytest.mark.parametrize('alpha', [1.0, 0.1])
+@pytest.mark.parametrize('is_complex', [False, True])
+def test_decrease_loss(shape, alpha, is_complex):
+    y, A, x_true = construct_data(shape, 10, 3, is_complex)
+
+    it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1)
+    loss = group_lasso.loss(y, A, alpha, x)
+    for _ in range(100):
+        it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10)
+        new_loss = group_lasso.loss(y, A, alpha, x)
+
+        assert new_loss <= loss + 1.0e-4
+        loss = new_loss
+    assert not (x == 0).all()

From eb5718ebef4ddcb5e7ca0e0029c797f48906596b Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Mon, 19 Mar 2018 09:16:54 +0900
Subject: [PATCH 2/2] gpu support for group lasso

---
 decomp/__init__.py        |  2 +-
 decomp/group_lasso.py     | 56 ++++++++++++++++++++++++++++++---------
 tests/test_group_lasso.py | 12 +++++----
 3 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/decomp/__init__.py b/decomp/__init__.py
index 147aee9..47be5e0 100644
--- a/decomp/__init__.py
+++ b/decomp/__init__.py
@@ -1,4 +1,4 @@
-from . import lasso
+from . import lasso, group_lasso
 from . import nnls
 from . import dictionary_learning, nmf, template_matching
 from . import utils, math_utils, nmf_methods
diff --git a/decomp/group_lasso.py b/decomp/group_lasso.py
index 2720fa3..0de4fa1 100644
--- a/decomp/group_lasso.py
+++ b/decomp/group_lasso.py
@@ -4,12 +4,13 @@
 from .math_utils import eigen
 
 
-AVAILABLE_METHODS = ['ista']
+AVAILABLE_METHODS = ['ista', 'acc_ista']
 _JITTER = 1.0e-15
 
 
 def predict(A, x):
-    return np.tensordot(x, A, axes=2)
+    xp = get_array_module(A, x)
+    return xp.tensordot(x, A, axes=2)
 
 
 def loss(y, A, alpha, x, mask=None):
@@ -26,16 +27,17 @@ def loss(y, A, alpha, x, mask=None):
     -------
     loss: float
     """
+    xp = get_array_module(A, x)
     n_samples = y.shape[-1]
     if mask is not None:
-        n_samples = np.sum(mask, axis=-1, keepdims=True)
+        n_samples = xp.sum(mask, axis=-1, keepdims=True)
 
-    xA = np.tensordot(x, A, axes=2)
+    xA = xp.tensordot(x, A, axes=2)
     axes = tuple(np.arange(x.ndim - 1))
-    x_abs = np.sqrt(np.real(np.sum(x * np.conj(x), axis=axes, keepdims=True)))
+    x_abs = xp.sqrt(xp.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True)))
 
-    fit_loss = np.sum((y - xA) * np.conj(y - xA)) / (2.0 * n_samples)
-    reg_loss = np.sum(alpha * x_abs)
+    fit_loss = xp.real(xp.sum((y - xA) * xp.conj(y - xA)) / (2.0 * n_samples))
+    reg_loss = xp.sum(alpha * x_abs)
     return fit_loss + reg_loss
 
 
@@ -132,6 +134,9 @@ def solve_fastpath(y, A, alpha, x, tol, maxiter, method, xp, mask=None,
         if method == 'ista':
             it, x = _solve_ista(y, A, alpha, x, tol=tol, maxiter=maxiter,
                                 xp=xp, positive=positive)
+        elif method == 'acc_ista':
+            it, x = _solve_acc_ista(y, A, alpha, x, tol=tol, maxiter=maxiter,
+                                    xp=xp, positive=positive)
         else:
             raise NotImplementedError('Method ' + method + ' is not yet '
                                       'implemented.')
@@ -156,14 +161,14 @@ def soft_threshold(x, y, xp):
         x + y if x < -y
         0 otherwise
     """
-    axes = tuple(xp.arange(x.ndim - 1))
-    x_abs = np.sqrt(np.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True)))
+    axes = tuple(np.arange(x.ndim - 1))
+    x_abs = xp.sqrt(xp.real(xp.sum(x * xp.conj(x), axis=axes, keepdims=True)))
     sign = x / xp.maximum(x_abs, _JITTER)
     return xp.maximum(x_abs - y, 0.0) * sign
 
 
 def _update(yAt, AAt, x0, Lalpha_inv, L_inv, xp):
-    dx = xp.swapaxes(yAt - np.tensordot(x0, AAt, axes=2), -1, -2)
+    dx = xp.swapaxes(yAt - xp.tensordot(x0, AAt, axes=2), -1, -2)
     return soft_threshold(x0 + Lalpha_inv * dx, L_inv, xp)
 
 
@@ -184,12 +189,39 @@ def _solve_ista(y, A, alpha, x, tol, maxiter, positive, xp):
     Lalpha_inv = 1.0 / radius
     L_inv = Lalpha_inv * alpha
 
-    yAt = np.tensordot(y, At, axes=1)  # [..., g, m]
+    yAt = xp.tensordot(y, At, axes=1)  # [..., g, m]
 
     for i in range(maxiter):
         x_new = updator(yAt, AAt, x, Lalpha_inv, L_inv, xp)
-        if np.max(xp.abs(x_new - x) - tol) < 0.0:
+        if xp.max(xp.abs(x_new - x) - tol) < 0.0:
             return i, x_new
         x = x_new
 
     return maxiter - 1, x
+
+
+def _solve_acc_ista(y, A, alpha, x0, tol, maxiter, positive, xp):
+    """ Nesterovs' Accelerated Proximal Gradient """
+    updator = _update_positive if positive else _update
+
+    At = xp.transpose(A, axes=(2, 1, 0))  # [n, g, m]
+    if A.dtype.kind == 'c':
+        At = xp.conj(At)
+    AAt = xp.tensordot(A, At, axes=1)  # [m, g, g, m]
+    AAt_flat = AAt.reshape(A.shape[0] * A.shape[1], -1)  # [m*g, g*m]
+    radius = eigen.spectral_radius_Gershgorin(AAt_flat, xp, keepdims=False)
+    Lalpha_inv = 1.0 / radius
+    L_inv = Lalpha_inv * alpha
+
+    yAt = xp.tensordot(y, At, axes=1)  # [..., g, m]
+
+    v = x0
+    x0_new = x0
+    for i in range(maxiter):
+        x0 = x0_new
+        x0_new = updator(yAt, AAt, v, Lalpha_inv, L_inv, xp=xp)
+        v = x0_new + i / (i + 3) * (x0_new - x0)
+
+        if i % 10 == 0 and xp.max(xp.abs(x0_new - x0) - tol) < 0.0:
+            return i, x0_new
+    return maxiter - 1, x0
diff --git a/tests/test_group_lasso.py b/tests/test_group_lasso.py
index 037297a..3687009 100644
--- a/tests/test_group_lasso.py
+++ b/tests/test_group_lasso.py
@@ -1,4 +1,5 @@
 import numpy as np
+from decomp.utils.cp_compat import numpy_or_cupy as xp
 import pytest
 from decomp import group_lasso
 
@@ -17,25 +18,26 @@ def construct_data(shape, n_feature, n_group, is_complex, seed=0):
 
     if not is_complex:
         y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1
-        return y, A, x
+        return xp.array(y), xp.array(A), xp.array(x)
 
     A = A + 1.0j * rng.randn(n_feature, n_group, shape[-1])
     x = x + 1.0j * rng.randn(*(list(shape[:-1]) + [n_feature, n_group]))
     x[ind] = 0.0
     y = np.tensordot(x, A, axes=2) + rng.randn(*shape) * 0.1
-    return y, A, x
+    return xp.array(y), xp.array(A), xp.array(x)
 
 
+@pytest.mark.parametrize('method', ['ista', 'acc_ista'])
 @pytest.mark.parametrize('shape', [[10, ], [10, 15]])
 @pytest.mark.parametrize('alpha', [1.0, 0.1])
 @pytest.mark.parametrize('is_complex', [False, True])
-def test_decrease_loss(shape, alpha, is_complex):
+def test_decrease_loss(method, shape, alpha, is_complex):
     y, A, x_true = construct_data(shape, 10, 3, is_complex)
 
-    it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1)
+    it, x = group_lasso.solve(y, A, alpha, x=None, maxiter=1, method=method)
     loss = group_lasso.loss(y, A, alpha, x)
     for _ in range(100):
-        it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10)
+        it, x = group_lasso.solve(y, A, alpha, x=x, maxiter=10, method=method)
         new_loss = group_lasso.loss(y, A, alpha, x)
 
         assert new_loss <= loss + 1.0e-4