diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java index 486ec40694f..009be82de3a 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java @@ -92,6 +92,9 @@ public class LibMatrixReorg { //use csr instead of mcsr sparse block for rexpand columns / diag v2m public static final boolean SPARSE_OUTPUTS_IN_CSR = true; + + //use legacy in-place transpose dense instead of brenner in-place transpose dense + public static final boolean TRANSPOSE_IN_PLACE_DENSE_LEGACY = true; private enum ReorgType { TRANSPOSE, @@ -1789,19 +1792,23 @@ else if(cols == rows){ transposeInPlaceTrivial(in.getDenseBlockValues(), cols, k); } else { - if(cols comp) { + start += div; + continue count; + } + } + while(test > start && test < comp); + + count = simultaneousCycleShift(matrix, moved, rows, maxIndex, count, workSize, start, comp); + start += div; + } + } + while(count > 0); + + // update cycle divisor for the next set of cycles based on prime factors + for(int ip = 0; ip < numPrimes; ip++) { + if(iExponents[ip] != exponents[ip]) { + iExponents[ip]++; + div *= primes[ip]; + continue div; + } + iExponents[ip] = 0; + div /= powers[ip]; + } + } + + denseBlock.setDims(new int[] {cols, rows}); + in.setNumColumns(rows); + in.setNumRows(cols); + } + + /** + * Performs a simultaneous cycle shift for a cycle and its companion cycle. This method ensures that distinct cycles + * or self-dual cycles are handled correctly. This method is based on: Algorithm 2, Karlsson, + * https://webapps.cs.umu.se/uminf/reports/2009/011/part1.pdf and Algorithm 467, Brenner, + * https://dl.acm.org/doi/pdf/10.1145/355611.362542. + * + * @param matrix The matrix whose elements are being shifted. + * @param moved Boolean array tracking whether an element has already been moved. + * @param rows The number of rows in the matrix. + * @param maxIndex The maximum valid index in the matrix. + * @param count The number of elements left to process. + * @param workSize The length of moved. + * @param start The starting index for the cycle shift. + * @param comp The corresponding companion index. + * @return The updated count of elements remaining to shift. + */ + private static int simultaneousCycleShift(double[] matrix, boolean[] moved, int rows, int maxIndex, int count, + int workSize, int start, int comp) { + + int orig = start; + double val = matrix[orig]; + double cval = matrix[comp]; + + while(orig >= 0) { + // decrease the remaining shift count by orig and comp + count -= 2; + orig = simultaneousCycleShiftStep(matrix, moved, rows, maxIndex, workSize, start, orig, val, cval); + } + return count; + } + + private static int simultaneousCycleShiftStep(double[] matrix, boolean[] moved, int rows, int maxIndex, + int workSize, int start, int orig, double val, double cval) { + + int comp = maxIndex - orig; + int prevOrig = prevIndexCycle(orig, rows, (maxIndex + 1) / rows); + int prevComp = maxIndex - prevOrig; + + if(orig < workSize) + moved[orig] = true; + if(comp < workSize) + moved[comp] = true; + + if(prevOrig == start) { + // cycle and comp are distinct + matrix[orig] = val; + matrix[comp] = cval; + return -1; + } + else if(prevComp == start) { + // cycle is self dual + matrix[orig] = cval; + matrix[comp] = val; + return -1; + } + + // shift the values to their next positions + matrix[orig] = matrix[prevOrig]; + matrix[comp] = matrix[prevComp]; + // update + return prevOrig; + } + + private static int prevIndexCycle(int index, int rows, int cols) { + int lastIndex = rows * cols - 1; + if(index == lastIndex) + return lastIndex; + long temp = (long) index * cols; + return (int) (temp % lastIndex); + } + + /** + * Performs prime factorization of a given number n. The method calculates the prime factors of n, their exponents, + * powers and stores the results in the provided arrays. + * + * @param n The number to be factorized. + * @param primes Array to store the unique prime factors of n. + * @param exponents Array to store the exponents of the respective prime factors. + * @param powers Array to store the powers of the respective prime factors. + * @return The number of unique prime factors. + */ + private static int primeFactorization(int n, int[] primes, int[] exponents, int[] powers) { + int pIdx = -1; + int currDiv = 0; + int rest = n; + int div = 2; + + while(rest > 1) { + int quotient = rest / div; + if(rest - div * quotient == 0) { + if(div == currDiv) { + // current divisor is the same as the last one + powers[pIdx] *= div; + exponents[pIdx]++; + } + else { + // new prime factor found + pIdx++; + if(pIdx >= primes.length) + throw new RuntimeException("Not enough space, need to expand input arrays."); + + primes[pIdx] = div; + powers[pIdx] = div; + currDiv = div; + exponents[pIdx] = 1; + } + rest = quotient; + } + else { + // only odd divs + div = (div == 2) ? 3 : div + 2; + } + } + return pIdx + 1; + } + + private static int eulerTotient(int[] primes, int[] exponents, int[] iExponents, int numPrimes, int count) { + for(int ip = 0; ip < numPrimes; ip++) { + if(iExponents[ip] == exponents[ip]) { + continue; + } + count = (count / primes[ip]) * (primes[ip] - 1); + } + return count; + } } diff --git a/src/test/java/org/apache/sysds/test/component/matrix/libMatrixReorg/TransposeInPlaceBrennerTest.java b/src/test/java/org/apache/sysds/test/component/matrix/libMatrixReorg/TransposeInPlaceBrennerTest.java new file mode 100644 index 00000000000..87e169f3f3f --- /dev/null +++ b/src/test/java/org/apache/sysds/test/component/matrix/libMatrixReorg/TransposeInPlaceBrennerTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.component.matrix.libMatrixReorg; + +import org.apache.sysds.runtime.matrix.data.LibMatrixReorg; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.test.TestUtils; +import org.junit.Test; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TransposeInPlaceBrennerTest { + + @Test + public void transposeInPlaceDenseBrennerOnePrime() { + // 3*4-1 = 11 + testTransposeInPlaceDense(3, 4, 1); + } + + @Test + public void transposeInPlaceDenseBrennerTwoPrimes() { + // 4*9-1 = 5*7 + testTransposeInPlaceDense(4, 9, 0.96); + } + + @Test + public void transposeInPlaceDenseBrennerThreePrimes() { + // 2*53-1 = 3*5*7 + testTransposeInPlaceDense(2, 53, 0.52); + } + + @Test + public void transposeInPlaceDenseBrennerThreePrimesOneExpo() { + // 1151*2999-1 = (2**3)*3*143827 + testTransposeInPlaceDense(1151, 2999, 0.82); + } + + @Test + public void transposeInPlaceDenseBrennerThreePrimesAllExpos() { + // 9*10889-1 = (2**4)*(5**3)*(7**2) + testTransposeInPlaceDense(9, 10889, 0.74); + } + + @Test + public void transposeInPlaceDenseBrennerFourPrimesOneExpo() { + // 53*4421-1 = (2**3)*3*13*751 + testTransposeInPlaceDense(53, 4421, 0.75); + } + + @Test + public void transposeInPlaceDenseBrennerFivePrimes() { + // 3*3337-1 = 2*5*7*11*13 + testTransposeInPlaceDense(3, 3337, 0.68); + } + + @Test + public void transposeInPlaceDenseBrennerSixPrimesOneExpo() { + // 53*7177-1 = (2**2)*5*7*11*13*19 + testTransposeInPlaceDense(53, 7177, 0.78); + } + + @Test + public void transposeInPlaceDenseBrennerSevenPrimesThreeExpos() { + // 2087*17123-1 = (2**2)*3*(5**2)*(7**2)*11*13*17 + testTransposeInPlaceDense(2087, 17123, 0.79); + } + + @Test + public void transposeInPlaceDenseBrennerEightPrimes() { + // 347*27953-1 = 2*3*5*7*11*13*17*19 + testTransposeInPlaceDense(347, 27953, 0.86); + } + + @Test + public void transposeInPlaceDenseBrennerNinePrimes() { + // 317*703763-1 = 2*3*5*7*11*13*17*19*23 + MatrixBlock X = MatrixBlock.randOperations(317, 703763, 0.52); + + Exception exception = assertThrows(RuntimeException.class, + () -> LibMatrixReorg.transposeInPlaceDenseBrenner(X, 1)); + assertTrue(exception.getMessage().contains("Not enough space, need to expand input arrays.")); + } + + private void testTransposeInPlaceDense(int rows, int cols, double sparsity) { + MatrixBlock X = MatrixBlock.randOperations(rows, cols, sparsity); + MatrixBlock tX = LibMatrixReorg.transpose(X); + + LibMatrixReorg.transposeInPlaceDenseBrenner(X, 1); + + TestUtils.compareMatrices(X, tX, 0); + } + +}