From a06ea0f37c01c47ca4251c6f95f7b49ffe05454e Mon Sep 17 00:00:00 2001 From: Sebastian Baunsgaard Date: Fri, 7 Feb 2025 17:50:00 +0100 Subject: [PATCH] [SYSTEMDS-3832] MatrixBlock Append performance This commit contains a new performance measuring script for appending matrices. See the PR for performance details Closes #2223 --- .../org/apache/sysds/performance/Main.java | 4 + .../org/apache/sysds/performance/README.md | 7 ++ .../apache/sysds/performance/TimingUtils.java | 88 +++++++++++-------- .../performance/compression/APerfTest.java | 25 ++++-- .../performance/matrix/MatrixAppend.java | 86 ++++++++++++++++++ src/test/scripts/performance/append.sh | 43 +++++++++ 6 files changed, 210 insertions(+), 43 deletions(-) create mode 100644 src/test/java/org/apache/sysds/performance/matrix/MatrixAppend.java create mode 100755 src/test/scripts/performance/append.sh diff --git a/src/test/java/org/apache/sysds/performance/Main.java b/src/test/java/org/apache/sysds/performance/Main.java index d7f85c9b788..fc749b56df5 100644 --- a/src/test/java/org/apache/sysds/performance/Main.java +++ b/src/test/java/org/apache/sysds/performance/Main.java @@ -31,6 +31,7 @@ import org.apache.sysds.performance.generators.GenMatrices; import org.apache.sysds.performance.generators.IGenerate; import org.apache.sysds.performance.generators.MatrixFile; +import org.apache.sysds.performance.matrix.MatrixAppend; import org.apache.sysds.performance.matrix.MatrixBinaryCellPerf; import org.apache.sysds.performance.matrix.MatrixMulPerformance; import org.apache.sysds.performance.matrix.MatrixReplacePerf; @@ -135,6 +136,9 @@ private static void exec(int prog, String[] args) throws Exception { case 1007: Transform.main(args); break; + case 1008: + MatrixAppend.main(args); + break; default: break; } diff --git a/src/test/java/org/apache/sysds/performance/README.md b/src/test/java/org/apache/sysds/performance/README.md index 20d2757c805..4945afd9ab5 100644 --- 
a/src/test/java/org/apache/sysds/performance/README.md +++ b/src/test/java/org/apache/sysds/performance/README.md @@ -89,3 +89,10 @@ transform encode ```bash java -jar -agentpath:$HOME/Programs/profiler/lib/libasyncProfiler.so=start,event=cpu,file=temp/log.html -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1007 ``` + + +append matrix sequence + +```bash +./src/test/scripts/performance/append.sh +``` diff --git a/src/test/java/org/apache/sysds/performance/TimingUtils.java b/src/test/java/org/apache/sysds/performance/TimingUtils.java index 506d49cc861..e1933438f5e 100644 --- a/src/test/java/org/apache/sysds/performance/TimingUtils.java +++ b/src/test/java/org/apache/sysds/performance/TimingUtils.java @@ -32,51 +32,26 @@ public interface TimingUtils { /** A specification enum for the type of statistics to gather from the time measurements */ public enum StatsType { - MEAN_STD; + MEAN_STD, MEAN_STD_Q1; } /** The specified measurement to use in this case. Can be set from any of the programs */ - public static StatsType st = StatsType.MEAN_STD; - - // /** - // * Time the given function call - // * - // * @param f The function to execute - // * @return The time it took - // */ - // public static double time(F f) { - // Timing time = new Timing(true); - // f.run(); - // return time.stop(); - // } - - // /** - // * Time the function and print using the string given prepended. - // * - // * @param f The function to time - // * @param p The print statement - // */ - // public static void time(F f, String p) { - // Timing time = new Timing(true); - // f.run(); - // System.out.print(p); - // System.out.println(time.stop()); - // } + // public static StatsType st = StatsType.MEAN_STD; -/** + /** * Time the function given assuming that it should put result into the given time array at index i. 
* - * @param f The function to time + * @param f The function to time * @param rep the number of repetitions */ public static double[] time(F f, int rep) { double[] times = new double[rep]; - for(int i = 0; i < rep; i ++) + for(int i = 0; i < rep; i++) time(f, times, i); - + return times; } - + /** * Time the function given assuming that it should put result into the given time array at index i. * @@ -116,14 +91,31 @@ public static double[] time(F f, F c, F b, int rep, IGenerate bq) throws Inte } /** - * Calculate the statistics of the times executed The default is to calculate the mean and standard deviation and - * return that as a string + * Calculate the statistics of the times executed + *

+ * The default is to calculate the mean and standard deviation and return that as a string * - * @param v The times observed + * @param v The times observed + * @param st The type of stats to print * @return The status string. */ public static String stats(double[] v) { + return statsMeanSTD(v); + } + + /** + * Calculate the statistics of the times executed, given the stats type provided + *

+ * The default is to calculate the mean and standard deviation and return that as a string + * + * @param v The times observed + * @param st The type of stats to print + * @return The status string. + */ + public static String stats(double[] v, StatsType st) { switch(st) { + case MEAN_STD_Q1: + return statsMeanSTDQ1(v); case MEAN_STD: default: return statsMeanSTD(v); @@ -151,6 +143,32 @@ private static String statsMeanSTD(double[] v) { return String.format("%8.3f+-%7.3f ms", mean, std); } + private static String statsMeanSTDQ1(double[] v) { + final int l = v.length; + final int remove = (int) Math.floor(l * 0.05); + Arrays.sort(v); + + double q1 = v[v.length - 1 - (int) (Math.floor((double) v.length / 100))]; + double q2p5 = v[v.length - 1 - (int) (Math.floor((double) v.length / 40))]; + double q5 = v[v.length - 1 - (int) (Math.floor((double) v.length / 20))]; + double q10 = v[v.length - 1 - (int) (Math.floor((double) v.length / 10))]; + + double total = 0; + final int el = v.length - remove * 2; + for(int i = remove; i < l - remove; i++) + total += v[i]; + + double mean = total / el; + + double var = 0; + for(int i = remove; i < l - remove; i++) + var += Math.pow(Math.abs(v[i] - mean), 2); + + double std = Math.sqrt(var / el); + + return String.format("%8.3f+-%7.3f ms [q1:%7.3f, q2.5:%7.3f, q5:%7.3f, q10:%7.3f]", mean, std, q1, q2p5, q5, q10); + } + /** * Interface method to enable timed calling from other Classes */ diff --git a/src/test/java/org/apache/sysds/performance/compression/APerfTest.java b/src/test/java/org/apache/sysds/performance/compression/APerfTest.java index 2c49393384f..0ac88233f54 100644 --- a/src/test/java/org/apache/sysds/performance/compression/APerfTest.java +++ b/src/test/java/org/apache/sysds/performance/compression/APerfTest.java @@ -23,6 +23,7 @@ import org.apache.sysds.performance.TimingUtils; import org.apache.sysds.performance.TimingUtils.F; +import org.apache.sysds.performance.TimingUtils.StatsType; import 
org.apache.sysds.performance.generators.IGenerate; public abstract class APerfTest { @@ -39,19 +40,27 @@ public abstract class APerfTest { /** Warmup iterations */ protected final int W; + /** The type of statistics to use */ + protected final StatsType st; + protected APerfTest(int N, IGenerate gen) { - ret = new ArrayList<>(N); - this.gen = gen; - this.N = N; - this.W = 10; + this(N, 10, gen, StatsType.MEAN_STD); } + protected APerfTest(int N, IGenerate gen, StatsType st) { + this(N, 10, gen, st); + } protected APerfTest(int N, int W, IGenerate gen) { + this(N, W, gen, StatsType.MEAN_STD); + } + + protected APerfTest(int N, int W, IGenerate gen, StatsType st) { ret = new ArrayList<>(N); this.gen = gen; this.N = N; - this.W = 10; + this.W = W; + this.st = st; } protected void execute(F f, String name) throws InterruptedException { @@ -70,7 +79,7 @@ protected void execute(F f, F c, F b, String name) throws InterruptedException { ret.clear(); double[] times = TimingUtils.time(f, c, b, N, gen); String retS = makeResString(times); - System.out.println(String.format("%35s, %s, %10s", name, TimingUtils.stats(times), retS)); + System.out.println(String.format("%35s, %s, %10s", name, TimingUtils.stats(times, st), retS)); } protected void warmup(F f, int n) throws InterruptedException { @@ -92,7 +101,7 @@ protected void execute(F f, F c, F b, String name, int N) throws InterruptedExce ret.clear(); double[] times = TimingUtils.time(f, c, b, N, gen); String retS = makeResString(times); - System.out.println(String.format("%35s, %s, %10s", name, TimingUtils.stats(times), retS)); + System.out.println(String.format("%35s, %s, %10s", name, TimingUtils.stats(times, st), retS)); } protected abstract String makeResString(); @@ -106,7 +115,7 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append(String.format("%20s ", this.getClass().getSimpleName())); sb.append(" Repetitions: ").append(N).append("\n"); - sb.append(String.format("%20s ","Generator:")); + 
sb.append(String.format("%20s ", "Generator:")); sb.append(gen); sb.append("\n"); return sb.toString(); diff --git a/src/test/java/org/apache/sysds/performance/matrix/MatrixAppend.java b/src/test/java/org/apache/sysds/performance/matrix/MatrixAppend.java new file mode 100644 index 00000000000..75554bb830b --- /dev/null +++ b/src/test/java/org/apache/sysds/performance/matrix/MatrixAppend.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.sysds.performance.matrix;
+
+import org.apache.sysds.performance.TimingUtils.StatsType;
+import org.apache.sysds.performance.compression.APerfTest;
+import org.apache.sysds.performance.generators.ConstMatrix;
+import org.apache.sysds.performance.generators.GenMatrices;
+import org.apache.sysds.performance.generators.IGenerate;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+
+/** Performance test that times {@code MatrixBlock.append} of n generated blocks onto one base block. */
+public class MatrixAppend extends APerfTest {
+	final boolean cbind;
+
+	final MatrixBlock base;
+	final MatrixBlock[] others;
+	final int n;
+
+	/** Takes one block from gen for the ConstMatrix handed to the parent, one as base, and n more as the appended blocks. */
+	public MatrixAppend(int N, int n, IGenerate gen, boolean cbind) {
+		super(N, 0, new ConstMatrix(gen.take()), StatsType.MEAN_STD_Q1);
+		this.cbind = cbind;
+		this.n = n;
+		base = gen.take();
+
+		others = new MatrixBlock[n];
+		for(int i = 0; i < n; i++)
+			others[i] = gen.take();
+	}
+
+	/** Hands the append call to APerfTest.execute, labelled with the base block's rows, columns, sparsity, n and N. */
+	public void run() throws Exception {
+		execute(() -> append(base, others), //
+			String.format("appending: rows:%5d cols:%5d sp:%3.1f Blocks:%4d rep:%6d ", //
+				base.getNumRows(), base.getNumColumns(), base.getSparsity(), n, N));
+	}
+
+	/** The timed operation: appends others to a, passing null as the second argument and the cbind flag as the third. */
+	private void append(MatrixBlock a, MatrixBlock[] others) {
+		a.append(others, null, cbind);
+	}
+
+	@Override
+	protected String makeResString() {
+		return "";
+	}
+
+	/** Entry point; args: program id, three GenMatrices parameters (presumably rows, cols, sparsity), nBlocks, nRepeats. */
+	public static void main(String[] args) throws Exception {
+		IGenerate in;
+		int nBlocks;
+		int nRepeats;
+		// args[0] is the program id, so a full parameter set has six entries; anything shorter uses the defaults.
+		if(args.length < 6) {
+			in = new GenMatrices(1000, 10, 10, 1.0);
+			nBlocks = 10;
+			nRepeats = 100;
+		}
+		else {
+			in = new GenMatrices(Integer.parseInt(args[1]), Integer.parseInt(args[2]), 10, Double.parseDouble(args[3]));
+			nBlocks = Integer.parseInt(args[4]);
+			nRepeats = Integer.parseInt(args[5]);
+		}
+		in.generate(nBlocks + 2);
+
+		new MatrixAppend(nRepeats, nBlocks, in, false).run();
+	}
+}
diff --git a/src/test/scripts/performance/append.sh b/src/test/scripts/performance/append.sh
new file mode 100755
index 00000000000..d2184dd472f
--- /dev/null
+++ b/src/test/scripts/performance/append.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+# Build the perf jar, then run program 1008 (MatrixAppend); stop if the build fails so no stale jar is measured.
+mvn package > /dev/null || exit 1
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 1.0 1 30000
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 1.0 1 3000
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 1.0 1 3000
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 0.3 1 30000
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 0.3 1 3000
+java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 0.3 1 3000
+# Disabled variants: same inputs with 10 appended blocks per call (fifth argument).
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 1.0 10 30000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 1.0 10 3000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 1.0 10 1000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 0.3 10 30000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 0.3 10 3000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 0.3 10 1000
+# Disabled variants: same inputs with 100 appended blocks per call (fifth argument).
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 1.0 100 3000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 1.0 100 300
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 1.0 100 200
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 100 100 0.3 100 3000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 100 0.3 100 2000
+# java -jar -XX:+UseNUMA target/systemds-3.3.0-SNAPSHOT-perf.jar 1008 1000 1000 0.3 100 1000