From 4f8f6b5d741141fad8a76563d7aae1e970427325 Mon Sep 17 00:00:00 2001
From: Boyd Duffee <boyd.duffee@gmail.com>
Date: Tue, 21 Jan 2025 22:00:11 +0000
Subject: [PATCH 1/2] Adding the matmul benchmark from plb2

---
 examples/Benchmark/tasks/Guide.md             | 19 ++++++++++++++
 .../Benchmark/tasks/matrix_multiplication.pl  | 26 +++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 examples/Benchmark/tasks/Guide.md
 create mode 100755 examples/Benchmark/tasks/matrix_multiplication.pl

diff --git a/examples/Benchmark/tasks/Guide.md b/examples/Benchmark/tasks/Guide.md
new file mode 100644
index 000000000..6c40ec2e8
--- /dev/null
+++ b/examples/Benchmark/tasks/Guide.md
@@ -0,0 +1,19 @@
+# A Guide to Benchmarking with PDL
+
+## Tasks
+
+This is a collection of benchmarking tasks collected from comparison sites,
+such as [plb2](https://github.com/attractivechaos/plb2),
+[FPBench](https://github.com/FPBench/FPBench) and
+the [Benchmarks Game](https://benchmarksgame-team.pages.debian.net/benchmarksgame/index.html).
+Yes, we want PDL to be the fastest dog in the race, but more importantly it lets
+our developers know when their latest commit has degraded performance.
+
+### Matrix multiplication
+
+From [plb2](https://github.com/attractivechaos/plb2/tree/master/src/perl), it creates
+two square matrices and multiplies them together (the inner product, `x`).
+
+This PDL script is more than 65 times faster than their Perl script.
+I've tried a few variations to find a faster version, but most gains are within
+the timing variation.
diff --git a/examples/Benchmark/tasks/matrix_multiplication.pl b/examples/Benchmark/tasks/matrix_multiplication.pl
new file mode 100755
index 000000000..4c6655d84
--- /dev/null
+++ b/examples/Benchmark/tasks/matrix_multiplication.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+#
+# Matrix multiplication of 2 square matrices
+# Implements matmul from https://github.com/attractivechaos/plb2
+# Boyd Duffee, 2025
+
+use warnings;
+use strict;
+use PDL;
+
+my $n = $ARGV[0] || 1500;      # matrix dimension N (default 1500)
+$n = int($n/2) * 2;            # round N down to an even number
+die "Usage: $0 [N]  (N must be a number >= 2)\n" if $n < 2;  # N=0 would divide by zero
+my $m1 = generate_matrix($n);  # named $m1/$m2, not $a/$b: those are sort()'s special globals
+my $m2 = generate_matrix($n);
+# `x` is overloaded by PDL to perform matrix multiplication
+my $x = $m1 x $m2;
+print $x->at($n/2, $n/2), "\n";    # print the element at index (N/2, N/2)
+
+sub generate_matrix {    # build a $size x $size ndarray of scale*(x-y)*(x+y)
+    my ($size) = @_;
+    my $scale = 1 / $size / $size;      # 1/N^2 factor applied to every element
+    my $xi = xvals($size, $size);       # PDL xvals: each element's index along dim 0
+    my $yi = yvals($size, $size);       # PDL yvals: each element's index along dim 1
+    return $scale * ($xi - $yi) * ($xi + $yi);
+}

From c9d5f5cc68edf4afeea722b9ff0bc98d10577d56 Mon Sep 17 00:00:00 2001
From: Boyd Duffee <boyd.duffee@gmail.com>
Date: Tue, 28 Jan 2025 20:35:42 +0000
Subject: [PATCH 2/2] Renamed Guide to README.md, added instructions and
 rebuild MANIFEST

---
 MANIFEST                           |  2 +
 examples/Benchmark/tasks/Guide.md  | 19 ----------
 examples/Benchmark/tasks/README.md | 61 ++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 19 deletions(-)
 delete mode 100644 examples/Benchmark/tasks/Guide.md
 create mode 100644 examples/Benchmark/tasks/README.md

diff --git a/MANIFEST b/MANIFEST
index 8edfc5c0b..fa539be18 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4,6 +4,8 @@ examples/Benchmark/Bench.pm
 examples/Benchmark/Bench.xs
 examples/Benchmark/Makefile.PL
 examples/Benchmark/README.md
+examples/Benchmark/tasks/matrix_multiplication.pl
+examples/Benchmark/tasks/README.md
 examples/Benchmark/time.pl
 examples/earth-interp.pl
 examples/earth.txt
diff --git a/examples/Benchmark/tasks/Guide.md b/examples/Benchmark/tasks/Guide.md
deleted file mode 100644
index 6c40ec2e8..000000000
--- a/examples/Benchmark/tasks/Guide.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# A Guide to Benchmarking with PDL
-
-## Tasks
-
-This is a collection of benchmarking tasks collected from comparison sites,
-such as [plb2](https://github.com/attractivechaos/plb2),
-[FPBench](https://github.com/FPBench/FPBench) and
-the [Benchmarks Game](https://benchmarksgame-team.pages.debian.net/benchmarksgame/index.html).
-Yes, we want PDL to be the fastest dog in the race, but more importantly it lets
-our developers know when their latest commit has degraded performance.
-
-### Matrix multiplication
-
-From [plb2](https://github.com/attractivechaos/plb2/tree/master/src/perl), it creates
-two square matrices and multiplies them together (the inner product, `x`).
-
-This PDL script is more than 65 times faster than their Perl script.
-I've tried a few variations to find a faster version, but most gains are within
-the timing variation.
diff --git a/examples/Benchmark/tasks/README.md b/examples/Benchmark/tasks/README.md
new file mode 100644
index 000000000..42b986d0b
--- /dev/null
+++ b/examples/Benchmark/tasks/README.md
@@ -0,0 +1,61 @@
+# A Guide to Benchmarking with PDL
+
+## Tasks
+
+This is a collection of benchmarking tasks collected from comparison sites,
+such as [plb2](https://github.com/attractivechaos/plb2),
+[FPBench](https://github.com/FPBench/FPBench) and
+the [Benchmarks Game](https://benchmarksgame-team.pages.debian.net/benchmarksgame/index.html).
+Yes, we want PDL to be the fastest dog in the race, but more importantly it lets
+our developers know when their latest commit has degraded performance.
+
+### Matrix multiplication
+
+From [plb2](https://github.com/attractivechaos/plb2/tree/master/src/perl), it creates
+two square matrices and multiplies them together (the matrix product, PDL's `x` operator).
+
+This PDL script is more than 65 times faster than their Perl script.
+I've tried a few variations to find a faster version, but most gains are within
+the timing variation.
+
+Initial measurement for PDL 2.095:
+```
+Benchmark 1: ./matrix_multiplication.pl
+  Time (mean ± σ):      3.569 s ±  0.063 s    [User: 3.656 s, System: 0.088 s]
+  Range (min … max):    3.521 s …  3.710 s    10 runs
+```
+
+## Benchmarking
+
+You can run a comparison with the Perl script using [hyperfine](https://github.com/sharkdp/hyperfine):
+
+```
+hyperfine --warmup 1 'YOUR_ENV_VAR=1 path/to/matmul.pl 300' 'path/to/matrix_multiplication.pl 300'
+```
+
+I recommend low values of N at first: with the default (N=1500), hyperfine
+takes over 10 minutes to measure the pure-Perl script, since its default is to
+run each program 10 times (this can be changed with the `-m`, `-M` or `-r` options).
+Ideally, you want to run this on a quiet system with few other processes running.
+
+Other benchmarks in pure Perl can be found at [plb2](https://github.com/attractivechaos/plb2/tree/master/src/perl).
+
+### Strategies
+
+If you want to compare two different branches against each other,
+consider using something like `--setup 'git checkout HEAD^'` or perhaps
+just running hyperfine with the single benchmark while searching for
+the offending commit with `git bisect`. Let us know how _you_ do benchmarking.
+
+## Profiling
+
+Use [Devel::NYTProf](https://metacpan.org/pod/Devel::NYTProf) to find out where
+your script is spending its time and how many times a line is run. Running the
+profiler really slows down your code, so I will run both commands on the same line
+and come back later.
+```
+perl -d:NYTProf matrix_multiplication.pl 500; nytprofhtml --no-flame
+```
+
+* [profiling](https://github.com/PDLPorters/pdl/issues/451)
+* [perl v java](https://charlesreid1.github.io/perl-vs-java-n-queens-problem.html)