diff --git a/README.md b/README.md
index 034b66f..66bf7ca 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ various compiler and architectures.
 
 HPCombi was initially designed using the SSE and AVX instruction sets, and did
 not work on machines without these instructions (such as ARM). From v1.0.1
-HPCombi supports processors with other instruction sets also, via 
+HPCombi supports processors with other instruction sets also, via
 [SIMD Everywhere][]. It might be the case that the greatest performance gains
 are achieved on processors supporting the SSE and AVX instruction sets, but the
 HPCombi benchmarks indicate that there are also still significant gains on
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index bc31ad1..0faba64 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -37,7 +37,7 @@ endif()
 message(STATUS "Building benchmark")
 
 set(benchmark_src
-  bench_epu8.cpp bench_perm16.cpp bench_bmat8.cpp)
+  bench_epu8.cpp bench_perm16.cpp bench_bmat8.cpp bench_bmat16.cpp)
 
 foreach(f ${benchmark_src})
   get_filename_component(benchName ${f} NAME_WE)
diff --git a/benchmark/bench_bmat16.cpp b/benchmark/bench_bmat16.cpp
new file mode 100644
index 0000000..d54c973
--- /dev/null
+++ b/benchmark/bench_bmat16.cpp
@@ -0,0 +1,73 @@
+//****************************************************************************//
+//     Copyright (C) 2018-2024 Florent Hivert <Florent.Hivert@lisn.fr>,       //
+//                                                                            //
+//  This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi>  //
+//                                                                            //
+//  HP-Combi is free software: you can redistribute it and/or modify it       //
+//  under the terms of the GNU General Public License as published by the     //
+//  Free Software Foundation, either version 3 of the License, or             //
+//  (at your option) any later version.                                       //
+//                                                                            //
+//  HP-Combi is distributed in the hope that it will be useful, but WITHOUT   //
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or     //
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License      //
+//  for  more details.                                                        //
+//                                                                            //
+//  You should have received a copy of the GNU General Public License along   //
+//  with HP-Combi. If not, see <https://www.gnu.org/licenses/>.               //
+//****************************************************************************//
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include <catch2/benchmark/catch_benchmark.hpp>
+#include <catch2/catch_test_macros.hpp>
+
+#include "bench_fixture.hpp"
+#include "bench_main.hpp"
+
+#include "hpcombi/bmat16.hpp"
+
+namespace HPCombi {
+
+std::vector<BMat16> make_sample(size_t n) {
+    std::vector<BMat16> res{};
+    for (size_t i = 0; i < n; i++) {
+        res.push_back(BMat16::random());
+    }
+    return res;
+}
+
+std::vector<std::pair<BMat16, BMat16>> make_pair_sample(size_t n) {
+    std::vector<std::pair<BMat16, BMat16>> res{};
+    for (size_t i = 0; i < n; i++) {
+        auto x = BMat16::random();
+        res.push_back(std::make_pair(x, x));
+    }
+    return res;
+}
+
+class Fix_BMat16 {
+ public:
+    Fix_BMat16()
+        : sample(make_sample(1000)), pair_sample(make_pair_sample(1000)) {}
+    ~Fix_BMat16() {}
+    const std::vector<BMat16> sample;
+    std::vector<std::pair<BMat16, BMat16>>
+        pair_sample;  // not const, transpose2 is in place
+};
+
+TEST_CASE_METHOD(Fix_BMat16, "Transpose", "[BMat16][000]") {
+    BENCHMARK_MEM_FN(transpose, sample);
+    BENCHMARK_MEM_FN(transpose_naive, sample);
+}
+
+TEST_CASE_METHOD(Fix_BMat16, "Multiplication", "[BMat16][001]") {
+    BENCHMARK_MEM_FN_PAIR(BMat16::operator*, pair_sample);
+    BENCHMARK_MEM_FN_PAIR(mult_4bmat8, pair_sample);
+    BENCHMARK_MEM_FN_PAIR(mult_naive, pair_sample);
+    BENCHMARK_MEM_FN_PAIR(mult_naive_array, pair_sample);
+}
+
+}  // namespace HPCombi
diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp
index 341162e..ac21074 100644
--- a/benchmark/bench_bmat8.cpp
+++ b/benchmark/bench_bmat8.cpp
@@ -70,12 +70,14 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose", "[BMat8][001]") {
     BENCHMARK_MEM_FN(transpose, sample);
     BENCHMARK_MEM_FN(transpose_mask, sample);
     BENCHMARK_MEM_FN(transpose_maskd, sample);
+    BENCHMARK_MEM_FN(transpose_naive, sample);
 }
 
 TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs", "[BMat8][002]") {
     BENCHMARK_MEM_FN_PAIR_EQ(transpose, pair_sample);
     BENCHMARK_MEM_FN_PAIR_EQ(transpose_mask, pair_sample);
     BENCHMARK_MEM_FN_PAIR_EQ(transpose_maskd, pair_sample);
+    BENCHMARK_MEM_FN_PAIR_EQ(transpose_naive, pair_sample);
     BENCHMARK("transpose2") {
         for (auto &pair : pair_sample) {
             BMat8::transpose2(pair.first, pair.second);
@@ -109,4 +111,10 @@ TEST_CASE_METHOD(Fix_BMat8, "Pair row space inclusion", "[BMat8][004]") {
     };
 }
 
+TEST_CASE_METHOD(Fix_BMat8, "Multiplication", "[BMat8][005]") {
+    BENCHMARK_MEM_FN_PAIR(BMat8::operator*, pair_sample);
+    BENCHMARK_MEM_FN_PAIR(mult_naive, pair_sample);
+    BENCHMARK_MEM_FN_PAIR(mult_naive_array, pair_sample);
+}
+
 }  // namespace HPCombi
diff --git a/include/hpcombi/bmat16.hpp b/include/hpcombi/bmat16.hpp
new file mode 100644
index 0000000..7773236
--- /dev/null
+++ b/include/hpcombi/bmat16.hpp
@@ -0,0 +1,266 @@
+//****************************************************************************//
+//    Copyright (C) 2018-2024 Finn Smith <fls3@st-andrews.ac.uk>              //
+//    Copyright (C) 2018-2024 James Mitchell <jdm3@st-andrews.ac.uk>          //
+//    Copyright (C) 2018-2024 Florent Hivert <Florent.Hivert@lisn.fr>,        //
+//                                                                            //
+//  This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi>  //
+//                                                                            //
+//  HP-Combi is free software: you can redistribute it and/or modify it       //
+//  under the terms of the GNU General Public License as published by the     //
+//  Free Software Foundation, either version 3 of the License, or             //
+//  (at your option) any later version.                                       //
+//                                                                            //
+//  HP-Combi is distributed in the hope that it will be useful, but WITHOUT   //
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or     //
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License      //
+//  for  more details.                                                        //
+//                                                                            //
+//  You should have received a copy of the GNU General Public License along   //
+//  with HP-Combi. If not, see <https://www.gnu.org/licenses/>.               //
+//****************************************************************************//
+
+// This file contains a declaration of fast boolean matrices up to dimension 16.
+
+#ifndef HPCOMBI_BMAT16_HPP_
+#define HPCOMBI_BMAT16_HPP_
+
+#include <array>       // for array
+#include <bitset>      // for bitset
+#include <cstddef>     // for size_t
+#include <cstdint>     // for uint64_t, uint8_t
+#include <functional>  // for hash, __scalar_hash
+#include <iostream>    // for ostream
+#include <memory>      // for hash
+#include <utility>     // for pair, swap
+#include <vector>      // for vector
+
+#include "debug.hpp"   // for HPCOMBI_ASSERT
+#include "bmat8.hpp"
+
+#include "simde/x86/avx2.h"
+
+namespace HPCombi {
+using xpu16 = uint16_t __attribute__((vector_size(32)));
+using xpu64 = uint64_t __attribute__((vector_size(32)));
+
+//! Converting storage type from blocks to rows of a xpu64
+//! representing a 16x16 matrix (used in BMat16).
+//!
+//! Each 64 bit unsigned int represents 4 lines of the matrix.
+xpu64 to_line(xpu64 vect);
+
+//! Converting storage type from rows to blocks of a xpu64
+//! representing a 16x16 matrix (used in BMat16).
+//! 
+//! Each 64 bit unsigned int represents one of the four 
+//! 8x8 matrix that make up a 16x16 when quartered.
+xpu64 to_block(xpu64 vect);
+
+
+
+//! Class for fast boolean matrices of dimension up to 16 x 16
+//!
+//! The methods for these small matrices over the boolean semiring
+//! are more optimised than the generic methods for boolean matrices.
+//! Note that all BMat16 are represented internally as an 16 x 16 matrix;
+//! any entries not defined by the user are taken to be 0. This does
+//! not affect the results of any calculations.
+//!
+//! BMat16 is a trivial class.
+class BMat16 {
+ public:
+    //! A default constructor.
+    //!
+    //! This constructor gives no guarantees on what the matrix will contain.
+    BMat16() noexcept = default;
+
+    //! A constructor.
+    //!
+    //! This constructor initializes a matrix with a 256-bit register
+    //! The rows are equal to the 16 chunks, of 16 bits each, 
+    //! of the binary representation of the matrix.
+    explicit BMat16(xpu64 mat) noexcept : 
+        _data{mat} {}
+
+    //! A constructor.
+    //!
+    //! This constructor initializes a matrix with 4 64 bits unsigned int
+    //! Each uint represents one of the four quarter (8x8 matrix).
+    explicit BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept;
+    
+    //! A constructor.
+    //!
+    //! This constructor initializes a matrix where the rows of the matrix
+    //! are the vectors in \p mat.
+    explicit BMat16(std::vector<std::vector<bool>> const &mat) noexcept;
+
+    //! A constructor.
+    //!
+    //! This is the copy constructor.
+    BMat16(BMat16 const &) noexcept = default;
+
+    //! A constructor.
+    //!
+    //! This is the move constructor.
+    BMat16(BMat16 &&) noexcept = default;
+
+    //! A constructor.
+    //!
+    //! This is the copy assignment constructor.
+    BMat16 &operator=(BMat16 const &) noexcept = default;
+
+    //! A constructor.
+    //!
+    //! This is the move assignment constructor.
+    BMat16 &operator=(BMat16 &&) noexcept = default;
+
+    //! A default destructor.
+    ~BMat16() = default;
+
+    //! Returns \c true if \c this equals \p that.
+    //!
+    //! This method checks the mathematical equality of two BMat16 objects.
+    bool operator==(BMat16 const &that) const noexcept;
+
+    //! Returns \c true if \c this does not equal \p that
+    //!
+    //! This method checks the mathematical inequality of two BMat16 objects.
+    bool operator!=(BMat16 const &that) const noexcept {
+        return !(*this == that);
+    }
+
+    //! Returns \c true if \c this is less than \p that.
+    //!
+    //! This method checks whether a BMat16 objects is less than another.
+    bool operator<(BMat16 const &that) const noexcept;
+
+    //! Returns \c true if \c this is greater than \p that.
+    //!
+    //! This method checks whether a BMat16 objects is greater than another.
+    bool operator>(BMat16 const &that) const noexcept;
+
+    //! Returns the entry in the (\p i, \p j)th position.
+    //!
+    //! This method returns the entry in the (\p i, \p j)th position.
+    //! Note that since all matrices are internally represented as 16 x 16, it
+    //! is possible to access entries that you might not believe exist.
+    bool operator()(size_t i, size_t j) const noexcept;
+
+    //! Sets the (\p i, \p j)th position to \p val.
+    //!
+    //! This method sets the (\p i, \p j)th entry of \c this to \p val.
+    //! Uses the bit twiddle for setting bits found
+    //! <a href=http://graphics.stanford.edu/~seander/bithacks.html>here</a>.
+    void set(size_t i, size_t j, bool val) noexcept;
+
+    //! Returns the array representation of \c this.
+    //!
+    //! Returns a two dimensional 16 x 16 array representing the matrix.
+    std::array<std::array<bool, 16>, 16> to_array() const noexcept;
+
+    //! Returns the bitwise or between \c this and \p that
+    //!
+    //! This method perform the bitwise operator on the matrices and
+    //! returns the result as a BMat16.
+    BMat16 operator|(BMat16 const& that) const noexcept {
+        return BMat16(_data | that._data);
+    }
+
+    //! Returns the transpose of \c this.
+    //!
+    //! Returns the standard matrix transpose of a BMat16.
+    //! Uses a naive technique, by simply iterating through all entries.
+    BMat16 transpose_naive() const noexcept;
+
+    //! Returns the transpose of \c this
+    //!
+    //! Returns the standard matrix transpose of a BMat16.
+    //! Uses the technique found in Knuth AoCP Vol. 4 Fasc. 1a, p. 15.
+    BMat16 transpose() const noexcept;
+
+    //! Returns the matrix product of \c this and the transpose of \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat16 objects. This is faster than transposing
+    //! that and calling the product of \c this with it. Implementation uses
+    //! vector instructions.
+    BMat16 mult_transpose(BMat16 const &that) const noexcept;
+
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat16 objects. 
+    //! It comes down to 8 products of 8x8 matrices, 
+    //! which make up a 16x16 when we cut it into 4.
+    BMat16 mult_4bmat8(BMat16 const &that) const noexcept;
+
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat16 objects. This is a fast implementation
+    //! using transposition and vector instructions.
+    BMat16 operator*(BMat16 const &that) const noexcept {
+        return mult_transpose(that.transpose());
+    }
+
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat16 objects. It performs the most naive approach
+    //! by simply iterating through all entries using the access operator of BMat16
+    BMat16 mult_naive(BMat16 const& that) const noexcept;
+
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat16 objects. It performs the most naive approach
+    //! by simply iterating through all entries using array conversion.
+    BMat16 mult_naive_array(BMat16 const& that) const noexcept;
+
+    //! Returns the number of non-zero rows of \c this
+    size_t nr_rows() const noexcept;
+
+    //! Returns a \c std::vector for rows of \c this
+    // Not noexcept because it constructs a vector
+    std::vector<uint16_t> rows() const;
+
+    //! Returns the identity BMat16
+    //!
+    //! This method returns the 16 x 16 BMat16 with 1s on the main diagonal.
+    static BMat16 one(size_t dim = 16) noexcept {
+        HPCOMBI_ASSERT(dim <= 16);
+        static std::array<uint64_t, 9> const ones = {
+            0, 1, 0x201, 0x40201, 0x8040201, 0x1008040201, 0x201008040201,  0x40201008040201, 0x8040201008040201};
+        return BMat16(ones[dim >= 8 ? 8 : dim], 0, 0, ones[dim >= 8 ? dim - 8 : 0]);
+    }
+
+    //! Returns a random BMat16
+    //!
+    //! This method returns a BMat16 chosen at random.
+    // Not noexcept because random things aren't
+    static BMat16 random();
+
+    //! Returns a random square BMat16 up to dimension \p dim.
+    //!
+    //! This method returns a BMat16 chosen at random, where only the
+    //! top-left \p dim x \p dim entries may be non-zero.
+    // Not noexcept because BMat16::random above is not
+    static BMat16 random(size_t dim);
+
+    void swap(BMat16 &that) noexcept { std::swap(this->_data, that._data); }
+
+    //! Write \c this on \c os
+    // Not noexcept
+    std::ostream &write(std::ostream &os) const;
+
+
+ private:
+    xpu64 _data;
+
+};
+
+}  // namespace HPCombi
+
+#include "bmat16_impl.hpp"
+
+#endif
\ No newline at end of file
diff --git a/include/hpcombi/bmat16_impl.hpp b/include/hpcombi/bmat16_impl.hpp
new file mode 100644
index 0000000..7cc3900
--- /dev/null
+++ b/include/hpcombi/bmat16_impl.hpp
@@ -0,0 +1,303 @@
+//****************************************************************************//
+//    Copyright (C) 2018-2024 Finn Smith <fls3@st-andrews.ac.uk>              //
+//    Copyright (C) 2018-2024 James Mitchell <jdm3@st-andrews.ac.uk>          //
+//    Copyright (C) 2018-2024 Florent Hivert <Florent.Hivert@lisn.fr>,        //
+//                                                                            //
+//  This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi>  //
+//                                                                            //
+//  HP-Combi is free software: you can redistribute it and/or modify it       //
+//  under the terms of the GNU General Public License as published by the     //
+//  Free Software Foundation, either version 3 of the License, or             //
+//  (at your option) any later version.                                       //
+//                                                                            //
+//  HP-Combi is distributed in the hope that it will be useful, but WITHOUT   //
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or     //
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License      //
+//  for  more details.                                                        //
+//                                                                            //
+//  You should have received a copy of the GNU General Public License along   //
+//  with HP-Combi. If not, see <https://www.gnu.org/licenses/>.               //
+//****************************************************************************//
+
+// This file contains an implementation of fast boolean matrices up to
+// dimension 16 x 16.
+
+namespace HPCombi {
+static_assert(std::is_trivial<BMat16>(), "BMat16 is not a trivial class!");
+
+static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07};
+static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d};
+
+inline xpu64 to_line(xpu64 vect) {
+    return simde_mm256_shuffle_epi8(vect, line);
+}
+
+inline xpu64 to_block(xpu64 vect) {
+    return simde_mm256_shuffle_epi8(vect, block);
+}
+
+inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept {
+    xpu64 tmp{n0, n1, n2, n3};
+    _data = to_line(tmp);
+}
+
+inline BMat16::BMat16(std::vector<std::vector<bool>> const &mat) noexcept {
+    HPCOMBI_ASSERT(mat.size() <= 16);
+    HPCOMBI_ASSERT(0 < mat.size());
+    std::array<uint64_t, 4> tmp = {0, 0, 0, 0};
+    for (int i = mat.size() - 1; i >= 0; --i) {
+        HPCOMBI_ASSERT(mat.size() == mat[i].size());
+        tmp[i/4] <<= 16 - mat.size();
+        for (int j = mat[i].size() - 1; j >= 0; --j) {
+            tmp[i/4] = (tmp[i/4] << 1) | mat[i][j];
+        }
+    }
+    _data = xpu64{tmp[0], tmp[1], tmp[2], tmp[3]};
+}
+
+inline bool BMat16::operator()(size_t i, size_t j) const noexcept {
+    return (_data[i/4] >> (16 * (i%4) + j)) & 1;
+}
+
+inline void BMat16::set(size_t i, size_t j, bool val) noexcept {
+    HPCOMBI_ASSERT(i < 16);
+    HPCOMBI_ASSERT(j < 16);
+    uint64_t a = 1;
+    a <<= 16 * (i%4) + j;
+    xpu64 mask{(i/4 == 0)*a, 
+               (i/4 == 1)*a, 
+               (i/4 == 2)*a, 
+               (i/4 == 3)*a};
+    _data ^= (-val ^ _data) & mask;
+}
+
+inline bool BMat16::operator==(BMat16 const &that) const noexcept {
+    xpu64 tmp = _data ^ that._data;
+    return simde_mm256_testz_si256(tmp, tmp);
+}
+
+inline bool BMat16::operator<(BMat16 const &that) const noexcept {
+    return _data[0] < that._data[0] || 
+            (_data[0] == that._data[0] && (_data[1] < that._data[1] || 
+            (_data[1] == that._data[1] && (_data[2] < that._data[2] || 
+            (_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
+}
+
+inline bool BMat16::operator>(BMat16 const &that) const noexcept {
+    return _data[0] > that._data[0] || 
+            (_data[0] == that._data[0] && (_data[1] > that._data[1] || 
+            (_data[1] == that._data[1] && (_data[2] > that._data[2] || 
+            (_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
+}
+
+inline std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
+    xpu64 tmp = to_block(_data);
+    uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
+    std::array<std::array<bool, 16>, 16> res;
+    for (size_t i = 0; i < 64; ++i) {
+        res[i/8][i%8]         = a & 1; a >>= 1;
+        res[i/8][8 + i%8]     = b & 1; b >>= 1;
+        res[8 + i/8][i%8]     = c & 1; c >>= 1;
+        res[8 + i/8][8 + i%8] = d & 1; d >>= 1;
+    }
+    return res;
+}
+
+inline BMat16 BMat16::transpose_naive() const noexcept {
+    uint64_t a = 0, b = 0, c = 0, d = 0;
+    for (int i = 7; i >= 0; --i) {
+        for (int j = 7; j >= 0; --j) {
+            a = (a << 1) | (*this)(j, i); 
+            b = (b << 1) | (*this)(j+8, i); 
+            c = (c << 1) | (*this)(j, i+8); 
+            d = (d << 1) | (*this)(j+8, i+8);
+        }
+    }
+    return BMat16(a, b, c, d);
+}
+
+inline BMat16 BMat16::transpose() const noexcept {
+    xpu64 tmp = to_block(_data);
+    xpu64 x = simde_mm256_set_epi64x(tmp[3], tmp[1], tmp[2], tmp[0]);
+    xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
+    x = x ^ y ^ (y << 7);
+    y = (x ^ (x >> 14)) & (xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
+    x = x ^ y ^ (y << 14);
+    y = (x ^ (x >> 28)) & (xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
+    x = x ^ y ^ (y << 28);
+    return BMat16(to_line(x));
+}
+
+static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100};
+
+inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
+    xpu16 x = _data;
+    xpu16 y1 = that._data;
+    xpu16 y2 = simde_mm256_set_epi64x(that._data[1], that._data[0], that._data[3], that._data[2]);
+    xpu16 zero = simde_mm256_setzero_si256();
+    xpu16 data = simde_mm256_setzero_si256();
+    xpu16 diag1{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
+    xpu16 diag2{0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+    for (size_t i = 0; i < 8; ++i) {
+        data |= ((x & y1) != zero) & diag1;
+        data |= ((x & y2) != zero) & diag2;
+        y1 = simde_mm256_shuffle_epi8(y1, rot);
+        y2 = simde_mm256_shuffle_epi8(y2, rot);
+        diag1 = simde_mm256_shuffle_epi8(diag1, rot);
+        diag2 = simde_mm256_shuffle_epi8(diag2, rot);
+    }
+    return BMat16(data);
+}
+
+inline BMat16 BMat16::mult_4bmat8(BMat16 const &that) const noexcept {
+    BMat16 tmp = that.transpose();
+    xpu64 t1 = to_block(_data), 
+          t2 = to_block(tmp._data);
+    BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]),
+          a2(t2[0]), b2(t2[1]), c2(t2[2]), d2(t2[3]);
+    return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(), 
+                  (a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(), 
+                  (c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(), 
+                  (c1.mult_transpose(c2) | d1.mult_transpose(d2)).to_int());
+}
+
+inline BMat16 BMat16::mult_naive(BMat16 const &that) const noexcept {
+    uint64_t a = 0, b = 0, c = 0, d = 0;
+    for (int i = 7; i >= 0; --i) {
+        for (int j = 7; j >= 0; --j) {
+            a <<= 1; b <<= 1; c <<= 1; d <<= 1;
+            for (size_t k = 0; k < 8; ++k) {
+                a |= ((*this)(i, k)     & that(k, j))     | ((*this)(i, k + 8)     & that(k + 8, j));
+                b |= ((*this)(i, k)     & that(k, j + 8)) | ((*this)(i, k + 8)     & that(k + 8, j + 8));
+                c |= ((*this)(i + 8, k) & that(k, j))     | ((*this)(i + 8, k + 8) & that(k + 8, j));
+                d |= ((*this)(i + 8, k) & that(k, j + 8)) | ((*this)(i + 8, k + 8) & that(k + 8, j + 8));
+            }
+        }
+    }
+    return BMat16(a, b, c, d);
+}
+
+inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
+    std::array<std::array<bool, 16>, 16> tab1 = to_array(), tab2 = that.to_array();
+    uint64_t a = 0, b = 0, c = 0, d = 0;
+    for (int i = 7; i >= 0; --i) {
+        for (int j = 7; j >= 0; --j) {
+            a <<= 1; b <<= 1; c <<= 1; d <<= 1;
+            for (size_t k = 0; k < 16; ++k) {
+                a |= tab1[i][k]     & tab2[k][j];
+                b |= tab1[i][k]     & tab2[k][j + 8];
+                c |= tab1[i + 8][k] & tab2[k][j];
+                d |= tab1[i + 8][k] & tab2[k][j + 8];
+            }
+        }
+    }
+    return BMat16(a, b, c, d);
+}
+
+inline size_t BMat16::nr_rows() const noexcept{
+    size_t res = 0;
+    for (size_t i = 0; i < 16; ++i)
+        if ((_data[i/4] << (16 * (i%4)) >> 48) != 0) 
+            ++res;
+    return res;
+
+    //// Vectorized version which doesn't work due to the absence of popcnt in simde
+    // xpu16 tmp = _data, zero = simde_mm256_setzero_si256();
+    // xpu16 x = (tmp != zero);
+    // return simde_mm256_popcnt_epi16(x);
+}
+
+inline std::vector<uint16_t> BMat16::rows() const {
+    std::vector<uint16_t> rows;
+    for (size_t i = 0; i < 16; ++i) {
+        uint16_t row_rev = (_data[i/4] << (16 * (3 - i%4)) >> 48);
+
+        // The row needs to be reversed
+        uint16_t row = 0;
+        for (size_t j = 0; j < 16; ++j) {
+            row = (row << 1) | (row_rev & 1);
+            row_rev >>= 1;
+        }
+        rows.push_back(row);
+    }
+    return rows;
+}
+
+inline BMat16 BMat16::random() {
+    static std::random_device _rd;
+    static std::mt19937 _gen(_rd());
+    static std::uniform_int_distribution<uint64_t> _dist(0, 0xffffffffffffffff);
+
+    return BMat16(_dist(_gen), _dist(_gen), _dist(_gen), _dist(_gen));
+}
+
+static const constexpr std::array<xpu64, 16> ROW_MASK16 = {
+    xpu16{0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0, 0}, 
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff, 0},
+    xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff}
+};
+
+static const constexpr std::array<xpu64, 16> COL_MASK16 = {
+    xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 
+    xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 
+    xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}, 
+    xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, 
+    xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, 
+    xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}, 
+    xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}, 
+    xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, 
+    xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100}, 
+    xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200}, 
+    xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400}, 
+    xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800}, 
+    xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000}, 
+    xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000}, 
+    xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000}, 
+    xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000}
+};
+
+inline BMat16 BMat16::random(size_t const dim) {
+    // TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask
+    HPCOMBI_ASSERT(0 < dim && dim <= 16);
+    BMat16 bm = BMat16::random();
+    for (size_t i = dim; i < 16; ++i) {
+        bm._data &= ~ROW_MASK16[i];
+        bm._data &= ~COL_MASK16[i];
+    }
+    return bm;
+}
+
+inline std::ostream &BMat16::write(std::ostream &os) const {
+    for (size_t i = 0; i < 16; ++i) {
+        for (size_t j = 0; j < 16; ++j) {
+            os << (*this)(i, j);
+        }
+        os << "\n";
+    }
+    return os;
+}
+
+
+} // namespace HPCombi
+
+namespace std {
+
+// Not noexcept because BMat16::write isn't
+inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat16 const &bm) {
+    return bm.write(os);
+}
+
+}  // namespace std
diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp
index cb789fa..f4132ff 100644
--- a/include/hpcombi/bmat8.hpp
+++ b/include/hpcombi/bmat8.hpp
@@ -133,7 +133,7 @@ class BMat8 {
     //!
     //! This method sets the (\p i, \p j)th entry of \c this to \p val.
     //! Uses the bit twiddle for setting bits found
-    //! <a href=http://graphics.stanford.edu/~seander/bithacks>here</a>.
+    //! <a href=http://graphics.stanford.edu/~seander/bithacks.html>here</a>.
     void set(size_t i, size_t j, bool val) noexcept;
 
     //! Returns the integer representation of \c this.
@@ -143,6 +143,25 @@ class BMat8 {
     //! from top to bottom) and then this sequence as an unsigned int.
     uint64_t to_int() const noexcept { return _data; }
 
+    //! Returns the array representation of \c this.
+    //!
+    //! Returns a two dimensional 8 x 8 array representing the matrix.
+    std::array<std::array<bool, 8>, 8> to_array() const noexcept;
+
+    //! Returns the bitwise or between \c this and \p that
+    //!
+    //! This method perform the bitwise operator on the matrices and
+    //! returns the result as a BMat8
+    BMat8 operator|(BMat8 const& that) const noexcept {
+        return BMat8(to_int() | that.to_int());
+    }
+
+    //! Returns the transpose of \c this.
+    //!
+    //! Returns the standard matrix transpose of a BMat8.
+    //! Uses a naive technique, by simply iterating through all entries
+    BMat8 transpose_naive() const noexcept;
+
     //! Returns the transpose of \c this
     //!
     //! Returns the standard matrix transpose of a BMat8.
@@ -184,6 +203,20 @@ class BMat8 {
         return mult_transpose(that.transpose());
     }
 
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat8 objects. It performs the most naive approach
+    //! by simply iterating through all entries using the access operator of BMat8
+    BMat8 mult_naive(BMat8 const& that) const noexcept;
+
+    //! Returns the matrix product of \c this and \p that
+    //!
+    //! This method returns the standard matrix product (over the
+    //! boolean semiring) of two BMat8 objects. It performs the most naive approach
+    //! by simply iterating through all entries using array conversion.
+    BMat8 mult_naive_array(BMat8 const& that) const noexcept;
+
     //! Returns a canonical basis of the row space of \c this
     //!
     //! Any two matrix with the same row space are guaranteed to have the same
diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp
index da0f18b..5bf05f7 100644
--- a/include/hpcombi/bmat8_impl.hpp
+++ b/include/hpcombi/bmat8_impl.hpp
@@ -113,6 +113,15 @@ inline void BMat8::set(size_t i, size_t j, bool val) noexcept {
     _data ^= (-val ^ _data) & BIT_MASK[8 * i + j];
 }
 
+inline std::array<std::array<bool, 8>, 8> BMat8::to_array() const noexcept {
+    uint64_t a = to_int();
+    std::array<std::array<bool, 8>, 8> res;
+    for (int i = 0; i < 64; i++) {
+        res[7 - i/8][7 - i%8] = a % 2; a >>= 1;
+    }
+    return res;
+}
+
 inline BMat8::BMat8(std::vector<std::vector<bool>> const &mat) {
     HPCOMBI_ASSERT(mat.size() <= 8);
     HPCOMBI_ASSERT(0 < mat.size());
@@ -140,6 +149,7 @@ inline BMat8 BMat8::random() {
 }
 
 inline BMat8 BMat8::random(size_t const dim) {
+    // TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask
     HPCOMBI_ASSERT(0 < dim && dim <= 8);
     BMat8 bm = BMat8::random();
     for (size_t i = dim; i < 8; ++i) {
@@ -149,6 +159,16 @@ inline BMat8 BMat8::random(size_t const dim) {
     return bm;
 }
 
+inline BMat8 BMat8::transpose_naive() const noexcept {
+    uint64_t res = 0;
+    for (int i = 0; i < 8; i++) {
+        for (int j = 0; j < 8; j++) {
+            res = (res << 1) | (*this)(j, i);
+        }
+    }
+    return BMat8(res);
+}
+
 inline BMat8 BMat8::transpose() const noexcept {
     uint64_t x = _data;
     uint64_t y = (x ^ (x >> 7)) & 0xAA00AA00AA00AA;
@@ -222,6 +242,33 @@ inline BMat8 BMat8::mult_transpose(BMat8 const &that) const noexcept {
                  simde_mm_extract_epi64(data, 1));
 }
 
+inline BMat8 BMat8::mult_naive(BMat8 const &that) const noexcept {
+    uint64_t a = 0;
+    for (int i = 0; i < 8; i++) {
+        for (int j = 0; j < 8; j++) {
+            a <<= 1;
+            for (int k = 0; k < 8; k++) {
+                a |= ((*this)(i, k) & that(k, j));
+            }
+        }
+    }
+    return BMat8(a);
+}
+
+inline BMat8 BMat8::mult_naive_array(BMat8 const &that) const noexcept {
+    std::array<std::array<bool, 8>, 8> tab1 = to_array(), tab2 = that.to_array();
+    uint64_t a = 0;
+    for (int i = 0; i < 8; i++) {
+        for (int j = 0; j < 8; j++) {
+            a <<= 1;
+            for (int k = 0; k < 8; k++) {
+                a |= tab1[i][k] & tab2[k][j];
+            }
+        }
+    }
+    return BMat8(a);
+}
+
 inline epu8 BMat8::row_space_basis_internal() const noexcept {
     epu8 res = remove_dups(revsorted8(simde_mm_set_epi64x(0, _data)));
     epu8 rescy = res;
diff --git a/include/hpcombi/hpcombi.hpp b/include/hpcombi/hpcombi.hpp
index bdc8145..7e5c998 100644
--- a/include/hpcombi/hpcombi.hpp
+++ b/include/hpcombi/hpcombi.hpp
@@ -21,6 +21,7 @@
 #define HPCOMBI_HPCOMBI_HPP_
 
 #include "bmat8.hpp"
+#include "bmat16.hpp"
 #include "debug.hpp"
 #include "epu8.hpp"
 #include "perm16.hpp"
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 7f65b23..626e706 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -37,7 +37,7 @@ endif()
 message(STATUS "Building tests")
 
 set(test_src
-  test_epu8.cpp test_perm16.cpp test_perm_all.cpp test_bmat8.cpp)
+  test_epu8.cpp test_perm16.cpp test_perm_all.cpp test_bmat8.cpp test_bmat16.cpp)
 
 foreach(f ${test_src})
   get_filename_component(testName ${f} NAME_WE)
@@ -65,3 +65,4 @@ add_test (TestEPU8 test_epu8)
 add_test (TestPerm16 test_perm16)
 add_test (TestPermAll test_perm_all)
 add_test (TestBMat8 test_bmat8)
+add_test (TestBMat16 test_bmat16)
diff --git a/tests/test_bmat16.cpp b/tests/test_bmat16.cpp
new file mode 100644
index 0000000..b9ed395
--- /dev/null
+++ b/tests/test_bmat16.cpp
@@ -0,0 +1,299 @@
+//****************************************************************************//
+//     Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>,       //
+//                                                                            //
+//  This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi>  //
+//                                                                            //
+//  HP-Combi is free software: you can redistribute it and/or modify it       //
+//  under the terms of the GNU General Public License as published by the     //
+//  Free Software Foundation, either version 3 of the License, or             //
+//  (at your option) any later version.                                       //
+//                                                                            //
+//  HP-Combi is distributed in the hope that it will be useful, but WITHOUT   //
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or     //
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License      //
+//  for  more details.                                                        //
+//                                                                            //
+//  You should have received a copy of the GNU General Public License along   //
+//  with HP-Combi. If not, see <https://www.gnu.org/licenses/>.               //
+//****************************************************************************//
+
+#include <cstddef>   // for size_t
+#include <cstdint>   // for uint64_t
+#include <iostream>  // for char_traits, ostream, ostrin...
+#include <string>    // for operator==
+#include <utility>   // for pair
+#include <vector>    // for vector, allocator
+
+#include "test_main.hpp"                 // for TEST_AGREES, TEST_AGREES2
+#include <catch2/catch_test_macros.hpp>  // for operator""_catch_sr, operator==
+
+#include "hpcombi/bmat16.hpp"   // for BMat16, operator<<
+
+namespace HPCombi {
+namespace {
+struct BMat16Fixture {
+    const BMat16 zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t;
+    const std::vector<BMat16> BMlist;
+    BMat16Fixture()
+        : zero(0, 0, 0, 0), one1(0, 0, 0, 1), one2(0, 0, 0, 0x20001), 
+        ones(0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff),
+          bm({{0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0},
+              {0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1},
+              {1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1},
+              {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0},
+              {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+              {1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0},
+              {1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0},
+              {1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0},
+              {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0},
+              {1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1},
+              {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1},
+              {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0},
+              {0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1},
+              {1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0},
+              {0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1},
+              {0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0}}),
+          bm1({{0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0},
+               {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0},
+               {0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0},
+               {0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0},
+               {1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1},
+               {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0},
+               {1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0},
+               {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1},
+               {0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+               {1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0},
+               {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1},
+               {1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0},
+               {1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1},
+               {0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
+               {1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
+               {0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}),
+          bmm1({{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1},
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1},
+                {1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
+                {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1},
+                {0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1},
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1},
+                {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+                {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
+                {0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1}}),
+          bm2({{1, 1}, {0, 1}}), bm2t({{1, 0}, {1, 1}}),
+          bm3({{0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0},
+              {0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1},
+              {1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1},
+              {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0},
+              {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+              {1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0},
+              {1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0},
+              {1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0},
+              {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0},
+              {1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1},
+              {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1},
+              {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0},
+              {0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1},
+              {1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0},
+              {0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1},
+              {0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0}}),
+          bm3t({{0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0},
+                {0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1},
+                {0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0},
+                {1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0},
+                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0},
+                {1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0},
+                {0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1},
+                {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1},
+                {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                {1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1},
+                {0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0},
+                {1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0},
+                {1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1},
+                {1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0},
+                {0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0}}),
+          BMlist(
+              {zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t}) {}
+};
+}  // namespace
+
+//****************************************************************************//
+//****************************************************************************//
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::one", "[BMat16][000]") {
+    CHECK(BMat16::one(0) == zero);
+    CHECK(BMat16::one(2) == BMat16({{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}));
+    CHECK(BMat16::one(10) == BMat16({{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+                                    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}));
+    CHECK(BMat16::one(16) == BMat16::one());
+}
+
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::transpose", "[BMat16][001]") {
+    CHECK(zero.transpose() == zero);
+    CHECK(bm2.transpose() == bm2t);
+    CHECK(bm3.transpose() == bm3t);
+
+    for (auto m : BMlist) {
+        CHECK(m.transpose().transpose() == m);
+    }
+}
+
+TEST_AGREES(BMat16Fixture, transpose, transpose_naive, BMlist, "[BMat16][002]");
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][003]") {
+    BMat16 tmp = bm * bm1;
+    CHECK(tmp == bmm1);
+    CHECK(tmp == bm * bm1);
+
+    for (auto b : BMlist) {
+        CHECK(zero * b == zero);
+        CHECK(b * zero == zero);
+        CHECK(b * b.one() == b);
+        CHECK(b.one() * b == b);
+        CHECK((b * b) * (b * b) == b * b * b * b);
+    }
+
+    for (auto b1 : BMlist) {
+        for (auto b2 : BMlist) {
+            for (auto b3 : BMlist) {
+                CHECK((b1 * b2) * b3 == b1 * (b2 * b3));
+            }
+        }
+    }
+}
+
+TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_4bmat8, BMlist, "[BMat16][004]");
+TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][005]");
+TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][006]");
+
+TEST_CASE("BMat16::random", "[BMat16][007]") {
+    for (size_t d = 1; d < 8; ++d) {
+        BMat16 bm = BMat16::random(d);
+        for (size_t i = d + 1; i < 16; ++i) {
+            for (size_t j = 0; j < 16; ++j) {
+                CHECK(bm(i, j) == 0);
+                CHECK(bm(j, i) == 0);
+            }
+        }
+    }
+}
+
+TEST_CASE("BMat16::operator()", "[BMat16][008]") {
+    std::vector<std::vector<bool>> mat = {
+        {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0},
+        {0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0},
+        {0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1},
+        {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0},
+        {0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0},
+        {0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1},
+        {0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1},
+        {0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1},
+        {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1},
+        {0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1},
+        {0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1},
+        {1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0},
+        {0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1},
+        {1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0},
+        {0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0}};
+    BMat16 bm(mat);
+    for (size_t i = 0; i < 15; ++i) {
+        for (size_t j = 0; j < 15; ++j) {
+            CHECK(static_cast<size_t>(bm(i, j)) == mat[i][j]);
+        }
+    }
+}
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator<<", "[BMat16][009]") {
+    std::ostringstream oss;
+    oss << bm3;
+    CHECK(oss.str() == "0001101000101110\n"
+                       "0100101100001001\n"
+                       "1010000101101111\n"
+                       "0101001010100010\n"
+                       "0010001000010001\n"
+                       "1100101101100100\n"
+                       "1011000000100100\n"
+                       "1010001010010010\n"
+                       "0100100100010010\n"
+                       "1000101010001001\n"
+                       "0000000010100001\n"
+                       "1101110010100010\n"
+                       "0100100000110101\n"
+                       "1101001010101110\n"
+                       "0100010100001001\n"
+                       "0100000110100100\n");
+
+    std::stringbuf buff;
+    std::ostream os(&buff);
+    os << BMat8::random();  // Also does not do anything visible
+}
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::set", "[BMat16][010]") {
+    BMat16 bs;
+    bs = bm;
+    bs.set(0, 0, 1);
+    CHECK(bs != bm);
+    bs = bm;
+    bs.set(0, 0, 0);
+    CHECK(bs == bm);
+    bs = bm;
+    bs.set(13, 6, 1);
+    CHECK(bs != bm);
+    CHECK(bs == bm3);
+
+    for (size_t i = 0; i < 16; ++i)
+        for (size_t j = 0; j < 16; ++j)
+            bs.set(i, j, true);
+    CHECK(bs == ones);
+
+    for (size_t i = 0; i < 16; ++i)
+        for (size_t j = 0; j < 16; ++j)
+            bs.set(i, j, false);
+    CHECK(bs == zero);
+}
+
+TEST_CASE_METHOD(BMat16Fixture, "BMat16::nr_rows", "[BMat16][011]") {
+    CHECK(zero.nr_rows() == 0);
+    CHECK(one1.nr_rows() == 1);
+    CHECK(one2.nr_rows() == 2);
+    CHECK(bm.nr_rows() == 16);
+    CHECK(BMat16({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2);
+}
+
+}  // namespace HPCombi
diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp
index b2879dc..15c41e7 100644
--- a/tests/test_bmat8.cpp
+++ b/tests/test_bmat8.cpp
@@ -120,8 +120,9 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose", "[BMat8][001]") {
 
 TEST_AGREES(BMat8Fixture, transpose, transpose_mask, BMlist, "[BMat8][002]");
 TEST_AGREES(BMat8Fixture, transpose, transpose_maskd, BMlist, "[BMat8][003]");
+TEST_AGREES(BMat8Fixture, transpose, transpose_naive, BMlist, "[BMat8][004]");
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][004]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][005]") {
     for (auto a : BMlist) {
         for (auto b : BMlist) {
             BMat8 at = a, bt = b;
@@ -132,7 +133,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][004]") {
     }
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator*", "[BMat8][005]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator*", "[BMat8][006]") {
     BMat8 tmp = bm * bm1;
     CHECK(tmp == bmm1);
     CHECK(tmp == bm * bm1);
@@ -154,7 +155,10 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator*", "[BMat8][005]") {
     }
 }
 
-TEST_CASE("BMat8::random", "[BMat8][006]") {
+TEST_AGREES2(BMat8Fixture, BMat8::operator*, mult_naive, BMlist, "[BMat8][007]");
+TEST_AGREES2(BMat8Fixture, BMat8::operator*, mult_naive_array, BMlist, "[BMat8][008]");
+
+TEST_CASE("BMat8::random", "[BMat8][009]") {
     for (size_t d = 1; d < 8; ++d) {
         BMat8 bm = BMat8::random(d);
         for (size_t i = d + 1; i < 8; ++i) {
@@ -166,7 +170,7 @@ TEST_CASE("BMat8::random", "[BMat8][006]") {
     }
 }
 
-TEST_CASE("BMat8::operator()", "[BMat8][007]") {
+TEST_CASE("BMat8::operator()", "[BMat8][010]") {
     std::vector<std::vector<bool>> mat = {
         {0, 0, 0, 1, 0, 0, 1}, {0, 1, 1, 1, 0, 1, 0}, {1, 1, 0, 1, 1, 1, 1},
         {0, 0, 1, 0, 0, 1, 1}, {1, 1, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 1},
@@ -179,7 +183,7 @@ TEST_CASE("BMat8::operator()", "[BMat8][007]") {
     }
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][008]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][011]") {
     std::ostringstream oss;
     oss << bm3;
     CHECK(oss.str() == "00010011\n"
@@ -196,7 +200,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][008]") {
     os << BMat8::random();  // Also does not do anything visible
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::set", "[BMat8][009]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::set", "[BMat8][012]") {
     BMat8 bs;
     bs = bm;
     bs.set(0, 0, 1);
@@ -220,7 +224,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::set", "[BMat8][009]") {
     CHECK(bs == zero);
 }
 
-TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") {
+TEST_CASE("BMat8::row_space_basis", "[BMat8][013]") {
     BMat8 bm({{0, 1, 1, 1, 0, 1, 0, 1},
               {0, 0, 0, 0, 0, 0, 0, 1},
               {1, 1, 1, 1, 1, 1, 0, 1},
@@ -277,7 +281,7 @@ TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") {
     }
 }
 
-TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") {
+TEST_CASE("BMat8::col_space_basis", "[BMat8][014]") {
     BMat8 bm({{0, 1, 1, 1, 0, 1, 0, 1},
               {0, 0, 0, 0, 0, 0, 0, 1},
               {1, 1, 1, 1, 1, 1, 0, 1},
@@ -334,7 +338,7 @@ TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") {
     }
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][015]") {
     CHECK(zero.row_space_size() == 1);
     CHECK(one1.row_space_size() == 2);
     CHECK(one2.row_space_size() == 4);
@@ -349,15 +353,15 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") {
 }
 
 TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size, BMlist,
-            "[BMat8][013]");
+            "[BMat8][016]");
 TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl, BMlist,
-            "[BMat8][014]");
+            "[BMat8][017]");
 TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl1, BMlist,
-            "[BMat8][015]");
+            "[BMat8][018]");
 TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_bitset, BMlist,
-            "[BMat8][016]");
+            "[BMat8][019]");
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][020]") {
     CHECK(zero.row_space_included(one1));
     CHECK_FALSE(one1.row_space_included(zero));
 
@@ -378,11 +382,11 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") {
 }
 
 TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_ref, BMlist,
-             "[BMat8][018]");
+             "[BMat8][021]");
 TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_bitset,
-             BMlist, "[BMat8][019]");
+             BMlist, "[BMat8][022]");
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][023]") {
     BMat8 a0 = BMat8::one();
     BMat8 b0 = BMat8(0);
     BMat8 a1 = BMat8(0);
@@ -405,7 +409,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") {
     }
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][024]") {
     CHECK(bm2.row_permuted(Perm16({1, 0})) == BMat8({{0, 1}, {1, 1}}));
     CHECK(bm2.row_permuted(Perm16({2, 1, 0})) ==
           BMat8({{0, 0, 0}, {0, 1, 0}, {1, 1, 0}}));
@@ -429,7 +433,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") {
                  {0, 0, 0, 0, 0, 0, 0, 1}}));
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][025]") {
     CHECK(bm2.col_permuted(Perm16({1, 0})) == BMat8({{1, 1}, {1, 0}}));
     CHECK(bm2.col_permuted(Perm16({2, 1, 0})) ==
           BMat8({{0, 1, 1}, {0, 1, 0}, {0, 0, 0}}));
@@ -453,7 +457,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") {
                  {0, 0, 0, 0, 0, 0, 0, 1}}));
 }
 
-TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") {
+TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][026]") {
     CHECK(BMat8::row_permutation_matrix(Perm16({1, 0})) ==
           BMat8({{0, 1, 0, 0, 0, 0, 0, 0},
                  {1, 0, 0, 0, 0, 0, 0, 0},
@@ -483,7 +487,7 @@ TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") {
                  {0, 0, 0, 0, 0, 0, 0, 1}}));
 }
 
-TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") {
+TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][027]") {
     CHECK(BMat8::col_permutation_matrix(Perm16({1, 0})) ==
           BMat8({{0, 1, 0, 0, 0, 0, 0, 0},
                  {1, 0, 0, 0, 0, 0, 0, 0},
@@ -513,7 +517,7 @@ TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") {
                  {0, 0, 0, 0, 0, 0, 0, 1}}));
 }
 
-TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") {
+TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][028]") {
     CHECK(zero.nr_rows() == 0);
     CHECK(one1.nr_rows() == 1);
     CHECK(one2.nr_rows() == 2);
@@ -521,7 +525,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") {
     CHECK(BMat8({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2);
 }
 
-TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") {
+TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][029]") {
     BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}});
     BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}});
     CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0}));