diff --git a/frontend/benchmark.h b/frontend/benchmark.h
index c5b51e9..dcc00d9 100644
--- a/frontend/benchmark.h
+++ b/frontend/benchmark.h
@@ -14,6 +14,7 @@
 #include "libOTe/Vole/Silent/SilentVoleReceiver.h"
 #include "libOTe/Tools/CoeffCtx.h"
 #include "libOTe/Tools/TungstenCode/TungstenCode.h"
+#include "libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h"
 
 namespace osuCrypto
 {
@@ -157,7 +158,7 @@ namespace osuCrypto
         timer.setTimePoint("_____________________");
         for (u64 i = 0; i < trials; ++i)
         {
-            if(gf128)
+            if (gf128)
                 code.dualEncode<block, CoeffCtxGF128>(x.begin(), {});
             else
                 code.dualEncode<block, CoeffCtxGF2>(x.begin(), {});
@@ -172,6 +173,143 @@ namespace osuCrypto
             std::cout << verbose << std::endl;
     }
 
+    // Benchmarks the legacy ExConvCodeOld encoder: runs -t in-place dualEncode<block> calls and prints the timings.
+    inline void ExConvCodeOldBench(CLP& cmd)
+    {
+#ifdef LIBOTE_ENABLE_OLD_EXCONV
+        u64 trials = cmd.getOr("t", 10);
+
+        // the message length of the code.
+        // The noise vector will have size n = k * R (R defaults to 2) unless -n is given.
+        // the user can use
+        //   -k X
+        // to state that exactly X rows should be used or
+        //   -kk X
+        // to state that 2^X rows should be used.
+        u64 k = cmd.getOr("k", 1ull << cmd.getOr("kk", 10));
+
+        u64 n = cmd.getOr<u64>("n", k * cmd.getOr("R", 2.0));
+
+        // the weight of the code
+        u64 w = cmd.getOr("w", 7);
+
+        // size for the accumulator (# random transitions)
+        u64 a = cmd.getOr("a", roundUpTo(log2ceil(n), 8));
+
+        // -v: attach a second timer to the code and print it after the summary.
+        bool v = cmd.isSet("v");
+
+        // -sys: handed to config() as its last argument.
+        bool sys = cmd.isSet("sys");
+
+        ExConvCodeOld code;
+        code.config(k, n, w, a, sys);
+
+        if (v)
+        {
+            std::cout << "n: " << code.mCodeSize << std::endl;
+            std::cout << "k: " << code.mMessageSize << std::endl;
+            //std::cout << "w: " << code.mExpanderWeight << std::endl;
+        }
+
+        // dualEncode(x) is called on this one vector only, so no second buffer is allocated.
+        std::vector<block> x(code.mCodeSize);
+        Timer timer, verbose;
+
+        if (v)
+            code.setTimer(verbose);
+
+        timer.setTimePoint("_____________________");
+        for (u64 i = 0; i < trials; ++i)
+        {
+            code.dualEncode<block>(x);
+            timer.setTimePoint("encode");
+        }
+
+        if (cmd.isSet("quiet") == false)
+        {
+            std::cout << "EC " << std::endl;
+            std::cout << timer << std::endl;
+        }
+        if (v)
+            std::cout << verbose << std::endl;
+#else
+        std::cout << "LIBOTE_ENABLE_OLD_EXCONV = false" << std::endl;
+#endif
+    }
+
+
+    // Benchmarks the regular PPRF: runs -t sender/receiver expand() pairs over a local socket pair and prints timing and traffic.
+    inline void PprfBench(CLP& cmd)
+    {
+#ifdef ENABLE_SILENTOT
+
+        try
+        {
+            using Ctx = CoeffCtxGF2;
+            RegularPprfReceiver<block, block, Ctx> recver;
+            RegularPprfSender<block, block, Ctx> sender;
+
+            u64 trials = cmd.getOr("t", 10);
+            // -w and -n (or -nn for n = 2^nn) size the run: configure(n / w, w) is called and the outputs hold n / w * w blocks.
+            u64 w = cmd.getOr("w", 32);
+            u64 n = cmd.getOr("n", 1ull << cmd.getOr("nn", 14));
+            // both PRNGs start from the fixed ZeroBlock seed. NOTE(review): prng1 and delta are not used below.
+            PRNG prng0(ZeroBlock), prng1(ZeroBlock);
+            block delta = prng0.get();
+
+            auto sock = coproto::LocalAsyncSocket::makePair();
+
+            Timer rTimer;
+            auto s = rTimer.setTimePoint("start");
+            auto ctx = Ctx{};
+            auto vals = Ctx::Vec<block>(w);
+            auto out0 = Ctx::Vec<block>(n / w * w);
+            auto out1 = Ctx::Vec<block>(n / w * w);
+
+            // every trial reconfigures both parties, installs locally constructed base OTs and choice bits, and runs both expand() calls.
+
+            for (u64 t = 0; t < trials; ++t)
+            {
+                sender.configure(n / w, w);
+                recver.configure(n / w, w);
+
+                std::vector<std::array<block, 2>> baseSend(sender.baseOtCount());
+                std::vector<block> baseRecv(sender.baseOtCount());
+                BitVector baseChoice(sender.baseOtCount());
+                sender.setBase(baseSend);
+                recver.setBase(baseRecv);
+                recver.setChoiceBits(baseChoice);
+
+                auto p0 = sender.expand(sock[0], vals, prng0.get(), out0, PprfOutputFormat::Interleaved, true, 1, ctx);
+                auto p1 = recver.expand(sock[1], out1, PprfOutputFormat::Interleaved, true, 1, ctx);
+
+                rTimer.setTimePoint("r start");
+                coproto::sync_wait(macoro::when_all_ready(
+                    std::move(p0), std::move(p1)));
+                rTimer.setTimePoint("r done");
+
+            }
+            auto e = rTimer.setTimePoint("end");
+            // wall time between the "start" and "end" time points, averaged per trial and scaled to 512 outputs.
+            auto time = std::chrono::duration_cast<std::chrono::milliseconds>(e - s).count();
+            auto avgTime = time / double(trials);
+            auto timePer512 = avgTime / n * 512;
+            std::cout << "OT n:" << n << ", " <<
+                avgTime << "ms/batch, " << timePer512 << "ms/512ot" << std::endl;
+
+            std::cout << rTimer << std::endl;
+
+            std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl;
+        }
+        catch (std::exception& e)
+        {
+            std::cout << e.what() << std::endl;
+        }
+#else
+        std::cout << "ENABLE_SILENTOT = false" << std::endl;
+#endif
+    }
 
     inline void TungstenCodeBench(CLP& cmd)
     {
@@ -201,21 +339,23 @@ namespace osuCrypto
             std::cout << "k: " << code.mMessageSize << std::endl;
         }
 
-        std::vector<block> x(code.mCodeSize);
+        AlignedUnVector<block> x(code.mCodeSize);
         Timer timer, verbose;
-
+        
 
         timer.setTimePoint("_____________________");
         for (u64 i = 0; i < trials; ++i)
         {
-            code.dualEncode<block, CoeffCtxGF128>(x.data(), {});
+            code.dualEncode<block, CoeffCtxGF2>(x.data(), {});
 
             timer.setTimePoint("encode");
         }
 
-        std::cout << "tungsten " << std::endl;
-        std::cout << timer << std::endl;
-
+        if (cmd.isSet("quiet") == false)
+        {
+            std::cout << "tungsten " << std::endl;
+            std::cout << timer << std::endl;
+        }
         if (v)
             std::cout << verbose << std::endl;
     }
@@ -301,7 +441,7 @@ namespace osuCrypto
             u64 trials = cmd.getOr("t", 10);
 
             u64 n = cmd.getOr("n", 1ull << cmd.getOr("nn", 20));
-            MultType multType = (MultType)cmd.getOr("m", (int)MultType::ExAcc7);
+            MultType multType = (MultType)cmd.getOr("m", (int)MultType::ExConv7x24);
             std::cout << multType << std::endl;
 
             recver.mMultType = multType;
@@ -314,51 +454,47 @@ namespace osuCrypto
 
             Timer sTimer;
             Timer rTimer;
+            recver.setTimer(rTimer);
+            sender.setTimer(rTimer);
             sTimer.setTimePoint("start");
-            rTimer.setTimePoint("start");
-
-            auto t0 = std::thread([&] {
-                for (u64 t = 0; t < trials; ++t)
-                {
-                    auto p0 = sender.silentSendInplace(delta, n, prng0, sock[0]);
-
-                    char c;
-
-                    coproto::sync_wait(sock[0].send(std::move(c)));
-                    coproto::sync_wait(sock[0].recv(c));
-                    sTimer.setTimePoint("__");
-                    coproto::sync_wait(sock[0].send(std::move(c)));
-                    coproto::sync_wait(sock[0].recv(c));
-                    sTimer.setTimePoint("s start");
-                    coproto::sync_wait(p0);
-                    sTimer.setTimePoint("s done");
-                }
-                });
-
+            auto s = sTimer.setTimePoint("start");
 
             for (u64 t = 0; t < trials; ++t)
             {
-                auto p1 = recver.silentReceiveInplace(n, prng1, sock[1]);
-                char c;
-                coproto::sync_wait(sock[1].send(std::move(c)));
-                coproto::sync_wait(sock[1].recv(c));
+                sender.configure(n);
+                recver.configure(n);
 
-                rTimer.setTimePoint("__");
-                coproto::sync_wait(sock[1].send(std::move(c)));
-                coproto::sync_wait(sock[1].recv(c));
+                auto choice = recver.sampleBaseChoiceBits(prng0);
+                std::vector<std::array<block, 2>> sendBase(sender.silentBaseOtCount());
+                std::vector<block> recvBase(recver.silentBaseOtCount());
+                sender.setSilentBaseOts(sendBase);
+                recver.setSilentBaseOts(recvBase);
+
+                auto p0 = sender.silentSendInplace(delta, n, prng0, sock[0]);
+                auto p1 = recver.silentReceiveInplace(n, prng1, sock[1], ChoiceBitPacking::True);
 
                 rTimer.setTimePoint("r start");
-                coproto::sync_wait(p1);
+                coproto::sync_wait(macoro::when_all_ready(
+                    std::move(p0), std::move(p1)));
                 rTimer.setTimePoint("r done");
 
             }
+            auto e = rTimer.setTimePoint("end");
 
+            if (cmd.isSet("quiet") == false)
+            {
 
-            t0.join();
-            std::cout << sTimer << std::endl;
-            std::cout << rTimer << std::endl;
+                auto time = std::chrono::duration_cast<std::chrono::milliseconds>(e - s).count();
+                auto avgTime = time / double(trials);
+                auto timePer512 = avgTime / n * 512;
+                std::cout << "OT n:" << n << ", " <<
+                    avgTime << "ms/batch, " << timePer512 << "ms/512ot" << std::endl;
 
-            std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl;
+                std::cout << sTimer << std::endl;
+                std::cout << rTimer << std::endl;
+
+                std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl;
+            }
         }
         catch (std::exception& e)
         {
diff --git a/frontend/main.cpp b/frontend/main.cpp
index 339b0ad..6dba7b4 100644
--- a/frontend/main.cpp
+++ b/frontend/main.cpp
@@ -105,12 +105,16 @@ int main(int argc, char** argv)
 			QCCodeBench(cmd);
 		else if (cmd.isSet("silent"))
 			SilentOtBench(cmd);
+		else if (cmd.isSet("pprf"))
+			PprfBench(cmd);
 		else if (cmd.isSet("vole2"))
 			VoleBench2(cmd);
 		else if (cmd.isSet("ea"))
 			EACodeBench(cmd);
 		else if (cmd.isSet("ec"))
 			ExConvCodeBench(cmd);
+		else if (cmd.isSet("ecold"))
+			ExConvCodeOldBench(cmd);
 		else if (cmd.isSet("tungsten"))
 			TungstenCodeBench(cmd);
 
diff --git a/libOTe/Tools/CoeffCtx.h b/libOTe/Tools/CoeffCtx.h
index ddf64ee..a070d12 100644
--- a/libOTe/Tools/CoeffCtx.h
+++ b/libOTe/Tools/CoeffCtx.h
@@ -151,7 +151,8 @@ namespace osuCrypto {
             static_assert(std::is_trivially_copyable<F1>::value, "memcpy is used so must be trivially_copyable.");
             static_assert(std::is_same_v<F1, F2>, "src and destication types are not the same.");
 
-            std::copy(begin, end, dstBegin);
+            memcpy((F2* __restrict) & *dstBegin, (F1 * __restrict) &*begin, std::distance(begin, end) * sizeof(F1));
+            //std::copy(begin, end, dstBegin);
         }
 
         // deserialize [begin,...,end) into  [dstBegin, ...)
diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp b/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp
new file mode 100644
index 0000000..80a69b8
--- /dev/null
+++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp
@@ -0,0 +1,20 @@
+
+#define EXCONVCODE_INSTANTIATIONS
+#include "ExConvCodeOld.cpp"
+#ifdef LIBOTE_ENABLE_OLD_EXCONV
+
+namespace osuCrypto
+{
+    // Explicit instantiations of the ExConvCodeOld templates for the element types block and u8.
+    template void ExConvCodeOld::dualEncode<block>(span<block> e);
+    template void ExConvCodeOld::dualEncode<u8>(span<u8> e);
+    template void ExConvCodeOld::dualEncode<block>(span<block> e, span<block> w);
+    template void ExConvCodeOld::dualEncode<u8>(span<u8> e, span<u8> w);
+    template void ExConvCodeOld::dualEncode2<block, u8>(span<block>, span<u8> e);
+    template void ExConvCodeOld::dualEncode2<block, block>(span<block>, span<block> e);
+    // two-span accumulate, for the same (block, u8) and (block, block) pairs.
+    template void ExConvCodeOld::accumulate<block, u8>(span<block>, span<u8> e);
+    template void ExConvCodeOld::accumulate<block, block>(span<block>, span<block> e);
+}
+
+#endif
\ No newline at end of file
diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp
new file mode 100644
index 0000000..ae07227
--- /dev/null
+++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp
@@ -0,0 +1,555 @@
+#include "ExConvCodeOld.h"
+
+#ifdef LIBOTE_ENABLE_OLD_EXCONV
+
+namespace osuCrypto
+{
+#ifdef ENABLE_SSE
+    // with SSE, My__m128 is the native __m128 and the real intrinsics are used.
+    using My__m128 = __m128;
+
+#else
+    using My__m128 = block;
+    // without SSE, My__m128 is a block and the intrinsics used in this file are emulated below.
+    inline My__m128 _mm_load_ps(float* b) { return *(block*)b; }
+
+    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps&ig_expand=557
+    inline My__m128 _mm_blendv_ps(My__m128 a, My__m128 b, My__m128 mask)
+    {
+        My__m128 dst;
+        for (u64 j = 0; j < 4; ++j)
+        {
+            if (mask.get<i32>(j) < 0) // 32-bit lane j of mask is negative: take b's lane, otherwise a's.
+                dst.set<u32>(j, b.get<u32>(j));
+            else
+                dst.set<u32>(j, a.get<u32>(j));
+        }
+        return dst;
+    }
+
+    // emulates _mm_setzero_ps with ZeroBlock.
+    inline My__m128 _mm_setzero_ps() { return ZeroBlock; }
+#endif
+
+    // In-place dual encoding of e (mCodeSize entries); the result ends up in its first mMessageSize entries.
+    template<typename T>
+    void ExConvCodeOld::dualEncode(span<T> e)
+    {
+        if (e.size() != mCodeSize)
+            throw RTE_LOC;
+
+        if (!mSystematic)
+        {
+            // encode into a scratch buffer, then copy it over the front of e.
+            oc::AlignedUnVector<T> scratch(mMessageSize);
+            dualEncode<T>(e, scratch);
+            memcpy(e.data(), scratch.data(), scratch.size() * sizeof(T));
+            setTimePoint("ExConv.encode.memcpy");
+            return;
+        }
+
+        // systematic: accumulate the entries after the first mMessageSize, then run the expander onto the front.
+        auto tail = e.subspan(mMessageSize);
+        setTimePoint("ExConv.encode.begin");
+        accumulate<T>(tail);
+        setTimePoint("ExConv.encode.accumulate");
+        mExpander.expand<T, true>(tail, e.subspan(0, mMessageSize));
+        setTimePoint("ExConv.encode.expand");
+    }
+
+
+    // Dual encoding of two vectors (each mCodeSize entries) in one pass, in place; only the systematic case is implemented.
+    template<typename T0, typename T1>
+    void ExConvCodeOld::dualEncode2(span<T0> e0, span<T1> e1)
+    {
+        if (e0.size() != mCodeSize)
+            throw RTE_LOC;
+        if (e1.size() != mCodeSize)
+            throw RTE_LOC;
+
+        if (mSystematic)
+        {
+            auto d0 = e0.subspan(mMessageSize); // the entries of e0 after the first mMessageSize
+            auto d1 = e1.subspan(mMessageSize); // the entries of e1 after the first mMessageSize
+            setTimePoint("ExConv.encode.begin");
+            accumulate<T0, T1>(d0, d1);
+            setTimePoint("ExConv.encode.accumulate");
+            mExpander.expand<T0, T1, true>(
+                d0, d1,
+                e0.subspan(0, mMessageSize),
+                e1.subspan(0, mMessageSize));
+            setTimePoint("ExConv.encode.expand");
+        }
+        else
+        {
+            //oc::AlignedUnVector<T0> w0(mMessageSize);
+            //dualEncode<T0, T1>(e, w);
+            //memcpy(e.data(), w.data(), w.size() * sizeof(T));
+            //setTimePoint("ExConv.encode.memcpy");
+
+            // the non-systematic case is not implemented.
+            throw RTE_LOC;
+
+        }
+    }
+
+    // Dual encoding into a separate output: w (mMessageSize entries) receives the encoding of e (mCodeSize entries).
+    // e is used as working memory and is overwritten in both modes.
+    template<typename T>
+    void ExConvCodeOld::dualEncode(span<T> e, span<T> w)
+    {
+        if (e.size() != mCodeSize)
+            throw RTE_LOC;
+
+        if (w.size() != mMessageSize)
+            throw RTE_LOC;
+
+        if (!mSystematic)
+        {
+            // accumulate e in place, then run the expander from e into w.
+            setTimePoint("ExConv.encode.begin");
+            accumulate<T>(e);
+            setTimePoint("ExConv.encode.accumulate");
+            mExpander.expand<T, false>(e, w);
+            setTimePoint("ExConv.encode.expand");
+            return;
+        }
+
+        // systematic: encode e in place and copy its leading w.size() entries out.
+        dualEncode<T>(e);
+
+        const auto bytes = w.size() * sizeof(T);
+        memcpy(w.data(), e.data(), bytes);
+        setTimePoint("ExConv.encode.memcpy");
+    }
+
+    // Refreshes the 256-block PRNG buffer in place: each group of 8 blocks b becomes roundEnc(b, k) ^ k, k being the group before it.
+    inline void refill(PRNG& prng)
+    {
+        assert(prng.mBuffer.size() == 256);
+        for (u64 i = 0; i < 256; i += 8)
+        {
+            // (u8)(i - 8) wraps around, so the first group (i == 0) is keyed by the last one (blocks 248..255).
+            block* __restrict b = prng.mBuffer.data() + i;
+            block* __restrict k = prng.mBuffer.data() + (u8)(i - 8);
+            //for (u64 j = 0; j < 8; ++j)
+            //{
+            //    b = b ^ mPrng.mBuffer.data()[idx[j]];
+            //}
+            b[0] = AES::roundEnc(b[0], k[0]);
+            b[1] = AES::roundEnc(b[1], k[1]);
+            b[2] = AES::roundEnc(b[2], k[2]);
+            b[3] = AES::roundEnc(b[3], k[3]);
+            b[4] = AES::roundEnc(b[4], k[4]);
+            b[5] = AES::roundEnc(b[5], k[5]);
+            b[6] = AES::roundEnc(b[6], k[6]);
+            b[7] = AES::roundEnc(b[7], k[7]);
+
+            b[0] = b[0] ^ k[0];
+            b[1] = b[1] ^ k[1];
+            b[2] = b[2] ^ k[2];
+            b[3] = b[3] ^ k[3];
+            b[4] = b[4] ^ k[4];
+            b[5] = b[5] ^ k[5];
+            b[6] = b[6] ^ k[6];
+            b[7] = b[7] ^ k[7];
+        }
+    }
+
+#ifndef EXCONVCODE_INSTANTIATIONS
+
+    // Point-list form of one accumulator step: records the entries (row, i) of column i; rnd receives the last random block used.
+    void ExConvCodeOld::accOne(
+        PointList& pl,
+        u64 i,
+        u8* __restrict& ptr,
+        PRNG& prng,
+        block& rnd,
+        u64& q,
+        u64 qe,
+        u64 size) const
+    {
+        u64 j = i + 1;
+        pl.push_back(i, i);
+        // the entry directly after the diagonal is always added when it is in range.
+        //if (mWrapping)
+        {
+            if (j < size)
+                pl.push_back(j, i);
+            ++j;
+        }
+        // q advances by 8 per consumed random byte; refill once fewer than mAccumulatorSize remain before qe.
+        if (q + mAccumulatorSize > qe)
+        {
+            refill(prng);
+            ptr = (u8*)prng.mBuffer.data();
+            q = 0;
+        }
+
+        for (u64 k = 0; k < mAccumulatorSize; k += 8, q += 8, j += 8)
+        {
+            assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size()));
+            rnd = block::allSame<u8>(*ptr);
+            ++ptr;
+            // b_t is rnd shifted by t (slli_epi32<t>); (j + t, i) is added when its first i32 lane is negative.
+            //std::cout << "r " << rnd << std::endl;
+            auto b0 = rnd;
+            auto b1 = rnd.slli_epi32<1>();
+            auto b2 = rnd.slli_epi32<2>();
+            auto b3 = rnd.slli_epi32<3>();
+            auto b4 = rnd.slli_epi32<4>();
+            auto b5 = rnd.slli_epi32<5>();
+            auto b6 = rnd.slli_epi32<6>();
+            auto b7 = rnd.slli_epi32<7>();
+            //rnd = rnd.mm_slli_epi32<8>();
+
+            if (j + 0 < size && b0.get<i32>(0) < 0) pl.push_back(j + 0, i);
+            if (j + 1 < size && b1.get<i32>(0) < 0) pl.push_back(j + 1, i);
+            if (j + 2 < size && b2.get<i32>(0) < 0) pl.push_back(j + 2, i);
+            if (j + 3 < size && b3.get<i32>(0) < 0) pl.push_back(j + 3, i);
+            if (j + 4 < size && b4.get<i32>(0) < 0) pl.push_back(j + 4, i);
+            if (j + 5 < size && b5.get<i32>(0) < 0) pl.push_back(j + 5, i);
+            if (j + 6 < size && b6.get<i32>(0) < 0) pl.push_back(j + 6, i);
+            if (j + 7 < size && b7.get<i32>(0) < 0) pl.push_back(j + 7, i);
+        }
+
+
+
+    }
+#endif
+
+
+    // XORs x[i] into xx[j + t] for each t in 0..7 selected by b[t]; with rangeCheck, positions >= size are skipped.
+    template<typename T, bool rangeCheck>
+    OC_FORCEINLINE void accOneHelper(
+        T* __restrict xx,
+        My__m128 xii,
+        u64 j, u64 i, u64 size,
+        block* b
+    )
+    {
+        My__m128 Zero = _mm_setzero_ps();
+
+        if constexpr (std::is_same<block, T>::value)
+        {
+            My__m128 bb[8];
+            bb[0] = _mm_load_ps((float*)&b[0]);
+            bb[1] = _mm_load_ps((float*)&b[1]);
+            bb[2] = _mm_load_ps((float*)&b[2]);
+            bb[3] = _mm_load_ps((float*)&b[3]);
+            bb[4] = _mm_load_ps((float*)&b[4]);
+            bb[5] = _mm_load_ps((float*)&b[5]);
+            bb[6] = _mm_load_ps((float*)&b[6]);
+            bb[7] = _mm_load_ps((float*)&b[7]);
+            // b[t] is the blend mask: per 32-bit lane, xii where the mask lane is negative, zero otherwise.
+            bb[0] = _mm_blendv_ps(Zero, xii, bb[0]);
+            bb[1] = _mm_blendv_ps(Zero, xii, bb[1]);
+            bb[2] = _mm_blendv_ps(Zero, xii, bb[2]);
+            bb[3] = _mm_blendv_ps(Zero, xii, bb[3]);
+            bb[4] = _mm_blendv_ps(Zero, xii, bb[4]);
+            bb[5] = _mm_blendv_ps(Zero, xii, bb[5]);
+            bb[6] = _mm_blendv_ps(Zero, xii, bb[6]);
+            bb[7] = _mm_blendv_ps(Zero, xii, bb[7]);
+            // copy the blended values out as blocks so they can be XORed into xx.
+            block tt[8];
+            memcpy(tt, bb, 8 * 16);
+
+            if (!rangeCheck || j + 0 < size) xx[j + 0] = xx[j + 0] ^ tt[0];
+            if (!rangeCheck || j + 1 < size) xx[j + 1] = xx[j + 1] ^ tt[1];
+            if (!rangeCheck || j + 2 < size) xx[j + 2] = xx[j + 2] ^ tt[2];
+            if (!rangeCheck || j + 3 < size) xx[j + 3] = xx[j + 3] ^ tt[3];
+            if (!rangeCheck || j + 4 < size) xx[j + 4] = xx[j + 4] ^ tt[4];
+            if (!rangeCheck || j + 5 < size) xx[j + 5] = xx[j + 5] ^ tt[5];
+            if (!rangeCheck || j + 6 < size) xx[j + 6] = xx[j + 6] ^ tt[6];
+            if (!rangeCheck || j + 7 < size) xx[j + 7] = xx[j + 7] ^ tt[7];
+        }
+        else // other element types: xii is not used; xx[i] is multiplied by the 0/1 result of the sign test on b[t].
+        {
+            auto bb0 = xx[i] * (b[0].get<i32>(0) < 0);
+            auto bb1 = xx[i] * (b[1].get<i32>(0) < 0);
+            auto bb2 = xx[i] * (b[2].get<i32>(0) < 0);
+            auto bb3 = xx[i] * (b[3].get<i32>(0) < 0);
+            auto bb4 = xx[i] * (b[4].get<i32>(0) < 0);
+            auto bb5 = xx[i] * (b[5].get<i32>(0) < 0);
+            auto bb6 = xx[i] * (b[6].get<i32>(0) < 0);
+            auto bb7 = xx[i] * (b[7].get<i32>(0) < 0);
+
+            if (!rangeCheck || j + 0 < size) xx[j + 0] = xx[j + 0] ^ bb0;
+            if (!rangeCheck || j + 1 < size) xx[j + 1] = xx[j + 1] ^ bb1;
+            if (!rangeCheck || j + 2 < size) xx[j + 2] = xx[j + 2] ^ bb2;
+            if (!rangeCheck || j + 3 < size) xx[j + 3] = xx[j + 3] ^ bb3;
+            if (!rangeCheck || j + 4 < size) xx[j + 4] = xx[j + 4] ^ bb4;
+            if (!rangeCheck || j + 5 < size) xx[j + 5] = xx[j + 5] ^ bb5;
+            if (!rangeCheck || j + 6 < size) xx[j + 6] = xx[j + 6] ^ bb6;
+            if (!rangeCheck || j + 7 < size) xx[j + 7] = xx[j + 7] ^ bb7;
+        }
+    }
+
+
+    // One accumulator step at position i: x[i+1] ^= x[i], then x[i] is XORed into each of the next 8 * width positions whose random bit is set.
+    template<typename T, bool rangeCheck, int width>
+    OC_FORCEINLINE void ExConvCodeOld::accOne(
+        T* __restrict xx,
+        u64 i,
+        u8*& ptr,
+        PRNG& prng,
+        u64& q,
+        u64 qe,
+        u64 size)
+    {
+        u64 j = i + 1;
+        if (width)
+        {
+            // xii is only read by the block branch of accOneHelper; a 16-byte load at xx + i for any other T could be misaligned or run past the end.
+            My__m128 xii = _mm_setzero_ps();
+            if constexpr (std::is_same<block, T>::value)
+                xii = _mm_load_ps((float*)(xx + i));
+
+            if (!rangeCheck || j < size)
+            {
+                auto xj = xx[j] ^ xx[i];
+                xx[j] = xj;
+                ++j;
+            }
+
+            // q counts the random bytes consumed since the last refill; refill when fewer than width remain before qe.
+            if (q + width > qe)
+            {
+                refill(prng);
+                ptr = (u8*)prng.mBuffer.data();
+                q = 0;
+            }
+            q += width;
+
+            for (u64 k = 0; k < width; ++k, j += 8)
+            {
+                assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size()));
+                block rnd = block::allSame<u8>(*(u8*)ptr++);
+
+                block b[8];
+                b[0] = rnd;
+                b[1] = rnd.slli_epi32<1>();
+                b[2] = rnd.slli_epi32<2>();
+                b[3] = rnd.slli_epi32<3>();
+                b[4] = rnd.slli_epi32<4>();
+                b[5] = rnd.slli_epi32<5>();
+                b[6] = rnd.slli_epi32<6>();
+                b[7] = rnd.slli_epi32<7>();
+                accOneHelper<T, rangeCheck>(xx, xii, j, i, size, b);
+            }
+        }
+    }
+
+    // Two-vector accumulator step at position i: performs the same update on xx0 and xx1, driven by one shared stream of random bytes.
+    template<typename T0, typename T1, bool rangeCheck, int width>
+    OC_FORCEINLINE void ExConvCodeOld::accOne(
+        T0* __restrict xx0,
+        T1* __restrict xx1,
+        u64 i,
+        u8*& ptr,
+        PRNG& prng,
+        u64& q,
+        u64 qe,
+        u64 size)
+    {
+        u64 j = i + 1;
+        if (width)
+        {
+            // only the block branch of accOneHelper reads these; a 16-byte load for any other element type could be misaligned or run past the end.
+            My__m128 xii0 = _mm_setzero_ps();
+            My__m128 xii1 = _mm_setzero_ps();
+            if constexpr (std::is_same<block, T0>::value) xii0 = _mm_load_ps((float*)(xx0 + i));
+            if constexpr (std::is_same<block, T1>::value) xii1 = _mm_load_ps((float*)(xx1 + i));
+            if (!rangeCheck || j < size)
+            {
+                auto xj0 = xx0[j] ^ xx0[i];
+                auto xj1 = xx1[j] ^ xx1[i];
+                xx0[j] = xj0;
+                xx1[j] = xj1;
+                ++j;
+            }
+
+            if (q + width > qe)
+            {
+                refill(prng);
+                ptr = (u8*)prng.mBuffer.data();
+                q = 0;
+            }
+            q += width;
+
+            for (u64 k = 0; k < width; ++k, j += 8)
+            {
+                assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size()));
+                block rnd = block::allSame<u8>(*(u8*)ptr++);
+                block b[8];
+                b[0] = rnd;
+                b[1] = rnd.slli_epi32<1>();
+                b[2] = rnd.slli_epi32<2>();
+                b[3] = rnd.slli_epi32<3>();
+                b[4] = rnd.slli_epi32<4>();
+                b[5] = rnd.slli_epi32<5>();
+                b[6] = rnd.slli_epi32<6>();
+                b[7] = rnd.slli_epi32<7>();
+
+                accOneHelper<T0, rangeCheck>(xx0, xii0, j, i, size, b);
+                accOneHelper<T1, rangeCheck>(xx1, xii1, j, i, size, b);
+            }
+        }
+    }
+
+
+
+    // In-place accumulator pass over x: applies accOne to every position in increasing order, with a PRNG seeded by mSeed ^ OneBlock.
+    template<typename T>
+    void ExConvCodeOld::accumulate(span<T> x)
+    {
+        PRNG prng(mSeed ^ OneBlock);
+
+        u64 i = 0;
+        auto size = x.size();
+        auto main = (u64)std::max<i64>(0, size - 1 - mAccumulatorSize);
+        u8* ptr = (u8*)prng.mBuffer.data();
+        auto qe = prng.mBuffer.size() * 128 / 8; // buffer length in bytes (assuming 128-bit blocks)
+        u64 q = 0;
+        T* __restrict xx = x.data();
+        // the first `main` positions cannot reach an index >= size, so they run without range checks; the rest are checked.
+        {
+#define CASE(I) case I:\
+                for (; i < main; ++i)\
+                    accOne<T, false, I>(xx, i, ptr, prng, q, qe, size);\
+                for (; i < size; ++i)\
+                    accOne<T, true, I>(xx, i, ptr, prng, q, qe, size);\
+                break
+            // the width template argument is mAccumulatorSize / 8; only the values 0..4 are instantiated.
+            switch (mAccumulatorSize / 8)
+            {
+                CASE(0);
+                CASE(1);
+                CASE(2);
+                CASE(3);
+                CASE(4);
+            default:
+                throw RTE_LOC;
+                break;
+            }
+#undef CASE
+        }
+    }
+
+
+    // Two-span accumulator pass: applies the same random accumulator to x0 and x1 in lock step; both spans must have the same size.
+    template<typename T0, typename T1>
+    void ExConvCodeOld::accumulate(span<T0> x0, span<T1> x1)
+    {
+        if (x1.size() != x0.size())
+            throw RTE_LOC; // x1 is indexed with x0's positions, so a shorter x1 would be overrun.
+
+        PRNG prng(mSeed ^ OneBlock);
+        u64 i = 0;
+        auto size = x0.size();
+        auto main = (u64)std::max<i64>(0, size - 1 - mAccumulatorSize);
+        u8* ptr = (u8*)prng.mBuffer.data();
+        auto qe = prng.mBuffer.size() * 128 / 8;
+        u64 q = 0;
+        T0* __restrict xx0 = x0.data();
+        T1* __restrict xx1 = x1.data();
+        {
+#define CASE(I) case I:\
+                for (; i < main; ++i)\
+                    accOne<T0, T1, false, I>(xx0,xx1, i, ptr, prng, q, qe, size);\
+                for (; i < size; ++i)\
+                    accOne<T0, T1, true, I>(xx0, xx1, i, ptr, prng, q, qe, size);\
+                break
+            switch (mAccumulatorSize / 8)
+            {
+                CASE(0);
+                CASE(1);
+                CASE(2);
+                CASE(3);
+                CASE(4);
+            default:
+                throw RTE_LOC;
+                break;
+            }
+#undef CASE
+        }
+    }
+
+
+#ifndef EXCONVCODE_INSTANTIATIONS
+
+    // Returns the expander matrix B. In the systematic case it is widened to mCodeSize columns: the expander's
+    // points are shifted right by mMessageSize columns and ones are added on the diagonal of the leading block.
+    SparseMtx ExConvCodeOld::getB() const
+    {
+        if (!mSystematic)
+            return mExpander.getB();
+
+        PointList shifted(mMessageSize, mCodeSize);
+        auto expanderPoints = mExpander.getB().points();
+
+        // expander entries first, moved past the leading mMessageSize columns ...
+        for (auto pt : expanderPoints)
+        {
+            shifted.push_back(pt.mRow, mMessageSize + pt.mCol);
+        }
+
+        // ... then the diagonal entries (d, d) of the leading block.
+        for (u64 d = 0; d < mMessageSize; ++d)
+            shifted.push_back(d, d);
+
+        return shifted;
+    }
+
+    // Get the parity check version of the accumulator: an n x n point list built from a PRNG seeded with mSeed ^ OneBlock.
+    SparseMtx ExConvCodeOld::getAPar() const
+    {
+        PRNG prng(mSeed ^ OneBlock);
+
+        auto n = mCodeSize - mSystematic * mMessageSize;
+
+        // only the sparse point list is filled in; no dense n x n matrix is needed here.
+        PointList AP(n, n);
+
+        block rnd;
+        u8* __restrict ptr = (u8*)prng.mBuffer.data();
+        auto qe = prng.mBuffer.size() * 128; // accOne advances q by 8 per random byte, so this limit is the same scale
+        u64 q = 0;
+
+        for (u64 i = 0; i < n; ++i)
+        {
+            accOne(AP, i, ptr, prng, rnd, q, qe, n);
+        }
+        return AP;
+    }
+
+    // Builds the accumulator matrix A from its parity-check form: starting from the identity, for every
+    // off-diagonal entry y listed in column i of getAPar(), row i is XORed into row y.
+    SparseMtx ExConvCodeOld::getA() const
+    {
+        auto parity = getAPar();
+        auto dense = DenseMtx::Identity(mCodeSize);
+
+        // in the systematic case the accumulator rows start after the first mMessageSize rows.
+        u64 shift = mSystematic ? mMessageSize : 0ull;
+
+        for (u64 col = 0; col < parity.rows(); ++col)
+        {
+            for (auto row : parity.col(col))
+            {
+                // the diagonal entry contributes nothing.
+                if (row == col)
+                    continue;
+
+                auto dst = dense.row(row + shift);
+                auto src = dense.row(col + shift);
+                dst ^= src;
+            }
+        }
+
+        return dense.sparse();
+    }
+#endif
+}
+
+#endif
\ No newline at end of file
diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h
new file mode 100644
index 0000000..ae31ba8
--- /dev/null
+++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h
@@ -0,0 +1,161 @@
+// © 2023 Visa.
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// 
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#pragma once
+
+#include "cryptoTools/Common/Defines.h"
+#include "cryptoTools/Common/Timer.h"
+#include "ExpanderOld.h"
+#include "libOTe/Tools/EACode/Util.h"
+#ifdef LIBOTE_ENABLE_OLD_EXCONV
+
+namespace osuCrypto
+{
+
+    // The encoder for the generator matrix G = B * A. dualEncode(...) is the main function;
+    // config(...) should be called first.
+    //
+    // B is the expander while A is the convolution.
+    //
+    // B has mMessageSize rows and mCodeSize columns. It is sampled uniformly
+    // with fixed row weight mExpander.mExpanderWeight.
+    //
+    // A is a lower triangular n by n matrix with ones on the diagonal. The
+    // mAccumulatorSize diagonals left of the main diagonal are uniformly random.
+    // If mStickyAccumulator, then the first diagonal left of the main is always ones
+    // (NOTE(review): no mStickyAccumulator member is declared in this class; confirm).
+    // See ExConvCodeInstantiations.cpp for how to instantiate new types that
+    // dualEncode can be called on.
+    //
+    // https://eprint.iacr.org/2023/882
+    class ExConvCodeOld : public TimerAdapter
+    {
+    public:
+        ExpanderCodeOld mExpander; // the expander B; set up by config(...)
+
+        // configure the code. The default parameters are chosen to balance security and performance.
+        // For additional parameter choices see the paper. Throws std::runtime_error if accumulatorSize is not a multiple of 8.
+        void config(
+            u64 messageSize,
+            u64 codeSize = 0 /*2 * messageSize is default */,
+            u64 expanderWeight = 7,
+            u64 accumulatorSize = 16,
+            bool systematic = true,
+            block seed = block(99999, 88888))
+        {
+            if (codeSize == 0)
+                codeSize = 2 * messageSize;
+
+            if (accumulatorSize % 8)
+                throw std::runtime_error("ExConvCode accumulator size must be a multiple of 8." LOCATION);
+            // store the parameters; the expander is given codeSize reduced by messageSize when systematic.
+            mSeed = seed;
+            mMessageSize = messageSize;
+            mCodeSize = codeSize;
+            mAccumulatorSize = accumulatorSize;
+            mSystematic = systematic;
+            mExpander.config(messageSize, codeSize - messageSize * systematic, expanderWeight, seed ^ CCBlock);
+        }
+
+        // the seed that generates the code.
+        block mSeed = ZeroBlock;
+
+        // The message size of the code. K.
+        u64 mMessageSize = 0;
+
+        // The codeword size of the code. n.
+        u64 mCodeSize = 0;
+
+        // The size of the accumulator. config(...) requires a multiple of 8.
+        u64 mAccumulatorSize = 0;
+
+        // is the code systematic (true=faster)
+        bool mSystematic = true;
+
+        // return n-k. code size n, message size k.
+        u64 parityRows() const { return mCodeSize - mMessageSize; }
+
+        // return code size n.
+        u64 parityCols() const { return mCodeSize; }
+
+        // return message size k.
+        u64 generatorRows() const { return mMessageSize; }
+
+        // return code size n.
+        u64 generatorCols() const { return mCodeSize; }
+
+        // Compute w = G * e. e will be modified in the computation.
+        template<typename T>
+        void dualEncode(span<T> e, span<T> w);
+
+        // Compute e[0,...,k-1] = G * e.
+        template<typename T>
+        void dualEncode(span<T> e);
+
+        // Two-vector variant taking e0 and e1 together.
+        // NOTE(review): presumably computes e_i[0,...,k-1] = G * e_i for both inputs; the definition is not in this header.
+        template<typename T0, typename T1>
+        void dualEncode2(span<T0> e0, span<T1> e1);
+
+        // get the expander matrix (with an identity block in the leading columns when systematic).
+        SparseMtx getB() const;
+
+        // Get the parity check version of the accumulator
+        SparseMtx getAPar() const;
+
+        // get the accumulator matrix
+        SparseMtx getA() const;
+
+        // Internal helpers (note: declared in the public section) ------------------------------------
+
+        // generate the point list for accumulating row i.
+        void accOne(
+            PointList& pl,
+            u64 i,
+            u8* __restrict& ptr,
+            PRNG& prng,
+            block& rnd,
+            u64& q,
+            u64 qe,
+            u64 size) const;
+
+        // accumulating row i.
+        template<typename T, bool rangeCheck, int width>
+        void accOne(
+            T* __restrict xx,
+            u64 i,
+            u8*& ptr,
+            PRNG& prng,
+            u64& q,
+            u64 qe,
+            u64 size);
+
+
+        // accumulating row i of the two vectors xx0 and xx1.
+        template<typename T0, typename T1, bool rangeCheck, int width>
+        void accOne(
+            T0* __restrict xx0,
+            T1* __restrict xx1,
+            u64 i,
+            u8*& ptr,
+            PRNG& prng,
+            u64& q,
+            u64 qe,
+            u64 size);
+
+
+        // accumulate x onto itself.
+        template<typename T>
+        void accumulate(span<T> x);
+
+
+        // accumulate x0 and x1 onto themselves.
+        template<typename T0,typename T1>
+        void accumulate(span<T0> x0, span<T1> x1);
+    };
+}
+
+#endif
\ No newline at end of file
diff --git a/libOTe/Tools/ExConvCodeOld/ExpanderOld.h b/libOTe/Tools/ExConvCodeOld/ExpanderOld.h
new file mode 100644
index 0000000..a9717c3
--- /dev/null
+++ b/libOTe/Tools/ExConvCodeOld/ExpanderOld.h
@@ -0,0 +1,527 @@
+// © 2023 Peter Rindal.
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// 
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#pragma once
+
+#include "cryptoTools/Common/Defines.h"
+#include "libOTe/Tools/LDPC/Mtx.h"
+#include "libOTe/Tools/EACode/Util.h"
+#include "cryptoTools/Common/Range.h"
+#ifdef LIBOTE_ENABLE_OLD_EXCONV
+
+
+namespace osuCrypto
+{
+
+    // The encoder for the expander matrix B.
+    // B has mMessageSize rows and mCodeSize columns. It is sampled uniformly
+    // with fixed row weight mExpanderWeight.
+    class ExpanderCodeOld
+    {
+    public:
+        // store the code parameters as given; nothing is validated or defaulted here.
+        void config(
+            u64 messageSize,
+            u64 codeSize = 0 /* NOTE: stored as-is; 0 is not replaced by 5 * messageSize */,
+            u64 expanderWeight = 21,
+            block seed = block(33333, 33333))
+        {
+            mMessageSize = messageSize;
+            mCodeSize = codeSize;
+            mExpanderWeight = expanderWeight;
+            mSeed = seed;
+
+        }
+
+        // the seed that generates the code.
+        block mSeed = block(0, 0);
+
+        // The message size of the code. K.
+        u64 mMessageSize = 0;
+
+        // The codeword size of the code. n.
+        u64 mCodeSize = 0;
+
+        // The row weight of the B matrix.
+        u64 mExpanderWeight = 0;
+        // dimensions: the parity form is (n - k) x n, the generator form is k x n.
+        u64 parityRows() const { return mCodeSize - mMessageSize; }
+        u64 parityCols() const { return mCodeSize; }
+
+        u64 generatorRows() const { return mMessageSize; }
+        u64 generatorCols() const { return mCodeSize; }
+
+        // expandOne, one vector: returns the XOR of `count` entries of ee at indices drawn from prng.
+        // The count > 1 and count == 1 overloads are selected with std::enable_if.
+        template<typename T, u64 count>
+        typename std::enable_if<(count > 1), T>::type
+            expandOne(const T* __restrict ee, detail::ExpanderModd& prng)const;
+        // expandOne, two vectors: the drawn indices select from both ee1 and ee2; Add chooses XOR-into vs overwrite of *y1, *y2.
+        template<typename T, typename T2, u64 count, bool Add>
+        typename std::enable_if<(count > 1)>::type
+            expandOne(
+                const T* __restrict ee1,
+                const T2* __restrict ee2,
+                T* __restrict y1,
+                T2* __restrict y2,
+                detail::ExpanderModd& prng)const;
+
+        template<typename T, u64 count>
+        typename std::enable_if<count == 1, T>::type
+            expandOne(const T* __restrict ee, detail::ExpanderModd& prng) const;
+
+        template<typename T, typename T2, u64 count, bool Add>
+        typename std::enable_if<count == 1>::type
+            expandOne(
+                const T* __restrict ee1,
+                const T2* __restrict ee2,
+                T* __restrict y1,
+                T2* __restrict y2,
+                detail::ExpanderModd& prng) const;
+        // w[i] = XOR of mExpanderWeight entries of e at prng-drawn indices (XORed into w[i] when Add); asserts w.size() == mMessageSize, e.size() == mCodeSize.
+        template<typename T, bool Add = false>
+        void expand(
+            span<const T> e,
+            span<T> w) const;
+        // two-vector expand: (e1, w1) and (e2, w2) are processed with the same drawn indices.
+        template<typename T, typename T2, bool Add>
+        void expand(
+            span<const T> e1,
+            span<const T2> e2,
+            span<T> w1,
+            span<T2> w2
+        ) const;
+        // B as a sparse matrix; a column drawn an even number of times within a row cancels.
+        SparseMtx getB() const;
+
+    };
+
+
+    // Weight-1 base case: one index is drawn from prng and that entry of ee is returned.
+    template<typename T, u64 count>
+    typename std::enable_if<count == 1, T>::type
+        ExpanderCodeOld::expandOne(const T* __restrict ee, detail::ExpanderModd& prng) const
+    {
+        return ee[prng.get()];
+    }
+
+    // Weight-1 base case for two vectors: draw one index r; Add == false stores ee1[r], ee2[r] in *y1, *y2, Add == true XORs them in.
+    template<typename T, typename T2, u64 count, bool Add>
+    typename std::enable_if<count == 1>::type
+        ExpanderCodeOld::expandOne(
+            const T* __restrict ee1,
+            const T2* __restrict ee2,
+            T* __restrict y1,
+            T2* __restrict y2,
+            detail::ExpanderModd& prng) const
+    {
+        auto r = prng.get();
+        // Add is a template parameter: resolve the branch at compile time, as the count > 1 overload does.
+        if constexpr (Add)
+        {
+            *y1 = *y1 ^ ee1[r];
+            *y2 = *y2 ^ ee2[r];
+        }
+        else
+        {
+            *y1 = ee1[r];
+            *y2 = ee2[r];
+        }
+    }
+
+    // Returns the XOR of `count` entries of ee at indices drawn from prng: 8 per step while count >= 8, otherwise one per step.
+    template<typename T, u64 count>
+    OC_FORCEINLINE typename std::enable_if<(count > 1), T>::type
+        ExpanderCodeOld::expandOne(const T* __restrict ee, detail::ExpanderModd& prng)const
+    {
+        if constexpr (count >= 8)
+        {
+            u64 rr[8]; // the eight drawn indices
+            T w[8]; // the entries of ee they select
+            rr[0] = prng.get();
+            rr[1] = prng.get();
+            rr[2] = prng.get();
+            rr[3] = prng.get();
+            rr[4] = prng.get();
+            rr[5] = prng.get();
+            rr[6] = prng.get();
+            rr[7] = prng.get();
+
+            w[0] = ee[rr[0]];
+            w[1] = ee[rr[1]];
+            w[2] = ee[rr[2]];
+            w[3] = ee[rr[3]];
+            w[4] = ee[rr[4]];
+            w[5] = ee[rr[5]];
+            w[6] = ee[rr[6]];
+            w[7] = ee[rr[7]];
+
+            auto ww =
+                w[0] ^
+                w[1] ^
+                w[2] ^
+                w[3] ^
+                w[4] ^
+                w[5] ^
+                w[6] ^
+                w[7];
+
+            if constexpr (count > 8) // fold in the remaining count - 8 entries
+                ww = ww ^ expandOne<T, count - 8>(ee, prng);
+            return ww;
+        }
+        else
+        {
+            // fewer than 8 entries left: draw one index first, then recurse on count - 1.
+            auto r = prng.get();
+            auto ww = expandOne<T, count - 1>(ee, prng);
+            return ww ^ ee[r];
+        }
+    }
+
+    // Two-vector form: XORs `count` entries of ee1 and of ee2 at the same drawn indices; Add == true XORs the results into *y1, *y2, otherwise overwrites them.
+    template<typename T, typename T2, u64 count, bool Add>
+    OC_FORCEINLINE typename std::enable_if<(count > 1)>::type
+        ExpanderCodeOld::expandOne(
+            const T* __restrict ee1,
+            const T2* __restrict ee2,
+            T* __restrict y1,
+            T2* __restrict y2,
+            detail::ExpanderModd& prng)const
+    {
+        if constexpr (count >= 8)
+        {
+            u64 rr[8]; // the eight drawn indices, shared by both vectors
+            T w1[8];
+            T2 w2[8];
+            rr[0] = prng.get();
+            rr[1] = prng.get();
+            rr[2] = prng.get();
+            rr[3] = prng.get();
+            rr[4] = prng.get();
+            rr[5] = prng.get();
+            rr[6] = prng.get();
+            rr[7] = prng.get();
+
+            w1[0] = ee1[rr[0]];
+            w1[1] = ee1[rr[1]];
+            w1[2] = ee1[rr[2]];
+            w1[3] = ee1[rr[3]];
+            w1[4] = ee1[rr[4]];
+            w1[5] = ee1[rr[5]];
+            w1[6] = ee1[rr[6]];
+            w1[7] = ee1[rr[7]];
+
+            w2[0] = ee2[rr[0]];
+            w2[1] = ee2[rr[1]];
+            w2[2] = ee2[rr[2]];
+            w2[3] = ee2[rr[3]];
+            w2[4] = ee2[rr[4]];
+            w2[5] = ee2[rr[5]];
+            w2[6] = ee2[rr[6]];
+            w2[7] = ee2[rr[7]];
+
+            auto ww1 =
+                w1[0] ^
+                w1[1] ^
+                w1[2] ^
+                w1[3] ^
+                w1[4] ^
+                w1[5] ^
+                w1[6] ^
+                w1[7];
+            auto ww2 =
+                w2[0] ^
+                w2[1] ^
+                w2[2] ^
+                w2[3] ^
+                w2[4] ^
+                w2[5] ^
+                w2[6] ^
+                w2[7];
+
+            if constexpr (count > 8)
+            {
+                T yy1;
+                T2 yy2;
+                expandOne<T, T2, count - 8, false>(ee1, ee2, &yy1, &yy2, prng);
+                ww1 = ww1 ^ yy1;
+                ww2 = ww2 ^ yy2;
+            }
+            // store or XOR-accumulate the combined values, depending on Add.
+            if constexpr (Add)
+            {
+                *y1 = *y1 ^ ww1;
+                *y2 = *y2 ^ ww2;
+            }
+            else
+            {
+                *y1 = ww1;
+                *y2 = ww2;
+            }
+
+        }
+        else
+        {
+            // fewer than 8 entries left: draw one index, then recurse on count - 1.
+            auto r = prng.get();
+            if constexpr (Add)
+            {
+                auto w1 = ee1[r];
+                auto w2 = ee2[r];
+                expandOne<T, T2, count - 1, true>(ee1, ee2, y1, y2, prng);
+                *y1 = *y1 ^ w1;
+                *y2 = *y2 ^ w2;
+
+            }
+            else
+            {
+                // the recursion writes into temporaries, which are then combined with this draw's entries.
+                T yy1;
+                T2 yy2;
+                expandOne<T, T2, count - 1, false>(ee1, ee2, &yy1, &yy2, prng);
+                *y1 = ee1[r] ^ yy1;
+                *y2 = ee2[r] ^ yy2;
+            }
+        }
+    }
+
+
+    // For each i in [0, mMessageSize): w[i] = XOR of mExpanderWeight entries of e at prng-drawn indices; Add == true XORs into w[i] instead of overwriting.
+    template<typename T, bool Add>
+    void ExpanderCodeOld::expand(
+        span<const T> e,
+        span<T> w) const
+    {
+        assert(w.size() == mMessageSize);
+        assert(e.size() == mCodeSize);
+        detail::ExpanderModd prng(mSeed, mCodeSize);
+
+        const T* __restrict  ee = e.data();
+        T* __restrict  ww = w.data();
+
+        auto main = mMessageSize / 8 * 8; // number of rows handled 8 at a time
+        u64 i = 0;
+        // main loop: 8 output rows per iteration.
+        for (; i < main; i += 8)
+        {
+#define CASE(I) \
+                case I:\
+                if constexpr(Add)\
+                {\
+                    ww[i + 0] = ww[i + 0] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 1] = ww[i + 1] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 2] = ww[i + 2] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 3] = ww[i + 3] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 4] = ww[i + 4] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 5] = ww[i + 5] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 6] = ww[i + 6] ^ expandOne<T, I>(ee, prng);\
+                    ww[i + 7] = ww[i + 7] ^ expandOne<T, I>(ee, prng);\
+                }\
+                else\
+                {\
+                    ww[i + 0] = expandOne<T, I>(ee, prng);\
+                    ww[i + 1] = expandOne<T, I>(ee, prng);\
+                    ww[i + 2] = expandOne<T, I>(ee, prng);\
+                    ww[i + 3] = expandOne<T, I>(ee, prng);\
+                    ww[i + 4] = expandOne<T, I>(ee, prng);\
+                    ww[i + 5] = expandOne<T, I>(ee, prng);\
+                    ww[i + 6] = expandOne<T, I>(ee, prng);\
+                    ww[i + 7] = expandOne<T, I>(ee, prng);\
+                }\
+                break
+            // the listed weights use the unrolled expandOne<T, I>; any other weight takes the generic loop under default.
+            switch (mExpanderWeight)
+            {
+                CASE(5);
+                CASE(7);
+                CASE(9);
+                CASE(11);
+                CASE(21);
+                CASE(40);
+            default:
+                for (u64 jj = 0; jj < 8; ++jj)
+                {
+                    auto r = prng.get();
+                    auto wv = ee[r];
+
+                    for (auto j = 1ull; j < mExpanderWeight; ++j)
+                    {
+                        r = prng.get();
+                        wv = wv ^ ee[r];
+                    }
+                    if constexpr (Add)
+                        ww[i + jj] = ww[i + jj] ^ wv;
+                    else
+                        ww[i + jj] = wv;
+
+                }
+            }
+#undef CASE
+        }
+        // tail: the remaining mMessageSize % 8 rows, one at a time.
+        for (; i < mMessageSize; ++i)
+        {
+            auto wv = ee[prng.get()];
+            for (auto j = 1ull; j < mExpanderWeight; ++j)
+                wv = wv ^ ee[prng.get()];
+
+            if constexpr (Add)
+                ww[i] = ww[i] ^ wv;
+            else
+                ww[i] = wv;
+        }
+    }
+
+
+    // Two-vector expand: for each row i, w1[i] and w2[i] are formed from e1 and e2 at the same prng-drawn indices; Add == true XORs into the outputs instead of overwriting.
+    template<typename T, typename T2, bool Add>
+    void ExpanderCodeOld::expand(
+        span<const T> e1,
+        span<const T2> e2,
+        span<T> w1,
+        span<T2> w2
+    ) const
+    {
+        assert(w1.size() == mMessageSize);
+        assert(w2.size() == mMessageSize);
+        assert(e1.size() == mCodeSize);
+        assert(e2.size() == mCodeSize);
+        detail::ExpanderModd prng(mSeed, mCodeSize);
+
+        const T* __restrict  ee1 = e1.data();
+        const T2* __restrict  ee2 = e2.data();
+        T* __restrict  ww1 = w1.data();
+        T2* __restrict  ww2 = w2.data();
+
+        auto main = mMessageSize / 8 * 8; // number of rows handled 8 at a time
+        u64 i = 0;
+        // main loop: 8 output rows per iteration.
+        for (; i < main; i += 8)
+        {
+#define CASE(I) \
+                case I:\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 0], &ww2[i + 0], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 1], &ww2[i + 1], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 2], &ww2[i + 2], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 3], &ww2[i + 3], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 4], &ww2[i + 4], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 5], &ww2[i + 5], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 6], &ww2[i + 6], prng);\
+                expandOne<T, T2, I, Add>(ee1, ee2, &ww1[i + 7], &ww2[i + 7], prng);\
+                break
+            // the listed weights use the unrolled expandOne<T, T2, I, Add>; any other weight takes the generic loop under default.
+            switch (mExpanderWeight)
+            {
+                CASE(5);
+                CASE(7);
+                CASE(9);
+                CASE(11);
+                CASE(21);
+                CASE(40);
+            default:
+                for (u64 jj = 0; jj < 8; ++jj)
+                {
+                    auto r = prng.get();
+                    auto wv1 = ee1[r];
+                    auto wv2 = ee2[r];
+
+                    for (auto j = 1ull; j < mExpanderWeight; ++j)
+                    {
+                        r = prng.get();
+                        wv1 = wv1 ^ ee1[r];
+                        wv2 = wv2 ^ ee2[r];
+                    }
+                    if constexpr (Add)
+                    {
+                        ww1[i + jj] = ww1[i + jj] ^ wv1;
+                        ww2[i + jj] = ww2[i + jj] ^ wv2;
+                    }
+                    else
+                    {
+
+                        ww1[i + jj] = wv1;
+                        ww2[i + jj] = wv2;
+                    }
+                }
+            }
+#undef CASE
+        }
+        // tail: the remaining mMessageSize % 8 rows, one at a time.
+        for (; i < mMessageSize; ++i)
+        {
+            auto r = prng.get();
+            auto wv1 = ee1[r];
+            auto wv2 = ee2[r];
+            for (auto j = 1ull; j < mExpanderWeight; ++j)
+            {
+                r = prng.get();
+                wv1 = wv1 ^ ee1[r];
+                wv2 = wv2 ^ ee2[r];
+
+            }
+            if constexpr (Add)
+            {
+                ww1[i] = ww1[i] ^ wv1;
+                ww2[i] = ww2[i] ^ wv2;
+            }
+            else
+            {
+                ww1[i] = wv1;
+                ww2[i] = wv2;
+            }
+        }
+    }
+
+    // Materialize the expander B as a sparse matrix with mMessageSize rows and
+    // mCodeSize columns. Every row draws mExpanderWeight column indices from a
+    // detail::ExpanderModd built from (mSeed, mCodeSize). A column drawn an even
+    // number of times within a row cancels and is not emitted.
+    inline SparseMtx ExpanderCodeOld::getB() const
+    {
+        // marker written over a drawn index once it has been cancelled.
+        const u64 cancelled = ~0ull;
+
+        detail::ExpanderModd prng(mSeed, mCodeSize);
+        PointList points(mMessageSize, mCodeSize);
+
+        // the indices drawn for the row currently being built.
+        std::vector<u64> cols(mExpanderWeight);
+        u64* const first = cols.data();
+
+        for (u64 r = 0; r < mMessageSize; ++r)
+        {
+            // the first draw has nothing earlier to collide with.
+            cols[0] = prng.get();
+
+            for (u64 j = 1; j < mExpanderWeight; ++j)
+            {
+                cols[j] = prng.get();
+
+                // look for the same index among the earlier draws of this
+                // row; on a hit both slots are marked as cancelled.
+                u64* const hit = std::find(first, first + j, cols[j]);
+                if (hit != first + j)
+                {
+                    cols[j] = cancelled;
+                    *hit = cancelled;
+                }
+            }
+
+            // emit the surviving indices of this row in draw order.
+            for (u64 j = 0; j < mExpanderWeight; ++j)
+            {
+                if (cols[j] == cancelled)
+                    continue;
+
+                points.push_back(r, cols[j]);
+            }
+        }
+
+        return points;
+    }
+}
+#endif // LIBOTE_ENABLE_OLD_EXCONV
diff --git a/libOTe/Tools/Pprf/RegularPprf.h b/libOTe/Tools/Pprf/RegularPprf.h
index 439150a..9a6a1e0 100644
--- a/libOTe/Tools/Pprf/RegularPprf.h
+++ b/libOTe/Tools/Pprf/RegularPprf.h
@@ -319,15 +319,17 @@ namespace osuCrypto
             ctx.zero(leafSums[0].begin(), leafSums[0].end());
             ctx.zero(leafSums[1].begin(), leafSums[1].end());
 
+            auto outIter = leafLevel.begin() + leafOffset;
+
             // for the leaf nodes we need to hash both children.
-            for (u64 parentIdx = 0, outIdx = leafOffset, childIdx = 0; parentIdx < width; ++parentIdx)
+            for (u64 parentIdx = 0, childIdx = 0; parentIdx < width; ++parentIdx)
             {
                 // The value of the parent.
                 auto& parent = level0.data()[parentIdx];
 
                 // The bit that indicates if we are on the left child (0)
                 // or on the right child (1).
-                for (u64 keep = 0; keep < 2; ++keep, ++childIdx, outIdx += 8)
+                for (u64 keep = 0; keep < 2; ++keep, ++childIdx)
                 {
                     // The child that we will write in this iteration.
 
@@ -337,27 +339,30 @@ namespace osuCrypto
                     //    H(x) = (AES(k0, x) + x) || (AES(k1, x) + x);
                     //
                     // where each half defines one of the children.
-                    gGgmAes[keep].hashBlocks<8>(parent.data(), child.data());
+                    gGgmAes.data()[keep].hashBlocks<8>(parent.data(), child.data());
 
-                    ctx.fromBlock(leafLevel[outIdx + 0], child[0]);
-                    ctx.fromBlock(leafLevel[outIdx + 1], child[1]);
-                    ctx.fromBlock(leafLevel[outIdx + 2], child[2]);
-                    ctx.fromBlock(leafLevel[outIdx + 3], child[3]);
-                    ctx.fromBlock(leafLevel[outIdx + 4], child[4]);
-                    ctx.fromBlock(leafLevel[outIdx + 5], child[5]);
-                    ctx.fromBlock(leafLevel[outIdx + 6], child[6]);
-                    ctx.fromBlock(leafLevel[outIdx + 7], child[7]);
+                    ctx.fromBlock(*(outIter + 0), child.data()[0]);
+                    ctx.fromBlock(*(outIter + 1), child.data()[1]);
+                    ctx.fromBlock(*(outIter + 2), child.data()[2]);
+                    ctx.fromBlock(*(outIter + 3), child.data()[3]);
+                    ctx.fromBlock(*(outIter + 4), child.data()[4]);
+                    ctx.fromBlock(*(outIter + 5), child.data()[5]);
+                    ctx.fromBlock(*(outIter + 6), child.data()[6]);
+                    ctx.fromBlock(*(outIter + 7), child.data()[7]);
 
                     // leafSum += child
                     auto& leafSum = leafSums[keep];
-                    ctx.plus(leafSum[0], leafSum[0], leafLevel[outIdx + 0]);
-                    ctx.plus(leafSum[1], leafSum[1], leafLevel[outIdx + 1]);
-                    ctx.plus(leafSum[2], leafSum[2], leafLevel[outIdx + 2]);
-                    ctx.plus(leafSum[3], leafSum[3], leafLevel[outIdx + 3]);
-                    ctx.plus(leafSum[4], leafSum[4], leafLevel[outIdx + 4]);
-                    ctx.plus(leafSum[5], leafSum[5], leafLevel[outIdx + 5]);
-                    ctx.plus(leafSum[6], leafSum[6], leafLevel[outIdx + 6]);
-                    ctx.plus(leafSum[7], leafSum[7], leafLevel[outIdx + 7]);
+                    ctx.plus(leafSum.data()[0], leafSum.data()[0], *(outIter + 0));
+                    ctx.plus(leafSum.data()[1], leafSum.data()[1], *(outIter + 1));
+                    ctx.plus(leafSum.data()[2], leafSum.data()[2], *(outIter + 2));
+                    ctx.plus(leafSum.data()[3], leafSum.data()[3], *(outIter + 3));
+                    ctx.plus(leafSum.data()[4], leafSum.data()[4], *(outIter + 4));
+                    ctx.plus(leafSum.data()[5], leafSum.data()[5], *(outIter + 5));
+                    ctx.plus(leafSum.data()[6], leafSum.data()[6], *(outIter + 6));
+                    ctx.plus(leafSum.data()[7], leafSum.data()[7], *(outIter + 7));
+
+                    outIter+= 8;
+                    assert(outIter <= leafLevel.end());
                 }
 
             }
@@ -900,13 +905,14 @@ namespace osuCrypto
                         ctx.copy(leafSums[k][i], leafSums[k][0]);
                 }
 
+                auto outIter = leafLevel.begin() + outputOffset;
                 // for leaf nodes both children should be hashed.
-                for (u64 parentIdx = 0, childIdx = 0, outputIdx = outputOffset; parentIdx < width; ++parentIdx)
+                for (u64 parentIdx = 0, childIdx = 0; parentIdx < width; ++parentIdx)
                 {
                     // The value of the parent.
-                    auto parent = level0[parentIdx];
+                    auto parent = level0.data()[parentIdx];
 
-                    for (u64 keep = 0; keep < 2; ++keep, ++childIdx, outputIdx += 8)
+                    for (u64 keep = 0; keep < 2; ++keep, ++childIdx)
                     {
                         // Each parent is expanded into the left and right children
                         // using a different AES fixed-key. Therefore our OWF is:
@@ -914,26 +920,29 @@ namespace osuCrypto
                         //    H(x) = (AES(k0, x) + x) || (AES(k1, x) + x);
                         //
                         // where each half defines one of the children.
-                        gGgmAes[keep].hashBlocks<8>(parent.data(), child.data());
+                        gGgmAes.data()[keep].hashBlocks<8>(parent.data(), child.data());
 
-                        ctx.fromBlock(leafLevel[outputIdx + 0], child[0]);
-                        ctx.fromBlock(leafLevel[outputIdx + 1], child[1]);
-                        ctx.fromBlock(leafLevel[outputIdx + 2], child[2]);
-                        ctx.fromBlock(leafLevel[outputIdx + 3], child[3]);
-                        ctx.fromBlock(leafLevel[outputIdx + 4], child[4]);
-                        ctx.fromBlock(leafLevel[outputIdx + 5], child[5]);
-                        ctx.fromBlock(leafLevel[outputIdx + 6], child[6]);
-                        ctx.fromBlock(leafLevel[outputIdx + 7], child[7]);
+                        ctx.fromBlock(*(outIter + 0), child.data()[0]);
+                        ctx.fromBlock(*(outIter + 1), child.data()[1]);
+                        ctx.fromBlock(*(outIter + 2), child.data()[2]);
+                        ctx.fromBlock(*(outIter + 3), child.data()[3]);
+                        ctx.fromBlock(*(outIter + 4), child.data()[4]);
+                        ctx.fromBlock(*(outIter + 5), child.data()[5]);
+                        ctx.fromBlock(*(outIter + 6), child.data()[6]);
+                        ctx.fromBlock(*(outIter + 7), child.data()[7]);
 
                         auto& leafSum = leafSums[keep];
-                        ctx.plus(leafSum[0], leafSum[0], leafLevel[outputIdx + 0]);
-                        ctx.plus(leafSum[1], leafSum[1], leafLevel[outputIdx + 1]);
-                        ctx.plus(leafSum[2], leafSum[2], leafLevel[outputIdx + 2]);
-                        ctx.plus(leafSum[3], leafSum[3], leafLevel[outputIdx + 3]);
-                        ctx.plus(leafSum[4], leafSum[4], leafLevel[outputIdx + 4]);
-                        ctx.plus(leafSum[5], leafSum[5], leafLevel[outputIdx + 5]);
-                        ctx.plus(leafSum[6], leafSum[6], leafLevel[outputIdx + 6]);
-                        ctx.plus(leafSum[7], leafSum[7], leafLevel[outputIdx + 7]);
+                        ctx.plus(leafSum.data()[0], leafSum.data()[0], *(outIter + 0));
+                        ctx.plus(leafSum.data()[1], leafSum.data()[1], *(outIter + 1));
+                        ctx.plus(leafSum.data()[2], leafSum.data()[2], *(outIter + 2));
+                        ctx.plus(leafSum.data()[3], leafSum.data()[3], *(outIter + 3));
+                        ctx.plus(leafSum.data()[4], leafSum.data()[4], *(outIter + 4));
+                        ctx.plus(leafSum.data()[5], leafSum.data()[5], *(outIter + 5));
+                        ctx.plus(leafSum.data()[6], leafSum.data()[6], *(outIter + 6));
+                        ctx.plus(leafSum.data()[7], leafSum.data()[7], *(outIter + 7));
+
+                        outIter += 8;
+                        assert(outIter <= leafLevel.end());
                     }
                 }
             }
diff --git a/libOTe/Tools/TungstenCode/TungstenCode.h b/libOTe/Tools/TungstenCode/TungstenCode.h
index 28d90b9..1e91d63 100644
--- a/libOTe/Tools/TungstenCode/TungstenCode.h
+++ b/libOTe/Tools/TungstenCode/TungstenCode.h
@@ -59,12 +59,19 @@ namespace osuCrypto {
                 assert(mPermIter < mPerm.data() + mPerm.size());
                 auto dst = output + (*(u32 * __restrict)mPermIter * chunkSize);
                 ++mPermIter;
-
+                //if ((u64)output % std::hardware_destructive_interference_size != 0)
+                //    throw std::runtime_error(LOCATION);
+                //if((u64)dst % std::hardware_destructive_interference_size != 0)
+                //    throw std::runtime_error(LOCATION);
+                //if((u64)x % std::hardware_destructive_interference_size != 0)
+                //    throw std::runtime_error(LOCATION);
+                //__assume((u64)x % std::hardware_destructive_interference_size == 0);
+                //__assume((u64)dst % std::hardware_destructive_interference_size == 0);
                 ctx.copy(x, x + chunkSize, dst);
             }
 
 
-            void skip(u64 i) 
+            void skip(u64 i)
             {
                 assert(i % chunkSize == 0);
                 mPermIter += i / chunkSize;
@@ -121,6 +128,10 @@ namespace osuCrypto {
         struct TungstenCode
         {
             static const u64 ChunkSize = 8;
+            using Table = TableTungsten1024x4;
+            //static const u64 ChunkSize = 16;
+            //using Table = TableTungsten128x4;
+
             TungstenPerm<ChunkSize> mPerm;
 
             u64 mMessageSize = 0;
@@ -132,7 +143,7 @@ namespace osuCrypto {
             void config(u64 messageSize, u64 codeSize, block seed = block(452345234, 6756754363))
             {
                 if (messageSize % ChunkSize)
-                    throw std::runtime_error("messageSize must be a multiple of ChunkSize. " LOCATION);
+                    throw std::runtime_error("messageSize "+std::to_string(messageSize) + " must be a multiple of ChunkSize "+std::to_string(ChunkSize) + ". " LOCATION);
                 if (codeSize % ChunkSize)
                     throw std::runtime_error("codeSize must be a multiple of ChunkSize. " LOCATION);
 
@@ -245,20 +256,34 @@ namespace osuCrypto {
                             }
                             else
                             {
-                                auto xi = x + i;
 
-                                auto xs = xi + Table::max + 1;
-                                auto x0 = xi + table[j].data()[0];
-                                auto x1 = xi + table[j].data()[1];
-                                auto x2 = xi + table[j].data()[2];
-                                auto x3 = xi + table[j].data()[3];
-
-                                ctx.plus(*xs, *xs, *xi);
-                                ctx.plus(*x0, *x0, *xi);
-                                ctx.plus(*x1, *x1, *xi);
-                                ctx.plus(*x2, *x2, *xi);
-                                ctx.plus(*x3, *x3, *xi);
-                                ctx.mulConst(*xs, *xs);
+                                auto xiPtr = (x + i);
+                                auto xsPtr = (xiPtr + Table::max + 1);
+                                auto x0Ptr = (xiPtr + table[j].data()[0]);
+                                auto x1Ptr = (xiPtr + table[j].data()[1]);
+                                auto x2Ptr = (xiPtr + table[j].data()[2]);
+                                auto x3Ptr = (xiPtr + table[j].data()[3]);
+
+                                auto xi = *xiPtr;
+                                auto xs = *xsPtr;
+                                auto x0 = *x0Ptr;
+                                auto x1 = *x1Ptr;
+                                auto x2 = *x2Ptr;
+                                auto x3 = *x3Ptr;
+
+                                ctx.plus(xs, xs, xi);
+                                ctx.plus(x0, x0, xi);
+                                ctx.plus(x1, x1, xi);
+                                ctx.plus(x2, x2, xi);
+                                ctx.plus(x3, x3, xi);
+                                ctx.mulConst(xs, xs);
+
+
+                                ctx.copy(*xsPtr, xs);
+                                ctx.copy(*x0Ptr, x0);
+                                ctx.copy(*x1Ptr, x1);
+                                ctx.copy(*x2Ptr, x2);
+                                ctx.copy(*x3Ptr, x3);
 
                             }
                         }
@@ -274,6 +299,93 @@ namespace osuCrypto {
                         break;
                 }
             }
+            //
+            //
+            //            template<
+            //                typename Table,
+            //                typename F,
+            //                bool rangeCheck,
+            //                typename OutputMap,
+            //                typename CoeffCtx,
+            //                typename Iter
+            //            >
+            //            void accumulateBlockGather(
+            //                Iter x,
+            //                u64 i,
+            //                Iter dst,
+            //                u64 size,
+            //                OutputMap& output,
+            //                CoeffCtx& ctx)
+            //            {
+            //
+            //                //static constexpr int chunkSize = OutputMap::chunkSize;
+            //                static_assert(Table::data.size() % ChunkSize == 0);
+            //                auto table = Table::data.data();
+            //
+            //                for (u64 j = 0; j < Table::data.size();)
+            //                {
+            //#ifdef ENABLE_SSE
+            //                    if (rangeCheck == false || i + Table::data.size() * 2 < size)
+            //                        _mm_prefetch((char*)(x + i + Table::data.size() * 2), _MM_HINT_T0);
+            //#endif
+            //
+            //                    for (u64 k = 0; k < ChunkSize; ++k, ++j, ++i)
+            //                    {
+            //
+            //                        if constexpr (Table::data[0].size() == 4)
+            //                        {
+            //                            if constexpr (rangeCheck)
+            //                            {
+            //                                if (i == size)
+            //                                    return;
+            //
+            //                                auto xi = x + i;
+            //                                auto xs = x + ((i + Table::max + 1) % size);
+            //                                ctx.plus(*xs, *xs, *xi);
+            //                                ctx.mulConst(*xs, *xs);
+            //
+            //                                for (u64 p = 0; p < Table::data[0].size(); ++p)
+            //                                {
+            //                                    auto idx = (i + table[j].data()[p]) % size;
+            //                                    if (idx != i)
+            //                                    {
+            //                                        auto xi = x + i;
+            //                                        auto xp = x + idx;
+            //                                        ctx.plus(*xp, *xp, *xi);
+            //                                    }
+            //                                }
+            //                            }
+            //                            else
+            //                            {
+            //                                auto xi = x + i;
+            //
+            //                                auto xs = xi + Table::max + 1;
+            //                                auto x0 = xi + table[j].data()[0];
+            //                                auto x1 = xi + table[j].data()[1];
+            //                                auto x2 = xi + table[j].data()[2];
+            //                                auto x3 = xi + table[j].data()[3];
+            //
+            //                                ctx.plus(*xs, *xs, *xi);
+            //                                ctx.plus(*x0, *x0, *xi);
+            //                                ctx.plus(*x1, *x1, *xi);
+            //                                ctx.plus(*x2, *x2, *xi);
+            //                                ctx.plus(*x3, *x3, *xi);
+            //                                ctx.mulConst(*xs, *xs);
+            //
+            //                            }
+            //                        }
+            //                        else
+            //                        {
+            //                            throw RTE_LOC;
+            //                        }
+            //                    }
+            //
+            //                    output.template applyChunk<F>(dst, x + (i - ChunkSize), ctx);
+            //
+            //                    if (rangeCheck && i >= size)
+            //                        break;
+            //                }
+            //            }
 
 
             template<typename F,
@@ -287,47 +399,90 @@ namespace osuCrypto {
                 OutputMap& map,
                 CoeffCtx& ctx)
             {
+                bool eager = true;
+                if (eager)
+                {
 
-                using Table = TableTungsten1024x4;
 
-                u64 main = std::max<i64>(size / Table::data.size() - 1, 0) * Table::data.size();
-                u64 i = 0;
-                map.reset();
+                    u64 main = std::max<i64>(size / Table::data.size() - 1, 0) * Table::data.size();
+                    u64 i = 0;
+                    map.reset();
 
-                // for the first iteration, the last accumulateBlock
-                // will wrap anmd change its value. We therefore can't
-                // yet map the output for this part. We do this at the end.
-                while (i <= Table::max)
+                    // for the first iteration, the last accumulateBlock
+                    // will wrap and change its value. We therefore can't
+                    // yet map the output for this part. We do this at the end.
+                    while (i <= Table::max)
+                    {
+                        TungstenNoop noop;
+                        if (i < main)
+                            accumulateBlock<Table, F, false>(input, i, output, size, noop, ctx);
+                        else
+                            accumulateBlock<Table, F, true>(input, i, output, size, noop, ctx);
+                        i += Table::data.size();
+                    }
+                    map.skip(i);
+
+                    // accumulate and map. no range check required.
+                    for (; i < main; i += Table::data.size())
+                    {
+                        accumulateBlock<Table, F, false>(input, i, output, size, map, ctx);
+                    }
+
+                    // last iteration or two requires range checking.
+                    for (; i < size; i += Table::data.size())
+                    {
+                        accumulateBlock<Table, F, true>(input, i, output, size, map, ctx);
+                    }
+
+                    // map the missing blocks at the start.
+                    map.reset();
+                    i = 0;
+                    auto end = std::min<u64>(Table::max, size);
+                    while (i < end)
+                    {
+                        map.template applyChunk<F>(output, input + i, ctx);
+                        i += ChunkSize;
+                    }
+                }
+                else
                 {
                     TungstenNoop noop;
-                    if (i < main)
-                        accumulateBlock<Table, F, false>(input, i, output, size, noop, ctx);
-                    else
-                        accumulateBlock<Table, F, true>(input, i, output, size, noop, ctx);
-                    i += Table::data.size();
-                }
-                map.skip(i);
+                    u64 main = std::max<i64>(size / Table::data.size() - 1, 0) * Table::data.size();
+                    u64 i = 0;
+                    map.reset();
+
+                    // for the first iteration, the last accumulateBlock
+                    // will wrap and change its value. We therefore can't
+                    // yet map the output for this part. We do this at the end.
+                    while (i <= Table::max)
+                    {
+                        if (i < main)
+                            accumulateBlock<Table, F, false>(input, i, output, size, noop, ctx);
+                        else
+                            accumulateBlock<Table, F, true>(input, i, output, size, noop, ctx);
+                        i += Table::data.size();
+                    }
 
-                // accumulate and map. no range check required.
-                for (; i < main; i += Table::data.size())
-                {
-                    accumulateBlock<Table, F, false>(input, i, output, size, map, ctx);
-                }
+                    // accumulate only (noop map; mapping is deferred to the end). no range check required.
+                    for (; i < main; i += Table::data.size())
+                    {
+                        accumulateBlock<Table, F, false>(input, i, output, size, noop, ctx);
+                    }
 
-                // last iteration or two requires range checking.
-                for (; i < size; i += Table::data.size())
-                {
-                    accumulateBlock<Table, F, true>(input, i, output, size, map, ctx);
-                }
+                    // last iteration or two requires range checking.
+                    for (; i < size; i += Table::data.size())
+                    {
+                        accumulateBlock<Table, F, true>(input, i, output, size, noop, ctx);
+                    }
 
-                // map the missing blocks at the start.
-                map.reset();
-                i = 0;
-                auto end = std::min<u64>(Table::max, size);
-                while (i < end)
-                {
-                    map.template applyChunk<F>(output, input + i, ctx);
-                    i += ChunkSize;
+                    // map every block now that accumulation is complete.
+                    map.reset();
+                    i = 0;
+                    while (i < size)
+                    {
+                        map.template applyChunk<F>(output, input + i, ctx);
+                        i += ChunkSize;
+                    }
                 }
             }
 
diff --git a/libOTe/TwoChooseOne/ConfigureCode.cpp b/libOTe/TwoChooseOne/ConfigureCode.cpp
index ca7ef15..a0b7400 100644
--- a/libOTe/TwoChooseOne/ConfigureCode.cpp
+++ b/libOTe/TwoChooseOne/ConfigureCode.cpp
@@ -73,9 +73,9 @@ namespace osuCrypto
 
     void ExConvConfigure(
         MultType mMultType,
+        u64& scaler,
         u64& expanderWeight,
         u64& accumulatorWeight,
-        u64& scaler,
         double& minDist)
     {
         scaler = 2;
diff --git a/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp b/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp
index a6fbbf7..986011c 100644
--- a/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp
+++ b/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp
@@ -385,7 +385,7 @@ namespace osuCrypto
             i = u64{}, j = u64{}, main = u64{}
         );
 
-        gTimer.setTimePoint("recver.ot.enter");
+        setTimePoint("recver.expand.enter");
 
         if (isConfigured() == false)
         {
@@ -403,7 +403,6 @@ namespace osuCrypto
         }
 
         setTimePoint("recver.expand.start");
-        gTimer.setTimePoint("recver.expand.start");
 
 
         mA.resize(mNoiseVecSize);
@@ -411,13 +410,16 @@ namespace osuCrypto
 
 
         MC_AWAIT(mGen.expand(chl, mA, PprfOutputFormat::Interleaved, true, mNumThreads));
-        setTimePoint("recver.expand.pprf_transpose");
-        gTimer.setTimePoint("recver.expand.pprf_transpose");
+        setTimePoint("recver.expand.pprf");
 
 
         if (mMalType == SilentSecType::Malicious)
+        {
+
             MC_AWAIT(ferretMalCheck(chl, prng));
+            setTimePoint("recver.expand.malCheck");
 
+        }
 
         if (mDebug)
         {
@@ -426,6 +428,7 @@ namespace osuCrypto
         }
 
         compress(type);
+        setTimePoint("recver.expand.dualEncode");
 
         mA.resize(mRequestNumOts);
 
@@ -565,15 +568,13 @@ namespace osuCrypto
             // not implemented.
             throw RTE_LOC;
         }
-        setTimePoint("recver.expand.ldpc.mCopyHash");
+        setTimePoint("recver.expand.CopyHash");
 
     }
 
     void SilentOtExtReceiver::compress(ChoiceBitPacking packing)// )
     {
 
-        setTimePoint("recver.expand.ldpc.mult");
-
         if (packing == ChoiceBitPacking::True)
         {
             // zero out the lsb of mA. We will store mC there.
@@ -600,7 +601,8 @@ namespace osuCrypto
             // set the lsb of mA to be mC.
             for (auto p : mS)
                 mA[p] = mA[p] | OneBlock;
-            setTimePoint("recver.expand.ldpc.mask");
+
+            setTimePoint("recver.expand.bitPacking");
 
             switch (mMultType)
             {
@@ -659,7 +661,7 @@ namespace osuCrypto
                 break;
             }
 
-            setTimePoint("recver.expand.ldpc.dualEncode");
+            setTimePoint("recver.expand.dualEncode");
 
         }
         else
@@ -742,7 +744,7 @@ namespace osuCrypto
                 break;
             }
             
-            setTimePoint("recver.expand.ldpc.dualEncode");
+            setTimePoint("recver.expand.dualEncode2");
         }
     }
 
diff --git a/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp b/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp
index df84c5e..4cceac7 100644
--- a/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp
+++ b/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp
@@ -340,7 +340,6 @@ namespace osuCrypto
             delta = AlignedUnVector<block>{}
         );
 
-        gTimer.setTimePoint("sender.ot.enter");
         setTimePoint("sender.expand.enter");
 
         if (isConfigured() == false)
@@ -357,7 +356,6 @@ namespace osuCrypto
         }
 
         setTimePoint("sender.expand.start");
-        gTimer.setTimePoint("sender.expand.start");
 
         mDelta = d;
 
@@ -369,18 +367,22 @@ namespace osuCrypto
 
         MC_AWAIT(mGen.expand(chl, delta, prng.get(), mB, PprfOutputFormat::Interleaved, true, mNumThreads));
 
+        setTimePoint("sender.expand.pprf");
 
         if (mMalType == SilentSecType::Malicious)
+        {
             MC_AWAIT(ferretMalCheck(chl, prng));
+            setTimePoint("sender.expand.malcheck");
+        }
 
-        setTimePoint("sender.expand.pprf_transpose");
-        gTimer.setTimePoint("sender.expand.pprf_transpose");
 
         if (mDebug)
             MC_AWAIT(checkRT(chl));
 
         compress();
 
+        setTimePoint("sender.expand.dualEncode");
+
         mB.resize(mRequestNumOts);
 
         MC_END();
@@ -468,9 +470,10 @@ namespace osuCrypto
         case osuCrypto::MultType::ExConv21x24:
         {
 
-            u64 expanderWeight = 0, accWeight = 0, _1;
-            double _2;
-            ExConvConfigure(mMultType, _1, expanderWeight, accWeight, _2);
+            u64 expanderWeight = 0, accWeight = 0, scaler = 0;
+            double minDist = 0;
+            ExConvConfigure(mMultType, scaler, expanderWeight, accWeight, minDist);
+            assert(scaler == 2 && minDist > 0 && minDist < 1);
 
             ExConvCode exConvEncoder;
             exConvEncoder.config(mRequestNumOts, mNoiseVecSize, expanderWeight, accWeight);
diff --git a/libOTe/Vole/Silent/SilentVoleReceiver.h b/libOTe/Vole/Silent/SilentVoleReceiver.h
index eafd389..99e5858 100644
--- a/libOTe/Vole/Silent/SilentVoleReceiver.h
+++ b/libOTe/Vole/Silent/SilentVoleReceiver.h
@@ -533,9 +533,10 @@ namespace osuCrypto
             case osuCrypto::MultType::ExConv21x24:
             {
                 u64 expanderWeight, accumulatorWeight, scaler;
-                double _;
-                ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, _);
+                double minDist;
+                ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, minDist);
                 ExConvCode encoder;
+                assert(scaler == 2 && minDist <1 && minDist > 0);
                 encoder.config(mRequestSize, mNoiseVecSize, expanderWeight, accumulatorWeight);
 
                 if (mTimer)
diff --git a/libOTe/Vole/Silent/SilentVoleSender.h b/libOTe/Vole/Silent/SilentVoleSender.h
index 2ad7ab5..d43499b 100644
--- a/libOTe/Vole/Silent/SilentVoleSender.h
+++ b/libOTe/Vole/Silent/SilentVoleSender.h
@@ -385,8 +385,10 @@ namespace osuCrypto
             {
                 ExConvCode encoder;
                 u64 expanderWeight, accumulatorWeight, scaler;
-                double _1;
-                ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, _1);
+                double minDist;
+                ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, minDist);
+                assert(scaler == 2 && minDist < 1 && minDist > 0);
+
                 encoder.config(mRequestSize, mNoiseVecSize, expanderWeight, accumulatorWeight);
                 if (mTimer)
                     encoder.setTimer(getTimer());
diff --git a/libOTe_Tests/TungstenCode_Tests.cpp b/libOTe_Tests/TungstenCode_Tests.cpp
index 245d0ed..d4dcd96 100644
--- a/libOTe_Tests/TungstenCode_Tests.cpp
+++ b/libOTe_Tests/TungstenCode_Tests.cpp
@@ -105,7 +105,7 @@ namespace tests_libOTe
                     //std::cout << "\n";
                     encoder.mPerm.reset();
                     std::vector<F> out2(out.size());
-                    encoder.accumulate<F, TungstenPerm<8>, Ctx, F*>(in3.data(), out2.data(), in.size(), encoder.mPerm, ctx);
+                    encoder.accumulate<F, TungstenPerm<TungstenCode::ChunkSize>, Ctx, F*>(in3.data(), out2.data(), in.size(), encoder.mPerm, ctx);
 
 
                     if (in3 != in)
@@ -177,7 +177,7 @@ namespace tests_libOTe
     void TungstenCode_encode_test(const oc::CLP& cmd)
     {
 
-        auto K = cmd.getManyOr<u64>("k", { 256, 3328, 152336 });
+        auto K = cmd.getManyOr<u64>("k", { 256, 3328, 15232 });
         auto R = cmd.getManyOr<double>("R", { 2.0 });
 
         for (auto k : K) for (auto r : R)