diff --git a/frontend/benchmark.h b/frontend/benchmark.h index c5b51e9..dcc00d9 100644 --- a/frontend/benchmark.h +++ b/frontend/benchmark.h @@ -14,6 +14,7 @@ #include "libOTe/Vole/Silent/SilentVoleReceiver.h" #include "libOTe/Tools/CoeffCtx.h" #include "libOTe/Tools/TungstenCode/TungstenCode.h" +#include "libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h" namespace osuCrypto { @@ -157,7 +158,7 @@ namespace osuCrypto timer.setTimePoint("_____________________"); for (u64 i = 0; i < trials; ++i) { - if(gf128) + if (gf128) code.dualEncode(x.begin(), {}); else code.dualEncode(x.begin(), {}); @@ -172,6 +173,143 @@ namespace osuCrypto std::cout << verbose << std::endl; } + inline void ExConvCodeOldBench(CLP& cmd) + { +#ifdef LIBOTE_ENABLE_OLD_EXCONV + + u64 trials = cmd.getOr("t", 10); + + // the message length of the code. + // The noise vector will have size n=2*k. + // the user can use + // -k X + // to state that exactly X rows should be used or + // -kk X + // to state that 2^X rows should be used. + u64 k = cmd.getOr("k", 1ull << cmd.getOr("kk", 10)); + + u64 n = cmd.getOr("n", k * cmd.getOr("R", 2.0)); + + // the weight of the code + u64 w = cmd.getOr("w", 7); + + // size for the accumulator (# random transitions) + u64 a = cmd.getOr("a", roundUpTo(log2ceil(n), 8)); + + bool gf128 = cmd.isSet("gf128"); + + // verbose flag. + bool v = cmd.isSet("v"); + bool sys = cmd.isSet("sys"); + + ExConvCodeOld code; + code.config(k, n, w, a, sys); + + if (v) + { + std::cout << "n: " << code.mCodeSize << std::endl; + std::cout << "k: " << code.mMessageSize << std::endl; + //std::cout << "w: " << code.mExpanderWeight << std::endl; + } + + std::vector x(code.mCodeSize), y(code.mMessageSize * !sys); + Timer timer, verbose; + + if (v) + code.setTimer(verbose); + + timer.setTimePoint("_____________________"); + for (u64 i = 0; i < trials; ++i) + { + code.dualEncode(x); + + timer.setTimePoint("encode"); + } + + if (cmd.isSet("quiet") == false) + { + std::cout << "EC " << std::endl; + std::cout << timer << std::endl; + } + if (v) + std::cout << verbose << std::endl; +#else + std::cout << "LIBOTE_ENABLE_OLD_EXCONV = false" << std::endl; +#endif + } + + + inline void PprfBench(CLP& cmd) + { + +#ifdef ENABLE_SILENTOT + + try + { + using Ctx = CoeffCtxGF2; + RegularPprfReceiver recver; + RegularPprfSender sender; + + u64 trials = cmd.getOr("t", 10); + + u64 w = cmd.getOr("w", 32); + u64 n = cmd.getOr("n", 1ull << cmd.getOr("nn", 14)); + + PRNG prng0(ZeroBlock), prng1(ZeroBlock); + block delta = prng0.get(); + + auto sock = coproto::LocalAsyncSocket::makePair(); + + Timer rTimer; + auto s = rTimer.setTimePoint("start"); + auto ctx = Ctx{}; + auto vals = Ctx::Vec(w); + auto out0 = Ctx::Vec(n / w * w); + auto out1 = Ctx::Vec(n / w * w); + + + + for (u64 t = 0; t < trials; ++t) + { + sender.configure(n / w, w); + recver.configure(n / w, w); + + std::vector> baseSend(sender.baseOtCount()); + std::vector baseRecv(sender.baseOtCount()); + BitVector baseChoice(sender.baseOtCount()); + sender.setBase(baseSend); + recver.setBase(baseRecv); + recver.setChoiceBits(baseChoice); + + auto p0 = sender.expand(sock[0], vals, prng0.get(), out0, PprfOutputFormat::Interleaved, true, 1, ctx); + auto p1 = recver.expand(sock[1], out1, PprfOutputFormat::Interleaved, true, 1, ctx); + + rTimer.setTimePoint("r start"); + coproto::sync_wait(macoro::when_all_ready( + std::move(p0), std::move(p1))); + rTimer.setTimePoint("r done"); + + } + auto e = rTimer.setTimePoint("end"); + + auto time = std::chrono::duration_cast(e - s).count(); + auto avgTime = time / double(trials); + auto timePer512 = avgTime / n * 512; + std::cout << "OT n:" << n << ", " << + avgTime << "ms/batch, " << timePer512 << "ms/512ot" << std::endl; + + std::cout << rTimer << std::endl; + + std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl; + } + catch (std::exception& e) + { + std::cout << e.what() << std::endl; + } +#else + std::cout << "ENABLE_SILENTOT = false" << std::endl; +#endif + } inline void TungstenCodeBench(CLP& cmd) { @@ -201,21 +339,23 @@ namespace osuCrypto std::cout << "k: " << code.mMessageSize << std::endl; } - std::vector x(code.mCodeSize); + AlignedUnVector x(code.mCodeSize); Timer timer, verbose; - + timer.setTimePoint("_____________________"); for (u64 i = 0; i < trials; ++i) { - code.dualEncode(x.data(), {}); + code.dualEncode(x.data(), {}); timer.setTimePoint("encode"); } - std::cout << "tungsten " << std::endl; - std::cout << timer << std::endl; - + if (cmd.isSet("quiet") == false) + { + std::cout << "tungsten " << std::endl; + std::cout << timer << std::endl; + } if (v) std::cout << verbose << std::endl; } @@ -301,7 +441,7 @@ namespace osuCrypto u64 trials = cmd.getOr("t", 10); u64 n = cmd.getOr("n", 1ull << cmd.getOr("nn", 20)); - MultType multType = (MultType)cmd.getOr("m", (int)MultType::ExAcc7); + MultType multType = (MultType)cmd.getOr("m", (int)MultType::ExConv7x24); std::cout << multType << std::endl; recver.mMultType = multType; @@ -314,51 +454,47 @@ namespace osuCrypto Timer sTimer; Timer rTimer; + recver.setTimer(rTimer); + sender.setTimer(rTimer); sTimer.setTimePoint("start"); - rTimer.setTimePoint("start"); - - auto t0 = std::thread([&] { - for (u64 t = 0; t < trials; ++t) - { - auto p0 = sender.silentSendInplace(delta, n, prng0, sock[0]); - - char c; - - coproto::sync_wait(sock[0].send(std::move(c))); - coproto::sync_wait(sock[0].recv(c)); - sTimer.setTimePoint("__"); - coproto::sync_wait(sock[0].send(std::move(c))); - coproto::sync_wait(sock[0].recv(c)); - sTimer.setTimePoint("s start"); - coproto::sync_wait(p0); - sTimer.setTimePoint("s done"); - } - }); - + auto s = sTimer.setTimePoint("start"); for (u64 t = 0; t < trials; ++t) { - auto p1 = recver.silentReceiveInplace(n, prng1, sock[1]); - char c; - coproto::sync_wait(sock[1].send(std::move(c))); - coproto::sync_wait(sock[1].recv(c)); + sender.configure(n); + recver.configure(n); - rTimer.setTimePoint("__"); - coproto::sync_wait(sock[1].send(std::move(c))); - coproto::sync_wait(sock[1].recv(c)); + auto choice = recver.sampleBaseChoiceBits(prng0); + std::vector> sendBase(sender.silentBaseOtCount()); + std::vector recvBase(recver.silentBaseOtCount()); + sender.setSilentBaseOts(sendBase); + recver.setSilentBaseOts(recvBase); + + auto p0 = sender.silentSendInplace(delta, n, prng0, sock[0]); + auto p1 = recver.silentReceiveInplace(n, prng1, sock[1], ChoiceBitPacking::True); rTimer.setTimePoint("r start"); - coproto::sync_wait(p1); + coproto::sync_wait(macoro::when_all_ready( + std::move(p0), std::move(p1))); rTimer.setTimePoint("r done"); } + auto e = rTimer.setTimePoint("end"); + if (cmd.isSet("quiet") == false) + { - t0.join(); - std::cout << sTimer << std::endl; - std::cout << rTimer << std::endl; + auto time = std::chrono::duration_cast(e - s).count(); + auto avgTime = time / double(trials); + auto timePer512 = avgTime / n * 512; + std::cout << "OT n:" << n << ", " << + avgTime << "ms/batch, " << timePer512 << "ms/512ot" << std::endl; - std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl; + std::cout << sTimer << std::endl; + std::cout << rTimer << std::endl; + + std::cout << sock[0].bytesReceived() / trials << " " << sock[1].bytesReceived() / trials << " bytes per " << std::endl; + } } catch (std::exception& e) { diff --git a/frontend/main.cpp b/frontend/main.cpp index 339b0ad..6dba7b4 100644 --- a/frontend/main.cpp +++ b/frontend/main.cpp @@ -105,12 +105,16 @@ int main(int argc, char** argv) QCCodeBench(cmd); else if (cmd.isSet("silent")) SilentOtBench(cmd); + else if (cmd.isSet("pprf")) + PprfBench(cmd); else if (cmd.isSet("vole2")) VoleBench2(cmd); else if (cmd.isSet("ea")) EACodeBench(cmd); else if (cmd.isSet("ec")) ExConvCodeBench(cmd); + else if (cmd.isSet("ecold")) + ExConvCodeOldBench(cmd); else if (cmd.isSet("tungsten")) TungstenCodeBench(cmd); diff --git a/libOTe/Tools/CoeffCtx.h b/libOTe/Tools/CoeffCtx.h index ddf64ee..a070d12 100644 --- a/libOTe/Tools/CoeffCtx.h +++ b/libOTe/Tools/CoeffCtx.h @@ -151,7 +151,8 @@ namespace osuCrypto { static_assert(std::is_trivially_copyable::value, "memcpy is used so must be trivially_copyable."); static_assert(std::is_same_v, "src and destication types are not the same."); - std::copy(begin, end, dstBegin); + memcpy((F2* __restrict) & *dstBegin, (F1 * __restrict) &*begin, std::distance(begin, end) * sizeof(F1)); + //std::copy(begin, end, dstBegin); } // deserialize [begin,...,end) into [dstBegin, ...) diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp b/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp new file mode 100644 index 0000000..80a69b8 --- /dev/null +++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeInstantiations.cpp @@ -0,0 +1,20 @@ + +#define EXCONVCODE_INSTANTIATIONS +#include "ExConvCodeOld.cpp" +#ifdef LIBOTE_ENABLE_OLD_EXCONV + +namespace osuCrypto +{ + + template void ExConvCodeOld::dualEncode(span e); + template void ExConvCodeOld::dualEncode(span e); + template void ExConvCodeOld::dualEncode(span e, span w); + template void ExConvCodeOld::dualEncode(span e, span w); + template void ExConvCodeOld::dualEncode2(span, span e); + template void ExConvCodeOld::dualEncode2(span, span e); + + template void ExConvCodeOld::accumulate(span, span e); + template void ExConvCodeOld::accumulate(span, span e); +} + +#endif \ No newline at end of file diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp new file mode 100644 index 0000000..ae07227 --- /dev/null +++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.cpp @@ -0,0 +1,555 @@ +#include "ExConvCodeOld.h" + +#ifdef LIBOTE_ENABLE_OLD_EXCONV + +namespace osuCrypto +{ +#ifdef ENABLE_SSE + + using My__m128 = __m128; + +#else + using My__m128 = block; + + inline My__m128 _mm_load_ps(float* b) { return *(block*)b; } + + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps&ig_expand=557 + inline My__m128 _mm_blendv_ps(My__m128 a, My__m128 b, My__m128 mask) + { + My__m128 dst; + for (u64 j = 0; j < 4; ++j) + { + if (mask.get(j) < 0) + dst.set(j, b.get(j)); + else + dst.set(j, a.get(j)); + } + return dst; + } + + + inline My__m128 _mm_setzero_ps() { return ZeroBlock; } +#endif + + // Compute e = G * e. + template + void ExConvCodeOld::dualEncode(span e) + { + if (e.size() != mCodeSize) + throw RTE_LOC; + + if (mSystematic) + { + auto d = e.subspan(mMessageSize); + setTimePoint("ExConv.encode.begin"); + accumulate(d); + setTimePoint("ExConv.encode.accumulate"); + mExpander.expand(d, e.subspan(0, mMessageSize)); + setTimePoint("ExConv.encode.expand"); + } + else + { + oc::AlignedUnVector w(mMessageSize); + dualEncode(e, w); + memcpy(e.data(), w.data(), w.size() * sizeof(T)); + setTimePoint("ExConv.encode.memcpy"); + + } + } + + + // Compute e = G * e. + template + void ExConvCodeOld::dualEncode2(span e0, span e1) + { + if (e0.size() != mCodeSize) + throw RTE_LOC; + if (e1.size() != mCodeSize) + throw RTE_LOC; + + if (mSystematic) + { + auto d0 = e0.subspan(mMessageSize); + auto d1 = e1.subspan(mMessageSize); + setTimePoint("ExConv.encode.begin"); + accumulate(d0, d1); + setTimePoint("ExConv.encode.accumulate"); + mExpander.expand( + d0, d1, + e0.subspan(0, mMessageSize), + e1.subspan(0, mMessageSize)); + setTimePoint("ExConv.encode.expand"); + } + else + { + //oc::AlignedUnVector w0(mMessageSize); + //dualEncode(e, w); + //memcpy(e.data(), w.data(), w.size() * sizeof(T)); + //setTimePoint("ExConv.encode.memcpy"); + + // not impl. + throw RTE_LOC; + + } + } + + // Compute w = G * e. + template + void ExConvCodeOld::dualEncode(span e, span w) + { + if (e.size() != mCodeSize) + throw RTE_LOC; + + if (w.size() != mMessageSize) + throw RTE_LOC; + + if (mSystematic) + { + dualEncode(e); + memcpy(w.data(), e.data(), w.size() * sizeof(T)); + setTimePoint("ExConv.encode.memcpy"); + } + else + { + + setTimePoint("ExConv.encode.begin"); + + accumulate(e); + + setTimePoint("ExConv.encode.accumulate"); + + mExpander.expand(e, w); + setTimePoint("ExConv.encode.expand"); + } + } + + inline void refill(PRNG& prng) + { + assert(prng.mBuffer.size() == 256); + //block b[8]; + for (u64 i = 0; i < 256; i += 8) + { + //auto idx = mPrng.mBuffer[i].get(); + block* __restrict b = prng.mBuffer.data() + i; + block* __restrict k = prng.mBuffer.data() + (u8)(i - 8); + //for (u64 j = 0; j < 8; ++j) + //{ + // b = b ^ mPrng.mBuffer.data()[idx[j]]; + //} + b[0] = AES::roundEnc(b[0], k[0]); + b[1] = AES::roundEnc(b[1], k[1]); + b[2] = AES::roundEnc(b[2], k[2]); + b[3] = AES::roundEnc(b[3], k[3]); + b[4] = AES::roundEnc(b[4], k[4]); + b[5] = AES::roundEnc(b[5], k[5]); + b[6] = AES::roundEnc(b[6], k[6]); + b[7] = AES::roundEnc(b[7], k[7]); + + b[0] = b[0] ^ k[0]; + b[1] = b[1] ^ k[1]; + b[2] = b[2] ^ k[2]; + b[3] = b[3] ^ k[3]; + b[4] = b[4] ^ k[4]; + b[5] = b[5] ^ k[5]; + b[6] = b[6] ^ k[6]; + b[7] = b[7] ^ k[7]; + } + } + +#ifndef EXCONVCODE_INSTANTIATIONS + + void ExConvCodeOld::accOne( + PointList& pl, + u64 i, + u8* __restrict& ptr, + PRNG& prng, + block& rnd, + u64& q, + u64 qe, + u64 size) const + { + u64 j = i + 1; + pl.push_back(i, i); + + //if (mWrapping) + { + if (j < size) + pl.push_back(j, i); + ++j; + } + + if (q + mAccumulatorSize > qe) + { + refill(prng); + ptr = (u8*)prng.mBuffer.data(); + q = 0; + } + + + for (u64 k = 0; k < mAccumulatorSize; k += 8, q += 8, j += 8) + { + assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size())); + rnd = block::allSame(*ptr); + ++ptr; + + //std::cout << "r " << rnd << std::endl; + auto b0 = rnd; + auto b1 = rnd.slli_epi32<1>(); + auto b2 = rnd.slli_epi32<2>(); + auto b3 = rnd.slli_epi32<3>(); + auto b4 = rnd.slli_epi32<4>(); + auto b5 = rnd.slli_epi32<5>(); + auto b6 = rnd.slli_epi32<6>(); + auto b7 = rnd.slli_epi32<7>(); + //rnd = rnd.mm_slli_epi32<8>(); + + if (j + 0 < size && b0.get(0) < 0) pl.push_back(j + 0, i); + if (j + 1 < size && b1.get(0) < 0) pl.push_back(j + 1, i); + if (j + 2 < size && b2.get(0) < 0) pl.push_back(j + 2, i); + if (j + 3 < size && b3.get(0) < 0) pl.push_back(j + 3, i); + if (j + 4 < size && b4.get(0) < 0) pl.push_back(j + 4, i); + if (j + 5 < size && b5.get(0) < 0) pl.push_back(j + 5, i); + if (j + 6 < size && b6.get(0) < 0) pl.push_back(j + 6, i); + if (j + 7 < size && b7.get(0) < 0) pl.push_back(j + 7, i); + } + + + + } +#endif + + + template + OC_FORCEINLINE void accOneHelper( + T* __restrict xx, + My__m128 xii, + u64 j, u64 i, u64 size, + block* b + ) + { + My__m128 Zero = _mm_setzero_ps(); + + if constexpr (std::is_same::value) + { + My__m128 bb[8]; + bb[0] = _mm_load_ps((float*)&b[0]); + bb[1] = _mm_load_ps((float*)&b[1]); + bb[2] = _mm_load_ps((float*)&b[2]); + bb[3] = _mm_load_ps((float*)&b[3]); + bb[4] = _mm_load_ps((float*)&b[4]); + bb[5] = _mm_load_ps((float*)&b[5]); + bb[6] = _mm_load_ps((float*)&b[6]); + bb[7] = _mm_load_ps((float*)&b[7]); + + + bb[0] = _mm_blendv_ps(Zero, xii, bb[0]); + bb[1] = _mm_blendv_ps(Zero, xii, bb[1]); + bb[2] = _mm_blendv_ps(Zero, xii, bb[2]); + bb[3] = _mm_blendv_ps(Zero, xii, bb[3]); + bb[4] = _mm_blendv_ps(Zero, xii, bb[4]); + bb[5] = _mm_blendv_ps(Zero, xii, bb[5]); + bb[6] = _mm_blendv_ps(Zero, xii, bb[6]); + bb[7] = _mm_blendv_ps(Zero, xii, bb[7]); + + block tt[8]; + memcpy(tt, bb, 8 * 16); + + if (!rangeCheck || j + 0 < size) xx[j + 0] = xx[j + 0] ^ tt[0]; + if (!rangeCheck || j + 1 < size) xx[j + 1] = xx[j + 1] ^ tt[1]; + if (!rangeCheck || j + 2 < size) xx[j + 2] = xx[j + 2] ^ tt[2]; + if (!rangeCheck || j + 3 < size) xx[j + 3] = xx[j + 3] ^ tt[3]; + if (!rangeCheck || j + 4 < size) xx[j + 4] = xx[j + 4] ^ tt[4]; + if (!rangeCheck || j + 5 < size) xx[j + 5] = xx[j + 5] ^ tt[5]; + if (!rangeCheck || j + 6 < size) xx[j + 6] = xx[j + 6] ^ tt[6]; + if (!rangeCheck || j + 7 < size) xx[j + 7] = xx[j + 7] ^ tt[7]; + } + else + { + auto bb0 = xx[i] * (b[0].get(0) < 0); + auto bb1 = xx[i] * (b[1].get(0) < 0); + auto bb2 = xx[i] * (b[2].get(0) < 0); + auto bb3 = xx[i] * (b[3].get(0) < 0); + auto bb4 = xx[i] * (b[4].get(0) < 0); + auto bb5 = xx[i] * (b[5].get(0) < 0); + auto bb6 = xx[i] * (b[6].get(0) < 0); + auto bb7 = xx[i] * (b[7].get(0) < 0); + + if (!rangeCheck || j + 0 < size) xx[j + 0] = xx[j + 0] ^ bb0; + if (!rangeCheck || j + 1 < size) xx[j + 1] = xx[j + 1] ^ bb1; + if (!rangeCheck || j + 2 < size) xx[j + 2] = xx[j + 2] ^ bb2; + if (!rangeCheck || j + 3 < size) xx[j + 3] = xx[j + 3] ^ bb3; + if (!rangeCheck || j + 4 < size) xx[j + 4] = xx[j + 4] ^ bb4; + if (!rangeCheck || j + 5 < size) xx[j + 5] = xx[j + 5] ^ bb5; + if (!rangeCheck || j + 6 < size) xx[j + 6] = xx[j + 6] ^ bb6; + if (!rangeCheck || j + 7 < size) xx[j + 7] = xx[j + 7] ^ bb7; + } + } + + + template + OC_FORCEINLINE void ExConvCodeOld::accOne( + T* __restrict xx, + u64 i, + u8*& ptr, + PRNG& prng, + u64& q, + u64 qe, + u64 size) + { + u64 j = i + 1; + if (width) + { + auto xii = _mm_load_ps((float*)(xx + i)); + + if (!rangeCheck || j < size) + { + auto xj = xx[j] ^ xx[i]; + xx[j] = xj; + ++j; + } + + + if (q + width > qe) + { + refill(prng); + ptr = (u8*)prng.mBuffer.data(); + q = 0; + + } + q += width; + + for (u64 k = 0; k < width; ++k, j += 8) + { + assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size())); + block rnd = block::allSame(*(u8*)ptr++); + + + block b[8]; + b[0] = rnd; + b[1] = rnd.slli_epi32<1>(); + b[2] = rnd.slli_epi32<2>(); + b[3] = rnd.slli_epi32<3>(); + b[4] = rnd.slli_epi32<4>(); + b[5] = rnd.slli_epi32<5>(); + b[6] = rnd.slli_epi32<6>(); + b[7] = rnd.slli_epi32<7>(); + + accOneHelper(xx, xii, j, i, size, b); + } + } + + } + + template + OC_FORCEINLINE void ExConvCodeOld::accOne( + T0* __restrict xx0, + T1* __restrict xx1, + u64 i, + u8*& ptr, + PRNG& prng, + u64& q, + u64 qe, + u64 size) + { + u64 j = i + 1; + if (width) + { + auto xii0 = _mm_load_ps((float*)(xx0 + i)); + auto xii1 = _mm_load_ps((float*)(xx1 + i)); + if (!rangeCheck || j < size) + { + auto xj0 = xx0[j] ^ xx0[i]; + auto xj1 = xx1[j] ^ xx1[i]; + xx0[j] = xj0; + xx1[j] = xj1; + ++j; + } + + if (q + width > qe) + { + refill(prng); + ptr = (u8*)prng.mBuffer.data(); + q = 0; + + } + q += width; + + for (u64 k = 0; k < width; ++k, j += 8) + { + assert(ptr < (u8*)(prng.mBuffer.data() + prng.mBuffer.size())); + block rnd = block::allSame(*(u8*)ptr++); + + block b[8]; + b[0] = rnd; + b[1] = rnd.slli_epi32<1>(); + b[2] = rnd.slli_epi32<2>(); + b[3] = rnd.slli_epi32<3>(); + b[4] = rnd.slli_epi32<4>(); + b[5] = rnd.slli_epi32<5>(); + b[6] = rnd.slli_epi32<6>(); + b[7] = rnd.slli_epi32<7>(); + + accOneHelper(xx0, xii0, j, i, size, b); + accOneHelper(xx1, xii1, j, i, size, b); + } + } + + + } + + + + template + void ExConvCodeOld::accumulate(span x) + { + PRNG prng(mSeed ^ OneBlock); + + u64 i = 0; + auto size = x.size(); + auto main = (u64)std::max(0, size - 1 - mAccumulatorSize); + u8* ptr = (u8*)prng.mBuffer.data(); + auto qe = prng.mBuffer.size() * 128 / 8; + u64 q = 0; + T* __restrict xx = x.data(); + + { + +#define CASE(I) case I:\ + for (; i < main; ++i)\ + accOne(xx, i, ptr, prng, q, qe, size);\ + for (; i < size; ++i)\ + accOne(xx, i, ptr, prng, q, qe, size);\ + break + + switch (mAccumulatorSize / 8) + { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + default: + throw RTE_LOC; + break; + } +#undef CASE + } + } + + + template + void ExConvCodeOld::accumulate(span x0, span x1) + { + PRNG prng(mSeed ^ OneBlock); + + u64 i = 0; + auto size = x0.size(); + auto main = (u64)std::max(0, size - 1 - mAccumulatorSize); + u8* ptr = (u8*)prng.mBuffer.data(); + auto qe = prng.mBuffer.size() * 128 / 8; + u64 q = 0; + T0* __restrict xx0 = x0.data(); + T1* __restrict xx1 = x1.data(); + + { + +#define CASE(I) case I:\ + for (; i < main; ++i)\ + accOne(xx0,xx1, i, ptr, prng, q, qe, size);\ + for (; i < size; ++i)\ + accOne(xx0, xx1, i, ptr, prng, q, qe, size);\ + break + + switch (mAccumulatorSize / 8) + { + CASE(0); + CASE(1); + CASE(2); + CASE(3); + CASE(4); + default: + throw RTE_LOC; + break; + } +#undef CASE + } + } + + +#ifndef EXCONVCODE_INSTANTIATIONS + + SparseMtx ExConvCodeOld::getB() const + { + if (mSystematic) + { + PointList R(mMessageSize, mCodeSize); + auto B = mExpander.getB().points(); + + for (auto p : B) + { + R.push_back(p.mRow, mMessageSize + p.mCol); + } + for (u64 i = 0; i < mMessageSize; ++i) + R.push_back(i, i); + + return R; + } + else + { + return mExpander.getB(); + } + + } + + // Get the parity check version of the accumulator + SparseMtx ExConvCodeOld::getAPar() const + { + PRNG prng(mSeed ^ OneBlock); + + auto n = mCodeSize - mSystematic * mMessageSize; + + PointList AP(n, n);; + DenseMtx A = DenseMtx::Identity(n); + + block rnd; + u8* __restrict ptr = (u8*)prng.mBuffer.data(); + auto qe = prng.mBuffer.size() * 128; + u64 q = 0; + + for (u64 i = 0; i < n; ++i) + { + accOne(AP, i, ptr, prng, rnd, q, qe, n); + } + return AP; + } + + SparseMtx ExConvCodeOld::getA() const + { + auto APar = getAPar(); + + auto A = DenseMtx::Identity(mCodeSize); + + u64 offset = mSystematic ? mMessageSize : 0ull; + + for (u64 i = 0; i < APar.rows(); ++i) + { + for (auto y : APar.col(i)) + { + //std::cout << y << " "; + if (y != i) + { + auto ay = A.row(y + offset); + auto ai = A.row(i + offset); + ay ^= ai; + } + } + + //std::cout << "\n" << A << std::endl; + } + + return A.sparse(); + } +#endif +} + +#endif \ No newline at end of file diff --git a/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h new file mode 100644 index 0000000..ae31ba8 --- /dev/null +++ b/libOTe/Tools/ExConvCodeOld/ExConvCodeOld.h @@ -0,0 +1,161 @@ +// © 2023 Visa. +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +#pragma once + +#include "cryptoTools/Common/Defines.h" +#include "cryptoTools/Common/Timer.h" +#include "ExpanderOld.h" +#include "libOTe/Tools/EACode/Util.h" +#ifdef LIBOTE_ENABLE_OLD_EXCONV + +namespace osuCrypto +{ + + // The encoder for the generator matrix G = B * A. dualEncode(...) is the main function + // config(...) should be called first. + // + // B is the expander while A is the convolution. + // + // B has mMessageSize rows and mCodeSize columns. It is sampled uniformly + // with fixed row weight mExpanderWeight. + // + // A is a lower triangular n by n matrix with ones on the diagonal. The + // mAccumulatorSize diagonals left of the main diagonal are uniformly random. + // If mStickyAccumulator, then the first diagonal left of the main is always ones. + // + // See ExConvCodeInstantiations.cpp for how to instantiate new types that + // dualEncode can be called on. + // + // https://eprint.iacr.org/2023/882 + class ExConvCodeOld : public TimerAdapter + { + public: + ExpanderCodeOld mExpander; + + // configure the code. The default parameters are choses to balance security and performance. + // For additional parameter choices see the paper. + void config( + u64 messageSize, + u64 codeSize = 0 /*2 * messageSize is default */, + u64 expanderWeight = 7, + u64 accumulatorSize = 16, + bool systematic = true, + block seed = block(99999, 88888)) + { + if (codeSize == 0) + codeSize = 2 * messageSize; + + if (accumulatorSize % 8) + throw std::runtime_error("ExConvCode accumulator size must be a multiple of 8." LOCATION); + + mSeed = seed; + mMessageSize = messageSize; + mCodeSize = codeSize; + mAccumulatorSize = accumulatorSize; + mSystematic = systematic; + mExpander.config(messageSize, codeSize - messageSize * systematic, expanderWeight, seed ^ CCBlock); + } + + // the seed that generates the code. + block mSeed = ZeroBlock; + + // The message size of the code. K. + u64 mMessageSize = 0; + + // The codeword size of the code. n. + u64 mCodeSize = 0; + + // The size of the accumulator. + u64 mAccumulatorSize = 0; + + // is the code systematic (true=faster) + bool mSystematic = true; + + // return n-k. code size n, message size k. + u64 parityRows() const { return mCodeSize - mMessageSize; } + + // return code size n. + u64 parityCols() const { return mCodeSize; } + + // return message size k. + u64 generatorRows() const { return mMessageSize; } + + // return code size n. + u64 generatorCols() const { return mCodeSize; } + + // Compute w = G * e. e will be modified in the computation. + template + void dualEncode(span e, span w); + + // Compute e[0,...,k-1] = G * e. + template + void dualEncode(span e); + + + // Compute e[0,...,k-1] = G * e. + template + void dualEncode2(span e0, span e1); + + // get the expander matrix + SparseMtx getB() const; + + // Get the parity check version of the accumulator + SparseMtx getAPar() const; + + // get the accumulator matrix + SparseMtx getA() const; + + // Private functions ------------------------------------ + + // generate the point list for accumulating row i. + void accOne( + PointList& pl, + u64 i, + u8* __restrict& ptr, + PRNG& prng, + block& rnd, + u64& q, + u64 qe, + u64 size) const; + + // accumulating row i. + template + void accOne( + T* __restrict xx, + u64 i, + u8*& ptr, + PRNG& prng, + u64& q, + u64 qe, + u64 size); + + + // accumulating row i. + template + void accOne( + T0* __restrict xx0, + T1* __restrict xx1, + u64 i, + u8*& ptr, + PRNG& prng, + u64& q, + u64 qe, + u64 size); + + + // accumulate x onto itself. + template + void accumulate(span x); + + + // accumulate x onto itself. + template + void accumulate(span x0, span x1); + }; +} + +#endif \ No newline at end of file diff --git a/libOTe/Tools/ExConvCodeOld/ExpanderOld.h b/libOTe/Tools/ExConvCodeOld/ExpanderOld.h new file mode 100644 index 0000000..a9717c3 --- /dev/null +++ b/libOTe/Tools/ExConvCodeOld/ExpanderOld.h @@ -0,0 +1,527 @@ +// © 2023 Peter Rindal. +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +#pragma once + +#include "cryptoTools/Common/Defines.h" +#include "libOTe/Tools/LDPC/Mtx.h" +#include "libOTe/Tools/EACode/Util.h" +#include "cryptoTools/Common/Range.h" +#ifdef LIBOTE_ENABLE_OLD_EXCONV + + +namespace osuCrypto +{ + + // The encoder for the expander matrix B. + // B has mMessageSize rows and mCodeSize columns. It is sampled uniformly + // with fixed row weight mExpanderWeight. + class ExpanderCodeOld + { + public: + + void config( + u64 messageSize, + u64 codeSize = 0 /* default is 5* messageSize */, + u64 expanderWeight = 21, + block seed = block(33333, 33333)) + { + mMessageSize = messageSize; + mCodeSize = codeSize; + mExpanderWeight = expanderWeight; + mSeed = seed; + + } + + // the seed that generates the code. + block mSeed = block(0, 0); + + // The message size of the code. K. + u64 mMessageSize = 0; + + // The codeword size of the code. n. + u64 mCodeSize = 0; + + // The row weight of the B matrix. + u64 mExpanderWeight = 0; + + u64 parityRows() const { return mCodeSize - mMessageSize; } + u64 parityCols() const { return mCodeSize; } + + u64 generatorRows() const { return mMessageSize; } + u64 generatorCols() const { return mCodeSize; } + + + + template + typename std::enable_if<(count > 1), T>::type + expandOne(const T* __restrict ee, detail::ExpanderModd& prng)const; + + template + typename std::enable_if<(count > 1)>::type + expandOne( + const T* __restrict ee1, + const T2* __restrict ee2, + T* __restrict y1, + T2* __restrict y2, + detail::ExpanderModd& prng)const; + + template + typename std::enable_if::type + expandOne(const T* __restrict ee, detail::ExpanderModd& prng) const; + + template + typename std::enable_if::type + expandOne( + const T* __restrict ee1, + const T2* __restrict ee2, + T* __restrict y1, + T2* __restrict y2, + detail::ExpanderModd& prng) const; + + template + void expand( + span e, + span w) const; + + template + void expand( + span e1, + span e2, + span w1, + span w2 + ) const; + + SparseMtx getB() const; + + }; + + + template + typename std::enable_if::type + ExpanderCodeOld::expandOne(const T* __restrict ee, detail::ExpanderModd& prng) const + { + auto r = prng.get(); + return ee[r]; + } + + template + typename std::enable_if::type + ExpanderCodeOld::expandOne( + const T* __restrict ee1, + const T2* __restrict ee2, + T* __restrict y1, + T2* __restrict y2, + detail::ExpanderModd& prng) const + { + auto r = prng.get(); + + if (Add) + { + *y1 = *y1 ^ ee1[r]; + *y2 = *y2 ^ ee2[r]; + } + else + { + + *y1 = ee1[r]; + *y2 = ee2[r]; + } + } + + + template + OC_FORCEINLINE typename std::enable_if<(count > 1), T>::type + ExpanderCodeOld::expandOne(const T* __restrict ee, detail::ExpanderModd& prng)const + { + if constexpr (count >= 8) + { + u64 rr[8]; + T w[8]; + rr[0] = prng.get(); + rr[1] = prng.get(); + rr[2] = prng.get(); + rr[3] = prng.get(); + rr[4] = prng.get(); + rr[5] = prng.get(); + rr[6] = prng.get(); + rr[7] = prng.get(); + + w[0] = ee[rr[0]]; + w[1] = ee[rr[1]]; + w[2] = ee[rr[2]]; + w[3] = ee[rr[3]]; + w[4] = ee[rr[4]]; + w[5] = ee[rr[5]]; + w[6] = ee[rr[6]]; + w[7] = ee[rr[7]]; + + auto ww = + w[0] ^ + w[1] ^ + w[2] ^ + w[3] ^ + w[4] ^ + w[5] ^ + w[6] ^ + w[7]; + + if constexpr (count > 8) + ww = ww ^ expandOne(ee, prng); + return ww; + } + else + { + + auto r = prng.get(); + auto ww = expandOne(ee, prng); + return ww ^ ee[r]; + } + } + + + template + OC_FORCEINLINE typename std::enable_if<(count > 1)>::type + ExpanderCodeOld::expandOne( + const T* __restrict ee1, + const T2* __restrict ee2, + T* __restrict y1, + T2* __restrict y2, + detail::ExpanderModd& prng)const + { + if constexpr (count >= 8) + { + u64 rr[8]; + T w1[8]; + T2 w2[8]; + rr[0] = prng.get(); + rr[1] = prng.get(); + rr[2] = prng.get(); + rr[3] = prng.get(); + rr[4] = prng.get(); + rr[5] = prng.get(); + rr[6] = prng.get(); + rr[7] = prng.get(); + + w1[0] = ee1[rr[0]]; + w1[1] = ee1[rr[1]]; + w1[2] = ee1[rr[2]]; + w1[3] = ee1[rr[3]]; + w1[4] = ee1[rr[4]]; + w1[5] = ee1[rr[5]]; + w1[6] = ee1[rr[6]]; + w1[7] = ee1[rr[7]]; + + w2[0] = ee2[rr[0]]; + w2[1] = ee2[rr[1]]; + w2[2] = ee2[rr[2]]; + w2[3] = ee2[rr[3]]; + w2[4] = ee2[rr[4]]; + w2[5] = ee2[rr[5]]; + w2[6] = ee2[rr[6]]; + w2[7] = ee2[rr[7]]; + + auto ww1 = + w1[0] ^ + w1[1] ^ + w1[2] ^ + w1[3] ^ + w1[4] ^ + w1[5] ^ + w1[6] ^ + w1[7]; + auto ww2 = + w2[0] ^ + w2[1] ^ + w2[2] ^ + w2[3] ^ + w2[4] ^ + w2[5] ^ + w2[6] ^ + w2[7]; + + if constexpr (count > 8) + { + T yy1; + T2 yy2; + expandOne(ee1, ee2, &yy1, &yy2, prng); + ww1 = ww1 ^ yy1; + ww2 = ww2 ^ yy2; + } + + if constexpr (Add) + { + *y1 = *y1 ^ ww1; + *y2 = *y2 ^ ww2; + } + else + { + *y1 = ww1; + *y2 = ww2; + } + + } + else + { + + auto r = prng.get(); + if constexpr (Add) + { + auto w1 = ee1[r]; + auto w2 = ee2[r]; + expandOne(ee1, ee2, y1, y2, prng); + *y1 = *y1 ^ w1; + *y2 = *y2 ^ w2; + + } + else + { + + T yy1; + T2 yy2; + expandOne(ee1, ee2, &yy1, &yy2, prng); + *y1 = ee1[r] ^ yy1; + *y2 = ee2[r] ^ yy2; + } + } + } + + + + template + void ExpanderCodeOld::expand( + span e, + span w) const + { + assert(w.size() == mMessageSize); + assert(e.size() == mCodeSize); + detail::ExpanderModd prng(mSeed, mCodeSize); + + const T* __restrict ee = e.data(); + T* __restrict ww = w.data(); + + auto main = mMessageSize / 8 * 8; + u64 i = 0; + + for (; i < main; i += 8) + { +#define CASE(I) \ + case I:\ + if constexpr(Add)\ + {\ + ww[i + 0] = ww[i + 0] ^ expandOne(ee, prng);\ + ww[i + 1] = ww[i + 1] ^ expandOne(ee, prng);\ + ww[i + 2] = ww[i + 2] ^ expandOne(ee, prng);\ + ww[i + 3] = ww[i + 3] ^ expandOne(ee, prng);\ + ww[i + 4] = ww[i + 4] ^ expandOne(ee, prng);\ + ww[i + 5] = ww[i + 5] ^ expandOne(ee, prng);\ + ww[i + 6] = ww[i + 6] ^ expandOne(ee, prng);\ + ww[i + 7] = ww[i + 7] ^ expandOne(ee, prng);\ + }\ + else\ + {\ + ww[i + 0] = expandOne(ee, prng);\ + ww[i + 1] = expandOne(ee, prng);\ + ww[i + 2] = expandOne(ee, prng);\ + ww[i + 3] = expandOne(ee, prng);\ + ww[i + 4] = expandOne(ee, prng);\ + ww[i + 5] = expandOne(ee, prng);\ + ww[i + 6] = expandOne(ee, prng);\ + ww[i + 7] = expandOne(ee, prng);\ + }\ + break + + switch (mExpanderWeight) + { + CASE(5); + CASE(7); + CASE(9); + CASE(11); + CASE(21); + CASE(40); + default: + for (u64 jj = 0; jj < 8; ++jj) + { + auto r = prng.get(); + auto wv = ee[r]; + + for (auto j = 1ull; j < mExpanderWeight; ++j) + { + r = prng.get(); + wv = wv ^ ee[r]; + } + if constexpr (Add) + ww[i + jj] = ww[i + jj] ^ wv; + else + ww[i + jj] = wv; + + } + } +#undef CASE + } + + for (; i < mMessageSize; ++i) + { + auto wv = ee[prng.get()]; + for (auto j = 1ull; j < mExpanderWeight; ++j) + wv = wv ^ ee[prng.get()]; + + if constexpr (Add) + ww[i] = ww[i] ^ wv; + else + ww[i] = wv; + } + } + + + + template + void ExpanderCodeOld::expand( + span e1, + span e2, + span w1, + span w2 + ) const + { + assert(w1.size() == mMessageSize); + assert(w2.size() == mMessageSize); + assert(e1.size() == mCodeSize); + assert(e2.size() == mCodeSize); + detail::ExpanderModd prng(mSeed, mCodeSize); + + const T* __restrict ee1 = e1.data(); + const T2* __restrict ee2 = e2.data(); + T* __restrict ww1 = w1.data(); + T2* __restrict ww2 = w2.data(); + + auto main = mMessageSize / 8 * 8; + u64 i = 0; + + for (; i < main; i += 8) + { +#define CASE(I) \ + case I:\ + expandOne(ee1, ee2, &ww1[i + 0], &ww2[i + 0], prng);\ + expandOne(ee1, ee2, &ww1[i + 1], &ww2[i + 1], prng);\ + expandOne(ee1, ee2, &ww1[i + 2], &ww2[i + 2], prng);\ + expandOne(ee1, ee2, &ww1[i + 3], &ww2[i + 3], prng);\ + expandOne(ee1, ee2, &ww1[i + 4], &ww2[i + 4], prng);\ + expandOne(ee1, ee2, &ww1[i + 5], &ww2[i + 5], prng);\ + expandOne(ee1, ee2, &ww1[i + 6], &ww2[i + 6], prng);\ + expandOne(ee1, ee2, &ww1[i + 7], &ww2[i + 7], prng);\ + break + + switch (mExpanderWeight) + { + CASE(5); + CASE(7); + CASE(9); + CASE(11); + CASE(21); + CASE(40); + default: + for (u64 jj = 0; jj < 8; ++jj) + { + auto r = prng.get(); + auto wv1 = ee1[r]; + auto wv2 = ee2[r]; + + for (auto j = 1ull; j < mExpanderWeight; ++j) + { + r = prng.get(); + wv1 = wv1 ^ ee1[r]; + wv2 = wv2 ^ ee2[r]; + } + if constexpr (Add) + { + ww1[i + jj] = ww1[i + jj] ^ wv1; + ww2[i + jj] = ww2[i + jj] ^ wv2; + } + else + { + + ww1[i + jj] = wv1; + ww2[i + jj] = wv2; + } + } + } +#undef CASE + } + + for (; i < mMessageSize; ++i) + { + auto r = prng.get(); + auto wv1 = ee1[r]; + auto wv2 = ee2[r]; + for (auto j = 1ull; j < mExpanderWeight; ++j) + { + r = prng.get(); + wv1 = wv1 ^ ee1[r]; + wv2 = wv2 ^ ee2[r]; + + } + if constexpr (Add) + { + ww1[i] = ww1[i] ^ wv1; + ww2[i] = ww2[i] ^ wv2; + } + else + { + ww1[i] = wv1; + ww2[i] = wv2; + } + } + } + + inline SparseMtx ExpanderCodeOld::getB() const + { + //PRNG prng(mSeed); + detail::ExpanderModd prng(mSeed, mCodeSize); + PointList points(mMessageSize, mCodeSize); + + std::vector row(mExpanderWeight); + + { + + for (auto i : rng(mMessageSize)) + { + row[0] = prng.get(); + //points.push_back(i, row[0]); + for (auto j : rng(1, mExpanderWeight)) + { + //do { + row[j] = prng.get(); + //} while + auto iter = std::find(row.data(), row.data() + j, row[j]); + if (iter != row.data() + j) + { + row[j] = ~0ull; + *iter = ~0ull; + } + //throw RTE_LOC; + + } + for (auto j : rng(mExpanderWeight)) + { + + if (row[j] != ~0ull) + { + //std::cout << row[j] << " "; + points.push_back(i, row[j]); + } + else + { + //std::cout << "* "; + } + } + //std::cout << std::endl; + } + } + + return points; + } +} +#endif // LIBOTE_ENABLE_OLD_EXCONV diff --git a/libOTe/Tools/Pprf/RegularPprf.h b/libOTe/Tools/Pprf/RegularPprf.h index 439150a..9a6a1e0 100644 --- a/libOTe/Tools/Pprf/RegularPprf.h +++ b/libOTe/Tools/Pprf/RegularPprf.h @@ -319,15 +319,17 @@ namespace osuCrypto ctx.zero(leafSums[0].begin(), leafSums[0].end()); ctx.zero(leafSums[1].begin(), leafSums[1].end()); + auto outIter = leafLevel.begin() + leafOffset; + // for the leaf nodes we need to hash both children. - for (u64 parentIdx = 0, outIdx = leafOffset, childIdx = 0; parentIdx < width; ++parentIdx) + for (u64 parentIdx = 0, childIdx = 0; parentIdx < width; ++parentIdx) { // The value of the parent. auto& parent = level0.data()[parentIdx]; // The bit that indicates if we are on the left child (0) // or on the right child (1). - for (u64 keep = 0; keep < 2; ++keep, ++childIdx, outIdx += 8) + for (u64 keep = 0; keep < 2; ++keep, ++childIdx) { // The child that we will write in this iteration. @@ -337,27 +339,30 @@ namespace osuCrypto // H(x) = (AES(k0, x) + x) || (AES(k1, x) + x); // // where each half defines one of the children. - gGgmAes[keep].hashBlocks<8>(parent.data(), child.data()); + gGgmAes.data()[keep].hashBlocks<8>(parent.data(), child.data()); - ctx.fromBlock(leafLevel[outIdx + 0], child[0]); - ctx.fromBlock(leafLevel[outIdx + 1], child[1]); - ctx.fromBlock(leafLevel[outIdx + 2], child[2]); - ctx.fromBlock(leafLevel[outIdx + 3], child[3]); - ctx.fromBlock(leafLevel[outIdx + 4], child[4]); - ctx.fromBlock(leafLevel[outIdx + 5], child[5]); - ctx.fromBlock(leafLevel[outIdx + 6], child[6]); - ctx.fromBlock(leafLevel[outIdx + 7], child[7]); + ctx.fromBlock(*(outIter + 0), child.data()[0]); + ctx.fromBlock(*(outIter + 1), child.data()[1]); + ctx.fromBlock(*(outIter + 2), child.data()[2]); + ctx.fromBlock(*(outIter + 3), child.data()[3]); + ctx.fromBlock(*(outIter + 4), child.data()[4]); + ctx.fromBlock(*(outIter + 5), child.data()[5]); + ctx.fromBlock(*(outIter + 6), child.data()[6]); + ctx.fromBlock(*(outIter + 7), child.data()[7]); // leafSum += child auto& leafSum = leafSums[keep]; - ctx.plus(leafSum[0], leafSum[0], leafLevel[outIdx + 0]); - ctx.plus(leafSum[1], leafSum[1], leafLevel[outIdx + 1]); - ctx.plus(leafSum[2], leafSum[2], leafLevel[outIdx + 2]); - ctx.plus(leafSum[3], leafSum[3], leafLevel[outIdx + 3]); - ctx.plus(leafSum[4], leafSum[4], leafLevel[outIdx + 4]); - ctx.plus(leafSum[5], leafSum[5], leafLevel[outIdx + 5]); - ctx.plus(leafSum[6], leafSum[6], leafLevel[outIdx + 6]); - ctx.plus(leafSum[7], leafSum[7], leafLevel[outIdx + 7]); + ctx.plus(leafSum.data()[0], leafSum.data()[0], *(outIter + 0)); + ctx.plus(leafSum.data()[1], leafSum.data()[1], *(outIter + 1)); + ctx.plus(leafSum.data()[2], leafSum.data()[2], *(outIter + 2)); + ctx.plus(leafSum.data()[3], leafSum.data()[3], *(outIter + 3)); + ctx.plus(leafSum.data()[4], leafSum.data()[4], *(outIter + 4)); + ctx.plus(leafSum.data()[5], leafSum.data()[5], *(outIter + 5)); + ctx.plus(leafSum.data()[6], leafSum.data()[6], *(outIter + 6)); + ctx.plus(leafSum.data()[7], leafSum.data()[7], *(outIter + 7)); + + outIter+= 8; + assert(outIter <= leafLevel.end()); } } @@ -900,13 +905,14 @@ namespace osuCrypto ctx.copy(leafSums[k][i], leafSums[k][0]); } + auto outIter = leafLevel.begin() + outputOffset; // for leaf nodes both children should be hashed. - for (u64 parentIdx = 0, childIdx = 0, outputIdx = outputOffset; parentIdx < width; ++parentIdx) + for (u64 parentIdx = 0, childIdx = 0; parentIdx < width; ++parentIdx) { // The value of the parent. - auto parent = level0[parentIdx]; + auto parent = level0.data()[parentIdx]; - for (u64 keep = 0; keep < 2; ++keep, ++childIdx, outputIdx += 8) + for (u64 keep = 0; keep < 2; ++keep, ++childIdx) { // Each parent is expanded into the left and right children // using a different AES fixed-key. Therefore our OWF is: @@ -914,26 +920,29 @@ namespace osuCrypto // H(x) = (AES(k0, x) + x) || (AES(k1, x) + x); // // where each half defines one of the children. - gGgmAes[keep].hashBlocks<8>(parent.data(), child.data()); + gGgmAes.data()[keep].hashBlocks<8>(parent.data(), child.data()); - ctx.fromBlock(leafLevel[outputIdx + 0], child[0]); - ctx.fromBlock(leafLevel[outputIdx + 1], child[1]); - ctx.fromBlock(leafLevel[outputIdx + 2], child[2]); - ctx.fromBlock(leafLevel[outputIdx + 3], child[3]); - ctx.fromBlock(leafLevel[outputIdx + 4], child[4]); - ctx.fromBlock(leafLevel[outputIdx + 5], child[5]); - ctx.fromBlock(leafLevel[outputIdx + 6], child[6]); - ctx.fromBlock(leafLevel[outputIdx + 7], child[7]); + ctx.fromBlock(*(outIter + 0), child.data()[0]); + ctx.fromBlock(*(outIter + 1), child.data()[1]); + ctx.fromBlock(*(outIter + 2), child.data()[2]); + ctx.fromBlock(*(outIter + 3), child.data()[3]); + ctx.fromBlock(*(outIter + 4), child.data()[4]); + ctx.fromBlock(*(outIter + 5), child.data()[5]); + ctx.fromBlock(*(outIter + 6), child.data()[6]); + ctx.fromBlock(*(outIter + 7), child.data()[7]); auto& leafSum = leafSums[keep]; - ctx.plus(leafSum[0], leafSum[0], leafLevel[outputIdx + 0]); - ctx.plus(leafSum[1], leafSum[1], leafLevel[outputIdx + 1]); - ctx.plus(leafSum[2], leafSum[2], leafLevel[outputIdx + 2]); - ctx.plus(leafSum[3], leafSum[3], leafLevel[outputIdx + 3]); - ctx.plus(leafSum[4], leafSum[4], leafLevel[outputIdx + 4]); - ctx.plus(leafSum[5], leafSum[5], leafLevel[outputIdx + 5]); - ctx.plus(leafSum[6], leafSum[6], leafLevel[outputIdx + 6]); - ctx.plus(leafSum[7], leafSum[7], leafLevel[outputIdx + 7]); + ctx.plus(leafSum.data()[0], leafSum.data()[0], *(outIter + 0)); + ctx.plus(leafSum.data()[1], leafSum.data()[1], *(outIter + 1)); + ctx.plus(leafSum.data()[2], leafSum.data()[2], *(outIter + 2)); + ctx.plus(leafSum.data()[3], leafSum.data()[3], *(outIter + 3)); + ctx.plus(leafSum.data()[4], leafSum.data()[4], *(outIter + 4)); + ctx.plus(leafSum.data()[5], leafSum.data()[5], *(outIter + 5)); + ctx.plus(leafSum.data()[6], leafSum.data()[6], *(outIter + 6)); + ctx.plus(leafSum.data()[7], leafSum.data()[7], *(outIter + 7)); + + outIter += 8; + assert(outIter <= leafLevel.end()); } } } diff --git a/libOTe/Tools/TungstenCode/TungstenCode.h b/libOTe/Tools/TungstenCode/TungstenCode.h index 28d90b9..1e91d63 100644 --- a/libOTe/Tools/TungstenCode/TungstenCode.h +++ b/libOTe/Tools/TungstenCode/TungstenCode.h @@ -59,12 +59,19 @@ namespace osuCrypto { assert(mPermIter < mPerm.data() + mPerm.size()); auto dst = output + (*(u32 * __restrict)mPermIter * chunkSize); ++mPermIter; - + //if ((u64)output % std::hardware_destructive_interference_size != 0) + // throw std::runtime_error(LOCATION); + //if((u64)dst % std::hardware_destructive_interference_size != 0) + // throw std::runtime_error(LOCATION); + //if((u64)x % std::hardware_destructive_interference_size != 0) + // throw std::runtime_error(LOCATION); + //__assume((u64)x % std::hardware_destructive_interference_size == 0); + //__assume((u64)dst % std::hardware_destructive_interference_size == 0); ctx.copy(x, x + chunkSize, dst); } - void skip(u64 i) + void skip(u64 i) { assert(i % chunkSize == 0); mPermIter += i / chunkSize; @@ -121,6 +128,10 @@ namespace osuCrypto { struct TungstenCode { static const u64 ChunkSize = 8; + using Table = TableTungsten1024x4; + //static const u64 ChunkSize = 16; + //using Table = TableTungsten128x4; + TungstenPerm mPerm; u64 mMessageSize = 0; @@ -132,7 +143,7 @@ namespace osuCrypto { void config(u64 messageSize, u64 codeSize, block seed = block(452345234, 6756754363)) { if (messageSize % ChunkSize) - throw std::runtime_error("messageSize must be a multiple of ChunkSize. " LOCATION); + throw std::runtime_error("messageSize "+std::to_string(messageSize) + " must be a multiple of ChunkSize "+std::to_string(ChunkSize) + ". " LOCATION); if (codeSize % ChunkSize) throw std::runtime_error("codeSize must be a multiple of ChunkSize. " LOCATION); @@ -245,20 +256,34 @@ namespace osuCrypto { } else { - auto xi = x + i; - auto xs = xi + Table::max + 1; - auto x0 = xi + table[j].data()[0]; - auto x1 = xi + table[j].data()[1]; - auto x2 = xi + table[j].data()[2]; - auto x3 = xi + table[j].data()[3]; - - ctx.plus(*xs, *xs, *xi); - ctx.plus(*x0, *x0, *xi); - ctx.plus(*x1, *x1, *xi); - ctx.plus(*x2, *x2, *xi); - ctx.plus(*x3, *x3, *xi); - ctx.mulConst(*xs, *xs); + auto xiPtr = (x + i); + auto xsPtr = (xiPtr + Table::max + 1); + auto x0Ptr = (xiPtr + table[j].data()[0]); + auto x1Ptr = (xiPtr + table[j].data()[1]); + auto x2Ptr = (xiPtr + table[j].data()[2]); + auto x3Ptr = (xiPtr + table[j].data()[3]); + + auto xi = *xiPtr; + auto xs = *xsPtr; + auto x0 = *x0Ptr; + auto x1 = *x1Ptr; + auto x2 = *x2Ptr; + auto x3 = *x3Ptr; + + ctx.plus(xs, xs, xi); + ctx.plus(x0, x0, xi); + ctx.plus(x1, x1, xi); + ctx.plus(x2, x2, xi); + ctx.plus(x3, x3, xi); + ctx.mulConst(xs, xs); + + + ctx.copy(*xsPtr, xs); + ctx.copy(*x0Ptr, x0); + ctx.copy(*x1Ptr, x1); + ctx.copy(*x2Ptr, x2); + ctx.copy(*x3Ptr, x3); } } @@ -274,6 +299,93 @@ namespace osuCrypto { break; } } + // + // + // template< + // typename Table, + // typename F, + // bool rangeCheck, + // typename OutputMap, + // typename CoeffCtx, + // typename Iter + // > + // void accumulateBlockGather( + // Iter x, + // u64 i, + // Iter dst, + // u64 size, + // OutputMap& output, + // CoeffCtx& ctx) + // { + // + // //static constexpr int chunkSize = OutputMap::chunkSize; + // static_assert(Table::data.size() % ChunkSize == 0); + // auto table = Table::data.data(); + // + // for (u64 j = 0; j < Table::data.size();) + // { + //#ifdef ENABLE_SSE + // if (rangeCheck == false || i + Table::data.size() * 2 < size) + // _mm_prefetch((char*)(x + i + Table::data.size() * 2), _MM_HINT_T0); + //#endif + // + // for (u64 k = 0; k < ChunkSize; ++k, ++j, ++i) + // { + // + // if constexpr (Table::data[0].size() == 4) + // { + // if constexpr (rangeCheck) + // { + // if (i == size) + // return; + // + // auto xi = x + i; + // auto xs = x + ((i + Table::max + 1) % size); + // ctx.plus(*xs, *xs, *xi); + // ctx.mulConst(*xs, *xs); + // + // for (u64 p = 0; p < Table::data[0].size(); ++p) + // { + // auto idx = (i + table[j].data()[p]) % size; + // if (idx != i) + // { + // auto xi = x + i; + // auto xp = x + idx; + // ctx.plus(*xp, *xp, *xi); + // } + // } + // } + // else + // { + // auto xi = x + i; + // + // auto xs = xi + Table::max + 1; + // auto x0 = xi + table[j].data()[0]; + // auto x1 = xi + table[j].data()[1]; + // auto x2 = xi + table[j].data()[2]; + // auto x3 = xi + table[j].data()[3]; + // + // ctx.plus(*xs, *xs, *xi); + // ctx.plus(*x0, *x0, *xi); + // ctx.plus(*x1, *x1, *xi); + // ctx.plus(*x2, *x2, *xi); + // ctx.plus(*x3, *x3, *xi); + // ctx.mulConst(*xs, *xs); + // + // } + // } + // else + // { + // throw RTE_LOC; + // } + // } + // + // output.template applyChunk(dst, x + (i - ChunkSize), ctx); + // + // if (rangeCheck && i >= size) + // break; + // } + // } template(size / Table::data.size() - 1, 0) * Table::data.size(); - u64 i = 0; - map.reset(); + u64 main = std::max(size / Table::data.size() - 1, 0) * Table::data.size(); + u64 i = 0; + map.reset(); - // for the first iteration, the last accumulateBlock - // will wrap anmd change its value. We therefore can't - // yet map the output for this part. We do this at the end. - while (i <= Table::max) + // for the first iteration, the last accumulateBlock + // will wrap anmd change its value. We therefore can't + // yet map the output for this part. We do this at the end. + while (i <= Table::max) + { + TungstenNoop noop; + if (i < main) + accumulateBlock(input, i, output, size, noop, ctx); + else + accumulateBlock(input, i, output, size, noop, ctx); + i += Table::data.size(); + } + map.skip(i); + + // accumulate and map. no range check required. + for (; i < main; i += Table::data.size()) + { + accumulateBlock(input, i, output, size, map, ctx); + } + + // last iteration or two requires range checking. + for (; i < size; i += Table::data.size()) + { + accumulateBlock(input, i, output, size, map, ctx); + } + + // map the missing blocks at the start. + map.reset(); + i = 0; + auto end = std::min(Table::max, size); + while (i < end) + { + map.template applyChunk(output, input + i, ctx); + i += ChunkSize; + } + } + else { TungstenNoop noop; - if (i < main) - accumulateBlock(input, i, output, size, noop, ctx); - else - accumulateBlock(input, i, output, size, noop, ctx); - i += Table::data.size(); - } - map.skip(i); + u64 main = std::max(size / Table::data.size() - 1, 0) * Table::data.size(); + u64 i = 0; + map.reset(); + + // for the first iteration, the last accumulateBlock + // will wrap anmd change its value. We therefore can't + // yet map the output for this part. We do this at the end. + while (i <= Table::max) + { + if (i < main) + accumulateBlock(input, i, output, size, noop, ctx); + else + accumulateBlock(input, i, output, size, noop, ctx); + i += Table::data.size(); + } - // accumulate and map. no range check required. - for (; i < main; i += Table::data.size()) - { - accumulateBlock(input, i, output, size, map, ctx); - } + // accumulate and map. no range check required. + for (; i < main; i += Table::data.size()) + { + accumulateBlock(input, i, output, size, noop, ctx); + } - // last iteration or two requires range checking. - for (; i < size; i += Table::data.size()) - { - accumulateBlock(input, i, output, size, map, ctx); - } + // last iteration or two requires range checking. + for (; i < size; i += Table::data.size()) + { + accumulateBlock(input, i, output, size, noop, ctx); + } - // map the missing blocks at the start. - map.reset(); - i = 0; - auto end = std::min(Table::max, size); - while (i < end) - { - map.template applyChunk(output, input + i, ctx); - i += ChunkSize; + // map the missing blocks at the start. + map.reset(); + i = 0; + while (i < size) + { + map.template applyChunk(output, input + i, ctx); + i += ChunkSize; + } } } diff --git a/libOTe/TwoChooseOne/ConfigureCode.cpp b/libOTe/TwoChooseOne/ConfigureCode.cpp index ca7ef15..a0b7400 100644 --- a/libOTe/TwoChooseOne/ConfigureCode.cpp +++ b/libOTe/TwoChooseOne/ConfigureCode.cpp @@ -73,9 +73,9 @@ namespace osuCrypto void ExConvConfigure( MultType mMultType, + u64& scaler, u64& expanderWeight, u64& accumulatorWeight, - u64& scaler, double& minDist) { scaler = 2; diff --git a/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp b/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp index a6fbbf7..986011c 100644 --- a/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp +++ b/libOTe/TwoChooseOne/Silent/SilentOtExtReceiver.cpp @@ -385,7 +385,7 @@ namespace osuCrypto i = u64{}, j = u64{}, main = u64{} ); - gTimer.setTimePoint("recver.ot.enter"); + setTimePoint("recver.expand.enter"); if (isConfigured() == false) { @@ -403,7 +403,6 @@ namespace osuCrypto } setTimePoint("recver.expand.start"); - gTimer.setTimePoint("recver.expand.start"); mA.resize(mNoiseVecSize); @@ -411,13 +410,16 @@ namespace osuCrypto MC_AWAIT(mGen.expand(chl, mA, PprfOutputFormat::Interleaved, true, mNumThreads)); - setTimePoint("recver.expand.pprf_transpose"); - gTimer.setTimePoint("recver.expand.pprf_transpose"); + setTimePoint("recver.expand.pprf"); if (mMalType == SilentSecType::Malicious) + { + MC_AWAIT(ferretMalCheck(chl, prng)); + setTimePoint("recver.expand.malCheck"); + } if (mDebug) { @@ -426,6 +428,7 @@ namespace osuCrypto } compress(type); + setTimePoint("recver.expand.dualEncode"); mA.resize(mRequestNumOts); @@ -565,15 +568,13 @@ namespace osuCrypto // not implemented. throw RTE_LOC; } - setTimePoint("recver.expand.ldpc.mCopyHash"); + setTimePoint("recver.expand.CopyHash"); } void SilentOtExtReceiver::compress(ChoiceBitPacking packing)// ) { - setTimePoint("recver.expand.ldpc.mult"); - if (packing == ChoiceBitPacking::True) { // zero out the lsb of mA. We will store mC there. @@ -600,7 +601,8 @@ namespace osuCrypto // set the lsb of mA to be mC. for (auto p : mS) mA[p] = mA[p] | OneBlock; - setTimePoint("recver.expand.ldpc.mask"); + + setTimePoint("recver.expand.bitPacking"); switch (mMultType) { @@ -659,7 +661,7 @@ namespace osuCrypto break; } - setTimePoint("recver.expand.ldpc.dualEncode"); + setTimePoint("recver.expand.dualEncode"); } else @@ -742,7 +744,7 @@ namespace osuCrypto break; } - setTimePoint("recver.expand.ldpc.dualEncode"); + setTimePoint("recver.expand.dualEncode2"); } } diff --git a/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp b/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp index df84c5e..4cceac7 100644 --- a/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp +++ b/libOTe/TwoChooseOne/Silent/SilentOtExtSender.cpp @@ -340,7 +340,6 @@ namespace osuCrypto delta = AlignedUnVector{} ); - gTimer.setTimePoint("sender.ot.enter"); setTimePoint("sender.expand.enter"); if (isConfigured() == false) @@ -357,7 +356,6 @@ namespace osuCrypto } setTimePoint("sender.expand.start"); - gTimer.setTimePoint("sender.expand.start"); mDelta = d; @@ -369,18 +367,22 @@ namespace osuCrypto MC_AWAIT(mGen.expand(chl, delta, prng.get(), mB, PprfOutputFormat::Interleaved, true, mNumThreads)); + setTimePoint("sender.expand.pprf"); if (mMalType == SilentSecType::Malicious) + { MC_AWAIT(ferretMalCheck(chl, prng)); + setTimePoint("sender.expand.malcheck"); + } - setTimePoint("sender.expand.pprf_transpose"); - gTimer.setTimePoint("sender.expand.pprf_transpose"); if (mDebug) MC_AWAIT(checkRT(chl)); compress(); + setTimePoint("sender.expand.dualEncode"); + mB.resize(mRequestNumOts); MC_END(); @@ -468,9 +470,10 @@ namespace osuCrypto case osuCrypto::MultType::ExConv21x24: { - u64 expanderWeight = 0, accWeight = 0, _1; - double _2; - ExConvConfigure(mMultType, _1, expanderWeight, accWeight, _2); + u64 expanderWeight = 0, accWeight = 0, scaler = 0; + double minDist = 0; + ExConvConfigure(mMultType, scaler, expanderWeight, accWeight, minDist); + assert(scaler == 2 && minDist > 0 && minDist < 1); ExConvCode exConvEncoder; exConvEncoder.config(mRequestNumOts, mNoiseVecSize, expanderWeight, accWeight); diff --git a/libOTe/Vole/Silent/SilentVoleReceiver.h b/libOTe/Vole/Silent/SilentVoleReceiver.h index eafd389..99e5858 100644 --- a/libOTe/Vole/Silent/SilentVoleReceiver.h +++ b/libOTe/Vole/Silent/SilentVoleReceiver.h @@ -533,9 +533,10 @@ namespace osuCrypto case osuCrypto::MultType::ExConv21x24: { u64 expanderWeight, accumulatorWeight, scaler; - double _; - ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, _); + double minDist; + ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, minDist); ExConvCode encoder; + assert(scaler == 2 && minDist <1 && minDist > 0); encoder.config(mRequestSize, mNoiseVecSize, expanderWeight, accumulatorWeight); if (mTimer) diff --git a/libOTe/Vole/Silent/SilentVoleSender.h b/libOTe/Vole/Silent/SilentVoleSender.h index 2ad7ab5..d43499b 100644 --- a/libOTe/Vole/Silent/SilentVoleSender.h +++ b/libOTe/Vole/Silent/SilentVoleSender.h @@ -385,8 +385,10 @@ namespace osuCrypto { ExConvCode encoder; u64 expanderWeight, accumulatorWeight, scaler; - double _1; - ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, _1); + double minDist; + ExConvConfigure(mMultType, scaler, expanderWeight, accumulatorWeight, minDist); + assert(scaler == 2 && minDist < 1 && minDist > 0); + encoder.config(mRequestSize, mNoiseVecSize, expanderWeight, accumulatorWeight); if (mTimer) encoder.setTimer(getTimer()); diff --git a/libOTe_Tests/TungstenCode_Tests.cpp b/libOTe_Tests/TungstenCode_Tests.cpp index 245d0ed..d4dcd96 100644 --- a/libOTe_Tests/TungstenCode_Tests.cpp +++ b/libOTe_Tests/TungstenCode_Tests.cpp @@ -105,7 +105,7 @@ namespace tests_libOTe //std::cout << "\n"; encoder.mPerm.reset(); std::vector out2(out.size()); - encoder.accumulate, Ctx, F*>(in3.data(), out2.data(), in.size(), encoder.mPerm, ctx); + encoder.accumulate, Ctx, F*>(in3.data(), out2.data(), in.size(), encoder.mPerm, ctx); if (in3 != in) @@ -177,7 +177,7 @@ namespace tests_libOTe void TungstenCode_encode_test(const oc::CLP& cmd) { - auto K = cmd.getManyOr("k", { 256, 3328, 152336 }); + auto K = cmd.getManyOr("k", { 256, 3328, 15232 }); auto R = cmd.getManyOr("R", { 2.0 }); for (auto k : K) for (auto r : R)