From 0c80033daf7252bae9906d379a0385b24078deb7 Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Mon, 16 Sep 2024 17:10:34 +0200 Subject: [PATCH 1/2] lowram implementation --- lowram/Makefile | 116 ++++ lowram/api.h | 98 +++ lowram/config.h | 27 + lowram/fips202.c | 1 + lowram/fips202.h | 1 + lowram/lowram.c | 1248 ++++++++++++++++++++++++++++++++++ lowram/lowram.h | 90 +++ lowram/ntt.c | 1 + lowram/ntt.h | 1 + lowram/packing.c | 1 + lowram/packing.h | 1 + lowram/params.h | 1 + lowram/poly.c | 1 + lowram/poly.h | 1 + lowram/polyvec.c | 1 + lowram/polyvec.h | 1 + lowram/randombytes.c | 1 + lowram/randombytes.h | 1 + lowram/reduce.c | 1 + lowram/reduce.h | 1 + lowram/rounding.c | 1 + lowram/rounding.h | 1 + lowram/sign.c | 517 ++++++++++++++ lowram/sign.h | 1 + lowram/smallntt.h | 36 + lowram/smallntt_3329.c | 180 +++++ lowram/smallpoly.c | 100 +++ lowram/smallpoly.h | 26 + lowram/symmetric-shake.c | 1 + lowram/symmetric.h | 1 + lowram/test/.gitignore | 1 + lowram/test/cpucycles.c | 1 + lowram/test/cpucycles.h | 1 + lowram/test/speed_print.c | 1 + lowram/test/speed_print.h | 1 + lowram/test/test_dilithium.c | 1 + lowram/test/test_mul.c | 1 + lowram/test/test_speed.c | 1 + lowram/test/test_vectors.c | 1 + 39 files changed, 2467 insertions(+) create mode 100644 lowram/Makefile create mode 100644 lowram/api.h create mode 100644 lowram/config.h create mode 120000 lowram/fips202.c create mode 120000 lowram/fips202.h create mode 100644 lowram/lowram.c create mode 100644 lowram/lowram.h create mode 120000 lowram/ntt.c create mode 120000 lowram/ntt.h create mode 120000 lowram/packing.c create mode 120000 lowram/packing.h create mode 120000 lowram/params.h create mode 120000 lowram/poly.c create mode 120000 lowram/poly.h create mode 120000 lowram/polyvec.c create mode 120000 lowram/polyvec.h create mode 120000 lowram/randombytes.c create mode 120000 lowram/randombytes.h create mode 120000 lowram/reduce.c create mode 120000 lowram/reduce.h create mode 120000 
lowram/rounding.c create mode 120000 lowram/rounding.h create mode 100644 lowram/sign.c create mode 120000 lowram/sign.h create mode 100644 lowram/smallntt.h create mode 100644 lowram/smallntt_3329.c create mode 100644 lowram/smallpoly.c create mode 100644 lowram/smallpoly.h create mode 120000 lowram/symmetric-shake.c create mode 120000 lowram/symmetric.h create mode 120000 lowram/test/.gitignore create mode 120000 lowram/test/cpucycles.c create mode 120000 lowram/test/cpucycles.h create mode 120000 lowram/test/speed_print.c create mode 120000 lowram/test/speed_print.h create mode 120000 lowram/test/test_dilithium.c create mode 120000 lowram/test/test_mul.c create mode 120000 lowram/test/test_speed.c create mode 120000 lowram/test/test_vectors.c diff --git a/lowram/Makefile b/lowram/Makefile new file mode 100644 index 0000000..1c6bceb --- /dev/null +++ b/lowram/Makefile @@ -0,0 +1,116 @@ +CC ?= /usr/bin/cc +CFLAGS += -Wall -Wextra -Wpedantic -Wmissing-prototypes -Wredundant-decls \ + -Wshadow -Wvla -Wpointer-arith -O3 -fomit-frame-pointer +NISTFLAGS += -Wno-unused-result -O3 -fomit-frame-pointer +SOURCES = sign.c packing.c polyvec.c poly.c ntt.c reduce.c rounding.c lowram.c smallpoly.c smallntt_3329.c +HEADERS = config.h params.h api.h sign.h packing.h polyvec.h poly.h ntt.h \ + reduce.h rounding.h symmetric.h randombytes.h lowram.h smallpoly.h smallntt.h +KECCAK_SOURCES = $(SOURCES) fips202.c symmetric-shake.c +KECCAK_HEADERS = $(HEADERS) fips202.h + +.PHONY: all speed shared clean + +all: \ + test/test_dilithium2 \ + test/test_dilithium3 \ + test/test_dilithium5 \ + test/test_vectors2 \ + test/test_vectors3 \ + test/test_vectors5 + +speed: \ + test/test_mul \ + test/test_speed2 \ + test/test_speed3 \ + test/test_speed5 \ + +shared: \ + libpqcrystals_dilithium2_lowram.so \ + libpqcrystals_dilithium3_lowram.so \ + libpqcrystals_dilithium5_lowram.so \ + libpqcrystals_fips202_lowram.so \ + +libpqcrystals_fips202_lowram.so: fips202.c fips202.h + $(CC) -shared -fPIC 
$(CFLAGS) -o $@ $< + +libpqcrystals_dilithium2_lowram.so: $(SOURCES) $(HEADERS) symmetric-shake.c + $(CC) -shared -fPIC $(CFLAGS) -DDILITHIUM_MODE=2 \ + -o $@ $(SOURCES) symmetric-shake.c + +libpqcrystals_dilithium3_lowram.so: $(SOURCES) $(HEADERS) symmetric-shake.c + $(CC) -shared -fPIC $(CFLAGS) -DDILITHIUM_MODE=3 \ + -o $@ $(SOURCES) symmetric-shake.c + +libpqcrystals_dilithium5_lowram.so: $(SOURCES) $(HEADERS) symmetric-shake.c + $(CC) -shared -fPIC $(CFLAGS) -DDILITHIUM_MODE=5 \ + -o $@ $(SOURCES) symmetric-shake.c + +test/test_dilithium2: test/test_dilithium.c randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=2 \ + -o $@ $< randombytes.c $(KECCAK_SOURCES) + +test/test_dilithium3: test/test_dilithium.c randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=3 \ + -o $@ $< randombytes.c $(KECCAK_SOURCES) + +test/test_dilithium5: test/test_dilithium.c randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=5 \ + -o $@ $< randombytes.c $(KECCAK_SOURCES) + +test/test_vectors2: test/test_vectors.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=2 \ + -o $@ $< $(KECCAK_SOURCES) + +test/test_vectors3: test/test_vectors.c $(KECCAK_SOURCES) $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=3 \ + -o $@ $< $(KECCAK_SOURCES) + +test/test_vectors5: test/test_vectors.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=5 \ + -o $@ $< $(KECCAK_SOURCES) + +test/test_speed2: test/test_speed.c test/speed_print.c test/speed_print.h \ + test/cpucycles.c test/cpucycles.h randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=2 \ + -o $@ $< test/speed_print.c test/cpucycles.c randombytes.c \ + $(KECCAK_SOURCES) + +test/test_speed3: test/test_speed.c test/speed_print.c test/speed_print.h \ + test/cpucycles.c test/cpucycles.h randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) 
$(CFLAGS) -DDILITHIUM_MODE=3 \ + -o $@ $< test/speed_print.c test/cpucycles.c randombytes.c \ + $(KECCAK_SOURCES) + +test/test_speed5: test/test_speed.c test/speed_print.c test/speed_print.h \ + test/cpucycles.c test/cpucycles.h randombytes.c $(KECCAK_SOURCES) \ + $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -DDILITHIUM_MODE=5 \ + -o $@ $< test/speed_print.c test/cpucycles.c randombytes.c \ + $(KECCAK_SOURCES) + +test/test_mul: test/test_mul.c randombytes.c $(KECCAK_SOURCES) $(KECCAK_HEADERS) + $(CC) $(CFLAGS) -UDBENCH -o $@ $< randombytes.c $(KECCAK_SOURCES) + +clean: + rm -f *~ test/*~ *.gcno *.gcda *.lcov + rm -f libpqcrystals_dilithium2_lowram.so + rm -f libpqcrystals_dilithium3_lowram.so + rm -f libpqcrystals_dilithium5_lowram.so + rm -f libpqcrystals_fips202_lowram.so + rm -f test/test_dilithium2 + rm -f test/test_dilithium3 + rm -f test/test_dilithium5 + rm -f test/test_vectors2 + rm -f test/test_vectors3 + rm -f test/test_vectors5 + rm -f test/test_speed2 + rm -f test/test_speed3 + rm -f test/test_speed5 + rm -f test/test_mul diff --git a/lowram/api.h b/lowram/api.h new file mode 100644 index 0000000..c255fd1 --- /dev/null +++ b/lowram/api.h @@ -0,0 +1,98 @@ +#ifndef API_H +#define API_H + +#include <stddef.h> /* size_t */ +#include <stdint.h> /* uint8_t */ + +#define pqcrystals_dilithium2_PUBLICKEYBYTES 1312 +#define pqcrystals_dilithium2_SECRETKEYBYTES 2560 +#define pqcrystals_dilithium2_BYTES 2420 + +#define pqcrystals_dilithium2_lowram_PUBLICKEYBYTES pqcrystals_dilithium2_PUBLICKEYBYTES +#define pqcrystals_dilithium2_lowram_SECRETKEYBYTES pqcrystals_dilithium2_SECRETKEYBYTES +#define pqcrystals_dilithium2_lowram_BYTES pqcrystals_dilithium2_BYTES + +int pqcrystals_dilithium2_lowram_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium2_lowram_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_lowram(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t
ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium2_lowram_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + +int pqcrystals_dilithium2_lowram_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + +#define pqcrystals_dilithium3_PUBLICKEYBYTES 1952 +#define pqcrystals_dilithium3_SECRETKEYBYTES 4032 +#define pqcrystals_dilithium3_BYTES 3309 + +#define pqcrystals_dilithium3_lowram_PUBLICKEYBYTES pqcrystals_dilithium3_PUBLICKEYBYTES +#define pqcrystals_dilithium3_lowram_SECRETKEYBYTES pqcrystals_dilithium3_SECRETKEYBYTES +#define pqcrystals_dilithium3_lowram_BYTES pqcrystals_dilithium3_BYTES + +int pqcrystals_dilithium3_lowram_keypair(uint8_t *pk, uint8_t *sk); + +int pqcrystals_dilithium3_lowram_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_lowram(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium3_lowram_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + +int pqcrystals_dilithium3_lowram_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + +#define pqcrystals_dilithium5_PUBLICKEYBYTES 2592 +#define pqcrystals_dilithium5_SECRETKEYBYTES 4896 +#define pqcrystals_dilithium5_BYTES 4627 + +#define pqcrystals_dilithium5_lowram_PUBLICKEYBYTES pqcrystals_dilithium5_PUBLICKEYBYTES +#define pqcrystals_dilithium5_lowram_SECRETKEYBYTES pqcrystals_dilithium5_SECRETKEYBYTES +#define pqcrystals_dilithium5_lowram_BYTES pqcrystals_dilithium5_BYTES + +int pqcrystals_dilithium5_lowram_keypair(uint8_t *pk, uint8_t *sk); + +int 
pqcrystals_dilithium5_lowram_signature(uint8_t *sig, size_t *siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_lowram(uint8_t *sm, size_t *smlen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *sk); + +int pqcrystals_dilithium5_lowram_verify(const uint8_t *sig, size_t siglen, + const uint8_t *m, size_t mlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + +int pqcrystals_dilithium5_lowram_open(uint8_t *m, size_t *mlen, + const uint8_t *sm, size_t smlen, + const uint8_t *ctx, size_t ctxlen, + const uint8_t *pk); + + +#endif diff --git a/lowram/config.h b/lowram/config.h new file mode 100644 index 0000000..ac0a571 --- /dev/null +++ b/lowram/config.h @@ -0,0 +1,27 @@ +#ifndef CONFIG_H +#define CONFIG_H + +//#define DILITHIUM_MODE 2 +// #define DILITHIUM_RANDOMIZED_SIGNING +//#define USE_RDPMC +//#define DBENCH + +#ifndef DILITHIUM_MODE +#define DILITHIUM_MODE 2 +#endif + +#if DILITHIUM_MODE == 2 +#define CRYPTO_ALGNAME "Dilithium2" +#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium2_lowram +#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium2_lowram_##s +#elif DILITHIUM_MODE == 3 +#define CRYPTO_ALGNAME "Dilithium3" +#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium3_lowram +#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium3_lowram_##s +#elif DILITHIUM_MODE == 5 +#define CRYPTO_ALGNAME "Dilithium5" +#define DILITHIUM_NAMESPACETOP pqcrystals_dilithium5_lowram +#define DILITHIUM_NAMESPACE(s) pqcrystals_dilithium5_lowram_##s +#endif + +#endif diff --git a/lowram/fips202.c b/lowram/fips202.c new file mode 120000 index 0000000..da2fa42 --- /dev/null +++ b/lowram/fips202.c @@ -0,0 +1 @@ +../ref/fips202.c \ No newline at end of file diff --git a/lowram/fips202.h b/lowram/fips202.h new file mode 120000 index 0000000..c759415 --- /dev/null +++ b/lowram/fips202.h @@ -0,0 +1 @@ +../ref/fips202.h \ No newline at end of file diff --git 
a/lowram/lowram.c b/lowram/lowram.c new file mode 100644 index 0000000..2a3716d --- /dev/null +++ b/lowram/lowram.c @@ -0,0 +1,1248 @@ +#include "lowram.h" +#include "fips202.h" +#include "symmetric.h" +#include "reduce.h" +#include "rounding.h" + +/* +This file implements functions aiding with the reduction of the memory +footprint of ML-DSA. + +The ideas are taken from the paper: + +Joppe W. Bos, Joost Renes, and Amber Sprenkels. 2022. Dilithium for Memory +Constrained Devices. In Progress in Cryptology - AFRICACRYPT 2022: 13th +International Conference on Cryptology in Africa, AFRICACRYPT 2022, Fes, +Morocco, July 18–20, 2022, Proceedings. Springer-Verlag, Berlin, Heidelberg, +217–235. https://doi.org/10.1007/978-3-031-17433-9_10 +*/ + +/************************************************* + * Name: unpack_pk_t1 + * + * Description: Unpack a single polynomial of t1 from pk. + * + * Arguments: - poly *t1: pointer to output polynomial + * - size_t idx: index of the t1 polynomial to unpack + * - const unsigned char pk[]: byte array containing bit-packed pk + **************************************************/ +void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) +{ + /* The packed t1 polynomials follow the SEEDBYTES-long prefix of pk */ + pk += SEEDBYTES; + polyt1_unpack(t1, pk + idx * POLYT1_PACKEDBYTES); +} + +/************************************************* +* Name: pack_sig_c +* +* Description: Copy the challenge bytes c to the start of the signature. +* +* Arguments: - uint8_t sig[]: output byte array for the bit-packed signature +* - const uint8_t c[]: challenge bytes (CTILDEBYTES long) +**************************************************/ +void pack_sig_c(uint8_t sig[CRYPTO_BYTES], + const uint8_t c[CTILDEBYTES]) +{ + unsigned int i; + + for (i = 0; i < CTILDEBYTES; ++i) + sig[i] = c[i]; +} + +/************************************************* +* Name: pack_sig_z +* +* Description: Pack only z into signature.
+* +* Arguments: - uint8_t sig[]: byte array containing bit-packed signature +* - const polyvecl *z: z vector +**************************************************/ +void pack_sig_z(uint8_t sig[CRYPTO_BYTES], + const polyvecl *z) +{ + unsigned int i; + sig += CTILDEBYTES; + for (i = 0; i < L; ++i) + polyz_pack(sig + i * POLYZ_PACKEDBYTES, &z->vec[i]); +} + +/************************************************* +* Name: pack_sig_h +* +* Description: Pack one polynomial of h into the signature: the indices of +* its non-zero coefficients are appended to the hint index +* list and the running hint count is stored at OMEGA + idx. +* +* Arguments: - unsigned char sig[]: byte array containing bit-packed signature +* - const poly *h_elem: element of h +* - const unsigned int idx: index of h in vector +* - unsigned int *hints_written: in/out, number of hints already +* written; advanced by the number of hints in h_elem +* +* NOTE(review): *hints_written is not checked against OMEGA here; +* presumably the caller bounds the total number of hints - confirm. +**************************************************/ +void pack_sig_h(unsigned char sig[CRYPTO_BYTES], + const poly *h_elem, + const unsigned int idx, + unsigned int *hints_written) +{ + sig += CTILDEBYTES; + sig += L * POLYZ_PACKEDBYTES; + + // Encode h + for (unsigned int j = 0; j < N; j++) + { + if (h_elem->coeffs[j] != 0) + { + sig[*hints_written] = (uint8_t)j; + (*hints_written)++; + } + } + sig[OMEGA + idx] = (uint8_t)*hints_written; +} + +/************************************************* +* Name: pack_sig_h_zero +* +* Description: Zero the hint index slots from *hints_written up to OMEGA. +* +* Arguments: - unsigned char sig[]: byte array containing bit-packed signature +* - unsigned int *hints_written: in/out, number of hints written; +* advanced to OMEGA if it was below it +**************************************************/ +void pack_sig_h_zero(unsigned char sig[CRYPTO_BYTES], + unsigned int *hints_written) +{ + sig += CTILDEBYTES; + sig += L * POLYZ_PACKEDBYTES; + while (*hints_written < OMEGA) + { + sig[*hints_written] = 0; + (*hints_written)++; + } +} + +/************************************************* + * Name: unpack_sig_c + * + * Description: Unpack only c from signature sig = (c, z, h).
+ * + * Arguments: - uint8_t c[]: output byte array receiving the CTILDEBYTES + * challenge bytes + * - const unsigned char sig[]: byte array containing + * bit-packed signature + * + * Always returns 0. + **************************************************/ +int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]) +{ + for (size_t i = 0; i < CTILDEBYTES; ++i) + c[i] = sig[i]; + return 0; +} + +/************************************************* + * Name: unpack_sig_z + * + * Description: Unpack only z from signature sig = (c, z, h). + * + * Arguments: - polyvecl *z: pointer to output vector z + * - const unsigned char sig[]: byte array containing + * bit-packed signature + * + * Always returns 0. + **************************************************/ +int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]) +{ + /* z is stored right after the CTILDEBYTES challenge bytes */ + sig += CTILDEBYTES; + for (size_t i = 0; i < L; ++i) + { + polyz_unpack(&z->vec[i], sig + i * POLYZ_PACKEDBYTES); + } + return 0; +} + +/************************************************* + * Name: unpack_sig_h + * + * Description: Unpack one polynomial of the hint vector h from signature + * sig = (c, z, h) while checking the encoding of all hints. + * + * Arguments: - poly *h: pointer to output hint polynomial + * - size_t idx: index of the hint polynomial to extract + * - const unsigned char sig[]: byte array containing + * bit-packed signature + * + * Returns 1 in case of malformed signature; otherwise 0.
+ **************************************************/ +int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]) +{ + /* Skip c and z; sig then points at the OMEGA index bytes followed by K count bytes */ + sig += CTILDEBYTES; + sig += L * POLYZ_PACKEDBYTES; + + /* Decode h */ + size_t k = 0; + for (size_t i = 0; i < K; ++i) + { + if (i == idx) + { + /* Clear the requested polynomial before its hint bits are set */ + for (size_t j = 0; j < N; ++j) + { + h->coeffs[j] = 0; + } + } + + /* sig[OMEGA + i] is the running hint count after polynomial i */ + if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) + { + return 1; + } + + for (size_t j = k; j < sig[OMEGA + i]; ++j) + { + /* Coefficients are ordered for strong unforgeability */ + if (j > k && sig[j] <= sig[j - 1]) + { + return 1; + } + if (i == idx) + { + h->coeffs[sig[j]] = 1; + } + } + + k = sig[OMEGA + i]; + } + + /* Extra indices are zero for strong unforgeability */ + for (size_t j = k; j < OMEGA; ++j) + { + if (sig[j]) + { + return 1; + } + } + return 0; +} + +/************************************************* + * Name: poly_challenge_compress + * + * Description: Compress the challenge polynomial: the indices of its + * non-zero coefficients are stored from byte 0 on and a + * 64-bit little-endian sign mask (bit set for -1) in + * bytes 60..67. + * + * Arguments: - uint8_t c[]: byte array for holding the compressed challenge + * - const poly *cp: challenge polynomial + * + **************************************************/ +void poly_challenge_compress(uint8_t c[68], const poly *cp) +{ + unsigned int i, pos; + uint64_t signs; + uint64_t mask; + /* Encode c */ + for (i = 0; i < 68; i++) + c[i] = 0; + signs = 0; + mask = 1; + pos = 0; + for (i = 0; i < N; ++i) + { + if (cp->coeffs[i] != 0) + { + c[pos++] = (uint8_t)i; + if (cp->coeffs[i] == -1) + { + signs |= mask; + } + mask <<= 1; + } + } + + for (i = 0; i < 8; ++i) + { + c[60 + i] = (unsigned char)(signs >> 8 * i); + } +} + +/************************************************* + * Name: poly_challenge_decompress + * + * Description: Decompress the challenge polynomial.
+ * + * Arguments: - poly *cp: output challenge polynomial + * - const uint8_t c[]: byte array holding the compressed challenge + * (TAU coefficient positions from byte 0 on, 64-bit + * little-endian sign mask in bytes 60..67) + * + **************************************************/ +void poly_challenge_decompress(poly *cp, const uint8_t c[68]) +{ + unsigned int i; + unsigned pos; + uint64_t signs = 0; + for (i = 0; i < N; i++) + cp->coeffs[i] = 0; + for (i = 0; i < 8; i++) + { + signs |= ((uint64_t)c[60 + i]) << (8 * i); + } + + for (i = 0; i < TAU; i++) + { + pos = c[i]; + if (signs & 1) + { + cp->coeffs[pos] = -1; + } + else + { + cp->coeffs[pos] = 1; + } + signs >>= 1; + } +} + +/************************************************* + * Name: polyt0_unpack_idx + * + * Description: Unpack coefficient from t0 at specific index. + * + * Arguments: - const uint8_t *t0: packed t0 + * - unsigned idx: index of coefficient + * + * Returns (1 << (D - 1)) minus the 13-bit packed value at position idx. + **************************************************/ +static inline int32_t polyt0_unpack_idx(const uint8_t *t0, unsigned idx) +{ + int32_t coeff; + // 8 coefficients are packed in 13 bytes + t0 += 13 * (idx >> 3); + + if (idx % 8 == 0) + { + coeff = t0[0]; + coeff |= (uint32_t)t0[1] << 8; + } + else if (idx % 8 == 1) + { + coeff = t0[1] >> 5; + coeff |= (uint32_t)t0[2] << 3; + coeff |= (uint32_t)t0[3] << 11; + } + else if (idx % 8 == 2) + { + coeff = t0[3] >> 2; + coeff |= (uint32_t)t0[4] << 6; + } + else if (idx % 8 == 3) + { + coeff = t0[4] >> 7; + coeff |= (uint32_t)t0[5] << 1; + coeff |= (uint32_t)t0[6] << 9; + } + else if (idx % 8 == 4) + { + coeff = t0[6] >> 4; + coeff |= (uint32_t)t0[7] << 4; + coeff |= (uint32_t)t0[8] << 12; + } + else if (idx % 8 == 5) + { + coeff = t0[8] >> 1; + coeff |= (uint32_t)t0[9] << 7; + } + else if (idx % 8 == 6) + { + coeff = t0[9] >> 6; + coeff |= (uint32_t)t0[10] << 2; + coeff |= (uint32_t)t0[11] << 10; + } + else + { // (idx % 8 == 7) + coeff = t0[11] >> 3; + coeff |= (uint32_t)t0[12] << 5; + } + coeff &= 0x1FFF; + return (1 << (D - 1)) - coeff; +} + +/************************************************* + * Name:
polyt1_unpack_idx + * + * Description: Unpack coefficient from t1 at specific index. + * + * Arguments: - const uint8_t *t1: packed t1 + * - unsigned idx: index of coefficient + * + **************************************************/ +static inline int32_t polyt1_unpack_idx(const uint8_t *t1, unsigned idx) +{ + int32_t coeff; + // 4 coefficients are packed in 5 bytes + t1 += 5 * (idx >> 2); + + if (idx % 4 == 0) + { + coeff = (t1[0] >> 0); + coeff |= ((uint32_t)t1[1] << 8); + } + else if (idx % 4 == 1) + { + coeff = (t1[1] >> 2); + coeff |= ((uint32_t)t1[2] << 6); + } + else if (idx % 4 == 2) + { + coeff = (t1[2] >> 4); + coeff |= ((uint32_t)t1[3] << 4); + } + else + { // (idx % 4 == 3) + coeff = (t1[3] >> 6); + coeff |= ((uint32_t)t1[4] << 2); + } + coeff &= 0x3FF; + return coeff; +} + +/************************************************* + * Name: poly_schoolbook + * + * Description: Schoolbook multiplication between challenge and t0. + * + * Arguments: - poly *c: Output polynomial + * - const uint8_t ccomp[]: First input, compressed challenge + * - const uint8_t *t0: Second input, packed t0 + * + **************************************************/ +void poly_schoolbook(poly *c, const uint8_t ccomp[68], const uint8_t *t0) +{ + unsigned i, j, idx; + uint64_t signs = 0; + for (i = 0; i < N; i++) + c->coeffs[i] = 0; + for (i = 0; i < 8; i++) + { + signs |= ((uint64_t)ccomp[60 + i]) << (8 * i); + } + + for (idx = 0; idx < TAU; idx++) + { + i = ccomp[idx]; + if (!(signs & 1)) + { + for (j = 0; i + j < N; j++) + { + c->coeffs[i + j] += polyt0_unpack_idx(t0, j); + } + for (j = N - i; j < N; j++) + { + c->coeffs[i + j - N] -= polyt0_unpack_idx(t0, j); + } + } + else + { + for (j = 0; i + j < N; j++) + { + c->coeffs[i + j] -= polyt0_unpack_idx(t0, j); + } + for (j = N - i; j < N; j++) + { + c->coeffs[i + j - N] += polyt0_unpack_idx(t0, j); + } + } + + signs >>= 1; + } +} + +/************************************************* + * Name: poly_schoolbook_t1 + * + * Description: 
Schoolbook multiplication between challenge and t1. + * + * Arguments: - poly *c: Output polynomial + * - const uint8_t ccomp[]: First input, compressed challenge + * - const uint8_t *t1: Second input, packed t1 + * + **************************************************/ +void poly_schoolbook_t1(poly *c, const uint8_t ccomp[68], const uint8_t *t1) +{ + unsigned i, j, idx; + uint64_t signs = 0; + for (i = 0; i < N; i++) + c->coeffs[i] = 0; + for (i = 0; i < 8; i++) + { + signs |= ((uint64_t)ccomp[60 + i]) << (8 * i); + } + + for (idx = 0; idx < TAU; idx++) + { + i = ccomp[idx]; + if (!(signs & 1)) + { + for (j = 0; i + j < N; j++) + { + c->coeffs[i + j] += (polyt1_unpack_idx(t1, j) << D); + } + for (j = N - i; j < N; j++) + { + c->coeffs[i + j - N] -= (polyt1_unpack_idx(t1, j) << D); + } + } + else + { + for (j = 0; i + j < N; j++) + { + c->coeffs[i + j] -= (polyt1_unpack_idx(t1, j) << D); + } + for (j = N - i; j < N; j++) + { + c->coeffs[i + j - N] += (polyt1_unpack_idx(t1, j) << D); + } + } + + signs >>= 1; + } +} + +/************************************************* + * Name: polyw_pack + * + * Description: Pack polynomial w. + * + * Arguments: - uint8_t buf[]: buffer to hold compressed w + * - poly *w: input polynomial + * + **************************************************/ +void polyw_pack(uint8_t buf[K * 768], poly *w) +{ + poly_reduce(w); + poly_caddq(w); + unsigned int i; + for (i = 0; i < N; i++) + { + buf[i * 3 + 0] = w->coeffs[i]; + buf[i * 3 + 1] = w->coeffs[i] >> 8; + buf[i * 3 + 2] = w->coeffs[i] >> 16; + } +} + +/************************************************* + * Name: polyw_unpack + * + * Description: Unpack polynomial w. 
+ * + * Arguments: - poly *w: output polynomial + * - const uint8_t buf[]: buffer holding compressed w + * + **************************************************/ +void polyw_unpack(poly *w, const uint8_t buf[K * 768]) +{ + unsigned int i; + for (i = 0; i < N; i++) + { + w->coeffs[i] = buf[i * 3 + 0]; + w->coeffs[i] |= (int32_t)buf[i * 3 + 1] << 8; + w->coeffs[i] |= (int32_t)buf[i * 3 + 2] << 16; + } +} + +/************************************************* + * Name: polyw_add_idx + * + * Description: Add an integer to a coefficient in a compressed polynomial buffer. + * + * Arguments: - uint8_t buf[]: buffer holding compressed polynomial coefficients + * - int32_t a: integer to add to the coefficient + * - size_t i: index of the coefficient to modify + * + **************************************************/ +static void polyw_add_idx(uint8_t buf[K * 768], int32_t a, size_t i) +{ + int32_t coeff; + coeff = buf[i * 3 + 0]; + coeff |= (int32_t)buf[i * 3 + 1] << 8; + coeff |= (int32_t)buf[i * 3 + 2] << 16; + + coeff += a; + + coeff = freeze(coeff); + + buf[i * 3 + 0] = coeff; + buf[i * 3 + 1] = coeff >> 8; + buf[i * 3 + 2] = coeff >> 16; +} + +/************************************************* + * Name: polyw_sub + * + * Description: Subtract the coefficients of a polynomial from a compressed + polynomial buffer and store the result in another polynomial. 
+ * + * Arguments: - poly *c: output polynomial to store the result + * - uint8_t buf[]: buffer holding compressed polynomial coefficients + * - poly *a: polynomial whose coefficients are to be subtracted + * from the buffer + * + **************************************************/ +void polyw_sub(poly *c, uint8_t buf[3 * 256], poly *a) +{ + int32_t coeff; + + for (size_t i = 0; i < N; i++) + { + coeff = buf[i * 3 + 0]; + coeff |= (int32_t)buf[i * 3 + 1] << 8; + coeff |= (int32_t)buf[i * 3 + 2] << 16; + + c->coeffs[i] = coeff - a->coeffs[i]; + } +} + +/************************************************* + * Name: highbits + * + * Description: Compute the high bits of an integer. + * + * Arguments: - int32_t a: input integer whose high bits are to be computed + * + * Returns the high bits of the input as the result. + **************************************************/ +static int32_t highbits(int32_t a) +{ + int32_t a1; + + a1 = (a + 127) >> 7; +#if GAMMA2 == (Q - 1) / 32 + a1 = (a1 * 1025 + (1 << 21)) >> 22; + a1 &= 15; +#elif GAMMA2 == (Q - 1) / 88 + a1 = (a1 * 11275 + (1 << 23)) >> 24; + a1 ^= ((43 - a1) >> 31) & a1; +#endif + + return a1; +} + +/************************************************* + * Name: poly_highbits + * + * Description: Compute the high bits of each coefficient in a polynomial. + * + * Arguments: - poly *a1: output polynomial to store the high bits of the coefficients + * - const poly *a: input polynomial whose coefficients' high bits + * are to be computed + * + **************************************************/ +void poly_highbits(poly *a1, const poly *a) +{ + unsigned int i; + + for (i = 0; i < N; ++i) + a1->coeffs[i] = highbits(a->coeffs[i]); +} + +/************************************************* + * Name: lowbits + * + * Description: Compute the low bits of an integer. + * + * Arguments: - int32_t a: input integer whose low bits are to be computed + * + * Returns the low bits of the input as the result. 
+ **************************************************/ +static int32_t lowbits(int32_t a) +{ + int32_t a1; + int32_t a0; + + /* Reuse highbits() so both halves of the decomposition share one rounding rule */ + a1 = highbits(a); + + a0 = a - a1 * 2 * GAMMA2; + /* Subtract Q when a0 exceeds (Q - 1) / 2; the sign bit of the difference is the mask */ + a0 -= (((Q - 1) / 2 - a0) >> 31) & Q; + return a0; +} + +/************************************************* + * Name: poly_lowbits + * + * Description: Compute the low bits of each coefficient in a polynomial. + * + * Arguments: - poly *a0: output polynomial to store the low bits of the coefficients + * - const poly *a: input polynomial whose coefficients' low bits + * are to be computed + * + **************************************************/ +void poly_lowbits(poly *a0, const poly *a) +{ + unsigned int i; + + for (i = 0; i < N; ++i) + a0->coeffs[i] = lowbits(a->coeffs[i]); +} + +/************************************************* + * Name: unpack_sk_s1 + * + * Description: Unpack only s1 from the secret key into a small polynomial. + * + * Arguments: - smallpoly *a: output small polynomial to store the unpacked data + * - const uint8_t *sk: input secret key buffer + * - size_t idx: index specifying the polynomial to unpack + * + **************************************************/ +void unpack_sk_s1(smallpoly *a, const uint8_t *sk, size_t idx) +{ + small_polyeta_unpack(a, sk + 2 * SEEDBYTES + TRBYTES + idx * POLYETA_PACKEDBYTES); +} + +/************************************************* + * Name: unpack_sk_s2 + * + * Description: Unpack only s2 from the secret key into a small polynomial.
+ * + * Arguments: - smallpoly *a: output small polynomial to store the unpacked data + * - const uint8_t *sk: input secret key buffer + * - size_t idx: index specifying the polynomial to unpack + * + **************************************************/ +void unpack_sk_s2(smallpoly *a, const uint8_t *sk, size_t idx) +{ + small_polyeta_unpack(a, sk + 2 * SEEDBYTES + TRBYTES + L * POLYETA_PACKEDBYTES + idx * POLYETA_PACKEDBYTES); +} + +/* Note: Buffer size can potentially be increased */ +#define POLY_UNIFORM_BUFFERSIZE 3 +/************************************************* + * Name: poly_uniform_pointwise_montgomery_polywadd_lowram + * + * Description: Generate a uniform polynomial using a seed and nonce, + * perform pointwise multiplication with another polynomial, + * and add the result to a compressed polynomial buffer. + * + * Arguments: - uint8_t wcomp[]: buffer to store the compressed polynomial + * coefficients + * - poly *b: input polynomial for pointwise multiplication + * - const uint8_t seed[]: seed for SHAKE128 + * - uint16_t nonce: nonce for SHAKE128 + * - keccak_state *state: state for the SHAKE128 + * + **************************************************/ +void poly_uniform_pointwise_montgomery_polywadd_lowram(uint8_t wcomp[3 * N], poly *b, const uint8_t seed[SEEDBYTES], uint16_t nonce, keccak_state *state) +{ + int32_t t; + uint8_t buf[POLY_UNIFORM_BUFFERSIZE * 3]; + { + size_t ctr = 0; + stream128_init(state, seed, nonce); + + do + { + shake128_squeeze(buf, sizeof buf, state); + + for (size_t pos = 0; pos < sizeof buf && ctr < N; pos += 3) + { + t = buf[pos]; + t |= (uint32_t)buf[pos + 1] << 8; + t |= (uint32_t)buf[pos + 2] << 16; + t &= 0x7FFFFF; + + if (t < Q) + { + t = montgomery_reduce((int64_t)t * b->coeffs[ctr]); + polyw_add_idx(wcomp, t, ctr); + ctr++; + } + } + } while (ctr < N); + } +} + +#define POLY_UNIFORM_GAMMA1_BUFFERSIZE 1 +#if GAMMA1 == (1 << 17) +#define POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS (POLY_UNIFORM_GAMMA1_BUFFERSIZE * 4) 
+#define POLY_UNIFORM_GAMMA1_BUFFERSIZE_BYTES (POLY_UNIFORM_GAMMA1_BUFFERSIZE * 9)
+#elif GAMMA1 == (1 << 19)
+#define POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS (POLY_UNIFORM_GAMMA1_BUFFERSIZE * 2) /* 2 coefficients per 5-byte group */
+#define POLY_UNIFORM_GAMMA1_BUFFERSIZE_BYTES (POLY_UNIFORM_GAMMA1_BUFFERSIZE * 5)
+#endif
+
+/*************************************************
+ * Name:        polyz_unpack_inplace
+ *
+ * Description: Unpack POLY_UNIFORM_GAMMA1_BUFFERSIZE packed groups of z in place.
+ *              Every output coefficient is GAMMA1 minus the packed value.
+ * Arguments:   - int32_t *r: on entry its first POLY_UNIFORM_GAMMA1_BUFFERSIZE_BYTES
+ *                            bytes hold the packed data; on return it holds
+ *                            POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS coefficients
+ *
+ **************************************************/
+static void polyz_unpack_inplace(int32_t *r)
+{
+    uint8_t *a = (uint8_t *)r; /* byte view of the same storage: input and output overlap */
+
+    unsigned int i, j;
+#if GAMMA1 == (1 << 17)
+    for (j = 0; j < POLY_UNIFORM_GAMMA1_BUFFERSIZE; ++j)
+    {
+        i = POLY_UNIFORM_GAMMA1_BUFFERSIZE - 1 - j; /* walk the groups from last to first */
+        int32_t t0;
+
+        r[4 * i + 3] = a[9 * i + 6] >> 6; /* highest coefficient first: its word lies behind the packed bytes */
+        r[4 * i + 3] |= (uint32_t)a[9 * i + 7] << 2;
+        r[4 * i + 3] |= (uint32_t)a[9 * i + 8] << 10;
+        r[4 * i + 3] &= 0x3FFFF;
+
+        r[4 * i + 2] = a[9 * i + 4] >> 4;
+        r[4 * i + 2] |= (uint32_t)a[9 * i + 5] << 4;
+        r[4 * i + 2] |= (uint32_t)a[9 * i + 6] << 12;
+        r[4 * i + 2] &= 0x3FFFF;
+
+        r[4 * i + 1] = (uint32_t)a[9 * i + 4] << 14; /* a[9i+4] is read first: for i == 0 this store overwrites bytes 4..7 */
+        r[4 * i + 1] |= a[9 * i + 2] >> 2;
+        r[4 * i + 1] |= (uint32_t)a[9 * i + 3] << 6;
+        r[4 * i + 1] &= 0x3FFFF;
+
+        t0 = a[9 * i + 0]; /* coefficient 0 goes through a temporary: r[4i] covers the bytes still being read */
+        t0 |= (uint32_t)a[9 * i + 1] << 8;
+        t0 |= (uint32_t)a[9 * i + 2] << 16;
+        t0 &= 0x3FFFF;
+
+        r[4 * i + 0] = GAMMA1 - t0;
+        r[4 * i + 1] = GAMMA1 - r[4 * i + 1];
+        r[4 * i + 2] = GAMMA1 - r[4 * i + 2];
+        r[4 * i + 3] = GAMMA1 - r[4 * i + 3];
+    }
+#elif GAMMA1 == (1 << 19)
+    for (j = 0; j < POLY_UNIFORM_GAMMA1_BUFFERSIZE; ++j)
+    {
+        i = POLY_UNIFORM_GAMMA1_BUFFERSIZE - 1 - j; /* walk the groups from last to first */
+        int32_t tmp0, tmp1;
+
+        tmp0 = a[5 * i + 2] >> 4; /* both values are built in temporaries before r is written */
+        tmp0 |= (uint32_t)a[5 * i + 3] << 4;
+        tmp0 |= (uint32_t)a[5 * i + 4] << 12;
+        tmp0 &= 0xFFFFF;
+
+        tmp1 = a[5 * i + 0];
+        tmp1 |= (uint32_t)a[5 * i + 1] << 8;
+        tmp1 |=
(uint32_t)a[5 * i + 2] << 16;
+        tmp1 &= 0xFFFFF;
+
+        r[2 * i + 0] = GAMMA1 - tmp1;
+        r[2 * i + 1] = GAMMA1 - tmp0;
+    }
+#endif
+}
+
+/*************************************************
+ * Name:        poly_uniform_gamma1_lowram
+ *
+ * Description: Generate a uniform polynomial with coefficients in the range [-GAMMA1 + 1, GAMMA1].
+ *
+ * Arguments:   - poly *a: output polynomial to store the generated coefficients
+ *              - const uint8_t seed[]: seed for SHAKE256
+ *              - uint16_t nonce: nonce for SHAKE256
+ *              - keccak_state *state: SHAKE256 state, (re)initialized by this call
+ *
+ **************************************************/
+void poly_uniform_gamma1_lowram(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce, keccak_state *state)
+{
+    int32_t buf[POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS]; /* receives the packed bytes, then the unpacked coefficients */
+
+    stream256_init(state, seed, nonce);
+    for (size_t i = 0; i < N / POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS; i++)
+    {
+        shake256_squeeze((uint8_t *)buf, POLY_UNIFORM_GAMMA1_BUFFERSIZE_BYTES, state);
+        polyz_unpack_inplace(buf);
+
+        for (size_t j = 0; j < POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS; j++)
+        {
+            a->coeffs[i * POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS + j] = buf[j];
+        }
+    }
+}
+
+/*************************************************
+ * Name:        poly_uniform_gamma1_add_lowram
+ *
+ * Description: Generate a uniform polynomial with coefficients in the range [-GAMMA1 + 1, GAMMA1],
+ *              and add it to another polynomial.
+ *
+ * Arguments:   - poly *a: output polynomial to store the result
+ *              - poly *b: input polynomial whose coefficients are to be added
+ *              - const uint8_t seed[]: seed for SHAKE256
+ *              - uint16_t nonce: nonce for SHAKE256
+ *              - keccak_state *state: state for SHAKE256
+ *
+ **************************************************/
+void poly_uniform_gamma1_add_lowram(poly *a, poly *b, const uint8_t seed[CRHBYTES], uint16_t nonce, keccak_state *state)
+{
+    int32_t buf[POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS]; /* receives the packed bytes, then the unpacked coefficients */
+
+    stream256_init(state, seed, nonce);
+    for (size_t i = 0; i < N / POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS; i++)
+    {
+        shake256_squeeze((uint8_t *)buf, POLY_UNIFORM_GAMMA1_BUFFERSIZE_BYTES, state);
+        polyz_unpack_inplace(buf);
+
+        for (size_t j = 0; j < POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS; j++)
+        {
+            a->coeffs[i * POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS + j] = buf[j] + b->coeffs[i * POLY_UNIFORM_GAMMA1_BUFFERSIZE_COEFFS + j]; /* b is read at the index a is written, so a may alias b */
+        }
+    }
+}
+
+/*************************************************
+* Name:        make_hint_lowram
+*
+* Description: Compute hint bit indicating whether adding z to r changes
+*              the high bits, i.e. whether highbits(r) != highbits(r + z).
+*
+* Arguments:   - int32_t z: value that is added to the input element
+*              - int32_t r: input element
+*
+* Returns 1 if the high bits differ, 0 otherwise.
+**************************************************/
+static inline int32_t make_hint_lowram(int32_t z, int32_t r)
+{
+    int32_t r1, v1;
+
+    r1 = highbits(r);
+    v1 = highbits(r + z);
+
+    if (r1 != v1)
+        return 1;
+    return 0;
+}
+
+/*************************************************
+ * Name:        poly_make_hint_lowram
+ *
+ * Description: Generate hint polynomial.
+ *
+ * Arguments:   - poly *a: output polynomial to store the generated hints
+ *              - poly *t: input polynomial
+ *              - uint8_t w[]: buffer holding compressed polynomial coefficients
+ *
+ * Returns the number of hints generated.
+ **************************************************/
+size_t poly_make_hint_lowram(poly *a, poly *t, uint8_t w[768])
+{
+    int32_t coeff;
+    size_t hints_n = 0;
+    for (size_t i = 0; i < N; i++)
+    {
+        // unpack coeff from w (contains w - cs2)
+        coeff = w[i * 3 + 0];
+        coeff |= (int32_t)w[i * 3 + 1] << 8;
+        coeff |= (int32_t)w[i * 3 + 2] << 16;
+
+        // compute w - cs2 + c*t0
+        coeff = coeff + t->coeffs[i];
+
+        a->coeffs[i] = make_hint_lowram(-t->coeffs[i], coeff); /* t->coeffs[i] is read before a->coeffs[i] is written, so a may alias t */
+        if (a->coeffs[i] == 1)
+        {
+            hints_n++;
+        }
+    }
+    return hints_n;
+}
+
+/*************************************************
+ * Name:        unpack_sig_h_indices
+ *
+ * Description: Unpack the hint indices of row idx of h from sig = (c, z, h).
+ *
+ * Arguments:   - uint8_t h_i[]: output, coefficient indices whose hint bit is set
+ *              - unsigned int *number_of_hints: output, number of entries in h_i
+ *              - unsigned int idx: row of h; const unsigned char sig[]: signature
+ *
+ * Returns 1 in case of malformed signature or idx >= K; otherwise 0.
+ **************************************************/
+int unpack_sig_h_indices(uint8_t h_i[OMEGA], unsigned int *number_of_hints, unsigned int idx, const unsigned char sig[CRYPTO_BYTES])
+{
+    sig += L * POLYZ_PACKEDBYTES;
+    sig += CTILDEBYTES;
+    /* Decode h */
+    unsigned int k = 0;
+    unsigned int hidx = 0;
+
+    if (idx > 0 && idx < K) /* only K cumulative counts follow the OMEGA index bytes */
+    {
+        k = sig[OMEGA + (idx - 1)];
+    }
+
+    if (idx >= K || sig[OMEGA + idx] < k || sig[OMEGA + idx] > OMEGA) /* idx is tested first so no count byte is read out of range */
+    {
+        return 1;
+    }
+
+    for (unsigned int j = k; j < sig[OMEGA + idx]; ++j)
+    {
+        /* Coefficients are ordered for strong unforgeability */
+        if (j > k && sig[j] <= sig[j - 1])
+        {
+            return 1;
+        }
+        h_i[hidx++] = sig[j];
+    }
+
+    *number_of_hints = hidx; /* not written on the error returns above */
+
+    k = sig[OMEGA + (K - 1)];
+    /* Extra indices are zero for strong unforgeability */
+    for (unsigned int j = k; j < OMEGA; ++j)
+    {
+        if (sig[j])
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*************************************************
+ * Name:        poly_use_hint_lowram
+ *
+ * Description: Use hint indices to correct the high bits of a polynomial.
+ *
+ * Arguments:   - poly *b: pointer to output polynomial with corrected high bits
+ *              - const poly *a: pointer to input polynomial
+ *              - uint8_t h_i[] / unsigned int number_of_hints: indices that get hint 1
+ **************************************************/
+void poly_use_hint_lowram(poly *b, const poly *a, uint8_t h_i[OMEGA], unsigned int number_of_hints)
+{
+    unsigned int i;
+    unsigned int in_list;
+
+    for (i = 0; i < N; ++i)
+    {
+        in_list = 0;
+        for (size_t hidx = 0; hidx < number_of_hints; hidx++) /* linear search instead of an expanded hint polynomial */
+        {
+            if (i == h_i[hidx])
+            {
+                in_list = 1;
+                break;
+            }
+        }
+        if (in_list)
+        {
+            b->coeffs[i] = use_hint(a->coeffs[i], 1);
+        }
+        else
+        {
+            b->coeffs[i] = use_hint(a->coeffs[i], 0);
+        }
+    }
+}
+
+/*************************************************
+ * Name:        pack_pk_rho
+ *
+ * Description: Bit-pack only rho in public key pk = (rho, t1).
+ *
+ * Arguments:   - unsigned char pk[]: output byte array
+ *              - const unsigned char rho[]: byte array containing rho
+ **************************************************/
+void pack_pk_rho(unsigned char pk[CRYPTO_PUBLICKEYBYTES],
+                 const unsigned char rho[SEEDBYTES])
+{
+    for (unsigned int i = 0; i < SEEDBYTES; ++i)
+    {
+        pk[i] = rho[i];
+    }
+}
+
+/*************************************************
+ * Name:        pack_pk_t1
+ *
+ * Description: Bit-pack only the t1 elem at idx in public key pk = (rho, t1).
+ *
+ * Arguments:   - unsigned char pk[]: output byte array
+ *              - const poly *t1: pointer to element idx of t1
+ *              - const unsigned int idx: index to the elem to pack
+ **************************************************/
+void pack_pk_t1(unsigned char pk[CRYPTO_PUBLICKEYBYTES],
+                const poly *t1,
+                const unsigned int idx)
+{
+    pk += SEEDBYTES; /* skip rho */
+    polyt1_pack(pk + idx * POLYT1_PACKEDBYTES, t1);
+}
+
+/*************************************************
+ * Name:        pack_sk_s1
+ *
+ * Description: Bit-pack only some element of s1 in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const poly *s1_elem: pointer to vector element idx in s1
+ *              - const unsigned int idx: index to the element of s1 that should
+ *                                        be packed
+ **************************************************/
+void pack_sk_s1(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                const poly *s1_elem,
+                const unsigned int idx)
+{
+    sk += 2 * SEEDBYTES + TRBYTES; /* skip rho, key, tr */
+    polyeta_pack(sk + idx * POLYETA_PACKEDBYTES, s1_elem);
+}
+
+/*************************************************
+ * Name:        pack_sk_s2
+ *
+ * Description: Bit-pack only some element of s2 in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const poly *s2_elem: pointer to vector element idx in s2
+ *              - const unsigned int idx: index to the element of s2 that should
+ *                                        be packed
+ **************************************************/
+void pack_sk_s2(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                const poly *s2_elem,
+                const unsigned int idx)
+{
+    sk += 2 * SEEDBYTES + TRBYTES + L * POLYETA_PACKEDBYTES; /* skip rho, key, tr, s1 */
+    polyeta_pack(sk + idx * POLYETA_PACKEDBYTES, s2_elem);
+}
+
+/*************************************************
+ * Name:        pack_sk_t0
+ *
+ * Description: Bit-pack only some element of t0 in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const poly *t0_elem: pointer to vector element idx in t0
+ *              - const unsigned int idx: index to the element of t0 that should
+ *                                        be packed
+ **************************************************/
+void pack_sk_t0(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                const poly *t0_elem,
+                const unsigned int idx)
+{
+    sk += 2 * SEEDBYTES + TRBYTES + L * POLYETA_PACKEDBYTES + K * POLYETA_PACKEDBYTES; /* skip rho, key, tr, s1, s2 */
+    polyt0_pack(sk + idx * POLYT0_PACKEDBYTES, t0_elem);
+}
+
+/*************************************************
+ * Name:        pack_sk_rho
+ *
+ * Description: Bit-pack only rho in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const unsigned char rho[]: byte array containing rho
+ **************************************************/
+void pack_sk_rho(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                 const unsigned char rho[SEEDBYTES])
+{
+    for (unsigned int i = 0; i < SEEDBYTES; ++i)
+    {
+        sk[i] = rho[i];
+    }
+}
+
+/*************************************************
+ * Name:        pack_sk_key
+ *
+ * Description: Bit-pack only key in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const unsigned char key[]: byte array containing key
+ **************************************************/
+void pack_sk_key(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                 const unsigned char key[SEEDBYTES])
+{
+    sk += SEEDBYTES; /* skip rho */
+    for (unsigned int i = 0; i < SEEDBYTES; ++i)
+    {
+        sk[i] = key[i];
+    }
+}
+
+/*************************************************
+ * Name:        pack_sk_tr
+ *
+ * Description: Bit-pack only tr in secret key sk = (rho, key, tr, s1, s2, t0).
+ *
+ * Arguments:   - unsigned char sk[]: output byte array
+ *              - const unsigned char tr[]: byte array containing tr
+ **************************************************/
+void pack_sk_tr(unsigned char sk[CRYPTO_SECRETKEYBYTES],
+                const unsigned char tr[TRBYTES])
+{
+    sk += 2 * SEEDBYTES; /* skip rho and key */
+    for (unsigned int i = 0; i < TRBYTES; ++i)
+    {
+        sk[i] = tr[i];
+    }
+}
+
+/*************************************************
+ * Name:        poly_challenge_lowram
+ *
+ * Description: Implementation of H. Samples polynomial with TAU nonzero
+ *              coefficients in {-1,1} using the output stream of
+ *              SHAKE256(seed), squeezed CHALLENGE_lowram_BUF_SIZE bytes at a time.
+ *
+ * Arguments:   - poly *c: pointer to output polynomial
+ *              - const uint8_t seed[]: byte array containing seed of length CTILDEBYTES
+ **************************************************/
+#define CHALLENGE_lowram_BUF_SIZE 8
+void poly_challenge_lowram(poly *c, const uint8_t seed[CTILDEBYTES])
+{
+    unsigned int i, b, pos;
+    uint64_t signs;
+    uint8_t buf[CHALLENGE_lowram_BUF_SIZE];
+    keccak_state state;
+
+    shake256_init(&state);
+    shake256_absorb(&state, seed, CTILDEBYTES);
+    shake256_finalize(&state);
+    shake256_squeeze(buf, CHALLENGE_lowram_BUF_SIZE, &state);
+    signs = 0;
+    for (i = 0; i < 8; ++i) /* NOTE(review): the literal 8 reads buf[0..7], so CHALLENGE_lowram_BUF_SIZE must stay >= 8 */
+    {
+        signs |= (uint64_t)buf[i] << 8 * i; /* first 8 stream bytes, little-endian, provide the sign bits */
+    }
+    pos = 8; /* >= CHALLENGE_lowram_BUF_SIZE, so the first loop pass squeezes fresh bytes */
+
+    for (i = 0; i < N; ++i)
+        c->coeffs[i] = 0;
+    for (i = N - TAU; i < N; ++i)
+    {
+        do
+        {
+            if (pos >= CHALLENGE_lowram_BUF_SIZE)
+            {
+                shake256_squeeze(buf, CHALLENGE_lowram_BUF_SIZE, &state);
+                pos = 0;
+            }
+
+            b = buf[pos++];
+        } while (b > i); /* reject bytes above i so that b lies in [0, i] */
+
+        c->coeffs[i] = c->coeffs[b];
+        c->coeffs[b] = 1 - 2 * (signs & 1); /* sign bit 0 gives +1, sign bit 1 gives -1 */
+        signs >>= 1;
+    }
+}
+
+/*************************************************
+ * Name:        poly_pointwise_acc_montgomery
+ *
+ * Description: Pointwise multiplication of polynomials in NTT domain
+ *              representation and multiplication of resulting polynomial
+ *              by 2^{-32} with accumulation.
+ * + * Arguments: - poly *c: pointer to output/accumulator polynomial + * - const poly *a: pointer to first input polynomial + * - const poly *b: pointer to second input polynomial + **************************************************/ +void poly_pointwise_acc_montgomery(poly *c, const poly *a, const poly *b) +{ + unsigned int i; + + for (i = 0; i < N; ++i) + c->coeffs[i] += montgomery_reduce((int64_t)a->coeffs[i] * b->coeffs[i]); +} diff --git a/lowram/lowram.h b/lowram/lowram.h new file mode 100644 index 0000000..d330d25 --- /dev/null +++ b/lowram/lowram.h @@ -0,0 +1,90 @@ +#ifndef STACK_H +#define STACK_H + +#include "poly.h" +#include "smallpoly.h" +#include +#include +#include "fips202.h" + +#define unpack_pk_t1 DILITHIUM_NAMESPACE(unpack_pk_t1) +void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]); +#define unpack_sig_z DILITHIUM_NAMESPACE(unpack_sig_z) +int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]); +#define unpack_sig_h DILITHIUM_NAMESPACE(unpack_sig_h) +int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]); +#define unpack_sig_c DILITHIUM_NAMESPACE(unpack_sig_c) +int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]); + + +#define pack_sig_c DILITHIUM_NAMESPACE(pack_sig_c) +void pack_sig_c(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES]); +#define pack_sig_z DILITHIUM_NAMESPACE(pack_sig_z) +void pack_sig_z(uint8_t sig[CRYPTO_BYTES], const polyvecl *z); +#define pack_sig_h DILITHIUM_NAMESPACE(pack_sig_h) +void pack_sig_h(unsigned char sig[CRYPTO_BYTES], + const poly *h_elem, + const unsigned int idx, + unsigned int *hints_written); +#define pack_sig_h_zero DILITHIUM_NAMESPACE(pack_sig_h_zero) +void pack_sig_h_zero(unsigned char sig[CRYPTO_BYTES], + unsigned int *hints_written); + +void poly_challenge_compress(uint8_t c[68], const poly *cp); +void poly_challenge_decompress(poly *cp, const uint8_t c[68]); + + +void poly_schoolbook(poly *c, const 
uint8_t ccomp[68], const uint8_t *t0); +void poly_schoolbook_t1(poly *c, const uint8_t ccomp[68], const uint8_t *t1); +void polyw_pack(uint8_t buf[K*768], poly *w); +void polyw_unpack(poly *w, const uint8_t buf[K*768]); + +void polyw_add(uint8_t buf[3*256], poly *p); +void polyw_sub(poly* c, uint8_t buf[3*256], poly *a); + +void poly_highbits(poly *a1, const poly *a); +void poly_lowbits(poly *a0, const poly *a); + +void unpack_sk_s1(smallpoly *a, const uint8_t *sk, size_t idx); +void unpack_sk_s2(smallpoly *a, const uint8_t *sk, size_t idx); + +void poly_uniform_pointwise_montgomery_polywadd_lowram(uint8_t wcomp[3*N], poly *b, const uint8_t seed[SEEDBYTES], uint16_t nonce, keccak_state *state); +void poly_uniform_gamma1_lowram(poly *a, const uint8_t seed[CRHBYTES], uint16_t nonce, keccak_state *state); +void poly_uniform_gamma1_add_lowram(poly *a, poly *b, const uint8_t seed[CRHBYTES], uint16_t nonce, keccak_state *state); +void poly_challenge_lowram(poly *c, const uint8_t seed[CTILDEBYTES]); + +size_t poly_make_hint_lowram(poly *a, poly *t, uint8_t w[768]); +int unpack_sig_h_indices(uint8_t h_i[OMEGA], unsigned int * number_of_hints, unsigned int idx, const unsigned char sig[CRYPTO_BYTES]); +void poly_use_hint_lowram(poly *b, const poly *a, uint8_t h_i[OMEGA], unsigned int number_of_hints); + +void pack_pk_rho(unsigned char pk[CRYPTO_PUBLICKEYBYTES], + const unsigned char rho[SEEDBYTES]); + +void pack_pk_t1(unsigned char pk[CRYPTO_PUBLICKEYBYTES], + const poly *t1, + const unsigned int idx); + +void pack_sk_s1(unsigned char sk[CRYPTO_SECRETKEYBYTES], + const poly *s1_elem, + const unsigned int idx); + +void pack_sk_s2(unsigned char sk[CRYPTO_SECRETKEYBYTES], + const poly *s2_elem, + const unsigned int idx); + +void pack_sk_t0(unsigned char sk[CRYPTO_SECRETKEYBYTES], + const poly *t0_elem, + const unsigned int idx); + +void pack_sk_rho(unsigned char sk[CRYPTO_SECRETKEYBYTES], + const unsigned char rho[SEEDBYTES]); + +void pack_sk_key(unsigned char 
sk[CRYPTO_SECRETKEYBYTES], + const unsigned char key[SEEDBYTES]); + +void pack_sk_tr(unsigned char sk[CRYPTO_SECRETKEYBYTES], + const unsigned char tr[TRBYTES]); + +#define poly_pointwise_acc_montgomery DILITHIUM_NAMESPACE(poly_pointwise_acc_montgomery) +void poly_pointwise_acc_montgomery(poly *c, const poly *a, const poly *b); +#endif diff --git a/lowram/ntt.c b/lowram/ntt.c new file mode 120000 index 0000000..b8583dd --- /dev/null +++ b/lowram/ntt.c @@ -0,0 +1 @@ +../ref/ntt.c \ No newline at end of file diff --git a/lowram/ntt.h b/lowram/ntt.h new file mode 120000 index 0000000..87cb9c3 --- /dev/null +++ b/lowram/ntt.h @@ -0,0 +1 @@ +../ref/ntt.h \ No newline at end of file diff --git a/lowram/packing.c b/lowram/packing.c new file mode 120000 index 0000000..e77fac9 --- /dev/null +++ b/lowram/packing.c @@ -0,0 +1 @@ +../ref/packing.c \ No newline at end of file diff --git a/lowram/packing.h b/lowram/packing.h new file mode 120000 index 0000000..d27a8e9 --- /dev/null +++ b/lowram/packing.h @@ -0,0 +1 @@ +../ref/packing.h \ No newline at end of file diff --git a/lowram/params.h b/lowram/params.h new file mode 120000 index 0000000..53133cc --- /dev/null +++ b/lowram/params.h @@ -0,0 +1 @@ +../ref/params.h \ No newline at end of file diff --git a/lowram/poly.c b/lowram/poly.c new file mode 120000 index 0000000..1f747c9 --- /dev/null +++ b/lowram/poly.c @@ -0,0 +1 @@ +../ref/poly.c \ No newline at end of file diff --git a/lowram/poly.h b/lowram/poly.h new file mode 120000 index 0000000..7a4cdf0 --- /dev/null +++ b/lowram/poly.h @@ -0,0 +1 @@ +../ref/poly.h \ No newline at end of file diff --git a/lowram/polyvec.c b/lowram/polyvec.c new file mode 120000 index 0000000..dc0efe3 --- /dev/null +++ b/lowram/polyvec.c @@ -0,0 +1 @@ +../ref/polyvec.c \ No newline at end of file diff --git a/lowram/polyvec.h b/lowram/polyvec.h new file mode 120000 index 0000000..59c77ec --- /dev/null +++ b/lowram/polyvec.h @@ -0,0 +1 @@ +../ref/polyvec.h \ No newline at end of file diff --git 
a/lowram/randombytes.c b/lowram/randombytes.c
new file mode 120000
index 0000000..59a42a5
--- /dev/null
+++ b/lowram/randombytes.c
@@ -0,0 +1 @@
+../ref/randombytes.c
\ No newline at end of file
diff --git a/lowram/randombytes.h b/lowram/randombytes.h
new file mode 120000
index 0000000..055e443
--- /dev/null
+++ b/lowram/randombytes.h
@@ -0,0 +1 @@
+../ref/randombytes.h
\ No newline at end of file
diff --git a/lowram/reduce.c b/lowram/reduce.c
new file mode 120000
index 0000000..3300e09
--- /dev/null
+++ b/lowram/reduce.c
@@ -0,0 +1 @@
+../ref/reduce.c
\ No newline at end of file
diff --git a/lowram/reduce.h b/lowram/reduce.h
new file mode 120000
index 0000000..d429fe7
--- /dev/null
+++ b/lowram/reduce.h
@@ -0,0 +1 @@
+../ref/reduce.h
\ No newline at end of file
diff --git a/lowram/rounding.c b/lowram/rounding.c
new file mode 120000
index 0000000..970bc20
--- /dev/null
+++ b/lowram/rounding.c
@@ -0,0 +1 @@
+../ref/rounding.c
\ No newline at end of file
diff --git a/lowram/rounding.h b/lowram/rounding.h
new file mode 120000
index 0000000..d26f3b1
--- /dev/null
+++ b/lowram/rounding.h
@@ -0,0 +1 @@
+../ref/rounding.h
\ No newline at end of file
diff --git a/lowram/sign.c b/lowram/sign.c
new file mode 100644
index 0000000..0849272
--- /dev/null
+++ b/lowram/sign.c
@@ -0,0 +1,517 @@
+#include <stdint.h>
+#include "params.h"
+#include "sign.h"
+#include "packing.h"
+#include "polyvec.h"
+#include "poly.h"
+#include "randombytes.h"
+#include "symmetric.h"
+#include "fips202.h"
+#include "smallpoly.h"
+#include "lowram.h"
+
+#include "smallntt.h"
+
+/*************************************************
+* Name:        crypto_sign_keypair
+*
+* Description: Generates public and private key.
+*
+* Arguments:   - uint8_t *pk: pointer to output public key (allocated
+*                             array of CRYPTO_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key (allocated
+*                             array of CRYPTO_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int crypto_sign_keypair(uint8_t *pk, uint8_t *sk) {
+  unsigned int i, j;
+  uint8_t seedbuf[2*SEEDBYTES + CRHBYTES];
+  const uint8_t *rho, *rhoprime, *key;
+
+  poly tA, tB;
+
+  union {
+    uint8_t tr[TRBYTES];
+    keccak_state s256;
+    poly tC;
+  } data; /* members are used one after another: s256 for the seed, tC in the loop, tr at the end */
+
+  keccak_state *s256 = &data.s256;
+  uint8_t *tr = data.tr;
+  poly *tC = &data.tC;
+
+  /* Get randomness for rho, rhoprime and key */
+  randombytes(seedbuf, SEEDBYTES);
+  seedbuf[SEEDBYTES+0] = K;
+  seedbuf[SEEDBYTES+1] = L;
+  shake256_init(s256);
+  shake256_absorb(s256, seedbuf, SEEDBYTES + 2);
+  shake256_finalize(s256);
+  shake256_squeeze(seedbuf, 2*SEEDBYTES + CRHBYTES, s256);
+
+  rho = seedbuf;
+  rhoprime = rho + SEEDBYTES;
+  key = rhoprime + CRHBYTES;
+
+  pack_sk_rho(sk, rho);
+  pack_sk_key(sk, key);
+  pack_pk_rho(pk, rho);
+
+  /* Matrix-vector multiplication, one row of t per iteration */
+  for (i = 0; i < K; i++)
+  {
+    /* Expand part of s1 (re-sampled for every row instead of being stored) */
+    poly_uniform_eta(tC, rhoprime, 0);
+    if (i == 0) /* s1 is written to sk only during the first row */
+    {
+      pack_sk_s1(sk, tC, 0);
+    }
+    poly_ntt(tC);
+    /* expand part of the matrix */
+    poly_uniform(&tB, rho, (i << 8) + 0);
+    /* partial matrix-vector multiplication */
+    poly_pointwise_montgomery(&tA, &tB, tC);
+    for(j = 1; j < L; j++)
+    {
+      /* Expand part of s1 */
+      poly_uniform_eta(tC, rhoprime, j);
+      if (i == 0)
+      {
+        pack_sk_s1(sk, tC, j);
+      }
+      poly_ntt(tC);
+      poly_uniform(&tB, rho, (i << 8) + j);
+      poly_pointwise_acc_montgomery(&tA, &tB, tC);
+    }
+
+    poly_reduce(&tA);
+    poly_invntt_tomont(&tA);
+
+    /* Add error vector s2 */
+    /* Sample short vector s2 */
+    poly_uniform_eta(&tB, rhoprime, L + i);
+    pack_sk_s2(sk, &tB, i);
+    poly_add(&tA, &tA, &tB);
+
+    /* Compute t{0,1} */
+    poly_caddq(&tA);
+    poly_power2round(tC, &tB, &tA);
+    pack_sk_t0(sk, &tB, i);
+    
pack_pk_t1(pk, tC, i);
+
+  }
+
+  /* Compute H(rho, t1) and write secret key */
+  shake256(tr, TRBYTES, pk, CRYPTO_PUBLICKEYBYTES);
+  pack_sk_tr(sk, tr);
+
+  return 0;
+}
+
+
+/*************************************************
+* Name:        crypto_sign_signature
+*
+* Description: Computes signature.
+*
+* Arguments:   - uint8_t *sig:   pointer to output signature (of length CRYPTO_BYTES)
+*              - size_t *siglen: pointer to output length of signature
+*              - uint8_t *m:     pointer to message to be signed
+*              - size_t mlen:    length of message
+*              - uint8_t *ctx:   pointer to context string
+*              - size_t ctxlen:  length of context string
+*              - uint8_t *sk:    pointer to bit-packed secret key
+*
+* Returns 0 (success) or -1 (context string too long; *siglen is then not written)
+**************************************************/
+int crypto_sign_signature(uint8_t *sig,
+                          size_t *siglen,
+                          const uint8_t *m,
+                          size_t mlen,
+                          const uint8_t *ctx,
+                          size_t ctxlen,
+                          const uint8_t *sk)
+{
+  uint8_t buf[2 * CRHBYTES];
+  uint8_t *mu, *rhoprime, *rnd;
+  const uint8_t *rho, *tr, *key;
+  uint16_t nonce = 0;
+  uint8_t wcomp[K][768]; /* one 768-byte compressed w polynomial per row */
+  uint8_t ccomp[68];
+
+  if(ctxlen > 255)
+    return -1;
+
+  union {
+    keccak_state s128;
+    keccak_state s256;
+  } state;
+
+  union {
+    poly full;
+    struct {
+      smallpoly stmp0;
+      smallpoly stmp1;
+    } small;
+  } polybuffer; /* tmp0 shares its storage with the two small polynomials */
+
+  poly *tmp0 = &polybuffer.full;
+  smallpoly *stmp0 = &polybuffer.small.stmp0;
+  smallpoly *scp = &polybuffer.small.stmp1;
+
+  rho = sk;
+  tr = sk + SEEDBYTES*2;
+  key = sk + SEEDBYTES;
+
+  mu = buf;
+  rnd = mu + CRHBYTES;
+  rhoprime = mu + CRHBYTES; /* rnd and rhoprime both point at the second half of buf */
+
+  /* Compute mu = CRH(tr, 0, ctxlen, ctx, msg) */
+  mu[0] = 0;
+  mu[1] = ctxlen; /* ctxlen <= 255 was checked above */
+  shake256_init(&state.s256);
+  shake256_absorb(&state.s256, tr, TRBYTES);
+  shake256_absorb(&state.s256, mu, 2);
+  shake256_absorb(&state.s256, ctx, ctxlen);
+  shake256_absorb(&state.s256, m, mlen);
+  shake256_finalize(&state.s256);
+  shake256_squeeze(mu, CRHBYTES, &state.s256);
+
+#ifdef DILITHIUM_RANDOMIZED_SIGNING
+  randombytes(rnd, RNDBYTES);
+#else
+  
unsigned int n;
+  /* Note: RNDBYTES < CRHBYTES, so buffer has proper size */
+  for(n=0;ncoeffs); /* NOTE(review): text is truncated here. Everything between "n<" and "->coeffs" is missing, including the #endif, the rej: label and the start of the l_idx loop */
+    poly_small_basemul_invntt(tmp0, scp, stmp0);
+
+    poly_uniform_gamma1_add_lowram(tmp0, tmp0, rhoprime, L*(nonce-1) + l_idx, &state.s256);
+
+    poly_reduce(tmp0);
+
+    if(poly_chknorm(tmp0, GAMMA1 - BETA))
+      goto rej;
+
+    polyz_pack(sig + CTILDEBYTES + l_idx*POLYZ_PACKEDBYTES, tmp0);
+  }
+
+
+  /* Write signature */
+  unsigned int hint_n = 0;
+  unsigned int hints_written = 0;
+  /* Check that subtracting cs2 does not change high bits of w and low bits
+   * do not reveal secret information */
+
+  for(unsigned int k_idx = 0; k_idx < K; ++k_idx) {
+    poly_challenge_decompress(tmp0, ccomp);
+    poly_small_ntt_copy(scp, tmp0);
+
+    unpack_sk_s2(stmp0, sk, k_idx);
+    small_ntt(stmp0->coeffs);
+    poly_small_basemul_invntt(tmp0, scp, stmp0);
+
+    polyw_sub(tmp0, wcomp[k_idx], tmp0);
+    poly_reduce(tmp0);
+
+    polyw_pack(wcomp[k_idx], tmp0); /* wcomp[k_idx] now holds w - cs2, read again by poly_make_hint_lowram */
+
+    poly_lowbits(tmp0, tmp0);
+    poly_reduce(tmp0);
+    if(poly_chknorm(tmp0, GAMMA2 - BETA)){
+      goto rej;
+    }
+
+    poly_schoolbook(tmp0, ccomp, sk + SEEDBYTES + TRBYTES + SEEDBYTES +
+      L*POLYETA_PACKEDBYTES + K*POLYETA_PACKEDBYTES + k_idx*POLYT0_PACKEDBYTES);
+
+    /* Compute hints for w1 */
+
+    if(poly_chknorm(tmp0, GAMMA2)) {
+      goto rej;
+    }
+
+    hint_n += poly_make_hint_lowram(tmp0, tmp0, wcomp[k_idx]);
+
+    if (hint_n > OMEGA) { /* hint count is accumulated over all rows */
+      goto rej;
+    }
+    pack_sig_h(sig, tmp0, k_idx, &hints_written);
+  }
+  pack_sig_h_zero(sig, &hints_written);
+  *siglen = CRYPTO_BYTES;
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_sign
+*
+* Description: Compute signed message.
+*
+* Arguments:   - uint8_t *sm: pointer to output signed message (allocated
+*                             array with CRYPTO_BYTES + mlen bytes),
+*                             can be equal to m
+*              - size_t *smlen: pointer to output length of signed
+*                               message
+*              - const uint8_t *m: pointer to message to be signed
+*              - size_t mlen: length of message
+*              - const uint8_t *ctx: pointer to context string
+*              - size_t ctxlen: length of context string
+*              - const uint8_t *sk: pointer to bit-packed secret key
+*
+* Returns 0 (success) or -1 (context string too long)
+**************************************************/
+int crypto_sign(uint8_t *sm,
+                size_t *smlen,
+                const uint8_t *m,
+                size_t mlen,
+                const uint8_t *ctx,
+                size_t ctxlen,
+                const uint8_t *sk)
+{
+  int ret;
+  size_t i;
+
+  for(i = 0; i < mlen; ++i)
+    sm[CRYPTO_BYTES + mlen - 1 - i] = m[mlen - 1 - i]; /* copied from the last byte down so that sm may equal m */
+  ret = crypto_sign_signature(sm, smlen, sm + CRYPTO_BYTES, mlen, ctx, ctxlen, sk);
+  *smlen += mlen; /* NOTE(review): also executed when ret is -1, in which case crypto_sign_signature did not write *smlen */
+  return ret;
+}
+
+/*************************************************
+* Name:        crypto_sign_verify
+*
+* Description: Verifies signature.
+*
+* Arguments:   - const uint8_t *sig: pointer to input signature
+*              - size_t siglen: length of signature
+*              - const uint8_t *m: pointer to message
+*              - size_t mlen: length of message
+*              - const uint8_t *ctx: pointer to context string
+*              - size_t ctxlen: length of context string
+*              - const uint8_t *pk: pointer to bit-packed public key
+*
+* Returns 0 if signature could be verified correctly and -1 otherwise
+**************************************************/
+int crypto_sign_verify(const uint8_t *sig,
+                       size_t siglen,
+                       const uint8_t *m,
+                       size_t mlen,
+                       const uint8_t *ctx,
+                       size_t ctxlen,
+                       const uint8_t *pk)
+{
+  unsigned int i;
+
+  poly p;
+
+  union {
+    uint8_t w1_packed[POLYW1_PACKEDBYTES];
+    uint8_t wcomp[768];
+  } w1_packed_comp; /* wcomp is finished with before w1_packed is written in each row */
+  uint8_t *w1_packed = w1_packed_comp.w1_packed;
+  uint8_t *wcomp = w1_packed_comp.wcomp;
+
+  union {
+    uint8_t ccomp[68];
+    uint8_t mu[TRBYTES];
+  } ccomp_mu; /* mu is absorbed into s256 before ccomp is written */
+  uint8_t *ccomp = ccomp_mu.ccomp;
+  uint8_t *mu = ccomp_mu.mu;
+
+  keccak_state s256;
+
+  union {
+    uint8_t hint_ones[OMEGA];
+    keccak_state s128;
+    uint8_t c2[CTILDEBYTES];
+  } shake_hint; /* s128, hint_ones and c2 share storage and are used one after another */
+
+  uint8_t *hint_ones = shake_hint.hint_ones;
+  keccak_state *s128 = &shake_hint.s128;
+  uint8_t *c2 = shake_hint.c2;
+
+  if(ctxlen > 255 || siglen != CRYPTO_BYTES)
+    return -1;
+
+  /* Compute CRH(h(rho, t1), msg) */
+  shake256_init(&s256);
+  shake256_absorb(&s256, pk, CRYPTO_PUBLICKEYBYTES);
+  shake256_finalize(&s256);
+  shake256_squeeze(mu, TRBYTES, &s256);
+
+  shake256_init(&s256);
+  shake256_absorb(&s256, mu, TRBYTES);
+  mu[0] = 0; /* mu is reused for the two prefix bytes (0, ctxlen) */
+  mu[1] = ctxlen;
+  shake256_absorb(&s256, mu, 2);
+  shake256_absorb(&s256, ctx, ctxlen);
+  shake256_absorb(&s256, m, mlen);
+  shake256_finalize(&s256);
+  shake256_squeeze(mu, CRHBYTES, &s256);
+
+  shake256_init(&s256); /* s256 now collects mu followed by the packed w1 rows */
+  shake256_absorb(&s256, mu, CRHBYTES);
+
+  /* Matrix-vector multiplication; compute Az - c2^dt1 */
+  poly_challenge_lowram(&p, sig);
+  poly_challenge_compress(ccomp, &p);
+
+  for (size_t k_idx = 0; k_idx < K; k_idx++) {
+    for(size_t
widx=0;widx<768;widx++){
+      wcomp[widx] = 0;
+    }
+
+    polyz_unpack(&p, sig + CTILDEBYTES); /* z is unpacked and norm-checked again for every row */
+    if(poly_chknorm(&p, GAMMA1 - BETA))
+      return -1;
+    poly_ntt(&p);
+
+    poly_uniform_pointwise_montgomery_polywadd_lowram(wcomp, &p, pk, (k_idx << 8) + 0, s128);
+
+    for (size_t l_idx = 1; l_idx < L; l_idx++) {
+      polyz_unpack(&p, sig + CTILDEBYTES + l_idx*POLYZ_PACKEDBYTES);
+      if(poly_chknorm(&p, GAMMA1 - BETA))
+        return -1;
+      poly_ntt(&p);
+      poly_uniform_pointwise_montgomery_polywadd_lowram(wcomp, &p, pk, (k_idx << 8) + l_idx, s128);
+    }
+    polyw_unpack(&p, wcomp);
+    poly_reduce(&p);
+    poly_invntt_tomont(&p);
+    polyw_pack(wcomp, &p);
+
+    poly_schoolbook_t1(&p, ccomp, pk + SEEDBYTES + k_idx*POLYT1_PACKEDBYTES);
+
+    polyw_sub(&p, wcomp, &p);
+    poly_reduce(&p);
+
+    /* Reconstruct w1 */
+    poly_caddq(&p);
+
+    if (unpack_sig_h_indices(hint_ones, &i, k_idx, sig) != 0) /* i receives the number of hints of this row */
+    {
+      return -1;
+    }
+    poly_use_hint_lowram(&p, &p, hint_ones, i);
+
+    polyw1_pack(w1_packed, &p);
+
+    shake256_absorb(&s256, w1_packed, POLYW1_PACKEDBYTES);
+  }
+  /* Call random oracle and verify challenge */
+  shake256_finalize(&s256);
+  shake256_squeeze(c2, CTILDEBYTES, &s256);
+  for(i = 0; i < CTILDEBYTES; ++i)
+    if(sig[i] != c2[i])
+      return -1;
+
+  return 0;
+}
+
+/*************************************************
+* Name:        crypto_sign_open
+*
+* Description: Verify signed message.
+*
+* Arguments:   - uint8_t *m: pointer to output message (allocated
+*                            array with smlen bytes), can be equal to sm
+*              - size_t *mlen: pointer to output length of message
+*              - const uint8_t *sm: pointer to signed message
+*              - size_t smlen: length of signed message
+*              - const uint8_t *ctx: pointer to context string
+*              - size_t ctxlen: length of context string
+*              - const uint8_t *pk: pointer to bit-packed public key
+*
+* Returns 0 if signed message could be verified correctly and -1 otherwise
+**************************************************/
+int crypto_sign_open(uint8_t *m,
+                     size_t *mlen,
+                     const uint8_t *sm,
+                     size_t smlen,
+                     const uint8_t *ctx,
+                     size_t ctxlen,
+                     const uint8_t *pk)
+{
+  size_t i;
+
+  if(smlen < CRYPTO_BYTES)
+    goto badsig;
+
+  *mlen = smlen - CRYPTO_BYTES;
+  if(crypto_sign_verify(sm, CRYPTO_BYTES, sm + CRYPTO_BYTES, *mlen, ctx, ctxlen, pk))
+    goto badsig;
+  else {
+    /* All good, copy msg, return 0 */
+    for(i = 0; i < *mlen; ++i)
+      m[i] = sm[CRYPTO_BYTES + i];
+    return 0;
+  }
+
+badsig:
+  /* Signature verification failed: report an empty message and zero all smlen output bytes */
+  *mlen = 0;
+  for(i = 0; i < smlen; ++i)
+    m[i] = 0;
+
+  return -1;
+}
diff --git a/lowram/sign.h b/lowram/sign.h
new file mode 120000
index 0000000..200b72f
--- /dev/null
+++ b/lowram/sign.h
@@ -0,0 +1 @@
+../ref/sign.h
\ No newline at end of file
diff --git a/lowram/smallntt.h b/lowram/smallntt.h
new file mode 100644
index 0000000..df908cf
--- /dev/null
+++ b/lowram/smallntt.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright (c) 2023 Junhao Huang (jhhuang_nuaa@126.com)
+ *
+ * Licensed under the Apache License, Version 2.0(the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SMALLNTT_H +#define SMALLNTT_H + +#include +#include "params.h" +#include "poly.h" + +/* We use the Kyber prime 3329 as the modulus for the small NTT. Other choices +such as 769 for all parameter sets or 257 for Dilithium2 and Dilithium5 are also +viable. */ +#define SMALL_Q 3329 +#define Q_INV_SMALL -3327 + +extern const int16_t small_zetas[128]; + +void small_ntt(int16_t r[N]); +void small_invntt_tomont(int16_t r[N]); +void small_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git a/lowram/smallntt_3329.c b/lowram/smallntt_3329.c new file mode 100644 index 0000000..df2f5c3 --- /dev/null +++ b/lowram/smallntt_3329.c @@ -0,0 +1,180 @@ +#include +#include "params.h" +#include "ntt.h" +#include "reduce.h" +#include "smallntt.h" + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint8_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 +}; + +void init_ntt() { + unsigned int i; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i=1;i<128;i++) + tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q); + + for(i=0;i<128;i++) { + zetas[i] = tmp[tree[i]]; + if(zetas[i] > KYBER_Q/2) + zetas[i] -= KYBER_Q; + if(zetas[i] < -KYBER_Q/2) + zetas[i] += KYBER_Q; + } +} +*/ + +const int16_t small_zetas[128] = { + -1044, -758, -359, 
-1517, 1493, 1422, 287, 202, + -171, 622, 1577, 182, 962, -1202, -1474, 1468, + 573, -1325, 264, 383, -829, 1458, -1602, -130, + -681, 1017, 732, 608, -1542, 411, -205, -1571, + 1223, 652, -552, 1015, -1293, 1491, -282, -1544, + 516, -8, -320, -666, -1618, -1162, 126, 1469, + -853, -90, -271, 830, 107, -1421, -247, -951, + -398, 961, -1508, -725, 448, -1065, 677, -1275, + -1103, 430, 555, 843, -1251, 871, 1550, 105, + 422, 587, 177, -235, -291, -460, 1574, 1653, + -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, + 817, 1097, 603, 610, 1322, -1285, -1465, 384, + -1215, -136, 1218, -1335, -874, 220, -1187, -1659, + -1185, -1530, -1278, 794, -1510, -854, -870, 478, + -108, -308, 996, 991, 958, -1460, 1522, 1628}; + +static int16_t montgomery_reduce_small(int32_t a) +{ + int16_t t; + + t = (int16_t)a * Q_INV_SMALL; + t = (a - (int32_t)t * SMALL_Q) >> 16; + return t; +} + +static int16_t barrett_reduce(int16_t a) +{ + int16_t t; + const int16_t v = ((1 << 26) + SMALL_Q / 2) / SMALL_Q; + + t = ((int32_t)v * a + (1 << 25)) >> 26; + t *= SMALL_Q; + return a - t; +} + +/************************************************* + * Name: fqmul + * + * Description: Multiplication followed by Montgomery reduction + * + * Arguments: - int16_t a: first factor + * - int16_t b: second factor + * + * Returns 16-bit integer congruent to a*b*R^{-1} mod q + **************************************************/ +static int16_t fqmul(int16_t a, int16_t b) +{ + return montgomery_reduce_small((int32_t)a * b); +} + +/************************************************* + * Name: ntt + * + * Description: Inplace number-theoretic transform (NTT) in Rq. 
+ * input is in standard order, output is in bitreversed order + * + * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq + **************************************************/ +void small_ntt(int16_t r[256]) +{ + unsigned int len, start, j, k; + int16_t t, zeta; + + k = 1; + for (len = 128; len >= 2; len >>= 1) + { + for (start = 0; start < 256; start = j + len) + { + zeta = small_zetas[k++]; + for (j = start; j < start + len; j++) + { + t = fqmul(zeta, r[j + len]); + r[j + len] = r[j] - t; + r[j] = r[j] + t; + } + } + } +} + +/************************************************* + * Name: invntt_tomont + * + * Description: Inplace inverse number-theoretic transform in Rq and + * multiplication by Montgomery factor 2^16. + * Input is in bitreversed order, output is in standard order + * + * Arguments: - int16_t r[256]: pointer to input/output vector of elements of Zq + **************************************************/ +void small_invntt_tomont(int16_t r[256]) +{ + unsigned int start, len, j, k; + int16_t t, zeta; + const int16_t f = 1441; // mont^2/128 + + k = 127; + for (len = 2; len <= 128; len <<= 1) + { + for (start = 0; start < 256; start = j + len) + { + zeta = small_zetas[k--]; + for (j = start; j < start + len; j++) + { + t = r[j]; + r[j] = barrett_reduce(t + r[j + len]); + r[j + len] = r[j + len] - t; + r[j + len] = fqmul(zeta, r[j + len]); + } + } + } + + for (j = 0; j < 256; j++) + { + r[j] = barrett_reduce(fqmul(r[j], f)); + } +} + +/************************************************* + * Name: basemul + * + * Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) + * used for multiplication of elements in Rq in NTT domain + * + * Arguments: - int16_t r[2]: pointer to the output polynomial + * - const int16_t a[2]: pointer to the first factor + * - const int16_t b[2]: pointer to the second factor + * - int16_t zeta: integer defining the reduction polynomial + **************************************************/ +void 
small_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) +{ + + int16_t a0 = a[0], a1 = a[1]; + int16_t b0 = b[0]; + + r[0] = fqmul(a1, b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a0, b0); + r[1] = fqmul(a0, b[1]); + r[1] += fqmul(a1, b0); +} diff --git a/lowram/smallpoly.c b/lowram/smallpoly.c new file mode 100644 index 0000000..361fcfd --- /dev/null +++ b/lowram/smallpoly.c @@ -0,0 +1,100 @@ +#include "smallpoly.h" +#include "smallntt.h" + +void poly_small_ntt_copy(smallpoly *out, poly *in) +{ + for (int i = N - 1; i >= 0; i--) + { + out->coeffs[i] = in->coeffs[i]; + } + small_ntt(out->coeffs); +} + +void polyvecl_small_ntt(smallpoly v[L]) +{ + unsigned int i; + + for (i = 0; i < L; ++i) + small_ntt(v[i].coeffs); +} + +void polyveck_small_ntt(smallpoly v[K]) +{ + unsigned int i; + + for (i = 0; i < K; ++i) + small_ntt(v[i].coeffs); +} + +void poly_small_basemul(int16_t r[N], const int16_t a[N], const int16_t b[N]) +{ + unsigned int i; + for (i = 0; i < N / 4; i++) + { + small_basemul(&r[4 * i], &a[4 * i], &b[4 * i], small_zetas[64 + i]); + small_basemul(&r[4 * i + 2], &a[4 * i + 2], &b[4 * i + 2], -small_zetas[64 + i]); + } +} + +void poly_small_basemul_invntt(poly *r, const smallpoly *a, const smallpoly *b) +{ + // re-use the buffer + smallpoly *tmp = (smallpoly *)r; + poly_small_basemul(tmp->coeffs, a->coeffs, b->coeffs); + + small_invntt_tomont(tmp->coeffs); + +#ifdef SMALL_POLY_16_BIT + int j; + // buffer is the same, so we neeed to be careful + for (j = N - 1; j >= 0; j--) + { + r->coeffs[j] = tmp->coeffs[j]; + } +#endif +} + +void polyvecl_small_basemul_invntt(polyvecl *r, const smallpoly *a, const smallpoly b[L]) +{ + unsigned int i; + for (i = 0; i < L; i++) + { + poly_small_basemul_invntt(&r->vec[i], a, &b[i]); + } +} + +void small_polyeta_unpack(smallpoly *r, const uint8_t *a) +{ + unsigned int i; + +#if ETA == 2 + for (i = 0; i < N / 8; ++i) + { + r->coeffs[8 * i + 0] = (a[3 * i + 0] >> 0) & 7; + r->coeffs[8 * i + 1] = 
(a[3 * i + 0] >> 3) & 7; + r->coeffs[8 * i + 2] = ((a[3 * i + 0] >> 6) | (a[3 * i + 1] << 2)) & 7; + r->coeffs[8 * i + 3] = (a[3 * i + 1] >> 1) & 7; + r->coeffs[8 * i + 4] = (a[3 * i + 1] >> 4) & 7; + r->coeffs[8 * i + 5] = ((a[3 * i + 1] >> 7) | (a[3 * i + 2] << 1)) & 7; + r->coeffs[8 * i + 6] = (a[3 * i + 2] >> 2) & 7; + r->coeffs[8 * i + 7] = (a[3 * i + 2] >> 5) & 7; + + r->coeffs[8 * i + 0] = ETA - r->coeffs[8 * i + 0]; + r->coeffs[8 * i + 1] = ETA - r->coeffs[8 * i + 1]; + r->coeffs[8 * i + 2] = ETA - r->coeffs[8 * i + 2]; + r->coeffs[8 * i + 3] = ETA - r->coeffs[8 * i + 3]; + r->coeffs[8 * i + 4] = ETA - r->coeffs[8 * i + 4]; + r->coeffs[8 * i + 5] = ETA - r->coeffs[8 * i + 5]; + r->coeffs[8 * i + 6] = ETA - r->coeffs[8 * i + 6]; + r->coeffs[8 * i + 7] = ETA - r->coeffs[8 * i + 7]; + } +#elif ETA == 4 + for (i = 0; i < N / 2; ++i) + { + r->coeffs[2 * i + 0] = a[i] & 0x0F; + r->coeffs[2 * i + 1] = a[i] >> 4; + r->coeffs[2 * i + 0] = ETA - r->coeffs[2 * i + 0]; + r->coeffs[2 * i + 1] = ETA - r->coeffs[2 * i + 1]; + } +#endif +} diff --git a/lowram/smallpoly.h b/lowram/smallpoly.h new file mode 100644 index 0000000..a547c8d --- /dev/null +++ b/lowram/smallpoly.h @@ -0,0 +1,26 @@ +#ifndef SMALLPOLY_H +#define SMALLPOLY_H +#include "params.h" +#include "poly.h" +#include "polyvec.h" + +#define SMALL_POLY_16_BIT +typedef struct +{ + int16_t coeffs[N]; +} smallpoly; + +typedef smallpoly smallhalfpoly; + +void poly_small_ntt_copy(smallpoly *, poly *); +void poly_small_basemul(int16_t r[N], const int16_t a[N], const int16_t b[N]); + +void polyvecl_small_ntt(smallpoly v[L]); +void polyveck_small_ntt(smallpoly v[K]); + +void polyvecl_small_basemul_invntt(polyvecl *r, const smallpoly *a, const smallpoly b[L]); +void poly_small_basemul_invntt(poly *r, const smallpoly *a, const smallpoly *b); + +void small_polyeta_unpack(smallpoly *r, const uint8_t *a); + +#endif diff --git a/lowram/symmetric-shake.c b/lowram/symmetric-shake.c new file mode 120000 index 0000000..86f8b6c 
--- /dev/null +++ b/lowram/symmetric-shake.c @@ -0,0 +1 @@ +../ref/symmetric-shake.c \ No newline at end of file diff --git a/lowram/symmetric.h b/lowram/symmetric.h new file mode 120000 index 0000000..8655364 --- /dev/null +++ b/lowram/symmetric.h @@ -0,0 +1 @@ +../ref/symmetric.h \ No newline at end of file diff --git a/lowram/test/.gitignore b/lowram/test/.gitignore new file mode 120000 index 0000000..cde2696 --- /dev/null +++ b/lowram/test/.gitignore @@ -0,0 +1 @@ +../../ref/test/.gitignore \ No newline at end of file diff --git a/lowram/test/cpucycles.c b/lowram/test/cpucycles.c new file mode 120000 index 0000000..4d6fc8a --- /dev/null +++ b/lowram/test/cpucycles.c @@ -0,0 +1 @@ +../../ref/test/cpucycles.c \ No newline at end of file diff --git a/lowram/test/cpucycles.h b/lowram/test/cpucycles.h new file mode 120000 index 0000000..269feb3 --- /dev/null +++ b/lowram/test/cpucycles.h @@ -0,0 +1 @@ +../../ref/test/cpucycles.h \ No newline at end of file diff --git a/lowram/test/speed_print.c b/lowram/test/speed_print.c new file mode 120000 index 0000000..98f2a46 --- /dev/null +++ b/lowram/test/speed_print.c @@ -0,0 +1 @@ +../../ref/test/speed_print.c \ No newline at end of file diff --git a/lowram/test/speed_print.h b/lowram/test/speed_print.h new file mode 120000 index 0000000..8ba4e5e --- /dev/null +++ b/lowram/test/speed_print.h @@ -0,0 +1 @@ +../../ref/test/speed_print.h \ No newline at end of file diff --git a/lowram/test/test_dilithium.c b/lowram/test/test_dilithium.c new file mode 120000 index 0000000..729cb5b --- /dev/null +++ b/lowram/test/test_dilithium.c @@ -0,0 +1 @@ +../../ref/test/test_dilithium.c \ No newline at end of file diff --git a/lowram/test/test_mul.c b/lowram/test/test_mul.c new file mode 120000 index 0000000..013f7af --- /dev/null +++ b/lowram/test/test_mul.c @@ -0,0 +1 @@ +../../ref/test/test_mul.c \ No newline at end of file diff --git a/lowram/test/test_speed.c b/lowram/test/test_speed.c new file mode 120000 index 0000000..7decc02 --- 
/dev/null +++ b/lowram/test/test_speed.c @@ -0,0 +1 @@ +../../ref/test/test_speed.c \ No newline at end of file diff --git a/lowram/test/test_vectors.c b/lowram/test/test_vectors.c new file mode 120000 index 0000000..1dffda0 --- /dev/null +++ b/lowram/test/test_vectors.c @@ -0,0 +1 @@ +../../ref/test/test_vectors.c \ No newline at end of file From dbe6543f6e020ebcbf62d2b2a80b44471baecf8a Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Thu, 31 Oct 2024 11:34:21 +0100 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Matthias J. Kannwischer --- lowram/sign.c | 4 ++-- lowram/smallntt_3329.c | 2 ++ lowram/smallpoly.c | 2 -- lowram/smallpoly.h | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/lowram/sign.c b/lowram/sign.c index 0849272..4d7075b 100644 --- a/lowram/sign.c +++ b/lowram/sign.c @@ -393,7 +393,7 @@ int crypto_sign_verify(const uint8_t *sig, if(ctxlen > 255 || siglen != CRYPTO_BYTES) return -1; - /* Compute CRH(h(rho, t1), msg) */ + /* Compute mu = CRH(H(rho, t1), 0, ctxlen, ctx, msg) */ shake256_init(&s256); shake256_absorb(&s256, pk, CRYPTO_PUBLICKEYBYTES); shake256_finalize(&s256); @@ -478,7 +478,7 @@ int crypto_sign_verify(const uint8_t *sig, * - size_t *mlen: pointer to output length of message * - const uint8_t *sm: pointer to signed message * - size_t smlen: length of signed message -* - const uint8_t *ctx: pointer to context tring +* - const uint8_t *ctx: pointer to context string * - size_t ctxlen: length of context string * - const uint8_t *pk: pointer to bit-packed public key * diff --git a/lowram/smallntt_3329.c b/lowram/smallntt_3329.c index df2f5c3..97eb7be 100644 --- a/lowram/smallntt_3329.c +++ b/lowram/smallntt_3329.c @@ -7,6 +7,8 @@ /* Code to generate zetas and zetas_inv used in the number-theoretic transform: #define KYBER_ROOT_OF_UNITY 17 +#define KYBER_Q 3329 +#define MONT -1044 static const uint8_t tree[128] = { 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, diff --git 
a/lowram/smallpoly.c b/lowram/smallpoly.c index 361fcfd..3e0f9e5 100644 --- a/lowram/smallpoly.c +++ b/lowram/smallpoly.c @@ -44,14 +44,12 @@ void poly_small_basemul_invntt(poly *r, const smallpoly *a, const smallpoly *b) small_invntt_tomont(tmp->coeffs); -#ifdef SMALL_POLY_16_BIT int j; // buffer is the same, so we neeed to be careful for (j = N - 1; j >= 0; j--) { r->coeffs[j] = tmp->coeffs[j]; } -#endif } void polyvecl_small_basemul_invntt(polyvecl *r, const smallpoly *a, const smallpoly b[L]) diff --git a/lowram/smallpoly.h b/lowram/smallpoly.h index a547c8d..72cc403 100644 --- a/lowram/smallpoly.h +++ b/lowram/smallpoly.h @@ -4,7 +4,6 @@ #include "poly.h" #include "polyvec.h" -#define SMALL_POLY_16_BIT typedef struct { int16_t coeffs[N];