From ac6c1ef57a920ef5344e0366c564113889a826c5 Mon Sep 17 00:00:00 2001 From: Amin Abdulrahman Date: Tue, 9 Apr 2024 16:11:37 +0200 Subject: [PATCH] Match 769 Plantard to m4f code --- crypto_sign/dilithium2/m4fstack/smallntt.S | 1 - .../dilithium2/m4fstack/smallntt_769.S | 1 + crypto_sign/dilithium3/m4fstack/macros_fnt.i | 158 ------------------ .../dilithium3/m4fstack/macros_smallntt.i | 24 ++- crypto_sign/dilithium3/m4fstack/smallntt.h | 23 ++- .../m4fstack/{smallntt.S => smallntt_769.S} | 24 ++- crypto_sign/dilithium5/m4fstack/smallntt.S | 1 - .../dilithium5/m4fstack/smallntt_769.S | 1 + 8 files changed, 60 insertions(+), 173 deletions(-) delete mode 120000 crypto_sign/dilithium2/m4fstack/smallntt.S create mode 120000 crypto_sign/dilithium2/m4fstack/smallntt_769.S delete mode 100644 crypto_sign/dilithium3/m4fstack/macros_fnt.i rename crypto_sign/dilithium3/m4fstack/{smallntt.S => smallntt_769.S} (94%) delete mode 120000 crypto_sign/dilithium5/m4fstack/smallntt.S create mode 120000 crypto_sign/dilithium5/m4fstack/smallntt_769.S diff --git a/crypto_sign/dilithium2/m4fstack/smallntt.S b/crypto_sign/dilithium2/m4fstack/smallntt.S deleted file mode 120000 index 7e2174f9..00000000 --- a/crypto_sign/dilithium2/m4fstack/smallntt.S +++ /dev/null @@ -1 +0,0 @@ -../../dilithium3/m4fstack/smallntt.S \ No newline at end of file diff --git a/crypto_sign/dilithium2/m4fstack/smallntt_769.S b/crypto_sign/dilithium2/m4fstack/smallntt_769.S new file mode 120000 index 00000000..6300683f --- /dev/null +++ b/crypto_sign/dilithium2/m4fstack/smallntt_769.S @@ -0,0 +1 @@ +../../dilithium3/m4fstack/smallntt_769.S \ No newline at end of file diff --git a/crypto_sign/dilithium3/m4fstack/macros_fnt.i b/crypto_sign/dilithium3/m4fstack/macros_fnt.i deleted file mode 100644 index 25903e41..00000000 --- a/crypto_sign/dilithium3/m4fstack/macros_fnt.i +++ /dev/null @@ -1,158 +0,0 @@ -// 2 -.macro ldrstr2 ldrstr, target, c0, c1, mem0, mem1 - \ldrstr \c0, [\target, \mem0] - \ldrstr \c1, [\target, 
\mem1] -.endm - -// 2 -.macro ldrstr2jump ldrstr, target, c0, c1, mem1, jump - \ldrstr \c1, [\target, \mem1] - \ldrstr \c0, [\target], \jump -.endm - -// 4 -.macro ldrstr4 ldrstr, target, c0, c1, c2, c3, mem0, mem1, mem2, mem3 - \ldrstr \c0, [\target, \mem0] - \ldrstr \c1, [\target, \mem1] - \ldrstr \c2, [\target, \mem2] - \ldrstr \c3, [\target, \mem3] -.endm - -// 4 -.macro ldrstr4jump ldrstr, target, c0, c1, c2, c3, mem1, mem2, mem3, jump - \ldrstr \c1, [\target, \mem1] - \ldrstr \c2, [\target, \mem2] - \ldrstr \c3, [\target, \mem3] - \ldrstr \c0, [\target], \jump -.endm - -// 8 -.macro ldrstrvec ldrstr, target, c0, c1, c2, c3, c4, c5, c6, c7, mem0, mem1, mem2, mem3, mem4, mem5, mem6, mem7 - ldrstr4 \ldrstr, \target, \c0, \c1, \c2, \c3, \mem0, \mem1, \mem2, \mem3 - ldrstr4 \ldrstr, \target, \c4, \c5, \c6, \c7, \mem4, \mem5, \mem6, \mem7 -.endm - -// 8 -.macro ldrstrvecjump ldrstr, target, c0, c1, c2, c3, c4, c5, c6, c7, mem1, mem2, mem3, mem4, mem5, mem6, mem7, jump - ldrstr4 \ldrstr, \target, \c4, \c5, \c6, \c7, \mem4, \mem5, \mem6, \mem7 - ldrstr4jump \ldrstr, \target, \c0, \c1, \c2, \c3, \mem1, \mem2, \mem3, \jump -.endm - - - -.macro addSub1 c0, c1 - add.w \c0, \c1 - sub.w \c1, \c0, \c1, lsl #1 -.endm - -.macro addSub2 c0, c1, c2, c3 - add \c0, \c1 - add \c2, \c3 - sub.w \c1, \c0, \c1, lsl #1 - sub.w \c3, \c2, \c3, lsl #1 -.endm - -.macro addSub4 c0, c1, c2, c3, c4, c5, c6, c7 - add \c0, \c1 - add \c2, \c3 - add \c4, \c5 - add \c6, \c7 - sub.w \c1, \c0, \c1, lsl #1 - sub.w \c3, \c2, \c3, lsl #1 - sub.w \c5, \c4, \c5, lsl #1 - sub.w \c7, \c6, \c7, lsl #1 -.endm - -// 2 -.macro barrett_32 a, Qbar, Q, tmp - smmulr.w \tmp, \a, \Qbar - mls.w \a, \tmp, \Q, \a -.endm - -.macro FNT_CT_butterfly c0, c1, logW - add.w \c0, \c0, \c1, lsl #\logW - sub.w \c1, \c0, \c1, lsl #(\logW+1) -.endm - -.macro shift_subAdd c0, c1, shlv - sub.w \c0, \c0, \c1, lsl #(\shlv) - add.w \c1, \c0, \c1, lsl #(\shlv+1) -.endm - -.macro FNT_CT_ibutterfly c0, c1, shlv - shift_subAdd \c0, \c1, 
\shlv -.endm - -// 46 -.macro _3_layer_CT_32_FNT c0, c1, c2, c3, c4, c5, c6, c7, xi0, xi1, xi2, xi3, xi4, xi5, xi6, twiddle, Qprime, Q, tmp, tmp2 - vmov.w \twiddle, \xi0 - - // c0, c1, c2, c3, c4, c5, c6, c7, c8 - // 0,4 - mla \tmp, \c4, \twiddle, \c0 - mls \c4, \c4, \twiddle, \c0 - - // 1,5 - mla \c0, \c5, \twiddle, \c1 - mls \c5, \c5, \twiddle, \c1 - - // 2,6 - mla \c1, \c6, \twiddle, \c2 - mls \c6, \c6, \twiddle, \c2 - - // 3,7 - mla \c2, \c7, \twiddle, \c3 - mls \c7, \c7, \twiddle, \c3 - - // tmp, c0, c1, c2, c4, c5, c6, c7 - - barrett_32 \tmp, \Qprime, \Q, \c3 - barrett_32 \c0, \Qprime, \Q, \c3 - barrett_32 \c1, \Qprime, \Q, \c3 - barrett_32 \c2, \Qprime, \Q, \c3 - barrett_32 \c4, \Qprime, \Q, \c3 - barrett_32 \c5, \Qprime, \Q, \c3 - barrett_32 \c6, \Qprime, \Q, \c3 - barrett_32 \c7, \Qprime, \Q, \c3 - - vmov.w \twiddle, \xi1 - // 0,2 - mla \tmp2, \c1, \twiddle, \tmp - mls \c3, \c1, \twiddle, \tmp - - // 1,3 - mla \tmp, \c2, \twiddle, \c0 - mls \c0, \c2, \twiddle, \c0 - - vmov.w \twiddle, \xi2 - - // 4,6 - mla \c2, \c6, \twiddle, \c4 - mls \c1, \c6, \twiddle, \c4 - - // 5,7 - mla \c6, \c7, \twiddle, \c5 - mls \c7, \c7, \twiddle, \c5 - - // tmp2, tmp, c3, c0 | c2, c6, c1, c7 - - // 4,5 - vmov.w \twiddle, \xi5 - mla \c4, \c6, \twiddle, \c2 - mls \c5, \c6, \twiddle, \c2 - - // 6,7 - vmov.w \twiddle, \xi6 - mla \c6, \c7, \twiddle, \c1 - mls \c7, \c7, \twiddle, \c1 - - // 2,3 - vmov.w \twiddle, \xi4 - mla \c2, \c0, \twiddle, \c3 - mls \c3, \c0, \twiddle, \c3 - - // 0,1 - vmov.w \twiddle, \xi3 - mla \c0, \tmp, \twiddle, \tmp2 - mls \c1, \tmp, \twiddle, \tmp2 -.endm \ No newline at end of file diff --git a/crypto_sign/dilithium3/m4fstack/macros_smallntt.i b/crypto_sign/dilithium3/m4fstack/macros_smallntt.i index b97f4d52..7c9a387c 100644 --- a/crypto_sign/dilithium3/m4fstack/macros_smallntt.i +++ b/crypto_sign/dilithium3/m4fstack/macros_smallntt.i @@ -1,9 +1,23 @@ /* -* NTT and inverse NTT code from: -* Huang, J. et al. 2024. 
Revisiting Keccak and Dilithium Implementations on ARMv7-M. -* IACR Transactions on Cryptographic Hardware and Embedded Systems. 2024, 2 (Mar. 2024), 1–24. -* DOI:https://doi.org/10.46586/tches.v2024.i2.1-24. -* https://github.com/UIC-ESLAS/Dilithium-Multi-Moduli/blob/332a32cc02d407020e48a4f9b3a0dc78d4c8b0bc/M4/crypto_sign/dilithium3/m4plant/smallntt_769.S + * Copyright (c) 2023 Junhao Huang (jhhuang_nuaa@126.com) + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * NTT and inverse NTT code from: + * Huang, J. et al. 2024. Revisiting Keccak and Dilithium Implementations on ARMv7-M. + * IACR Transactions on Cryptographic Hardware and Embedded Systems. 2024, 2 (Mar. 2024), 1–24. + * DOI:https://doi.org/10.46586/tches.v2024.i2.1-24. + * https://github.com/UIC-ESLAS/Dilithium-Multi-Moduli/blob/332a32cc02d407020e48a4f9b3a0dc78d4c8b0bc/M4/crypto_sign/dilithium3/m4plant/smallntt_769.S */ #ifndef MACROS_SMALLNTT_I diff --git a/crypto_sign/dilithium3/m4fstack/smallntt.h b/crypto_sign/dilithium3/m4fstack/smallntt.h index c3fd065f..244fad24 100644 --- a/crypto_sign/dilithium3/m4fstack/smallntt.h +++ b/crypto_sign/dilithium3/m4fstack/smallntt.h @@ -1,9 +1,27 @@ +/** + * Copyright (c) 2023 Junhao Huang (jhhuang_nuaa@126.com) + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #ifndef SMALLNTT_H #define SMALLNTT_H #include <stdint.h> #include "params.h" +#define SMALL_Q 769 + static const int32_t zetas_769[64] = { 3138844760, 1334846793, 999738812, 1854264165, 1681125041, 1150537404, 2820492178, 3071823164, 726067294, 2066499220, 3272887953, 1055590142, 4255871365, 1871019564, 2731130050, 1826338500, 513832239, 1792827701, 3373420347, 2993631302, 1161707670, 3306398751, 3518633806, 3406931146, 1586177780, 3853741788, 3317569017, 3825816122, 971813147, 122872927, 217820188, 619949766, 3753209393, 770748358, 4099487641, 765163225, 3630336467, 1742561504, 3479537875, 982983413, 2809321912, 2379266669, 703726762, 681386230, 4110657907, 1457719720, 1217559000, 2474213930, 1195218468, 1089100940, 564098436, 614364633, 3635921600, 2088839752, 3702943196, 1949211426, 2569161192, 374203913, 3982199847, 2083254619, 1513571050, 3647091866, 413299844, 4149753838}; @@ -16,13 +34,12 @@ static const int32_t zetas_inv_asm_769[256] = { // removed first "2285" + LAYER 3+2+1 - 1 - butterfly 5585134, -346278248, 5585134, -966228013, -346278248, -223405321, 636705165, 446810642, 1519156183, 11170266, -821014555, -1932456027, 301597183, -692556495, -240160720, 1061175275, -1368357591, -519417371, -335107981, 2139105948, -698141628, -625534899, -1267825197, 843355087, 290426917, 128458060, 1295750862, -748407825, -826599688, 1736976371, -240160720, 2005062756, 1061175275, 1100271206, -1368357591, 502661972, 915961816, 1396283256, 452395775, -1038834743, -955057747, -670215963, 2016233022, -16755399, -1675539907, 1614103444, -1290165729, 94947261, 
753992958, -1591762912, 497076839, -1954796559, 1943626293, -1122611738, -1239899531, 938302348, -245745853, 882451018, -435640376, -966228013, 1736976371, -318352582, -240160720, -1401868389, 2005062756, 1016494210, 714897027, -1005323944, 876865885, 2122350549, -1373942724, -2094424884, 1468889985, 1558252114, -1401868389, -686971362, -357448514, 860110486, 1524741316, -1787242568, -44681064, 1407453522, -368618780, 1323676527, -653460564, -1362772458, 1379527857, -463566041, 1859849297, 150798592, -1675539907, 804259156, 1614103444, -67021596, -1290165729, -139628326, -2060914086, -994153678, 55851330, 189894523, -1072345541, 1507985917, 832184821, 1111441472, 2105595150, -525002504, -1809583100, 212235055, 1938041160, -273671518, 100532394, -2044158687, -78191862, 1452134586, 642290298, -2111180283, 552928169, 161968858, -1167292802, -346278248, -966228013, -223405321, 1736976371, 150798592, -318352582, -759578091, -1608518311, -2032988421, -899206417, -480321440, 943887481, 1491230518, -83776995, -284841784, 2005062756, 1100271206, 502661972, 1669954774, -1139367137, -457980908, 1921285760, 1128196871, -1318091394, -1904530361, 396544445, -1228729265, 117287794, 2116765416, 1184048201, -318352582, -1401868389, 1016494210, -686971362, -1413038655, -357448514, 1709050706, -731652426, 89362128, 2021818155, 1720220972, -1882189829, -1245484665, -798674023, 720482160, 804259156, -67021596, -139628326, -536172770, -1731391238, -1117026605, -27925665, -1843093898, -1971551958, 1027664477, 1776072302, -1692295306, 1977137091, 709311894, 1552666981, -223405321, 150798592, -759578091, -1675539907, 2105595150, 804259156, -1697880440, -675801096, 279256651, 949472614, -1066760408, -1050005009, -134043193, 1262240064, 1714635839, 1016494210, -1413038655, 1709050706, 1206388733, 1748146637, -1781657435, -1010909077, -390959312, -1329261660, -1083515807, -1965966825, -1530326449, 809844289, -1541496715, 1630858843, -759578091, 2105595150, -1697880440, -525002504, 631120032, 
-1809583100, -474736307, -1575007513, -201064789, 1893360095, 424470110, -1133782004, -418884977, -1424208921, -547343036, -1697880440, 631120032, -474736307, 1580592646, 1435379187, 787503756, 1200803600, 1999477623, -932717215, 1982722224, -1848679031, 586438968, 1993892490, 1625273710, -1346017059, 0}; - -#define SMALL_Q 769 - +// Q1=769 void small_ntt_asm_769(int16_t a[N], const int32_t * zetas); void small_invntt_asm_769(int16_t a[N], const int32_t * zetas); void small_basemul_asm_769(int16_t *c, const int16_t *a, const int16_t *b, const int32_t *zetas); +// small NTT for computing cs0 and cs1 #define small_ntt(a) small_ntt_asm_769(a, zetas_asm_769) #define small_invntt_tomont(a) small_invntt_asm_769(a, zetas_inv_asm_769) #define small_basemul(r,a,b) small_basemul_asm_769(r, a, b, zetas_769) diff --git a/crypto_sign/dilithium3/m4fstack/smallntt.S b/crypto_sign/dilithium3/m4fstack/smallntt_769.S similarity index 94% rename from crypto_sign/dilithium3/m4fstack/smallntt.S rename to crypto_sign/dilithium3/m4fstack/smallntt_769.S index 9f048042..1c3c9a88 100644 --- a/crypto_sign/dilithium3/m4fstack/smallntt.S +++ b/crypto_sign/dilithium3/m4fstack/smallntt_769.S @@ -1,9 +1,23 @@ /* -* NTT and inverse NTT code from: -* Huang, J. et al. 2024. Revisiting Keccak and Dilithium Implementations on ARMv7-M. -* IACR Transactions on Cryptographic Hardware and Embedded Systems. 2024, 2 (Mar. 2024), 1–24. -* DOI:https://doi.org/10.46586/tches.v2024.i2.1-24. -* https://github.com/UIC-ESLAS/Dilithium-Multi-Moduli/blob/332a32cc02d407020e48a4f9b3a0dc78d4c8b0bc/M4/crypto_sign/dilithium3/m4plant/smallntt_769.S + * Copyright (c) 2023 Junhao Huang (jhhuang_nuaa@126.com) + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * NTT and inverse NTT code from: + * Huang, J. et al. 2024. Revisiting Keccak and Dilithium Implementations on ARMv7-M. + * IACR Transactions on Cryptographic Hardware and Embedded Systems. 2024, 2 (Mar. 2024), 1–24. + * DOI:https://doi.org/10.46586/tches.v2024.i2.1-24. + * https://github.com/UIC-ESLAS/Dilithium-Multi-Moduli/blob/332a32cc02d407020e48a4f9b3a0dc78d4c8b0bc/M4/crypto_sign/dilithium3/m4plant/smallntt_769.S */ #include "macros.i" diff --git a/crypto_sign/dilithium5/m4fstack/smallntt.S b/crypto_sign/dilithium5/m4fstack/smallntt.S deleted file mode 120000 index 7e2174f9..00000000 --- a/crypto_sign/dilithium5/m4fstack/smallntt.S +++ /dev/null @@ -1 +0,0 @@ -../../dilithium3/m4fstack/smallntt.S \ No newline at end of file diff --git a/crypto_sign/dilithium5/m4fstack/smallntt_769.S b/crypto_sign/dilithium5/m4fstack/smallntt_769.S new file mode 120000 index 00000000..6300683f --- /dev/null +++ b/crypto_sign/dilithium5/m4fstack/smallntt_769.S @@ -0,0 +1 @@ +../../dilithium3/m4fstack/smallntt_769.S \ No newline at end of file