-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathrationale.tex
535 lines (428 loc) · 44 KB
/
rationale.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
% !TeX root = falcon.tex
\chapter{The Design Rationale of \falcon}\label{chap:ratio}
% \section{Preliminaries}
%
% \subsection{Lattice}
% An $n$-dimensional lattice $\Lambda$ is any subset of $\bR^n$ that is both:
% \begin{enumerate}
% \item an additive subgroup: $\matzero \in \Lambda$, and $-\vecx, \vecx + \vecy \in \Lambda$ for every $\vecx, \vecy \in \Lambda$ ;
% \item discrete: every $\vecx \in \Lambda$ has a neighhood in $\bR^n$ in which $\vecx$ is the only lattice point.
% \end{enumerate}
% Let $\Lambda_q$ be a lattice embedded in $\bZ^n$, we say $\Lambda_q$ is a $q$-ary lattice for some integer $q$, if $q\bZ \subseteq \Lambda_q$. Since any lattice in closed under addition, the vector $\vecx \in \bZ^n$ is in the $q$-ary lattice $\Lambda_q$ if and only if $\vecx \bmod q$ is also in the lattice.
% Given $n$ linearly independant vectors $\vecb_1, \vecb_2, \ldots, \vecb_n \in \bR^m$, the lattice generated by them is defined as
% \[
% \Lambda(\vecb_1, \vecb_2, \ldots, \vecb_n) = \left\{ \sum x_i\vecb_i~|~x_i \in \bZ\right\}.
% \]
% We refer to $\vecb_1, \vecb_2, \ldots, \vecb_n$ as a basis of the lattice. Equivalently, if we define $\matB$ as the $n \times m$ matrix whose rows are $\vecb_1, \vecb_2, \ldots, \vecb_n$, then the lattice generated by $\matB$ is
% \[
% \Lambda(\matB) = \Lambda(\vecb_1, \vecb_2, \ldots, \vecb_n) = \left\{ \vecx\matB~|~\vecx\in\bZ^n \right\}.
% \]
% We say that the rank of the lattice is $m$. If $m = n$, the lattice (respectively the matrix) is called a full-rank lattice (resp. a full-rank matrix). In this document we will usually consider full-rank lattices as the more general case.
%
% \subsection{Gram-Schmidt Orthogonalisation (GSO)}\label{sec:ratio:gso}
% The Gram-Schmidt orthogonalization (GSO) is an algorithm that transforms any basis $\matB$ of a vector space to an orthogonal basis $\tilde\matB$ of the same vector space. Let $\matB = (\vecb_1,\ldots,\vecb_n)$ be a matrix, the GSO $\tilde\matB = (\tilde\vecb_1,\ldots,\tilde\vecb_n)$ of $\matB$ is defined as follows:
% \begin{align*}
% \tilde\vecb_i = b_i - \sum_{j=1}^{i-1}\frac{\langle\vecb_i,\tilde\vecb_j\rangle}{\lVert\tilde\vecb_j\rVert^2}\tilde\vecb_j.
% \end{align*}
% We refer to $\lVert\tilde\matB\rVert$ as the Gram-Schmidt norm of $\matB$, where $\lVert\tilde\matB\rVert$ denotes the $L_2$ length of the longest vector in $\tilde\matB$, i.e. $\lVert\tilde\matB\rVert := \max_i\lVert\tilde\vecb_i\rVert$ for $1 \leq i \leq n$.
% Note that if $\matB$ is a basis of a lattice $\Lambda$, $\tilde\matB$ is not necessarily a basis of the same lattice and in general, unlike vector spaces, lattices do not admit orthogonal base. However, the Gram-Schnmidt of a lattice basis remain a useful object, in particular when it will come to using a basis to approximate the closest vector problem, the GSO can provide a better approximation.
\section{A Quest for Compactness}
The design rationale of \falcon stems from a simple observation: when switching from RSA- or discrete logarithm-based signatures to post-quantum signatures, communication complexity will likely be a larger problem than speed. Indeed, many post-quantum schemes have a simple algebraic description which makes them fast, but all require either larger keys than pre-quantum schemes, larger signatures, or both.
\medskip
We expect such performance issues will hinder transition from pre-quantum to post-quantum schemes. Hence our leading design principle was to minimize the following quantity:
$$|\pk|+|\signature| = \text{(bitsize of the public key)} + \text{(bitsize of a signature)}.$$
\medskip
This led us to consider lattice-based signatures, which manage to keep both $|\pk|$ and $|\signature|$ rather small, especially for structured lattices. When it comes to lattice-based signatures, there are essentially two paradigms: Fiat-Shamir or hash-and-sign.
\medskip
Both paradigms achieve comparable levels of compactness, but hash-and-sign have interesting properties: the GPV framework~\cite{STOC:GenPeiVai08}, which describes how to obtain hash-and-sign lattice-based signature schemes, is secure in the classical and quantum oracle models~\cite{STOC:GenPeiVai08,AC:BDFLSZ11}. In addition, it enjoys message-recovery capabilities~\cite{SCN:delLyuPoi16}. So we chose this framework. Details are given in \cref{sec:ratio:gpv}.
\medskip
Next, we chose a class of cryptographic lattices to instantiate this framework. A close to optimal choice with respect to our main design principle -- compactness -- is NTRU lattices: they allow to obtain a compact instantiation~\cite{AC:DucLyuPre14} of the GPV framework. In addition, their structure speeds up many operations by two orders of magnitude. Details are given in \cref{sec:ratio:ntru}.
\medskip
The last step was the trapdoor sampler. We devised a new trapdoor sampler which is asymptotically as fast as the fastest generic trapdoor sampler~\cite{C:Peikert10} and provides the same level of security as the most secure sampler~\cite{SODA:Klein00}. Details are given in \cref{sec:ratio:ffs}.
\section{The Gentry-Peikert-Vaikuntanathan Framework}\label{sec:ratio:gpv}
In 2008, Gentry, Peikert and Vaikuntanathan~\cite{STOC:GenPeiVai08} established a framework for obtaining secure lattice-based signatures. At a very high level, this framework may be described as follows:
\begin{itemize}
\item
The public key contains a full-rank matrix $\matA \in \bZ_q^{n \times m}$ (with $m>n$) generating a $q$-ary lattice $\Lambda$.
\item
The private key contains a matrix $\matB \in \bZ_q^{m \times m}$ generating $\Lambda_q^\perp$, where $\Lambda_q^\perp$ denotes the lattice orthogonal to $\Lambda$ modulo $q$: for any $\vecx \in \Lambda$ and $\vecy \in \Lambda_q^\perp$, we have $\inner{\vecx}{\vecy} = 0 \bmod q$. Equivalently, the rows of $\matA$ and $\matB$ are pairwise orthogonal: $\matB \times \matA^\t = \matzero$.
\item Given a message \msg, a signature of \msg is a short value $\vecs \in \bZ_q^m$ such that $\vecs \matA^\t = H(\msg)$, where $H : \{0,1\}^\ast \rightarrow \bZ_q^n$ is a hash function. Given $\matA$, verifying that $\vecs$ is a valid signature is straightforward: it only requires to check that $\vecs$ is indeed short and verifies $\vecs \matA^\t = H(\msg)$.
\item Computing a valid signature is more delicate. First, a preimage $\vecc_0 \in \bZ_q^m$ is computed, which verifies $\vecc_0 \matA^\t = H(\msg)$.
As $\vecc_0$ is not required to be short and $m \geq n$, this is simply done via standard linear algebra. $\matB$ is then used in order to compute a vector $\vecv \in \Lambda_q^\perp$ close to $\vecc_0$.
The difference $\vecs = \vecc_0 - \vecv$ is a valid signature: indeed, $\vecs \matA^\t = \vecc_0 \matA^\t - \vecv \matA^\t = \vecc - \matzero = H(\msg)$, and if $\vecc_0$ and $\vecv$ are close enough, then $\vecs$ is short.
\end{itemize}
This high-level description of a signature scheme is not exclusive to the GPV framework: it was first instantiated in the GGH~\cite{C:GolGolHal97b} and \ntrusign~\cite{RSA:HHPSW03} signature schemes. However, these schemes suffered total break attacks, whereas the GPV framework is proven secure in the (quantum) random oracle model assuming the hardness of \sis for some parameters. This is because GGH/\ntrusign and the GPV framework have radically different ways of computing $\vecv$ in the signing procedure.
\paragraph{Computing $\vecv$ in GGH and \ntrusign.}
In GGH and \ntrusign, $\vecv$ is computed using an algorithm called the round-off algorithm and first formalized by Babai~\cite{STACS:Babai85,Combinatorica:Babai86}. In this deterministic algorithm, $\vecc_0$ is first expressed as a real linear combination of the rows of $\matB$, the vector of these real coordinates is then rounded coefficient-wise and multiplied again by $\matB$: in a nutshell, $\vecv \gets \left\lfloor \vecc_0 \matB^{-1} \right\rceil \matB$, where $\lfloor\cdot\rceil$ denotes coefficient-wise rounding. At the end of the procedure, $\vecs = \vecv- \vecc_0$ is guaranteed to lie in the parallelepiped $[-1, 1]^m \times \matB$, which allows to tightly bound the norm $\|\vecs\|$.
%
The problem with this approach is that each signature $\vecs$ lies in $[-1, 1]^m \times \matB$, and therefore leaks information about the basis $\matB$. This fact was exploited by several key-recovery attacks~\cite{EC:NguReg06,AC:DucNgu12b}.
\paragraph{Computing $\vecv$ in the GPV framework.}
A major contribution of~\cite{STOC:GenPeiVai08}, which is also the key difference between the GPV framework and GGH/\ntrusign, is the way $\vecv$ is computed. Instead of the round-off algorithm, the GPV framework relies on a randomized variant by ~\cite{SODA:Klein00} of the nearest plane algorithm, also formalized by Babai.
Just as for the round-off algorithm, using the nearest plane algorithm would have leaked the secret basis $\matB$ and resulted in a total break of the scheme. However, Klein's algorithm prevents this: it is randomized in a way such that for a given \msg, $\vecs$ is sampled according to a spherical Gaussian distribution over the shifted lattice $\vecc_0 + \Lambda_q^\perp$. This method is proven to leak no information about the basis $\matB$. Klein's algorithm was in fact the first of a family of algorithms called \textit{trapdoor samplers}. More details about trapdoor samplers are given in \cref{sec:ratio:ffs}.
\subsection{Features and instantiation of the GPV framework}\label{sec:ratio:features}
%The topic of this section is to make explicit a few aspects and features of the GPV framework.
\paragraph{Security in the classical and quantum oracle models.}
In the original paper~\cite{STOC:GenPeiVai08}, the GPV framework has been proven to be secure in the random oracle model under the \sis assumption. In our case, we use NTRU lattices so we need to adapt the proof for a ``NTRU-\sis'' assumption, but this adaptation is straightforward. In addition, the GPV framework has also been proven to be secure in the quantum oracle model~\cite{AC:BDFLSZ11}.
\paragraph{Identity-based encryption.}
\falcon can be turned into an identity-based encryption scheme, as described in \cite{AC:DucLyuPre14}. However, this requires de-randomizing the signature procedure (see \cref{sec:ratio:randomization}).
\subsection{Statefulness, de-randomization or hash randomization}\label{sec:ratio:randomization}
% In its original form, the hash-and sign scheme described in \cite{STOC:GenPeiVai08} suffers from a major drawback: it is stateful. Indeed, since trapdoor samplers are randomized, two distinct valid signatures for a given message \msg could be output. But \cite{STOC:GenPeiVai08} makes sure it does never happen by making the signer maintain a state of all signatures he has computed. This is due to security reasons: given two distinct signatures $\vecs, \vecs'$ of a message \msg, the difference $\vecs - \vecs'$ is a solution of the \sis problem over $\matA$ (for a certain set of parameters): $(\vecs - \vecs') \matA = H(\msg) - H(\msg) = \matzero$. This fact underlies the security proof, but it is also the reason why two different signatures of a same hash cannot be made public: security of the GPV framework under the \sis assumption could no longer be claimed. Perhaps more importantly from a practical perspective, $\vecs - \vecs'$ would be a somewhat short vector of $\Lambda_q^\perp$. Having several such vectors would allow an attacker to construct its own short basis for $\Lambda_q^\perp$ and grant him forgery abilities, as described in \eg \cite[Section 2.5.1]{Prest15}.
In the GPV framework, two different signatures $\vecs, \vecs'$ of a same hash $H(\msg)$ can never be made public simultaneously, because doing so breaks the security proof~\cite[Section 6.1]{STOC:GenPeiVai08}.
% TODO: is this property similar to what happens with SPHINCS?
\paragraph{Statefulness.} A first solution proposed in \cite[Section 6.1]{STOC:GenPeiVai08} is to make the scheme stateful by maintaining a list of the signed messages and of their signatures. However, maintaining such a state poses a number of operational issues, so we do not consider it as a credible solution.
\paragraph{De-randomization.} A second possibility proposed by \cite{STOC:GenPeiVai08} is to de-randomize the signing procedure. However, pseudorandomness would need to be generated in a consistent way over all the implementations (it is not uncommon to have a same signing key used in different devices). While this solution can be applied in a few specific usecases, we do not consider it for \falcon.
\paragraph{Hash randomization.} A third solution is to prepend a salt $\salt \in \{0,1\}^{k}$ to the message \msg before hashing it. Provided that $k$ is large enough, this prevents collisions from occurring. From an operational perspective, this solution is the easiest to apply, and it is still covered by the security proof of the GPV framework~(see \cite[Section 6.2]{STOC:GenPeiVai08}). For a given security level $\lambda$ and up to $q_s$ signature queries, taking $k = \lambda + \log_2( q_s )$ is enough to guarantee that the probability of collision is less than $q_s \cdot 2^{-\lambda}$.
Out of the three solutions, \falcon opts for hash randomization: a salt $\salt \in \{0,1\}^{320}$ is randomly generated and prepended to the message before hashing it. The bitsize $320$ is equal to $\lambda + \log_2( q_s )$ for $\lambda = 256$ the highest security level required by NIST, and $q_s = 2^{64}$ the maximal number of signature which may be queried from a single signer. This size is actually overkill for security levels $ \lambda < 256$, but fixing a single size across all the security levels makes things easier from an API perspective: for example, one can hash a message without knowing the security level of the private signing key.
\section{NTRU Lattices}\label{sec:ratio:ntru}
The first choice when instantiating the GPV framework is the class of lattices to use. The design rationale obviously plays a large part in this. Indeed, if emphasis is placed on security without compromise, then the logical choice is to use standard lattices without any additional structure, as was done \eg in the key-exchange scheme \textsc{Frodo}~\cite{CCS:BCDMNN16}.
Our main design principle is compactness. For this reason, \falcon relies on the class of NTRU lattices, introduced by Hoffstein, Pipher and Silverman~\cite{ANTS:HofPipSil98}; they come with an additional ring structure which not only does allow to reduce the public keys' size by a factor $O(n)$, but also speeds up many computations by a factor at least $O(n / \log n)$. Even in the broader class of lattices over rings, NTRU lattices are among the most compact: the public key can be reduced to a single polynomial $h \in \bZ_q[x]$ of degree at most $n-1$. In doing this we follow the idea of Stehl\'e and Steinfeld~\cite{EC:SteSte11}, who showed that the GPV framework can be used with NTRU lattices in a provably secure way.
Compactness, however, would be useless without security. From this perspective, NTRU lattices also have reasons to inspire confidence as they have resisted extensive cryptanalysis for about two decades, and we parameterize them in a way which we believe makes them even more resistant.
\subsection{Introduction to NTRU lattices}
Let $\phi = x^n + 1$ for $n = 2^\kappa$ a power of two, and $q\in \bN^\star$. A set of NTRU secrets consists of four polynomials $f,g,F,G \in \bZ[x]/(\phi)$ which verify the NTRU equation:
\begin{equation}\label{eq:ntruset}
f G - g F = q \mod \phi
\end{equation}
Provided that $f$ is invertible modulo $q$, we can define the polynomial $h \gets g \cdot f^{-1} \bmod q$.
Typically, $h$ will be a public key, whereas $f,g,F,G$ will be secret keys. Indeed, one can check that the matrices $\twotwo{1}{h}{0}{q}$ and $\twotwo{f}{g}{F}{G}$ generate the same lattice, but the first matrix contains two large polynomials ($h$ and q), whereas the second matrix contains only small polynomials, which allows to solve problems as illustrated in \cref{sec:ratio:gpv}. If $f,g$ are generated with enough entropy, then $h$ will look pseudo-random~\cite{EC:SteSte11}. However in practice, even when $f,g$ are quite small, it remains hard to find small polynomials $f',g'$ such that $h = g' \cdot (f')^{-1} \bmod q$. The hardness of this problem constitutes the NTRU assumption.
\subsection{Instantiation with the GPV framework}
We now instantiate the GPV framework described in \cref{sec:ratio:gpv} over NTRU lattices:
\begin{itemize}
\item The public basis is $\matA = \onetwo{1}{\adj h}$, but this is equivalent to knowing $h$.
\item The secret basis is
\begin{equation}\label{eq:B}
\matB = \twotwo{g}{-f}{G}{-F}
\end{equation}
One can check that the matrices $\matA$ and $\matB$ are indeed orthogonal: $\matB \times \adj\matA = 0 \bmod q$.
\item The signature of a message \msg consists of a salt $\salt$ plus a pair of polynomials $(s_1, s_2)$ such that $s_1 + s_2 h = H(\salt\|\msg)$. We note that since $s_1$ is completely determined by $\msg, \salt$ and $s_2$, there is no need to send it: the signature can simply be $(\salt, s_2)$.
\end{itemize}
% \subsection{Generation of NTRU bases}
% TODO
\subsection{Choosing optimal parameters}
Our trapdoor sampler samples signatures of norm essentially proportional to $\gsnorm{\matB}$, where $\gsnorm{\matB}$ denotes the Gram-Schmidt norm of $\matB$.
% One can show (see \eg~\cite{AC:DucLyuPre14}) that the value of $\gsnorm{\matB}$ is:
% \begin{equation}\label{eq:gsnorm}
% \gsnorm{\matB} = \max \left\{ \norm{(f,g)}, \norm{(\tilde{F},\tilde{G})} \right\},
% \end{equation}
% where $\tilde G = \frac{q\adj f}{\ffgg}$ and $\tilde F = -\frac{q\adj g}{\ffgg}$.
Previous works (\cite{AC:DucLyuPre14} and \cite[Sections 6.4.1 and 6.5.1]{Prest15}) have provided heuristic and experimental evidence that in practice, $\gsnorm{\matB}$ is minimized for $\norm{(f,g)} \approx 1.17 \sqrt{q}$.
%$\norm{(f,g)} \approx \sqrt{\frac{qe}{2}} \approx 1.17 \sqrt{q}$.
Therefore, we generate $f,g$ as discrete Gaussians in $\bZ[x]/(\phi)$ centered in $0$, so that the expected value of $\norm{(f,g)}$ is about $1.17 \sqrt{q}$. Once this is done, very efficient ways to compute $\gsnorm{\matB}$ are known, and if this value is more than $1.17 \sqrt{q}$, new polynomials $f,g$'s are regenerated and the procedure starts over.
% This gives us the following procedure for quickly finding $(f,g)$ which generate a matrix $\matB$ with a short Gram-Schmidt norm:
% A previous work by Prest~\cite[Sections 6.4.1 and 6.5.1]{Prest15} has provided heuristic and experimental evidence that in practice, $\norm{(\tilde{F},\tilde{G})} \approx \frac{qe}{2 \norm{(f,g)}}$. Therefore $\gsnorm{\matB}$ is minimized for $\norm{(f,g)} \approx \sqrt{\frac{qe}{2}} \approx 1.17 \sqrt{q}$. This gives us the following procedure for quickly finding $(f,g)$ which generate a matrix $\matB$ with a short Gram-Schmidt norm:
% \begin{enumerate}
% \item\label{item:one} Generate each coefficient of $f$ and $g$ from the discrete Gaussian $D_{\bZ, 1.17\sqrt{q/2n}}$.
% \item Compute $\gamma \gets \gsnorm{\matB}$ using the equation \ref{eq:gsnorm}.
% \item If $\gamma > 1.17 \sqrt{q}$, goto \ref{item:one}. Else, output $(f,g)$.
% \end{enumerate}
\paragraph{Quasi-optimality.}
The bound $\gsnorm{\matB} \leq 1.17 \sqrt{q}$ that we reach in practice is within a factor $1.17$ of the theoretic lower bound for $\gsnorm{\matB}$. Indeed, for any $\matB$ of the form given in \eqref{eq:B} with $f,g,F,G$ verifying \eqref{eq:ntruset}, we have $\det(\matB) = f G - g F = q$. So $\sqrt{q}$ is a theoretic lower bound of $\gsnorm{\matB}$.
\section{Fast Fourier Sampling}\label{sec:ratio:ffs}
The second choice when instantiating the GPV framework is the trapdoor sampler. A trapdoor sampler takes as input a matrix $\matA$, a trapdoor \textsf{T}, a target $\vecc$ and outputs a short vector $\vecs$ such that $\vecs^\t \matA = \vecc \bmod q$. With the notations of \cref{sec:ratio:gpv}, this is equivalent to finding $\vecv \in \Lambda_q^\perp$ close to $\vecc_0$, so we may indifferently refer by the term ``trapdoor samplers'' to algorithms which perform one task or the other.
We now list the existing trapdoor samplers, their advantages and limitations. Obviously, being efficient is important for a trapdoor sampler. However, an equally important metric is the ``quality'' of the sampler: the shorter the vector $\vecs$ is (or equivalently, the closer $\vecv$ is to $\vecc_0$), the more secure this sampler will be.
\begin{enumerate}
\item Klein's algorithm~\cite{SODA:Klein00} takes as a trapdoor the matrix $\matB$. It outputs vectors $\vecs$ of norm proportional to $\gsnorm{\matB}$, which is short and therefore good for security. On the downside, its time and space complexity are in $O(m^2)$.
\item Just like Klein's algorithm is a randomized version of the nearest plane algorithm, Peikert proposed a randomized version of the round-off algorithm~\cite{C:Peikert10}. A nice thing about it is that when $\matB$ has a structure over rings -- as in our case -- then it can be made to run in time and space $O(m \log m)$. However, it outputs vectors of norm proportional to the spectral norm $\|\matB\|_2$ of $\matB$. This is larger than what we get with Klein's algorithm, and therefore it is worse security-wise.
\item Micciancio and Peikert~\cite{EC:MicPei12} proposed a novel approach in which $\matA$ and its trapdoor are constructed in a way which allows simple and efficient trapdoor sampling. Unfortunately, it is not straightforwardly compatible with NTRU lattices and yet has to reach the same level of compactness as with NTRU lattices~\cite{AC:CheGenMuk19}.
\item Ducas and Prest~\cite{ISSAC:DucPre16} proposed ``\textit{fast Fourier nearest plane}'', a variant of Babai's nearest plane algorithm for lattices over rings. It proceeds in a recursive way which is very similar to the fast Fourier transform, hence the name. This algorithm can be randomized: it results in a trapdoor sampler which combines the quality of Klein's algorithm, the efficiency of Peikert's and can be used over NTRU lattices.
\end{enumerate}
Of the four approaches we just described, it seems clear to us that a randomized variant of the fast Fourier nearest plane~\cite{ISSAC:DucPre16} is the most adequate choice given our design rationale and our previous design choices (NTRU lattices). For this reason, it is the trapdoor sampler used in \falcon.
%The implementation details are given in chapter~\ref{chap:ffs}.
\begin{table}[H]
\centering
\begin{tabular}{|l|c|c|c|}
\hline
\textbf{\textsf{Sampler}} & \textbf{\textsf{Fast}} & \textbf{\textsf{Short output $\vecs$}} & \textbf{\textsf{NTRU-friendly}} \\
%\hhline{|=#=|=|=|}
\hline
Klein~\cite{SODA:Klein00} & \no & \yes & \yes \\
%\hline
Peikert~\cite{C:Peikert10} & \yes & \no & \yes \\
%\hline
Micciancio-Peikert~\cite{EC:MicPei12} & \yes & \yes & \no \\
%\hline
Ducas-Prest~\cite{ISSAC:DucPre16} & \yes & \yes & \yes \\
\hline
\end{tabular}
\caption{Comparison of the different trapdoor samplers}\label{tab:samplers}
\end{table}
\paragraph{Choosing the standard deviation.} When using a trapdoor sampler, an important parameter to set is the standard deviation $\sigma$. If it is too low, then it is no longer guaranteed that the sampler not leak the secret basis (and indeed, for all known samplers, a value $\sigma = 0$ opens the door to learning attacks \`a la \cite{EC:NguReg06,AC:DucNgu12b}). But if it is too high, the sampler does not return optimally short vectors and the scheme is not as secure as it could be. So there is a compromise to be found.
%
%\todo{FIXME}
%
Our fast Fourier sampler shares many similarities with Klein's sampler, including the optimal value for $\sigma$. Following \cite[Section 4.4]{AC:Prest17}, we take $\sigma = \smoothZ \cdot \gsnorm{\matB}$.
\section{Security}\label{sec:rat:sec}
% \tprcomment{Todo: some bonding text
% \newpage}
% In this section we review the different known attacks why apply to \falcon. This is done in \cref{sec:rat:sec:attacks}.
%
% \falcon relies on floating-point arithmetic for the signing procedure, which raises the question of the required floating-point arithmetic precision. We dicuss the impact of the precision on the security of \falcon in \cref{sec:rat:sec:precision}.
% \pagebreak
\subsection{Known Attacks}\label{sec:rat:sec:attacks}
\paragraph{Key Recovery.} The most efficient attacks come from lattice
reduction.
We start by considering the lattice generated by the columns of
$\twotwo{q}{h}{0}{1}$.
After using lattice reduction on this basis, we enumerate all lattice
points in a ball of radius $\sqrt{2n} \cdot \sigmafg$, centered on the origin.
With significant probability, we are therefore able to find
$\onetwo{g}{f}$.
%If we use a block-size of $B$, enumeration takes negligible time if the
%$(2n-B)$th Gram-Schmidt norm is larger than $\sqrt{3B/4}\cdot \sigmafg$.
Let $\lambda$ be the $(2n-B)$th Gram-Schmidt norm, which is approximately
the norm of the shortest vector of the lattice generated by the last $B$
vectors projected orthogonally to the first $2n-B-1$ vectors.
A sieve algorithm performed on this projected lattice will recover all vectors
of norm smaller than $\sqrt{4/3}\lambda$ (see~\cite{EC:Ducas18} for instance).
If the projection of the key is among them, that is when
$$\sqrt{B}\sigmafg \leq \sqrt{4/3}\lambda,$$ we can recover a secret key vector
from its projection by using Babai's Nearest Plane algorithm on all sieved vectors
with high probability.
This is because all remaining Gram-Schmidt norms are larger than $\lambda$,
which is much larger than $\sigmafg$.
For the best known lattice reduction algorithm, DBKZ~\cite[Corollary 2]{EC:MicWal16},
we get
\[ \lambda = \bigg(\frac{B}{2\pi e}\bigg)^{1-n/B}\sqrt{q},\]
and
\begin{equation}\label{eq:blocksize_keyrecovery}
(B/2\pi e)^{1-n/B}\sqrt{q} = \sqrt{3/4B}\sigmafg
\end{equation}
Note that we conservatively assumed that we could perform a sieve algorithm in dimension $B$
for the same cost as the SVP oracle inside the DBKZ algorithm, which is a slight overestimate
\cite{EC:Ducas18}.
It is then easy to deduce $B$.
%, and to show that $B=n+o(n)$, which is the fastest attack \emph{asymptotically}.
Note that the given value for the Gram-Schmidt norm is correct only when
the basis is first randomized, and it is necessary to do so
(asymptotically).
\paragraph{Forging a Signature.} Forging a signature can be performed by finding a lattice point at distance bounded by $\beta$ from a random point, in the same lattice as above.
This task can also be solved by lattice reduction.
One possibility is to use Kannan's embedding, that is add $(H(r||m), 0, K)$ to the lattice basis,
extended by a row of zeroes, which gives the following matrix:
\[ \left[
\begin{array}{c|c|c}
q & h & H(r\| m) \\
\hline
0 & 1 & 0 \\
\hline
0 & 0 & K
\end{array}
\right].
\]
\iffalse
$(2n+1) \times (2n+1)$ matrix:
%
%\todo{FIXME: transpose table}
\newcommand{\toto}[1]{\multicolumn{3}{c|}{\multirow{3}{*}{#1}}}
\[
% \renewcommand{\arraystretch}{1}
\renewcommand{\arraycolsep}{2.5mm}
\left[
\begin{array}{ccc|ccc|c}
\toto{$\matI_n$} & \toto{$0$} & \multirow{3}{*}{0}\\
&&&&&& \\
&&&&&& \\
\hline
\toto{$\cC(h)$} & \toto{$q \matI_n$} & \multirow{3}{*}{0}\\
&&&&&& \\
&&&&&& \\
\hline
\multicolumn{3}{c|}{H(\salt \| \msg)} & \multicolumn{3}{c|}{0} & 1
\end{array}
\right],
\]
where $\cC(h)$ is the $n \times n$ matrix which $i$-th row is the vector of coefficients of $x^{i-1} \cdot h \bmod (x^n + 1)$.
\fi
%This task is also eased by first carrying out lattice reduction on the original basis.\todo{Does it change anything to the security?}
%
As sieve algorithms generate many short vectors, we can certainly find among them a vector of the
form $(c,*, K)$ and then $H(r||m)-c$ is a lattice point.
Taking $K\approx \sqrt{q}$, the DBKZ algorithm~\cite[Corollary 2]{EC:MicWal16} gives as a success condition for
the forgery: %requires the blocksize $B$ used for DBKZ to be large enough so that:
\begin{equation}\label{eq:blocksize_forgery}
\bigg(\frac{B}{2\pi e}\bigg)^{n/B} \sqrt{q} \leq \beta.
\end{equation}
Interestingly, since the factor $\sqrt{q}$ is also present in $\beta$, the modulus $q$ has virtually no effect on the best forgery attack. This is the best attack against our instantiations.
%, even if we have $B=2n+o(n)$.
%\textcolor{red}{TPr: I don't get this last sentence}
We convert the blocksize $B$ into concrete bit-security following the methodology of
New Hope~\cite{USENIX:ADPS16}, sometimes called ``core-SVP methodology''.
This gives the bit-security as per \cite{SODA:BDGL16,Laarhoven16}:
\begin{alignat}{4}
&\text{Classical:} && \lfloor 0.292 \cdot B \rfloor \label{eq:sec_classic} \\
&\text{Quantum:} && \lfloor 0.262 \cdot B \rfloor \label{eq:sec_quantum}
\end{alignat}
This gives the following table.
%\def\lvliforgebkzblocksize{393}
%\FPeval{\lvliforgeclassic}{round(\lvliforgebkzblocksize * 0.292 - 0.5, 0)}
%\FPeval{\lvliforgequantum}{round(\lvliforgebkzblocksize * 0.265 - 0.5, 0)}
%
%\def\lvlvforgebkzblocksize{922}
%\FPeval{\lvlvforgeclassic}{round(\lvlvforgebkzblocksize * 0.292 - 0.5, 0)}
%\FPeval{\lvlvforgequantum}{round(\lvlvforgebkzblocksize * 0.265 - 0.5, 0)}
%We recall that this is believed to be a conservative estimate,
%as we neglect the cost of a large memory, lower order terms in the Nearest Neighbor
%Search~\cite{SODA:BDGL16} and the number of calls to the oracle.
%The optimization of Ducas~\cite{EC:Ducas18} indicates that the dimension of the lattice sieved
%is decreased by respectively 36 and 85 compared to the table.
%We believe the neglected terms have a larger impact than this so that the scheme fulfills
%the security levels 1 and 5.
\begin{center}
\begin{tabular}{||c|cc| c c|cc| c c||}
\hline
& \multicolumn{4}{c|}{Key recovery} & \multicolumn{4}{c||}{Forgery} \\
\hline
$n$ & $B$ & $B'$ & Classical & Quantum & $B$ & $B'$ & Classical & Quantum \\
\hline
512 & \keyrecbkzi & \keyrecsievei & \keyrecclassici & \keyrecquantumi & \forgebkzi & \forgesievei & \forgeclassici & \forgequantumi \\
1024 & \keyrecbkzv & \keyrecsievev & \keyrecclassicv & \keyrecquantumv & \forgebkzv & \forgesievev & \forgeclassicv & \forgequantumv \\
\hline
\end{tabular}
\end{center}
\paragraph{Concrete cost of the best attacks.}
For \falcon-512, we estimate the complexity of the best attack as equivalent
to a BKZ with block size $B = 411$. The latest method~\cite{EC:ADHKPS19}
suggests that the cost
in dimension $n$ is close to solving $\frac{n^3}{4B^2}$ shortest vector
problem instances in dimension $B$.
The optimization of Ducas~\cite{EC:Ducas18} decreases the
dimension of the lattice sieved by $\left\lfloor \frac{B \ln(4/3)}{\ln(B / (2 \pi e)} \right\rceil = 37$ to $B' = \forgesievei$.
Taking only the first asymptotical term in the complexity of a
sieve~\cite{SODA:BDGL16} leads to a number of $\frac{n^3}{4B^2} \cdot (\sqrt{1.5})^{B'} \approx 2^{120}$ classical operations (where $\sqrt{1.5} \approx 2^{0.292}$).
This is believed to be a conservative estimate,
as we neglect the lower order subexponential terms in the Nearest Neighbor
Search.
Each operation includes a random access of at least one bit to
a memory which has to contain $2^{77}$ vectors.
A recent record~\cite{EC:ADHKPS19} used $2^{19} (\sqrt{1.5})^{112}$ cycles for a sieve in dimension 112, and an average cycle certainly used more than 16 gates.
We therefore regard an estimate of the minimum number of gates of
$2^{120+19+4}=2^{143}$ as conservative.
For \falcon-1024, key recovery is slightly more efficient.
The first part of the attack uses lattice reduction, and cost more
than $2^{10}$ calls to a SVP instance in dimension $B = 936$, which corresponds to a sieve in dimension $B' = \keyrecsievev$.
This indicates a total of at least classical $2^{264}$ operations; and a number of gates
larger than $2^{287}$.
For the quantum cost, we take \cite[Table 10]{EC:JNRV20} as a baseline. For key search on AES-\{128,256\}, it indicates a cost of $\{2^{82}, 2^{143}\}$ gates. This is far below the estimated quantum cost for breaking \falcon.
% Taking only the complexity of the shortest vector problem at the first
%order of approximation --- a conservative estimate ---, this gives
%respectively $172$ quantum (
%
%\paragraph{Key Recovery.} The most efficient attacks come from lattice reduction.
%We start by considering the lattice $(\bZ[x]/(\phi))^2\twotwo{0}{q}{1}{h}$.
%After using lattice reduction on this basis, we enumerate all lattice points in a ball of radius $\sqrt{2n}\sigma'$, centered on the origin.
%With significant probability, we are therefore able to find $\onetwo{g}{f}$.
%If we use a block-size of $B$, enumeration takes negligible time if the $2n-B$th Gram-Schmidt norm is larger than $0.75\sqrt{B}\sigma'$.
%For the best known lattice reduction algorithm, DBKZ~\cite{EC:MicWal16}, it is
%% \[ \big(\frac{B}{2\pi\mathrm{e}}\big)^{(2n-B)/2B}\sqrt{q}. \]
%\[ \big(\frac{B}{2\pi\mathrm{e}}\big)^{(1-n/B)}\sqrt{q}. \]
%It is then easy to deduce $B$, and to show that $B=n+o(n)$.
%This gives $B=652$ when $n=768$ and $B=921$ when $n=1024$.
%The security implied is detailed in the following table, using the methodology of New Hope~\cite{USENIX:ADPS16}. %PK
%
%\begin{center}
%\begin{tabular}{||c| c| c c||}
%\hline
%$n$ & $B$ & Classical & Quantum \\%& Plausible \\
%\hline
%$512$ & $392$ & $114$ & $103$ \\%& $74$ \\
%$768$ & $652$ & $195$ & $172$ \\%& $116$ \\
%$1024$ & $921$ & $263$ & $230$ \\%& $160$ \\
%\hline
%\end{tabular}
%\end{center}
%% Taking only the complexity of the shortest vector problem at the first order of approximation --- a conservative estimate ---, this gives respectively $172$ quantum (
%
%\paragraph{Forging a Signature.} Forging a signature can be perfomed by finding a lattice point at distance bounded by $\beta$ from a random point, in the same lattice as above.
%This task is also eased by first carrying out lattice reduction on the original basis.
%One possibility is to enumerate all lattice points in a ball of radius $\sqrt{\frac{nq}{\pi\mathrm{e}}}$.
%As this ball is larger than the one of the previous attack, it would be slower.
%It may seem as if it would be much smaller than the previous attack due to a factor $\Theta(\sqrt{n})$ in the radius.
%It is not the case, since the lattice has an (almost) orthogonal basis, which implies there are few ($2^{o(n)}$) points at distance in $o(\sqrt{n})$.
%This implies that the proposed method essentially starts by recovering the secret key, so that it is slower than the previous algorithm.
%Also, embedding the point in the lattice does not help: the distance to the lattice is $\Theta(\sqrt{n})$ greater than the shortest non-zero point.
%
%\paragraph{Combinatorial attack.} If we were to choose $q=O(n)$, the size of the coefficients would be constant.
%Then, Kirchner-Fouque~\cite{C:KirFou15} BKW variant would run in time $2^{n/((2+o(1))\log \log n)}$ to recover the key, i.e. asymptotically faster than the previous algorithms.
%It indicates that the most compact scheme uses $q=n^{1+\epsilon+o(1)}$ for some $\epsilon>0$.
%However, since $n$ is not huge, our moderate $q$ is enough to make this attack irrelevant.
%Indeed, even assuming that nearest neighbor search runs in constant time and other optimistic assumptions, the best combinatorial attack runs in time $2^{135}$ for $n=512$. %PK:complete
\paragraph{Hybrid attack.} The hybrid attack~\cite{C:HowgraveGraham07} combines a meet-in-the-middle algorithm and the key recovery algorithm.
It was used with great effect against NTRU, due to its choice of {\em sparse} polynomials.
This is however not the case here, so that its impact is much more modest, and counterbalanced by the lack of sieve-enumeration.
\paragraph{Dense, high rank sublattice.}
Recent works~\cite{C:AlbBaiDuc16,EPRINT:CheJeoLee16,EC:KirFou17} have shown that when $f,g$ are extremely small compared to $q$, it is easy to attack cryptographic schemes based on NTRU lattices. To the contrary, in \falcon we take $f,g$ to be not too small while $q$ is hardly large: a side-effect is that this makes our scheme impervious to the so-called ``overstretched NTRU'' attacks.
In particular, even if $f,g$ were taken to be binary, we would have to select $q>n^{2.83}$\todo[inline]{How was $q>n^{2.83}$ computed?} for this property to be useful for cryptanalysis.
Our large margin should allow even significant improvements of this algorithm to be irrelevant to our case.
\paragraph{Algebraic attacks.}
While there is a rich algebraic structure in \falcon, there is no known way to improve all the algorithms previously mentioned with respect to their general lattice equivalent by more than a factor $n^2$.
However, there exist efficient algorithms for finding not-so-small elements in {\em ideals} of $\bZ[x]/(\phi)$~\cite{EC:CraDucWes17}.
\newpage
\subsection{Precision of the Floating-Point Arithmetic}~\label{sec:rat:sec:precision}
% \tprcomment{I added this precision analysis. The exercise was a bit delicate, so please check the presentation to make sure we won't get destroyec because of it.}
Trapdoor samplers usually require the use of floating-point arithmetic, and our fast Fourier sampler is no exception. This naturally raises the question of the precision required to claim meaningful security bounds. A naive analysis would require a precision of $O(\lambda)$ bits (notwithstanding logarithmic factors), but this would result in a substantially slower signature generation procedure.
In order to analyze the required precision, we use a R\'enyi divergence argument. As in \cite{C:MicWal17}, we denote by $a \lesssim b$ the fact that $a \leq b + o(b)$, which allows discarding negligible factors in a rigorous way. Our fast Fourier sampler is a recursive algorithm which relies on $2n$ discrete samplers $D_{\bZ,c_j,\sigma_j}$. We suppose that the values $c_j$ (resp. $\sigma_j$) are known with an \emph{absolute} error (resp. \emph{relative} error) at most $\dc$ (resp. $\ds$) and denote by $\cD$ (resp. $\err \cD$) the output distribution of our sampler with infinite (resp. finite) precision. We can then re-use the precision analysis of Klein's sampler in \cite[Section 4.5]{AC:Prest17}. For any output of our sampler with non-negligible probability, in the worst case:
\begin{equation}\label{eq:precisionbound}
\left| \log\left( \frac{\err \cD(\vecz)}{\cD(\vecz)}\right) \right| \lesssim 2n \left[ \frac{\sqrt{154}}{1.312} \dc + (2\pi + 1) \ds\right] \leq 20 n (\dc +\ds)
\end{equation}
In the average case, the value $2n$ in \eqref{eq:precisionbound} can be replaced with $\sqrt{ 2n}$. Following the security arguments of \cite[Section 3.3]{AC:Prest17}, this allows to claim that in average, no security loss is expected if $(\dc +\ds) \leq 2^{-46}$.
To check if this is the case for \falcon, we have run \falcon in two different precisions, a high precision of $200$ bits and a standard precision of $53$ bits, and compared the values of the $c_j,\sigma_j$'s. The result of these experiments is that we always have $(\dc +\ds) \leq 2^{-40}$: while this is higher than $2^{-46}$, the difference is of only $6$ bits. Therefore, we consider that $53$ bits of precision are sufficient for NIST's parameters (security level $\lambda \leq 256$, number of queries $q_s \leq 2^{64}$), and that the possibility of our signature procedure leaking information about the secret basis is a purely theoretic threat.
\section{Summary of Parameters}\label{sec:parametersummary}
In this section, we summarize the interplay between parameters. The resulting parameter selection process is automatized in {\small\tt Supporting\_Documentation/additional/parameters.py}, which also gives the core-SVP hardness of key recovery and forgery.
\begin{figure}[!htb]
\centering
\begin{tikzpicture}[every node/.style={draw=black,anchor=center}, align=center,>={Stealth}]
\matrix (m) [matrix of nodes,row sep=10mm,column sep = 30mm,draw=none, align=center, text width=60mm, minimum height=8mm, inner sep=0mm, rounded corners]
{
& Modulus: $q$ \\
Number of queries: $\queries$ & Gram-Schmidt norm: $\gsnorm{\matB}$ \\
Targeted security level: $\lambda$ & Signatures' standard deviation: $\sigma$ \\
Ring degree: $n$ & Signatures maximal norm: $\beta$ \\
& BKZ blocksize for forgery: $B$ \\
& Security levels for forgery \\
};
\draw[line] (m-4-1.west) -- ($(m-4-1.west)+(-10mm,0)$) -- ($(m-1-2.west)+(-100mm,0)$) -- node[above,draw=none] {\eqref{eq:q}} (m-1-2.west);
\draw[line] (m-1-2) -> node[right,draw=none] {\eqref{eq:gsnorm}} (m-2-2);
\draw[line] (m-2-2) -> node[right,draw=none] {\eqref{eq:sigma:1}} (m-3-2);
\draw[line] (m-3-2) -> node[right,draw=none] {\eqref{eq:beta}} (m-4-2);
\draw[line] (m-2-1) -> node[above,draw=none] {\eqref{eq:sigma:1}} (m-3-2);
\draw[line] (m-3-1) -> node[above,draw=none] {\eqref{eq:sigma:1}} (m-3-2);
\draw[line] (m-4-1) -> node[above,draw=none] {\eqref{eq:sigma:1}} (m-3-2);
\draw[line] (m-4-1) -> node[above,draw=none] {\eqref{eq:beta}} (m-4-2);
\draw[line] (m-4-1) -> node[above,draw=none] {\eqref{eq:blocksize_forgery}} (m-5-2);
\draw[line] (m-4-2) -> node[right,draw=none] {\eqref{eq:blocksize_forgery}} (m-5-2);
\draw[line] (m-5-2) -> node[right,draw=none] {\eqref{eq:sec_classic}, \eqref{eq:sec_quantum}} (m-6-2);
\end{tikzpicture}
\caption{Parameters of \falcon and security estimates. Initial parameters are on the left side of the figure. Parameters on the right side of the figure (which include concrete security estimates) are derived systematically from initial parameters.}\label{fig:}
\end{figure}
\paragraph{Number of queries $\queries$, targeted security level $\lambda$ and ring degree $n$.} We start with three initial parameters: the maximal number of signing queries $\queries$, the targeted security level $\lambda$ and the degree $n$ of the ring $\bZ[x]/(x^n + 1)$. As per \cite{NIST}, $\queries = 2^{64}$. Also as per \cite{NIST}, it suffices to take $\lambda = 128$ for NIST Level I and $\lambda = 256$ for NIST Level V. Finally, we take:
\begin{align}
& n = 512 && \text{for NIST Level I,}\\
& n = 1024 && \text{for NIST Level V.}
\end{align}
\paragraph{Integer modulus $q$.} The modulus $q$ needs to be a prime of the form $k \cdot 2n + 1$ in order to maximize the efficiency of the NTT. The smallest prime of this form is
\begin{equation}\label{eq:q}
q = 12 \cdot 1024 + 1 = 12289.
\end{equation}
For this value, $q$ has essentially no influence on security: it is large enough to resist hybrid attacks and trivial attacks on SIS, and small enough to resist overstetched NTRU attacks.
\paragraph{Gram-Schmidt norm $\gsnorm{\matB}$.} We wish to minimize $\gsnorm{\matB}$. It has been shown in \cite[Section 3]{AC:DucLyuPre14} that in practice we can ensure (upon resampling a finite number of times) that:
\begin{equation}\label{eq:gsnorm}
\gsnorm{\matB} \leq 1.17 \sqrt{q}.
\end{equation}
In order to do that, each coefficient of $f$ and $g$ is sampled from the discrete Gaussian $D_{\bZ, \sigmafg}$ with:
\begin{equation}\label{eq:sigmafg}
\sigmafg = 1.17 \sqrt{q/2n}.
\end{equation}
%until \eqref{eq:gsnorm} is verified.
\paragraph{Standard deviation $\sigma$ of the signatures.} Signatures are sampled from a discrete Gaussian distribution using the fast Fourier sampling algorithm (with $\matB$ as a basis and a standard deviation $\sigma$). It suffices to take $\epsilon \leq 1 / \sqrt{\queries \cdot \lambda}$ and:
\begin{align}
\sigma & = \frac{1}{\pi} \cdot \sqrt{\frac{\log(4n (1 + 1 / \epsilon))}{2}} \cdot 1.17 \cdot \sqrt{q} \label{eq:sigma:1} \\
& \geq \smooth(\bZ^{2n}) \cdot \gsnorm{\matB} \notag \label{eq:sigma:2}
\end{align}
Following \cite[Lemma 6]{AC:Prest17}, this ensures that $R_{2\lambda}(\cD \cdot \matB \| D_{\Lambda_q^\perp, \sigma, \vecc}) \lesssim 1 + O(1) / \queries$, where $\cD$ is the output of the sampler, $D_{\Lambda_q^\perp, \sigma, \vecc}$ is an ideal Gaussian and $R_{2\lambda}$ is the R\'enyi divergence between them. Following \cite[Section 3.3]{AC:Prest17}, $O(1)$ bits of security are lost by using our sampler instead of $D_{\Lambda_q^\perp, \sigma, \vecc}$.
\paragraph{Maximal norm $\beta$ of the signatures.} During the signing and verification procedures, signatures $(s_1, s_2)$ must verify $\|(s_1, s_2)\|^2 \leq \sqsignorm$ in order to be accepted, with:
\begin{equation}\label{eq:beta}
\beta = \sigrate \cdot \sigma \sqrt{2n}, \hspace*{10mm} \sigrate = \sigrateval
\end{equation}
We call $\sigrate$ the tailcut rate of signatures, because the expected value of $\|(s_1, s_2)\|$ is $\sigma \sqrt{2n}$; any signature larger than this expected value by a factor more than $\sigrate$ is rejected. By applying \cite[Lemma 4.4, Item 3]{EC:Lyubashevsky12}, the probability that a sampled signature is larger than $\beta$ (hence that the signing procedure has to restart) is upper bounded as follows:
\begin{equation}\label{eq:rejrate}
\bP[\|(s_1, s_2)\|^2 > \sqsignorm] \leq \sigrate^{2n} \cdot e^{n \left(1 - \sigrate^{2} \right)}.
\end{equation}