From aa2d0b7b2f91b7b79f38c0843c0e984a2bd82528 Mon Sep 17 00:00:00 2001 From: Bas Westerbaan Date: Sun, 17 Jun 2018 23:22:00 +0200 Subject: [PATCH] Add amd64 optimized FieldElement This field arithmetic is based on George Tankersley's patch[1] for Golang's crypto/ed25519, which in turn is based on amd64-51-30k from SUPERCOP. [1] https://go-review.googlesource.com/c/crypto/+/71950 Cf. #6 --- README.md | 7 +- edwards25519/curve.go | 50 +--- edwards25519/field.go | 430 ++------------------------------- edwards25519/field_amd64.go | 272 +++++++++++++++++++++ edwards25519/field_amd64.s | 342 +++++++++++++++++++++++++++ edwards25519/field_generic.go | 433 ++++++++++++++++++++++++++++++++++ edwards25519/field_test.go | 46 ++++ 7 files changed, 1121 insertions(+), 459 deletions(-) create mode 100644 edwards25519/field_amd64.go create mode 100644 edwards25519/field_amd64.s create mode 100644 edwards25519/field_generic.go diff --git a/README.md b/README.md index 3ce3023..cd6ad11 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,14 @@ library by [Chuengsatiansup](https://perso.ens-lyon.fr/chitchanok.chuengsatiansup/), [Ribarski](http://panceribarski.com) and [Schwabe](https://cryptojedi.org/peter/index.shtml), -see [cref/cref.c](cref/cref.c). The field operations borrow +see [cref/cref.c](cref/cref.c). The generic field operations borrow from [Adam Langley](https://www.imperialviolet.org)'s [ed25519](http://github.com/agl/ed25519). +The amd64 optimized field arithmetic is from George Tankersley's +[ed25519 patch](https://go-review.googlesource.com/c/crypto/+/71950), +which in turn is based on SUPERCOP's +[amd64-51-30k](https://github.com/floodyberry/supercop/tree/master/crypto_sign/ed25519/amd64-51-30k) +by Bernstein, Duif, Lange, Schwabe and Yang. 
### other platforms * [Rust](https://github.com/dalek-cryptography/curve25519-dalek) diff --git a/edwards25519/curve.go b/edwards25519/curve.go index 6ad9658..357c891 100644 --- a/edwards25519/curve.go +++ b/edwards25519/curve.go @@ -2,55 +2,6 @@ // Ristretto group is a subquotient. package edwards25519 -var ( - // parameter d of Edwards25519 - feD = FieldElement{ - -10913610, 13857413, -15372611, 6949391, 114729, - -8787816, -6275908, -3247719, -18696448, -12055116, - } - - // double feD - fe2D = FieldElement{ - -21827239, -5839606, -30745221, 13898782, 229458, - 15978800, -12551817, -6495438, 29715968, 9444199, - } - - // 1 / sqrt(-1-d) - feInvSqrtMinusDMinusOne = FieldElement{ - -6111485, -4156064, 27798727, -12243468, 25904040, - -120897, -20826367, 7060776, -6093568, 1986012, - } - - // (d-1)^2 - feDMinusOneSquared = FieldElement{ - 15551795, -11097455, -13425098, -10125071, -11896535, - 10178284, -26634327, 4729244, -5282110, -10116402, - } - - // 1 - d^2 - feOneMinusDSquared = FieldElement{ - 6275446, -16617371, -22938544, -3773710, 11667077, - 7397348, -27922721, 1766195, -24433858, 672203, - } - - epZero = ExtendedPoint{feZero, feOne, feOne, feZero} - - epBase = ExtendedPoint{ - FieldElement{-41032219, -27199451, -7502359, -2800332, -50176896, - -33336453, -33570123, -31949908, -53948439, -29257844}, - FieldElement{20163995, 28827709, 65616271, 30544542, 24400674, - 29683035, 27175815, 26206403, 10372291, 5663137}, - feOne, - FieldElement{38281802, 6116118, 27349572, 33310069, 58473857, - 22289538, 47757517, 20140834, 50497352, 6414979}, - } -) - -// // (X:Y:Z) satisfying x=X/Z, y=Y/Z. Aka P2. -// type ProjectivePoint struct { -// X, Y, Z FieldElement -// } - // (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, X*Y=Z*T. Aka P3. type ExtendedPoint struct { X, Y, Z, T FieldElement @@ -167,6 +118,7 @@ func (p *CompletedPoint) SetRistrettoElligator2(r0 *FieldElement) *CompletedPoin // sqrt is the inverse square root of N*D or of i*N*D. // b=1 iff n1 is square. 
ND.Mul(&N, &D) + b = sqrt.InvSqrtI(&ND) sqrt.Abs(&sqrt) diff --git a/edwards25519/field.go b/edwards25519/field.go index 580817d..67f1ab7 100644 --- a/edwards25519/field.go +++ b/edwards25519/field.go @@ -6,43 +6,6 @@ import ( "math/big" ) -// Element of the field GF(2^255 - 19) over which the elliptic -// curve Edwards25519 is defined. -type FieldElement [10]int32 - -var ( - feZero FieldElement - feOne = FieldElement{1, 0, 0, 0, 0, 0, 0, 0, 0, 0} - feMinusOne = FieldElement{-1, 0, 0, 0, 0, 0, 0, 0, 0, 0} - - // sqrt(-1) - feI = FieldElement{ - -32595792, -7943725, 9377950, 3500415, 12389472, - -272473, -25146209, -2005654, 326686, 11406482, - } - - // -sqrt(-1) - feMinusI = FieldElement{ - 32595792, 7943725, -9377950, -3500415, -12389472, - 272473, 25146209, 2005654, -326686, -11406482, - } -) - -// Sets fe to -a. Returns fe. -func (fe *FieldElement) Neg(a *FieldElement) *FieldElement { - fe[0] = -a[0] - fe[1] = -a[1] - fe[2] = -a[2] - fe[3] = -a[3] - fe[4] = -a[4] - fe[5] = -a[5] - fe[6] = -a[6] - fe[7] = -a[7] - fe[8] = -a[8] - fe[9] = -a[9] - return fe -} - // Set fe to i, the root of -1. Returns fe. func (fe *FieldElement) SetI() *FieldElement { copy(fe[:], feI[:]) @@ -66,41 +29,11 @@ func (fe *FieldElement) double(a *FieldElement) *FieldElement { return fe.add(a, a) } -// Sets fe to a + b without normalizing. Returns fe. -func (fe *FieldElement) add(a, b *FieldElement) *FieldElement { - fe[0] = a[0] + b[0] - fe[1] = a[1] + b[1] - fe[2] = a[2] + b[2] - fe[3] = a[3] + b[3] - fe[4] = a[4] + b[4] - fe[5] = a[5] + b[5] - fe[6] = a[6] + b[6] - fe[7] = a[7] + b[7] - fe[8] = a[8] + b[8] - fe[9] = a[9] + b[9] - return fe -} - // Sets fe to a + b. Returns fe. func (fe *FieldElement) Add(a, b *FieldElement) *FieldElement { return fe.add(a, b).normalize() } -// Sets fe to a - b without normalizing. Returns fe. 
-func (fe *FieldElement) sub(a, b *FieldElement) *FieldElement { - fe[0] = a[0] - b[0] - fe[1] = a[1] - b[1] - fe[2] = a[2] - b[2] - fe[3] = a[3] - b[3] - fe[4] = a[4] - b[4] - fe[5] = a[5] - b[5] - fe[6] = a[6] - b[6] - fe[7] = a[7] - b[7] - fe[8] = a[8] - b[8] - fe[9] = a[9] - b[9] - return fe -} - // Sets fe to a - b. Returns fe. func (fe *FieldElement) Sub(a, b *FieldElement) *FieldElement { return fe.sub(a, b).normalize() @@ -112,132 +45,6 @@ func (fe *FieldElement) Set(a *FieldElement) *FieldElement { return fe } -// Returns 1 if b < 0 and 0 otherwise. -func negative(b int32) int32 { - return (b >> 31) & 1 -} - -// Returns 1 if b == c and 0 otherwise. Assumes 2^15 <= b, c < 2^30. -func equal15(b, c int32) int32 { - ub := uint16(b) - uc := uint16(c) - x := uint32(ub ^ uc) - x-- - return int32(x >> 31) -} - -// Returns 1 if b == c and 0 otherwise. Assumes 0 <= b, c < 2^30. -func equal30(b, c int32) int32 { - x := uint32(b ^ c) - x-- - return int32(x >> 31) -} - -// Interprets a 3-byte unsigned little endian byte-slice as int64 -func load3(in []byte) int64 { - var r int64 - r = int64(in[0]) - r |= int64(in[1]) << 8 - r |= int64(in[2]) << 16 - return r -} - -// Interprets a 4-byte unsigned little endian byte-slice as int64 -func load4(in []byte) int64 { - var r int64 - r = int64(in[0]) - r |= int64(in[1]) << 8 - r |= int64(in[2]) << 16 - r |= int64(in[3]) << 24 - return r -} - -// Reduce the even coefficients to below 1.01*2^25 and the odd coefficients -// to below 1.01*2^24. Returns fe. -func (fe *FieldElement) normalize() *FieldElement { - return fe.setReduced( - int64(fe[0]), int64(fe[1]), int64(fe[2]), int64(fe[3]), int64(fe[4]), - int64(fe[5]), int64(fe[6]), int64(fe[7]), int64(fe[8]), int64(fe[9])) -} - -// Set fe to h0 + h1*2^26 + h2*2^51 + ... + h9*2^230. Requires a little -// headroom in the inputs to store the carries. Returns fe. 
-func (fe *FieldElement) setReduced( - h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) *FieldElement { - var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64 - - c0 = (h0 + (1 << 25)) >> 26 - h1 += c0 - h0 -= c0 << 26 - c4 = (h4 + (1 << 25)) >> 26 - h5 += c4 - h4 -= c4 << 26 - - c1 = (h1 + (1 << 24)) >> 25 - h2 += c1 - h1 -= c1 << 25 - c5 = (h5 + (1 << 24)) >> 25 - h6 += c5 - h5 -= c5 << 25 - - c2 = (h2 + (1 << 25)) >> 26 - h3 += c2 - h2 -= c2 << 26 - c6 = (h6 + (1 << 25)) >> 26 - h7 += c6 - h6 -= c6 << 26 - - c3 = (h3 + (1 << 24)) >> 25 - h4 += c3 - h3 -= c3 << 25 - c7 = (h7 + (1 << 24)) >> 25 - h8 += c7 - h7 -= c7 << 25 - - c4 = (h4 + (1 << 25)) >> 26 - h5 += c4 - h4 -= c4 << 26 - c8 = (h8 + (1 << 25)) >> 26 - h9 += c8 - h8 -= c8 << 26 - - c9 = (h9 + (1 << 24)) >> 25 - h0 += c9 * 19 - h9 -= c9 << 25 - - c0 = (h0 + (1 << 25)) >> 26 - h1 += c0 - h0 -= c0 << 26 - - fe[0] = int32(h0) - fe[1] = int32(h1) - fe[2] = int32(h2) - fe[3] = int32(h3) - fe[4] = int32(h4) - fe[5] = int32(h5) - fe[6] = int32(h6) - fe[7] = int32(h7) - fe[8] = int32(h8) - fe[9] = int32(h9) - - return fe -} - -// Set fe to a if b == 1. Requires b to be either 0 or 1. -func (fe *FieldElement) ConditionalSet(a *FieldElement, b int32) { - b = -b // b == 0b11111111111111111111111111111111 or 0. - fe[0] ^= b & (fe[0] ^ a[0]) - fe[1] ^= b & (fe[1] ^ a[1]) - fe[2] ^= b & (fe[2] ^ a[2]) - fe[3] ^= b & (fe[3] ^ a[3]) - fe[4] ^= b & (fe[4] ^ a[4]) - fe[5] ^= b & (fe[5] ^ a[5]) - fe[6] ^= b & (fe[6] ^ a[6]) - fe[7] ^= b & (fe[7] ^ a[7]) - fe[8] ^= b & (fe[8] ^ a[8]) - fe[9] ^= b & (fe[9] ^ a[9]) -} - // Returns little endian representation of fe. func (fe *FieldElement) Bytes() [32]byte { var ret [32]byte @@ -245,222 +52,6 @@ func (fe *FieldElement) Bytes() [32]byte { return ret } -// Write fe to s in little endian. Returns fe. 
-func (fe *FieldElement) BytesInto(s *[32]byte) *FieldElement { - var carry [10]int32 - - q := (19*fe[9] + (1 << 24)) >> 25 - q = (fe[0] + q) >> 26 - q = (fe[1] + q) >> 25 - q = (fe[2] + q) >> 26 - q = (fe[3] + q) >> 25 - q = (fe[4] + q) >> 26 - q = (fe[5] + q) >> 25 - q = (fe[6] + q) >> 26 - q = (fe[7] + q) >> 25 - q = (fe[8] + q) >> 26 - q = (fe[9] + q) >> 25 - - fe[0] += 19 * q - - carry[0] = fe[0] >> 26 - fe[1] += carry[0] - fe[0] -= carry[0] << 26 - carry[1] = fe[1] >> 25 - fe[2] += carry[1] - fe[1] -= carry[1] << 25 - carry[2] = fe[2] >> 26 - fe[3] += carry[2] - fe[2] -= carry[2] << 26 - carry[3] = fe[3] >> 25 - fe[4] += carry[3] - fe[3] -= carry[3] << 25 - carry[4] = fe[4] >> 26 - fe[5] += carry[4] - fe[4] -= carry[4] << 26 - carry[5] = fe[5] >> 25 - fe[6] += carry[5] - fe[5] -= carry[5] << 25 - carry[6] = fe[6] >> 26 - fe[7] += carry[6] - fe[6] -= carry[6] << 26 - carry[7] = fe[7] >> 25 - fe[8] += carry[7] - fe[7] -= carry[7] << 25 - carry[8] = fe[8] >> 26 - fe[9] += carry[8] - fe[8] -= carry[8] << 26 - carry[9] = fe[9] >> 25 - fe[9] -= carry[9] << 25 - - s[0] = byte(fe[0] >> 0) - s[1] = byte(fe[0] >> 8) - s[2] = byte(fe[0] >> 16) - s[3] = byte((fe[0] >> 24) | (fe[1] << 2)) - s[4] = byte(fe[1] >> 6) - s[5] = byte(fe[1] >> 14) - s[6] = byte((fe[1] >> 22) | (fe[2] << 3)) - s[7] = byte(fe[2] >> 5) - s[8] = byte(fe[2] >> 13) - s[9] = byte((fe[2] >> 21) | (fe[3] << 5)) - s[10] = byte(fe[3] >> 3) - s[11] = byte(fe[3] >> 11) - s[12] = byte((fe[3] >> 19) | (fe[4] << 6)) - s[13] = byte(fe[4] >> 2) - s[14] = byte(fe[4] >> 10) - s[15] = byte(fe[4] >> 18) - s[16] = byte(fe[5] >> 0) - s[17] = byte(fe[5] >> 8) - s[18] = byte(fe[5] >> 16) - s[19] = byte((fe[5] >> 24) | (fe[6] << 1)) - s[20] = byte(fe[6] >> 7) - s[21] = byte(fe[6] >> 15) - s[22] = byte((fe[6] >> 23) | (fe[7] << 3)) - s[23] = byte(fe[7] >> 5) - s[24] = byte(fe[7] >> 13) - s[25] = byte((fe[7] >> 21) | (fe[8] << 4)) - s[26] = byte(fe[8] >> 4) - s[27] = byte(fe[8] >> 12) - s[28] = byte((fe[8] >> 20) | (fe[9] 
<< 6)) - s[29] = byte(fe[9] >> 2) - s[30] = byte(fe[9] >> 10) - s[31] = byte(fe[9] >> 18) - return fe -} - -// Sets fe to the little endian number encoded in buf modulo 2^255-19. -// Ignores the highest bit in buf. Returns fe. -func (fe *FieldElement) SetBytes(buf *[32]byte) *FieldElement { - return fe.setReduced( - load4(buf[:]), - load3(buf[4:])<<6, - load3(buf[7:])<<5, - load3(buf[10:])<<3, - load3(buf[13:])<<2, - load4(buf[16:]), - load3(buf[20:])<<7, - load3(buf[23:])<<5, - load3(buf[26:])<<4, - (load3(buf[29:])&8388607)<<2, - ) -} - -// Sets fe to a * b. Returns fe. -func (fe *FieldElement) Mul(a, b *FieldElement) *FieldElement { - a0 := int64(a[0]) - a1 := int64(a[1]) - a2 := int64(a[2]) - a3 := int64(a[3]) - a4 := int64(a[4]) - a5 := int64(a[5]) - a6 := int64(a[6]) - a7 := int64(a[7]) - a8 := int64(a[8]) - a9 := int64(a[9]) - - a1_2 := int64(2 * a[1]) - a3_2 := int64(2 * a[3]) - a5_2 := int64(2 * a[5]) - a7_2 := int64(2 * a[7]) - a9_2 := int64(2 * a[9]) - - b0 := int64(b[0]) - b1 := int64(b[1]) - b2 := int64(b[2]) - b3 := int64(b[3]) - b4 := int64(b[4]) - b5 := int64(b[5]) - b6 := int64(b[6]) - b7 := int64(b[7]) - b8 := int64(b[8]) - b9 := int64(b[9]) - - b1_19 := int64(19 * b[1]) - b2_19 := int64(19 * b[2]) - b3_19 := int64(19 * b[3]) - b4_19 := int64(19 * b[4]) - b5_19 := int64(19 * b[5]) - b6_19 := int64(19 * b[6]) - b7_19 := int64(19 * b[7]) - b8_19 := int64(19 * b[8]) - b9_19 := int64(19 * b[9]) - - h0 := a0*b0 + a1_2*b9_19 + a2*b8_19 + a3_2*b7_19 + a4*b6_19 + a5_2*b5_19 + a6*b4_19 + a7_2*b3_19 + a8*b2_19 + a9_2*b1_19 - h1 := a0*b1 + a1*b0 + a2*b9_19 + a3*b8_19 + a4*b7_19 + a5*b6_19 + a6*b5_19 + a7*b4_19 + a8*b3_19 + a9*b2_19 - h2 := a0*b2 + a1_2*b1 + a2*b0 + a3_2*b9_19 + a4*b8_19 + a5_2*b7_19 + a6*b6_19 + a7_2*b5_19 + a8*b4_19 + a9_2*b3_19 - h3 := a0*b3 + a1*b2 + a2*b1 + a3*b0 + a4*b9_19 + a5*b8_19 + a6*b7_19 + a7*b6_19 + a8*b5_19 + a9*b4_19 - h4 := a0*b4 + a1_2*b3 + a2*b2 + a3_2*b1 + a4*b0 + a5_2*b9_19 + a6*b8_19 + a7_2*b7_19 + a8*b6_19 + a9_2*b5_19 
- h5 := a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0 + a6*b9_19 + a7*b8_19 + a8*b7_19 + a9*b6_19 - h6 := a0*b6 + a1_2*b5 + a2*b4 + a3_2*b3 + a4*b2 + a5_2*b1 + a6*b0 + a7_2*b9_19 + a8*b8_19 + a9_2*b7_19 - h7 := a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0 + a8*b9_19 + a9*b8_19 - h8 := a0*b8 + a1_2*b7 + a2*b6 + a3_2*b5 + a4*b4 + a5_2*b3 + a6*b2 + a7_2*b1 + a8*b0 + a9_2*b9_19 - h9 := a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0 - - return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) -} - -// Returns the unnormalized coefficients of fe^2. -func (fe *FieldElement) square() (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) { - f0 := int64(fe[0]) - f1 := int64(fe[1]) - f2 := int64(fe[2]) - f3 := int64(fe[3]) - f4 := int64(fe[4]) - f5 := int64(fe[5]) - f6 := int64(fe[6]) - f7 := int64(fe[7]) - f8 := int64(fe[8]) - f9 := int64(fe[9]) - f0_2 := int64(2 * fe[0]) - f1_2 := int64(2 * fe[1]) - f2_2 := int64(2 * fe[2]) - f3_2 := int64(2 * fe[3]) - f4_2 := int64(2 * fe[4]) - f5_2 := int64(2 * fe[5]) - f6_2 := int64(2 * fe[6]) - f7_2 := int64(2 * fe[7]) - f5_38 := 38 * f5 - f6_19 := 19 * f6 - f7_38 := 38 * f7 - f8_19 := 19 * f8 - f9_38 := 38 * f9 - - h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38 - h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19 - h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19 - h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38 - h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38 - h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19 - h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19 - h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38 - h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38 - h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5 - - return -} - -// Sets fe to a^2. Returns fe. 
-func (fe *FieldElement) Square(a *FieldElement) *FieldElement { - h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := a.square() - return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) -} - -// Sets fe to 2 * a^2. Returns fe. -func (fe *FieldElement) DoubledSquare(a *FieldElement) *FieldElement { - h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := a.square() - h0 += h0 - h1 += h1 - h2 += h2 - h3 += h3 - h4 += h4 - h5 += h5 - h6 += h6 - h7 += h7 - h8 += h8 - h9 += h9 - return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) -} - // Set fe to the inverse of a. Return fe. func (fe *FieldElement) Inverse(a *FieldElement) *FieldElement { var t0, t1, t2, t3 FieldElement @@ -715,3 +306,24 @@ func (fe *FieldElement) InvSqrtI(a *FieldElement) int32 { return inCaseA + inCaseB } + +// Returns 1 if b == c and 0 otherwise. Assumes 0 <= b, c < 2^30. +func equal30(b, c int32) int32 { + x := uint32(b ^ c) + x-- + return int32(x >> 31) +} + +// Returns 1 if b == c and 0 otherwise. Assumes 2^15 <= b, c < 2^30. +func equal15(b, c int32) int32 { + ub := uint16(b) + uc := uint16(c) + x := uint32(ub ^ uc) + x-- + return int32(x >> 31) +} + +// Returns 1 if b < 0 and 0 otherwise. +func negative(b int32) int32 { + return (b >> 31) & 1 +} diff --git a/edwards25519/field_amd64.go b/edwards25519/field_amd64.go new file mode 100644 index 0000000..5e1d75e --- /dev/null +++ b/edwards25519/field_amd64.go @@ -0,0 +1,272 @@ +// +build amd64 + +package edwards25519 + +// Element of the field GF(2^255 - 19) over which the elliptic +// curve Edwards25519 is defined. 
+type FieldElement [5]uint64
+
+var (
+	feZero     FieldElement
+	feOne      = FieldElement{1, 0, 0, 0, 0}
+	feMinusOne = FieldElement{2251799813685228, 2251799813685247,
+		2251799813685247, 2251799813685247, 2251799813685247} // p - 1 with p = 2^255 - 19
+
+	// i = sqrt(-1)
+	feI = FieldElement{
+		1718705420411056, 234908883556509, 2233514472574048,
+		2117202627021982, 765476049583133,
+	}
+
+	// -i = -sqrt(-1)
+	feMinusI = FieldElement{
+		533094393274173, 2016890930128738, 18285341111199,
+		134597186663265, 1486323764102114,
+	}
+
+	// d, the curve parameter of Edwards25519
+	feD = FieldElement{
+		929955233495203, 466365720129213, 1662059464998953,
+		2033849074728123, 1442794654840575,
+	}
+
+	// 2*d
+	fe2D = FieldElement{
+		1859910466990425, 932731440258426, 1072319116312658,
+		1815898335770999, 633789495995903,
+	}
+
+	// 1 / sqrt(-1-d)
+	feInvSqrtMinusDMinusOne = FieldElement{
+		1972891073822467, 1430154612583622, 2243686579258279,
+		473840635492096, 133279003116800,
+	}
+
+	// (d-1)^2
+	feDMinusOneSquared = FieldElement{
+		1507062230895904, 1572317787530805, 683053064812840,
+		317374165784489, 1572899562415810,
+	}
+
+	// 1 - d^2
+	feOneMinusDSquared = FieldElement{
+		1136626929484150, 1998550399581263, 496427632559748,
+		118527312129759, 45110755273534,
+	}
+
+	epZero = ExtendedPoint{feZero, feOne, feOne, feZero} // (X:Y:Z:T) = (0:1:1:0)
+
+	epBase = ExtendedPoint{ // note: all five X limbs exceed 2^51, i.e. X is stored uncarried
+		FieldElement{2678275328304575, 4315672520525287, 2266428086574206,
+			2359477563015859, 2540138899492839},
+		FieldElement{1934594822876571, 2049809580636559, 1991994783322914,
+			1758681962032007, 380046701118659},
+		feOne,
+		FieldElement{410445769351754, 2235400917701188, 1495825632738689,
+			1351628537510093, 430502003771208},
+	}
+)
+
+//go:noescape
+func feMul(out, a, b *FieldElement) // out = a * b; body is in field_amd64.s
+
+//go:noescape
+func feSquare(out, a *FieldElement) // out = a^2; body is in field_amd64.s
+
+// Neg sets fe to -a. Returns fe.
+func (fe *FieldElement) Neg(a *FieldElement) *FieldElement {
+	const low51 = 1<<51 - 1 // selects the low 51 bits of a limb
+	r := *a
+
+	r[1] += r[0] >> 51
+	r[0] &= low51
+	r[2] += r[1] >> 51
+	r[1] &= low51
+	r[3] += r[2] >> 51
+	r[2] &= low51
+	r[4] += r[3] >> 51
+	r[3] &= low51
+	r[0] += 19 * (r[4] >> 51) // the carry out of the top limb re-enters limb 0 times 19
+	r[4] &= low51
+
+	fe[0] = 0xfffffffffffda - r[0] // 2*(2^51 - 19)
+	fe[1] = 0xffffffffffffe - r[1] // 2*(2^51 - 1)
+	fe[2] = 0xffffffffffffe - r[2]
+	fe[3] = 0xffffffffffffe - r[3]
+	fe[4] = 0xffffffffffffe - r[4]
+
+	return fe
+}
+
+// sub sets fe to a - b; the result limbs are left uncarried. Returns fe.
+func (fe *FieldElement) sub(a, b *FieldElement) *FieldElement {
+	const low51 = 1<<51 - 1 // selects the low 51 bits of a limb
+	r := *b
+
+	r[1] += r[0] >> 51
+	r[0] &= low51
+	r[2] += r[1] >> 51
+	r[1] &= low51
+	r[3] += r[2] >> 51
+	r[2] &= low51
+	r[4] += r[3] >> 51
+	r[3] &= low51
+	r[0] += 19 * (r[4] >> 51) // the carry out of the top limb re-enters limb 0 times 19
+	r[4] &= low51
+
+	fe[0] = a[0] + 0xfffffffffffda - r[0] // a + 2*(2^51 - 19) - carried b
+	fe[1] = a[1] + 0xffffffffffffe - r[1] // a + 2*(2^51 - 1) - carried b
+	fe[2] = a[2] + 0xffffffffffffe - r[2]
+	fe[3] = a[3] + 0xffffffffffffe - r[3]
+	fe[4] = a[4] + 0xffffffffffffe - r[4]
+
+	return fe
+}
+
+// add sets fe to a + b limb by limb, without carrying. Returns fe.
+func (fe *FieldElement) add(a, b *FieldElement) *FieldElement {
+	fe[0] = a[0] + b[0]
+	fe[1] = a[1] + b[1]
+	fe[2] = a[2] + b[2]
+	fe[3] = a[3] + b[3]
+	fe[4] = a[4] + b[4]
+	return fe
+}
+
+// normalize carries and reduces the limbs of fe in place. Returns fe.
+func (fe *FieldElement) normalize() *FieldElement {
+	return fe.setReduced(fe)
+}
+
+// Set fe to a reduced version of a. Returns fe.
+func (fe *FieldElement) setReduced(a *FieldElement) *FieldElement {
+	const low51 = 1<<51 - 1 // selects the low 51 bits of a limb
+	*fe = *a
+
+	fe[1] += fe[0] >> 51
+	fe[0] &= low51
+	fe[2] += fe[1] >> 51
+	fe[1] &= low51
+	fe[3] += fe[2] >> 51
+	fe[2] &= low51
+	fe[4] += fe[3] >> 51
+	fe[3] &= low51
+	fe[0] += 19 * (fe[4] >> 51) // the carry out of the top limb re-enters limb 0 times 19
+	fe[4] &= low51
+
+	q := (fe[0] + 19) >> 51 // q ends as the carry out of bit 255 of fe + 19
+	for _, limb := range fe[1:] {
+		q = (limb + q) >> 51
+	}
+
+	fe[0] += 19 * q // with bit 255 dropped below, this subtracts 2^255 - 19 when q == 1
+
+	fe[1] += fe[0] >> 51
+	fe[0] &= low51
+	fe[2] += fe[1] >> 51
+	fe[1] &= low51
+	fe[3] += fe[2] >> 51
+	fe[2] &= low51
+	fe[4] += fe[3] >> 51
+	fe[3] &= low51
+	fe[4] &= low51 // discards the carry out of the top limb
+
+	return fe
+}
+
+// ConditionalSet sets fe to a if b == 1 and keeps fe if b == 0; b must be 0 or 1.
+func (fe *FieldElement) ConditionalSet(a *FieldElement, b int32) {
+	mask := uint64(1-b) - 1 // all ones if b == 1, zero if b == 0
+	fe[0] ^= (fe[0] ^ a[0]) & mask
+	fe[1] ^= (fe[1] ^ a[1]) & mask
+	fe[2] ^= (fe[2] ^ a[2]) & mask
+	fe[3] ^= (fe[3] ^ a[3]) & mask
+	fe[4] ^= (fe[4] ^ a[4]) & mask
+}
+
+// Write fe to s in little endian. Returns fe.
+func (fe *FieldElement) BytesInto(s *[32]byte) *FieldElement {
+	var r FieldElement
+	r.setReduced(fe) // serialize a reduced copy; fe itself is not modified
+
+	s[0] = byte(r[0])
+	s[1] = byte(r[0] >> 8)
+	s[2] = byte(r[0] >> 16)
+	s[3] = byte(r[0] >> 24)
+	s[4] = byte(r[0] >> 32)
+	s[5] = byte(r[0] >> 40)
+	s[6] = byte(r[0] >> 48)
+	s[6] ^= byte(r[1] << 3) // limb 1 starts at bit 51 = 6*8 + 3
+	s[7] = byte(r[1] >> 5)
+	s[8] = byte(r[1] >> 13)
+	s[9] = byte(r[1] >> 21)
+	s[10] = byte(r[1] >> 29)
+	s[11] = byte(r[1] >> 37)
+	s[12] = byte(r[1] >> 45)
+	s[12] ^= byte(r[2] << 6) // limb 2 starts at bit 102 = 12*8 + 6
+	s[13] = byte(r[2] >> 2)
+	s[14] = byte(r[2] >> 10)
+	s[15] = byte(r[2] >> 18)
+	s[16] = byte(r[2] >> 26)
+	s[17] = byte(r[2] >> 34)
+	s[18] = byte(r[2] >> 42)
+	s[19] = byte(r[2] >> 50)
+	s[19] ^= byte(r[3] << 1) // limb 3 starts at bit 153 = 19*8 + 1
+	s[20] = byte(r[3] >> 7)
+	s[21] = byte(r[3] >> 15)
+	s[22] = byte(r[3] >> 23)
+	s[23] = byte(r[3] >> 31)
+	s[24] = byte(r[3] >> 39)
+	s[25] = byte(r[3] >> 47)
+	s[25] ^= byte(r[4] << 4) // limb 4 starts at bit 204 = 25*8 + 4
+	s[26] = byte(r[4] >> 4)
+	s[27] = byte(r[4] >> 12)
+	s[28] = byte(r[4] >> 20)
+	s[29] = byte(r[4] >> 28)
+	s[30] = byte(r[4] >> 36)
+	s[31] = byte(r[4] >> 44)
+	return fe
+}
+
+// Sets fe to the little endian number encoded in buf modulo 2^255-19.
+// Ignores the highest bit in buf. Returns fe.
+func (fe *FieldElement) SetBytes(buf *[32]byte) *FieldElement {
+	fe[0] = uint64(buf[0]) | uint64(buf[1])<<8 | uint64(buf[2])<<16 | // bits 0..50
+		uint64(buf[3])<<24 | uint64(buf[4])<<32 |
+		uint64(buf[5])<<40 | uint64(buf[6]&0x07)<<48
+	fe[1] = uint64(buf[6])>>3 | uint64(buf[7])<<5 | // bits 51..101
+		uint64(buf[8])<<13 | uint64(buf[9])<<21 |
+		uint64(buf[10])<<29 | uint64(buf[11])<<37 |
+		uint64(buf[12]&0x3f)<<45
+	fe[2] = uint64(buf[12])>>6 | uint64(buf[13])<<2 | // bits 102..152
+		uint64(buf[14])<<10 | uint64(buf[15])<<18 |
+		uint64(buf[16])<<26 | uint64(buf[17])<<34 |
+		uint64(buf[18])<<42 | uint64(buf[19]&0x01)<<50
+	fe[3] = uint64(buf[19])>>1 | uint64(buf[20])<<7 | // bits 153..203
+		uint64(buf[21])<<15 | uint64(buf[22])<<23 |
+		uint64(buf[23])<<31 | uint64(buf[24])<<39 |
+		uint64(buf[25]&0x0f)<<47
+	fe[4] = uint64(buf[25])>>4 | uint64(buf[26])<<4 | // bits 204..254; 0x7f drops bit 255
+		uint64(buf[27])<<12 | uint64(buf[28])<<20 |
+		uint64(buf[29])<<28 | uint64(buf[30])<<36 |
+		uint64(buf[31]&0x7f)<<44
+	return fe
+}
+
+// Mul sets fe to a * b using the assembly routine feMul. Returns fe.
+func (fe *FieldElement) Mul(a, b *FieldElement) *FieldElement {
+	feMul(fe, a, b)
+	return fe
+}
+
+// Square sets fe to a^2 using the assembly routine feSquare. Returns fe.
+func (fe *FieldElement) Square(a *FieldElement) *FieldElement {
+	feSquare(fe, a)
+	return fe
+}
+
+// Sets fe to 2 * a^2. Returns fe.
+func (fe *FieldElement) DoubledSquare(a *FieldElement) *FieldElement {
+	feSquare(fe, a)
+	return fe.add(fe, fe)
+}
diff --git a/edwards25519/field_amd64.s b/edwards25519/field_amd64.s
new file mode 100644
index 0000000..4f99cba
--- /dev/null
+++ b/edwards25519/field_amd64.s
@@ -0,0 +1,342 @@
+//+build amd64
+
+#include "textflag.h"
+
+// func feMul(out, a, b *FieldElement); x and y below are the limbs of a and b.
+TEXT ·feMul(SB),NOSPLIT,$0-24
+	MOVQ a+8(FP), BX
+	MOVQ b+16(FP), CX
+
+	// Calculate r0
+	MOVQ 0(BX), AX  // rax <-- x0
+	MULQ 0(CX)  // rdx, rax <-- x0*y0
+	MOVQ AX, SI  // r00 = rax
+	MOVQ DX, DI  // r01 = rdx (kept in DI: BP is the frame pointer and must not be clobbered)
+
+	MOVQ 8(BX), DX  // rdx <-- x1
+	IMUL3Q $19, DX, AX  // rax <-- x1*19
+	MULQ 32(CX)  // rdx, rax <-- x1_19*y4
+	ADDQ AX, SI  // r00 += rax
+	ADCQ DX, DI  // r01 += rdx
+
+	MOVQ 16(BX), DX  // rdx <-- x2
+	IMUL3Q $19, DX, AX  // rax <-- x2*19
+	MULQ 24(CX)  // rdx, rax <-- x2_19*y3
+	ADDQ AX, SI  // r00 += rax
+	ADCQ DX, DI  // r01 += rdx
+
+	MOVQ 24(BX), DX  // rdx <-- x3
+	IMUL3Q $19, DX, AX  // rax <-- x3*19
+	MULQ 16(CX)  // rdx, rax <-- x3_19 * y2
+	ADDQ AX, SI  // r00 += rax
+	ADCQ DX, DI  // r01 += rdx
+
+	MOVQ 32(BX), DX  // rdx <-- x4
+	IMUL3Q $19, DX, AX  // rax <-- x4*19
+	MULQ 8(CX)  // rdx rax <-- x4_19*y1
+	ADDQ AX, SI  // r00 += rax
+	ADCQ DX, DI  // r01 += rdx
+
+	// Calculate r1
+	MOVQ 0(BX), AX
+	MULQ 8(CX)
+	MOVQ AX, R8  // r10
+	MOVQ DX, R9  // r11
+
+	MOVQ 8(BX), AX
+	MULQ 0(CX)
+	ADDQ AX, R8
+	ADCQ DX, R9
+
+	MOVQ 16(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 32(CX)
+	ADDQ AX, R8
+	ADCQ DX, R9
+
+	MOVQ 24(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 24(CX)
+	ADDQ AX, R8
+	ADCQ DX, R9
+
+	MOVQ 32(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 16(CX)
+	ADDQ AX, R8
+	ADCQ DX, R9
+
+	// Calculate r2
+	MOVQ 0(BX), AX
+	MULQ 16(CX)
+	MOVQ AX, R10  // r20
+	MOVQ DX, R11  // r21
+
+	MOVQ 8(BX), AX
+	MULQ 8(CX)
+	ADDQ AX, R10
+	ADCQ DX, R11
+
+	MOVQ 16(BX), AX
+	MULQ 0(CX)
+	ADDQ AX, R10
+	ADCQ DX, R11
+
+	MOVQ 24(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 32(CX)
+	ADDQ AX, R10
+	ADCQ DX, R11
+
+	MOVQ 32(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 24(CX)
+	ADDQ AX, R10
+	ADCQ DX, R11
+
+	// Calculate r3
+	MOVQ 0(BX), AX
+	MULQ 24(CX)
+	MOVQ AX, R12  // r30
+	MOVQ DX, R13  // r31
+
+	MOVQ 8(BX), AX
+	MULQ 16(CX)
+	ADDQ AX, R12
+	ADCQ DX, R13
+
+	MOVQ 16(BX), AX
+	MULQ 8(CX)
+	ADDQ AX, R12
+	ADCQ DX, R13
+
+	MOVQ 24(BX), AX
+	MULQ 0(CX)
+	ADDQ AX, R12
+	ADCQ DX, R13
+
+	MOVQ 32(BX), DX
+	IMUL3Q $19, DX, AX
+	MULQ 32(CX)
+	ADDQ AX, R12
+	ADCQ DX, R13
+
+	// Calculate r4
+	MOVQ 0(BX), AX
+	MULQ 32(CX)
+	MOVQ AX, R14  // r40
+	MOVQ DX, R15  // r41
+
+	MOVQ 8(BX), AX
+	MULQ 24(CX)
+	ADDQ AX, R14
+	ADCQ DX, R15
+
+	MOVQ 16(BX), AX
+	MULQ 16(CX)
+	ADDQ AX, R14
+	ADCQ DX, R15
+
+	MOVQ 24(BX), AX
+	MULQ 8(CX)
+	ADDQ AX, R14
+	ADCQ DX, R15
+
+	MOVQ 32(BX), AX
+	MULQ 0(CX)
+	ADDQ AX, R14
+	ADCQ DX, R15
+
+
+	MOVQ $2251799813685247, AX  // (1<<51) - 1
+	SHLQ $13, SI, DI  // r01 = shld with r00
+	ANDQ AX, SI  // r00 &= mask51
+	SHLQ $13, R8, R9  // r11 = shld with r10
+	ANDQ AX, R8  // r10 &= mask51
+	ADDQ DI, R8  // r10 += r01
+	SHLQ $13, R10, R11  // r21 = shld with r20
+	ANDQ AX, R10  // r20 &= mask51
+	ADDQ R9, R10  // r20 += r11
+	SHLQ $13, R12, R13  // r31 = shld with r30
+	ANDQ AX, R12  // r30 &= mask51
+	ADDQ R11, R12  // r30 += r21
+	SHLQ $13, R14, R15  // r41 = shld with r40
+	ANDQ AX, R14  // r40 &= mask51
+	ADDQ R13, R14  // r40 += r31
+	IMUL3Q $19, R15, R15  // r41 = r41*19
+	ADDQ R15, SI  // r00 += r41
+
+	MOVQ SI, DX  // rdx <-- r00
+	SHRQ $51, DX  // rdx <-- r00 >> 51
+	ADDQ DX, R8  // r10 += r00 >> 51
+	MOVQ R8, DX  // rdx <-- r10
+	SHRQ $51, DX  // rdx <-- r10 >> 51
+	ANDQ AX, SI  // r00 &= mask51
+	ADDQ DX, R10  // r20 += r10 >> 51
+	MOVQ R10, DX  // rdx <-- r20
+	SHRQ $51, DX  // rdx <-- r20 >> 51
+	ANDQ AX, R8  // r10 &= mask51
+	ADDQ DX, R12  // r30 += r20 >> 51
+	MOVQ R12, DX  // rdx <-- r30
+	SHRQ $51, DX  // rdx <-- r30 >> 51
+	ANDQ AX, R10  // r20 &= mask51
+	ADDQ DX, R14  // r40 += r30 >> 51
+	MOVQ R14, DX  // rdx <-- r40
+	SHRQ $51, DX  // rdx <-- r40 >> 51
+	ANDQ AX, R12  // r30 &= mask51
+	IMUL3Q $19, DX, DX  // rdx <-- (r40 >> 51) * 19
+	ADDQ DX, SI  // r00 += (r40 >> 51) *19
+	ANDQ AX, R14  // r40 &= mask51
+
+	MOVQ out+0(FP), DI  // DI held r01 until the reduction; load the output pointer only now
+	MOVQ SI, 0(DI)
+	MOVQ R8, 8(DI)
+	MOVQ R10, 16(DI)
+	MOVQ R12, 24(DI)
+	MOVQ R14, 32(DI)
+	RET
+
+// func feSquare(out, a *FieldElement); x below denotes the limbs of a.
+TEXT ·feSquare(SB),NOSPLIT,$0-16
+	MOVQ out+0(FP), DI
+	MOVQ a+8(FP), SI
+
+	// r0 = x0*x0 + x1*38*x4 + x2*38*x3
+	MOVQ 0(SI), AX
+	MULQ 0(SI)
+	MOVQ AX, CX  // r00
+	MOVQ DX, R8  // r01
+
+	MOVQ 8(SI), DX
+	IMUL3Q $38, DX, AX
+	MULQ 32(SI)
+	ADDQ AX, CX
+	ADCQ DX, R8
+
+	MOVQ 16(SI), DX
+	IMUL3Q $38, DX, AX
+	MULQ 24(SI)
+	ADDQ AX, CX
+	ADCQ DX, R8
+
+	// r1 = x0*2*x1 + x2*38*x4 + x3*19*x3
+	MOVQ 0(SI), AX
+	SHLQ $1, AX
+	MULQ 8(SI)
+	MOVQ AX, R9  // r10
+	MOVQ DX, R10  // r11
+
+	MOVQ 16(SI), DX
+	IMUL3Q $38, DX, AX
+	MULQ 32(SI)
+	ADDQ AX, R9
+	ADCQ DX, R10
+
+	MOVQ 24(SI), DX
+	IMUL3Q $19, DX, AX
+	MULQ 24(SI)
+	ADDQ AX, R9
+	ADCQ DX, R10
+
+	// r2 = x0*2*x2 + x1*x1 + x3*38*x4
+	MOVQ 0(SI), AX
+	SHLQ $1, AX
+	MULQ 16(SI)
+	MOVQ AX, R11  // r20
+	MOVQ DX, R12  // r21
+
+	MOVQ 8(SI), AX
+	MULQ 8(SI)
+	ADDQ AX, R11
+	ADCQ DX, R12
+
+	MOVQ 24(SI), DX
+	IMUL3Q $38, DX, AX
+	MULQ 32(SI)
+	ADDQ AX, R11
+	ADCQ DX, R12
+
+	// r3 = x0*2*x3 + x1*2*x2 + x4*19*x4
+	MOVQ 0(SI), AX
+	SHLQ $1, AX
+	MULQ 24(SI)
+	MOVQ AX, R13  // r30
+	MOVQ DX, R14  // r31
+
+	MOVQ 8(SI), AX
+	SHLQ $1, AX
+	MULQ 16(SI)
+	ADDQ AX, R13
+	ADCQ DX, R14
+
+	MOVQ 32(SI), DX
+	IMUL3Q $19, DX, AX
+	MULQ 32(SI)
+	ADDQ AX, R13
+	ADCQ DX, R14
+
+	// r4 = x0*2*x4 + x1*2*x3 + x2*x2
+	MOVQ 0(SI), AX
+	SHLQ $1, AX
+	MULQ 32(SI)
+	MOVQ AX, R15  // r40
+	MOVQ DX, BX  // r41
+
+	MOVQ 8(SI), AX
+	SHLQ $1, AX
+	MULQ 24(SI)
+	ADDQ AX, R15
+	ADCQ DX, BX
+
+	MOVQ 16(SI), AX
+	MULQ 16(SI)
+	ADDQ AX, R15
+	ADCQ DX, BX
+
+	// Reduce
+	MOVQ $2251799813685247, AX  // (1<<51) - 1
+	SHLQ $13, CX, R8  // r01 = shld with r00
+	ANDQ AX, CX  // r00 &= mask51
+	SHLQ $13, R9, R10  // r11 = shld with r10
+	ANDQ AX, R9  // r10 &= mask51
+	ADDQ R8, R9  // r10 += r01
+	SHLQ $13, R11, R12  // r21 = shld with r20
+	ANDQ AX, R11  // r20 &= mask51
+	ADDQ R10, R11  // r20 += r11
+	SHLQ $13, R13, R14  // r31 = shld with r30
+	ANDQ AX, R13  // r30 &= mask51
+	ADDQ R12, R13  // r30 += r21
+	SHLQ $13, R15, BX  // r41 = shld with r40
+	ANDQ AX, R15  // r40 &= mask51
+	ADDQ R14, R15  // r40 += r31
+	IMUL3Q $19, BX, DX  // r41 = r41*19
+	ADDQ DX, CX  // r00 += r41
+
+	MOVQ CX, DX  // rdx <-- r00
+	SHRQ $51, DX  // rdx <-- r00 >> 51
+	ADDQ DX, R9  // r10 += r00 >> 51
+	MOVQ R9, DX  // rdx <-- r10
+	SHRQ $51, DX  // rdx <-- r10 >> 51
+	ANDQ AX, CX  // r00 &= mask51
+	ADDQ DX, R11  // r20 += r10 >> 51
+	MOVQ R11, DX  // rdx <-- r20
+	SHRQ $51, DX  // rdx <-- r20 >> 51
+	ANDQ AX, R9  // r10 &= mask51
+	ADDQ DX, R13  // r30 += r20 >> 51
+	MOVQ R13, DX  // rdx <-- r30
+	SHRQ $51, DX  // rdx <-- r30 >> 51
+	ANDQ AX, R11  // r20 &= mask51
+	ADDQ DX, R15  // r40 += r30 >> 51
+	MOVQ R15, DX  // rdx <-- r40
+	SHRQ $51, DX  // rdx <-- r40 >> 51
+	ANDQ AX, R13  // r30 &= mask51
+	IMUL3Q $19, DX, DX  // rdx <-- (r40 >> 51) * 19
+	ADDQ DX, CX  // r00 += (r40 >> 51) *19
+	ANDQ AX, R15  // r40 &= mask51
+
+	MOVQ CX, 0(DI)
+	MOVQ R9, 8(DI)
+	MOVQ R11, 16(DI)
+	MOVQ R13, 24(DI)
+	MOVQ R15, 32(DI)
+	RET
diff --git a/edwards25519/field_generic.go b/edwards25519/field_generic.go
new file mode 100644
index 0000000..522008b
--- /dev/null
+++ b/edwards25519/field_generic.go
@@ -0,0 +1,433 @@
+// +build !amd64
+
+package edwards25519
+
+// Element of the field GF(2^255 - 19) over which the elliptic
+// curve Edwards25519 is defined.
//
// The value is held as ten signed limbs with alternating 26- and 25-bit
// weights: fe[0] + fe[1]*2^26 + fe[2]*2^51 + ... + fe[9]*2^230.
type FieldElement [10]int32

// Frequently used field constants and curve points in the limb
// representation described above.
var (
	// 0, the additive identity (every limb is zero).
	feZero FieldElement
	// 1, the multiplicative identity.
	feOne = FieldElement{1, 0, 0, 0, 0, 0, 0, 0, 0, 0}
	// -1, stored with a negative lowest limb rather than reduced mod p.
	feMinusOne = FieldElement{-1, 0, 0, 0, 0, 0, 0, 0, 0, 0}

	// sqrt(-1)
	feI = FieldElement{
		-32595792, -7943725, 9377950, 3500415, 12389472,
		-272473, -25146209, -2005654, 326686, 11406482,
	}

	// -sqrt(-1); every limb is the negation of the matching limb of feI.
	feMinusI = FieldElement{
		32595792, 7943725, -9377950, -3500415, -12389472,
		272473, 25146209, 2005654, -326686, -11406482,
	}

	// parameter d of Edwards25519
	feD = FieldElement{
		-10913610, 13857413, -15372611, 6949391, 114729,
		-8787816, -6275908, -3247719, -18696448, -12055116,
	}

	// double feD, i.e. 2*d
	fe2D = FieldElement{
		-21827239, -5839606, -30745221, 13898782, 229458,
		15978800, -12551817, -6495438, 29715968, 9444199,
	}

	// 1 / sqrt(-1-d)
	feInvSqrtMinusDMinusOne = FieldElement{
		-6111485, -4156064, 27798727, -12243468, 25904040,
		-120897, -20826367, 7060776, -6093568, 1986012,
	}

	// (d-1)^2
	feDMinusOneSquared = FieldElement{
		15551795, -11097455, -13425098, -10125071, -11896535,
		10178284, -26634327, 4729244, -5282110, -10116402,
	}

	// 1 - d^2
	feOneMinusDSquared = FieldElement{
		6275446, -16617371, -22938544, -3773710, 11667077,
		7397348, -27922721, 1766195, -24433858, 672203,
	}

	// The point with X=0, Y=1, Z=1, T=0, i.e. affine (0, 1): the neutral
	// element of the curve.
	epZero = ExtendedPoint{feZero, feOne, feOne, feZero}

	// Fields are given in the order X, Y, Z, T with Z = 1.
	// NOTE(review): presumably the standard Ed25519 base point, going by
	// the name; the coordinates are not derived anywhere in this file.
	epBase = ExtendedPoint{
		FieldElement{-41032219, -27199451, -7502359, -2800332, -50176896,
			-33336453, -33570123, -31949908, -53948439, -29257844},
		FieldElement{20163995, 28827709, 65616271, 30544542, 24400674,
			29683035, 27175815, 26206403, 10372291, 5663137},
		feOne,
		FieldElement{38281802, 6116118, 27349572, 33310069, 58473857,
			22289538, 47757517, 20140834, 50497352, 6414979},
	}
)

// Sets fe to -a. Returns fe.
+func (fe *FieldElement) Neg(a *FieldElement) *FieldElement { + fe[0] = -a[0] + fe[1] = -a[1] + fe[2] = -a[2] + fe[3] = -a[3] + fe[4] = -a[4] + fe[5] = -a[5] + fe[6] = -a[6] + fe[7] = -a[7] + fe[8] = -a[8] + fe[9] = -a[9] + return fe +} + +// Sets fe to a + b without normalizing. Returns fe. +func (fe *FieldElement) add(a, b *FieldElement) *FieldElement { + fe[0] = a[0] + b[0] + fe[1] = a[1] + b[1] + fe[2] = a[2] + b[2] + fe[3] = a[3] + b[3] + fe[4] = a[4] + b[4] + fe[5] = a[5] + b[5] + fe[6] = a[6] + b[6] + fe[7] = a[7] + b[7] + fe[8] = a[8] + b[8] + fe[9] = a[9] + b[9] + return fe +} + +// Sets fe to a - b without normalizing. Returns fe. +func (fe *FieldElement) sub(a, b *FieldElement) *FieldElement { + fe[0] = a[0] - b[0] + fe[1] = a[1] - b[1] + fe[2] = a[2] - b[2] + fe[3] = a[3] - b[3] + fe[4] = a[4] - b[4] + fe[5] = a[5] - b[5] + fe[6] = a[6] - b[6] + fe[7] = a[7] - b[7] + fe[8] = a[8] - b[8] + fe[9] = a[9] - b[9] + return fe +} + +// Interprets a 3-byte unsigned little endian byte-slice as int64 +func load3(in []byte) int64 { + var r int64 + r = int64(in[0]) + r |= int64(in[1]) << 8 + r |= int64(in[2]) << 16 + return r +} + +// Interprets a 4-byte unsigned little endian byte-slice as int64 +func load4(in []byte) int64 { + var r int64 + r = int64(in[0]) + r |= int64(in[1]) << 8 + r |= int64(in[2]) << 16 + r |= int64(in[3]) << 24 + return r +} + +// Reduce the even coefficients to below 1.01*2^25 and the odd coefficients +// to below 1.01*2^24. Returns fe. +func (fe *FieldElement) normalize() *FieldElement { + return fe.setReduced( + int64(fe[0]), int64(fe[1]), int64(fe[2]), int64(fe[3]), int64(fe[4]), + int64(fe[5]), int64(fe[6]), int64(fe[7]), int64(fe[8]), int64(fe[9])) +} + +// Set fe to h0 + h1*2^26 + h2*2^51 + ... + h9*2^230. Requires a little +// headroom in the inputs to store the carries. Returns fe. 
+func (fe *FieldElement) setReduced( + h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) *FieldElement { + var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64 + + c0 = (h0 + (1 << 25)) >> 26 + h1 += c0 + h0 -= c0 << 26 + c4 = (h4 + (1 << 25)) >> 26 + h5 += c4 + h4 -= c4 << 26 + + c1 = (h1 + (1 << 24)) >> 25 + h2 += c1 + h1 -= c1 << 25 + c5 = (h5 + (1 << 24)) >> 25 + h6 += c5 + h5 -= c5 << 25 + + c2 = (h2 + (1 << 25)) >> 26 + h3 += c2 + h2 -= c2 << 26 + c6 = (h6 + (1 << 25)) >> 26 + h7 += c6 + h6 -= c6 << 26 + + c3 = (h3 + (1 << 24)) >> 25 + h4 += c3 + h3 -= c3 << 25 + c7 = (h7 + (1 << 24)) >> 25 + h8 += c7 + h7 -= c7 << 25 + + c4 = (h4 + (1 << 25)) >> 26 + h5 += c4 + h4 -= c4 << 26 + c8 = (h8 + (1 << 25)) >> 26 + h9 += c8 + h8 -= c8 << 26 + + c9 = (h9 + (1 << 24)) >> 25 + h0 += c9 * 19 + h9 -= c9 << 25 + + c0 = (h0 + (1 << 25)) >> 26 + h1 += c0 + h0 -= c0 << 26 + + fe[0] = int32(h0) + fe[1] = int32(h1) + fe[2] = int32(h2) + fe[3] = int32(h3) + fe[4] = int32(h4) + fe[5] = int32(h5) + fe[6] = int32(h6) + fe[7] = int32(h7) + fe[8] = int32(h8) + fe[9] = int32(h9) + + return fe +} + +// Set fe to a if b == 1. Requires b to be either 0 or 1. +func (fe *FieldElement) ConditionalSet(a *FieldElement, b int32) { + b = -b // b == 0b11111111111111111111111111111111 or 0. + fe[0] ^= b & (fe[0] ^ a[0]) + fe[1] ^= b & (fe[1] ^ a[1]) + fe[2] ^= b & (fe[2] ^ a[2]) + fe[3] ^= b & (fe[3] ^ a[3]) + fe[4] ^= b & (fe[4] ^ a[4]) + fe[5] ^= b & (fe[5] ^ a[5]) + fe[6] ^= b & (fe[6] ^ a[6]) + fe[7] ^= b & (fe[7] ^ a[7]) + fe[8] ^= b & (fe[8] ^ a[8]) + fe[9] ^= b & (fe[9] ^ a[9]) +} + +// Write fe to s in little endian. Returns fe. 
+func (fe *FieldElement) BytesInto(s *[32]byte) *FieldElement { + var carry [10]int32 + + q := (19*fe[9] + (1 << 24)) >> 25 + q = (fe[0] + q) >> 26 + q = (fe[1] + q) >> 25 + q = (fe[2] + q) >> 26 + q = (fe[3] + q) >> 25 + q = (fe[4] + q) >> 26 + q = (fe[5] + q) >> 25 + q = (fe[6] + q) >> 26 + q = (fe[7] + q) >> 25 + q = (fe[8] + q) >> 26 + q = (fe[9] + q) >> 25 + + fe[0] += 19 * q + + carry[0] = fe[0] >> 26 + fe[1] += carry[0] + fe[0] -= carry[0] << 26 + carry[1] = fe[1] >> 25 + fe[2] += carry[1] + fe[1] -= carry[1] << 25 + carry[2] = fe[2] >> 26 + fe[3] += carry[2] + fe[2] -= carry[2] << 26 + carry[3] = fe[3] >> 25 + fe[4] += carry[3] + fe[3] -= carry[3] << 25 + carry[4] = fe[4] >> 26 + fe[5] += carry[4] + fe[4] -= carry[4] << 26 + carry[5] = fe[5] >> 25 + fe[6] += carry[5] + fe[5] -= carry[5] << 25 + carry[6] = fe[6] >> 26 + fe[7] += carry[6] + fe[6] -= carry[6] << 26 + carry[7] = fe[7] >> 25 + fe[8] += carry[7] + fe[7] -= carry[7] << 25 + carry[8] = fe[8] >> 26 + fe[9] += carry[8] + fe[8] -= carry[8] << 26 + carry[9] = fe[9] >> 25 + fe[9] -= carry[9] << 25 + + s[0] = byte(fe[0] >> 0) + s[1] = byte(fe[0] >> 8) + s[2] = byte(fe[0] >> 16) + s[3] = byte((fe[0] >> 24) | (fe[1] << 2)) + s[4] = byte(fe[1] >> 6) + s[5] = byte(fe[1] >> 14) + s[6] = byte((fe[1] >> 22) | (fe[2] << 3)) + s[7] = byte(fe[2] >> 5) + s[8] = byte(fe[2] >> 13) + s[9] = byte((fe[2] >> 21) | (fe[3] << 5)) + s[10] = byte(fe[3] >> 3) + s[11] = byte(fe[3] >> 11) + s[12] = byte((fe[3] >> 19) | (fe[4] << 6)) + s[13] = byte(fe[4] >> 2) + s[14] = byte(fe[4] >> 10) + s[15] = byte(fe[4] >> 18) + s[16] = byte(fe[5] >> 0) + s[17] = byte(fe[5] >> 8) + s[18] = byte(fe[5] >> 16) + s[19] = byte((fe[5] >> 24) | (fe[6] << 1)) + s[20] = byte(fe[6] >> 7) + s[21] = byte(fe[6] >> 15) + s[22] = byte((fe[6] >> 23) | (fe[7] << 3)) + s[23] = byte(fe[7] >> 5) + s[24] = byte(fe[7] >> 13) + s[25] = byte((fe[7] >> 21) | (fe[8] << 4)) + s[26] = byte(fe[8] >> 4) + s[27] = byte(fe[8] >> 12) + s[28] = byte((fe[8] >> 20) | (fe[9] 
<< 6)) + s[29] = byte(fe[9] >> 2) + s[30] = byte(fe[9] >> 10) + s[31] = byte(fe[9] >> 18) + return fe +} + +// Sets fe to the little endian number encoded in buf modulo 2^255-19. +// Ignores the highest bit in buf. Returns fe. +func (fe *FieldElement) SetBytes(buf *[32]byte) *FieldElement { + return fe.setReduced( + load4(buf[:]), + load3(buf[4:])<<6, + load3(buf[7:])<<5, + load3(buf[10:])<<3, + load3(buf[13:])<<2, + load4(buf[16:]), + load3(buf[20:])<<7, + load3(buf[23:])<<5, + load3(buf[26:])<<4, + (load3(buf[29:])&8388607)<<2, + ) +} + +// Sets fe to a * b. Returns fe. +func (fe *FieldElement) Mul(a, b *FieldElement) *FieldElement { + a0 := int64(a[0]) + a1 := int64(a[1]) + a2 := int64(a[2]) + a3 := int64(a[3]) + a4 := int64(a[4]) + a5 := int64(a[5]) + a6 := int64(a[6]) + a7 := int64(a[7]) + a8 := int64(a[8]) + a9 := int64(a[9]) + + a1_2 := int64(2 * a[1]) + a3_2 := int64(2 * a[3]) + a5_2 := int64(2 * a[5]) + a7_2 := int64(2 * a[7]) + a9_2 := int64(2 * a[9]) + + b0 := int64(b[0]) + b1 := int64(b[1]) + b2 := int64(b[2]) + b3 := int64(b[3]) + b4 := int64(b[4]) + b5 := int64(b[5]) + b6 := int64(b[6]) + b7 := int64(b[7]) + b8 := int64(b[8]) + b9 := int64(b[9]) + + b1_19 := int64(19 * b[1]) + b2_19 := int64(19 * b[2]) + b3_19 := int64(19 * b[3]) + b4_19 := int64(19 * b[4]) + b5_19 := int64(19 * b[5]) + b6_19 := int64(19 * b[6]) + b7_19 := int64(19 * b[7]) + b8_19 := int64(19 * b[8]) + b9_19 := int64(19 * b[9]) + + h0 := a0*b0 + a1_2*b9_19 + a2*b8_19 + a3_2*b7_19 + a4*b6_19 + a5_2*b5_19 + a6*b4_19 + a7_2*b3_19 + a8*b2_19 + a9_2*b1_19 + h1 := a0*b1 + a1*b0 + a2*b9_19 + a3*b8_19 + a4*b7_19 + a5*b6_19 + a6*b5_19 + a7*b4_19 + a8*b3_19 + a9*b2_19 + h2 := a0*b2 + a1_2*b1 + a2*b0 + a3_2*b9_19 + a4*b8_19 + a5_2*b7_19 + a6*b6_19 + a7_2*b5_19 + a8*b4_19 + a9_2*b3_19 + h3 := a0*b3 + a1*b2 + a2*b1 + a3*b0 + a4*b9_19 + a5*b8_19 + a6*b7_19 + a7*b6_19 + a8*b5_19 + a9*b4_19 + h4 := a0*b4 + a1_2*b3 + a2*b2 + a3_2*b1 + a4*b0 + a5_2*b9_19 + a6*b8_19 + a7_2*b7_19 + a8*b6_19 + a9_2*b5_19 
+ h5 := a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0 + a6*b9_19 + a7*b8_19 + a8*b7_19 + a9*b6_19 + h6 := a0*b6 + a1_2*b5 + a2*b4 + a3_2*b3 + a4*b2 + a5_2*b1 + a6*b0 + a7_2*b9_19 + a8*b8_19 + a9_2*b7_19 + h7 := a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0 + a8*b9_19 + a9*b8_19 + h8 := a0*b8 + a1_2*b7 + a2*b6 + a3_2*b5 + a4*b4 + a5_2*b3 + a6*b2 + a7_2*b1 + a8*b0 + a9_2*b9_19 + h9 := a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0 + + return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) +} + +// Returns the unnormalized coefficients of fe^2. +func (fe *FieldElement) square() (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) { + f0 := int64(fe[0]) + f1 := int64(fe[1]) + f2 := int64(fe[2]) + f3 := int64(fe[3]) + f4 := int64(fe[4]) + f5 := int64(fe[5]) + f6 := int64(fe[6]) + f7 := int64(fe[7]) + f8 := int64(fe[8]) + f9 := int64(fe[9]) + f0_2 := int64(2 * fe[0]) + f1_2 := int64(2 * fe[1]) + f2_2 := int64(2 * fe[2]) + f3_2 := int64(2 * fe[3]) + f4_2 := int64(2 * fe[4]) + f5_2 := int64(2 * fe[5]) + f6_2 := int64(2 * fe[6]) + f7_2 := int64(2 * fe[7]) + f5_38 := 38 * f5 + f6_19 := 19 * f6 + f7_38 := 38 * f7 + f8_19 := 19 * f8 + f9_38 := 38 * f9 + + h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38 + h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19 + h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19 + h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38 + h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38 + h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19 + h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19 + h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38 + h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38 + h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5 + + return +} + +// Sets fe to a^2. Returns fe. 
+func (fe *FieldElement) Square(a *FieldElement) *FieldElement { + h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := a.square() + return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) +} + +// Sets fe to 2 * a^2. Returns fe. +func (fe *FieldElement) DoubledSquare(a *FieldElement) *FieldElement { + h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := a.square() + h0 += h0 + h1 += h1 + h2 += h2 + h3 += h3 + h4 += h4 + h5 += h5 + h6 += h6 + h7 += h7 + h8 += h8 + h9 += h9 + return fe.setReduced(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9) +} diff --git a/edwards25519/field_test.go b/edwards25519/field_test.go index ccc3f7c..ddae923 100644 --- a/edwards25519/field_test.go +++ b/edwards25519/field_test.go @@ -57,6 +57,52 @@ func TestFeMul(t *testing.T) { } } +func TestFeSquare(t *testing.T) { + var bi1, bi2 big.Int + var fe1, fe2 edwards25519.FieldElement + for i := 0; i < 100; i++ { + bi1.Rand(rnd, &bi25519) + bi2.Mul(&bi1, &bi1) + bi2.Mod(&bi2, &bi25519) + fe1.SetBigInt(&bi1) + if fe2.Square(&fe1).BigInt().Cmp(&bi2) != 0 { + t.Fatalf("%v^2 = %v != %v", &bi1, &bi2, &fe2) + } + } +} + +func TestFeSub(t *testing.T) { + var bi1, bi2, bi3 big.Int + var fe1, fe2, fe3 edwards25519.FieldElement + for i := 0; i < 100; i++ { + bi1.Rand(rnd, &bi25519) + bi2.Rand(rnd, &bi25519) + bi3.Sub(&bi1, &bi2) + bi3.Mod(&bi3, &bi25519) + fe1.SetBigInt(&bi1) + fe2.SetBigInt(&bi2) + if fe3.Sub(&fe1, &fe2).BigInt().Cmp(&bi3) != 0 { + t.Fatalf("%v - %v = %v != %v", &bi1, &bi2, &bi3, &fe3) + } + } +} + +func TestFeAdd(t *testing.T) { + var bi1, bi2, bi3 big.Int + var fe1, fe2, fe3 edwards25519.FieldElement + for i := 0; i < 100; i++ { + bi1.Rand(rnd, &bi25519) + bi2.Rand(rnd, &bi25519) + bi3.Add(&bi1, &bi2) + bi3.Mod(&bi3, &bi25519) + fe1.SetBigInt(&bi1) + fe2.SetBigInt(&bi2) + if fe3.Add(&fe1, &fe2).BigInt().Cmp(&bi3) != 0 { + t.Fatalf("%v + %v = %v != %v", &bi1, &bi2, &bi3, &fe3) + } + } +} + func TestFeInverse(t *testing.T) { var bi1, bi2 big.Int var fe1, fe2 edwards25519.FieldElement