// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ed25519

// edwards25519.go implements operations in GF(2**255-19) and on an Edwards
// curve that is isomorphic to curve25519. See http://ed25519.cr.yp.to/.

// This code is a port of the public domain, "ref10" implementation of ed25519
// from SUPERCOP.

// fieldElement represents an element of the field GF(2^255 - 19).  An element
// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
// t[3]+2^102 t[4]+...+2^230 t[9].  In other words the ten limbs alternate
// between 26 and 25 bits of weight, starting with 26 bits for t[0].
//
// Limbs are signed, and the same field value can have many limb
// representations.  Bounds on each t[i] vary depending on context; see the
// pre- and postconditions documented on the individual operations.
type fieldElement [10]int32

// feZero sets fe to the additive identity: every limb becomes 0.
func feZero(fe *fieldElement) {
	*fe = fieldElement{}
}

// feOne sets fe to the multiplicative identity: limb 0 becomes 1 and all
// other limbs become 0.
func feOne(fe *fieldElement) {
	*fe = fieldElement{0: 1}
}

// feAdd sets dst = a + b, limb by limb.  No carrying or reduction is
// performed, so limb magnitudes of the result are the sums of the input
// magnitudes.  dst may alias a and/or b.
func feAdd(dst, a, b *fieldElement) {
	for i := range dst {
		dst[i] = a[i] + b[i]
	}
}

// feSub sets dst = a - b, limb by limb.  No carrying or reduction is
// performed, so individual limbs of the result may be negative.  dst may
// alias a and/or b.
func feSub(dst, a, b *fieldElement) {
	for i := range dst {
		dst[i] = a[i] - b[i]
	}
}

// feCopy sets dst to an exact limb-for-limb copy of src.
func feCopy(dst, src *fieldElement) {
	*dst = *src
}

// feCMove conditionally overwrites f with g without branching on b:
//
//   b == 1: f becomes a copy of g
//   b == 0: f is left unchanged
//
// g is never modified.
//
// Preconditions: b in {0,1}.
func feCMove(f, g *fieldElement, b int32) {
	// -b is all-one bits when b == 1 and all-zero bits when b == 0, so the
	// XOR below either swaps in g's limb or is a no-op.
	mask := -b
	for i := range f {
		f[i] ^= mask & (f[i] ^ g[i])
	}
}

// load3 interprets the first three bytes of in as an unsigned little-endian
// integer and returns it as an int64.  in must hold at least three bytes.
func load3(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16
}

// load4 interprets the first four bytes of in as an unsigned little-endian
// integer and returns it as a (non-negative) int64.  in must hold at least
// four bytes.
func load4(in []byte) int64 {
	return int64(in[0]) | int64(in[1])<<8 | int64(in[2])<<16 | int64(in[3])<<24
}

// feFromBytes decodes the first 32 bytes of src, read as a little-endian
// integer, into dst.  The most significant bit of src[31] is ignored (it is
// masked off when loading the last limb).  src must hold at least 32 bytes;
// shorter input panics with an index-out-of-range error.
func feFromBytes(dst *fieldElement, src []byte) {
	// Load overlapping 3- and 4-byte windows and shift each so that its
	// bits line up with the start of the corresponding limb
	// (bit offsets 0, 26, 51, 77, 102, 128, 153, 179, 204, 230).
	h0 := load4(src[:])
	h1 := load3(src[4:]) << 6
	h2 := load3(src[7:]) << 5
	h3 := load3(src[10:]) << 3
	h4 := load3(src[13:]) << 2
	h5 := load4(src[16:])
	h6 := load3(src[20:]) << 7
	h7 := load3(src[23:]) << 5
	h8 := load3(src[26:]) << 4
	// 8388607 == 2^23-1: drops bit 255 of the input.
	h9 := (load3(src[29:]) & 8388607) << 2

	// First pass: carry out of the odd (25-bit) limbs.  The carry out of
	// h9 wraps around into h0 multiplied by 19, since 2^255 = 19 mod p.
	// Adding half the limb size before shifting rounds to nearest, which
	// leaves each limb in a balanced (signed) range.
	var carry [10]int64
	carry[9] = (h9 + 1<<24) >> 25
	h0 += carry[9] * 19
	h9 -= carry[9] << 25
	carry[1] = (h1 + 1<<24) >> 25
	h2 += carry[1]
	h1 -= carry[1] << 25
	carry[3] = (h3 + 1<<24) >> 25
	h4 += carry[3]
	h3 -= carry[3] << 25
	carry[5] = (h5 + 1<<24) >> 25
	h6 += carry[5]
	h5 -= carry[5] << 25
	carry[7] = (h7 + 1<<24) >> 25
	h8 += carry[7]
	h7 -= carry[7] << 25

	// Second pass: carry out of the even (26-bit) limbs.
	carry[0] = (h0 + 1<<25) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	carry[2] = (h2 + 1<<25) >> 26
	h3 += carry[2]
	h2 -= carry[2] << 26
	carry[4] = (h4 + 1<<25) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	carry[6] = (h6 + 1<<25) >> 26
	h7 += carry[6]
	h6 -= carry[6] << 26
	carry[8] = (h8 + 1<<25) >> 26
	h9 += carry[8]
	h8 -= carry[8] << 26

	dst[0] = int32(h0)
	dst[1] = int32(h1)
	dst[2] = int32(h2)
	dst[3] = int32(h3)
	dst[4] = int32(h4)
	dst[5] = int32(h5)
	dst[6] = int32(h6)
	dst[7] = int32(h7)
	dst[8] = int32(h8)
	dst[9] = int32(h9)
}

// feToBytes marshals h to s as the 32-byte little-endian encoding of the
// unique representative of h in [0, 2^255-20].
//
// s must hold at least 32 bytes; only s[0..31] are written.  h is read-only:
// the reduction is carried out on a local copy so the caller's limbs are left
// exactly as they were.
//
// Preconditions:
//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Write p=2^255-19; q=floor(h/p).
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
//
// Proof:
//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
//
//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
//   Then 0<y<1.
//
//   Write r=h-pq.
//   Have 0<=r<=p-1=2^255-20.
//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
//
//   Write x=r+19(2^-255)r+y.
//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
//
//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
func feToBytes(s []byte, h *fieldElement) {
	_ = s[31] // fail before writing anything if s is too short

	// Reduce a private copy so that serializing never alters the input.
	t := *h
	var carry [10]int32

	// Compute q as described above by propagating the rounding term
	// through every limb.
	q := (19*t[9] + (1 << 24)) >> 25
	q = (t[0] + q) >> 26
	q = (t[1] + q) >> 25
	q = (t[2] + q) >> 26
	q = (t[3] + q) >> 25
	q = (t[4] + q) >> 26
	q = (t[5] + q) >> 25
	q = (t[6] + q) >> 26
	q = (t[7] + q) >> 25
	q = (t[8] + q) >> 26
	q = (t[9] + q) >> 25

	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
	t[0] += 19 * q
	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.

	// Sequential carry chain with floor rounding: afterwards every limb is
	// non-negative and strictly below its radix.
	carry[0] = t[0] >> 26
	t[1] += carry[0]
	t[0] -= carry[0] << 26
	carry[1] = t[1] >> 25
	t[2] += carry[1]
	t[1] -= carry[1] << 25
	carry[2] = t[2] >> 26
	t[3] += carry[2]
	t[2] -= carry[2] << 26
	carry[3] = t[3] >> 25
	t[4] += carry[3]
	t[3] -= carry[3] << 25
	carry[4] = t[4] >> 26
	t[5] += carry[4]
	t[4] -= carry[4] << 26
	carry[5] = t[5] >> 25
	t[6] += carry[5]
	t[5] -= carry[5] << 25
	carry[6] = t[6] >> 26
	t[7] += carry[6]
	t[6] -= carry[6] << 26
	carry[7] = t[7] >> 25
	t[8] += carry[7]
	t[7] -= carry[7] << 25
	carry[8] = t[8] >> 26
	t[9] += carry[8]
	t[8] -= carry[8] << 26
	carry[9] = t[9] >> 25
	t[9] -= carry[9] << 25
	// h10 = carry9

	// Goal: Output t[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
	// Have t[0]+...+2^230 t[9] between 0 and 2^255-1;
	// evidently 2^255 h10-2^255 q = 0.
	// Goal: Output t[0]+...+2^230 t[9].

	// Pack the ten 26/25-bit limbs into 32 little-endian bytes.
	s[0] = byte(t[0] >> 0)
	s[1] = byte(t[0] >> 8)
	s[2] = byte(t[0] >> 16)
	s[3] = byte((t[0] >> 24) | (t[1] << 2))
	s[4] = byte(t[1] >> 6)
	s[5] = byte(t[1] >> 14)
	s[6] = byte((t[1] >> 22) | (t[2] << 3))
	s[7] = byte(t[2] >> 5)
	s[8] = byte(t[2] >> 13)
	s[9] = byte((t[2] >> 21) | (t[3] << 5))
	s[10] = byte(t[3] >> 3)
	s[11] = byte(t[3] >> 11)
	s[12] = byte((t[3] >> 19) | (t[4] << 6))
	s[13] = byte(t[4] >> 2)
	s[14] = byte(t[4] >> 10)
	s[15] = byte(t[4] >> 18)
	s[16] = byte(t[5] >> 0)
	s[17] = byte(t[5] >> 8)
	s[18] = byte(t[5] >> 16)
	s[19] = byte((t[5] >> 24) | (t[6] << 1))
	s[20] = byte(t[6] >> 7)
	s[21] = byte(t[6] >> 15)
	s[22] = byte((t[6] >> 23) | (t[7] << 3))
	s[23] = byte(t[7] >> 5)
	s[24] = byte(t[7] >> 13)
	s[25] = byte((t[7] >> 21) | (t[8] << 4))
	s[26] = byte(t[8] >> 4)
	s[27] = byte(t[8] >> 12)
	s[28] = byte((t[8] >> 20) | (t[9] << 6))
	s[29] = byte(t[9] >> 2)
	s[30] = byte(t[9] >> 10)
	s[31] = byte(t[9] >> 18)
}

// feIsNegative returns 1 if the canonical representative of h in
// [0, 2^255-20] is odd ("negative" in the ed25519 sign convention) and 0 if
// it is even.
//
// h is not modified.  The limb bounds required are the same as for
// feToBytes, since the quotient q is computed the same way.
func feIsNegative(h *fieldElement) byte {
	// q = floor(h/p), obtained by pushing the rounding term through all ten
	// limbs; even-indexed limbs carry 26 bits, odd-indexed limbs 25.
	q := (19*h[9] + (1 << 24)) >> 25
	for i, limb := range h {
		q = (limb + q) >> uint(26-(i&1))
	}

	// The canonical value is h - p*q = h + 19*q - 2^255*q.  Every carry that
	// the full reduction would subtract from limb 0 is a multiple of 2^26,
	// so the parity of the result is simply bit 0 of h[0] + 19*q.
	return byte(h[0]+19*q) & 1
}

// feIsNonZero returns 1 if f is non-zero as a field element and 0 if it is
// zero.  The decision is made on the 32-byte encoding produced by feToBytes
// and uses only OR/shift operations, with no data-dependent branch.
func feIsNonZero(f *fieldElement) int32 {
	var encoded [32]byte
	feToBytes(encoded[:], f)

	// OR all bytes together: acc is zero iff every encoded byte is zero.
	var acc uint8
	for _, b := range encoded {
		acc |= b
	}

	// Fold the eight bits of acc down into bit 0.
	acc |= acc >> 4
	acc |= acc >> 2
	acc |= acc >> 1
	return int32(acc & 1)
}

// feNeg sets h = -f by negating every limb.  h may alias f.
//
// Preconditions:
//    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Postconditions:
//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feNeg(h, f *fieldElement) {
	for i := range h {
		h[i] = -f[i]
	}
}

// feMul calculates h = f * g
// Can overlap h with f or g: all limbs of f and g are copied into locals
// before any limb of h is written.
//
// Preconditions:
//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
//
// Notes on implementation strategy:
//
// Using schoolbook multiplication.
// Karatsuba would save a little in some cost models.
//
// Most multiplications by 2 and 19 are 32-bit precomputations;
// cheaper than 64-bit postcomputations.
//
// There is one remaining multiplication by 19 in the carry chain;
// one *19 precomputation can be merged into this,
// but the resulting data flow is considerably less clean.
//
// There are 12 carries below.
// 10 of them are 2-way parallelizable and vectorizable.
// Can get away with 11 carries, but then data flow is much deeper.
//
// With tighter constraints on inputs can squeeze carries into int32.
func feMul(h, f, g *fieldElement) {
	// Snapshot the inputs so that h may alias f or g.
	f0 := f[0]
	f1 := f[1]
	f2 := f[2]
	f3 := f[3]
	f4 := f[4]
	f5 := f[5]
	f6 := f[6]
	f7 := f[7]
	f8 := f[8]
	f9 := f[9]
	g0 := g[0]
	g1 := g[1]
	g2 := g[2]
	g3 := g[3]
	g4 := g[4]
	g5 := g[5]
	g6 := g[6]
	g7 := g[7]
	g8 := g[8]
	g9 := g[9]
	// 32-bit precomputations.  Products whose limb indices sum to 10 or
	// more wrap around modulo 2^255-19 and pick up a factor of 19; products
	// of two odd-indexed limbs pick up a factor of 2 because of the
	// alternating 26/25-bit limb sizes.
	g1_19 := 19 * g1 /* 1.4*2^29 */
	g2_19 := 19 * g2 /* 1.4*2^30; still ok */
	g3_19 := 19 * g3
	g4_19 := 19 * g4
	g5_19 := 19 * g5
	g6_19 := 19 * g6
	g7_19 := 19 * g7
	g8_19 := 19 * g8
	g9_19 := 19 * g9
	f1_2 := 2 * f1
	f3_2 := 2 * f3
	f5_2 := 2 * f5
	f7_2 := 2 * f7
	f9_2 := 2 * f9
	// All 100 partial products, computed in 64 bits.  The suffix of each
	// name records the extra constant factor folded into the product.
	f0g0 := int64(f0) * int64(g0)
	f0g1 := int64(f0) * int64(g1)
	f0g2 := int64(f0) * int64(g2)
	f0g3 := int64(f0) * int64(g3)
	f0g4 := int64(f0) * int64(g4)
	f0g5 := int64(f0) * int64(g5)
	f0g6 := int64(f0) * int64(g6)
	f0g7 := int64(f0) * int64(g7)
	f0g8 := int64(f0) * int64(g8)
	f0g9 := int64(f0) * int64(g9)
	f1g0 := int64(f1) * int64(g0)
	f1g1_2 := int64(f1_2) * int64(g1)
	f1g2 := int64(f1) * int64(g2)
	f1g3_2 := int64(f1_2) * int64(g3)
	f1g4 := int64(f1) * int64(g4)
	f1g5_2 := int64(f1_2) * int64(g5)
	f1g6 := int64(f1) * int64(g6)
	f1g7_2 := int64(f1_2) * int64(g7)
	f1g8 := int64(f1) * int64(g8)
	f1g9_38 := int64(f1_2) * int64(g9_19)
	f2g0 := int64(f2) * int64(g0)
	f2g1 := int64(f2) * int64(g1)
	f2g2 := int64(f2) * int64(g2)
	f2g3 := int64(f2) * int64(g3)
	f2g4 := int64(f2) * int64(g4)
	f2g5 := int64(f2) * int64(g5)
	f2g6 := int64(f2) * int64(g6)
	f2g7 := int64(f2) * int64(g7)
	f2g8_19 := int64(f2) * int64(g8_19)
	f2g9_19 := int64(f2) * int64(g9_19)
	f3g0 := int64(f3) * int64(g0)
	f3g1_2 := int64(f3_2) * int64(g1)
	f3g2 := int64(f3) * int64(g2)
	f3g3_2 := int64(f3_2) * int64(g3)
	f3g4 := int64(f3) * int64(g4)
	f3g5_2 := int64(f3_2) * int64(g5)
	f3g6 := int64(f3) * int64(g6)
	f3g7_38 := int64(f3_2) * int64(g7_19)
	f3g8_19 := int64(f3) * int64(g8_19)
	f3g9_38 := int64(f3_2) * int64(g9_19)
	f4g0 := int64(f4) * int64(g0)
	f4g1 := int64(f4) * int64(g1)
	f4g2 := int64(f4) * int64(g2)
	f4g3 := int64(f4) * int64(g3)
	f4g4 := int64(f4) * int64(g4)
	f4g5 := int64(f4) * int64(g5)
	f4g6_19 := int64(f4) * int64(g6_19)
	f4g7_19 := int64(f4) * int64(g7_19)
	f4g8_19 := int64(f4) * int64(g8_19)
	f4g9_19 := int64(f4) * int64(g9_19)
	f5g0 := int64(f5) * int64(g0)
	f5g1_2 := int64(f5_2) * int64(g1)
	f5g2 := int64(f5) * int64(g2)
	f5g3_2 := int64(f5_2) * int64(g3)
	f5g4 := int64(f5) * int64(g4)
	f5g5_38 := int64(f5_2) * int64(g5_19)
	f5g6_19 := int64(f5) * int64(g6_19)
	f5g7_38 := int64(f5_2) * int64(g7_19)
	f5g8_19 := int64(f5) * int64(g8_19)
	f5g9_38 := int64(f5_2) * int64(g9_19)
	f6g0 := int64(f6) * int64(g0)
	f6g1 := int64(f6) * int64(g1)
	f6g2 := int64(f6) * int64(g2)
	f6g3 := int64(f6) * int64(g3)
	f6g4_19 := int64(f6) * int64(g4_19)
	f6g5_19 := int64(f6) * int64(g5_19)
	f6g6_19 := int64(f6) * int64(g6_19)
	f6g7_19 := int64(f6) * int64(g7_19)
	f6g8_19 := int64(f6) * int64(g8_19)
	f6g9_19 := int64(f6) * int64(g9_19)
	f7g0 := int64(f7) * int64(g0)
	f7g1_2 := int64(f7_2) * int64(g1)
	f7g2 := int64(f7) * int64(g2)
	f7g3_38 := int64(f7_2) * int64(g3_19)
	f7g4_19 := int64(f7) * int64(g4_19)
	f7g5_38 := int64(f7_2) * int64(g5_19)
	f7g6_19 := int64(f7) * int64(g6_19)
	f7g7_38 := int64(f7_2) * int64(g7_19)
	f7g8_19 := int64(f7) * int64(g8_19)
	f7g9_38 := int64(f7_2) * int64(g9_19)
	f8g0 := int64(f8) * int64(g0)
	f8g1 := int64(f8) * int64(g1)
	f8g2_19 := int64(f8) * int64(g2_19)
	f8g3_19 := int64(f8) * int64(g3_19)
	f8g4_19 := int64(f8) * int64(g4_19)
	f8g5_19 := int64(f8) * int64(g5_19)
	f8g6_19 := int64(f8) * int64(g6_19)
	f8g7_19 := int64(f8) * int64(g7_19)
	f8g8_19 := int64(f8) * int64(g8_19)
	f8g9_19 := int64(f8) * int64(g9_19)
	f9g0 := int64(f9) * int64(g0)
	f9g1_38 := int64(f9_2) * int64(g1_19)
	f9g2_19 := int64(f9) * int64(g2_19)
	f9g3_38 := int64(f9_2) * int64(g3_19)
	f9g4_19 := int64(f9) * int64(g4_19)
	f9g5_38 := int64(f9_2) * int64(g5_19)
	f9g6_19 := int64(f9) * int64(g6_19)
	f9g7_38 := int64(f9_2) * int64(g7_19)
	f9g8_19 := int64(f9) * int64(g8_19)
	f9g9_38 := int64(f9_2) * int64(g9_19)
	// Column sums: hk collects every product fi*gj with i+j == k (mod 10).
	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
	var carry [10]int64

	/*
	  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
	    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
	  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
	    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
	*/

	// Carry chain.  Each step rounds to nearest (hence the added half
	// radix), moves the carry into the next limb and removes it from the
	// current one.  Two independent chains (0->1->2->3->4 and
	// 4->5->6->7->8) are interleaved.
	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	/* |h0| <= 2^25 */
	/* |h4| <= 2^25 */
	/* |h1| <= 1.51*2^58 */
	/* |h5| <= 1.51*2^58 */

	carry[1] = (h1 + (1 << 24)) >> 25
	h2 += carry[1]
	h1 -= carry[1] << 25
	carry[5] = (h5 + (1 << 24)) >> 25
	h6 += carry[5]
	h5 -= carry[5] << 25
	/* |h1| <= 2^24; from now on fits into int32 */
	/* |h5| <= 2^24; from now on fits into int32 */
	/* |h2| <= 1.21*2^59 */
	/* |h6| <= 1.21*2^59 */

	carry[2] = (h2 + (1 << 25)) >> 26
	h3 += carry[2]
	h2 -= carry[2] << 26
	carry[6] = (h6 + (1 << 25)) >> 26
	h7 += carry[6]
	h6 -= carry[6] << 26
	/* |h2| <= 2^25; from now on fits into int32 unchanged */
	/* |h6| <= 2^25; from now on fits into int32 unchanged */
	/* |h3| <= 1.51*2^58 */
	/* |h7| <= 1.51*2^58 */

	carry[3] = (h3 + (1 << 24)) >> 25
	h4 += carry[3]
	h3 -= carry[3] << 25
	carry[7] = (h7 + (1 << 24)) >> 25
	h8 += carry[7]
	h7 -= carry[7] << 25
	/* |h3| <= 2^24; from now on fits into int32 unchanged */
	/* |h7| <= 2^24; from now on fits into int32 unchanged */
	/* |h4| <= 1.52*2^33 */
	/* |h8| <= 1.52*2^33 */

	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	carry[8] = (h8 + (1 << 25)) >> 26
	h9 += carry[8]
	h8 -= carry[8] << 26
	/* |h4| <= 2^25; from now on fits into int32 unchanged */
	/* |h8| <= 2^25; from now on fits into int32 unchanged */
	/* |h5| <= 1.01*2^24 */
	/* |h9| <= 1.51*2^58 */

	// The carry out of the top limb wraps to limb 0 with a factor of 19.
	carry[9] = (h9 + (1 << 24)) >> 25
	h0 += carry[9] * 19
	h9 -= carry[9] << 25
	/* |h9| <= 2^24; from now on fits into int32 unchanged */
	/* |h0| <= 1.8*2^37 */

	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	/* |h0| <= 2^25; from now on fits into int32 unchanged */
	/* |h1| <= 1.01*2^24 */

	h[0] = int32(h0)
	h[1] = int32(h1)
	h[2] = int32(h2)
	h[3] = int32(h3)
	h[4] = int32(h4)
	h[5] = int32(h5)
	h[6] = int32(h6)
	h[7] = int32(h7)
	h[8] = int32(h8)
	h[9] = int32(h9)
}

// feSquare calculates h = f*f. Can overlap h with f: all limbs of f are
// copied into locals before any limb of h is written.
//
// The structure mirrors feMul, but the symmetry fi*fj == fj*fi is exploited
// so that each cross product is computed once with a doubled operand.
//
// Preconditions:
//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
//
// Postconditions:
//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
func feSquare(h, f *fieldElement) {
	f0 := f[0]
	f1 := f[1]
	f2 := f[2]
	f3 := f[3]
	f4 := f[4]
	f5 := f[5]
	f6 := f[6]
	f7 := f[7]
	f8 := f[8]
	f9 := f[9]
	// 32-bit precomputations: doubled limbs for the cross terms, and limbs
	// scaled by 19 (or 38 = 2*19) for the terms that wrap around modulo
	// 2^255-19.
	f0_2 := 2 * f0
	f1_2 := 2 * f1
	f2_2 := 2 * f2
	f3_2 := 2 * f3
	f4_2 := 2 * f4
	f5_2 := 2 * f5
	f6_2 := 2 * f6
	f7_2 := 2 * f7
	f5_38 := 38 * f5 // 1.31*2^30
	f6_19 := 19 * f6 // 1.31*2^30
	f7_38 := 38 * f7 // 1.31*2^30
	f8_19 := 19 * f8 // 1.31*2^30
	f9_38 := 38 * f9 // 1.31*2^30
	// 64-bit partial products.  The suffix of each name records the total
	// constant factor folded into the product.
	f0f0 := int64(f0) * int64(f0)
	f0f1_2 := int64(f0_2) * int64(f1)
	f0f2_2 := int64(f0_2) * int64(f2)
	f0f3_2 := int64(f0_2) * int64(f3)
	f0f4_2 := int64(f0_2) * int64(f4)
	f0f5_2 := int64(f0_2) * int64(f5)
	f0f6_2 := int64(f0_2) * int64(f6)
	f0f7_2 := int64(f0_2) * int64(f7)
	f0f8_2 := int64(f0_2) * int64(f8)
	f0f9_2 := int64(f0_2) * int64(f9)
	f1f1_2 := int64(f1_2) * int64(f1)
	f1f2_2 := int64(f1_2) * int64(f2)
	f1f3_4 := int64(f1_2) * int64(f3_2)
	f1f4_2 := int64(f1_2) * int64(f4)
	f1f5_4 := int64(f1_2) * int64(f5_2)
	f1f6_2 := int64(f1_2) * int64(f6)
	f1f7_4 := int64(f1_2) * int64(f7_2)
	f1f8_2 := int64(f1_2) * int64(f8)
	f1f9_76 := int64(f1_2) * int64(f9_38)
	f2f2 := int64(f2) * int64(f2)
	f2f3_2 := int64(f2_2) * int64(f3)
	f2f4_2 := int64(f2_2) * int64(f4)
	f2f5_2 := int64(f2_2) * int64(f5)
	f2f6_2 := int64(f2_2) * int64(f6)
	f2f7_2 := int64(f2_2) * int64(f7)
	f2f8_38 := int64(f2_2) * int64(f8_19)
	f2f9_38 := int64(f2) * int64(f9_38)
	f3f3_2 := int64(f3_2) * int64(f3)
	f3f4_2 := int64(f3_2) * int64(f4)
	f3f5_4 := int64(f3_2) * int64(f5_2)
	f3f6_2 := int64(f3_2) * int64(f6)
	f3f7_76 := int64(f3_2) * int64(f7_38)
	f3f8_38 := int64(f3_2) * int64(f8_19)
	f3f9_76 := int64(f3_2) * int64(f9_38)
	f4f4 := int64(f4) * int64(f4)
	f4f5_2 := int64(f4_2) * int64(f5)
	f4f6_38 := int64(f4_2) * int64(f6_19)
	f4f7_38 := int64(f4) * int64(f7_38)
	f4f8_38 := int64(f4_2) * int64(f8_19)
	f4f9_38 := int64(f4) * int64(f9_38)
	f5f5_38 := int64(f5) * int64(f5_38)
	f5f6_38 := int64(f5_2) * int64(f6_19)
	f5f7_76 := int64(f5_2) * int64(f7_38)
	f5f8_38 := int64(f5_2) * int64(f8_19)
	f5f9_76 := int64(f5_2) * int64(f9_38)
	f6f6_19 := int64(f6) * int64(f6_19)
	f6f7_38 := int64(f6) * int64(f7_38)
	f6f8_38 := int64(f6_2) * int64(f8_19)
	f6f9_38 := int64(f6) * int64(f9_38)
	f7f7_38 := int64(f7) * int64(f7_38)
	f7f8_38 := int64(f7_2) * int64(f8_19)
	f7f9_76 := int64(f7_2) * int64(f9_38)
	f8f8_19 := int64(f8) * int64(f8_19)
	f8f9_38 := int64(f8) * int64(f9_38)
	f9f9_38 := int64(f9) * int64(f9_38)
	// Column sums: hk collects every product fi*fj with i+j == k (mod 10).
	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
	var carry [10]int64

	// Carry chain, identical in shape to the one in feMul: round-to-nearest
	// carries along two interleaved chains, followed by the wrap-around
	// carry from limb 9 into limb 0 (times 19) and one final carry 0 -> 1.
	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26

	carry[1] = (h1 + (1 << 24)) >> 25
	h2 += carry[1]
	h1 -= carry[1] << 25
	carry[5] = (h5 + (1 << 24)) >> 25
	h6 += carry[5]
	h5 -= carry[5] << 25

	carry[2] = (h2 + (1 << 25)) >> 26
	h3 += carry[2]
	h2 -= carry[2] << 26
	carry[6] = (h6 + (1 << 25)) >> 26
	h7 += carry[6]
	h6 -= carry[6] << 26

	carry[3] = (h3 + (1 << 24)) >> 25
	h4 += carry[3]
	h3 -= carry[3] << 25
	carry[7] = (h7 + (1 << 24)) >> 25
	h8 += carry[7]
	h7 -= carry[7] << 25

	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	carry[8] = (h8 + (1 << 25)) >> 26
	h9 += carry[8]
	h8 -= carry[8] << 26

	carry[9] = (h9 + (1 << 24)) >> 25
	h0 += carry[9] * 19
	h9 -= carry[9] << 25

	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26

	h[0] = int32(h0)
	h[1] = int32(h1)
	h[2] = int32(h2)
	h[3] = int32(h3)
	h[4] = int32(h4)
	h[5] = int32(h5)
	h[6] = int32(h6)
	h[7] = int32(h7)
	h[8] = int32(h8)
	h[9] = int32(h9)
}

// feSquare2 sets h = 2 * f * f
//
// Can overlap h with f: all limbs of f are copied into locals before any limb
// of h is written.
//
// The body is the same as feSquare except that every 64-bit column sum is
// doubled before the carry chain runs.
//
// Preconditions:
//    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
//
// Postconditions:
//    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
// See fe_mul.c (in the ref10 C sources this file was ported from) for
// discussion of implementation strategy; the notes on feMul carry the same
// discussion.
func feSquare2(h, f *fieldElement) {
	f0 := f[0]
	f1 := f[1]
	f2 := f[2]
	f3 := f[3]
	f4 := f[4]
	f5 := f[5]
	f6 := f[6]
	f7 := f[7]
	f8 := f[8]
	f9 := f[9]
	// 32-bit precomputations: doubled limbs for the cross terms, and limbs
	// scaled by 19 (or 38 = 2*19) for the terms that wrap around modulo
	// 2^255-19.
	f0_2 := 2 * f0
	f1_2 := 2 * f1
	f2_2 := 2 * f2
	f3_2 := 2 * f3
	f4_2 := 2 * f4
	f5_2 := 2 * f5
	f6_2 := 2 * f6
	f7_2 := 2 * f7
	f5_38 := 38 * f5 // 1.959375*2^30
	f6_19 := 19 * f6 // 1.959375*2^30
	f7_38 := 38 * f7 // 1.959375*2^30
	f8_19 := 19 * f8 // 1.959375*2^30
	f9_38 := 38 * f9 // 1.959375*2^30
	// 64-bit partial products.  The suffix of each name records the total
	// constant factor folded into the product.
	f0f0 := int64(f0) * int64(f0)
	f0f1_2 := int64(f0_2) * int64(f1)
	f0f2_2 := int64(f0_2) * int64(f2)
	f0f3_2 := int64(f0_2) * int64(f3)
	f0f4_2 := int64(f0_2) * int64(f4)
	f0f5_2 := int64(f0_2) * int64(f5)
	f0f6_2 := int64(f0_2) * int64(f6)
	f0f7_2 := int64(f0_2) * int64(f7)
	f0f8_2 := int64(f0_2) * int64(f8)
	f0f9_2 := int64(f0_2) * int64(f9)
	f1f1_2 := int64(f1_2) * int64(f1)
	f1f2_2 := int64(f1_2) * int64(f2)
	f1f3_4 := int64(f1_2) * int64(f3_2)
	f1f4_2 := int64(f1_2) * int64(f4)
	f1f5_4 := int64(f1_2) * int64(f5_2)
	f1f6_2 := int64(f1_2) * int64(f6)
	f1f7_4 := int64(f1_2) * int64(f7_2)
	f1f8_2 := int64(f1_2) * int64(f8)
	f1f9_76 := int64(f1_2) * int64(f9_38)
	f2f2 := int64(f2) * int64(f2)
	f2f3_2 := int64(f2_2) * int64(f3)
	f2f4_2 := int64(f2_2) * int64(f4)
	f2f5_2 := int64(f2_2) * int64(f5)
	f2f6_2 := int64(f2_2) * int64(f6)
	f2f7_2 := int64(f2_2) * int64(f7)
	f2f8_38 := int64(f2_2) * int64(f8_19)
	f2f9_38 := int64(f2) * int64(f9_38)
	f3f3_2 := int64(f3_2) * int64(f3)
	f3f4_2 := int64(f3_2) * int64(f4)
	f3f5_4 := int64(f3_2) * int64(f5_2)
	f3f6_2 := int64(f3_2) * int64(f6)
	f3f7_76 := int64(f3_2) * int64(f7_38)
	f3f8_38 := int64(f3_2) * int64(f8_19)
	f3f9_76 := int64(f3_2) * int64(f9_38)
	f4f4 := int64(f4) * int64(f4)
	f4f5_2 := int64(f4_2) * int64(f5)
	f4f6_38 := int64(f4_2) * int64(f6_19)
	f4f7_38 := int64(f4) * int64(f7_38)
	f4f8_38 := int64(f4_2) * int64(f8_19)
	f4f9_38 := int64(f4) * int64(f9_38)
	f5f5_38 := int64(f5) * int64(f5_38)
	f5f6_38 := int64(f5_2) * int64(f6_19)
	f5f7_76 := int64(f5_2) * int64(f7_38)
	f5f8_38 := int64(f5_2) * int64(f8_19)
	f5f9_76 := int64(f5_2) * int64(f9_38)
	f6f6_19 := int64(f6) * int64(f6_19)
	f6f7_38 := int64(f6) * int64(f7_38)
	f6f8_38 := int64(f6_2) * int64(f8_19)
	f6f9_38 := int64(f6) * int64(f9_38)
	f7f7_38 := int64(f7) * int64(f7_38)
	f7f8_38 := int64(f7_2) * int64(f8_19)
	f7f9_76 := int64(f7_2) * int64(f9_38)
	f8f8_19 := int64(f8) * int64(f8_19)
	f8f9_38 := int64(f8) * int64(f9_38)
	f9f9_38 := int64(f9) * int64(f9_38)
	// Column sums: hk collects every product fi*fj with i+j == k (mod 10).
	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
	var carry [10]int64

	// Double every column: this is the only difference from feSquare.
	h0 += h0
	h1 += h1
	h2 += h2
	h3 += h3
	h4 += h4
	h5 += h5
	h6 += h6
	h7 += h7
	h8 += h8
	h9 += h9

	// Carry chain, identical in shape to the one in feMul: round-to-nearest
	// carries along two interleaved chains, followed by the wrap-around
	// carry from limb 9 into limb 0 (times 19) and one final carry 0 -> 1.
	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26
	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26

	carry[1] = (h1 + (1 << 24)) >> 25
	h2 += carry[1]
	h1 -= carry[1] << 25
	carry[5] = (h5 + (1 << 24)) >> 25
	h6 += carry[5]
	h5 -= carry[5] << 25

	carry[2] = (h2 + (1 << 25)) >> 26
	h3 += carry[2]
	h2 -= carry[2] << 26
	carry[6] = (h6 + (1 << 25)) >> 26
	h7 += carry[6]
	h6 -= carry[6] << 26

	carry[3] = (h3 + (1 << 24)) >> 25
	h4 += carry[3]
	h3 -= carry[3] << 25
	carry[7] = (h7 + (1 << 24)) >> 25
	h8 += carry[7]
	h7 -= carry[7] << 25

	carry[4] = (h4 + (1 << 25)) >> 26
	h5 += carry[4]
	h4 -= carry[4] << 26
	carry[8] = (h8 + (1 << 25)) >> 26
	h9 += carry[8]
	h8 -= carry[8] << 26

	carry[9] = (h9 + (1 << 24)) >> 25
	h0 += carry[9] * 19
	h9 -= carry[9] << 25

	carry[0] = (h0 + (1 << 25)) >> 26
	h1 += carry[0]
	h0 -= carry[0] << 26

	h[0] = int32(h0)
	h[1] = int32(h1)
	h[2] = int32(h2)
	h[3] = int32(h3)
	h[4] = int32(h4)
	h[5] = int32(h5)
	h[6] = int32(h6)
	h[7] = int32(h7)
	h[8] = int32(h8)
	h[9] = int32(h9)
}

// feInvert sets out = z^(2^255-21), i.e. z^(p-2) for p = 2^255-19.  For
// non-zero z this is the multiplicative inverse of z; for z == 0 the result
// is 0.  out may alias z, because out is written only by the final
// multiplication.
//
// The exponent is built with a fixed addition chain of squarings and
// multiplications.  The trailing comments give the exponent of z held in the
// destination after each step.
func feInvert(out, z *fieldElement) {
	var lo, acc, run, wide fieldElement

	// squareN sets dst = src^(2^n) using n successive squarings (n >= 1).
	squareN := func(dst, src *fieldElement, n int) {
		feSquare(dst, src)
		for k := 1; k < n; k++ {
			feSquare(dst, dst)
		}
	}

	squareN(&lo, z, 1)        // 2
	squareN(&acc, &lo, 2)     // 8
	feMul(&acc, z, &acc)      // 9
	feMul(&lo, &lo, &acc)     // 11
	squareN(&run, &lo, 1)     // 22
	feMul(&acc, &acc, &run)   // 31 = 2^5 - 1
	squareN(&run, &acc, 5)    // 2^10 - 2^5
	feMul(&acc, &run, &acc)   // 2^10 - 1
	squareN(&run, &acc, 10)   // 2^20 - 2^10
	feMul(&run, &run, &acc)   // 2^20 - 1
	squareN(&wide, &run, 20)  // 2^40 - 2^20
	feMul(&run, &wide, &run)  // 2^40 - 1
	squareN(&run, &run, 10)   // 2^50 - 2^10
	feMul(&acc, &run, &acc)   // 2^50 - 1
	squareN(&run, &acc, 50)   // 2^100 - 2^50
	feMul(&run, &run, &acc)   // 2^100 - 1
	squareN(&wide, &run, 100) // 2^200 - 2^100
	feMul(&run, &wide, &run)  // 2^200 - 1
	squareN(&run, &run, 50)   // 2^250 - 2^50
	feMul(&acc, &run, &acc)   // 2^250 - 1
	squareN(&acc, &acc, 5)    // 2^255 - 2^5
	feMul(out, &acc, &lo)     // 2^255 - 21
}

// fePow22523 sets out = z^(2^252-3), which equals z^((p-5)/8) for
// p = 2^255-19.  out may alias z, because out is written only by the final
// multiplication.
//
// The exponent is built with a fixed addition chain of squarings and
// multiplications.  The trailing comments give the exponent of z held in the
// destination after each step.
func fePow22523(out, z *fieldElement) {
	var lo, acc, tmp fieldElement

	// squareN sets dst = src^(2^n) using n successive squarings (n >= 1).
	squareN := func(dst, src *fieldElement, n int) {
		feSquare(dst, src)
		for k := 1; k < n; k++ {
			feSquare(dst, dst)
		}
	}

	squareN(&lo, z, 1)       // 2
	squareN(&acc, &lo, 2)    // 8
	feMul(&acc, z, &acc)     // 9
	feMul(&lo, &lo, &acc)    // 11
	squareN(&lo, &lo, 1)     // 22
	feMul(&lo, &acc, &lo)    // 31 = 2^5 - 1
	squareN(&acc, &lo, 5)    // 2^10 - 2^5
	feMul(&lo, &acc, &lo)    // 2^10 - 1
	squareN(&acc, &lo, 10)   // 2^20 - 2^10
	feMul(&acc, &acc, &lo)   // 2^20 - 1
	squareN(&tmp, &acc, 20)  // 2^40 - 2^20
	feMul(&acc, &tmp, &acc)  // 2^40 - 1
	squareN(&acc, &acc, 10)  // 2^50 - 2^10
	feMul(&lo, &acc, &lo)    // 2^50 - 1
	squareN(&acc, &lo, 50)   // 2^100 - 2^50
	feMul(&acc, &acc, &lo)   // 2^100 - 1
	squareN(&tmp, &acc, 100) // 2^200 - 2^100
	feMul(&acc, &tmp, &acc)  // 2^200 - 1
	squareN(&acc, &acc, 50)  // 2^250 - 2^50
	feMul(&lo, &acc, &lo)    // 2^250 - 1
	squareN(&lo, &lo, 2)     // 2^252 - 4
	feMul(out, &lo, z)       // 2^252 - 3
}

// Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
// y^2 where d = -121665/121666.
//
// Several representations are used:
//   projectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
//   extendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
//   completedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
//   preComputedGroupElement: (y+x,y-x,2dxy)
//   cachedGroupElement: (Y+X,Y-X,Z,T*d2), as produced by ToCached

// projectiveGroupElement is a curve point in projective coordinates (X:Y:Z),
// where the affine coordinates are x=X/Z and y=Y/Z.
type projectiveGroupElement struct {
	X, Y, Z fieldElement
}

// extendedGroupElement is a curve point in extended coordinates (X:Y:Z:T),
// where x=X/Z, y=Y/Z and the extra coordinate satisfies XY=ZT.
type extendedGroupElement struct {
	X, Y, Z, T fieldElement
}

// completedGroupElement is a curve point in "completed" coordinates
// ((X:Z),(Y:T)), where x=X/Z and y=Y/T.  It is the output form of the
// doubling and addition routines; convert it with ToProjective or ToExtended
// before further use.
type completedGroupElement struct {
	X, Y, Z, T fieldElement
}

// preComputedGroupElement is an affine point (x,y) stored as the triple
// (y+x, y-x, 2dxy), the form consumed by geMixedAdd and geMixedSub.
type preComputedGroupElement struct {
	yPlusX, yMinusX, xy2d fieldElement
}

// cachedGroupElement is the form of an extended point (X:Y:Z:T) consumed by
// geAdd and geSub.  As filled in by extendedGroupElement.ToCached it holds
// yPlusX = Y+X, yMinusX = Y-X, Z = Z and T2d = T*d2.
type cachedGroupElement struct {
	yPlusX, yMinusX, Z, T2d fieldElement
}

// Zero sets p to (X:Y:Z) = (0:1:1), the neutral element of the group.
func (p *projectiveGroupElement) Zero() {
	p.X = fieldElement{}
	p.Y = fieldElement{1}
	p.Z = fieldElement{1}
}

// Double sets r = 2*p, with the result in completed coordinates.  p is not
// modified.
//
// The fields of r are used as scratch space while the result is built, so the
// order of the statements below matters.
func (p *projectiveGroupElement) Double(r *completedGroupElement) {
	var t0 fieldElement

	feSquare(&r.X, &p.X)    // r.X = X^2
	feSquare(&r.Z, &p.Y)    // r.Z = Y^2
	feSquare2(&r.T, &p.Z)   // r.T = 2*Z^2
	feAdd(&r.Y, &p.X, &p.Y) // r.Y = X+Y
	feSquare(&t0, &r.Y)     // t0 = (X+Y)^2
	feAdd(&r.Y, &r.Z, &r.X) // r.Y = Y^2+X^2
	feSub(&r.Z, &r.Z, &r.X) // r.Z = Y^2-X^2
	feSub(&r.X, &t0, &r.Y)  // r.X = (X+Y)^2-(Y^2+X^2) = 2*X*Y
	feSub(&r.T, &r.T, &r.Z) // r.T = 2*Z^2-(Y^2-X^2)
}

// ToBytes writes the 32-byte encoding of p to s: the encoding of the affine
// y coordinate, with the sign of the affine x coordinate (as reported by
// feIsNegative) XORed into the top bit of the last byte.
func (p *projectiveGroupElement) ToBytes(s *[32]byte) {
	var zInv, affX, affY fieldElement

	// Convert to affine coordinates: x = X/Z, y = Y/Z.
	feInvert(&zInv, &p.Z)
	feMul(&affX, &p.X, &zInv)
	feMul(&affY, &p.Y, &zInv)

	feToBytes(s[:], &affY)
	sign := feIsNegative(&affX)
	s[31] ^= sign << 7
}

// Zero sets p to (X:Y:Z:T) = (0:1:1:0), the neutral element of the group.
func (p *extendedGroupElement) Zero() {
	p.X = fieldElement{}
	p.Y = fieldElement{1}
	p.Z = fieldElement{1}
	p.T = fieldElement{}
}

// Double sets r = 2*p.  The T coordinate of p is not needed for doubling, so
// the work is delegated to the projective doubling routine on a copy of
// (X:Y:Z).
func (p *extendedGroupElement) Double(r *completedGroupElement) {
	proj := projectiveGroupElement{X: p.X, Y: p.Y, Z: p.Z}
	proj.Double(r)
}

// ToCached fills r with the cached form of p:
// (yPlusX, yMinusX, Z, T2d) = (Y+X, Y-X, Z, T*d2).
func (p *extendedGroupElement) ToCached(r *cachedGroupElement) {
	r.Z = p.Z
	feSub(&r.yMinusX, &p.Y, &p.X)
	feAdd(&r.yPlusX, &p.Y, &p.X)
	feMul(&r.T2d, &p.T, &d2)
}

// ToProjective sets r to the projective form of p by copying X, Y and Z and
// dropping T.
func (p *extendedGroupElement) ToProjective(r *projectiveGroupElement) {
	r.X, r.Y, r.Z = p.X, p.Y, p.Z
}

// ToBytes writes the 32-byte encoding of p to s[0..31]: the encoding of the
// affine y coordinate, with the sign of the affine x coordinate (as reported
// by feIsNegative) XORed into the top bit of s[31].  s must hold at least 32
// bytes.
func (p *extendedGroupElement) ToBytes(s []byte) {
	var zInv, affX, affY fieldElement

	// Convert to affine coordinates: x = X/Z, y = Y/Z.
	feInvert(&zInv, &p.Z)
	feMul(&affX, &p.X, &zInv)
	feMul(&affY, &p.Y, &zInv)

	feToBytes(s, &affY)
	sign := feIsNegative(&affX)
	s[31] ^= sign << 7
}

// FromBytes decodes the 32-byte point encoding in s into p and reports
// whether s describes a point on the curve.
//
// The y coordinate is read from s (ignoring the top bit of s[31]) and x is
// recovered from the curve equation as a square root of u/v with
// u = y^2-1 and v = d*y^2+1.  If no square root exists, or if s holds fewer
// than 32 bytes, FromBytes returns false; in the no-square-root case p has
// already been partially overwritten and must not be used.
//
// NOTE: x is negated when its sign (per feIsNegative) EQUALS the sign bit
// stored in the top bit of s[31].  For non-zero x the decoded point therefore
// has the opposite x sign to the one encoded, i.e. p is the negation of the
// encoded point.  This looks like the behavior of ref10's
// ge_frombytes_negate_vartime; callers should confirm that they expect the
// negated point.
func (p *extendedGroupElement) FromBytes(s []byte) bool {
	if len(s) < 32 {
		// Too short to be a point encoding; report failure rather than
		// panicking inside feFromBytes.
		return false
	}

	var u, v, v3, vxx, check fieldElement

	feFromBytes(&p.Y, s)
	feOne(&p.Z)
	feSquare(&u, &p.Y)
	feMul(&v, &u, &d)
	feSub(&u, &u, &p.Z) // u = y^2-1
	feAdd(&v, &v, &p.Z) // v = dy^2+1

	feSquare(&v3, &v)
	feMul(&v3, &v3, &v) // v3 = v^3
	feSquare(&p.X, &v3)
	feMul(&p.X, &p.X, &v)
	feMul(&p.X, &p.X, &u) // x = uv^7

	fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
	feMul(&p.X, &p.X, &v3)
	feMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)

	// The candidate x satisfies v*x^2 = u or v*x^2 = -u when u/v or -u/v
	// is a square.  In the second case multiplying by sqrt(-1) fixes it up;
	// if neither holds there is no point with this y.
	feSquare(&vxx, &p.X)
	feMul(&vxx, &vxx, &v)
	feSub(&check, &vxx, &u) // vx^2-u
	if feIsNonZero(&check) == 1 {
		feAdd(&check, &vxx, &u) // vx^2+u
		if feIsNonZero(&check) == 1 {
			return false
		}
		feMul(&p.X, &p.X, &sqrtM1)
	}

	if feIsNegative(&p.X) == (s[31] >> 7) {
		feNeg(&p.X, &p.X)
	}

	feMul(&p.T, &p.X, &p.Y)
	return true
}

// ToProjective converts p from completed coordinates ((X:Z),(Y:T)) to
// projective coordinates: r = (X*T : Y*Z : Z*T).
func (p *completedGroupElement) ToProjective(r *projectiveGroupElement) {
	// The three products are independent of one another; r never aliases p
	// because the two are different types.
	feMul(&r.Z, &p.Z, &p.T)
	feMul(&r.Y, &p.Y, &p.Z)
	feMul(&r.X, &p.X, &p.T)
}

// ToExtended converts p from completed coordinates ((X:Z),(Y:T)) to extended
// coordinates: r = (X*T : Y*Z : Z*T : X*Y).
func (p *completedGroupElement) ToExtended(r *extendedGroupElement) {
	// The four products are independent of one another; r never aliases p
	// because the two are different types.
	feMul(&r.T, &p.X, &p.Y)
	feMul(&r.Z, &p.Z, &p.T)
	feMul(&r.Y, &p.Y, &p.Z)
	feMul(&r.X, &p.X, &p.T)
}

// Zero sets p to (yPlusX, yMinusX, xy2d) = (1, 1, 0), the precomputed form of
// the neutral element (x,y) = (0,1).
func (p *preComputedGroupElement) Zero() {
	p.yPlusX = fieldElement{1}
	p.yMinusX = fieldElement{1}
	p.xy2d = fieldElement{}
}

// geAdd sets r = p + q, where p is in extended and q in cached form.  The
// result is in completed coordinates.  p and q are not modified.
//
// The fields of r are reused as scratch space, so statement order matters.
// Writing A = (Y+X)*q.yPlusX, B = (Y-X)*q.yMinusX, C = q.T2d*T and
// D = 2*Z*q.Z (with X, Y, Z, T the coordinates of p), the result is
// r = (A-B, A+B, D+C, D-C).
func geAdd(r *completedGroupElement, p *extendedGroupElement, q *cachedGroupElement) {
	var t0 fieldElement

	feAdd(&r.X, &p.Y, &p.X)       // r.X = Y+X
	feSub(&r.Y, &p.Y, &p.X)       // r.Y = Y-X
	feMul(&r.Z, &r.X, &q.yPlusX)  // r.Z = A
	feMul(&r.Y, &r.Y, &q.yMinusX) // r.Y = B
	feMul(&r.T, &q.T2d, &p.T)     // r.T = C
	feMul(&r.X, &p.Z, &q.Z)       // r.X = Z*q.Z
	feAdd(&t0, &r.X, &r.X)        // t0 = D
	feSub(&r.X, &r.Z, &r.Y)       // r.X = A-B
	feAdd(&r.Y, &r.Z, &r.Y)       // r.Y = A+B
	feAdd(&r.Z, &t0, &r.T)        // r.Z = D+C
	feSub(&r.T, &t0, &r.T)        // r.T = D-C
}

// geSub sets r = p - q, where p is in extended and q in cached form.  The
// result is in completed coordinates.  p and q are not modified.
//
// This is geAdd with the roles of q.yPlusX and q.yMinusX exchanged and the
// sign of the C term flipped.  Writing A = (Y+X)*q.yMinusX,
// B = (Y-X)*q.yPlusX, C = q.T2d*T and D = 2*Z*q.Z (with X, Y, Z, T the
// coordinates of p), the result is r = (A-B, A+B, D-C, D+C).
//
// The fields of r are reused as scratch space, so statement order matters.
func geSub(r *completedGroupElement, p *extendedGroupElement, q *cachedGroupElement) {
	var t0 fieldElement

	feAdd(&r.X, &p.Y, &p.X)       // r.X = Y+X
	feSub(&r.Y, &p.Y, &p.X)       // r.Y = Y-X
	feMul(&r.Z, &r.X, &q.yMinusX) // r.Z = A
	feMul(&r.Y, &r.Y, &q.yPlusX)  // r.Y = B
	feMul(&r.T, &q.T2d, &p.T)     // r.T = C
	feMul(&r.X, &p.Z, &q.Z)       // r.X = Z*q.Z
	feAdd(&t0, &r.X, &r.X)        // t0 = D
	feSub(&r.X, &r.Z, &r.Y)       // r.X = A-B
	feAdd(&r.Y, &r.Z, &r.Y)       // r.Y = A+B
	feSub(&r.Z, &t0, &r.T)        // r.Z = D-C
	feAdd(&r.T, &t0, &r.T)        // r.T = D+C
}

// geMixedAdd sets r = p + q, where p is in extended form and q is a
// precomputed affine point.  The result is in completed coordinates.  p and q
// are not modified.
//
// Writing A = (Y+X)*q.yPlusX, B = (Y-X)*q.yMinusX, C = q.xy2d*T and D = 2*Z
// (with X, Y, Z, T the coordinates of p), the result is
// r = (A-B, A+B, D+C, D-C).
//
// The fields of r are reused as scratch space, so statement order matters.
func geMixedAdd(r *completedGroupElement, p *extendedGroupElement, q *preComputedGroupElement) {
	var t0 fieldElement

	feAdd(&r.X, &p.Y, &p.X)       // r.X = Y+X
	feSub(&r.Y, &p.Y, &p.X)       // r.Y = Y-X
	feMul(&r.Z, &r.X, &q.yPlusX)  // r.Z = A
	feMul(&r.Y, &r.Y, &q.yMinusX) // r.Y = B
	feMul(&r.T, &q.xy2d, &p.T)    // r.T = C
	feAdd(&t0, &p.Z, &p.Z)        // t0 = D
	feSub(&r.X, &r.Z, &r.Y)       // r.X = A-B
	feAdd(&r.Y, &r.Z, &r.Y)       // r.Y = A+B
	feAdd(&r.Z, &t0, &r.T)        // r.Z = D+C
	feSub(&r.T, &t0, &r.T)        // r.T = D-C
}

// geMixedSub computes r = p - q, with p in extended coordinates and q a
// precomputed table entry. It mirrors geMixedAdd with q.yPlusX and q.yMinusX
// exchanged and the sign of the xy2d term flipped.
//
// With
//
//	a = (p.Y+p.X) * q.yMinusX
//	b = (p.Y-p.X) * q.yPlusX
//	c = q.xy2d * p.T
//	d = 2 * p.Z
//
// the result is r.X = a-b, r.Y = a+b, r.Z = d-c, r.T = d+c.
func geMixedSub(r *completedGroupElement, p *extendedGroupElement, q *preComputedGroupElement) {
	var ySum, yDiff, a, b, c, d fieldElement

	feAdd(&ySum, &p.Y, &p.X)
	feSub(&yDiff, &p.Y, &p.X)
	feMul(&a, &ySum, &q.yMinusX)
	feMul(&b, &yDiff, &q.yPlusX)
	feMul(&c, &q.xy2d, &p.T)
	feAdd(&d, &p.Z, &p.Z)

	feSub(&r.X, &a, &b)
	feAdd(&r.Y, &a, &b)
	feSub(&r.Z, &d, &c)
	feAdd(&r.T, &d, &c)
}

// slide recodes the 256-bit little-endian scalar in a (at least 32 bytes)
// into signed digits r such that a = sum(r[i] * 2^i); a carry running past
// position 255 is dropped. Every non-zero digit is odd and lies in
// [-15, 15], which is what lets callers index a table of odd multiples.
func slide(r *[256]int8, a []byte) {
	// Start from the plain binary expansion: one bit per digit.
	for bit := range r {
		r[bit] = int8((a[bit/8] >> uint(bit%8)) & 1)
	}

	for pos := range r {
		if r[pos] == 0 {
			continue
		}
		// Try to absorb the set bits in the next six positions into r[pos].
	window:
		for off := 1; off <= 6 && pos+off < 256; off++ {
			if r[pos+off] == 0 {
				continue
			}
			term := r[pos+off] << uint(off)
			switch {
			case r[pos]+term <= 15:
				r[pos] += term
				r[pos+off] = 0
			case r[pos]-term >= -15:
				// Subtracting instead of adding borrows 2^(off+1) from
				// above: ripple a carry upwards through the run of ones.
				r[pos] -= term
				for k := pos + off; k < 256; k++ {
					if r[k] == 0 {
						r[k] = 1
						break
					}
					r[k] = 0
				}
			default:
				// Neither sign keeps the digit in range; stop widening.
				break window
			}
		}
	}
}

// geDoubleScalarMultVartime returns a*A + b*B, where
//
//	a = a[0]+256*a[1]+...+256^31 a[31],
//	b = b[0]+256*b[1]+...+256^31 b[31],
//
// and B is the Ed25519 base point (x,4/5) with x positive.
//
// The work done depends on the scalars: leading positions where both recoded
// scalars are zero are skipped, and an addition is performed only for
// non-zero digits.
func geDoubleScalarMultVartime(a []byte, A *extendedGroupElement, b []byte) *projectiveGroupElement {
	var aDigits, bDigits [256]int8
	slide(&aDigits, a)
	slide(&bDigits, b)

	// oddMultiples[k] holds (2k+1)*A: A, 3A, 5A, ..., 15A. Each entry is the
	// previous one plus 2A.
	var oddMultiples [8]cachedGroupElement
	var acc completedGroupElement
	var ext, twoA extendedGroupElement

	A.ToCached(&oddMultiples[0])
	A.Double(&acc)
	acc.ToExtended(&twoA)
	for k := 1; k < len(oddMultiples); k++ {
		geAdd(&acc, &twoA, &oddMultiples[k-1])
		acc.ToExtended(&ext)
		ext.ToCached(&oddMultiples[k])
	}

	r := new(projectiveGroupElement)
	r.Zero()

	// Find the highest position at which either scalar has a non-zero digit.
	top := 255
	for top >= 0 && aDigits[top] == 0 && bDigits[top] == 0 {
		top--
	}

	// Double-and-add from the top digit down. A digit d selects the odd
	// multiple |d|, stored at table index |d|/2.
	for i := top; i >= 0; i-- {
		r.Double(&acc)

		switch d := aDigits[i]; {
		case d > 0:
			acc.ToExtended(&ext)
			geAdd(&acc, &ext, &oddMultiples[d/2])
		case d < 0:
			acc.ToExtended(&ext)
			geSub(&acc, &ext, &oddMultiples[(-d)/2])
		}

		switch d := bDigits[i]; {
		case d > 0:
			acc.ToExtended(&ext)
			geMixedAdd(&acc, &ext, &bi[d/2])
		case d < 0:
			acc.ToExtended(&ext)
			geMixedSub(&acc, &ext, &bi[(-d)/2])
		}

		acc.ToProjective(r)
	}
	return r
}

// equal returns 1 if b == c and 0 otherwise. It uses no comparison or branch
// on its arguments.
func equal(b, c int32) int32 {
	// b^c is zero exactly when the inputs match. Widening to 64 bits before
	// the decrement means only a zero difference can wrap around and set the
	// top bit; a 32-bit decrement would also report "equal" for any
	// difference that already had bit 31 set (e.g. b = -1, c = 0).
	diff := uint64(uint32(b ^ c))
	return int32((diff - 1) >> 63)
}

// preComputedGroupElementCMove replaces t with u when b == 1 and leaves t
// unchanged when b == 0, applying feCMove to each of the three fields.
//
// Preconditions: b in {0,1}.
func preComputedGroupElementCMove(t, u *preComputedGroupElement, b int32) {
	feCMove(&t.xy2d, &u.xy2d, b)
	feCMove(&t.yMinusX, &u.yMinusX, b)
	feCMove(&t.yPlusX, &u.yPlusX, b)
}

// selectPoint sets t to the table entry base[pos][|b|-1] for |b| in 1..8, or
// to the Zero element when b == 0, and then to its negated form (yPlusX and
// yMinusX swapped, xy2d negated) when b is negative.
//
// Every table entry is visited and the choice is made with conditional
// moves, so the sequence of operations does not depend on b.
func selectPoint(t *preComputedGroupElement, pos int32, b int32) {
	// sign is 1 for negative b, 0 otherwise; mask is all ones or all zeros
	// accordingly, so (b ^ mask) - mask is |b|.
	sign := int32(uint32(b) >> 31)
	mask := -sign
	magnitude := (b ^ mask) - mask

	t.Zero()
	for k := int32(1); k <= 8; k++ {
		preComputedGroupElementCMove(t, &base[pos][k-1], equal(magnitude, k))
	}

	var negated preComputedGroupElement
	negated.yPlusX = t.yMinusX
	negated.yMinusX = t.yPlusX
	feNeg(&negated.xy2d, &t.xy2d)
	preComputedGroupElementCMove(t, &negated, sign)
}

// geScalarMultBase returns h = a*B, where
//
//	a = a[0]+256*a[1]+...+256^31 a[31]
//
// and B is the Ed25519 base point (x,4/5) with x positive.
//
// Preconditions:
//
//	a[31] <= 127
func geScalarMultBase(a []byte) *extendedGroupElement {
	// Split the scalar into 64 radix-16 digits, least significant first.
	var digits [64]int8
	for i, v := range a {
		digits[2*i] = int8(v & 15)
		digits[2*i+1] = int8((v >> 4) & 15)
	}
	// Each digit is between 0 and 15 and digits[63] is between 0 and 7.

	// Recentre the digits by pushing a carry upwards.
	var carry int8
	for i := 0; i < 63; i++ {
		d := digits[i] + carry
		carry = (d + 8) >> 4
		digits[i] = d - carry<<4
	}
	digits[63] += carry
	// Each digit is now between -8 and 8.

	h := new(extendedGroupElement)
	h.Zero()

	var entry preComputedGroupElement
	var sum completedGroupElement
	var proj projectiveGroupElement

	// First pass: the odd-numbered digits, one per table position.
	for j := int32(0); j < 32; j++ {
		selectPoint(&entry, j, int32(digits[2*j+1]))
		geMixedAdd(&sum, h, &entry)
		sum.ToExtended(h)
	}

	// Four doublings multiply the running sum by 16, lifting the odd digits
	// to their proper weight relative to the even ones.
	h.Double(&sum)
	for k := 0; k < 3; k++ {
		sum.ToProjective(&proj)
		proj.Double(&sum)
	}
	sum.ToExtended(h)

	// Second pass: the even-numbered digits.
	for j := int32(0); j < 32; j++ {
		selectPoint(&entry, j, int32(digits[2*j]))
		geMixedAdd(&sum, h, &entry)
		sum.ToExtended(h)
	}
	return h
}

// The scalars are GF(2^252 + 27742317777372353535851937790883648493).

// Input:
//   a[0]+256*a[1]+...+256^31*a[31] = a
//   b[0]+256*b[1]+...+256^31*b[31] = b
//   c[0]+256*c[1]+...+256^31*c[31] = c
//
// Output:
//   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
//   where l = 2^252 + 27742317777372353535851937790883648493.
func scMulAdd(s, a, b, c []byte) {
	a0 := 2097151 & load3(a[:])
	a1 := 2097151 & (load4(a[2:]) >> 5)
	a2 := 2097151 & (load3(a[5:]) >> 2)
	a3 := 2097151 & (load4(a[7:]) >> 7)
	a4 := 2097151 & (load4(a[10:]) >> 4)
	a5 := 2097151 & (load3(a[13:]) >> 1)
	a6 := 2097151 & (load4(a[15:]) >> 6)
	a7 := 2097151 & (load3(a[18:]) >> 3)
	a8 := 2097151 & load3(a[21:])
	a9 := 2097151 & (load4(a[23:]) >> 5)
	a10 := 2097151 & (load3(a[26:]) >> 2)
	a11 := (load4(a[28:]) >> 7)
	b0 := 2097151 & load3(b[:])
	b1 := 2097151 & (load4(b[2:]) >> 5)
	b2 := 2097151 & (load3(b[5:]) >> 2)
	b3 := 2097151 & (load4(b[7:]) >> 7)
	b4 := 2097151 & (load4(b[10:]) >> 4)
	b5 := 2097151 & (load3(b[13:]) >> 1)
	b6 := 2097151 & (load4(b[15:]) >> 6)
	b7 := 2097151 & (load3(b[18:]) >> 3)
	b8 := 2097151 & load3(b[21:])
	b9 := 2097151 & (load4(b[23:]) >> 5)
	b10 := 2097151 & (load3(b[26:]) >> 2)
	b11 := (load4(b[28:]) >> 7)
	c0 := 2097151 & load3(c[:])
	c1 := 2097151 & (load4(c[2:]) >> 5)
	c2 := 2097151 & (load3(c[5:]) >> 2)
	c3 := 2097151 & (load4(c[7:]) >> 7)
	c4 := 2097151 & (load4(c[10:]) >> 4)
	c5 := 2097151 & (load3(c[13:]) >> 1)
	c6 := 2097151 & (load4(c[15:]) >> 6)
	c7 := 2097151 & (load3(c[18:]) >> 3)
	c8 := 2097151 & load3(c[21:])
	c9 := 2097151 & (load4(c[23:]) >> 5)
	c10 := 2097151 & (load3(c[26:]) >> 2)
	c11 := (load4(c[28:]) >> 7)
	var carry [23]int64

	s0 := c0 + a0*b0
	s1 := c1 + a0*b1 + a1*b0
	s2 := c2 + a0*b2 + a1*b1 + a2*b0
	s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
	s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
	s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
	s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
	s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
	s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
	s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
	s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
	s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
	s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
	s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
	s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
	s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
	s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
	s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
	s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
	s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
	s20 := a9*b11 + a10*b10 + a11*b9
	s21 := a10*b11 + a11*b10
	s22 := a11 * b11
	s23 := int64(0)

	carry[0] = (s0 + (1 << 20)) >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[2] = (s2 + (1 << 20)) >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[4] = (s4 + (1 << 20)) >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[6] = (s6 + (1 << 20)) >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[8] = (s8 + (1 << 20)) >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[10] = (s10 + (1 << 20)) >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21
	carry[12] = (s12 + (1 << 20)) >> 21
	s13 += carry[12]
	s12 -= carry[12] << 21
	carry[14] = (s14 + (1 << 20)) >> 21
	s15 += carry[14]
	s14 -= carry[14] << 21
	carry[16] = (s16 + (1 << 20)) >> 21
	s17 += carry[16]
	s16 -= carry[16] << 21
	carry[18] = (s18 + (1 << 20)) >> 21
	s19 += carry[18]
	s18 -= carry[18] << 21
	carry[20] = (s20 + (1 << 20)) >> 21
	s21 += carry[20]
	s20 -= carry[20] << 21
	carry[22] = (s22 + (1 << 20)) >> 21
	s23 += carry[22]
	s22 -= carry[22] << 21

	carry[1] = (s1 + (1 << 20)) >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[3] = (s3 + (1 << 20)) >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[5] = (s5 + (1 << 20)) >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[7] = (s7 + (1 << 20)) >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[9] = (s9 + (1 << 20)) >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[11] = (s11 + (1 << 20)) >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21
	carry[13] = (s13 + (1 << 20)) >> 21
	s14 += carry[13]
	s13 -= carry[13] << 21
	carry[15] = (s15 + (1 << 20)) >> 21
	s16 += carry[15]
	s15 -= carry[15] << 21
	carry[17] = (s17 + (1 << 20)) >> 21
	s18 += carry[17]
	s17 -= carry[17] << 21
	carry[19] = (s19 + (1 << 20)) >> 21
	s20 += carry[19]
	s19 -= carry[19] << 21
	carry[21] = (s21 + (1 << 20)) >> 21
	s22 += carry[21]
	s21 -= carry[21] << 21

	s11 += s23 * 666643
	s12 += s23 * 470296
	s13 += s23 * 654183
	s14 -= s23 * 997805
	s15 += s23 * 136657
	s16 -= s23 * 683901
	s23 = 0

	s10 += s22 * 666643
	s11 += s22 * 470296
	s12 += s22 * 654183
	s13 -= s22 * 997805
	s14 += s22 * 136657
	s15 -= s22 * 683901
	s22 = 0

	s9 += s21 * 666643
	s10 += s21 * 470296
	s11 += s21 * 654183
	s12 -= s21 * 997805
	s13 += s21 * 136657
	s14 -= s21 * 683901
	s21 = 0

	s8 += s20 * 666643
	s9 += s20 * 470296
	s10 += s20 * 654183
	s11 -= s20 * 997805
	s12 += s20 * 136657
	s13 -= s20 * 683901
	s20 = 0

	s7 += s19 * 666643
	s8 += s19 * 470296
	s9 += s19 * 654183
	s10 -= s19 * 997805
	s11 += s19 * 136657
	s12 -= s19 * 683901
	s19 = 0

	s6 += s18 * 666643
	s7 += s18 * 470296
	s8 += s18 * 654183
	s9 -= s18 * 997805
	s10 += s18 * 136657
	s11 -= s18 * 683901
	s18 = 0

	carry[6] = (s6 + (1 << 20)) >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[8] = (s8 + (1 << 20)) >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[10] = (s10 + (1 << 20)) >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21
	carry[12] = (s12 + (1 << 20)) >> 21
	s13 += carry[12]
	s12 -= carry[12] << 21
	carry[14] = (s14 + (1 << 20)) >> 21
	s15 += carry[14]
	s14 -= carry[14] << 21
	carry[16] = (s16 + (1 << 20)) >> 21
	s17 += carry[16]
	s16 -= carry[16] << 21

	carry[7] = (s7 + (1 << 20)) >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[9] = (s9 + (1 << 20)) >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[11] = (s11 + (1 << 20)) >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21
	carry[13] = (s13 + (1 << 20)) >> 21
	s14 += carry[13]
	s13 -= carry[13] << 21
	carry[15] = (s15 + (1 << 20)) >> 21
	s16 += carry[15]
	s15 -= carry[15] << 21

	s5 += s17 * 666643
	s6 += s17 * 470296
	s7 += s17 * 654183
	s8 -= s17 * 997805
	s9 += s17 * 136657
	s10 -= s17 * 683901
	s17 = 0

	s4 += s16 * 666643
	s5 += s16 * 470296
	s6 += s16 * 654183
	s7 -= s16 * 997805
	s8 += s16 * 136657
	s9 -= s16 * 683901
	s16 = 0

	s3 += s15 * 666643
	s4 += s15 * 470296
	s5 += s15 * 654183
	s6 -= s15 * 997805
	s7 += s15 * 136657
	s8 -= s15 * 683901
	s15 = 0

	s2 += s14 * 666643
	s3 += s14 * 470296
	s4 += s14 * 654183
	s5 -= s14 * 997805
	s6 += s14 * 136657
	s7 -= s14 * 683901
	s14 = 0

	s1 += s13 * 666643
	s2 += s13 * 470296
	s3 += s13 * 654183
	s4 -= s13 * 997805
	s5 += s13 * 136657
	s6 -= s13 * 683901
	s13 = 0

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = (s0 + (1 << 20)) >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[2] = (s2 + (1 << 20)) >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[4] = (s4 + (1 << 20)) >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[6] = (s6 + (1 << 20)) >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[8] = (s8 + (1 << 20)) >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[10] = (s10 + (1 << 20)) >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21

	carry[1] = (s1 + (1 << 20)) >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[3] = (s3 + (1 << 20)) >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[5] = (s5 + (1 << 20)) >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[7] = (s7 + (1 << 20)) >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[9] = (s9 + (1 << 20)) >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[11] = (s11 + (1 << 20)) >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = s0 >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[1] = s1 >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[2] = s2 >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[3] = s3 >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[4] = s4 >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[5] = s5 >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[6] = s6 >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[7] = s7 >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[8] = s8 >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[9] = s9 >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[10] = s10 >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21
	carry[11] = s11 >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = s0 >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[1] = s1 >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[2] = s2 >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[3] = s3 >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[4] = s4 >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[5] = s5 >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[6] = s6 >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[7] = s7 >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[8] = s8 >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[9] = s9 >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[10] = s10 >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21

	s[0] = byte(s0 >> 0)
	s[1] = byte(s0 >> 8)
	s[2] = byte((s0 >> 16) | (s1 << 5))
	s[3] = byte(s1 >> 3)
	s[4] = byte(s1 >> 11)
	s[5] = byte((s1 >> 19) | (s2 << 2))
	s[6] = byte(s2 >> 6)
	s[7] = byte((s2 >> 14) | (s3 << 7))
	s[8] = byte(s3 >> 1)
	s[9] = byte(s3 >> 9)
	s[10] = byte((s3 >> 17) | (s4 << 4))
	s[11] = byte(s4 >> 4)
	s[12] = byte(s4 >> 12)
	s[13] = byte((s4 >> 20) | (s5 << 1))
	s[14] = byte(s5 >> 7)
	s[15] = byte((s5 >> 15) | (s6 << 6))
	s[16] = byte(s6 >> 2)
	s[17] = byte(s6 >> 10)
	s[18] = byte((s6 >> 18) | (s7 << 3))
	s[19] = byte(s7 >> 5)
	s[20] = byte(s7 >> 13)
	s[21] = byte(s8 >> 0)
	s[22] = byte(s8 >> 8)
	s[23] = byte((s8 >> 16) | (s9 << 5))
	s[24] = byte(s9 >> 3)
	s[25] = byte(s9 >> 11)
	s[26] = byte((s9 >> 19) | (s10 << 2))
	s[27] = byte(s10 >> 6)
	s[28] = byte((s10 >> 14) | (s11 << 7))
	s[29] = byte(s11 >> 1)
	s[30] = byte(s11 >> 9)
	s[31] = byte(s11 >> 17)
}

// Input:
//   s[0]+256*s[1]+...+256^63*s[63] = s
//
// Output:
//   s[0]+256*s[1]+...+256^31*s[31] = s mod l
//   where l = 2^252 + 27742317777372353535851937790883648493.
func scReduce(s []byte) []byte {
	s0 := 2097151 & load3(s[:])
	s1 := 2097151 & (load4(s[2:]) >> 5)
	s2 := 2097151 & (load3(s[5:]) >> 2)
	s3 := 2097151 & (load4(s[7:]) >> 7)
	s4 := 2097151 & (load4(s[10:]) >> 4)
	s5 := 2097151 & (load3(s[13:]) >> 1)
	s6 := 2097151 & (load4(s[15:]) >> 6)
	s7 := 2097151 & (load3(s[18:]) >> 3)
	s8 := 2097151 & load3(s[21:])
	s9 := 2097151 & (load4(s[23:]) >> 5)
	s10 := 2097151 & (load3(s[26:]) >> 2)
	s11 := 2097151 & (load4(s[28:]) >> 7)
	s12 := 2097151 & (load4(s[31:]) >> 4)
	s13 := 2097151 & (load3(s[34:]) >> 1)
	s14 := 2097151 & (load4(s[36:]) >> 6)
	s15 := 2097151 & (load3(s[39:]) >> 3)
	s16 := 2097151 & load3(s[42:])
	s17 := 2097151 & (load4(s[44:]) >> 5)
	s18 := 2097151 & (load3(s[47:]) >> 2)
	s19 := 2097151 & (load4(s[49:]) >> 7)
	s20 := 2097151 & (load4(s[52:]) >> 4)
	s21 := 2097151 & (load3(s[55:]) >> 1)
	s22 := 2097151 & (load4(s[57:]) >> 6)
	s23 := (load4(s[60:]) >> 3)

	s11 += s23 * 666643
	s12 += s23 * 470296
	s13 += s23 * 654183
	s14 -= s23 * 997805
	s15 += s23 * 136657
	s16 -= s23 * 683901
	s23 = 0

	s10 += s22 * 666643
	s11 += s22 * 470296
	s12 += s22 * 654183
	s13 -= s22 * 997805
	s14 += s22 * 136657
	s15 -= s22 * 683901
	s22 = 0

	s9 += s21 * 666643
	s10 += s21 * 470296
	s11 += s21 * 654183
	s12 -= s21 * 997805
	s13 += s21 * 136657
	s14 -= s21 * 683901
	s21 = 0

	s8 += s20 * 666643
	s9 += s20 * 470296
	s10 += s20 * 654183
	s11 -= s20 * 997805
	s12 += s20 * 136657
	s13 -= s20 * 683901
	s20 = 0

	s7 += s19 * 666643
	s8 += s19 * 470296
	s9 += s19 * 654183
	s10 -= s19 * 997805
	s11 += s19 * 136657
	s12 -= s19 * 683901
	s19 = 0

	s6 += s18 * 666643
	s7 += s18 * 470296
	s8 += s18 * 654183
	s9 -= s18 * 997805
	s10 += s18 * 136657
	s11 -= s18 * 683901
	s18 = 0

	var carry [17]int64

	carry[6] = (s6 + (1 << 20)) >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[8] = (s8 + (1 << 20)) >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[10] = (s10 + (1 << 20)) >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21
	carry[12] = (s12 + (1 << 20)) >> 21
	s13 += carry[12]
	s12 -= carry[12] << 21
	carry[14] = (s14 + (1 << 20)) >> 21
	s15 += carry[14]
	s14 -= carry[14] << 21
	carry[16] = (s16 + (1 << 20)) >> 21
	s17 += carry[16]
	s16 -= carry[16] << 21

	carry[7] = (s7 + (1 << 20)) >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[9] = (s9 + (1 << 20)) >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[11] = (s11 + (1 << 20)) >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21
	carry[13] = (s13 + (1 << 20)) >> 21
	s14 += carry[13]
	s13 -= carry[13] << 21
	carry[15] = (s15 + (1 << 20)) >> 21
	s16 += carry[15]
	s15 -= carry[15] << 21

	s5 += s17 * 666643
	s6 += s17 * 470296
	s7 += s17 * 654183
	s8 -= s17 * 997805
	s9 += s17 * 136657
	s10 -= s17 * 683901
	s17 = 0

	s4 += s16 * 666643
	s5 += s16 * 470296
	s6 += s16 * 654183
	s7 -= s16 * 997805
	s8 += s16 * 136657
	s9 -= s16 * 683901
	s16 = 0

	s3 += s15 * 666643
	s4 += s15 * 470296
	s5 += s15 * 654183
	s6 -= s15 * 997805
	s7 += s15 * 136657
	s8 -= s15 * 683901
	s15 = 0

	s2 += s14 * 666643
	s3 += s14 * 470296
	s4 += s14 * 654183
	s5 -= s14 * 997805
	s6 += s14 * 136657
	s7 -= s14 * 683901
	s14 = 0

	s1 += s13 * 666643
	s2 += s13 * 470296
	s3 += s13 * 654183
	s4 -= s13 * 997805
	s5 += s13 * 136657
	s6 -= s13 * 683901
	s13 = 0

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = (s0 + (1 << 20)) >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[2] = (s2 + (1 << 20)) >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[4] = (s4 + (1 << 20)) >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[6] = (s6 + (1 << 20)) >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[8] = (s8 + (1 << 20)) >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[10] = (s10 + (1 << 20)) >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21

	carry[1] = (s1 + (1 << 20)) >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[3] = (s3 + (1 << 20)) >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[5] = (s5 + (1 << 20)) >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[7] = (s7 + (1 << 20)) >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[9] = (s9 + (1 << 20)) >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[11] = (s11 + (1 << 20)) >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = s0 >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[1] = s1 >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[2] = s2 >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[3] = s3 >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[4] = s4 >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[5] = s5 >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[6] = s6 >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[7] = s7 >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[8] = s8 >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[9] = s9 >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[10] = s10 >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21
	carry[11] = s11 >> 21
	s12 += carry[11]
	s11 -= carry[11] << 21

	s0 += s12 * 666643
	s1 += s12 * 470296
	s2 += s12 * 654183
	s3 -= s12 * 997805
	s4 += s12 * 136657
	s5 -= s12 * 683901
	s12 = 0

	carry[0] = s0 >> 21
	s1 += carry[0]
	s0 -= carry[0] << 21
	carry[1] = s1 >> 21
	s2 += carry[1]
	s1 -= carry[1] << 21
	carry[2] = s2 >> 21
	s3 += carry[2]
	s2 -= carry[2] << 21
	carry[3] = s3 >> 21
	s4 += carry[3]
	s3 -= carry[3] << 21
	carry[4] = s4 >> 21
	s5 += carry[4]
	s4 -= carry[4] << 21
	carry[5] = s5 >> 21
	s6 += carry[5]
	s5 -= carry[5] << 21
	carry[6] = s6 >> 21
	s7 += carry[6]
	s6 -= carry[6] << 21
	carry[7] = s7 >> 21
	s8 += carry[7]
	s7 -= carry[7] << 21
	carry[8] = s8 >> 21
	s9 += carry[8]
	s8 -= carry[8] << 21
	carry[9] = s9 >> 21
	s10 += carry[9]
	s9 -= carry[9] << 21
	carry[10] = s10 >> 21
	s11 += carry[10]
	s10 -= carry[10] << 21

	out := make([]byte, 32)
	out[0] = byte(s0 >> 0)
	out[1] = byte(s0 >> 8)
	out[2] = byte((s0 >> 16) | (s1 << 5))
	out[3] = byte(s1 >> 3)
	out[4] = byte(s1 >> 11)
	out[5] = byte((s1 >> 19) | (s2 << 2))
	out[6] = byte(s2 >> 6)
	out[7] = byte((s2 >> 14) | (s3 << 7))
	out[8] = byte(s3 >> 1)
	out[9] = byte(s3 >> 9)
	out[10] = byte((s3 >> 17) | (s4 << 4))
	out[11] = byte(s4 >> 4)
	out[12] = byte(s4 >> 12)
	out[13] = byte((s4 >> 20) | (s5 << 1))
	out[14] = byte(s5 >> 7)
	out[15] = byte((s5 >> 15) | (s6 << 6))
	out[16] = byte(s6 >> 2)
	out[17] = byte(s6 >> 10)
	out[18] = byte((s6 >> 18) | (s7 << 3))
	out[19] = byte(s7 >> 5)
	out[20] = byte(s7 >> 13)
	out[21] = byte(s8 >> 0)
	out[22] = byte(s8 >> 8)
	out[23] = byte((s8 >> 16) | (s9 << 5))
	out[24] = byte(s9 >> 3)
	out[25] = byte(s9 >> 11)
	out[26] = byte((s9 >> 19) | (s10 << 2))
	out[27] = byte(s10 >> 6)
	out[28] = byte((s10 >> 14) | (s11 << 7))
	out[29] = byte(s11 >> 1)
	out[30] = byte(s11 >> 9)
	out[31] = byte(s11 >> 17)
	return out
}
