1// Copyright 2020 ConsenSys Software Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package amd64 16 17import ( 18 "io" 19 20 "github.com/consensys/bavard" 21 ramd64 "github.com/consensys/bavard/amd64" 22 "github.com/consensys/gnark-crypto/ecc" 23 "github.com/consensys/gnark-crypto/field" 24 "github.com/consensys/gnark-crypto/field/asm/amd64" 25 "github.com/consensys/gnark-crypto/internal/generator/config" 26) 27 28// Fq2Amd64 ... 29type Fq2Amd64 struct { 30 *amd64.FFAmd64 31 config config.Curve 32 w io.Writer 33 F *field.Field 34} 35 36// NewFq2Amd64 ... 37func NewFq2Amd64(w io.Writer, F *field.Field, config config.Curve) *Fq2Amd64 { 38 return &Fq2Amd64{ 39 amd64.NewFFAmd64(w, F), 40 config, 41 w, 42 F, 43 } 44} 45 46// Generate ... 47func (fq2 *Fq2Amd64) Generate(forceADXCheck bool) error { 48 fq2.WriteLn(bavard.Apache2Header("ConsenSys Software Inc.", 2020)) 49 50 fq2.WriteLn("#include \"textflag.h\"") 51 fq2.WriteLn("#include \"funcdata.h\"") 52 53 fq2.GenerateDefines() 54 if fq2.config.ID() == ecc.BN254 { 55 fq2.generateMulDefine() 56 } 57 58 fq2.generateAddE2() 59 fq2.generateDoubleE2() 60 fq2.generateSubE2() 61 fq2.generateNegE2() 62 63 switch fq2.config.ID() { 64 case ecc.BN254: 65 fq2.generateMulByNonResidueE2BN254() 66 fq2.generateMulE2BN254(forceADXCheck) 67 fq2.generateSquareE2BN254(forceADXCheck) 68 case ecc.BLS12_381: 69 fq2.generateMulByNonResidueE2BLS381() 70 fq2.generateSquareE2BLS381(forceADXCheck) 71 fq2.generateMulE2BLS381(forceADXCheck) 72 } 73 74 return nil 75} 76 77func (fq2 *Fq2Amd64) generateAddE2() { 78 registers := fq2.FnHeader("addE2", 0, 24) 79 80 // registers 81 x := registers.Pop() 82 y := registers.Pop() 83 r := registers.Pop() 84 t := registers.PopN(fq2.NbWords) 85 86 fq2.MOVQ("x+8(FP)", x) 87 88 // move t = x 89 fq2.Mov(x, t) 90 91 fq2.MOVQ("y+16(FP)", y) 92 93 // t = t + y = x + y 94 fq2.Add(y, t) 95 96 // reduce 97 fq2.Reduce(®isters, t) 98 fq2.MOVQ("res+0(FP)", r) 99 fq2.Mov(t, r) 100 101 // move x+offset(fq2.NbWords) into t 102 fq2.Mov(x, t, fq2.NbWords) 103 104 // add y+offset(fq2.NbWords) into t 105 fq2.Add(y, t, fq2.NbWords) 106 107 // reduce t into r with offset fq2.NbWords 108 fq2.Reduce(®isters, t) 109 fq2.Mov(t, r, 0, fq2.NbWords) 110 111 fq2.RET() 112 113} 114 115func (fq2 *Fq2Amd64) generateDoubleE2() { 116 // func header 117 registers := fq2.FnHeader("doubleE2", 0, 16) 118 119 // registers 120 x := registers.Pop() 121 r := registers.Pop() 122 t := registers.PopN(fq2.NbWords) 123 124 fq2.MOVQ("res+0(FP)", r) 125 fq2.MOVQ("x+8(FP)", x) 126 127 fq2.Mov(x, t) 128 fq2.Add(t, t) 129 fq2.Reduce(®isters, t) 130 fq2.Mov(t, r) 131 fq2.Mov(x, t, fq2.NbWords) 132 fq2.Add(t, t) 133 fq2.Reduce(®isters, t) 134 fq2.Mov(t, r, 0, fq2.NbWords) 135 136 fq2.RET() 137} 138 139func (fq2 *Fq2Amd64) generateNegE2() { 140 registers := fq2.FnHeader("negE2", 0, 16) 141 142 nonZeroA := fq2.NewLabel() 143 nonZeroB := fq2.NewLabel() 144 B := fq2.NewLabel() 145 146 // registers 147 x := registers.Pop() 148 r := registers.Pop() 149 q := registers.Pop() 150 t := registers.PopN(fq2.NbWords) 151 152 fq2.MOVQ("res+0(FP)", r) 153 fq2.MOVQ("x+8(FP)", x) 154 155 // t = x 156 fq2.Mov(x, t) 157 158 // x = t[0] | ... | t[n] 159 fq2.MOVQ(t[0], x) 160 for i := 1; i < fq2.NbWords; i++ { 161 fq2.ORQ(t[i], x) 162 } 163 164 fq2.TESTQ(x, x) 165 166 // if x != 0, we jump to nonzero label 167 fq2.JNE(nonZeroA) 168 169 // if x == 0, we set the result to zero and continue 170 for i := 0; i < fq2.NbWords; i++ { 171 fq2.MOVQ(x, r.At(i+fq2.NbWords)) 172 } 173 fq2.JMP(B) 174 175 fq2.LABEL(nonZeroA) 176 177 // z = x - q 178 for i := 0; i < fq2.NbWords; i++ { 179 fq2.MOVQ(fq2.Q[i], q) 180 if i == 0 { 181 fq2.SUBQ(t[i], q) 182 } else { 183 fq2.SBBQ(t[i], q) 184 } 185 fq2.MOVQ(q, r.At(i)) 186 } 187 188 fq2.LABEL(B) 189 fq2.MOVQ("x+8(FP)", x) 190 fq2.Mov(x, t, fq2.NbWords) 191 192 // x = t[0] | ... | t[n] 193 fq2.MOVQ(t[0], x) 194 for i := 1; i < fq2.NbWords; i++ { 195 fq2.ORQ(t[i], x) 196 } 197 198 fq2.TESTQ(x, x) 199 200 // if x != 0, we jump to nonzero label 201 fq2.JNE(nonZeroB) 202 203 // if x == 0, we set the result to zero and return 204 for i := 0; i < fq2.NbWords; i++ { 205 fq2.MOVQ(x, r.At(i+fq2.NbWords)) 206 } 207 fq2.RET() 208 209 fq2.LABEL(nonZeroB) 210 211 // z = x - q 212 for i := 0; i < fq2.NbWords; i++ { 213 fq2.MOVQ(fq2.Q[i], q) 214 if i == 0 { 215 fq2.SUBQ(t[i], q) 216 } else { 217 fq2.SBBQ(t[i], q) 218 } 219 fq2.MOVQ(q, r.At(i+fq2.NbWords)) 220 } 221 222 fq2.RET() 223 224} 225 226func (fq2 *Fq2Amd64) generateSubE2() { 227 registers := fq2.FnHeader("subE2", 0, 24) 228 229 // registers 230 t := registers.PopN(fq2.NbWords) 231 xy := registers.Pop() 232 233 zero := registers.Pop() 234 fq2.XORQ(zero, zero) 235 236 fq2.MOVQ("x+8(FP)", xy) 237 fq2.Mov(xy, t) 238 239 // z = x - y mod q 240 // move t = x 241 fq2.MOVQ("y+16(FP)", xy) 242 fq2.Sub(xy, t) 243 fq2.MOVQ("x+8(FP)", xy) 244 245 fq2.modReduceAfterSub(®isters, zero, t) 246 247 r := registers.Pop() 248 fq2.MOVQ("res+0(FP)", r) 249 fq2.Mov(t, r) 250 registers.Push(r) 251 252 fq2.Mov(xy, t, fq2.NbWords) 253 254 // z = x - y mod q 255 // move t = x 256 fq2.MOVQ("y+16(FP)", xy) 257 fq2.Sub(xy, t, fq2.NbWords) 258 259 fq2.modReduceAfterSub(®isters, zero, t) 260 261 r = xy 262 fq2.MOVQ("res+0(FP)", r) 263 264 fq2.Mov(t, r, 0, fq2.NbWords) 265 266 fq2.RET() 267 268} 269 270func (fq2 *Fq2Amd64) modReduceAfterSub(registers *ramd64.Registers, zero ramd64.Register, t []ramd64.Register) { 271 q := registers.PopN(fq2.NbWords) 272 fq2.modReduceAfterSubScratch(zero, t, q) 273 registers.Push(q...) 274} 275 276func (fq2 *Fq2Amd64) modReduceAfterSubScratch(zero ramd64.Register, t, scratch []ramd64.Register) { 277 fq2.Mov(fq2.Q, scratch) 278 for i := 0; i < fq2.NbWords; i++ { 279 fq2.CMOVQCC(zero, scratch[i]) 280 } 281 // add registers (q or 0) to t, and set to result 282 fq2.Add(scratch, t) 283} 284