// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "textflag.h"
#include "funcdata.h"

// 64-bit limbs of the modulus q, least-significant limb first.
// Defined once so that the q<> table and the immediates used by sub and neg
// cannot drift apart.
#define Q_LIMB0 0x8508c00000000001
#define Q_LIMB1 0x170b5d4430000000
#define Q_LIMB2 0x1ef3622fba094800
#define Q_LIMB3 0x1a22d9f300f5138f
#define Q_LIMB4 0xc63b05c06ca1493b
#define Q_LIMB5 0x01ae3a4617c510ea

// modulus q
DATA q<>+0(SB)/8, $Q_LIMB0
DATA q<>+8(SB)/8, $Q_LIMB1
DATA q<>+16(SB)/8, $Q_LIMB2
DATA q<>+24(SB)/8, $Q_LIMB3
DATA q<>+32(SB)/8, $Q_LIMB4
DATA q<>+40(SB)/8, $Q_LIMB5
GLOBL q<>(SB), (RODATA+NOPTR), $48

// qInv0 q'[0]
// Satisfies q[0] * qInv0 = -1 (mod 2^64).
// NOTE(review): not referenced by any routine visible here; presumably used by
// Montgomery multiplication defined elsewhere - confirm before removing.
DATA qInv0<>(SB)/8, $0x8508bfffffffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE performs a single conditional subtraction of q.
//   ra0..ra5: 6-limb value (least-significant limb first), reduced in place.
//   rb0..rb5: scratch; receive a copy of the value before the subtraction.
// q is subtracted through a SUBQ/SBBQ borrow chain. If the chain ends with a
// borrow (CF set, i.e. the value was already below q) the saved copy is moved
// back, otherwise the difference is kept. Only one subtraction is attempted,
// so the result is below q only when the input is below 2q.
// Clobbers rb0..rb5 and the flags.
#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	MOVQ    ra4, rb4;        \
	SBBQ    q<>+32(SB), ra4; \
	MOVQ    ra5, rb5;        \
	SBBQ    q<>+40(SB), ra5; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;        \
	CMOVQCS rb4, ra4;        \
	CMOVQCS rb5, ra5

// add(res, x, y *Element)
// Stores x + y, followed by one conditional subtraction of q, into res.
// The carry out of the top limb is not consumed, so the sum is expected to fit
// in 384 bits; q is below 2^377, so this holds whenever x and y are below q.
TEXT ·add(SB), NOSPLIT, $0-24
	// load x into (CX,BX,SI,DI,R8,R9)
	MOVQ x+8(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI
	MOVQ 32(AX), R8
	MOVQ 40(AX), R9

	// add y limb by limb, propagating the carry
	MOVQ y+16(FP), DX
	ADDQ 0(DX), CX
	ADCQ 8(DX), BX
	ADCQ 16(DX), SI
	ADCQ 24(DX), DI
	ADCQ 32(DX), R8
	ADCQ 40(DX), R9

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	MOVQ res+0(FP), AX
	MOVQ CX, 0(AX)
	MOVQ BX, 8(AX)
	MOVQ SI, 16(AX)
	MOVQ DI, 24(AX)
	MOVQ R8, 32(AX)
	MOVQ R9, 40(AX)
	RET

// sub(res, x, y *Element)
// Stores x - y into res; when the subtraction borrows (x < y as 384-bit
// integers) q is added back, otherwise zero is added.
TEXT ·sub(SB), NOSPLIT, $0-24
	// R9 = 0; cleared before the borrow chain because XORQ rewrites the flags
	XORQ    R9, R9

	// load x into (AX,DX,CX,BX,SI,DI)
	MOVQ    x+8(FP), R8
	MOVQ    0(R8), AX
	MOVQ    8(R8), DX
	MOVQ    16(R8), CX
	MOVQ    24(R8), BX
	MOVQ    32(R8), SI
	MOVQ    40(R8), DI

	// subtract y limb by limb; CF after the last SBBQ is the final borrow
	MOVQ    y+16(FP), R8
	SUBQ    0(R8), AX
	SBBQ    8(R8), DX
	SBBQ    16(R8), CX
	SBBQ    24(R8), BX
	SBBQ    32(R8), SI
	SBBQ    40(R8), DI

	// (R10..R15) = q; MOVQ leaves CF untouched
	MOVQ    $Q_LIMB0, R10
	MOVQ    $Q_LIMB1, R11
	MOVQ    $Q_LIMB2, R12
	MOVQ    $Q_LIMB3, R13
	MOVQ    $Q_LIMB4, R14
	MOVQ    $Q_LIMB5, R15

	// no borrow: replace q by 0 so that the addition below is a no-op
	CMOVQCC R9, R10
	CMOVQCC R9, R11
	CMOVQCC R9, R12
	CMOVQCC R9, R13
	CMOVQCC R9, R14
	CMOVQCC R9, R15

	ADDQ    R10, AX
	ADCQ    R11, DX
	ADCQ    R12, CX
	ADCQ    R13, BX
	ADCQ    R14, SI
	ADCQ    R15, DI

	MOVQ    res+0(FP), R8
	MOVQ    AX, 0(R8)
	MOVQ    DX, 8(R8)
	MOVQ    CX, 16(R8)
	MOVQ    BX, 24(R8)
	MOVQ    SI, 32(R8)
	MOVQ    DI, 40(R8)
	RET

// double(res, x *Element)
// Stores x + x, followed by one conditional subtraction of q, into res.
TEXT ·double(SB), NOSPLIT, $0-16
	// load x into (DX,CX,BX,SI,DI,R8)
	MOVQ x+8(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	MOVQ res+0(FP), R15
	MOVQ DX, 0(R15)
	MOVQ CX, 8(R15)
	MOVQ BX, 16(R15)
	MOVQ SI, 24(R15)
	MOVQ DI, 32(R15)
	MOVQ R8, 40(R15)
	RET

// neg(res, x *Element)
// Stores q - x into res, except that an all-zero x is stored back as zero.
TEXT ·neg(SB), NOSPLIT, $0-16
	MOVQ  res+0(FP), R9

	// load x into (DX,CX,BX,SI,DI,R8); every limb is read before res is written
	MOVQ  x+8(FP), AX
	MOVQ  0(AX), DX
	MOVQ  8(AX), CX
	MOVQ  16(AX), BX
	MOVQ  24(AX), SI
	MOVQ  32(AX), DI
	MOVQ  40(AX), R8

	// AX = OR of all limbs; zero only when x is zero
	MOVQ  DX, AX
	ORQ   CX, AX
	ORQ   BX, AX
	ORQ   SI, AX
	ORQ   DI, AX
	ORQ   R8, AX
	TESTQ AX, AX
	JEQ   x_is_zero

	// res = q - x, one limb at a time; the interleaved MOVQs keep the borrow in CF
	MOVQ  $Q_LIMB0, R10
	SUBQ  DX, R10
	MOVQ  R10, 0(R9)
	MOVQ  $Q_LIMB1, R10
	SBBQ  CX, R10
	MOVQ  R10, 8(R9)
	MOVQ  $Q_LIMB2, R10
	SBBQ  BX, R10
	MOVQ  R10, 16(R9)
	MOVQ  $Q_LIMB3, R10
	SBBQ  SI, R10
	MOVQ  R10, 24(R9)
	MOVQ  $Q_LIMB4, R10
	SBBQ  DI, R10
	MOVQ  R10, 32(R9)
	MOVQ  $Q_LIMB5, R10
	SBBQ  R8, R10
	MOVQ  R10, 40(R9)
	RET

x_is_zero:
	// AX is zero here: res = 0
	MOVQ  AX, 0(R9)
	MOVQ  AX, 8(R9)
	MOVQ  AX, 16(R9)
	MOVQ  AX, 24(R9)
	MOVQ  AX, 32(R9)
	MOVQ  AX, 40(R9)
	RET

// reduce(res *Element)
// Applies one conditional subtraction of q to res, in place.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy3(x *Element)
// Replaces x by 2x + x in place, with a conditional subtraction of q after
// the doubling and after the addition.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// 2x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy5(x *Element)
// Replaces x by 2(2x) + x in place, with a conditional subtraction of q after
// each doubling and after the addition.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13)
	REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13)

	// 4x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12)
	REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET

// MulBy13(x *Element)
// Replaces x by 8x + 4x + x in place, with a conditional subtraction of q
// after every doubling and addition.
// Uses a 40-byte frame: the intermediate 4x is kept in R15 plus the five stack
// slots s0..s4. Unlike its siblings this routine is not marked NOSPLIT.
TEXT ·MulBy13(SB), $40-8
	MOVQ x+0(FP), AX
	MOVQ 0(AX), DX
	MOVQ 8(AX), CX
	MOVQ 16(AX), BX
	MOVQ 24(AX), SI
	MOVQ 32(AX), DI
	MOVQ 40(AX), R8

	// 2x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// 4x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP))
	REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP))

	// keep the reduced 4x in (R15,s0..s4) for the 8x + 4x step below
	MOVQ DX, R15
	MOVQ CX, s0-8(SP)
	MOVQ BX, s1-16(SP)
	MOVQ SI, s2-24(SP)
	MOVQ DI, s3-32(SP)
	MOVQ R8, s4-40(SP)

	// 8x
	ADDQ DX, DX
	ADCQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// 8x + 4x
	ADDQ R15, DX
	ADCQ s0-8(SP), CX
	ADCQ s1-16(SP), BX
	ADCQ s2-24(SP), SI
	ADCQ s3-32(SP), DI
	ADCQ s4-40(SP), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	// 12x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(AX), DX
	ADCQ 8(AX), CX
	ADCQ 16(AX), BX
	ADCQ 24(AX), SI
	ADCQ 32(AX), DI
	ADCQ 40(AX), R8

	// reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14)
	REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14)

	MOVQ DX, 0(AX)
	MOVQ CX, 8(AX)
	MOVQ BX, 16(AX)
	MOVQ SI, 24(AX)
	MOVQ DI, 32(AX)
	MOVQ R8, 40(AX)
	RET