1// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. 2 3// +build !appengine 4// +build !noasm 5// +build gc 6 7#include "textflag.h" 8 9// func encodeBlockAsm(dst []byte, src []byte) int 10// Requires: SSE2 11TEXT ·encodeBlockAsm(SB), $65560-56 12 MOVQ dst_base+0(FP), AX 13 MOVQ $0x00000200, CX 14 LEAQ 24(SP), DX 15 PXOR X0, X0 16 17zero_loop_encodeBlockAsm: 18 MOVOU X0, (DX) 19 MOVOU X0, 16(DX) 20 MOVOU X0, 32(DX) 21 MOVOU X0, 48(DX) 22 MOVOU X0, 64(DX) 23 MOVOU X0, 80(DX) 24 MOVOU X0, 96(DX) 25 MOVOU X0, 112(DX) 26 ADDQ $0x80, DX 27 DECQ CX 28 JNZ zero_loop_encodeBlockAsm 29 MOVL $0x00000000, 12(SP) 30 MOVQ src_len+32(FP), CX 31 LEAQ -9(CX), DX 32 LEAQ -8(CX), SI 33 MOVL SI, 8(SP) 34 SHRQ $0x05, CX 35 SUBL CX, DX 36 LEAQ (AX)(DX*1), DX 37 MOVQ DX, (SP) 38 MOVL $0x00000001, CX 39 MOVL CX, 16(SP) 40 MOVQ src_base+24(FP), DX 41 42search_loop_encodeBlockAsm: 43 MOVL CX, SI 44 SUBL 12(SP), SI 45 SHRL $0x06, SI 46 LEAL 4(CX)(SI*1), SI 47 CMPL SI, 8(SP) 48 JGE emit_remainder_encodeBlockAsm 49 MOVQ (DX)(CX*1), DI 50 MOVL SI, 20(SP) 51 MOVQ $0x0000cf1bbcdcbf9b, R9 52 MOVQ DI, R10 53 MOVQ DI, R11 54 SHRQ $0x08, R11 55 SHLQ $0x10, R10 56 IMULQ R9, R10 57 SHRQ $0x32, R10 58 SHLQ $0x10, R11 59 IMULQ R9, R11 60 SHRQ $0x32, R11 61 MOVL 24(SP)(R10*4), SI 62 MOVL 24(SP)(R11*4), R8 63 MOVL CX, 24(SP)(R10*4) 64 LEAL 1(CX), R10 65 MOVL R10, 24(SP)(R11*4) 66 MOVQ DI, R10 67 SHRQ $0x10, R10 68 SHLQ $0x10, R10 69 IMULQ R9, R10 70 SHRQ $0x32, R10 71 MOVL CX, R9 72 SUBL 16(SP), R9 73 MOVL 1(DX)(R9*1), R11 74 MOVQ DI, R9 75 SHRQ $0x08, R9 76 CMPL R9, R11 77 JNE no_repeat_found_encodeBlockAsm 78 LEAL 1(CX), DI 79 MOVL 12(SP), R8 80 MOVL DI, SI 81 SUBL 16(SP), SI 82 JZ repeat_extend_back_end_encodeBlockAsm 83 84repeat_extend_back_loop_encodeBlockAsm: 85 CMPL DI, R8 86 JLE repeat_extend_back_end_encodeBlockAsm 87 MOVB -1(DX)(SI*1), BL 88 MOVB -1(DX)(DI*1), R9 89 CMPB BL, R9 90 JNE repeat_extend_back_end_encodeBlockAsm 
91 LEAL -1(DI), DI 92 DECL SI 93 JNZ repeat_extend_back_loop_encodeBlockAsm 94 95repeat_extend_back_end_encodeBlockAsm: 96 MOVL 12(SP), SI 97 CMPL SI, DI 98 JEQ emit_literal_done_repeat_emit_encodeBlockAsm 99 MOVL DI, R9 100 MOVL DI, 12(SP) 101 LEAQ (DX)(SI*1), R10 102 SUBL SI, R9 103 LEAL -1(R9), SI 104 CMPL SI, $0x3c 105 JLT one_byte_repeat_emit_encodeBlockAsm 106 CMPL SI, $0x00000100 107 JLT two_bytes_repeat_emit_encodeBlockAsm 108 CMPL SI, $0x00010000 109 JLT three_bytes_repeat_emit_encodeBlockAsm 110 CMPL SI, $0x01000000 111 JLT four_bytes_repeat_emit_encodeBlockAsm 112 MOVB $0xfc, (AX) 113 MOVL SI, 1(AX) 114 ADDQ $0x05, AX 115 JMP memmove_long_repeat_emit_encodeBlockAsm 116 117four_bytes_repeat_emit_encodeBlockAsm: 118 MOVL SI, R11 119 SHRL $0x10, R11 120 MOVB $0xf8, (AX) 121 MOVW SI, 1(AX) 122 MOVB R11, 3(AX) 123 ADDQ $0x04, AX 124 JMP memmove_long_repeat_emit_encodeBlockAsm 125 126three_bytes_repeat_emit_encodeBlockAsm: 127 MOVB $0xf4, (AX) 128 MOVW SI, 1(AX) 129 ADDQ $0x03, AX 130 JMP memmove_long_repeat_emit_encodeBlockAsm 131 132two_bytes_repeat_emit_encodeBlockAsm: 133 MOVB $0xf0, (AX) 134 MOVB SI, 1(AX) 135 ADDQ $0x02, AX 136 CMPL SI, $0x40 137 JL memmove_repeat_emit_encodeBlockAsm 138 JMP memmove_long_repeat_emit_encodeBlockAsm 139 140one_byte_repeat_emit_encodeBlockAsm: 141 SHLB $0x02, SI 142 MOVB SI, (AX) 143 ADDQ $0x01, AX 144 145memmove_repeat_emit_encodeBlockAsm: 146 LEAQ (AX)(R9*1), SI 147 148 // genMemMoveShort 149 CMPQ R9, $0x08 150 JLE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 151 CMPQ R9, $0x10 152 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 153 CMPQ R9, $0x20 154 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 155 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 156 157emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: 158 MOVQ (R10), R11 159 MOVQ R11, (AX) 160 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 161 
162emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: 163 MOVQ (R10), R11 164 MOVQ -8(R10)(R9*1), R10 165 MOVQ R11, (AX) 166 MOVQ R10, -8(AX)(R9*1) 167 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 168 169emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: 170 MOVOU (R10), X0 171 MOVOU -16(R10)(R9*1), X1 172 MOVOU X0, (AX) 173 MOVOU X1, -16(AX)(R9*1) 174 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 175 176emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: 177 MOVOU (R10), X0 178 MOVOU 16(R10), X1 179 MOVOU -32(R10)(R9*1), X2 180 MOVOU -16(R10)(R9*1), X3 181 MOVOU X0, (AX) 182 MOVOU X1, 16(AX) 183 MOVOU X2, -32(AX)(R9*1) 184 MOVOU X3, -16(AX)(R9*1) 185 186memmove_end_copy_repeat_emit_encodeBlockAsm: 187 MOVQ SI, AX 188 JMP emit_literal_done_repeat_emit_encodeBlockAsm 189 190memmove_long_repeat_emit_encodeBlockAsm: 191 LEAQ (AX)(R9*1), SI 192 193 // genMemMoveLong 194 MOVOU (R10), X0 195 MOVOU 16(R10), X1 196 MOVOU -32(R10)(R9*1), X2 197 MOVOU -16(R10)(R9*1), X3 198 MOVQ R9, R12 199 SHRQ $0x05, R12 200 MOVQ AX, R11 201 ANDL $0x0000001f, R11 202 MOVQ $0x00000040, R13 203 SUBQ R11, R13 204 DECQ R12 205 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 206 LEAQ -32(R10)(R13*1), R11 207 LEAQ -32(AX)(R13*1), R14 208 209emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: 210 MOVOU (R11), X4 211 MOVOU 16(R11), X5 212 MOVOA X4, (R14) 213 MOVOA X5, 16(R14) 214 ADDQ $0x20, R14 215 ADDQ $0x20, R11 216 ADDQ $0x20, R13 217 DECQ R12 218 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back 219 220emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: 221 MOVOU -32(R10)(R13*1), X4 222 MOVOU -16(R10)(R13*1), X5 223 MOVOA X4, -32(AX)(R13*1) 224 MOVOA X5, -16(AX)(R13*1) 225 ADDQ $0x20, R13 226 CMPQ R9, R13 227 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 228 MOVOU X0, (AX) 229 MOVOU X1, 16(AX) 230 MOVOU X2, 
-32(AX)(R9*1) 231 MOVOU X3, -16(AX)(R9*1) 232 MOVQ SI, AX 233 234emit_literal_done_repeat_emit_encodeBlockAsm: 235 ADDL $0x05, CX 236 MOVL CX, SI 237 SUBL 16(SP), SI 238 MOVQ src_len+32(FP), R9 239 SUBL CX, R9 240 LEAQ (DX)(CX*1), R10 241 LEAQ (DX)(SI*1), SI 242 243 // matchLen 244 XORL R12, R12 245 CMPL R9, $0x08 246 JL matchlen_single_repeat_extend_encodeBlockAsm 247 248matchlen_loopback_repeat_extend_encodeBlockAsm: 249 MOVQ (R10)(R12*1), R11 250 XORQ (SI)(R12*1), R11 251 TESTQ R11, R11 252 JZ matchlen_loop_repeat_extend_encodeBlockAsm 253 BSFQ R11, R11 254 SARQ $0x03, R11 255 LEAL (R12)(R11*1), R12 256 JMP repeat_extend_forward_end_encodeBlockAsm 257 258matchlen_loop_repeat_extend_encodeBlockAsm: 259 LEAL -8(R9), R9 260 LEAL 8(R12), R12 261 CMPL R9, $0x08 262 JGE matchlen_loopback_repeat_extend_encodeBlockAsm 263 264matchlen_single_repeat_extend_encodeBlockAsm: 265 TESTL R9, R9 266 JZ repeat_extend_forward_end_encodeBlockAsm 267 268matchlen_single_loopback_repeat_extend_encodeBlockAsm: 269 MOVB (R10)(R12*1), R11 270 CMPB (SI)(R12*1), R11 271 JNE repeat_extend_forward_end_encodeBlockAsm 272 LEAL 1(R12), R12 273 DECL R9 274 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm 275 276repeat_extend_forward_end_encodeBlockAsm: 277 ADDL R12, CX 278 MOVL CX, SI 279 SUBL DI, SI 280 MOVL 16(SP), DI 281 TESTL R8, R8 282 JZ repeat_as_copy_encodeBlockAsm 283 284 // emitRepeat 285emit_repeat_again_match_repeat_encodeBlockAsm: 286 MOVL SI, R8 287 LEAL -4(SI), SI 288 CMPL R8, $0x08 289 JLE repeat_two_match_repeat_encodeBlockAsm 290 CMPL R8, $0x0c 291 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm 292 CMPL DI, $0x00000800 293 JLT repeat_two_offset_match_repeat_encodeBlockAsm 294 295cant_repeat_two_offset_match_repeat_encodeBlockAsm: 296 CMPL SI, $0x00000104 297 JLT repeat_three_match_repeat_encodeBlockAsm 298 CMPL SI, $0x00010100 299 JLT repeat_four_match_repeat_encodeBlockAsm 300 CMPL SI, $0x0100ffff 301 JLT repeat_five_match_repeat_encodeBlockAsm 302 LEAL 
-16842747(SI), SI 303 MOVW $0x001d, (AX) 304 MOVW $0xfffb, 2(AX) 305 MOVB $0xff, 4(AX) 306 ADDQ $0x05, AX 307 JMP emit_repeat_again_match_repeat_encodeBlockAsm 308 309repeat_five_match_repeat_encodeBlockAsm: 310 LEAL -65536(SI), SI 311 MOVL SI, DI 312 MOVW $0x001d, (AX) 313 MOVW SI, 2(AX) 314 SARL $0x10, DI 315 MOVB DI, 4(AX) 316 ADDQ $0x05, AX 317 JMP repeat_end_emit_encodeBlockAsm 318 319repeat_four_match_repeat_encodeBlockAsm: 320 LEAL -256(SI), SI 321 MOVW $0x0019, (AX) 322 MOVW SI, 2(AX) 323 ADDQ $0x04, AX 324 JMP repeat_end_emit_encodeBlockAsm 325 326repeat_three_match_repeat_encodeBlockAsm: 327 LEAL -4(SI), SI 328 MOVW $0x0015, (AX) 329 MOVB SI, 2(AX) 330 ADDQ $0x03, AX 331 JMP repeat_end_emit_encodeBlockAsm 332 333repeat_two_match_repeat_encodeBlockAsm: 334 SHLL $0x02, SI 335 ORL $0x01, SI 336 MOVW SI, (AX) 337 ADDQ $0x02, AX 338 JMP repeat_end_emit_encodeBlockAsm 339 340repeat_two_offset_match_repeat_encodeBlockAsm: 341 XORQ R8, R8 342 LEAL 1(R8)(SI*4), SI 343 MOVB DI, 1(AX) 344 SARL $0x08, DI 345 SHLL $0x05, DI 346 ORL DI, SI 347 MOVB SI, (AX) 348 ADDQ $0x02, AX 349 JMP repeat_end_emit_encodeBlockAsm 350 351repeat_as_copy_encodeBlockAsm: 352 // emitCopy 353 CMPL DI, $0x00010000 354 JL two_byte_offset_repeat_as_copy_encodeBlockAsm 355 356four_bytes_loop_back_repeat_as_copy_encodeBlockAsm: 357 CMPL SI, $0x40 358 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm 359 MOVB $0xff, (AX) 360 MOVL DI, 1(AX) 361 LEAL -64(SI), SI 362 ADDQ $0x05, AX 363 CMPL SI, $0x04 364 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm 365 366 // emitRepeat 367emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: 368 MOVL SI, R8 369 LEAL -4(SI), SI 370 CMPL R8, $0x08 371 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy 372 CMPL R8, $0x0c 373 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy 374 CMPL DI, $0x00000800 375 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy 376 377cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: 
378 CMPL SI, $0x00000104 379 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy 380 CMPL SI, $0x00010100 381 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy 382 CMPL SI, $0x0100ffff 383 JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy 384 LEAL -16842747(SI), SI 385 MOVW $0x001d, (AX) 386 MOVW $0xfffb, 2(AX) 387 MOVB $0xff, 4(AX) 388 ADDQ $0x05, AX 389 JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy 390 391repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: 392 LEAL -65536(SI), SI 393 MOVL SI, DI 394 MOVW $0x001d, (AX) 395 MOVW SI, 2(AX) 396 SARL $0x10, DI 397 MOVB DI, 4(AX) 398 ADDQ $0x05, AX 399 JMP repeat_end_emit_encodeBlockAsm 400 401repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: 402 LEAL -256(SI), SI 403 MOVW $0x0019, (AX) 404 MOVW SI, 2(AX) 405 ADDQ $0x04, AX 406 JMP repeat_end_emit_encodeBlockAsm 407 408repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: 409 LEAL -4(SI), SI 410 MOVW $0x0015, (AX) 411 MOVB SI, 2(AX) 412 ADDQ $0x03, AX 413 JMP repeat_end_emit_encodeBlockAsm 414 415repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: 416 SHLL $0x02, SI 417 ORL $0x01, SI 418 MOVW SI, (AX) 419 ADDQ $0x02, AX 420 JMP repeat_end_emit_encodeBlockAsm 421 422repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: 423 XORQ R8, R8 424 LEAL 1(R8)(SI*4), SI 425 MOVB DI, 1(AX) 426 SARL $0x08, DI 427 SHLL $0x05, DI 428 ORL DI, SI 429 MOVB SI, (AX) 430 ADDQ $0x02, AX 431 JMP repeat_end_emit_encodeBlockAsm 432 JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm 433 434four_bytes_remain_repeat_as_copy_encodeBlockAsm: 435 TESTL SI, SI 436 JZ repeat_end_emit_encodeBlockAsm 437 MOVB $0x03, BL 438 LEAL -4(BX)(SI*4), SI 439 MOVB SI, (AX) 440 MOVL DI, 1(AX) 441 ADDQ $0x05, AX 442 JMP repeat_end_emit_encodeBlockAsm 443 444two_byte_offset_repeat_as_copy_encodeBlockAsm: 445 CMPL SI, $0x40 446 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm 447 MOVB $0xee, (AX) 448 MOVW DI, 1(AX) 449 LEAL -60(SI), SI 450 ADDQ $0x03, AX 451 452 // 
emitRepeat 453emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: 454 MOVL SI, R8 455 LEAL -4(SI), SI 456 CMPL R8, $0x08 457 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short 458 CMPL R8, $0x0c 459 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short 460 CMPL DI, $0x00000800 461 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short 462 463cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: 464 CMPL SI, $0x00000104 465 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short 466 CMPL SI, $0x00010100 467 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short 468 CMPL SI, $0x0100ffff 469 JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short 470 LEAL -16842747(SI), SI 471 MOVW $0x001d, (AX) 472 MOVW $0xfffb, 2(AX) 473 MOVB $0xff, 4(AX) 474 ADDQ $0x05, AX 475 JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short 476 477repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: 478 LEAL -65536(SI), SI 479 MOVL SI, DI 480 MOVW $0x001d, (AX) 481 MOVW SI, 2(AX) 482 SARL $0x10, DI 483 MOVB DI, 4(AX) 484 ADDQ $0x05, AX 485 JMP repeat_end_emit_encodeBlockAsm 486 487repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: 488 LEAL -256(SI), SI 489 MOVW $0x0019, (AX) 490 MOVW SI, 2(AX) 491 ADDQ $0x04, AX 492 JMP repeat_end_emit_encodeBlockAsm 493 494repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: 495 LEAL -4(SI), SI 496 MOVW $0x0015, (AX) 497 MOVB SI, 2(AX) 498 ADDQ $0x03, AX 499 JMP repeat_end_emit_encodeBlockAsm 500 501repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: 502 SHLL $0x02, SI 503 ORL $0x01, SI 504 MOVW SI, (AX) 505 ADDQ $0x02, AX 506 JMP repeat_end_emit_encodeBlockAsm 507 508repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: 509 XORQ R8, R8 510 LEAL 1(R8)(SI*4), SI 511 MOVB DI, 1(AX) 512 SARL $0x08, DI 513 SHLL $0x05, DI 514 ORL DI, SI 515 MOVB SI, (AX) 516 ADDQ $0x02, AX 517 JMP repeat_end_emit_encodeBlockAsm 518 JMP 
two_byte_offset_repeat_as_copy_encodeBlockAsm 519 520two_byte_offset_short_repeat_as_copy_encodeBlockAsm: 521 CMPL SI, $0x0c 522 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm 523 CMPL DI, $0x00000800 524 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm 525 MOVB $0x01, BL 526 LEAL -16(BX)(SI*4), SI 527 MOVB DI, 1(AX) 528 SHRL $0x08, DI 529 SHLL $0x05, DI 530 ORL DI, SI 531 MOVB SI, (AX) 532 ADDQ $0x02, AX 533 JMP repeat_end_emit_encodeBlockAsm 534 535emit_copy_three_repeat_as_copy_encodeBlockAsm: 536 MOVB $0x02, BL 537 LEAL -4(BX)(SI*4), SI 538 MOVB SI, (AX) 539 MOVW DI, 1(AX) 540 ADDQ $0x03, AX 541 542repeat_end_emit_encodeBlockAsm: 543 MOVL CX, 12(SP) 544 JMP search_loop_encodeBlockAsm 545 546no_repeat_found_encodeBlockAsm: 547 CMPL (DX)(SI*1), DI 548 JEQ candidate_match_encodeBlockAsm 549 SHRQ $0x08, DI 550 MOVL 24(SP)(R10*4), SI 551 LEAL 2(CX), R9 552 CMPL (DX)(R8*1), DI 553 JEQ candidate2_match_encodeBlockAsm 554 MOVL R9, 24(SP)(R10*4) 555 SHRQ $0x08, DI 556 CMPL (DX)(SI*1), DI 557 JEQ candidate3_match_encodeBlockAsm 558 MOVL 20(SP), CX 559 JMP search_loop_encodeBlockAsm 560 561candidate3_match_encodeBlockAsm: 562 ADDL $0x02, CX 563 JMP candidate_match_encodeBlockAsm 564 565candidate2_match_encodeBlockAsm: 566 MOVL R9, 24(SP)(R10*4) 567 INCL CX 568 MOVL R8, SI 569 570candidate_match_encodeBlockAsm: 571 MOVL 12(SP), DI 572 TESTL SI, SI 573 JZ match_extend_back_end_encodeBlockAsm 574 575match_extend_back_loop_encodeBlockAsm: 576 CMPL CX, DI 577 JLE match_extend_back_end_encodeBlockAsm 578 MOVB -1(DX)(SI*1), BL 579 MOVB -1(DX)(CX*1), R8 580 CMPB BL, R8 581 JNE match_extend_back_end_encodeBlockAsm 582 LEAL -1(CX), CX 583 DECL SI 584 JZ match_extend_back_end_encodeBlockAsm 585 JMP match_extend_back_loop_encodeBlockAsm 586 587match_extend_back_end_encodeBlockAsm: 588 MOVL CX, DI 589 SUBL 12(SP), DI 590 LEAQ 5(AX)(DI*1), DI 591 CMPQ DI, (SP) 592 JL match_dst_size_check_encodeBlockAsm 593 MOVQ $0x00000000, ret+48(FP) 594 RET 595 
596match_dst_size_check_encodeBlockAsm: 597 MOVL CX, DI 598 MOVL 12(SP), R8 599 CMPL R8, DI 600 JEQ emit_literal_done_match_emit_encodeBlockAsm 601 MOVL DI, R9 602 MOVL DI, 12(SP) 603 LEAQ (DX)(R8*1), DI 604 SUBL R8, R9 605 LEAL -1(R9), R8 606 CMPL R8, $0x3c 607 JLT one_byte_match_emit_encodeBlockAsm 608 CMPL R8, $0x00000100 609 JLT two_bytes_match_emit_encodeBlockAsm 610 CMPL R8, $0x00010000 611 JLT three_bytes_match_emit_encodeBlockAsm 612 CMPL R8, $0x01000000 613 JLT four_bytes_match_emit_encodeBlockAsm 614 MOVB $0xfc, (AX) 615 MOVL R8, 1(AX) 616 ADDQ $0x05, AX 617 JMP memmove_long_match_emit_encodeBlockAsm 618 619four_bytes_match_emit_encodeBlockAsm: 620 MOVL R8, R10 621 SHRL $0x10, R10 622 MOVB $0xf8, (AX) 623 MOVW R8, 1(AX) 624 MOVB R10, 3(AX) 625 ADDQ $0x04, AX 626 JMP memmove_long_match_emit_encodeBlockAsm 627 628three_bytes_match_emit_encodeBlockAsm: 629 MOVB $0xf4, (AX) 630 MOVW R8, 1(AX) 631 ADDQ $0x03, AX 632 JMP memmove_long_match_emit_encodeBlockAsm 633 634two_bytes_match_emit_encodeBlockAsm: 635 MOVB $0xf0, (AX) 636 MOVB R8, 1(AX) 637 ADDQ $0x02, AX 638 CMPL R8, $0x40 639 JL memmove_match_emit_encodeBlockAsm 640 JMP memmove_long_match_emit_encodeBlockAsm 641 642one_byte_match_emit_encodeBlockAsm: 643 SHLB $0x02, R8 644 MOVB R8, (AX) 645 ADDQ $0x01, AX 646 647memmove_match_emit_encodeBlockAsm: 648 LEAQ (AX)(R9*1), R8 649 650 // genMemMoveShort 651 CMPQ R9, $0x08 652 JLE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 653 CMPQ R9, $0x10 654 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 655 CMPQ R9, $0x20 656 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 657 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 658 659emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: 660 MOVQ (DI), R10 661 MOVQ R10, (AX) 662 JMP memmove_end_copy_match_emit_encodeBlockAsm 663 664emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: 665 MOVQ (DI), R10 666 MOVQ 
-8(DI)(R9*1), DI 667 MOVQ R10, (AX) 668 MOVQ DI, -8(AX)(R9*1) 669 JMP memmove_end_copy_match_emit_encodeBlockAsm 670 671emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: 672 MOVOU (DI), X0 673 MOVOU -16(DI)(R9*1), X1 674 MOVOU X0, (AX) 675 MOVOU X1, -16(AX)(R9*1) 676 JMP memmove_end_copy_match_emit_encodeBlockAsm 677 678emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: 679 MOVOU (DI), X0 680 MOVOU 16(DI), X1 681 MOVOU -32(DI)(R9*1), X2 682 MOVOU -16(DI)(R9*1), X3 683 MOVOU X0, (AX) 684 MOVOU X1, 16(AX) 685 MOVOU X2, -32(AX)(R9*1) 686 MOVOU X3, -16(AX)(R9*1) 687 688memmove_end_copy_match_emit_encodeBlockAsm: 689 MOVQ R8, AX 690 JMP emit_literal_done_match_emit_encodeBlockAsm 691 692memmove_long_match_emit_encodeBlockAsm: 693 LEAQ (AX)(R9*1), R8 694 695 // genMemMoveLong 696 MOVOU (DI), X0 697 MOVOU 16(DI), X1 698 MOVOU -32(DI)(R9*1), X2 699 MOVOU -16(DI)(R9*1), X3 700 MOVQ R9, R11 701 SHRQ $0x05, R11 702 MOVQ AX, R10 703 ANDL $0x0000001f, R10 704 MOVQ $0x00000040, R12 705 SUBQ R10, R12 706 DECQ R11 707 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 708 LEAQ -32(DI)(R12*1), R10 709 LEAQ -32(AX)(R12*1), R13 710 711emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: 712 MOVOU (R10), X4 713 MOVOU 16(R10), X5 714 MOVOA X4, (R13) 715 MOVOA X5, 16(R13) 716 ADDQ $0x20, R13 717 ADDQ $0x20, R10 718 ADDQ $0x20, R12 719 DECQ R11 720 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back 721 722emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: 723 MOVOU -32(DI)(R12*1), X4 724 MOVOU -16(DI)(R12*1), X5 725 MOVOA X4, -32(AX)(R12*1) 726 MOVOA X5, -16(AX)(R12*1) 727 ADDQ $0x20, R12 728 CMPQ R9, R12 729 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 730 MOVOU X0, (AX) 731 MOVOU X1, 16(AX) 732 MOVOU X2, -32(AX)(R9*1) 733 MOVOU X3, -16(AX)(R9*1) 734 MOVQ R8, AX 735 736emit_literal_done_match_emit_encodeBlockAsm: 
737match_nolit_loop_encodeBlockAsm: 738 MOVL CX, DI 739 SUBL SI, DI 740 MOVL DI, 16(SP) 741 ADDL $0x04, CX 742 ADDL $0x04, SI 743 MOVQ src_len+32(FP), DI 744 SUBL CX, DI 745 LEAQ (DX)(CX*1), R8 746 LEAQ (DX)(SI*1), SI 747 748 // matchLen 749 XORL R10, R10 750 CMPL DI, $0x08 751 JL matchlen_single_match_nolit_encodeBlockAsm 752 753matchlen_loopback_match_nolit_encodeBlockAsm: 754 MOVQ (R8)(R10*1), R9 755 XORQ (SI)(R10*1), R9 756 TESTQ R9, R9 757 JZ matchlen_loop_match_nolit_encodeBlockAsm 758 BSFQ R9, R9 759 SARQ $0x03, R9 760 LEAL (R10)(R9*1), R10 761 JMP match_nolit_end_encodeBlockAsm 762 763matchlen_loop_match_nolit_encodeBlockAsm: 764 LEAL -8(DI), DI 765 LEAL 8(R10), R10 766 CMPL DI, $0x08 767 JGE matchlen_loopback_match_nolit_encodeBlockAsm 768 769matchlen_single_match_nolit_encodeBlockAsm: 770 TESTL DI, DI 771 JZ match_nolit_end_encodeBlockAsm 772 773matchlen_single_loopback_match_nolit_encodeBlockAsm: 774 MOVB (R8)(R10*1), R9 775 CMPB (SI)(R10*1), R9 776 JNE match_nolit_end_encodeBlockAsm 777 LEAL 1(R10), R10 778 DECL DI 779 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm 780 781match_nolit_end_encodeBlockAsm: 782 ADDL R10, CX 783 MOVL 16(SP), SI 784 ADDL $0x04, R10 785 MOVL CX, 12(SP) 786 787 // emitCopy 788 CMPL SI, $0x00010000 789 JL two_byte_offset_match_nolit_encodeBlockAsm 790 791four_bytes_loop_back_match_nolit_encodeBlockAsm: 792 CMPL R10, $0x40 793 JLE four_bytes_remain_match_nolit_encodeBlockAsm 794 MOVB $0xff, (AX) 795 MOVL SI, 1(AX) 796 LEAL -64(R10), R10 797 ADDQ $0x05, AX 798 CMPL R10, $0x04 799 JL four_bytes_remain_match_nolit_encodeBlockAsm 800 801 // emitRepeat 802emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: 803 MOVL R10, DI 804 LEAL -4(R10), R10 805 CMPL DI, $0x08 806 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy 807 CMPL DI, $0x0c 808 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy 809 CMPL SI, $0x00000800 810 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy 811 
812cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: 813 CMPL R10, $0x00000104 814 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy 815 CMPL R10, $0x00010100 816 JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy 817 CMPL R10, $0x0100ffff 818 JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy 819 LEAL -16842747(R10), R10 820 MOVW $0x001d, (AX) 821 MOVW $0xfffb, 2(AX) 822 MOVB $0xff, 4(AX) 823 ADDQ $0x05, AX 824 JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy 825 826repeat_five_match_nolit_encodeBlockAsm_emit_copy: 827 LEAL -65536(R10), R10 828 MOVL R10, SI 829 MOVW $0x001d, (AX) 830 MOVW R10, 2(AX) 831 SARL $0x10, SI 832 MOVB SI, 4(AX) 833 ADDQ $0x05, AX 834 JMP match_nolit_emitcopy_end_encodeBlockAsm 835 836repeat_four_match_nolit_encodeBlockAsm_emit_copy: 837 LEAL -256(R10), R10 838 MOVW $0x0019, (AX) 839 MOVW R10, 2(AX) 840 ADDQ $0x04, AX 841 JMP match_nolit_emitcopy_end_encodeBlockAsm 842 843repeat_three_match_nolit_encodeBlockAsm_emit_copy: 844 LEAL -4(R10), R10 845 MOVW $0x0015, (AX) 846 MOVB R10, 2(AX) 847 ADDQ $0x03, AX 848 JMP match_nolit_emitcopy_end_encodeBlockAsm 849 850repeat_two_match_nolit_encodeBlockAsm_emit_copy: 851 SHLL $0x02, R10 852 ORL $0x01, R10 853 MOVW R10, (AX) 854 ADDQ $0x02, AX 855 JMP match_nolit_emitcopy_end_encodeBlockAsm 856 857repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: 858 XORQ DI, DI 859 LEAL 1(DI)(R10*4), R10 860 MOVB SI, 1(AX) 861 SARL $0x08, SI 862 SHLL $0x05, SI 863 ORL SI, R10 864 MOVB R10, (AX) 865 ADDQ $0x02, AX 866 JMP match_nolit_emitcopy_end_encodeBlockAsm 867 JMP four_bytes_loop_back_match_nolit_encodeBlockAsm 868 869four_bytes_remain_match_nolit_encodeBlockAsm: 870 TESTL R10, R10 871 JZ match_nolit_emitcopy_end_encodeBlockAsm 872 MOVB $0x03, BL 873 LEAL -4(BX)(R10*4), R10 874 MOVB R10, (AX) 875 MOVL SI, 1(AX) 876 ADDQ $0x05, AX 877 JMP match_nolit_emitcopy_end_encodeBlockAsm 878 879two_byte_offset_match_nolit_encodeBlockAsm: 880 CMPL R10, $0x40 881 JLE 
two_byte_offset_short_match_nolit_encodeBlockAsm 882 MOVB $0xee, (AX) 883 MOVW SI, 1(AX) 884 LEAL -60(R10), R10 885 ADDQ $0x03, AX 886 887 // emitRepeat 888emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: 889 MOVL R10, DI 890 LEAL -4(R10), R10 891 CMPL DI, $0x08 892 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short 893 CMPL DI, $0x0c 894 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short 895 CMPL SI, $0x00000800 896 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short 897 898cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: 899 CMPL R10, $0x00000104 900 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short 901 CMPL R10, $0x00010100 902 JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short 903 CMPL R10, $0x0100ffff 904 JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short 905 LEAL -16842747(R10), R10 906 MOVW $0x001d, (AX) 907 MOVW $0xfffb, 2(AX) 908 MOVB $0xff, 4(AX) 909 ADDQ $0x05, AX 910 JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short 911 912repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: 913 LEAL -65536(R10), R10 914 MOVL R10, SI 915 MOVW $0x001d, (AX) 916 MOVW R10, 2(AX) 917 SARL $0x10, SI 918 MOVB SI, 4(AX) 919 ADDQ $0x05, AX 920 JMP match_nolit_emitcopy_end_encodeBlockAsm 921 922repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: 923 LEAL -256(R10), R10 924 MOVW $0x0019, (AX) 925 MOVW R10, 2(AX) 926 ADDQ $0x04, AX 927 JMP match_nolit_emitcopy_end_encodeBlockAsm 928 929repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: 930 LEAL -4(R10), R10 931 MOVW $0x0015, (AX) 932 MOVB R10, 2(AX) 933 ADDQ $0x03, AX 934 JMP match_nolit_emitcopy_end_encodeBlockAsm 935 936repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: 937 SHLL $0x02, R10 938 ORL $0x01, R10 939 MOVW R10, (AX) 940 ADDQ $0x02, AX 941 JMP match_nolit_emitcopy_end_encodeBlockAsm 942 943repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: 944 XORQ DI, DI 945 LEAL 1(DI)(R10*4), R10 
946 MOVB SI, 1(AX) 947 SARL $0x08, SI 948 SHLL $0x05, SI 949 ORL SI, R10 950 MOVB R10, (AX) 951 ADDQ $0x02, AX 952 JMP match_nolit_emitcopy_end_encodeBlockAsm 953 JMP two_byte_offset_match_nolit_encodeBlockAsm 954 955two_byte_offset_short_match_nolit_encodeBlockAsm: 956 CMPL R10, $0x0c 957 JGE emit_copy_three_match_nolit_encodeBlockAsm 958 CMPL SI, $0x00000800 959 JGE emit_copy_three_match_nolit_encodeBlockAsm 960 MOVB $0x01, BL 961 LEAL -16(BX)(R10*4), R10 962 MOVB SI, 1(AX) 963 SHRL $0x08, SI 964 SHLL $0x05, SI 965 ORL SI, R10 966 MOVB R10, (AX) 967 ADDQ $0x02, AX 968 JMP match_nolit_emitcopy_end_encodeBlockAsm 969 970emit_copy_three_match_nolit_encodeBlockAsm: 971 MOVB $0x02, BL 972 LEAL -4(BX)(R10*4), R10 973 MOVB R10, (AX) 974 MOVW SI, 1(AX) 975 ADDQ $0x03, AX 976 977match_nolit_emitcopy_end_encodeBlockAsm: 978 CMPL CX, 8(SP) 979 JGE emit_remainder_encodeBlockAsm 980 MOVQ -2(DX)(CX*1), DI 981 CMPQ AX, (SP) 982 JL match_nolit_dst_ok_encodeBlockAsm 983 MOVQ $0x00000000, ret+48(FP) 984 RET 985 986match_nolit_dst_ok_encodeBlockAsm: 987 MOVQ $0x0000cf1bbcdcbf9b, R9 988 MOVQ DI, R8 989 SHRQ $0x10, DI 990 MOVQ DI, SI 991 SHLQ $0x10, R8 992 IMULQ R9, R8 993 SHRQ $0x32, R8 994 SHLQ $0x10, SI 995 IMULQ R9, SI 996 SHRQ $0x32, SI 997 LEAL -2(CX), R9 998 LEAQ 24(SP)(SI*4), R10 999 MOVL (R10), SI 1000 MOVL R9, 24(SP)(R8*4) 1001 MOVL CX, (R10) 1002 CMPL (DX)(SI*1), DI 1003 JEQ match_nolit_loop_encodeBlockAsm 1004 INCL CX 1005 JMP search_loop_encodeBlockAsm 1006 1007emit_remainder_encodeBlockAsm: 1008 MOVQ src_len+32(FP), CX 1009 SUBL 12(SP), CX 1010 LEAQ 5(AX)(CX*1), CX 1011 CMPQ CX, (SP) 1012 JL emit_remainder_ok_encodeBlockAsm 1013 MOVQ $0x00000000, ret+48(FP) 1014 RET 1015 1016emit_remainder_ok_encodeBlockAsm: 1017 MOVQ src_len+32(FP), CX 1018 MOVL 12(SP), BX 1019 CMPL BX, CX 1020 JEQ emit_literal_done_emit_remainder_encodeBlockAsm 1021 MOVL CX, SI 1022 MOVL CX, 12(SP) 1023 LEAQ (DX)(BX*1), CX 1024 SUBL BX, SI 1025 LEAL -1(SI), DX 1026 CMPL DX, $0x3c 1027 JLT 
one_byte_emit_remainder_encodeBlockAsm 1028 CMPL DX, $0x00000100 1029 JLT two_bytes_emit_remainder_encodeBlockAsm 1030 CMPL DX, $0x00010000 1031 JLT three_bytes_emit_remainder_encodeBlockAsm 1032 CMPL DX, $0x01000000 1033 JLT four_bytes_emit_remainder_encodeBlockAsm 1034 MOVB $0xfc, (AX) 1035 MOVL DX, 1(AX) 1036 ADDQ $0x05, AX 1037 JMP memmove_long_emit_remainder_encodeBlockAsm 1038 1039four_bytes_emit_remainder_encodeBlockAsm: 1040 MOVL DX, BX 1041 SHRL $0x10, BX 1042 MOVB $0xf8, (AX) 1043 MOVW DX, 1(AX) 1044 MOVB BL, 3(AX) 1045 ADDQ $0x04, AX 1046 JMP memmove_long_emit_remainder_encodeBlockAsm 1047 1048three_bytes_emit_remainder_encodeBlockAsm: 1049 MOVB $0xf4, (AX) 1050 MOVW DX, 1(AX) 1051 ADDQ $0x03, AX 1052 JMP memmove_long_emit_remainder_encodeBlockAsm 1053 1054two_bytes_emit_remainder_encodeBlockAsm: 1055 MOVB $0xf0, (AX) 1056 MOVB DL, 1(AX) 1057 ADDQ $0x02, AX 1058 CMPL DX, $0x40 1059 JL memmove_emit_remainder_encodeBlockAsm 1060 JMP memmove_long_emit_remainder_encodeBlockAsm 1061 1062one_byte_emit_remainder_encodeBlockAsm: 1063 SHLB $0x02, DL 1064 MOVB DL, (AX) 1065 ADDQ $0x01, AX 1066 1067memmove_emit_remainder_encodeBlockAsm: 1068 LEAQ (AX)(SI*1), DX 1069 MOVL SI, BX 1070 1071 // genMemMoveShort 1072 CMPQ BX, $0x08 1073 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8 1074 CMPQ BX, $0x10 1075 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16 1076 CMPQ BX, $0x20 1077 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32 1078 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 1079 1080emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8: 1081 MOVQ (CX), SI 1082 MOVQ SI, (AX) 1083 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1084 1085emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: 1086 MOVQ (CX), SI 1087 MOVQ -8(CX)(BX*1), CX 1088 MOVQ SI, (AX) 1089 MOVQ CX, -8(AX)(BX*1) 1090 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1091 
1092emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: 1093 MOVOU (CX), X0 1094 MOVOU -16(CX)(BX*1), X1 1095 MOVOU X0, (AX) 1096 MOVOU X1, -16(AX)(BX*1) 1097 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1098 1099emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: 1100 MOVOU (CX), X0 1101 MOVOU 16(CX), X1 1102 MOVOU -32(CX)(BX*1), X2 1103 MOVOU -16(CX)(BX*1), X3 1104 MOVOU X0, (AX) 1105 MOVOU X1, 16(AX) 1106 MOVOU X2, -32(AX)(BX*1) 1107 MOVOU X3, -16(AX)(BX*1) 1108 1109memmove_end_copy_emit_remainder_encodeBlockAsm: 1110 MOVQ DX, AX 1111 JMP emit_literal_done_emit_remainder_encodeBlockAsm 1112 1113memmove_long_emit_remainder_encodeBlockAsm: 1114 LEAQ (AX)(SI*1), DX 1115 MOVL SI, BX 1116 1117 // genMemMoveLong 1118 MOVOU (CX), X0 1119 MOVOU 16(CX), X1 1120 MOVOU -32(CX)(BX*1), X2 1121 MOVOU -16(CX)(BX*1), X3 1122 MOVQ BX, DI 1123 SHRQ $0x05, DI 1124 MOVQ AX, SI 1125 ANDL $0x0000001f, SI 1126 MOVQ $0x00000040, R8 1127 SUBQ SI, R8 1128 DECQ DI 1129 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 1130 LEAQ -32(CX)(R8*1), SI 1131 LEAQ -32(AX)(R8*1), R9 1132 1133emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: 1134 MOVOU (SI), X4 1135 MOVOU 16(SI), X5 1136 MOVOA X4, (R9) 1137 MOVOA X5, 16(R9) 1138 ADDQ $0x20, R9 1139 ADDQ $0x20, SI 1140 ADDQ $0x20, R8 1141 DECQ DI 1142 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back 1143 1144emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: 1145 MOVOU -32(CX)(R8*1), X4 1146 MOVOU -16(CX)(R8*1), X5 1147 MOVOA X4, -32(AX)(R8*1) 1148 MOVOA X5, -16(AX)(R8*1) 1149 ADDQ $0x20, R8 1150 CMPQ BX, R8 1151 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 1152 MOVOU X0, (AX) 1153 MOVOU X1, 16(AX) 1154 MOVOU X2, -32(AX)(BX*1) 1155 MOVOU X3, -16(AX)(BX*1) 1156 MOVQ DX, AX 1157 1158emit_literal_done_emit_remainder_encodeBlockAsm: 1159 MOVQ dst_base+0(FP), CX 1160 
SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeBlockAsm4MB encodes src into dst and returns the number of bytes
// written to dst. It returns 0 when one of the output-size checks against the
// limit computed at entry, dst_base + (len(src) - 9 - len(src)>>5), fails.
// NOTE(review): no bounds checks on dst or src are visible in this routine
// (the 8-byte loads and the short memmove paths touch up to 8 bytes even for
// shorter lengths); presumably the caller guarantees buffer sizes and a
// minimum src length - confirm against the Go wrapper.
//
// Register use (until emit_remainder, which reuses CX, DX and SI):
//   AX  next write position in dst
//   CX  current position in src
//   DX  src base pointer
//
// Stack frame:
//   0(SP)   dst limit pointer (8 bytes)
//   8(SP)   len(src) - 8; searching stops once the next position reaches it
//   12(SP)  src position of the first byte not yet written to dst
//   16(SP)  current match offset, tried first as a "repeat"; starts at 1
//   20(SP)  src position to try next when nothing matches at CX
//   24(SP)  64 KiB hash table: 16384 entries, each a 32-bit src position
TEXT ·encodeBlockAsm4MB(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

// Zero the hash table: 0x200 iterations of 0x80 bytes = 65536 bytes.
zero_loop_encodeBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm4MB
	// Initialise the stack variables described in the function header.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

// Main search loop. The next position to try is CX + 4 + (CX - 12(SP))>>6, so
// the step grows with the distance since the last emitted byte.
search_loop_encodeBlockAsm4MB:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBlockAsm4MB
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	// Hash the bytes at CX (R10) and CX+1 (R11). SHLQ $0x10 discards the two
	// highest bytes so six bytes feed each hash; SHRQ $0x32 keeps the top
	// 14 bits of the product as the table index.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x10, R11
	IMULQ R9, R11
	SHRQ $0x32, R11
	// Load both candidates (SI for CX, R8 for CX+1), then record CX and CX+1.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)
	// R10 = table index for the bytes at CX+2, used in no_repeat_found.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes located
	// 16(SP) bytes before them.
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeBlockAsm4MB
	// Repeat found. DI = match start (CX+1), SI = DI - offset, R8 = 12(SP).
	LEAL 1(CX), DI
	MOVL 12(SP), R8
	MOVL DI, SI
	SUBL 16(SP), SI
	JZ repeat_extend_back_end_encodeBlockAsm4MB

// Extend the repeat match backwards while DI > R8, SI > 0 and the preceding
// bytes are equal.
repeat_extend_back_loop_encodeBlockAsm4MB:
	CMPL DI, R8
	JLE repeat_extend_back_end_encodeBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeBlockAsm4MB
	LEAL -1(DI), DI
	DECL SI
	JNZ repeat_extend_back_loop_encodeBlockAsm4MB

// Emit src[12(SP):DI] as a literal run (skipped when empty) and set 12(SP) = DI.
// R10 = source pointer, R9 = length, SI = length-1 used to build the tag.
repeat_extend_back_end_encodeBlockAsm4MB:
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm4MB
	// length-1 >= 65536: tag 0xf8 followed by 3 bytes of length-1.
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

// length-1 < 65536: tag 0xf4 followed by 2 bytes of length-1.
three_bytes_repeat_emit_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

// length-1 < 256: tag 0xf0 followed by 1 byte of length-1. Runs with
// length-1 < 64 use the short copy, longer ones the long copy.
two_bytes_repeat_emit_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeBlockAsm4MB
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

// length-1 < 60: single tag byte (length-1)<<2.
one_byte_repeat_emit_encodeBlockAsm4MB:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

// Short copy of R9 bytes from (R10) to (AX); SI = AX + R9 becomes the new AX.
memmove_repeat_emit_encodeBlockAsm4MB:
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Dispatch on length. Each case copies with possibly overlapping
	// head/tail loads; the <= 8 case always moves a full 8 bytes.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB

// Long copy of R9 bytes from (R10) to (AX); SI = AX + R9 becomes the new AX.
memmove_long_repeat_emit_encodeBlockAsm4MB:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// X0-X3 hold the first and last 32 bytes; they are stored after the
	// loops. R13 = 64 - (AX & 31), so -32(AX)(R13*1) is a multiple of 32 and
	// the MOVOA stores below hit aligned addresses.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R12
	SHRQ $0x05, R12
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R13
	SUBQ R11, R13
	// DECQ leaves CF untouched, so JA here depends on CF from SUBQ and ZF
	// from DECQ.
	DECQ R12
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R13*1), R11
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R11
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back

// Copy 32 bytes per iteration while R13 <= R9, then store the saved head and
// tail blocks.
emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R13*1), X4
	MOVOU -16(R10)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R9, R13
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

// The 4 bytes at CX+1 already matched, so forward extension starts at CX+5.
// R10 = src+CX, SI = src+CX-offset, R9 = bytes left in src.
emit_literal_done_repeat_emit_encodeBlockAsm4MB:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R9
	SUBL CX, R9
	LEAQ (DX)(CX*1), R10
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R12 = number of equal bytes. Compare 8 bytes at a time; on a mismatch
	// BSFQ/8 gives the count of equal low-order bytes.
	XORL R12, R12
	CMPL R9, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm4MB

matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
	MOVQ (R10)(R12*1), R11
	XORQ (SI)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP repeat_extend_forward_end_encodeBlockAsm4MB

matchlen_loop_repeat_extend_encodeBlockAsm4MB:
	LEAL -8(R9), R9
	LEAL 8(R12), R12
	CMPL R9, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB

// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend_encodeBlockAsm4MB:
	TESTL R9, R9
	JZ repeat_extend_forward_end_encodeBlockAsm4MB

matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB:
	MOVB (R10)(R12*1), R11
	CMPB (SI)(R12*1), R11
	JNE repeat_extend_forward_end_encodeBlockAsm4MB
	LEAL 1(R12), R12
	DECL R9
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB

// CX = end of match, SI = match length (CX - DI), DI = offset from 16(SP).
// R8 still holds the 12(SP) value read before the literals were emitted;
// when it was 0 the match is encoded with emitCopy instead of emitRepeat.
repeat_extend_forward_end_encodeBlockAsm4MB:
	ADDL R12, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm4MB

	// emitRepeat
	// R8 = length, SI = length-4. length <= 8 takes the 2-byte form;
	// length < 12 with offset < 2048 takes the 2-byte form carrying the
	// offset; everything else takes a 3-, 4- or 5-byte form chosen by SI.
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm4MB
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB

cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
	CMPL SI, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm4MB
	CMPL SI, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm4MB
	// 5 bytes: 0x1d, 0x00, then 24 bits of SI-65536.
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// 4 bytes: 0x19, 0x00, then 16 bits of SI-256.
repeat_four_match_repeat_encodeBlockAsm4MB:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// 3 bytes: 0x15, 0x00, then 8 bits of SI-4.
repeat_three_match_repeat_encodeBlockAsm4MB:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// 2 bytes: 16-bit value (SI<<2)|1.
repeat_two_match_repeat_encodeBlockAsm4MB:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// 2 bytes: first byte 1 + SI*4 with (offset>>8)<<5 OR-ed in, second byte the
// low 8 bits of the offset.
repeat_two_offset_match_repeat_encodeBlockAsm4MB:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// Encode the match as a copy: SI = length, DI = offset. Offsets >= 65536
// use the 4-byte-offset form.
repeat_as_copy_encodeBlockAsm4MB:
	// emitCopy
	CMPL DI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB

// length > 64: emit tag 0xff + 32-bit offset covering 64 bytes, then encode
// what is left (>= 4 bytes) with emitRepeat.
four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB:
	CMPL SI, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL DI, 1(AX)
	LEAL -64(SI), SI
	ADDQ $0x05, AX
	CMPL SI, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB

	// emitRepeat
	// Same encoding choices as the emitRepeat sequence above.
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB
	// NOTE(review): unreachable - directly follows an unconditional JMP.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB

// Final 4-byte-offset copy (nothing is written when SI is 0):
// tag ((SI-1)<<2)|3 followed by the 32-bit offset.
four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
	TESTL SI, SI
	JZ repeat_end_emit_encodeBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// Offset < 65536. length > 64: emit tag 0xee + 16-bit offset accounting for
// 60 bytes, then encode the remaining length with emitRepeat.
two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX

	// emitRepeat
	// Same encoding choices as the emitRepeat sequence above.
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB
	// NOTE(review): unreachable - directly follows an unconditional JMP.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB

// length <= 64. length < 12 with offset < 2048 uses the 2-byte copy:
// first byte 1 + (length-4)*4 with (offset>>8)<<5 OR-ed in, second byte the
// low 8 bits of the offset. Otherwise the 3-byte copy below is used.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

// 3-byte copy: tag ((length-1)<<2)|2 followed by the 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

// Everything up to CX has been emitted; record that and resume searching.
repeat_end_emit_encodeBlockAsm4MB:
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm4MB

// No repeat at CX+1. Try the table candidates in turn: SI against the bytes
// at CX, R8 against the bytes at CX+1, then the entry for CX+2 (index R10,
// which is overwritten with CX+2). DI is shifted right by 8 before each
// later comparison so its low 32 bits hold the bytes at the tested position.
no_repeat_found_encodeBlockAsm4MB:
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBlockAsm4MB
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeBlockAsm4MB
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeBlockAsm4MB
	// Nothing matched: continue at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm4MB

// Match at CX+2 with candidate SI.
candidate3_match_encodeBlockAsm4MB:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm4MB

// Match at CX+1 with candidate R8; the table update for CX+2 is still done.
candidate2_match_encodeBlockAsm4MB:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

// CX = match position, SI = candidate position, DI = 12(SP).
candidate_match_encodeBlockAsm4MB:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBlockAsm4MB

// Extend the match backwards while CX > DI, SI > 0 and the preceding bytes
// are equal.
match_extend_back_loop_encodeBlockAsm4MB:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBlockAsm4MB
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBlockAsm4MB
	JMP match_extend_back_loop_encodeBlockAsm4MB

// Output-size check: return 0 unless AX + 4 + pending literal length is
// below the limit in (SP).
match_extend_back_end_encodeBlockAsm4MB:
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 4(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

// Emit src[12(SP):CX] as a literal run (skipped when empty) and set
// 12(SP) = CX. DI = source pointer, R9 = length, R8 = length-1 for the tag.
// Tag selection mirrors the literal emission in the repeat path above.
match_dst_size_check_encodeBlockAsm4MB:
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm4MB
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm4MB
	CMPL R8, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm4MB
	MOVL R8, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW R8, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBlockAsm4MB

three_bytes_match_emit_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm4MB

two_bytes_match_emit_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeBlockAsm4MB
	JMP memmove_long_match_emit_encodeBlockAsm4MB

one_byte_match_emit_encodeBlockAsm4MB:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

// Short copy of R9 bytes from (DI) to (AX); R8 = AX + R9 becomes the new AX.
memmove_match_emit_encodeBlockAsm4MB:
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBlockAsm4MB:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm4MB

// Long copy of R9 bytes from (DI) to (AX); R8 = AX + R9 becomes the new AX.
memmove_long_match_emit_encodeBlockAsm4MB:
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// Same scheme as the long copy in the repeat path: head and tail are
	// kept in X0-X3, and the middle is stored to 32-byte aligned addresses.
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

// A match of at least 4 bytes starts at CX with candidate SI. Store the new
// offset CX - SI in 16(SP), skip the 4 verified bytes and extend forward.
emit_literal_done_match_emit_encodeBlockAsm4MB:
match_nolit_loop_encodeBlockAsm4MB:
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 = number of equal bytes between (R8) and (SI); DI = bytes left.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm4MB

matchlen_loopback_match_nolit_encodeBlockAsm4MB:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeBlockAsm4MB
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeBlockAsm4MB

matchlen_loop_match_nolit_encodeBlockAsm4MB:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB

matchlen_single_match_nolit_encodeBlockAsm4MB:
	TESTL DI, DI
	JZ match_nolit_end_encodeBlockAsm4MB

matchlen_single_loopback_match_nolit_encodeBlockAsm4MB:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeBlockAsm4MB
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm4MB

// CX = end of match, SI = offset, R10 = total match length (extension + 4);
// 12(SP) is advanced to CX before the copy is written.
match_nolit_end_encodeBlockAsm4MB:
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	// Same encoding choices as repeat_as_copy above, with length in R10 and
	// offset in SI.
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm4MB

four_bytes_loop_back_match_nolit_encodeBlockAsm4MB:
	CMPL R10, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(R10), R10
	ADDQ $0x05, AX
	CMPL R10, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm4MB

	// emitRepeat
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
	// NOTE(review): unreachable - directly follows an unconditional JMP.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB

four_bytes_remain_match_nolit_encodeBlockAsm4MB:
	TESTL R10, R10
	JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

two_byte_offset_match_nolit_encodeBlockAsm4MB:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX

	// emitRepeat
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
	// NOTE(review): unreachable - directly follows an unconditional JMP.
	JMP two_byte_offset_match_nolit_encodeBlockAsm4MB

two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

emit_copy_three_match_nolit_encodeBlockAsm4MB:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

// After the copy: stop searching once CX reaches 8(SP), and return 0 if AX
// has reached the limit in (SP).
match_nolit_emitcopy_end_encodeBlockAsm4MB:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm4MB
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

// Hash the bytes at CX-2 (R8) and at CX (SI), record both positions in the
// table, and check whether the previous entry for CX is an immediate match.
match_nolit_dst_ok_encodeBlockAsm4MB:
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	SHLQ $0x10, SI
	IMULQ R9, SI
	SHRQ $0x32, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	// Low 32 bits of DI are the 4 bytes at CX; equal bytes at the candidate
	// mean another match with no literals in between.
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeBlockAsm4MB
	INCL CX
	JMP search_loop_encodeBlockAsm4MB

// Emit the unwritten tail src[12(SP):len(src)] as literals. Return 0 unless
// AX + 4 + tail length is below the limit in (SP).
emit_remainder_encodeBlockAsm4MB:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

// From here CX = source pointer, SI = literal length, DX = length-1 for the
// tag (DX no longer holds the src base). Tag selection mirrors the literal
// emission earlier in the function.
emit_remainder_ok_encodeBlockAsm4MB:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm4MB
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm4MB
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm4MB
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

three_bytes_emit_remainder_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

two_bytes_emit_remainder_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm4MB
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

one_byte_emit_remainder_encodeBlockAsm4MB:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

// Short copy of BX bytes from (CX) to (AX); DX = AX + SI becomes the new AX.
memmove_emit_remainder_encodeBlockAsm4MB:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x08
	JLE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB

// Long copy of BX bytes from (CX) to (AX); DX = AX + SI becomes the new AX.
memmove_long_emit_remainder_encodeBlockAsm4MB:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Same scheme as the other long copies: head and tail are kept in
	// X0-X3, and the middle is stored to 32-byte aligned addresses.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

// Return value: bytes written = AX - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm4MB:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm12B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm12B:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL
4(CX)(SI*1), SI 2278 CMPL SI, 8(SP) 2279 JGE emit_remainder_encodeBlockAsm12B 2280 MOVQ (DX)(CX*1), DI 2281 MOVL SI, 20(SP) 2282 MOVQ $0x000000cf1bbcdcbb, R9 2283 MOVQ DI, R10 2284 MOVQ DI, R11 2285 SHRQ $0x08, R11 2286 SHLQ $0x18, R10 2287 IMULQ R9, R10 2288 SHRQ $0x34, R10 2289 SHLQ $0x18, R11 2290 IMULQ R9, R11 2291 SHRQ $0x34, R11 2292 MOVL 24(SP)(R10*4), SI 2293 MOVL 24(SP)(R11*4), R8 2294 MOVL CX, 24(SP)(R10*4) 2295 LEAL 1(CX), R10 2296 MOVL R10, 24(SP)(R11*4) 2297 MOVQ DI, R10 2298 SHRQ $0x10, R10 2299 SHLQ $0x18, R10 2300 IMULQ R9, R10 2301 SHRQ $0x34, R10 2302 MOVL CX, R9 2303 SUBL 16(SP), R9 2304 MOVL 1(DX)(R9*1), R11 2305 MOVQ DI, R9 2306 SHRQ $0x08, R9 2307 CMPL R9, R11 2308 JNE no_repeat_found_encodeBlockAsm12B 2309 LEAL 1(CX), DI 2310 MOVL 12(SP), R8 2311 MOVL DI, SI 2312 SUBL 16(SP), SI 2313 JZ repeat_extend_back_end_encodeBlockAsm12B 2314 2315repeat_extend_back_loop_encodeBlockAsm12B: 2316 CMPL DI, R8 2317 JLE repeat_extend_back_end_encodeBlockAsm12B 2318 MOVB -1(DX)(SI*1), BL 2319 MOVB -1(DX)(DI*1), R9 2320 CMPB BL, R9 2321 JNE repeat_extend_back_end_encodeBlockAsm12B 2322 LEAL -1(DI), DI 2323 DECL SI 2324 JNZ repeat_extend_back_loop_encodeBlockAsm12B 2325 2326repeat_extend_back_end_encodeBlockAsm12B: 2327 MOVL 12(SP), SI 2328 CMPL SI, DI 2329 JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B 2330 MOVL DI, R9 2331 MOVL DI, 12(SP) 2332 LEAQ (DX)(SI*1), R10 2333 SUBL SI, R9 2334 LEAL -1(R9), SI 2335 CMPL SI, $0x3c 2336 JLT one_byte_repeat_emit_encodeBlockAsm12B 2337 CMPL SI, $0x00000100 2338 JLT two_bytes_repeat_emit_encodeBlockAsm12B 2339 MOVB $0xf4, (AX) 2340 MOVW SI, 1(AX) 2341 ADDQ $0x03, AX 2342 JMP memmove_long_repeat_emit_encodeBlockAsm12B 2343 2344two_bytes_repeat_emit_encodeBlockAsm12B: 2345 MOVB $0xf0, (AX) 2346 MOVB SI, 1(AX) 2347 ADDQ $0x02, AX 2348 CMPL SI, $0x40 2349 JL memmove_repeat_emit_encodeBlockAsm12B 2350 JMP memmove_long_repeat_emit_encodeBlockAsm12B 2351 2352one_byte_repeat_emit_encodeBlockAsm12B: 2353 SHLB $0x02, SI 2354 
MOVB SI, (AX) 2355 ADDQ $0x01, AX 2356 2357memmove_repeat_emit_encodeBlockAsm12B: 2358 LEAQ (AX)(R9*1), SI 2359 2360 // genMemMoveShort 2361 CMPQ R9, $0x08 2362 JLE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 2363 CMPQ R9, $0x10 2364 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 2365 CMPQ R9, $0x20 2366 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 2367 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 2368 2369emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: 2370 MOVQ (R10), R11 2371 MOVQ R11, (AX) 2372 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2373 2374emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: 2375 MOVQ (R10), R11 2376 MOVQ -8(R10)(R9*1), R10 2377 MOVQ R11, (AX) 2378 MOVQ R10, -8(AX)(R9*1) 2379 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2380 2381emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: 2382 MOVOU (R10), X0 2383 MOVOU -16(R10)(R9*1), X1 2384 MOVOU X0, (AX) 2385 MOVOU X1, -16(AX)(R9*1) 2386 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2387 2388emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: 2389 MOVOU (R10), X0 2390 MOVOU 16(R10), X1 2391 MOVOU -32(R10)(R9*1), X2 2392 MOVOU -16(R10)(R9*1), X3 2393 MOVOU X0, (AX) 2394 MOVOU X1, 16(AX) 2395 MOVOU X2, -32(AX)(R9*1) 2396 MOVOU X3, -16(AX)(R9*1) 2397 2398memmove_end_copy_repeat_emit_encodeBlockAsm12B: 2399 MOVQ SI, AX 2400 JMP emit_literal_done_repeat_emit_encodeBlockAsm12B 2401 2402memmove_long_repeat_emit_encodeBlockAsm12B: 2403 LEAQ (AX)(R9*1), SI 2404 2405 // genMemMoveLong 2406 MOVOU (R10), X0 2407 MOVOU 16(R10), X1 2408 MOVOU -32(R10)(R9*1), X2 2409 MOVOU -16(R10)(R9*1), X3 2410 MOVQ R9, R12 2411 SHRQ $0x05, R12 2412 MOVQ AX, R11 2413 ANDL $0x0000001f, R11 2414 MOVQ $0x00000040, R13 2415 SUBQ R11, R13 2416 DECQ R12 2417 JA 
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2418 LEAQ -32(R10)(R13*1), R11 2419 LEAQ -32(AX)(R13*1), R14 2420 2421emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: 2422 MOVOU (R11), X4 2423 MOVOU 16(R11), X5 2424 MOVOA X4, (R14) 2425 MOVOA X5, 16(R14) 2426 ADDQ $0x20, R14 2427 ADDQ $0x20, R11 2428 ADDQ $0x20, R13 2429 DECQ R12 2430 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back 2431 2432emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: 2433 MOVOU -32(R10)(R13*1), X4 2434 MOVOU -16(R10)(R13*1), X5 2435 MOVOA X4, -32(AX)(R13*1) 2436 MOVOA X5, -16(AX)(R13*1) 2437 ADDQ $0x20, R13 2438 CMPQ R9, R13 2439 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2440 MOVOU X0, (AX) 2441 MOVOU X1, 16(AX) 2442 MOVOU X2, -32(AX)(R9*1) 2443 MOVOU X3, -16(AX)(R9*1) 2444 MOVQ SI, AX 2445 2446emit_literal_done_repeat_emit_encodeBlockAsm12B: 2447 ADDL $0x05, CX 2448 MOVL CX, SI 2449 SUBL 16(SP), SI 2450 MOVQ src_len+32(FP), R9 2451 SUBL CX, R9 2452 LEAQ (DX)(CX*1), R10 2453 LEAQ (DX)(SI*1), SI 2454 2455 // matchLen 2456 XORL R12, R12 2457 CMPL R9, $0x08 2458 JL matchlen_single_repeat_extend_encodeBlockAsm12B 2459 2460matchlen_loopback_repeat_extend_encodeBlockAsm12B: 2461 MOVQ (R10)(R12*1), R11 2462 XORQ (SI)(R12*1), R11 2463 TESTQ R11, R11 2464 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B 2465 BSFQ R11, R11 2466 SARQ $0x03, R11 2467 LEAL (R12)(R11*1), R12 2468 JMP repeat_extend_forward_end_encodeBlockAsm12B 2469 2470matchlen_loop_repeat_extend_encodeBlockAsm12B: 2471 LEAL -8(R9), R9 2472 LEAL 8(R12), R12 2473 CMPL R9, $0x08 2474 JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B 2475 2476matchlen_single_repeat_extend_encodeBlockAsm12B: 2477 TESTL R9, R9 2478 JZ repeat_extend_forward_end_encodeBlockAsm12B 2479 2480matchlen_single_loopback_repeat_extend_encodeBlockAsm12B: 2481 MOVB (R10)(R12*1), R11 2482 CMPB (SI)(R12*1), R11 2483 JNE 
repeat_extend_forward_end_encodeBlockAsm12B 2484 LEAL 1(R12), R12 2485 DECL R9 2486 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm12B 2487 2488repeat_extend_forward_end_encodeBlockAsm12B: 2489 ADDL R12, CX 2490 MOVL CX, SI 2491 SUBL DI, SI 2492 MOVL 16(SP), DI 2493 TESTL R8, R8 2494 JZ repeat_as_copy_encodeBlockAsm12B 2495 2496 // emitRepeat 2497 MOVL SI, R8 2498 LEAL -4(SI), SI 2499 CMPL R8, $0x08 2500 JLE repeat_two_match_repeat_encodeBlockAsm12B 2501 CMPL R8, $0x0c 2502 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B 2503 CMPL DI, $0x00000800 2504 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B 2505 2506cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: 2507 CMPL SI, $0x00000104 2508 JLT repeat_three_match_repeat_encodeBlockAsm12B 2509 LEAL -256(SI), SI 2510 MOVW $0x0019, (AX) 2511 MOVW SI, 2(AX) 2512 ADDQ $0x04, AX 2513 JMP repeat_end_emit_encodeBlockAsm12B 2514 2515repeat_three_match_repeat_encodeBlockAsm12B: 2516 LEAL -4(SI), SI 2517 MOVW $0x0015, (AX) 2518 MOVB SI, 2(AX) 2519 ADDQ $0x03, AX 2520 JMP repeat_end_emit_encodeBlockAsm12B 2521 2522repeat_two_match_repeat_encodeBlockAsm12B: 2523 SHLL $0x02, SI 2524 ORL $0x01, SI 2525 MOVW SI, (AX) 2526 ADDQ $0x02, AX 2527 JMP repeat_end_emit_encodeBlockAsm12B 2528 2529repeat_two_offset_match_repeat_encodeBlockAsm12B: 2530 XORQ R8, R8 2531 LEAL 1(R8)(SI*4), SI 2532 MOVB DI, 1(AX) 2533 SARL $0x08, DI 2534 SHLL $0x05, DI 2535 ORL DI, SI 2536 MOVB SI, (AX) 2537 ADDQ $0x02, AX 2538 JMP repeat_end_emit_encodeBlockAsm12B 2539 2540repeat_as_copy_encodeBlockAsm12B: 2541 // emitCopy 2542two_byte_offset_repeat_as_copy_encodeBlockAsm12B: 2543 CMPL SI, $0x40 2544 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B 2545 MOVB $0xee, (AX) 2546 MOVW DI, 1(AX) 2547 LEAL -60(SI), SI 2548 ADDQ $0x03, AX 2549 2550 // emitRepeat 2551 MOVL SI, R8 2552 LEAL -4(SI), SI 2553 CMPL R8, $0x08 2554 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2555 CMPL R8, $0x0c 2556 JGE 
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2557 CMPL DI, $0x00000800 2558 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2559 2560cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2561 CMPL SI, $0x00000104 2562 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2563 LEAL -256(SI), SI 2564 MOVW $0x0019, (AX) 2565 MOVW SI, 2(AX) 2566 ADDQ $0x04, AX 2567 JMP repeat_end_emit_encodeBlockAsm12B 2568 2569repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2570 LEAL -4(SI), SI 2571 MOVW $0x0015, (AX) 2572 MOVB SI, 2(AX) 2573 ADDQ $0x03, AX 2574 JMP repeat_end_emit_encodeBlockAsm12B 2575 2576repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2577 SHLL $0x02, SI 2578 ORL $0x01, SI 2579 MOVW SI, (AX) 2580 ADDQ $0x02, AX 2581 JMP repeat_end_emit_encodeBlockAsm12B 2582 2583repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2584 XORQ R8, R8 2585 LEAL 1(R8)(SI*4), SI 2586 MOVB DI, 1(AX) 2587 SARL $0x08, DI 2588 SHLL $0x05, DI 2589 ORL DI, SI 2590 MOVB SI, (AX) 2591 ADDQ $0x02, AX 2592 JMP repeat_end_emit_encodeBlockAsm12B 2593 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B 2594 2595two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: 2596 CMPL SI, $0x0c 2597 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B 2598 CMPL DI, $0x00000800 2599 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B 2600 MOVB $0x01, BL 2601 LEAL -16(BX)(SI*4), SI 2602 MOVB DI, 1(AX) 2603 SHRL $0x08, DI 2604 SHLL $0x05, DI 2605 ORL DI, SI 2606 MOVB SI, (AX) 2607 ADDQ $0x02, AX 2608 JMP repeat_end_emit_encodeBlockAsm12B 2609 2610emit_copy_three_repeat_as_copy_encodeBlockAsm12B: 2611 MOVB $0x02, BL 2612 LEAL -4(BX)(SI*4), SI 2613 MOVB SI, (AX) 2614 MOVW DI, 1(AX) 2615 ADDQ $0x03, AX 2616 2617repeat_end_emit_encodeBlockAsm12B: 2618 MOVL CX, 12(SP) 2619 JMP search_loop_encodeBlockAsm12B 2620 2621no_repeat_found_encodeBlockAsm12B: 2622 CMPL (DX)(SI*1), DI 2623 JEQ 
candidate_match_encodeBlockAsm12B 2624 SHRQ $0x08, DI 2625 MOVL 24(SP)(R10*4), SI 2626 LEAL 2(CX), R9 2627 CMPL (DX)(R8*1), DI 2628 JEQ candidate2_match_encodeBlockAsm12B 2629 MOVL R9, 24(SP)(R10*4) 2630 SHRQ $0x08, DI 2631 CMPL (DX)(SI*1), DI 2632 JEQ candidate3_match_encodeBlockAsm12B 2633 MOVL 20(SP), CX 2634 JMP search_loop_encodeBlockAsm12B 2635 2636candidate3_match_encodeBlockAsm12B: 2637 ADDL $0x02, CX 2638 JMP candidate_match_encodeBlockAsm12B 2639 2640candidate2_match_encodeBlockAsm12B: 2641 MOVL R9, 24(SP)(R10*4) 2642 INCL CX 2643 MOVL R8, SI 2644 2645candidate_match_encodeBlockAsm12B: 2646 MOVL 12(SP), DI 2647 TESTL SI, SI 2648 JZ match_extend_back_end_encodeBlockAsm12B 2649 2650match_extend_back_loop_encodeBlockAsm12B: 2651 CMPL CX, DI 2652 JLE match_extend_back_end_encodeBlockAsm12B 2653 MOVB -1(DX)(SI*1), BL 2654 MOVB -1(DX)(CX*1), R8 2655 CMPB BL, R8 2656 JNE match_extend_back_end_encodeBlockAsm12B 2657 LEAL -1(CX), CX 2658 DECL SI 2659 JZ match_extend_back_end_encodeBlockAsm12B 2660 JMP match_extend_back_loop_encodeBlockAsm12B 2661 2662match_extend_back_end_encodeBlockAsm12B: 2663 MOVL CX, DI 2664 SUBL 12(SP), DI 2665 LEAQ 3(AX)(DI*1), DI 2666 CMPQ DI, (SP) 2667 JL match_dst_size_check_encodeBlockAsm12B 2668 MOVQ $0x00000000, ret+48(FP) 2669 RET 2670 2671match_dst_size_check_encodeBlockAsm12B: 2672 MOVL CX, DI 2673 MOVL 12(SP), R8 2674 CMPL R8, DI 2675 JEQ emit_literal_done_match_emit_encodeBlockAsm12B 2676 MOVL DI, R9 2677 MOVL DI, 12(SP) 2678 LEAQ (DX)(R8*1), DI 2679 SUBL R8, R9 2680 LEAL -1(R9), R8 2681 CMPL R8, $0x3c 2682 JLT one_byte_match_emit_encodeBlockAsm12B 2683 CMPL R8, $0x00000100 2684 JLT two_bytes_match_emit_encodeBlockAsm12B 2685 MOVB $0xf4, (AX) 2686 MOVW R8, 1(AX) 2687 ADDQ $0x03, AX 2688 JMP memmove_long_match_emit_encodeBlockAsm12B 2689 2690two_bytes_match_emit_encodeBlockAsm12B: 2691 MOVB $0xf0, (AX) 2692 MOVB R8, 1(AX) 2693 ADDQ $0x02, AX 2694 CMPL R8, $0x40 2695 JL memmove_match_emit_encodeBlockAsm12B 2696 JMP 
memmove_long_match_emit_encodeBlockAsm12B 2697 2698one_byte_match_emit_encodeBlockAsm12B: 2699 SHLB $0x02, R8 2700 MOVB R8, (AX) 2701 ADDQ $0x01, AX 2702 2703memmove_match_emit_encodeBlockAsm12B: 2704 LEAQ (AX)(R9*1), R8 2705 2706 // genMemMoveShort 2707 CMPQ R9, $0x08 2708 JLE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 2709 CMPQ R9, $0x10 2710 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 2711 CMPQ R9, $0x20 2712 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 2713 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 2714 2715emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: 2716 MOVQ (DI), R10 2717 MOVQ R10, (AX) 2718 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2719 2720emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: 2721 MOVQ (DI), R10 2722 MOVQ -8(DI)(R9*1), DI 2723 MOVQ R10, (AX) 2724 MOVQ DI, -8(AX)(R9*1) 2725 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2726 2727emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: 2728 MOVOU (DI), X0 2729 MOVOU -16(DI)(R9*1), X1 2730 MOVOU X0, (AX) 2731 MOVOU X1, -16(AX)(R9*1) 2732 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2733 2734emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: 2735 MOVOU (DI), X0 2736 MOVOU 16(DI), X1 2737 MOVOU -32(DI)(R9*1), X2 2738 MOVOU -16(DI)(R9*1), X3 2739 MOVOU X0, (AX) 2740 MOVOU X1, 16(AX) 2741 MOVOU X2, -32(AX)(R9*1) 2742 MOVOU X3, -16(AX)(R9*1) 2743 2744memmove_end_copy_match_emit_encodeBlockAsm12B: 2745 MOVQ R8, AX 2746 JMP emit_literal_done_match_emit_encodeBlockAsm12B 2747 2748memmove_long_match_emit_encodeBlockAsm12B: 2749 LEAQ (AX)(R9*1), R8 2750 2751 // genMemMoveLong 2752 MOVOU (DI), X0 2753 MOVOU 16(DI), X1 2754 MOVOU -32(DI)(R9*1), X2 2755 MOVOU -16(DI)(R9*1), X3 2756 MOVQ R9, R11 2757 SHRQ $0x05, R11 2758 MOVQ AX, R10 2759 ANDL $0x0000001f, R10 2760 MOVQ $0x00000040, R12 2761 SUBQ R10, R12 2762 
DECQ R11 2763 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2764 LEAQ -32(DI)(R12*1), R10 2765 LEAQ -32(AX)(R12*1), R13 2766 2767emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: 2768 MOVOU (R10), X4 2769 MOVOU 16(R10), X5 2770 MOVOA X4, (R13) 2771 MOVOA X5, 16(R13) 2772 ADDQ $0x20, R13 2773 ADDQ $0x20, R10 2774 ADDQ $0x20, R12 2775 DECQ R11 2776 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back 2777 2778emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: 2779 MOVOU -32(DI)(R12*1), X4 2780 MOVOU -16(DI)(R12*1), X5 2781 MOVOA X4, -32(AX)(R12*1) 2782 MOVOA X5, -16(AX)(R12*1) 2783 ADDQ $0x20, R12 2784 CMPQ R9, R12 2785 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2786 MOVOU X0, (AX) 2787 MOVOU X1, 16(AX) 2788 MOVOU X2, -32(AX)(R9*1) 2789 MOVOU X3, -16(AX)(R9*1) 2790 MOVQ R8, AX 2791 2792emit_literal_done_match_emit_encodeBlockAsm12B: 2793match_nolit_loop_encodeBlockAsm12B: 2794 MOVL CX, DI 2795 SUBL SI, DI 2796 MOVL DI, 16(SP) 2797 ADDL $0x04, CX 2798 ADDL $0x04, SI 2799 MOVQ src_len+32(FP), DI 2800 SUBL CX, DI 2801 LEAQ (DX)(CX*1), R8 2802 LEAQ (DX)(SI*1), SI 2803 2804 // matchLen 2805 XORL R10, R10 2806 CMPL DI, $0x08 2807 JL matchlen_single_match_nolit_encodeBlockAsm12B 2808 2809matchlen_loopback_match_nolit_encodeBlockAsm12B: 2810 MOVQ (R8)(R10*1), R9 2811 XORQ (SI)(R10*1), R9 2812 TESTQ R9, R9 2813 JZ matchlen_loop_match_nolit_encodeBlockAsm12B 2814 BSFQ R9, R9 2815 SARQ $0x03, R9 2816 LEAL (R10)(R9*1), R10 2817 JMP match_nolit_end_encodeBlockAsm12B 2818 2819matchlen_loop_match_nolit_encodeBlockAsm12B: 2820 LEAL -8(DI), DI 2821 LEAL 8(R10), R10 2822 CMPL DI, $0x08 2823 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B 2824 2825matchlen_single_match_nolit_encodeBlockAsm12B: 2826 TESTL DI, DI 2827 JZ match_nolit_end_encodeBlockAsm12B 2828 2829matchlen_single_loopback_match_nolit_encodeBlockAsm12B: 2830 MOVB (R8)(R10*1), R9 2831 
CMPB (SI)(R10*1), R9 2832 JNE match_nolit_end_encodeBlockAsm12B 2833 LEAL 1(R10), R10 2834 DECL DI 2835 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B 2836 2837match_nolit_end_encodeBlockAsm12B: 2838 ADDL R10, CX 2839 MOVL 16(SP), SI 2840 ADDL $0x04, R10 2841 MOVL CX, 12(SP) 2842 2843 // emitCopy 2844two_byte_offset_match_nolit_encodeBlockAsm12B: 2845 CMPL R10, $0x40 2846 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B 2847 MOVB $0xee, (AX) 2848 MOVW SI, 1(AX) 2849 LEAL -60(R10), R10 2850 ADDQ $0x03, AX 2851 2852 // emitRepeat 2853 MOVL R10, DI 2854 LEAL -4(R10), R10 2855 CMPL DI, $0x08 2856 JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short 2857 CMPL DI, $0x0c 2858 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short 2859 CMPL SI, $0x00000800 2860 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short 2861 2862cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: 2863 CMPL R10, $0x00000104 2864 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short 2865 LEAL -256(R10), R10 2866 MOVW $0x0019, (AX) 2867 MOVW R10, 2(AX) 2868 ADDQ $0x04, AX 2869 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 2870 2871repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: 2872 LEAL -4(R10), R10 2873 MOVW $0x0015, (AX) 2874 MOVB R10, 2(AX) 2875 ADDQ $0x03, AX 2876 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 2877 2878repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: 2879 SHLL $0x02, R10 2880 ORL $0x01, R10 2881 MOVW R10, (AX) 2882 ADDQ $0x02, AX 2883 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 2884 2885repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: 2886 XORQ DI, DI 2887 LEAL 1(DI)(R10*4), R10 2888 MOVB SI, 1(AX) 2889 SARL $0x08, SI 2890 SHLL $0x05, SI 2891 ORL SI, R10 2892 MOVB R10, (AX) 2893 ADDQ $0x02, AX 2894 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 2895 JMP two_byte_offset_match_nolit_encodeBlockAsm12B 2896 
2897two_byte_offset_short_match_nolit_encodeBlockAsm12B: 2898 CMPL R10, $0x0c 2899 JGE emit_copy_three_match_nolit_encodeBlockAsm12B 2900 CMPL SI, $0x00000800 2901 JGE emit_copy_three_match_nolit_encodeBlockAsm12B 2902 MOVB $0x01, BL 2903 LEAL -16(BX)(R10*4), R10 2904 MOVB SI, 1(AX) 2905 SHRL $0x08, SI 2906 SHLL $0x05, SI 2907 ORL SI, R10 2908 MOVB R10, (AX) 2909 ADDQ $0x02, AX 2910 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 2911 2912emit_copy_three_match_nolit_encodeBlockAsm12B: 2913 MOVB $0x02, BL 2914 LEAL -4(BX)(R10*4), R10 2915 MOVB R10, (AX) 2916 MOVW SI, 1(AX) 2917 ADDQ $0x03, AX 2918 2919match_nolit_emitcopy_end_encodeBlockAsm12B: 2920 CMPL CX, 8(SP) 2921 JGE emit_remainder_encodeBlockAsm12B 2922 MOVQ -2(DX)(CX*1), DI 2923 CMPQ AX, (SP) 2924 JL match_nolit_dst_ok_encodeBlockAsm12B 2925 MOVQ $0x00000000, ret+48(FP) 2926 RET 2927 2928match_nolit_dst_ok_encodeBlockAsm12B: 2929 MOVQ $0x000000cf1bbcdcbb, R9 2930 MOVQ DI, R8 2931 SHRQ $0x10, DI 2932 MOVQ DI, SI 2933 SHLQ $0x18, R8 2934 IMULQ R9, R8 2935 SHRQ $0x34, R8 2936 SHLQ $0x18, SI 2937 IMULQ R9, SI 2938 SHRQ $0x34, SI 2939 LEAL -2(CX), R9 2940 LEAQ 24(SP)(SI*4), R10 2941 MOVL (R10), SI 2942 MOVL R9, 24(SP)(R8*4) 2943 MOVL CX, (R10) 2944 CMPL (DX)(SI*1), DI 2945 JEQ match_nolit_loop_encodeBlockAsm12B 2946 INCL CX 2947 JMP search_loop_encodeBlockAsm12B 2948 2949emit_remainder_encodeBlockAsm12B: 2950 MOVQ src_len+32(FP), CX 2951 SUBL 12(SP), CX 2952 LEAQ 3(AX)(CX*1), CX 2953 CMPQ CX, (SP) 2954 JL emit_remainder_ok_encodeBlockAsm12B 2955 MOVQ $0x00000000, ret+48(FP) 2956 RET 2957 2958emit_remainder_ok_encodeBlockAsm12B: 2959 MOVQ src_len+32(FP), CX 2960 MOVL 12(SP), BX 2961 CMPL BX, CX 2962 JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B 2963 MOVL CX, SI 2964 MOVL CX, 12(SP) 2965 LEAQ (DX)(BX*1), CX 2966 SUBL BX, SI 2967 LEAL -1(SI), DX 2968 CMPL DX, $0x3c 2969 JLT one_byte_emit_remainder_encodeBlockAsm12B 2970 CMPL DX, $0x00000100 2971 JLT two_bytes_emit_remainder_encodeBlockAsm12B 2972 MOVB $0xf4, 
(AX) 2973 MOVW DX, 1(AX) 2974 ADDQ $0x03, AX 2975 JMP memmove_long_emit_remainder_encodeBlockAsm12B 2976 2977two_bytes_emit_remainder_encodeBlockAsm12B: 2978 MOVB $0xf0, (AX) 2979 MOVB DL, 1(AX) 2980 ADDQ $0x02, AX 2981 CMPL DX, $0x40 2982 JL memmove_emit_remainder_encodeBlockAsm12B 2983 JMP memmove_long_emit_remainder_encodeBlockAsm12B 2984 2985one_byte_emit_remainder_encodeBlockAsm12B: 2986 SHLB $0x02, DL 2987 MOVB DL, (AX) 2988 ADDQ $0x01, AX 2989 2990memmove_emit_remainder_encodeBlockAsm12B: 2991 LEAQ (AX)(SI*1), DX 2992 MOVL SI, BX 2993 2994 // genMemMoveShort 2995 CMPQ BX, $0x08 2996 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8 2997 CMPQ BX, $0x10 2998 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16 2999 CMPQ BX, $0x20 3000 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 3001 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 3002 3003emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8: 3004 MOVQ (CX), SI 3005 MOVQ SI, (AX) 3006 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3007 3008emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: 3009 MOVQ (CX), SI 3010 MOVQ -8(CX)(BX*1), CX 3011 MOVQ SI, (AX) 3012 MOVQ CX, -8(AX)(BX*1) 3013 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3014 3015emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: 3016 MOVOU (CX), X0 3017 MOVOU -16(CX)(BX*1), X1 3018 MOVOU X0, (AX) 3019 MOVOU X1, -16(AX)(BX*1) 3020 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3021 3022emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: 3023 MOVOU (CX), X0 3024 MOVOU 16(CX), X1 3025 MOVOU -32(CX)(BX*1), X2 3026 MOVOU -16(CX)(BX*1), X3 3027 MOVOU X0, (AX) 3028 MOVOU X1, 16(AX) 3029 MOVOU X2, -32(AX)(BX*1) 3030 MOVOU X3, -16(AX)(BX*1) 3031 3032memmove_end_copy_emit_remainder_encodeBlockAsm12B: 3033 MOVQ DX, AX 3034 JMP 
emit_literal_done_emit_remainder_encodeBlockAsm12B 3035 3036memmove_long_emit_remainder_encodeBlockAsm12B: 3037 LEAQ (AX)(SI*1), DX 3038 MOVL SI, BX 3039 3040 // genMemMoveLong 3041 MOVOU (CX), X0 3042 MOVOU 16(CX), X1 3043 MOVOU -32(CX)(BX*1), X2 3044 MOVOU -16(CX)(BX*1), X3 3045 MOVQ BX, DI 3046 SHRQ $0x05, DI 3047 MOVQ AX, SI 3048 ANDL $0x0000001f, SI 3049 MOVQ $0x00000040, R8 3050 SUBQ SI, R8 3051 DECQ DI 3052 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 3053 LEAQ -32(CX)(R8*1), SI 3054 LEAQ -32(AX)(R8*1), R9 3055 3056emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: 3057 MOVOU (SI), X4 3058 MOVOU 16(SI), X5 3059 MOVOA X4, (R9) 3060 MOVOA X5, 16(R9) 3061 ADDQ $0x20, R9 3062 ADDQ $0x20, SI 3063 ADDQ $0x20, R8 3064 DECQ DI 3065 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back 3066 3067emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: 3068 MOVOU -32(CX)(R8*1), X4 3069 MOVOU -16(CX)(R8*1), X5 3070 MOVOA X4, -32(AX)(R8*1) 3071 MOVOA X5, -16(AX)(R8*1) 3072 ADDQ $0x20, R8 3073 CMPQ BX, R8 3074 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 3075 MOVOU X0, (AX) 3076 MOVOU X1, 16(AX) 3077 MOVOU X2, -32(AX)(BX*1) 3078 MOVOU X3, -16(AX)(BX*1) 3079 MOVQ DX, AX 3080 3081emit_literal_done_emit_remainder_encodeBlockAsm12B: 3082 MOVQ dst_base+0(FP), CX 3083 SUBQ CX, AX 3084 MOVQ AX, ret+48(FP) 3085 RET 3086 3087// func encodeBlockAsm10B(dst []byte, src []byte) int 3088// Requires: SSE2 3089TEXT ·encodeBlockAsm10B(SB), $4120-56 3090 MOVQ dst_base+0(FP), AX 3091 MOVQ $0x00000020, CX 3092 LEAQ 24(SP), DX 3093 PXOR X0, X0 3094 3095zero_loop_encodeBlockAsm10B: 3096 MOVOU X0, (DX) 3097 MOVOU X0, 16(DX) 3098 MOVOU X0, 32(DX) 3099 MOVOU X0, 48(DX) 3100 MOVOU X0, 64(DX) 3101 MOVOU X0, 80(DX) 3102 MOVOU X0, 96(DX) 3103 MOVOU X0, 112(DX) 3104 ADDQ $0x80, DX 3105 DECQ CX 3106 JNZ zero_loop_encodeBlockAsm10B 3107 MOVL $0x00000000, 
12(SP) 3108 MOVQ src_len+32(FP), CX 3109 LEAQ -9(CX), DX 3110 LEAQ -8(CX), SI 3111 MOVL SI, 8(SP) 3112 SHRQ $0x05, CX 3113 SUBL CX, DX 3114 LEAQ (AX)(DX*1), DX 3115 MOVQ DX, (SP) 3116 MOVL $0x00000001, CX 3117 MOVL CX, 16(SP) 3118 MOVQ src_base+24(FP), DX 3119 3120search_loop_encodeBlockAsm10B: 3121 MOVL CX, SI 3122 SUBL 12(SP), SI 3123 SHRL $0x05, SI 3124 LEAL 4(CX)(SI*1), SI 3125 CMPL SI, 8(SP) 3126 JGE emit_remainder_encodeBlockAsm10B 3127 MOVQ (DX)(CX*1), DI 3128 MOVL SI, 20(SP) 3129 MOVQ $0x9e3779b1, R9 3130 MOVQ DI, R10 3131 MOVQ DI, R11 3132 SHRQ $0x08, R11 3133 SHLQ $0x20, R10 3134 IMULQ R9, R10 3135 SHRQ $0x36, R10 3136 SHLQ $0x20, R11 3137 IMULQ R9, R11 3138 SHRQ $0x36, R11 3139 MOVL 24(SP)(R10*4), SI 3140 MOVL 24(SP)(R11*4), R8 3141 MOVL CX, 24(SP)(R10*4) 3142 LEAL 1(CX), R10 3143 MOVL R10, 24(SP)(R11*4) 3144 MOVQ DI, R10 3145 SHRQ $0x10, R10 3146 SHLQ $0x20, R10 3147 IMULQ R9, R10 3148 SHRQ $0x36, R10 3149 MOVL CX, R9 3150 SUBL 16(SP), R9 3151 MOVL 1(DX)(R9*1), R11 3152 MOVQ DI, R9 3153 SHRQ $0x08, R9 3154 CMPL R9, R11 3155 JNE no_repeat_found_encodeBlockAsm10B 3156 LEAL 1(CX), DI 3157 MOVL 12(SP), R8 3158 MOVL DI, SI 3159 SUBL 16(SP), SI 3160 JZ repeat_extend_back_end_encodeBlockAsm10B 3161 3162repeat_extend_back_loop_encodeBlockAsm10B: 3163 CMPL DI, R8 3164 JLE repeat_extend_back_end_encodeBlockAsm10B 3165 MOVB -1(DX)(SI*1), BL 3166 MOVB -1(DX)(DI*1), R9 3167 CMPB BL, R9 3168 JNE repeat_extend_back_end_encodeBlockAsm10B 3169 LEAL -1(DI), DI 3170 DECL SI 3171 JNZ repeat_extend_back_loop_encodeBlockAsm10B 3172 3173repeat_extend_back_end_encodeBlockAsm10B: 3174 MOVL 12(SP), SI 3175 CMPL SI, DI 3176 JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B 3177 MOVL DI, R9 3178 MOVL DI, 12(SP) 3179 LEAQ (DX)(SI*1), R10 3180 SUBL SI, R9 3181 LEAL -1(R9), SI 3182 CMPL SI, $0x3c 3183 JLT one_byte_repeat_emit_encodeBlockAsm10B 3184 CMPL SI, $0x00000100 3185 JLT two_bytes_repeat_emit_encodeBlockAsm10B 3186 MOVB $0xf4, (AX) 3187 MOVW SI, 1(AX) 3188 ADDQ $0x03, AX 
3189 JMP memmove_long_repeat_emit_encodeBlockAsm10B 3190 3191two_bytes_repeat_emit_encodeBlockAsm10B: 3192 MOVB $0xf0, (AX) 3193 MOVB SI, 1(AX) 3194 ADDQ $0x02, AX 3195 CMPL SI, $0x40 3196 JL memmove_repeat_emit_encodeBlockAsm10B 3197 JMP memmove_long_repeat_emit_encodeBlockAsm10B 3198 3199one_byte_repeat_emit_encodeBlockAsm10B: 3200 SHLB $0x02, SI 3201 MOVB SI, (AX) 3202 ADDQ $0x01, AX 3203 3204memmove_repeat_emit_encodeBlockAsm10B: 3205 LEAQ (AX)(R9*1), SI 3206 3207 // genMemMoveShort 3208 CMPQ R9, $0x08 3209 JLE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 3210 CMPQ R9, $0x10 3211 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 3212 CMPQ R9, $0x20 3213 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 3214 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 3215 3216emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: 3217 MOVQ (R10), R11 3218 MOVQ R11, (AX) 3219 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3220 3221emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: 3222 MOVQ (R10), R11 3223 MOVQ -8(R10)(R9*1), R10 3224 MOVQ R11, (AX) 3225 MOVQ R10, -8(AX)(R9*1) 3226 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3227 3228emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: 3229 MOVOU (R10), X0 3230 MOVOU -16(R10)(R9*1), X1 3231 MOVOU X0, (AX) 3232 MOVOU X1, -16(AX)(R9*1) 3233 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3234 3235emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: 3236 MOVOU (R10), X0 3237 MOVOU 16(R10), X1 3238 MOVOU -32(R10)(R9*1), X2 3239 MOVOU -16(R10)(R9*1), X3 3240 MOVOU X0, (AX) 3241 MOVOU X1, 16(AX) 3242 MOVOU X2, -32(AX)(R9*1) 3243 MOVOU X3, -16(AX)(R9*1) 3244 3245memmove_end_copy_repeat_emit_encodeBlockAsm10B: 3246 MOVQ SI, AX 3247 JMP emit_literal_done_repeat_emit_encodeBlockAsm10B 3248 3249memmove_long_repeat_emit_encodeBlockAsm10B: 3250 LEAQ (AX)(R9*1), 
SI 3251 3252 // genMemMoveLong 3253 MOVOU (R10), X0 3254 MOVOU 16(R10), X1 3255 MOVOU -32(R10)(R9*1), X2 3256 MOVOU -16(R10)(R9*1), X3 3257 MOVQ R9, R12 3258 SHRQ $0x05, R12 3259 MOVQ AX, R11 3260 ANDL $0x0000001f, R11 3261 MOVQ $0x00000040, R13 3262 SUBQ R11, R13 3263 DECQ R12 3264 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3265 LEAQ -32(R10)(R13*1), R11 3266 LEAQ -32(AX)(R13*1), R14 3267 3268emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: 3269 MOVOU (R11), X4 3270 MOVOU 16(R11), X5 3271 MOVOA X4, (R14) 3272 MOVOA X5, 16(R14) 3273 ADDQ $0x20, R14 3274 ADDQ $0x20, R11 3275 ADDQ $0x20, R13 3276 DECQ R12 3277 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back 3278 3279emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: 3280 MOVOU -32(R10)(R13*1), X4 3281 MOVOU -16(R10)(R13*1), X5 3282 MOVOA X4, -32(AX)(R13*1) 3283 MOVOA X5, -16(AX)(R13*1) 3284 ADDQ $0x20, R13 3285 CMPQ R9, R13 3286 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3287 MOVOU X0, (AX) 3288 MOVOU X1, 16(AX) 3289 MOVOU X2, -32(AX)(R9*1) 3290 MOVOU X3, -16(AX)(R9*1) 3291 MOVQ SI, AX 3292 3293emit_literal_done_repeat_emit_encodeBlockAsm10B: 3294 ADDL $0x05, CX 3295 MOVL CX, SI 3296 SUBL 16(SP), SI 3297 MOVQ src_len+32(FP), R9 3298 SUBL CX, R9 3299 LEAQ (DX)(CX*1), R10 3300 LEAQ (DX)(SI*1), SI 3301 3302 // matchLen 3303 XORL R12, R12 3304 CMPL R9, $0x08 3305 JL matchlen_single_repeat_extend_encodeBlockAsm10B 3306 3307matchlen_loopback_repeat_extend_encodeBlockAsm10B: 3308 MOVQ (R10)(R12*1), R11 3309 XORQ (SI)(R12*1), R11 3310 TESTQ R11, R11 3311 JZ matchlen_loop_repeat_extend_encodeBlockAsm10B 3312 BSFQ R11, R11 3313 SARQ $0x03, R11 3314 LEAL (R12)(R11*1), R12 3315 JMP repeat_extend_forward_end_encodeBlockAsm10B 3316 3317matchlen_loop_repeat_extend_encodeBlockAsm10B: 3318 LEAL -8(R9), R9 3319 LEAL 8(R12), R12 3320 CMPL R9, $0x08 3321 JGE 
matchlen_loopback_repeat_extend_encodeBlockAsm10B 3322 3323matchlen_single_repeat_extend_encodeBlockAsm10B: 3324 TESTL R9, R9 3325 JZ repeat_extend_forward_end_encodeBlockAsm10B 3326 3327matchlen_single_loopback_repeat_extend_encodeBlockAsm10B: 3328 MOVB (R10)(R12*1), R11 3329 CMPB (SI)(R12*1), R11 3330 JNE repeat_extend_forward_end_encodeBlockAsm10B 3331 LEAL 1(R12), R12 3332 DECL R9 3333 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B 3334 3335repeat_extend_forward_end_encodeBlockAsm10B: 3336 ADDL R12, CX 3337 MOVL CX, SI 3338 SUBL DI, SI 3339 MOVL 16(SP), DI 3340 TESTL R8, R8 3341 JZ repeat_as_copy_encodeBlockAsm10B 3342 3343 // emitRepeat 3344 MOVL SI, R8 3345 LEAL -4(SI), SI 3346 CMPL R8, $0x08 3347 JLE repeat_two_match_repeat_encodeBlockAsm10B 3348 CMPL R8, $0x0c 3349 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B 3350 CMPL DI, $0x00000800 3351 JLT repeat_two_offset_match_repeat_encodeBlockAsm10B 3352 3353cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: 3354 CMPL SI, $0x00000104 3355 JLT repeat_three_match_repeat_encodeBlockAsm10B 3356 LEAL -256(SI), SI 3357 MOVW $0x0019, (AX) 3358 MOVW SI, 2(AX) 3359 ADDQ $0x04, AX 3360 JMP repeat_end_emit_encodeBlockAsm10B 3361 3362repeat_three_match_repeat_encodeBlockAsm10B: 3363 LEAL -4(SI), SI 3364 MOVW $0x0015, (AX) 3365 MOVB SI, 2(AX) 3366 ADDQ $0x03, AX 3367 JMP repeat_end_emit_encodeBlockAsm10B 3368 3369repeat_two_match_repeat_encodeBlockAsm10B: 3370 SHLL $0x02, SI 3371 ORL $0x01, SI 3372 MOVW SI, (AX) 3373 ADDQ $0x02, AX 3374 JMP repeat_end_emit_encodeBlockAsm10B 3375 3376repeat_two_offset_match_repeat_encodeBlockAsm10B: 3377 XORQ R8, R8 3378 LEAL 1(R8)(SI*4), SI 3379 MOVB DI, 1(AX) 3380 SARL $0x08, DI 3381 SHLL $0x05, DI 3382 ORL DI, SI 3383 MOVB SI, (AX) 3384 ADDQ $0x02, AX 3385 JMP repeat_end_emit_encodeBlockAsm10B 3386 3387repeat_as_copy_encodeBlockAsm10B: 3388 // emitCopy 3389two_byte_offset_repeat_as_copy_encodeBlockAsm10B: 3390 CMPL SI, $0x40 3391 JLE 
two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B 3392 MOVB $0xee, (AX) 3393 MOVW DI, 1(AX) 3394 LEAL -60(SI), SI 3395 ADDQ $0x03, AX 3396 3397 // emitRepeat 3398 MOVL SI, R8 3399 LEAL -4(SI), SI 3400 CMPL R8, $0x08 3401 JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3402 CMPL R8, $0x0c 3403 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3404 CMPL DI, $0x00000800 3405 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3406 3407cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3408 CMPL SI, $0x00000104 3409 JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3410 LEAL -256(SI), SI 3411 MOVW $0x0019, (AX) 3412 MOVW SI, 2(AX) 3413 ADDQ $0x04, AX 3414 JMP repeat_end_emit_encodeBlockAsm10B 3415 3416repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3417 LEAL -4(SI), SI 3418 MOVW $0x0015, (AX) 3419 MOVB SI, 2(AX) 3420 ADDQ $0x03, AX 3421 JMP repeat_end_emit_encodeBlockAsm10B 3422 3423repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3424 SHLL $0x02, SI 3425 ORL $0x01, SI 3426 MOVW SI, (AX) 3427 ADDQ $0x02, AX 3428 JMP repeat_end_emit_encodeBlockAsm10B 3429 3430repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3431 XORQ R8, R8 3432 LEAL 1(R8)(SI*4), SI 3433 MOVB DI, 1(AX) 3434 SARL $0x08, DI 3435 SHLL $0x05, DI 3436 ORL DI, SI 3437 MOVB SI, (AX) 3438 ADDQ $0x02, AX 3439 JMP repeat_end_emit_encodeBlockAsm10B 3440 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B 3441 3442two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: 3443 CMPL SI, $0x0c 3444 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B 3445 CMPL DI, $0x00000800 3446 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B 3447 MOVB $0x01, BL 3448 LEAL -16(BX)(SI*4), SI 3449 MOVB DI, 1(AX) 3450 SHRL $0x08, DI 3451 SHLL $0x05, DI 3452 ORL DI, SI 3453 MOVB SI, (AX) 3454 ADDQ $0x02, AX 3455 JMP repeat_end_emit_encodeBlockAsm10B 3456 
3457emit_copy_three_repeat_as_copy_encodeBlockAsm10B: 3458 MOVB $0x02, BL 3459 LEAL -4(BX)(SI*4), SI 3460 MOVB SI, (AX) 3461 MOVW DI, 1(AX) 3462 ADDQ $0x03, AX 3463 3464repeat_end_emit_encodeBlockAsm10B: 3465 MOVL CX, 12(SP) 3466 JMP search_loop_encodeBlockAsm10B 3467 3468no_repeat_found_encodeBlockAsm10B: 3469 CMPL (DX)(SI*1), DI 3470 JEQ candidate_match_encodeBlockAsm10B 3471 SHRQ $0x08, DI 3472 MOVL 24(SP)(R10*4), SI 3473 LEAL 2(CX), R9 3474 CMPL (DX)(R8*1), DI 3475 JEQ candidate2_match_encodeBlockAsm10B 3476 MOVL R9, 24(SP)(R10*4) 3477 SHRQ $0x08, DI 3478 CMPL (DX)(SI*1), DI 3479 JEQ candidate3_match_encodeBlockAsm10B 3480 MOVL 20(SP), CX 3481 JMP search_loop_encodeBlockAsm10B 3482 3483candidate3_match_encodeBlockAsm10B: 3484 ADDL $0x02, CX 3485 JMP candidate_match_encodeBlockAsm10B 3486 3487candidate2_match_encodeBlockAsm10B: 3488 MOVL R9, 24(SP)(R10*4) 3489 INCL CX 3490 MOVL R8, SI 3491 3492candidate_match_encodeBlockAsm10B: 3493 MOVL 12(SP), DI 3494 TESTL SI, SI 3495 JZ match_extend_back_end_encodeBlockAsm10B 3496 3497match_extend_back_loop_encodeBlockAsm10B: 3498 CMPL CX, DI 3499 JLE match_extend_back_end_encodeBlockAsm10B 3500 MOVB -1(DX)(SI*1), BL 3501 MOVB -1(DX)(CX*1), R8 3502 CMPB BL, R8 3503 JNE match_extend_back_end_encodeBlockAsm10B 3504 LEAL -1(CX), CX 3505 DECL SI 3506 JZ match_extend_back_end_encodeBlockAsm10B 3507 JMP match_extend_back_loop_encodeBlockAsm10B 3508 3509match_extend_back_end_encodeBlockAsm10B: 3510 MOVL CX, DI 3511 SUBL 12(SP), DI 3512 LEAQ 3(AX)(DI*1), DI 3513 CMPQ DI, (SP) 3514 JL match_dst_size_check_encodeBlockAsm10B 3515 MOVQ $0x00000000, ret+48(FP) 3516 RET 3517 3518match_dst_size_check_encodeBlockAsm10B: 3519 MOVL CX, DI 3520 MOVL 12(SP), R8 3521 CMPL R8, DI 3522 JEQ emit_literal_done_match_emit_encodeBlockAsm10B 3523 MOVL DI, R9 3524 MOVL DI, 12(SP) 3525 LEAQ (DX)(R8*1), DI 3526 SUBL R8, R9 3527 LEAL -1(R9), R8 3528 CMPL R8, $0x3c 3529 JLT one_byte_match_emit_encodeBlockAsm10B 3530 CMPL R8, $0x00000100 3531 JLT 
two_bytes_match_emit_encodeBlockAsm10B 3532 MOVB $0xf4, (AX) 3533 MOVW R8, 1(AX) 3534 ADDQ $0x03, AX 3535 JMP memmove_long_match_emit_encodeBlockAsm10B 3536 3537two_bytes_match_emit_encodeBlockAsm10B: 3538 MOVB $0xf0, (AX) 3539 MOVB R8, 1(AX) 3540 ADDQ $0x02, AX 3541 CMPL R8, $0x40 3542 JL memmove_match_emit_encodeBlockAsm10B 3543 JMP memmove_long_match_emit_encodeBlockAsm10B 3544 3545one_byte_match_emit_encodeBlockAsm10B: 3546 SHLB $0x02, R8 3547 MOVB R8, (AX) 3548 ADDQ $0x01, AX 3549 3550memmove_match_emit_encodeBlockAsm10B: 3551 LEAQ (AX)(R9*1), R8 3552 3553 // genMemMoveShort 3554 CMPQ R9, $0x08 3555 JLE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 3556 CMPQ R9, $0x10 3557 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 3558 CMPQ R9, $0x20 3559 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 3560 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 3561 3562emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: 3563 MOVQ (DI), R10 3564 MOVQ R10, (AX) 3565 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3566 3567emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: 3568 MOVQ (DI), R10 3569 MOVQ -8(DI)(R9*1), DI 3570 MOVQ R10, (AX) 3571 MOVQ DI, -8(AX)(R9*1) 3572 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3573 3574emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: 3575 MOVOU (DI), X0 3576 MOVOU -16(DI)(R9*1), X1 3577 MOVOU X0, (AX) 3578 MOVOU X1, -16(AX)(R9*1) 3579 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3580 3581emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: 3582 MOVOU (DI), X0 3583 MOVOU 16(DI), X1 3584 MOVOU -32(DI)(R9*1), X2 3585 MOVOU -16(DI)(R9*1), X3 3586 MOVOU X0, (AX) 3587 MOVOU X1, 16(AX) 3588 MOVOU X2, -32(AX)(R9*1) 3589 MOVOU X3, -16(AX)(R9*1) 3590 3591memmove_end_copy_match_emit_encodeBlockAsm10B: 3592 MOVQ R8, AX 3593 JMP emit_literal_done_match_emit_encodeBlockAsm10B 3594 
3595memmove_long_match_emit_encodeBlockAsm10B: 3596 LEAQ (AX)(R9*1), R8 3597 3598 // genMemMoveLong 3599 MOVOU (DI), X0 3600 MOVOU 16(DI), X1 3601 MOVOU -32(DI)(R9*1), X2 3602 MOVOU -16(DI)(R9*1), X3 3603 MOVQ R9, R11 3604 SHRQ $0x05, R11 3605 MOVQ AX, R10 3606 ANDL $0x0000001f, R10 3607 MOVQ $0x00000040, R12 3608 SUBQ R10, R12 3609 DECQ R11 3610 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3611 LEAQ -32(DI)(R12*1), R10 3612 LEAQ -32(AX)(R12*1), R13 3613 3614emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: 3615 MOVOU (R10), X4 3616 MOVOU 16(R10), X5 3617 MOVOA X4, (R13) 3618 MOVOA X5, 16(R13) 3619 ADDQ $0x20, R13 3620 ADDQ $0x20, R10 3621 ADDQ $0x20, R12 3622 DECQ R11 3623 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back 3624 3625emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: 3626 MOVOU -32(DI)(R12*1), X4 3627 MOVOU -16(DI)(R12*1), X5 3628 MOVOA X4, -32(AX)(R12*1) 3629 MOVOA X5, -16(AX)(R12*1) 3630 ADDQ $0x20, R12 3631 CMPQ R9, R12 3632 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3633 MOVOU X0, (AX) 3634 MOVOU X1, 16(AX) 3635 MOVOU X2, -32(AX)(R9*1) 3636 MOVOU X3, -16(AX)(R9*1) 3637 MOVQ R8, AX 3638 3639emit_literal_done_match_emit_encodeBlockAsm10B: 3640match_nolit_loop_encodeBlockAsm10B: 3641 MOVL CX, DI 3642 SUBL SI, DI 3643 MOVL DI, 16(SP) 3644 ADDL $0x04, CX 3645 ADDL $0x04, SI 3646 MOVQ src_len+32(FP), DI 3647 SUBL CX, DI 3648 LEAQ (DX)(CX*1), R8 3649 LEAQ (DX)(SI*1), SI 3650 3651 // matchLen 3652 XORL R10, R10 3653 CMPL DI, $0x08 3654 JL matchlen_single_match_nolit_encodeBlockAsm10B 3655 3656matchlen_loopback_match_nolit_encodeBlockAsm10B: 3657 MOVQ (R8)(R10*1), R9 3658 XORQ (SI)(R10*1), R9 3659 TESTQ R9, R9 3660 JZ matchlen_loop_match_nolit_encodeBlockAsm10B 3661 BSFQ R9, R9 3662 SARQ $0x03, R9 3663 LEAL (R10)(R9*1), R10 3664 JMP match_nolit_end_encodeBlockAsm10B 3665 
3666matchlen_loop_match_nolit_encodeBlockAsm10B: 3667 LEAL -8(DI), DI 3668 LEAL 8(R10), R10 3669 CMPL DI, $0x08 3670 JGE matchlen_loopback_match_nolit_encodeBlockAsm10B 3671 3672matchlen_single_match_nolit_encodeBlockAsm10B: 3673 TESTL DI, DI 3674 JZ match_nolit_end_encodeBlockAsm10B 3675 3676matchlen_single_loopback_match_nolit_encodeBlockAsm10B: 3677 MOVB (R8)(R10*1), R9 3678 CMPB (SI)(R10*1), R9 3679 JNE match_nolit_end_encodeBlockAsm10B 3680 LEAL 1(R10), R10 3681 DECL DI 3682 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B 3683 3684match_nolit_end_encodeBlockAsm10B: 3685 ADDL R10, CX 3686 MOVL 16(SP), SI 3687 ADDL $0x04, R10 3688 MOVL CX, 12(SP) 3689 3690 // emitCopy 3691two_byte_offset_match_nolit_encodeBlockAsm10B: 3692 CMPL R10, $0x40 3693 JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B 3694 MOVB $0xee, (AX) 3695 MOVW SI, 1(AX) 3696 LEAL -60(R10), R10 3697 ADDQ $0x03, AX 3698 3699 // emitRepeat 3700 MOVL R10, DI 3701 LEAL -4(R10), R10 3702 CMPL DI, $0x08 3703 JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short 3704 CMPL DI, $0x0c 3705 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short 3706 CMPL SI, $0x00000800 3707 JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short 3708 3709cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: 3710 CMPL R10, $0x00000104 3711 JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short 3712 LEAL -256(R10), R10 3713 MOVW $0x0019, (AX) 3714 MOVW R10, 2(AX) 3715 ADDQ $0x04, AX 3716 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3717 3718repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: 3719 LEAL -4(R10), R10 3720 MOVW $0x0015, (AX) 3721 MOVB R10, 2(AX) 3722 ADDQ $0x03, AX 3723 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3724 3725repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: 3726 SHLL $0x02, R10 3727 ORL $0x01, R10 3728 MOVW R10, (AX) 3729 ADDQ $0x02, AX 3730 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3731 
3732repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: 3733 XORQ DI, DI 3734 LEAL 1(DI)(R10*4), R10 3735 MOVB SI, 1(AX) 3736 SARL $0x08, SI 3737 SHLL $0x05, SI 3738 ORL SI, R10 3739 MOVB R10, (AX) 3740 ADDQ $0x02, AX 3741 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3742 JMP two_byte_offset_match_nolit_encodeBlockAsm10B 3743 3744two_byte_offset_short_match_nolit_encodeBlockAsm10B: 3745 CMPL R10, $0x0c 3746 JGE emit_copy_three_match_nolit_encodeBlockAsm10B 3747 CMPL SI, $0x00000800 3748 JGE emit_copy_three_match_nolit_encodeBlockAsm10B 3749 MOVB $0x01, BL 3750 LEAL -16(BX)(R10*4), R10 3751 MOVB SI, 1(AX) 3752 SHRL $0x08, SI 3753 SHLL $0x05, SI 3754 ORL SI, R10 3755 MOVB R10, (AX) 3756 ADDQ $0x02, AX 3757 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3758 3759emit_copy_three_match_nolit_encodeBlockAsm10B: 3760 MOVB $0x02, BL 3761 LEAL -4(BX)(R10*4), R10 3762 MOVB R10, (AX) 3763 MOVW SI, 1(AX) 3764 ADDQ $0x03, AX 3765 3766match_nolit_emitcopy_end_encodeBlockAsm10B: 3767 CMPL CX, 8(SP) 3768 JGE emit_remainder_encodeBlockAsm10B 3769 MOVQ -2(DX)(CX*1), DI 3770 CMPQ AX, (SP) 3771 JL match_nolit_dst_ok_encodeBlockAsm10B 3772 MOVQ $0x00000000, ret+48(FP) 3773 RET 3774 3775match_nolit_dst_ok_encodeBlockAsm10B: 3776 MOVQ $0x9e3779b1, R9 3777 MOVQ DI, R8 3778 SHRQ $0x10, DI 3779 MOVQ DI, SI 3780 SHLQ $0x20, R8 3781 IMULQ R9, R8 3782 SHRQ $0x36, R8 3783 SHLQ $0x20, SI 3784 IMULQ R9, SI 3785 SHRQ $0x36, SI 3786 LEAL -2(CX), R9 3787 LEAQ 24(SP)(SI*4), R10 3788 MOVL (R10), SI 3789 MOVL R9, 24(SP)(R8*4) 3790 MOVL CX, (R10) 3791 CMPL (DX)(SI*1), DI 3792 JEQ match_nolit_loop_encodeBlockAsm10B 3793 INCL CX 3794 JMP search_loop_encodeBlockAsm10B 3795 3796emit_remainder_encodeBlockAsm10B: 3797 MOVQ src_len+32(FP), CX 3798 SUBL 12(SP), CX 3799 LEAQ 3(AX)(CX*1), CX 3800 CMPQ CX, (SP) 3801 JL emit_remainder_ok_encodeBlockAsm10B 3802 MOVQ $0x00000000, ret+48(FP) 3803 RET 3804 3805emit_remainder_ok_encodeBlockAsm10B: 3806 MOVQ src_len+32(FP), CX 3807 MOVL 12(SP), BX 3808 
CMPL BX, CX 3809 JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B 3810 MOVL CX, SI 3811 MOVL CX, 12(SP) 3812 LEAQ (DX)(BX*1), CX 3813 SUBL BX, SI 3814 LEAL -1(SI), DX 3815 CMPL DX, $0x3c 3816 JLT one_byte_emit_remainder_encodeBlockAsm10B 3817 CMPL DX, $0x00000100 3818 JLT two_bytes_emit_remainder_encodeBlockAsm10B 3819 MOVB $0xf4, (AX) 3820 MOVW DX, 1(AX) 3821 ADDQ $0x03, AX 3822 JMP memmove_long_emit_remainder_encodeBlockAsm10B 3823 3824two_bytes_emit_remainder_encodeBlockAsm10B: 3825 MOVB $0xf0, (AX) 3826 MOVB DL, 1(AX) 3827 ADDQ $0x02, AX 3828 CMPL DX, $0x40 3829 JL memmove_emit_remainder_encodeBlockAsm10B 3830 JMP memmove_long_emit_remainder_encodeBlockAsm10B 3831 3832one_byte_emit_remainder_encodeBlockAsm10B: 3833 SHLB $0x02, DL 3834 MOVB DL, (AX) 3835 ADDQ $0x01, AX 3836 3837memmove_emit_remainder_encodeBlockAsm10B: 3838 LEAQ (AX)(SI*1), DX 3839 MOVL SI, BX 3840 3841 // genMemMoveShort 3842 CMPQ BX, $0x08 3843 JLE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8 3844 CMPQ BX, $0x10 3845 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16 3846 CMPQ BX, $0x20 3847 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32 3848 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 3849 3850emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8: 3851 MOVQ (CX), SI 3852 MOVQ SI, (AX) 3853 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 3854 3855emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: 3856 MOVQ (CX), SI 3857 MOVQ -8(CX)(BX*1), CX 3858 MOVQ SI, (AX) 3859 MOVQ CX, -8(AX)(BX*1) 3860 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 3861 3862emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: 3863 MOVOU (CX), X0 3864 MOVOU -16(CX)(BX*1), X1 3865 MOVOU X0, (AX) 3866 MOVOU X1, -16(AX)(BX*1) 3867 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 3868 
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm10B

memmove_long_emit_remainder_encodeBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// encodeBlockAsm8B encodes src into dst and returns the number of bytes
// written (final dst pointer minus dst_base), or 0 when an output-size check
// against the limit in (SP) fails.
//
// Stack slots set up by the prologue:
//   (SP)    output limit: dst_base + (src_len - 9 - src_len>>5)
//   8(SP)   search limit: src_len - 8
//   12(SP)  src offset where the not-yet-emitted literals start (starts at 0)
//   16(SP)  last match offset, used for the repeat check (starts at 1)
//   20(SP)  next search position, saved while candidates are probed
//   24(SP)  1024-byte table of 256 4-byte src offsets, indexed by an 8-bit hash
//
// In the main loop AX is the dst write pointer, DX the src base and CX the
// current src offset.
//
// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the table: 8 iterations of 128 bytes = 1024 bytes from 24(SP).
zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm8B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm8B:
	// Next position = CX + 4 + (CX - 12(SP))>>4; stop searching once it
	// reaches the limit in 8(SP).
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// Hash the 4 bytes at CX (R10) and at CX+1 (R11): keep the low 32 bits,
	// multiply by 0x9e3779b1 and take the top 8 bits as table index.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x38, R11

	// Load both candidates (SI, R8), then store CX and CX+1 in their slots.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash of the 4 bytes at CX+2, used by no_repeat_found below.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10

	// Repeat check: compare the 4 bytes at CX+1 with those at CX+1-16(SP).
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeBlockAsm8B
	LEAL 1(CX), DI
	MOVL 12(SP), R8
	MOVL DI, SI
	SUBL 16(SP), SI
	JZ repeat_extend_back_end_encodeBlockAsm8B

	// Walk DI (match start) and SI (its source) backwards while the preceding
	// bytes are equal, DI stays above 12(SP) and SI stays above 0.
repeat_extend_back_loop_encodeBlockAsm8B:
	CMPL DI, R8
	JLE repeat_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeBlockAsm8B
	LEAL -1(DI), DI
	DECL SI
	JNZ repeat_extend_back_loop_encodeBlockAsm8B

repeat_extend_back_end_encodeBlockAsm8B:
	// Emit src[12(SP):DI] as a literal run (skipped when empty). With
	// SI = length-1: below 60 it is one tag byte SI<<2, below 256 it is 0xf0
	// plus one length byte, otherwise 0xf4 plus two length bytes.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

two_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeBlockAsm8B
	JMP memmove_long_repeat_emit_encodeBlockAsm8B

one_byte_repeat_emit_encodeBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm8B:
	// SI = dst pointer after the R9 literal bytes have been copied.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64

	// NOTE(review): this path always loads and stores 8 bytes, even when R9 is
	// smaller; presumably the src/dst margins make that safe - confirm against
	// the generator and the Go caller.
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_repeat_emit_encodeBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm8B

memmove_long_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded into X0-X3 up front and stored
	// last. R13 = 64 - (AX & 31), so the MOVOA stores at -32(AX)(R13*1) land
	// on a 32-byte boundary.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R12
	SHRQ $0x05, R12
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R13
	SUBQ R11, R13
	DECQ R12
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R13*1), R11
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R11
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R13*1), X4
	MOVOU -16(R10)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R9, R13
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeBlockAsm8B:
	// Skip the 1+4 bytes already compared and extend the repeat forward.
	// R9 = bytes left in src, R10 = current bytes, SI = bytes at offset 16(SP).
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R9
	SUBL CX, R9
	LEAQ (DX)(CX*1), R10
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R12 counts equal bytes: 8 at a time via XOR (BSFQ>>3 locates the first
	// differing byte), then byte by byte for the last <8 bytes.
	XORL R12, R12
	CMPL R9, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm8B

matchlen_loopback_repeat_extend_encodeBlockAsm8B:
	MOVQ (R10)(R12*1), R11
	XORQ (SI)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP repeat_extend_forward_end_encodeBlockAsm8B

matchlen_loop_repeat_extend_encodeBlockAsm8B:
	LEAL -8(R9), R9
	LEAL 8(R12), R12
	CMPL R9, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B

matchlen_single_repeat_extend_encodeBlockAsm8B:
	TESTL R9, R9
	JZ repeat_extend_forward_end_encodeBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeBlockAsm8B:
	MOVB (R10)(R12*1), R11
	CMPB (SI)(R12*1), R11
	JNE repeat_extend_forward_end_encodeBlockAsm8B
	LEAL 1(R12), R12
	DECL R9
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B

repeat_extend_forward_end_encodeBlockAsm8B:
	// SI = match length (CX - match start), DI = offset from 16(SP).
	// R8 still holds the 12(SP) value read before the literals were emitted;
	// when it is 0 the match is emitted as a copy instead of a repeat.
	ADDL R12, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm8B

	// emitRepeat
	MOVL SI, DI
	LEAL -4(SI), SI
	CMPL DI, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm8B

	// NOTE(review): the JGE below targets the very next instruction, so both
	// outcomes of this compare continue at the same place.
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B

cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
	CMPL SI, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm8B
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_match_repeat_encodeBlockAsm8B:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_match_repeat_encodeBlockAsm8B:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

	// NOTE(review): the instructions from here to the next label follow an
	// unconditional JMP and carry no label, so no jump in this function
	// reaches them. If they are to be removed, do it in the generator.
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_as_copy_encodeBlockAsm8B:
	// emitCopy
	// SI = length, DI = offset. Lengths above 64 write 0xee plus the 2-byte
	// offset, subtract 60 from the length and emit the rest as a repeat.
two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX

	// emitRepeat
	MOVL SI, DI
	LEAL -4(SI), SI
	CMPL DI, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

	// NOTE(review): as above, this JGE targets the next instruction.
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

	// NOTE(review): unlabeled instructions after an unconditional JMP; no jump
	// in this function reaches them (including the two JMPs at the end).
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
	// Lengths below 12 use the 2-byte form; everything else the 3-byte form.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm8B

emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm8B:
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm8B

no_repeat_found_encodeBlockAsm8B:
	// Try the candidate for CX (SI), then the one for CX+1 (R8), then the
	// table entry for CX+2 (slot R10); DI is shifted one byte per attempt.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBlockAsm8B
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeBlockAsm8B
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm8B

candidate3_match_encodeBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm8B

candidate2_match_encodeBlockAsm8B:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeBlockAsm8B:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBlockAsm8B

	// Extend the match backwards while the preceding bytes are equal, CX
	// stays above 12(SP) and SI stays above 0.
match_extend_back_loop_encodeBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBlockAsm8B
	JMP match_extend_back_loop_encodeBlockAsm8B

match_extend_back_end_encodeBlockAsm8B:
	// Return 0 unless AX + pending literal length + 3 is below the limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm8B:
	// Emit src[12(SP):CX] as literals, with the same tag scheme as the repeat
	// path (R8 = length-1, R9 = length, DI = source pointer).
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm8B
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm8B

two_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeBlockAsm8B
	JMP memmove_long_match_emit_encodeBlockAsm8B

one_byte_match_emit_encodeBlockAsm8B:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBlockAsm8B:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm8B

memmove_long_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
	// Record the offset CX - SI in 16(SP), skip the 4 bytes already compared
	// and measure how far the match extends.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm8B

matchlen_loopback_match_nolit_encodeBlockAsm8B:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeBlockAsm8B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeBlockAsm8B

matchlen_loop_match_nolit_encodeBlockAsm8B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm8B

matchlen_single_match_nolit_encodeBlockAsm8B:
	TESTL DI, DI
	JZ match_nolit_end_encodeBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeBlockAsm8B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B

match_nolit_end_encodeBlockAsm8B:
	// CX moves past the match; R10 = total length (extension + 4), SI = offset.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeBlockAsm8B:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX

	// emitRepeat
	MOVL R10, SI
	LEAL -4(R10), R10
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short

	// NOTE(review): this JGE targets the next instruction, so the compare has
	// no effect on control flow.
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

	// NOTE(review): unlabeled instructions after an unconditional JMP; no jump
	// in this function reaches them (including the two JMPs at the end).
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B
	JMP two_byte_offset_match_nolit_encodeBlockAsm8B

two_byte_offset_short_match_nolit_encodeBlockAsm8B:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm8B

emit_copy_three_match_nolit_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm8B:
	// Finish when CX reaches the search limit; return 0 when AX has reached
	// the output limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm8B
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm8B:
	// Store CX-2 under the hash of the bytes at CX-2, and swap CX into the
	// slot for the bytes at CX. If the previous entry of that slot matches
	// the 4 bytes at CX, continue straight into another copy.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x20, R8
	IMULQ R9, R8
	SHRQ $0x38, R8
	SHLQ $0x20, SI
	IMULQ R9, SI
	SHRQ $0x38, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeBlockAsm8B
	INCL CX
	JMP search_loop_encodeBlockAsm8B

emit_remainder_encodeBlockAsm8B:
	// Return 0 unless AX + (src_len - 12(SP)) + 3 is below the output limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm8B:
	// Emit src[12(SP):src_len] as the final literal run. From here CX is the
	// source pointer, SI the length and DX length-1 (DX no longer holds the
	// src base).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

two_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm8B
	JMP memmove_long_emit_remainder_encodeBlockAsm8B

one_byte_emit_remainder_encodeBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x08
	JLE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8:
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm8B

memmove_long_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm8B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	CMPL SI, $0x63
	JLE check_maxskip_ok_encodeBetterBlockAsm
	LEAL 100(CX), SI
	JMP check_maxskip_cont_encodeBetterBlockAsm

check_maxskip_ok_encodeBetterBlockAsm:
	LEAL 1(CX)(SI*1), SI

check_maxskip_cont_encodeBetterBlockAsm:
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x00cf1bbcdcbfa563, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x32, R11
	MOVL 24(SP)(R10*4), SI
	MOVL 262168(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 262168(SP)(R11*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm

candidateS_match_encodeBetterBlockAsm:
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm

match_extend_back_loop_encodeBetterBlockAsm:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm
	JMP match_extend_back_loop_encodeBetterBlockAsm

match_extend_back_end_encodeBetterBlockAsm:
	MOVL CX, DI
	SUBL 12(SP), DI
4870 LEAQ 5(AX)(DI*1), DI 4871 CMPQ DI, (SP) 4872 JL match_dst_size_check_encodeBetterBlockAsm 4873 MOVQ $0x00000000, ret+48(FP) 4874 RET 4875 4876match_dst_size_check_encodeBetterBlockAsm: 4877 MOVL CX, DI 4878 ADDL $0x04, CX 4879 ADDL $0x04, SI 4880 MOVQ src_len+32(FP), R8 4881 SUBL CX, R8 4882 LEAQ (DX)(CX*1), R9 4883 LEAQ (DX)(SI*1), R10 4884 4885 // matchLen 4886 XORL R12, R12 4887 CMPL R8, $0x08 4888 JL matchlen_single_match_nolit_encodeBetterBlockAsm 4889 4890matchlen_loopback_match_nolit_encodeBetterBlockAsm: 4891 MOVQ (R9)(R12*1), R11 4892 XORQ (R10)(R12*1), R11 4893 TESTQ R11, R11 4894 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm 4895 BSFQ R11, R11 4896 SARQ $0x03, R11 4897 LEAL (R12)(R11*1), R12 4898 JMP match_nolit_end_encodeBetterBlockAsm 4899 4900matchlen_loop_match_nolit_encodeBetterBlockAsm: 4901 LEAL -8(R8), R8 4902 LEAL 8(R12), R12 4903 CMPL R8, $0x08 4904 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm 4905 4906matchlen_single_match_nolit_encodeBetterBlockAsm: 4907 TESTL R8, R8 4908 JZ match_nolit_end_encodeBetterBlockAsm 4909 4910matchlen_single_loopback_match_nolit_encodeBetterBlockAsm: 4911 MOVB (R9)(R12*1), R11 4912 CMPB (R10)(R12*1), R11 4913 JNE match_nolit_end_encodeBetterBlockAsm 4914 LEAL 1(R12), R12 4915 DECL R8 4916 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm 4917 4918match_nolit_end_encodeBetterBlockAsm: 4919 MOVL CX, R8 4920 SUBL SI, R8 4921 4922 // Check if repeat 4923 CMPL 16(SP), R8 4924 JEQ match_is_repeat_encodeBetterBlockAsm 4925 CMPL R12, $0x01 4926 JG match_length_ok_encodeBetterBlockAsm 4927 CMPL R8, $0x0000ffff 4928 JLE match_length_ok_encodeBetterBlockAsm 4929 MOVL 20(SP), CX 4930 INCL CX 4931 JMP search_loop_encodeBetterBlockAsm 4932 4933match_length_ok_encodeBetterBlockAsm: 4934 MOVL R8, 16(SP) 4935 MOVL 12(SP), SI 4936 CMPL SI, DI 4937 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm 4938 MOVL DI, R9 4939 MOVL DI, 12(SP) 4940 LEAQ (DX)(SI*1), R10 4941 SUBL SI, R9 4942 LEAL -1(R9), SI 
4943 CMPL SI, $0x3c 4944 JLT one_byte_match_emit_encodeBetterBlockAsm 4945 CMPL SI, $0x00000100 4946 JLT two_bytes_match_emit_encodeBetterBlockAsm 4947 CMPL SI, $0x00010000 4948 JLT three_bytes_match_emit_encodeBetterBlockAsm 4949 CMPL SI, $0x01000000 4950 JLT four_bytes_match_emit_encodeBetterBlockAsm 4951 MOVB $0xfc, (AX) 4952 MOVL SI, 1(AX) 4953 ADDQ $0x05, AX 4954 JMP memmove_long_match_emit_encodeBetterBlockAsm 4955 4956four_bytes_match_emit_encodeBetterBlockAsm: 4957 MOVL SI, R11 4958 SHRL $0x10, R11 4959 MOVB $0xf8, (AX) 4960 MOVW SI, 1(AX) 4961 MOVB R11, 3(AX) 4962 ADDQ $0x04, AX 4963 JMP memmove_long_match_emit_encodeBetterBlockAsm 4964 4965three_bytes_match_emit_encodeBetterBlockAsm: 4966 MOVB $0xf4, (AX) 4967 MOVW SI, 1(AX) 4968 ADDQ $0x03, AX 4969 JMP memmove_long_match_emit_encodeBetterBlockAsm 4970 4971two_bytes_match_emit_encodeBetterBlockAsm: 4972 MOVB $0xf0, (AX) 4973 MOVB SI, 1(AX) 4974 ADDQ $0x02, AX 4975 CMPL SI, $0x40 4976 JL memmove_match_emit_encodeBetterBlockAsm 4977 JMP memmove_long_match_emit_encodeBetterBlockAsm 4978 4979one_byte_match_emit_encodeBetterBlockAsm: 4980 SHLB $0x02, SI 4981 MOVB SI, (AX) 4982 ADDQ $0x01, AX 4983 4984memmove_match_emit_encodeBetterBlockAsm: 4985 LEAQ (AX)(R9*1), SI 4986 4987 // genMemMoveShort 4988 CMPQ R9, $0x04 4989 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 4990 CMPQ R9, $0x08 4991 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 4992 CMPQ R9, $0x10 4993 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 4994 CMPQ R9, $0x20 4995 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 4996 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 4997 4998emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: 4999 MOVL (R10), R11 5000 MOVL R11, (AX) 5001 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5002 
5003emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: 5004 MOVL (R10), R11 5005 MOVL -4(R10)(R9*1), R10 5006 MOVL R11, (AX) 5007 MOVL R10, -4(AX)(R9*1) 5008 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5009 5010emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: 5011 MOVQ (R10), R11 5012 MOVQ -8(R10)(R9*1), R10 5013 MOVQ R11, (AX) 5014 MOVQ R10, -8(AX)(R9*1) 5015 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5016 5017emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: 5018 MOVOU (R10), X0 5019 MOVOU -16(R10)(R9*1), X1 5020 MOVOU X0, (AX) 5021 MOVOU X1, -16(AX)(R9*1) 5022 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5023 5024emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: 5025 MOVOU (R10), X0 5026 MOVOU 16(R10), X1 5027 MOVOU -32(R10)(R9*1), X2 5028 MOVOU -16(R10)(R9*1), X3 5029 MOVOU X0, (AX) 5030 MOVOU X1, 16(AX) 5031 MOVOU X2, -32(AX)(R9*1) 5032 MOVOU X3, -16(AX)(R9*1) 5033 5034memmove_end_copy_match_emit_encodeBetterBlockAsm: 5035 MOVQ SI, AX 5036 JMP emit_literal_done_match_emit_encodeBetterBlockAsm 5037 5038memmove_long_match_emit_encodeBetterBlockAsm: 5039 LEAQ (AX)(R9*1), SI 5040 5041 // genMemMoveLong 5042 MOVOU (R10), X0 5043 MOVOU 16(R10), X1 5044 MOVOU -32(R10)(R9*1), X2 5045 MOVOU -16(R10)(R9*1), X3 5046 MOVQ R9, R13 5047 SHRQ $0x05, R13 5048 MOVQ AX, R11 5049 ANDL $0x0000001f, R11 5050 MOVQ $0x00000040, R14 5051 SUBQ R11, R14 5052 DECQ R13 5053 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 5054 LEAQ -32(R10)(R14*1), R11 5055 LEAQ -32(AX)(R14*1), R15 5056 5057emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: 5058 MOVOU (R11), X4 5059 MOVOU 16(R11), X5 5060 MOVOA X4, (R15) 5061 MOVOA X5, 16(R15) 5062 ADDQ $0x20, R15 5063 ADDQ $0x20, R11 5064 ADDQ $0x20, R14 5065 DECQ R13 5066 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back 5067 
5068emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: 5069 MOVOU -32(R10)(R14*1), X4 5070 MOVOU -16(R10)(R14*1), X5 5071 MOVOA X4, -32(AX)(R14*1) 5072 MOVOA X5, -16(AX)(R14*1) 5073 ADDQ $0x20, R14 5074 CMPQ R9, R14 5075 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 5076 MOVOU X0, (AX) 5077 MOVOU X1, 16(AX) 5078 MOVOU X2, -32(AX)(R9*1) 5079 MOVOU X3, -16(AX)(R9*1) 5080 MOVQ SI, AX 5081 5082emit_literal_done_match_emit_encodeBetterBlockAsm: 5083 ADDL R12, CX 5084 ADDL $0x04, R12 5085 MOVL CX, 12(SP) 5086 5087 // emitCopy 5088 CMPL R8, $0x00010000 5089 JL two_byte_offset_match_nolit_encodeBetterBlockAsm 5090 5091four_bytes_loop_back_match_nolit_encodeBetterBlockAsm: 5092 CMPL R12, $0x40 5093 JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm 5094 MOVB $0xff, (AX) 5095 MOVL R8, 1(AX) 5096 LEAL -64(R12), R12 5097 ADDQ $0x05, AX 5098 CMPL R12, $0x04 5099 JL four_bytes_remain_match_nolit_encodeBetterBlockAsm 5100 5101 // emitRepeat 5102emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: 5103 MOVL R12, SI 5104 LEAL -4(R12), R12 5105 CMPL SI, $0x08 5106 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy 5107 CMPL SI, $0x0c 5108 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy 5109 CMPL R8, $0x00000800 5110 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy 5111 5112cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: 5113 CMPL R12, $0x00000104 5114 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy 5115 CMPL R12, $0x00010100 5116 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy 5117 CMPL R12, $0x0100ffff 5118 JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy 5119 LEAL -16842747(R12), R12 5120 MOVW $0x001d, (AX) 5121 MOVW $0xfffb, 2(AX) 5122 MOVB $0xff, 4(AX) 5123 ADDQ $0x05, AX 5124 JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy 5125 5126repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: 
5127 LEAL -65536(R12), R12 5128 MOVL R12, R8 5129 MOVW $0x001d, (AX) 5130 MOVW R12, 2(AX) 5131 SARL $0x10, R8 5132 MOVB R8, 4(AX) 5133 ADDQ $0x05, AX 5134 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5135 5136repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: 5137 LEAL -256(R12), R12 5138 MOVW $0x0019, (AX) 5139 MOVW R12, 2(AX) 5140 ADDQ $0x04, AX 5141 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5142 5143repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: 5144 LEAL -4(R12), R12 5145 MOVW $0x0015, (AX) 5146 MOVB R12, 2(AX) 5147 ADDQ $0x03, AX 5148 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5149 5150repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: 5151 SHLL $0x02, R12 5152 ORL $0x01, R12 5153 MOVW R12, (AX) 5154 ADDQ $0x02, AX 5155 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5156 5157repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: 5158 XORQ SI, SI 5159 LEAL 1(SI)(R12*4), R12 5160 MOVB R8, 1(AX) 5161 SARL $0x08, R8 5162 SHLL $0x05, R8 5163 ORL R8, R12 5164 MOVB R12, (AX) 5165 ADDQ $0x02, AX 5166 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5167 JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm 5168 5169four_bytes_remain_match_nolit_encodeBetterBlockAsm: 5170 TESTL R12, R12 5171 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm 5172 MOVB $0x03, BL 5173 LEAL -4(BX)(R12*4), R12 5174 MOVB R12, (AX) 5175 MOVL R8, 1(AX) 5176 ADDQ $0x05, AX 5177 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5178 5179two_byte_offset_match_nolit_encodeBetterBlockAsm: 5180 CMPL R12, $0x40 5181 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm 5182 MOVB $0xee, (AX) 5183 MOVW R8, 1(AX) 5184 LEAL -60(R12), R12 5185 ADDQ $0x03, AX 5186 5187 // emitRepeat 5188emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5189 MOVL R12, SI 5190 LEAL -4(R12), R12 5191 CMPL SI, $0x08 5192 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short 5193 CMPL SI, $0x0c 5194 JGE 
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short 5195 CMPL R8, $0x00000800 5196 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short 5197 5198cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5199 CMPL R12, $0x00000104 5200 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short 5201 CMPL R12, $0x00010100 5202 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short 5203 CMPL R12, $0x0100ffff 5204 JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short 5205 LEAL -16842747(R12), R12 5206 MOVW $0x001d, (AX) 5207 MOVW $0xfffb, 2(AX) 5208 MOVB $0xff, 4(AX) 5209 ADDQ $0x05, AX 5210 JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short 5211 5212repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5213 LEAL -65536(R12), R12 5214 MOVL R12, R8 5215 MOVW $0x001d, (AX) 5216 MOVW R12, 2(AX) 5217 SARL $0x10, R8 5218 MOVB R8, 4(AX) 5219 ADDQ $0x05, AX 5220 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5221 5222repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5223 LEAL -256(R12), R12 5224 MOVW $0x0019, (AX) 5225 MOVW R12, 2(AX) 5226 ADDQ $0x04, AX 5227 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5228 5229repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5230 LEAL -4(R12), R12 5231 MOVW $0x0015, (AX) 5232 MOVB R12, 2(AX) 5233 ADDQ $0x03, AX 5234 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5235 5236repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5237 SHLL $0x02, R12 5238 ORL $0x01, R12 5239 MOVW R12, (AX) 5240 ADDQ $0x02, AX 5241 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5242 5243repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5244 XORQ SI, SI 5245 LEAL 1(SI)(R12*4), R12 5246 MOVB R8, 1(AX) 5247 SARL $0x08, R8 5248 SHLL $0x05, R8 5249 ORL R8, R12 5250 MOVB R12, (AX) 5251 ADDQ $0x02, AX 5252 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5253 JMP 
two_byte_offset_match_nolit_encodeBetterBlockAsm 5254 5255two_byte_offset_short_match_nolit_encodeBetterBlockAsm: 5256 CMPL R12, $0x0c 5257 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm 5258 CMPL R8, $0x00000800 5259 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm 5260 MOVB $0x01, BL 5261 LEAL -16(BX)(R12*4), R12 5262 MOVB R8, 1(AX) 5263 SHRL $0x08, R8 5264 SHLL $0x05, R8 5265 ORL R8, R12 5266 MOVB R12, (AX) 5267 ADDQ $0x02, AX 5268 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5269 5270emit_copy_three_match_nolit_encodeBetterBlockAsm: 5271 MOVB $0x02, BL 5272 LEAL -4(BX)(R12*4), R12 5273 MOVB R12, (AX) 5274 MOVW R8, 1(AX) 5275 ADDQ $0x03, AX 5276 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5277 5278match_is_repeat_encodeBetterBlockAsm: 5279 MOVL 12(SP), SI 5280 CMPL SI, DI 5281 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm 5282 MOVL DI, R9 5283 MOVL DI, 12(SP) 5284 LEAQ (DX)(SI*1), R10 5285 SUBL SI, R9 5286 LEAL -1(R9), SI 5287 CMPL SI, $0x3c 5288 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm 5289 CMPL SI, $0x00000100 5290 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm 5291 CMPL SI, $0x00010000 5292 JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm 5293 CMPL SI, $0x01000000 5294 JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm 5295 MOVB $0xfc, (AX) 5296 MOVL SI, 1(AX) 5297 ADDQ $0x05, AX 5298 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5299 5300four_bytes_match_emit_repeat_encodeBetterBlockAsm: 5301 MOVL SI, R11 5302 SHRL $0x10, R11 5303 MOVB $0xf8, (AX) 5304 MOVW SI, 1(AX) 5305 MOVB R11, 3(AX) 5306 ADDQ $0x04, AX 5307 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5308 5309three_bytes_match_emit_repeat_encodeBetterBlockAsm: 5310 MOVB $0xf4, (AX) 5311 MOVW SI, 1(AX) 5312 ADDQ $0x03, AX 5313 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5314 5315two_bytes_match_emit_repeat_encodeBetterBlockAsm: 5316 MOVB $0xf0, (AX) 5317 MOVB SI, 1(AX) 5318 ADDQ $0x02, AX 5319 CMPL SI, $0x40 
5320 JL memmove_match_emit_repeat_encodeBetterBlockAsm 5321 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5322 5323one_byte_match_emit_repeat_encodeBetterBlockAsm: 5324 SHLB $0x02, SI 5325 MOVB SI, (AX) 5326 ADDQ $0x01, AX 5327 5328memmove_match_emit_repeat_encodeBetterBlockAsm: 5329 LEAQ (AX)(R9*1), SI 5330 5331 // genMemMoveShort 5332 CMPQ R9, $0x04 5333 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 5334 CMPQ R9, $0x08 5335 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 5336 CMPQ R9, $0x10 5337 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 5338 CMPQ R9, $0x20 5339 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 5340 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 5341 5342emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: 5343 MOVL (R10), R11 5344 MOVL R11, (AX) 5345 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5346 5347emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: 5348 MOVL (R10), R11 5349 MOVL -4(R10)(R9*1), R10 5350 MOVL R11, (AX) 5351 MOVL R10, -4(AX)(R9*1) 5352 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5353 5354emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: 5355 MOVQ (R10), R11 5356 MOVQ -8(R10)(R9*1), R10 5357 MOVQ R11, (AX) 5358 MOVQ R10, -8(AX)(R9*1) 5359 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5360 5361emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: 5362 MOVOU (R10), X0 5363 MOVOU -16(R10)(R9*1), X1 5364 MOVOU X0, (AX) 5365 MOVOU X1, -16(AX)(R9*1) 5366 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5367 5368emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: 5369 MOVOU (R10), X0 5370 MOVOU 16(R10), X1 5371 MOVOU -32(R10)(R9*1), X2 5372 MOVOU -16(R10)(R9*1), 
X3 5373 MOVOU X0, (AX) 5374 MOVOU X1, 16(AX) 5375 MOVOU X2, -32(AX)(R9*1) 5376 MOVOU X3, -16(AX)(R9*1) 5377 5378memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: 5379 MOVQ SI, AX 5380 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm 5381 5382memmove_long_match_emit_repeat_encodeBetterBlockAsm: 5383 LEAQ (AX)(R9*1), SI 5384 5385 // genMemMoveLong 5386 MOVOU (R10), X0 5387 MOVOU 16(R10), X1 5388 MOVOU -32(R10)(R9*1), X2 5389 MOVOU -16(R10)(R9*1), X3 5390 MOVQ R9, R13 5391 SHRQ $0x05, R13 5392 MOVQ AX, R11 5393 ANDL $0x0000001f, R11 5394 MOVQ $0x00000040, R14 5395 SUBQ R11, R14 5396 DECQ R13 5397 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 5398 LEAQ -32(R10)(R14*1), R11 5399 LEAQ -32(AX)(R14*1), R15 5400 5401emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: 5402 MOVOU (R11), X4 5403 MOVOU 16(R11), X5 5404 MOVOA X4, (R15) 5405 MOVOA X5, 16(R15) 5406 ADDQ $0x20, R15 5407 ADDQ $0x20, R11 5408 ADDQ $0x20, R14 5409 DECQ R13 5410 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back 5411 5412emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: 5413 MOVOU -32(R10)(R14*1), X4 5414 MOVOU -16(R10)(R14*1), X5 5415 MOVOA X4, -32(AX)(R14*1) 5416 MOVOA X5, -16(AX)(R14*1) 5417 ADDQ $0x20, R14 5418 CMPQ R9, R14 5419 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 5420 MOVOU X0, (AX) 5421 MOVOU X1, 16(AX) 5422 MOVOU X2, -32(AX)(R9*1) 5423 MOVOU X3, -16(AX)(R9*1) 5424 MOVQ SI, AX 5425 5426emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: 5427 ADDL R12, CX 5428 ADDL $0x04, R12 5429 MOVL CX, 12(SP) 5430 5431 // emitRepeat 5432emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: 5433 MOVL R12, SI 5434 LEAL -4(R12), R12 5435 CMPL SI, $0x08 5436 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm 5437 CMPL SI, $0x0c 5438 JGE 
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm 5439 CMPL R8, $0x00000800 5440 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm 5441 5442cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: 5443 CMPL R12, $0x00000104 5444 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm 5445 CMPL R12, $0x00010100 5446 JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm 5447 CMPL R12, $0x0100ffff 5448 JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm 5449 LEAL -16842747(R12), R12 5450 MOVW $0x001d, (AX) 5451 MOVW $0xfffb, 2(AX) 5452 MOVB $0xff, 4(AX) 5453 ADDQ $0x05, AX 5454 JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm 5455 5456repeat_five_match_nolit_repeat_encodeBetterBlockAsm: 5457 LEAL -65536(R12), R12 5458 MOVL R12, R8 5459 MOVW $0x001d, (AX) 5460 MOVW R12, 2(AX) 5461 SARL $0x10, R8 5462 MOVB R8, 4(AX) 5463 ADDQ $0x05, AX 5464 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5465 5466repeat_four_match_nolit_repeat_encodeBetterBlockAsm: 5467 LEAL -256(R12), R12 5468 MOVW $0x0019, (AX) 5469 MOVW R12, 2(AX) 5470 ADDQ $0x04, AX 5471 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5472 5473repeat_three_match_nolit_repeat_encodeBetterBlockAsm: 5474 LEAL -4(R12), R12 5475 MOVW $0x0015, (AX) 5476 MOVB R12, 2(AX) 5477 ADDQ $0x03, AX 5478 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5479 5480repeat_two_match_nolit_repeat_encodeBetterBlockAsm: 5481 SHLL $0x02, R12 5482 ORL $0x01, R12 5483 MOVW R12, (AX) 5484 ADDQ $0x02, AX 5485 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5486 5487repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: 5488 XORQ SI, SI 5489 LEAL 1(SI)(R12*4), R12 5490 MOVB R8, 1(AX) 5491 SARL $0x08, R8 5492 SHLL $0x05, R8 5493 ORL R8, R12 5494 MOVB R12, (AX) 5495 ADDQ $0x02, AX 5496 5497match_nolit_emitcopy_end_encodeBetterBlockAsm: 5498 CMPL CX, 8(SP) 5499 JGE emit_remainder_encodeBetterBlockAsm 5500 CMPQ AX, (SP) 5501 JL match_nolit_dst_ok_encodeBetterBlockAsm 5502 MOVQ 
$0x00000000, ret+48(FP) 5503 RET 5504 5505match_nolit_dst_ok_encodeBetterBlockAsm: 5506 MOVQ $0x00cf1bbcdcbfa563, SI 5507 MOVQ $0x9e3779b1, R8 5508 INCL DI 5509 MOVQ (DX)(DI*1), R9 5510 MOVQ R9, R10 5511 MOVQ R9, R11 5512 MOVQ R9, R12 5513 SHRQ $0x08, R11 5514 MOVQ R11, R13 5515 SHRQ $0x10, R12 5516 LEAL 1(DI), R14 5517 LEAL 2(DI), R15 5518 MOVQ -2(DX)(CX*1), R9 5519 SHLQ $0x08, R10 5520 IMULQ SI, R10 5521 SHRQ $0x30, R10 5522 SHLQ $0x08, R13 5523 IMULQ SI, R13 5524 SHRQ $0x30, R13 5525 SHLQ $0x20, R11 5526 IMULQ R8, R11 5527 SHRQ $0x32, R11 5528 SHLQ $0x20, R12 5529 IMULQ R8, R12 5530 SHRQ $0x32, R12 5531 MOVL DI, 24(SP)(R10*4) 5532 MOVL R14, 24(SP)(R13*4) 5533 MOVL R14, 262168(SP)(R11*4) 5534 MOVL R15, 262168(SP)(R12*4) 5535 MOVQ R9, R10 5536 MOVQ R9, R11 5537 SHRQ $0x08, R11 5538 MOVQ R11, R13 5539 LEAL -2(CX), R9 5540 LEAL -1(CX), DI 5541 SHLQ $0x08, R10 5542 IMULQ SI, R10 5543 SHRQ $0x30, R10 5544 SHLQ $0x20, R11 5545 IMULQ R8, R11 5546 SHRQ $0x32, R11 5547 SHLQ $0x08, R13 5548 IMULQ SI, R13 5549 SHRQ $0x30, R13 5550 MOVL R9, 24(SP)(R10*4) 5551 MOVL DI, 262168(SP)(R11*4) 5552 MOVL DI, 24(SP)(R13*4) 5553 JMP search_loop_encodeBetterBlockAsm 5554 5555emit_remainder_encodeBetterBlockAsm: 5556 MOVQ src_len+32(FP), CX 5557 SUBL 12(SP), CX 5558 LEAQ 5(AX)(CX*1), CX 5559 CMPQ CX, (SP) 5560 JL emit_remainder_ok_encodeBetterBlockAsm 5561 MOVQ $0x00000000, ret+48(FP) 5562 RET 5563 5564emit_remainder_ok_encodeBetterBlockAsm: 5565 MOVQ src_len+32(FP), CX 5566 MOVL 12(SP), BX 5567 CMPL BX, CX 5568 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm 5569 MOVL CX, SI 5570 MOVL CX, 12(SP) 5571 LEAQ (DX)(BX*1), CX 5572 SUBL BX, SI 5573 LEAL -1(SI), DX 5574 CMPL DX, $0x3c 5575 JLT one_byte_emit_remainder_encodeBetterBlockAsm 5576 CMPL DX, $0x00000100 5577 JLT two_bytes_emit_remainder_encodeBetterBlockAsm 5578 CMPL DX, $0x00010000 5579 JLT three_bytes_emit_remainder_encodeBetterBlockAsm 5580 CMPL DX, $0x01000000 5581 JLT four_bytes_emit_remainder_encodeBetterBlockAsm 5582 
MOVB $0xfc, (AX) 5583 MOVL DX, 1(AX) 5584 ADDQ $0x05, AX 5585 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5586 5587four_bytes_emit_remainder_encodeBetterBlockAsm: 5588 MOVL DX, BX 5589 SHRL $0x10, BX 5590 MOVB $0xf8, (AX) 5591 MOVW DX, 1(AX) 5592 MOVB BL, 3(AX) 5593 ADDQ $0x04, AX 5594 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5595 5596three_bytes_emit_remainder_encodeBetterBlockAsm: 5597 MOVB $0xf4, (AX) 5598 MOVW DX, 1(AX) 5599 ADDQ $0x03, AX 5600 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5601 5602two_bytes_emit_remainder_encodeBetterBlockAsm: 5603 MOVB $0xf0, (AX) 5604 MOVB DL, 1(AX) 5605 ADDQ $0x02, AX 5606 CMPL DX, $0x40 5607 JL memmove_emit_remainder_encodeBetterBlockAsm 5608 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5609 5610one_byte_emit_remainder_encodeBetterBlockAsm: 5611 SHLB $0x02, DL 5612 MOVB DL, (AX) 5613 ADDQ $0x01, AX 5614 5615memmove_emit_remainder_encodeBetterBlockAsm: 5616 LEAQ (AX)(SI*1), DX 5617 MOVL SI, BX 5618 5619 // genMemMoveShort 5620 CMPQ BX, $0x04 5621 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4 5622 CMPQ BX, $0x08 5623 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7 5624 CMPQ BX, $0x10 5625 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16 5626 CMPQ BX, $0x20 5627 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32 5628 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 5629 5630emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4: 5631 MOVL (CX), SI 5632 MOVL SI, (AX) 5633 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5634 5635emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: 5636 MOVL (CX), SI 5637 MOVL -4(CX)(BX*1), CX 5638 MOVL SI, (AX) 5639 MOVL CX, -4(AX)(BX*1) 5640 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5641 
5642emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: 5643 MOVQ (CX), SI 5644 MOVQ -8(CX)(BX*1), CX 5645 MOVQ SI, (AX) 5646 MOVQ CX, -8(AX)(BX*1) 5647 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5648 5649emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: 5650 MOVOU (CX), X0 5651 MOVOU -16(CX)(BX*1), X1 5652 MOVOU X0, (AX) 5653 MOVOU X1, -16(AX)(BX*1) 5654 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5655 5656emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: 5657 MOVOU (CX), X0 5658 MOVOU 16(CX), X1 5659 MOVOU -32(CX)(BX*1), X2 5660 MOVOU -16(CX)(BX*1), X3 5661 MOVOU X0, (AX) 5662 MOVOU X1, 16(AX) 5663 MOVOU X2, -32(AX)(BX*1) 5664 MOVOU X3, -16(AX)(BX*1) 5665 5666memmove_end_copy_emit_remainder_encodeBetterBlockAsm: 5667 MOVQ DX, AX 5668 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm 5669 5670memmove_long_emit_remainder_encodeBetterBlockAsm: 5671 LEAQ (AX)(SI*1), DX 5672 MOVL SI, BX 5673 5674 // genMemMoveLong 5675 MOVOU (CX), X0 5676 MOVOU 16(CX), X1 5677 MOVOU -32(CX)(BX*1), X2 5678 MOVOU -16(CX)(BX*1), X3 5679 MOVQ BX, DI 5680 SHRQ $0x05, DI 5681 MOVQ AX, SI 5682 ANDL $0x0000001f, SI 5683 MOVQ $0x00000040, R8 5684 SUBQ SI, R8 5685 DECQ DI 5686 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 5687 LEAQ -32(CX)(R8*1), SI 5688 LEAQ -32(AX)(R8*1), R9 5689 5690emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: 5691 MOVOU (SI), X4 5692 MOVOU 16(SI), X5 5693 MOVOA X4, (R9) 5694 MOVOA X5, 16(R9) 5695 ADDQ $0x20, R9 5696 ADDQ $0x20, SI 5697 ADDQ $0x20, R8 5698 DECQ DI 5699 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back 5700 5701emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: 5702 MOVOU -32(CX)(R8*1), X4 5703 MOVOU -16(CX)(R8*1), X5 5704 MOVOA X4, -32(AX)(R8*1) 5705 MOVOA X5, -16(AX)(R8*1) 5706 ADDQ $0x20, R8 5707 CMPQ BX, 
R8 5708 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 5709 MOVOU X0, (AX) 5710 MOVOU X1, 16(AX) 5711 MOVOU X2, -32(AX)(BX*1) 5712 MOVOU X3, -16(AX)(BX*1) 5713 MOVQ DX, AX 5714 5715emit_literal_done_emit_remainder_encodeBetterBlockAsm: 5716 MOVQ dst_base+0(FP), CX 5717 SUBQ CX, AX 5718 MOVQ AX, ret+48(FP) 5719 RET 5720 5721// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int 5722// Requires: SSE2 5723TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56 5724 MOVQ dst_base+0(FP), AX 5725 MOVQ $0x00000a00, CX 5726 LEAQ 24(SP), DX 5727 PXOR X0, X0 5728 5729zero_loop_encodeBetterBlockAsm4MB: 5730 MOVOU X0, (DX) 5731 MOVOU X0, 16(DX) 5732 MOVOU X0, 32(DX) 5733 MOVOU X0, 48(DX) 5734 MOVOU X0, 64(DX) 5735 MOVOU X0, 80(DX) 5736 MOVOU X0, 96(DX) 5737 MOVOU X0, 112(DX) 5738 ADDQ $0x80, DX 5739 DECQ CX 5740 JNZ zero_loop_encodeBetterBlockAsm4MB 5741 MOVL $0x00000000, 12(SP) 5742 MOVQ src_len+32(FP), CX 5743 LEAQ -6(CX), DX 5744 LEAQ -8(CX), SI 5745 MOVL SI, 8(SP) 5746 SHRQ $0x05, CX 5747 SUBL CX, DX 5748 LEAQ (AX)(DX*1), DX 5749 MOVQ DX, (SP) 5750 MOVL $0x00000001, CX 5751 MOVL $0x00000000, 16(SP) 5752 MOVQ src_base+24(FP), DX 5753 5754search_loop_encodeBetterBlockAsm4MB: 5755 MOVL CX, SI 5756 SUBL 12(SP), SI 5757 SHRL $0x07, SI 5758 CMPL SI, $0x63 5759 JLE check_maxskip_ok_encodeBetterBlockAsm4MB 5760 LEAL 100(CX), SI 5761 JMP check_maxskip_cont_encodeBetterBlockAsm4MB 5762 5763check_maxskip_ok_encodeBetterBlockAsm4MB: 5764 LEAL 1(CX)(SI*1), SI 5765 5766check_maxskip_cont_encodeBetterBlockAsm4MB: 5767 CMPL SI, 8(SP) 5768 JGE emit_remainder_encodeBetterBlockAsm4MB 5769 MOVQ (DX)(CX*1), DI 5770 MOVL SI, 20(SP) 5771 MOVQ $0x00cf1bbcdcbfa563, R9 5772 MOVQ $0x9e3779b1, SI 5773 MOVQ DI, R10 5774 MOVQ DI, R11 5775 SHLQ $0x08, R10 5776 IMULQ R9, R10 5777 SHRQ $0x30, R10 5778 SHLQ $0x20, R11 5779 IMULQ SI, R11 5780 SHRQ $0x32, R11 5781 MOVL 24(SP)(R10*4), SI 5782 MOVL 262168(SP)(R11*4), R8 5783 MOVL CX, 24(SP)(R10*4) 5784 MOVL CX, 
262168(SP)(R11*4) 5785 CMPL (DX)(SI*1), DI 5786 JEQ candidate_match_encodeBetterBlockAsm4MB 5787 CMPL (DX)(R8*1), DI 5788 JEQ candidateS_match_encodeBetterBlockAsm4MB 5789 MOVL 20(SP), CX 5790 JMP search_loop_encodeBetterBlockAsm4MB 5791 5792candidateS_match_encodeBetterBlockAsm4MB: 5793 SHRQ $0x08, DI 5794 MOVQ DI, R10 5795 SHLQ $0x08, R10 5796 IMULQ R9, R10 5797 SHRQ $0x30, R10 5798 MOVL 24(SP)(R10*4), SI 5799 INCL CX 5800 MOVL CX, 24(SP)(R10*4) 5801 CMPL (DX)(SI*1), DI 5802 JEQ candidate_match_encodeBetterBlockAsm4MB 5803 DECL CX 5804 MOVL R8, SI 5805 5806candidate_match_encodeBetterBlockAsm4MB: 5807 MOVL 12(SP), DI 5808 TESTL SI, SI 5809 JZ match_extend_back_end_encodeBetterBlockAsm4MB 5810 5811match_extend_back_loop_encodeBetterBlockAsm4MB: 5812 CMPL CX, DI 5813 JLE match_extend_back_end_encodeBetterBlockAsm4MB 5814 MOVB -1(DX)(SI*1), BL 5815 MOVB -1(DX)(CX*1), R8 5816 CMPB BL, R8 5817 JNE match_extend_back_end_encodeBetterBlockAsm4MB 5818 LEAL -1(CX), CX 5819 DECL SI 5820 JZ match_extend_back_end_encodeBetterBlockAsm4MB 5821 JMP match_extend_back_loop_encodeBetterBlockAsm4MB 5822 5823match_extend_back_end_encodeBetterBlockAsm4MB: 5824 MOVL CX, DI 5825 SUBL 12(SP), DI 5826 LEAQ 4(AX)(DI*1), DI 5827 CMPQ DI, (SP) 5828 JL match_dst_size_check_encodeBetterBlockAsm4MB 5829 MOVQ $0x00000000, ret+48(FP) 5830 RET 5831 5832match_dst_size_check_encodeBetterBlockAsm4MB: 5833 MOVL CX, DI 5834 ADDL $0x04, CX 5835 ADDL $0x04, SI 5836 MOVQ src_len+32(FP), R8 5837 SUBL CX, R8 5838 LEAQ (DX)(CX*1), R9 5839 LEAQ (DX)(SI*1), R10 5840 5841 // matchLen 5842 XORL R12, R12 5843 CMPL R8, $0x08 5844 JL matchlen_single_match_nolit_encodeBetterBlockAsm4MB 5845 5846matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: 5847 MOVQ (R9)(R12*1), R11 5848 XORQ (R10)(R12*1), R11 5849 TESTQ R11, R11 5850 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB 5851 BSFQ R11, R11 5852 SARQ $0x03, R11 5853 LEAL (R12)(R11*1), R12 5854 JMP match_nolit_end_encodeBetterBlockAsm4MB 5855 
5856matchlen_loop_match_nolit_encodeBetterBlockAsm4MB: 5857 LEAL -8(R8), R8 5858 LEAL 8(R12), R12 5859 CMPL R8, $0x08 5860 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB 5861 5862matchlen_single_match_nolit_encodeBetterBlockAsm4MB: 5863 TESTL R8, R8 5864 JZ match_nolit_end_encodeBetterBlockAsm4MB 5865 5866matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB: 5867 MOVB (R9)(R12*1), R11 5868 CMPB (R10)(R12*1), R11 5869 JNE match_nolit_end_encodeBetterBlockAsm4MB 5870 LEAL 1(R12), R12 5871 DECL R8 5872 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB 5873 5874match_nolit_end_encodeBetterBlockAsm4MB: 5875 MOVL CX, R8 5876 SUBL SI, R8 5877 5878 // Check if repeat 5879 CMPL 16(SP), R8 5880 JEQ match_is_repeat_encodeBetterBlockAsm4MB 5881 CMPL R12, $0x01 5882 JG match_length_ok_encodeBetterBlockAsm4MB 5883 CMPL R8, $0x0000ffff 5884 JLE match_length_ok_encodeBetterBlockAsm4MB 5885 MOVL 20(SP), CX 5886 INCL CX 5887 JMP search_loop_encodeBetterBlockAsm4MB 5888 5889match_length_ok_encodeBetterBlockAsm4MB: 5890 MOVL R8, 16(SP) 5891 MOVL 12(SP), SI 5892 CMPL SI, DI 5893 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB 5894 MOVL DI, R9 5895 MOVL DI, 12(SP) 5896 LEAQ (DX)(SI*1), R10 5897 SUBL SI, R9 5898 LEAL -1(R9), SI 5899 CMPL SI, $0x3c 5900 JLT one_byte_match_emit_encodeBetterBlockAsm4MB 5901 CMPL SI, $0x00000100 5902 JLT two_bytes_match_emit_encodeBetterBlockAsm4MB 5903 CMPL SI, $0x00010000 5904 JLT three_bytes_match_emit_encodeBetterBlockAsm4MB 5905 MOVL SI, R11 5906 SHRL $0x10, R11 5907 MOVB $0xf8, (AX) 5908 MOVW SI, 1(AX) 5909 MOVB R11, 3(AX) 5910 ADDQ $0x04, AX 5911 JMP memmove_long_match_emit_encodeBetterBlockAsm4MB 5912 5913three_bytes_match_emit_encodeBetterBlockAsm4MB: 5914 MOVB $0xf4, (AX) 5915 MOVW SI, 1(AX) 5916 ADDQ $0x03, AX 5917 JMP memmove_long_match_emit_encodeBetterBlockAsm4MB 5918 5919two_bytes_match_emit_encodeBetterBlockAsm4MB: 5920 MOVB $0xf0, (AX) 5921 MOVB SI, 1(AX) 5922 ADDQ $0x02, AX 5923 CMPL SI, $0x40 5924 
JL memmove_match_emit_encodeBetterBlockAsm4MB 5925 JMP memmove_long_match_emit_encodeBetterBlockAsm4MB 5926 5927one_byte_match_emit_encodeBetterBlockAsm4MB: 5928 SHLB $0x02, SI 5929 MOVB SI, (AX) 5930 ADDQ $0x01, AX 5931 5932memmove_match_emit_encodeBetterBlockAsm4MB: 5933 LEAQ (AX)(R9*1), SI 5934 5935 // genMemMoveShort 5936 CMPQ R9, $0x04 5937 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 5938 CMPQ R9, $0x08 5939 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 5940 CMPQ R9, $0x10 5941 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 5942 CMPQ R9, $0x20 5943 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 5944 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 5945 5946emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: 5947 MOVL (R10), R11 5948 MOVL R11, (AX) 5949 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB 5950 5951emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: 5952 MOVL (R10), R11 5953 MOVL -4(R10)(R9*1), R10 5954 MOVL R11, (AX) 5955 MOVL R10, -4(AX)(R9*1) 5956 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB 5957 5958emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: 5959 MOVQ (R10), R11 5960 MOVQ -8(R10)(R9*1), R10 5961 MOVQ R11, (AX) 5962 MOVQ R10, -8(AX)(R9*1) 5963 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB 5964 5965emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: 5966 MOVOU (R10), X0 5967 MOVOU -16(R10)(R9*1), X1 5968 MOVOU X0, (AX) 5969 MOVOU X1, -16(AX)(R9*1) 5970 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB 5971 5972emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: 5973 MOVOU (R10), X0 5974 MOVOU 16(R10), X1 5975 MOVOU -32(R10)(R9*1), X2 5976 MOVOU -16(R10)(R9*1), X3 5977 MOVOU X0, (AX) 5978 MOVOU X1, 16(AX) 5979 MOVOU X2, -32(AX)(R9*1) 
5980 MOVOU X3, -16(AX)(R9*1) 5981 5982memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: 5983 MOVQ SI, AX 5984 JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB 5985 5986memmove_long_match_emit_encodeBetterBlockAsm4MB: 5987 LEAQ (AX)(R9*1), SI 5988 5989 // genMemMoveLong 5990 MOVOU (R10), X0 5991 MOVOU 16(R10), X1 5992 MOVOU -32(R10)(R9*1), X2 5993 MOVOU -16(R10)(R9*1), X3 5994 MOVQ R9, R13 5995 SHRQ $0x05, R13 5996 MOVQ AX, R11 5997 ANDL $0x0000001f, R11 5998 MOVQ $0x00000040, R14 5999 SUBQ R11, R14 6000 DECQ R13 6001 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6002 LEAQ -32(R10)(R14*1), R11 6003 LEAQ -32(AX)(R14*1), R15 6004 6005emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: 6006 MOVOU (R11), X4 6007 MOVOU 16(R11), X5 6008 MOVOA X4, (R15) 6009 MOVOA X5, 16(R15) 6010 ADDQ $0x20, R15 6011 ADDQ $0x20, R11 6012 ADDQ $0x20, R14 6013 DECQ R13 6014 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back 6015 6016emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: 6017 MOVOU -32(R10)(R14*1), X4 6018 MOVOU -16(R10)(R14*1), X5 6019 MOVOA X4, -32(AX)(R14*1) 6020 MOVOA X5, -16(AX)(R14*1) 6021 ADDQ $0x20, R14 6022 CMPQ R9, R14 6023 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6024 MOVOU X0, (AX) 6025 MOVOU X1, 16(AX) 6026 MOVOU X2, -32(AX)(R9*1) 6027 MOVOU X3, -16(AX)(R9*1) 6028 MOVQ SI, AX 6029 6030emit_literal_done_match_emit_encodeBetterBlockAsm4MB: 6031 ADDL R12, CX 6032 ADDL $0x04, R12 6033 MOVL CX, 12(SP) 6034 6035 // emitCopy 6036 CMPL R8, $0x00010000 6037 JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB 6038 6039four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB: 6040 CMPL R12, $0x40 6041 JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB 6042 MOVB $0xff, (AX) 6043 MOVL R8, 1(AX) 6044 LEAL -64(R12), R12 6045 ADDQ $0x05, AX 6046 CMPL R12, $0x04 6047 JL 
four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB 6048 6049 // emitRepeat 6050 MOVL R12, SI 6051 LEAL -4(R12), R12 6052 CMPL SI, $0x08 6053 JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy 6054 CMPL SI, $0x0c 6055 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy 6056 CMPL R8, $0x00000800 6057 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy 6058 6059cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: 6060 CMPL R12, $0x00000104 6061 JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy 6062 CMPL R12, $0x00010100 6063 JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy 6064 LEAL -65536(R12), R12 6065 MOVL R12, R8 6066 MOVW $0x001d, (AX) 6067 MOVW R12, 2(AX) 6068 SARL $0x10, R8 6069 MOVB R8, 4(AX) 6070 ADDQ $0x05, AX 6071 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6072 6073repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: 6074 LEAL -256(R12), R12 6075 MOVW $0x0019, (AX) 6076 MOVW R12, 2(AX) 6077 ADDQ $0x04, AX 6078 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6079 6080repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: 6081 LEAL -4(R12), R12 6082 MOVW $0x0015, (AX) 6083 MOVB R12, 2(AX) 6084 ADDQ $0x03, AX 6085 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6086 6087repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: 6088 SHLL $0x02, R12 6089 ORL $0x01, R12 6090 MOVW R12, (AX) 6091 ADDQ $0x02, AX 6092 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6093 6094repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: 6095 XORQ SI, SI 6096 LEAL 1(SI)(R12*4), R12 6097 MOVB R8, 1(AX) 6098 SARL $0x08, R8 6099 SHLL $0x05, R8 6100 ORL R8, R12 6101 MOVB R12, (AX) 6102 ADDQ $0x02, AX 6103 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6104 JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB 6105 6106four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: 6107 TESTL R12, R12 6108 JZ 
match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6109 MOVB $0x03, BL 6110 LEAL -4(BX)(R12*4), R12 6111 MOVB R12, (AX) 6112 MOVL R8, 1(AX) 6113 ADDQ $0x05, AX 6114 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6115 6116two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: 6117 CMPL R12, $0x40 6118 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB 6119 MOVB $0xee, (AX) 6120 MOVW R8, 1(AX) 6121 LEAL -60(R12), R12 6122 ADDQ $0x03, AX 6123 6124 // emitRepeat 6125 MOVL R12, SI 6126 LEAL -4(R12), R12 6127 CMPL SI, $0x08 6128 JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short 6129 CMPL SI, $0x0c 6130 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short 6131 CMPL R8, $0x00000800 6132 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short 6133 6134cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: 6135 CMPL R12, $0x00000104 6136 JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short 6137 CMPL R12, $0x00010100 6138 JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short 6139 LEAL -65536(R12), R12 6140 MOVL R12, R8 6141 MOVW $0x001d, (AX) 6142 MOVW R12, 2(AX) 6143 SARL $0x10, R8 6144 MOVB R8, 4(AX) 6145 ADDQ $0x05, AX 6146 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6147 6148repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: 6149 LEAL -256(R12), R12 6150 MOVW $0x0019, (AX) 6151 MOVW R12, 2(AX) 6152 ADDQ $0x04, AX 6153 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6154 6155repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: 6156 LEAL -4(R12), R12 6157 MOVW $0x0015, (AX) 6158 MOVB R12, 2(AX) 6159 ADDQ $0x03, AX 6160 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6161 6162repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: 6163 SHLL $0x02, R12 6164 ORL $0x01, R12 6165 MOVW R12, (AX) 6166 ADDQ $0x02, AX 6167 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6168 
6169repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: 6170 XORQ SI, SI 6171 LEAL 1(SI)(R12*4), R12 6172 MOVB R8, 1(AX) 6173 SARL $0x08, R8 6174 SHLL $0x05, R8 6175 ORL R8, R12 6176 MOVB R12, (AX) 6177 ADDQ $0x02, AX 6178 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6179 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB 6180 6181two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: 6182 CMPL R12, $0x0c 6183 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB 6184 CMPL R8, $0x00000800 6185 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB 6186 MOVB $0x01, BL 6187 LEAL -16(BX)(R12*4), R12 6188 MOVB R8, 1(AX) 6189 SHRL $0x08, R8 6190 SHLL $0x05, R8 6191 ORL R8, R12 6192 MOVB R12, (AX) 6193 ADDQ $0x02, AX 6194 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6195 6196emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: 6197 MOVB $0x02, BL 6198 LEAL -4(BX)(R12*4), R12 6199 MOVB R12, (AX) 6200 MOVW R8, 1(AX) 6201 ADDQ $0x03, AX 6202 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6203 6204match_is_repeat_encodeBetterBlockAsm4MB: 6205 MOVL 12(SP), SI 6206 CMPL SI, DI 6207 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB 6208 MOVL DI, R9 6209 MOVL DI, 12(SP) 6210 LEAQ (DX)(SI*1), R10 6211 SUBL SI, R9 6212 LEAL -1(R9), SI 6213 CMPL SI, $0x3c 6214 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB 6215 CMPL SI, $0x00000100 6216 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB 6217 CMPL SI, $0x00010000 6218 JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB 6219 MOVL SI, R11 6220 SHRL $0x10, R11 6221 MOVB $0xf8, (AX) 6222 MOVW SI, 1(AX) 6223 MOVB R11, 3(AX) 6224 ADDQ $0x04, AX 6225 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB 6226 6227three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: 6228 MOVB $0xf4, (AX) 6229 MOVW SI, 1(AX) 6230 ADDQ $0x03, AX 6231 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB 6232 6233two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: 
6234 MOVB $0xf0, (AX) 6235 MOVB SI, 1(AX) 6236 ADDQ $0x02, AX 6237 CMPL SI, $0x40 6238 JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB 6239 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB 6240 6241one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: 6242 SHLB $0x02, SI 6243 MOVB SI, (AX) 6244 ADDQ $0x01, AX 6245 6246memmove_match_emit_repeat_encodeBetterBlockAsm4MB: 6247 LEAQ (AX)(R9*1), SI 6248 6249 // genMemMoveShort 6250 CMPQ R9, $0x04 6251 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 6252 CMPQ R9, $0x08 6253 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 6254 CMPQ R9, $0x10 6255 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 6256 CMPQ R9, $0x20 6257 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 6258 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 6259 6260emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: 6261 MOVL (R10), R11 6262 MOVL R11, (AX) 6263 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB 6264 6265emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: 6266 MOVL (R10), R11 6267 MOVL -4(R10)(R9*1), R10 6268 MOVL R11, (AX) 6269 MOVL R10, -4(AX)(R9*1) 6270 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB 6271 6272emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: 6273 MOVQ (R10), R11 6274 MOVQ -8(R10)(R9*1), R10 6275 MOVQ R11, (AX) 6276 MOVQ R10, -8(AX)(R9*1) 6277 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB 6278 6279emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: 6280 MOVOU (R10), X0 6281 MOVOU -16(R10)(R9*1), X1 6282 MOVOU X0, (AX) 6283 MOVOU X1, -16(AX)(R9*1) 6284 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB 6285 
6286emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: 6287 MOVOU (R10), X0 6288 MOVOU 16(R10), X1 6289 MOVOU -32(R10)(R9*1), X2 6290 MOVOU -16(R10)(R9*1), X3 6291 MOVOU X0, (AX) 6292 MOVOU X1, 16(AX) 6293 MOVOU X2, -32(AX)(R9*1) 6294 MOVOU X3, -16(AX)(R9*1) 6295 6296memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: 6297 MOVQ SI, AX 6298 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB 6299 6300memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: 6301 LEAQ (AX)(R9*1), SI 6302 6303 // genMemMoveLong 6304 MOVOU (R10), X0 6305 MOVOU 16(R10), X1 6306 MOVOU -32(R10)(R9*1), X2 6307 MOVOU -16(R10)(R9*1), X3 6308 MOVQ R9, R13 6309 SHRQ $0x05, R13 6310 MOVQ AX, R11 6311 ANDL $0x0000001f, R11 6312 MOVQ $0x00000040, R14 6313 SUBQ R11, R14 6314 DECQ R13 6315 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6316 LEAQ -32(R10)(R14*1), R11 6317 LEAQ -32(AX)(R14*1), R15 6318 6319emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: 6320 MOVOU (R11), X4 6321 MOVOU 16(R11), X5 6322 MOVOA X4, (R15) 6323 MOVOA X5, 16(R15) 6324 ADDQ $0x20, R15 6325 ADDQ $0x20, R11 6326 ADDQ $0x20, R14 6327 DECQ R13 6328 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back 6329 6330emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: 6331 MOVOU -32(R10)(R14*1), X4 6332 MOVOU -16(R10)(R14*1), X5 6333 MOVOA X4, -32(AX)(R14*1) 6334 MOVOA X5, -16(AX)(R14*1) 6335 ADDQ $0x20, R14 6336 CMPQ R9, R14 6337 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6338 MOVOU X0, (AX) 6339 MOVOU X1, 16(AX) 6340 MOVOU X2, -32(AX)(R9*1) 6341 MOVOU X3, -16(AX)(R9*1) 6342 MOVQ SI, AX 6343 6344emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: 6345 ADDL R12, CX 6346 ADDL $0x04, R12 6347 MOVL CX, 12(SP) 6348 6349 // emitRepeat 6350 MOVL R12, SI 6351 LEAL -4(R12), R12 6352 
CMPL SI, $0x08 6353 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB 6354 CMPL SI, $0x0c 6355 JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB 6356 CMPL R8, $0x00000800 6357 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB 6358 6359cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: 6360 CMPL R12, $0x00000104 6361 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB 6362 CMPL R12, $0x00010100 6363 JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB 6364 LEAL -65536(R12), R12 6365 MOVL R12, R8 6366 MOVW $0x001d, (AX) 6367 MOVW R12, 2(AX) 6368 SARL $0x10, R8 6369 MOVB R8, 4(AX) 6370 ADDQ $0x05, AX 6371 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6372 6373repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: 6374 LEAL -256(R12), R12 6375 MOVW $0x0019, (AX) 6376 MOVW R12, 2(AX) 6377 ADDQ $0x04, AX 6378 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6379 6380repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: 6381 LEAL -4(R12), R12 6382 MOVW $0x0015, (AX) 6383 MOVB R12, 2(AX) 6384 ADDQ $0x03, AX 6385 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6386 6387repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: 6388 SHLL $0x02, R12 6389 ORL $0x01, R12 6390 MOVW R12, (AX) 6391 ADDQ $0x02, AX 6392 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB 6393 6394repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: 6395 XORQ SI, SI 6396 LEAL 1(SI)(R12*4), R12 6397 MOVB R8, 1(AX) 6398 SARL $0x08, R8 6399 SHLL $0x05, R8 6400 ORL R8, R12 6401 MOVB R12, (AX) 6402 ADDQ $0x02, AX 6403 6404match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: 6405 CMPL CX, 8(SP) 6406 JGE emit_remainder_encodeBetterBlockAsm4MB 6407 CMPQ AX, (SP) 6408 JL match_nolit_dst_ok_encodeBetterBlockAsm4MB 6409 MOVQ $0x00000000, ret+48(FP) 6410 RET 6411 6412match_nolit_dst_ok_encodeBetterBlockAsm4MB: 6413 MOVQ $0x00cf1bbcdcbfa563, SI 6414 MOVQ $0x9e3779b1, R8 6415 INCL DI 6416 MOVQ (DX)(DI*1), R9 6417 MOVQ 
R9, R10 6418 MOVQ R9, R11 6419 MOVQ R9, R12 6420 SHRQ $0x08, R11 6421 MOVQ R11, R13 6422 SHRQ $0x10, R12 6423 LEAL 1(DI), R14 6424 LEAL 2(DI), R15 6425 MOVQ -2(DX)(CX*1), R9 6426 SHLQ $0x08, R10 6427 IMULQ SI, R10 6428 SHRQ $0x30, R10 6429 SHLQ $0x08, R13 6430 IMULQ SI, R13 6431 SHRQ $0x30, R13 6432 SHLQ $0x20, R11 6433 IMULQ R8, R11 6434 SHRQ $0x32, R11 6435 SHLQ $0x20, R12 6436 IMULQ R8, R12 6437 SHRQ $0x32, R12 6438 MOVL DI, 24(SP)(R10*4) 6439 MOVL R14, 24(SP)(R13*4) 6440 MOVL R14, 262168(SP)(R11*4) 6441 MOVL R15, 262168(SP)(R12*4) 6442 MOVQ R9, R10 6443 MOVQ R9, R11 6444 SHRQ $0x08, R11 6445 MOVQ R11, R13 6446 LEAL -2(CX), R9 6447 LEAL -1(CX), DI 6448 SHLQ $0x08, R10 6449 IMULQ SI, R10 6450 SHRQ $0x30, R10 6451 SHLQ $0x20, R11 6452 IMULQ R8, R11 6453 SHRQ $0x32, R11 6454 SHLQ $0x08, R13 6455 IMULQ SI, R13 6456 SHRQ $0x30, R13 6457 MOVL R9, 24(SP)(R10*4) 6458 MOVL DI, 262168(SP)(R11*4) 6459 MOVL DI, 24(SP)(R13*4) 6460 JMP search_loop_encodeBetterBlockAsm4MB 6461 6462emit_remainder_encodeBetterBlockAsm4MB: 6463 MOVQ src_len+32(FP), CX 6464 SUBL 12(SP), CX 6465 LEAQ 4(AX)(CX*1), CX 6466 CMPQ CX, (SP) 6467 JL emit_remainder_ok_encodeBetterBlockAsm4MB 6468 MOVQ $0x00000000, ret+48(FP) 6469 RET 6470 6471emit_remainder_ok_encodeBetterBlockAsm4MB: 6472 MOVQ src_len+32(FP), CX 6473 MOVL 12(SP), BX 6474 CMPL BX, CX 6475 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB 6476 MOVL CX, SI 6477 MOVL CX, 12(SP) 6478 LEAQ (DX)(BX*1), CX 6479 SUBL BX, SI 6480 LEAL -1(SI), DX 6481 CMPL DX, $0x3c 6482 JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB 6483 CMPL DX, $0x00000100 6484 JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB 6485 CMPL DX, $0x00010000 6486 JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB 6487 MOVL DX, BX 6488 SHRL $0x10, BX 6489 MOVB $0xf8, (AX) 6490 MOVW DX, 1(AX) 6491 MOVB BL, 3(AX) 6492 ADDQ $0x04, AX 6493 JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB 6494 6495three_bytes_emit_remainder_encodeBetterBlockAsm4MB: 6496 MOVB 
$0xf4, (AX) 6497 MOVW DX, 1(AX) 6498 ADDQ $0x03, AX 6499 JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB 6500 6501two_bytes_emit_remainder_encodeBetterBlockAsm4MB: 6502 MOVB $0xf0, (AX) 6503 MOVB DL, 1(AX) 6504 ADDQ $0x02, AX 6505 CMPL DX, $0x40 6506 JL memmove_emit_remainder_encodeBetterBlockAsm4MB 6507 JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB 6508 6509one_byte_emit_remainder_encodeBetterBlockAsm4MB: 6510 SHLB $0x02, DL 6511 MOVB DL, (AX) 6512 ADDQ $0x01, AX 6513 6514memmove_emit_remainder_encodeBetterBlockAsm4MB: 6515 LEAQ (AX)(SI*1), DX 6516 MOVL SI, BX 6517 6518 // genMemMoveShort 6519 CMPQ BX, $0x04 6520 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4 6521 CMPQ BX, $0x08 6522 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7 6523 CMPQ BX, $0x10 6524 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16 6525 CMPQ BX, $0x20 6526 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32 6527 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 6528 6529emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4: 6530 MOVL (CX), SI 6531 MOVL SI, (AX) 6532 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB 6533 6534emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: 6535 MOVL (CX), SI 6536 MOVL -4(CX)(BX*1), CX 6537 MOVL SI, (AX) 6538 MOVL CX, -4(AX)(BX*1) 6539 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB 6540 6541emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: 6542 MOVQ (CX), SI 6543 MOVQ -8(CX)(BX*1), CX 6544 MOVQ SI, (AX) 6545 MOVQ CX, -8(AX)(BX*1) 6546 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB 6547 6548emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: 6549 MOVOU (CX), X0 6550 MOVOU -16(CX)(BX*1), X1 6551 MOVOU X0, (AX) 6552 MOVOU X1, -16(AX)(BX*1) 
6553 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB 6554 6555emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: 6556 MOVOU (CX), X0 6557 MOVOU 16(CX), X1 6558 MOVOU -32(CX)(BX*1), X2 6559 MOVOU -16(CX)(BX*1), X3 6560 MOVOU X0, (AX) 6561 MOVOU X1, 16(AX) 6562 MOVOU X2, -32(AX)(BX*1) 6563 MOVOU X3, -16(AX)(BX*1) 6564 6565memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: 6566 MOVQ DX, AX 6567 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB 6568 6569memmove_long_emit_remainder_encodeBetterBlockAsm4MB: 6570 LEAQ (AX)(SI*1), DX 6571 MOVL SI, BX 6572 6573 // genMemMoveLong 6574 MOVOU (CX), X0 6575 MOVOU 16(CX), X1 6576 MOVOU -32(CX)(BX*1), X2 6577 MOVOU -16(CX)(BX*1), X3 6578 MOVQ BX, DI 6579 SHRQ $0x05, DI 6580 MOVQ AX, SI 6581 ANDL $0x0000001f, SI 6582 MOVQ $0x00000040, R8 6583 SUBQ SI, R8 6584 DECQ DI 6585 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6586 LEAQ -32(CX)(R8*1), SI 6587 LEAQ -32(AX)(R8*1), R9 6588 6589emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: 6590 MOVOU (SI), X4 6591 MOVOU 16(SI), X5 6592 MOVOA X4, (R9) 6593 MOVOA X5, 16(R9) 6594 ADDQ $0x20, R9 6595 ADDQ $0x20, SI 6596 ADDQ $0x20, R8 6597 DECQ DI 6598 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back 6599 6600emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: 6601 MOVOU -32(CX)(R8*1), X4 6602 MOVOU -16(CX)(R8*1), X5 6603 MOVOA X4, -32(AX)(R8*1) 6604 MOVOA X5, -16(AX)(R8*1) 6605 ADDQ $0x20, R8 6606 CMPQ BX, R8 6607 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 6608 MOVOU X0, (AX) 6609 MOVOU X1, 16(AX) 6610 MOVOU X2, -32(AX)(BX*1) 6611 MOVOU X3, -16(AX)(BX*1) 6612 MOVQ DX, AX 6613 6614emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: 6615 MOVQ dst_base+0(FP), CX 6616 SUBQ CX, AX 6617 MOVQ AX, ret+48(FP) 6618 RET 6619 6620// func 
encodeBetterBlockAsm12B(dst []byte, src []byte) int 6621// Requires: SSE2 6622TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 6623 MOVQ dst_base+0(FP), AX 6624 MOVQ $0x00000280, CX 6625 LEAQ 24(SP), DX 6626 PXOR X0, X0 6627 6628zero_loop_encodeBetterBlockAsm12B: 6629 MOVOU X0, (DX) 6630 MOVOU X0, 16(DX) 6631 MOVOU X0, 32(DX) 6632 MOVOU X0, 48(DX) 6633 MOVOU X0, 64(DX) 6634 MOVOU X0, 80(DX) 6635 MOVOU X0, 96(DX) 6636 MOVOU X0, 112(DX) 6637 ADDQ $0x80, DX 6638 DECQ CX 6639 JNZ zero_loop_encodeBetterBlockAsm12B 6640 MOVL $0x00000000, 12(SP) 6641 MOVQ src_len+32(FP), CX 6642 LEAQ -6(CX), DX 6643 LEAQ -8(CX), SI 6644 MOVL SI, 8(SP) 6645 SHRQ $0x05, CX 6646 SUBL CX, DX 6647 LEAQ (AX)(DX*1), DX 6648 MOVQ DX, (SP) 6649 MOVL $0x00000001, CX 6650 MOVL $0x00000000, 16(SP) 6651 MOVQ src_base+24(FP), DX 6652 6653search_loop_encodeBetterBlockAsm12B: 6654 MOVL CX, SI 6655 SUBL 12(SP), SI 6656 SHRL $0x06, SI 6657 LEAL 1(CX)(SI*1), SI 6658 CMPL SI, 8(SP) 6659 JGE emit_remainder_encodeBetterBlockAsm12B 6660 MOVQ (DX)(CX*1), DI 6661 MOVL SI, 20(SP) 6662 MOVQ $0x0000cf1bbcdcbf9b, R9 6663 MOVQ $0x9e3779b1, SI 6664 MOVQ DI, R10 6665 MOVQ DI, R11 6666 SHLQ $0x10, R10 6667 IMULQ R9, R10 6668 SHRQ $0x32, R10 6669 SHLQ $0x20, R11 6670 IMULQ SI, R11 6671 SHRQ $0x34, R11 6672 MOVL 24(SP)(R10*4), SI 6673 MOVL 65560(SP)(R11*4), R8 6674 MOVL CX, 24(SP)(R10*4) 6675 MOVL CX, 65560(SP)(R11*4) 6676 CMPL (DX)(SI*1), DI 6677 JEQ candidate_match_encodeBetterBlockAsm12B 6678 CMPL (DX)(R8*1), DI 6679 JEQ candidateS_match_encodeBetterBlockAsm12B 6680 MOVL 20(SP), CX 6681 JMP search_loop_encodeBetterBlockAsm12B 6682 6683candidateS_match_encodeBetterBlockAsm12B: 6684 SHRQ $0x08, DI 6685 MOVQ DI, R10 6686 SHLQ $0x10, R10 6687 IMULQ R9, R10 6688 SHRQ $0x32, R10 6689 MOVL 24(SP)(R10*4), SI 6690 INCL CX 6691 MOVL CX, 24(SP)(R10*4) 6692 CMPL (DX)(SI*1), DI 6693 JEQ candidate_match_encodeBetterBlockAsm12B 6694 DECL CX 6695 MOVL R8, SI 6696 6697candidate_match_encodeBetterBlockAsm12B: 6698 MOVL 12(SP), DI 6699 TESTL 
SI, SI 6700 JZ match_extend_back_end_encodeBetterBlockAsm12B 6701 6702match_extend_back_loop_encodeBetterBlockAsm12B: 6703 CMPL CX, DI 6704 JLE match_extend_back_end_encodeBetterBlockAsm12B 6705 MOVB -1(DX)(SI*1), BL 6706 MOVB -1(DX)(CX*1), R8 6707 CMPB BL, R8 6708 JNE match_extend_back_end_encodeBetterBlockAsm12B 6709 LEAL -1(CX), CX 6710 DECL SI 6711 JZ match_extend_back_end_encodeBetterBlockAsm12B 6712 JMP match_extend_back_loop_encodeBetterBlockAsm12B 6713 6714match_extend_back_end_encodeBetterBlockAsm12B: 6715 MOVL CX, DI 6716 SUBL 12(SP), DI 6717 LEAQ 3(AX)(DI*1), DI 6718 CMPQ DI, (SP) 6719 JL match_dst_size_check_encodeBetterBlockAsm12B 6720 MOVQ $0x00000000, ret+48(FP) 6721 RET 6722 6723match_dst_size_check_encodeBetterBlockAsm12B: 6724 MOVL CX, DI 6725 ADDL $0x04, CX 6726 ADDL $0x04, SI 6727 MOVQ src_len+32(FP), R8 6728 SUBL CX, R8 6729 LEAQ (DX)(CX*1), R9 6730 LEAQ (DX)(SI*1), R10 6731 6732 // matchLen 6733 XORL R12, R12 6734 CMPL R8, $0x08 6735 JL matchlen_single_match_nolit_encodeBetterBlockAsm12B 6736 6737matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: 6738 MOVQ (R9)(R12*1), R11 6739 XORQ (R10)(R12*1), R11 6740 TESTQ R11, R11 6741 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B 6742 BSFQ R11, R11 6743 SARQ $0x03, R11 6744 LEAL (R12)(R11*1), R12 6745 JMP match_nolit_end_encodeBetterBlockAsm12B 6746 6747matchlen_loop_match_nolit_encodeBetterBlockAsm12B: 6748 LEAL -8(R8), R8 6749 LEAL 8(R12), R12 6750 CMPL R8, $0x08 6751 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B 6752 6753matchlen_single_match_nolit_encodeBetterBlockAsm12B: 6754 TESTL R8, R8 6755 JZ match_nolit_end_encodeBetterBlockAsm12B 6756 6757matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B: 6758 MOVB (R9)(R12*1), R11 6759 CMPB (R10)(R12*1), R11 6760 JNE match_nolit_end_encodeBetterBlockAsm12B 6761 LEAL 1(R12), R12 6762 DECL R8 6763 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B 6764 6765match_nolit_end_encodeBetterBlockAsm12B: 6766 MOVL CX, R8 
6767 SUBL SI, R8 6768 6769 // Check if repeat 6770 CMPL 16(SP), R8 6771 JEQ match_is_repeat_encodeBetterBlockAsm12B 6772 MOVL R8, 16(SP) 6773 MOVL 12(SP), SI 6774 CMPL SI, DI 6775 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B 6776 MOVL DI, R9 6777 MOVL DI, 12(SP) 6778 LEAQ (DX)(SI*1), R10 6779 SUBL SI, R9 6780 LEAL -1(R9), SI 6781 CMPL SI, $0x3c 6782 JLT one_byte_match_emit_encodeBetterBlockAsm12B 6783 CMPL SI, $0x00000100 6784 JLT two_bytes_match_emit_encodeBetterBlockAsm12B 6785 MOVB $0xf4, (AX) 6786 MOVW SI, 1(AX) 6787 ADDQ $0x03, AX 6788 JMP memmove_long_match_emit_encodeBetterBlockAsm12B 6789 6790two_bytes_match_emit_encodeBetterBlockAsm12B: 6791 MOVB $0xf0, (AX) 6792 MOVB SI, 1(AX) 6793 ADDQ $0x02, AX 6794 CMPL SI, $0x40 6795 JL memmove_match_emit_encodeBetterBlockAsm12B 6796 JMP memmove_long_match_emit_encodeBetterBlockAsm12B 6797 6798one_byte_match_emit_encodeBetterBlockAsm12B: 6799 SHLB $0x02, SI 6800 MOVB SI, (AX) 6801 ADDQ $0x01, AX 6802 6803memmove_match_emit_encodeBetterBlockAsm12B: 6804 LEAQ (AX)(R9*1), SI 6805 6806 // genMemMoveShort 6807 CMPQ R9, $0x04 6808 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 6809 CMPQ R9, $0x08 6810 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 6811 CMPQ R9, $0x10 6812 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 6813 CMPQ R9, $0x20 6814 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 6815 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 6816 6817emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: 6818 MOVL (R10), R11 6819 MOVL R11, (AX) 6820 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 6821 6822emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: 6823 MOVL (R10), R11 6824 MOVL -4(R10)(R9*1), R10 6825 MOVL R11, (AX) 6826 MOVL R10, -4(AX)(R9*1) 6827 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 6828 
6829emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: 6830 MOVQ (R10), R11 6831 MOVQ -8(R10)(R9*1), R10 6832 MOVQ R11, (AX) 6833 MOVQ R10, -8(AX)(R9*1) 6834 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 6835 6836emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: 6837 MOVOU (R10), X0 6838 MOVOU -16(R10)(R9*1), X1 6839 MOVOU X0, (AX) 6840 MOVOU X1, -16(AX)(R9*1) 6841 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 6842 6843emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: 6844 MOVOU (R10), X0 6845 MOVOU 16(R10), X1 6846 MOVOU -32(R10)(R9*1), X2 6847 MOVOU -16(R10)(R9*1), X3 6848 MOVOU X0, (AX) 6849 MOVOU X1, 16(AX) 6850 MOVOU X2, -32(AX)(R9*1) 6851 MOVOU X3, -16(AX)(R9*1) 6852 6853memmove_end_copy_match_emit_encodeBetterBlockAsm12B: 6854 MOVQ SI, AX 6855 JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B 6856 6857memmove_long_match_emit_encodeBetterBlockAsm12B: 6858 LEAQ (AX)(R9*1), SI 6859 6860 // genMemMoveLong 6861 MOVOU (R10), X0 6862 MOVOU 16(R10), X1 6863 MOVOU -32(R10)(R9*1), X2 6864 MOVOU -16(R10)(R9*1), X3 6865 MOVQ R9, R13 6866 SHRQ $0x05, R13 6867 MOVQ AX, R11 6868 ANDL $0x0000001f, R11 6869 MOVQ $0x00000040, R14 6870 SUBQ R11, R14 6871 DECQ R13 6872 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 6873 LEAQ -32(R10)(R14*1), R11 6874 LEAQ -32(AX)(R14*1), R15 6875 6876emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: 6877 MOVOU (R11), X4 6878 MOVOU 16(R11), X5 6879 MOVOA X4, (R15) 6880 MOVOA X5, 16(R15) 6881 ADDQ $0x20, R15 6882 ADDQ $0x20, R11 6883 ADDQ $0x20, R14 6884 DECQ R13 6885 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back 6886 6887emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: 6888 MOVOU -32(R10)(R14*1), X4 6889 MOVOU -16(R10)(R14*1), X5 6890 MOVOA X4, -32(AX)(R14*1) 6891 MOVOA X5, -16(AX)(R14*1) 6892 ADDQ $0x20, 
R14 6893 CMPQ R9, R14 6894 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 6895 MOVOU X0, (AX) 6896 MOVOU X1, 16(AX) 6897 MOVOU X2, -32(AX)(R9*1) 6898 MOVOU X3, -16(AX)(R9*1) 6899 MOVQ SI, AX 6900 6901emit_literal_done_match_emit_encodeBetterBlockAsm12B: 6902 ADDL R12, CX 6903 ADDL $0x04, R12 6904 MOVL CX, 12(SP) 6905 6906 // emitCopy 6907two_byte_offset_match_nolit_encodeBetterBlockAsm12B: 6908 CMPL R12, $0x40 6909 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B 6910 MOVB $0xee, (AX) 6911 MOVW R8, 1(AX) 6912 LEAL -60(R12), R12 6913 ADDQ $0x03, AX 6914 6915 // emitRepeat 6916 MOVL R12, SI 6917 LEAL -4(R12), R12 6918 CMPL SI, $0x08 6919 JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 6920 CMPL SI, $0x0c 6921 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 6922 CMPL R8, $0x00000800 6923 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 6924 6925cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 6926 CMPL R12, $0x00000104 6927 JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 6928 LEAL -256(R12), R12 6929 MOVW $0x0019, (AX) 6930 MOVW R12, 2(AX) 6931 ADDQ $0x04, AX 6932 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6933 6934repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 6935 LEAL -4(R12), R12 6936 MOVW $0x0015, (AX) 6937 MOVB R12, 2(AX) 6938 ADDQ $0x03, AX 6939 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6940 6941repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 6942 SHLL $0x02, R12 6943 ORL $0x01, R12 6944 MOVW R12, (AX) 6945 ADDQ $0x02, AX 6946 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6947 6948repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 6949 XORQ SI, SI 6950 LEAL 1(SI)(R12*4), R12 6951 MOVB R8, 1(AX) 6952 SARL $0x08, R8 6953 SHLL $0x05, R8 6954 ORL R8, R12 6955 MOVB R12, (AX) 6956 ADDQ $0x02, AX 6957 JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6958 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B 6959 6960two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: 6961 CMPL R12, $0x0c 6962 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B 6963 CMPL R8, $0x00000800 6964 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B 6965 MOVB $0x01, BL 6966 LEAL -16(BX)(R12*4), R12 6967 MOVB R8, 1(AX) 6968 SHRL $0x08, R8 6969 SHLL $0x05, R8 6970 ORL R8, R12 6971 MOVB R12, (AX) 6972 ADDQ $0x02, AX 6973 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6974 6975emit_copy_three_match_nolit_encodeBetterBlockAsm12B: 6976 MOVB $0x02, BL 6977 LEAL -4(BX)(R12*4), R12 6978 MOVB R12, (AX) 6979 MOVW R8, 1(AX) 6980 ADDQ $0x03, AX 6981 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 6982 6983match_is_repeat_encodeBetterBlockAsm12B: 6984 MOVL 12(SP), SI 6985 CMPL SI, DI 6986 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B 6987 MOVL DI, R9 6988 MOVL DI, 12(SP) 6989 LEAQ (DX)(SI*1), R10 6990 SUBL SI, R9 6991 LEAL -1(R9), SI 6992 CMPL SI, $0x3c 6993 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B 6994 CMPL SI, $0x00000100 6995 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B 6996 MOVB $0xf4, (AX) 6997 MOVW SI, 1(AX) 6998 ADDQ $0x03, AX 6999 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B 7000 7001two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: 7002 MOVB $0xf0, (AX) 7003 MOVB SI, 1(AX) 7004 ADDQ $0x02, AX 7005 CMPL SI, $0x40 7006 JL memmove_match_emit_repeat_encodeBetterBlockAsm12B 7007 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B 7008 7009one_byte_match_emit_repeat_encodeBetterBlockAsm12B: 7010 SHLB $0x02, SI 7011 MOVB SI, (AX) 7012 ADDQ $0x01, AX 7013 7014memmove_match_emit_repeat_encodeBetterBlockAsm12B: 7015 LEAQ (AX)(R9*1), SI 7016 7017 // genMemMoveShort 7018 CMPQ R9, $0x04 7019 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 7020 CMPQ R9, $0x08 7021 JB 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 7022 CMPQ R9, $0x10 7023 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 7024 CMPQ R9, $0x20 7025 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 7026 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 7027 7028emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: 7029 MOVL (R10), R11 7030 MOVL R11, (AX) 7031 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7032 7033emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: 7034 MOVL (R10), R11 7035 MOVL -4(R10)(R9*1), R10 7036 MOVL R11, (AX) 7037 MOVL R10, -4(AX)(R9*1) 7038 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7039 7040emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: 7041 MOVQ (R10), R11 7042 MOVQ -8(R10)(R9*1), R10 7043 MOVQ R11, (AX) 7044 MOVQ R10, -8(AX)(R9*1) 7045 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7046 7047emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: 7048 MOVOU (R10), X0 7049 MOVOU -16(R10)(R9*1), X1 7050 MOVOU X0, (AX) 7051 MOVOU X1, -16(AX)(R9*1) 7052 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7053 7054emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: 7055 MOVOU (R10), X0 7056 MOVOU 16(R10), X1 7057 MOVOU -32(R10)(R9*1), X2 7058 MOVOU -16(R10)(R9*1), X3 7059 MOVOU X0, (AX) 7060 MOVOU X1, 16(AX) 7061 MOVOU X2, -32(AX)(R9*1) 7062 MOVOU X3, -16(AX)(R9*1) 7063 7064memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: 7065 MOVQ SI, AX 7066 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B 7067 7068memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: 7069 LEAQ (AX)(R9*1), SI 7070 7071 // genMemMoveLong 7072 MOVOU (R10), X0 7073 MOVOU 16(R10), X1 7074 MOVOU 
-32(R10)(R9*1), X2 7075 MOVOU -16(R10)(R9*1), X3 7076 MOVQ R9, R13 7077 SHRQ $0x05, R13 7078 MOVQ AX, R11 7079 ANDL $0x0000001f, R11 7080 MOVQ $0x00000040, R14 7081 SUBQ R11, R14 7082 DECQ R13 7083 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7084 LEAQ -32(R10)(R14*1), R11 7085 LEAQ -32(AX)(R14*1), R15 7086 7087emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: 7088 MOVOU (R11), X4 7089 MOVOU 16(R11), X5 7090 MOVOA X4, (R15) 7091 MOVOA X5, 16(R15) 7092 ADDQ $0x20, R15 7093 ADDQ $0x20, R11 7094 ADDQ $0x20, R14 7095 DECQ R13 7096 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back 7097 7098emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: 7099 MOVOU -32(R10)(R14*1), X4 7100 MOVOU -16(R10)(R14*1), X5 7101 MOVOA X4, -32(AX)(R14*1) 7102 MOVOA X5, -16(AX)(R14*1) 7103 ADDQ $0x20, R14 7104 CMPQ R9, R14 7105 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7106 MOVOU X0, (AX) 7107 MOVOU X1, 16(AX) 7108 MOVOU X2, -32(AX)(R9*1) 7109 MOVOU X3, -16(AX)(R9*1) 7110 MOVQ SI, AX 7111 7112emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: 7113 ADDL R12, CX 7114 ADDL $0x04, R12 7115 MOVL CX, 12(SP) 7116 7117 // emitRepeat 7118 MOVL R12, SI 7119 LEAL -4(R12), R12 7120 CMPL SI, $0x08 7121 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B 7122 CMPL SI, $0x0c 7123 JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B 7124 CMPL R8, $0x00000800 7125 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B 7126 7127cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: 7128 CMPL R12, $0x00000104 7129 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B 7130 LEAL -256(R12), R12 7131 MOVW $0x0019, (AX) 7132 MOVW R12, 2(AX) 7133 ADDQ $0x04, AX 7134 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7135 
7136repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: 7137 LEAL -4(R12), R12 7138 MOVW $0x0015, (AX) 7139 MOVB R12, 2(AX) 7140 ADDQ $0x03, AX 7141 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7142 7143repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: 7144 SHLL $0x02, R12 7145 ORL $0x01, R12 7146 MOVW R12, (AX) 7147 ADDQ $0x02, AX 7148 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7149 7150repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: 7151 XORQ SI, SI 7152 LEAL 1(SI)(R12*4), R12 7153 MOVB R8, 1(AX) 7154 SARL $0x08, R8 7155 SHLL $0x05, R8 7156 ORL R8, R12 7157 MOVB R12, (AX) 7158 ADDQ $0x02, AX 7159 7160match_nolit_emitcopy_end_encodeBetterBlockAsm12B: 7161 CMPL CX, 8(SP) 7162 JGE emit_remainder_encodeBetterBlockAsm12B 7163 CMPQ AX, (SP) 7164 JL match_nolit_dst_ok_encodeBetterBlockAsm12B 7165 MOVQ $0x00000000, ret+48(FP) 7166 RET 7167 7168match_nolit_dst_ok_encodeBetterBlockAsm12B: 7169 MOVQ $0x0000cf1bbcdcbf9b, SI 7170 MOVQ $0x9e3779b1, R8 7171 INCL DI 7172 MOVQ (DX)(DI*1), R9 7173 MOVQ R9, R10 7174 MOVQ R9, R11 7175 MOVQ R9, R12 7176 SHRQ $0x08, R11 7177 MOVQ R11, R13 7178 SHRQ $0x10, R12 7179 LEAL 1(DI), R14 7180 LEAL 2(DI), R15 7181 MOVQ -2(DX)(CX*1), R9 7182 SHLQ $0x10, R10 7183 IMULQ SI, R10 7184 SHRQ $0x32, R10 7185 SHLQ $0x10, R13 7186 IMULQ SI, R13 7187 SHRQ $0x32, R13 7188 SHLQ $0x20, R11 7189 IMULQ R8, R11 7190 SHRQ $0x34, R11 7191 SHLQ $0x20, R12 7192 IMULQ R8, R12 7193 SHRQ $0x34, R12 7194 MOVL DI, 24(SP)(R10*4) 7195 MOVL R14, 24(SP)(R13*4) 7196 MOVL R14, 65560(SP)(R11*4) 7197 MOVL R15, 65560(SP)(R12*4) 7198 MOVQ R9, R10 7199 MOVQ R9, R11 7200 SHRQ $0x08, R11 7201 MOVQ R11, R13 7202 LEAL -2(CX), R9 7203 LEAL -1(CX), DI 7204 SHLQ $0x10, R10 7205 IMULQ SI, R10 7206 SHRQ $0x32, R10 7207 SHLQ $0x20, R11 7208 IMULQ R8, R11 7209 SHRQ $0x34, R11 7210 SHLQ $0x10, R13 7211 IMULQ SI, R13 7212 SHRQ $0x32, R13 7213 MOVL R9, 24(SP)(R10*4) 7214 MOVL DI, 65560(SP)(R11*4) 7215 MOVL DI, 24(SP)(R13*4) 7216 JMP 
search_loop_encodeBetterBlockAsm12B 7217 7218emit_remainder_encodeBetterBlockAsm12B: 7219 MOVQ src_len+32(FP), CX 7220 SUBL 12(SP), CX 7221 LEAQ 3(AX)(CX*1), CX 7222 CMPQ CX, (SP) 7223 JL emit_remainder_ok_encodeBetterBlockAsm12B 7224 MOVQ $0x00000000, ret+48(FP) 7225 RET 7226 7227emit_remainder_ok_encodeBetterBlockAsm12B: 7228 MOVQ src_len+32(FP), CX 7229 MOVL 12(SP), BX 7230 CMPL BX, CX 7231 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B 7232 MOVL CX, SI 7233 MOVL CX, 12(SP) 7234 LEAQ (DX)(BX*1), CX 7235 SUBL BX, SI 7236 LEAL -1(SI), DX 7237 CMPL DX, $0x3c 7238 JLT one_byte_emit_remainder_encodeBetterBlockAsm12B 7239 CMPL DX, $0x00000100 7240 JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B 7241 MOVB $0xf4, (AX) 7242 MOVW DX, 1(AX) 7243 ADDQ $0x03, AX 7244 JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B 7245 7246two_bytes_emit_remainder_encodeBetterBlockAsm12B: 7247 MOVB $0xf0, (AX) 7248 MOVB DL, 1(AX) 7249 ADDQ $0x02, AX 7250 CMPL DX, $0x40 7251 JL memmove_emit_remainder_encodeBetterBlockAsm12B 7252 JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B 7253 7254one_byte_emit_remainder_encodeBetterBlockAsm12B: 7255 SHLB $0x02, DL 7256 MOVB DL, (AX) 7257 ADDQ $0x01, AX 7258 7259memmove_emit_remainder_encodeBetterBlockAsm12B: 7260 LEAQ (AX)(SI*1), DX 7261 MOVL SI, BX 7262 7263 // genMemMoveShort 7264 CMPQ BX, $0x04 7265 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4 7266 CMPQ BX, $0x08 7267 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7 7268 CMPQ BX, $0x10 7269 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16 7270 CMPQ BX, $0x20 7271 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32 7272 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 7273 7274emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4: 7275 MOVL (CX), SI 7276 MOVL SI, (AX) 7277 JMP 
memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7278 7279emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: 7280 MOVL (CX), SI 7281 MOVL -4(CX)(BX*1), CX 7282 MOVL SI, (AX) 7283 MOVL CX, -4(AX)(BX*1) 7284 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7285 7286emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: 7287 MOVQ (CX), SI 7288 MOVQ -8(CX)(BX*1), CX 7289 MOVQ SI, (AX) 7290 MOVQ CX, -8(AX)(BX*1) 7291 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7292 7293emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: 7294 MOVOU (CX), X0 7295 MOVOU -16(CX)(BX*1), X1 7296 MOVOU X0, (AX) 7297 MOVOU X1, -16(AX)(BX*1) 7298 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7299 7300emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: 7301 MOVOU (CX), X0 7302 MOVOU 16(CX), X1 7303 MOVOU -32(CX)(BX*1), X2 7304 MOVOU -16(CX)(BX*1), X3 7305 MOVOU X0, (AX) 7306 MOVOU X1, 16(AX) 7307 MOVOU X2, -32(AX)(BX*1) 7308 MOVOU X3, -16(AX)(BX*1) 7309 7310memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: 7311 MOVQ DX, AX 7312 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B 7313 7314memmove_long_emit_remainder_encodeBetterBlockAsm12B: 7315 LEAQ (AX)(SI*1), DX 7316 MOVL SI, BX 7317 7318 // genMemMoveLong 7319 MOVOU (CX), X0 7320 MOVOU 16(CX), X1 7321 MOVOU -32(CX)(BX*1), X2 7322 MOVOU -16(CX)(BX*1), X3 7323 MOVQ BX, DI 7324 SHRQ $0x05, DI 7325 MOVQ AX, SI 7326 ANDL $0x0000001f, SI 7327 MOVQ $0x00000040, R8 7328 SUBQ SI, R8 7329 DECQ DI 7330 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7331 LEAQ -32(CX)(R8*1), SI 7332 LEAQ -32(AX)(R8*1), R9 7333 7334emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: 7335 MOVOU (SI), X4 7336 MOVOU 16(SI), X5 7337 MOVOA X4, (R9) 7338 MOVOA X5, 16(R9) 7339 ADDQ $0x20, R9 7340 ADDQ $0x20, SI 7341 ADDQ $0x20, R8 7342 
// encodeBetterBlockAsm12B, remainder literal copy: loop control of the 32-byte block loop.
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst using two hash tables kept on the stack and returns
// the number of bytes written (final dst pointer minus dst_base). Returns 0
// whenever the output would reach the destination limit stored at 0(SP).
//
// Frame layout (20504 bytes):
//   0(SP)      8 bytes      dst limit pointer: dst_base + (src_len - 6 - src_len>>5)
//   8(SP)      4 bytes      search limit: src_len - 8
//   12(SP)     4 bytes      src index where the not-yet-emitted literals start
//   16(SP)     4 bytes      offset of the previous match (compared for repeats)
//   20(SP)     4 bytes      next search position, used when no candidate matches
//   24(SP)     16384 bytes  long hash table: 4096 entries x 4 bytes (12-bit hash)
//   16408(SP)  4096 bytes   short hash table: 1024 entries x 4 bytes (10-bit hash)
//
// Registers held across the main loop: AX = dst write pointer, DX = src base,
// CX = current src index.
TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
	MOVQ dst_base+0(FP), AX
	// 0xa0 = 160 iterations of 128 bytes clear the 20480 bytes of both tables.
	MOVQ $0x000000a0, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm10B
	// Pending literals start at src index 0.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	// 8(SP) = src_len - 8: searching stops once the next position reaches it.
	MOVL SI, 8(SP)
	// 0(SP) = dst_base + (src_len - 6 - src_len>>5): the output limit pointer.
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	// Start searching at src index 1 with no previous match offset.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm10B:
	// Next position = CX + 1 + (CX - literal start)>>5, so the step grows by
	// one for every 32 bytes scanned since the last emit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x05, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm10B
	// DI = the 8 source bytes at the current position.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	// Long hash: low 6 bytes (SHLQ 16) times the constant in R9, top 12 bits.
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	// Short hash: low 4 bytes (SHLQ 32) times the constant in SI, top 10 bits.
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x36, R11
	// Fetch both candidates (SI = long, R8 = short), then record the current
	// position in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 16408(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 16408(SP)(R11*4)
	// Candidates are verified by comparing 4 bytes against the low half of DI.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm10B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm10B
	// Neither candidate matched: advance to the saved next position.
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm10B

candidateS_match_encodeBetterBlockAsm10B:
	// The short candidate matched. Probe the long table for the bytes at
	// CX+1 (DI shifted right by one byte) and prefer that candidate if its
	// 4 bytes match; the table entry is updated with CX+1 either way.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x34, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm10B
	// Long probe failed: step back to the original position and use the
	// short candidate saved in R8.
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm10B:
	// DI = start of the pending literals; the match start may not move below it.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm10B

match_extend_back_loop_encodeBetterBlockAsm10B:
	// Extend the match backwards one byte at a time while the preceding
	// bytes are equal, CX is above the literal start and SI is above 0.
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm10B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm10B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm10B
	JMP match_extend_back_loop_encodeBetterBlockAsm10B

match_extend_back_end_encodeBetterBlockAsm10B:
	// Bail out with 0 unless dst + pending literal count + 3 is below the limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm10B:
	// DI keeps the match start. The first 4 bytes are already known to be
	// equal, so both positions skip ahead by 4 before measuring the rest.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	// R8 = bytes left in src after CX; R9/R10 = pointers to the two positions.
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	// R12 accumulates the number of additional equal bytes.
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm10B

matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
	// Compare 8 bytes at once; on a difference the index of the lowest set
	// bit of the XOR divided by 8 is the count of equal leading bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeBetterBlockAsm10B

matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B

matchlen_single_match_nolit_encodeBetterBlockAsm10B:
	// Fewer than 8 bytes remain: finish byte by byte.
	TESTL R8, R8
	JZ match_nolit_end_encodeBetterBlockAsm10B

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeBetterBlockAsm10B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B

match_nolit_end_encodeBetterBlockAsm10B:
	// R8 = match offset (current position minus candidate position).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	CMPL 16(SP), R8
	JEQ match_is_repeat_encodeBetterBlockAsm10B
	// New offset: remember it, then emit the literals src[12(SP):DI], if any.
	MOVL R8, 16(SP)
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
	// R9 = literal length, R10 = literal source pointer, SI = length - 1.
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	// Literal header: length-1 < 60 -> one byte; < 256 -> 0xf0 plus one
	// byte; otherwise 0xf4 plus a 16-bit value.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm10B

two_bytes_match_emit_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	// length-1 < 64 still fits the short copy routine (at most 64 bytes).
	CMPL SI, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm10B
	JMP memmove_long_match_emit_encodeBetterBlockAsm10B

one_byte_match_emit_encodeBetterBlockAsm10B:
	// Header byte = (length-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals; it becomes AX once the copy is done.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Dispatch on the length in R9. Each case loads the head and the tail of
	// the range (the two may overlap) before storing either.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
	// Lengths up to 4 always move a full 4 bytes; AX is still advanced by
	// the real length only (via SI).
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B

memmove_long_match_emit_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front (X0-X3) and stored
	// last, so the middle can be copied with aligned (MOVOA) stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	// R13 = number of 32-byte blocks; R14 = 64 - (dst & 31), the running
	// offset used by the aligned stores below.
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	// NOTE(review): DECQ does not update CF on x86, so this JA (and the JNA
	// below) also depends on the carry left by the preceding SUBQ/ADDQ -
	// confirm the intended loop conditions against the generator.
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	// Copy 32 bytes per iteration while the length in R9 is >= the offset in R14.
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	// Store the head and tail captured before the loop.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeBetterBlockAsm10B:
	// CX moves to the end of the match, R12 becomes the full match length
	// (the 4 verified bytes plus the extension), and the next literal run
	// starts at the end of the match.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeBetterBlockAsm10B:
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
	// Length above 64: write tag 0xee (the value the three-byte copy path
	// below produces for length 60) with the 16-bit offset, take 60 off the
	// length and encode what is left as a repeat.
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX

	// emitRepeat
	// SI = remaining length, R12 = remaining length - 4.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
	// Four-byte form: word 0x0019 followed by the 16-bit value R12 - 256.
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Three-byte form: word 0x0015 followed by the byte R12 - 4.
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Two-byte form: 16-bit word (R12 << 2) | 1.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
	// Two-byte form with offset: byte 0 = 1 | R12<<2 | (offset>>8)<<5,
	// byte 1 = low byte of the offset.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
	// NOTE(review): the JMP below is unreachable - it directly follows an
	// unconditional JMP with no label in between. Kept as emitted by the
	// generator; any cleanup belongs in gen.go.
	JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B

two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
	// Length <= 64. The two-byte copy is used only when length < 12 and
	// offset < 2048; everything else takes the three-byte copy.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
	// Byte 0 = 1 + (length-4)*4 | (offset>>8)<<5, byte 1 = low offset byte.
	// Only the low byte of the LEAL result is stored.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
	// Byte 0 = 2 + (length-1)*4, followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

match_is_repeat_encodeBetterBlockAsm10B:
	// Same offset as the previous match: emit pending literals (same scheme
	// as the non-repeat path above), then a repeat instead of a copy.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
	// R9 = literal length, R10 = literal source pointer, SI = length - 1.
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B

two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm10B
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B

one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Length-based dispatch; head and tail are loaded before any store.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B

memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
	// SI = dst pointer after the literals.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// Head and tail (X0-X3) are loaded first and stored last; the middle is
	// copied in 32-byte blocks with aligned stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
	// CX moves to the end of the match, R12 becomes the full match length,
	// and the next literal run starts at the end of the match.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitRepeat
	// SI = length, R12 = length - 4. The byte layouts are the same as in
	// the repeat emitted after a long copy above.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B

repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
	// Stop searching once the position reaches the search limit; return 0
	// if the dst pointer has reached the output limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm10B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm10B:
	// Index a few positions around the match before searching again.
	// DI held the match start and is incremented first, so the entries
	// written below are DI, DI+1 (long table) and DI+1, DI+2 (short table),
	// followed by CX-2, CX-1 (long) and CX-1 (short).
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	// R9 is reused for the 8 bytes at CX-2; R10-R13 already hold copies of
	// the bytes at DI.
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x36, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 16408(SP)(R11*4)
	MOVL R15, 16408(SP)(R12*4)
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x34, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x36, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x34, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 16408(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeBetterBlockAsm10B

emit_remainder_encodeBetterBlockAsm10B:
	// Everything from the literal start to the end of src is emitted as
	// literals. Return 0 unless dst + remaining count + 3 is below the limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm10B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
	// SI = literal length, CX = literal source pointer, DX = length - 1.
	// DX (src base) and CX (src index) are no longer needed past this point.
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	// Same literal header scheme as in the match paths.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm10B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B

two_bytes_emit_remainder_encodeBetterBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm10B
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B

one_byte_emit_remainder_encodeBetterBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm10B:
	// DX = dst pointer after the literals; BX = length (zero-extended by MOVL).
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// Length-based dispatch; head and tail are loaded before any store.
	CMPQ BX, $0x04
	JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4:
	MOVL (CX), SI
	MOVL SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B

memmove_long_emit_remainder_encodeBetterBlockAsm10B:
	// DX = dst pointer after the literals; BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Head and tail (X0-X3) are loaded first and stored last; the middle is
	// copied in 32-byte blocks with aligned stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
	// Return the number of bytes written: final dst pointer minus dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
8128 DECQ CX 8129 JNZ zero_loop_encodeBetterBlockAsm8B 8130 MOVL $0x00000000, 12(SP) 8131 MOVQ src_len+32(FP), CX 8132 LEAQ -6(CX), DX 8133 LEAQ -8(CX), SI 8134 MOVL SI, 8(SP) 8135 SHRQ $0x05, CX 8136 SUBL CX, DX 8137 LEAQ (AX)(DX*1), DX 8138 MOVQ DX, (SP) 8139 MOVL $0x00000001, CX 8140 MOVL $0x00000000, 16(SP) 8141 MOVQ src_base+24(FP), DX 8142 8143search_loop_encodeBetterBlockAsm8B: 8144 MOVL CX, SI 8145 SUBL 12(SP), SI 8146 SHRL $0x04, SI 8147 LEAL 1(CX)(SI*1), SI 8148 CMPL SI, 8(SP) 8149 JGE emit_remainder_encodeBetterBlockAsm8B 8150 MOVQ (DX)(CX*1), DI 8151 MOVL SI, 20(SP) 8152 MOVQ $0x0000cf1bbcdcbf9b, R9 8153 MOVQ $0x9e3779b1, SI 8154 MOVQ DI, R10 8155 MOVQ DI, R11 8156 SHLQ $0x10, R10 8157 IMULQ R9, R10 8158 SHRQ $0x36, R10 8159 SHLQ $0x20, R11 8160 IMULQ SI, R11 8161 SHRQ $0x38, R11 8162 MOVL 24(SP)(R10*4), SI 8163 MOVL 4120(SP)(R11*4), R8 8164 MOVL CX, 24(SP)(R10*4) 8165 MOVL CX, 4120(SP)(R11*4) 8166 CMPL (DX)(SI*1), DI 8167 JEQ candidate_match_encodeBetterBlockAsm8B 8168 CMPL (DX)(R8*1), DI 8169 JEQ candidateS_match_encodeBetterBlockAsm8B 8170 MOVL 20(SP), CX 8171 JMP search_loop_encodeBetterBlockAsm8B 8172 8173candidateS_match_encodeBetterBlockAsm8B: 8174 SHRQ $0x08, DI 8175 MOVQ DI, R10 8176 SHLQ $0x10, R10 8177 IMULQ R9, R10 8178 SHRQ $0x36, R10 8179 MOVL 24(SP)(R10*4), SI 8180 INCL CX 8181 MOVL CX, 24(SP)(R10*4) 8182 CMPL (DX)(SI*1), DI 8183 JEQ candidate_match_encodeBetterBlockAsm8B 8184 DECL CX 8185 MOVL R8, SI 8186 8187candidate_match_encodeBetterBlockAsm8B: 8188 MOVL 12(SP), DI 8189 TESTL SI, SI 8190 JZ match_extend_back_end_encodeBetterBlockAsm8B 8191 8192match_extend_back_loop_encodeBetterBlockAsm8B: 8193 CMPL CX, DI 8194 JLE match_extend_back_end_encodeBetterBlockAsm8B 8195 MOVB -1(DX)(SI*1), BL 8196 MOVB -1(DX)(CX*1), R8 8197 CMPB BL, R8 8198 JNE match_extend_back_end_encodeBetterBlockAsm8B 8199 LEAL -1(CX), CX 8200 DECL SI 8201 JZ match_extend_back_end_encodeBetterBlockAsm8B 8202 JMP match_extend_back_loop_encodeBetterBlockAsm8B 8203 
8204match_extend_back_end_encodeBetterBlockAsm8B: 8205 MOVL CX, DI 8206 SUBL 12(SP), DI 8207 LEAQ 3(AX)(DI*1), DI 8208 CMPQ DI, (SP) 8209 JL match_dst_size_check_encodeBetterBlockAsm8B 8210 MOVQ $0x00000000, ret+48(FP) 8211 RET 8212 8213match_dst_size_check_encodeBetterBlockAsm8B: 8214 MOVL CX, DI 8215 ADDL $0x04, CX 8216 ADDL $0x04, SI 8217 MOVQ src_len+32(FP), R8 8218 SUBL CX, R8 8219 LEAQ (DX)(CX*1), R9 8220 LEAQ (DX)(SI*1), R10 8221 8222 // matchLen 8223 XORL R12, R12 8224 CMPL R8, $0x08 8225 JL matchlen_single_match_nolit_encodeBetterBlockAsm8B 8226 8227matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: 8228 MOVQ (R9)(R12*1), R11 8229 XORQ (R10)(R12*1), R11 8230 TESTQ R11, R11 8231 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B 8232 BSFQ R11, R11 8233 SARQ $0x03, R11 8234 LEAL (R12)(R11*1), R12 8235 JMP match_nolit_end_encodeBetterBlockAsm8B 8236 8237matchlen_loop_match_nolit_encodeBetterBlockAsm8B: 8238 LEAL -8(R8), R8 8239 LEAL 8(R12), R12 8240 CMPL R8, $0x08 8241 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B 8242 8243matchlen_single_match_nolit_encodeBetterBlockAsm8B: 8244 TESTL R8, R8 8245 JZ match_nolit_end_encodeBetterBlockAsm8B 8246 8247matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B: 8248 MOVB (R9)(R12*1), R11 8249 CMPB (R10)(R12*1), R11 8250 JNE match_nolit_end_encodeBetterBlockAsm8B 8251 LEAL 1(R12), R12 8252 DECL R8 8253 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B 8254 8255match_nolit_end_encodeBetterBlockAsm8B: 8256 MOVL CX, R8 8257 SUBL SI, R8 8258 8259 // Check if repeat 8260 CMPL 16(SP), R8 8261 JEQ match_is_repeat_encodeBetterBlockAsm8B 8262 MOVL R8, 16(SP) 8263 MOVL 12(SP), SI 8264 CMPL SI, DI 8265 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B 8266 MOVL DI, R9 8267 MOVL DI, 12(SP) 8268 LEAQ (DX)(SI*1), R10 8269 SUBL SI, R9 8270 LEAL -1(R9), SI 8271 CMPL SI, $0x3c 8272 JLT one_byte_match_emit_encodeBetterBlockAsm8B 8273 CMPL SI, $0x00000100 8274 JLT 
two_bytes_match_emit_encodeBetterBlockAsm8B 8275 MOVB $0xf4, (AX) 8276 MOVW SI, 1(AX) 8277 ADDQ $0x03, AX 8278 JMP memmove_long_match_emit_encodeBetterBlockAsm8B 8279 8280two_bytes_match_emit_encodeBetterBlockAsm8B: 8281 MOVB $0xf0, (AX) 8282 MOVB SI, 1(AX) 8283 ADDQ $0x02, AX 8284 CMPL SI, $0x40 8285 JL memmove_match_emit_encodeBetterBlockAsm8B 8286 JMP memmove_long_match_emit_encodeBetterBlockAsm8B 8287 8288one_byte_match_emit_encodeBetterBlockAsm8B: 8289 SHLB $0x02, SI 8290 MOVB SI, (AX) 8291 ADDQ $0x01, AX 8292 8293memmove_match_emit_encodeBetterBlockAsm8B: 8294 LEAQ (AX)(R9*1), SI 8295 8296 // genMemMoveShort 8297 CMPQ R9, $0x04 8298 JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 8299 CMPQ R9, $0x08 8300 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 8301 CMPQ R9, $0x10 8302 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 8303 CMPQ R9, $0x20 8304 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 8305 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 8306 8307emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: 8308 MOVL (R10), R11 8309 MOVL R11, (AX) 8310 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8311 8312emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: 8313 MOVL (R10), R11 8314 MOVL -4(R10)(R9*1), R10 8315 MOVL R11, (AX) 8316 MOVL R10, -4(AX)(R9*1) 8317 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8318 8319emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: 8320 MOVQ (R10), R11 8321 MOVQ -8(R10)(R9*1), R10 8322 MOVQ R11, (AX) 8323 MOVQ R10, -8(AX)(R9*1) 8324 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8325 8326emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: 8327 MOVOU (R10), X0 8328 MOVOU -16(R10)(R9*1), X1 8329 MOVOU X0, (AX) 8330 MOVOU X1, -16(AX)(R9*1) 8331 JMP 
memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8332 8333emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: 8334 MOVOU (R10), X0 8335 MOVOU 16(R10), X1 8336 MOVOU -32(R10)(R9*1), X2 8337 MOVOU -16(R10)(R9*1), X3 8338 MOVOU X0, (AX) 8339 MOVOU X1, 16(AX) 8340 MOVOU X2, -32(AX)(R9*1) 8341 MOVOU X3, -16(AX)(R9*1) 8342 8343memmove_end_copy_match_emit_encodeBetterBlockAsm8B: 8344 MOVQ SI, AX 8345 JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B 8346 8347memmove_long_match_emit_encodeBetterBlockAsm8B: 8348 LEAQ (AX)(R9*1), SI 8349 8350 // genMemMoveLong 8351 MOVOU (R10), X0 8352 MOVOU 16(R10), X1 8353 MOVOU -32(R10)(R9*1), X2 8354 MOVOU -16(R10)(R9*1), X3 8355 MOVQ R9, R13 8356 SHRQ $0x05, R13 8357 MOVQ AX, R11 8358 ANDL $0x0000001f, R11 8359 MOVQ $0x00000040, R14 8360 SUBQ R11, R14 8361 DECQ R13 8362 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8363 LEAQ -32(R10)(R14*1), R11 8364 LEAQ -32(AX)(R14*1), R15 8365 8366emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: 8367 MOVOU (R11), X4 8368 MOVOU 16(R11), X5 8369 MOVOA X4, (R15) 8370 MOVOA X5, 16(R15) 8371 ADDQ $0x20, R15 8372 ADDQ $0x20, R11 8373 ADDQ $0x20, R14 8374 DECQ R13 8375 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back 8376 8377emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 8378 MOVOU -32(R10)(R14*1), X4 8379 MOVOU -16(R10)(R14*1), X5 8380 MOVOA X4, -32(AX)(R14*1) 8381 MOVOA X5, -16(AX)(R14*1) 8382 ADDQ $0x20, R14 8383 CMPQ R9, R14 8384 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8385 MOVOU X0, (AX) 8386 MOVOU X1, 16(AX) 8387 MOVOU X2, -32(AX)(R9*1) 8388 MOVOU X3, -16(AX)(R9*1) 8389 MOVQ SI, AX 8390 8391emit_literal_done_match_emit_encodeBetterBlockAsm8B: 8392 ADDL R12, CX 8393 ADDL $0x04, R12 8394 MOVL CX, 12(SP) 8395 8396 // emitCopy 8397two_byte_offset_match_nolit_encodeBetterBlockAsm8B: 8398 CMPL R12, 
$0x40 8399 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B 8400 MOVB $0xee, (AX) 8401 MOVW R8, 1(AX) 8402 LEAL -60(R12), R12 8403 ADDQ $0x03, AX 8404 8405 // emitRepeat 8406 MOVL R12, SI 8407 LEAL -4(R12), R12 8408 CMPL SI, $0x08 8409 JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8410 CMPL SI, $0x0c 8411 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8412 8413cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8414 CMPL R12, $0x00000104 8415 JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8416 LEAL -256(R12), R12 8417 MOVW $0x0019, (AX) 8418 MOVW R12, 2(AX) 8419 ADDQ $0x04, AX 8420 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8421 8422repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8423 LEAL -4(R12), R12 8424 MOVW $0x0015, (AX) 8425 MOVB R12, 2(AX) 8426 ADDQ $0x03, AX 8427 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8428 8429repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8430 SHLL $0x02, R12 8431 ORL $0x01, R12 8432 MOVW R12, (AX) 8433 ADDQ $0x02, AX 8434 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8435 XORQ SI, SI 8436 LEAL 1(SI)(R12*4), R12 8437 MOVB R8, 1(AX) 8438 SARL $0x08, R8 8439 SHLL $0x05, R8 8440 ORL R8, R12 8441 MOVB R12, (AX) 8442 ADDQ $0x02, AX 8443 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8444 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B 8445 8446two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: 8447 CMPL R12, $0x0c 8448 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B 8449 MOVB $0x01, BL 8450 LEAL -16(BX)(R12*4), R12 8451 MOVB R8, 1(AX) 8452 SHRL $0x08, R8 8453 SHLL $0x05, R8 8454 ORL R8, R12 8455 MOVB R12, (AX) 8456 ADDQ $0x02, AX 8457 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8458 8459emit_copy_three_match_nolit_encodeBetterBlockAsm8B: 8460 MOVB $0x02, BL 8461 LEAL -4(BX)(R12*4), R12 8462 MOVB R12, (AX) 8463 MOVW R8, 1(AX) 8464 ADDQ $0x03, AX 
8465 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8466 8467match_is_repeat_encodeBetterBlockAsm8B: 8468 MOVL 12(SP), SI 8469 CMPL SI, DI 8470 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B 8471 MOVL DI, R8 8472 MOVL DI, 12(SP) 8473 LEAQ (DX)(SI*1), R9 8474 SUBL SI, R8 8475 LEAL -1(R8), SI 8476 CMPL SI, $0x3c 8477 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B 8478 CMPL SI, $0x00000100 8479 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B 8480 MOVB $0xf4, (AX) 8481 MOVW SI, 1(AX) 8482 ADDQ $0x03, AX 8483 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B 8484 8485two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: 8486 MOVB $0xf0, (AX) 8487 MOVB SI, 1(AX) 8488 ADDQ $0x02, AX 8489 CMPL SI, $0x40 8490 JL memmove_match_emit_repeat_encodeBetterBlockAsm8B 8491 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B 8492 8493one_byte_match_emit_repeat_encodeBetterBlockAsm8B: 8494 SHLB $0x02, SI 8495 MOVB SI, (AX) 8496 ADDQ $0x01, AX 8497 8498memmove_match_emit_repeat_encodeBetterBlockAsm8B: 8499 LEAQ (AX)(R8*1), SI 8500 8501 // genMemMoveShort 8502 CMPQ R8, $0x04 8503 JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 8504 CMPQ R8, $0x08 8505 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 8506 CMPQ R8, $0x10 8507 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 8508 CMPQ R8, $0x20 8509 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 8510 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 8511 8512emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: 8513 MOVL (R9), R10 8514 MOVL R10, (AX) 8515 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8516 8517emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: 8518 MOVL (R9), R10 8519 MOVL -4(R9)(R8*1), R9 8520 MOVL R10, (AX) 8521 MOVL R9, -4(AX)(R8*1) 
8522 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8523 8524emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: 8525 MOVQ (R9), R10 8526 MOVQ -8(R9)(R8*1), R9 8527 MOVQ R10, (AX) 8528 MOVQ R9, -8(AX)(R8*1) 8529 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8530 8531emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: 8532 MOVOU (R9), X0 8533 MOVOU -16(R9)(R8*1), X1 8534 MOVOU X0, (AX) 8535 MOVOU X1, -16(AX)(R8*1) 8536 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8537 8538emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: 8539 MOVOU (R9), X0 8540 MOVOU 16(R9), X1 8541 MOVOU -32(R9)(R8*1), X2 8542 MOVOU -16(R9)(R8*1), X3 8543 MOVOU X0, (AX) 8544 MOVOU X1, 16(AX) 8545 MOVOU X2, -32(AX)(R8*1) 8546 MOVOU X3, -16(AX)(R8*1) 8547 8548memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: 8549 MOVQ SI, AX 8550 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B 8551 8552memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: 8553 LEAQ (AX)(R8*1), SI 8554 8555 // genMemMoveLong 8556 MOVOU (R9), X0 8557 MOVOU 16(R9), X1 8558 MOVOU -32(R9)(R8*1), X2 8559 MOVOU -16(R9)(R8*1), X3 8560 MOVQ R8, R11 8561 SHRQ $0x05, R11 8562 MOVQ AX, R10 8563 ANDL $0x0000001f, R10 8564 MOVQ $0x00000040, R13 8565 SUBQ R10, R13 8566 DECQ R11 8567 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8568 LEAQ -32(R9)(R13*1), R10 8569 LEAQ -32(AX)(R13*1), R14 8570 8571emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: 8572 MOVOU (R10), X4 8573 MOVOU 16(R10), X5 8574 MOVOA X4, (R14) 8575 MOVOA X5, 16(R14) 8576 ADDQ $0x20, R14 8577 ADDQ $0x20, R10 8578 ADDQ $0x20, R13 8579 DECQ R11 8580 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back 8581 8582emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 8583 MOVOU 
-32(R9)(R13*1), X4 8584 MOVOU -16(R9)(R13*1), X5 8585 MOVOA X4, -32(AX)(R13*1) 8586 MOVOA X5, -16(AX)(R13*1) 8587 ADDQ $0x20, R13 8588 CMPQ R8, R13 8589 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8590 MOVOU X0, (AX) 8591 MOVOU X1, 16(AX) 8592 MOVOU X2, -32(AX)(R8*1) 8593 MOVOU X3, -16(AX)(R8*1) 8594 MOVQ SI, AX 8595 8596emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: 8597 ADDL R12, CX 8598 ADDL $0x04, R12 8599 MOVL CX, 12(SP) 8600 8601 // emitRepeat 8602 MOVL R12, SI 8603 LEAL -4(R12), R12 8604 CMPL SI, $0x08 8605 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B 8606 CMPL SI, $0x0c 8607 JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B 8608 8609cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: 8610 CMPL R12, $0x00000104 8611 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B 8612 LEAL -256(R12), R12 8613 MOVW $0x0019, (AX) 8614 MOVW R12, 2(AX) 8615 ADDQ $0x04, AX 8616 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8617 8618repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: 8619 LEAL -4(R12), R12 8620 MOVW $0x0015, (AX) 8621 MOVB R12, 2(AX) 8622 ADDQ $0x03, AX 8623 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8624 8625repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: 8626 SHLL $0x02, R12 8627 ORL $0x01, R12 8628 MOVW R12, (AX) 8629 ADDQ $0x02, AX 8630 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8631 XORQ SI, SI 8632 LEAL 1(SI)(R12*4), R12 8633 MOVB R8, 1(AX) 8634 SARL $0x08, R8 8635 SHLL $0x05, R8 8636 ORL R8, R12 8637 MOVB R12, (AX) 8638 ADDQ $0x02, AX 8639 8640match_nolit_emitcopy_end_encodeBetterBlockAsm8B: 8641 CMPL CX, 8(SP) 8642 JGE emit_remainder_encodeBetterBlockAsm8B 8643 CMPQ AX, (SP) 8644 JL match_nolit_dst_ok_encodeBetterBlockAsm8B 8645 MOVQ $0x00000000, ret+48(FP) 8646 RET 8647 8648match_nolit_dst_ok_encodeBetterBlockAsm8B: 8649 MOVQ $0x0000cf1bbcdcbf9b, SI 8650 MOVQ $0x9e3779b1, R8 8651 INCL DI 8652 MOVQ 
(DX)(DI*1), R9 8653 MOVQ R9, R10 8654 MOVQ R9, R11 8655 MOVQ R9, R12 8656 SHRQ $0x08, R11 8657 MOVQ R11, R13 8658 SHRQ $0x10, R12 8659 LEAL 1(DI), R14 8660 LEAL 2(DI), R15 8661 MOVQ -2(DX)(CX*1), R9 8662 SHLQ $0x10, R10 8663 IMULQ SI, R10 8664 SHRQ $0x36, R10 8665 SHLQ $0x10, R13 8666 IMULQ SI, R13 8667 SHRQ $0x36, R13 8668 SHLQ $0x20, R11 8669 IMULQ R8, R11 8670 SHRQ $0x38, R11 8671 SHLQ $0x20, R12 8672 IMULQ R8, R12 8673 SHRQ $0x38, R12 8674 MOVL DI, 24(SP)(R10*4) 8675 MOVL R14, 24(SP)(R13*4) 8676 MOVL R14, 4120(SP)(R11*4) 8677 MOVL R15, 4120(SP)(R12*4) 8678 MOVQ R9, R10 8679 MOVQ R9, R11 8680 SHRQ $0x08, R11 8681 MOVQ R11, R13 8682 LEAL -2(CX), R9 8683 LEAL -1(CX), DI 8684 SHLQ $0x10, R10 8685 IMULQ SI, R10 8686 SHRQ $0x36, R10 8687 SHLQ $0x20, R11 8688 IMULQ R8, R11 8689 SHRQ $0x38, R11 8690 SHLQ $0x10, R13 8691 IMULQ SI, R13 8692 SHRQ $0x36, R13 8693 MOVL R9, 24(SP)(R10*4) 8694 MOVL DI, 4120(SP)(R11*4) 8695 MOVL DI, 24(SP)(R13*4) 8696 JMP search_loop_encodeBetterBlockAsm8B 8697 8698emit_remainder_encodeBetterBlockAsm8B: 8699 MOVQ src_len+32(FP), CX 8700 SUBL 12(SP), CX 8701 LEAQ 3(AX)(CX*1), CX 8702 CMPQ CX, (SP) 8703 JL emit_remainder_ok_encodeBetterBlockAsm8B 8704 MOVQ $0x00000000, ret+48(FP) 8705 RET 8706 8707emit_remainder_ok_encodeBetterBlockAsm8B: 8708 MOVQ src_len+32(FP), CX 8709 MOVL 12(SP), BX 8710 CMPL BX, CX 8711 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B 8712 MOVL CX, SI 8713 MOVL CX, 12(SP) 8714 LEAQ (DX)(BX*1), CX 8715 SUBL BX, SI 8716 LEAL -1(SI), DX 8717 CMPL DX, $0x3c 8718 JLT one_byte_emit_remainder_encodeBetterBlockAsm8B 8719 CMPL DX, $0x00000100 8720 JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B 8721 MOVB $0xf4, (AX) 8722 MOVW DX, 1(AX) 8723 ADDQ $0x03, AX 8724 JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B 8725 8726two_bytes_emit_remainder_encodeBetterBlockAsm8B: 8727 MOVB $0xf0, (AX) 8728 MOVB DL, 1(AX) 8729 ADDQ $0x02, AX 8730 CMPL DX, $0x40 8731 JL memmove_emit_remainder_encodeBetterBlockAsm8B 8732 JMP 
memmove_long_emit_remainder_encodeBetterBlockAsm8B 8733 8734one_byte_emit_remainder_encodeBetterBlockAsm8B: 8735 SHLB $0x02, DL 8736 MOVB DL, (AX) 8737 ADDQ $0x01, AX 8738 8739memmove_emit_remainder_encodeBetterBlockAsm8B: 8740 LEAQ (AX)(SI*1), DX 8741 MOVL SI, BX 8742 8743 // genMemMoveShort 8744 CMPQ BX, $0x04 8745 JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4 8746 CMPQ BX, $0x08 8747 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7 8748 CMPQ BX, $0x10 8749 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16 8750 CMPQ BX, $0x20 8751 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32 8752 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 8753 8754emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4: 8755 MOVL (CX), SI 8756 MOVL SI, (AX) 8757 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 8758 8759emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: 8760 MOVL (CX), SI 8761 MOVL -4(CX)(BX*1), CX 8762 MOVL SI, (AX) 8763 MOVL CX, -4(AX)(BX*1) 8764 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 8765 8766emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: 8767 MOVQ (CX), SI 8768 MOVQ -8(CX)(BX*1), CX 8769 MOVQ SI, (AX) 8770 MOVQ CX, -8(AX)(BX*1) 8771 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 8772 8773emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: 8774 MOVOU (CX), X0 8775 MOVOU -16(CX)(BX*1), X1 8776 MOVOU X0, (AX) 8777 MOVOU X1, -16(AX)(BX*1) 8778 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 8779 8780emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: 8781 MOVOU (CX), X0 8782 MOVOU 16(CX), X1 8783 MOVOU -32(CX)(BX*1), X2 8784 MOVOU -16(CX)(BX*1), X3 8785 MOVOU X0, (AX) 8786 MOVOU X1, 16(AX) 8787 MOVOU X2, -32(AX)(BX*1) 8788 MOVOU 
X3, -16(AX)(BX*1) 8789 8790memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: 8791 MOVQ DX, AX 8792 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B 8793 8794memmove_long_emit_remainder_encodeBetterBlockAsm8B: 8795 LEAQ (AX)(SI*1), DX 8796 MOVL SI, BX 8797 8798 // genMemMoveLong 8799 MOVOU (CX), X0 8800 MOVOU 16(CX), X1 8801 MOVOU -32(CX)(BX*1), X2 8802 MOVOU -16(CX)(BX*1), X3 8803 MOVQ BX, DI 8804 SHRQ $0x05, DI 8805 MOVQ AX, SI 8806 ANDL $0x0000001f, SI 8807 MOVQ $0x00000040, R8 8808 SUBQ SI, R8 8809 DECQ DI 8810 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8811 LEAQ -32(CX)(R8*1), SI 8812 LEAQ -32(AX)(R8*1), R9 8813 8814emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: 8815 MOVOU (SI), X4 8816 MOVOU 16(SI), X5 8817 MOVOA X4, (R9) 8818 MOVOA X5, 16(R9) 8819 ADDQ $0x20, R9 8820 ADDQ $0x20, SI 8821 ADDQ $0x20, R8 8822 DECQ DI 8823 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back 8824 8825emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 8826 MOVOU -32(CX)(R8*1), X4 8827 MOVOU -16(CX)(R8*1), X5 8828 MOVOA X4, -32(AX)(R8*1) 8829 MOVOA X5, -16(AX)(R8*1) 8830 ADDQ $0x20, R8 8831 CMPQ BX, R8 8832 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8833 MOVOU X0, (AX) 8834 MOVOU X1, 16(AX) 8835 MOVOU X2, -32(AX)(BX*1) 8836 MOVOU X3, -16(AX)(BX*1) 8837 MOVQ DX, AX 8838 8839emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: 8840 MOVQ dst_base+0(FP), CX 8841 SUBQ CX, AX 8842 MOVQ AX, ret+48(FP) 8843 RET 8844 8845// func encodeSnappyBlockAsm(dst []byte, src []byte) int 8846// Requires: SSE2 8847TEXT ·encodeSnappyBlockAsm(SB), $65560-56 8848 MOVQ dst_base+0(FP), AX 8849 MOVQ $0x00000200, CX 8850 LEAQ 24(SP), DX 8851 PXOR X0, X0 8852 8853zero_loop_encodeSnappyBlockAsm: 8854 MOVOU X0, (DX) 8855 MOVOU X0, 16(DX) 8856 MOVOU X0, 32(DX) 8857 MOVOU X0, 48(DX) 8858 MOVOU X0, 64(DX) 
8859 MOVOU X0, 80(DX) 8860 MOVOU X0, 96(DX) 8861 MOVOU X0, 112(DX) 8862 ADDQ $0x80, DX 8863 DECQ CX 8864 JNZ zero_loop_encodeSnappyBlockAsm 8865 MOVL $0x00000000, 12(SP) 8866 MOVQ src_len+32(FP), CX 8867 LEAQ -9(CX), DX 8868 LEAQ -8(CX), SI 8869 MOVL SI, 8(SP) 8870 SHRQ $0x05, CX 8871 SUBL CX, DX 8872 LEAQ (AX)(DX*1), DX 8873 MOVQ DX, (SP) 8874 MOVL $0x00000001, CX 8875 MOVL CX, 16(SP) 8876 MOVQ src_base+24(FP), DX 8877 8878search_loop_encodeSnappyBlockAsm: 8879 MOVL CX, SI 8880 SUBL 12(SP), SI 8881 SHRL $0x06, SI 8882 LEAL 4(CX)(SI*1), SI 8883 CMPL SI, 8(SP) 8884 JGE emit_remainder_encodeSnappyBlockAsm 8885 MOVQ (DX)(CX*1), DI 8886 MOVL SI, 20(SP) 8887 MOVQ $0x0000cf1bbcdcbf9b, R9 8888 MOVQ DI, R10 8889 MOVQ DI, R11 8890 SHRQ $0x08, R11 8891 SHLQ $0x10, R10 8892 IMULQ R9, R10 8893 SHRQ $0x32, R10 8894 SHLQ $0x10, R11 8895 IMULQ R9, R11 8896 SHRQ $0x32, R11 8897 MOVL 24(SP)(R10*4), SI 8898 MOVL 24(SP)(R11*4), R8 8899 MOVL CX, 24(SP)(R10*4) 8900 LEAL 1(CX), R10 8901 MOVL R10, 24(SP)(R11*4) 8902 MOVQ DI, R10 8903 SHRQ $0x10, R10 8904 SHLQ $0x10, R10 8905 IMULQ R9, R10 8906 SHRQ $0x32, R10 8907 MOVL CX, R9 8908 SUBL 16(SP), R9 8909 MOVL 1(DX)(R9*1), R11 8910 MOVQ DI, R9 8911 SHRQ $0x08, R9 8912 CMPL R9, R11 8913 JNE no_repeat_found_encodeSnappyBlockAsm 8914 LEAL 1(CX), DI 8915 MOVL 12(SP), SI 8916 MOVL DI, R8 8917 SUBL 16(SP), R8 8918 JZ repeat_extend_back_end_encodeSnappyBlockAsm 8919 8920repeat_extend_back_loop_encodeSnappyBlockAsm: 8921 CMPL DI, SI 8922 JLE repeat_extend_back_end_encodeSnappyBlockAsm 8923 MOVB -1(DX)(R8*1), BL 8924 MOVB -1(DX)(DI*1), R9 8925 CMPB BL, R9 8926 JNE repeat_extend_back_end_encodeSnappyBlockAsm 8927 LEAL -1(DI), DI 8928 DECL R8 8929 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm 8930 8931repeat_extend_back_end_encodeSnappyBlockAsm: 8932 MOVL 12(SP), SI 8933 CMPL SI, DI 8934 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm 8935 MOVL DI, R8 8936 MOVL DI, 12(SP) 8937 LEAQ (DX)(SI*1), R9 8938 SUBL SI, R8 8939 LEAL -1(R8), SI 8940 
CMPL SI, $0x3c 8941 JLT one_byte_repeat_emit_encodeSnappyBlockAsm 8942 CMPL SI, $0x00000100 8943 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm 8944 CMPL SI, $0x00010000 8945 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm 8946 CMPL SI, $0x01000000 8947 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm 8948 MOVB $0xfc, (AX) 8949 MOVL SI, 1(AX) 8950 ADDQ $0x05, AX 8951 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 8952 8953four_bytes_repeat_emit_encodeSnappyBlockAsm: 8954 MOVL SI, R10 8955 SHRL $0x10, R10 8956 MOVB $0xf8, (AX) 8957 MOVW SI, 1(AX) 8958 MOVB R10, 3(AX) 8959 ADDQ $0x04, AX 8960 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 8961 8962three_bytes_repeat_emit_encodeSnappyBlockAsm: 8963 MOVB $0xf4, (AX) 8964 MOVW SI, 1(AX) 8965 ADDQ $0x03, AX 8966 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 8967 8968two_bytes_repeat_emit_encodeSnappyBlockAsm: 8969 MOVB $0xf0, (AX) 8970 MOVB SI, 1(AX) 8971 ADDQ $0x02, AX 8972 CMPL SI, $0x40 8973 JL memmove_repeat_emit_encodeSnappyBlockAsm 8974 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 8975 8976one_byte_repeat_emit_encodeSnappyBlockAsm: 8977 SHLB $0x02, SI 8978 MOVB SI, (AX) 8979 ADDQ $0x01, AX 8980 8981memmove_repeat_emit_encodeSnappyBlockAsm: 8982 LEAQ (AX)(R8*1), SI 8983 8984 // genMemMoveShort 8985 CMPQ R8, $0x08 8986 JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 8987 CMPQ R8, $0x10 8988 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 8989 CMPQ R8, $0x20 8990 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 8991 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 8992 8993emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: 8994 MOVQ (R9), R10 8995 MOVQ R10, (AX) 8996 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 8997 8998emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: 8999 MOVQ (R9), R10 9000 MOVQ -8(R9)(R8*1), R9 9001 MOVQ R10, (AX) 9002 MOVQ 
R9, -8(AX)(R8*1) 9003 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9004 9005emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: 9006 MOVOU (R9), X0 9007 MOVOU -16(R9)(R8*1), X1 9008 MOVOU X0, (AX) 9009 MOVOU X1, -16(AX)(R8*1) 9010 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9011 9012emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: 9013 MOVOU (R9), X0 9014 MOVOU 16(R9), X1 9015 MOVOU -32(R9)(R8*1), X2 9016 MOVOU -16(R9)(R8*1), X3 9017 MOVOU X0, (AX) 9018 MOVOU X1, 16(AX) 9019 MOVOU X2, -32(AX)(R8*1) 9020 MOVOU X3, -16(AX)(R8*1) 9021 9022memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: 9023 MOVQ SI, AX 9024 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm 9025 9026memmove_long_repeat_emit_encodeSnappyBlockAsm: 9027 LEAQ (AX)(R8*1), SI 9028 9029 // genMemMoveLong 9030 MOVOU (R9), X0 9031 MOVOU 16(R9), X1 9032 MOVOU -32(R9)(R8*1), X2 9033 MOVOU -16(R9)(R8*1), X3 9034 MOVQ R8, R11 9035 SHRQ $0x05, R11 9036 MOVQ AX, R10 9037 ANDL $0x0000001f, R10 9038 MOVQ $0x00000040, R12 9039 SUBQ R10, R12 9040 DECQ R11 9041 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9042 LEAQ -32(R9)(R12*1), R10 9043 LEAQ -32(AX)(R12*1), R13 9044 9045emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: 9046 MOVOU (R10), X4 9047 MOVOU 16(R10), X5 9048 MOVOA X4, (R13) 9049 MOVOA X5, 16(R13) 9050 ADDQ $0x20, R13 9051 ADDQ $0x20, R10 9052 ADDQ $0x20, R12 9053 DECQ R11 9054 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back 9055 9056emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 9057 MOVOU -32(R9)(R12*1), X4 9058 MOVOU -16(R9)(R12*1), X5 9059 MOVOA X4, -32(AX)(R12*1) 9060 MOVOA X5, -16(AX)(R12*1) 9061 ADDQ $0x20, R12 9062 CMPQ R8, R12 9063 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9064 MOVOU X0, (AX) 9065 MOVOU X1, 16(AX) 9066 MOVOU X2, -32(AX)(R8*1) 9067 MOVOU X3, 
-16(AX)(R8*1) 9068 MOVQ SI, AX 9069 9070emit_literal_done_repeat_emit_encodeSnappyBlockAsm: 9071 ADDL $0x05, CX 9072 MOVL CX, SI 9073 SUBL 16(SP), SI 9074 MOVQ src_len+32(FP), R8 9075 SUBL CX, R8 9076 LEAQ (DX)(CX*1), R9 9077 LEAQ (DX)(SI*1), SI 9078 9079 // matchLen 9080 XORL R11, R11 9081 CMPL R8, $0x08 9082 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm 9083 9084matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: 9085 MOVQ (R9)(R11*1), R10 9086 XORQ (SI)(R11*1), R10 9087 TESTQ R10, R10 9088 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm 9089 BSFQ R10, R10 9090 SARQ $0x03, R10 9091 LEAL (R11)(R10*1), R11 9092 JMP repeat_extend_forward_end_encodeSnappyBlockAsm 9093 9094matchlen_loop_repeat_extend_encodeSnappyBlockAsm: 9095 LEAL -8(R8), R8 9096 LEAL 8(R11), R11 9097 CMPL R8, $0x08 9098 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm 9099 9100matchlen_single_repeat_extend_encodeSnappyBlockAsm: 9101 TESTL R8, R8 9102 JZ repeat_extend_forward_end_encodeSnappyBlockAsm 9103 9104matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm: 9105 MOVB (R9)(R11*1), R10 9106 CMPB (SI)(R11*1), R10 9107 JNE repeat_extend_forward_end_encodeSnappyBlockAsm 9108 LEAL 1(R11), R11 9109 DECL R8 9110 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm 9111 9112repeat_extend_forward_end_encodeSnappyBlockAsm: 9113 ADDL R11, CX 9114 MOVL CX, SI 9115 SUBL DI, SI 9116 MOVL 16(SP), DI 9117 9118 // emitCopy 9119 CMPL DI, $0x00010000 9120 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm 9121 9122four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: 9123 CMPL SI, $0x40 9124 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm 9125 MOVB $0xff, (AX) 9126 MOVL DI, 1(AX) 9127 LEAL -64(SI), SI 9128 ADDQ $0x05, AX 9129 CMPL SI, $0x04 9130 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm 9131 JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm 9132 9133four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: 9134 TESTL SI, SI 9135 JZ 
repeat_end_emit_encodeSnappyBlockAsm 9136 MOVB $0x03, BL 9137 LEAL -4(BX)(SI*4), SI 9138 MOVB SI, (AX) 9139 MOVL DI, 1(AX) 9140 ADDQ $0x05, AX 9141 JMP repeat_end_emit_encodeSnappyBlockAsm 9142 9143two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: 9144 CMPL SI, $0x40 9145 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm 9146 MOVB $0xee, (AX) 9147 MOVW DI, 1(AX) 9148 LEAL -60(SI), SI 9149 ADDQ $0x03, AX 9150 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm 9151 9152two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: 9153 CMPL SI, $0x0c 9154 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm 9155 CMPL DI, $0x00000800 9156 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm 9157 MOVB $0x01, BL 9158 LEAL -16(BX)(SI*4), SI 9159 MOVB DI, 1(AX) 9160 SHRL $0x08, DI 9161 SHLL $0x05, DI 9162 ORL DI, SI 9163 MOVB SI, (AX) 9164 ADDQ $0x02, AX 9165 JMP repeat_end_emit_encodeSnappyBlockAsm 9166 9167emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: 9168 MOVB $0x02, BL 9169 LEAL -4(BX)(SI*4), SI 9170 MOVB SI, (AX) 9171 MOVW DI, 1(AX) 9172 ADDQ $0x03, AX 9173 9174repeat_end_emit_encodeSnappyBlockAsm: 9175 MOVL CX, 12(SP) 9176 JMP search_loop_encodeSnappyBlockAsm 9177 9178no_repeat_found_encodeSnappyBlockAsm: 9179 CMPL (DX)(SI*1), DI 9180 JEQ candidate_match_encodeSnappyBlockAsm 9181 SHRQ $0x08, DI 9182 MOVL 24(SP)(R10*4), SI 9183 LEAL 2(CX), R9 9184 CMPL (DX)(R8*1), DI 9185 JEQ candidate2_match_encodeSnappyBlockAsm 9186 MOVL R9, 24(SP)(R10*4) 9187 SHRQ $0x08, DI 9188 CMPL (DX)(SI*1), DI 9189 JEQ candidate3_match_encodeSnappyBlockAsm 9190 MOVL 20(SP), CX 9191 JMP search_loop_encodeSnappyBlockAsm 9192 9193candidate3_match_encodeSnappyBlockAsm: 9194 ADDL $0x02, CX 9195 JMP candidate_match_encodeSnappyBlockAsm 9196 9197candidate2_match_encodeSnappyBlockAsm: 9198 MOVL R9, 24(SP)(R10*4) 9199 INCL CX 9200 MOVL R8, SI 9201 9202candidate_match_encodeSnappyBlockAsm: 9203 MOVL 12(SP), DI 9204 TESTL SI, SI 9205 JZ 
match_extend_back_end_encodeSnappyBlockAsm 9206 9207match_extend_back_loop_encodeSnappyBlockAsm: 9208 CMPL CX, DI 9209 JLE match_extend_back_end_encodeSnappyBlockAsm 9210 MOVB -1(DX)(SI*1), BL 9211 MOVB -1(DX)(CX*1), R8 9212 CMPB BL, R8 9213 JNE match_extend_back_end_encodeSnappyBlockAsm 9214 LEAL -1(CX), CX 9215 DECL SI 9216 JZ match_extend_back_end_encodeSnappyBlockAsm 9217 JMP match_extend_back_loop_encodeSnappyBlockAsm 9218 9219match_extend_back_end_encodeSnappyBlockAsm: 9220 MOVL CX, DI 9221 SUBL 12(SP), DI 9222 LEAQ 5(AX)(DI*1), DI 9223 CMPQ DI, (SP) 9224 JL match_dst_size_check_encodeSnappyBlockAsm 9225 MOVQ $0x00000000, ret+48(FP) 9226 RET 9227 9228match_dst_size_check_encodeSnappyBlockAsm: 9229 MOVL CX, DI 9230 MOVL 12(SP), R8 9231 CMPL R8, DI 9232 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm 9233 MOVL DI, R9 9234 MOVL DI, 12(SP) 9235 LEAQ (DX)(R8*1), DI 9236 SUBL R8, R9 9237 LEAL -1(R9), R8 9238 CMPL R8, $0x3c 9239 JLT one_byte_match_emit_encodeSnappyBlockAsm 9240 CMPL R8, $0x00000100 9241 JLT two_bytes_match_emit_encodeSnappyBlockAsm 9242 CMPL R8, $0x00010000 9243 JLT three_bytes_match_emit_encodeSnappyBlockAsm 9244 CMPL R8, $0x01000000 9245 JLT four_bytes_match_emit_encodeSnappyBlockAsm 9246 MOVB $0xfc, (AX) 9247 MOVL R8, 1(AX) 9248 ADDQ $0x05, AX 9249 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9250 9251four_bytes_match_emit_encodeSnappyBlockAsm: 9252 MOVL R8, R10 9253 SHRL $0x10, R10 9254 MOVB $0xf8, (AX) 9255 MOVW R8, 1(AX) 9256 MOVB R10, 3(AX) 9257 ADDQ $0x04, AX 9258 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9259 9260three_bytes_match_emit_encodeSnappyBlockAsm: 9261 MOVB $0xf4, (AX) 9262 MOVW R8, 1(AX) 9263 ADDQ $0x03, AX 9264 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9265 9266two_bytes_match_emit_encodeSnappyBlockAsm: 9267 MOVB $0xf0, (AX) 9268 MOVB R8, 1(AX) 9269 ADDQ $0x02, AX 9270 CMPL R8, $0x40 9271 JL memmove_match_emit_encodeSnappyBlockAsm 9272 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9273 
9274one_byte_match_emit_encodeSnappyBlockAsm: 9275 SHLB $0x02, R8 9276 MOVB R8, (AX) 9277 ADDQ $0x01, AX 9278 9279memmove_match_emit_encodeSnappyBlockAsm: 9280 LEAQ (AX)(R9*1), R8 9281 9282 // genMemMoveShort 9283 CMPQ R9, $0x08 9284 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 9285 CMPQ R9, $0x10 9286 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 9287 CMPQ R9, $0x20 9288 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 9289 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 9290 9291emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: 9292 MOVQ (DI), R10 9293 MOVQ R10, (AX) 9294 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9295 9296emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: 9297 MOVQ (DI), R10 9298 MOVQ -8(DI)(R9*1), DI 9299 MOVQ R10, (AX) 9300 MOVQ DI, -8(AX)(R9*1) 9301 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9302 9303emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: 9304 MOVOU (DI), X0 9305 MOVOU -16(DI)(R9*1), X1 9306 MOVOU X0, (AX) 9307 MOVOU X1, -16(AX)(R9*1) 9308 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9309 9310emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: 9311 MOVOU (DI), X0 9312 MOVOU 16(DI), X1 9313 MOVOU -32(DI)(R9*1), X2 9314 MOVOU -16(DI)(R9*1), X3 9315 MOVOU X0, (AX) 9316 MOVOU X1, 16(AX) 9317 MOVOU X2, -32(AX)(R9*1) 9318 MOVOU X3, -16(AX)(R9*1) 9319 9320memmove_end_copy_match_emit_encodeSnappyBlockAsm: 9321 MOVQ R8, AX 9322 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm 9323 9324memmove_long_match_emit_encodeSnappyBlockAsm: 9325 LEAQ (AX)(R9*1), R8 9326 9327 // genMemMoveLong 9328 MOVOU (DI), X0 9329 MOVOU 16(DI), X1 9330 MOVOU -32(DI)(R9*1), X2 9331 MOVOU -16(DI)(R9*1), X3 9332 MOVQ R9, R11 9333 SHRQ $0x05, R11 9334 MOVQ AX, R10 9335 ANDL $0x0000001f, R10 9336 MOVQ $0x00000040, R12 9337 SUBQ R10, R12 9338 
DECQ R11 9339 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9340 LEAQ -32(DI)(R12*1), R10 9341 LEAQ -32(AX)(R12*1), R13 9342 9343emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: 9344 MOVOU (R10), X4 9345 MOVOU 16(R10), X5 9346 MOVOA X4, (R13) 9347 MOVOA X5, 16(R13) 9348 ADDQ $0x20, R13 9349 ADDQ $0x20, R10 9350 ADDQ $0x20, R12 9351 DECQ R11 9352 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back 9353 9354emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 9355 MOVOU -32(DI)(R12*1), X4 9356 MOVOU -16(DI)(R12*1), X5 9357 MOVOA X4, -32(AX)(R12*1) 9358 MOVOA X5, -16(AX)(R12*1) 9359 ADDQ $0x20, R12 9360 CMPQ R9, R12 9361 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9362 MOVOU X0, (AX) 9363 MOVOU X1, 16(AX) 9364 MOVOU X2, -32(AX)(R9*1) 9365 MOVOU X3, -16(AX)(R9*1) 9366 MOVQ R8, AX 9367 9368emit_literal_done_match_emit_encodeSnappyBlockAsm: 9369match_nolit_loop_encodeSnappyBlockAsm: 9370 MOVL CX, DI 9371 SUBL SI, DI 9372 MOVL DI, 16(SP) 9373 ADDL $0x04, CX 9374 ADDL $0x04, SI 9375 MOVQ src_len+32(FP), DI 9376 SUBL CX, DI 9377 LEAQ (DX)(CX*1), R8 9378 LEAQ (DX)(SI*1), SI 9379 9380 // matchLen 9381 XORL R10, R10 9382 CMPL DI, $0x08 9383 JL matchlen_single_match_nolit_encodeSnappyBlockAsm 9384 9385matchlen_loopback_match_nolit_encodeSnappyBlockAsm: 9386 MOVQ (R8)(R10*1), R9 9387 XORQ (SI)(R10*1), R9 9388 TESTQ R9, R9 9389 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm 9390 BSFQ R9, R9 9391 SARQ $0x03, R9 9392 LEAL (R10)(R9*1), R10 9393 JMP match_nolit_end_encodeSnappyBlockAsm 9394 9395matchlen_loop_match_nolit_encodeSnappyBlockAsm: 9396 LEAL -8(DI), DI 9397 LEAL 8(R10), R10 9398 CMPL DI, $0x08 9399 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm 9400 9401matchlen_single_match_nolit_encodeSnappyBlockAsm: 9402 TESTL DI, DI 9403 JZ match_nolit_end_encodeSnappyBlockAsm 9404 
9405matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm: 9406 MOVB (R8)(R10*1), R9 9407 CMPB (SI)(R10*1), R9 9408 JNE match_nolit_end_encodeSnappyBlockAsm 9409 LEAL 1(R10), R10 9410 DECL DI 9411 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm 9412 9413match_nolit_end_encodeSnappyBlockAsm: 9414 ADDL R10, CX 9415 MOVL 16(SP), SI 9416 ADDL $0x04, R10 9417 MOVL CX, 12(SP) 9418 9419 // emitCopy 9420 CMPL SI, $0x00010000 9421 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm 9422 9423four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: 9424 CMPL R10, $0x40 9425 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm 9426 MOVB $0xff, (AX) 9427 MOVL SI, 1(AX) 9428 LEAL -64(R10), R10 9429 ADDQ $0x05, AX 9430 CMPL R10, $0x04 9431 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm 9432 JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm 9433 9434four_bytes_remain_match_nolit_encodeSnappyBlockAsm: 9435 TESTL R10, R10 9436 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm 9437 MOVB $0x03, BL 9438 LEAL -4(BX)(R10*4), R10 9439 MOVB R10, (AX) 9440 MOVL SI, 1(AX) 9441 ADDQ $0x05, AX 9442 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm 9443 9444two_byte_offset_match_nolit_encodeSnappyBlockAsm: 9445 CMPL R10, $0x40 9446 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm 9447 MOVB $0xee, (AX) 9448 MOVW SI, 1(AX) 9449 LEAL -60(R10), R10 9450 ADDQ $0x03, AX 9451 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm 9452 9453two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: 9454 CMPL R10, $0x0c 9455 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm 9456 CMPL SI, $0x00000800 9457 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm 9458 MOVB $0x01, BL 9459 LEAL -16(BX)(R10*4), R10 9460 MOVB SI, 1(AX) 9461 SHRL $0x08, SI 9462 SHLL $0x05, SI 9463 ORL SI, R10 9464 MOVB R10, (AX) 9465 ADDQ $0x02, AX 9466 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm 9467 9468emit_copy_three_match_nolit_encodeSnappyBlockAsm: 9469 MOVB $0x02, BL 9470 LEAL 
-4(BX)(R10*4), R10 9471 MOVB R10, (AX) 9472 MOVW SI, 1(AX) 9473 ADDQ $0x03, AX 9474 9475match_nolit_emitcopy_end_encodeSnappyBlockAsm: 9476 CMPL CX, 8(SP) 9477 JGE emit_remainder_encodeSnappyBlockAsm 9478 MOVQ -2(DX)(CX*1), DI 9479 CMPQ AX, (SP) 9480 JL match_nolit_dst_ok_encodeSnappyBlockAsm 9481 MOVQ $0x00000000, ret+48(FP) 9482 RET 9483 9484match_nolit_dst_ok_encodeSnappyBlockAsm: 9485 MOVQ $0x0000cf1bbcdcbf9b, R9 9486 MOVQ DI, R8 9487 SHRQ $0x10, DI 9488 MOVQ DI, SI 9489 SHLQ $0x10, R8 9490 IMULQ R9, R8 9491 SHRQ $0x32, R8 9492 SHLQ $0x10, SI 9493 IMULQ R9, SI 9494 SHRQ $0x32, SI 9495 LEAL -2(CX), R9 9496 LEAQ 24(SP)(SI*4), R10 9497 MOVL (R10), SI 9498 MOVL R9, 24(SP)(R8*4) 9499 MOVL CX, (R10) 9500 CMPL (DX)(SI*1), DI 9501 JEQ match_nolit_loop_encodeSnappyBlockAsm 9502 INCL CX 9503 JMP search_loop_encodeSnappyBlockAsm 9504 9505emit_remainder_encodeSnappyBlockAsm: 9506 MOVQ src_len+32(FP), CX 9507 SUBL 12(SP), CX 9508 LEAQ 5(AX)(CX*1), CX 9509 CMPQ CX, (SP) 9510 JL emit_remainder_ok_encodeSnappyBlockAsm 9511 MOVQ $0x00000000, ret+48(FP) 9512 RET 9513 9514emit_remainder_ok_encodeSnappyBlockAsm: 9515 MOVQ src_len+32(FP), CX 9516 MOVL 12(SP), BX 9517 CMPL BX, CX 9518 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm 9519 MOVL CX, SI 9520 MOVL CX, 12(SP) 9521 LEAQ (DX)(BX*1), CX 9522 SUBL BX, SI 9523 LEAL -1(SI), DX 9524 CMPL DX, $0x3c 9525 JLT one_byte_emit_remainder_encodeSnappyBlockAsm 9526 CMPL DX, $0x00000100 9527 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm 9528 CMPL DX, $0x00010000 9529 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm 9530 CMPL DX, $0x01000000 9531 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm 9532 MOVB $0xfc, (AX) 9533 MOVL DX, 1(AX) 9534 ADDQ $0x05, AX 9535 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 9536 9537four_bytes_emit_remainder_encodeSnappyBlockAsm: 9538 MOVL DX, BX 9539 SHRL $0x10, BX 9540 MOVB $0xf8, (AX) 9541 MOVW DX, 1(AX) 9542 MOVB BL, 3(AX) 9543 ADDQ $0x04, AX 9544 JMP 
memmove_long_emit_remainder_encodeSnappyBlockAsm 9545 9546three_bytes_emit_remainder_encodeSnappyBlockAsm: 9547 MOVB $0xf4, (AX) 9548 MOVW DX, 1(AX) 9549 ADDQ $0x03, AX 9550 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 9551 9552two_bytes_emit_remainder_encodeSnappyBlockAsm: 9553 MOVB $0xf0, (AX) 9554 MOVB DL, 1(AX) 9555 ADDQ $0x02, AX 9556 CMPL DX, $0x40 9557 JL memmove_emit_remainder_encodeSnappyBlockAsm 9558 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 9559 9560one_byte_emit_remainder_encodeSnappyBlockAsm: 9561 SHLB $0x02, DL 9562 MOVB DL, (AX) 9563 ADDQ $0x01, AX 9564 9565memmove_emit_remainder_encodeSnappyBlockAsm: 9566 LEAQ (AX)(SI*1), DX 9567 MOVL SI, BX 9568 9569 // genMemMoveShort 9570 CMPQ BX, $0x08 9571 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8 9572 CMPQ BX, $0x10 9573 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16 9574 CMPQ BX, $0x20 9575 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32 9576 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 9577 9578emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8: 9579 MOVQ (CX), SI 9580 MOVQ SI, (AX) 9581 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 9582 9583emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: 9584 MOVQ (CX), SI 9585 MOVQ -8(CX)(BX*1), CX 9586 MOVQ SI, (AX) 9587 MOVQ CX, -8(AX)(BX*1) 9588 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 9589 9590emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: 9591 MOVOU (CX), X0 9592 MOVOU -16(CX)(BX*1), X1 9593 MOVOU X0, (AX) 9594 MOVOU X1, -16(AX)(BX*1) 9595 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 9596 9597emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: 9598 MOVOU (CX), X0 9599 MOVOU 16(CX), X1 9600 MOVOU -32(CX)(BX*1), X2 9601 MOVOU -16(CX)(BX*1), X3 9602 MOVOU X0, (AX) 9603 MOVOU 
X1, 16(AX) 9604 MOVOU X2, -32(AX)(BX*1) 9605 MOVOU X3, -16(AX)(BX*1) 9606 9607memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: 9608 MOVQ DX, AX 9609 JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm 9610 9611memmove_long_emit_remainder_encodeSnappyBlockAsm: 9612 LEAQ (AX)(SI*1), DX 9613 MOVL SI, BX 9614 9615 // genMemMoveLong 9616 MOVOU (CX), X0 9617 MOVOU 16(CX), X1 9618 MOVOU -32(CX)(BX*1), X2 9619 MOVOU -16(CX)(BX*1), X3 9620 MOVQ BX, DI 9621 SHRQ $0x05, DI 9622 MOVQ AX, SI 9623 ANDL $0x0000001f, SI 9624 MOVQ $0x00000040, R8 9625 SUBQ SI, R8 9626 DECQ DI 9627 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9628 LEAQ -32(CX)(R8*1), SI 9629 LEAQ -32(AX)(R8*1), R9 9630 9631emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: 9632 MOVOU (SI), X4 9633 MOVOU 16(SI), X5 9634 MOVOA X4, (R9) 9635 MOVOA X5, 16(R9) 9636 ADDQ $0x20, R9 9637 ADDQ $0x20, SI 9638 ADDQ $0x20, R8 9639 DECQ DI 9640 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back 9641 9642emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 9643 MOVOU -32(CX)(R8*1), X4 9644 MOVOU -16(CX)(R8*1), X5 9645 MOVOA X4, -32(AX)(R8*1) 9646 MOVOA X5, -16(AX)(R8*1) 9647 ADDQ $0x20, R8 9648 CMPQ BX, R8 9649 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9650 MOVOU X0, (AX) 9651 MOVOU X1, 16(AX) 9652 MOVOU X2, -32(AX)(BX*1) 9653 MOVOU X3, -16(AX)(BX*1) 9654 MOVQ DX, AX 9655 9656emit_literal_done_emit_remainder_encodeSnappyBlockAsm: 9657 MOVQ dst_base+0(FP), CX 9658 SUBQ CX, AX 9659 MOVQ AX, ret+48(FP) 9660 RET 9661

// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written, or 0 when the
// output would reach the destination limit computed below.
//
// Stack frame ($65560 bytes of locals, 56 bytes of arguments + result):
//   0(SP)   8 bytes  dst limit pointer: dst_base + (len(src) - 9 - len(src)>>5)
//   8(SP)   4 bytes  search limit: len(src) - 8
//   12(SP)  4 bytes  start of the not-yet-emitted literal run in src (starts at 0)
//   16(SP)  4 bytes  offset of the most recent match (starts at 1)
//   20(SP)  4 bytes  position the search resumes at when no candidate matches
//   24(SP)  65536 bytes  hash table: 16384 uint32 src positions, zeroed on entry
//
// Registers live across the main loop:
//   AX  current dst write pointer
//   CX  current src position (32-bit index)
//   DX  src base pointer
//
// NOTE(review): the wide loads/stores below (8-byte MOVQ, 16-byte MOVOU) can
// touch bytes past the exact length being copied; presumably the Go caller
// guarantees enough slack in dst and src — confirm against the stub's caller.
TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

// Zero the hash table: 0x200 iterations x 128 bytes = 65536 bytes.
zero_loop_encodeSnappyBlockAsm64K:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm64K

	// Initialise the bookkeeping slots described in the header.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

// Main search loop. CX is the current position s.
search_loop_encodeSnappyBlockAsm64K:
	// Next position to try on a miss: s + 4 + ((s - literal start) >> 6).
	// If that reaches the search limit, flush the remaining literals.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm64K

	// DI = 8 bytes at src[s]. Each hash keeps the low 6 bytes (SHLQ 16),
	// multiplies by the constant in R9 and keeps the top 14 bits (SHRQ 50).
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x10, R11
	IMULQ R9, R11
	SHRQ $0x32, R11

	// SI / R8 = previous table entries for the hashes at s and s+1; the
	// table is then updated with s and s+1.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash of the bytes starting at s+2 (used in no_repeat_found).
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10

	// Repeat check: compare the 4 bytes at s+1 with the 4 bytes at
	// s+1 - (last match offset).
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm64K

	// Repeat found. DI = s+1 (match start), R8 = DI - offset (its source),
	// SI = literal start. Skip backward extension when R8 is 0.
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm64K

// Extend the repeat backwards while DI is above the literal start, the
// preceding bytes agree, and R8 has not reached 0.
repeat_extend_back_loop_encodeSnappyBlockAsm64K:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm64K
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm64K
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K

// Emit the pending literals src[12(SP):DI] ahead of the repeat (if any).
repeat_extend_back_end_encodeSnappyBlockAsm64K:
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI

	// R8 = literal length, SI = length-1, R9 = literal source pointer.
	// Header form is chosen by length-1: < 60 -> 1 byte, < 256 -> 2 bytes
	// (0xf0 + 1 length byte), otherwise 3 bytes (0xf4 + 2 length bytes).
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm64K
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm64K
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K

two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	// length-1 below 64 still fits the short (<= 64 byte) copy routine.
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm64K
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K

one_byte_repeat_emit_encodeSnappyBlockAsm64K:
	// Header byte is (length-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm64K:
	// SI = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), SI

	// genMemMoveShort
	// Dispatch on length; each case uses overlapping head/tail moves.
	CMPQ R8, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
	// Always moves a full 8 bytes, even when the length is smaller.
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K

memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
	// SI = dst pointer after the literal bytes.
	LEAQ (AX)(R8*1), SI

	// genMemMoveLong
	// X0-X3 keep the first and last 32 source bytes; they are stored last
	// with unaligned moves. R12 = 64 - (dst & 31), so -32(AX)(R12*1) is
	// 32-byte aligned, which satisfies the MOVOA stores in the loops.
	// NOTE(review): DECQ leaves CF untouched, so the JA/JNA below read the
	// CF left by the preceding SUBQ/ADDQ — confirm this is intended.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
	// 32 bytes per iteration while R12 <= length (unsigned).
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

// Literals are out. Advance s by 5 (the 4 compared bytes started at s+1) and
// extend the repeat forwards.
emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R11 counts matching bytes between (R9) and (SI); R8 = bytes left in src.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm64K

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
	// Compare 8 bytes at a time; on a difference the index of the lowest
	// set bit of the XOR, divided by 8, is the number of equal bytes.
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K

matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K

matchlen_single_repeat_extend_encodeSnappyBlockAsm64K:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K:
	// Fewer than 8 bytes left: compare one byte at a time.
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K

repeat_extend_forward_end_encodeSnappyBlockAsm64K:
	// CX = end of match; SI = match length (end - start in DI);
	// DI = offset reloaded from 16(SP).
	ADDL R11, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI

	// emitCopy
	// While length > 64: write tag 0xee + 2-byte offset and subtract 60.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
	// Length >= 12 or offset >= 2048 needs the 3-byte form.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K

	// 2-byte form: tag = 1 + (length-4)*4 + ((offset>>8)<<5), then the low
	// offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm64K

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
	// 3-byte form: tag = 2 + (length-1)*4, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm64K:
	// Everything up to CX has been emitted.
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm64K

// No repeat: test the table candidates for s, s+1 and s+2 in turn.
// SI = candidate for s, R8 = candidate for s+1, R10 = hash slot for s+2,
// DI = the 8 bytes loaded at src[s].
no_repeat_found_encodeSnappyBlockAsm64K:
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBlockAsm64K
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeSnappyBlockAsm64K
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm64K

	// Nothing matched: continue at the position saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm64K

candidate3_match_encodeSnappyBlockAsm64K:
	// Match at s+2.
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm64K

candidate2_match_encodeSnappyBlockAsm64K:
	// Match at s+1: still record s+2 in the table, then use candidate R8.
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

// A candidate matched: CX = match position, SI = candidate position.
candidate_match_encodeSnappyBlockAsm64K:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBlockAsm64K

// Extend the match backwards while CX is above the literal start, the
// preceding bytes agree, and the candidate has not reached 0.
match_extend_back_loop_encodeSnappyBlockAsm64K:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBlockAsm64K
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBlockAsm64K
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBlockAsm64K
	JMP match_extend_back_loop_encodeSnappyBlockAsm64K

match_extend_back_end_encodeSnappyBlockAsm64K:
	// Return 0 unless dst + 3 + pending literal length is below the limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET

// Emit the pending literals src[12(SP):CX] ahead of the match (if any).
match_dst_size_check_encodeSnappyBlockAsm64K:
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8

	// R9 = literal length, R8 = length-1, DI = literal source pointer.
	// Same header selection as in the repeat path above.
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm64K
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm64K
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm64K

two_bytes_match_emit_encodeSnappyBlockAsm64K:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm64K
	JMP memmove_long_match_emit_encodeSnappyBlockAsm64K

one_byte_match_emit_encodeSnappyBlockAsm64K:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm64K:
	// R8 = dst pointer after the literal bytes.
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
	// Always moves a full 8 bytes, even when the length is smaller.
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K

emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K

memmove_long_match_emit_encodeSnappyBlockAsm64K:
	// R8 = dst pointer after the literal bytes.
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// Same scheme as the long copy in the repeat path: head/tail kept in
	// X0-X3, aligned 32-byte stores in between.
	// NOTE(review): JA/JNA after DECQ read CF from the preceding SUBQ/ADDQ
	// (DECQ does not write CF) — confirm this is intended.
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

// Match with no pending literals. CX = match position, SI = candidate.
// Also re-entered directly when the position after a copy matches at once.
emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
match_nolit_loop_encodeSnappyBlockAsm64K:
	// Offset = CX - SI, saved to 16(SP). Skip the 4 bytes already compared
	// and measure how much further the match runs.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 counts matching bytes between (R8) and (SI); DI = bytes left in src.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm64K

matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeSnappyBlockAsm64K

matchlen_loop_match_nolit_encodeSnappyBlockAsm64K:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K

matchlen_single_match_nolit_encodeSnappyBlockAsm64K:
	TESTL DI, DI
	JZ match_nolit_end_encodeSnappyBlockAsm64K

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeSnappyBlockAsm64K
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K

match_nolit_end_encodeSnappyBlockAsm64K:
	// CX = end of match; SI = offset; R10 = total length (extension + 4).
	// The literal start moves to the end of the match.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	// While length > 64: write tag 0xee + 2-byte offset and subtract 60.
two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
	// Length >= 12 or offset >= 2048 needs the 3-byte form.
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K

	// 2-byte form: tag = 1 + (length-4)*4 + ((offset>>8)<<5), then the low
	// offset byte.
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K

emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
	// 3-byte form: tag = 2 + (length-1)*4, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
	// Past the search limit: flush the tail. Otherwise load the 8 bytes at
	// s-2 and return 0 if dst has reached its limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm64K
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm64K:
	// Hash positions s-2 (R8) and s (SI), store both in the table, and take
	// the previous entry for s as the next candidate.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	SHLQ $0x10, SI
	IMULQ R9, SI
	SHRQ $0x32, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)

	// If the candidate's 4 bytes equal those at s, emit another copy at
	// once; otherwise resume searching at s+1.
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeSnappyBlockAsm64K
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm64K

// Flush src[12(SP):len(src)] as a final literal run and return.
emit_remainder_encodeSnappyBlockAsm64K:
	// Return 0 unless dst + 3 + remaining length is below the limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm64K
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm64K:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX

	// SI = literal length, DX = length-1 (the src base in DX is no longer
	// needed), CX = literal source pointer.
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm64K
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm64K
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K

two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm64K
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K

one_byte_emit_remainder_encodeSnappyBlockAsm64K:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm64K:
	// DX = dst pointer after the literal bytes; BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x08
	JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8:
	// Always moves a full 8 bytes, even when the length is smaller.
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K

memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
	// DX = dst pointer after the literal bytes; BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Head/tail kept in X0-X3, aligned 32-byte stores in between.
	// NOTE(review): JA/JNA after DECQ read CF from the preceding SUBQ/ADDQ
	// (DECQ does not write CF) — confirm this is intended.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

// Return the number of bytes written: current dst pointer - dst base.
emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

10374// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int 10375// Requires: SSE2 10376TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 10377 MOVQ dst_base+0(FP), AX 10378 MOVQ $0x00000080, CX 10379 LEAQ 24(SP), DX 10380 PXOR X0, X0 10381 10382zero_loop_encodeSnappyBlockAsm12B: 10383 MOVOU X0, (DX) 10384 MOVOU X0, 16(DX) 10385 MOVOU X0, 32(DX) 10386 MOVOU X0, 48(DX) 10387 MOVOU X0, 64(DX) 10388 MOVOU X0, 80(DX) 10389 MOVOU X0, 96(DX) 10390 MOVOU X0, 112(DX) 10391 ADDQ $0x80, DX 10392 DECQ CX 10393 JNZ zero_loop_encodeSnappyBlockAsm12B 10394 MOVL $0x00000000, 12(SP) 10395 MOVQ src_len+32(FP), CX 10396 LEAQ -9(CX), DX 10397 LEAQ -8(CX), SI 10398 MOVL SI, 8(SP) 10399 SHRQ $0x05, CX 10400 SUBL CX,
DX 10401 LEAQ (AX)(DX*1), DX 10402 MOVQ DX, (SP) 10403 MOVL $0x00000001, CX 10404 MOVL CX, 16(SP) 10405 MOVQ src_base+24(FP), DX 10406 10407search_loop_encodeSnappyBlockAsm12B: 10408 MOVL CX, SI 10409 SUBL 12(SP), SI 10410 SHRL $0x05, SI 10411 LEAL 4(CX)(SI*1), SI 10412 CMPL SI, 8(SP) 10413 JGE emit_remainder_encodeSnappyBlockAsm12B 10414 MOVQ (DX)(CX*1), DI 10415 MOVL SI, 20(SP) 10416 MOVQ $0x000000cf1bbcdcbb, R9 10417 MOVQ DI, R10 10418 MOVQ DI, R11 10419 SHRQ $0x08, R11 10420 SHLQ $0x18, R10 10421 IMULQ R9, R10 10422 SHRQ $0x34, R10 10423 SHLQ $0x18, R11 10424 IMULQ R9, R11 10425 SHRQ $0x34, R11 10426 MOVL 24(SP)(R10*4), SI 10427 MOVL 24(SP)(R11*4), R8 10428 MOVL CX, 24(SP)(R10*4) 10429 LEAL 1(CX), R10 10430 MOVL R10, 24(SP)(R11*4) 10431 MOVQ DI, R10 10432 SHRQ $0x10, R10 10433 SHLQ $0x18, R10 10434 IMULQ R9, R10 10435 SHRQ $0x34, R10 10436 MOVL CX, R9 10437 SUBL 16(SP), R9 10438 MOVL 1(DX)(R9*1), R11 10439 MOVQ DI, R9 10440 SHRQ $0x08, R9 10441 CMPL R9, R11 10442 JNE no_repeat_found_encodeSnappyBlockAsm12B 10443 LEAL 1(CX), DI 10444 MOVL 12(SP), SI 10445 MOVL DI, R8 10446 SUBL 16(SP), R8 10447 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B 10448 10449repeat_extend_back_loop_encodeSnappyBlockAsm12B: 10450 CMPL DI, SI 10451 JLE repeat_extend_back_end_encodeSnappyBlockAsm12B 10452 MOVB -1(DX)(R8*1), BL 10453 MOVB -1(DX)(DI*1), R9 10454 CMPB BL, R9 10455 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B 10456 LEAL -1(DI), DI 10457 DECL R8 10458 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B 10459 10460repeat_extend_back_end_encodeSnappyBlockAsm12B: 10461 MOVL 12(SP), SI 10462 CMPL SI, DI 10463 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B 10464 MOVL DI, R8 10465 MOVL DI, 12(SP) 10466 LEAQ (DX)(SI*1), R9 10467 SUBL SI, R8 10468 LEAL -1(R8), SI 10469 CMPL SI, $0x3c 10470 JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B 10471 CMPL SI, $0x00000100 10472 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B 10473 MOVB $0xf4, (AX) 10474 MOVW SI, 1(AX) 
10475 ADDQ $0x03, AX 10476 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B 10477 10478two_bytes_repeat_emit_encodeSnappyBlockAsm12B: 10479 MOVB $0xf0, (AX) 10480 MOVB SI, 1(AX) 10481 ADDQ $0x02, AX 10482 CMPL SI, $0x40 10483 JL memmove_repeat_emit_encodeSnappyBlockAsm12B 10484 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B 10485 10486one_byte_repeat_emit_encodeSnappyBlockAsm12B: 10487 SHLB $0x02, SI 10488 MOVB SI, (AX) 10489 ADDQ $0x01, AX 10490 10491memmove_repeat_emit_encodeSnappyBlockAsm12B: 10492 LEAQ (AX)(R8*1), SI 10493 10494 // genMemMoveShort 10495 CMPQ R8, $0x08 10496 JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 10497 CMPQ R8, $0x10 10498 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 10499 CMPQ R8, $0x20 10500 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 10501 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 10502 10503emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: 10504 MOVQ (R9), R10 10505 MOVQ R10, (AX) 10506 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10507 10508emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: 10509 MOVQ (R9), R10 10510 MOVQ -8(R9)(R8*1), R9 10511 MOVQ R10, (AX) 10512 MOVQ R9, -8(AX)(R8*1) 10513 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10514 10515emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: 10516 MOVOU (R9), X0 10517 MOVOU -16(R9)(R8*1), X1 10518 MOVOU X0, (AX) 10519 MOVOU X1, -16(AX)(R8*1) 10520 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10521 10522emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: 10523 MOVOU (R9), X0 10524 MOVOU 16(R9), X1 10525 MOVOU -32(R9)(R8*1), X2 10526 MOVOU -16(R9)(R8*1), X3 10527 MOVOU X0, (AX) 10528 MOVOU X1, 16(AX) 10529 MOVOU X2, -32(AX)(R8*1) 10530 MOVOU X3, -16(AX)(R8*1) 10531 
10532memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: 10533 MOVQ SI, AX 10534 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B 10535 10536memmove_long_repeat_emit_encodeSnappyBlockAsm12B: 10537 LEAQ (AX)(R8*1), SI 10538 10539 // genMemMoveLong 10540 MOVOU (R9), X0 10541 MOVOU 16(R9), X1 10542 MOVOU -32(R9)(R8*1), X2 10543 MOVOU -16(R9)(R8*1), X3 10544 MOVQ R8, R11 10545 SHRQ $0x05, R11 10546 MOVQ AX, R10 10547 ANDL $0x0000001f, R10 10548 MOVQ $0x00000040, R12 10549 SUBQ R10, R12 10550 DECQ R11 10551 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10552 LEAQ -32(R9)(R12*1), R10 10553 LEAQ -32(AX)(R12*1), R13 10554 10555emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: 10556 MOVOU (R10), X4 10557 MOVOU 16(R10), X5 10558 MOVOA X4, (R13) 10559 MOVOA X5, 16(R13) 10560 ADDQ $0x20, R13 10561 ADDQ $0x20, R10 10562 ADDQ $0x20, R12 10563 DECQ R11 10564 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back 10565 10566emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 10567 MOVOU -32(R9)(R12*1), X4 10568 MOVOU -16(R9)(R12*1), X5 10569 MOVOA X4, -32(AX)(R12*1) 10570 MOVOA X5, -16(AX)(R12*1) 10571 ADDQ $0x20, R12 10572 CMPQ R8, R12 10573 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10574 MOVOU X0, (AX) 10575 MOVOU X1, 16(AX) 10576 MOVOU X2, -32(AX)(R8*1) 10577 MOVOU X3, -16(AX)(R8*1) 10578 MOVQ SI, AX 10579 10580emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: 10581 ADDL $0x05, CX 10582 MOVL CX, SI 10583 SUBL 16(SP), SI 10584 MOVQ src_len+32(FP), R8 10585 SUBL CX, R8 10586 LEAQ (DX)(CX*1), R9 10587 LEAQ (DX)(SI*1), SI 10588 10589 // matchLen 10590 XORL R11, R11 10591 CMPL R8, $0x08 10592 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B 10593 10594matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: 10595 MOVQ (R9)(R11*1), R10 10596 XORQ (SI)(R11*1), R10 10597 TESTQ R10, R10 10598 JZ 
matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B 10599 BSFQ R10, R10 10600 SARQ $0x03, R10 10601 LEAL (R11)(R10*1), R11 10602 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B 10603 10604matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: 10605 LEAL -8(R8), R8 10606 LEAL 8(R11), R11 10607 CMPL R8, $0x08 10608 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B 10609 10610matchlen_single_repeat_extend_encodeSnappyBlockAsm12B: 10611 TESTL R8, R8 10612 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B 10613 10614matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B: 10615 MOVB (R9)(R11*1), R10 10616 CMPB (SI)(R11*1), R10 10617 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B 10618 LEAL 1(R11), R11 10619 DECL R8 10620 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B 10621 10622repeat_extend_forward_end_encodeSnappyBlockAsm12B: 10623 ADDL R11, CX 10624 MOVL CX, SI 10625 SUBL DI, SI 10626 MOVL 16(SP), DI 10627 10628 // emitCopy 10629two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: 10630 CMPL SI, $0x40 10631 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B 10632 MOVB $0xee, (AX) 10633 MOVW DI, 1(AX) 10634 LEAL -60(SI), SI 10635 ADDQ $0x03, AX 10636 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B 10637 10638two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: 10639 CMPL SI, $0x0c 10640 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B 10641 CMPL DI, $0x00000800 10642 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B 10643 MOVB $0x01, BL 10644 LEAL -16(BX)(SI*4), SI 10645 MOVB DI, 1(AX) 10646 SHRL $0x08, DI 10647 SHLL $0x05, DI 10648 ORL DI, SI 10649 MOVB SI, (AX) 10650 ADDQ $0x02, AX 10651 JMP repeat_end_emit_encodeSnappyBlockAsm12B 10652 10653emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: 10654 MOVB $0x02, BL 10655 LEAL -4(BX)(SI*4), SI 10656 MOVB SI, (AX) 10657 MOVW DI, 1(AX) 10658 ADDQ $0x03, AX 10659 10660repeat_end_emit_encodeSnappyBlockAsm12B: 
10661 MOVL CX, 12(SP) 10662 JMP search_loop_encodeSnappyBlockAsm12B 10663 10664no_repeat_found_encodeSnappyBlockAsm12B: 10665 CMPL (DX)(SI*1), DI 10666 JEQ candidate_match_encodeSnappyBlockAsm12B 10667 SHRQ $0x08, DI 10668 MOVL 24(SP)(R10*4), SI 10669 LEAL 2(CX), R9 10670 CMPL (DX)(R8*1), DI 10671 JEQ candidate2_match_encodeSnappyBlockAsm12B 10672 MOVL R9, 24(SP)(R10*4) 10673 SHRQ $0x08, DI 10674 CMPL (DX)(SI*1), DI 10675 JEQ candidate3_match_encodeSnappyBlockAsm12B 10676 MOVL 20(SP), CX 10677 JMP search_loop_encodeSnappyBlockAsm12B 10678 10679candidate3_match_encodeSnappyBlockAsm12B: 10680 ADDL $0x02, CX 10681 JMP candidate_match_encodeSnappyBlockAsm12B 10682 10683candidate2_match_encodeSnappyBlockAsm12B: 10684 MOVL R9, 24(SP)(R10*4) 10685 INCL CX 10686 MOVL R8, SI 10687 10688candidate_match_encodeSnappyBlockAsm12B: 10689 MOVL 12(SP), DI 10690 TESTL SI, SI 10691 JZ match_extend_back_end_encodeSnappyBlockAsm12B 10692 10693match_extend_back_loop_encodeSnappyBlockAsm12B: 10694 CMPL CX, DI 10695 JLE match_extend_back_end_encodeSnappyBlockAsm12B 10696 MOVB -1(DX)(SI*1), BL 10697 MOVB -1(DX)(CX*1), R8 10698 CMPB BL, R8 10699 JNE match_extend_back_end_encodeSnappyBlockAsm12B 10700 LEAL -1(CX), CX 10701 DECL SI 10702 JZ match_extend_back_end_encodeSnappyBlockAsm12B 10703 JMP match_extend_back_loop_encodeSnappyBlockAsm12B 10704 10705match_extend_back_end_encodeSnappyBlockAsm12B: 10706 MOVL CX, DI 10707 SUBL 12(SP), DI 10708 LEAQ 3(AX)(DI*1), DI 10709 CMPQ DI, (SP) 10710 JL match_dst_size_check_encodeSnappyBlockAsm12B 10711 MOVQ $0x00000000, ret+48(FP) 10712 RET 10713 10714match_dst_size_check_encodeSnappyBlockAsm12B: 10715 MOVL CX, DI 10716 MOVL 12(SP), R8 10717 CMPL R8, DI 10718 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B 10719 MOVL DI, R9 10720 MOVL DI, 12(SP) 10721 LEAQ (DX)(R8*1), DI 10722 SUBL R8, R9 10723 LEAL -1(R9), R8 10724 CMPL R8, $0x3c 10725 JLT one_byte_match_emit_encodeSnappyBlockAsm12B 10726 CMPL R8, $0x00000100 10727 JLT 
two_bytes_match_emit_encodeSnappyBlockAsm12B 10728 MOVB $0xf4, (AX) 10729 MOVW R8, 1(AX) 10730 ADDQ $0x03, AX 10731 JMP memmove_long_match_emit_encodeSnappyBlockAsm12B 10732 10733two_bytes_match_emit_encodeSnappyBlockAsm12B: 10734 MOVB $0xf0, (AX) 10735 MOVB R8, 1(AX) 10736 ADDQ $0x02, AX 10737 CMPL R8, $0x40 10738 JL memmove_match_emit_encodeSnappyBlockAsm12B 10739 JMP memmove_long_match_emit_encodeSnappyBlockAsm12B 10740 10741one_byte_match_emit_encodeSnappyBlockAsm12B: 10742 SHLB $0x02, R8 10743 MOVB R8, (AX) 10744 ADDQ $0x01, AX 10745 10746memmove_match_emit_encodeSnappyBlockAsm12B: 10747 LEAQ (AX)(R9*1), R8 10748 10749 // genMemMoveShort 10750 CMPQ R9, $0x08 10751 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 10752 CMPQ R9, $0x10 10753 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 10754 CMPQ R9, $0x20 10755 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 10756 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 10757 10758emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: 10759 MOVQ (DI), R10 10760 MOVQ R10, (AX) 10761 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10762 10763emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: 10764 MOVQ (DI), R10 10765 MOVQ -8(DI)(R9*1), DI 10766 MOVQ R10, (AX) 10767 MOVQ DI, -8(AX)(R9*1) 10768 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10769 10770emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: 10771 MOVOU (DI), X0 10772 MOVOU -16(DI)(R9*1), X1 10773 MOVOU X0, (AX) 10774 MOVOU X1, -16(AX)(R9*1) 10775 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10776 10777emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: 10778 MOVOU (DI), X0 10779 MOVOU 16(DI), X1 10780 MOVOU -32(DI)(R9*1), X2 10781 MOVOU -16(DI)(R9*1), X3 10782 MOVOU X0, (AX) 10783 MOVOU X1, 16(AX) 10784 MOVOU X2, 
-32(AX)(R9*1) 10785 MOVOU X3, -16(AX)(R9*1) 10786 10787memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: 10788 MOVQ R8, AX 10789 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B 10790 10791memmove_long_match_emit_encodeSnappyBlockAsm12B: 10792 LEAQ (AX)(R9*1), R8 10793 10794 // genMemMoveLong 10795 MOVOU (DI), X0 10796 MOVOU 16(DI), X1 10797 MOVOU -32(DI)(R9*1), X2 10798 MOVOU -16(DI)(R9*1), X3 10799 MOVQ R9, R11 10800 SHRQ $0x05, R11 10801 MOVQ AX, R10 10802 ANDL $0x0000001f, R10 10803 MOVQ $0x00000040, R12 10804 SUBQ R10, R12 10805 DECQ R11 10806 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10807 LEAQ -32(DI)(R12*1), R10 10808 LEAQ -32(AX)(R12*1), R13 10809 10810emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: 10811 MOVOU (R10), X4 10812 MOVOU 16(R10), X5 10813 MOVOA X4, (R13) 10814 MOVOA X5, 16(R13) 10815 ADDQ $0x20, R13 10816 ADDQ $0x20, R10 10817 ADDQ $0x20, R12 10818 DECQ R11 10819 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back 10820 10821emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 10822 MOVOU -32(DI)(R12*1), X4 10823 MOVOU -16(DI)(R12*1), X5 10824 MOVOA X4, -32(AX)(R12*1) 10825 MOVOA X5, -16(AX)(R12*1) 10826 ADDQ $0x20, R12 10827 CMPQ R9, R12 10828 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10829 MOVOU X0, (AX) 10830 MOVOU X1, 16(AX) 10831 MOVOU X2, -32(AX)(R9*1) 10832 MOVOU X3, -16(AX)(R9*1) 10833 MOVQ R8, AX 10834 10835emit_literal_done_match_emit_encodeSnappyBlockAsm12B: 10836match_nolit_loop_encodeSnappyBlockAsm12B: 10837 MOVL CX, DI 10838 SUBL SI, DI 10839 MOVL DI, 16(SP) 10840 ADDL $0x04, CX 10841 ADDL $0x04, SI 10842 MOVQ src_len+32(FP), DI 10843 SUBL CX, DI 10844 LEAQ (DX)(CX*1), R8 10845 LEAQ (DX)(SI*1), SI 10846 10847 // matchLen 10848 XORL R10, R10 10849 CMPL DI, $0x08 10850 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B 10851 
10852matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: 10853 MOVQ (R8)(R10*1), R9 10854 XORQ (SI)(R10*1), R9 10855 TESTQ R9, R9 10856 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B 10857 BSFQ R9, R9 10858 SARQ $0x03, R9 10859 LEAL (R10)(R9*1), R10 10860 JMP match_nolit_end_encodeSnappyBlockAsm12B 10861 10862matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: 10863 LEAL -8(DI), DI 10864 LEAL 8(R10), R10 10865 CMPL DI, $0x08 10866 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B 10867 10868matchlen_single_match_nolit_encodeSnappyBlockAsm12B: 10869 TESTL DI, DI 10870 JZ match_nolit_end_encodeSnappyBlockAsm12B 10871 10872matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B: 10873 MOVB (R8)(R10*1), R9 10874 CMPB (SI)(R10*1), R9 10875 JNE match_nolit_end_encodeSnappyBlockAsm12B 10876 LEAL 1(R10), R10 10877 DECL DI 10878 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B 10879 10880match_nolit_end_encodeSnappyBlockAsm12B: 10881 ADDL R10, CX 10882 MOVL 16(SP), SI 10883 ADDL $0x04, R10 10884 MOVL CX, 12(SP) 10885 10886 // emitCopy 10887two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: 10888 CMPL R10, $0x40 10889 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B 10890 MOVB $0xee, (AX) 10891 MOVW SI, 1(AX) 10892 LEAL -60(R10), R10 10893 ADDQ $0x03, AX 10894 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B 10895 10896two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: 10897 CMPL R10, $0x0c 10898 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B 10899 CMPL SI, $0x00000800 10900 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B 10901 MOVB $0x01, BL 10902 LEAL -16(BX)(R10*4), R10 10903 MOVB SI, 1(AX) 10904 SHRL $0x08, SI 10905 SHLL $0x05, SI 10906 ORL SI, R10 10907 MOVB R10, (AX) 10908 ADDQ $0x02, AX 10909 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B 10910 10911emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: 10912 MOVB $0x02, BL 10913 LEAL -4(BX)(R10*4), R10 10914 MOVB R10, (AX) 10915 
MOVW SI, 1(AX) 10916 ADDQ $0x03, AX 10917 10918match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: 10919 CMPL CX, 8(SP) 10920 JGE emit_remainder_encodeSnappyBlockAsm12B 10921 MOVQ -2(DX)(CX*1), DI 10922 CMPQ AX, (SP) 10923 JL match_nolit_dst_ok_encodeSnappyBlockAsm12B 10924 MOVQ $0x00000000, ret+48(FP) 10925 RET 10926 10927match_nolit_dst_ok_encodeSnappyBlockAsm12B: 10928 MOVQ $0x000000cf1bbcdcbb, R9 10929 MOVQ DI, R8 10930 SHRQ $0x10, DI 10931 MOVQ DI, SI 10932 SHLQ $0x18, R8 10933 IMULQ R9, R8 10934 SHRQ $0x34, R8 10935 SHLQ $0x18, SI 10936 IMULQ R9, SI 10937 SHRQ $0x34, SI 10938 LEAL -2(CX), R9 10939 LEAQ 24(SP)(SI*4), R10 10940 MOVL (R10), SI 10941 MOVL R9, 24(SP)(R8*4) 10942 MOVL CX, (R10) 10943 CMPL (DX)(SI*1), DI 10944 JEQ match_nolit_loop_encodeSnappyBlockAsm12B 10945 INCL CX 10946 JMP search_loop_encodeSnappyBlockAsm12B 10947 10948emit_remainder_encodeSnappyBlockAsm12B: 10949 MOVQ src_len+32(FP), CX 10950 SUBL 12(SP), CX 10951 LEAQ 3(AX)(CX*1), CX 10952 CMPQ CX, (SP) 10953 JL emit_remainder_ok_encodeSnappyBlockAsm12B 10954 MOVQ $0x00000000, ret+48(FP) 10955 RET 10956 10957emit_remainder_ok_encodeSnappyBlockAsm12B: 10958 MOVQ src_len+32(FP), CX 10959 MOVL 12(SP), BX 10960 CMPL BX, CX 10961 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B 10962 MOVL CX, SI 10963 MOVL CX, 12(SP) 10964 LEAQ (DX)(BX*1), CX 10965 SUBL BX, SI 10966 LEAL -1(SI), DX 10967 CMPL DX, $0x3c 10968 JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B 10969 CMPL DX, $0x00000100 10970 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B 10971 MOVB $0xf4, (AX) 10972 MOVW DX, 1(AX) 10973 ADDQ $0x03, AX 10974 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B 10975 10976two_bytes_emit_remainder_encodeSnappyBlockAsm12B: 10977 MOVB $0xf0, (AX) 10978 MOVB DL, 1(AX) 10979 ADDQ $0x02, AX 10980 CMPL DX, $0x40 10981 JL memmove_emit_remainder_encodeSnappyBlockAsm12B 10982 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B 10983 10984one_byte_emit_remainder_encodeSnappyBlockAsm12B: 
10985 SHLB $0x02, DL 10986 MOVB DL, (AX) 10987 ADDQ $0x01, AX 10988 10989memmove_emit_remainder_encodeSnappyBlockAsm12B: 10990 LEAQ (AX)(SI*1), DX 10991 MOVL SI, BX 10992 10993 // genMemMoveShort 10994 CMPQ BX, $0x08 10995 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8 10996 CMPQ BX, $0x10 10997 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16 10998 CMPQ BX, $0x20 10999 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32 11000 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 11001 11002emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8: 11003 MOVQ (CX), SI 11004 MOVQ SI, (AX) 11005 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 11006 11007emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: 11008 MOVQ (CX), SI 11009 MOVQ -8(CX)(BX*1), CX 11010 MOVQ SI, (AX) 11011 MOVQ CX, -8(AX)(BX*1) 11012 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 11013 11014emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: 11015 MOVOU (CX), X0 11016 MOVOU -16(CX)(BX*1), X1 11017 MOVOU X0, (AX) 11018 MOVOU X1, -16(AX)(BX*1) 11019 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 11020 11021emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: 11022 MOVOU (CX), X0 11023 MOVOU 16(CX), X1 11024 MOVOU -32(CX)(BX*1), X2 11025 MOVOU -16(CX)(BX*1), X3 11026 MOVOU X0, (AX) 11027 MOVOU X1, 16(AX) 11028 MOVOU X2, -32(AX)(BX*1) 11029 MOVOU X3, -16(AX)(BX*1) 11030 11031memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: 11032 MOVQ DX, AX 11033 JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B 11034 11035memmove_long_emit_remainder_encodeSnappyBlockAsm12B: 11036 LEAQ (AX)(SI*1), DX 11037 MOVL SI, BX 11038 11039 // genMemMoveLong 11040 MOVOU (CX), X0 11041 MOVOU 16(CX), X1 11042 MOVOU -32(CX)(BX*1), X2 11043 MOVOU 
-16(CX)(BX*1), X3 11044 MOVQ BX, DI 11045 SHRQ $0x05, DI 11046 MOVQ AX, SI 11047 ANDL $0x0000001f, SI 11048 MOVQ $0x00000040, R8 11049 SUBQ SI, R8 11050 DECQ DI 11051 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 11052 LEAQ -32(CX)(R8*1), SI 11053 LEAQ -32(AX)(R8*1), R9 11054 11055emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: 11056 MOVOU (SI), X4 11057 MOVOU 16(SI), X5 11058 MOVOA X4, (R9) 11059 MOVOA X5, 16(R9) 11060 ADDQ $0x20, R9 11061 ADDQ $0x20, SI 11062 ADDQ $0x20, R8 11063 DECQ DI 11064 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back 11065 11066emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 11067 MOVOU -32(CX)(R8*1), X4 11068 MOVOU -16(CX)(R8*1), X5 11069 MOVOA X4, -32(AX)(R8*1) 11070 MOVOA X5, -16(AX)(R8*1) 11071 ADDQ $0x20, R8 11072 CMPQ BX, R8 11073 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 11074 MOVOU X0, (AX) 11075 MOVOU X1, 16(AX) 11076 MOVOU X2, -32(AX)(BX*1) 11077 MOVOU X3, -16(AX)(BX*1) 11078 MOVQ DX, AX 11079 11080emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: 11081 MOVQ dst_base+0(FP), CX 11082 SUBQ CX, AX 11083 MOVQ AX, ret+48(FP) 11084 RET 11085 11086// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int 11087// Requires: SSE2 11088TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 11089 MOVQ dst_base+0(FP), AX 11090 MOVQ $0x00000020, CX 11091 LEAQ 24(SP), DX 11092 PXOR X0, X0 11093 11094zero_loop_encodeSnappyBlockAsm10B: 11095 MOVOU X0, (DX) 11096 MOVOU X0, 16(DX) 11097 MOVOU X0, 32(DX) 11098 MOVOU X0, 48(DX) 11099 MOVOU X0, 64(DX) 11100 MOVOU X0, 80(DX) 11101 MOVOU X0, 96(DX) 11102 MOVOU X0, 112(DX) 11103 ADDQ $0x80, DX 11104 DECQ CX 11105 JNZ zero_loop_encodeSnappyBlockAsm10B 11106 MOVL $0x00000000, 12(SP) 11107 MOVQ src_len+32(FP), CX 11108 LEAQ -9(CX), DX 11109 LEAQ -8(CX), SI 11110 MOVL SI, 8(SP) 11111 SHRQ $0x05, CX 11112 SUBL CX, 
DX 11113 LEAQ (AX)(DX*1), DX 11114 MOVQ DX, (SP) 11115 MOVL $0x00000001, CX 11116 MOVL CX, 16(SP) 11117 MOVQ src_base+24(FP), DX 11118 11119search_loop_encodeSnappyBlockAsm10B: 11120 MOVL CX, SI 11121 SUBL 12(SP), SI 11122 SHRL $0x05, SI 11123 LEAL 4(CX)(SI*1), SI 11124 CMPL SI, 8(SP) 11125 JGE emit_remainder_encodeSnappyBlockAsm10B 11126 MOVQ (DX)(CX*1), DI 11127 MOVL SI, 20(SP) 11128 MOVQ $0x9e3779b1, R9 11129 MOVQ DI, R10 11130 MOVQ DI, R11 11131 SHRQ $0x08, R11 11132 SHLQ $0x20, R10 11133 IMULQ R9, R10 11134 SHRQ $0x36, R10 11135 SHLQ $0x20, R11 11136 IMULQ R9, R11 11137 SHRQ $0x36, R11 11138 MOVL 24(SP)(R10*4), SI 11139 MOVL 24(SP)(R11*4), R8 11140 MOVL CX, 24(SP)(R10*4) 11141 LEAL 1(CX), R10 11142 MOVL R10, 24(SP)(R11*4) 11143 MOVQ DI, R10 11144 SHRQ $0x10, R10 11145 SHLQ $0x20, R10 11146 IMULQ R9, R10 11147 SHRQ $0x36, R10 11148 MOVL CX, R9 11149 SUBL 16(SP), R9 11150 MOVL 1(DX)(R9*1), R11 11151 MOVQ DI, R9 11152 SHRQ $0x08, R9 11153 CMPL R9, R11 11154 JNE no_repeat_found_encodeSnappyBlockAsm10B 11155 LEAL 1(CX), DI 11156 MOVL 12(SP), SI 11157 MOVL DI, R8 11158 SUBL 16(SP), R8 11159 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B 11160 11161repeat_extend_back_loop_encodeSnappyBlockAsm10B: 11162 CMPL DI, SI 11163 JLE repeat_extend_back_end_encodeSnappyBlockAsm10B 11164 MOVB -1(DX)(R8*1), BL 11165 MOVB -1(DX)(DI*1), R9 11166 CMPB BL, R9 11167 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B 11168 LEAL -1(DI), DI 11169 DECL R8 11170 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B 11171 11172repeat_extend_back_end_encodeSnappyBlockAsm10B: 11173 MOVL 12(SP), SI 11174 CMPL SI, DI 11175 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B 11176 MOVL DI, R8 11177 MOVL DI, 12(SP) 11178 LEAQ (DX)(SI*1), R9 11179 SUBL SI, R8 11180 LEAL -1(R8), SI 11181 CMPL SI, $0x3c 11182 JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B 11183 CMPL SI, $0x00000100 11184 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B 11185 MOVB $0xf4, (AX) 11186 MOVW SI, 1(AX) 11187 ADDQ 
$0x03, AX 11188 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B 11189 11190two_bytes_repeat_emit_encodeSnappyBlockAsm10B: 11191 MOVB $0xf0, (AX) 11192 MOVB SI, 1(AX) 11193 ADDQ $0x02, AX 11194 CMPL SI, $0x40 11195 JL memmove_repeat_emit_encodeSnappyBlockAsm10B 11196 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B 11197 11198one_byte_repeat_emit_encodeSnappyBlockAsm10B: 11199 SHLB $0x02, SI 11200 MOVB SI, (AX) 11201 ADDQ $0x01, AX 11202 11203memmove_repeat_emit_encodeSnappyBlockAsm10B: 11204 LEAQ (AX)(R8*1), SI 11205 11206 // genMemMoveShort 11207 CMPQ R8, $0x08 11208 JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 11209 CMPQ R8, $0x10 11210 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 11211 CMPQ R8, $0x20 11212 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 11213 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 11214 11215emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: 11216 MOVQ (R9), R10 11217 MOVQ R10, (AX) 11218 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11219 11220emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: 11221 MOVQ (R9), R10 11222 MOVQ -8(R9)(R8*1), R9 11223 MOVQ R10, (AX) 11224 MOVQ R9, -8(AX)(R8*1) 11225 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11226 11227emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: 11228 MOVOU (R9), X0 11229 MOVOU -16(R9)(R8*1), X1 11230 MOVOU X0, (AX) 11231 MOVOU X1, -16(AX)(R8*1) 11232 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11233 11234emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: 11235 MOVOU (R9), X0 11236 MOVOU 16(R9), X1 11237 MOVOU -32(R9)(R8*1), X2 11238 MOVOU -16(R9)(R8*1), X3 11239 MOVOU X0, (AX) 11240 MOVOU X1, 16(AX) 11241 MOVOU X2, -32(AX)(R8*1) 11242 MOVOU X3, -16(AX)(R8*1) 11243 
11244memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: 11245 MOVQ SI, AX 11246 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B 11247 11248memmove_long_repeat_emit_encodeSnappyBlockAsm10B: 11249 LEAQ (AX)(R8*1), SI 11250 11251 // genMemMoveLong 11252 MOVOU (R9), X0 11253 MOVOU 16(R9), X1 11254 MOVOU -32(R9)(R8*1), X2 11255 MOVOU -16(R9)(R8*1), X3 11256 MOVQ R8, R11 11257 SHRQ $0x05, R11 11258 MOVQ AX, R10 11259 ANDL $0x0000001f, R10 11260 MOVQ $0x00000040, R12 11261 SUBQ R10, R12 11262 DECQ R11 11263 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11264 LEAQ -32(R9)(R12*1), R10 11265 LEAQ -32(AX)(R12*1), R13 11266 11267emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: 11268 MOVOU (R10), X4 11269 MOVOU 16(R10), X5 11270 MOVOA X4, (R13) 11271 MOVOA X5, 16(R13) 11272 ADDQ $0x20, R13 11273 ADDQ $0x20, R10 11274 ADDQ $0x20, R12 11275 DECQ R11 11276 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back 11277 11278emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: 11279 MOVOU -32(R9)(R12*1), X4 11280 MOVOU -16(R9)(R12*1), X5 11281 MOVOA X4, -32(AX)(R12*1) 11282 MOVOA X5, -16(AX)(R12*1) 11283 ADDQ $0x20, R12 11284 CMPQ R8, R12 11285 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11286 MOVOU X0, (AX) 11287 MOVOU X1, 16(AX) 11288 MOVOU X2, -32(AX)(R8*1) 11289 MOVOU X3, -16(AX)(R8*1) 11290 MOVQ SI, AX 11291 11292emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: 11293 ADDL $0x05, CX 11294 MOVL CX, SI 11295 SUBL 16(SP), SI 11296 MOVQ src_len+32(FP), R8 11297 SUBL CX, R8 11298 LEAQ (DX)(CX*1), R9 11299 LEAQ (DX)(SI*1), SI 11300 11301 // matchLen 11302 XORL R11, R11 11303 CMPL R8, $0x08 11304 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B 11305 11306matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: 11307 MOVQ (R9)(R11*1), R10 11308 XORQ (SI)(R11*1), R10 11309 TESTQ R10, R10 11310 JZ 
matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B 11311 BSFQ R10, R10 11312 SARQ $0x03, R10 11313 LEAL (R11)(R10*1), R11 11314 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B 11315 11316matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: 11317 LEAL -8(R8), R8 11318 LEAL 8(R11), R11 11319 CMPL R8, $0x08 11320 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B 11321 11322matchlen_single_repeat_extend_encodeSnappyBlockAsm10B: 11323 TESTL R8, R8 11324 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B 11325 11326matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B: 11327 MOVB (R9)(R11*1), R10 11328 CMPB (SI)(R11*1), R10 11329 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B 11330 LEAL 1(R11), R11 11331 DECL R8 11332 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B 11333 11334repeat_extend_forward_end_encodeSnappyBlockAsm10B: 11335 ADDL R11, CX 11336 MOVL CX, SI 11337 SUBL DI, SI 11338 MOVL 16(SP), DI 11339 11340 // emitCopy 11341two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: 11342 CMPL SI, $0x40 11343 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B 11344 MOVB $0xee, (AX) 11345 MOVW DI, 1(AX) 11346 LEAL -60(SI), SI 11347 ADDQ $0x03, AX 11348 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B 11349 11350two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: 11351 CMPL SI, $0x0c 11352 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B 11353 CMPL DI, $0x00000800 11354 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B 11355 MOVB $0x01, BL 11356 LEAL -16(BX)(SI*4), SI 11357 MOVB DI, 1(AX) 11358 SHRL $0x08, DI 11359 SHLL $0x05, DI 11360 ORL DI, SI 11361 MOVB SI, (AX) 11362 ADDQ $0x02, AX 11363 JMP repeat_end_emit_encodeSnappyBlockAsm10B 11364 11365emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: 11366 MOVB $0x02, BL 11367 LEAL -4(BX)(SI*4), SI 11368 MOVB SI, (AX) 11369 MOVW DI, 1(AX) 11370 ADDQ $0x03, AX 11371 11372repeat_end_emit_encodeSnappyBlockAsm10B: 
11373 MOVL CX, 12(SP) 11374 JMP search_loop_encodeSnappyBlockAsm10B 11375 11376no_repeat_found_encodeSnappyBlockAsm10B: 11377 CMPL (DX)(SI*1), DI 11378 JEQ candidate_match_encodeSnappyBlockAsm10B 11379 SHRQ $0x08, DI 11380 MOVL 24(SP)(R10*4), SI 11381 LEAL 2(CX), R9 11382 CMPL (DX)(R8*1), DI 11383 JEQ candidate2_match_encodeSnappyBlockAsm10B 11384 MOVL R9, 24(SP)(R10*4) 11385 SHRQ $0x08, DI 11386 CMPL (DX)(SI*1), DI 11387 JEQ candidate3_match_encodeSnappyBlockAsm10B 11388 MOVL 20(SP), CX 11389 JMP search_loop_encodeSnappyBlockAsm10B 11390 11391candidate3_match_encodeSnappyBlockAsm10B: 11392 ADDL $0x02, CX 11393 JMP candidate_match_encodeSnappyBlockAsm10B 11394 11395candidate2_match_encodeSnappyBlockAsm10B: 11396 MOVL R9, 24(SP)(R10*4) 11397 INCL CX 11398 MOVL R8, SI 11399 11400candidate_match_encodeSnappyBlockAsm10B: 11401 MOVL 12(SP), DI 11402 TESTL SI, SI 11403 JZ match_extend_back_end_encodeSnappyBlockAsm10B 11404 11405match_extend_back_loop_encodeSnappyBlockAsm10B: 11406 CMPL CX, DI 11407 JLE match_extend_back_end_encodeSnappyBlockAsm10B 11408 MOVB -1(DX)(SI*1), BL 11409 MOVB -1(DX)(CX*1), R8 11410 CMPB BL, R8 11411 JNE match_extend_back_end_encodeSnappyBlockAsm10B 11412 LEAL -1(CX), CX 11413 DECL SI 11414 JZ match_extend_back_end_encodeSnappyBlockAsm10B 11415 JMP match_extend_back_loop_encodeSnappyBlockAsm10B 11416 11417match_extend_back_end_encodeSnappyBlockAsm10B: 11418 MOVL CX, DI 11419 SUBL 12(SP), DI 11420 LEAQ 3(AX)(DI*1), DI 11421 CMPQ DI, (SP) 11422 JL match_dst_size_check_encodeSnappyBlockAsm10B 11423 MOVQ $0x00000000, ret+48(FP) 11424 RET 11425 11426match_dst_size_check_encodeSnappyBlockAsm10B: 11427 MOVL CX, DI 11428 MOVL 12(SP), R8 11429 CMPL R8, DI 11430 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B 11431 MOVL DI, R9 11432 MOVL DI, 12(SP) 11433 LEAQ (DX)(R8*1), DI 11434 SUBL R8, R9 11435 LEAL -1(R9), R8 11436 CMPL R8, $0x3c 11437 JLT one_byte_match_emit_encodeSnappyBlockAsm10B 11438 CMPL R8, $0x00000100 11439 JLT 
two_bytes_match_emit_encodeSnappyBlockAsm10B 11440 MOVB $0xf4, (AX) 11441 MOVW R8, 1(AX) 11442 ADDQ $0x03, AX 11443 JMP memmove_long_match_emit_encodeSnappyBlockAsm10B 11444 11445two_bytes_match_emit_encodeSnappyBlockAsm10B: 11446 MOVB $0xf0, (AX) 11447 MOVB R8, 1(AX) 11448 ADDQ $0x02, AX 11449 CMPL R8, $0x40 11450 JL memmove_match_emit_encodeSnappyBlockAsm10B 11451 JMP memmove_long_match_emit_encodeSnappyBlockAsm10B 11452 11453one_byte_match_emit_encodeSnappyBlockAsm10B: 11454 SHLB $0x02, R8 11455 MOVB R8, (AX) 11456 ADDQ $0x01, AX 11457 11458memmove_match_emit_encodeSnappyBlockAsm10B: 11459 LEAQ (AX)(R9*1), R8 11460 11461 // genMemMoveShort 11462 CMPQ R9, $0x08 11463 JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 11464 CMPQ R9, $0x10 11465 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 11466 CMPQ R9, $0x20 11467 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 11468 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 11469 11470emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: 11471 MOVQ (DI), R10 11472 MOVQ R10, (AX) 11473 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11474 11475emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: 11476 MOVQ (DI), R10 11477 MOVQ -8(DI)(R9*1), DI 11478 MOVQ R10, (AX) 11479 MOVQ DI, -8(AX)(R9*1) 11480 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11481 11482emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: 11483 MOVOU (DI), X0 11484 MOVOU -16(DI)(R9*1), X1 11485 MOVOU X0, (AX) 11486 MOVOU X1, -16(AX)(R9*1) 11487 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11488 11489emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: 11490 MOVOU (DI), X0 11491 MOVOU 16(DI), X1 11492 MOVOU -32(DI)(R9*1), X2 11493 MOVOU -16(DI)(R9*1), X3 11494 MOVOU X0, (AX) 11495 MOVOU X1, 16(AX) 11496 MOVOU X2, 
-32(AX)(R9*1) 11497 MOVOU X3, -16(AX)(R9*1) 11498 11499memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: 11500 MOVQ R8, AX 11501 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B 11502 11503memmove_long_match_emit_encodeSnappyBlockAsm10B: 11504 LEAQ (AX)(R9*1), R8 11505 11506 // genMemMoveLong 11507 MOVOU (DI), X0 11508 MOVOU 16(DI), X1 11509 MOVOU -32(DI)(R9*1), X2 11510 MOVOU -16(DI)(R9*1), X3 11511 MOVQ R9, R11 11512 SHRQ $0x05, R11 11513 MOVQ AX, R10 11514 ANDL $0x0000001f, R10 11515 MOVQ $0x00000040, R12 11516 SUBQ R10, R12 11517 DECQ R11 11518 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11519 LEAQ -32(DI)(R12*1), R10 11520 LEAQ -32(AX)(R12*1), R13 11521 11522emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: 11523 MOVOU (R10), X4 11524 MOVOU 16(R10), X5 11525 MOVOA X4, (R13) 11526 MOVOA X5, 16(R13) 11527 ADDQ $0x20, R13 11528 ADDQ $0x20, R10 11529 ADDQ $0x20, R12 11530 DECQ R11 11531 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back 11532 11533emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: 11534 MOVOU -32(DI)(R12*1), X4 11535 MOVOU -16(DI)(R12*1), X5 11536 MOVOA X4, -32(AX)(R12*1) 11537 MOVOA X5, -16(AX)(R12*1) 11538 ADDQ $0x20, R12 11539 CMPQ R9, R12 11540 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11541 MOVOU X0, (AX) 11542 MOVOU X1, 16(AX) 11543 MOVOU X2, -32(AX)(R9*1) 11544 MOVOU X3, -16(AX)(R9*1) 11545 MOVQ R8, AX 11546 11547emit_literal_done_match_emit_encodeSnappyBlockAsm10B: 11548match_nolit_loop_encodeSnappyBlockAsm10B: 11549 MOVL CX, DI 11550 SUBL SI, DI 11551 MOVL DI, 16(SP) 11552 ADDL $0x04, CX 11553 ADDL $0x04, SI 11554 MOVQ src_len+32(FP), DI 11555 SUBL CX, DI 11556 LEAQ (DX)(CX*1), R8 11557 LEAQ (DX)(SI*1), SI 11558 11559 // matchLen 11560 XORL R10, R10 11561 CMPL DI, $0x08 11562 JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B 11563 
11564matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: 11565 MOVQ (R8)(R10*1), R9 11566 XORQ (SI)(R10*1), R9 11567 TESTQ R9, R9 11568 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B 11569 BSFQ R9, R9 11570 SARQ $0x03, R9 11571 LEAL (R10)(R9*1), R10 11572 JMP match_nolit_end_encodeSnappyBlockAsm10B 11573 11574matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: 11575 LEAL -8(DI), DI 11576 LEAL 8(R10), R10 11577 CMPL DI, $0x08 11578 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B 11579 11580matchlen_single_match_nolit_encodeSnappyBlockAsm10B: 11581 TESTL DI, DI 11582 JZ match_nolit_end_encodeSnappyBlockAsm10B 11583 11584matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B: 11585 MOVB (R8)(R10*1), R9 11586 CMPB (SI)(R10*1), R9 11587 JNE match_nolit_end_encodeSnappyBlockAsm10B 11588 LEAL 1(R10), R10 11589 DECL DI 11590 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B 11591 11592match_nolit_end_encodeSnappyBlockAsm10B: 11593 ADDL R10, CX 11594 MOVL 16(SP), SI 11595 ADDL $0x04, R10 11596 MOVL CX, 12(SP) 11597 11598 // emitCopy 11599two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: 11600 CMPL R10, $0x40 11601 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B 11602 MOVB $0xee, (AX) 11603 MOVW SI, 1(AX) 11604 LEAL -60(R10), R10 11605 ADDQ $0x03, AX 11606 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B 11607 11608two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: 11609 CMPL R10, $0x0c 11610 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B 11611 CMPL SI, $0x00000800 11612 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B 11613 MOVB $0x01, BL 11614 LEAL -16(BX)(R10*4), R10 11615 MOVB SI, 1(AX) 11616 SHRL $0x08, SI 11617 SHLL $0x05, SI 11618 ORL SI, R10 11619 MOVB R10, (AX) 11620 ADDQ $0x02, AX 11621 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B 11622 11623emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: 11624 MOVB $0x02, BL 11625 LEAL -4(BX)(R10*4), R10 11626 MOVB R10, (AX) 11627 
MOVW SI, 1(AX) 11628 ADDQ $0x03, AX 11629 11630match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: 11631 CMPL CX, 8(SP) 11632 JGE emit_remainder_encodeSnappyBlockAsm10B 11633 MOVQ -2(DX)(CX*1), DI 11634 CMPQ AX, (SP) 11635 JL match_nolit_dst_ok_encodeSnappyBlockAsm10B 11636 MOVQ $0x00000000, ret+48(FP) 11637 RET 11638 11639match_nolit_dst_ok_encodeSnappyBlockAsm10B: 11640 MOVQ $0x9e3779b1, R9 11641 MOVQ DI, R8 11642 SHRQ $0x10, DI 11643 MOVQ DI, SI 11644 SHLQ $0x20, R8 11645 IMULQ R9, R8 11646 SHRQ $0x36, R8 11647 SHLQ $0x20, SI 11648 IMULQ R9, SI 11649 SHRQ $0x36, SI 11650 LEAL -2(CX), R9 11651 LEAQ 24(SP)(SI*4), R10 11652 MOVL (R10), SI 11653 MOVL R9, 24(SP)(R8*4) 11654 MOVL CX, (R10) 11655 CMPL (DX)(SI*1), DI 11656 JEQ match_nolit_loop_encodeSnappyBlockAsm10B 11657 INCL CX 11658 JMP search_loop_encodeSnappyBlockAsm10B 11659 11660emit_remainder_encodeSnappyBlockAsm10B: 11661 MOVQ src_len+32(FP), CX 11662 SUBL 12(SP), CX 11663 LEAQ 3(AX)(CX*1), CX 11664 CMPQ CX, (SP) 11665 JL emit_remainder_ok_encodeSnappyBlockAsm10B 11666 MOVQ $0x00000000, ret+48(FP) 11667 RET 11668 11669emit_remainder_ok_encodeSnappyBlockAsm10B: 11670 MOVQ src_len+32(FP), CX 11671 MOVL 12(SP), BX 11672 CMPL BX, CX 11673 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B 11674 MOVL CX, SI 11675 MOVL CX, 12(SP) 11676 LEAQ (DX)(BX*1), CX 11677 SUBL BX, SI 11678 LEAL -1(SI), DX 11679 CMPL DX, $0x3c 11680 JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B 11681 CMPL DX, $0x00000100 11682 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B 11683 MOVB $0xf4, (AX) 11684 MOVW DX, 1(AX) 11685 ADDQ $0x03, AX 11686 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B 11687 11688two_bytes_emit_remainder_encodeSnappyBlockAsm10B: 11689 MOVB $0xf0, (AX) 11690 MOVB DL, 1(AX) 11691 ADDQ $0x02, AX 11692 CMPL DX, $0x40 11693 JL memmove_emit_remainder_encodeSnappyBlockAsm10B 11694 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B 11695 11696one_byte_emit_remainder_encodeSnappyBlockAsm10B: 11697 
SHLB $0x02, DL 11698 MOVB DL, (AX) 11699 ADDQ $0x01, AX 11700 11701memmove_emit_remainder_encodeSnappyBlockAsm10B: 11702 LEAQ (AX)(SI*1), DX 11703 MOVL SI, BX 11704 11705 // genMemMoveShort 11706 CMPQ BX, $0x08 11707 JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8 11708 CMPQ BX, $0x10 11709 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16 11710 CMPQ BX, $0x20 11711 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32 11712 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 11713 11714emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8: 11715 MOVQ (CX), SI 11716 MOVQ SI, (AX) 11717 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11718 11719emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: 11720 MOVQ (CX), SI 11721 MOVQ -8(CX)(BX*1), CX 11722 MOVQ SI, (AX) 11723 MOVQ CX, -8(AX)(BX*1) 11724 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11725 11726emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: 11727 MOVOU (CX), X0 11728 MOVOU -16(CX)(BX*1), X1 11729 MOVOU X0, (AX) 11730 MOVOU X1, -16(AX)(BX*1) 11731 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11732 11733emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: 11734 MOVOU (CX), X0 11735 MOVOU 16(CX), X1 11736 MOVOU -32(CX)(BX*1), X2 11737 MOVOU -16(CX)(BX*1), X3 11738 MOVOU X0, (AX) 11739 MOVOU X1, 16(AX) 11740 MOVOU X2, -32(AX)(BX*1) 11741 MOVOU X3, -16(AX)(BX*1) 11742 11743memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: 11744 MOVQ DX, AX 11745 JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B 11746 11747memmove_long_emit_remainder_encodeSnappyBlockAsm10B: 11748 LEAQ (AX)(SI*1), DX 11749 MOVL SI, BX 11750 11751 // genMemMoveLong 11752 MOVOU (CX), X0 11753 MOVOU 16(CX), X1 11754 MOVOU -32(CX)(BX*1), X2 11755 MOVOU 
-16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeSnappyBlockAsm8B encodes src into dst as a sequence of literal and
// copy elements and returns the number of bytes written to dst. It returns 0
// as soon as the output would reach the dst limit computed on entry.
//
// Stack frame (1048 bytes = 24 bytes of scalars + 1024-byte hash table):
//   0(SP)   dst limit pointer: dst_base + (src_len - 9 - src_len>>5)
//   8(SP)   search limit: src_len - 8
//   12(SP)  src index where the not-yet-emitted literals start
//   16(SP)  offset of the most recent match (initialised to 1)
//   20(SP)  next search position, used when no candidate matches
//   24(SP)  hash table: 256 entries of 4 bytes, zeroed on entry
//
// In the search and match loops AX is the dst write pointer, CX the current
// src index and DX the src base pointer.
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 8 iterations of 128 bytes = 1024 bytes.
zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm8B

	// Initialise the scalar slots described in the function header.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm8B:
	// SI = CX + 4 + (CX - 12(SP))>>4: the next position to try. The step
	// grows with the number of pending literal bytes.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B

	// DI = 8 source bytes at CX. Each hash is
	// ((x << 32) * 0x9e3779b1) >> 56, i.e. an 8-bit index computed from the
	// low 4 bytes of x. R10 hashes the bytes at CX, R11 the bytes at CX+1.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x38, R11

	// Load the previous table entries (candidates SI and R8) and replace
	// them with CX and CX+1.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash of the bytes at CX+2, consumed in no_repeat_found.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10

	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes at
	// CX+1 - 16(SP) (the last match offset).
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm8B

	// Repeat found at DI = CX+1 with candidate R8 = DI - offset. Extend the
	// match backwards while the preceding bytes agree, DI is above the
	// pending-literal start and R8 has not reached 0.
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm8B

repeat_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B

repeat_extend_back_end_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):DI] as a literal (skipped when empty) and advance
	// 12(SP) to DI. R8 = literal length, R9 = literal source pointer,
	// SI = length-1. Header forms: one byte (length-1)<<2 when
	// length-1 < 60; 0xf0 plus one length byte when length-1 < 256;
	// otherwise 0xf4 plus two length bytes.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm8B
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

one_byte_repeat_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm8B:
	// SI = dst pointer after the literal; AX is set to it once copied.
	LEAQ (AX)(R8*1), SI

	// genMemMoveShort
	// Copy R8 (at most 64) bytes from R9 to AX using overlapping head and
	// tail moves selected by length.
	CMPQ R8, $0x08
	JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
	// NOTE(review): always loads and stores a full 8 bytes, even when R8 is
	// smaller. Presumably relies on readable/writable slack after the
	// literal in src and dst - confirm against the Go caller.
	MOVQ (R9), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B

memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), SI

	// genMemMoveLong
	// The first and last 32 source bytes are preloaded into X0..X3 and
	// stored last. R12 = 64 - (AX & 31), so AX+R12-32 is 32-byte aligned
	// and the MOVOA stores in the loops below hit aligned dst addresses.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	// Copy 32 bytes per iteration while R12 <= R8 (unsigned).
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
	// Extend the repeat forwards: compare src from CX+5 with src from
	// CX+5-offset. R8 = bytes remaining in src, R11 = matched length.
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	// Compare 8 bytes at a time; on a difference the index of the lowest
	// set bit of the XOR, divided by 8, is the number of equal bytes.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B

matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B

matchlen_single_repeat_extend_encodeSnappyBlockAsm8B:
	// Fewer than 8 bytes remain: finish one byte at a time.
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B

repeat_extend_forward_end_encodeSnappyBlockAsm8B:
	// CX = end of the match, SI = CX - DI = match length, DI = offset.
	ADDL R11, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI

	// emitCopy
	// While more than 64 bytes remain, emit a 3-byte element (tag 0xee plus
	// the 16-bit offset) and subtract 60 from the length.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Length < 12: 2-byte element. The tag byte is the low byte of
	// (length*4 - 16 + 1) | (offset>>8)<<5, followed by the low offset byte.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm8B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
	// Length >= 12: 3-byte element. The tag byte is the low byte of
	// (length*4 - 4 + 2), followed by the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm8B:
	// Everything up to CX has been emitted.
	MOVL CX, 12(SP)
	JMP search_loop_encodeSnappyBlockAsm8B

no_repeat_found_encodeSnappyBlockAsm8B:
	// Test the three hash candidates in turn, shifting DI right one byte
	// between tests: SI against the bytes at CX, R8 against CX+1, and the
	// table entry for the CX+2 hash (which is replaced by R9 = CX+2)
	// against CX+2.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBlockAsm8B
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeSnappyBlockAsm8B
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm8B

	// No candidate matched: continue at the saved next position.
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBlockAsm8B

candidate3_match_encodeSnappyBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm8B

candidate2_match_encodeSnappyBlockAsm8B:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeSnappyBlockAsm8B:
	// Match at CX against candidate SI. Extend it backwards while the
	// preceding bytes agree, CX is above the pending-literal start and SI
	// has not reached 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBlockAsm8B

match_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBlockAsm8B
	JMP match_extend_back_loop_encodeSnappyBlockAsm8B

match_extend_back_end_encodeSnappyBlockAsm8B:
	// Return 0 unless AX + 3 + pending literal length is below the dst
	// limit stored at (SP).
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):CX] as a literal (skipped when empty) and advance
	// 12(SP) to CX. R9 = literal length, DI = literal source pointer,
	// R8 = length-1. Header forms are the same as in the repeat path.
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm8B
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm8B

two_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm8B
	JMP memmove_long_match_emit_encodeSnappyBlockAsm8B

one_byte_match_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm8B:
	// R8 = dst pointer after the literal; AX is set to it once copied.
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	// Copy R9 (at most 64) bytes from DI to AX.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
	// NOTE(review): fixed 8-byte load/store even when R9 < 8 - confirm the
	// caller guarantees slack in src and dst.
	MOVQ (DI), R10
	MOVQ R10, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B

memmove_long_match_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// Same scheme as the repeat path: head and tail are preloaded into
	// X0..X3 and stored last; the middle is copied in 32-byte steps with
	// aligned stores starting at AX + 32 - (AX & 31).
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
	// Record the offset CX - SI in 16(SP), skip the 4 bytes already known
	// to match and measure how far the match continues. DI = bytes
	// remaining in src, R10 = additional matched length.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeSnappyBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
	TESTL DI, DI
	JZ match_nolit_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeSnappyBlockAsm8B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B

match_nolit_end_encodeSnappyBlockAsm8B:
	// CX = end of the match, SI = offset, R10 = total match length
	// (the initial 4 bytes included); 12(SP) advances to CX.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	// Same element forms as in the repeat path, with length in R10 and
	// offset in SI.
two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
	// Stop searching at the search limit; return 0 if AX has reached the
	// dst limit. DI = 8 source bytes starting at CX-2.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm8B:
	// Hash the bytes at CX-2 (R8) and at CX (SI), store CX-2 and CX in the
	// table, and take the previous entry for the CX hash as the next
	// candidate. If its 4 bytes equal the 4 bytes at CX, handle another
	// match immediately with no literal in between.
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x20, R8
	IMULQ R9, R8
	SHRQ $0x38, R8
	SHLQ $0x20, SI
	IMULQ R9, SI
	SHRQ $0x38, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeSnappyBlockAsm8B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm8B

emit_remainder_encodeSnappyBlockAsm8B:
	// Return 0 unless AX + 3 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm8B:
	// Emit src[12(SP):src_len] as the final literal (skipped when empty).
	// From here on CX is the literal source pointer, SI its length and
	// DX = length-1, so DX no longer holds the src base.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm8B
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B

one_byte_emit_remainder_encodeSnappyBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm8B:
	// DX = dst pointer after the literal, BX = literal length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	CMPQ BX, $0x08
	JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8:
	// NOTE(review): fixed 8-byte load from the tail of src even when fewer
	// than 8 bytes remain - confirm the caller guarantees readable slack
	// past src_len.
	MOVQ (CX), SI
	MOVQ SI, (AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B

memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Head and tail are preloaded into X0..X3 and stored last; the middle
	// is copied in 32-byte steps with aligned stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBetterBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBetterBlockAsm:
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	CMPL SI, $0x63
	JLE
check_maxskip_ok_encodeSnappyBetterBlockAsm 12545 LEAL 100(CX), SI 12546 JMP check_maxskip_cont_encodeSnappyBetterBlockAsm 12547 12548check_maxskip_ok_encodeSnappyBetterBlockAsm: 12549 LEAL 1(CX)(SI*1), SI 12550 12551check_maxskip_cont_encodeSnappyBetterBlockAsm: 12552 CMPL SI, 8(SP) 12553 JGE emit_remainder_encodeSnappyBetterBlockAsm 12554 MOVQ (DX)(CX*1), DI 12555 MOVL SI, 20(SP) 12556 MOVQ $0x00cf1bbcdcbfa563, R9 12557 MOVQ $0x9e3779b1, SI 12558 MOVQ DI, R10 12559 MOVQ DI, R11 12560 SHLQ $0x08, R10 12561 IMULQ R9, R10 12562 SHRQ $0x30, R10 12563 SHLQ $0x20, R11 12564 IMULQ SI, R11 12565 SHRQ $0x32, R11 12566 MOVL 24(SP)(R10*4), SI 12567 MOVL 262168(SP)(R11*4), R8 12568 MOVL CX, 24(SP)(R10*4) 12569 MOVL CX, 262168(SP)(R11*4) 12570 CMPL (DX)(SI*1), DI 12571 JEQ candidate_match_encodeSnappyBetterBlockAsm 12572 CMPL (DX)(R8*1), DI 12573 JEQ candidateS_match_encodeSnappyBetterBlockAsm 12574 MOVL 20(SP), CX 12575 JMP search_loop_encodeSnappyBetterBlockAsm 12576 12577candidateS_match_encodeSnappyBetterBlockAsm: 12578 SHRQ $0x08, DI 12579 MOVQ DI, R10 12580 SHLQ $0x08, R10 12581 IMULQ R9, R10 12582 SHRQ $0x30, R10 12583 MOVL 24(SP)(R10*4), SI 12584 INCL CX 12585 MOVL CX, 24(SP)(R10*4) 12586 CMPL (DX)(SI*1), DI 12587 JEQ candidate_match_encodeSnappyBetterBlockAsm 12588 DECL CX 12589 MOVL R8, SI 12590 12591candidate_match_encodeSnappyBetterBlockAsm: 12592 MOVL 12(SP), DI 12593 TESTL SI, SI 12594 JZ match_extend_back_end_encodeSnappyBetterBlockAsm 12595 12596match_extend_back_loop_encodeSnappyBetterBlockAsm: 12597 CMPL CX, DI 12598 JLE match_extend_back_end_encodeSnappyBetterBlockAsm 12599 MOVB -1(DX)(SI*1), BL 12600 MOVB -1(DX)(CX*1), R8 12601 CMPB BL, R8 12602 JNE match_extend_back_end_encodeSnappyBetterBlockAsm 12603 LEAL -1(CX), CX 12604 DECL SI 12605 JZ match_extend_back_end_encodeSnappyBetterBlockAsm 12606 JMP match_extend_back_loop_encodeSnappyBetterBlockAsm 12607 12608match_extend_back_end_encodeSnappyBetterBlockAsm: 12609 MOVL CX, DI 12610 SUBL 12(SP), DI 12611 
LEAQ 5(AX)(DI*1), DI 12612 CMPQ DI, (SP) 12613 JL match_dst_size_check_encodeSnappyBetterBlockAsm 12614 MOVQ $0x00000000, ret+48(FP) 12615 RET 12616 12617match_dst_size_check_encodeSnappyBetterBlockAsm: 12618 MOVL CX, DI 12619 ADDL $0x04, CX 12620 ADDL $0x04, SI 12621 MOVQ src_len+32(FP), R8 12622 SUBL CX, R8 12623 LEAQ (DX)(CX*1), R9 12624 LEAQ (DX)(SI*1), R10 12625 12626 // matchLen 12627 XORL R12, R12 12628 CMPL R8, $0x08 12629 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm 12630 12631matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: 12632 MOVQ (R9)(R12*1), R11 12633 XORQ (R10)(R12*1), R11 12634 TESTQ R11, R11 12635 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm 12636 BSFQ R11, R11 12637 SARQ $0x03, R11 12638 LEAL (R12)(R11*1), R12 12639 JMP match_nolit_end_encodeSnappyBetterBlockAsm 12640 12641matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm: 12642 LEAL -8(R8), R8 12643 LEAL 8(R12), R12 12644 CMPL R8, $0x08 12645 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm 12646 12647matchlen_single_match_nolit_encodeSnappyBetterBlockAsm: 12648 TESTL R8, R8 12649 JZ match_nolit_end_encodeSnappyBetterBlockAsm 12650 12651matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm: 12652 MOVB (R9)(R12*1), R11 12653 CMPB (R10)(R12*1), R11 12654 JNE match_nolit_end_encodeSnappyBetterBlockAsm 12655 LEAL 1(R12), R12 12656 DECL R8 12657 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm 12658 12659match_nolit_end_encodeSnappyBetterBlockAsm: 12660 MOVL CX, R8 12661 SUBL SI, R8 12662 12663 // Check if repeat 12664 CMPL R12, $0x01 12665 JG match_length_ok_encodeSnappyBetterBlockAsm 12666 CMPL R8, $0x0000ffff 12667 JLE match_length_ok_encodeSnappyBetterBlockAsm 12668 MOVL 20(SP), CX 12669 INCL CX 12670 JMP search_loop_encodeSnappyBetterBlockAsm 12671 12672match_length_ok_encodeSnappyBetterBlockAsm: 12673 MOVL R8, 16(SP) 12674 MOVL 12(SP), SI 12675 CMPL SI, DI 12676 JEQ 
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm 12677 MOVL DI, R9 12678 MOVL DI, 12(SP) 12679 LEAQ (DX)(SI*1), R10 12680 SUBL SI, R9 12681 LEAL -1(R9), SI 12682 CMPL SI, $0x3c 12683 JLT one_byte_match_emit_encodeSnappyBetterBlockAsm 12684 CMPL SI, $0x00000100 12685 JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm 12686 CMPL SI, $0x00010000 12687 JLT three_bytes_match_emit_encodeSnappyBetterBlockAsm 12688 CMPL SI, $0x01000000 12689 JLT four_bytes_match_emit_encodeSnappyBetterBlockAsm 12690 MOVB $0xfc, (AX) 12691 MOVL SI, 1(AX) 12692 ADDQ $0x05, AX 12693 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm 12694 12695four_bytes_match_emit_encodeSnappyBetterBlockAsm: 12696 MOVL SI, R11 12697 SHRL $0x10, R11 12698 MOVB $0xf8, (AX) 12699 MOVW SI, 1(AX) 12700 MOVB R11, 3(AX) 12701 ADDQ $0x04, AX 12702 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm 12703 12704three_bytes_match_emit_encodeSnappyBetterBlockAsm: 12705 MOVB $0xf4, (AX) 12706 MOVW SI, 1(AX) 12707 ADDQ $0x03, AX 12708 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm 12709 12710two_bytes_match_emit_encodeSnappyBetterBlockAsm: 12711 MOVB $0xf0, (AX) 12712 MOVB SI, 1(AX) 12713 ADDQ $0x02, AX 12714 CMPL SI, $0x40 12715 JL memmove_match_emit_encodeSnappyBetterBlockAsm 12716 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm 12717 12718one_byte_match_emit_encodeSnappyBetterBlockAsm: 12719 SHLB $0x02, SI 12720 MOVB SI, (AX) 12721 ADDQ $0x01, AX 12722 12723memmove_match_emit_encodeSnappyBetterBlockAsm: 12724 LEAQ (AX)(R9*1), SI 12725 12726 // genMemMoveShort 12727 CMPQ R9, $0x08 12728 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 12729 CMPQ R9, $0x10 12730 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 12731 CMPQ R9, $0x20 12732 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 12733 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 12734 
12735emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: 12736 MOVQ (R10), R11 12737 MOVQ R11, (AX) 12738 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm 12739 12740emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: 12741 MOVQ (R10), R11 12742 MOVQ -8(R10)(R9*1), R10 12743 MOVQ R11, (AX) 12744 MOVQ R10, -8(AX)(R9*1) 12745 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm 12746 12747emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: 12748 MOVOU (R10), X0 12749 MOVOU -16(R10)(R9*1), X1 12750 MOVOU X0, (AX) 12751 MOVOU X1, -16(AX)(R9*1) 12752 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm 12753 12754emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: 12755 MOVOU (R10), X0 12756 MOVOU 16(R10), X1 12757 MOVOU -32(R10)(R9*1), X2 12758 MOVOU -16(R10)(R9*1), X3 12759 MOVOU X0, (AX) 12760 MOVOU X1, 16(AX) 12761 MOVOU X2, -32(AX)(R9*1) 12762 MOVOU X3, -16(AX)(R9*1) 12763 12764memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: 12765 MOVQ SI, AX 12766 JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm 12767 12768memmove_long_match_emit_encodeSnappyBetterBlockAsm: 12769 LEAQ (AX)(R9*1), SI 12770 12771 // genMemMoveLong 12772 MOVOU (R10), X0 12773 MOVOU 16(R10), X1 12774 MOVOU -32(R10)(R9*1), X2 12775 MOVOU -16(R10)(R9*1), X3 12776 MOVQ R9, R13 12777 SHRQ $0x05, R13 12778 MOVQ AX, R11 12779 ANDL $0x0000001f, R11 12780 MOVQ $0x00000040, R14 12781 SUBQ R11, R14 12782 DECQ R13 12783 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 12784 LEAQ -32(R10)(R14*1), R11 12785 LEAQ -32(AX)(R14*1), R15 12786 12787emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: 12788 MOVOU (R11), X4 12789 MOVOU 16(R11), X5 12790 MOVOA X4, (R15) 12791 MOVOA X5, 16(R15) 12792 ADDQ $0x20, R15 12793 ADDQ $0x20, R11 12794 ADDQ $0x20, R14 12795 DECQ R13 12796 JNA 
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back 12797 12798emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: 12799 MOVOU -32(R10)(R14*1), X4 12800 MOVOU -16(R10)(R14*1), X5 12801 MOVOA X4, -32(AX)(R14*1) 12802 MOVOA X5, -16(AX)(R14*1) 12803 ADDQ $0x20, R14 12804 CMPQ R9, R14 12805 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 12806 MOVOU X0, (AX) 12807 MOVOU X1, 16(AX) 12808 MOVOU X2, -32(AX)(R9*1) 12809 MOVOU X3, -16(AX)(R9*1) 12810 MOVQ SI, AX 12811 12812emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: 12813 ADDL R12, CX 12814 ADDL $0x04, R12 12815 MOVL CX, 12(SP) 12816 12817 // emitCopy 12818 CMPL R8, $0x00010000 12819 JL two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm 12820 12821four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: 12822 CMPL R12, $0x40 12823 JLE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm 12824 MOVB $0xff, (AX) 12825 MOVL R8, 1(AX) 12826 LEAL -64(R12), R12 12827 ADDQ $0x05, AX 12828 CMPL R12, $0x04 12829 JL four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm 12830 JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm 12831 12832four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: 12833 TESTL R12, R12 12834 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm 12835 MOVB $0x03, BL 12836 LEAL -4(BX)(R12*4), R12 12837 MOVB R12, (AX) 12838 MOVL R8, 1(AX) 12839 ADDQ $0x05, AX 12840 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm 12841 12842two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: 12843 CMPL R12, $0x40 12844 JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm 12845 MOVB $0xee, (AX) 12846 MOVW R8, 1(AX) 12847 LEAL -60(R12), R12 12848 ADDQ $0x03, AX 12849 JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm 12850 12851two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: 12852 CMPL R12, $0x0c 12853 JGE 
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm 12854 CMPL R8, $0x00000800 12855 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm 12856 MOVB $0x01, BL 12857 LEAL -16(BX)(R12*4), R12 12858 MOVB R8, 1(AX) 12859 SHRL $0x08, R8 12860 SHLL $0x05, R8 12861 ORL R8, R12 12862 MOVB R12, (AX) 12863 ADDQ $0x02, AX 12864 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm 12865 12866emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: 12867 MOVB $0x02, BL 12868 LEAL -4(BX)(R12*4), R12 12869 MOVB R12, (AX) 12870 MOVW R8, 1(AX) 12871 ADDQ $0x03, AX 12872 12873match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: 12874 CMPL CX, 8(SP) 12875 JGE emit_remainder_encodeSnappyBetterBlockAsm 12876 CMPQ AX, (SP) 12877 JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm 12878 MOVQ $0x00000000, ret+48(FP) 12879 RET 12880 12881match_nolit_dst_ok_encodeSnappyBetterBlockAsm: 12882 MOVQ $0x00cf1bbcdcbfa563, SI 12883 MOVQ $0x9e3779b1, R8 12884 INCL DI 12885 MOVQ (DX)(DI*1), R9 12886 MOVQ R9, R10 12887 MOVQ R9, R11 12888 MOVQ R9, R12 12889 SHRQ $0x08, R11 12890 MOVQ R11, R13 12891 SHRQ $0x10, R12 12892 LEAL 1(DI), R14 12893 LEAL 2(DI), R15 12894 MOVQ -2(DX)(CX*1), R9 12895 SHLQ $0x08, R10 12896 IMULQ SI, R10 12897 SHRQ $0x30, R10 12898 SHLQ $0x08, R13 12899 IMULQ SI, R13 12900 SHRQ $0x30, R13 12901 SHLQ $0x20, R11 12902 IMULQ R8, R11 12903 SHRQ $0x32, R11 12904 SHLQ $0x20, R12 12905 IMULQ R8, R12 12906 SHRQ $0x32, R12 12907 MOVL DI, 24(SP)(R10*4) 12908 MOVL R14, 24(SP)(R13*4) 12909 MOVL R14, 262168(SP)(R11*4) 12910 MOVL R15, 262168(SP)(R12*4) 12911 MOVQ R9, R10 12912 MOVQ R9, R11 12913 SHRQ $0x08, R11 12914 MOVQ R11, R13 12915 LEAL -2(CX), R9 12916 LEAL -1(CX), DI 12917 SHLQ $0x08, R10 12918 IMULQ SI, R10 12919 SHRQ $0x30, R10 12920 SHLQ $0x20, R11 12921 IMULQ R8, R11 12922 SHRQ $0x32, R11 12923 SHLQ $0x08, R13 12924 IMULQ SI, R13 12925 SHRQ $0x30, R13 12926 MOVL R9, 24(SP)(R10*4) 12927 MOVL DI, 262168(SP)(R11*4) 12928 MOVL DI, 24(SP)(R13*4) 12929 JMP 
search_loop_encodeSnappyBetterBlockAsm 12930 12931emit_remainder_encodeSnappyBetterBlockAsm: 12932 MOVQ src_len+32(FP), CX 12933 SUBL 12(SP), CX 12934 LEAQ 5(AX)(CX*1), CX 12935 CMPQ CX, (SP) 12936 JL emit_remainder_ok_encodeSnappyBetterBlockAsm 12937 MOVQ $0x00000000, ret+48(FP) 12938 RET 12939 12940emit_remainder_ok_encodeSnappyBetterBlockAsm: 12941 MOVQ src_len+32(FP), CX 12942 MOVL 12(SP), BX 12943 CMPL BX, CX 12944 JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm 12945 MOVL CX, SI 12946 MOVL CX, 12(SP) 12947 LEAQ (DX)(BX*1), CX 12948 SUBL BX, SI 12949 LEAL -1(SI), DX 12950 CMPL DX, $0x3c 12951 JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm 12952 CMPL DX, $0x00000100 12953 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm 12954 CMPL DX, $0x00010000 12955 JLT three_bytes_emit_remainder_encodeSnappyBetterBlockAsm 12956 CMPL DX, $0x01000000 12957 JLT four_bytes_emit_remainder_encodeSnappyBetterBlockAsm 12958 MOVB $0xfc, (AX) 12959 MOVL DX, 1(AX) 12960 ADDQ $0x05, AX 12961 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm 12962 12963four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: 12964 MOVL DX, BX 12965 SHRL $0x10, BX 12966 MOVB $0xf8, (AX) 12967 MOVW DX, 1(AX) 12968 MOVB BL, 3(AX) 12969 ADDQ $0x04, AX 12970 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm 12971 12972three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: 12973 MOVB $0xf4, (AX) 12974 MOVW DX, 1(AX) 12975 ADDQ $0x03, AX 12976 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm 12977 12978two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: 12979 MOVB $0xf0, (AX) 12980 MOVB DL, 1(AX) 12981 ADDQ $0x02, AX 12982 CMPL DX, $0x40 12983 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm 12984 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm 12985 12986one_byte_emit_remainder_encodeSnappyBetterBlockAsm: 12987 SHLB $0x02, DL 12988 MOVB DL, (AX) 12989 ADDQ $0x01, AX 12990 12991memmove_emit_remainder_encodeSnappyBetterBlockAsm: 12992 LEAQ 
(AX)(SI*1), DX 12993 MOVL SI, BX 12994 12995 // genMemMoveShort 12996 CMPQ BX, $0x08 12997 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8 12998 CMPQ BX, $0x10 12999 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16 13000 CMPQ BX, $0x20 13001 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32 13002 JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 13003 13004emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8: 13005 MOVQ (CX), SI 13006 MOVQ SI, (AX) 13007 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm 13008 13009emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: 13010 MOVQ (CX), SI 13011 MOVQ -8(CX)(BX*1), CX 13012 MOVQ SI, (AX) 13013 MOVQ CX, -8(AX)(BX*1) 13014 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm 13015 13016emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: 13017 MOVOU (CX), X0 13018 MOVOU -16(CX)(BX*1), X1 13019 MOVOU X0, (AX) 13020 MOVOU X1, -16(AX)(BX*1) 13021 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm 13022 13023emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: 13024 MOVOU (CX), X0 13025 MOVOU 16(CX), X1 13026 MOVOU -32(CX)(BX*1), X2 13027 MOVOU -16(CX)(BX*1), X3 13028 MOVOU X0, (AX) 13029 MOVOU X1, 16(AX) 13030 MOVOU X2, -32(AX)(BX*1) 13031 MOVOU X3, -16(AX)(BX*1) 13032 13033memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: 13034 MOVQ DX, AX 13035 JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm 13036 13037memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: 13038 LEAQ (AX)(SI*1), DX 13039 MOVL SI, BX 13040 13041 // genMemMoveLong 13042 MOVOU (CX), X0 13043 MOVOU 16(CX), X1 13044 MOVOU -32(CX)(BX*1), X2 13045 MOVOU -16(CX)(BX*1), X3 13046 MOVQ BX, DI 13047 SHRQ $0x05, DI 13048 MOVQ AX, SI 13049 ANDL 
$0x0000001f, SI 13050 MOVQ $0x00000040, R8 13051 SUBQ SI, R8 13052 DECQ DI 13053 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 13054 LEAQ -32(CX)(R8*1), SI 13055 LEAQ -32(AX)(R8*1), R9 13056 13057emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: 13058 MOVOU (SI), X4 13059 MOVOU 16(SI), X5 13060 MOVOA X4, (R9) 13061 MOVOA X5, 16(R9) 13062 ADDQ $0x20, R9 13063 ADDQ $0x20, SI 13064 ADDQ $0x20, R8 13065 DECQ DI 13066 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back 13067 13068emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: 13069 MOVOU -32(CX)(R8*1), X4 13070 MOVOU -16(CX)(R8*1), X5 13071 MOVOA X4, -32(AX)(R8*1) 13072 MOVOA X5, -16(AX)(R8*1) 13073 ADDQ $0x20, R8 13074 CMPQ BX, R8 13075 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 13076 MOVOU X0, (AX) 13077 MOVOU X1, 16(AX) 13078 MOVOU X2, -32(AX)(BX*1) 13079 MOVOU X3, -16(AX)(BX*1) 13080 MOVQ DX, AX 13081 13082emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: 13083 MOVQ dst_base+0(FP), CX 13084 SUBQ CX, AX 13085 MOVQ AX, ret+48(FP) 13086 RET 13087 13088// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int 13089// Requires: SSE2 13090TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 13091 MOVQ dst_base+0(FP), AX 13092 MOVQ $0x00000a00, CX 13093 LEAQ 24(SP), DX 13094 PXOR X0, X0 13095 13096zero_loop_encodeSnappyBetterBlockAsm64K: 13097 MOVOU X0, (DX) 13098 MOVOU X0, 16(DX) 13099 MOVOU X0, 32(DX) 13100 MOVOU X0, 48(DX) 13101 MOVOU X0, 64(DX) 13102 MOVOU X0, 80(DX) 13103 MOVOU X0, 96(DX) 13104 MOVOU X0, 112(DX) 13105 ADDQ $0x80, DX 13106 DECQ CX 13107 JNZ zero_loop_encodeSnappyBetterBlockAsm64K 13108 MOVL $0x00000000, 12(SP) 13109 MOVQ src_len+32(FP), CX 13110 LEAQ -9(CX), DX 13111 LEAQ -8(CX), SI 13112 MOVL SI, 8(SP) 13113 SHRQ $0x05, CX 13114 SUBL CX, DX 13115 LEAQ (AX)(DX*1), DX 13116 MOVQ 
DX, (SP) 13117 MOVL $0x00000001, CX 13118 MOVL $0x00000000, 16(SP) 13119 MOVQ src_base+24(FP), DX 13120 13121search_loop_encodeSnappyBetterBlockAsm64K: 13122 MOVL CX, SI 13123 SUBL 12(SP), SI 13124 SHRL $0x07, SI 13125 LEAL 1(CX)(SI*1), SI 13126 CMPL SI, 8(SP) 13127 JGE emit_remainder_encodeSnappyBetterBlockAsm64K 13128 MOVQ (DX)(CX*1), DI 13129 MOVL SI, 20(SP) 13130 MOVQ $0x00cf1bbcdcbfa563, R9 13131 MOVQ $0x9e3779b1, SI 13132 MOVQ DI, R10 13133 MOVQ DI, R11 13134 SHLQ $0x08, R10 13135 IMULQ R9, R10 13136 SHRQ $0x30, R10 13137 SHLQ $0x20, R11 13138 IMULQ SI, R11 13139 SHRQ $0x32, R11 13140 MOVL 24(SP)(R10*4), SI 13141 MOVL 262168(SP)(R11*4), R8 13142 MOVL CX, 24(SP)(R10*4) 13143 MOVL CX, 262168(SP)(R11*4) 13144 CMPL (DX)(SI*1), DI 13145 JEQ candidate_match_encodeSnappyBetterBlockAsm64K 13146 CMPL (DX)(R8*1), DI 13147 JEQ candidateS_match_encodeSnappyBetterBlockAsm64K 13148 MOVL 20(SP), CX 13149 JMP search_loop_encodeSnappyBetterBlockAsm64K 13150 13151candidateS_match_encodeSnappyBetterBlockAsm64K: 13152 SHRQ $0x08, DI 13153 MOVQ DI, R10 13154 SHLQ $0x08, R10 13155 IMULQ R9, R10 13156 SHRQ $0x30, R10 13157 MOVL 24(SP)(R10*4), SI 13158 INCL CX 13159 MOVL CX, 24(SP)(R10*4) 13160 CMPL (DX)(SI*1), DI 13161 JEQ candidate_match_encodeSnappyBetterBlockAsm64K 13162 DECL CX 13163 MOVL R8, SI 13164 13165candidate_match_encodeSnappyBetterBlockAsm64K: 13166 MOVL 12(SP), DI 13167 TESTL SI, SI 13168 JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K 13169 13170match_extend_back_loop_encodeSnappyBetterBlockAsm64K: 13171 CMPL CX, DI 13172 JLE match_extend_back_end_encodeSnappyBetterBlockAsm64K 13173 MOVB -1(DX)(SI*1), BL 13174 MOVB -1(DX)(CX*1), R8 13175 CMPB BL, R8 13176 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K 13177 LEAL -1(CX), CX 13178 DECL SI 13179 JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K 13180 JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K 13181 13182match_extend_back_end_encodeSnappyBetterBlockAsm64K: 13183 MOVL CX, DI 13184 SUBL 
12(SP), DI 13185 LEAQ 3(AX)(DI*1), DI 13186 CMPQ DI, (SP) 13187 JL match_dst_size_check_encodeSnappyBetterBlockAsm64K 13188 MOVQ $0x00000000, ret+48(FP) 13189 RET 13190 13191match_dst_size_check_encodeSnappyBetterBlockAsm64K: 13192 MOVL CX, DI 13193 ADDL $0x04, CX 13194 ADDL $0x04, SI 13195 MOVQ src_len+32(FP), R8 13196 SUBL CX, R8 13197 LEAQ (DX)(CX*1), R9 13198 LEAQ (DX)(SI*1), R10 13199 13200 // matchLen 13201 XORL R12, R12 13202 CMPL R8, $0x08 13203 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K 13204 13205matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: 13206 MOVQ (R9)(R12*1), R11 13207 XORQ (R10)(R12*1), R11 13208 TESTQ R11, R11 13209 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K 13210 BSFQ R11, R11 13211 SARQ $0x03, R11 13212 LEAL (R12)(R11*1), R12 13213 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K 13214 13215matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K: 13216 LEAL -8(R8), R8 13217 LEAL 8(R12), R12 13218 CMPL R8, $0x08 13219 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K 13220 13221matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K: 13222 TESTL R8, R8 13223 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K 13224 13225matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: 13226 MOVB (R9)(R12*1), R11 13227 CMPB (R10)(R12*1), R11 13228 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K 13229 LEAL 1(R12), R12 13230 DECL R8 13231 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K 13232 13233match_nolit_end_encodeSnappyBetterBlockAsm64K: 13234 MOVL CX, R8 13235 SUBL SI, R8 13236 13237 // Check if repeat 13238 MOVL R8, 16(SP) 13239 MOVL 12(SP), SI 13240 CMPL SI, DI 13241 JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K 13242 MOVL DI, R9 13243 MOVL DI, 12(SP) 13244 LEAQ (DX)(SI*1), R10 13245 SUBL SI, R9 13246 LEAL -1(R9), SI 13247 CMPL SI, $0x3c 13248 JLT one_byte_match_emit_encodeSnappyBetterBlockAsm64K 13249 CMPL SI, $0x00000100 13250 JLT 
two_bytes_match_emit_encodeSnappyBetterBlockAsm64K 13251 MOVB $0xf4, (AX) 13252 MOVW SI, 1(AX) 13253 ADDQ $0x03, AX 13254 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K 13255 13256two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: 13257 MOVB $0xf0, (AX) 13258 MOVB SI, 1(AX) 13259 ADDQ $0x02, AX 13260 CMPL SI, $0x40 13261 JL memmove_match_emit_encodeSnappyBetterBlockAsm64K 13262 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K 13263 13264one_byte_match_emit_encodeSnappyBetterBlockAsm64K: 13265 SHLB $0x02, SI 13266 MOVB SI, (AX) 13267 ADDQ $0x01, AX 13268 13269memmove_match_emit_encodeSnappyBetterBlockAsm64K: 13270 LEAQ (AX)(R9*1), SI 13271 13272 // genMemMoveShort 13273 CMPQ R9, $0x08 13274 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 13275 CMPQ R9, $0x10 13276 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 13277 CMPQ R9, $0x20 13278 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 13279 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 13280 13281emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: 13282 MOVQ (R10), R11 13283 MOVQ R11, (AX) 13284 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K 13285 13286emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: 13287 MOVQ (R10), R11 13288 MOVQ -8(R10)(R9*1), R10 13289 MOVQ R11, (AX) 13290 MOVQ R10, -8(AX)(R9*1) 13291 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K 13292 13293emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: 13294 MOVOU (R10), X0 13295 MOVOU -16(R10)(R9*1), X1 13296 MOVOU X0, (AX) 13297 MOVOU X1, -16(AX)(R9*1) 13298 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K 13299 13300emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: 13301 MOVOU (R10), X0 13302 MOVOU 16(R10), X1 13303 MOVOU 
-32(R10)(R9*1), X2 13304 MOVOU -16(R10)(R9*1), X3 13305 MOVOU X0, (AX) 13306 MOVOU X1, 16(AX) 13307 MOVOU X2, -32(AX)(R9*1) 13308 MOVOU X3, -16(AX)(R9*1) 13309 13310memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: 13311 MOVQ SI, AX 13312 JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K 13313 13314memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: 13315 LEAQ (AX)(R9*1), SI 13316 13317 // genMemMoveLong 13318 MOVOU (R10), X0 13319 MOVOU 16(R10), X1 13320 MOVOU -32(R10)(R9*1), X2 13321 MOVOU -16(R10)(R9*1), X3 13322 MOVQ R9, R13 13323 SHRQ $0x05, R13 13324 MOVQ AX, R11 13325 ANDL $0x0000001f, R11 13326 MOVQ $0x00000040, R14 13327 SUBQ R11, R14 13328 DECQ R13 13329 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 13330 LEAQ -32(R10)(R14*1), R11 13331 LEAQ -32(AX)(R14*1), R15 13332 13333emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: 13334 MOVOU (R11), X4 13335 MOVOU 16(R11), X5 13336 MOVOA X4, (R15) 13337 MOVOA X5, 16(R15) 13338 ADDQ $0x20, R15 13339 ADDQ $0x20, R11 13340 ADDQ $0x20, R14 13341 DECQ R13 13342 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back 13343 13344emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: 13345 MOVOU -32(R10)(R14*1), X4 13346 MOVOU -16(R10)(R14*1), X5 13347 MOVOA X4, -32(AX)(R14*1) 13348 MOVOA X5, -16(AX)(R14*1) 13349 ADDQ $0x20, R14 13350 CMPQ R9, R14 13351 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 13352 MOVOU X0, (AX) 13353 MOVOU X1, 16(AX) 13354 MOVOU X2, -32(AX)(R9*1) 13355 MOVOU X3, -16(AX)(R9*1) 13356 MOVQ SI, AX 13357 13358emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: 13359 ADDL R12, CX 13360 ADDL $0x04, R12 13361 MOVL CX, 12(SP) 13362 13363 // emitCopy 13364two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: 13365 CMPL R12, $0x40 13366 JLE 
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K 13367 MOVB $0xee, (AX) 13368 MOVW R8, 1(AX) 13369 LEAL -60(R12), R12 13370 ADDQ $0x03, AX 13371 JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K 13372 13373two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: 13374 CMPL R12, $0x0c 13375 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K 13376 CMPL R8, $0x00000800 13377 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K 13378 MOVB $0x01, BL 13379 LEAL -16(BX)(R12*4), R12 13380 MOVB R8, 1(AX) 13381 SHRL $0x08, R8 13382 SHLL $0x05, R8 13383 ORL R8, R12 13384 MOVB R12, (AX) 13385 ADDQ $0x02, AX 13386 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K 13387 13388emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: 13389 MOVB $0x02, BL 13390 LEAL -4(BX)(R12*4), R12 13391 MOVB R12, (AX) 13392 MOVW R8, 1(AX) 13393 ADDQ $0x03, AX 13394 13395match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: 13396 CMPL CX, 8(SP) 13397 JGE emit_remainder_encodeSnappyBetterBlockAsm64K 13398 CMPQ AX, (SP) 13399 JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K 13400 MOVQ $0x00000000, ret+48(FP) 13401 RET 13402 13403match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: 13404 MOVQ $0x00cf1bbcdcbfa563, SI 13405 MOVQ $0x9e3779b1, R8 13406 INCL DI 13407 MOVQ (DX)(DI*1), R9 13408 MOVQ R9, R10 13409 MOVQ R9, R11 13410 MOVQ R9, R12 13411 SHRQ $0x08, R11 13412 MOVQ R11, R13 13413 SHRQ $0x10, R12 13414 LEAL 1(DI), R14 13415 LEAL 2(DI), R15 13416 MOVQ -2(DX)(CX*1), R9 13417 SHLQ $0x08, R10 13418 IMULQ SI, R10 13419 SHRQ $0x30, R10 13420 SHLQ $0x08, R13 13421 IMULQ SI, R13 13422 SHRQ $0x30, R13 13423 SHLQ $0x20, R11 13424 IMULQ R8, R11 13425 SHRQ $0x32, R11 13426 SHLQ $0x20, R12 13427 IMULQ R8, R12 13428 SHRQ $0x32, R12 13429 MOVL DI, 24(SP)(R10*4) 13430 MOVL R14, 24(SP)(R13*4) 13431 MOVL R14, 262168(SP)(R11*4) 13432 MOVL R15, 262168(SP)(R12*4) 13433 MOVQ R9, R10 13434 MOVQ R9, R11 13435 SHRQ $0x08, R11 13436 MOVQ R11, R13 13437 
LEAL -2(CX), R9 13438 LEAL -1(CX), DI 13439 SHLQ $0x08, R10 13440 IMULQ SI, R10 13441 SHRQ $0x30, R10 13442 SHLQ $0x20, R11 13443 IMULQ R8, R11 13444 SHRQ $0x32, R11 13445 SHLQ $0x08, R13 13446 IMULQ SI, R13 13447 SHRQ $0x30, R13 13448 MOVL R9, 24(SP)(R10*4) 13449 MOVL DI, 262168(SP)(R11*4) 13450 MOVL DI, 24(SP)(R13*4) 13451 JMP search_loop_encodeSnappyBetterBlockAsm64K 13452 13453emit_remainder_encodeSnappyBetterBlockAsm64K: 13454 MOVQ src_len+32(FP), CX 13455 SUBL 12(SP), CX 13456 LEAQ 3(AX)(CX*1), CX 13457 CMPQ CX, (SP) 13458 JL emit_remainder_ok_encodeSnappyBetterBlockAsm64K 13459 MOVQ $0x00000000, ret+48(FP) 13460 RET 13461 13462emit_remainder_ok_encodeSnappyBetterBlockAsm64K: 13463 MOVQ src_len+32(FP), CX 13464 MOVL 12(SP), BX 13465 CMPL BX, CX 13466 JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K 13467 MOVL CX, SI 13468 MOVL CX, 12(SP) 13469 LEAQ (DX)(BX*1), CX 13470 SUBL BX, SI 13471 LEAL -1(SI), DX 13472 CMPL DX, $0x3c 13473 JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K 13474 CMPL DX, $0x00000100 13475 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K 13476 MOVB $0xf4, (AX) 13477 MOVW DX, 1(AX) 13478 ADDQ $0x03, AX 13479 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K 13480 13481two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: 13482 MOVB $0xf0, (AX) 13483 MOVB DL, 1(AX) 13484 ADDQ $0x02, AX 13485 CMPL DX, $0x40 13486 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm64K 13487 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K 13488 13489one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: 13490 SHLB $0x02, DL 13491 MOVB DL, (AX) 13492 ADDQ $0x01, AX 13493 13494memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: 13495 LEAQ (AX)(SI*1), DX 13496 MOVL SI, BX 13497 13498 // genMemMoveShort 13499 CMPQ BX, $0x08 13500 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8 13501 CMPQ BX, $0x10 13502 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 13503 CMPQ BX, $0x20 13504 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 13505 JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 13506 13507emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8: 13508 MOVQ (CX), SI 13509 MOVQ SI, (AX) 13510 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K 13511 13512emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: 13513 MOVQ (CX), SI 13514 MOVQ -8(CX)(BX*1), CX 13515 MOVQ SI, (AX) 13516 MOVQ CX, -8(AX)(BX*1) 13517 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K 13518 13519emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: 13520 MOVOU (CX), X0 13521 MOVOU -16(CX)(BX*1), X1 13522 MOVOU X0, (AX) 13523 MOVOU X1, -16(AX)(BX*1) 13524 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K 13525 13526emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: 13527 MOVOU (CX), X0 13528 MOVOU 16(CX), X1 13529 MOVOU -32(CX)(BX*1), X2 13530 MOVOU -16(CX)(BX*1), X3 13531 MOVOU X0, (AX) 13532 MOVOU X1, 16(AX) 13533 MOVOU X2, -32(AX)(BX*1) 13534 MOVOU X3, -16(AX)(BX*1) 13535 13536memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: 13537 MOVQ DX, AX 13538 JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K 13539 13540memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: 13541 LEAQ (AX)(SI*1), DX 13542 MOVL SI, BX 13543 13544 // genMemMoveLong 13545 MOVOU (CX), X0 13546 MOVOU 16(CX), X1 13547 MOVOU -32(CX)(BX*1), X2 13548 MOVOU -16(CX)(BX*1), X3 13549 MOVQ BX, DI 13550 SHRQ $0x05, DI 13551 MOVQ AX, SI 13552 ANDL $0x0000001f, SI 13553 MOVQ $0x00000040, R8 13554 SUBQ SI, R8 13555 DECQ DI 13556 JA 
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 13557 LEAQ -32(CX)(R8*1), SI 13558 LEAQ -32(AX)(R8*1), R9 13559 13560emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: 13561 MOVOU (SI), X4 13562 MOVOU 16(SI), X5 13563 MOVOA X4, (R9) 13564 MOVOA X5, 16(R9) 13565 ADDQ $0x20, R9 13566 ADDQ $0x20, SI 13567 ADDQ $0x20, R8 13568 DECQ DI 13569 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back 13570 13571emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: 13572 MOVOU -32(CX)(R8*1), X4 13573 MOVOU -16(CX)(R8*1), X5 13574 MOVOA X4, -32(AX)(R8*1) 13575 MOVOA X5, -16(AX)(R8*1) 13576 ADDQ $0x20, R8 13577 CMPQ BX, R8 13578 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 13579 MOVOU X0, (AX) 13580 MOVOU X1, 16(AX) 13581 MOVOU X2, -32(AX)(BX*1) 13582 MOVOU X3, -16(AX)(BX*1) 13583 MOVQ DX, AX 13584 13585emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: 13586 MOVQ dst_base+0(FP), CX 13587 SUBQ CX, AX 13588 MOVQ AX, ret+48(FP) 13589 RET 13590 13591// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int 13592// Requires: SSE2 13593TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 13594 MOVQ dst_base+0(FP), AX 13595 MOVQ $0x00000280, CX 13596 LEAQ 24(SP), DX 13597 PXOR X0, X0 13598 13599zero_loop_encodeSnappyBetterBlockAsm12B: 13600 MOVOU X0, (DX) 13601 MOVOU X0, 16(DX) 13602 MOVOU X0, 32(DX) 13603 MOVOU X0, 48(DX) 13604 MOVOU X0, 64(DX) 13605 MOVOU X0, 80(DX) 13606 MOVOU X0, 96(DX) 13607 MOVOU X0, 112(DX) 13608 ADDQ $0x80, DX 13609 DECQ CX 13610 JNZ zero_loop_encodeSnappyBetterBlockAsm12B 13611 MOVL $0x00000000, 12(SP) 13612 MOVQ src_len+32(FP), CX 13613 LEAQ -9(CX), DX 13614 LEAQ -8(CX), SI 13615 MOVL SI, 8(SP) 13616 SHRQ $0x05, CX 13617 SUBL CX, DX 13618 LEAQ (AX)(DX*1), DX 13619 MOVQ DX, (SP) 13620 MOVL $0x00000001, CX 13621 MOVL $0x00000000, 16(SP) 
// encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int -- main body.
// The prologue directly above zeroed 0x280 * 0x80 bytes starting at 24(SP) and set up
// the stack slots. State used from here on:
//   AX         next output position in dst (starts at dst_base)
//   CX         src offset currently examined (starts at 1)
//   DX         src_base (loaded by the first instruction below)
//   (SP)       output limit: dst_base + src_len - 9 - src_len>>5. When the output would
//              reach it, 0 is stored in ret and the function returns.
//   8(SP)      src_len - 8; the search stops once the next position reaches it
//   12(SP)     src offset of the first byte not yet written to dst
//   16(SP)     offset of the latest match; stored below but never read in this function
//   20(SP)     next search position: CX + 1 + (CX - 12(SP))>>6
//   24(SP)     table of src offsets, indexed by a 14-bit hash of 6 src bytes
//              (SHLQ $0x10, multiply by 0xcf1bbcdcbf9b, SHRQ $0x32)
//   65560(SP)  table of src offsets, indexed by a 12-bit hash of 4 src bytes
//              (SHLQ $0x20, multiply by 0x9e3779b1, SHRQ $0x34)
//
// Sections, in the order they appear:
//   search_loop           Load 8 bytes at CX, hash them, read both table slots and then
//                         overwrite them with CX. Compare the 4 bytes at each candidate
//                         with the 4 bytes at CX; on a miss continue at 20(SP).
//   candidateS_match      Hit in the 4-byte table: look up and update the 6-byte table for
//                         CX+1 and use that candidate if its 4 bytes match, otherwise
//                         restore CX and keep the 4-byte-table candidate (R8).
//   match_extend_back     While CX > 12(SP), the candidate offset is non-zero and the
//                         preceding bytes are equal, move CX and SI back by one.
//   match_extend_back_end Return 0 unless AX + 3 + (CX - 12(SP)) is below the limit in (SP).
//   matchLen              DI = match start; CX and SI skip 4 bytes; R12 counts further
//                         equal bytes, 8 at a time (XOR + BSFQ, bit index >> 3), then
//                         one at a time.
//   match_nolit_end       R8 = CX - SI is the match offset. If 12(SP) != DI, the bytes
//                         src[12(SP):DI] are emitted as a literal: with SI = length - 1,
//                         one tag byte SI<<2 when SI < 60, 0xf0 + 1 byte when SI < 256,
//                         else 0xf4 + 2 bytes; then a short move (SI < 64) or the
//                         32-byte-per-iteration long move.
//   emitCopy              CX += R12, R12 += 4, 12(SP) = CX. While R12 > 64: write 0xee and
//                         the 16-bit offset, subtract 60 from R12. Then, if R12 >= 12 or
//                         R8 >= 0x800, write ((R12-1)<<2 | 2) and the 16-bit offset;
//                         otherwise write ((R12-4)<<2 | 1 | (R8>>8)<<5) and the low offset byte.
//   match_nolit_emitcopy_end  Go to emit_remainder if CX >= 8(SP); return 0 if AX has
//                         reached the limit in (SP).
//   match_nolit_dst_ok    With s = match start and e = CX: store s+1, s+2, e-2 and e-1 in
//                         the 6-byte table and s+2, s+3 and e-1 in the 4-byte table, then
//                         jump back to search_loop.
//   emit_remainder        Return 0 if AX + 3 + (src_len - 12(SP)) is not below the limit;
//                         otherwise emit src[12(SP):src_len] as a literal (same tag rules
//                         as above) and return AX - dst_base.
13622 MOVQ src_base+24(FP), DX 13623 13624search_loop_encodeSnappyBetterBlockAsm12B: 13625 MOVL CX, SI 13626 SUBL 12(SP), SI 13627 SHRL $0x06, SI 13628 LEAL 1(CX)(SI*1), SI 13629 CMPL SI, 8(SP) 13630 JGE emit_remainder_encodeSnappyBetterBlockAsm12B 13631 MOVQ (DX)(CX*1), DI 13632 MOVL SI, 20(SP) 13633 MOVQ $0x0000cf1bbcdcbf9b, R9 13634 MOVQ $0x9e3779b1, SI 13635 MOVQ DI, R10 13636 MOVQ DI, R11 13637 SHLQ $0x10, R10 13638 IMULQ R9, R10 13639 SHRQ $0x32, R10 13640 SHLQ $0x20, R11 13641 IMULQ SI, R11 13642 SHRQ $0x34, R11 13643 MOVL 24(SP)(R10*4), SI 13644 MOVL 65560(SP)(R11*4), R8 13645 MOVL CX, 24(SP)(R10*4) 13646 MOVL CX, 65560(SP)(R11*4) 13647 CMPL (DX)(SI*1), DI 13648 JEQ candidate_match_encodeSnappyBetterBlockAsm12B 13649 CMPL (DX)(R8*1), DI 13650 JEQ candidateS_match_encodeSnappyBetterBlockAsm12B 13651 MOVL 20(SP), CX 13652 JMP search_loop_encodeSnappyBetterBlockAsm12B 13653 13654candidateS_match_encodeSnappyBetterBlockAsm12B: 13655 SHRQ $0x08, DI 13656 MOVQ DI, R10 13657 SHLQ $0x10, R10 13658 IMULQ R9, R10 13659 SHRQ $0x32, R10 13660 MOVL 24(SP)(R10*4), SI 13661 INCL CX 13662 MOVL CX, 24(SP)(R10*4) 13663 CMPL (DX)(SI*1), DI 13664 JEQ candidate_match_encodeSnappyBetterBlockAsm12B 13665 DECL CX 13666 MOVL R8, SI 13667 13668candidate_match_encodeSnappyBetterBlockAsm12B: 13669 MOVL 12(SP), DI 13670 TESTL SI, SI 13671 JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B 13672 13673match_extend_back_loop_encodeSnappyBetterBlockAsm12B: 13674 CMPL CX, DI 13675 JLE match_extend_back_end_encodeSnappyBetterBlockAsm12B 13676 MOVB -1(DX)(SI*1), BL 13677 MOVB -1(DX)(CX*1), R8 13678 CMPB BL, R8 13679 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B 13680 LEAL -1(CX), CX 13681 DECL SI 13682 JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B 13683 JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B 13684 13685match_extend_back_end_encodeSnappyBetterBlockAsm12B: 13686 MOVL CX, DI 13687 SUBL 12(SP), DI 13688 LEAQ 3(AX)(DI*1), DI 13689 CMPQ DI, (SP) 13690 JL 
match_dst_size_check_encodeSnappyBetterBlockAsm12B 13691 MOVQ $0x00000000, ret+48(FP) 13692 RET 13693 13694match_dst_size_check_encodeSnappyBetterBlockAsm12B: 13695 MOVL CX, DI 13696 ADDL $0x04, CX 13697 ADDL $0x04, SI 13698 MOVQ src_len+32(FP), R8 13699 SUBL CX, R8 13700 LEAQ (DX)(CX*1), R9 13701 LEAQ (DX)(SI*1), R10 13702 13703 // matchLen 13704 XORL R12, R12 13705 CMPL R8, $0x08 13706 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B 13707 13708matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: 13709 MOVQ (R9)(R12*1), R11 13710 XORQ (R10)(R12*1), R11 13711 TESTQ R11, R11 13712 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B 13713 BSFQ R11, R11 13714 SARQ $0x03, R11 13715 LEAL (R12)(R11*1), R12 13716 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B 13717 13718matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: 13719 LEAL -8(R8), R8 13720 LEAL 8(R12), R12 13721 CMPL R8, $0x08 13722 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B 13723 13724matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B: 13725 TESTL R8, R8 13726 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B 13727 13728matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: 13729 MOVB (R9)(R12*1), R11 13730 CMPB (R10)(R12*1), R11 13731 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B 13732 LEAL 1(R12), R12 13733 DECL R8 13734 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B 13735 13736match_nolit_end_encodeSnappyBetterBlockAsm12B: 13737 MOVL CX, R8 13738 SUBL SI, R8 13739 13740 // Check if repeat 13741 MOVL R8, 16(SP) 13742 MOVL 12(SP), SI 13743 CMPL SI, DI 13744 JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B 13745 MOVL DI, R9 13746 MOVL DI, 12(SP) 13747 LEAQ (DX)(SI*1), R10 13748 SUBL SI, R9 13749 LEAL -1(R9), SI 13750 CMPL SI, $0x3c 13751 JLT one_byte_match_emit_encodeSnappyBetterBlockAsm12B 13752 CMPL SI, $0x00000100 13753 JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm12B 13754 MOVB $0xf4, 
(AX) 13755 MOVW SI, 1(AX) 13756 ADDQ $0x03, AX 13757 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B 13758 13759two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: 13760 MOVB $0xf0, (AX) 13761 MOVB SI, 1(AX) 13762 ADDQ $0x02, AX 13763 CMPL SI, $0x40 13764 JL memmove_match_emit_encodeSnappyBetterBlockAsm12B 13765 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B 13766 13767one_byte_match_emit_encodeSnappyBetterBlockAsm12B: 13768 SHLB $0x02, SI 13769 MOVB SI, (AX) 13770 ADDQ $0x01, AX 13771 13772memmove_match_emit_encodeSnappyBetterBlockAsm12B: 13773 LEAQ (AX)(R9*1), SI 13774 13775 // genMemMoveShort 13776 CMPQ R9, $0x08 13777 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 13778 CMPQ R9, $0x10 13779 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 13780 CMPQ R9, $0x20 13781 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 13782 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 13783 13784emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: 13785 MOVQ (R10), R11 13786 MOVQ R11, (AX) 13787 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B 13788 13789emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: 13790 MOVQ (R10), R11 13791 MOVQ -8(R10)(R9*1), R10 13792 MOVQ R11, (AX) 13793 MOVQ R10, -8(AX)(R9*1) 13794 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B 13795 13796emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: 13797 MOVOU (R10), X0 13798 MOVOU -16(R10)(R9*1), X1 13799 MOVOU X0, (AX) 13800 MOVOU X1, -16(AX)(R9*1) 13801 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B 13802 13803emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: 13804 MOVOU (R10), X0 13805 MOVOU 16(R10), X1 13806 MOVOU -32(R10)(R9*1), X2 13807 MOVOU -16(R10)(R9*1), X3 13808 MOVOU X0, (AX) 
13809 MOVOU X1, 16(AX) 13810 MOVOU X2, -32(AX)(R9*1) 13811 MOVOU X3, -16(AX)(R9*1) 13812 13813memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: 13814 MOVQ SI, AX 13815 JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B 13816 13817memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: 13818 LEAQ (AX)(R9*1), SI 13819 13820 // genMemMoveLong 13821 MOVOU (R10), X0 13822 MOVOU 16(R10), X1 13823 MOVOU -32(R10)(R9*1), X2 13824 MOVOU -16(R10)(R9*1), X3 13825 MOVQ R9, R13 13826 SHRQ $0x05, R13 13827 MOVQ AX, R11 13828 ANDL $0x0000001f, R11 13829 MOVQ $0x00000040, R14 13830 SUBQ R11, R14 13831 DECQ R13 13832 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 13833 LEAQ -32(R10)(R14*1), R11 13834 LEAQ -32(AX)(R14*1), R15 13835 13836emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: 13837 MOVOU (R11), X4 13838 MOVOU 16(R11), X5 13839 MOVOA X4, (R15) 13840 MOVOA X5, 16(R15) 13841 ADDQ $0x20, R15 13842 ADDQ $0x20, R11 13843 ADDQ $0x20, R14 13844 DECQ R13 13845 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back 13846 13847emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: 13848 MOVOU -32(R10)(R14*1), X4 13849 MOVOU -16(R10)(R14*1), X5 13850 MOVOA X4, -32(AX)(R14*1) 13851 MOVOA X5, -16(AX)(R14*1) 13852 ADDQ $0x20, R14 13853 CMPQ R9, R14 13854 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 13855 MOVOU X0, (AX) 13856 MOVOU X1, 16(AX) 13857 MOVOU X2, -32(AX)(R9*1) 13858 MOVOU X3, -16(AX)(R9*1) 13859 MOVQ SI, AX 13860 13861emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: 13862 ADDL R12, CX 13863 ADDL $0x04, R12 13864 MOVL CX, 12(SP) 13865 13866 // emitCopy 13867two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: 13868 CMPL R12, $0x40 13869 JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B 13870 MOVB $0xee, (AX) 13871 MOVW R8, 1(AX) 13872 LEAL 
-60(R12), R12 13873 ADDQ $0x03, AX 13874 JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B 13875 13876two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: 13877 CMPL R12, $0x0c 13878 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B 13879 CMPL R8, $0x00000800 13880 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B 13881 MOVB $0x01, BL 13882 LEAL -16(BX)(R12*4), R12 13883 MOVB R8, 1(AX) 13884 SHRL $0x08, R8 13885 SHLL $0x05, R8 13886 ORL R8, R12 13887 MOVB R12, (AX) 13888 ADDQ $0x02, AX 13889 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B 13890 13891emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: 13892 MOVB $0x02, BL 13893 LEAL -4(BX)(R12*4), R12 13894 MOVB R12, (AX) 13895 MOVW R8, 1(AX) 13896 ADDQ $0x03, AX 13897 13898match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: 13899 CMPL CX, 8(SP) 13900 JGE emit_remainder_encodeSnappyBetterBlockAsm12B 13901 CMPQ AX, (SP) 13902 JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B 13903 MOVQ $0x00000000, ret+48(FP) 13904 RET 13905 13906match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: 13907 MOVQ $0x0000cf1bbcdcbf9b, SI 13908 MOVQ $0x9e3779b1, R8 13909 INCL DI 13910 MOVQ (DX)(DI*1), R9 13911 MOVQ R9, R10 13912 MOVQ R9, R11 13913 MOVQ R9, R12 13914 SHRQ $0x08, R11 13915 MOVQ R11, R13 13916 SHRQ $0x10, R12 13917 LEAL 1(DI), R14 13918 LEAL 2(DI), R15 13919 MOVQ -2(DX)(CX*1), R9 13920 SHLQ $0x10, R10 13921 IMULQ SI, R10 13922 SHRQ $0x32, R10 13923 SHLQ $0x10, R13 13924 IMULQ SI, R13 13925 SHRQ $0x32, R13 13926 SHLQ $0x20, R11 13927 IMULQ R8, R11 13928 SHRQ $0x34, R11 13929 SHLQ $0x20, R12 13930 IMULQ R8, R12 13931 SHRQ $0x34, R12 13932 MOVL DI, 24(SP)(R10*4) 13933 MOVL R14, 24(SP)(R13*4) 13934 MOVL R14, 65560(SP)(R11*4) 13935 MOVL R15, 65560(SP)(R12*4) 13936 MOVQ R9, R10 13937 MOVQ R9, R11 13938 SHRQ $0x08, R11 13939 MOVQ R11, R13 13940 LEAL -2(CX), R9 13941 LEAL -1(CX), DI 13942 SHLQ $0x10, R10 13943 IMULQ SI, R10 13944 SHRQ $0x32, R10 13945 SHLQ $0x20, 
R11 13946 IMULQ R8, R11 13947 SHRQ $0x34, R11 13948 SHLQ $0x10, R13 13949 IMULQ SI, R13 13950 SHRQ $0x32, R13 13951 MOVL R9, 24(SP)(R10*4) 13952 MOVL DI, 65560(SP)(R11*4) 13953 MOVL DI, 24(SP)(R13*4) 13954 JMP search_loop_encodeSnappyBetterBlockAsm12B 13955 13956emit_remainder_encodeSnappyBetterBlockAsm12B: 13957 MOVQ src_len+32(FP), CX 13958 SUBL 12(SP), CX 13959 LEAQ 3(AX)(CX*1), CX 13960 CMPQ CX, (SP) 13961 JL emit_remainder_ok_encodeSnappyBetterBlockAsm12B 13962 MOVQ $0x00000000, ret+48(FP) 13963 RET 13964 13965emit_remainder_ok_encodeSnappyBetterBlockAsm12B: 13966 MOVQ src_len+32(FP), CX 13967 MOVL 12(SP), BX 13968 CMPL BX, CX 13969 JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B 13970 MOVL CX, SI 13971 MOVL CX, 12(SP) 13972 LEAQ (DX)(BX*1), CX 13973 SUBL BX, SI 13974 LEAL -1(SI), DX 13975 CMPL DX, $0x3c 13976 JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B 13977 CMPL DX, $0x00000100 13978 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B 13979 MOVB $0xf4, (AX) 13980 MOVW DX, 1(AX) 13981 ADDQ $0x03, AX 13982 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B 13983 13984two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: 13985 MOVB $0xf0, (AX) 13986 MOVB DL, 1(AX) 13987 ADDQ $0x02, AX 13988 CMPL DX, $0x40 13989 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm12B 13990 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B 13991 13992one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: 13993 SHLB $0x02, DL 13994 MOVB DL, (AX) 13995 ADDQ $0x01, AX 13996 13997memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: 13998 LEAQ (AX)(SI*1), DX 13999 MOVL SI, BX 14000 14001 // genMemMoveShort 14002 CMPQ BX, $0x08 14003 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8 14004 CMPQ BX, $0x10 14005 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 14006 CMPQ BX, $0x20 14007 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 14008 JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 14009 14010emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8: 14011 MOVQ (CX), SI 14012 MOVQ SI, (AX) 14013 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B 14014 14015emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: 14016 MOVQ (CX), SI 14017 MOVQ -8(CX)(BX*1), CX 14018 MOVQ SI, (AX) 14019 MOVQ CX, -8(AX)(BX*1) 14020 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B 14021 14022emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: 14023 MOVOU (CX), X0 14024 MOVOU -16(CX)(BX*1), X1 14025 MOVOU X0, (AX) 14026 MOVOU X1, -16(AX)(BX*1) 14027 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B 14028 14029emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: 14030 MOVOU (CX), X0 14031 MOVOU 16(CX), X1 14032 MOVOU -32(CX)(BX*1), X2 14033 MOVOU -16(CX)(BX*1), X3 14034 MOVOU X0, (AX) 14035 MOVOU X1, 16(AX) 14036 MOVOU X2, -32(AX)(BX*1) 14037 MOVOU X3, -16(AX)(BX*1) 14038 14039memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: 14040 MOVQ DX, AX 14041 JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B 14042 14043memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: 14044 LEAQ (AX)(SI*1), DX 14045 MOVL SI, BX 14046 14047 // genMemMoveLong 14048 MOVOU (CX), X0 14049 MOVOU 16(CX), X1 14050 MOVOU -32(CX)(BX*1), X2 14051 MOVOU -16(CX)(BX*1), X3 14052 MOVQ BX, DI 14053 SHRQ $0x05, DI 14054 MOVQ AX, SI 14055 ANDL $0x0000001f, SI 14056 MOVQ $0x00000040, R8 14057 SUBQ SI, R8 14058 DECQ DI 14059 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 14060 LEAQ -32(CX)(R8*1), SI 14061 LEAQ -32(AX)(R8*1), R9 14062 
// The line below first finishes encodeSnappyBetterBlockAsm12B (32-byte-per-iteration copy
// of the trailing literal, then ret = AX - dst_base) and then begins
// encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int, frame $20504-56.
//
// encodeSnappyBetterBlockAsm10B:
//   Prologue   zeroes 0xa0 * 0x80 bytes starting at 24(SP), then sets 12(SP) = 0,
//              8(SP) = src_len - 8, (SP) = dst_base + src_len - 9 - src_len>>5,
//              CX = 1, 16(SP) = 0 and DX = src_base.
//   AX         next output position in dst (starts at dst_base)
//   (SP)       output limit; when output would reach it, 0 is stored in ret and the
//              function returns
//   8(SP)      the search stops once the next position reaches it
//   12(SP)     src offset of the first byte not yet written to dst
//   16(SP)     offset of the latest match; stored but never read in this function
//   20(SP)     next search position: CX + 1 + (CX - 12(SP))>>5
//   24(SP)     table of src offsets, indexed by a 12-bit hash of 6 src bytes
//              (SHLQ $0x10, multiply by 0xcf1bbcdcbf9b, SHRQ $0x34)
//   16408(SP)  table of src offsets, indexed by a 10-bit hash of 4 src bytes
//              (SHLQ $0x20, multiply by 0x9e3779b1, SHRQ $0x36)
//
// Sections, in the order they appear:
//   search_loop           Load 8 bytes at CX, hash them, read both table slots and then
//                         overwrite them with CX. Compare the 4 bytes at each candidate
//                         with the 4 bytes at CX; on a miss continue at 20(SP).
//   candidateS_match      Hit in the 4-byte table: look up and update the 6-byte table for
//                         CX+1 and use that candidate if its 4 bytes match, otherwise
//                         restore CX and keep the 4-byte-table candidate (R8).
//   match_extend_back     While CX > 12(SP), the candidate offset is non-zero and the
//                         preceding bytes are equal, move CX and SI back by one.
//   match_extend_back_end Return 0 unless AX + 3 + (CX - 12(SP)) is below the limit in (SP).
//   matchLen / literal    Measure the match, then emit src[12(SP):match start] as a literal.
//   emitCopy              CX += R12, R12 += 4, 12(SP) = CX. While R12 > 64: write 0xee and
//                         the 16-bit offset, subtract 60 from R12. Then, if R12 >= 12 or
//                         R8 >= 0x800, write ((R12-1)<<2 | 2) and the 16-bit offset;
//                         otherwise write ((R12-4)<<2 | 1 | (R8>>8)<<5) and the low offset byte.
//   match_nolit_dst_ok    With s = match start and e = CX: store s+1, s+2, e-2 and e-1 in
//                         the 6-byte table and s+2, s+3 and e-1 in the 4-byte table, then
//                         jump back to search_loop.
//   emit_remainder        Return 0 if AX + 3 + (src_len - 12(SP)) is not below the limit;
//                         otherwise emit src[12(SP):src_len] as a literal and return
//                         AX - dst_base.
14063emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: 14064 MOVOU (SI), X4 14065 MOVOU 16(SI), X5 14066 MOVOA X4, (R9) 14067 MOVOA X5, 16(R9) 14068 ADDQ $0x20, R9 14069 ADDQ $0x20, SI 14070 ADDQ $0x20, R8 14071 DECQ DI 14072 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back 14073 14074emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: 14075 MOVOU -32(CX)(R8*1), X4 14076 MOVOU -16(CX)(R8*1), X5 14077 MOVOA X4, -32(AX)(R8*1) 14078 MOVOA X5, -16(AX)(R8*1) 14079 ADDQ $0x20, R8 14080 CMPQ BX, R8 14081 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 14082 MOVOU X0, (AX) 14083 MOVOU X1, 16(AX) 14084 MOVOU X2, -32(AX)(BX*1) 14085 MOVOU X3, -16(AX)(BX*1) 14086 MOVQ DX, AX 14087 14088emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: 14089 MOVQ dst_base+0(FP), CX 14090 SUBQ CX, AX 14091 MOVQ AX, ret+48(FP) 14092 RET 14093 14094// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int 14095// Requires: SSE2 14096TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 14097 MOVQ dst_base+0(FP), AX 14098 MOVQ $0x000000a0, CX 14099 LEAQ 24(SP), DX 14100 PXOR X0, X0 14101 14102zero_loop_encodeSnappyBetterBlockAsm10B: 14103 MOVOU X0, (DX) 14104 MOVOU X0, 16(DX) 14105 MOVOU X0, 32(DX) 14106 MOVOU X0, 48(DX) 14107 MOVOU X0, 64(DX) 14108 MOVOU X0, 80(DX) 14109 MOVOU X0, 96(DX) 14110 MOVOU X0, 112(DX) 14111 ADDQ $0x80, DX 14112 DECQ CX 14113 JNZ zero_loop_encodeSnappyBetterBlockAsm10B 14114 MOVL $0x00000000, 12(SP) 14115 MOVQ src_len+32(FP), CX 14116 LEAQ -9(CX), DX 14117 LEAQ -8(CX), SI 14118 MOVL SI, 8(SP) 14119 SHRQ $0x05, CX 14120 SUBL CX, DX 14121 LEAQ (AX)(DX*1), DX 14122 MOVQ DX, (SP) 14123 MOVL $0x00000001, CX 14124 MOVL $0x00000000, 16(SP) 14125 MOVQ src_base+24(FP), DX 14126 14127search_loop_encodeSnappyBetterBlockAsm10B: 14128 MOVL CX, SI 14129 SUBL 12(SP), SI 14130 SHRL $0x05, SI 14131 LEAL 
1(CX)(SI*1), SI 14132 CMPL SI, 8(SP) 14133 JGE emit_remainder_encodeSnappyBetterBlockAsm10B 14134 MOVQ (DX)(CX*1), DI 14135 MOVL SI, 20(SP) 14136 MOVQ $0x0000cf1bbcdcbf9b, R9 14137 MOVQ $0x9e3779b1, SI 14138 MOVQ DI, R10 14139 MOVQ DI, R11 14140 SHLQ $0x10, R10 14141 IMULQ R9, R10 14142 SHRQ $0x34, R10 14143 SHLQ $0x20, R11 14144 IMULQ SI, R11 14145 SHRQ $0x36, R11 14146 MOVL 24(SP)(R10*4), SI 14147 MOVL 16408(SP)(R11*4), R8 14148 MOVL CX, 24(SP)(R10*4) 14149 MOVL CX, 16408(SP)(R11*4) 14150 CMPL (DX)(SI*1), DI 14151 JEQ candidate_match_encodeSnappyBetterBlockAsm10B 14152 CMPL (DX)(R8*1), DI 14153 JEQ candidateS_match_encodeSnappyBetterBlockAsm10B 14154 MOVL 20(SP), CX 14155 JMP search_loop_encodeSnappyBetterBlockAsm10B 14156 14157candidateS_match_encodeSnappyBetterBlockAsm10B: 14158 SHRQ $0x08, DI 14159 MOVQ DI, R10 14160 SHLQ $0x10, R10 14161 IMULQ R9, R10 14162 SHRQ $0x34, R10 14163 MOVL 24(SP)(R10*4), SI 14164 INCL CX 14165 MOVL CX, 24(SP)(R10*4) 14166 CMPL (DX)(SI*1), DI 14167 JEQ candidate_match_encodeSnappyBetterBlockAsm10B 14168 DECL CX 14169 MOVL R8, SI 14170 14171candidate_match_encodeSnappyBetterBlockAsm10B: 14172 MOVL 12(SP), DI 14173 TESTL SI, SI 14174 JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B 14175 14176match_extend_back_loop_encodeSnappyBetterBlockAsm10B: 14177 CMPL CX, DI 14178 JLE match_extend_back_end_encodeSnappyBetterBlockAsm10B 14179 MOVB -1(DX)(SI*1), BL 14180 MOVB -1(DX)(CX*1), R8 14181 CMPB BL, R8 14182 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B 14183 LEAL -1(CX), CX 14184 DECL SI 14185 JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B 14186 JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B 14187 14188match_extend_back_end_encodeSnappyBetterBlockAsm10B: 14189 MOVL CX, DI 14190 SUBL 12(SP), DI 14191 LEAQ 3(AX)(DI*1), DI 14192 CMPQ DI, (SP) 14193 JL match_dst_size_check_encodeSnappyBetterBlockAsm10B 14194 MOVQ $0x00000000, ret+48(FP) 14195 RET 14196 14197match_dst_size_check_encodeSnappyBetterBlockAsm10B: 
// Output-space check passed. DI keeps the match start; CX and SI skip the first 4 bytes
// of the match; R8 = src_len - CX bounds the forward extension, which is counted in R12
// (8 bytes at a time via XOR + BSFQ with the bit index shifted right by 3, then singly).
// Afterwards R8 = CX - SI is the match offset, and src[12(SP):DI] is emitted as a literal
// unless 12(SP) == DI.
14198 MOVL CX, DI 14199 ADDL $0x04, CX 14200 ADDL $0x04, SI 14201 MOVQ src_len+32(FP), R8 14202 SUBL CX, R8 14203 LEAQ (DX)(CX*1), R9 14204 LEAQ (DX)(SI*1), R10 14205 14206 // matchLen 14207 XORL R12, R12 14208 CMPL R8, $0x08 14209 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B 14210 14211matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: 14212 MOVQ (R9)(R12*1), R11 14213 XORQ (R10)(R12*1), R11 14214 TESTQ R11, R11 14215 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B 14216 BSFQ R11, R11 14217 SARQ $0x03, R11 14218 LEAL (R12)(R11*1), R12 14219 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B 14220 14221matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: 14222 LEAL -8(R8), R8 14223 LEAL 8(R12), R12 14224 CMPL R8, $0x08 14225 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B 14226 14227matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B: 14228 TESTL R8, R8 14229 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B 14230 14231matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: 14232 MOVB (R9)(R12*1), R11 14233 CMPB (R10)(R12*1), R11 14234 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B 14235 LEAL 1(R12), R12 14236 DECL R8 14237 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B 14238 14239match_nolit_end_encodeSnappyBetterBlockAsm10B: 14240 MOVL CX, R8 14241 SUBL SI, R8 14242 14243 // Check if repeat 14244 MOVL R8, 16(SP) 14245 MOVL 12(SP), SI 14246 CMPL SI, DI 14247 JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B 14248 MOVL DI, R9 14249 MOVL DI, 12(SP) 14250 LEAQ (DX)(SI*1), R10 14251 SUBL SI, R9 14252 LEAL -1(R9), SI 14253 CMPL SI, $0x3c 14254 JLT one_byte_match_emit_encodeSnappyBetterBlockAsm10B 14255 CMPL SI, $0x00000100 14256 JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm10B 14257 MOVB $0xf4, (AX) 14258 MOVW SI, 1(AX) 14259 ADDQ $0x03, AX 14260 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B 14261 
// Literal header, with SI = literal length - 1 and R9 = literal length:
//   SI < 60   one byte, SI<<2
//   SI < 256  0xf0 followed by SI as one byte
//   else      0xf4 followed by SI as 16 bits (written just above)
// Literals with SI < 64 use the short move, which picks a path by R9 (<= 8, <= 16, <= 32,
// else the 33..64 path); longer literals use the 32-byte-per-iteration long move.
14262two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: 14263 MOVB $0xf0, (AX) 14264 MOVB SI, 1(AX) 14265 ADDQ $0x02, AX 14266 CMPL SI, $0x40 14267 JL memmove_match_emit_encodeSnappyBetterBlockAsm10B 14268 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B 14269 14270one_byte_match_emit_encodeSnappyBetterBlockAsm10B: 14271 SHLB $0x02, SI 14272 MOVB SI, (AX) 14273 ADDQ $0x01, AX 14274 14275memmove_match_emit_encodeSnappyBetterBlockAsm10B: 14276 LEAQ (AX)(R9*1), SI 14277 14278 // genMemMoveShort 14279 CMPQ R9, $0x08 14280 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 14281 CMPQ R9, $0x10 14282 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 14283 CMPQ R9, $0x20 14284 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 14285 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 14286 14287emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: 14288 MOVQ (R10), R11 14289 MOVQ R11, (AX) 14290 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B 14291 14292emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: 14293 MOVQ (R10), R11 14294 MOVQ -8(R10)(R9*1), R10 14295 MOVQ R11, (AX) 14296 MOVQ R10, -8(AX)(R9*1) 14297 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B 14298 14299emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: 14300 MOVOU (R10), X0 14301 MOVOU -16(R10)(R9*1), X1 14302 MOVOU X0, (AX) 14303 MOVOU X1, -16(AX)(R9*1) 14304 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B 14305 14306emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: 14307 MOVOU (R10), X0 14308 MOVOU 16(R10), X1 14309 MOVOU -32(R10)(R9*1), X2 14310 MOVOU -16(R10)(R9*1), X3 14311 MOVOU X0, (AX) 14312 MOVOU X1, 16(AX) 14313 MOVOU X2, -32(AX)(R9*1) 14314 MOVOU X3, -16(AX)(R9*1) 14315 
14316memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: 14317 MOVQ SI, AX 14318 JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B 14319 14320memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: 14321 LEAQ (AX)(R9*1), SI 14322 14323 // genMemMoveLong 14324 MOVOU (R10), X0 14325 MOVOU 16(R10), X1 14326 MOVOU -32(R10)(R9*1), X2 14327 MOVOU -16(R10)(R9*1), X3 14328 MOVQ R9, R13 14329 SHRQ $0x05, R13 14330 MOVQ AX, R11 14331 ANDL $0x0000001f, R11 14332 MOVQ $0x00000040, R14 14333 SUBQ R11, R14 14334 DECQ R13 14335 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 14336 LEAQ -32(R10)(R14*1), R11 14337 LEAQ -32(AX)(R14*1), R15 14338 14339emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: 14340 MOVOU (R11), X4 14341 MOVOU 16(R11), X5 14342 MOVOA X4, (R15) 14343 MOVOA X5, 16(R15) 14344 ADDQ $0x20, R15 14345 ADDQ $0x20, R11 14346 ADDQ $0x20, R14 14347 DECQ R13 14348 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back 14349 14350emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: 14351 MOVOU -32(R10)(R14*1), X4 14352 MOVOU -16(R10)(R14*1), X5 14353 MOVOA X4, -32(AX)(R14*1) 14354 MOVOA X5, -16(AX)(R14*1) 14355 ADDQ $0x20, R14 14356 CMPQ R9, R14 14357 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 14358 MOVOU X0, (AX) 14359 MOVOU X1, 16(AX) 14360 MOVOU X2, -32(AX)(R9*1) 14361 MOVOU X3, -16(AX)(R9*1) 14362 MOVQ SI, AX 14363 14364emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: 14365 ADDL R12, CX 14366 ADDL $0x04, R12 14367 MOVL CX, 12(SP) 14368 14369 // emitCopy 14370two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: 14371 CMPL R12, $0x40 14372 JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B 14373 MOVB $0xee, (AX) 14374 MOVW R8, 1(AX) 14375 LEAL -60(R12), R12 14376 ADDQ $0x03, AX 14377 JMP 
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B 14378 14379two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: 14380 CMPL R12, $0x0c 14381 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B 14382 CMPL R8, $0x00000800 14383 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B 14384 MOVB $0x01, BL 14385 LEAL -16(BX)(R12*4), R12 14386 MOVB R8, 1(AX) 14387 SHRL $0x08, R8 14388 SHLL $0x05, R8 14389 ORL R8, R12 14390 MOVB R12, (AX) 14391 ADDQ $0x02, AX 14392 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B 14393 14394emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: 14395 MOVB $0x02, BL 14396 LEAL -4(BX)(R12*4), R12 14397 MOVB R12, (AX) 14398 MOVW R8, 1(AX) 14399 ADDQ $0x03, AX 14400 14401match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: 14402 CMPL CX, 8(SP) 14403 JGE emit_remainder_encodeSnappyBetterBlockAsm10B 14404 CMPQ AX, (SP) 14405 JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B 14406 MOVQ $0x00000000, ret+48(FP) 14407 RET 14408 14409match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: 14410 MOVQ $0x0000cf1bbcdcbf9b, SI 14411 MOVQ $0x9e3779b1, R8 14412 INCL DI 14413 MOVQ (DX)(DI*1), R9 14414 MOVQ R9, R10 14415 MOVQ R9, R11 14416 MOVQ R9, R12 14417 SHRQ $0x08, R11 14418 MOVQ R11, R13 14419 SHRQ $0x10, R12 14420 LEAL 1(DI), R14 14421 LEAL 2(DI), R15 14422 MOVQ -2(DX)(CX*1), R9 14423 SHLQ $0x10, R10 14424 IMULQ SI, R10 14425 SHRQ $0x34, R10 14426 SHLQ $0x10, R13 14427 IMULQ SI, R13 14428 SHRQ $0x34, R13 14429 SHLQ $0x20, R11 14430 IMULQ R8, R11 14431 SHRQ $0x36, R11 14432 SHLQ $0x20, R12 14433 IMULQ R8, R12 14434 SHRQ $0x36, R12 14435 MOVL DI, 24(SP)(R10*4) 14436 MOVL R14, 24(SP)(R13*4) 14437 MOVL R14, 16408(SP)(R11*4) 14438 MOVL R15, 16408(SP)(R12*4) 14439 MOVQ R9, R10 14440 MOVQ R9, R11 14441 SHRQ $0x08, R11 14442 MOVQ R11, R13 14443 LEAL -2(CX), R9 14444 LEAL -1(CX), DI 14445 SHLQ $0x10, R10 14446 IMULQ SI, R10 14447 SHRQ $0x34, R10 14448 SHLQ $0x20, R11 14449 IMULQ R8, R11 14450 SHRQ $0x36, R11 
14451 SHLQ $0x10, R13 14452 IMULQ SI, R13 14453 SHRQ $0x34, R13 14454 MOVL R9, 24(SP)(R10*4) 14455 MOVL DI, 16408(SP)(R11*4) 14456 MOVL DI, 24(SP)(R13*4) 14457 JMP search_loop_encodeSnappyBetterBlockAsm10B 14458 14459emit_remainder_encodeSnappyBetterBlockAsm10B: 14460 MOVQ src_len+32(FP), CX 14461 SUBL 12(SP), CX 14462 LEAQ 3(AX)(CX*1), CX 14463 CMPQ CX, (SP) 14464 JL emit_remainder_ok_encodeSnappyBetterBlockAsm10B 14465 MOVQ $0x00000000, ret+48(FP) 14466 RET 14467 14468emit_remainder_ok_encodeSnappyBetterBlockAsm10B: 14469 MOVQ src_len+32(FP), CX 14470 MOVL 12(SP), BX 14471 CMPL BX, CX 14472 JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B 14473 MOVL CX, SI 14474 MOVL CX, 12(SP) 14475 LEAQ (DX)(BX*1), CX 14476 SUBL BX, SI 14477 LEAL -1(SI), DX 14478 CMPL DX, $0x3c 14479 JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B 14480 CMPL DX, $0x00000100 14481 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B 14482 MOVB $0xf4, (AX) 14483 MOVW DX, 1(AX) 14484 ADDQ $0x03, AX 14485 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B 14486 14487two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: 14488 MOVB $0xf0, (AX) 14489 MOVB DL, 1(AX) 14490 ADDQ $0x02, AX 14491 CMPL DX, $0x40 14492 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm10B 14493 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B 14494 14495one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: 14496 SHLB $0x02, DL 14497 MOVB DL, (AX) 14498 ADDQ $0x01, AX 14499 14500memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: 14501 LEAQ (AX)(SI*1), DX 14502 MOVL SI, BX 14503 14504 // genMemMoveShort 14505 CMPQ BX, $0x08 14506 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8 14507 CMPQ BX, $0x10 14508 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 14509 CMPQ BX, $0x20 14510 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 14511 JMP 
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 14512 14513emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8: 14514 MOVQ (CX), SI 14515 MOVQ SI, (AX) 14516 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B 14517 14518emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: 14519 MOVQ (CX), SI 14520 MOVQ -8(CX)(BX*1), CX 14521 MOVQ SI, (AX) 14522 MOVQ CX, -8(AX)(BX*1) 14523 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B 14524 14525emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: 14526 MOVOU (CX), X0 14527 MOVOU -16(CX)(BX*1), X1 14528 MOVOU X0, (AX) 14529 MOVOU X1, -16(AX)(BX*1) 14530 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B 14531 14532emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: 14533 MOVOU (CX), X0 14534 MOVOU 16(CX), X1 14535 MOVOU -32(CX)(BX*1), X2 14536 MOVOU -16(CX)(BX*1), X3 14537 MOVOU X0, (AX) 14538 MOVOU X1, 16(AX) 14539 MOVOU X2, -32(AX)(BX*1) 14540 MOVOU X3, -16(AX)(BX*1) 14541 14542memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: 14543 MOVQ DX, AX 14544 JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B 14545 14546memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: 14547 LEAQ (AX)(SI*1), DX 14548 MOVL SI, BX 14549 14550 // genMemMoveLong 14551 MOVOU (CX), X0 14552 MOVOU 16(CX), X1 14553 MOVOU -32(CX)(BX*1), X2 14554 MOVOU -16(CX)(BX*1), X3 14555 MOVQ BX, DI 14556 SHRQ $0x05, DI 14557 MOVQ AX, SI 14558 ANDL $0x0000001f, SI 14559 MOVQ $0x00000040, R8 14560 SUBQ SI, R8 14561 DECQ DI 14562 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 14563 LEAQ -32(CX)(R8*1), SI 14564 LEAQ -32(AX)(R8*1), R9 14565 14566emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: 14567 MOVOU (SI), X4 14568 MOVOU 16(SI), X5 14569 
MOVOA X4, (R9) 14570 MOVOA X5, 16(R9) 14571 ADDQ $0x20, R9 14572 ADDQ $0x20, SI 14573 ADDQ $0x20, R8 14574 DECQ DI 14575 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back 14576 14577emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: 14578 MOVOU -32(CX)(R8*1), X4 14579 MOVOU -16(CX)(R8*1), X5 14580 MOVOA X4, -32(AX)(R8*1) 14581 MOVOA X5, -16(AX)(R8*1) 14582 ADDQ $0x20, R8 14583 CMPQ BX, R8 14584 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 14585 MOVOU X0, (AX) 14586 MOVOU X1, 16(AX) 14587 MOVOU X2, -32(AX)(BX*1) 14588 MOVOU X3, -16(AX)(BX*1) 14589 MOVQ DX, AX 14590 14591emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: 14592 MOVQ dst_base+0(FP), CX 14593 SUBQ CX, AX 14594 MOVQ AX, ret+48(FP) 14595 RET 14596 14597// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int 14598// Requires: SSE2 14599TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 14600 MOVQ dst_base+0(FP), AX 14601 MOVQ $0x00000028, CX 14602 LEAQ 24(SP), DX 14603 PXOR X0, X0 14604 14605zero_loop_encodeSnappyBetterBlockAsm8B: 14606 MOVOU X0, (DX) 14607 MOVOU X0, 16(DX) 14608 MOVOU X0, 32(DX) 14609 MOVOU X0, 48(DX) 14610 MOVOU X0, 64(DX) 14611 MOVOU X0, 80(DX) 14612 MOVOU X0, 96(DX) 14613 MOVOU X0, 112(DX) 14614 ADDQ $0x80, DX 14615 DECQ CX 14616 JNZ zero_loop_encodeSnappyBetterBlockAsm8B 14617 MOVL $0x00000000, 12(SP) 14618 MOVQ src_len+32(FP), CX 14619 LEAQ -9(CX), DX 14620 LEAQ -8(CX), SI 14621 MOVL SI, 8(SP) 14622 SHRQ $0x05, CX 14623 SUBL CX, DX 14624 LEAQ (AX)(DX*1), DX 14625 MOVQ DX, (SP) 14626 MOVL $0x00000001, CX 14627 MOVL $0x00000000, 16(SP) 14628 MOVQ src_base+24(FP), DX 14629 14630search_loop_encodeSnappyBetterBlockAsm8B: 14631 MOVL CX, SI 14632 SUBL 12(SP), SI 14633 SHRL $0x04, SI 14634 LEAL 1(CX)(SI*1), SI 14635 CMPL SI, 8(SP) 14636 JGE emit_remainder_encodeSnappyBetterBlockAsm8B 14637 MOVQ (DX)(CX*1), DI 14638 MOVL SI, 20(SP) 14639 MOVQ 
$0x0000cf1bbcdcbf9b, R9 14640 MOVQ $0x9e3779b1, SI 14641 MOVQ DI, R10 14642 MOVQ DI, R11 14643 SHLQ $0x10, R10 14644 IMULQ R9, R10 14645 SHRQ $0x36, R10 14646 SHLQ $0x20, R11 14647 IMULQ SI, R11 14648 SHRQ $0x38, R11 14649 MOVL 24(SP)(R10*4), SI 14650 MOVL 4120(SP)(R11*4), R8 14651 MOVL CX, 24(SP)(R10*4) 14652 MOVL CX, 4120(SP)(R11*4) 14653 CMPL (DX)(SI*1), DI 14654 JEQ candidate_match_encodeSnappyBetterBlockAsm8B 14655 CMPL (DX)(R8*1), DI 14656 JEQ candidateS_match_encodeSnappyBetterBlockAsm8B 14657 MOVL 20(SP), CX 14658 JMP search_loop_encodeSnappyBetterBlockAsm8B 14659 14660candidateS_match_encodeSnappyBetterBlockAsm8B: 14661 SHRQ $0x08, DI 14662 MOVQ DI, R10 14663 SHLQ $0x10, R10 14664 IMULQ R9, R10 14665 SHRQ $0x36, R10 14666 MOVL 24(SP)(R10*4), SI 14667 INCL CX 14668 MOVL CX, 24(SP)(R10*4) 14669 CMPL (DX)(SI*1), DI 14670 JEQ candidate_match_encodeSnappyBetterBlockAsm8B 14671 DECL CX 14672 MOVL R8, SI 14673 14674candidate_match_encodeSnappyBetterBlockAsm8B: 14675 MOVL 12(SP), DI 14676 TESTL SI, SI 14677 JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B 14678 14679match_extend_back_loop_encodeSnappyBetterBlockAsm8B: 14680 CMPL CX, DI 14681 JLE match_extend_back_end_encodeSnappyBetterBlockAsm8B 14682 MOVB -1(DX)(SI*1), BL 14683 MOVB -1(DX)(CX*1), R8 14684 CMPB BL, R8 14685 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B 14686 LEAL -1(CX), CX 14687 DECL SI 14688 JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B 14689 JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B 14690 14691match_extend_back_end_encodeSnappyBetterBlockAsm8B: 14692 MOVL CX, DI 14693 SUBL 12(SP), DI 14694 LEAQ 3(AX)(DI*1), DI 14695 CMPQ DI, (SP) 14696 JL match_dst_size_check_encodeSnappyBetterBlockAsm8B 14697 MOVQ $0x00000000, ret+48(FP) 14698 RET 14699 14700match_dst_size_check_encodeSnappyBetterBlockAsm8B: 14701 MOVL CX, DI 14702 ADDL $0x04, CX 14703 ADDL $0x04, SI 14704 MOVQ src_len+32(FP), R8 14705 SUBL CX, R8 14706 LEAQ (DX)(CX*1), R9 14707 LEAQ (DX)(SI*1), R10 14708 
14709 // matchLen 14710 XORL R12, R12 14711 CMPL R8, $0x08 14712 JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B 14713 14714matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: 14715 MOVQ (R9)(R12*1), R11 14716 XORQ (R10)(R12*1), R11 14717 TESTQ R11, R11 14718 JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B 14719 BSFQ R11, R11 14720 SARQ $0x03, R11 14721 LEAL (R12)(R11*1), R12 14722 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B 14723 14724matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B: 14725 LEAL -8(R8), R8 14726 LEAL 8(R12), R12 14727 CMPL R8, $0x08 14728 JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B 14729 14730matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B: 14731 TESTL R8, R8 14732 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B 14733 14734matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: 14735 MOVB (R9)(R12*1), R11 14736 CMPB (R10)(R12*1), R11 14737 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B 14738 LEAL 1(R12), R12 14739 DECL R8 14740 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B 14741 14742match_nolit_end_encodeSnappyBetterBlockAsm8B: 14743 MOVL CX, R8 14744 SUBL SI, R8 14745 14746 // Check if repeat 14747 MOVL R8, 16(SP) 14748 MOVL 12(SP), SI 14749 CMPL SI, DI 14750 JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B 14751 MOVL DI, R9 14752 MOVL DI, 12(SP) 14753 LEAQ (DX)(SI*1), R10 14754 SUBL SI, R9 14755 LEAL -1(R9), SI 14756 CMPL SI, $0x3c 14757 JLT one_byte_match_emit_encodeSnappyBetterBlockAsm8B 14758 CMPL SI, $0x00000100 14759 JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm8B 14760 MOVB $0xf4, (AX) 14761 MOVW SI, 1(AX) 14762 ADDQ $0x03, AX 14763 JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B 14764 14765two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: 14766 MOVB $0xf0, (AX) 14767 MOVB SI, 1(AX) 14768 ADDQ $0x02, AX 14769 CMPL SI, $0x40 14770 JL memmove_match_emit_encodeSnappyBetterBlockAsm8B 14771 JMP 
memmove_long_match_emit_encodeSnappyBetterBlockAsm8B 14772 14773one_byte_match_emit_encodeSnappyBetterBlockAsm8B: 14774 SHLB $0x02, SI 14775 MOVB SI, (AX) 14776 ADDQ $0x01, AX 14777 14778memmove_match_emit_encodeSnappyBetterBlockAsm8B: 14779 LEAQ (AX)(R9*1), SI 14780 14781 // genMemMoveShort 14782 CMPQ R9, $0x08 14783 JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 14784 CMPQ R9, $0x10 14785 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 14786 CMPQ R9, $0x20 14787 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 14788 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 14789 14790emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: 14791 MOVQ (R10), R11 14792 MOVQ R11, (AX) 14793 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B 14794 14795emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: 14796 MOVQ (R10), R11 14797 MOVQ -8(R10)(R9*1), R10 14798 MOVQ R11, (AX) 14799 MOVQ R10, -8(AX)(R9*1) 14800 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B 14801 14802emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: 14803 MOVOU (R10), X0 14804 MOVOU -16(R10)(R9*1), X1 14805 MOVOU X0, (AX) 14806 MOVOU X1, -16(AX)(R9*1) 14807 JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B 14808 14809emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: 14810 MOVOU (R10), X0 14811 MOVOU 16(R10), X1 14812 MOVOU -32(R10)(R9*1), X2 14813 MOVOU -16(R10)(R9*1), X3 14814 MOVOU X0, (AX) 14815 MOVOU X1, 16(AX) 14816 MOVOU X2, -32(AX)(R9*1) 14817 MOVOU X3, -16(AX)(R9*1) 14818 14819memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: 14820 MOVQ SI, AX 14821 JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B 14822 14823memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: 14824 LEAQ (AX)(R9*1), SI 14825 14826 
// genMemMoveLong 14827 MOVOU (R10), X0 14828 MOVOU 16(R10), X1 14829 MOVOU -32(R10)(R9*1), X2 14830 MOVOU -16(R10)(R9*1), X3 14831 MOVQ R9, R13 14832 SHRQ $0x05, R13 14833 MOVQ AX, R11 14834 ANDL $0x0000001f, R11 14835 MOVQ $0x00000040, R14 14836 SUBQ R11, R14 14837 DECQ R13 14838 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 14839 LEAQ -32(R10)(R14*1), R11 14840 LEAQ -32(AX)(R14*1), R15 14841 14842emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: 14843 MOVOU (R11), X4 14844 MOVOU 16(R11), X5 14845 MOVOA X4, (R15) 14846 MOVOA X5, 16(R15) 14847 ADDQ $0x20, R15 14848 ADDQ $0x20, R11 14849 ADDQ $0x20, R14 14850 DECQ R13 14851 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back 14852 14853emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: 14854 MOVOU -32(R10)(R14*1), X4 14855 MOVOU -16(R10)(R14*1), X5 14856 MOVOA X4, -32(AX)(R14*1) 14857 MOVOA X5, -16(AX)(R14*1) 14858 ADDQ $0x20, R14 14859 CMPQ R9, R14 14860 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 14861 MOVOU X0, (AX) 14862 MOVOU X1, 16(AX) 14863 MOVOU X2, -32(AX)(R9*1) 14864 MOVOU X3, -16(AX)(R9*1) 14865 MOVQ SI, AX 14866 14867emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: 14868 ADDL R12, CX 14869 ADDL $0x04, R12 14870 MOVL CX, 12(SP) 14871 14872 // emitCopy 14873two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: 14874 CMPL R12, $0x40 14875 JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B 14876 MOVB $0xee, (AX) 14877 MOVW R8, 1(AX) 14878 LEAL -60(R12), R12 14879 ADDQ $0x03, AX 14880 JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B 14881 14882two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: 14883 CMPL R12, $0x0c 14884 JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B 14885 MOVB $0x01, BL 14886 LEAL -16(BX)(R12*4), R12 14887 MOVB R8, 1(AX) 14888 SHRL 
$0x08, R8 14889 SHLL $0x05, R8 14890 ORL R8, R12 14891 MOVB R12, (AX) 14892 ADDQ $0x02, AX 14893 JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B 14894 14895emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: 14896 MOVB $0x02, BL 14897 LEAL -4(BX)(R12*4), R12 14898 MOVB R12, (AX) 14899 MOVW R8, 1(AX) 14900 ADDQ $0x03, AX 14901 14902match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: 14903 CMPL CX, 8(SP) 14904 JGE emit_remainder_encodeSnappyBetterBlockAsm8B 14905 CMPQ AX, (SP) 14906 JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B 14907 MOVQ $0x00000000, ret+48(FP) 14908 RET 14909 14910match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: 14911 MOVQ $0x0000cf1bbcdcbf9b, SI 14912 MOVQ $0x9e3779b1, R8 14913 INCL DI 14914 MOVQ (DX)(DI*1), R9 14915 MOVQ R9, R10 14916 MOVQ R9, R11 14917 MOVQ R9, R12 14918 SHRQ $0x08, R11 14919 MOVQ R11, R13 14920 SHRQ $0x10, R12 14921 LEAL 1(DI), R14 14922 LEAL 2(DI), R15 14923 MOVQ -2(DX)(CX*1), R9 14924 SHLQ $0x10, R10 14925 IMULQ SI, R10 14926 SHRQ $0x36, R10 14927 SHLQ $0x10, R13 14928 IMULQ SI, R13 14929 SHRQ $0x36, R13 14930 SHLQ $0x20, R11 14931 IMULQ R8, R11 14932 SHRQ $0x38, R11 14933 SHLQ $0x20, R12 14934 IMULQ R8, R12 14935 SHRQ $0x38, R12 14936 MOVL DI, 24(SP)(R10*4) 14937 MOVL R14, 24(SP)(R13*4) 14938 MOVL R14, 4120(SP)(R11*4) 14939 MOVL R15, 4120(SP)(R12*4) 14940 MOVQ R9, R10 14941 MOVQ R9, R11 14942 SHRQ $0x08, R11 14943 MOVQ R11, R13 14944 LEAL -2(CX), R9 14945 LEAL -1(CX), DI 14946 SHLQ $0x10, R10 14947 IMULQ SI, R10 14948 SHRQ $0x36, R10 14949 SHLQ $0x20, R11 14950 IMULQ R8, R11 14951 SHRQ $0x38, R11 14952 SHLQ $0x10, R13 14953 IMULQ SI, R13 14954 SHRQ $0x36, R13 14955 MOVL R9, 24(SP)(R10*4) 14956 MOVL DI, 4120(SP)(R11*4) 14957 MOVL DI, 24(SP)(R13*4) 14958 JMP search_loop_encodeSnappyBetterBlockAsm8B 14959 14960emit_remainder_encodeSnappyBetterBlockAsm8B: 14961 MOVQ src_len+32(FP), CX 14962 SUBL 12(SP), CX 14963 LEAQ 3(AX)(CX*1), CX 14964 CMPQ CX, (SP) 14965 JL 
emit_remainder_ok_encodeSnappyBetterBlockAsm8B 14966 MOVQ $0x00000000, ret+48(FP) 14967 RET 14968 14969emit_remainder_ok_encodeSnappyBetterBlockAsm8B: 14970 MOVQ src_len+32(FP), CX 14971 MOVL 12(SP), BX 14972 CMPL BX, CX 14973 JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B 14974 MOVL CX, SI 14975 MOVL CX, 12(SP) 14976 LEAQ (DX)(BX*1), CX 14977 SUBL BX, SI 14978 LEAL -1(SI), DX 14979 CMPL DX, $0x3c 14980 JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B 14981 CMPL DX, $0x00000100 14982 JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B 14983 MOVB $0xf4, (AX) 14984 MOVW DX, 1(AX) 14985 ADDQ $0x03, AX 14986 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B 14987 14988two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: 14989 MOVB $0xf0, (AX) 14990 MOVB DL, 1(AX) 14991 ADDQ $0x02, AX 14992 CMPL DX, $0x40 14993 JL memmove_emit_remainder_encodeSnappyBetterBlockAsm8B 14994 JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B 14995 14996one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: 14997 SHLB $0x02, DL 14998 MOVB DL, (AX) 14999 ADDQ $0x01, AX 15000 15001memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: 15002 LEAQ (AX)(SI*1), DX 15003 MOVL SI, BX 15004 15005 // genMemMoveShort 15006 CMPQ BX, $0x08 15007 JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8 15008 CMPQ BX, $0x10 15009 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 15010 CMPQ BX, $0x20 15011 JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 15012 JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 15013 15014emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8: 15015 MOVQ (CX), SI 15016 MOVQ SI, (AX) 15017 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B 15018 15019emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: 15020 MOVQ 
(CX), SI 15021 MOVQ -8(CX)(BX*1), CX 15022 MOVQ SI, (AX) 15023 MOVQ CX, -8(AX)(BX*1) 15024 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B 15025 15026emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: 15027 MOVOU (CX), X0 15028 MOVOU -16(CX)(BX*1), X1 15029 MOVOU X0, (AX) 15030 MOVOU X1, -16(AX)(BX*1) 15031 JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B 15032 15033emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: 15034 MOVOU (CX), X0 15035 MOVOU 16(CX), X1 15036 MOVOU -32(CX)(BX*1), X2 15037 MOVOU -16(CX)(BX*1), X3 15038 MOVOU X0, (AX) 15039 MOVOU X1, 16(AX) 15040 MOVOU X2, -32(AX)(BX*1) 15041 MOVOU X3, -16(AX)(BX*1) 15042 15043memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: 15044 MOVQ DX, AX 15045 JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B 15046 15047memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: 15048 LEAQ (AX)(SI*1), DX 15049 MOVL SI, BX 15050 15051 // genMemMoveLong 15052 MOVOU (CX), X0 15053 MOVOU 16(CX), X1 15054 MOVOU -32(CX)(BX*1), X2 15055 MOVOU -16(CX)(BX*1), X3 15056 MOVQ BX, DI 15057 SHRQ $0x05, DI 15058 MOVQ AX, SI 15059 ANDL $0x0000001f, SI 15060 MOVQ $0x00000040, R8 15061 SUBQ SI, R8 15062 DECQ DI 15063 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 15064 LEAQ -32(CX)(R8*1), SI 15065 LEAQ -32(AX)(R8*1), R9 15066 15067emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: 15068 MOVOU (SI), X4 15069 MOVOU 16(SI), X5 15070 MOVOA X4, (R9) 15071 MOVOA X5, 16(R9) 15072 ADDQ $0x20, R9 15073 ADDQ $0x20, SI 15074 ADDQ $0x20, R8 15075 DECQ DI 15076 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back 15077 15078emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: 15079 MOVOU -32(CX)(R8*1), X4 15080 MOVOU -16(CX)(R8*1), X5 15081 MOVOA X4, -32(AX)(R8*1) 15082 
MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32

	// Store the first and last 32 bytes that were loaded into X0-X3 before
	// the loop, then advance AX to the end of the copied literal (DX).
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
	// Return value = AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a length header followed by the bytes of lit to dst and
// returns the number of bytes written (header size + len(lit)). It returns 0
// without writing anything when len(lit) == 0. Only dst_base, lit_base and
// lit_len are read; nothing here checks the write against len(dst).
//
// Registers: AX = write cursor in dst, CX = read cursor in lit,
// DX = len(lit), BX = return value, SI = n = len(lit)-1 (the value stored in
// the header).
//
// Header selected by n (32-bit signed compares):
//   n < 60      1 byte : n<<2
//   n < 1<<8    2 bytes: 0xf0, n
//   n < 1<<16   3 bytes: 0xf4, low 16 bits of n
//   n < 1<<24   4 bytes: 0xf8, low 24 bits of n
//   otherwise   5 bytes: 0xfc, 32 bits of n
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ lit_len+32(FP), DX
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	TESTQ DX, DX
	JZ emit_literal_end_standalone_skip

	// BX starts at len(lit); each header branch adds its own size to it.
	MOVL DX, BX
	LEAL -1(DX), SI
	CMPL SI, $0x3c
	JLT one_byte_standalone
	CMPL SI, $0x00000100
	JLT two_bytes_standalone
	CMPL SI, $0x00010000
	JLT three_bytes_standalone
	CMPL SI, $0x01000000
	JLT four_bytes_standalone

	// 5-byte header: tag 0xfc + 32-bit n.
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_long_standalone

four_bytes_standalone:
	// 4-byte header: tag 0xf8 + low 16 bits of n + bits 16-23 of n.
	MOVL SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_long_standalone

three_bytes_standalone:
	// 3-byte header: tag 0xf4 + 16-bit n.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_long_standalone

two_bytes_standalone:
	// 2-byte header: tag 0xf0 + 8-bit n. Lengths up to 64 (n < 0x40) still
	// use the short copy below; longer ones use the loop-based copy.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_standalone
	JMP memmove_long_standalone

one_byte_standalone:
	// 1-byte header: n<<2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	// genMemMoveShort
	// Loop-free copy of DX bytes (1..64 on the paths that reach here). Each
	// size class loads a chunk from the start and a chunk from the end of
	// the source (the two may overlap) and stores both.
	CMPQ DX, $0x03
	JB emit_lit_memmove_standalone_memmove_move_1or2
	JE emit_lit_memmove_standalone_memmove_move_3
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	JMP emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	// First byte and last byte; for DX == 1 they are the same byte.
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	// CX is overwritten with data here; the source pointer is not needed
	// afterwards.
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone
	// Unreachable duplicate of the jump above.
	JMP emit_literal_end_standalone

memmove_long_standalone:
	// genMemMoveLong
	// Copy of DX bytes for the longer literals. The first and last 32 bytes
	// are captured in X0-X3 up front and written last with unaligned
	// stores; the middle is copied 32 bytes per iteration.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3

	// DI = DX / 32. R8 = 64 - (AX & 31), so AX+R8-32 is a multiple of 32 and
	// the MOVOA stores in the loops below hit aligned addresses.
	MOVQ DX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8

	// NOTE(review): DECQ does not modify CF, and the SUBQ above should leave
	// CF clear, so this JA presumably behaves as "jump if DI != 0". For the
	// lengths routed here (n >= 64) that would always be taken, leaving the
	// big_loop_back block unused — confirm.
	DECQ DI
	JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_standalonelarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_standalonelarge_big_loop_back

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	// Copy source bytes [R8-32, R8) and advance R8 by 32 while R8 <= DX
	// (unsigned).
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ DX, R8
	JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32

	// Finish with the saved head and tail.
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone
	// Unreachable duplicate of the jump above.
	JMP emit_literal_end_standalone

emit_literal_end_standalone_skip:
	// Empty literal: return 0.
	XORQ BX, BX

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes a repeat operation covering length to dst and returns the
// number of bytes written. offset is read only for the 2-byte form that
// embeds it (length < 12 and offset < 2048). dst_len is not read.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = length. All comparisons are 32-bit signed. With L = length on entry to
// the loop and DX = L-4 afterwards:
//   L <= 8                     2 bytes: 16-bit value ((L-4)<<2)|1
//   L < 12 and offset < 2048   2 bytes: ((offset>>8)<<5)|((L-4)<<2)|1, offset&0xff
//   DX < 0x104                 3 bytes: 0x15, 0x00, DX-4
//   DX < 0x10100               4 bytes: 0x19, 0x00, 16 bits of DX-256
//   DX < 0x0100ffff            5 bytes: 0x1d, 0x00, 24 bits of DX-65536
//   otherwise                  5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, then
//                              DX -= 16842747 (0x0100fffb) and loop again
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitRepeat
emit_repeat_again_standalone:
	// SI keeps the unadjusted length for the short-form tests; DX becomes
	// length-4.
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone
	CMPL DX, $0x00010100
	JLT repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone

	// Too long for a single operation: emit the largest 5-byte form and go
	// around again with the reduced DX as the new length.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone

repeat_five_standalone:
	// 5-byte form: 0x1d, 0x00, then DX-65536 as 24 bits (low 16 bits here,
	// the third byte is written from CX by the instructions that follow).
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
SARL $0x10, CX
	// CX holds a copy of DX (MOVL DX, CX earlier in repeat_five_standalone),
	// so CL is now bits 16-23 of the adjusted length: the last header byte.
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_repeat_end

repeat_four_standalone:
	// 4-byte form: 0x19, 0x00, then DX-256 as 16 bits.
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_repeat_end

repeat_three_standalone:
	// 3-byte form: 0x15, 0x00, then DX-4 as one byte.
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_repeat_end

repeat_two_standalone:
	// 2-byte form: 16-bit value (DX<<2)|1, DX being length-4.
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_repeat_end

repeat_two_offset_standalone:
	// 2-byte form carrying the offset: byte 1 = low 8 bits of offset,
	// byte 0 = ((offset>>8)<<5) | (DX<<2) | 1.
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes one or more operations encoding a copy of length bytes at
// the given offset to dst and returns the number of bytes written. dst_len is
// not read.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = remaining length. Comparisons are 32-bit signed.
//
//   offset >= 65536: while more than 64 bytes remain, emit 0xff + 32-bit
//     offset (5 bytes) and subtract 64; the remainder is emitted through the
//     inlined emitRepeat sequence when it is >= 4, otherwise as one more
//     5-byte copy (or nothing when it is 0).
//   offset < 65536: when more than 64 bytes remain, emit 0xee + 16-bit offset
//     (3 bytes), subtract 60 and continue with the inlined emitRepeat
//     sequence. Otherwise emit a 2-byte copy (length < 12 and offset < 2048)
//     or a 3-byte copy.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone

four_bytes_loop_back_standalone:
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone

	// emitRepeat
	// Same sequence as the standalone emitRepeat, with labels suffixed
	// _emit_copy and exits going to gen_emit_copy_end.
emit_repeat_again_standalone_emit_copy:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end
	// Unreachable: follows an unconditional JMP and has no label.
	JMP four_bytes_loop_back_standalone

four_bytes_remain_standalone:
	// Remainder with a 4-byte offset: nothing for 0, otherwise tag
	// ((DX-1)<<2)|3 followed by the 32-bit offset. MOVB sets only the low
	// byte of SI; that is sufficient because only DL is stored.
	TESTL DX, DX
	JZ gen_emit_copy_end
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// emitRepeat
	// Same sequence as the standalone emitRepeat, with labels suffixed
	// _emit_copy_short and exits going to gen_emit_copy_end.
emit_repeat_again_standalone_emit_copy_short:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy_short
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end
	// Unreachable: follows an unconditional JMP and has no label.
	JMP two_byte_offset_standalone

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone

	// 2-byte copy: byte 1 = low 8 bits of offset,
	// byte 0 = ((offset>>8)<<5) | ((length-4)<<2) | 1.
	// Only DL is stored, so setting just the low byte of SI is enough.
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

emit_copy_three_standalone:
	// 3-byte copy: tag ((length-1)<<2)|2 followed by the 16-bit offset.
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes copy operations for (offset, length) to dst and
// returns the number of bytes written. It uses the same copy encodings as
// emitCopy, but where emitCopy switches to a repeat sequence for the
// remainder, this routine loops and emits further copy operations. dst_len is
// not read.
//
// Registers: AX = write cursor in dst, BX = bytes written, CX = offset,
// DX = remaining length. Comparisons are 32-bit signed.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	// 4-byte-offset path: peel off 64 bytes per 5-byte operation
	// (0xff + 32-bit offset) while more than 64 remain.
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone_snappy
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone_snappy
	JMP four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	// Nothing for 0, otherwise tag ((DX-1)<<2)|3 + 32-bit offset. Only DL is
	// stored, so setting just the low byte of SI is enough.
	TESTL DX, DX
	JZ gen_emit_copy_end_snappy
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	// 2-byte-offset path: peel off 60 bytes per 3-byte operation
	// (0xee + 16-bit offset) while more than 64 remain.
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone_snappy
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone_snappy
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone_snappy

	// 2-byte copy: byte 1 = low 8 bits of offset,
	// byte 0 = ((offset>>8)<<5) | ((length-4)<<2) | 1.
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	// 3-byte copy: tag ((length-1)<<2)|2 followed by the 16-bit offset.
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes for which a and b are equal,
// examining at most len(a) bytes. Only a_base, b_base and a_len are read.
// NOTE(review): b_len is never consulted, so b is presumably required to be
// at least as long as a — confirm against callers.
//
// Registers: AX = a, CX = b, DX = bytes left to compare (32-bit arithmetic),
// SI = match length so far.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX

	// matchLen
	XORL SI, SI
	CMPL DX, $0x08
	JL matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at a time: XOR is zero when all eight bytes match.
	MOVQ (AX)(SI*1), BX
	XORQ (CX)(SI*1), BX
	TESTQ BX, BX
	JZ matchlen_loop_standalone

	// Mismatch inside this word: the index of the lowest set bit divided by
	// 8 is the number of matching bytes in it (little-endian load).
	BSFQ BX, BX
	SARQ $0x03, BX
	LEAL (SI)(BX*1), SI
	JMP gen_match_len_end

matchlen_loop_standalone:
	LEAL -8(DX), DX
	LEAL 8(SI), SI
	CMPL DX, $0x08
	JGE matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: finish byte by byte.
	TESTL DX, DX
	JZ gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE gen_match_len_end
	LEAL 1(SI), SI
	DECL DX
	JNZ matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET