1// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. 2 3// +build !appengine 4// +build !noasm 5// +build gc 6 7#include "textflag.h" 8 9// func encodeBlockAsm(dst []byte, src []byte) int 10// Requires: SSE2 11TEXT ·encodeBlockAsm(SB), $65560-56 12 MOVQ dst_base+0(FP), AX 13 MOVQ $0x00000200, CX 14 LEAQ 24(SP), DX 15 PXOR X0, X0 16 17zero_loop_encodeBlockAsm: 18 MOVOU X0, (DX) 19 MOVOU X0, 16(DX) 20 MOVOU X0, 32(DX) 21 MOVOU X0, 48(DX) 22 MOVOU X0, 64(DX) 23 MOVOU X0, 80(DX) 24 MOVOU X0, 96(DX) 25 MOVOU X0, 112(DX) 26 ADDQ $0x80, DX 27 DECQ CX 28 JNZ zero_loop_encodeBlockAsm 29 MOVL $0x00000000, 12(SP) 30 MOVQ src_len+32(FP), CX 31 LEAQ -5(CX), DX 32 LEAQ -8(CX), SI 33 MOVL SI, 8(SP) 34 SHRQ $0x05, CX 35 SUBL CX, DX 36 LEAQ (AX)(DX*1), DX 37 MOVQ DX, (SP) 38 MOVL $0x00000001, CX 39 MOVL CX, 16(SP) 40 MOVQ src_base+24(FP), DX 41 42search_loop_encodeBlockAsm: 43 MOVL CX, SI 44 SUBL 12(SP), SI 45 SHRL $0x06, SI 46 LEAL 4(CX)(SI*1), SI 47 CMPL SI, 8(SP) 48 JGE emit_remainder_encodeBlockAsm 49 MOVQ (DX)(CX*1), DI 50 MOVL SI, 20(SP) 51 MOVQ $0x0000cf1bbcdcbf9b, R9 52 MOVQ DI, R10 53 MOVQ DI, R11 54 SHRQ $0x08, R11 55 SHLQ $0x10, R10 56 IMULQ R9, R10 57 SHRQ $0x32, R10 58 SHLQ $0x10, R11 59 IMULQ R9, R11 60 SHRQ $0x32, R11 61 MOVL 24(SP)(R10*4), SI 62 MOVL 24(SP)(R11*4), R8 63 MOVL CX, 24(SP)(R10*4) 64 LEAL 1(CX), R10 65 MOVL R10, 24(SP)(R11*4) 66 MOVQ DI, R10 67 SHRQ $0x10, R10 68 SHLQ $0x10, R10 69 IMULQ R9, R10 70 SHRQ $0x32, R10 71 MOVL CX, R9 72 SUBL 16(SP), R9 73 MOVL 1(DX)(R9*1), R11 74 MOVQ DI, R9 75 SHRQ $0x08, R9 76 CMPL R9, R11 77 JNE no_repeat_found_encodeBlockAsm 78 LEAL 1(CX), DI 79 MOVL 12(SP), R8 80 MOVL DI, SI 81 SUBL 16(SP), SI 82 JZ repeat_extend_back_end_encodeBlockAsm 83 84repeat_extend_back_loop_encodeBlockAsm: 85 CMPL DI, R8 86 JLE repeat_extend_back_end_encodeBlockAsm 87 MOVB -1(DX)(SI*1), BL 88 MOVB -1(DX)(DI*1), R9 89 CMPB BL, R9 90 JNE repeat_extend_back_end_encodeBlockAsm 
91 LEAL -1(DI), DI 92 DECL SI 93 JNZ repeat_extend_back_loop_encodeBlockAsm 94 95repeat_extend_back_end_encodeBlockAsm: 96 MOVL 12(SP), SI 97 CMPL SI, DI 98 JEQ emit_literal_done_repeat_emit_encodeBlockAsm 99 MOVL DI, R9 100 MOVL DI, 12(SP) 101 LEAQ (DX)(SI*1), R10 102 SUBL SI, R9 103 LEAL -1(R9), SI 104 CMPL SI, $0x3c 105 JLT one_byte_repeat_emit_encodeBlockAsm 106 CMPL SI, $0x00000100 107 JLT two_bytes_repeat_emit_encodeBlockAsm 108 CMPL SI, $0x00010000 109 JLT three_bytes_repeat_emit_encodeBlockAsm 110 CMPL SI, $0x01000000 111 JLT four_bytes_repeat_emit_encodeBlockAsm 112 MOVB $0xfc, (AX) 113 MOVL SI, 1(AX) 114 ADDQ $0x05, AX 115 JMP memmove_long_repeat_emit_encodeBlockAsm 116 117four_bytes_repeat_emit_encodeBlockAsm: 118 MOVL SI, R11 119 SHRL $0x10, R11 120 MOVB $0xf8, (AX) 121 MOVW SI, 1(AX) 122 MOVB R11, 3(AX) 123 ADDQ $0x04, AX 124 JMP memmove_long_repeat_emit_encodeBlockAsm 125 126three_bytes_repeat_emit_encodeBlockAsm: 127 MOVB $0xf4, (AX) 128 MOVW SI, 1(AX) 129 ADDQ $0x03, AX 130 JMP memmove_long_repeat_emit_encodeBlockAsm 131 132two_bytes_repeat_emit_encodeBlockAsm: 133 MOVB $0xf0, (AX) 134 MOVB SI, 1(AX) 135 ADDQ $0x02, AX 136 CMPL SI, $0x40 137 JL memmove_repeat_emit_encodeBlockAsm 138 JMP memmove_long_repeat_emit_encodeBlockAsm 139 140one_byte_repeat_emit_encodeBlockAsm: 141 SHLB $0x02, SI 142 MOVB SI, (AX) 143 ADDQ $0x01, AX 144 145memmove_repeat_emit_encodeBlockAsm: 146 LEAQ (AX)(R9*1), SI 147 148 // genMemMoveShort 149 CMPQ R9, $0x03 150 JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2 151 JE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3 152 CMPQ R9, $0x08 153 JB emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7 154 CMPQ R9, $0x10 155 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 156 CMPQ R9, $0x20 157 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 158 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 159 
160emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2: 161 MOVB (R10), R11 162 MOVB -1(R10)(R9*1), R10 163 MOVB R11, (AX) 164 MOVB R10, -1(AX)(R9*1) 165 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 166 167emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3: 168 MOVW (R10), R11 169 MOVB 2(R10), R10 170 MOVW R11, (AX) 171 MOVB R10, 2(AX) 172 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 173 174emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7: 175 MOVL (R10), R11 176 MOVL -4(R10)(R9*1), R10 177 MOVL R11, (AX) 178 MOVL R10, -4(AX)(R9*1) 179 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 180 181emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: 182 MOVQ (R10), R11 183 MOVQ -8(R10)(R9*1), R10 184 MOVQ R11, (AX) 185 MOVQ R10, -8(AX)(R9*1) 186 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 187 188emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: 189 MOVOU (R10), X0 190 MOVOU -16(R10)(R9*1), X1 191 MOVOU X0, (AX) 192 MOVOU X1, -16(AX)(R9*1) 193 JMP memmove_end_copy_repeat_emit_encodeBlockAsm 194 195emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: 196 MOVOU (R10), X0 197 MOVOU 16(R10), X1 198 MOVOU -32(R10)(R9*1), X2 199 MOVOU -16(R10)(R9*1), X3 200 MOVOU X0, (AX) 201 MOVOU X1, 16(AX) 202 MOVOU X2, -32(AX)(R9*1) 203 MOVOU X3, -16(AX)(R9*1) 204 205memmove_end_copy_repeat_emit_encodeBlockAsm: 206 MOVQ SI, AX 207 JMP emit_literal_done_repeat_emit_encodeBlockAsm 208 209memmove_long_repeat_emit_encodeBlockAsm: 210 LEAQ (AX)(R9*1), SI 211 212 // genMemMoveLong 213 MOVOU (R10), X0 214 MOVOU 16(R10), X1 215 MOVOU -32(R10)(R9*1), X2 216 MOVOU -16(R10)(R9*1), X3 217 MOVQ R9, R12 218 SHRQ $0x05, R12 219 MOVQ AX, R11 220 ANDL $0x0000001f, R11 221 MOVQ $0x00000040, R13 222 SUBQ R11, R13 223 DECQ R12 224 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 225 LEAQ -32(R10)(R13*1), R11 226 LEAQ -32(AX)(R13*1), R14 227 
228emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: 229 MOVOU (R11), X4 230 MOVOU 16(R11), X5 231 MOVOA X4, (R14) 232 MOVOA X5, 16(R14) 233 ADDQ $0x20, R14 234 ADDQ $0x20, R11 235 ADDQ $0x20, R13 236 DECQ R12 237 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back 238 239emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: 240 MOVOU -32(R10)(R13*1), X4 241 MOVOU -16(R10)(R13*1), X5 242 MOVOA X4, -32(AX)(R13*1) 243 MOVOA X5, -16(AX)(R13*1) 244 ADDQ $0x20, R13 245 CMPQ R9, R13 246 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 247 MOVOU X0, (AX) 248 MOVOU X1, 16(AX) 249 MOVOU X2, -32(AX)(R9*1) 250 MOVOU X3, -16(AX)(R9*1) 251 MOVQ SI, AX 252 253emit_literal_done_repeat_emit_encodeBlockAsm: 254 ADDL $0x05, CX 255 MOVL CX, SI 256 SUBL 16(SP), SI 257 MOVQ src_len+32(FP), R9 258 SUBL CX, R9 259 LEAQ (DX)(CX*1), R10 260 LEAQ (DX)(SI*1), SI 261 262 // matchLen 263 XORL R12, R12 264 CMPL R9, $0x08 265 JL matchlen_single_repeat_extend_encodeBlockAsm 266 267matchlen_loopback_repeat_extend_encodeBlockAsm: 268 MOVQ (R10)(R12*1), R11 269 XORQ (SI)(R12*1), R11 270 TESTQ R11, R11 271 JZ matchlen_loop_repeat_extend_encodeBlockAsm 272 BSFQ R11, R11 273 SARQ $0x03, R11 274 LEAL (R12)(R11*1), R12 275 JMP repeat_extend_forward_end_encodeBlockAsm 276 277matchlen_loop_repeat_extend_encodeBlockAsm: 278 LEAL -8(R9), R9 279 LEAL 8(R12), R12 280 CMPL R9, $0x08 281 JGE matchlen_loopback_repeat_extend_encodeBlockAsm 282 283matchlen_single_repeat_extend_encodeBlockAsm: 284 TESTL R9, R9 285 JZ repeat_extend_forward_end_encodeBlockAsm 286 287matchlen_single_loopback_repeat_extend_encodeBlockAsm: 288 MOVB (R10)(R12*1), R11 289 CMPB (SI)(R12*1), R11 290 JNE repeat_extend_forward_end_encodeBlockAsm 291 LEAL 1(R12), R12 292 DECL R9 293 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm 294 295repeat_extend_forward_end_encodeBlockAsm: 296 ADDL R12, CX 297 MOVL CX, SI 298 SUBL DI, SI 299 MOVL 16(SP), DI 
300 TESTL R8, R8 301 JZ repeat_as_copy_encodeBlockAsm 302 303 // emitRepeat 304emit_repeat_again_match_repeat_encodeBlockAsm: 305 MOVL SI, R8 306 LEAL -4(SI), SI 307 CMPL R8, $0x08 308 JLE repeat_two_match_repeat_encodeBlockAsm 309 CMPL R8, $0x0c 310 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm 311 CMPL DI, $0x00000800 312 JLT repeat_two_offset_match_repeat_encodeBlockAsm 313 314cant_repeat_two_offset_match_repeat_encodeBlockAsm: 315 CMPL SI, $0x00000104 316 JLT repeat_three_match_repeat_encodeBlockAsm 317 CMPL SI, $0x00010100 318 JLT repeat_four_match_repeat_encodeBlockAsm 319 CMPL SI, $0x0100ffff 320 JLT repeat_five_match_repeat_encodeBlockAsm 321 LEAL -16842747(SI), SI 322 MOVW $0x001d, (AX) 323 MOVW $0xfffb, 2(AX) 324 MOVB $0xff, 4(AX) 325 ADDQ $0x05, AX 326 JMP emit_repeat_again_match_repeat_encodeBlockAsm 327 328repeat_five_match_repeat_encodeBlockAsm: 329 LEAL -65536(SI), SI 330 MOVL SI, DI 331 MOVW $0x001d, (AX) 332 MOVW SI, 2(AX) 333 SARL $0x10, DI 334 MOVB DI, 4(AX) 335 ADDQ $0x05, AX 336 JMP repeat_end_emit_encodeBlockAsm 337 338repeat_four_match_repeat_encodeBlockAsm: 339 LEAL -256(SI), SI 340 MOVW $0x0019, (AX) 341 MOVW SI, 2(AX) 342 ADDQ $0x04, AX 343 JMP repeat_end_emit_encodeBlockAsm 344 345repeat_three_match_repeat_encodeBlockAsm: 346 LEAL -4(SI), SI 347 MOVW $0x0015, (AX) 348 MOVB SI, 2(AX) 349 ADDQ $0x03, AX 350 JMP repeat_end_emit_encodeBlockAsm 351 352repeat_two_match_repeat_encodeBlockAsm: 353 SHLL $0x02, SI 354 ORL $0x01, SI 355 MOVW SI, (AX) 356 ADDQ $0x02, AX 357 JMP repeat_end_emit_encodeBlockAsm 358 359repeat_two_offset_match_repeat_encodeBlockAsm: 360 XORQ R8, R8 361 LEAL 1(R8)(SI*4), SI 362 MOVB DI, 1(AX) 363 SARL $0x08, DI 364 SHLL $0x05, DI 365 ORL DI, SI 366 MOVB SI, (AX) 367 ADDQ $0x02, AX 368 JMP repeat_end_emit_encodeBlockAsm 369 370repeat_as_copy_encodeBlockAsm: 371 // emitCopy 372 CMPL DI, $0x00010000 373 JL two_byte_offset_repeat_as_copy_encodeBlockAsm 374 375four_bytes_loop_back_repeat_as_copy_encodeBlockAsm: 376 
CMPL SI, $0x40 377 JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm 378 MOVB $0xff, (AX) 379 MOVL DI, 1(AX) 380 LEAL -64(SI), SI 381 ADDQ $0x05, AX 382 CMPL SI, $0x04 383 JL four_bytes_remain_repeat_as_copy_encodeBlockAsm 384 385 // emitRepeat 386emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: 387 MOVL SI, R8 388 LEAL -4(SI), SI 389 CMPL R8, $0x08 390 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy 391 CMPL R8, $0x0c 392 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy 393 CMPL DI, $0x00000800 394 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy 395 396cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: 397 CMPL SI, $0x00000104 398 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy 399 CMPL SI, $0x00010100 400 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy 401 CMPL SI, $0x0100ffff 402 JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy 403 LEAL -16842747(SI), SI 404 MOVW $0x001d, (AX) 405 MOVW $0xfffb, 2(AX) 406 MOVB $0xff, 4(AX) 407 ADDQ $0x05, AX 408 JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy 409 410repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: 411 LEAL -65536(SI), SI 412 MOVL SI, DI 413 MOVW $0x001d, (AX) 414 MOVW SI, 2(AX) 415 SARL $0x10, DI 416 MOVB DI, 4(AX) 417 ADDQ $0x05, AX 418 JMP repeat_end_emit_encodeBlockAsm 419 420repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: 421 LEAL -256(SI), SI 422 MOVW $0x0019, (AX) 423 MOVW SI, 2(AX) 424 ADDQ $0x04, AX 425 JMP repeat_end_emit_encodeBlockAsm 426 427repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: 428 LEAL -4(SI), SI 429 MOVW $0x0015, (AX) 430 MOVB SI, 2(AX) 431 ADDQ $0x03, AX 432 JMP repeat_end_emit_encodeBlockAsm 433 434repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: 435 SHLL $0x02, SI 436 ORL $0x01, SI 437 MOVW SI, (AX) 438 ADDQ $0x02, AX 439 JMP repeat_end_emit_encodeBlockAsm 440 441repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: 442 XORQ R8, R8 443 LEAL 1(R8)(SI*4), SI 444 
MOVB DI, 1(AX) 445 SARL $0x08, DI 446 SHLL $0x05, DI 447 ORL DI, SI 448 MOVB SI, (AX) 449 ADDQ $0x02, AX 450 JMP repeat_end_emit_encodeBlockAsm 451 JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm 452 453four_bytes_remain_repeat_as_copy_encodeBlockAsm: 454 TESTL SI, SI 455 JZ repeat_end_emit_encodeBlockAsm 456 MOVB $0x03, BL 457 LEAL -4(BX)(SI*4), SI 458 MOVB SI, (AX) 459 MOVL DI, 1(AX) 460 ADDQ $0x05, AX 461 JMP repeat_end_emit_encodeBlockAsm 462 463two_byte_offset_repeat_as_copy_encodeBlockAsm: 464 CMPL SI, $0x40 465 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm 466 MOVB $0xee, (AX) 467 MOVW DI, 1(AX) 468 LEAL -60(SI), SI 469 ADDQ $0x03, AX 470 471 // emitRepeat 472emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: 473 MOVL SI, R8 474 LEAL -4(SI), SI 475 CMPL R8, $0x08 476 JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short 477 CMPL R8, $0x0c 478 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short 479 CMPL DI, $0x00000800 480 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short 481 482cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: 483 CMPL SI, $0x00000104 484 JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short 485 CMPL SI, $0x00010100 486 JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short 487 CMPL SI, $0x0100ffff 488 JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short 489 LEAL -16842747(SI), SI 490 MOVW $0x001d, (AX) 491 MOVW $0xfffb, 2(AX) 492 MOVB $0xff, 4(AX) 493 ADDQ $0x05, AX 494 JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short 495 496repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: 497 LEAL -65536(SI), SI 498 MOVL SI, DI 499 MOVW $0x001d, (AX) 500 MOVW SI, 2(AX) 501 SARL $0x10, DI 502 MOVB DI, 4(AX) 503 ADDQ $0x05, AX 504 JMP repeat_end_emit_encodeBlockAsm 505 506repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: 507 LEAL -256(SI), SI 508 MOVW $0x0019, (AX) 509 MOVW SI, 2(AX) 510 ADDQ $0x04, 
AX 511 JMP repeat_end_emit_encodeBlockAsm 512 513repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: 514 LEAL -4(SI), SI 515 MOVW $0x0015, (AX) 516 MOVB SI, 2(AX) 517 ADDQ $0x03, AX 518 JMP repeat_end_emit_encodeBlockAsm 519 520repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: 521 SHLL $0x02, SI 522 ORL $0x01, SI 523 MOVW SI, (AX) 524 ADDQ $0x02, AX 525 JMP repeat_end_emit_encodeBlockAsm 526 527repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: 528 XORQ R8, R8 529 LEAL 1(R8)(SI*4), SI 530 MOVB DI, 1(AX) 531 SARL $0x08, DI 532 SHLL $0x05, DI 533 ORL DI, SI 534 MOVB SI, (AX) 535 ADDQ $0x02, AX 536 JMP repeat_end_emit_encodeBlockAsm 537 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm 538 539two_byte_offset_short_repeat_as_copy_encodeBlockAsm: 540 CMPL SI, $0x0c 541 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm 542 CMPL DI, $0x00000800 543 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm 544 MOVB $0x01, BL 545 LEAL -16(BX)(SI*4), SI 546 MOVB DI, 1(AX) 547 SHRL $0x08, DI 548 SHLL $0x05, DI 549 ORL DI, SI 550 MOVB SI, (AX) 551 ADDQ $0x02, AX 552 JMP repeat_end_emit_encodeBlockAsm 553 554emit_copy_three_repeat_as_copy_encodeBlockAsm: 555 MOVB $0x02, BL 556 LEAL -4(BX)(SI*4), SI 557 MOVB SI, (AX) 558 MOVW DI, 1(AX) 559 ADDQ $0x03, AX 560 561repeat_end_emit_encodeBlockAsm: 562 MOVL CX, 12(SP) 563 JMP search_loop_encodeBlockAsm 564 565no_repeat_found_encodeBlockAsm: 566 CMPL (DX)(SI*1), DI 567 JEQ candidate_match_encodeBlockAsm 568 SHRQ $0x08, DI 569 MOVL 24(SP)(R10*4), SI 570 LEAL 2(CX), R9 571 CMPL (DX)(R8*1), DI 572 JEQ candidate2_match_encodeBlockAsm 573 MOVL R9, 24(SP)(R10*4) 574 SHRQ $0x08, DI 575 CMPL (DX)(SI*1), DI 576 JEQ candidate3_match_encodeBlockAsm 577 MOVL 20(SP), CX 578 JMP search_loop_encodeBlockAsm 579 580candidate3_match_encodeBlockAsm: 581 ADDL $0x02, CX 582 JMP candidate_match_encodeBlockAsm 583 584candidate2_match_encodeBlockAsm: 585 MOVL R9, 24(SP)(R10*4) 586 INCL CX 587 MOVL R8, SI 588 
589candidate_match_encodeBlockAsm: 590 MOVL 12(SP), DI 591 TESTL SI, SI 592 JZ match_extend_back_end_encodeBlockAsm 593 594match_extend_back_loop_encodeBlockAsm: 595 CMPL CX, DI 596 JLE match_extend_back_end_encodeBlockAsm 597 MOVB -1(DX)(SI*1), BL 598 MOVB -1(DX)(CX*1), R8 599 CMPB BL, R8 600 JNE match_extend_back_end_encodeBlockAsm 601 LEAL -1(CX), CX 602 DECL SI 603 JZ match_extend_back_end_encodeBlockAsm 604 JMP match_extend_back_loop_encodeBlockAsm 605 606match_extend_back_end_encodeBlockAsm: 607 MOVL CX, DI 608 SUBL 12(SP), DI 609 LEAQ 5(AX)(DI*1), DI 610 CMPQ DI, (SP) 611 JL match_dst_size_check_encodeBlockAsm 612 MOVQ $0x00000000, ret+48(FP) 613 RET 614 615match_dst_size_check_encodeBlockAsm: 616 MOVL CX, DI 617 MOVL 12(SP), R8 618 CMPL R8, DI 619 JEQ emit_literal_done_match_emit_encodeBlockAsm 620 MOVL DI, R9 621 MOVL DI, 12(SP) 622 LEAQ (DX)(R8*1), DI 623 SUBL R8, R9 624 LEAL -1(R9), R8 625 CMPL R8, $0x3c 626 JLT one_byte_match_emit_encodeBlockAsm 627 CMPL R8, $0x00000100 628 JLT two_bytes_match_emit_encodeBlockAsm 629 CMPL R8, $0x00010000 630 JLT three_bytes_match_emit_encodeBlockAsm 631 CMPL R8, $0x01000000 632 JLT four_bytes_match_emit_encodeBlockAsm 633 MOVB $0xfc, (AX) 634 MOVL R8, 1(AX) 635 ADDQ $0x05, AX 636 JMP memmove_long_match_emit_encodeBlockAsm 637 638four_bytes_match_emit_encodeBlockAsm: 639 MOVL R8, R10 640 SHRL $0x10, R10 641 MOVB $0xf8, (AX) 642 MOVW R8, 1(AX) 643 MOVB R10, 3(AX) 644 ADDQ $0x04, AX 645 JMP memmove_long_match_emit_encodeBlockAsm 646 647three_bytes_match_emit_encodeBlockAsm: 648 MOVB $0xf4, (AX) 649 MOVW R8, 1(AX) 650 ADDQ $0x03, AX 651 JMP memmove_long_match_emit_encodeBlockAsm 652 653two_bytes_match_emit_encodeBlockAsm: 654 MOVB $0xf0, (AX) 655 MOVB R8, 1(AX) 656 ADDQ $0x02, AX 657 CMPL R8, $0x40 658 JL memmove_match_emit_encodeBlockAsm 659 JMP memmove_long_match_emit_encodeBlockAsm 660 661one_byte_match_emit_encodeBlockAsm: 662 SHLB $0x02, R8 663 MOVB R8, (AX) 664 ADDQ $0x01, AX 665 666memmove_match_emit_encodeBlockAsm: 
667 LEAQ (AX)(R9*1), R8 668 669 // genMemMoveShort 670 CMPQ R9, $0x03 671 JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2 672 JE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3 673 CMPQ R9, $0x08 674 JB emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7 675 CMPQ R9, $0x10 676 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 677 CMPQ R9, $0x20 678 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 679 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 680 681emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2: 682 MOVB (DI), R10 683 MOVB -1(DI)(R9*1), DI 684 MOVB R10, (AX) 685 MOVB DI, -1(AX)(R9*1) 686 JMP memmove_end_copy_match_emit_encodeBlockAsm 687 688emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3: 689 MOVW (DI), R10 690 MOVB 2(DI), DI 691 MOVW R10, (AX) 692 MOVB DI, 2(AX) 693 JMP memmove_end_copy_match_emit_encodeBlockAsm 694 695emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7: 696 MOVL (DI), R10 697 MOVL -4(DI)(R9*1), DI 698 MOVL R10, (AX) 699 MOVL DI, -4(AX)(R9*1) 700 JMP memmove_end_copy_match_emit_encodeBlockAsm 701 702emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: 703 MOVQ (DI), R10 704 MOVQ -8(DI)(R9*1), DI 705 MOVQ R10, (AX) 706 MOVQ DI, -8(AX)(R9*1) 707 JMP memmove_end_copy_match_emit_encodeBlockAsm 708 709emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: 710 MOVOU (DI), X0 711 MOVOU -16(DI)(R9*1), X1 712 MOVOU X0, (AX) 713 MOVOU X1, -16(AX)(R9*1) 714 JMP memmove_end_copy_match_emit_encodeBlockAsm 715 716emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: 717 MOVOU (DI), X0 718 MOVOU 16(DI), X1 719 MOVOU -32(DI)(R9*1), X2 720 MOVOU -16(DI)(R9*1), X3 721 MOVOU X0, (AX) 722 MOVOU X1, 16(AX) 723 MOVOU X2, -32(AX)(R9*1) 724 MOVOU X3, -16(AX)(R9*1) 725 726memmove_end_copy_match_emit_encodeBlockAsm: 727 MOVQ R8, AX 728 JMP 
emit_literal_done_match_emit_encodeBlockAsm 729 730memmove_long_match_emit_encodeBlockAsm: 731 LEAQ (AX)(R9*1), R8 732 733 // genMemMoveLong 734 MOVOU (DI), X0 735 MOVOU 16(DI), X1 736 MOVOU -32(DI)(R9*1), X2 737 MOVOU -16(DI)(R9*1), X3 738 MOVQ R9, R11 739 SHRQ $0x05, R11 740 MOVQ AX, R10 741 ANDL $0x0000001f, R10 742 MOVQ $0x00000040, R12 743 SUBQ R10, R12 744 DECQ R11 745 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 746 LEAQ -32(DI)(R12*1), R10 747 LEAQ -32(AX)(R12*1), R13 748 749emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: 750 MOVOU (R10), X4 751 MOVOU 16(R10), X5 752 MOVOA X4, (R13) 753 MOVOA X5, 16(R13) 754 ADDQ $0x20, R13 755 ADDQ $0x20, R10 756 ADDQ $0x20, R12 757 DECQ R11 758 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back 759 760emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: 761 MOVOU -32(DI)(R12*1), X4 762 MOVOU -16(DI)(R12*1), X5 763 MOVOA X4, -32(AX)(R12*1) 764 MOVOA X5, -16(AX)(R12*1) 765 ADDQ $0x20, R12 766 CMPQ R9, R12 767 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 768 MOVOU X0, (AX) 769 MOVOU X1, 16(AX) 770 MOVOU X2, -32(AX)(R9*1) 771 MOVOU X3, -16(AX)(R9*1) 772 MOVQ R8, AX 773 774emit_literal_done_match_emit_encodeBlockAsm: 775match_nolit_loop_encodeBlockAsm: 776 MOVL CX, DI 777 SUBL SI, DI 778 MOVL DI, 16(SP) 779 ADDL $0x04, CX 780 ADDL $0x04, SI 781 MOVQ src_len+32(FP), DI 782 SUBL CX, DI 783 LEAQ (DX)(CX*1), R8 784 LEAQ (DX)(SI*1), SI 785 786 // matchLen 787 XORL R10, R10 788 CMPL DI, $0x08 789 JL matchlen_single_match_nolit_encodeBlockAsm 790 791matchlen_loopback_match_nolit_encodeBlockAsm: 792 MOVQ (R8)(R10*1), R9 793 XORQ (SI)(R10*1), R9 794 TESTQ R9, R9 795 JZ matchlen_loop_match_nolit_encodeBlockAsm 796 BSFQ R9, R9 797 SARQ $0x03, R9 798 LEAL (R10)(R9*1), R10 799 JMP match_nolit_end_encodeBlockAsm 800 801matchlen_loop_match_nolit_encodeBlockAsm: 802 LEAL -8(DI), DI 803 LEAL 8(R10), R10 804 CMPL DI, $0x08 
805 JGE matchlen_loopback_match_nolit_encodeBlockAsm 806 807matchlen_single_match_nolit_encodeBlockAsm: 808 TESTL DI, DI 809 JZ match_nolit_end_encodeBlockAsm 810 811matchlen_single_loopback_match_nolit_encodeBlockAsm: 812 MOVB (R8)(R10*1), R9 813 CMPB (SI)(R10*1), R9 814 JNE match_nolit_end_encodeBlockAsm 815 LEAL 1(R10), R10 816 DECL DI 817 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm 818 819match_nolit_end_encodeBlockAsm: 820 ADDL R10, CX 821 MOVL 16(SP), SI 822 ADDL $0x04, R10 823 MOVL CX, 12(SP) 824 825 // emitCopy 826 CMPL SI, $0x00010000 827 JL two_byte_offset_match_nolit_encodeBlockAsm 828 829four_bytes_loop_back_match_nolit_encodeBlockAsm: 830 CMPL R10, $0x40 831 JLE four_bytes_remain_match_nolit_encodeBlockAsm 832 MOVB $0xff, (AX) 833 MOVL SI, 1(AX) 834 LEAL -64(R10), R10 835 ADDQ $0x05, AX 836 CMPL R10, $0x04 837 JL four_bytes_remain_match_nolit_encodeBlockAsm 838 839 // emitRepeat 840emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: 841 MOVL R10, DI 842 LEAL -4(R10), R10 843 CMPL DI, $0x08 844 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy 845 CMPL DI, $0x0c 846 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy 847 CMPL SI, $0x00000800 848 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy 849 850cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: 851 CMPL R10, $0x00000104 852 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy 853 CMPL R10, $0x00010100 854 JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy 855 CMPL R10, $0x0100ffff 856 JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy 857 LEAL -16842747(R10), R10 858 MOVW $0x001d, (AX) 859 MOVW $0xfffb, 2(AX) 860 MOVB $0xff, 4(AX) 861 ADDQ $0x05, AX 862 JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy 863 864repeat_five_match_nolit_encodeBlockAsm_emit_copy: 865 LEAL -65536(R10), R10 866 MOVL R10, SI 867 MOVW $0x001d, (AX) 868 MOVW R10, 2(AX) 869 SARL $0x10, SI 870 MOVB SI, 4(AX) 871 ADDQ $0x05, AX 872 JMP 
match_nolit_emitcopy_end_encodeBlockAsm 873 874repeat_four_match_nolit_encodeBlockAsm_emit_copy: 875 LEAL -256(R10), R10 876 MOVW $0x0019, (AX) 877 MOVW R10, 2(AX) 878 ADDQ $0x04, AX 879 JMP match_nolit_emitcopy_end_encodeBlockAsm 880 881repeat_three_match_nolit_encodeBlockAsm_emit_copy: 882 LEAL -4(R10), R10 883 MOVW $0x0015, (AX) 884 MOVB R10, 2(AX) 885 ADDQ $0x03, AX 886 JMP match_nolit_emitcopy_end_encodeBlockAsm 887 888repeat_two_match_nolit_encodeBlockAsm_emit_copy: 889 SHLL $0x02, R10 890 ORL $0x01, R10 891 MOVW R10, (AX) 892 ADDQ $0x02, AX 893 JMP match_nolit_emitcopy_end_encodeBlockAsm 894 895repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: 896 XORQ DI, DI 897 LEAL 1(DI)(R10*4), R10 898 MOVB SI, 1(AX) 899 SARL $0x08, SI 900 SHLL $0x05, SI 901 ORL SI, R10 902 MOVB R10, (AX) 903 ADDQ $0x02, AX 904 JMP match_nolit_emitcopy_end_encodeBlockAsm 905 JMP four_bytes_loop_back_match_nolit_encodeBlockAsm 906 907four_bytes_remain_match_nolit_encodeBlockAsm: 908 TESTL R10, R10 909 JZ match_nolit_emitcopy_end_encodeBlockAsm 910 MOVB $0x03, BL 911 LEAL -4(BX)(R10*4), R10 912 MOVB R10, (AX) 913 MOVL SI, 1(AX) 914 ADDQ $0x05, AX 915 JMP match_nolit_emitcopy_end_encodeBlockAsm 916 917two_byte_offset_match_nolit_encodeBlockAsm: 918 CMPL R10, $0x40 919 JLE two_byte_offset_short_match_nolit_encodeBlockAsm 920 MOVB $0xee, (AX) 921 MOVW SI, 1(AX) 922 LEAL -60(R10), R10 923 ADDQ $0x03, AX 924 925 // emitRepeat 926emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: 927 MOVL R10, DI 928 LEAL -4(R10), R10 929 CMPL DI, $0x08 930 JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short 931 CMPL DI, $0x0c 932 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short 933 CMPL SI, $0x00000800 934 JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short 935 936cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: 937 CMPL R10, $0x00000104 938 JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short 939 CMPL R10, $0x00010100 940 JLT 
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short 941 CMPL R10, $0x0100ffff 942 JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short 943 LEAL -16842747(R10), R10 944 MOVW $0x001d, (AX) 945 MOVW $0xfffb, 2(AX) 946 MOVB $0xff, 4(AX) 947 ADDQ $0x05, AX 948 JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short 949 950repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: 951 LEAL -65536(R10), R10 952 MOVL R10, SI 953 MOVW $0x001d, (AX) 954 MOVW R10, 2(AX) 955 SARL $0x10, SI 956 MOVB SI, 4(AX) 957 ADDQ $0x05, AX 958 JMP match_nolit_emitcopy_end_encodeBlockAsm 959 960repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: 961 LEAL -256(R10), R10 962 MOVW $0x0019, (AX) 963 MOVW R10, 2(AX) 964 ADDQ $0x04, AX 965 JMP match_nolit_emitcopy_end_encodeBlockAsm 966 967repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: 968 LEAL -4(R10), R10 969 MOVW $0x0015, (AX) 970 MOVB R10, 2(AX) 971 ADDQ $0x03, AX 972 JMP match_nolit_emitcopy_end_encodeBlockAsm 973 974repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: 975 SHLL $0x02, R10 976 ORL $0x01, R10 977 MOVW R10, (AX) 978 ADDQ $0x02, AX 979 JMP match_nolit_emitcopy_end_encodeBlockAsm 980 981repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: 982 XORQ DI, DI 983 LEAL 1(DI)(R10*4), R10 984 MOVB SI, 1(AX) 985 SARL $0x08, SI 986 SHLL $0x05, SI 987 ORL SI, R10 988 MOVB R10, (AX) 989 ADDQ $0x02, AX 990 JMP match_nolit_emitcopy_end_encodeBlockAsm 991 JMP two_byte_offset_match_nolit_encodeBlockAsm 992 993two_byte_offset_short_match_nolit_encodeBlockAsm: 994 CMPL R10, $0x0c 995 JGE emit_copy_three_match_nolit_encodeBlockAsm 996 CMPL SI, $0x00000800 997 JGE emit_copy_three_match_nolit_encodeBlockAsm 998 MOVB $0x01, BL 999 LEAL -16(BX)(R10*4), R10 1000 MOVB SI, 1(AX) 1001 SHRL $0x08, SI 1002 SHLL $0x05, SI 1003 ORL SI, R10 1004 MOVB R10, (AX) 1005 ADDQ $0x02, AX 1006 JMP match_nolit_emitcopy_end_encodeBlockAsm 1007 1008emit_copy_three_match_nolit_encodeBlockAsm: 1009 MOVB $0x02, BL 1010 LEAL 
-4(BX)(R10*4), R10 1011 MOVB R10, (AX) 1012 MOVW SI, 1(AX) 1013 ADDQ $0x03, AX 1014 1015match_nolit_emitcopy_end_encodeBlockAsm: 1016 CMPL CX, 8(SP) 1017 JGE emit_remainder_encodeBlockAsm 1018 MOVQ -2(DX)(CX*1), DI 1019 CMPQ AX, (SP) 1020 JL match_nolit_dst_ok_encodeBlockAsm 1021 MOVQ $0x00000000, ret+48(FP) 1022 RET 1023 1024match_nolit_dst_ok_encodeBlockAsm: 1025 MOVQ $0x0000cf1bbcdcbf9b, R9 1026 MOVQ DI, R8 1027 SHRQ $0x10, DI 1028 MOVQ DI, SI 1029 SHLQ $0x10, R8 1030 IMULQ R9, R8 1031 SHRQ $0x32, R8 1032 SHLQ $0x10, SI 1033 IMULQ R9, SI 1034 SHRQ $0x32, SI 1035 LEAL -2(CX), R9 1036 LEAQ 24(SP)(SI*4), R10 1037 MOVL (R10), SI 1038 MOVL R9, 24(SP)(R8*4) 1039 MOVL CX, (R10) 1040 CMPL (DX)(SI*1), DI 1041 JEQ match_nolit_loop_encodeBlockAsm 1042 INCL CX 1043 JMP search_loop_encodeBlockAsm 1044 1045emit_remainder_encodeBlockAsm: 1046 MOVQ src_len+32(FP), CX 1047 SUBL 12(SP), CX 1048 LEAQ 5(AX)(CX*1), CX 1049 CMPQ CX, (SP) 1050 JL emit_remainder_ok_encodeBlockAsm 1051 MOVQ $0x00000000, ret+48(FP) 1052 RET 1053 1054emit_remainder_ok_encodeBlockAsm: 1055 MOVQ src_len+32(FP), CX 1056 MOVL 12(SP), BX 1057 CMPL BX, CX 1058 JEQ emit_literal_done_emit_remainder_encodeBlockAsm 1059 MOVL CX, SI 1060 MOVL CX, 12(SP) 1061 LEAQ (DX)(BX*1), CX 1062 SUBL BX, SI 1063 LEAL -1(SI), DX 1064 CMPL DX, $0x3c 1065 JLT one_byte_emit_remainder_encodeBlockAsm 1066 CMPL DX, $0x00000100 1067 JLT two_bytes_emit_remainder_encodeBlockAsm 1068 CMPL DX, $0x00010000 1069 JLT three_bytes_emit_remainder_encodeBlockAsm 1070 CMPL DX, $0x01000000 1071 JLT four_bytes_emit_remainder_encodeBlockAsm 1072 MOVB $0xfc, (AX) 1073 MOVL DX, 1(AX) 1074 ADDQ $0x05, AX 1075 JMP memmove_long_emit_remainder_encodeBlockAsm 1076 1077four_bytes_emit_remainder_encodeBlockAsm: 1078 MOVL DX, BX 1079 SHRL $0x10, BX 1080 MOVB $0xf8, (AX) 1081 MOVW DX, 1(AX) 1082 MOVB BL, 3(AX) 1083 ADDQ $0x04, AX 1084 JMP memmove_long_emit_remainder_encodeBlockAsm 1085 1086three_bytes_emit_remainder_encodeBlockAsm: 1087 MOVB $0xf4, (AX) 1088 
MOVW DX, 1(AX) 1089 ADDQ $0x03, AX 1090 JMP memmove_long_emit_remainder_encodeBlockAsm 1091 1092two_bytes_emit_remainder_encodeBlockAsm: 1093 MOVB $0xf0, (AX) 1094 MOVB DL, 1(AX) 1095 ADDQ $0x02, AX 1096 CMPL DX, $0x40 1097 JL memmove_emit_remainder_encodeBlockAsm 1098 JMP memmove_long_emit_remainder_encodeBlockAsm 1099 1100one_byte_emit_remainder_encodeBlockAsm: 1101 SHLB $0x02, DL 1102 MOVB DL, (AX) 1103 ADDQ $0x01, AX 1104 1105memmove_emit_remainder_encodeBlockAsm: 1106 LEAQ (AX)(SI*1), DX 1107 MOVL SI, BX 1108 1109 // genMemMoveShort 1110 CMPQ BX, $0x03 1111 JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2 1112 JE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3 1113 CMPQ BX, $0x08 1114 JB emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7 1115 CMPQ BX, $0x10 1116 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16 1117 CMPQ BX, $0x20 1118 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32 1119 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 1120 1121emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: 1122 MOVB (CX), SI 1123 MOVB -1(CX)(BX*1), CL 1124 MOVB SI, (AX) 1125 MOVB CL, -1(AX)(BX*1) 1126 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1127 1128emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: 1129 MOVW (CX), SI 1130 MOVB 2(CX), CL 1131 MOVW SI, (AX) 1132 MOVB CL, 2(AX) 1133 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1134 1135emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: 1136 MOVL (CX), SI 1137 MOVL -4(CX)(BX*1), CX 1138 MOVL SI, (AX) 1139 MOVL CX, -4(AX)(BX*1) 1140 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1141 1142emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: 1143 MOVQ (CX), SI 1144 MOVQ -8(CX)(BX*1), CX 1145 MOVQ SI, (AX) 1146 MOVQ CX, -8(AX)(BX*1) 1147 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1148 
1149emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: 1150 MOVOU (CX), X0 1151 MOVOU -16(CX)(BX*1), X1 1152 MOVOU X0, (AX) 1153 MOVOU X1, -16(AX)(BX*1) 1154 JMP memmove_end_copy_emit_remainder_encodeBlockAsm 1155 1156emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: 1157 MOVOU (CX), X0 1158 MOVOU 16(CX), X1 1159 MOVOU -32(CX)(BX*1), X2 1160 MOVOU -16(CX)(BX*1), X3 1161 MOVOU X0, (AX) 1162 MOVOU X1, 16(AX) 1163 MOVOU X2, -32(AX)(BX*1) 1164 MOVOU X3, -16(AX)(BX*1) 1165 1166memmove_end_copy_emit_remainder_encodeBlockAsm: 1167 MOVQ DX, AX 1168 JMP emit_literal_done_emit_remainder_encodeBlockAsm 1169 1170memmove_long_emit_remainder_encodeBlockAsm: 1171 LEAQ (AX)(SI*1), DX 1172 MOVL SI, BX 1173 1174 // genMemMoveLong 1175 MOVOU (CX), X0 1176 MOVOU 16(CX), X1 1177 MOVOU -32(CX)(BX*1), X2 1178 MOVOU -16(CX)(BX*1), X3 1179 MOVQ BX, DI 1180 SHRQ $0x05, DI 1181 MOVQ AX, SI 1182 ANDL $0x0000001f, SI 1183 MOVQ $0x00000040, R8 1184 SUBQ SI, R8 1185 DECQ DI 1186 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 1187 LEAQ -32(CX)(R8*1), SI 1188 LEAQ -32(AX)(R8*1), R9 1189 1190emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: 1191 MOVOU (SI), X4 1192 MOVOU 16(SI), X5 1193 MOVOA X4, (R9) 1194 MOVOA X5, 16(R9) 1195 ADDQ $0x20, R9 1196 ADDQ $0x20, SI 1197 ADDQ $0x20, R8 1198 DECQ DI 1199 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back 1200 1201emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: 1202 MOVOU -32(CX)(R8*1), X4 1203 MOVOU -16(CX)(R8*1), X5 1204 MOVOA X4, -32(AX)(R8*1) 1205 MOVOA X5, -16(AX)(R8*1) 1206 ADDQ $0x20, R8 1207 CMPQ BX, R8 1208 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 1209 MOVOU X0, (AX) 1210 MOVOU X1, 16(AX) 1211 MOVOU X2, -32(AX)(BX*1) 1212 MOVOU X3, -16(AX)(BX*1) 1213 MOVQ DX, AX 1214 1215emit_literal_done_emit_remainder_encodeBlockAsm: 1216 MOVQ dst_base+0(FP), CX 1217 
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeBlockAsm4MB encodes src into dst and returns the number of bytes
// written, computed on exit as the final write pointer minus dst_base.
// It returns 0 instead whenever one of the checks against the limit kept
// in 0(SP) fails.
//
// Stack frame as used below:
//   0(SP)   8 bytes      dst limit: dst_base + src_len - 5 - (src_len >> 5)
//   8(SP)   4 bytes      search limit: src_len - 8
//   12(SP)  4 bytes      src index of the first byte not yet emitted
//   16(SP)  4 bytes      offset of the most recent match (starts at 1)
//   20(SP)  4 bytes      src index at which to resume searching after a miss
//   24(SP)  65536 bytes  table of 16384 4-byte src indexes, indexed by hash
//
// Registers in the main loop: AX = dst write pointer, CX = current src
// index, DX = src base pointer.
TEXT ·encodeBlockAsm4MB(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Zero the table: 0x200 iterations of 0x80 bytes = 65536 bytes.
zero_loop_encodeBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm4MB

	// Initialise the scalar slots described in the header comment.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm4MB:
	// SI = CX + 4 + ((CX - 12(SP)) >> 6): the next search index. The step
	// grows with the distance from the last emitted position.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x06, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBlockAsm4MB

	// Load 8 bytes at CX and hash the 6-byte windows starting at CX (R10)
	// and CX+1 (R11): shift out the top 16 bits, multiply by the constant,
	// keep the top 14 bits of the product.
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10
	SHLQ $0x10, R11
	IMULQ R9, R11
	SHRQ $0x32, R11

	// Read the previous table entries (SI, R8), then record CX and CX+1.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash of the window starting at CX+2.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x32, R10

	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes located
	// 16(SP) bytes before them.
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeBlockAsm4MB

	// Repeat found. DI = CX+1 (match start), SI = DI - 16(SP) (match source),
	// R8 = 12(SP) as it was before this repeat is emitted.
	LEAL 1(CX), DI
	MOVL 12(SP), R8
	MOVL DI, SI
	SUBL 16(SP), SI
	JZ repeat_extend_back_end_encodeBlockAsm4MB

	// Extend the match backwards while DI > R8, SI > 0 and the preceding
	// bytes are equal.
repeat_extend_back_loop_encodeBlockAsm4MB:
	CMPL DI, R8
	JLE repeat_extend_back_end_encodeBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeBlockAsm4MB
	LEAL -1(DI), DI
	DECL SI
	JNZ repeat_extend_back_loop_encodeBlockAsm4MB

repeat_extend_back_end_encodeBlockAsm4MB:
	// Emit src[12(SP):DI] as a literal run (skipped when empty).
	// R9 = run length, R10 = source pointer, SI = length - 1.
	// Header by SI: < 0x3c one byte SI<<2; < 0x100 0xf0 + 1 byte;
	// < 0x10000 0xf4 + 2 bytes; otherwise 0xf8 + 3 bytes.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_repeat_emit_encodeBlockAsm4MB
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

three_bytes_repeat_emit_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

two_bytes_repeat_emit_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX

	// Runs of at most 64 bytes (SI < 0x40) use the short copy.
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeBlockAsm4MB
	JMP memmove_long_repeat_emit_encodeBlockAsm4MB

one_byte_repeat_emit_encodeBlockAsm4MB:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm4MB:
	// SI = dst pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Copies R9 bytes from (R10) to (AX). Each size class loads the head and
	// the tail of the run (they may overlap) before storing them.
	CMPQ R9, $0x03
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3
	CMPQ R9, $0x08
	JB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_1or2:
	MOVB (R10), R11
	MOVB -1(R10)(R9*1), R10
	MOVB R11, (AX)
	MOVB R10, -1(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_3:
	MOVW (R10), R11
	MOVB 2(R10), R10
	MOVW R11, (AX)
	MOVB R10, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB

emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB

memmove_long_repeat_emit_encodeBlockAsm4MB:
	// SI = dst pointer after the copy.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded into X0-X3 up front and stored
	// last. In between, 32-byte chunks are copied with unaligned loads and
	// MOVOA stores; R13 starts at 64 - (AX & 31), so -32(AX)(R13*1) is a
	// 32-byte-aligned dst address.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R12
	SHRQ $0x05, R12
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R13
	SUBQ R11, R13
	DECQ R12
	JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R13*1), R11
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R11
	ADDQ $0x20, R13
	DECQ R12
	JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R13*1), X4
	MOVOU -16(R10)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R9, R13
	JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_repeat_emit_encodeBlockAsm4MB:
	// Advance CX by 5: one byte to the compared position CX+1 plus the
	// 4 bytes already found equal. R10 = pointer at CX, SI = pointer one
	// offset earlier, R9 = bytes left in src.
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R9
	SUBL CX, R9
	LEAQ (DX)(CX*1), R10
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R12 = number of equal leading bytes of (R10) and (SI), at most R9.
	// Compared 8 bytes at a time; on a difference the lowest set bit of the
	// XOR, divided by 8, gives the count of equal bytes in that word.
	XORL R12, R12
	CMPL R9, $0x08
	JL matchlen_single_repeat_extend_encodeBlockAsm4MB

matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
	MOVQ (R10)(R12*1), R11
	XORQ (SI)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP repeat_extend_forward_end_encodeBlockAsm4MB

matchlen_loop_repeat_extend_encodeBlockAsm4MB:
	LEAL -8(R9), R9
	LEAL 8(R12), R12
	CMPL R9, $0x08
	JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB

matchlen_single_repeat_extend_encodeBlockAsm4MB:
	TESTL R9, R9
	JZ repeat_extend_forward_end_encodeBlockAsm4MB

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB:
	MOVB (R10)(R12*1), R11
	CMPB (SI)(R12*1), R11
	JNE repeat_extend_forward_end_encodeBlockAsm4MB
	LEAL 1(R12), R12
	DECL R9
	JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB

repeat_extend_forward_end_encodeBlockAsm4MB:
	// CX = end of match, SI = match length (CX - match start),
	// DI = offset from 16(SP). R8 still holds the value 12(SP) had when the
	// repeat was detected; when it is zero the match goes through emitCopy
	// instead of emitRepeat.
	ADDL R12, CX
	MOVL CX, SI
	SUBL DI, SI
	MOVL 16(SP), DI
	TESTL R8, R8
	JZ repeat_as_copy_encodeBlockAsm4MB

	// emitRepeat
	// R8 = length, SI = length - 4, DI = offset. Encodings written below:
	//   length <= 8:                    16-bit (SI<<2 | 1)
	//   length < 12 and offset < 2048:  byte (1 | SI<<2 | (offset>>8)<<5), byte offset
	//   SI < 0x104:                     0x15, 0x00, byte (SI - 4)
	//   SI < 0x10100:                   0x19, 0x00, 16-bit (SI - 256)
	//   otherwise:                      0x1d, 0x00, 24-bit (SI - 65536)
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_match_repeat_encodeBlockAsm4MB
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
	CMPL DI, $0x00000800
	JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB

cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
	CMPL SI, $0x00000104
	JLT repeat_three_match_repeat_encodeBlockAsm4MB
	CMPL SI, $0x00010100
	JLT repeat_four_match_repeat_encodeBlockAsm4MB
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_four_match_repeat_encodeBlockAsm4MB:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_three_match_repeat_encodeBlockAsm4MB:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_match_repeat_encodeBlockAsm4MB:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_offset_match_repeat_encodeBlockAsm4MB:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_as_copy_encodeBlockAsm4MB:
	// emitCopy
	// SI = length, DI = offset. Offsets below 0x10000 take the 2-byte-offset
	// path; larger ones are written with a 4-byte offset.
	CMPL DI, $0x00010000
	JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB:
	// length > 64: write 0xff + 32-bit offset for 64 bytes; if at least
	// 4 bytes remain, encode them with emitRepeat.
	CMPL SI, $0x40
	JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL DI, 1(AX)
	LEAL -64(SI), SI
	ADDQ $0x05, AX
	CMPL SI, $0x04
	JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB

	// emitRepeat
	// Same encodings as the emitRepeat sequence above.
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

	// Not reachable: follows an unconditional JMP and carries no label.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB

four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
	// Remaining length (if non-zero): byte (3 | (length-1)<<2) + 32-bit offset.
	TESTL SI, SI
	JZ repeat_end_emit_encodeBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
	// length > 64: write 0xee + 16-bit offset for 60 bytes, then encode the
	// rest with emitRepeat.
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX

	// emitRepeat
	// Same encodings as the emitRepeat sequence above.
	MOVL SI, R8
	LEAL -4(SI), SI
	CMPL R8, $0x08
	JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL R8, $0x0c
	JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL DI, $0x00000800
	JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	CMPL SI, $0x00000104
	JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	CMPL SI, $0x00010100
	JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
	LEAL -65536(SI), SI
	MOVL SI, DI
	MOVW $0x001d, (AX)
	MOVW SI, 2(AX)
	SARL $0x10, DI
	MOVB DI, 4(AX)
	ADDQ $0x05, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	LEAL -256(SI), SI
	MOVW $0x0019, (AX)
	MOVW SI, 2(AX)
	ADDQ $0x04, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	LEAL -4(SI), SI
	MOVW $0x0015, (AX)
	MOVB SI, 2(AX)
	ADDQ $0x03, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	SHLL $0x02, SI
	ORL $0x01, SI
	MOVW SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
	XORQ R8, R8
	LEAL 1(R8)(SI*4), SI
	MOVB DI, 1(AX)
	SARL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

	// Not reachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB

two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
	// length < 12 and offset < 2048: 2 bytes,
	// byte (1 | (length-4)<<2 | (offset>>8)<<5) then the low offset byte.
	// Otherwise fall to the 3-byte form.
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
	CMPL DI, $0x00000800
	JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeBlockAsm4MB

emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
	// 3 bytes: byte (2 | (length-1)<<2) + 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm4MB:
	// Everything up to CX has been emitted; resume the search there.
	MOVL CX, 12(SP)
	JMP search_loop_encodeBlockAsm4MB

no_repeat_found_encodeBlockAsm4MB:
	// Test the table candidates in turn: SI against the 4 bytes at CX,
	// R8 against the 4 bytes at CX+1, then the entry for the CX+2 hash
	// against the 4 bytes at CX+2. CX+2 is recorded in the table either way.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBlockAsm4MB
	SHRQ $0x08, DI
	MOVL 24(SP)(R10*4), SI
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI
	JEQ candidate2_match_encodeBlockAsm4MB
	MOVL R9, 24(SP)(R10*4)
	SHRQ $0x08, DI
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeBlockAsm4MB

	// No candidate matched: continue from the index saved in 20(SP).
	MOVL 20(SP), CX
	JMP search_loop_encodeBlockAsm4MB

candidate3_match_encodeBlockAsm4MB:
	ADDL $0x02, CX
	JMP candidate_match_encodeBlockAsm4MB

candidate2_match_encodeBlockAsm4MB:
	MOVL R9, 24(SP)(R10*4)
	INCL CX
	MOVL R8, SI

candidate_match_encodeBlockAsm4MB:
	// CX = match position, SI = candidate index, DI = 12(SP).
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBlockAsm4MB

	// Extend the match backwards while CX > DI, SI > 0 and the preceding
	// bytes are equal.
match_extend_back_loop_encodeBlockAsm4MB:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBlockAsm4MB
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBlockAsm4MB
	JMP match_extend_back_loop_encodeBlockAsm4MB

match_extend_back_end_encodeBlockAsm4MB:
	// Return 0 unless AX + 4 + (pending literal length) is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 4(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm4MB:
	// Emit src[12(SP):CX] as a literal run (skipped when empty).
	// R9 = run length, DI = source pointer, R8 = length - 1.
	// Header by R8: < 0x3c one byte R8<<2; < 0x100 0xf0 + 1 byte;
	// < 0x10000 0xf4 + 2 bytes; otherwise 0xf8 + 3 bytes.
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI
	SUBL R8, R9
	LEAL -1(R9), R8
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeBlockAsm4MB
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeBlockAsm4MB
	CMPL R8, $0x00010000
	JLT three_bytes_match_emit_encodeBlockAsm4MB
	MOVL R8, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW R8, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBlockAsm4MB

three_bytes_match_emit_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBlockAsm4MB

two_bytes_match_emit_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX

	// Runs of at most 64 bytes (R8 < 0x40) use the short copy.
	CMPL R8, $0x40
	JL memmove_match_emit_encodeBlockAsm4MB
	JMP memmove_long_match_emit_encodeBlockAsm4MB

one_byte_match_emit_encodeBlockAsm4MB:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm4MB:
	// R8 = dst pointer after the copy.
	LEAQ (AX)(R9*1), R8

	// genMemMoveShort
	// Copies R9 bytes from (DI) to (AX); head and tail moves may overlap.
	CMPQ R9, $0x03
	JB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_1or2:
	MOVB (DI), R10
	MOVB -1(DI)(R9*1), DI
	MOVB R10, (AX)
	MOVB DI, -1(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_3:
	MOVW (DI), R10
	MOVB 2(DI), DI
	MOVW R10, (AX)
	MOVB DI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_4through7:
	MOVL (DI), R10
	MOVL -4(DI)(R9*1), DI
	MOVL R10, (AX)
	MOVL DI, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBlockAsm4MB

emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBlockAsm4MB:
	MOVQ R8, AX
	JMP emit_literal_done_match_emit_encodeBlockAsm4MB

memmove_long_match_emit_encodeBlockAsm4MB:
	// R8 = dst pointer after the copy.
	LEAQ (AX)(R9*1), R8

	// genMemMoveLong
	// First and last 32 bytes are held in X0-X3 and stored last; the middle
	// is copied in 32-byte chunks with MOVOA stores to 32-byte-aligned dst
	// addresses (R12 starts at 64 - (AX & 31)).
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX

emit_literal_done_match_emit_encodeBlockAsm4MB:
match_nolit_loop_encodeBlockAsm4MB:
	// Record the offset (CX - SI) in 16(SP), then step both positions past
	// the 4 bytes already known to match. R8 = pointer at CX, SI = pointer
	// at the candidate, DI = bytes left in src.
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 = number of equal leading bytes of (R8) and (SI), at most DI.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeBlockAsm4MB

matchlen_loopback_match_nolit_encodeBlockAsm4MB:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeBlockAsm4MB
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeBlockAsm4MB

matchlen_loop_match_nolit_encodeBlockAsm4MB:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB

matchlen_single_match_nolit_encodeBlockAsm4MB:
	TESTL DI, DI
	JZ match_nolit_end_encodeBlockAsm4MB

matchlen_single_loopback_match_nolit_encodeBlockAsm4MB:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeBlockAsm4MB
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm4MB

match_nolit_end_encodeBlockAsm4MB:
	// CX = end of match, SI = offset, R10 = total match length (extension + 4).
	// 12(SP) is moved to CX since the match covers everything up to there.
	ADDL R10, CX
	MOVL 16(SP), SI
	ADDL $0x04, R10
	MOVL CX, 12(SP)

	// emitCopy
	// R10 = length, SI = offset. Offsets below 0x10000 take the
	// 2-byte-offset path; larger ones are written with a 4-byte offset.
	CMPL SI, $0x00010000
	JL two_byte_offset_match_nolit_encodeBlockAsm4MB

four_bytes_loop_back_match_nolit_encodeBlockAsm4MB:
	// length > 64: write 0xff + 32-bit offset for 64 bytes; if at least
	// 4 bytes remain, encode them with emitRepeat.
	CMPL R10, $0x40
	JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(R10), R10
	ADDQ $0x05, AX
	CMPL R10, $0x04
	JL four_bytes_remain_match_nolit_encodeBlockAsm4MB

	// emitRepeat
	// DI = length, R10 = length - 4, SI = offset. Encodings written below:
	//   length <= 8:                    16-bit (R10<<2 | 1)
	//   length < 12 and offset < 2048:  byte (1 | R10<<2 | (offset>>8)<<5), byte offset
	//   R10 < 0x104:                    0x15, 0x00, byte (R10 - 4)
	//   R10 < 0x10100:                  0x19, 0x00, 16-bit (R10 - 256)
	//   otherwise:                      0x1d, 0x00, 24-bit (R10 - 65536)
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

	// Not reachable: follows an unconditional JMP and carries no label.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB

four_bytes_remain_match_nolit_encodeBlockAsm4MB:
	// Remaining length (if non-zero): byte (3 | (length-1)<<2) + 32-bit offset.
	TESTL R10, R10
	JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

two_byte_offset_match_nolit_encodeBlockAsm4MB:
	// length > 64: write 0xee + 16-bit offset for 60 bytes, then encode the
	// rest with emitRepeat.
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX

	// emitRepeat
	// Same encodings as the emitRepeat sequence above.
	MOVL R10, DI
	LEAL -4(R10), R10
	CMPL DI, $0x08
	JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL DI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL SI, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	CMPL R10, $0x00000104
	JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
	CMPL R10, $0x00010100
	JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
	LEAL -65536(R10), R10
	MOVL R10, SI
	MOVW $0x001d, (AX)
	MOVW R10, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	LEAL -256(R10), R10
	MOVW $0x0019, (AX)
	MOVW R10, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	LEAL -4(R10), R10
	MOVW $0x0015, (AX)
	MOVB R10, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	SHLL $0x02, R10
	ORL $0x01, R10
	MOVW R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(R10*4), R10
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

	// Not reachable: follows an unconditional JMP and carries no label.
	JMP two_byte_offset_match_nolit_encodeBlockAsm4MB

two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
	// length < 12 and offset < 2048: 2 bytes,
	// byte (1 | (length-4)<<2 | (offset>>8)<<5) then the low offset byte.
	// Otherwise fall to the 3-byte form.
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
	CMPL SI, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBlockAsm4MB

emit_copy_three_match_nolit_encodeBlockAsm4MB:
	// 3 bytes: byte (2 | (length-1)<<2) + 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm4MB:
	// Past the search limit: flush the tail. Otherwise load the 8 bytes at
	// CX-2 and return 0 if the write pointer has reached the dst limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBlockAsm4MB
	MOVQ -2(DX)(CX*1), DI
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm4MB:
	// Hash the windows at CX-2 (R8) and CX (SI), record CX-2 and CX in the
	// table, and take the previous entry for the CX hash as a candidate.
	// If its 4 bytes equal the 4 bytes at CX, another match starts right
	// here with no literals in between.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ DI, R8
	SHRQ $0x10, DI
	MOVQ DI, SI
	SHLQ $0x10, R8
	IMULQ R9, R8
	SHRQ $0x32, R8
	SHLQ $0x10, SI
	IMULQ R9, SI
	SHRQ $0x32, SI
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeBlockAsm4MB
	INCL CX
	JMP search_loop_encodeBlockAsm4MB

emit_remainder_encodeBlockAsm4MB:
	// Return 0 unless AX + 4 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm4MB:
	// Emit src[12(SP):src_len] as a final literal run (skipped when empty).
	// SI = run length, CX = source pointer, DX = length - 1; DX no longer
	// holds the src base from here on.
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBlockAsm4MB
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBlockAsm4MB
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBlockAsm4MB
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

three_bytes_emit_remainder_encodeBlockAsm4MB:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

two_bytes_emit_remainder_encodeBlockAsm4MB:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX

	// Runs of at most 64 bytes (DX < 0x40) use the short copy.
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBlockAsm4MB
	JMP memmove_long_emit_remainder_encodeBlockAsm4MB

one_byte_emit_remainder_encodeBlockAsm4MB:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm4MB:
	// DX = dst pointer after the copy, BX = byte count.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// Copies BX bytes from (CX) to (AX); head and tail moves may overlap.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB

memmove_long_emit_remainder_encodeBlockAsm4MB:
	// DX = dst pointer after the copy, BX = byte count.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// First and last 32 bytes are held in X0-X3 and stored last; the middle
	// is copied in 32-byte chunks with MOVOA stores to 32-byte-aligned dst
	// addresses (R8 starts at 64 - (AX & 31)).
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm4MB:
	// Return the number of bytes written: write pointer minus dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBlockAsm12B
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL CX, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBlockAsm12B:
	MOVL CX, SI
2389 SUBL 12(SP), SI 2390 SHRL $0x05, SI 2391 LEAL 4(CX)(SI*1), SI 2392 CMPL SI, 8(SP) 2393 JGE emit_remainder_encodeBlockAsm12B 2394 MOVQ (DX)(CX*1), DI 2395 MOVL SI, 20(SP) 2396 MOVQ $0x000000cf1bbcdcbb, R9 2397 MOVQ DI, R10 2398 MOVQ DI, R11 2399 SHRQ $0x08, R11 2400 SHLQ $0x18, R10 2401 IMULQ R9, R10 2402 SHRQ $0x34, R10 2403 SHLQ $0x18, R11 2404 IMULQ R9, R11 2405 SHRQ $0x34, R11 2406 MOVL 24(SP)(R10*4), SI 2407 MOVL 24(SP)(R11*4), R8 2408 MOVL CX, 24(SP)(R10*4) 2409 LEAL 1(CX), R10 2410 MOVL R10, 24(SP)(R11*4) 2411 MOVQ DI, R10 2412 SHRQ $0x10, R10 2413 SHLQ $0x18, R10 2414 IMULQ R9, R10 2415 SHRQ $0x34, R10 2416 MOVL CX, R9 2417 SUBL 16(SP), R9 2418 MOVL 1(DX)(R9*1), R11 2419 MOVQ DI, R9 2420 SHRQ $0x08, R9 2421 CMPL R9, R11 2422 JNE no_repeat_found_encodeBlockAsm12B 2423 LEAL 1(CX), DI 2424 MOVL 12(SP), R8 2425 MOVL DI, SI 2426 SUBL 16(SP), SI 2427 JZ repeat_extend_back_end_encodeBlockAsm12B 2428 2429repeat_extend_back_loop_encodeBlockAsm12B: 2430 CMPL DI, R8 2431 JLE repeat_extend_back_end_encodeBlockAsm12B 2432 MOVB -1(DX)(SI*1), BL 2433 MOVB -1(DX)(DI*1), R9 2434 CMPB BL, R9 2435 JNE repeat_extend_back_end_encodeBlockAsm12B 2436 LEAL -1(DI), DI 2437 DECL SI 2438 JNZ repeat_extend_back_loop_encodeBlockAsm12B 2439 2440repeat_extend_back_end_encodeBlockAsm12B: 2441 MOVL 12(SP), SI 2442 CMPL SI, DI 2443 JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B 2444 MOVL DI, R9 2445 MOVL DI, 12(SP) 2446 LEAQ (DX)(SI*1), R10 2447 SUBL SI, R9 2448 LEAL -1(R9), SI 2449 CMPL SI, $0x3c 2450 JLT one_byte_repeat_emit_encodeBlockAsm12B 2451 CMPL SI, $0x00000100 2452 JLT two_bytes_repeat_emit_encodeBlockAsm12B 2453 MOVB $0xf4, (AX) 2454 MOVW SI, 1(AX) 2455 ADDQ $0x03, AX 2456 JMP memmove_long_repeat_emit_encodeBlockAsm12B 2457 2458two_bytes_repeat_emit_encodeBlockAsm12B: 2459 MOVB $0xf0, (AX) 2460 MOVB SI, 1(AX) 2461 ADDQ $0x02, AX 2462 CMPL SI, $0x40 2463 JL memmove_repeat_emit_encodeBlockAsm12B 2464 JMP memmove_long_repeat_emit_encodeBlockAsm12B 2465 
2466one_byte_repeat_emit_encodeBlockAsm12B: 2467 SHLB $0x02, SI 2468 MOVB SI, (AX) 2469 ADDQ $0x01, AX 2470 2471memmove_repeat_emit_encodeBlockAsm12B: 2472 LEAQ (AX)(R9*1), SI 2473 2474 // genMemMoveShort 2475 CMPQ R9, $0x03 2476 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2 2477 JE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3 2478 CMPQ R9, $0x08 2479 JB emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7 2480 CMPQ R9, $0x10 2481 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 2482 CMPQ R9, $0x20 2483 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 2484 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 2485 2486emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2: 2487 MOVB (R10), R11 2488 MOVB -1(R10)(R9*1), R10 2489 MOVB R11, (AX) 2490 MOVB R10, -1(AX)(R9*1) 2491 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2492 2493emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3: 2494 MOVW (R10), R11 2495 MOVB 2(R10), R10 2496 MOVW R11, (AX) 2497 MOVB R10, 2(AX) 2498 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2499 2500emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7: 2501 MOVL (R10), R11 2502 MOVL -4(R10)(R9*1), R10 2503 MOVL R11, (AX) 2504 MOVL R10, -4(AX)(R9*1) 2505 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2506 2507emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: 2508 MOVQ (R10), R11 2509 MOVQ -8(R10)(R9*1), R10 2510 MOVQ R11, (AX) 2511 MOVQ R10, -8(AX)(R9*1) 2512 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2513 2514emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: 2515 MOVOU (R10), X0 2516 MOVOU -16(R10)(R9*1), X1 2517 MOVOU X0, (AX) 2518 MOVOU X1, -16(AX)(R9*1) 2519 JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B 2520 2521emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: 2522 MOVOU (R10), X0 
2523 MOVOU 16(R10), X1 2524 MOVOU -32(R10)(R9*1), X2 2525 MOVOU -16(R10)(R9*1), X3 2526 MOVOU X0, (AX) 2527 MOVOU X1, 16(AX) 2528 MOVOU X2, -32(AX)(R9*1) 2529 MOVOU X3, -16(AX)(R9*1) 2530 2531memmove_end_copy_repeat_emit_encodeBlockAsm12B: 2532 MOVQ SI, AX 2533 JMP emit_literal_done_repeat_emit_encodeBlockAsm12B 2534 2535memmove_long_repeat_emit_encodeBlockAsm12B: 2536 LEAQ (AX)(R9*1), SI 2537 2538 // genMemMoveLong 2539 MOVOU (R10), X0 2540 MOVOU 16(R10), X1 2541 MOVOU -32(R10)(R9*1), X2 2542 MOVOU -16(R10)(R9*1), X3 2543 MOVQ R9, R12 2544 SHRQ $0x05, R12 2545 MOVQ AX, R11 2546 ANDL $0x0000001f, R11 2547 MOVQ $0x00000040, R13 2548 SUBQ R11, R13 2549 DECQ R12 2550 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2551 LEAQ -32(R10)(R13*1), R11 2552 LEAQ -32(AX)(R13*1), R14 2553 2554emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: 2555 MOVOU (R11), X4 2556 MOVOU 16(R11), X5 2557 MOVOA X4, (R14) 2558 MOVOA X5, 16(R14) 2559 ADDQ $0x20, R14 2560 ADDQ $0x20, R11 2561 ADDQ $0x20, R13 2562 DECQ R12 2563 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back 2564 2565emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: 2566 MOVOU -32(R10)(R13*1), X4 2567 MOVOU -16(R10)(R13*1), X5 2568 MOVOA X4, -32(AX)(R13*1) 2569 MOVOA X5, -16(AX)(R13*1) 2570 ADDQ $0x20, R13 2571 CMPQ R9, R13 2572 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2573 MOVOU X0, (AX) 2574 MOVOU X1, 16(AX) 2575 MOVOU X2, -32(AX)(R9*1) 2576 MOVOU X3, -16(AX)(R9*1) 2577 MOVQ SI, AX 2578 2579emit_literal_done_repeat_emit_encodeBlockAsm12B: 2580 ADDL $0x05, CX 2581 MOVL CX, SI 2582 SUBL 16(SP), SI 2583 MOVQ src_len+32(FP), R9 2584 SUBL CX, R9 2585 LEAQ (DX)(CX*1), R10 2586 LEAQ (DX)(SI*1), SI 2587 2588 // matchLen 2589 XORL R12, R12 2590 CMPL R9, $0x08 2591 JL matchlen_single_repeat_extend_encodeBlockAsm12B 2592 2593matchlen_loopback_repeat_extend_encodeBlockAsm12B: 2594 MOVQ 
(R10)(R12*1), R11 2595 XORQ (SI)(R12*1), R11 2596 TESTQ R11, R11 2597 JZ matchlen_loop_repeat_extend_encodeBlockAsm12B 2598 BSFQ R11, R11 2599 SARQ $0x03, R11 2600 LEAL (R12)(R11*1), R12 2601 JMP repeat_extend_forward_end_encodeBlockAsm12B 2602 2603matchlen_loop_repeat_extend_encodeBlockAsm12B: 2604 LEAL -8(R9), R9 2605 LEAL 8(R12), R12 2606 CMPL R9, $0x08 2607 JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B 2608 2609matchlen_single_repeat_extend_encodeBlockAsm12B: 2610 TESTL R9, R9 2611 JZ repeat_extend_forward_end_encodeBlockAsm12B 2612 2613matchlen_single_loopback_repeat_extend_encodeBlockAsm12B: 2614 MOVB (R10)(R12*1), R11 2615 CMPB (SI)(R12*1), R11 2616 JNE repeat_extend_forward_end_encodeBlockAsm12B 2617 LEAL 1(R12), R12 2618 DECL R9 2619 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm12B 2620 2621repeat_extend_forward_end_encodeBlockAsm12B: 2622 ADDL R12, CX 2623 MOVL CX, SI 2624 SUBL DI, SI 2625 MOVL 16(SP), DI 2626 TESTL R8, R8 2627 JZ repeat_as_copy_encodeBlockAsm12B 2628 2629 // emitRepeat 2630 MOVL SI, R8 2631 LEAL -4(SI), SI 2632 CMPL R8, $0x08 2633 JLE repeat_two_match_repeat_encodeBlockAsm12B 2634 CMPL R8, $0x0c 2635 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B 2636 CMPL DI, $0x00000800 2637 JLT repeat_two_offset_match_repeat_encodeBlockAsm12B 2638 2639cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: 2640 CMPL SI, $0x00000104 2641 JLT repeat_three_match_repeat_encodeBlockAsm12B 2642 LEAL -256(SI), SI 2643 MOVW $0x0019, (AX) 2644 MOVW SI, 2(AX) 2645 ADDQ $0x04, AX 2646 JMP repeat_end_emit_encodeBlockAsm12B 2647 2648repeat_three_match_repeat_encodeBlockAsm12B: 2649 LEAL -4(SI), SI 2650 MOVW $0x0015, (AX) 2651 MOVB SI, 2(AX) 2652 ADDQ $0x03, AX 2653 JMP repeat_end_emit_encodeBlockAsm12B 2654 2655repeat_two_match_repeat_encodeBlockAsm12B: 2656 SHLL $0x02, SI 2657 ORL $0x01, SI 2658 MOVW SI, (AX) 2659 ADDQ $0x02, AX 2660 JMP repeat_end_emit_encodeBlockAsm12B 2661 2662repeat_two_offset_match_repeat_encodeBlockAsm12B: 2663 
XORQ R8, R8 2664 LEAL 1(R8)(SI*4), SI 2665 MOVB DI, 1(AX) 2666 SARL $0x08, DI 2667 SHLL $0x05, DI 2668 ORL DI, SI 2669 MOVB SI, (AX) 2670 ADDQ $0x02, AX 2671 JMP repeat_end_emit_encodeBlockAsm12B 2672 2673repeat_as_copy_encodeBlockAsm12B: 2674 // emitCopy 2675two_byte_offset_repeat_as_copy_encodeBlockAsm12B: 2676 CMPL SI, $0x40 2677 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B 2678 MOVB $0xee, (AX) 2679 MOVW DI, 1(AX) 2680 LEAL -60(SI), SI 2681 ADDQ $0x03, AX 2682 2683 // emitRepeat 2684 MOVL SI, R8 2685 LEAL -4(SI), SI 2686 CMPL R8, $0x08 2687 JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2688 CMPL R8, $0x0c 2689 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2690 CMPL DI, $0x00000800 2691 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2692 2693cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2694 CMPL SI, $0x00000104 2695 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short 2696 LEAL -256(SI), SI 2697 MOVW $0x0019, (AX) 2698 MOVW SI, 2(AX) 2699 ADDQ $0x04, AX 2700 JMP repeat_end_emit_encodeBlockAsm12B 2701 2702repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2703 LEAL -4(SI), SI 2704 MOVW $0x0015, (AX) 2705 MOVB SI, 2(AX) 2706 ADDQ $0x03, AX 2707 JMP repeat_end_emit_encodeBlockAsm12B 2708 2709repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2710 SHLL $0x02, SI 2711 ORL $0x01, SI 2712 MOVW SI, (AX) 2713 ADDQ $0x02, AX 2714 JMP repeat_end_emit_encodeBlockAsm12B 2715 2716repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: 2717 XORQ R8, R8 2718 LEAL 1(R8)(SI*4), SI 2719 MOVB DI, 1(AX) 2720 SARL $0x08, DI 2721 SHLL $0x05, DI 2722 ORL DI, SI 2723 MOVB SI, (AX) 2724 ADDQ $0x02, AX 2725 JMP repeat_end_emit_encodeBlockAsm12B 2726 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B 2727 2728two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: 2729 CMPL SI, $0x0c 2730 JGE 
emit_copy_three_repeat_as_copy_encodeBlockAsm12B 2731 CMPL DI, $0x00000800 2732 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B 2733 MOVB $0x01, BL 2734 LEAL -16(BX)(SI*4), SI 2735 MOVB DI, 1(AX) 2736 SHRL $0x08, DI 2737 SHLL $0x05, DI 2738 ORL DI, SI 2739 MOVB SI, (AX) 2740 ADDQ $0x02, AX 2741 JMP repeat_end_emit_encodeBlockAsm12B 2742 2743emit_copy_three_repeat_as_copy_encodeBlockAsm12B: 2744 MOVB $0x02, BL 2745 LEAL -4(BX)(SI*4), SI 2746 MOVB SI, (AX) 2747 MOVW DI, 1(AX) 2748 ADDQ $0x03, AX 2749 2750repeat_end_emit_encodeBlockAsm12B: 2751 MOVL CX, 12(SP) 2752 JMP search_loop_encodeBlockAsm12B 2753 2754no_repeat_found_encodeBlockAsm12B: 2755 CMPL (DX)(SI*1), DI 2756 JEQ candidate_match_encodeBlockAsm12B 2757 SHRQ $0x08, DI 2758 MOVL 24(SP)(R10*4), SI 2759 LEAL 2(CX), R9 2760 CMPL (DX)(R8*1), DI 2761 JEQ candidate2_match_encodeBlockAsm12B 2762 MOVL R9, 24(SP)(R10*4) 2763 SHRQ $0x08, DI 2764 CMPL (DX)(SI*1), DI 2765 JEQ candidate3_match_encodeBlockAsm12B 2766 MOVL 20(SP), CX 2767 JMP search_loop_encodeBlockAsm12B 2768 2769candidate3_match_encodeBlockAsm12B: 2770 ADDL $0x02, CX 2771 JMP candidate_match_encodeBlockAsm12B 2772 2773candidate2_match_encodeBlockAsm12B: 2774 MOVL R9, 24(SP)(R10*4) 2775 INCL CX 2776 MOVL R8, SI 2777 2778candidate_match_encodeBlockAsm12B: 2779 MOVL 12(SP), DI 2780 TESTL SI, SI 2781 JZ match_extend_back_end_encodeBlockAsm12B 2782 2783match_extend_back_loop_encodeBlockAsm12B: 2784 CMPL CX, DI 2785 JLE match_extend_back_end_encodeBlockAsm12B 2786 MOVB -1(DX)(SI*1), BL 2787 MOVB -1(DX)(CX*1), R8 2788 CMPB BL, R8 2789 JNE match_extend_back_end_encodeBlockAsm12B 2790 LEAL -1(CX), CX 2791 DECL SI 2792 JZ match_extend_back_end_encodeBlockAsm12B 2793 JMP match_extend_back_loop_encodeBlockAsm12B 2794 2795match_extend_back_end_encodeBlockAsm12B: 2796 MOVL CX, DI 2797 SUBL 12(SP), DI 2798 LEAQ 3(AX)(DI*1), DI 2799 CMPQ DI, (SP) 2800 JL match_dst_size_check_encodeBlockAsm12B 2801 MOVQ $0x00000000, ret+48(FP) 2802 RET 2803 
2804match_dst_size_check_encodeBlockAsm12B: 2805 MOVL CX, DI 2806 MOVL 12(SP), R8 2807 CMPL R8, DI 2808 JEQ emit_literal_done_match_emit_encodeBlockAsm12B 2809 MOVL DI, R9 2810 MOVL DI, 12(SP) 2811 LEAQ (DX)(R8*1), DI 2812 SUBL R8, R9 2813 LEAL -1(R9), R8 2814 CMPL R8, $0x3c 2815 JLT one_byte_match_emit_encodeBlockAsm12B 2816 CMPL R8, $0x00000100 2817 JLT two_bytes_match_emit_encodeBlockAsm12B 2818 MOVB $0xf4, (AX) 2819 MOVW R8, 1(AX) 2820 ADDQ $0x03, AX 2821 JMP memmove_long_match_emit_encodeBlockAsm12B 2822 2823two_bytes_match_emit_encodeBlockAsm12B: 2824 MOVB $0xf0, (AX) 2825 MOVB R8, 1(AX) 2826 ADDQ $0x02, AX 2827 CMPL R8, $0x40 2828 JL memmove_match_emit_encodeBlockAsm12B 2829 JMP memmove_long_match_emit_encodeBlockAsm12B 2830 2831one_byte_match_emit_encodeBlockAsm12B: 2832 SHLB $0x02, R8 2833 MOVB R8, (AX) 2834 ADDQ $0x01, AX 2835 2836memmove_match_emit_encodeBlockAsm12B: 2837 LEAQ (AX)(R9*1), R8 2838 2839 // genMemMoveShort 2840 CMPQ R9, $0x03 2841 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2 2842 JE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3 2843 CMPQ R9, $0x08 2844 JB emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7 2845 CMPQ R9, $0x10 2846 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 2847 CMPQ R9, $0x20 2848 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 2849 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 2850 2851emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2: 2852 MOVB (DI), R10 2853 MOVB -1(DI)(R9*1), DI 2854 MOVB R10, (AX) 2855 MOVB DI, -1(AX)(R9*1) 2856 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2857 2858emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3: 2859 MOVW (DI), R10 2860 MOVB 2(DI), DI 2861 MOVW R10, (AX) 2862 MOVB DI, 2(AX) 2863 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2864 2865emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7: 2866 MOVL 
(DI), R10 2867 MOVL -4(DI)(R9*1), DI 2868 MOVL R10, (AX) 2869 MOVL DI, -4(AX)(R9*1) 2870 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2871 2872emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: 2873 MOVQ (DI), R10 2874 MOVQ -8(DI)(R9*1), DI 2875 MOVQ R10, (AX) 2876 MOVQ DI, -8(AX)(R9*1) 2877 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2878 2879emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: 2880 MOVOU (DI), X0 2881 MOVOU -16(DI)(R9*1), X1 2882 MOVOU X0, (AX) 2883 MOVOU X1, -16(AX)(R9*1) 2884 JMP memmove_end_copy_match_emit_encodeBlockAsm12B 2885 2886emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: 2887 MOVOU (DI), X0 2888 MOVOU 16(DI), X1 2889 MOVOU -32(DI)(R9*1), X2 2890 MOVOU -16(DI)(R9*1), X3 2891 MOVOU X0, (AX) 2892 MOVOU X1, 16(AX) 2893 MOVOU X2, -32(AX)(R9*1) 2894 MOVOU X3, -16(AX)(R9*1) 2895 2896memmove_end_copy_match_emit_encodeBlockAsm12B: 2897 MOVQ R8, AX 2898 JMP emit_literal_done_match_emit_encodeBlockAsm12B 2899 2900memmove_long_match_emit_encodeBlockAsm12B: 2901 LEAQ (AX)(R9*1), R8 2902 2903 // genMemMoveLong 2904 MOVOU (DI), X0 2905 MOVOU 16(DI), X1 2906 MOVOU -32(DI)(R9*1), X2 2907 MOVOU -16(DI)(R9*1), X3 2908 MOVQ R9, R11 2909 SHRQ $0x05, R11 2910 MOVQ AX, R10 2911 ANDL $0x0000001f, R10 2912 MOVQ $0x00000040, R12 2913 SUBQ R10, R12 2914 DECQ R11 2915 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2916 LEAQ -32(DI)(R12*1), R10 2917 LEAQ -32(AX)(R12*1), R13 2918 2919emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: 2920 MOVOU (R10), X4 2921 MOVOU 16(R10), X5 2922 MOVOA X4, (R13) 2923 MOVOA X5, 16(R13) 2924 ADDQ $0x20, R13 2925 ADDQ $0x20, R10 2926 ADDQ $0x20, R12 2927 DECQ R11 2928 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back 2929 2930emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: 2931 MOVOU -32(DI)(R12*1), X4 2932 MOVOU -16(DI)(R12*1), X5 2933 MOVOA X4, 
-32(AX)(R12*1) 2934 MOVOA X5, -16(AX)(R12*1) 2935 ADDQ $0x20, R12 2936 CMPQ R9, R12 2937 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 2938 MOVOU X0, (AX) 2939 MOVOU X1, 16(AX) 2940 MOVOU X2, -32(AX)(R9*1) 2941 MOVOU X3, -16(AX)(R9*1) 2942 MOVQ R8, AX 2943 2944emit_literal_done_match_emit_encodeBlockAsm12B: 2945match_nolit_loop_encodeBlockAsm12B: 2946 MOVL CX, DI 2947 SUBL SI, DI 2948 MOVL DI, 16(SP) 2949 ADDL $0x04, CX 2950 ADDL $0x04, SI 2951 MOVQ src_len+32(FP), DI 2952 SUBL CX, DI 2953 LEAQ (DX)(CX*1), R8 2954 LEAQ (DX)(SI*1), SI 2955 2956 // matchLen 2957 XORL R10, R10 2958 CMPL DI, $0x08 2959 JL matchlen_single_match_nolit_encodeBlockAsm12B 2960 2961matchlen_loopback_match_nolit_encodeBlockAsm12B: 2962 MOVQ (R8)(R10*1), R9 2963 XORQ (SI)(R10*1), R9 2964 TESTQ R9, R9 2965 JZ matchlen_loop_match_nolit_encodeBlockAsm12B 2966 BSFQ R9, R9 2967 SARQ $0x03, R9 2968 LEAL (R10)(R9*1), R10 2969 JMP match_nolit_end_encodeBlockAsm12B 2970 2971matchlen_loop_match_nolit_encodeBlockAsm12B: 2972 LEAL -8(DI), DI 2973 LEAL 8(R10), R10 2974 CMPL DI, $0x08 2975 JGE matchlen_loopback_match_nolit_encodeBlockAsm12B 2976 2977matchlen_single_match_nolit_encodeBlockAsm12B: 2978 TESTL DI, DI 2979 JZ match_nolit_end_encodeBlockAsm12B 2980 2981matchlen_single_loopback_match_nolit_encodeBlockAsm12B: 2982 MOVB (R8)(R10*1), R9 2983 CMPB (SI)(R10*1), R9 2984 JNE match_nolit_end_encodeBlockAsm12B 2985 LEAL 1(R10), R10 2986 DECL DI 2987 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B 2988 2989match_nolit_end_encodeBlockAsm12B: 2990 ADDL R10, CX 2991 MOVL 16(SP), SI 2992 ADDL $0x04, R10 2993 MOVL CX, 12(SP) 2994 2995 // emitCopy 2996two_byte_offset_match_nolit_encodeBlockAsm12B: 2997 CMPL R10, $0x40 2998 JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B 2999 MOVB $0xee, (AX) 3000 MOVW SI, 1(AX) 3001 LEAL -60(R10), R10 3002 ADDQ $0x03, AX 3003 3004 // emitRepeat 3005 MOVL R10, DI 3006 LEAL -4(R10), R10 3007 CMPL DI, $0x08 3008 JLE 
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short 3009 CMPL DI, $0x0c 3010 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short 3011 CMPL SI, $0x00000800 3012 JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short 3013 3014cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: 3015 CMPL R10, $0x00000104 3016 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short 3017 LEAL -256(R10), R10 3018 MOVW $0x0019, (AX) 3019 MOVW R10, 2(AX) 3020 ADDQ $0x04, AX 3021 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 3022 3023repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: 3024 LEAL -4(R10), R10 3025 MOVW $0x0015, (AX) 3026 MOVB R10, 2(AX) 3027 ADDQ $0x03, AX 3028 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 3029 3030repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: 3031 SHLL $0x02, R10 3032 ORL $0x01, R10 3033 MOVW R10, (AX) 3034 ADDQ $0x02, AX 3035 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 3036 3037repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: 3038 XORQ DI, DI 3039 LEAL 1(DI)(R10*4), R10 3040 MOVB SI, 1(AX) 3041 SARL $0x08, SI 3042 SHLL $0x05, SI 3043 ORL SI, R10 3044 MOVB R10, (AX) 3045 ADDQ $0x02, AX 3046 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 3047 JMP two_byte_offset_match_nolit_encodeBlockAsm12B 3048 3049two_byte_offset_short_match_nolit_encodeBlockAsm12B: 3050 CMPL R10, $0x0c 3051 JGE emit_copy_three_match_nolit_encodeBlockAsm12B 3052 CMPL SI, $0x00000800 3053 JGE emit_copy_three_match_nolit_encodeBlockAsm12B 3054 MOVB $0x01, BL 3055 LEAL -16(BX)(R10*4), R10 3056 MOVB SI, 1(AX) 3057 SHRL $0x08, SI 3058 SHLL $0x05, SI 3059 ORL SI, R10 3060 MOVB R10, (AX) 3061 ADDQ $0x02, AX 3062 JMP match_nolit_emitcopy_end_encodeBlockAsm12B 3063 3064emit_copy_three_match_nolit_encodeBlockAsm12B: 3065 MOVB $0x02, BL 3066 LEAL -4(BX)(R10*4), R10 3067 MOVB R10, (AX) 3068 MOVW SI, 1(AX) 3069 ADDQ $0x03, AX 3070 3071match_nolit_emitcopy_end_encodeBlockAsm12B: 3072 CMPL CX, 8(SP) 
3073 JGE emit_remainder_encodeBlockAsm12B 3074 MOVQ -2(DX)(CX*1), DI 3075 CMPQ AX, (SP) 3076 JL match_nolit_dst_ok_encodeBlockAsm12B 3077 MOVQ $0x00000000, ret+48(FP) 3078 RET 3079 3080match_nolit_dst_ok_encodeBlockAsm12B: 3081 MOVQ $0x000000cf1bbcdcbb, R9 3082 MOVQ DI, R8 3083 SHRQ $0x10, DI 3084 MOVQ DI, SI 3085 SHLQ $0x18, R8 3086 IMULQ R9, R8 3087 SHRQ $0x34, R8 3088 SHLQ $0x18, SI 3089 IMULQ R9, SI 3090 SHRQ $0x34, SI 3091 LEAL -2(CX), R9 3092 LEAQ 24(SP)(SI*4), R10 3093 MOVL (R10), SI 3094 MOVL R9, 24(SP)(R8*4) 3095 MOVL CX, (R10) 3096 CMPL (DX)(SI*1), DI 3097 JEQ match_nolit_loop_encodeBlockAsm12B 3098 INCL CX 3099 JMP search_loop_encodeBlockAsm12B 3100 3101emit_remainder_encodeBlockAsm12B: 3102 MOVQ src_len+32(FP), CX 3103 SUBL 12(SP), CX 3104 LEAQ 3(AX)(CX*1), CX 3105 CMPQ CX, (SP) 3106 JL emit_remainder_ok_encodeBlockAsm12B 3107 MOVQ $0x00000000, ret+48(FP) 3108 RET 3109 3110emit_remainder_ok_encodeBlockAsm12B: 3111 MOVQ src_len+32(FP), CX 3112 MOVL 12(SP), BX 3113 CMPL BX, CX 3114 JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B 3115 MOVL CX, SI 3116 MOVL CX, 12(SP) 3117 LEAQ (DX)(BX*1), CX 3118 SUBL BX, SI 3119 LEAL -1(SI), DX 3120 CMPL DX, $0x3c 3121 JLT one_byte_emit_remainder_encodeBlockAsm12B 3122 CMPL DX, $0x00000100 3123 JLT two_bytes_emit_remainder_encodeBlockAsm12B 3124 MOVB $0xf4, (AX) 3125 MOVW DX, 1(AX) 3126 ADDQ $0x03, AX 3127 JMP memmove_long_emit_remainder_encodeBlockAsm12B 3128 3129two_bytes_emit_remainder_encodeBlockAsm12B: 3130 MOVB $0xf0, (AX) 3131 MOVB DL, 1(AX) 3132 ADDQ $0x02, AX 3133 CMPL DX, $0x40 3134 JL memmove_emit_remainder_encodeBlockAsm12B 3135 JMP memmove_long_emit_remainder_encodeBlockAsm12B 3136 3137one_byte_emit_remainder_encodeBlockAsm12B: 3138 SHLB $0x02, DL 3139 MOVB DL, (AX) 3140 ADDQ $0x01, AX 3141 3142memmove_emit_remainder_encodeBlockAsm12B: 3143 LEAQ (AX)(SI*1), DX 3144 MOVL SI, BX 3145 3146 // genMemMoveShort 3147 CMPQ BX, $0x03 3148 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2 
3149 JE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3 3150 CMPQ BX, $0x08 3151 JB emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7 3152 CMPQ BX, $0x10 3153 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16 3154 CMPQ BX, $0x20 3155 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32 3156 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 3157 3158emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: 3159 MOVB (CX), SI 3160 MOVB -1(CX)(BX*1), CL 3161 MOVB SI, (AX) 3162 MOVB CL, -1(AX)(BX*1) 3163 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3164 3165emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: 3166 MOVW (CX), SI 3167 MOVB 2(CX), CL 3168 MOVW SI, (AX) 3169 MOVB CL, 2(AX) 3170 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3171 3172emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: 3173 MOVL (CX), SI 3174 MOVL -4(CX)(BX*1), CX 3175 MOVL SI, (AX) 3176 MOVL CX, -4(AX)(BX*1) 3177 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3178 3179emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: 3180 MOVQ (CX), SI 3181 MOVQ -8(CX)(BX*1), CX 3182 MOVQ SI, (AX) 3183 MOVQ CX, -8(AX)(BX*1) 3184 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3185 3186emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: 3187 MOVOU (CX), X0 3188 MOVOU -16(CX)(BX*1), X1 3189 MOVOU X0, (AX) 3190 MOVOU X1, -16(AX)(BX*1) 3191 JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B 3192 3193emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: 3194 MOVOU (CX), X0 3195 MOVOU 16(CX), X1 3196 MOVOU -32(CX)(BX*1), X2 3197 MOVOU -16(CX)(BX*1), X3 3198 MOVOU X0, (AX) 3199 MOVOU X1, 16(AX) 3200 MOVOU X2, -32(AX)(BX*1) 3201 MOVOU X3, -16(AX)(BX*1) 3202 3203memmove_end_copy_emit_remainder_encodeBlockAsm12B: 3204 MOVQ DX, AX 3205 JMP 
emit_literal_done_emit_remainder_encodeBlockAsm12B 3206 3207memmove_long_emit_remainder_encodeBlockAsm12B: 3208 LEAQ (AX)(SI*1), DX 3209 MOVL SI, BX 3210 3211 // genMemMoveLong 3212 MOVOU (CX), X0 3213 MOVOU 16(CX), X1 3214 MOVOU -32(CX)(BX*1), X2 3215 MOVOU -16(CX)(BX*1), X3 3216 MOVQ BX, DI 3217 SHRQ $0x05, DI 3218 MOVQ AX, SI 3219 ANDL $0x0000001f, SI 3220 MOVQ $0x00000040, R8 3221 SUBQ SI, R8 3222 DECQ DI 3223 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 3224 LEAQ -32(CX)(R8*1), SI 3225 LEAQ -32(AX)(R8*1), R9 3226 3227emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: 3228 MOVOU (SI), X4 3229 MOVOU 16(SI), X5 3230 MOVOA X4, (R9) 3231 MOVOA X5, 16(R9) 3232 ADDQ $0x20, R9 3233 ADDQ $0x20, SI 3234 ADDQ $0x20, R8 3235 DECQ DI 3236 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back 3237 3238emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: 3239 MOVOU -32(CX)(R8*1), X4 3240 MOVOU -16(CX)(R8*1), X5 3241 MOVOA X4, -32(AX)(R8*1) 3242 MOVOA X5, -16(AX)(R8*1) 3243 ADDQ $0x20, R8 3244 CMPQ BX, R8 3245 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 3246 MOVOU X0, (AX) 3247 MOVOU X1, 16(AX) 3248 MOVOU X2, -32(AX)(BX*1) 3249 MOVOU X3, -16(AX)(BX*1) 3250 MOVQ DX, AX 3251 3252emit_literal_done_emit_remainder_encodeBlockAsm12B: 3253 MOVQ dst_base+0(FP), CX 3254 SUBQ CX, AX 3255 MOVQ AX, ret+48(FP) 3256 RET 3257 3258// func encodeBlockAsm10B(dst []byte, src []byte) int 3259// Requires: SSE2 3260TEXT ·encodeBlockAsm10B(SB), $4120-56 3261 MOVQ dst_base+0(FP), AX 3262 MOVQ $0x00000020, CX 3263 LEAQ 24(SP), DX 3264 PXOR X0, X0 3265 3266zero_loop_encodeBlockAsm10B: 3267 MOVOU X0, (DX) 3268 MOVOU X0, 16(DX) 3269 MOVOU X0, 32(DX) 3270 MOVOU X0, 48(DX) 3271 MOVOU X0, 64(DX) 3272 MOVOU X0, 80(DX) 3273 MOVOU X0, 96(DX) 3274 MOVOU X0, 112(DX) 3275 ADDQ $0x80, DX 3276 DECQ CX 3277 JNZ zero_loop_encodeBlockAsm10B 3278 MOVL $0x00000000, 
12(SP) 3279 MOVQ src_len+32(FP), CX 3280 LEAQ -5(CX), DX 3281 LEAQ -8(CX), SI 3282 MOVL SI, 8(SP) 3283 SHRQ $0x05, CX 3284 SUBL CX, DX 3285 LEAQ (AX)(DX*1), DX 3286 MOVQ DX, (SP) 3287 MOVL $0x00000001, CX 3288 MOVL CX, 16(SP) 3289 MOVQ src_base+24(FP), DX 3290 3291search_loop_encodeBlockAsm10B: 3292 MOVL CX, SI 3293 SUBL 12(SP), SI 3294 SHRL $0x05, SI 3295 LEAL 4(CX)(SI*1), SI 3296 CMPL SI, 8(SP) 3297 JGE emit_remainder_encodeBlockAsm10B 3298 MOVQ (DX)(CX*1), DI 3299 MOVL SI, 20(SP) 3300 MOVQ $0x9e3779b1, R9 3301 MOVQ DI, R10 3302 MOVQ DI, R11 3303 SHRQ $0x08, R11 3304 SHLQ $0x20, R10 3305 IMULQ R9, R10 3306 SHRQ $0x36, R10 3307 SHLQ $0x20, R11 3308 IMULQ R9, R11 3309 SHRQ $0x36, R11 3310 MOVL 24(SP)(R10*4), SI 3311 MOVL 24(SP)(R11*4), R8 3312 MOVL CX, 24(SP)(R10*4) 3313 LEAL 1(CX), R10 3314 MOVL R10, 24(SP)(R11*4) 3315 MOVQ DI, R10 3316 SHRQ $0x10, R10 3317 SHLQ $0x20, R10 3318 IMULQ R9, R10 3319 SHRQ $0x36, R10 3320 MOVL CX, R9 3321 SUBL 16(SP), R9 3322 MOVL 1(DX)(R9*1), R11 3323 MOVQ DI, R9 3324 SHRQ $0x08, R9 3325 CMPL R9, R11 3326 JNE no_repeat_found_encodeBlockAsm10B 3327 LEAL 1(CX), DI 3328 MOVL 12(SP), R8 3329 MOVL DI, SI 3330 SUBL 16(SP), SI 3331 JZ repeat_extend_back_end_encodeBlockAsm10B 3332 3333repeat_extend_back_loop_encodeBlockAsm10B: 3334 CMPL DI, R8 3335 JLE repeat_extend_back_end_encodeBlockAsm10B 3336 MOVB -1(DX)(SI*1), BL 3337 MOVB -1(DX)(DI*1), R9 3338 CMPB BL, R9 3339 JNE repeat_extend_back_end_encodeBlockAsm10B 3340 LEAL -1(DI), DI 3341 DECL SI 3342 JNZ repeat_extend_back_loop_encodeBlockAsm10B 3343 3344repeat_extend_back_end_encodeBlockAsm10B: 3345 MOVL 12(SP), SI 3346 CMPL SI, DI 3347 JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B 3348 MOVL DI, R9 3349 MOVL DI, 12(SP) 3350 LEAQ (DX)(SI*1), R10 3351 SUBL SI, R9 3352 LEAL -1(R9), SI 3353 CMPL SI, $0x3c 3354 JLT one_byte_repeat_emit_encodeBlockAsm10B 3355 CMPL SI, $0x00000100 3356 JLT two_bytes_repeat_emit_encodeBlockAsm10B 3357 MOVB $0xf4, (AX) 3358 MOVW SI, 1(AX) 3359 ADDQ $0x03, AX 
3360 JMP memmove_long_repeat_emit_encodeBlockAsm10B 3361 3362two_bytes_repeat_emit_encodeBlockAsm10B: 3363 MOVB $0xf0, (AX) 3364 MOVB SI, 1(AX) 3365 ADDQ $0x02, AX 3366 CMPL SI, $0x40 3367 JL memmove_repeat_emit_encodeBlockAsm10B 3368 JMP memmove_long_repeat_emit_encodeBlockAsm10B 3369 3370one_byte_repeat_emit_encodeBlockAsm10B: 3371 SHLB $0x02, SI 3372 MOVB SI, (AX) 3373 ADDQ $0x01, AX 3374 3375memmove_repeat_emit_encodeBlockAsm10B: 3376 LEAQ (AX)(R9*1), SI 3377 3378 // genMemMoveShort 3379 CMPQ R9, $0x03 3380 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2 3381 JE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3 3382 CMPQ R9, $0x08 3383 JB emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7 3384 CMPQ R9, $0x10 3385 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 3386 CMPQ R9, $0x20 3387 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 3388 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 3389 3390emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2: 3391 MOVB (R10), R11 3392 MOVB -1(R10)(R9*1), R10 3393 MOVB R11, (AX) 3394 MOVB R10, -1(AX)(R9*1) 3395 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3396 3397emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3: 3398 MOVW (R10), R11 3399 MOVB 2(R10), R10 3400 MOVW R11, (AX) 3401 MOVB R10, 2(AX) 3402 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3403 3404emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7: 3405 MOVL (R10), R11 3406 MOVL -4(R10)(R9*1), R10 3407 MOVL R11, (AX) 3408 MOVL R10, -4(AX)(R9*1) 3409 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3410 3411emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: 3412 MOVQ (R10), R11 3413 MOVQ -8(R10)(R9*1), R10 3414 MOVQ R11, (AX) 3415 MOVQ R10, -8(AX)(R9*1) 3416 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3417 
3418emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: 3419 MOVOU (R10), X0 3420 MOVOU -16(R10)(R9*1), X1 3421 MOVOU X0, (AX) 3422 MOVOU X1, -16(AX)(R9*1) 3423 JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B 3424 3425emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: 3426 MOVOU (R10), X0 3427 MOVOU 16(R10), X1 3428 MOVOU -32(R10)(R9*1), X2 3429 MOVOU -16(R10)(R9*1), X3 3430 MOVOU X0, (AX) 3431 MOVOU X1, 16(AX) 3432 MOVOU X2, -32(AX)(R9*1) 3433 MOVOU X3, -16(AX)(R9*1) 3434 3435memmove_end_copy_repeat_emit_encodeBlockAsm10B: 3436 MOVQ SI, AX 3437 JMP emit_literal_done_repeat_emit_encodeBlockAsm10B 3438 3439memmove_long_repeat_emit_encodeBlockAsm10B: 3440 LEAQ (AX)(R9*1), SI 3441 3442 // genMemMoveLong 3443 MOVOU (R10), X0 3444 MOVOU 16(R10), X1 3445 MOVOU -32(R10)(R9*1), X2 3446 MOVOU -16(R10)(R9*1), X3 3447 MOVQ R9, R12 3448 SHRQ $0x05, R12 3449 MOVQ AX, R11 3450 ANDL $0x0000001f, R11 3451 MOVQ $0x00000040, R13 3452 SUBQ R11, R13 3453 DECQ R12 3454 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3455 LEAQ -32(R10)(R13*1), R11 3456 LEAQ -32(AX)(R13*1), R14 3457 3458emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: 3459 MOVOU (R11), X4 3460 MOVOU 16(R11), X5 3461 MOVOA X4, (R14) 3462 MOVOA X5, 16(R14) 3463 ADDQ $0x20, R14 3464 ADDQ $0x20, R11 3465 ADDQ $0x20, R13 3466 DECQ R12 3467 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back 3468 3469emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: 3470 MOVOU -32(R10)(R13*1), X4 3471 MOVOU -16(R10)(R13*1), X5 3472 MOVOA X4, -32(AX)(R13*1) 3473 MOVOA X5, -16(AX)(R13*1) 3474 ADDQ $0x20, R13 3475 CMPQ R9, R13 3476 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3477 MOVOU X0, (AX) 3478 MOVOU X1, 16(AX) 3479 MOVOU X2, -32(AX)(R9*1) 3480 MOVOU X3, -16(AX)(R9*1) 3481 MOVQ SI, AX 3482 3483emit_literal_done_repeat_emit_encodeBlockAsm10B: 3484 ADDL 
$0x05, CX 3485 MOVL CX, SI 3486 SUBL 16(SP), SI 3487 MOVQ src_len+32(FP), R9 3488 SUBL CX, R9 3489 LEAQ (DX)(CX*1), R10 3490 LEAQ (DX)(SI*1), SI 3491 3492 // matchLen 3493 XORL R12, R12 3494 CMPL R9, $0x08 3495 JL matchlen_single_repeat_extend_encodeBlockAsm10B 3496 3497matchlen_loopback_repeat_extend_encodeBlockAsm10B: 3498 MOVQ (R10)(R12*1), R11 3499 XORQ (SI)(R12*1), R11 3500 TESTQ R11, R11 3501 JZ matchlen_loop_repeat_extend_encodeBlockAsm10B 3502 BSFQ R11, R11 3503 SARQ $0x03, R11 3504 LEAL (R12)(R11*1), R12 3505 JMP repeat_extend_forward_end_encodeBlockAsm10B 3506 3507matchlen_loop_repeat_extend_encodeBlockAsm10B: 3508 LEAL -8(R9), R9 3509 LEAL 8(R12), R12 3510 CMPL R9, $0x08 3511 JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B 3512 3513matchlen_single_repeat_extend_encodeBlockAsm10B: 3514 TESTL R9, R9 3515 JZ repeat_extend_forward_end_encodeBlockAsm10B 3516 3517matchlen_single_loopback_repeat_extend_encodeBlockAsm10B: 3518 MOVB (R10)(R12*1), R11 3519 CMPB (SI)(R12*1), R11 3520 JNE repeat_extend_forward_end_encodeBlockAsm10B 3521 LEAL 1(R12), R12 3522 DECL R9 3523 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B 3524 3525repeat_extend_forward_end_encodeBlockAsm10B: 3526 ADDL R12, CX 3527 MOVL CX, SI 3528 SUBL DI, SI 3529 MOVL 16(SP), DI 3530 TESTL R8, R8 3531 JZ repeat_as_copy_encodeBlockAsm10B 3532 3533 // emitRepeat 3534 MOVL SI, R8 3535 LEAL -4(SI), SI 3536 CMPL R8, $0x08 3537 JLE repeat_two_match_repeat_encodeBlockAsm10B 3538 CMPL R8, $0x0c 3539 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B 3540 CMPL DI, $0x00000800 3541 JLT repeat_two_offset_match_repeat_encodeBlockAsm10B 3542 3543cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: 3544 CMPL SI, $0x00000104 3545 JLT repeat_three_match_repeat_encodeBlockAsm10B 3546 LEAL -256(SI), SI 3547 MOVW $0x0019, (AX) 3548 MOVW SI, 2(AX) 3549 ADDQ $0x04, AX 3550 JMP repeat_end_emit_encodeBlockAsm10B 3551 3552repeat_three_match_repeat_encodeBlockAsm10B: 3553 LEAL -4(SI), SI 3554 MOVW 
$0x0015, (AX) 3555 MOVB SI, 2(AX) 3556 ADDQ $0x03, AX 3557 JMP repeat_end_emit_encodeBlockAsm10B 3558 3559repeat_two_match_repeat_encodeBlockAsm10B: 3560 SHLL $0x02, SI 3561 ORL $0x01, SI 3562 MOVW SI, (AX) 3563 ADDQ $0x02, AX 3564 JMP repeat_end_emit_encodeBlockAsm10B 3565 3566repeat_two_offset_match_repeat_encodeBlockAsm10B: 3567 XORQ R8, R8 3568 LEAL 1(R8)(SI*4), SI 3569 MOVB DI, 1(AX) 3570 SARL $0x08, DI 3571 SHLL $0x05, DI 3572 ORL DI, SI 3573 MOVB SI, (AX) 3574 ADDQ $0x02, AX 3575 JMP repeat_end_emit_encodeBlockAsm10B 3576 3577repeat_as_copy_encodeBlockAsm10B: 3578 // emitCopy 3579two_byte_offset_repeat_as_copy_encodeBlockAsm10B: 3580 CMPL SI, $0x40 3581 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B 3582 MOVB $0xee, (AX) 3583 MOVW DI, 1(AX) 3584 LEAL -60(SI), SI 3585 ADDQ $0x03, AX 3586 3587 // emitRepeat 3588 MOVL SI, R8 3589 LEAL -4(SI), SI 3590 CMPL R8, $0x08 3591 JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3592 CMPL R8, $0x0c 3593 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3594 CMPL DI, $0x00000800 3595 JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3596 3597cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3598 CMPL SI, $0x00000104 3599 JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short 3600 LEAL -256(SI), SI 3601 MOVW $0x0019, (AX) 3602 MOVW SI, 2(AX) 3603 ADDQ $0x04, AX 3604 JMP repeat_end_emit_encodeBlockAsm10B 3605 3606repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3607 LEAL -4(SI), SI 3608 MOVW $0x0015, (AX) 3609 MOVB SI, 2(AX) 3610 ADDQ $0x03, AX 3611 JMP repeat_end_emit_encodeBlockAsm10B 3612 3613repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3614 SHLL $0x02, SI 3615 ORL $0x01, SI 3616 MOVW SI, (AX) 3617 ADDQ $0x02, AX 3618 JMP repeat_end_emit_encodeBlockAsm10B 3619 3620repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: 3621 XORQ R8, R8 3622 LEAL 1(R8)(SI*4), SI 3623 MOVB DI, 
1(AX) 3624 SARL $0x08, DI 3625 SHLL $0x05, DI 3626 ORL DI, SI 3627 MOVB SI, (AX) 3628 ADDQ $0x02, AX 3629 JMP repeat_end_emit_encodeBlockAsm10B 3630 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B 3631 3632two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: 3633 CMPL SI, $0x0c 3634 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B 3635 CMPL DI, $0x00000800 3636 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B 3637 MOVB $0x01, BL 3638 LEAL -16(BX)(SI*4), SI 3639 MOVB DI, 1(AX) 3640 SHRL $0x08, DI 3641 SHLL $0x05, DI 3642 ORL DI, SI 3643 MOVB SI, (AX) 3644 ADDQ $0x02, AX 3645 JMP repeat_end_emit_encodeBlockAsm10B 3646 3647emit_copy_three_repeat_as_copy_encodeBlockAsm10B: 3648 MOVB $0x02, BL 3649 LEAL -4(BX)(SI*4), SI 3650 MOVB SI, (AX) 3651 MOVW DI, 1(AX) 3652 ADDQ $0x03, AX 3653 3654repeat_end_emit_encodeBlockAsm10B: 3655 MOVL CX, 12(SP) 3656 JMP search_loop_encodeBlockAsm10B 3657 3658no_repeat_found_encodeBlockAsm10B: 3659 CMPL (DX)(SI*1), DI 3660 JEQ candidate_match_encodeBlockAsm10B 3661 SHRQ $0x08, DI 3662 MOVL 24(SP)(R10*4), SI 3663 LEAL 2(CX), R9 3664 CMPL (DX)(R8*1), DI 3665 JEQ candidate2_match_encodeBlockAsm10B 3666 MOVL R9, 24(SP)(R10*4) 3667 SHRQ $0x08, DI 3668 CMPL (DX)(SI*1), DI 3669 JEQ candidate3_match_encodeBlockAsm10B 3670 MOVL 20(SP), CX 3671 JMP search_loop_encodeBlockAsm10B 3672 3673candidate3_match_encodeBlockAsm10B: 3674 ADDL $0x02, CX 3675 JMP candidate_match_encodeBlockAsm10B 3676 3677candidate2_match_encodeBlockAsm10B: 3678 MOVL R9, 24(SP)(R10*4) 3679 INCL CX 3680 MOVL R8, SI 3681 3682candidate_match_encodeBlockAsm10B: 3683 MOVL 12(SP), DI 3684 TESTL SI, SI 3685 JZ match_extend_back_end_encodeBlockAsm10B 3686 3687match_extend_back_loop_encodeBlockAsm10B: 3688 CMPL CX, DI 3689 JLE match_extend_back_end_encodeBlockAsm10B 3690 MOVB -1(DX)(SI*1), BL 3691 MOVB -1(DX)(CX*1), R8 3692 CMPB BL, R8 3693 JNE match_extend_back_end_encodeBlockAsm10B 3694 LEAL -1(CX), CX 3695 DECL SI 3696 JZ match_extend_back_end_encodeBlockAsm10B 3697 JMP 
match_extend_back_loop_encodeBlockAsm10B 3698 3699match_extend_back_end_encodeBlockAsm10B: 3700 MOVL CX, DI 3701 SUBL 12(SP), DI 3702 LEAQ 3(AX)(DI*1), DI 3703 CMPQ DI, (SP) 3704 JL match_dst_size_check_encodeBlockAsm10B 3705 MOVQ $0x00000000, ret+48(FP) 3706 RET 3707 3708match_dst_size_check_encodeBlockAsm10B: 3709 MOVL CX, DI 3710 MOVL 12(SP), R8 3711 CMPL R8, DI 3712 JEQ emit_literal_done_match_emit_encodeBlockAsm10B 3713 MOVL DI, R9 3714 MOVL DI, 12(SP) 3715 LEAQ (DX)(R8*1), DI 3716 SUBL R8, R9 3717 LEAL -1(R9), R8 3718 CMPL R8, $0x3c 3719 JLT one_byte_match_emit_encodeBlockAsm10B 3720 CMPL R8, $0x00000100 3721 JLT two_bytes_match_emit_encodeBlockAsm10B 3722 MOVB $0xf4, (AX) 3723 MOVW R8, 1(AX) 3724 ADDQ $0x03, AX 3725 JMP memmove_long_match_emit_encodeBlockAsm10B 3726 3727two_bytes_match_emit_encodeBlockAsm10B: 3728 MOVB $0xf0, (AX) 3729 MOVB R8, 1(AX) 3730 ADDQ $0x02, AX 3731 CMPL R8, $0x40 3732 JL memmove_match_emit_encodeBlockAsm10B 3733 JMP memmove_long_match_emit_encodeBlockAsm10B 3734 3735one_byte_match_emit_encodeBlockAsm10B: 3736 SHLB $0x02, R8 3737 MOVB R8, (AX) 3738 ADDQ $0x01, AX 3739 3740memmove_match_emit_encodeBlockAsm10B: 3741 LEAQ (AX)(R9*1), R8 3742 3743 // genMemMoveShort 3744 CMPQ R9, $0x03 3745 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2 3746 JE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3 3747 CMPQ R9, $0x08 3748 JB emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7 3749 CMPQ R9, $0x10 3750 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 3751 CMPQ R9, $0x20 3752 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 3753 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 3754 3755emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2: 3756 MOVB (DI), R10 3757 MOVB -1(DI)(R9*1), DI 3758 MOVB R10, (AX) 3759 MOVB DI, -1(AX)(R9*1) 3760 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3761 
3762emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3: 3763 MOVW (DI), R10 3764 MOVB 2(DI), DI 3765 MOVW R10, (AX) 3766 MOVB DI, 2(AX) 3767 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3768 3769emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7: 3770 MOVL (DI), R10 3771 MOVL -4(DI)(R9*1), DI 3772 MOVL R10, (AX) 3773 MOVL DI, -4(AX)(R9*1) 3774 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3775 3776emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: 3777 MOVQ (DI), R10 3778 MOVQ -8(DI)(R9*1), DI 3779 MOVQ R10, (AX) 3780 MOVQ DI, -8(AX)(R9*1) 3781 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3782 3783emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: 3784 MOVOU (DI), X0 3785 MOVOU -16(DI)(R9*1), X1 3786 MOVOU X0, (AX) 3787 MOVOU X1, -16(AX)(R9*1) 3788 JMP memmove_end_copy_match_emit_encodeBlockAsm10B 3789 3790emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: 3791 MOVOU (DI), X0 3792 MOVOU 16(DI), X1 3793 MOVOU -32(DI)(R9*1), X2 3794 MOVOU -16(DI)(R9*1), X3 3795 MOVOU X0, (AX) 3796 MOVOU X1, 16(AX) 3797 MOVOU X2, -32(AX)(R9*1) 3798 MOVOU X3, -16(AX)(R9*1) 3799 3800memmove_end_copy_match_emit_encodeBlockAsm10B: 3801 MOVQ R8, AX 3802 JMP emit_literal_done_match_emit_encodeBlockAsm10B 3803 3804memmove_long_match_emit_encodeBlockAsm10B: 3805 LEAQ (AX)(R9*1), R8 3806 3807 // genMemMoveLong 3808 MOVOU (DI), X0 3809 MOVOU 16(DI), X1 3810 MOVOU -32(DI)(R9*1), X2 3811 MOVOU -16(DI)(R9*1), X3 3812 MOVQ R9, R11 3813 SHRQ $0x05, R11 3814 MOVQ AX, R10 3815 ANDL $0x0000001f, R10 3816 MOVQ $0x00000040, R12 3817 SUBQ R10, R12 3818 DECQ R11 3819 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3820 LEAQ -32(DI)(R12*1), R10 3821 LEAQ -32(AX)(R12*1), R13 3822 3823emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: 3824 MOVOU (R10), X4 3825 MOVOU 16(R10), X5 3826 MOVOA X4, (R13) 3827 MOVOA X5, 16(R13) 3828 ADDQ $0x20, R13 3829 ADDQ 
$0x20, R10 3830 ADDQ $0x20, R12 3831 DECQ R11 3832 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back 3833 3834emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: 3835 MOVOU -32(DI)(R12*1), X4 3836 MOVOU -16(DI)(R12*1), X5 3837 MOVOA X4, -32(AX)(R12*1) 3838 MOVOA X5, -16(AX)(R12*1) 3839 ADDQ $0x20, R12 3840 CMPQ R9, R12 3841 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 3842 MOVOU X0, (AX) 3843 MOVOU X1, 16(AX) 3844 MOVOU X2, -32(AX)(R9*1) 3845 MOVOU X3, -16(AX)(R9*1) 3846 MOVQ R8, AX 3847 3848emit_literal_done_match_emit_encodeBlockAsm10B: 3849match_nolit_loop_encodeBlockAsm10B: 3850 MOVL CX, DI 3851 SUBL SI, DI 3852 MOVL DI, 16(SP) 3853 ADDL $0x04, CX 3854 ADDL $0x04, SI 3855 MOVQ src_len+32(FP), DI 3856 SUBL CX, DI 3857 LEAQ (DX)(CX*1), R8 3858 LEAQ (DX)(SI*1), SI 3859 3860 // matchLen 3861 XORL R10, R10 3862 CMPL DI, $0x08 3863 JL matchlen_single_match_nolit_encodeBlockAsm10B 3864 3865matchlen_loopback_match_nolit_encodeBlockAsm10B: 3866 MOVQ (R8)(R10*1), R9 3867 XORQ (SI)(R10*1), R9 3868 TESTQ R9, R9 3869 JZ matchlen_loop_match_nolit_encodeBlockAsm10B 3870 BSFQ R9, R9 3871 SARQ $0x03, R9 3872 LEAL (R10)(R9*1), R10 3873 JMP match_nolit_end_encodeBlockAsm10B 3874 3875matchlen_loop_match_nolit_encodeBlockAsm10B: 3876 LEAL -8(DI), DI 3877 LEAL 8(R10), R10 3878 CMPL DI, $0x08 3879 JGE matchlen_loopback_match_nolit_encodeBlockAsm10B 3880 3881matchlen_single_match_nolit_encodeBlockAsm10B: 3882 TESTL DI, DI 3883 JZ match_nolit_end_encodeBlockAsm10B 3884 3885matchlen_single_loopback_match_nolit_encodeBlockAsm10B: 3886 MOVB (R8)(R10*1), R9 3887 CMPB (SI)(R10*1), R9 3888 JNE match_nolit_end_encodeBlockAsm10B 3889 LEAL 1(R10), R10 3890 DECL DI 3891 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B 3892 3893match_nolit_end_encodeBlockAsm10B: 3894 ADDL R10, CX 3895 MOVL 16(SP), SI 3896 ADDL $0x04, R10 3897 MOVL CX, 12(SP) 3898 3899 // emitCopy 
3900two_byte_offset_match_nolit_encodeBlockAsm10B: 3901 CMPL R10, $0x40 3902 JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B 3903 MOVB $0xee, (AX) 3904 MOVW SI, 1(AX) 3905 LEAL -60(R10), R10 3906 ADDQ $0x03, AX 3907 3908 // emitRepeat 3909 MOVL R10, DI 3910 LEAL -4(R10), R10 3911 CMPL DI, $0x08 3912 JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short 3913 CMPL DI, $0x0c 3914 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short 3915 CMPL SI, $0x00000800 3916 JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short 3917 3918cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: 3919 CMPL R10, $0x00000104 3920 JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short 3921 LEAL -256(R10), R10 3922 MOVW $0x0019, (AX) 3923 MOVW R10, 2(AX) 3924 ADDQ $0x04, AX 3925 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3926 3927repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: 3928 LEAL -4(R10), R10 3929 MOVW $0x0015, (AX) 3930 MOVB R10, 2(AX) 3931 ADDQ $0x03, AX 3932 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3933 3934repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: 3935 SHLL $0x02, R10 3936 ORL $0x01, R10 3937 MOVW R10, (AX) 3938 ADDQ $0x02, AX 3939 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3940 3941repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: 3942 XORQ DI, DI 3943 LEAL 1(DI)(R10*4), R10 3944 MOVB SI, 1(AX) 3945 SARL $0x08, SI 3946 SHLL $0x05, SI 3947 ORL SI, R10 3948 MOVB R10, (AX) 3949 ADDQ $0x02, AX 3950 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3951 JMP two_byte_offset_match_nolit_encodeBlockAsm10B 3952 3953two_byte_offset_short_match_nolit_encodeBlockAsm10B: 3954 CMPL R10, $0x0c 3955 JGE emit_copy_three_match_nolit_encodeBlockAsm10B 3956 CMPL SI, $0x00000800 3957 JGE emit_copy_three_match_nolit_encodeBlockAsm10B 3958 MOVB $0x01, BL 3959 LEAL -16(BX)(R10*4), R10 3960 MOVB SI, 1(AX) 3961 SHRL $0x08, SI 3962 SHLL $0x05, SI 3963 ORL SI, R10 3964 MOVB R10, 
(AX) 3965 ADDQ $0x02, AX 3966 JMP match_nolit_emitcopy_end_encodeBlockAsm10B 3967 3968emit_copy_three_match_nolit_encodeBlockAsm10B: 3969 MOVB $0x02, BL 3970 LEAL -4(BX)(R10*4), R10 3971 MOVB R10, (AX) 3972 MOVW SI, 1(AX) 3973 ADDQ $0x03, AX 3974 3975match_nolit_emitcopy_end_encodeBlockAsm10B: 3976 CMPL CX, 8(SP) 3977 JGE emit_remainder_encodeBlockAsm10B 3978 MOVQ -2(DX)(CX*1), DI 3979 CMPQ AX, (SP) 3980 JL match_nolit_dst_ok_encodeBlockAsm10B 3981 MOVQ $0x00000000, ret+48(FP) 3982 RET 3983 3984match_nolit_dst_ok_encodeBlockAsm10B: 3985 MOVQ $0x9e3779b1, R9 3986 MOVQ DI, R8 3987 SHRQ $0x10, DI 3988 MOVQ DI, SI 3989 SHLQ $0x20, R8 3990 IMULQ R9, R8 3991 SHRQ $0x36, R8 3992 SHLQ $0x20, SI 3993 IMULQ R9, SI 3994 SHRQ $0x36, SI 3995 LEAL -2(CX), R9 3996 LEAQ 24(SP)(SI*4), R10 3997 MOVL (R10), SI 3998 MOVL R9, 24(SP)(R8*4) 3999 MOVL CX, (R10) 4000 CMPL (DX)(SI*1), DI 4001 JEQ match_nolit_loop_encodeBlockAsm10B 4002 INCL CX 4003 JMP search_loop_encodeBlockAsm10B 4004 4005emit_remainder_encodeBlockAsm10B: 4006 MOVQ src_len+32(FP), CX 4007 SUBL 12(SP), CX 4008 LEAQ 3(AX)(CX*1), CX 4009 CMPQ CX, (SP) 4010 JL emit_remainder_ok_encodeBlockAsm10B 4011 MOVQ $0x00000000, ret+48(FP) 4012 RET 4013 4014emit_remainder_ok_encodeBlockAsm10B: 4015 MOVQ src_len+32(FP), CX 4016 MOVL 12(SP), BX 4017 CMPL BX, CX 4018 JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B 4019 MOVL CX, SI 4020 MOVL CX, 12(SP) 4021 LEAQ (DX)(BX*1), CX 4022 SUBL BX, SI 4023 LEAL -1(SI), DX 4024 CMPL DX, $0x3c 4025 JLT one_byte_emit_remainder_encodeBlockAsm10B 4026 CMPL DX, $0x00000100 4027 JLT two_bytes_emit_remainder_encodeBlockAsm10B 4028 MOVB $0xf4, (AX) 4029 MOVW DX, 1(AX) 4030 ADDQ $0x03, AX 4031 JMP memmove_long_emit_remainder_encodeBlockAsm10B 4032 4033two_bytes_emit_remainder_encodeBlockAsm10B: 4034 MOVB $0xf0, (AX) 4035 MOVB DL, 1(AX) 4036 ADDQ $0x02, AX 4037 CMPL DX, $0x40 4038 JL memmove_emit_remainder_encodeBlockAsm10B 4039 JMP memmove_long_emit_remainder_encodeBlockAsm10B 4040 
4041one_byte_emit_remainder_encodeBlockAsm10B: 4042 SHLB $0x02, DL 4043 MOVB DL, (AX) 4044 ADDQ $0x01, AX 4045 4046memmove_emit_remainder_encodeBlockAsm10B: 4047 LEAQ (AX)(SI*1), DX 4048 MOVL SI, BX 4049 4050 // genMemMoveShort 4051 CMPQ BX, $0x03 4052 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2 4053 JE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3 4054 CMPQ BX, $0x08 4055 JB emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7 4056 CMPQ BX, $0x10 4057 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16 4058 CMPQ BX, $0x20 4059 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32 4060 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 4061 4062emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: 4063 MOVB (CX), SI 4064 MOVB -1(CX)(BX*1), CL 4065 MOVB SI, (AX) 4066 MOVB CL, -1(AX)(BX*1) 4067 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 4068 4069emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: 4070 MOVW (CX), SI 4071 MOVB 2(CX), CL 4072 MOVW SI, (AX) 4073 MOVB CL, 2(AX) 4074 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 4075 4076emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: 4077 MOVL (CX), SI 4078 MOVL -4(CX)(BX*1), CX 4079 MOVL SI, (AX) 4080 MOVL CX, -4(AX)(BX*1) 4081 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 4082 4083emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: 4084 MOVQ (CX), SI 4085 MOVQ -8(CX)(BX*1), CX 4086 MOVQ SI, (AX) 4087 MOVQ CX, -8(AX)(BX*1) 4088 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 4089 4090emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: 4091 MOVOU (CX), X0 4092 MOVOU -16(CX)(BX*1), X1 4093 MOVOU X0, (AX) 4094 MOVOU X1, -16(AX)(BX*1) 4095 JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B 4096 
4097emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: 4098 MOVOU (CX), X0 4099 MOVOU 16(CX), X1 4100 MOVOU -32(CX)(BX*1), X2 4101 MOVOU -16(CX)(BX*1), X3 4102 MOVOU X0, (AX) 4103 MOVOU X1, 16(AX) 4104 MOVOU X2, -32(AX)(BX*1) 4105 MOVOU X3, -16(AX)(BX*1) 4106 4107memmove_end_copy_emit_remainder_encodeBlockAsm10B: 4108 MOVQ DX, AX 4109 JMP emit_literal_done_emit_remainder_encodeBlockAsm10B 4110 4111memmove_long_emit_remainder_encodeBlockAsm10B: 4112 LEAQ (AX)(SI*1), DX 4113 MOVL SI, BX 4114 4115 // genMemMoveLong 4116 MOVOU (CX), X0 4117 MOVOU 16(CX), X1 4118 MOVOU -32(CX)(BX*1), X2 4119 MOVOU -16(CX)(BX*1), X3 4120 MOVQ BX, DI 4121 SHRQ $0x05, DI 4122 MOVQ AX, SI 4123 ANDL $0x0000001f, SI 4124 MOVQ $0x00000040, R8 4125 SUBQ SI, R8 4126 DECQ DI 4127 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 4128 LEAQ -32(CX)(R8*1), SI 4129 LEAQ -32(AX)(R8*1), R9 4130 4131emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: 4132 MOVOU (SI), X4 4133 MOVOU 16(SI), X5 4134 MOVOA X4, (R9) 4135 MOVOA X5, 16(R9) 4136 ADDQ $0x20, R9 4137 ADDQ $0x20, SI 4138 ADDQ $0x20, R8 4139 DECQ DI 4140 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back 4141 4142emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: 4143 MOVOU -32(CX)(R8*1), X4 4144 MOVOU -16(CX)(R8*1), X5 4145 MOVOA X4, -32(AX)(R8*1) 4146 MOVOA X5, -16(AX)(R8*1) 4147 ADDQ $0x20, R8 4148 CMPQ BX, R8 4149 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 4150 MOVOU X0, (AX) 4151 MOVOU X1, 16(AX) 4152 MOVOU X2, -32(AX)(BX*1) 4153 MOVOU X3, -16(AX)(BX*1) 4154 MOVQ DX, AX 4155 4156emit_literal_done_emit_remainder_encodeBlockAsm10B: 4157 MOVQ dst_base+0(FP), CX 4158 SUBQ CX, AX 4159 MOVQ AX, ret+48(FP) 4160 RET 4161 4162// func encodeBlockAsm8B(dst []byte, src []byte) int 4163// Requires: SSE2 4164TEXT ·encodeBlockAsm8B(SB), $1048-56 4165 MOVQ dst_base+0(FP), AX 4166 
MOVQ $0x00000008, CX 4167 LEAQ 24(SP), DX 4168 PXOR X0, X0 4169 4170zero_loop_encodeBlockAsm8B: 4171 MOVOU X0, (DX) 4172 MOVOU X0, 16(DX) 4173 MOVOU X0, 32(DX) 4174 MOVOU X0, 48(DX) 4175 MOVOU X0, 64(DX) 4176 MOVOU X0, 80(DX) 4177 MOVOU X0, 96(DX) 4178 MOVOU X0, 112(DX) 4179 ADDQ $0x80, DX 4180 DECQ CX 4181 JNZ zero_loop_encodeBlockAsm8B 4182 MOVL $0x00000000, 12(SP) 4183 MOVQ src_len+32(FP), CX 4184 LEAQ -5(CX), DX 4185 LEAQ -8(CX), SI 4186 MOVL SI, 8(SP) 4187 SHRQ $0x05, CX 4188 SUBL CX, DX 4189 LEAQ (AX)(DX*1), DX 4190 MOVQ DX, (SP) 4191 MOVL $0x00000001, CX 4192 MOVL CX, 16(SP) 4193 MOVQ src_base+24(FP), DX 4194 4195search_loop_encodeBlockAsm8B: 4196 MOVL CX, SI 4197 SUBL 12(SP), SI 4198 SHRL $0x04, SI 4199 LEAL 4(CX)(SI*1), SI 4200 CMPL SI, 8(SP) 4201 JGE emit_remainder_encodeBlockAsm8B 4202 MOVQ (DX)(CX*1), DI 4203 MOVL SI, 20(SP) 4204 MOVQ $0x9e3779b1, R9 4205 MOVQ DI, R10 4206 MOVQ DI, R11 4207 SHRQ $0x08, R11 4208 SHLQ $0x20, R10 4209 IMULQ R9, R10 4210 SHRQ $0x38, R10 4211 SHLQ $0x20, R11 4212 IMULQ R9, R11 4213 SHRQ $0x38, R11 4214 MOVL 24(SP)(R10*4), SI 4215 MOVL 24(SP)(R11*4), R8 4216 MOVL CX, 24(SP)(R10*4) 4217 LEAL 1(CX), R10 4218 MOVL R10, 24(SP)(R11*4) 4219 MOVQ DI, R10 4220 SHRQ $0x10, R10 4221 SHLQ $0x20, R10 4222 IMULQ R9, R10 4223 SHRQ $0x38, R10 4224 MOVL CX, R9 4225 SUBL 16(SP), R9 4226 MOVL 1(DX)(R9*1), R11 4227 MOVQ DI, R9 4228 SHRQ $0x08, R9 4229 CMPL R9, R11 4230 JNE no_repeat_found_encodeBlockAsm8B 4231 LEAL 1(CX), DI 4232 MOVL 12(SP), R8 4233 MOVL DI, SI 4234 SUBL 16(SP), SI 4235 JZ repeat_extend_back_end_encodeBlockAsm8B 4236 4237repeat_extend_back_loop_encodeBlockAsm8B: 4238 CMPL DI, R8 4239 JLE repeat_extend_back_end_encodeBlockAsm8B 4240 MOVB -1(DX)(SI*1), BL 4241 MOVB -1(DX)(DI*1), R9 4242 CMPB BL, R9 4243 JNE repeat_extend_back_end_encodeBlockAsm8B 4244 LEAL -1(DI), DI 4245 DECL SI 4246 JNZ repeat_extend_back_loop_encodeBlockAsm8B 4247 4248repeat_extend_back_end_encodeBlockAsm8B: 4249 MOVL 12(SP), SI 4250 CMPL SI, DI 4251 JEQ 
emit_literal_done_repeat_emit_encodeBlockAsm8B 4252 MOVL DI, R9 4253 MOVL DI, 12(SP) 4254 LEAQ (DX)(SI*1), R10 4255 SUBL SI, R9 4256 LEAL -1(R9), SI 4257 CMPL SI, $0x3c 4258 JLT one_byte_repeat_emit_encodeBlockAsm8B 4259 CMPL SI, $0x00000100 4260 JLT two_bytes_repeat_emit_encodeBlockAsm8B 4261 MOVB $0xf4, (AX) 4262 MOVW SI, 1(AX) 4263 ADDQ $0x03, AX 4264 JMP memmove_long_repeat_emit_encodeBlockAsm8B 4265 4266two_bytes_repeat_emit_encodeBlockAsm8B: 4267 MOVB $0xf0, (AX) 4268 MOVB SI, 1(AX) 4269 ADDQ $0x02, AX 4270 CMPL SI, $0x40 4271 JL memmove_repeat_emit_encodeBlockAsm8B 4272 JMP memmove_long_repeat_emit_encodeBlockAsm8B 4273 4274one_byte_repeat_emit_encodeBlockAsm8B: 4275 SHLB $0x02, SI 4276 MOVB SI, (AX) 4277 ADDQ $0x01, AX 4278 4279memmove_repeat_emit_encodeBlockAsm8B: 4280 LEAQ (AX)(R9*1), SI 4281 4282 // genMemMoveShort 4283 CMPQ R9, $0x03 4284 JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2 4285 JE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3 4286 CMPQ R9, $0x08 4287 JB emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7 4288 CMPQ R9, $0x10 4289 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 4290 CMPQ R9, $0x20 4291 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 4292 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 4293 4294emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2: 4295 MOVB (R10), R11 4296 MOVB -1(R10)(R9*1), R10 4297 MOVB R11, (AX) 4298 MOVB R10, -1(AX)(R9*1) 4299 JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B 4300 4301emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3: 4302 MOVW (R10), R11 4303 MOVB 2(R10), R10 4304 MOVW R11, (AX) 4305 MOVB R10, 2(AX) 4306 JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B 4307 4308emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7: 4309 MOVL (R10), R11 4310 MOVL -4(R10)(R9*1), R10 4311 MOVL R11, (AX) 4312 MOVL R10, -4(AX)(R9*1) 4313 JMP 
memmove_end_copy_repeat_emit_encodeBlockAsm8B 4314 4315emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: 4316 MOVQ (R10), R11 4317 MOVQ -8(R10)(R9*1), R10 4318 MOVQ R11, (AX) 4319 MOVQ R10, -8(AX)(R9*1) 4320 JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B 4321 4322emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: 4323 MOVOU (R10), X0 4324 MOVOU -16(R10)(R9*1), X1 4325 MOVOU X0, (AX) 4326 MOVOU X1, -16(AX)(R9*1) 4327 JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B 4328 4329emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: 4330 MOVOU (R10), X0 4331 MOVOU 16(R10), X1 4332 MOVOU -32(R10)(R9*1), X2 4333 MOVOU -16(R10)(R9*1), X3 4334 MOVOU X0, (AX) 4335 MOVOU X1, 16(AX) 4336 MOVOU X2, -32(AX)(R9*1) 4337 MOVOU X3, -16(AX)(R9*1) 4338 4339memmove_end_copy_repeat_emit_encodeBlockAsm8B: 4340 MOVQ SI, AX 4341 JMP emit_literal_done_repeat_emit_encodeBlockAsm8B 4342 4343memmove_long_repeat_emit_encodeBlockAsm8B: 4344 LEAQ (AX)(R9*1), SI 4345 4346 // genMemMoveLong 4347 MOVOU (R10), X0 4348 MOVOU 16(R10), X1 4349 MOVOU -32(R10)(R9*1), X2 4350 MOVOU -16(R10)(R9*1), X3 4351 MOVQ R9, R12 4352 SHRQ $0x05, R12 4353 MOVQ AX, R11 4354 ANDL $0x0000001f, R11 4355 MOVQ $0x00000040, R13 4356 SUBQ R11, R13 4357 DECQ R12 4358 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 4359 LEAQ -32(R10)(R13*1), R11 4360 LEAQ -32(AX)(R13*1), R14 4361 4362emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: 4363 MOVOU (R11), X4 4364 MOVOU 16(R11), X5 4365 MOVOA X4, (R14) 4366 MOVOA X5, 16(R14) 4367 ADDQ $0x20, R14 4368 ADDQ $0x20, R11 4369 ADDQ $0x20, R13 4370 DECQ R12 4371 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back 4372 4373emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: 4374 MOVOU -32(R10)(R13*1), X4 4375 MOVOU -16(R10)(R13*1), X5 4376 MOVOA X4, -32(AX)(R13*1) 4377 MOVOA X5, -16(AX)(R13*1) 4378 ADDQ $0x20, R13 4379 CMPQ R9, R13 
4380 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 4381 MOVOU X0, (AX) 4382 MOVOU X1, 16(AX) 4383 MOVOU X2, -32(AX)(R9*1) 4384 MOVOU X3, -16(AX)(R9*1) 4385 MOVQ SI, AX 4386 4387emit_literal_done_repeat_emit_encodeBlockAsm8B: 4388 ADDL $0x05, CX 4389 MOVL CX, SI 4390 SUBL 16(SP), SI 4391 MOVQ src_len+32(FP), R9 4392 SUBL CX, R9 4393 LEAQ (DX)(CX*1), R10 4394 LEAQ (DX)(SI*1), SI 4395 4396 // matchLen 4397 XORL R12, R12 4398 CMPL R9, $0x08 4399 JL matchlen_single_repeat_extend_encodeBlockAsm8B 4400 4401matchlen_loopback_repeat_extend_encodeBlockAsm8B: 4402 MOVQ (R10)(R12*1), R11 4403 XORQ (SI)(R12*1), R11 4404 TESTQ R11, R11 4405 JZ matchlen_loop_repeat_extend_encodeBlockAsm8B 4406 BSFQ R11, R11 4407 SARQ $0x03, R11 4408 LEAL (R12)(R11*1), R12 4409 JMP repeat_extend_forward_end_encodeBlockAsm8B 4410 4411matchlen_loop_repeat_extend_encodeBlockAsm8B: 4412 LEAL -8(R9), R9 4413 LEAL 8(R12), R12 4414 CMPL R9, $0x08 4415 JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B 4416 4417matchlen_single_repeat_extend_encodeBlockAsm8B: 4418 TESTL R9, R9 4419 JZ repeat_extend_forward_end_encodeBlockAsm8B 4420 4421matchlen_single_loopback_repeat_extend_encodeBlockAsm8B: 4422 MOVB (R10)(R12*1), R11 4423 CMPB (SI)(R12*1), R11 4424 JNE repeat_extend_forward_end_encodeBlockAsm8B 4425 LEAL 1(R12), R12 4426 DECL R9 4427 JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B 4428 4429repeat_extend_forward_end_encodeBlockAsm8B: 4430 ADDL R12, CX 4431 MOVL CX, SI 4432 SUBL DI, SI 4433 MOVL 16(SP), DI 4434 TESTL R8, R8 4435 JZ repeat_as_copy_encodeBlockAsm8B 4436 4437 // emitRepeat 4438 MOVL SI, DI 4439 LEAL -4(SI), SI 4440 CMPL DI, $0x08 4441 JLE repeat_two_match_repeat_encodeBlockAsm8B 4442 CMPL DI, $0x0c 4443 JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B 4444 4445cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: 4446 CMPL SI, $0x00000104 4447 JLT repeat_three_match_repeat_encodeBlockAsm8B 4448 LEAL -256(SI), SI 4449 MOVW $0x0019, (AX) 
4450 MOVW SI, 2(AX) 4451 ADDQ $0x04, AX 4452 JMP repeat_end_emit_encodeBlockAsm8B 4453 4454repeat_three_match_repeat_encodeBlockAsm8B: 4455 LEAL -4(SI), SI 4456 MOVW $0x0015, (AX) 4457 MOVB SI, 2(AX) 4458 ADDQ $0x03, AX 4459 JMP repeat_end_emit_encodeBlockAsm8B 4460 4461repeat_two_match_repeat_encodeBlockAsm8B: 4462 SHLL $0x02, SI 4463 ORL $0x01, SI 4464 MOVW SI, (AX) 4465 ADDQ $0x02, AX 4466 JMP repeat_end_emit_encodeBlockAsm8B 4467 XORQ R8, R8 4468 LEAL 1(R8)(SI*4), SI 4469 MOVB DI, 1(AX) 4470 SARL $0x08, DI 4471 SHLL $0x05, DI 4472 ORL DI, SI 4473 MOVB SI, (AX) 4474 ADDQ $0x02, AX 4475 JMP repeat_end_emit_encodeBlockAsm8B 4476 4477repeat_as_copy_encodeBlockAsm8B: 4478 // emitCopy 4479two_byte_offset_repeat_as_copy_encodeBlockAsm8B: 4480 CMPL SI, $0x40 4481 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B 4482 MOVB $0xee, (AX) 4483 MOVW DI, 1(AX) 4484 LEAL -60(SI), SI 4485 ADDQ $0x03, AX 4486 4487 // emitRepeat 4488 MOVL SI, DI 4489 LEAL -4(SI), SI 4490 CMPL DI, $0x08 4491 JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short 4492 CMPL DI, $0x0c 4493 JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short 4494 4495cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: 4496 CMPL SI, $0x00000104 4497 JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short 4498 LEAL -256(SI), SI 4499 MOVW $0x0019, (AX) 4500 MOVW SI, 2(AX) 4501 ADDQ $0x04, AX 4502 JMP repeat_end_emit_encodeBlockAsm8B 4503 4504repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: 4505 LEAL -4(SI), SI 4506 MOVW $0x0015, (AX) 4507 MOVB SI, 2(AX) 4508 ADDQ $0x03, AX 4509 JMP repeat_end_emit_encodeBlockAsm8B 4510 4511repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: 4512 SHLL $0x02, SI 4513 ORL $0x01, SI 4514 MOVW SI, (AX) 4515 ADDQ $0x02, AX 4516 JMP repeat_end_emit_encodeBlockAsm8B 4517 XORQ R8, R8 4518 LEAL 1(R8)(SI*4), SI 4519 MOVB DI, 1(AX) 4520 SARL $0x08, DI 4521 SHLL $0x05, DI 4522 ORL DI, SI 4523 MOVB SI, (AX) 4524 
ADDQ $0x02, AX 4525 JMP repeat_end_emit_encodeBlockAsm8B 4526 JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B 4527 4528two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: 4529 CMPL SI, $0x0c 4530 JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B 4531 MOVB $0x01, BL 4532 LEAL -16(BX)(SI*4), SI 4533 MOVB DI, 1(AX) 4534 SHRL $0x08, DI 4535 SHLL $0x05, DI 4536 ORL DI, SI 4537 MOVB SI, (AX) 4538 ADDQ $0x02, AX 4539 JMP repeat_end_emit_encodeBlockAsm8B 4540 4541emit_copy_three_repeat_as_copy_encodeBlockAsm8B: 4542 MOVB $0x02, BL 4543 LEAL -4(BX)(SI*4), SI 4544 MOVB SI, (AX) 4545 MOVW DI, 1(AX) 4546 ADDQ $0x03, AX 4547 4548repeat_end_emit_encodeBlockAsm8B: 4549 MOVL CX, 12(SP) 4550 JMP search_loop_encodeBlockAsm8B 4551 4552no_repeat_found_encodeBlockAsm8B: 4553 CMPL (DX)(SI*1), DI 4554 JEQ candidate_match_encodeBlockAsm8B 4555 SHRQ $0x08, DI 4556 MOVL 24(SP)(R10*4), SI 4557 LEAL 2(CX), R9 4558 CMPL (DX)(R8*1), DI 4559 JEQ candidate2_match_encodeBlockAsm8B 4560 MOVL R9, 24(SP)(R10*4) 4561 SHRQ $0x08, DI 4562 CMPL (DX)(SI*1), DI 4563 JEQ candidate3_match_encodeBlockAsm8B 4564 MOVL 20(SP), CX 4565 JMP search_loop_encodeBlockAsm8B 4566 4567candidate3_match_encodeBlockAsm8B: 4568 ADDL $0x02, CX 4569 JMP candidate_match_encodeBlockAsm8B 4570 4571candidate2_match_encodeBlockAsm8B: 4572 MOVL R9, 24(SP)(R10*4) 4573 INCL CX 4574 MOVL R8, SI 4575 4576candidate_match_encodeBlockAsm8B: 4577 MOVL 12(SP), DI 4578 TESTL SI, SI 4579 JZ match_extend_back_end_encodeBlockAsm8B 4580 4581match_extend_back_loop_encodeBlockAsm8B: 4582 CMPL CX, DI 4583 JLE match_extend_back_end_encodeBlockAsm8B 4584 MOVB -1(DX)(SI*1), BL 4585 MOVB -1(DX)(CX*1), R8 4586 CMPB BL, R8 4587 JNE match_extend_back_end_encodeBlockAsm8B 4588 LEAL -1(CX), CX 4589 DECL SI 4590 JZ match_extend_back_end_encodeBlockAsm8B 4591 JMP match_extend_back_loop_encodeBlockAsm8B 4592 4593match_extend_back_end_encodeBlockAsm8B: 4594 MOVL CX, DI 4595 SUBL 12(SP), DI 4596 LEAQ 3(AX)(DI*1), DI 4597 CMPQ DI, (SP) 4598 JL 
match_dst_size_check_encodeBlockAsm8B 4599 MOVQ $0x00000000, ret+48(FP) 4600 RET 4601 4602match_dst_size_check_encodeBlockAsm8B: 4603 MOVL CX, DI 4604 MOVL 12(SP), R8 4605 CMPL R8, DI 4606 JEQ emit_literal_done_match_emit_encodeBlockAsm8B 4607 MOVL DI, R9 4608 MOVL DI, 12(SP) 4609 LEAQ (DX)(R8*1), DI 4610 SUBL R8, R9 4611 LEAL -1(R9), R8 4612 CMPL R8, $0x3c 4613 JLT one_byte_match_emit_encodeBlockAsm8B 4614 CMPL R8, $0x00000100 4615 JLT two_bytes_match_emit_encodeBlockAsm8B 4616 MOVB $0xf4, (AX) 4617 MOVW R8, 1(AX) 4618 ADDQ $0x03, AX 4619 JMP memmove_long_match_emit_encodeBlockAsm8B 4620 4621two_bytes_match_emit_encodeBlockAsm8B: 4622 MOVB $0xf0, (AX) 4623 MOVB R8, 1(AX) 4624 ADDQ $0x02, AX 4625 CMPL R8, $0x40 4626 JL memmove_match_emit_encodeBlockAsm8B 4627 JMP memmove_long_match_emit_encodeBlockAsm8B 4628 4629one_byte_match_emit_encodeBlockAsm8B: 4630 SHLB $0x02, R8 4631 MOVB R8, (AX) 4632 ADDQ $0x01, AX 4633 4634memmove_match_emit_encodeBlockAsm8B: 4635 LEAQ (AX)(R9*1), R8 4636 4637 // genMemMoveShort 4638 CMPQ R9, $0x03 4639 JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2 4640 JE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3 4641 CMPQ R9, $0x08 4642 JB emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7 4643 CMPQ R9, $0x10 4644 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 4645 CMPQ R9, $0x20 4646 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 4647 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 4648 4649emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2: 4650 MOVB (DI), R10 4651 MOVB -1(DI)(R9*1), DI 4652 MOVB R10, (AX) 4653 MOVB DI, -1(AX)(R9*1) 4654 JMP memmove_end_copy_match_emit_encodeBlockAsm8B 4655 4656emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3: 4657 MOVW (DI), R10 4658 MOVB 2(DI), DI 4659 MOVW R10, (AX) 4660 MOVB DI, 2(AX) 4661 JMP memmove_end_copy_match_emit_encodeBlockAsm8B 4662 
4663emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7: 4664 MOVL (DI), R10 4665 MOVL -4(DI)(R9*1), DI 4666 MOVL R10, (AX) 4667 MOVL DI, -4(AX)(R9*1) 4668 JMP memmove_end_copy_match_emit_encodeBlockAsm8B 4669 4670emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: 4671 MOVQ (DI), R10 4672 MOVQ -8(DI)(R9*1), DI 4673 MOVQ R10, (AX) 4674 MOVQ DI, -8(AX)(R9*1) 4675 JMP memmove_end_copy_match_emit_encodeBlockAsm8B 4676 4677emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: 4678 MOVOU (DI), X0 4679 MOVOU -16(DI)(R9*1), X1 4680 MOVOU X0, (AX) 4681 MOVOU X1, -16(AX)(R9*1) 4682 JMP memmove_end_copy_match_emit_encodeBlockAsm8B 4683 4684emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: 4685 MOVOU (DI), X0 4686 MOVOU 16(DI), X1 4687 MOVOU -32(DI)(R9*1), X2 4688 MOVOU -16(DI)(R9*1), X3 4689 MOVOU X0, (AX) 4690 MOVOU X1, 16(AX) 4691 MOVOU X2, -32(AX)(R9*1) 4692 MOVOU X3, -16(AX)(R9*1) 4693 4694memmove_end_copy_match_emit_encodeBlockAsm8B: 4695 MOVQ R8, AX 4696 JMP emit_literal_done_match_emit_encodeBlockAsm8B 4697 4698memmove_long_match_emit_encodeBlockAsm8B: 4699 LEAQ (AX)(R9*1), R8 4700 4701 // genMemMoveLong 4702 MOVOU (DI), X0 4703 MOVOU 16(DI), X1 4704 MOVOU -32(DI)(R9*1), X2 4705 MOVOU -16(DI)(R9*1), X3 4706 MOVQ R9, R11 4707 SHRQ $0x05, R11 4708 MOVQ AX, R10 4709 ANDL $0x0000001f, R10 4710 MOVQ $0x00000040, R12 4711 SUBQ R10, R12 4712 DECQ R11 4713 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 4714 LEAQ -32(DI)(R12*1), R10 4715 LEAQ -32(AX)(R12*1), R13 4716 4717emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: 4718 MOVOU (R10), X4 4719 MOVOU 16(R10), X5 4720 MOVOA X4, (R13) 4721 MOVOA X5, 16(R13) 4722 ADDQ $0x20, R13 4723 ADDQ $0x20, R10 4724 ADDQ $0x20, R12 4725 DECQ R11 4726 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back 4727 4728emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: 4729 MOVOU 
-32(DI)(R12*1), X4 4730 MOVOU -16(DI)(R12*1), X5 4731 MOVOA X4, -32(AX)(R12*1) 4732 MOVOA X5, -16(AX)(R12*1) 4733 ADDQ $0x20, R12 4734 CMPQ R9, R12 4735 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 4736 MOVOU X0, (AX) 4737 MOVOU X1, 16(AX) 4738 MOVOU X2, -32(AX)(R9*1) 4739 MOVOU X3, -16(AX)(R9*1) 4740 MOVQ R8, AX 4741 4742emit_literal_done_match_emit_encodeBlockAsm8B: 4743match_nolit_loop_encodeBlockAsm8B: 4744 MOVL CX, DI 4745 SUBL SI, DI 4746 MOVL DI, 16(SP) 4747 ADDL $0x04, CX 4748 ADDL $0x04, SI 4749 MOVQ src_len+32(FP), DI 4750 SUBL CX, DI 4751 LEAQ (DX)(CX*1), R8 4752 LEAQ (DX)(SI*1), SI 4753 4754 // matchLen 4755 XORL R10, R10 4756 CMPL DI, $0x08 4757 JL matchlen_single_match_nolit_encodeBlockAsm8B 4758 4759matchlen_loopback_match_nolit_encodeBlockAsm8B: 4760 MOVQ (R8)(R10*1), R9 4761 XORQ (SI)(R10*1), R9 4762 TESTQ R9, R9 4763 JZ matchlen_loop_match_nolit_encodeBlockAsm8B 4764 BSFQ R9, R9 4765 SARQ $0x03, R9 4766 LEAL (R10)(R9*1), R10 4767 JMP match_nolit_end_encodeBlockAsm8B 4768 4769matchlen_loop_match_nolit_encodeBlockAsm8B: 4770 LEAL -8(DI), DI 4771 LEAL 8(R10), R10 4772 CMPL DI, $0x08 4773 JGE matchlen_loopback_match_nolit_encodeBlockAsm8B 4774 4775matchlen_single_match_nolit_encodeBlockAsm8B: 4776 TESTL DI, DI 4777 JZ match_nolit_end_encodeBlockAsm8B 4778 4779matchlen_single_loopback_match_nolit_encodeBlockAsm8B: 4780 MOVB (R8)(R10*1), R9 4781 CMPB (SI)(R10*1), R9 4782 JNE match_nolit_end_encodeBlockAsm8B 4783 LEAL 1(R10), R10 4784 DECL DI 4785 JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B 4786 4787match_nolit_end_encodeBlockAsm8B: 4788 ADDL R10, CX 4789 MOVL 16(SP), SI 4790 ADDL $0x04, R10 4791 MOVL CX, 12(SP) 4792 4793 // emitCopy 4794two_byte_offset_match_nolit_encodeBlockAsm8B: 4795 CMPL R10, $0x40 4796 JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B 4797 MOVB $0xee, (AX) 4798 MOVW SI, 1(AX) 4799 LEAL -60(R10), R10 4800 ADDQ $0x03, AX 4801 4802 // emitRepeat 4803 MOVL R10, SI 4804 LEAL -4(R10), 
R10 4805 CMPL SI, $0x08 4806 JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short 4807 CMPL SI, $0x0c 4808 JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short 4809 4810cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: 4811 CMPL R10, $0x00000104 4812 JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short 4813 LEAL -256(R10), R10 4814 MOVW $0x0019, (AX) 4815 MOVW R10, 2(AX) 4816 ADDQ $0x04, AX 4817 JMP match_nolit_emitcopy_end_encodeBlockAsm8B 4818 4819repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: 4820 LEAL -4(R10), R10 4821 MOVW $0x0015, (AX) 4822 MOVB R10, 2(AX) 4823 ADDQ $0x03, AX 4824 JMP match_nolit_emitcopy_end_encodeBlockAsm8B 4825 4826repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: 4827 SHLL $0x02, R10 4828 ORL $0x01, R10 4829 MOVW R10, (AX) 4830 ADDQ $0x02, AX 4831 JMP match_nolit_emitcopy_end_encodeBlockAsm8B 4832 XORQ DI, DI 4833 LEAL 1(DI)(R10*4), R10 4834 MOVB SI, 1(AX) 4835 SARL $0x08, SI 4836 SHLL $0x05, SI 4837 ORL SI, R10 4838 MOVB R10, (AX) 4839 ADDQ $0x02, AX 4840 JMP match_nolit_emitcopy_end_encodeBlockAsm8B 4841 JMP two_byte_offset_match_nolit_encodeBlockAsm8B 4842 4843two_byte_offset_short_match_nolit_encodeBlockAsm8B: 4844 CMPL R10, $0x0c 4845 JGE emit_copy_three_match_nolit_encodeBlockAsm8B 4846 MOVB $0x01, BL 4847 LEAL -16(BX)(R10*4), R10 4848 MOVB SI, 1(AX) 4849 SHRL $0x08, SI 4850 SHLL $0x05, SI 4851 ORL SI, R10 4852 MOVB R10, (AX) 4853 ADDQ $0x02, AX 4854 JMP match_nolit_emitcopy_end_encodeBlockAsm8B 4855 4856emit_copy_three_match_nolit_encodeBlockAsm8B: 4857 MOVB $0x02, BL 4858 LEAL -4(BX)(R10*4), R10 4859 MOVB R10, (AX) 4860 MOVW SI, 1(AX) 4861 ADDQ $0x03, AX 4862 4863match_nolit_emitcopy_end_encodeBlockAsm8B: 4864 CMPL CX, 8(SP) 4865 JGE emit_remainder_encodeBlockAsm8B 4866 MOVQ -2(DX)(CX*1), DI 4867 CMPQ AX, (SP) 4868 JL match_nolit_dst_ok_encodeBlockAsm8B 4869 MOVQ $0x00000000, ret+48(FP) 4870 RET 4871 4872match_nolit_dst_ok_encodeBlockAsm8B: 4873 MOVQ 
$0x9e3779b1, R9 4874 MOVQ DI, R8 4875 SHRQ $0x10, DI 4876 MOVQ DI, SI 4877 SHLQ $0x20, R8 4878 IMULQ R9, R8 4879 SHRQ $0x38, R8 4880 SHLQ $0x20, SI 4881 IMULQ R9, SI 4882 SHRQ $0x38, SI 4883 LEAL -2(CX), R9 4884 LEAQ 24(SP)(SI*4), R10 4885 MOVL (R10), SI 4886 MOVL R9, 24(SP)(R8*4) 4887 MOVL CX, (R10) 4888 CMPL (DX)(SI*1), DI 4889 JEQ match_nolit_loop_encodeBlockAsm8B 4890 INCL CX 4891 JMP search_loop_encodeBlockAsm8B 4892 4893emit_remainder_encodeBlockAsm8B: 4894 MOVQ src_len+32(FP), CX 4895 SUBL 12(SP), CX 4896 LEAQ 3(AX)(CX*1), CX 4897 CMPQ CX, (SP) 4898 JL emit_remainder_ok_encodeBlockAsm8B 4899 MOVQ $0x00000000, ret+48(FP) 4900 RET 4901 4902emit_remainder_ok_encodeBlockAsm8B: 4903 MOVQ src_len+32(FP), CX 4904 MOVL 12(SP), BX 4905 CMPL BX, CX 4906 JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B 4907 MOVL CX, SI 4908 MOVL CX, 12(SP) 4909 LEAQ (DX)(BX*1), CX 4910 SUBL BX, SI 4911 LEAL -1(SI), DX 4912 CMPL DX, $0x3c 4913 JLT one_byte_emit_remainder_encodeBlockAsm8B 4914 CMPL DX, $0x00000100 4915 JLT two_bytes_emit_remainder_encodeBlockAsm8B 4916 MOVB $0xf4, (AX) 4917 MOVW DX, 1(AX) 4918 ADDQ $0x03, AX 4919 JMP memmove_long_emit_remainder_encodeBlockAsm8B 4920 4921two_bytes_emit_remainder_encodeBlockAsm8B: 4922 MOVB $0xf0, (AX) 4923 MOVB DL, 1(AX) 4924 ADDQ $0x02, AX 4925 CMPL DX, $0x40 4926 JL memmove_emit_remainder_encodeBlockAsm8B 4927 JMP memmove_long_emit_remainder_encodeBlockAsm8B 4928 4929one_byte_emit_remainder_encodeBlockAsm8B: 4930 SHLB $0x02, DL 4931 MOVB DL, (AX) 4932 ADDQ $0x01, AX 4933 4934memmove_emit_remainder_encodeBlockAsm8B: 4935 LEAQ (AX)(SI*1), DX 4936 MOVL SI, BX 4937 4938 // genMemMoveShort 4939 CMPQ BX, $0x03 4940 JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2 4941 JE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3 4942 CMPQ BX, $0x08 4943 JB emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7 4944 CMPQ BX, $0x10 4945 JBE 
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16 4946 CMPQ BX, $0x20 4947 JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32 4948 JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 4949 4950emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: 4951 MOVB (CX), SI 4952 MOVB -1(CX)(BX*1), CL 4953 MOVB SI, (AX) 4954 MOVB CL, -1(AX)(BX*1) 4955 JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B 4956 4957emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: 4958 MOVW (CX), SI 4959 MOVB 2(CX), CL 4960 MOVW SI, (AX) 4961 MOVB CL, 2(AX) 4962 JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B 4963 4964emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: 4965 MOVL (CX), SI 4966 MOVL -4(CX)(BX*1), CX 4967 MOVL SI, (AX) 4968 MOVL CX, -4(AX)(BX*1) 4969 JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B 4970 4971emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: 4972 MOVQ (CX), SI 4973 MOVQ -8(CX)(BX*1), CX 4974 MOVQ SI, (AX) 4975 MOVQ CX, -8(AX)(BX*1) 4976 JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B 4977 4978emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: 4979 MOVOU (CX), X0 4980 MOVOU -16(CX)(BX*1), X1 4981 MOVOU X0, (AX) 4982 MOVOU X1, -16(AX)(BX*1) 4983 JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B 4984 4985emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: 4986 MOVOU (CX), X0 4987 MOVOU 16(CX), X1 4988 MOVOU -32(CX)(BX*1), X2 4989 MOVOU -16(CX)(BX*1), X3 4990 MOVOU X0, (AX) 4991 MOVOU X1, 16(AX) 4992 MOVOU X2, -32(AX)(BX*1) 4993 MOVOU X3, -16(AX)(BX*1) 4994 4995memmove_end_copy_emit_remainder_encodeBlockAsm8B: 4996 MOVQ DX, AX 4997 JMP emit_literal_done_emit_remainder_encodeBlockAsm8B 4998 4999memmove_long_emit_remainder_encodeBlockAsm8B: 5000 LEAQ (AX)(SI*1), DX 5001 MOVL SI, BX 5002 5003 // genMemMoveLong 5004 MOVOU (CX), X0 5005 MOVOU 16(CX), X1 5006 MOVOU 
-32(CX)(BX*1), X2 5007 MOVOU -16(CX)(BX*1), X3 5008 MOVQ BX, DI 5009 SHRQ $0x05, DI 5010 MOVQ AX, SI 5011 ANDL $0x0000001f, SI 5012 MOVQ $0x00000040, R8 5013 SUBQ SI, R8 5014 DECQ DI 5015 JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 5016 LEAQ -32(CX)(R8*1), SI 5017 LEAQ -32(AX)(R8*1), R9 5018 5019emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: 5020 MOVOU (SI), X4 5021 MOVOU 16(SI), X5 5022 MOVOA X4, (R9) 5023 MOVOA X5, 16(R9) 5024 ADDQ $0x20, R9 5025 ADDQ $0x20, SI 5026 ADDQ $0x20, R8 5027 DECQ DI 5028 JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back 5029 5030emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: 5031 MOVOU -32(CX)(R8*1), X4 5032 MOVOU -16(CX)(R8*1), X5 5033 MOVOA X4, -32(AX)(R8*1) 5034 MOVOA X5, -16(AX)(R8*1) 5035 ADDQ $0x20, R8 5036 CMPQ BX, R8 5037 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 5038 MOVOU X0, (AX) 5039 MOVOU X1, 16(AX) 5040 MOVOU X2, -32(AX)(BX*1) 5041 MOVOU X3, -16(AX)(BX*1) 5042 MOVQ DX, AX 5043 5044emit_literal_done_emit_remainder_encodeBlockAsm8B: 5045 MOVQ dst_base+0(FP), CX 5046 SUBQ CX, AX 5047 MOVQ AX, ret+48(FP) 5048 RET 5049 5050// func encodeBetterBlockAsm(dst []byte, src []byte) int 5051// Requires: SSE2 5052TEXT ·encodeBetterBlockAsm(SB), $327704-56 5053 MOVQ dst_base+0(FP), AX 5054 MOVQ $0x00000a00, CX 5055 LEAQ 24(SP), DX 5056 PXOR X0, X0 5057 5058zero_loop_encodeBetterBlockAsm: 5059 MOVOU X0, (DX) 5060 MOVOU X0, 16(DX) 5061 MOVOU X0, 32(DX) 5062 MOVOU X0, 48(DX) 5063 MOVOU X0, 64(DX) 5064 MOVOU X0, 80(DX) 5065 MOVOU X0, 96(DX) 5066 MOVOU X0, 112(DX) 5067 ADDQ $0x80, DX 5068 DECQ CX 5069 JNZ zero_loop_encodeBetterBlockAsm 5070 MOVL $0x00000000, 12(SP) 5071 MOVQ src_len+32(FP), CX 5072 LEAQ -6(CX), DX 5073 LEAQ -8(CX), SI 5074 MOVL SI, 8(SP) 5075 SHRQ $0x05, CX 5076 SUBL CX, DX 5077 LEAQ (AX)(DX*1), DX 5078 MOVQ DX, (SP) 5079 MOVL $0x00000001, CX 5080 MOVL 
$0x00000000, 16(SP) 5081 MOVQ src_base+24(FP), DX 5082 5083search_loop_encodeBetterBlockAsm: 5084 MOVL CX, SI 5085 SUBL 12(SP), SI 5086 SHRL $0x07, SI 5087 LEAL 1(CX)(SI*1), SI 5088 CMPL SI, 8(SP) 5089 JGE emit_remainder_encodeBetterBlockAsm 5090 MOVQ (DX)(CX*1), DI 5091 MOVL SI, 20(SP) 5092 MOVQ $0x00cf1bbcdcbfa563, R9 5093 MOVQ $0x9e3779b1, SI 5094 MOVQ DI, R10 5095 MOVQ DI, R11 5096 SHLQ $0x08, R10 5097 IMULQ R9, R10 5098 SHRQ $0x30, R10 5099 SHLQ $0x20, R11 5100 IMULQ SI, R11 5101 SHRQ $0x32, R11 5102 MOVL 24(SP)(R10*4), SI 5103 MOVL 262168(SP)(R11*4), R8 5104 MOVL CX, 24(SP)(R10*4) 5105 MOVL CX, 262168(SP)(R11*4) 5106 CMPL (DX)(SI*1), DI 5107 JEQ candidate_match_encodeBetterBlockAsm 5108 CMPL (DX)(R8*1), DI 5109 JEQ candidateS_match_encodeBetterBlockAsm 5110 MOVL 20(SP), CX 5111 JMP search_loop_encodeBetterBlockAsm 5112 5113candidateS_match_encodeBetterBlockAsm: 5114 SHRQ $0x08, DI 5115 MOVQ DI, R10 5116 SHLQ $0x08, R10 5117 IMULQ R9, R10 5118 SHRQ $0x30, R10 5119 MOVL 24(SP)(R10*4), SI 5120 INCL CX 5121 MOVL CX, 24(SP)(R10*4) 5122 CMPL (DX)(SI*1), DI 5123 JEQ candidate_match_encodeBetterBlockAsm 5124 DECL CX 5125 MOVL R8, SI 5126 5127candidate_match_encodeBetterBlockAsm: 5128 MOVL 12(SP), DI 5129 TESTL SI, SI 5130 JZ match_extend_back_end_encodeBetterBlockAsm 5131 5132match_extend_back_loop_encodeBetterBlockAsm: 5133 CMPL CX, DI 5134 JLE match_extend_back_end_encodeBetterBlockAsm 5135 MOVB -1(DX)(SI*1), BL 5136 MOVB -1(DX)(CX*1), R8 5137 CMPB BL, R8 5138 JNE match_extend_back_end_encodeBetterBlockAsm 5139 LEAL -1(CX), CX 5140 DECL SI 5141 JZ match_extend_back_end_encodeBetterBlockAsm 5142 JMP match_extend_back_loop_encodeBetterBlockAsm 5143 5144match_extend_back_end_encodeBetterBlockAsm: 5145 MOVL CX, DI 5146 SUBL 12(SP), DI 5147 LEAQ 5(AX)(DI*1), DI 5148 CMPQ DI, (SP) 5149 JL match_dst_size_check_encodeBetterBlockAsm 5150 MOVQ $0x00000000, ret+48(FP) 5151 RET 5152 5153match_dst_size_check_encodeBetterBlockAsm: 5154 MOVL CX, DI 5155 ADDL $0x04, CX 5156 ADDL 
$0x04, SI 5157 MOVQ src_len+32(FP), R8 5158 SUBL CX, R8 5159 LEAQ (DX)(CX*1), R9 5160 LEAQ (DX)(SI*1), R10 5161 5162 // matchLen 5163 XORL R12, R12 5164 CMPL R8, $0x08 5165 JL matchlen_single_match_nolit_encodeBetterBlockAsm 5166 5167matchlen_loopback_match_nolit_encodeBetterBlockAsm: 5168 MOVQ (R9)(R12*1), R11 5169 XORQ (R10)(R12*1), R11 5170 TESTQ R11, R11 5171 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm 5172 BSFQ R11, R11 5173 SARQ $0x03, R11 5174 LEAL (R12)(R11*1), R12 5175 JMP match_nolit_end_encodeBetterBlockAsm 5176 5177matchlen_loop_match_nolit_encodeBetterBlockAsm: 5178 LEAL -8(R8), R8 5179 LEAL 8(R12), R12 5180 CMPL R8, $0x08 5181 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm 5182 5183matchlen_single_match_nolit_encodeBetterBlockAsm: 5184 TESTL R8, R8 5185 JZ match_nolit_end_encodeBetterBlockAsm 5186 5187matchlen_single_loopback_match_nolit_encodeBetterBlockAsm: 5188 MOVB (R9)(R12*1), R11 5189 CMPB (R10)(R12*1), R11 5190 JNE match_nolit_end_encodeBetterBlockAsm 5191 LEAL 1(R12), R12 5192 DECL R8 5193 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm 5194 5195match_nolit_end_encodeBetterBlockAsm: 5196 MOVL CX, R8 5197 SUBL SI, R8 5198 5199 // Check if repeat 5200 CMPL 16(SP), R8 5201 JEQ match_is_repeat_encodeBetterBlockAsm 5202 CMPL R12, $0x01 5203 JG match_length_ok_encodeBetterBlockAsm 5204 CMPL R8, $0x0000ffff 5205 JLE match_length_ok_encodeBetterBlockAsm 5206 MOVL 20(SP), CX 5207 INCL CX 5208 JMP search_loop_encodeBetterBlockAsm 5209 5210match_length_ok_encodeBetterBlockAsm: 5211 MOVL R8, 16(SP) 5212 MOVL 12(SP), SI 5213 CMPL SI, DI 5214 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm 5215 MOVL DI, R9 5216 MOVL DI, 12(SP) 5217 LEAQ (DX)(SI*1), R10 5218 SUBL SI, R9 5219 LEAL -1(R9), SI 5220 CMPL SI, $0x3c 5221 JLT one_byte_match_emit_encodeBetterBlockAsm 5222 CMPL SI, $0x00000100 5223 JLT two_bytes_match_emit_encodeBetterBlockAsm 5224 CMPL SI, $0x00010000 5225 JLT three_bytes_match_emit_encodeBetterBlockAsm 5226 CMPL 
SI, $0x01000000 5227 JLT four_bytes_match_emit_encodeBetterBlockAsm 5228 MOVB $0xfc, (AX) 5229 MOVL SI, 1(AX) 5230 ADDQ $0x05, AX 5231 JMP memmove_long_match_emit_encodeBetterBlockAsm 5232 5233four_bytes_match_emit_encodeBetterBlockAsm: 5234 MOVL SI, R11 5235 SHRL $0x10, R11 5236 MOVB $0xf8, (AX) 5237 MOVW SI, 1(AX) 5238 MOVB R11, 3(AX) 5239 ADDQ $0x04, AX 5240 JMP memmove_long_match_emit_encodeBetterBlockAsm 5241 5242three_bytes_match_emit_encodeBetterBlockAsm: 5243 MOVB $0xf4, (AX) 5244 MOVW SI, 1(AX) 5245 ADDQ $0x03, AX 5246 JMP memmove_long_match_emit_encodeBetterBlockAsm 5247 5248two_bytes_match_emit_encodeBetterBlockAsm: 5249 MOVB $0xf0, (AX) 5250 MOVB SI, 1(AX) 5251 ADDQ $0x02, AX 5252 CMPL SI, $0x40 5253 JL memmove_match_emit_encodeBetterBlockAsm 5254 JMP memmove_long_match_emit_encodeBetterBlockAsm 5255 5256one_byte_match_emit_encodeBetterBlockAsm: 5257 SHLB $0x02, SI 5258 MOVB SI, (AX) 5259 ADDQ $0x01, AX 5260 5261memmove_match_emit_encodeBetterBlockAsm: 5262 LEAQ (AX)(R9*1), SI 5263 5264 // genMemMoveShort 5265 CMPQ R9, $0x03 5266 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2 5267 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3 5268 CMPQ R9, $0x08 5269 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 5270 CMPQ R9, $0x10 5271 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 5272 CMPQ R9, $0x20 5273 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 5274 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 5275 5276emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_1or2: 5277 MOVB (R10), R11 5278 MOVB -1(R10)(R9*1), R10 5279 MOVB R11, (AX) 5280 MOVB R10, -1(AX)(R9*1) 5281 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5282 5283emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_3: 5284 MOVW (R10), R11 5285 MOVB 2(R10), R10 5286 MOVW R11, (AX) 5287 MOVB R10, 2(AX) 5288 JMP 
memmove_end_copy_match_emit_encodeBetterBlockAsm 5289 5290emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: 5291 MOVL (R10), R11 5292 MOVL -4(R10)(R9*1), R10 5293 MOVL R11, (AX) 5294 MOVL R10, -4(AX)(R9*1) 5295 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5296 5297emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: 5298 MOVQ (R10), R11 5299 MOVQ -8(R10)(R9*1), R10 5300 MOVQ R11, (AX) 5301 MOVQ R10, -8(AX)(R9*1) 5302 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5303 5304emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: 5305 MOVOU (R10), X0 5306 MOVOU -16(R10)(R9*1), X1 5307 MOVOU X0, (AX) 5308 MOVOU X1, -16(AX)(R9*1) 5309 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm 5310 5311emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: 5312 MOVOU (R10), X0 5313 MOVOU 16(R10), X1 5314 MOVOU -32(R10)(R9*1), X2 5315 MOVOU -16(R10)(R9*1), X3 5316 MOVOU X0, (AX) 5317 MOVOU X1, 16(AX) 5318 MOVOU X2, -32(AX)(R9*1) 5319 MOVOU X3, -16(AX)(R9*1) 5320 5321memmove_end_copy_match_emit_encodeBetterBlockAsm: 5322 MOVQ SI, AX 5323 JMP emit_literal_done_match_emit_encodeBetterBlockAsm 5324 5325memmove_long_match_emit_encodeBetterBlockAsm: 5326 LEAQ (AX)(R9*1), SI 5327 5328 // genMemMoveLong 5329 MOVOU (R10), X0 5330 MOVOU 16(R10), X1 5331 MOVOU -32(R10)(R9*1), X2 5332 MOVOU -16(R10)(R9*1), X3 5333 MOVQ R9, R13 5334 SHRQ $0x05, R13 5335 MOVQ AX, R11 5336 ANDL $0x0000001f, R11 5337 MOVQ $0x00000040, R14 5338 SUBQ R11, R14 5339 DECQ R13 5340 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 5341 LEAQ -32(R10)(R14*1), R11 5342 LEAQ -32(AX)(R14*1), R15 5343 5344emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: 5345 MOVOU (R11), X4 5346 MOVOU 16(R11), X5 5347 MOVOA X4, (R15) 5348 MOVOA X5, 16(R15) 5349 ADDQ $0x20, R15 5350 ADDQ $0x20, R11 5351 ADDQ $0x20, R14 5352 DECQ R13 5353 JNA 
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back 5354 5355emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: 5356 MOVOU -32(R10)(R14*1), X4 5357 MOVOU -16(R10)(R14*1), X5 5358 MOVOA X4, -32(AX)(R14*1) 5359 MOVOA X5, -16(AX)(R14*1) 5360 ADDQ $0x20, R14 5361 CMPQ R9, R14 5362 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 5363 MOVOU X0, (AX) 5364 MOVOU X1, 16(AX) 5365 MOVOU X2, -32(AX)(R9*1) 5366 MOVOU X3, -16(AX)(R9*1) 5367 MOVQ SI, AX 5368 5369emit_literal_done_match_emit_encodeBetterBlockAsm: 5370 ADDL R12, CX 5371 ADDL $0x04, R12 5372 MOVL CX, 12(SP) 5373 5374 // emitCopy 5375 CMPL R8, $0x00010000 5376 JL two_byte_offset_match_nolit_encodeBetterBlockAsm 5377 5378four_bytes_loop_back_match_nolit_encodeBetterBlockAsm: 5379 CMPL R12, $0x40 5380 JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm 5381 MOVB $0xff, (AX) 5382 MOVL R8, 1(AX) 5383 LEAL -64(R12), R12 5384 ADDQ $0x05, AX 5385 CMPL R12, $0x04 5386 JL four_bytes_remain_match_nolit_encodeBetterBlockAsm 5387 5388 // emitRepeat 5389emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: 5390 MOVL R12, SI 5391 LEAL -4(R12), R12 5392 CMPL SI, $0x08 5393 JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy 5394 CMPL SI, $0x0c 5395 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy 5396 CMPL R8, $0x00000800 5397 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy 5398 5399cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: 5400 CMPL R12, $0x00000104 5401 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy 5402 CMPL R12, $0x00010100 5403 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy 5404 CMPL R12, $0x0100ffff 5405 JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy 5406 LEAL -16842747(R12), R12 5407 MOVW $0x001d, (AX) 5408 MOVW $0xfffb, 2(AX) 5409 MOVB $0xff, 4(AX) 5410 ADDQ $0x05, AX 5411 JMP 
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy 5412 5413repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: 5414 LEAL -65536(R12), R12 5415 MOVL R12, R8 5416 MOVW $0x001d, (AX) 5417 MOVW R12, 2(AX) 5418 SARL $0x10, R8 5419 MOVB R8, 4(AX) 5420 ADDQ $0x05, AX 5421 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5422 5423repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: 5424 LEAL -256(R12), R12 5425 MOVW $0x0019, (AX) 5426 MOVW R12, 2(AX) 5427 ADDQ $0x04, AX 5428 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5429 5430repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: 5431 LEAL -4(R12), R12 5432 MOVW $0x0015, (AX) 5433 MOVB R12, 2(AX) 5434 ADDQ $0x03, AX 5435 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5436 5437repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: 5438 SHLL $0x02, R12 5439 ORL $0x01, R12 5440 MOVW R12, (AX) 5441 ADDQ $0x02, AX 5442 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5443 5444repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: 5445 XORQ SI, SI 5446 LEAL 1(SI)(R12*4), R12 5447 MOVB R8, 1(AX) 5448 SARL $0x08, R8 5449 SHLL $0x05, R8 5450 ORL R8, R12 5451 MOVB R12, (AX) 5452 ADDQ $0x02, AX 5453 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5454 JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm 5455 5456four_bytes_remain_match_nolit_encodeBetterBlockAsm: 5457 TESTL R12, R12 5458 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm 5459 MOVB $0x03, BL 5460 LEAL -4(BX)(R12*4), R12 5461 MOVB R12, (AX) 5462 MOVL R8, 1(AX) 5463 ADDQ $0x05, AX 5464 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5465 5466two_byte_offset_match_nolit_encodeBetterBlockAsm: 5467 CMPL R12, $0x40 5468 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm 5469 MOVB $0xee, (AX) 5470 MOVW R8, 1(AX) 5471 LEAL -60(R12), R12 5472 ADDQ $0x03, AX 5473 5474 // emitRepeat 5475emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5476 MOVL R12, SI 5477 LEAL -4(R12), R12 5478 CMPL SI, $0x08 5479 JLE 
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short 5480 CMPL SI, $0x0c 5481 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short 5482 CMPL R8, $0x00000800 5483 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short 5484 5485cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5486 CMPL R12, $0x00000104 5487 JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short 5488 CMPL R12, $0x00010100 5489 JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short 5490 CMPL R12, $0x0100ffff 5491 JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short 5492 LEAL -16842747(R12), R12 5493 MOVW $0x001d, (AX) 5494 MOVW $0xfffb, 2(AX) 5495 MOVB $0xff, 4(AX) 5496 ADDQ $0x05, AX 5497 JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short 5498 5499repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5500 LEAL -65536(R12), R12 5501 MOVL R12, R8 5502 MOVW $0x001d, (AX) 5503 MOVW R12, 2(AX) 5504 SARL $0x10, R8 5505 MOVB R8, 4(AX) 5506 ADDQ $0x05, AX 5507 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5508 5509repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5510 LEAL -256(R12), R12 5511 MOVW $0x0019, (AX) 5512 MOVW R12, 2(AX) 5513 ADDQ $0x04, AX 5514 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5515 5516repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5517 LEAL -4(R12), R12 5518 MOVW $0x0015, (AX) 5519 MOVB R12, 2(AX) 5520 ADDQ $0x03, AX 5521 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5522 5523repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5524 SHLL $0x02, R12 5525 ORL $0x01, R12 5526 MOVW R12, (AX) 5527 ADDQ $0x02, AX 5528 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5529 5530repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: 5531 XORQ SI, SI 5532 LEAL 1(SI)(R12*4), R12 5533 MOVB R8, 1(AX) 5534 SARL $0x08, R8 5535 SHLL $0x05, R8 5536 ORL R8, R12 5537 MOVB R12, (AX) 5538 ADDQ $0x02, AX 5539 JMP 
match_nolit_emitcopy_end_encodeBetterBlockAsm 5540 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm 5541 5542two_byte_offset_short_match_nolit_encodeBetterBlockAsm: 5543 CMPL R12, $0x0c 5544 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm 5545 CMPL R8, $0x00000800 5546 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm 5547 MOVB $0x01, BL 5548 LEAL -16(BX)(R12*4), R12 5549 MOVB R8, 1(AX) 5550 SHRL $0x08, R8 5551 SHLL $0x05, R8 5552 ORL R8, R12 5553 MOVB R12, (AX) 5554 ADDQ $0x02, AX 5555 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5556 5557emit_copy_three_match_nolit_encodeBetterBlockAsm: 5558 MOVB $0x02, BL 5559 LEAL -4(BX)(R12*4), R12 5560 MOVB R12, (AX) 5561 MOVW R8, 1(AX) 5562 ADDQ $0x03, AX 5563 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5564 5565match_is_repeat_encodeBetterBlockAsm: 5566 MOVL 12(SP), SI 5567 CMPL SI, DI 5568 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm 5569 MOVL DI, R9 5570 MOVL DI, 12(SP) 5571 LEAQ (DX)(SI*1), R10 5572 SUBL SI, R9 5573 LEAL -1(R9), SI 5574 CMPL SI, $0x3c 5575 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm 5576 CMPL SI, $0x00000100 5577 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm 5578 CMPL SI, $0x00010000 5579 JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm 5580 CMPL SI, $0x01000000 5581 JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm 5582 MOVB $0xfc, (AX) 5583 MOVL SI, 1(AX) 5584 ADDQ $0x05, AX 5585 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5586 5587four_bytes_match_emit_repeat_encodeBetterBlockAsm: 5588 MOVL SI, R11 5589 SHRL $0x10, R11 5590 MOVB $0xf8, (AX) 5591 MOVW SI, 1(AX) 5592 MOVB R11, 3(AX) 5593 ADDQ $0x04, AX 5594 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5595 5596three_bytes_match_emit_repeat_encodeBetterBlockAsm: 5597 MOVB $0xf4, (AX) 5598 MOVW SI, 1(AX) 5599 ADDQ $0x03, AX 5600 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5601 5602two_bytes_match_emit_repeat_encodeBetterBlockAsm: 5603 MOVB $0xf0, (AX) 5604 
MOVB SI, 1(AX) 5605 ADDQ $0x02, AX 5606 CMPL SI, $0x40 5607 JL memmove_match_emit_repeat_encodeBetterBlockAsm 5608 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm 5609 5610one_byte_match_emit_repeat_encodeBetterBlockAsm: 5611 SHLB $0x02, SI 5612 MOVB SI, (AX) 5613 ADDQ $0x01, AX 5614 5615memmove_match_emit_repeat_encodeBetterBlockAsm: 5616 LEAQ (AX)(R9*1), SI 5617 5618 // genMemMoveShort 5619 CMPQ R9, $0x03 5620 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2 5621 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3 5622 CMPQ R9, $0x08 5623 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 5624 CMPQ R9, $0x10 5625 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 5626 CMPQ R9, $0x20 5627 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 5628 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 5629 5630emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_1or2: 5631 MOVB (R10), R11 5632 MOVB -1(R10)(R9*1), R10 5633 MOVB R11, (AX) 5634 MOVB R10, -1(AX)(R9*1) 5635 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5636 5637emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_3: 5638 MOVW (R10), R11 5639 MOVB 2(R10), R10 5640 MOVW R11, (AX) 5641 MOVB R10, 2(AX) 5642 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5643 5644emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: 5645 MOVL (R10), R11 5646 MOVL -4(R10)(R9*1), R10 5647 MOVL R11, (AX) 5648 MOVL R10, -4(AX)(R9*1) 5649 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5650 5651emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: 5652 MOVQ (R10), R11 5653 MOVQ -8(R10)(R9*1), R10 5654 MOVQ R11, (AX) 5655 MOVQ R10, -8(AX)(R9*1) 5656 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5657 
5658emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: 5659 MOVOU (R10), X0 5660 MOVOU -16(R10)(R9*1), X1 5661 MOVOU X0, (AX) 5662 MOVOU X1, -16(AX)(R9*1) 5663 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm 5664 5665emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: 5666 MOVOU (R10), X0 5667 MOVOU 16(R10), X1 5668 MOVOU -32(R10)(R9*1), X2 5669 MOVOU -16(R10)(R9*1), X3 5670 MOVOU X0, (AX) 5671 MOVOU X1, 16(AX) 5672 MOVOU X2, -32(AX)(R9*1) 5673 MOVOU X3, -16(AX)(R9*1) 5674 5675memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: 5676 MOVQ SI, AX 5677 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm 5678 5679memmove_long_match_emit_repeat_encodeBetterBlockAsm: 5680 LEAQ (AX)(R9*1), SI 5681 5682 // genMemMoveLong 5683 MOVOU (R10), X0 5684 MOVOU 16(R10), X1 5685 MOVOU -32(R10)(R9*1), X2 5686 MOVOU -16(R10)(R9*1), X3 5687 MOVQ R9, R13 5688 SHRQ $0x05, R13 5689 MOVQ AX, R11 5690 ANDL $0x0000001f, R11 5691 MOVQ $0x00000040, R14 5692 SUBQ R11, R14 5693 DECQ R13 5694 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 5695 LEAQ -32(R10)(R14*1), R11 5696 LEAQ -32(AX)(R14*1), R15 5697 5698emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: 5699 MOVOU (R11), X4 5700 MOVOU 16(R11), X5 5701 MOVOA X4, (R15) 5702 MOVOA X5, 16(R15) 5703 ADDQ $0x20, R15 5704 ADDQ $0x20, R11 5705 ADDQ $0x20, R14 5706 DECQ R13 5707 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back 5708 5709emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: 5710 MOVOU -32(R10)(R14*1), X4 5711 MOVOU -16(R10)(R14*1), X5 5712 MOVOA X4, -32(AX)(R14*1) 5713 MOVOA X5, -16(AX)(R14*1) 5714 ADDQ $0x20, R14 5715 CMPQ R9, R14 5716 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 5717 MOVOU X0, (AX) 5718 MOVOU X1, 16(AX) 5719 MOVOU X2, -32(AX)(R9*1) 5720 MOVOU X3, 
-16(AX)(R9*1) 5721 MOVQ SI, AX 5722 5723emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: 5724 ADDL R12, CX 5725 ADDL $0x04, R12 5726 MOVL CX, 12(SP) 5727 5728 // emitRepeat 5729emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: 5730 MOVL R12, SI 5731 LEAL -4(R12), R12 5732 CMPL SI, $0x08 5733 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm 5734 CMPL SI, $0x0c 5735 JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm 5736 CMPL R8, $0x00000800 5737 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm 5738 5739cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: 5740 CMPL R12, $0x00000104 5741 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm 5742 CMPL R12, $0x00010100 5743 JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm 5744 CMPL R12, $0x0100ffff 5745 JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm 5746 LEAL -16842747(R12), R12 5747 MOVW $0x001d, (AX) 5748 MOVW $0xfffb, 2(AX) 5749 MOVB $0xff, 4(AX) 5750 ADDQ $0x05, AX 5751 JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm 5752 5753repeat_five_match_nolit_repeat_encodeBetterBlockAsm: 5754 LEAL -65536(R12), R12 5755 MOVL R12, R8 5756 MOVW $0x001d, (AX) 5757 MOVW R12, 2(AX) 5758 SARL $0x10, R8 5759 MOVB R8, 4(AX) 5760 ADDQ $0x05, AX 5761 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5762 5763repeat_four_match_nolit_repeat_encodeBetterBlockAsm: 5764 LEAL -256(R12), R12 5765 MOVW $0x0019, (AX) 5766 MOVW R12, 2(AX) 5767 ADDQ $0x04, AX 5768 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5769 5770repeat_three_match_nolit_repeat_encodeBetterBlockAsm: 5771 LEAL -4(R12), R12 5772 MOVW $0x0015, (AX) 5773 MOVB R12, 2(AX) 5774 ADDQ $0x03, AX 5775 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5776 5777repeat_two_match_nolit_repeat_encodeBetterBlockAsm: 5778 SHLL $0x02, R12 5779 ORL $0x01, R12 5780 MOVW R12, (AX) 5781 ADDQ $0x02, AX 5782 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm 5783 
5784repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: 5785 XORQ SI, SI 5786 LEAL 1(SI)(R12*4), R12 5787 MOVB R8, 1(AX) 5788 SARL $0x08, R8 5789 SHLL $0x05, R8 5790 ORL R8, R12 5791 MOVB R12, (AX) 5792 ADDQ $0x02, AX 5793 5794match_nolit_emitcopy_end_encodeBetterBlockAsm: 5795 CMPL CX, 8(SP) 5796 JGE emit_remainder_encodeBetterBlockAsm 5797 CMPQ AX, (SP) 5798 JL match_nolit_dst_ok_encodeBetterBlockAsm 5799 MOVQ $0x00000000, ret+48(FP) 5800 RET 5801 5802match_nolit_dst_ok_encodeBetterBlockAsm: 5803 MOVQ $0x00cf1bbcdcbfa563, SI 5804 MOVQ $0x9e3779b1, R8 5805 INCL DI 5806 MOVQ (DX)(DI*1), R9 5807 MOVQ R9, R10 5808 MOVQ R9, R11 5809 MOVQ R9, R12 5810 SHRQ $0x08, R11 5811 MOVQ R11, R13 5812 SHRQ $0x10, R12 5813 LEAL 1(DI), R14 5814 LEAL 2(DI), R15 5815 MOVQ -2(DX)(CX*1), R9 5816 SHLQ $0x08, R10 5817 IMULQ SI, R10 5818 SHRQ $0x30, R10 5819 SHLQ $0x08, R13 5820 IMULQ SI, R13 5821 SHRQ $0x30, R13 5822 SHLQ $0x20, R11 5823 IMULQ R8, R11 5824 SHRQ $0x32, R11 5825 SHLQ $0x20, R12 5826 IMULQ R8, R12 5827 SHRQ $0x32, R12 5828 MOVL DI, 24(SP)(R10*4) 5829 MOVL R14, 24(SP)(R13*4) 5830 MOVL R14, 262168(SP)(R11*4) 5831 MOVL R15, 262168(SP)(R12*4) 5832 MOVQ R9, R10 5833 MOVQ R9, R11 5834 SHRQ $0x08, R11 5835 MOVQ R11, R13 5836 LEAL -2(CX), R9 5837 LEAL -1(CX), DI 5838 SHLQ $0x08, R10 5839 IMULQ SI, R10 5840 SHRQ $0x30, R10 5841 SHLQ $0x20, R11 5842 IMULQ R8, R11 5843 SHRQ $0x32, R11 5844 SHLQ $0x08, R13 5845 IMULQ SI, R13 5846 SHRQ $0x30, R13 5847 MOVL R9, 24(SP)(R10*4) 5848 MOVL DI, 262168(SP)(R11*4) 5849 MOVL DI, 24(SP)(R13*4) 5850 JMP search_loop_encodeBetterBlockAsm 5851 5852emit_remainder_encodeBetterBlockAsm: 5853 MOVQ src_len+32(FP), CX 5854 SUBL 12(SP), CX 5855 LEAQ 5(AX)(CX*1), CX 5856 CMPQ CX, (SP) 5857 JL emit_remainder_ok_encodeBetterBlockAsm 5858 MOVQ $0x00000000, ret+48(FP) 5859 RET 5860 5861emit_remainder_ok_encodeBetterBlockAsm: 5862 MOVQ src_len+32(FP), CX 5863 MOVL 12(SP), BX 5864 CMPL BX, CX 5865 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm 
5866 MOVL CX, SI 5867 MOVL CX, 12(SP) 5868 LEAQ (DX)(BX*1), CX 5869 SUBL BX, SI 5870 LEAL -1(SI), DX 5871 CMPL DX, $0x3c 5872 JLT one_byte_emit_remainder_encodeBetterBlockAsm 5873 CMPL DX, $0x00000100 5874 JLT two_bytes_emit_remainder_encodeBetterBlockAsm 5875 CMPL DX, $0x00010000 5876 JLT three_bytes_emit_remainder_encodeBetterBlockAsm 5877 CMPL DX, $0x01000000 5878 JLT four_bytes_emit_remainder_encodeBetterBlockAsm 5879 MOVB $0xfc, (AX) 5880 MOVL DX, 1(AX) 5881 ADDQ $0x05, AX 5882 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5883 5884four_bytes_emit_remainder_encodeBetterBlockAsm: 5885 MOVL DX, BX 5886 SHRL $0x10, BX 5887 MOVB $0xf8, (AX) 5888 MOVW DX, 1(AX) 5889 MOVB BL, 3(AX) 5890 ADDQ $0x04, AX 5891 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5892 5893three_bytes_emit_remainder_encodeBetterBlockAsm: 5894 MOVB $0xf4, (AX) 5895 MOVW DX, 1(AX) 5896 ADDQ $0x03, AX 5897 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5898 5899two_bytes_emit_remainder_encodeBetterBlockAsm: 5900 MOVB $0xf0, (AX) 5901 MOVB DL, 1(AX) 5902 ADDQ $0x02, AX 5903 CMPL DX, $0x40 5904 JL memmove_emit_remainder_encodeBetterBlockAsm 5905 JMP memmove_long_emit_remainder_encodeBetterBlockAsm 5906 5907one_byte_emit_remainder_encodeBetterBlockAsm: 5908 SHLB $0x02, DL 5909 MOVB DL, (AX) 5910 ADDQ $0x01, AX 5911 5912memmove_emit_remainder_encodeBetterBlockAsm: 5913 LEAQ (AX)(SI*1), DX 5914 MOVL SI, BX 5915 5916 // genMemMoveShort 5917 CMPQ BX, $0x03 5918 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2 5919 JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3 5920 CMPQ BX, $0x08 5921 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7 5922 CMPQ BX, $0x10 5923 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16 5924 CMPQ BX, $0x20 5925 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32 5926 JMP 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 5927 5928emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: 5929 MOVB (CX), SI 5930 MOVB -1(CX)(BX*1), CL 5931 MOVB SI, (AX) 5932 MOVB CL, -1(AX)(BX*1) 5933 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5934 5935emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: 5936 MOVW (CX), SI 5937 MOVB 2(CX), CL 5938 MOVW SI, (AX) 5939 MOVB CL, 2(AX) 5940 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5941 5942emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: 5943 MOVL (CX), SI 5944 MOVL -4(CX)(BX*1), CX 5945 MOVL SI, (AX) 5946 MOVL CX, -4(AX)(BX*1) 5947 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5948 5949emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: 5950 MOVQ (CX), SI 5951 MOVQ -8(CX)(BX*1), CX 5952 MOVQ SI, (AX) 5953 MOVQ CX, -8(AX)(BX*1) 5954 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5955 5956emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: 5957 MOVOU (CX), X0 5958 MOVOU -16(CX)(BX*1), X1 5959 MOVOU X0, (AX) 5960 MOVOU X1, -16(AX)(BX*1) 5961 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm 5962 5963emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: 5964 MOVOU (CX), X0 5965 MOVOU 16(CX), X1 5966 MOVOU -32(CX)(BX*1), X2 5967 MOVOU -16(CX)(BX*1), X3 5968 MOVOU X0, (AX) 5969 MOVOU X1, 16(AX) 5970 MOVOU X2, -32(AX)(BX*1) 5971 MOVOU X3, -16(AX)(BX*1) 5972 5973memmove_end_copy_emit_remainder_encodeBetterBlockAsm: 5974 MOVQ DX, AX 5975 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm 5976 5977memmove_long_emit_remainder_encodeBetterBlockAsm: 5978 LEAQ (AX)(SI*1), DX 5979 MOVL SI, BX 5980 5981 // genMemMoveLong 5982 MOVOU (CX), X0 5983 MOVOU 16(CX), X1 5984 MOVOU -32(CX)(BX*1), X2 5985 MOVOU -16(CX)(BX*1), X3 5986 MOVQ BX, DI 5987 SHRQ $0x05, DI 5988 MOVQ AX, SI 5989 ANDL $0x0000001f, SI 
// Remainder of the genMemMoveLong sequence and epilogue of encodeBetterBlockAsm
// (the function's TEXT directive is earlier in the file).
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written
// (final write pointer minus dst_base). Returns 0 as soon as a pending
// write would reach the output limit kept in 0(SP).
//
// Stack frame layout, as used by the code below:
//   0(SP)      8 bytes  output limit: dst_base + src_len - 6 - (src_len >> 5)
//   8(SP)      4 bytes  src_len - 8; positions at or past this go to emit_remainder
//   12(SP)     4 bytes  source offset of the first byte not yet written to dst
//   16(SP)     4 bytes  offset (distance) of the last emitted match, used for repeat detection
//   20(SP)     4 bytes  next search position to use when the current one yields no match
//   24(SP)     65536 x 4-byte source offsets, indexed by a 16-bit hash
//   262168(SP) 16384 x 4-byte source offsets, indexed by a 14-bit hash
//
// Register roles in the main loop: AX = output write pointer, DX = src_base,
// CX = current source offset.
//
// NOTE(review): the file header marks this file as generated by gen.go; any
// hand edit here presumably has to be mirrored in the generator - confirm.
TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000a00, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear both tables: 0xa00 iterations x 0x80 bytes = 327680 bytes from 24(SP).
zero_loop_encodeBetterBlockAsm4MB:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm4MB

	// Initialise the scalar frame slots described in the header comment.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm4MB:
	// Next position = CX + 1 + ((CX - 12(SP)) >> 7): the step grows with the
	// distance since the last emitted byte.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x07, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)

	// R10 = 16-bit hash of the low 7 bytes of DI, R11 = 14-bit hash of the low 4 bytes.
	MOVQ $0x00cf1bbcdcbfa563, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x32, R11

	// Fetch both candidates (SI, R8) and record the current position in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 262168(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 262168(SP)(R11*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm4MB
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm4MB

candidateS_match_encodeBetterBlockAsm4MB:
	// The 4-byte-hash candidate matched. Probe the 7-byte-hash table one byte
	// ahead (position CX+1) and prefer that candidate if its 4 bytes match;
	// otherwise restore CX and use the candidate in R8.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x08, R10
	IMULQ R9, R10
	SHRQ $0x30, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm4MB
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm4MB:
	// Extend the match backwards while CX > 12(SP), SI > 0 and the preceding bytes agree.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm4MB

match_extend_back_loop_encodeBetterBlockAsm4MB:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm4MB
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm4MB
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm4MB
	JMP match_extend_back_loop_encodeBetterBlockAsm4MB

match_extend_back_end_encodeBetterBlockAsm4MB:
	// Bail out with 0 unless AX + 4 + (pending literal bytes) stays below the output limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 4(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm4MB:
	// DI = match start. Skip the 4 bytes already compared on both sides;
	// R8 = bytes left in src, R9/R10 = pointers to the two sequences.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	// R12 = number of further matching bytes, compared 8 at a time, then 1 at a time.
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm4MB

matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
	// Index of the lowest differing bit, divided by 8, is the count of equal bytes.
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeBetterBlockAsm4MB

matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB

matchlen_single_match_nolit_encodeBetterBlockAsm4MB:
	TESTL R8, R8
	JZ match_nolit_end_encodeBetterBlockAsm4MB

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeBetterBlockAsm4MB
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB

match_nolit_end_encodeBetterBlockAsm4MB:
	// R8 = match offset (distance between current position and candidate).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	CMPL 16(SP), R8
	JEQ match_is_repeat_encodeBetterBlockAsm4MB

	// Not a repeat: reject the match when at most 1 extra byte matched and the
	// offset exceeds 0xffff, then resume searching at 20(SP)+1.
	CMPL R12, $0x01
	JG match_length_ok_encodeBetterBlockAsm4MB
	CMPL R8, $0x0000ffff
	JLE match_length_ok_encodeBetterBlockAsm4MB
	MOVL 20(SP), CX
	INCL CX
	JMP search_loop_encodeBetterBlockAsm4MB

match_length_ok_encodeBetterBlockAsm4MB:
	// Remember the offset, then emit the pending literals src[12(SP):DI] if any.
	// R9 = literal length, R10 = literal source pointer, SI = length - 1.
	MOVL R8, 16(SP)
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_match_emit_encodeBetterBlockAsm4MB

	// 4-byte literal header: 0xf8 followed by the low 24 bits of length-1.
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

three_bytes_match_emit_encodeBetterBlockAsm4MB:
	// 3-byte literal header: 0xf4 followed by 16 bits of length-1.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

two_bytes_match_emit_encodeBetterBlockAsm4MB:
	// 2-byte literal header: 0xf0 followed by 8 bits of length-1.
	// Lengths up to 64 still use the short copy below.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_encodeBetterBlockAsm4MB

one_byte_match_emit_encodeBetterBlockAsm4MB:
	// 1-byte literal header: (length-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm4MB:
	// SI = write pointer after the literal bytes.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Copies R9 bytes from (R10) to (AX) using two possibly overlapping
	// head/tail moves chosen by size class.
	CMPQ R9, $0x03
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (R10), R11
	MOVB -1(R10)(R9*1), R10
	MOVB R11, (AX)
	MOVB R10, -1(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (R10), R11
	MOVB 2(R10), R10
	MOVW R11, (AX)
	MOVB R10, 2(AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB

memmove_long_match_emit_encodeBetterBlockAsm4MB:
	// SI = write pointer after the literal bytes.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front (X0-X3) and stored last;
	// the middle is copied in 32-byte steps with aligned stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
	// CX = end of match, R12 = total match length, 12(SP) = CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
	// R8 = offset, R12 = length. Offsets below 0x10000 use the 2-byte-offset forms.
	CMPL R8, $0x00010000
	JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB

four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB:
	// Length > 64: emit 0xff + 4-byte offset, subtract 64 from the length and
	// encode the rest as a repeat (or as a final copy when fewer than 4 remain).
	CMPL R12, $0x40
	JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0xff, (AX)
	MOVL R8, 1(AX)
	LEAL -64(R12), R12
	ADDQ $0x05, AX
	CMPL R12, $0x04
	JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB

	// emitRepeat
	// SI = length, R12 = length - 4; the encoding is chosen by length and offset.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy

	// 5-byte repeat: 0x1d, 0x00, then 24 bits of (length - 4 - 65536).
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 4-byte repeat: 0x19, 0x00, then 16 bits of (length - 4 - 256).
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 3-byte repeat: 0x15, 0x00, then 8 bits of (length - 4 - 4).
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 2-byte repeat: ((length - 4) << 2) | 1 written as a 16-bit word.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
	// 2-byte form carrying the offset: byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5,
	// byte 1 = low 8 bits of the offset.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
	// Final 4-byte-offset copy: tag = 3 | (length-1)<<2, then the 32-bit offset.
	// Only the low byte of R12 is stored, so the stale upper bits of BX do not matter.
	TESTL R12, R12
	JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
	MOVB $0x03, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVL R8, 1(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
	// Length > 64: emit 0xee + 16-bit offset, subtract 60 from the length and
	// encode the rest as a repeat.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX

	// emitRepeat
	// SI = length, R12 = length - 4; the encoding is chosen by length and offset.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short

	// 5-byte repeat: 0x1d, 0x00, then 24 bits of (length - 4 - 65536).
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	// 4-byte repeat: 0x19, 0x00, then 16 bits of (length - 4 - 256).
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	// 3-byte repeat: 0x15, 0x00, then 8 bits of (length - 4 - 4).
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	// 2-byte repeat: ((length - 4) << 2) | 1 written as a 16-bit word.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
	// 2-byte form carrying the offset: byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5,
	// byte 1 = low 8 bits of the offset.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
	// Length < 12 and offset < 2048: 2-byte copy,
	// byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5, byte 1 = low 8 bits of the offset.
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	CMPL R8, $0x00000800
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
	// 3-byte copy: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

match_is_repeat_encodeBetterBlockAsm4MB:
	// Same offset as the previous match: emit pending literals src[12(SP):DI]
	// (same header/copy logic as the non-repeat path), then a repeat.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x00010000
	JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB

	// 4-byte literal header: 0xf8 followed by the low 24 bits of length-1.
	MOVL SI, R11
	SHRL $0x10, R11
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB R11, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	// 3-byte literal header: 0xf4 followed by 16 bits of length-1.
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
	// 2-byte literal header: 0xf0 followed by 8 bits of length-1.
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB

one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
	// 1-byte literal header: (length-1) << 2.
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
	// SI = write pointer after the literal bytes.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Copies R9 bytes from (R10) to (AX) using two possibly overlapping
	// head/tail moves chosen by size class.
	CMPQ R9, $0x03
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (R10), R11
	MOVB -1(R10)(R9*1), R10
	MOVB R11, (AX)
	MOVB R10, -1(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (R10), R11
	MOVB 2(R10), R10
	MOVW R11, (AX)
	MOVB R10, 2(AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB

memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
	// SI = write pointer after the literal bytes.
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front (X0-X3) and stored last;
	// the middle is copied in 32-byte steps with aligned stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
	// CX = end of match, R12 = total match length, 12(SP) = CX.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitRepeat
	// SI = length, R12 = length - 4; the encoding is chosen by length and offset.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL R8, $0x00000800
	JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB

cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
	CMPL R12, $0x00010100
	JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB

	// 5-byte repeat: 0x1d, 0x00, then 24 bits of (length - 4 - 65536).
	LEAL -65536(R12), R12
	MOVL R12, R8
	MOVW $0x001d, (AX)
	MOVW R12, 2(AX)
	SARL $0x10, R8
	MOVB R8, 4(AX)
	ADDQ $0x05, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
	// 4-byte repeat: 0x19, 0x00, then 16 bits of (length - 4 - 256).
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
	// 3-byte repeat: 0x15, 0x00, then 8 bits of (length - 4 - 4).
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
	// 2-byte repeat: ((length - 4) << 2) | 1 written as a 16-bit word.
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB

repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
	// 2-byte form carrying the offset: byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5,
	// byte 1 = low 8 bits of the offset. Falls through to the common tail.
	XORQ SI, SI
	LEAL 1(SI)(R12*4), R12
	MOVB R8, 1(AX)
	SARL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
	// Finish if the match ended at or past src_len-8; return 0 if the write
	// pointer reached the output limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm4MB
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm4MB:
	// Index positions around the match. DI is the match start; after INCL it is
	// start+1. From the 8 bytes at src[start+1]: the 16-bit-hash table gets
	// start+1 and start+2, the 14-bit-hash table gets start+2 and start+3.
	MOVQ $0x00cf1bbcdcbfa563, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x32, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 262168(SP)(R11*4)
	MOVL R15, 262168(SP)(R12*4)

	// From the 8 bytes at src[CX-2]: the 16-bit-hash table gets CX-2 and CX-1,
	// the 14-bit-hash table gets CX-1.
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x08, R10
	IMULQ SI, R10
	SHRQ $0x30, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x32, R11
	SHLQ $0x08, R13
	IMULQ SI, R13
	SHRQ $0x30, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 262168(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeBetterBlockAsm4MB

emit_remainder_encodeBetterBlockAsm4MB:
	// Return 0 unless AX + 4 + (src_len - 12(SP)) stays below the output limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm4MB
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm4MB:
	// Emit src[12(SP):src_len] as one literal run, if non-empty.
	// CX = literal source pointer, SI = length, DX = length - 1 (DX no longer holds src_base).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB
	CMPL DX, $0x00010000
	JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB

	// 4-byte literal header: 0xf8 followed by the low 24 bits of length-1.
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	// 3-byte literal header: 0xf4 followed by 16 bits of length-1.
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
	// 2-byte literal header: 0xf0 followed by 8 bits of length-1.
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm4MB
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB

one_byte_emit_remainder_encodeBetterBlockAsm4MB:
	// 1-byte literal header: (length-1) << 2.
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = write pointer after the literal bytes, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// Copies BX bytes from (CX) to (AX) using two possibly overlapping
	// head/tail moves chosen by size class.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB

memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
	// DX = write pointer after the literal bytes, BX = length.
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front (X0-X3) and stored last;
	// the middle is copied in 32-byte steps with aligned stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
	// Return the number of bytes written: write pointer minus dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000280, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBetterBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0,
16(DX) 6960 MOVOU X0, 32(DX) 6961 MOVOU X0, 48(DX) 6962 MOVOU X0, 64(DX) 6963 MOVOU X0, 80(DX) 6964 MOVOU X0, 96(DX) 6965 MOVOU X0, 112(DX) 6966 ADDQ $0x80, DX 6967 DECQ CX 6968 JNZ zero_loop_encodeBetterBlockAsm12B 6969 MOVL $0x00000000, 12(SP) 6970 MOVQ src_len+32(FP), CX 6971 LEAQ -6(CX), DX 6972 LEAQ -8(CX), SI 6973 MOVL SI, 8(SP) 6974 SHRQ $0x05, CX 6975 SUBL CX, DX 6976 LEAQ (AX)(DX*1), DX 6977 MOVQ DX, (SP) 6978 MOVL $0x00000001, CX 6979 MOVL $0x00000000, 16(SP) 6980 MOVQ src_base+24(FP), DX 6981 6982search_loop_encodeBetterBlockAsm12B: 6983 MOVL CX, SI 6984 SUBL 12(SP), SI 6985 SHRL $0x06, SI 6986 LEAL 1(CX)(SI*1), SI 6987 CMPL SI, 8(SP) 6988 JGE emit_remainder_encodeBetterBlockAsm12B 6989 MOVQ (DX)(CX*1), DI 6990 MOVL SI, 20(SP) 6991 MOVQ $0x0000cf1bbcdcbf9b, R9 6992 MOVQ $0x9e3779b1, SI 6993 MOVQ DI, R10 6994 MOVQ DI, R11 6995 SHLQ $0x10, R10 6996 IMULQ R9, R10 6997 SHRQ $0x32, R10 6998 SHLQ $0x20, R11 6999 IMULQ SI, R11 7000 SHRQ $0x34, R11 7001 MOVL 24(SP)(R10*4), SI 7002 MOVL 65560(SP)(R11*4), R8 7003 MOVL CX, 24(SP)(R10*4) 7004 MOVL CX, 65560(SP)(R11*4) 7005 CMPL (DX)(SI*1), DI 7006 JEQ candidate_match_encodeBetterBlockAsm12B 7007 CMPL (DX)(R8*1), DI 7008 JEQ candidateS_match_encodeBetterBlockAsm12B 7009 MOVL 20(SP), CX 7010 JMP search_loop_encodeBetterBlockAsm12B 7011 7012candidateS_match_encodeBetterBlockAsm12B: 7013 SHRQ $0x08, DI 7014 MOVQ DI, R10 7015 SHLQ $0x10, R10 7016 IMULQ R9, R10 7017 SHRQ $0x32, R10 7018 MOVL 24(SP)(R10*4), SI 7019 INCL CX 7020 MOVL CX, 24(SP)(R10*4) 7021 CMPL (DX)(SI*1), DI 7022 JEQ candidate_match_encodeBetterBlockAsm12B 7023 DECL CX 7024 MOVL R8, SI 7025 7026candidate_match_encodeBetterBlockAsm12B: 7027 MOVL 12(SP), DI 7028 TESTL SI, SI 7029 JZ match_extend_back_end_encodeBetterBlockAsm12B 7030 7031match_extend_back_loop_encodeBetterBlockAsm12B: 7032 CMPL CX, DI 7033 JLE match_extend_back_end_encodeBetterBlockAsm12B 7034 MOVB -1(DX)(SI*1), BL 7035 MOVB -1(DX)(CX*1), R8 7036 CMPB BL, R8 7037 JNE 
match_extend_back_end_encodeBetterBlockAsm12B 7038 LEAL -1(CX), CX 7039 DECL SI 7040 JZ match_extend_back_end_encodeBetterBlockAsm12B 7041 JMP match_extend_back_loop_encodeBetterBlockAsm12B 7042 7043match_extend_back_end_encodeBetterBlockAsm12B: 7044 MOVL CX, DI 7045 SUBL 12(SP), DI 7046 LEAQ 3(AX)(DI*1), DI 7047 CMPQ DI, (SP) 7048 JL match_dst_size_check_encodeBetterBlockAsm12B 7049 MOVQ $0x00000000, ret+48(FP) 7050 RET 7051 7052match_dst_size_check_encodeBetterBlockAsm12B: 7053 MOVL CX, DI 7054 ADDL $0x04, CX 7055 ADDL $0x04, SI 7056 MOVQ src_len+32(FP), R8 7057 SUBL CX, R8 7058 LEAQ (DX)(CX*1), R9 7059 LEAQ (DX)(SI*1), R10 7060 7061 // matchLen 7062 XORL R12, R12 7063 CMPL R8, $0x08 7064 JL matchlen_single_match_nolit_encodeBetterBlockAsm12B 7065 7066matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: 7067 MOVQ (R9)(R12*1), R11 7068 XORQ (R10)(R12*1), R11 7069 TESTQ R11, R11 7070 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B 7071 BSFQ R11, R11 7072 SARQ $0x03, R11 7073 LEAL (R12)(R11*1), R12 7074 JMP match_nolit_end_encodeBetterBlockAsm12B 7075 7076matchlen_loop_match_nolit_encodeBetterBlockAsm12B: 7077 LEAL -8(R8), R8 7078 LEAL 8(R12), R12 7079 CMPL R8, $0x08 7080 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B 7081 7082matchlen_single_match_nolit_encodeBetterBlockAsm12B: 7083 TESTL R8, R8 7084 JZ match_nolit_end_encodeBetterBlockAsm12B 7085 7086matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B: 7087 MOVB (R9)(R12*1), R11 7088 CMPB (R10)(R12*1), R11 7089 JNE match_nolit_end_encodeBetterBlockAsm12B 7090 LEAL 1(R12), R12 7091 DECL R8 7092 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B 7093 7094match_nolit_end_encodeBetterBlockAsm12B: 7095 MOVL CX, R8 7096 SUBL SI, R8 7097 7098 // Check if repeat 7099 CMPL 16(SP), R8 7100 JEQ match_is_repeat_encodeBetterBlockAsm12B 7101 MOVL R8, 16(SP) 7102 MOVL 12(SP), SI 7103 CMPL SI, DI 7104 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B 7105 MOVL DI, R9 7106 MOVL DI, 
12(SP) 7107 LEAQ (DX)(SI*1), R10 7108 SUBL SI, R9 7109 LEAL -1(R9), SI 7110 CMPL SI, $0x3c 7111 JLT one_byte_match_emit_encodeBetterBlockAsm12B 7112 CMPL SI, $0x00000100 7113 JLT two_bytes_match_emit_encodeBetterBlockAsm12B 7114 MOVB $0xf4, (AX) 7115 MOVW SI, 1(AX) 7116 ADDQ $0x03, AX 7117 JMP memmove_long_match_emit_encodeBetterBlockAsm12B 7118 7119two_bytes_match_emit_encodeBetterBlockAsm12B: 7120 MOVB $0xf0, (AX) 7121 MOVB SI, 1(AX) 7122 ADDQ $0x02, AX 7123 CMPL SI, $0x40 7124 JL memmove_match_emit_encodeBetterBlockAsm12B 7125 JMP memmove_long_match_emit_encodeBetterBlockAsm12B 7126 7127one_byte_match_emit_encodeBetterBlockAsm12B: 7128 SHLB $0x02, SI 7129 MOVB SI, (AX) 7130 ADDQ $0x01, AX 7131 7132memmove_match_emit_encodeBetterBlockAsm12B: 7133 LEAQ (AX)(R9*1), SI 7134 7135 // genMemMoveShort 7136 CMPQ R9, $0x03 7137 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_1or2 7138 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_3 7139 CMPQ R9, $0x08 7140 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 7141 CMPQ R9, $0x10 7142 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 7143 CMPQ R9, $0x20 7144 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 7145 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 7146 7147emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_1or2: 7148 MOVB (R10), R11 7149 MOVB -1(R10)(R9*1), R10 7150 MOVB R11, (AX) 7151 MOVB R10, -1(AX)(R9*1) 7152 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 7153 7154emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_3: 7155 MOVW (R10), R11 7156 MOVB 2(R10), R10 7157 MOVW R11, (AX) 7158 MOVB R10, 2(AX) 7159 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 7160 7161emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: 7162 MOVL (R10), R11 7163 MOVL -4(R10)(R9*1), R10 7164 MOVL R11, (AX) 
7165 MOVL R10, -4(AX)(R9*1) 7166 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 7167 7168emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: 7169 MOVQ (R10), R11 7170 MOVQ -8(R10)(R9*1), R10 7171 MOVQ R11, (AX) 7172 MOVQ R10, -8(AX)(R9*1) 7173 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 7174 7175emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: 7176 MOVOU (R10), X0 7177 MOVOU -16(R10)(R9*1), X1 7178 MOVOU X0, (AX) 7179 MOVOU X1, -16(AX)(R9*1) 7180 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B 7181 7182emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: 7183 MOVOU (R10), X0 7184 MOVOU 16(R10), X1 7185 MOVOU -32(R10)(R9*1), X2 7186 MOVOU -16(R10)(R9*1), X3 7187 MOVOU X0, (AX) 7188 MOVOU X1, 16(AX) 7189 MOVOU X2, -32(AX)(R9*1) 7190 MOVOU X3, -16(AX)(R9*1) 7191 7192memmove_end_copy_match_emit_encodeBetterBlockAsm12B: 7193 MOVQ SI, AX 7194 JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B 7195 7196memmove_long_match_emit_encodeBetterBlockAsm12B: 7197 LEAQ (AX)(R9*1), SI 7198 7199 // genMemMoveLong 7200 MOVOU (R10), X0 7201 MOVOU 16(R10), X1 7202 MOVOU -32(R10)(R9*1), X2 7203 MOVOU -16(R10)(R9*1), X3 7204 MOVQ R9, R13 7205 SHRQ $0x05, R13 7206 MOVQ AX, R11 7207 ANDL $0x0000001f, R11 7208 MOVQ $0x00000040, R14 7209 SUBQ R11, R14 7210 DECQ R13 7211 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7212 LEAQ -32(R10)(R14*1), R11 7213 LEAQ -32(AX)(R14*1), R15 7214 7215emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: 7216 MOVOU (R11), X4 7217 MOVOU 16(R11), X5 7218 MOVOA X4, (R15) 7219 MOVOA X5, 16(R15) 7220 ADDQ $0x20, R15 7221 ADDQ $0x20, R11 7222 ADDQ $0x20, R14 7223 DECQ R13 7224 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back 7225 7226emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: 7227 MOVOU -32(R10)(R14*1), X4 7228 MOVOU 
-16(R10)(R14*1), X5 7229 MOVOA X4, -32(AX)(R14*1) 7230 MOVOA X5, -16(AX)(R14*1) 7231 ADDQ $0x20, R14 7232 CMPQ R9, R14 7233 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7234 MOVOU X0, (AX) 7235 MOVOU X1, 16(AX) 7236 MOVOU X2, -32(AX)(R9*1) 7237 MOVOU X3, -16(AX)(R9*1) 7238 MOVQ SI, AX 7239 7240emit_literal_done_match_emit_encodeBetterBlockAsm12B: 7241 ADDL R12, CX 7242 ADDL $0x04, R12 7243 MOVL CX, 12(SP) 7244 7245 // emitCopy 7246two_byte_offset_match_nolit_encodeBetterBlockAsm12B: 7247 CMPL R12, $0x40 7248 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B 7249 MOVB $0xee, (AX) 7250 MOVW R8, 1(AX) 7251 LEAL -60(R12), R12 7252 ADDQ $0x03, AX 7253 7254 // emitRepeat 7255 MOVL R12, SI 7256 LEAL -4(R12), R12 7257 CMPL SI, $0x08 7258 JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 7259 CMPL SI, $0x0c 7260 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 7261 CMPL R8, $0x00000800 7262 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 7263 7264cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 7265 CMPL R12, $0x00000104 7266 JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short 7267 LEAL -256(R12), R12 7268 MOVW $0x0019, (AX) 7269 MOVW R12, 2(AX) 7270 ADDQ $0x04, AX 7271 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7272 7273repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 7274 LEAL -4(R12), R12 7275 MOVW $0x0015, (AX) 7276 MOVB R12, 2(AX) 7277 ADDQ $0x03, AX 7278 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7279 7280repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 7281 SHLL $0x02, R12 7282 ORL $0x01, R12 7283 MOVW R12, (AX) 7284 ADDQ $0x02, AX 7285 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7286 7287repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: 7288 XORQ SI, SI 7289 LEAL 1(SI)(R12*4), R12 7290 MOVB R8, 1(AX) 7291 SARL $0x08, 
R8 7292 SHLL $0x05, R8 7293 ORL R8, R12 7294 MOVB R12, (AX) 7295 ADDQ $0x02, AX 7296 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7297 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B 7298 7299two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: 7300 CMPL R12, $0x0c 7301 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B 7302 CMPL R8, $0x00000800 7303 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B 7304 MOVB $0x01, BL 7305 LEAL -16(BX)(R12*4), R12 7306 MOVB R8, 1(AX) 7307 SHRL $0x08, R8 7308 SHLL $0x05, R8 7309 ORL R8, R12 7310 MOVB R12, (AX) 7311 ADDQ $0x02, AX 7312 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7313 7314emit_copy_three_match_nolit_encodeBetterBlockAsm12B: 7315 MOVB $0x02, BL 7316 LEAL -4(BX)(R12*4), R12 7317 MOVB R12, (AX) 7318 MOVW R8, 1(AX) 7319 ADDQ $0x03, AX 7320 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7321 7322match_is_repeat_encodeBetterBlockAsm12B: 7323 MOVL 12(SP), SI 7324 CMPL SI, DI 7325 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B 7326 MOVL DI, R9 7327 MOVL DI, 12(SP) 7328 LEAQ (DX)(SI*1), R10 7329 SUBL SI, R9 7330 LEAL -1(R9), SI 7331 CMPL SI, $0x3c 7332 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B 7333 CMPL SI, $0x00000100 7334 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B 7335 MOVB $0xf4, (AX) 7336 MOVW SI, 1(AX) 7337 ADDQ $0x03, AX 7338 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B 7339 7340two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: 7341 MOVB $0xf0, (AX) 7342 MOVB SI, 1(AX) 7343 ADDQ $0x02, AX 7344 CMPL SI, $0x40 7345 JL memmove_match_emit_repeat_encodeBetterBlockAsm12B 7346 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B 7347 7348one_byte_match_emit_repeat_encodeBetterBlockAsm12B: 7349 SHLB $0x02, SI 7350 MOVB SI, (AX) 7351 ADDQ $0x01, AX 7352 7353memmove_match_emit_repeat_encodeBetterBlockAsm12B: 7354 LEAQ (AX)(R9*1), SI 7355 7356 // genMemMoveShort 7357 CMPQ R9, $0x03 7358 JB 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_1or2 7359 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_3 7360 CMPQ R9, $0x08 7361 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 7362 CMPQ R9, $0x10 7363 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 7364 CMPQ R9, $0x20 7365 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 7366 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 7367 7368emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_1or2: 7369 MOVB (R10), R11 7370 MOVB -1(R10)(R9*1), R10 7371 MOVB R11, (AX) 7372 MOVB R10, -1(AX)(R9*1) 7373 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7374 7375emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_3: 7376 MOVW (R10), R11 7377 MOVB 2(R10), R10 7378 MOVW R11, (AX) 7379 MOVB R10, 2(AX) 7380 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7381 7382emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: 7383 MOVL (R10), R11 7384 MOVL -4(R10)(R9*1), R10 7385 MOVL R11, (AX) 7386 MOVL R10, -4(AX)(R9*1) 7387 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7388 7389emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: 7390 MOVQ (R10), R11 7391 MOVQ -8(R10)(R9*1), R10 7392 MOVQ R11, (AX) 7393 MOVQ R10, -8(AX)(R9*1) 7394 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7395 7396emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: 7397 MOVOU (R10), X0 7398 MOVOU -16(R10)(R9*1), X1 7399 MOVOU X0, (AX) 7400 MOVOU X1, -16(AX)(R9*1) 7401 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B 7402 7403emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: 7404 MOVOU (R10), X0 7405 MOVOU 16(R10), X1 7406 
MOVOU -32(R10)(R9*1), X2 7407 MOVOU -16(R10)(R9*1), X3 7408 MOVOU X0, (AX) 7409 MOVOU X1, 16(AX) 7410 MOVOU X2, -32(AX)(R9*1) 7411 MOVOU X3, -16(AX)(R9*1) 7412 7413memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: 7414 MOVQ SI, AX 7415 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B 7416 7417memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: 7418 LEAQ (AX)(R9*1), SI 7419 7420 // genMemMoveLong 7421 MOVOU (R10), X0 7422 MOVOU 16(R10), X1 7423 MOVOU -32(R10)(R9*1), X2 7424 MOVOU -16(R10)(R9*1), X3 7425 MOVQ R9, R13 7426 SHRQ $0x05, R13 7427 MOVQ AX, R11 7428 ANDL $0x0000001f, R11 7429 MOVQ $0x00000040, R14 7430 SUBQ R11, R14 7431 DECQ R13 7432 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7433 LEAQ -32(R10)(R14*1), R11 7434 LEAQ -32(AX)(R14*1), R15 7435 7436emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: 7437 MOVOU (R11), X4 7438 MOVOU 16(R11), X5 7439 MOVOA X4, (R15) 7440 MOVOA X5, 16(R15) 7441 ADDQ $0x20, R15 7442 ADDQ $0x20, R11 7443 ADDQ $0x20, R14 7444 DECQ R13 7445 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back 7446 7447emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: 7448 MOVOU -32(R10)(R14*1), X4 7449 MOVOU -16(R10)(R14*1), X5 7450 MOVOA X4, -32(AX)(R14*1) 7451 MOVOA X5, -16(AX)(R14*1) 7452 ADDQ $0x20, R14 7453 CMPQ R9, R14 7454 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7455 MOVOU X0, (AX) 7456 MOVOU X1, 16(AX) 7457 MOVOU X2, -32(AX)(R9*1) 7458 MOVOU X3, -16(AX)(R9*1) 7459 MOVQ SI, AX 7460 7461emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: 7462 ADDL R12, CX 7463 ADDL $0x04, R12 7464 MOVL CX, 12(SP) 7465 7466 // emitRepeat 7467 MOVL R12, SI 7468 LEAL -4(R12), R12 7469 CMPL SI, $0x08 7470 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B 7471 CMPL SI, $0x0c 7472 JGE 
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B 7473 CMPL R8, $0x00000800 7474 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B 7475 7476cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: 7477 CMPL R12, $0x00000104 7478 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B 7479 LEAL -256(R12), R12 7480 MOVW $0x0019, (AX) 7481 MOVW R12, 2(AX) 7482 ADDQ $0x04, AX 7483 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7484 7485repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: 7486 LEAL -4(R12), R12 7487 MOVW $0x0015, (AX) 7488 MOVB R12, 2(AX) 7489 ADDQ $0x03, AX 7490 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7491 7492repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: 7493 SHLL $0x02, R12 7494 ORL $0x01, R12 7495 MOVW R12, (AX) 7496 ADDQ $0x02, AX 7497 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B 7498 7499repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: 7500 XORQ SI, SI 7501 LEAL 1(SI)(R12*4), R12 7502 MOVB R8, 1(AX) 7503 SARL $0x08, R8 7504 SHLL $0x05, R8 7505 ORL R8, R12 7506 MOVB R12, (AX) 7507 ADDQ $0x02, AX 7508 7509match_nolit_emitcopy_end_encodeBetterBlockAsm12B: 7510 CMPL CX, 8(SP) 7511 JGE emit_remainder_encodeBetterBlockAsm12B 7512 CMPQ AX, (SP) 7513 JL match_nolit_dst_ok_encodeBetterBlockAsm12B 7514 MOVQ $0x00000000, ret+48(FP) 7515 RET 7516 7517match_nolit_dst_ok_encodeBetterBlockAsm12B: 7518 MOVQ $0x0000cf1bbcdcbf9b, SI 7519 MOVQ $0x9e3779b1, R8 7520 INCL DI 7521 MOVQ (DX)(DI*1), R9 7522 MOVQ R9, R10 7523 MOVQ R9, R11 7524 MOVQ R9, R12 7525 SHRQ $0x08, R11 7526 MOVQ R11, R13 7527 SHRQ $0x10, R12 7528 LEAL 1(DI), R14 7529 LEAL 2(DI), R15 7530 MOVQ -2(DX)(CX*1), R9 7531 SHLQ $0x10, R10 7532 IMULQ SI, R10 7533 SHRQ $0x32, R10 7534 SHLQ $0x10, R13 7535 IMULQ SI, R13 7536 SHRQ $0x32, R13 7537 SHLQ $0x20, R11 7538 IMULQ R8, R11 7539 SHRQ $0x34, R11 7540 SHLQ $0x20, R12 7541 IMULQ R8, R12 7542 SHRQ $0x34, R12 7543 MOVL DI, 24(SP)(R10*4) 7544 MOVL R14, 
24(SP)(R13*4) 7545 MOVL R14, 65560(SP)(R11*4) 7546 MOVL R15, 65560(SP)(R12*4) 7547 MOVQ R9, R10 7548 MOVQ R9, R11 7549 SHRQ $0x08, R11 7550 MOVQ R11, R13 7551 LEAL -2(CX), R9 7552 LEAL -1(CX), DI 7553 SHLQ $0x10, R10 7554 IMULQ SI, R10 7555 SHRQ $0x32, R10 7556 SHLQ $0x20, R11 7557 IMULQ R8, R11 7558 SHRQ $0x34, R11 7559 SHLQ $0x10, R13 7560 IMULQ SI, R13 7561 SHRQ $0x32, R13 7562 MOVL R9, 24(SP)(R10*4) 7563 MOVL DI, 65560(SP)(R11*4) 7564 MOVL DI, 24(SP)(R13*4) 7565 JMP search_loop_encodeBetterBlockAsm12B 7566 7567emit_remainder_encodeBetterBlockAsm12B: 7568 MOVQ src_len+32(FP), CX 7569 SUBL 12(SP), CX 7570 LEAQ 3(AX)(CX*1), CX 7571 CMPQ CX, (SP) 7572 JL emit_remainder_ok_encodeBetterBlockAsm12B 7573 MOVQ $0x00000000, ret+48(FP) 7574 RET 7575 7576emit_remainder_ok_encodeBetterBlockAsm12B: 7577 MOVQ src_len+32(FP), CX 7578 MOVL 12(SP), BX 7579 CMPL BX, CX 7580 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B 7581 MOVL CX, SI 7582 MOVL CX, 12(SP) 7583 LEAQ (DX)(BX*1), CX 7584 SUBL BX, SI 7585 LEAL -1(SI), DX 7586 CMPL DX, $0x3c 7587 JLT one_byte_emit_remainder_encodeBetterBlockAsm12B 7588 CMPL DX, $0x00000100 7589 JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B 7590 MOVB $0xf4, (AX) 7591 MOVW DX, 1(AX) 7592 ADDQ $0x03, AX 7593 JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B 7594 7595two_bytes_emit_remainder_encodeBetterBlockAsm12B: 7596 MOVB $0xf0, (AX) 7597 MOVB DL, 1(AX) 7598 ADDQ $0x02, AX 7599 CMPL DX, $0x40 7600 JL memmove_emit_remainder_encodeBetterBlockAsm12B 7601 JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B 7602 7603one_byte_emit_remainder_encodeBetterBlockAsm12B: 7604 SHLB $0x02, DL 7605 MOVB DL, (AX) 7606 ADDQ $0x01, AX 7607 7608memmove_emit_remainder_encodeBetterBlockAsm12B: 7609 LEAQ (AX)(SI*1), DX 7610 MOVL SI, BX 7611 7612 // genMemMoveShort 7613 CMPQ BX, $0x03 7614 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2 7615 JE 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3 7616 CMPQ BX, $0x08 7617 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7 7618 CMPQ BX, $0x10 7619 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16 7620 CMPQ BX, $0x20 7621 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32 7622 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 7623 7624emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: 7625 MOVB (CX), SI 7626 MOVB -1(CX)(BX*1), CL 7627 MOVB SI, (AX) 7628 MOVB CL, -1(AX)(BX*1) 7629 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7630 7631emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: 7632 MOVW (CX), SI 7633 MOVB 2(CX), CL 7634 MOVW SI, (AX) 7635 MOVB CL, 2(AX) 7636 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7637 7638emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: 7639 MOVL (CX), SI 7640 MOVL -4(CX)(BX*1), CX 7641 MOVL SI, (AX) 7642 MOVL CX, -4(AX)(BX*1) 7643 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7644 7645emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: 7646 MOVQ (CX), SI 7647 MOVQ -8(CX)(BX*1), CX 7648 MOVQ SI, (AX) 7649 MOVQ CX, -8(AX)(BX*1) 7650 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7651 7652emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: 7653 MOVOU (CX), X0 7654 MOVOU -16(CX)(BX*1), X1 7655 MOVOU X0, (AX) 7656 MOVOU X1, -16(AX)(BX*1) 7657 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B 7658 7659emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: 7660 MOVOU (CX), X0 7661 MOVOU 16(CX), X1 7662 MOVOU -32(CX)(BX*1), X2 7663 MOVOU -16(CX)(BX*1), X3 7664 MOVOU X0, (AX) 7665 MOVOU X1, 16(AX) 7666 MOVOU X2, -32(AX)(BX*1) 7667 MOVOU X3, -16(AX)(BX*1) 7668 
7669memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: 7670 MOVQ DX, AX 7671 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B 7672 7673memmove_long_emit_remainder_encodeBetterBlockAsm12B: 7674 LEAQ (AX)(SI*1), DX 7675 MOVL SI, BX 7676 7677 // genMemMoveLong 7678 MOVOU (CX), X0 7679 MOVOU 16(CX), X1 7680 MOVOU -32(CX)(BX*1), X2 7681 MOVOU -16(CX)(BX*1), X3 7682 MOVQ BX, DI 7683 SHRQ $0x05, DI 7684 MOVQ AX, SI 7685 ANDL $0x0000001f, SI 7686 MOVQ $0x00000040, R8 7687 SUBQ SI, R8 7688 DECQ DI 7689 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7690 LEAQ -32(CX)(R8*1), SI 7691 LEAQ -32(AX)(R8*1), R9 7692 7693emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: 7694 MOVOU (SI), X4 7695 MOVOU 16(SI), X5 7696 MOVOA X4, (R9) 7697 MOVOA X5, 16(R9) 7698 ADDQ $0x20, R9 7699 ADDQ $0x20, SI 7700 ADDQ $0x20, R8 7701 DECQ DI 7702 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back 7703 7704emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: 7705 MOVOU -32(CX)(R8*1), X4 7706 MOVOU -16(CX)(R8*1), X5 7707 MOVOA X4, -32(AX)(R8*1) 7708 MOVOA X5, -16(AX)(R8*1) 7709 ADDQ $0x20, R8 7710 CMPQ BX, R8 7711 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 7712 MOVOU X0, (AX) 7713 MOVOU X1, 16(AX) 7714 MOVOU X2, -32(AX)(BX*1) 7715 MOVOU X3, -16(AX)(BX*1) 7716 MOVQ DX, AX 7717 7718emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: 7719 MOVQ dst_base+0(FP), CX 7720 SUBQ CX, AX 7721 MOVQ AX, ret+48(FP) 7722 RET 7723 7724// func encodeBetterBlockAsm10B(dst []byte, src []byte) int 7725// Requires: SSE2 7726TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 7727 MOVQ dst_base+0(FP), AX 7728 MOVQ $0x000000a0, CX 7729 LEAQ 24(SP), DX 7730 PXOR X0, X0 7731 7732zero_loop_encodeBetterBlockAsm10B: 7733 MOVOU X0, (DX) 7734 MOVOU X0, 16(DX) 7735 MOVOU X0, 32(DX) 7736 MOVOU X0, 48(DX) 7737 MOVOU X0, 64(DX) 7738 
MOVOU X0, 80(DX) 7739 MOVOU X0, 96(DX) 7740 MOVOU X0, 112(DX) 7741 ADDQ $0x80, DX 7742 DECQ CX 7743 JNZ zero_loop_encodeBetterBlockAsm10B 7744 MOVL $0x00000000, 12(SP) 7745 MOVQ src_len+32(FP), CX 7746 LEAQ -6(CX), DX 7747 LEAQ -8(CX), SI 7748 MOVL SI, 8(SP) 7749 SHRQ $0x05, CX 7750 SUBL CX, DX 7751 LEAQ (AX)(DX*1), DX 7752 MOVQ DX, (SP) 7753 MOVL $0x00000001, CX 7754 MOVL $0x00000000, 16(SP) 7755 MOVQ src_base+24(FP), DX 7756 7757search_loop_encodeBetterBlockAsm10B: 7758 MOVL CX, SI 7759 SUBL 12(SP), SI 7760 SHRL $0x05, SI 7761 LEAL 1(CX)(SI*1), SI 7762 CMPL SI, 8(SP) 7763 JGE emit_remainder_encodeBetterBlockAsm10B 7764 MOVQ (DX)(CX*1), DI 7765 MOVL SI, 20(SP) 7766 MOVQ $0x0000cf1bbcdcbf9b, R9 7767 MOVQ $0x9e3779b1, SI 7768 MOVQ DI, R10 7769 MOVQ DI, R11 7770 SHLQ $0x10, R10 7771 IMULQ R9, R10 7772 SHRQ $0x34, R10 7773 SHLQ $0x20, R11 7774 IMULQ SI, R11 7775 SHRQ $0x36, R11 7776 MOVL 24(SP)(R10*4), SI 7777 MOVL 16408(SP)(R11*4), R8 7778 MOVL CX, 24(SP)(R10*4) 7779 MOVL CX, 16408(SP)(R11*4) 7780 CMPL (DX)(SI*1), DI 7781 JEQ candidate_match_encodeBetterBlockAsm10B 7782 CMPL (DX)(R8*1), DI 7783 JEQ candidateS_match_encodeBetterBlockAsm10B 7784 MOVL 20(SP), CX 7785 JMP search_loop_encodeBetterBlockAsm10B 7786 7787candidateS_match_encodeBetterBlockAsm10B: 7788 SHRQ $0x08, DI 7789 MOVQ DI, R10 7790 SHLQ $0x10, R10 7791 IMULQ R9, R10 7792 SHRQ $0x34, R10 7793 MOVL 24(SP)(R10*4), SI 7794 INCL CX 7795 MOVL CX, 24(SP)(R10*4) 7796 CMPL (DX)(SI*1), DI 7797 JEQ candidate_match_encodeBetterBlockAsm10B 7798 DECL CX 7799 MOVL R8, SI 7800 7801candidate_match_encodeBetterBlockAsm10B: 7802 MOVL 12(SP), DI 7803 TESTL SI, SI 7804 JZ match_extend_back_end_encodeBetterBlockAsm10B 7805 7806match_extend_back_loop_encodeBetterBlockAsm10B: 7807 CMPL CX, DI 7808 JLE match_extend_back_end_encodeBetterBlockAsm10B 7809 MOVB -1(DX)(SI*1), BL 7810 MOVB -1(DX)(CX*1), R8 7811 CMPB BL, R8 7812 JNE match_extend_back_end_encodeBetterBlockAsm10B 7813 LEAL -1(CX), CX 7814 DECL SI 7815 JZ 
match_extend_back_end_encodeBetterBlockAsm10B 7816 JMP match_extend_back_loop_encodeBetterBlockAsm10B 7817 7818match_extend_back_end_encodeBetterBlockAsm10B: 7819 MOVL CX, DI 7820 SUBL 12(SP), DI 7821 LEAQ 3(AX)(DI*1), DI 7822 CMPQ DI, (SP) 7823 JL match_dst_size_check_encodeBetterBlockAsm10B 7824 MOVQ $0x00000000, ret+48(FP) 7825 RET 7826 7827match_dst_size_check_encodeBetterBlockAsm10B: 7828 MOVL CX, DI 7829 ADDL $0x04, CX 7830 ADDL $0x04, SI 7831 MOVQ src_len+32(FP), R8 7832 SUBL CX, R8 7833 LEAQ (DX)(CX*1), R9 7834 LEAQ (DX)(SI*1), R10 7835 7836 // matchLen 7837 XORL R12, R12 7838 CMPL R8, $0x08 7839 JL matchlen_single_match_nolit_encodeBetterBlockAsm10B 7840 7841matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: 7842 MOVQ (R9)(R12*1), R11 7843 XORQ (R10)(R12*1), R11 7844 TESTQ R11, R11 7845 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B 7846 BSFQ R11, R11 7847 SARQ $0x03, R11 7848 LEAL (R12)(R11*1), R12 7849 JMP match_nolit_end_encodeBetterBlockAsm10B 7850 7851matchlen_loop_match_nolit_encodeBetterBlockAsm10B: 7852 LEAL -8(R8), R8 7853 LEAL 8(R12), R12 7854 CMPL R8, $0x08 7855 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B 7856 7857matchlen_single_match_nolit_encodeBetterBlockAsm10B: 7858 TESTL R8, R8 7859 JZ match_nolit_end_encodeBetterBlockAsm10B 7860 7861matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B: 7862 MOVB (R9)(R12*1), R11 7863 CMPB (R10)(R12*1), R11 7864 JNE match_nolit_end_encodeBetterBlockAsm10B 7865 LEAL 1(R12), R12 7866 DECL R8 7867 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B 7868 7869match_nolit_end_encodeBetterBlockAsm10B: 7870 MOVL CX, R8 7871 SUBL SI, R8 7872 7873 // Check if repeat 7874 CMPL 16(SP), R8 7875 JEQ match_is_repeat_encodeBetterBlockAsm10B 7876 MOVL R8, 16(SP) 7877 MOVL 12(SP), SI 7878 CMPL SI, DI 7879 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B 7880 MOVL DI, R9 7881 MOVL DI, 12(SP) 7882 LEAQ (DX)(SI*1), R10 7883 SUBL SI, R9 7884 LEAL -1(R9), SI 7885 CMPL SI, 
$0x3c 7886 JLT one_byte_match_emit_encodeBetterBlockAsm10B 7887 CMPL SI, $0x00000100 7888 JLT two_bytes_match_emit_encodeBetterBlockAsm10B 7889 MOVB $0xf4, (AX) 7890 MOVW SI, 1(AX) 7891 ADDQ $0x03, AX 7892 JMP memmove_long_match_emit_encodeBetterBlockAsm10B 7893 7894two_bytes_match_emit_encodeBetterBlockAsm10B: 7895 MOVB $0xf0, (AX) 7896 MOVB SI, 1(AX) 7897 ADDQ $0x02, AX 7898 CMPL SI, $0x40 7899 JL memmove_match_emit_encodeBetterBlockAsm10B 7900 JMP memmove_long_match_emit_encodeBetterBlockAsm10B 7901 7902one_byte_match_emit_encodeBetterBlockAsm10B: 7903 SHLB $0x02, SI 7904 MOVB SI, (AX) 7905 ADDQ $0x01, AX 7906 7907memmove_match_emit_encodeBetterBlockAsm10B: 7908 LEAQ (AX)(R9*1), SI 7909 7910 // genMemMoveShort 7911 CMPQ R9, $0x03 7912 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_1or2 7913 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_3 7914 CMPQ R9, $0x08 7915 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 7916 CMPQ R9, $0x10 7917 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 7918 CMPQ R9, $0x20 7919 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 7920 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 7921 7922emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_1or2: 7923 MOVB (R10), R11 7924 MOVB -1(R10)(R9*1), R10 7925 MOVB R11, (AX) 7926 MOVB R10, -1(AX)(R9*1) 7927 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B 7928 7929emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_3: 7930 MOVW (R10), R11 7931 MOVB 2(R10), R10 7932 MOVW R11, (AX) 7933 MOVB R10, 2(AX) 7934 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B 7935 7936emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: 7937 MOVL (R10), R11 7938 MOVL -4(R10)(R9*1), R10 7939 MOVL R11, (AX) 7940 MOVL R10, -4(AX)(R9*1) 7941 JMP 
memmove_end_copy_match_emit_encodeBetterBlockAsm10B 7942 7943emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: 7944 MOVQ (R10), R11 7945 MOVQ -8(R10)(R9*1), R10 7946 MOVQ R11, (AX) 7947 MOVQ R10, -8(AX)(R9*1) 7948 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B 7949 7950emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: 7951 MOVOU (R10), X0 7952 MOVOU -16(R10)(R9*1), X1 7953 MOVOU X0, (AX) 7954 MOVOU X1, -16(AX)(R9*1) 7955 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B 7956 7957emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: 7958 MOVOU (R10), X0 7959 MOVOU 16(R10), X1 7960 MOVOU -32(R10)(R9*1), X2 7961 MOVOU -16(R10)(R9*1), X3 7962 MOVOU X0, (AX) 7963 MOVOU X1, 16(AX) 7964 MOVOU X2, -32(AX)(R9*1) 7965 MOVOU X3, -16(AX)(R9*1) 7966 7967memmove_end_copy_match_emit_encodeBetterBlockAsm10B: 7968 MOVQ SI, AX 7969 JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B 7970 7971memmove_long_match_emit_encodeBetterBlockAsm10B: 7972 LEAQ (AX)(R9*1), SI 7973 7974 // genMemMoveLong 7975 MOVOU (R10), X0 7976 MOVOU 16(R10), X1 7977 MOVOU -32(R10)(R9*1), X2 7978 MOVOU -16(R10)(R9*1), X3 7979 MOVQ R9, R13 7980 SHRQ $0x05, R13 7981 MOVQ AX, R11 7982 ANDL $0x0000001f, R11 7983 MOVQ $0x00000040, R14 7984 SUBQ R11, R14 7985 DECQ R13 7986 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 7987 LEAQ -32(R10)(R14*1), R11 7988 LEAQ -32(AX)(R14*1), R15 7989 7990emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: 7991 MOVOU (R11), X4 7992 MOVOU 16(R11), X5 7993 MOVOA X4, (R15) 7994 MOVOA X5, 16(R15) 7995 ADDQ $0x20, R15 7996 ADDQ $0x20, R11 7997 ADDQ $0x20, R14 7998 DECQ R13 7999 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back 8000 8001emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: 8002 MOVOU -32(R10)(R14*1), X4 8003 MOVOU -16(R10)(R14*1), X5 8004 MOVOA X4, 
-32(AX)(R14*1) 8005 MOVOA X5, -16(AX)(R14*1) 8006 ADDQ $0x20, R14 8007 CMPQ R9, R14 8008 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 8009 MOVOU X0, (AX) 8010 MOVOU X1, 16(AX) 8011 MOVOU X2, -32(AX)(R9*1) 8012 MOVOU X3, -16(AX)(R9*1) 8013 MOVQ SI, AX 8014 8015emit_literal_done_match_emit_encodeBetterBlockAsm10B: 8016 ADDL R12, CX 8017 ADDL $0x04, R12 8018 MOVL CX, 12(SP) 8019 8020 // emitCopy 8021two_byte_offset_match_nolit_encodeBetterBlockAsm10B: 8022 CMPL R12, $0x40 8023 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B 8024 MOVB $0xee, (AX) 8025 MOVW R8, 1(AX) 8026 LEAL -60(R12), R12 8027 ADDQ $0x03, AX 8028 8029 // emitRepeat 8030 MOVL R12, SI 8031 LEAL -4(R12), R12 8032 CMPL SI, $0x08 8033 JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short 8034 CMPL SI, $0x0c 8035 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short 8036 CMPL R8, $0x00000800 8037 JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short 8038 8039cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: 8040 CMPL R12, $0x00000104 8041 JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short 8042 LEAL -256(R12), R12 8043 MOVW $0x0019, (AX) 8044 MOVW R12, 2(AX) 8045 ADDQ $0x04, AX 8046 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8047 8048repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: 8049 LEAL -4(R12), R12 8050 MOVW $0x0015, (AX) 8051 MOVB R12, 2(AX) 8052 ADDQ $0x03, AX 8053 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8054 8055repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: 8056 SHLL $0x02, R12 8057 ORL $0x01, R12 8058 MOVW R12, (AX) 8059 ADDQ $0x02, AX 8060 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8061 8062repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: 8063 XORQ SI, SI 8064 LEAL 1(SI)(R12*4), R12 8065 MOVB R8, 1(AX) 8066 SARL $0x08, R8 8067 SHLL $0x05, R8 8068 ORL R8, 
R12 8069 MOVB R12, (AX) 8070 ADDQ $0x02, AX 8071 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8072 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B 8073 8074two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: 8075 CMPL R12, $0x0c 8076 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B 8077 CMPL R8, $0x00000800 8078 JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B 8079 MOVB $0x01, BL 8080 LEAL -16(BX)(R12*4), R12 8081 MOVB R8, 1(AX) 8082 SHRL $0x08, R8 8083 SHLL $0x05, R8 8084 ORL R8, R12 8085 MOVB R12, (AX) 8086 ADDQ $0x02, AX 8087 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8088 8089emit_copy_three_match_nolit_encodeBetterBlockAsm10B: 8090 MOVB $0x02, BL 8091 LEAL -4(BX)(R12*4), R12 8092 MOVB R12, (AX) 8093 MOVW R8, 1(AX) 8094 ADDQ $0x03, AX 8095 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8096 8097match_is_repeat_encodeBetterBlockAsm10B: 8098 MOVL 12(SP), SI 8099 CMPL SI, DI 8100 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B 8101 MOVL DI, R9 8102 MOVL DI, 12(SP) 8103 LEAQ (DX)(SI*1), R10 8104 SUBL SI, R9 8105 LEAL -1(R9), SI 8106 CMPL SI, $0x3c 8107 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B 8108 CMPL SI, $0x00000100 8109 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B 8110 MOVB $0xf4, (AX) 8111 MOVW SI, 1(AX) 8112 ADDQ $0x03, AX 8113 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B 8114 8115two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: 8116 MOVB $0xf0, (AX) 8117 MOVB SI, 1(AX) 8118 ADDQ $0x02, AX 8119 CMPL SI, $0x40 8120 JL memmove_match_emit_repeat_encodeBetterBlockAsm10B 8121 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B 8122 8123one_byte_match_emit_repeat_encodeBetterBlockAsm10B: 8124 SHLB $0x02, SI 8125 MOVB SI, (AX) 8126 ADDQ $0x01, AX 8127 8128memmove_match_emit_repeat_encodeBetterBlockAsm10B: 8129 LEAQ (AX)(R9*1), SI 8130 8131 // genMemMoveShort 8132 CMPQ R9, $0x03 8133 JB 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_1or2 8134 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_3 8135 CMPQ R9, $0x08 8136 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 8137 CMPQ R9, $0x10 8138 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 8139 CMPQ R9, $0x20 8140 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 8141 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 8142 8143emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_1or2: 8144 MOVB (R10), R11 8145 MOVB -1(R10)(R9*1), R10 8146 MOVB R11, (AX) 8147 MOVB R10, -1(AX)(R9*1) 8148 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 8149 8150emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_3: 8151 MOVW (R10), R11 8152 MOVB 2(R10), R10 8153 MOVW R11, (AX) 8154 MOVB R10, 2(AX) 8155 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 8156 8157emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: 8158 MOVL (R10), R11 8159 MOVL -4(R10)(R9*1), R10 8160 MOVL R11, (AX) 8161 MOVL R10, -4(AX)(R9*1) 8162 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 8163 8164emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: 8165 MOVQ (R10), R11 8166 MOVQ -8(R10)(R9*1), R10 8167 MOVQ R11, (AX) 8168 MOVQ R10, -8(AX)(R9*1) 8169 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 8170 8171emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: 8172 MOVOU (R10), X0 8173 MOVOU -16(R10)(R9*1), X1 8174 MOVOU X0, (AX) 8175 MOVOU X1, -16(AX)(R9*1) 8176 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B 8177 8178emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: 8179 MOVOU (R10), X0 8180 MOVOU 16(R10), X1 8181 
MOVOU -32(R10)(R9*1), X2 8182 MOVOU -16(R10)(R9*1), X3 8183 MOVOU X0, (AX) 8184 MOVOU X1, 16(AX) 8185 MOVOU X2, -32(AX)(R9*1) 8186 MOVOU X3, -16(AX)(R9*1) 8187 8188memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: 8189 MOVQ SI, AX 8190 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B 8191 8192memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: 8193 LEAQ (AX)(R9*1), SI 8194 8195 // genMemMoveLong 8196 MOVOU (R10), X0 8197 MOVOU 16(R10), X1 8198 MOVOU -32(R10)(R9*1), X2 8199 MOVOU -16(R10)(R9*1), X3 8200 MOVQ R9, R13 8201 SHRQ $0x05, R13 8202 MOVQ AX, R11 8203 ANDL $0x0000001f, R11 8204 MOVQ $0x00000040, R14 8205 SUBQ R11, R14 8206 DECQ R13 8207 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 8208 LEAQ -32(R10)(R14*1), R11 8209 LEAQ -32(AX)(R14*1), R15 8210 8211emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: 8212 MOVOU (R11), X4 8213 MOVOU 16(R11), X5 8214 MOVOA X4, (R15) 8215 MOVOA X5, 16(R15) 8216 ADDQ $0x20, R15 8217 ADDQ $0x20, R11 8218 ADDQ $0x20, R14 8219 DECQ R13 8220 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back 8221 8222emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: 8223 MOVOU -32(R10)(R14*1), X4 8224 MOVOU -16(R10)(R14*1), X5 8225 MOVOA X4, -32(AX)(R14*1) 8226 MOVOA X5, -16(AX)(R14*1) 8227 ADDQ $0x20, R14 8228 CMPQ R9, R14 8229 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 8230 MOVOU X0, (AX) 8231 MOVOU X1, 16(AX) 8232 MOVOU X2, -32(AX)(R9*1) 8233 MOVOU X3, -16(AX)(R9*1) 8234 MOVQ SI, AX 8235 8236emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: 8237 ADDL R12, CX 8238 ADDL $0x04, R12 8239 MOVL CX, 12(SP) 8240 8241 // emitRepeat 8242 MOVL R12, SI 8243 LEAL -4(R12), R12 8244 CMPL SI, $0x08 8245 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B 8246 CMPL SI, $0x0c 8247 JGE 
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B 8248 CMPL R8, $0x00000800 8249 JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B 8250 8251cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: 8252 CMPL R12, $0x00000104 8253 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B 8254 LEAL -256(R12), R12 8255 MOVW $0x0019, (AX) 8256 MOVW R12, 2(AX) 8257 ADDQ $0x04, AX 8258 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8259 8260repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: 8261 LEAL -4(R12), R12 8262 MOVW $0x0015, (AX) 8263 MOVB R12, 2(AX) 8264 ADDQ $0x03, AX 8265 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8266 8267repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: 8268 SHLL $0x02, R12 8269 ORL $0x01, R12 8270 MOVW R12, (AX) 8271 ADDQ $0x02, AX 8272 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B 8273 8274repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: 8275 XORQ SI, SI 8276 LEAL 1(SI)(R12*4), R12 8277 MOVB R8, 1(AX) 8278 SARL $0x08, R8 8279 SHLL $0x05, R8 8280 ORL R8, R12 8281 MOVB R12, (AX) 8282 ADDQ $0x02, AX 8283 8284match_nolit_emitcopy_end_encodeBetterBlockAsm10B: 8285 CMPL CX, 8(SP) 8286 JGE emit_remainder_encodeBetterBlockAsm10B 8287 CMPQ AX, (SP) 8288 JL match_nolit_dst_ok_encodeBetterBlockAsm10B 8289 MOVQ $0x00000000, ret+48(FP) 8290 RET 8291 8292match_nolit_dst_ok_encodeBetterBlockAsm10B: 8293 MOVQ $0x0000cf1bbcdcbf9b, SI 8294 MOVQ $0x9e3779b1, R8 8295 INCL DI 8296 MOVQ (DX)(DI*1), R9 8297 MOVQ R9, R10 8298 MOVQ R9, R11 8299 MOVQ R9, R12 8300 SHRQ $0x08, R11 8301 MOVQ R11, R13 8302 SHRQ $0x10, R12 8303 LEAL 1(DI), R14 8304 LEAL 2(DI), R15 8305 MOVQ -2(DX)(CX*1), R9 8306 SHLQ $0x10, R10 8307 IMULQ SI, R10 8308 SHRQ $0x34, R10 8309 SHLQ $0x10, R13 8310 IMULQ SI, R13 8311 SHRQ $0x34, R13 8312 SHLQ $0x20, R11 8313 IMULQ R8, R11 8314 SHRQ $0x36, R11 8315 SHLQ $0x20, R12 8316 IMULQ R8, R12 8317 SHRQ $0x36, R12 8318 MOVL DI, 24(SP)(R10*4) 8319 MOVL R14, 
24(SP)(R13*4) 8320 MOVL R14, 16408(SP)(R11*4) 8321 MOVL R15, 16408(SP)(R12*4) 8322 MOVQ R9, R10 8323 MOVQ R9, R11 8324 SHRQ $0x08, R11 8325 MOVQ R11, R13 8326 LEAL -2(CX), R9 8327 LEAL -1(CX), DI 8328 SHLQ $0x10, R10 8329 IMULQ SI, R10 8330 SHRQ $0x34, R10 8331 SHLQ $0x20, R11 8332 IMULQ R8, R11 8333 SHRQ $0x36, R11 8334 SHLQ $0x10, R13 8335 IMULQ SI, R13 8336 SHRQ $0x34, R13 8337 MOVL R9, 24(SP)(R10*4) 8338 MOVL DI, 16408(SP)(R11*4) 8339 MOVL DI, 24(SP)(R13*4) 8340 JMP search_loop_encodeBetterBlockAsm10B 8341 8342emit_remainder_encodeBetterBlockAsm10B: 8343 MOVQ src_len+32(FP), CX 8344 SUBL 12(SP), CX 8345 LEAQ 3(AX)(CX*1), CX 8346 CMPQ CX, (SP) 8347 JL emit_remainder_ok_encodeBetterBlockAsm10B 8348 MOVQ $0x00000000, ret+48(FP) 8349 RET 8350 8351emit_remainder_ok_encodeBetterBlockAsm10B: 8352 MOVQ src_len+32(FP), CX 8353 MOVL 12(SP), BX 8354 CMPL BX, CX 8355 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B 8356 MOVL CX, SI 8357 MOVL CX, 12(SP) 8358 LEAQ (DX)(BX*1), CX 8359 SUBL BX, SI 8360 LEAL -1(SI), DX 8361 CMPL DX, $0x3c 8362 JLT one_byte_emit_remainder_encodeBetterBlockAsm10B 8363 CMPL DX, $0x00000100 8364 JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B 8365 MOVB $0xf4, (AX) 8366 MOVW DX, 1(AX) 8367 ADDQ $0x03, AX 8368 JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B 8369 8370two_bytes_emit_remainder_encodeBetterBlockAsm10B: 8371 MOVB $0xf0, (AX) 8372 MOVB DL, 1(AX) 8373 ADDQ $0x02, AX 8374 CMPL DX, $0x40 8375 JL memmove_emit_remainder_encodeBetterBlockAsm10B 8376 JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B 8377 8378one_byte_emit_remainder_encodeBetterBlockAsm10B: 8379 SHLB $0x02, DL 8380 MOVB DL, (AX) 8381 ADDQ $0x01, AX 8382 8383memmove_emit_remainder_encodeBetterBlockAsm10B: 8384 LEAQ (AX)(SI*1), DX 8385 MOVL SI, BX 8386 8387 // genMemMoveShort 8388 CMPQ BX, $0x03 8389 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2 8390 JE 
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3 8391 CMPQ BX, $0x08 8392 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7 8393 CMPQ BX, $0x10 8394 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16 8395 CMPQ BX, $0x20 8396 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32 8397 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 8398 8399emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: 8400 MOVB (CX), SI 8401 MOVB -1(CX)(BX*1), CL 8402 MOVB SI, (AX) 8403 MOVB CL, -1(AX)(BX*1) 8404 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B 8405 8406emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: 8407 MOVW (CX), SI 8408 MOVB 2(CX), CL 8409 MOVW SI, (AX) 8410 MOVB CL, 2(AX) 8411 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B 8412 8413emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: 8414 MOVL (CX), SI 8415 MOVL -4(CX)(BX*1), CX 8416 MOVL SI, (AX) 8417 MOVL CX, -4(AX)(BX*1) 8418 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B 8419 8420emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: 8421 MOVQ (CX), SI 8422 MOVQ -8(CX)(BX*1), CX 8423 MOVQ SI, (AX) 8424 MOVQ CX, -8(AX)(BX*1) 8425 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B 8426 8427emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: 8428 MOVOU (CX), X0 8429 MOVOU -16(CX)(BX*1), X1 8430 MOVOU X0, (AX) 8431 MOVOU X1, -16(AX)(BX*1) 8432 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B 8433 8434emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: 8435 MOVOU (CX), X0 8436 MOVOU 16(CX), X1 8437 MOVOU -32(CX)(BX*1), X2 8438 MOVOU -16(CX)(BX*1), X3 8439 MOVOU X0, (AX) 8440 MOVOU X1, 16(AX) 8441 MOVOU X2, -32(AX)(BX*1) 8442 MOVOU X3, -16(AX)(BX*1) 8443 
8444memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: 8445 MOVQ DX, AX 8446 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B 8447 8448memmove_long_emit_remainder_encodeBetterBlockAsm10B: 8449 LEAQ (AX)(SI*1), DX 8450 MOVL SI, BX 8451 8452 // genMemMoveLong 8453 MOVOU (CX), X0 8454 MOVOU 16(CX), X1 8455 MOVOU -32(CX)(BX*1), X2 8456 MOVOU -16(CX)(BX*1), X3 8457 MOVQ BX, DI 8458 SHRQ $0x05, DI 8459 MOVQ AX, SI 8460 ANDL $0x0000001f, SI 8461 MOVQ $0x00000040, R8 8462 SUBQ SI, R8 8463 DECQ DI 8464 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 8465 LEAQ -32(CX)(R8*1), SI 8466 LEAQ -32(AX)(R8*1), R9 8467 8468emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: 8469 MOVOU (SI), X4 8470 MOVOU 16(SI), X5 8471 MOVOA X4, (R9) 8472 MOVOA X5, 16(R9) 8473 ADDQ $0x20, R9 8474 ADDQ $0x20, SI 8475 ADDQ $0x20, R8 8476 DECQ DI 8477 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back 8478 8479emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: 8480 MOVOU -32(CX)(R8*1), X4 8481 MOVOU -16(CX)(R8*1), X5 8482 MOVOA X4, -32(AX)(R8*1) 8483 MOVOA X5, -16(AX)(R8*1) 8484 ADDQ $0x20, R8 8485 CMPQ BX, R8 8486 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 8487 MOVOU X0, (AX) 8488 MOVOU X1, 16(AX) 8489 MOVOU X2, -32(AX)(BX*1) 8490 MOVOU X3, -16(AX)(BX*1) 8491 MOVQ DX, AX 8492 8493emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: 8494 MOVQ dst_base+0(FP), CX 8495 SUBQ CX, AX 8496 MOVQ AX, ret+48(FP) 8497 RET 8498 8499// func encodeBetterBlockAsm8B(dst []byte, src []byte) int 8500// Requires: SSE2 8501TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 8502 MOVQ dst_base+0(FP), AX 8503 MOVQ $0x00000028, CX 8504 LEAQ 24(SP), DX 8505 PXOR X0, X0 8506 8507zero_loop_encodeBetterBlockAsm8B: 8508 MOVOU X0, (DX) 8509 MOVOU X0, 16(DX) 8510 MOVOU X0, 32(DX) 8511 MOVOU X0, 48(DX) 8512 MOVOU X0, 64(DX) 8513 MOVOU 
X0, 80(DX) 8514 MOVOU X0, 96(DX) 8515 MOVOU X0, 112(DX) 8516 ADDQ $0x80, DX 8517 DECQ CX 8518 JNZ zero_loop_encodeBetterBlockAsm8B 8519 MOVL $0x00000000, 12(SP) 8520 MOVQ src_len+32(FP), CX 8521 LEAQ -6(CX), DX 8522 LEAQ -8(CX), SI 8523 MOVL SI, 8(SP) 8524 SHRQ $0x05, CX 8525 SUBL CX, DX 8526 LEAQ (AX)(DX*1), DX 8527 MOVQ DX, (SP) 8528 MOVL $0x00000001, CX 8529 MOVL $0x00000000, 16(SP) 8530 MOVQ src_base+24(FP), DX 8531 8532search_loop_encodeBetterBlockAsm8B: 8533 MOVL CX, SI 8534 SUBL 12(SP), SI 8535 SHRL $0x04, SI 8536 LEAL 1(CX)(SI*1), SI 8537 CMPL SI, 8(SP) 8538 JGE emit_remainder_encodeBetterBlockAsm8B 8539 MOVQ (DX)(CX*1), DI 8540 MOVL SI, 20(SP) 8541 MOVQ $0x0000cf1bbcdcbf9b, R9 8542 MOVQ $0x9e3779b1, SI 8543 MOVQ DI, R10 8544 MOVQ DI, R11 8545 SHLQ $0x10, R10 8546 IMULQ R9, R10 8547 SHRQ $0x36, R10 8548 SHLQ $0x20, R11 8549 IMULQ SI, R11 8550 SHRQ $0x38, R11 8551 MOVL 24(SP)(R10*4), SI 8552 MOVL 4120(SP)(R11*4), R8 8553 MOVL CX, 24(SP)(R10*4) 8554 MOVL CX, 4120(SP)(R11*4) 8555 CMPL (DX)(SI*1), DI 8556 JEQ candidate_match_encodeBetterBlockAsm8B 8557 CMPL (DX)(R8*1), DI 8558 JEQ candidateS_match_encodeBetterBlockAsm8B 8559 MOVL 20(SP), CX 8560 JMP search_loop_encodeBetterBlockAsm8B 8561 8562candidateS_match_encodeBetterBlockAsm8B: 8563 SHRQ $0x08, DI 8564 MOVQ DI, R10 8565 SHLQ $0x10, R10 8566 IMULQ R9, R10 8567 SHRQ $0x36, R10 8568 MOVL 24(SP)(R10*4), SI 8569 INCL CX 8570 MOVL CX, 24(SP)(R10*4) 8571 CMPL (DX)(SI*1), DI 8572 JEQ candidate_match_encodeBetterBlockAsm8B 8573 DECL CX 8574 MOVL R8, SI 8575 8576candidate_match_encodeBetterBlockAsm8B: 8577 MOVL 12(SP), DI 8578 TESTL SI, SI 8579 JZ match_extend_back_end_encodeBetterBlockAsm8B 8580 8581match_extend_back_loop_encodeBetterBlockAsm8B: 8582 CMPL CX, DI 8583 JLE match_extend_back_end_encodeBetterBlockAsm8B 8584 MOVB -1(DX)(SI*1), BL 8585 MOVB -1(DX)(CX*1), R8 8586 CMPB BL, R8 8587 JNE match_extend_back_end_encodeBetterBlockAsm8B 8588 LEAL -1(CX), CX 8589 DECL SI 8590 JZ 
match_extend_back_end_encodeBetterBlockAsm8B 8591 JMP match_extend_back_loop_encodeBetterBlockAsm8B 8592 8593match_extend_back_end_encodeBetterBlockAsm8B: 8594 MOVL CX, DI 8595 SUBL 12(SP), DI 8596 LEAQ 3(AX)(DI*1), DI 8597 CMPQ DI, (SP) 8598 JL match_dst_size_check_encodeBetterBlockAsm8B 8599 MOVQ $0x00000000, ret+48(FP) 8600 RET 8601 8602match_dst_size_check_encodeBetterBlockAsm8B: 8603 MOVL CX, DI 8604 ADDL $0x04, CX 8605 ADDL $0x04, SI 8606 MOVQ src_len+32(FP), R8 8607 SUBL CX, R8 8608 LEAQ (DX)(CX*1), R9 8609 LEAQ (DX)(SI*1), R10 8610 8611 // matchLen 8612 XORL R12, R12 8613 CMPL R8, $0x08 8614 JL matchlen_single_match_nolit_encodeBetterBlockAsm8B 8615 8616matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: 8617 MOVQ (R9)(R12*1), R11 8618 XORQ (R10)(R12*1), R11 8619 TESTQ R11, R11 8620 JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B 8621 BSFQ R11, R11 8622 SARQ $0x03, R11 8623 LEAL (R12)(R11*1), R12 8624 JMP match_nolit_end_encodeBetterBlockAsm8B 8625 8626matchlen_loop_match_nolit_encodeBetterBlockAsm8B: 8627 LEAL -8(R8), R8 8628 LEAL 8(R12), R12 8629 CMPL R8, $0x08 8630 JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B 8631 8632matchlen_single_match_nolit_encodeBetterBlockAsm8B: 8633 TESTL R8, R8 8634 JZ match_nolit_end_encodeBetterBlockAsm8B 8635 8636matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B: 8637 MOVB (R9)(R12*1), R11 8638 CMPB (R10)(R12*1), R11 8639 JNE match_nolit_end_encodeBetterBlockAsm8B 8640 LEAL 1(R12), R12 8641 DECL R8 8642 JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B 8643 8644match_nolit_end_encodeBetterBlockAsm8B: 8645 MOVL CX, R8 8646 SUBL SI, R8 8647 8648 // Check if repeat 8649 CMPL 16(SP), R8 8650 JEQ match_is_repeat_encodeBetterBlockAsm8B 8651 MOVL R8, 16(SP) 8652 MOVL 12(SP), SI 8653 CMPL SI, DI 8654 JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B 8655 MOVL DI, R9 8656 MOVL DI, 12(SP) 8657 LEAQ (DX)(SI*1), R10 8658 SUBL SI, R9 8659 LEAL -1(R9), SI 8660 CMPL SI, $0x3c 8661 JLT 
one_byte_match_emit_encodeBetterBlockAsm8B 8662 CMPL SI, $0x00000100 8663 JLT two_bytes_match_emit_encodeBetterBlockAsm8B 8664 MOVB $0xf4, (AX) 8665 MOVW SI, 1(AX) 8666 ADDQ $0x03, AX 8667 JMP memmove_long_match_emit_encodeBetterBlockAsm8B 8668 8669two_bytes_match_emit_encodeBetterBlockAsm8B: 8670 MOVB $0xf0, (AX) 8671 MOVB SI, 1(AX) 8672 ADDQ $0x02, AX 8673 CMPL SI, $0x40 8674 JL memmove_match_emit_encodeBetterBlockAsm8B 8675 JMP memmove_long_match_emit_encodeBetterBlockAsm8B 8676 8677one_byte_match_emit_encodeBetterBlockAsm8B: 8678 SHLB $0x02, SI 8679 MOVB SI, (AX) 8680 ADDQ $0x01, AX 8681 8682memmove_match_emit_encodeBetterBlockAsm8B: 8683 LEAQ (AX)(R9*1), SI 8684 8685 // genMemMoveShort 8686 CMPQ R9, $0x03 8687 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_1or2 8688 JE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_3 8689 CMPQ R9, $0x08 8690 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 8691 CMPQ R9, $0x10 8692 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 8693 CMPQ R9, $0x20 8694 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 8695 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 8696 8697emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_1or2: 8698 MOVB (R10), R11 8699 MOVB -1(R10)(R9*1), R10 8700 MOVB R11, (AX) 8701 MOVB R10, -1(AX)(R9*1) 8702 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8703 8704emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_3: 8705 MOVW (R10), R11 8706 MOVB 2(R10), R10 8707 MOVW R11, (AX) 8708 MOVB R10, 2(AX) 8709 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8710 8711emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: 8712 MOVL (R10), R11 8713 MOVL -4(R10)(R9*1), R10 8714 MOVL R11, (AX) 8715 MOVL R10, -4(AX)(R9*1) 8716 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8717 
8718emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: 8719 MOVQ (R10), R11 8720 MOVQ -8(R10)(R9*1), R10 8721 MOVQ R11, (AX) 8722 MOVQ R10, -8(AX)(R9*1) 8723 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8724 8725emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: 8726 MOVOU (R10), X0 8727 MOVOU -16(R10)(R9*1), X1 8728 MOVOU X0, (AX) 8729 MOVOU X1, -16(AX)(R9*1) 8730 JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B 8731 8732emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: 8733 MOVOU (R10), X0 8734 MOVOU 16(R10), X1 8735 MOVOU -32(R10)(R9*1), X2 8736 MOVOU -16(R10)(R9*1), X3 8737 MOVOU X0, (AX) 8738 MOVOU X1, 16(AX) 8739 MOVOU X2, -32(AX)(R9*1) 8740 MOVOU X3, -16(AX)(R9*1) 8741 8742memmove_end_copy_match_emit_encodeBetterBlockAsm8B: 8743 MOVQ SI, AX 8744 JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B 8745 8746memmove_long_match_emit_encodeBetterBlockAsm8B: 8747 LEAQ (AX)(R9*1), SI 8748 8749 // genMemMoveLong 8750 MOVOU (R10), X0 8751 MOVOU 16(R10), X1 8752 MOVOU -32(R10)(R9*1), X2 8753 MOVOU -16(R10)(R9*1), X3 8754 MOVQ R9, R13 8755 SHRQ $0x05, R13 8756 MOVQ AX, R11 8757 ANDL $0x0000001f, R11 8758 MOVQ $0x00000040, R14 8759 SUBQ R11, R14 8760 DECQ R13 8761 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8762 LEAQ -32(R10)(R14*1), R11 8763 LEAQ -32(AX)(R14*1), R15 8764 8765emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: 8766 MOVOU (R11), X4 8767 MOVOU 16(R11), X5 8768 MOVOA X4, (R15) 8769 MOVOA X5, 16(R15) 8770 ADDQ $0x20, R15 8771 ADDQ $0x20, R11 8772 ADDQ $0x20, R14 8773 DECQ R13 8774 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back 8775 8776emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 8777 MOVOU -32(R10)(R14*1), X4 8778 MOVOU -16(R10)(R14*1), X5 8779 MOVOA X4, -32(AX)(R14*1) 8780 MOVOA X5, -16(AX)(R14*1) 8781 ADDQ $0x20, R14 8782 CMPQ 
R9, R14 8783 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8784 MOVOU X0, (AX) 8785 MOVOU X1, 16(AX) 8786 MOVOU X2, -32(AX)(R9*1) 8787 MOVOU X3, -16(AX)(R9*1) 8788 MOVQ SI, AX 8789 8790emit_literal_done_match_emit_encodeBetterBlockAsm8B: 8791 ADDL R12, CX 8792 ADDL $0x04, R12 8793 MOVL CX, 12(SP) 8794 8795 // emitCopy 8796two_byte_offset_match_nolit_encodeBetterBlockAsm8B: 8797 CMPL R12, $0x40 8798 JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B 8799 MOVB $0xee, (AX) 8800 MOVW R8, 1(AX) 8801 LEAL -60(R12), R12 8802 ADDQ $0x03, AX 8803 8804 // emitRepeat 8805 MOVL R12, SI 8806 LEAL -4(R12), R12 8807 CMPL SI, $0x08 8808 JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8809 CMPL SI, $0x0c 8810 JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8811 8812cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8813 CMPL R12, $0x00000104 8814 JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short 8815 LEAL -256(R12), R12 8816 MOVW $0x0019, (AX) 8817 MOVW R12, 2(AX) 8818 ADDQ $0x04, AX 8819 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8820 8821repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8822 LEAL -4(R12), R12 8823 MOVW $0x0015, (AX) 8824 MOVB R12, 2(AX) 8825 ADDQ $0x03, AX 8826 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8827 8828repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: 8829 SHLL $0x02, R12 8830 ORL $0x01, R12 8831 MOVW R12, (AX) 8832 ADDQ $0x02, AX 8833 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8834 XORQ SI, SI 8835 LEAL 1(SI)(R12*4), R12 8836 MOVB R8, 1(AX) 8837 SARL $0x08, R8 8838 SHLL $0x05, R8 8839 ORL R8, R12 8840 MOVB R12, (AX) 8841 ADDQ $0x02, AX 8842 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8843 JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B 8844 8845two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: 8846 CMPL R12, $0x0c 8847 JGE 
emit_copy_three_match_nolit_encodeBetterBlockAsm8B 8848 MOVB $0x01, BL 8849 LEAL -16(BX)(R12*4), R12 8850 MOVB R8, 1(AX) 8851 SHRL $0x08, R8 8852 SHLL $0x05, R8 8853 ORL R8, R12 8854 MOVB R12, (AX) 8855 ADDQ $0x02, AX 8856 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8857 8858emit_copy_three_match_nolit_encodeBetterBlockAsm8B: 8859 MOVB $0x02, BL 8860 LEAL -4(BX)(R12*4), R12 8861 MOVB R12, (AX) 8862 MOVW R8, 1(AX) 8863 ADDQ $0x03, AX 8864 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 8865 8866match_is_repeat_encodeBetterBlockAsm8B: 8867 MOVL 12(SP), SI 8868 CMPL SI, DI 8869 JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B 8870 MOVL DI, R8 8871 MOVL DI, 12(SP) 8872 LEAQ (DX)(SI*1), R9 8873 SUBL SI, R8 8874 LEAL -1(R8), SI 8875 CMPL SI, $0x3c 8876 JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B 8877 CMPL SI, $0x00000100 8878 JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B 8879 MOVB $0xf4, (AX) 8880 MOVW SI, 1(AX) 8881 ADDQ $0x03, AX 8882 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B 8883 8884two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: 8885 MOVB $0xf0, (AX) 8886 MOVB SI, 1(AX) 8887 ADDQ $0x02, AX 8888 CMPL SI, $0x40 8889 JL memmove_match_emit_repeat_encodeBetterBlockAsm8B 8890 JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B 8891 8892one_byte_match_emit_repeat_encodeBetterBlockAsm8B: 8893 SHLB $0x02, SI 8894 MOVB SI, (AX) 8895 ADDQ $0x01, AX 8896 8897memmove_match_emit_repeat_encodeBetterBlockAsm8B: 8898 LEAQ (AX)(R8*1), SI 8899 8900 // genMemMoveShort 8901 CMPQ R8, $0x03 8902 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_1or2 8903 JE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_3 8904 CMPQ R8, $0x08 8905 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 8906 CMPQ R8, $0x10 8907 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 8908 CMPQ R8, $0x20 8909 JBE 
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 8910 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 8911 8912emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_1or2: 8913 MOVB (R9), R10 8914 MOVB -1(R9)(R8*1), R9 8915 MOVB R10, (AX) 8916 MOVB R9, -1(AX)(R8*1) 8917 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8918 8919emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_3: 8920 MOVW (R9), R10 8921 MOVB 2(R9), R9 8922 MOVW R10, (AX) 8923 MOVB R9, 2(AX) 8924 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8925 8926emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: 8927 MOVL (R9), R10 8928 MOVL -4(R9)(R8*1), R9 8929 MOVL R10, (AX) 8930 MOVL R9, -4(AX)(R8*1) 8931 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8932 8933emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: 8934 MOVQ (R9), R10 8935 MOVQ -8(R9)(R8*1), R9 8936 MOVQ R10, (AX) 8937 MOVQ R9, -8(AX)(R8*1) 8938 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8939 8940emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: 8941 MOVOU (R9), X0 8942 MOVOU -16(R9)(R8*1), X1 8943 MOVOU X0, (AX) 8944 MOVOU X1, -16(AX)(R8*1) 8945 JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B 8946 8947emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: 8948 MOVOU (R9), X0 8949 MOVOU 16(R9), X1 8950 MOVOU -32(R9)(R8*1), X2 8951 MOVOU -16(R9)(R8*1), X3 8952 MOVOU X0, (AX) 8953 MOVOU X1, 16(AX) 8954 MOVOU X2, -32(AX)(R8*1) 8955 MOVOU X3, -16(AX)(R8*1) 8956 8957memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: 8958 MOVQ SI, AX 8959 JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B 8960 8961memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: 8962 LEAQ (AX)(R8*1), SI 8963 8964 // genMemMoveLong 8965 MOVOU (R9), X0 
8966 MOVOU 16(R9), X1 8967 MOVOU -32(R9)(R8*1), X2 8968 MOVOU -16(R9)(R8*1), X3 8969 MOVQ R8, R11 8970 SHRQ $0x05, R11 8971 MOVQ AX, R10 8972 ANDL $0x0000001f, R10 8973 MOVQ $0x00000040, R13 8974 SUBQ R10, R13 8975 DECQ R11 8976 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8977 LEAQ -32(R9)(R13*1), R10 8978 LEAQ -32(AX)(R13*1), R14 8979 8980emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: 8981 MOVOU (R10), X4 8982 MOVOU 16(R10), X5 8983 MOVOA X4, (R14) 8984 MOVOA X5, 16(R14) 8985 ADDQ $0x20, R14 8986 ADDQ $0x20, R10 8987 ADDQ $0x20, R13 8988 DECQ R11 8989 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back 8990 8991emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 8992 MOVOU -32(R9)(R13*1), X4 8993 MOVOU -16(R9)(R13*1), X5 8994 MOVOA X4, -32(AX)(R13*1) 8995 MOVOA X5, -16(AX)(R13*1) 8996 ADDQ $0x20, R13 8997 CMPQ R8, R13 8998 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 8999 MOVOU X0, (AX) 9000 MOVOU X1, 16(AX) 9001 MOVOU X2, -32(AX)(R8*1) 9002 MOVOU X3, -16(AX)(R8*1) 9003 MOVQ SI, AX 9004 9005emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: 9006 ADDL R12, CX 9007 ADDL $0x04, R12 9008 MOVL CX, 12(SP) 9009 9010 // emitRepeat 9011 MOVL R12, SI 9012 LEAL -4(R12), R12 9013 CMPL SI, $0x08 9014 JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B 9015 CMPL SI, $0x0c 9016 JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B 9017 9018cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: 9019 CMPL R12, $0x00000104 9020 JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B 9021 LEAL -256(R12), R12 9022 MOVW $0x0019, (AX) 9023 MOVW R12, 2(AX) 9024 ADDQ $0x04, AX 9025 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 9026 9027repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: 9028 LEAL -4(R12), R12 9029 MOVW $0x0015, (AX) 9030 
MOVB R12, 2(AX) 9031 ADDQ $0x03, AX 9032 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 9033 9034repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: 9035 SHLL $0x02, R12 9036 ORL $0x01, R12 9037 MOVW R12, (AX) 9038 ADDQ $0x02, AX 9039 JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B 9040 XORQ SI, SI 9041 LEAL 1(SI)(R12*4), R12 9042 MOVB R8, 1(AX) 9043 SARL $0x08, R8 9044 SHLL $0x05, R8 9045 ORL R8, R12 9046 MOVB R12, (AX) 9047 ADDQ $0x02, AX 9048 9049match_nolit_emitcopy_end_encodeBetterBlockAsm8B: 9050 CMPL CX, 8(SP) 9051 JGE emit_remainder_encodeBetterBlockAsm8B 9052 CMPQ AX, (SP) 9053 JL match_nolit_dst_ok_encodeBetterBlockAsm8B 9054 MOVQ $0x00000000, ret+48(FP) 9055 RET 9056 9057match_nolit_dst_ok_encodeBetterBlockAsm8B: 9058 MOVQ $0x0000cf1bbcdcbf9b, SI 9059 MOVQ $0x9e3779b1, R8 9060 INCL DI 9061 MOVQ (DX)(DI*1), R9 9062 MOVQ R9, R10 9063 MOVQ R9, R11 9064 MOVQ R9, R12 9065 SHRQ $0x08, R11 9066 MOVQ R11, R13 9067 SHRQ $0x10, R12 9068 LEAL 1(DI), R14 9069 LEAL 2(DI), R15 9070 MOVQ -2(DX)(CX*1), R9 9071 SHLQ $0x10, R10 9072 IMULQ SI, R10 9073 SHRQ $0x36, R10 9074 SHLQ $0x10, R13 9075 IMULQ SI, R13 9076 SHRQ $0x36, R13 9077 SHLQ $0x20, R11 9078 IMULQ R8, R11 9079 SHRQ $0x38, R11 9080 SHLQ $0x20, R12 9081 IMULQ R8, R12 9082 SHRQ $0x38, R12 9083 MOVL DI, 24(SP)(R10*4) 9084 MOVL R14, 24(SP)(R13*4) 9085 MOVL R14, 4120(SP)(R11*4) 9086 MOVL R15, 4120(SP)(R12*4) 9087 MOVQ R9, R10 9088 MOVQ R9, R11 9089 SHRQ $0x08, R11 9090 MOVQ R11, R13 9091 LEAL -2(CX), R9 9092 LEAL -1(CX), DI 9093 SHLQ $0x10, R10 9094 IMULQ SI, R10 9095 SHRQ $0x36, R10 9096 SHLQ $0x20, R11 9097 IMULQ R8, R11 9098 SHRQ $0x38, R11 9099 SHLQ $0x10, R13 9100 IMULQ SI, R13 9101 SHRQ $0x36, R13 9102 MOVL R9, 24(SP)(R10*4) 9103 MOVL DI, 4120(SP)(R11*4) 9104 MOVL DI, 24(SP)(R13*4) 9105 JMP search_loop_encodeBetterBlockAsm8B 9106 9107emit_remainder_encodeBetterBlockAsm8B: 9108 MOVQ src_len+32(FP), CX 9109 SUBL 12(SP), CX 9110 LEAQ 3(AX)(CX*1), CX 9111 CMPQ CX, (SP) 9112 JL 
emit_remainder_ok_encodeBetterBlockAsm8B 9113 MOVQ $0x00000000, ret+48(FP) 9114 RET 9115 9116emit_remainder_ok_encodeBetterBlockAsm8B: 9117 MOVQ src_len+32(FP), CX 9118 MOVL 12(SP), BX 9119 CMPL BX, CX 9120 JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B 9121 MOVL CX, SI 9122 MOVL CX, 12(SP) 9123 LEAQ (DX)(BX*1), CX 9124 SUBL BX, SI 9125 LEAL -1(SI), DX 9126 CMPL DX, $0x3c 9127 JLT one_byte_emit_remainder_encodeBetterBlockAsm8B 9128 CMPL DX, $0x00000100 9129 JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B 9130 MOVB $0xf4, (AX) 9131 MOVW DX, 1(AX) 9132 ADDQ $0x03, AX 9133 JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B 9134 9135two_bytes_emit_remainder_encodeBetterBlockAsm8B: 9136 MOVB $0xf0, (AX) 9137 MOVB DL, 1(AX) 9138 ADDQ $0x02, AX 9139 CMPL DX, $0x40 9140 JL memmove_emit_remainder_encodeBetterBlockAsm8B 9141 JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B 9142 9143one_byte_emit_remainder_encodeBetterBlockAsm8B: 9144 SHLB $0x02, DL 9145 MOVB DL, (AX) 9146 ADDQ $0x01, AX 9147 9148memmove_emit_remainder_encodeBetterBlockAsm8B: 9149 LEAQ (AX)(SI*1), DX 9150 MOVL SI, BX 9151 9152 // genMemMoveShort 9153 CMPQ BX, $0x03 9154 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2 9155 JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3 9156 CMPQ BX, $0x08 9157 JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7 9158 CMPQ BX, $0x10 9159 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16 9160 CMPQ BX, $0x20 9161 JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32 9162 JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 9163 9164emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: 9165 MOVB (CX), SI 9166 MOVB -1(CX)(BX*1), CL 9167 MOVB SI, (AX) 9168 MOVB CL, -1(AX)(BX*1) 9169 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 9170 
9171emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: 9172 MOVW (CX), SI 9173 MOVB 2(CX), CL 9174 MOVW SI, (AX) 9175 MOVB CL, 2(AX) 9176 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 9177 9178emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: 9179 MOVL (CX), SI 9180 MOVL -4(CX)(BX*1), CX 9181 MOVL SI, (AX) 9182 MOVL CX, -4(AX)(BX*1) 9183 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 9184 9185emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: 9186 MOVQ (CX), SI 9187 MOVQ -8(CX)(BX*1), CX 9188 MOVQ SI, (AX) 9189 MOVQ CX, -8(AX)(BX*1) 9190 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 9191 9192emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: 9193 MOVOU (CX), X0 9194 MOVOU -16(CX)(BX*1), X1 9195 MOVOU X0, (AX) 9196 MOVOU X1, -16(AX)(BX*1) 9197 JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B 9198 9199emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: 9200 MOVOU (CX), X0 9201 MOVOU 16(CX), X1 9202 MOVOU -32(CX)(BX*1), X2 9203 MOVOU -16(CX)(BX*1), X3 9204 MOVOU X0, (AX) 9205 MOVOU X1, 16(AX) 9206 MOVOU X2, -32(AX)(BX*1) 9207 MOVOU X3, -16(AX)(BX*1) 9208 9209memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: 9210 MOVQ DX, AX 9211 JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B 9212 9213memmove_long_emit_remainder_encodeBetterBlockAsm8B: 9214 LEAQ (AX)(SI*1), DX 9215 MOVL SI, BX 9216 9217 // genMemMoveLong 9218 MOVOU (CX), X0 9219 MOVOU 16(CX), X1 9220 MOVOU -32(CX)(BX*1), X2 9221 MOVOU -16(CX)(BX*1), X3 9222 MOVQ BX, DI 9223 SHRQ $0x05, DI 9224 MOVQ AX, SI 9225 ANDL $0x0000001f, SI 9226 MOVQ $0x00000040, R8 9227 SUBQ SI, R8 9228 DECQ DI 9229 JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 9230 LEAQ -32(CX)(R8*1), SI 9231 LEAQ -32(AX)(R8*1), R9 9232 9233emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: 
// NOTE(review): listing line numbers are fused into this text and the original newlines are
// collapsed, so the physical line below mixes two functions. It first finishes the 32-byte
// copy loops and the return of encodeBetterBlockAsm8B (ret = AX - dst_base), then begins
// encodeSnappyBlockAsm: the zero loop clears 0x200 * 0x80 = 65536 bytes starting at 24(SP),
// 12(SP) is set to 0, 8(SP) to src_len-8, (SP) to dst_base + (src_len-5) - (src_len>>5),
// and CX and 16(SP) both start at 1. The search loop leaves for emit_remainder once
// CX + 4 + ((CX - 12(SP)) >> 6) is >= 8(SP); that value is also saved in 20(SP).
9234 MOVOU (SI), X4 9235 MOVOU 16(SI), X5 9236 MOVOA X4, (R9) 9237 MOVOA X5, 16(R9) 9238 ADDQ $0x20, R9 9239 ADDQ $0x20, SI 9240 ADDQ $0x20, R8 9241 DECQ DI 9242 JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back 9243 9244emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: 9245 MOVOU -32(CX)(R8*1), X4 9246 MOVOU -16(CX)(R8*1), X5 9247 MOVOA X4, -32(AX)(R8*1) 9248 MOVOA X5, -16(AX)(R8*1) 9249 ADDQ $0x20, R8 9250 CMPQ BX, R8 9251 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 9252 MOVOU X0, (AX) 9253 MOVOU X1, 16(AX) 9254 MOVOU X2, -32(AX)(BX*1) 9255 MOVOU X3, -16(AX)(BX*1) 9256 MOVQ DX, AX 9257 9258emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: 9259 MOVQ dst_base+0(FP), CX 9260 SUBQ CX, AX 9261 MOVQ AX, ret+48(FP) 9262 RET 9263 9264// func encodeSnappyBlockAsm(dst []byte, src []byte) int 9265// Requires: SSE2 9266TEXT ·encodeSnappyBlockAsm(SB), $65560-56 9267 MOVQ dst_base+0(FP), AX 9268 MOVQ $0x00000200, CX 9269 LEAQ 24(SP), DX 9270 PXOR X0, X0 9271 9272zero_loop_encodeSnappyBlockAsm: 9273 MOVOU X0, (DX) 9274 MOVOU X0, 16(DX) 9275 MOVOU X0, 32(DX) 9276 MOVOU X0, 48(DX) 9277 MOVOU X0, 64(DX) 9278 MOVOU X0, 80(DX) 9279 MOVOU X0, 96(DX) 9280 MOVOU X0, 112(DX) 9281 ADDQ $0x80, DX 9282 DECQ CX 9283 JNZ zero_loop_encodeSnappyBlockAsm 9284 MOVL $0x00000000, 12(SP) 9285 MOVQ src_len+32(FP), CX 9286 LEAQ -5(CX), DX 9287 LEAQ -8(CX), SI 9288 MOVL SI, 8(SP) 9289 SHRQ $0x05, CX 9290 SUBL CX, DX 9291 LEAQ (AX)(DX*1), DX 9292 MOVQ DX, (SP) 9293 MOVL $0x00000001, CX 9294 MOVL CX, 16(SP) 9295 MOVQ src_base+24(FP), DX 9296 9297search_loop_encodeSnappyBlockAsm: 9298 MOVL CX, SI 9299 SUBL 12(SP), SI 9300 SHRL $0x06, SI 9301 LEAL 4(CX)(SI*1), SI 9302 CMPL SI, 8(SP) 9303 JGE emit_remainder_encodeSnappyBlockAsm 9304 MOVQ (DX)(CX*1), DI 9305 MOVL SI, 20(SP) 9306 MOVQ $0x0000cf1bbcdcbf9b, R9 9307 MOVQ DI, R10 9308 MOVQ DI, R11 9309 SHRQ $0x08, R11 9310 SHLQ $0x10, R10 9311 
// Continues the search loop: two table slots under 24(SP) are read into SI and R8 and then
// overwritten with CX and CX+1. The four bytes at src+CX+1 are compared with the four bytes
// 16(SP) positions earlier; on a match the start (DI) is walked backwards while the
// preceding bytes agree and DI > 12(SP), and emission of the pending literal run
// [12(SP), DI) begins with its length header.
IMULQ R9, R10 9312 SHRQ $0x32, R10 9313 SHLQ $0x10, R11 9314 IMULQ R9, R11 9315 SHRQ $0x32, R11 9316 MOVL 24(SP)(R10*4), SI 9317 MOVL 24(SP)(R11*4), R8 9318 MOVL CX, 24(SP)(R10*4) 9319 LEAL 1(CX), R10 9320 MOVL R10, 24(SP)(R11*4) 9321 MOVQ DI, R10 9322 SHRQ $0x10, R10 9323 SHLQ $0x10, R10 9324 IMULQ R9, R10 9325 SHRQ $0x32, R10 9326 MOVL CX, R9 9327 SUBL 16(SP), R9 9328 MOVL 1(DX)(R9*1), R11 9329 MOVQ DI, R9 9330 SHRQ $0x08, R9 9331 CMPL R9, R11 9332 JNE no_repeat_found_encodeSnappyBlockAsm 9333 LEAL 1(CX), DI 9334 MOVL 12(SP), SI 9335 MOVL DI, R8 9336 SUBL 16(SP), R8 9337 JZ repeat_extend_back_end_encodeSnappyBlockAsm 9338 9339repeat_extend_back_loop_encodeSnappyBlockAsm: 9340 CMPL DI, SI 9341 JLE repeat_extend_back_end_encodeSnappyBlockAsm 9342 MOVB -1(DX)(R8*1), BL 9343 MOVB -1(DX)(DI*1), R9 9344 CMPB BL, R9 9345 JNE repeat_extend_back_end_encodeSnappyBlockAsm 9346 LEAL -1(DI), DI 9347 DECL R8 9348 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm 9349 9350repeat_extend_back_end_encodeSnappyBlockAsm: 9351 MOVL 12(SP), SI 9352 CMPL SI, DI 9353 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm 9354 MOVL DI, R8 9355 MOVL DI, 12(SP) 9356 LEAQ (DX)(SI*1), R9 9357 SUBL SI, R8 9358 LEAL -1(R8), SI 9359 CMPL SI, $0x3c 9360 JLT one_byte_repeat_emit_encodeSnappyBlockAsm 9361 CMPL SI, $0x00000100 9362 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm 9363 CMPL SI, $0x00010000 9364 JLT three_bytes_repeat_emit_encodeSnappyBlockAsm 9365 CMPL SI, $0x01000000 9366 JLT four_bytes_repeat_emit_encodeSnappyBlockAsm 9367 MOVB $0xfc, (AX) 9368 MOVL SI, 1(AX) 9369 ADDQ $0x05, AX 9370 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 9371 9372four_bytes_repeat_emit_encodeSnappyBlockAsm: 9373 MOVL SI, R10 9374 SHRL $0x10, R10 9375 MOVB $0xf8, (AX) 9376 MOVW SI, 1(AX) 9377 MOVB R10, 3(AX) 9378 ADDQ $0x04, AX 9379 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 9380 9381three_bytes_repeat_emit_encodeSnappyBlockAsm: 9382 MOVB $0xf4, (AX) 9383 MOVW SI, 1(AX) 9384 ADDQ $0x03, AX 9385 JMP 
memmove_long_repeat_emit_encodeSnappyBlockAsm 9386 9387two_bytes_repeat_emit_encodeSnappyBlockAsm: 9388 MOVB $0xf0, (AX) 9389 MOVB SI, 1(AX) 9390 ADDQ $0x02, AX 9391 CMPL SI, $0x40 9392 JL memmove_repeat_emit_encodeSnappyBlockAsm 9393 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm 9394 9395one_byte_repeat_emit_encodeSnappyBlockAsm: 9396 SHLB $0x02, SI 9397 MOVB SI, (AX) 9398 ADDQ $0x01, AX 9399 9400memmove_repeat_emit_encodeSnappyBlockAsm: 9401 LEAQ (AX)(R8*1), SI 9402 9403 // genMemMoveShort 9404 CMPQ R8, $0x03 9405 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2 9406 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3 9407 CMPQ R8, $0x08 9408 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7 9409 CMPQ R8, $0x10 9410 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 9411 CMPQ R8, $0x20 9412 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 9413 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 9414 9415emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2: 9416 MOVB (R9), R10 9417 MOVB -1(R9)(R8*1), R9 9418 MOVB R10, (AX) 9419 MOVB R9, -1(AX)(R8*1) 9420 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9421 9422emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3: 9423 MOVW (R9), R10 9424 MOVB 2(R9), R9 9425 MOVW R10, (AX) 9426 MOVB R9, 2(AX) 9427 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9428 9429emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7: 9430 MOVL (R9), R10 9431 MOVL -4(R9)(R8*1), R9 9432 MOVL R10, (AX) 9433 MOVL R9, -4(AX)(R8*1) 9434 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9435 9436emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: 9437 MOVQ (R9), R10 9438 MOVQ -8(R9)(R8*1), R9 9439 MOVQ R10, (AX) 9440 MOVQ R9, -8(AX)(R8*1) 9441 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9442 
// Remaining short-copy cases (17..32 and 33..64 bytes), then the long literal copy: it
// preloads the first and last 32 bytes into X0..X3, moves 32 bytes per iteration using
// MOVOA stores, and finally writes X0..X3 over the head and tail with MOVOU stores.
9443emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: 9444 MOVOU (R9), X0 9445 MOVOU -16(R9)(R8*1), X1 9446 MOVOU X0, (AX) 9447 MOVOU X1, -16(AX)(R8*1) 9448 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm 9449 9450emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: 9451 MOVOU (R9), X0 9452 MOVOU 16(R9), X1 9453 MOVOU -32(R9)(R8*1), X2 9454 MOVOU -16(R9)(R8*1), X3 9455 MOVOU X0, (AX) 9456 MOVOU X1, 16(AX) 9457 MOVOU X2, -32(AX)(R8*1) 9458 MOVOU X3, -16(AX)(R8*1) 9459 9460memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: 9461 MOVQ SI, AX 9462 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm 9463 9464memmove_long_repeat_emit_encodeSnappyBlockAsm: 9465 LEAQ (AX)(R8*1), SI 9466 9467 // genMemMoveLong 9468 MOVOU (R9), X0 9469 MOVOU 16(R9), X1 9470 MOVOU -32(R9)(R8*1), X2 9471 MOVOU -16(R9)(R8*1), X3 9472 MOVQ R8, R11 9473 SHRQ $0x05, R11 9474 MOVQ AX, R10 9475 ANDL $0x0000001f, R10 9476 MOVQ $0x00000040, R12 9477 SUBQ R10, R12 9478 DECQ R11 9479 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9480 LEAQ -32(R9)(R12*1), R10 9481 LEAQ -32(AX)(R12*1), R13 9482 9483emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: 9484 MOVOU (R10), X4 9485 MOVOU 16(R10), X5 9486 MOVOA X4, (R13) 9487 MOVOA X5, 16(R13) 9488 ADDQ $0x20, R13 9489 ADDQ $0x20, R10 9490 ADDQ $0x20, R12 9491 DECQ R11 9492 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back 9493 9494emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 9495 MOVOU -32(R9)(R12*1), X4 9496 MOVOU -16(R9)(R12*1), X5 9497 MOVOA X4, -32(AX)(R12*1) 9498 MOVOA X5, -16(AX)(R12*1) 9499 ADDQ $0x20, R12 9500 CMPQ R8, R12 9501 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9502 MOVOU X0, (AX) 9503 MOVOU X1, 16(AX) 9504 MOVOU X2, -32(AX)(R8*1) 9505 MOVOU X3, -16(AX)(R8*1) 9506 MOVQ SI, AX 9507 
// After the literals: CX advances by 5 and matchLen counts how many bytes at src+CX equal
// the bytes 16(SP) positions earlier (8 bytes per step via XORQ/BSFQ, then byte by byte,
// bounded by src_len - CX). The copy of length CX - DI at offset 16(SP) is then emitted;
// offsets >= 0x10000 take the branch that writes 5-byte copies (tag byte + 32-bit offset).
9508emit_literal_done_repeat_emit_encodeSnappyBlockAsm: 9509 ADDL $0x05, CX 9510 MOVL CX, SI 9511 SUBL 16(SP), SI 9512 MOVQ src_len+32(FP), R8 9513 SUBL CX, R8 9514 LEAQ (DX)(CX*1), R9 9515 LEAQ (DX)(SI*1), SI 9516 9517 // matchLen 9518 XORL R11, R11 9519 CMPL R8, $0x08 9520 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm 9521 9522matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: 9523 MOVQ (R9)(R11*1), R10 9524 XORQ (SI)(R11*1), R10 9525 TESTQ R10, R10 9526 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm 9527 BSFQ R10, R10 9528 SARQ $0x03, R10 9529 LEAL (R11)(R10*1), R11 9530 JMP repeat_extend_forward_end_encodeSnappyBlockAsm 9531 9532matchlen_loop_repeat_extend_encodeSnappyBlockAsm: 9533 LEAL -8(R8), R8 9534 LEAL 8(R11), R11 9535 CMPL R8, $0x08 9536 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm 9537 9538matchlen_single_repeat_extend_encodeSnappyBlockAsm: 9539 TESTL R8, R8 9540 JZ repeat_extend_forward_end_encodeSnappyBlockAsm 9541 9542matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm: 9543 MOVB (R9)(R11*1), R10 9544 CMPB (SI)(R11*1), R10 9545 JNE repeat_extend_forward_end_encodeSnappyBlockAsm 9546 LEAL 1(R11), R11 9547 DECL R8 9548 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm 9549 9550repeat_extend_forward_end_encodeSnappyBlockAsm: 9551 ADDL R11, CX 9552 MOVL CX, SI 9553 SUBL DI, SI 9554 MOVL 16(SP), DI 9555 9556 // emitCopy 9557 CMPL DI, $0x00010000 9558 JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm 9559 9560four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: 9561 CMPL SI, $0x40 9562 JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm 9563 MOVB $0xff, (AX) 9564 MOVL DI, 1(AX) 9565 LEAL -64(SI), SI 9566 ADDQ $0x05, AX 9567 CMPL SI, $0x04 9568 JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm 9569 JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm 9570 9571four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: 9572 TESTL SI, SI 9573 JZ repeat_end_emit_encodeSnappyBlockAsm 
// Rest of emitCopy: while more than 64 bytes remain, a 3-byte copy with tag 0xee consumes
// 60 of them; the tail uses a 2-byte copy when length < 12 and offset < 0x800, otherwise a
// 3-byte copy. repeat_end_emit stores CX in 12(SP) and resumes searching. no_repeat_found
// then tests the table candidates against the bytes at CX, CX+1 and CX+2.
9574 MOVB $0x03, BL 9575 LEAL -4(BX)(SI*4), SI 9576 MOVB SI, (AX) 9577 MOVL DI, 1(AX) 9578 ADDQ $0x05, AX 9579 JMP repeat_end_emit_encodeSnappyBlockAsm 9580 9581two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: 9582 CMPL SI, $0x40 9583 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm 9584 MOVB $0xee, (AX) 9585 MOVW DI, 1(AX) 9586 LEAL -60(SI), SI 9587 ADDQ $0x03, AX 9588 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm 9589 9590two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: 9591 CMPL SI, $0x0c 9592 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm 9593 CMPL DI, $0x00000800 9594 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm 9595 MOVB $0x01, BL 9596 LEAL -16(BX)(SI*4), SI 9597 MOVB DI, 1(AX) 9598 SHRL $0x08, DI 9599 SHLL $0x05, DI 9600 ORL DI, SI 9601 MOVB SI, (AX) 9602 ADDQ $0x02, AX 9603 JMP repeat_end_emit_encodeSnappyBlockAsm 9604 9605emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: 9606 MOVB $0x02, BL 9607 LEAL -4(BX)(SI*4), SI 9608 MOVB SI, (AX) 9609 MOVW DI, 1(AX) 9610 ADDQ $0x03, AX 9611 9612repeat_end_emit_encodeSnappyBlockAsm: 9613 MOVL CX, 12(SP) 9614 JMP search_loop_encodeSnappyBlockAsm 9615 9616no_repeat_found_encodeSnappyBlockAsm: 9617 CMPL (DX)(SI*1), DI 9618 JEQ candidate_match_encodeSnappyBlockAsm 9619 SHRQ $0x08, DI 9620 MOVL 24(SP)(R10*4), SI 9621 LEAL 2(CX), R9 9622 CMPL (DX)(R8*1), DI 9623 JEQ candidate2_match_encodeSnappyBlockAsm 9624 MOVL R9, 24(SP)(R10*4) 9625 SHRQ $0x08, DI 9626 CMPL (DX)(SI*1), DI 9627 JEQ candidate3_match_encodeSnappyBlockAsm 9628 MOVL 20(SP), CX 9629 JMP search_loop_encodeSnappyBlockAsm 9630 9631candidate3_match_encodeSnappyBlockAsm: 9632 ADDL $0x02, CX 9633 JMP candidate_match_encodeSnappyBlockAsm 9634 9635candidate2_match_encodeSnappyBlockAsm: 9636 MOVL R9, 24(SP)(R10*4) 9637 INCL CX 9638 MOVL R8, SI 9639 9640candidate_match_encodeSnappyBlockAsm: 9641 MOVL 12(SP), DI 9642 TESTL SI, SI 9643 JZ match_extend_back_end_encodeSnappyBlockAsm 9644 
// Backward extension of the candidate match, then the output-space check: the function
// stores 0 in ret and returns unless AX + 5 + (CX - 12(SP)) is below the limit held in (SP).
// Otherwise the pending literals [12(SP), CX) are emitted with the same header scheme as in
// the repeat path.
9645match_extend_back_loop_encodeSnappyBlockAsm: 9646 CMPL CX, DI 9647 JLE match_extend_back_end_encodeSnappyBlockAsm 9648 MOVB -1(DX)(SI*1), BL 9649 MOVB -1(DX)(CX*1), R8 9650 CMPB BL, R8 9651 JNE match_extend_back_end_encodeSnappyBlockAsm 9652 LEAL -1(CX), CX 9653 DECL SI 9654 JZ match_extend_back_end_encodeSnappyBlockAsm 9655 JMP match_extend_back_loop_encodeSnappyBlockAsm 9656 9657match_extend_back_end_encodeSnappyBlockAsm: 9658 MOVL CX, DI 9659 SUBL 12(SP), DI 9660 LEAQ 5(AX)(DI*1), DI 9661 CMPQ DI, (SP) 9662 JL match_dst_size_check_encodeSnappyBlockAsm 9663 MOVQ $0x00000000, ret+48(FP) 9664 RET 9665 9666match_dst_size_check_encodeSnappyBlockAsm: 9667 MOVL CX, DI 9668 MOVL 12(SP), R8 9669 CMPL R8, DI 9670 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm 9671 MOVL DI, R9 9672 MOVL DI, 12(SP) 9673 LEAQ (DX)(R8*1), DI 9674 SUBL R8, R9 9675 LEAL -1(R9), R8 9676 CMPL R8, $0x3c 9677 JLT one_byte_match_emit_encodeSnappyBlockAsm 9678 CMPL R8, $0x00000100 9679 JLT two_bytes_match_emit_encodeSnappyBlockAsm 9680 CMPL R8, $0x00010000 9681 JLT three_bytes_match_emit_encodeSnappyBlockAsm 9682 CMPL R8, $0x01000000 9683 JLT four_bytes_match_emit_encodeSnappyBlockAsm 9684 MOVB $0xfc, (AX) 9685 MOVL R8, 1(AX) 9686 ADDQ $0x05, AX 9687 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9688 9689four_bytes_match_emit_encodeSnappyBlockAsm: 9690 MOVL R8, R10 9691 SHRL $0x10, R10 9692 MOVB $0xf8, (AX) 9693 MOVW R8, 1(AX) 9694 MOVB R10, 3(AX) 9695 ADDQ $0x04, AX 9696 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9697 9698three_bytes_match_emit_encodeSnappyBlockAsm: 9699 MOVB $0xf4, (AX) 9700 MOVW R8, 1(AX) 9701 ADDQ $0x03, AX 9702 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9703 9704two_bytes_match_emit_encodeSnappyBlockAsm: 9705 MOVB $0xf0, (AX) 9706 MOVB R8, 1(AX) 9707 ADDQ $0x02, AX 9708 CMPL R8, $0x40 9709 JL memmove_match_emit_encodeSnappyBlockAsm 9710 JMP memmove_long_match_emit_encodeSnappyBlockAsm 9711 9712one_byte_match_emit_encodeSnappyBlockAsm: 9713 SHLB $0x02, 
R8 9714 MOVB R8, (AX) 9715 ADDQ $0x01, AX 9716 9717memmove_match_emit_encodeSnappyBlockAsm: 9718 LEAQ (AX)(R9*1), R8 9719 9720 // genMemMoveShort 9721 CMPQ R9, $0x03 9722 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2 9723 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3 9724 CMPQ R9, $0x08 9725 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7 9726 CMPQ R9, $0x10 9727 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 9728 CMPQ R9, $0x20 9729 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 9730 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 9731 9732emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2: 9733 MOVB (DI), R10 9734 MOVB -1(DI)(R9*1), DI 9735 MOVB R10, (AX) 9736 MOVB DI, -1(AX)(R9*1) 9737 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9738 9739emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3: 9740 MOVW (DI), R10 9741 MOVB 2(DI), DI 9742 MOVW R10, (AX) 9743 MOVB DI, 2(AX) 9744 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9745 9746emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7: 9747 MOVL (DI), R10 9748 MOVL -4(DI)(R9*1), DI 9749 MOVL R10, (AX) 9750 MOVL DI, -4(AX)(R9*1) 9751 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9752 9753emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: 9754 MOVQ (DI), R10 9755 MOVQ -8(DI)(R9*1), DI 9756 MOVQ R10, (AX) 9757 MOVQ DI, -8(AX)(R9*1) 9758 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9759 9760emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: 9761 MOVOU (DI), X0 9762 MOVOU -16(DI)(R9*1), X1 9763 MOVOU X0, (AX) 9764 MOVOU X1, -16(AX)(R9*1) 9765 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm 9766 9767emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: 9768 MOVOU (DI), X0 9769 MOVOU 16(DI), X1 9770 MOVOU 
-32(DI)(R9*1), X2 9771 MOVOU -16(DI)(R9*1), X3 9772 MOVOU X0, (AX) 9773 MOVOU X1, 16(AX) 9774 MOVOU X2, -32(AX)(R9*1) 9775 MOVOU X3, -16(AX)(R9*1) 9776 9777memmove_end_copy_match_emit_encodeSnappyBlockAsm: 9778 MOVQ R8, AX 9779 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm 9780 9781memmove_long_match_emit_encodeSnappyBlockAsm: 9782 LEAQ (AX)(R9*1), R8 9783 9784 // genMemMoveLong 9785 MOVOU (DI), X0 9786 MOVOU 16(DI), X1 9787 MOVOU -32(DI)(R9*1), X2 9788 MOVOU -16(DI)(R9*1), X3 9789 MOVQ R9, R11 9790 SHRQ $0x05, R11 9791 MOVQ AX, R10 9792 ANDL $0x0000001f, R10 9793 MOVQ $0x00000040, R12 9794 SUBQ R10, R12 9795 DECQ R11 9796 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9797 LEAQ -32(DI)(R12*1), R10 9798 LEAQ -32(AX)(R12*1), R13 9799 9800emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: 9801 MOVOU (R10), X4 9802 MOVOU 16(R10), X5 9803 MOVOA X4, (R13) 9804 MOVOA X5, 16(R13) 9805 ADDQ $0x20, R13 9806 ADDQ $0x20, R10 9807 ADDQ $0x20, R12 9808 DECQ R11 9809 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back 9810 9811emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 9812 MOVOU -32(DI)(R12*1), X4 9813 MOVOU -16(DI)(R12*1), X5 9814 MOVOA X4, -32(AX)(R12*1) 9815 MOVOA X5, -16(AX)(R12*1) 9816 ADDQ $0x20, R12 9817 CMPQ R9, R12 9818 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 9819 MOVOU X0, (AX) 9820 MOVOU X1, 16(AX) 9821 MOVOU X2, -32(AX)(R9*1) 9822 MOVOU X3, -16(AX)(R9*1) 9823 MOVQ R8, AX 9824 9825emit_literal_done_match_emit_encodeSnappyBlockAsm: 9826match_nolit_loop_encodeSnappyBlockAsm: 9827 MOVL CX, DI 9828 SUBL SI, DI 9829 MOVL DI, 16(SP) 9830 ADDL $0x04, CX 9831 ADDL $0x04, SI 9832 MOVQ src_len+32(FP), DI 9833 SUBL CX, DI 9834 LEAQ (DX)(CX*1), R8 9835 LEAQ (DX)(SI*1), SI 9836 9837 // matchLen 9838 XORL R10, R10 9839 CMPL DI, $0x08 9840 JL matchlen_single_match_nolit_encodeSnappyBlockAsm 9841 
// matchLen for the candidate match; CX and SI were each advanced by 4 just before it, so
// the count in R10 starts after the first four bytes. Afterwards CX += R10, R10 + 4 becomes
// the copy length, 12(SP) is set to the new CX and the copy is emitted with offset 16(SP).
9842matchlen_loopback_match_nolit_encodeSnappyBlockAsm: 9843 MOVQ (R8)(R10*1), R9 9844 XORQ (SI)(R10*1), R9 9845 TESTQ R9, R9 9846 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm 9847 BSFQ R9, R9 9848 SARQ $0x03, R9 9849 LEAL (R10)(R9*1), R10 9850 JMP match_nolit_end_encodeSnappyBlockAsm 9851 9852matchlen_loop_match_nolit_encodeSnappyBlockAsm: 9853 LEAL -8(DI), DI 9854 LEAL 8(R10), R10 9855 CMPL DI, $0x08 9856 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm 9857 9858matchlen_single_match_nolit_encodeSnappyBlockAsm: 9859 TESTL DI, DI 9860 JZ match_nolit_end_encodeSnappyBlockAsm 9861 9862matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm: 9863 MOVB (R8)(R10*1), R9 9864 CMPB (SI)(R10*1), R9 9865 JNE match_nolit_end_encodeSnappyBlockAsm 9866 LEAL 1(R10), R10 9867 DECL DI 9868 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm 9869 9870match_nolit_end_encodeSnappyBlockAsm: 9871 ADDL R10, CX 9872 MOVL 16(SP), SI 9873 ADDL $0x04, R10 9874 MOVL CX, 12(SP) 9875 9876 // emitCopy 9877 CMPL SI, $0x00010000 9878 JL two_byte_offset_match_nolit_encodeSnappyBlockAsm 9879 9880four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: 9881 CMPL R10, $0x40 9882 JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm 9883 MOVB $0xff, (AX) 9884 MOVL SI, 1(AX) 9885 LEAL -64(R10), R10 9886 ADDQ $0x05, AX 9887 CMPL R10, $0x04 9888 JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm 9889 JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm 9890 9891four_bytes_remain_match_nolit_encodeSnappyBlockAsm: 9892 TESTL R10, R10 9893 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm 9894 MOVB $0x03, BL 9895 LEAL -4(BX)(R10*4), R10 9896 MOVB R10, (AX) 9897 MOVL SI, 1(AX) 9898 ADDQ $0x05, AX 9899 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm 9900 9901two_byte_offset_match_nolit_encodeSnappyBlockAsm: 9902 CMPL R10, $0x40 9903 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm 9904 MOVB $0xee, (AX) 9905 MOVW SI, 1(AX) 9906 LEAL -60(R10), R10 9907 ADDQ $0x03, AX 
// After the copy: searching stops once CX >= 8(SP), and the function returns 0 if AX is not
// below the limit in (SP). Otherwise positions CX-2 and CX are written to the table; if the
// previous entry for CX's slot points at the same four bytes, control loops back to
// match_nolit_loop. emit_remainder writes the bytes from 12(SP) to src_len as literals.
9908 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm 9909 9910two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: 9911 CMPL R10, $0x0c 9912 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm 9913 CMPL SI, $0x00000800 9914 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm 9915 MOVB $0x01, BL 9916 LEAL -16(BX)(R10*4), R10 9917 MOVB SI, 1(AX) 9918 SHRL $0x08, SI 9919 SHLL $0x05, SI 9920 ORL SI, R10 9921 MOVB R10, (AX) 9922 ADDQ $0x02, AX 9923 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm 9924 9925emit_copy_three_match_nolit_encodeSnappyBlockAsm: 9926 MOVB $0x02, BL 9927 LEAL -4(BX)(R10*4), R10 9928 MOVB R10, (AX) 9929 MOVW SI, 1(AX) 9930 ADDQ $0x03, AX 9931 9932match_nolit_emitcopy_end_encodeSnappyBlockAsm: 9933 CMPL CX, 8(SP) 9934 JGE emit_remainder_encodeSnappyBlockAsm 9935 MOVQ -2(DX)(CX*1), DI 9936 CMPQ AX, (SP) 9937 JL match_nolit_dst_ok_encodeSnappyBlockAsm 9938 MOVQ $0x00000000, ret+48(FP) 9939 RET 9940 9941match_nolit_dst_ok_encodeSnappyBlockAsm: 9942 MOVQ $0x0000cf1bbcdcbf9b, R9 9943 MOVQ DI, R8 9944 SHRQ $0x10, DI 9945 MOVQ DI, SI 9946 SHLQ $0x10, R8 9947 IMULQ R9, R8 9948 SHRQ $0x32, R8 9949 SHLQ $0x10, SI 9950 IMULQ R9, SI 9951 SHRQ $0x32, SI 9952 LEAL -2(CX), R9 9953 LEAQ 24(SP)(SI*4), R10 9954 MOVL (R10), SI 9955 MOVL R9, 24(SP)(R8*4) 9956 MOVL CX, (R10) 9957 CMPL (DX)(SI*1), DI 9958 JEQ match_nolit_loop_encodeSnappyBlockAsm 9959 INCL CX 9960 JMP search_loop_encodeSnappyBlockAsm 9961 9962emit_remainder_encodeSnappyBlockAsm: 9963 MOVQ src_len+32(FP), CX 9964 SUBL 12(SP), CX 9965 LEAQ 5(AX)(CX*1), CX 9966 CMPQ CX, (SP) 9967 JL emit_remainder_ok_encodeSnappyBlockAsm 9968 MOVQ $0x00000000, ret+48(FP) 9969 RET 9970 9971emit_remainder_ok_encodeSnappyBlockAsm: 9972 MOVQ src_len+32(FP), CX 9973 MOVL 12(SP), BX 9974 CMPL BX, CX 9975 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm 9976 MOVL CX, SI 9977 MOVL CX, 12(SP) 9978 LEAQ (DX)(BX*1), CX 9979 SUBL BX, SI 9980 LEAL -1(SI), DX 9981 CMPL DX, $0x3c 9982 JLT 
one_byte_emit_remainder_encodeSnappyBlockAsm 9983 CMPL DX, $0x00000100 9984 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm 9985 CMPL DX, $0x00010000 9986 JLT three_bytes_emit_remainder_encodeSnappyBlockAsm 9987 CMPL DX, $0x01000000 9988 JLT four_bytes_emit_remainder_encodeSnappyBlockAsm 9989 MOVB $0xfc, (AX) 9990 MOVL DX, 1(AX) 9991 ADDQ $0x05, AX 9992 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 9993 9994four_bytes_emit_remainder_encodeSnappyBlockAsm: 9995 MOVL DX, BX 9996 SHRL $0x10, BX 9997 MOVB $0xf8, (AX) 9998 MOVW DX, 1(AX) 9999 MOVB BL, 3(AX) 10000 ADDQ $0x04, AX 10001 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 10002 10003three_bytes_emit_remainder_encodeSnappyBlockAsm: 10004 MOVB $0xf4, (AX) 10005 MOVW DX, 1(AX) 10006 ADDQ $0x03, AX 10007 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 10008 10009two_bytes_emit_remainder_encodeSnappyBlockAsm: 10010 MOVB $0xf0, (AX) 10011 MOVB DL, 1(AX) 10012 ADDQ $0x02, AX 10013 CMPL DX, $0x40 10014 JL memmove_emit_remainder_encodeSnappyBlockAsm 10015 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm 10016 10017one_byte_emit_remainder_encodeSnappyBlockAsm: 10018 SHLB $0x02, DL 10019 MOVB DL, (AX) 10020 ADDQ $0x01, AX 10021 10022memmove_emit_remainder_encodeSnappyBlockAsm: 10023 LEAQ (AX)(SI*1), DX 10024 MOVL SI, BX 10025 10026 // genMemMoveShort 10027 CMPQ BX, $0x03 10028 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2 10029 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3 10030 CMPQ BX, $0x08 10031 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7 10032 CMPQ BX, $0x10 10033 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16 10034 CMPQ BX, $0x20 10035 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32 10036 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 10037 
// Copy of the remaining literals: one of the size-specific short moves, or the 32-byte
// loop for long runs (the tail of that loop continues on the following physical line).
10038emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: 10039 MOVB (CX), SI 10040 MOVB -1(CX)(BX*1), CL 10041 MOVB SI, (AX) 10042 MOVB CL, -1(AX)(BX*1) 10043 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 10044 10045emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: 10046 MOVW (CX), SI 10047 MOVB 2(CX), CL 10048 MOVW SI, (AX) 10049 MOVB CL, 2(AX) 10050 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 10051 10052emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: 10053 MOVL (CX), SI 10054 MOVL -4(CX)(BX*1), CX 10055 MOVL SI, (AX) 10056 MOVL CX, -4(AX)(BX*1) 10057 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 10058 10059emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: 10060 MOVQ (CX), SI 10061 MOVQ -8(CX)(BX*1), CX 10062 MOVQ SI, (AX) 10063 MOVQ CX, -8(AX)(BX*1) 10064 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 10065 10066emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: 10067 MOVOU (CX), X0 10068 MOVOU -16(CX)(BX*1), X1 10069 MOVOU X0, (AX) 10070 MOVOU X1, -16(AX)(BX*1) 10071 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm 10072 10073emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: 10074 MOVOU (CX), X0 10075 MOVOU 16(CX), X1 10076 MOVOU -32(CX)(BX*1), X2 10077 MOVOU -16(CX)(BX*1), X3 10078 MOVOU X0, (AX) 10079 MOVOU X1, 16(AX) 10080 MOVOU X2, -32(AX)(BX*1) 10081 MOVOU X3, -16(AX)(BX*1) 10082 10083memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: 10084 MOVQ DX, AX 10085 JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm 10086 10087memmove_long_emit_remainder_encodeSnappyBlockAsm: 10088 LEAQ (AX)(SI*1), DX 10089 MOVL SI, BX 10090 10091 // genMemMoveLong 10092 MOVOU (CX), X0 10093 MOVOU 16(CX), X1 10094 MOVOU -32(CX)(BX*1), X2 10095 MOVOU -16(CX)(BX*1), X3 10096 MOVQ BX, DI 10097 SHRQ $0x05, DI 10098 MOVQ AX, SI 10099 ANDL $0x0000001f, SI 10100 MOVQ 
$0x00000040, R8 10101 SUBQ SI, R8 10102 DECQ DI 10103 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 10104 LEAQ -32(CX)(R8*1), SI 10105 LEAQ -32(AX)(R8*1), R9 10106 10107emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: 10108 MOVOU (SI), X4 10109 MOVOU 16(SI), X5 10110 MOVOA X4, (R9) 10111 MOVOA X5, 16(R9) 10112 ADDQ $0x20, R9 10113 ADDQ $0x20, SI 10114 ADDQ $0x20, R8 10115 DECQ DI 10116 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back 10117 10118emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: 10119 MOVOU -32(CX)(R8*1), X4 10120 MOVOU -16(CX)(R8*1), X5 10121 MOVOA X4, -32(AX)(R8*1) 10122 MOVOA X5, -16(AX)(R8*1) 10123 ADDQ $0x20, R8 10124 CMPQ BX, R8 10125 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 10126 MOVOU X0, (AX) 10127 MOVOU X1, 16(AX) 10128 MOVOU X2, -32(AX)(BX*1) 10129 MOVOU X3, -16(AX)(BX*1) 10130 MOVQ DX, AX 10131 10132emit_literal_done_emit_remainder_encodeSnappyBlockAsm: 10133 MOVQ dst_base+0(FP), CX 10134 SUBQ CX, AX 10135 MOVQ AX, ret+48(FP) 10136 RET 10137 10138// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int 10139// Requires: SSE2 10140TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 10141 MOVQ dst_base+0(FP), AX 10142 MOVQ $0x00000080, CX 10143 LEAQ 24(SP), DX 10144 PXOR X0, X0 10145 10146zero_loop_encodeSnappyBlockAsm12B: 10147 MOVOU X0, (DX) 10148 MOVOU X0, 16(DX) 10149 MOVOU X0, 32(DX) 10150 MOVOU X0, 48(DX) 10151 MOVOU X0, 64(DX) 10152 MOVOU X0, 80(DX) 10153 MOVOU X0, 96(DX) 10154 MOVOU X0, 112(DX) 10155 ADDQ $0x80, DX 10156 DECQ CX 10157 JNZ zero_loop_encodeSnappyBlockAsm12B 10158 MOVL $0x00000000, 12(SP) 10159 MOVQ src_len+32(FP), CX 10160 LEAQ -5(CX), DX 10161 LEAQ -8(CX), SI 10162 MOVL SI, 8(SP) 10163 SHRQ $0x05, CX 10164 SUBL CX, DX 10165 LEAQ (AX)(DX*1), DX 10166 MOVQ DX, (SP) 10167 MOVL $0x00000001, CX 10168 MOVL CX, 16(SP) 10169 MOVQ src_base+24(FP), DX 
10170 10171search_loop_encodeSnappyBlockAsm12B: 10172 MOVL CX, SI 10173 SUBL 12(SP), SI 10174 SHRL $0x05, SI 10175 LEAL 4(CX)(SI*1), SI 10176 CMPL SI, 8(SP) 10177 JGE emit_remainder_encodeSnappyBlockAsm12B 10178 MOVQ (DX)(CX*1), DI 10179 MOVL SI, 20(SP) 10180 MOVQ $0x000000cf1bbcdcbb, R9 10181 MOVQ DI, R10 10182 MOVQ DI, R11 10183 SHRQ $0x08, R11 10184 SHLQ $0x18, R10 10185 IMULQ R9, R10 10186 SHRQ $0x34, R10 10187 SHLQ $0x18, R11 10188 IMULQ R9, R11 10189 SHRQ $0x34, R11 10190 MOVL 24(SP)(R10*4), SI 10191 MOVL 24(SP)(R11*4), R8 10192 MOVL CX, 24(SP)(R10*4) 10193 LEAL 1(CX), R10 10194 MOVL R10, 24(SP)(R11*4) 10195 MOVQ DI, R10 10196 SHRQ $0x10, R10 10197 SHLQ $0x18, R10 10198 IMULQ R9, R10 10199 SHRQ $0x34, R10 10200 MOVL CX, R9 10201 SUBL 16(SP), R9 10202 MOVL 1(DX)(R9*1), R11 10203 MOVQ DI, R9 10204 SHRQ $0x08, R9 10205 CMPL R9, R11 10206 JNE no_repeat_found_encodeSnappyBlockAsm12B 10207 LEAL 1(CX), DI 10208 MOVL 12(SP), SI 10209 MOVL DI, R8 10210 SUBL 16(SP), R8 10211 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B 10212 10213repeat_extend_back_loop_encodeSnappyBlockAsm12B: 10214 CMPL DI, SI 10215 JLE repeat_extend_back_end_encodeSnappyBlockAsm12B 10216 MOVB -1(DX)(R8*1), BL 10217 MOVB -1(DX)(DI*1), R9 10218 CMPB BL, R9 10219 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B 10220 LEAL -1(DI), DI 10221 DECL R8 10222 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B 10223 10224repeat_extend_back_end_encodeSnappyBlockAsm12B: 10225 MOVL 12(SP), SI 10226 CMPL SI, DI 10227 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B 10228 MOVL DI, R8 10229 MOVL DI, 12(SP) 10230 LEAQ (DX)(SI*1), R9 10231 SUBL SI, R8 10232 LEAL -1(R8), SI 10233 CMPL SI, $0x3c 10234 JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B 10235 CMPL SI, $0x00000100 10236 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B 10237 MOVB $0xf4, (AX) 10238 MOVW SI, 1(AX) 10239 ADDQ $0x03, AX 10240 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B 10241 
10242two_bytes_repeat_emit_encodeSnappyBlockAsm12B: 10243 MOVB $0xf0, (AX) 10244 MOVB SI, 1(AX) 10245 ADDQ $0x02, AX 10246 CMPL SI, $0x40 10247 JL memmove_repeat_emit_encodeSnappyBlockAsm12B 10248 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B 10249 10250one_byte_repeat_emit_encodeSnappyBlockAsm12B: 10251 SHLB $0x02, SI 10252 MOVB SI, (AX) 10253 ADDQ $0x01, AX 10254 10255memmove_repeat_emit_encodeSnappyBlockAsm12B: 10256 LEAQ (AX)(R8*1), SI 10257 10258 // genMemMoveShort 10259 CMPQ R8, $0x03 10260 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 10261 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3 10262 CMPQ R8, $0x08 10263 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7 10264 CMPQ R8, $0x10 10265 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 10266 CMPQ R8, $0x20 10267 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 10268 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 10269 10270emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: 10271 MOVB (R9), R10 10272 MOVB -1(R9)(R8*1), R9 10273 MOVB R10, (AX) 10274 MOVB R9, -1(AX)(R8*1) 10275 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10276 10277emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3: 10278 MOVW (R9), R10 10279 MOVB 2(R9), R9 10280 MOVW R10, (AX) 10281 MOVB R9, 2(AX) 10282 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10283 10284emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7: 10285 MOVL (R9), R10 10286 MOVL -4(R9)(R8*1), R9 10287 MOVL R10, (AX) 10288 MOVL R9, -4(AX)(R8*1) 10289 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10290 10291emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: 10292 MOVQ (R9), R10 10293 MOVQ -8(R9)(R8*1), R9 10294 MOVQ R10, (AX) 10295 MOVQ R9, -8(AX)(R8*1) 10296 JMP 
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10297 10298emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: 10299 MOVOU (R9), X0 10300 MOVOU -16(R9)(R8*1), X1 10301 MOVOU X0, (AX) 10302 MOVOU X1, -16(AX)(R8*1) 10303 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B 10304 10305emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: 10306 MOVOU (R9), X0 10307 MOVOU 16(R9), X1 10308 MOVOU -32(R9)(R8*1), X2 10309 MOVOU -16(R9)(R8*1), X3 10310 MOVOU X0, (AX) 10311 MOVOU X1, 16(AX) 10312 MOVOU X2, -32(AX)(R8*1) 10313 MOVOU X3, -16(AX)(R8*1) 10314 10315memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: 10316 MOVQ SI, AX 10317 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B 10318 10319memmove_long_repeat_emit_encodeSnappyBlockAsm12B: 10320 LEAQ (AX)(R8*1), SI 10321 10322 // genMemMoveLong 10323 MOVOU (R9), X0 10324 MOVOU 16(R9), X1 10325 MOVOU -32(R9)(R8*1), X2 10326 MOVOU -16(R9)(R8*1), X3 10327 MOVQ R8, R11 10328 SHRQ $0x05, R11 10329 MOVQ AX, R10 10330 ANDL $0x0000001f, R10 10331 MOVQ $0x00000040, R12 10332 SUBQ R10, R12 10333 DECQ R11 10334 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10335 LEAQ -32(R9)(R12*1), R10 10336 LEAQ -32(AX)(R12*1), R13 10337 10338emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: 10339 MOVOU (R10), X4 10340 MOVOU 16(R10), X5 10341 MOVOA X4, (R13) 10342 MOVOA X5, 16(R13) 10343 ADDQ $0x20, R13 10344 ADDQ $0x20, R10 10345 ADDQ $0x20, R12 10346 DECQ R11 10347 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back 10348 10349emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 10350 MOVOU -32(R9)(R12*1), X4 10351 MOVOU -16(R9)(R12*1), X5 10352 MOVOA X4, -32(AX)(R12*1) 10353 MOVOA X5, -16(AX)(R12*1) 10354 ADDQ $0x20, R12 10355 CMPQ R8, R12 10356 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10357 MOVOU X0, 
(AX) 10358 MOVOU X1, 16(AX) 10359 MOVOU X2, -32(AX)(R8*1) 10360 MOVOU X3, -16(AX)(R8*1) 10361 MOVQ SI, AX 10362 10363emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: 10364 ADDL $0x05, CX 10365 MOVL CX, SI 10366 SUBL 16(SP), SI 10367 MOVQ src_len+32(FP), R8 10368 SUBL CX, R8 10369 LEAQ (DX)(CX*1), R9 10370 LEAQ (DX)(SI*1), SI 10371 10372 // matchLen 10373 XORL R11, R11 10374 CMPL R8, $0x08 10375 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B 10376 10377matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: 10378 MOVQ (R9)(R11*1), R10 10379 XORQ (SI)(R11*1), R10 10380 TESTQ R10, R10 10381 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B 10382 BSFQ R10, R10 10383 SARQ $0x03, R10 10384 LEAL (R11)(R10*1), R11 10385 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B 10386 10387matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: 10388 LEAL -8(R8), R8 10389 LEAL 8(R11), R11 10390 CMPL R8, $0x08 10391 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B 10392 10393matchlen_single_repeat_extend_encodeSnappyBlockAsm12B: 10394 TESTL R8, R8 10395 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B 10396 10397matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B: 10398 MOVB (R9)(R11*1), R10 10399 CMPB (SI)(R11*1), R10 10400 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B 10401 LEAL 1(R11), R11 10402 DECL R8 10403 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B 10404 10405repeat_extend_forward_end_encodeSnappyBlockAsm12B: 10406 ADDL R11, CX 10407 MOVL CX, SI 10408 SUBL DI, SI 10409 MOVL 16(SP), DI 10410 10411 // emitCopy 10412two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: 10413 CMPL SI, $0x40 10414 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B 10415 MOVB $0xee, (AX) 10416 MOVW DI, 1(AX) 10417 LEAL -60(SI), SI 10418 ADDQ $0x03, AX 10419 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B 10420 10421two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: 10422 CMPL SI, 
$0x0c 10423 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B 10424 CMPL DI, $0x00000800 10425 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B 10426 MOVB $0x01, BL 10427 LEAL -16(BX)(SI*4), SI 10428 MOVB DI, 1(AX) 10429 SHRL $0x08, DI 10430 SHLL $0x05, DI 10431 ORL DI, SI 10432 MOVB SI, (AX) 10433 ADDQ $0x02, AX 10434 JMP repeat_end_emit_encodeSnappyBlockAsm12B 10435 10436emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: 10437 MOVB $0x02, BL 10438 LEAL -4(BX)(SI*4), SI 10439 MOVB SI, (AX) 10440 MOVW DI, 1(AX) 10441 ADDQ $0x03, AX 10442 10443repeat_end_emit_encodeSnappyBlockAsm12B: 10444 MOVL CX, 12(SP) 10445 JMP search_loop_encodeSnappyBlockAsm12B 10446 10447no_repeat_found_encodeSnappyBlockAsm12B: 10448 CMPL (DX)(SI*1), DI 10449 JEQ candidate_match_encodeSnappyBlockAsm12B 10450 SHRQ $0x08, DI 10451 MOVL 24(SP)(R10*4), SI 10452 LEAL 2(CX), R9 10453 CMPL (DX)(R8*1), DI 10454 JEQ candidate2_match_encodeSnappyBlockAsm12B 10455 MOVL R9, 24(SP)(R10*4) 10456 SHRQ $0x08, DI 10457 CMPL (DX)(SI*1), DI 10458 JEQ candidate3_match_encodeSnappyBlockAsm12B 10459 MOVL 20(SP), CX 10460 JMP search_loop_encodeSnappyBlockAsm12B 10461 10462candidate3_match_encodeSnappyBlockAsm12B: 10463 ADDL $0x02, CX 10464 JMP candidate_match_encodeSnappyBlockAsm12B 10465 10466candidate2_match_encodeSnappyBlockAsm12B: 10467 MOVL R9, 24(SP)(R10*4) 10468 INCL CX 10469 MOVL R8, SI 10470 10471candidate_match_encodeSnappyBlockAsm12B: 10472 MOVL 12(SP), DI 10473 TESTL SI, SI 10474 JZ match_extend_back_end_encodeSnappyBlockAsm12B 10475 10476match_extend_back_loop_encodeSnappyBlockAsm12B: 10477 CMPL CX, DI 10478 JLE match_extend_back_end_encodeSnappyBlockAsm12B 10479 MOVB -1(DX)(SI*1), BL 10480 MOVB -1(DX)(CX*1), R8 10481 CMPB BL, R8 10482 JNE match_extend_back_end_encodeSnappyBlockAsm12B 10483 LEAL -1(CX), CX 10484 DECL SI 10485 JZ match_extend_back_end_encodeSnappyBlockAsm12B 10486 JMP match_extend_back_loop_encodeSnappyBlockAsm12B 10487 
10488match_extend_back_end_encodeSnappyBlockAsm12B: 10489 MOVL CX, DI 10490 SUBL 12(SP), DI 10491 LEAQ 3(AX)(DI*1), DI 10492 CMPQ DI, (SP) 10493 JL match_dst_size_check_encodeSnappyBlockAsm12B 10494 MOVQ $0x00000000, ret+48(FP) 10495 RET 10496 10497match_dst_size_check_encodeSnappyBlockAsm12B: 10498 MOVL CX, DI 10499 MOVL 12(SP), R8 10500 CMPL R8, DI 10501 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B 10502 MOVL DI, R9 10503 MOVL DI, 12(SP) 10504 LEAQ (DX)(R8*1), DI 10505 SUBL R8, R9 10506 LEAL -1(R9), R8 10507 CMPL R8, $0x3c 10508 JLT one_byte_match_emit_encodeSnappyBlockAsm12B 10509 CMPL R8, $0x00000100 10510 JLT two_bytes_match_emit_encodeSnappyBlockAsm12B 10511 MOVB $0xf4, (AX) 10512 MOVW R8, 1(AX) 10513 ADDQ $0x03, AX 10514 JMP memmove_long_match_emit_encodeSnappyBlockAsm12B 10515 10516two_bytes_match_emit_encodeSnappyBlockAsm12B: 10517 MOVB $0xf0, (AX) 10518 MOVB R8, 1(AX) 10519 ADDQ $0x02, AX 10520 CMPL R8, $0x40 10521 JL memmove_match_emit_encodeSnappyBlockAsm12B 10522 JMP memmove_long_match_emit_encodeSnappyBlockAsm12B 10523 10524one_byte_match_emit_encodeSnappyBlockAsm12B: 10525 SHLB $0x02, R8 10526 MOVB R8, (AX) 10527 ADDQ $0x01, AX 10528 10529memmove_match_emit_encodeSnappyBlockAsm12B: 10530 LEAQ (AX)(R9*1), R8 10531 10532 // genMemMoveShort 10533 CMPQ R9, $0x03 10534 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2 10535 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3 10536 CMPQ R9, $0x08 10537 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7 10538 CMPQ R9, $0x10 10539 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 10540 CMPQ R9, $0x20 10541 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 10542 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 10543 10544emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2: 10545 MOVB (DI), R10 10546 MOVB -1(DI)(R9*1), DI 
10547 MOVB R10, (AX) 10548 MOVB DI, -1(AX)(R9*1) 10549 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10550 10551emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3: 10552 MOVW (DI), R10 10553 MOVB 2(DI), DI 10554 MOVW R10, (AX) 10555 MOVB DI, 2(AX) 10556 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10557 10558emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7: 10559 MOVL (DI), R10 10560 MOVL -4(DI)(R9*1), DI 10561 MOVL R10, (AX) 10562 MOVL DI, -4(AX)(R9*1) 10563 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10564 10565emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: 10566 MOVQ (DI), R10 10567 MOVQ -8(DI)(R9*1), DI 10568 MOVQ R10, (AX) 10569 MOVQ DI, -8(AX)(R9*1) 10570 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10571 10572emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: 10573 MOVOU (DI), X0 10574 MOVOU -16(DI)(R9*1), X1 10575 MOVOU X0, (AX) 10576 MOVOU X1, -16(AX)(R9*1) 10577 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B 10578 10579emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: 10580 MOVOU (DI), X0 10581 MOVOU 16(DI), X1 10582 MOVOU -32(DI)(R9*1), X2 10583 MOVOU -16(DI)(R9*1), X3 10584 MOVOU X0, (AX) 10585 MOVOU X1, 16(AX) 10586 MOVOU X2, -32(AX)(R9*1) 10587 MOVOU X3, -16(AX)(R9*1) 10588 10589memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: 10590 MOVQ R8, AX 10591 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B 10592 10593memmove_long_match_emit_encodeSnappyBlockAsm12B: 10594 LEAQ (AX)(R9*1), R8 10595 10596 // genMemMoveLong 10597 MOVOU (DI), X0 10598 MOVOU 16(DI), X1 10599 MOVOU -32(DI)(R9*1), X2 10600 MOVOU -16(DI)(R9*1), X3 10601 MOVQ R9, R11 10602 SHRQ $0x05, R11 10603 MOVQ AX, R10 10604 ANDL $0x0000001f, R10 10605 MOVQ $0x00000040, R12 10606 SUBQ R10, R12 10607 DECQ R11 10608 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10609 LEAQ 
-32(DI)(R12*1), R10 10610 LEAQ -32(AX)(R12*1), R13 10611 10612emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: 10613 MOVOU (R10), X4 10614 MOVOU 16(R10), X5 10615 MOVOA X4, (R13) 10616 MOVOA X5, 16(R13) 10617 ADDQ $0x20, R13 10618 ADDQ $0x20, R10 10619 ADDQ $0x20, R12 10620 DECQ R11 10621 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back 10622 10623emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 10624 MOVOU -32(DI)(R12*1), X4 10625 MOVOU -16(DI)(R12*1), X5 10626 MOVOA X4, -32(AX)(R12*1) 10627 MOVOA X5, -16(AX)(R12*1) 10628 ADDQ $0x20, R12 10629 CMPQ R9, R12 10630 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10631 MOVOU X0, (AX) 10632 MOVOU X1, 16(AX) 10633 MOVOU X2, -32(AX)(R9*1) 10634 MOVOU X3, -16(AX)(R9*1) 10635 MOVQ R8, AX 10636 10637emit_literal_done_match_emit_encodeSnappyBlockAsm12B: 10638match_nolit_loop_encodeSnappyBlockAsm12B: 10639 MOVL CX, DI 10640 SUBL SI, DI 10641 MOVL DI, 16(SP) 10642 ADDL $0x04, CX 10643 ADDL $0x04, SI 10644 MOVQ src_len+32(FP), DI 10645 SUBL CX, DI 10646 LEAQ (DX)(CX*1), R8 10647 LEAQ (DX)(SI*1), SI 10648 10649 // matchLen 10650 XORL R10, R10 10651 CMPL DI, $0x08 10652 JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B 10653 10654matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: 10655 MOVQ (R8)(R10*1), R9 10656 XORQ (SI)(R10*1), R9 10657 TESTQ R9, R9 10658 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B 10659 BSFQ R9, R9 10660 SARQ $0x03, R9 10661 LEAL (R10)(R9*1), R10 10662 JMP match_nolit_end_encodeSnappyBlockAsm12B 10663 10664matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: 10665 LEAL -8(DI), DI 10666 LEAL 8(R10), R10 10667 CMPL DI, $0x08 10668 JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B 10669 10670matchlen_single_match_nolit_encodeSnappyBlockAsm12B: 10671 TESTL DI, DI 10672 JZ match_nolit_end_encodeSnappyBlockAsm12B 10673 
10674matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B: 10675 MOVB (R8)(R10*1), R9 10676 CMPB (SI)(R10*1), R9 10677 JNE match_nolit_end_encodeSnappyBlockAsm12B 10678 LEAL 1(R10), R10 10679 DECL DI 10680 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B 10681 10682match_nolit_end_encodeSnappyBlockAsm12B: 10683 ADDL R10, CX 10684 MOVL 16(SP), SI 10685 ADDL $0x04, R10 10686 MOVL CX, 12(SP) 10687 10688 // emitCopy 10689two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: 10690 CMPL R10, $0x40 10691 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B 10692 MOVB $0xee, (AX) 10693 MOVW SI, 1(AX) 10694 LEAL -60(R10), R10 10695 ADDQ $0x03, AX 10696 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B 10697 10698two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: 10699 CMPL R10, $0x0c 10700 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B 10701 CMPL SI, $0x00000800 10702 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B 10703 MOVB $0x01, BL 10704 LEAL -16(BX)(R10*4), R10 10705 MOVB SI, 1(AX) 10706 SHRL $0x08, SI 10707 SHLL $0x05, SI 10708 ORL SI, R10 10709 MOVB R10, (AX) 10710 ADDQ $0x02, AX 10711 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B 10712 10713emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: 10714 MOVB $0x02, BL 10715 LEAL -4(BX)(R10*4), R10 10716 MOVB R10, (AX) 10717 MOVW SI, 1(AX) 10718 ADDQ $0x03, AX 10719 10720match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: 10721 CMPL CX, 8(SP) 10722 JGE emit_remainder_encodeSnappyBlockAsm12B 10723 MOVQ -2(DX)(CX*1), DI 10724 CMPQ AX, (SP) 10725 JL match_nolit_dst_ok_encodeSnappyBlockAsm12B 10726 MOVQ $0x00000000, ret+48(FP) 10727 RET 10728 10729match_nolit_dst_ok_encodeSnappyBlockAsm12B: 10730 MOVQ $0x000000cf1bbcdcbb, R9 10731 MOVQ DI, R8 10732 SHRQ $0x10, DI 10733 MOVQ DI, SI 10734 SHLQ $0x18, R8 10735 IMULQ R9, R8 10736 SHRQ $0x34, R8 10737 SHLQ $0x18, SI 10738 IMULQ R9, SI 10739 SHRQ $0x34, SI 10740 LEAL -2(CX), R9 10741 LEAQ 24(SP)(SI*4), R10 10742 
MOVL (R10), SI 10743 MOVL R9, 24(SP)(R8*4) 10744 MOVL CX, (R10) 10745 CMPL (DX)(SI*1), DI 10746 JEQ match_nolit_loop_encodeSnappyBlockAsm12B 10747 INCL CX 10748 JMP search_loop_encodeSnappyBlockAsm12B 10749 10750emit_remainder_encodeSnappyBlockAsm12B: 10751 MOVQ src_len+32(FP), CX 10752 SUBL 12(SP), CX 10753 LEAQ 3(AX)(CX*1), CX 10754 CMPQ CX, (SP) 10755 JL emit_remainder_ok_encodeSnappyBlockAsm12B 10756 MOVQ $0x00000000, ret+48(FP) 10757 RET 10758 10759emit_remainder_ok_encodeSnappyBlockAsm12B: 10760 MOVQ src_len+32(FP), CX 10761 MOVL 12(SP), BX 10762 CMPL BX, CX 10763 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B 10764 MOVL CX, SI 10765 MOVL CX, 12(SP) 10766 LEAQ (DX)(BX*1), CX 10767 SUBL BX, SI 10768 LEAL -1(SI), DX 10769 CMPL DX, $0x3c 10770 JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B 10771 CMPL DX, $0x00000100 10772 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B 10773 MOVB $0xf4, (AX) 10774 MOVW DX, 1(AX) 10775 ADDQ $0x03, AX 10776 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B 10777 10778two_bytes_emit_remainder_encodeSnappyBlockAsm12B: 10779 MOVB $0xf0, (AX) 10780 MOVB DL, 1(AX) 10781 ADDQ $0x02, AX 10782 CMPL DX, $0x40 10783 JL memmove_emit_remainder_encodeSnappyBlockAsm12B 10784 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B 10785 10786one_byte_emit_remainder_encodeSnappyBlockAsm12B: 10787 SHLB $0x02, DL 10788 MOVB DL, (AX) 10789 ADDQ $0x01, AX 10790 10791memmove_emit_remainder_encodeSnappyBlockAsm12B: 10792 LEAQ (AX)(SI*1), DX 10793 MOVL SI, BX 10794 10795 // genMemMoveShort 10796 CMPQ BX, $0x03 10797 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2 10798 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3 10799 CMPQ BX, $0x08 10800 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7 10801 CMPQ BX, $0x10 10802 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16 10803 CMPQ BX, $0x20 10804 JBE 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32 10805 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 10806 10807emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: 10808 MOVB (CX), SI 10809 MOVB -1(CX)(BX*1), CL 10810 MOVB SI, (AX) 10811 MOVB CL, -1(AX)(BX*1) 10812 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 10813 10814emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: 10815 MOVW (CX), SI 10816 MOVB 2(CX), CL 10817 MOVW SI, (AX) 10818 MOVB CL, 2(AX) 10819 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 10820 10821emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: 10822 MOVL (CX), SI 10823 MOVL -4(CX)(BX*1), CX 10824 MOVL SI, (AX) 10825 MOVL CX, -4(AX)(BX*1) 10826 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 10827 10828emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: 10829 MOVQ (CX), SI 10830 MOVQ -8(CX)(BX*1), CX 10831 MOVQ SI, (AX) 10832 MOVQ CX, -8(AX)(BX*1) 10833 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 10834 10835emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: 10836 MOVOU (CX), X0 10837 MOVOU -16(CX)(BX*1), X1 10838 MOVOU X0, (AX) 10839 MOVOU X1, -16(AX)(BX*1) 10840 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B 10841 10842emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: 10843 MOVOU (CX), X0 10844 MOVOU 16(CX), X1 10845 MOVOU -32(CX)(BX*1), X2 10846 MOVOU -16(CX)(BX*1), X3 10847 MOVOU X0, (AX) 10848 MOVOU X1, 16(AX) 10849 MOVOU X2, -32(AX)(BX*1) 10850 MOVOU X3, -16(AX)(BX*1) 10851 10852memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: 10853 MOVQ DX, AX 10854 JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B 10855 10856memmove_long_emit_remainder_encodeSnappyBlockAsm12B: 10857 LEAQ (AX)(SI*1), DX 10858 MOVL SI, BX 10859 10860 // 
genMemMoveLong 10861 MOVOU (CX), X0 10862 MOVOU 16(CX), X1 10863 MOVOU -32(CX)(BX*1), X2 10864 MOVOU -16(CX)(BX*1), X3 10865 MOVQ BX, DI 10866 SHRQ $0x05, DI 10867 MOVQ AX, SI 10868 ANDL $0x0000001f, SI 10869 MOVQ $0x00000040, R8 10870 SUBQ SI, R8 10871 DECQ DI 10872 JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10873 LEAQ -32(CX)(R8*1), SI 10874 LEAQ -32(AX)(R8*1), R9 10875 10876emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: 10877 MOVOU (SI), X4 10878 MOVOU 16(SI), X5 10879 MOVOA X4, (R9) 10880 MOVOA X5, 16(R9) 10881 ADDQ $0x20, R9 10882 ADDQ $0x20, SI 10883 ADDQ $0x20, R8 10884 DECQ DI 10885 JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back 10886 10887emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: 10888 MOVOU -32(CX)(R8*1), X4 10889 MOVOU -16(CX)(R8*1), X5 10890 MOVOA X4, -32(AX)(R8*1) 10891 MOVOA X5, -16(AX)(R8*1) 10892 ADDQ $0x20, R8 10893 CMPQ BX, R8 10894 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 10895 MOVOU X0, (AX) 10896 MOVOU X1, 16(AX) 10897 MOVOU X2, -32(AX)(BX*1) 10898 MOVOU X3, -16(AX)(BX*1) 10899 MOVQ DX, AX 10900 10901emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: 10902 MOVQ dst_base+0(FP), CX 10903 SUBQ CX, AX 10904 MOVQ AX, ret+48(FP) 10905 RET 10906 10907// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int 10908// Requires: SSE2 10909TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 10910 MOVQ dst_base+0(FP), AX 10911 MOVQ $0x00000020, CX 10912 LEAQ 24(SP), DX 10913 PXOR X0, X0 10914 10915zero_loop_encodeSnappyBlockAsm10B: 10916 MOVOU X0, (DX) 10917 MOVOU X0, 16(DX) 10918 MOVOU X0, 32(DX) 10919 MOVOU X0, 48(DX) 10920 MOVOU X0, 64(DX) 10921 MOVOU X0, 80(DX) 10922 MOVOU X0, 96(DX) 10923 MOVOU X0, 112(DX) 10924 ADDQ $0x80, DX 10925 DECQ CX 10926 JNZ zero_loop_encodeSnappyBlockAsm10B 10927 MOVL $0x00000000, 12(SP) 10928 MOVQ src_len+32(FP), CX 
10929 LEAQ -5(CX), DX 10930 LEAQ -8(CX), SI 10931 MOVL SI, 8(SP) 10932 SHRQ $0x05, CX 10933 SUBL CX, DX 10934 LEAQ (AX)(DX*1), DX 10935 MOVQ DX, (SP) 10936 MOVL $0x00000001, CX 10937 MOVL CX, 16(SP) 10938 MOVQ src_base+24(FP), DX 10939 10940search_loop_encodeSnappyBlockAsm10B: 10941 MOVL CX, SI 10942 SUBL 12(SP), SI 10943 SHRL $0x05, SI 10944 LEAL 4(CX)(SI*1), SI 10945 CMPL SI, 8(SP) 10946 JGE emit_remainder_encodeSnappyBlockAsm10B 10947 MOVQ (DX)(CX*1), DI 10948 MOVL SI, 20(SP) 10949 MOVQ $0x9e3779b1, R9 10950 MOVQ DI, R10 10951 MOVQ DI, R11 10952 SHRQ $0x08, R11 10953 SHLQ $0x20, R10 10954 IMULQ R9, R10 10955 SHRQ $0x36, R10 10956 SHLQ $0x20, R11 10957 IMULQ R9, R11 10958 SHRQ $0x36, R11 10959 MOVL 24(SP)(R10*4), SI 10960 MOVL 24(SP)(R11*4), R8 10961 MOVL CX, 24(SP)(R10*4) 10962 LEAL 1(CX), R10 10963 MOVL R10, 24(SP)(R11*4) 10964 MOVQ DI, R10 10965 SHRQ $0x10, R10 10966 SHLQ $0x20, R10 10967 IMULQ R9, R10 10968 SHRQ $0x36, R10 10969 MOVL CX, R9 10970 SUBL 16(SP), R9 10971 MOVL 1(DX)(R9*1), R11 10972 MOVQ DI, R9 10973 SHRQ $0x08, R9 10974 CMPL R9, R11 10975 JNE no_repeat_found_encodeSnappyBlockAsm10B 10976 LEAL 1(CX), DI 10977 MOVL 12(SP), SI 10978 MOVL DI, R8 10979 SUBL 16(SP), R8 10980 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B 10981 10982repeat_extend_back_loop_encodeSnappyBlockAsm10B: 10983 CMPL DI, SI 10984 JLE repeat_extend_back_end_encodeSnappyBlockAsm10B 10985 MOVB -1(DX)(R8*1), BL 10986 MOVB -1(DX)(DI*1), R9 10987 CMPB BL, R9 10988 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B 10989 LEAL -1(DI), DI 10990 DECL R8 10991 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B 10992 10993repeat_extend_back_end_encodeSnappyBlockAsm10B: 10994 MOVL 12(SP), SI 10995 CMPL SI, DI 10996 JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B 10997 MOVL DI, R8 10998 MOVL DI, 12(SP) 10999 LEAQ (DX)(SI*1), R9 11000 SUBL SI, R8 11001 LEAL -1(R8), SI 11002 CMPL SI, $0x3c 11003 JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B 11004 CMPL SI, $0x00000100 11005 JLT 
two_bytes_repeat_emit_encodeSnappyBlockAsm10B 11006 MOVB $0xf4, (AX) 11007 MOVW SI, 1(AX) 11008 ADDQ $0x03, AX 11009 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B 11010 11011two_bytes_repeat_emit_encodeSnappyBlockAsm10B: 11012 MOVB $0xf0, (AX) 11013 MOVB SI, 1(AX) 11014 ADDQ $0x02, AX 11015 CMPL SI, $0x40 11016 JL memmove_repeat_emit_encodeSnappyBlockAsm10B 11017 JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B 11018 11019one_byte_repeat_emit_encodeSnappyBlockAsm10B: 11020 SHLB $0x02, SI 11021 MOVB SI, (AX) 11022 ADDQ $0x01, AX 11023 11024memmove_repeat_emit_encodeSnappyBlockAsm10B: 11025 LEAQ (AX)(R8*1), SI 11026 11027 // genMemMoveShort 11028 CMPQ R8, $0x03 11029 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 11030 JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3 11031 CMPQ R8, $0x08 11032 JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7 11033 CMPQ R8, $0x10 11034 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 11035 CMPQ R8, $0x20 11036 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 11037 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 11038 11039emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: 11040 MOVB (R9), R10 11041 MOVB -1(R9)(R8*1), R9 11042 MOVB R10, (AX) 11043 MOVB R9, -1(AX)(R8*1) 11044 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11045 11046emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3: 11047 MOVW (R9), R10 11048 MOVB 2(R9), R9 11049 MOVW R10, (AX) 11050 MOVB R9, 2(AX) 11051 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11052 11053emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7: 11054 MOVL (R9), R10 11055 MOVL -4(R9)(R8*1), R9 11056 MOVL R10, (AX) 11057 MOVL R9, -4(AX)(R8*1) 11058 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11059 
11060emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: 11061 MOVQ (R9), R10 11062 MOVQ -8(R9)(R8*1), R9 11063 MOVQ R10, (AX) 11064 MOVQ R9, -8(AX)(R8*1) 11065 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11066 11067emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: 11068 MOVOU (R9), X0 11069 MOVOU -16(R9)(R8*1), X1 11070 MOVOU X0, (AX) 11071 MOVOU X1, -16(AX)(R8*1) 11072 JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B 11073 11074emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: 11075 MOVOU (R9), X0 11076 MOVOU 16(R9), X1 11077 MOVOU -32(R9)(R8*1), X2 11078 MOVOU -16(R9)(R8*1), X3 11079 MOVOU X0, (AX) 11080 MOVOU X1, 16(AX) 11081 MOVOU X2, -32(AX)(R8*1) 11082 MOVOU X3, -16(AX)(R8*1) 11083 11084memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: 11085 MOVQ SI, AX 11086 JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B 11087 11088memmove_long_repeat_emit_encodeSnappyBlockAsm10B: 11089 LEAQ (AX)(R8*1), SI 11090 11091 // genMemMoveLong 11092 MOVOU (R9), X0 11093 MOVOU 16(R9), X1 11094 MOVOU -32(R9)(R8*1), X2 11095 MOVOU -16(R9)(R8*1), X3 11096 MOVQ R8, R11 11097 SHRQ $0x05, R11 11098 MOVQ AX, R10 11099 ANDL $0x0000001f, R10 11100 MOVQ $0x00000040, R12 11101 SUBQ R10, R12 11102 DECQ R11 11103 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11104 LEAQ -32(R9)(R12*1), R10 11105 LEAQ -32(AX)(R12*1), R13 11106 11107emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: 11108 MOVOU (R10), X4 11109 MOVOU 16(R10), X5 11110 MOVOA X4, (R13) 11111 MOVOA X5, 16(R13) 11112 ADDQ $0x20, R13 11113 ADDQ $0x20, R10 11114 ADDQ $0x20, R12 11115 DECQ R11 11116 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back 11117 11118emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: 11119 MOVOU -32(R9)(R12*1), X4 11120 MOVOU -16(R9)(R12*1), X5 11121 MOVOA X4, 
-32(AX)(R12*1) 11122 MOVOA X5, -16(AX)(R12*1) 11123 ADDQ $0x20, R12 11124 CMPQ R8, R12 11125 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11126 MOVOU X0, (AX) 11127 MOVOU X1, 16(AX) 11128 MOVOU X2, -32(AX)(R8*1) 11129 MOVOU X3, -16(AX)(R8*1) 11130 MOVQ SI, AX 11131 11132emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: 11133 ADDL $0x05, CX 11134 MOVL CX, SI 11135 SUBL 16(SP), SI 11136 MOVQ src_len+32(FP), R8 11137 SUBL CX, R8 11138 LEAQ (DX)(CX*1), R9 11139 LEAQ (DX)(SI*1), SI 11140 11141 // matchLen 11142 XORL R11, R11 11143 CMPL R8, $0x08 11144 JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B 11145 11146matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: 11147 MOVQ (R9)(R11*1), R10 11148 XORQ (SI)(R11*1), R10 11149 TESTQ R10, R10 11150 JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B 11151 BSFQ R10, R10 11152 SARQ $0x03, R10 11153 LEAL (R11)(R10*1), R11 11154 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B 11155 11156matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: 11157 LEAL -8(R8), R8 11158 LEAL 8(R11), R11 11159 CMPL R8, $0x08 11160 JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B 11161 11162matchlen_single_repeat_extend_encodeSnappyBlockAsm10B: 11163 TESTL R8, R8 11164 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B 11165 11166matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B: 11167 MOVB (R9)(R11*1), R10 11168 CMPB (SI)(R11*1), R10 11169 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B 11170 LEAL 1(R11), R11 11171 DECL R8 11172 JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B 11173 11174repeat_extend_forward_end_encodeSnappyBlockAsm10B: 11175 ADDL R11, CX 11176 MOVL CX, SI 11177 SUBL DI, SI 11178 MOVL 16(SP), DI 11179 11180 // emitCopy 11181two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: 11182 CMPL SI, $0x40 11183 JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B 11184 MOVB $0xee, (AX) 11185 MOVW DI, 1(AX) 
11186 LEAL -60(SI), SI 11187 ADDQ $0x03, AX 11188 JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B 11189 11190two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: 11191 CMPL SI, $0x0c 11192 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B 11193 CMPL DI, $0x00000800 11194 JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B 11195 MOVB $0x01, BL 11196 LEAL -16(BX)(SI*4), SI 11197 MOVB DI, 1(AX) 11198 SHRL $0x08, DI 11199 SHLL $0x05, DI 11200 ORL DI, SI 11201 MOVB SI, (AX) 11202 ADDQ $0x02, AX 11203 JMP repeat_end_emit_encodeSnappyBlockAsm10B 11204 11205emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: 11206 MOVB $0x02, BL 11207 LEAL -4(BX)(SI*4), SI 11208 MOVB SI, (AX) 11209 MOVW DI, 1(AX) 11210 ADDQ $0x03, AX 11211 11212repeat_end_emit_encodeSnappyBlockAsm10B: 11213 MOVL CX, 12(SP) 11214 JMP search_loop_encodeSnappyBlockAsm10B 11215 11216no_repeat_found_encodeSnappyBlockAsm10B: 11217 CMPL (DX)(SI*1), DI 11218 JEQ candidate_match_encodeSnappyBlockAsm10B 11219 SHRQ $0x08, DI 11220 MOVL 24(SP)(R10*4), SI 11221 LEAL 2(CX), R9 11222 CMPL (DX)(R8*1), DI 11223 JEQ candidate2_match_encodeSnappyBlockAsm10B 11224 MOVL R9, 24(SP)(R10*4) 11225 SHRQ $0x08, DI 11226 CMPL (DX)(SI*1), DI 11227 JEQ candidate3_match_encodeSnappyBlockAsm10B 11228 MOVL 20(SP), CX 11229 JMP search_loop_encodeSnappyBlockAsm10B 11230 11231candidate3_match_encodeSnappyBlockAsm10B: 11232 ADDL $0x02, CX 11233 JMP candidate_match_encodeSnappyBlockAsm10B 11234 11235candidate2_match_encodeSnappyBlockAsm10B: 11236 MOVL R9, 24(SP)(R10*4) 11237 INCL CX 11238 MOVL R8, SI 11239 11240candidate_match_encodeSnappyBlockAsm10B: 11241 MOVL 12(SP), DI 11242 TESTL SI, SI 11243 JZ match_extend_back_end_encodeSnappyBlockAsm10B 11244 11245match_extend_back_loop_encodeSnappyBlockAsm10B: 11246 CMPL CX, DI 11247 JLE match_extend_back_end_encodeSnappyBlockAsm10B 11248 MOVB -1(DX)(SI*1), BL 11249 MOVB -1(DX)(CX*1), R8 11250 CMPB BL, R8 11251 JNE match_extend_back_end_encodeSnappyBlockAsm10B 
11252 LEAL -1(CX), CX 11253 DECL SI 11254 JZ match_extend_back_end_encodeSnappyBlockAsm10B 11255 JMP match_extend_back_loop_encodeSnappyBlockAsm10B 11256 11257match_extend_back_end_encodeSnappyBlockAsm10B: 11258 MOVL CX, DI 11259 SUBL 12(SP), DI 11260 LEAQ 3(AX)(DI*1), DI 11261 CMPQ DI, (SP) 11262 JL match_dst_size_check_encodeSnappyBlockAsm10B 11263 MOVQ $0x00000000, ret+48(FP) 11264 RET 11265 11266match_dst_size_check_encodeSnappyBlockAsm10B: 11267 MOVL CX, DI 11268 MOVL 12(SP), R8 11269 CMPL R8, DI 11270 JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B 11271 MOVL DI, R9 11272 MOVL DI, 12(SP) 11273 LEAQ (DX)(R8*1), DI 11274 SUBL R8, R9 11275 LEAL -1(R9), R8 11276 CMPL R8, $0x3c 11277 JLT one_byte_match_emit_encodeSnappyBlockAsm10B 11278 CMPL R8, $0x00000100 11279 JLT two_bytes_match_emit_encodeSnappyBlockAsm10B 11280 MOVB $0xf4, (AX) 11281 MOVW R8, 1(AX) 11282 ADDQ $0x03, AX 11283 JMP memmove_long_match_emit_encodeSnappyBlockAsm10B 11284 11285two_bytes_match_emit_encodeSnappyBlockAsm10B: 11286 MOVB $0xf0, (AX) 11287 MOVB R8, 1(AX) 11288 ADDQ $0x02, AX 11289 CMPL R8, $0x40 11290 JL memmove_match_emit_encodeSnappyBlockAsm10B 11291 JMP memmove_long_match_emit_encodeSnappyBlockAsm10B 11292 11293one_byte_match_emit_encodeSnappyBlockAsm10B: 11294 SHLB $0x02, R8 11295 MOVB R8, (AX) 11296 ADDQ $0x01, AX 11297 11298memmove_match_emit_encodeSnappyBlockAsm10B: 11299 LEAQ (AX)(R9*1), R8 11300 11301 // genMemMoveShort 11302 CMPQ R9, $0x03 11303 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2 11304 JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3 11305 CMPQ R9, $0x08 11306 JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7 11307 CMPQ R9, $0x10 11308 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 11309 CMPQ R9, $0x20 11310 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 11311 JMP 
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 11312 11313emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2: 11314 MOVB (DI), R10 11315 MOVB -1(DI)(R9*1), DI 11316 MOVB R10, (AX) 11317 MOVB DI, -1(AX)(R9*1) 11318 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11319 11320emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3: 11321 MOVW (DI), R10 11322 MOVB 2(DI), DI 11323 MOVW R10, (AX) 11324 MOVB DI, 2(AX) 11325 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11326 11327emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7: 11328 MOVL (DI), R10 11329 MOVL -4(DI)(R9*1), DI 11330 MOVL R10, (AX) 11331 MOVL DI, -4(AX)(R9*1) 11332 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11333 11334emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: 11335 MOVQ (DI), R10 11336 MOVQ -8(DI)(R9*1), DI 11337 MOVQ R10, (AX) 11338 MOVQ DI, -8(AX)(R9*1) 11339 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11340 11341emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: 11342 MOVOU (DI), X0 11343 MOVOU -16(DI)(R9*1), X1 11344 MOVOU X0, (AX) 11345 MOVOU X1, -16(AX)(R9*1) 11346 JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B 11347 11348emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: 11349 MOVOU (DI), X0 11350 MOVOU 16(DI), X1 11351 MOVOU -32(DI)(R9*1), X2 11352 MOVOU -16(DI)(R9*1), X3 11353 MOVOU X0, (AX) 11354 MOVOU X1, 16(AX) 11355 MOVOU X2, -32(AX)(R9*1) 11356 MOVOU X3, -16(AX)(R9*1) 11357 11358memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: 11359 MOVQ R8, AX 11360 JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B 11361 11362memmove_long_match_emit_encodeSnappyBlockAsm10B: 11363 LEAQ (AX)(R9*1), R8 11364 11365 // genMemMoveLong 11366 MOVOU (DI), X0 11367 MOVOU 16(DI), X1 11368 MOVOU -32(DI)(R9*1), X2 11369 MOVOU -16(DI)(R9*1), X3 11370 MOVQ R9, R11 11371 SHRQ $0x05, R11 
11372 MOVQ AX, R10 11373 ANDL $0x0000001f, R10 11374 MOVQ $0x00000040, R12 11375 SUBQ R10, R12 11376 DECQ R11 11377 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11378 LEAQ -32(DI)(R12*1), R10 11379 LEAQ -32(AX)(R12*1), R13 11380 11381emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: 11382 MOVOU (R10), X4 11383 MOVOU 16(R10), X5 11384 MOVOA X4, (R13) 11385 MOVOA X5, 16(R13) 11386 ADDQ $0x20, R13 11387 ADDQ $0x20, R10 11388 ADDQ $0x20, R12 11389 DECQ R11 11390 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back 11391 11392emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: 11393 MOVOU -32(DI)(R12*1), X4 11394 MOVOU -16(DI)(R12*1), X5 11395 MOVOA X4, -32(AX)(R12*1) 11396 MOVOA X5, -16(AX)(R12*1) 11397 ADDQ $0x20, R12 11398 CMPQ R9, R12 11399 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 11400 MOVOU X0, (AX) 11401 MOVOU X1, 16(AX) 11402 MOVOU X2, -32(AX)(R9*1) 11403 MOVOU X3, -16(AX)(R9*1) 11404 MOVQ R8, AX 11405 11406emit_literal_done_match_emit_encodeSnappyBlockAsm10B: 11407match_nolit_loop_encodeSnappyBlockAsm10B: 11408 MOVL CX, DI 11409 SUBL SI, DI 11410 MOVL DI, 16(SP) 11411 ADDL $0x04, CX 11412 ADDL $0x04, SI 11413 MOVQ src_len+32(FP), DI 11414 SUBL CX, DI 11415 LEAQ (DX)(CX*1), R8 11416 LEAQ (DX)(SI*1), SI 11417 11418 // matchLen 11419 XORL R10, R10 11420 CMPL DI, $0x08 11421 JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B 11422 11423matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: 11424 MOVQ (R8)(R10*1), R9 11425 XORQ (SI)(R10*1), R9 11426 TESTQ R9, R9 11427 JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B 11428 BSFQ R9, R9 11429 SARQ $0x03, R9 11430 LEAL (R10)(R9*1), R10 11431 JMP match_nolit_end_encodeSnappyBlockAsm10B 11432 11433matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: 11434 LEAL -8(DI), DI 11435 LEAL 8(R10), R10 11436 CMPL DI, $0x08 11437 JGE 
matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B 11438 11439matchlen_single_match_nolit_encodeSnappyBlockAsm10B: 11440 TESTL DI, DI 11441 JZ match_nolit_end_encodeSnappyBlockAsm10B 11442 11443matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B: 11444 MOVB (R8)(R10*1), R9 11445 CMPB (SI)(R10*1), R9 11446 JNE match_nolit_end_encodeSnappyBlockAsm10B 11447 LEAL 1(R10), R10 11448 DECL DI 11449 JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B 11450 11451match_nolit_end_encodeSnappyBlockAsm10B: 11452 ADDL R10, CX 11453 MOVL 16(SP), SI 11454 ADDL $0x04, R10 11455 MOVL CX, 12(SP) 11456 11457 // emitCopy 11458two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: 11459 CMPL R10, $0x40 11460 JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B 11461 MOVB $0xee, (AX) 11462 MOVW SI, 1(AX) 11463 LEAL -60(R10), R10 11464 ADDQ $0x03, AX 11465 JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B 11466 11467two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: 11468 CMPL R10, $0x0c 11469 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B 11470 CMPL SI, $0x00000800 11471 JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B 11472 MOVB $0x01, BL 11473 LEAL -16(BX)(R10*4), R10 11474 MOVB SI, 1(AX) 11475 SHRL $0x08, SI 11476 SHLL $0x05, SI 11477 ORL SI, R10 11478 MOVB R10, (AX) 11479 ADDQ $0x02, AX 11480 JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B 11481 11482emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: 11483 MOVB $0x02, BL 11484 LEAL -4(BX)(R10*4), R10 11485 MOVB R10, (AX) 11486 MOVW SI, 1(AX) 11487 ADDQ $0x03, AX 11488 11489match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: 11490 CMPL CX, 8(SP) 11491 JGE emit_remainder_encodeSnappyBlockAsm10B 11492 MOVQ -2(DX)(CX*1), DI 11493 CMPQ AX, (SP) 11494 JL match_nolit_dst_ok_encodeSnappyBlockAsm10B 11495 MOVQ $0x00000000, ret+48(FP) 11496 RET 11497 11498match_nolit_dst_ok_encodeSnappyBlockAsm10B: 11499 MOVQ $0x9e3779b1, R9 11500 MOVQ DI, R8 11501 SHRQ $0x10, DI 11502 MOVQ DI, 
SI 11503 SHLQ $0x20, R8 11504 IMULQ R9, R8 11505 SHRQ $0x36, R8 11506 SHLQ $0x20, SI 11507 IMULQ R9, SI 11508 SHRQ $0x36, SI 11509 LEAL -2(CX), R9 11510 LEAQ 24(SP)(SI*4), R10 11511 MOVL (R10), SI 11512 MOVL R9, 24(SP)(R8*4) 11513 MOVL CX, (R10) 11514 CMPL (DX)(SI*1), DI 11515 JEQ match_nolit_loop_encodeSnappyBlockAsm10B 11516 INCL CX 11517 JMP search_loop_encodeSnappyBlockAsm10B 11518 11519emit_remainder_encodeSnappyBlockAsm10B: 11520 MOVQ src_len+32(FP), CX 11521 SUBL 12(SP), CX 11522 LEAQ 3(AX)(CX*1), CX 11523 CMPQ CX, (SP) 11524 JL emit_remainder_ok_encodeSnappyBlockAsm10B 11525 MOVQ $0x00000000, ret+48(FP) 11526 RET 11527 11528emit_remainder_ok_encodeSnappyBlockAsm10B: 11529 MOVQ src_len+32(FP), CX 11530 MOVL 12(SP), BX 11531 CMPL BX, CX 11532 JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B 11533 MOVL CX, SI 11534 MOVL CX, 12(SP) 11535 LEAQ (DX)(BX*1), CX 11536 SUBL BX, SI 11537 LEAL -1(SI), DX 11538 CMPL DX, $0x3c 11539 JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B 11540 CMPL DX, $0x00000100 11541 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B 11542 MOVB $0xf4, (AX) 11543 MOVW DX, 1(AX) 11544 ADDQ $0x03, AX 11545 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B 11546 11547two_bytes_emit_remainder_encodeSnappyBlockAsm10B: 11548 MOVB $0xf0, (AX) 11549 MOVB DL, 1(AX) 11550 ADDQ $0x02, AX 11551 CMPL DX, $0x40 11552 JL memmove_emit_remainder_encodeSnappyBlockAsm10B 11553 JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B 11554 11555one_byte_emit_remainder_encodeSnappyBlockAsm10B: 11556 SHLB $0x02, DL 11557 MOVB DL, (AX) 11558 ADDQ $0x01, AX 11559 11560memmove_emit_remainder_encodeSnappyBlockAsm10B: 11561 LEAQ (AX)(SI*1), DX 11562 MOVL SI, BX 11563 11564 // genMemMoveShort 11565 CMPQ BX, $0x03 11566 JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2 11567 JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3 11568 CMPQ BX, $0x08 11569 JB 
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7 11570 CMPQ BX, $0x10 11571 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16 11572 CMPQ BX, $0x20 11573 JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32 11574 JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 11575 11576emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: 11577 MOVB (CX), SI 11578 MOVB -1(CX)(BX*1), CL 11579 MOVB SI, (AX) 11580 MOVB CL, -1(AX)(BX*1) 11581 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11582 11583emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: 11584 MOVW (CX), SI 11585 MOVB 2(CX), CL 11586 MOVW SI, (AX) 11587 MOVB CL, 2(AX) 11588 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11589 11590emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: 11591 MOVL (CX), SI 11592 MOVL -4(CX)(BX*1), CX 11593 MOVL SI, (AX) 11594 MOVL CX, -4(AX)(BX*1) 11595 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11596 11597emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: 11598 MOVQ (CX), SI 11599 MOVQ -8(CX)(BX*1), CX 11600 MOVQ SI, (AX) 11601 MOVQ CX, -8(AX)(BX*1) 11602 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11603 11604emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: 11605 MOVOU (CX), X0 11606 MOVOU -16(CX)(BX*1), X1 11607 MOVOU X0, (AX) 11608 MOVOU X1, -16(AX)(BX*1) 11609 JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B 11610 11611emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: 11612 MOVOU (CX), X0 11613 MOVOU 16(CX), X1 11614 MOVOU -32(CX)(BX*1), X2 11615 MOVOU -16(CX)(BX*1), X3 11616 MOVOU X0, (AX) 11617 MOVOU X1, 16(AX) 11618 MOVOU X2, -32(AX)(BX*1) 11619 MOVOU X3, -16(AX)(BX*1) 11620 
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B

memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// encodeSnappyBlockAsm8B encodes src into dst as a sequence of literal and
// copy elements and returns the number of bytes written, or 0 when the output
// would reach the destination limit computed below.
//
// Stack frame (1048 bytes):
//   0(SP)   8 bytes  dst limit pointer: dst_base + (src_len - 5) - (src_len >> 5)
//   8(SP)   4 bytes  search limit: src_len - 8
//   12(SP)  4 bytes  src position up to which input has already been emitted
//   16(SP)  4 bytes  offset of the most recent match ("repeat" offset), starts at 1
//   20(SP)  4 bytes  next src position to try when nothing matches
//   24(SP)  1024 bytes  hash table: 256 entries of 32-bit src positions
//
// Registers held across the main loop:
//   AX = dst write pointer, DX = src base pointer, CX = current src position.
//
// Table indices are 8 bits: (x << 32) * 0x9e3779b1 >> 56, i.e. only the low
// four bytes of x are hashed.
//
// NOTE(review): the code loads 8 bytes at src[CX] and writes dst without
// per-store bounds checks beyond the 0(SP) limit; presumably the Go caller
// guarantees a minimum src length and a sufficiently large dst - confirm.
//
// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX // 8 iterations x 128 bytes = 1024-byte table
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table, 128 bytes per iteration.
zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBlockAsm8B
	MOVL $0x00000000, 12(SP) // nothing emitted yet
	MOVQ src_len+32(FP), CX
	LEAQ -5(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP) // search limit = src_len - 8
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP) // dst limit = dst_base + src_len - 5 - src_len/32
	MOVL $0x00000001, CX // start searching at position 1
	MOVL CX, 16(SP) // initial repeat offset = 1
	MOVQ src_base+24(FP), DX

	// Main search loop: CX is the current position.
search_loop_encodeSnappyBlockAsm8B:
	// Next position to try = CX + 4 + ((CX - emitted position) >> 4).
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 4(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B
	MOVQ (DX)(CX*1), DI // DI = 8 bytes at src[CX]
	MOVL SI, 20(SP)
	MOVQ $0x9e3779b1, R9

	// R10 = hash of the bytes at CX, R11 = hash of the bytes at CX+1.
	MOVQ DI, R10
	MOVQ DI, R11
	SHRQ $0x08, R11
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10
	SHLQ $0x20, R11
	IMULQ R9, R11
	SHRQ $0x38, R11

	// Fetch both candidates (SI, R8), then record CX and CX+1 in the table.
	MOVL 24(SP)(R10*4), SI
	MOVL 24(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	LEAL 1(CX), R10
	MOVL R10, 24(SP)(R11*4)

	// R10 = hash of the bytes at CX+2, used by no_repeat_found below.
	MOVQ DI, R10
	SHRQ $0x10, R10
	SHLQ $0x20, R10
	IMULQ R9, R10
	SHRQ $0x38, R10

	// Repeat check: compare the 4 bytes at CX+1 with those at CX+1-repeat.
	MOVL CX, R9
	SUBL 16(SP), R9
	MOVL 1(DX)(R9*1), R11
	MOVQ DI, R9
	SHRQ $0x08, R9
	CMPL R9, R11
	JNE no_repeat_found_encodeSnappyBlockAsm8B

	// Repeat found at CX+1. DI = match start, R8 = matching earlier position,
	// SI = lower bound (already-emitted position).
	LEAL 1(CX), DI
	MOVL 12(SP), SI
	MOVL DI, R8
	SUBL 16(SP), R8
	JZ repeat_extend_back_end_encodeSnappyBlockAsm8B // earlier position is 0: cannot extend back

	// Extend the match backwards while the preceding bytes are equal.
repeat_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL DI, SI
	JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(R8*1), BL
	MOVB -1(DX)(DI*1), R9
	CMPB BL, R9
	JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(DI), DI
	DECL R8
	JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B

	// Emit src[12(SP):DI] as a literal (skipped when empty).
repeat_extend_back_end_encodeSnappyBlockAsm8B:
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9 // R9 = literal source pointer
	SUBL SI, R8 // R8 = literal length
	LEAL -1(R8), SI // SI = length - 1, the value stored in the header
	CMPL SI, $0x3c
	JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B

	// Three-byte header: 0xf4 followed by 16-bit (length - 1).
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

	// Two-byte header: 0xf0 followed by 8-bit (length - 1).
two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_repeat_emit_encodeSnappyBlockAsm8B // length <= 64: short copy
	JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B

	// One-byte header: (length - 1) << 2.
one_byte_repeat_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), SI // SI = dst pointer after the literal

	// genMemMoveShort
	// Dispatch on length R8; each case loads the first and last chunk of the
	// source (which may overlap) before storing either.
	CMPQ R8, $0x03
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
	MOVQ SI, AX // advance dst past the literal
	JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B

memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), SI // SI = dst pointer after the literal

	// genMemMoveLong
	// The first and last 32 source bytes are loaded into X0-X3 up front and
	// stored last with unaligned stores; the middle is copied 32 bytes at a
	// time using MOVOA stores at dst offsets derived from R12 = 64 - (AX & 31).
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11 // R11 = length / 32
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R8, R12
	JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX // advance dst past the literal

	// Extend the repeat match forwards. CX+5 = repeat position (CX+1) plus the
	// 4 bytes already compared; SI points at the corresponding earlier data.
emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, SI
	SUBL 16(SP), SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8 // R8 = bytes remaining in src
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R11 counts matching bytes between (R9) and (SI), at most R8.
	XORL R11, R11
	CMPL R8, $0x08
	JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B

	// Compare 8 bytes at a time; on a difference, the index of the lowest set
	// bit of the XOR divided by 8 is the number of equal leading bytes.
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVQ (R9)(R11*1), R10
	XORQ (SI)(R11*1), R10
	TESTQ R10, R10
	JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
	BSFQ R10, R10
	SARQ $0x03, R10
	LEAL (R11)(R10*1), R11
	JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend_encodeSnappyBlockAsm8B:
	TESTL R8, R8
	JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B:
	MOVB (R9)(R11*1), R10
	CMPB (SI)(R11*1), R10
	JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B

repeat_extend_forward_end_encodeSnappyBlockAsm8B:
	ADDL R11, CX // CX = end of match
	MOVL CX, SI
	SUBL DI, SI // SI = match length (end - start after backward extension)
	MOVL 16(SP), DI // DI = offset

	// emitCopy
	// While length > 64: write 0xee plus the 16-bit offset and reduce the
	// length by 60.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL SI, $0x40
	JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW DI, 1(AX)
	LEAL -60(SI), SI
	ADDQ $0x03, AX
	JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL SI, $0x0c
	JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B

	// Length < 12, two-byte form:
	//   byte 0 = 1 + ((length - 4) << 2) | ((offset >> 8) << 5)
	//   byte 1 = low byte of offset
	// NOTE(review): no offset range check here, unlike the `CMPL SI, $0x800`
	// in the 10B variant; presumably offsets always fit in 11 bits for the
	// inputs this variant receives - confirm against the caller.
	MOVB $0x01, BL
	LEAL -16(BX)(SI*4), SI
	MOVB DI, 1(AX)
	SHRL $0x08, DI
	SHLL $0x05, DI
	ORL DI, SI
	MOVB SI, (AX)
	ADDQ $0x02, AX
	JMP repeat_end_emit_encodeSnappyBlockAsm8B

	// Three-byte form: byte 0 = 2 + ((length - 1) << 2), then 16-bit offset.
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(SI*4), SI
	MOVB SI, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm8B:
	MOVL CX, 12(SP) // everything up to CX is now emitted
	JMP search_loop_encodeSnappyBlockAsm8B

	// No repeat: test the table candidates for positions CX, CX+1 and CX+2.
no_repeat_found_encodeSnappyBlockAsm8B:
	CMPL (DX)(SI*1), DI // candidate for CX
	JEQ candidate_match_encodeSnappyBlockAsm8B
	SHRQ $0x08, DI // DI now starts with the bytes at CX+1
	MOVL 24(SP)(R10*4), SI // SI = candidate for CX+2
	LEAL 2(CX), R9
	CMPL (DX)(R8*1), DI // candidate for CX+1
	JEQ candidate2_match_encodeSnappyBlockAsm8B
	MOVL R9, 24(SP)(R10*4) // record CX+2 in the table
	SHRQ $0x08, DI // DI now starts with the bytes at CX+2
	CMPL (DX)(SI*1), DI
	JEQ candidate3_match_encodeSnappyBlockAsm8B
	MOVL 20(SP), CX // nothing matched: skip ahead
	JMP search_loop_encodeSnappyBlockAsm8B

candidate3_match_encodeSnappyBlockAsm8B:
	ADDL $0x02, CX
	JMP candidate_match_encodeSnappyBlockAsm8B

candidate2_match_encodeSnappyBlockAsm8B:
	MOVL R9, 24(SP)(R10*4) // record CX+2 in the table
	INCL CX
	MOVL R8, SI

	// Match between position CX and candidate SI; extend it backwards, not
	// past the already-emitted position (DI) or candidate position 0.
candidate_match_encodeSnappyBlockAsm8B:
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBlockAsm8B

match_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBlockAsm8B
	JMP match_extend_back_loop_encodeSnappyBlockAsm8B

	// Return 0 if dst + 3 + pending literal length reaches the dst limit.
match_extend_back_end_encodeSnappyBlockAsm8B:
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[12(SP):CX] as a literal (skipped when empty). Header layout is
	// the same as in the repeat path above.
match_dst_size_check_encodeSnappyBlockAsm8B:
	MOVL CX, DI
	MOVL 12(SP), R8
	CMPL R8, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(R8*1), DI // DI = literal source pointer
	SUBL R8, R9 // R9 = literal length
	LEAL -1(R9), R8 // R8 = length - 1
	CMPL R8, $0x3c
	JLT one_byte_match_emit_encodeSnappyBlockAsm8B
	CMPL R8, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBlockAsm8B

two_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB R8, 1(AX)
	ADDQ $0x02, AX
	CMPL R8, $0x40
	JL memmove_match_emit_encodeSnappyBlockAsm8B
	JMP memmove_long_match_emit_encodeSnappyBlockAsm8B

one_byte_match_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, R8
	MOVB R8, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R9*1), R8 // R8 = dst pointer after the literal

	// genMemMoveShort
	// Copy R9 (<= 64) bytes from (DI) to (AX) using overlapping head/tail moves.
	CMPQ R9, $0x03
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (DI), R10
	MOVB -1(DI)(R9*1), DI
	MOVB R10, (AX)
	MOVB DI, -1(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (DI), R10
	MOVB 2(DI), DI
	MOVW R10, (AX)
	MOVB DI, 2(AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (DI), R10
	MOVL -4(DI)(R9*1), DI
	MOVL R10, (AX)
	MOVL DI, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (DI), R10
	MOVQ -8(DI)(R9*1), DI
	MOVQ R10, (AX)
	MOVQ DI, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (DI), X0
	MOVOU -16(DI)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
	MOVQ R8, AX // advance dst past the literal
	JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B

memmove_long_match_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R9*1), R8 // R8 = dst pointer after the literal

	// genMemMoveLong
	// Same scheme as the long copy in the repeat path: head and tail kept in
	// X0-X3, middle copied in 32-byte steps with MOVOA stores.
	MOVOU (DI), X0
	MOVOU 16(DI), X1
	MOVOU -32(DI)(R9*1), X2
	MOVOU -16(DI)(R9*1), X3
	MOVQ R9, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R12
	SUBQ R10, R12
	DECQ R11
	JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(DI)(R12*1), R10
	LEAQ -32(AX)(R12*1), R13

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	ADDQ $0x20, R13
	ADDQ $0x20, R10
	ADDQ $0x20, R12
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(DI)(R12*1), X4
	MOVOU -16(DI)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ $0x20, R12
	CMPQ R9, R12
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ R8, AX // advance dst past the literal

	// Match at CX against candidate SI with no pending literal. Record the
	// offset, skip the 4 bytes already known to match, then extend forwards.
emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
	MOVL CX, DI
	SUBL SI, DI
	MOVL DI, 16(SP) // repeat offset = CX - candidate
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), DI
	SUBL CX, DI // DI = bytes remaining in src
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(SI*1), SI

	// matchLen
	// R10 counts matching bytes between (R8) and (SI), at most DI.
	XORL R10, R10
	CMPL DI, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVQ (R8)(R10*1), R9
	XORQ (SI)(R10*1), R9
	TESTQ R9, R9
	JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
	BSFQ R9, R9
	SARQ $0x03, R9
	LEAL (R10)(R9*1), R10
	JMP match_nolit_end_encodeSnappyBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
	TESTL DI, DI
	JZ match_nolit_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVB (R8)(R10*1), R9
	CMPB (SI)(R10*1), R9
	JNE match_nolit_end_encodeSnappyBlockAsm8B
	LEAL 1(R10), R10
	DECL DI
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B

match_nolit_end_encodeSnappyBlockAsm8B:
	ADDL R10, CX // CX = end of match
	MOVL 16(SP), SI // SI = offset
	ADDL $0x04, R10 // R10 = total match length including the first 4 bytes
	MOVL CX, 12(SP) // everything up to CX is now emitted

	// emitCopy
	// Same encoding as the emitCopy in the repeat path: 0xee chunks of 60
	// while length > 64, then the two-byte form (length < 12) or the
	// three-byte form.
two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R10, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(R10), R10
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R10, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R10*4), R10
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL SI, R10
	MOVB R10, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R10*4), R10
	MOVB R10, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBlockAsm8B // past the search limit
	MOVQ -2(DX)(CX*1), DI // DI = 8 bytes at src[CX-2]
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP) // dst limit reached: return 0
	RET

	// Index position CX-2, then look up and index position CX; if the
	// candidate's 4 bytes equal those at CX, continue with another match.
match_nolit_dst_ok_encodeSnappyBlockAsm8B:
	MOVQ $0x9e3779b1, R9
	MOVQ DI, R8
	SHRQ $0x10, DI // DI now starts with the bytes at CX
	MOVQ DI, SI
	SHLQ $0x20, R8
	IMULQ R9, R8
	SHRQ $0x38, R8 // R8 = hash of the bytes at CX-2
	SHLQ $0x20, SI
	IMULQ R9, SI
	SHRQ $0x38, SI // SI = hash of the bytes at CX
	LEAL -2(CX), R9
	LEAQ 24(SP)(SI*4), R10
	MOVL (R10), SI // SI = candidate for CX
	MOVL R9, 24(SP)(R8*4)
	MOVL CX, (R10)
	CMPL (DX)(SI*1), DI
	JEQ match_nolit_loop_encodeSnappyBlockAsm8B
	INCL CX
	JMP search_loop_encodeSnappyBlockAsm8B

	// Emit everything from 12(SP) to the end of src as one literal, after
	// checking that dst + 3 + length stays below the dst limit.
emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B // nothing left to emit
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX // CX = literal source pointer
	SUBL BX, SI // SI = literal length
	LEAL -1(SI), DX // DX = length - 1 (src base is no longer needed)
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBlockAsm8B
	JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B

one_byte_emit_remainder_encodeSnappyBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm8B:
	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal
	MOVL SI, BX // BX = literal length

	// genMemMoveShort
	// Copy BX (<= 64) bytes from (CX) to (AX) using overlapping head/tail moves.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ DX, AX // advance dst past the literal
	JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B

memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literal
	MOVL SI, BX // BX = literal length

	// genMemMoveLong
	// Same scheme as the other long copies in this function: head and tail
	// kept in X0-X3, middle copied in 32-byte steps with MOVOA stores.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX // advance dst past the literal

	// Return the number of bytes written: dst write pointer - dst_base.
emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ lit_len+32(FP), DX
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	TESTQ DX, DX
	JZ emit_literal_end_standalone_skip
	MOVL DX, BX
	LEAL -1(DX), SI
	CMPL SI, $0x3c
	JLT one_byte_standalone
	CMPL SI, $0x00000100
	JLT two_bytes_standalone
	CMPL SI, $0x00010000
	JLT three_bytes_standalone
	CMPL
SI, $0x01000000 12458 JLT four_bytes_standalone 12459 MOVB $0xfc, (AX) 12460 MOVL SI, 1(AX) 12461 ADDQ $0x05, BX 12462 ADDQ $0x05, AX 12463 JMP memmove_long_standalone 12464 12465four_bytes_standalone: 12466 MOVL SI, DI 12467 SHRL $0x10, DI 12468 MOVB $0xf8, (AX) 12469 MOVW SI, 1(AX) 12470 MOVB DI, 3(AX) 12471 ADDQ $0x04, BX 12472 ADDQ $0x04, AX 12473 JMP memmove_long_standalone 12474 12475three_bytes_standalone: 12476 MOVB $0xf4, (AX) 12477 MOVW SI, 1(AX) 12478 ADDQ $0x03, BX 12479 ADDQ $0x03, AX 12480 JMP memmove_long_standalone 12481 12482two_bytes_standalone: 12483 MOVB $0xf0, (AX) 12484 MOVB SI, 1(AX) 12485 ADDQ $0x02, BX 12486 ADDQ $0x02, AX 12487 CMPL SI, $0x40 12488 JL memmove_standalone 12489 JMP memmove_long_standalone 12490 12491one_byte_standalone: 12492 SHLB $0x02, SI 12493 MOVB SI, (AX) 12494 ADDQ $0x01, BX 12495 ADDQ $0x01, AX 12496 12497memmove_standalone: 12498 // genMemMoveShort 12499 CMPQ DX, $0x03 12500 JB emit_lit_memmove_standalone_memmove_move_1or2 12501 JE emit_lit_memmove_standalone_memmove_move_3 12502 CMPQ DX, $0x08 12503 JB emit_lit_memmove_standalone_memmove_move_4through7 12504 CMPQ DX, $0x10 12505 JBE emit_lit_memmove_standalone_memmove_move_8through16 12506 CMPQ DX, $0x20 12507 JBE emit_lit_memmove_standalone_memmove_move_17through32 12508 JMP emit_lit_memmove_standalone_memmove_move_33through64 12509 12510emit_lit_memmove_standalone_memmove_move_1or2: 12511 MOVB (CX), SI 12512 MOVB -1(CX)(DX*1), CL 12513 MOVB SI, (AX) 12514 MOVB CL, -1(AX)(DX*1) 12515 JMP emit_literal_end_standalone 12516 12517emit_lit_memmove_standalone_memmove_move_3: 12518 MOVW (CX), SI 12519 MOVB 2(CX), CL 12520 MOVW SI, (AX) 12521 MOVB CL, 2(AX) 12522 JMP emit_literal_end_standalone 12523 12524emit_lit_memmove_standalone_memmove_move_4through7: 12525 MOVL (CX), SI 12526 MOVL -4(CX)(DX*1), CX 12527 MOVL SI, (AX) 12528 MOVL CX, -4(AX)(DX*1) 12529 JMP emit_literal_end_standalone 12530 12531emit_lit_memmove_standalone_memmove_move_8through16: 12532 MOVQ (CX), SI 
12533 MOVQ -8(CX)(DX*1), CX 12534 MOVQ SI, (AX) 12535 MOVQ CX, -8(AX)(DX*1) 12536 JMP emit_literal_end_standalone 12537 12538emit_lit_memmove_standalone_memmove_move_17through32: 12539 MOVOU (CX), X0 12540 MOVOU -16(CX)(DX*1), X1 12541 MOVOU X0, (AX) 12542 MOVOU X1, -16(AX)(DX*1) 12543 JMP emit_literal_end_standalone 12544 12545emit_lit_memmove_standalone_memmove_move_33through64: 12546 MOVOU (CX), X0 12547 MOVOU 16(CX), X1 12548 MOVOU -32(CX)(DX*1), X2 12549 MOVOU -16(CX)(DX*1), X3 12550 MOVOU X0, (AX) 12551 MOVOU X1, 16(AX) 12552 MOVOU X2, -32(AX)(DX*1) 12553 MOVOU X3, -16(AX)(DX*1) 12554 JMP emit_literal_end_standalone 12555 JMP emit_literal_end_standalone 12556 12557memmove_long_standalone: 12558 // genMemMoveLong 12559 MOVOU (CX), X0 12560 MOVOU 16(CX), X1 12561 MOVOU -32(CX)(DX*1), X2 12562 MOVOU -16(CX)(DX*1), X3 12563 MOVQ DX, DI 12564 SHRQ $0x05, DI 12565 MOVQ AX, SI 12566 ANDL $0x0000001f, SI 12567 MOVQ $0x00000040, R8 12568 SUBQ SI, R8 12569 DECQ DI 12570 JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 12571 LEAQ -32(CX)(R8*1), SI 12572 LEAQ -32(AX)(R8*1), R9 12573 12574emit_lit_memmove_long_standalonelarge_big_loop_back: 12575 MOVOU (SI), X4 12576 MOVOU 16(SI), X5 12577 MOVOA X4, (R9) 12578 MOVOA X5, 16(R9) 12579 ADDQ $0x20, R9 12580 ADDQ $0x20, SI 12581 ADDQ $0x20, R8 12582 DECQ DI 12583 JNA emit_lit_memmove_long_standalonelarge_big_loop_back 12584 12585emit_lit_memmove_long_standalonelarge_forward_sse_loop_32: 12586 MOVOU -32(CX)(R8*1), X4 12587 MOVOU -16(CX)(R8*1), X5 12588 MOVOA X4, -32(AX)(R8*1) 12589 MOVOA X5, -16(AX)(R8*1) 12590 ADDQ $0x20, R8 12591 CMPQ DX, R8 12592 JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32 12593 MOVOU X0, (AX) 12594 MOVOU X1, 16(AX) 12595 MOVOU X2, -32(AX)(DX*1) 12596 MOVOU X3, -16(AX)(DX*1) 12597 JMP emit_literal_end_standalone 12598 JMP emit_literal_end_standalone 12599 12600emit_literal_end_standalone_skip: 12601 XORQ BX, BX 12602 12603emit_literal_end_standalone: 12604 MOVQ BX, ret+48(FP) 
12605 RET 12606 12607// func emitRepeat(dst []byte, offset int, length int) int 12608TEXT ·emitRepeat(SB), NOSPLIT, $0-48 12609 XORQ BX, BX 12610 MOVQ dst_base+0(FP), AX 12611 MOVQ offset+24(FP), CX 12612 MOVQ length+32(FP), DX 12613 12614 // emitRepeat 12615emit_repeat_again_standalone: 12616 MOVL DX, SI 12617 LEAL -4(DX), DX 12618 CMPL SI, $0x08 12619 JLE repeat_two_standalone 12620 CMPL SI, $0x0c 12621 JGE cant_repeat_two_offset_standalone 12622 CMPL CX, $0x00000800 12623 JLT repeat_two_offset_standalone 12624 12625cant_repeat_two_offset_standalone: 12626 CMPL DX, $0x00000104 12627 JLT repeat_three_standalone 12628 CMPL DX, $0x00010100 12629 JLT repeat_four_standalone 12630 CMPL DX, $0x0100ffff 12631 JLT repeat_five_standalone 12632 LEAL -16842747(DX), DX 12633 MOVW $0x001d, (AX) 12634 MOVW $0xfffb, 2(AX) 12635 MOVB $0xff, 4(AX) 12636 ADDQ $0x05, AX 12637 ADDQ $0x05, BX 12638 JMP emit_repeat_again_standalone 12639 12640repeat_five_standalone: 12641 LEAL -65536(DX), DX 12642 MOVL DX, CX 12643 MOVW $0x001d, (AX) 12644 MOVW DX, 2(AX) 12645 SARL $0x10, CX 12646 MOVB CL, 4(AX) 12647 ADDQ $0x05, BX 12648 ADDQ $0x05, AX 12649 JMP gen_emit_repeat_end 12650 12651repeat_four_standalone: 12652 LEAL -256(DX), DX 12653 MOVW $0x0019, (AX) 12654 MOVW DX, 2(AX) 12655 ADDQ $0x04, BX 12656 ADDQ $0x04, AX 12657 JMP gen_emit_repeat_end 12658 12659repeat_three_standalone: 12660 LEAL -4(DX), DX 12661 MOVW $0x0015, (AX) 12662 MOVB DL, 2(AX) 12663 ADDQ $0x03, BX 12664 ADDQ $0x03, AX 12665 JMP gen_emit_repeat_end 12666 12667repeat_two_standalone: 12668 SHLL $0x02, DX 12669 ORL $0x01, DX 12670 MOVW DX, (AX) 12671 ADDQ $0x02, BX 12672 ADDQ $0x02, AX 12673 JMP gen_emit_repeat_end 12674 12675repeat_two_offset_standalone: 12676 XORQ SI, SI 12677 LEAL 1(SI)(DX*4), DX 12678 MOVB CL, 1(AX) 12679 SARL $0x08, CX 12680 SHLL $0x05, CX 12681 ORL CX, DX 12682 MOVB DL, (AX) 12683 ADDQ $0x02, BX 12684 ADDQ $0x02, AX 12685 12686gen_emit_repeat_end: 12687 MOVQ BX, ret+40(FP) 12688 RET 12689 12690// func 
// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes an encoding of (offset, length) at dst and returns the
// number of bytes written. Only dst_base is read from dst; no length or
// capacity check is made here. All tests use the low 32 bits of offset and
// length as signed values.
//
//   offset >= 65536:
//     length > 64 : 0xff + 32-bit offset (5 bytes), length -= 64. If fewer
//                   than 4 remain, continue at "remain"; otherwise the rest
//                   is written by the inlined emitRepeat sequence.
//     remain      : nothing when length == 0, else byte ((length-1)<<2 | 3)
//                   + 32-bit offset (5 bytes)
//   offset < 65536:
//     length > 64 : 0xee + 16-bit offset (3 bytes), length -= 60; the rest
//                   is written by the inlined emitRepeat sequence.
//     length < 12 and offset < 2048 :
//                   2 bytes - ((offset>>8)<<5 | (length-4)<<2 | 1), then the
//                   low byte of offset
//     otherwise   : 3 bytes - ((length-1)<<2 | 2), then 16-bit offset
//
// Inlined emitRepeat sequence, with n = remaining-4 (first match wins):
//   remaining <= 8                   : 2 bytes - 16-bit store of (n<<2 | 1)
//   remaining < 12 and offset < 2048 : 2 bytes - ((offset>>8)<<5 | n<<2 | 1),
//                                      then the low byte of offset
//   n < 260                          : 3 bytes - 16-bit 0x0015, byte n-4
//   n < 65792                        : 4 bytes - 16-bit 0x0019, 16-bit n-256
//   n < 0x0100ffff                   : 5 bytes - 16-bit 0x001d, 24-bit n-65536
//   otherwise                        : 5 bytes - 16-bit 0x001d, 16-bit 0xfffb,
//                                      byte 0xff; remaining = n-16842747, again
// NOTE(review): presumably the S2 copy/repeat elements - confirm against the
// format description.
//
// Registers: AX = write pointer, BX = bytes written, CX = offset,
// DX = remaining length, SI = scratch.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone

four_bytes_loop_back_standalone:
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone

	// emitRepeat
emit_repeat_again_standalone_emit_copy:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy

	// Too long for one element: emit a fixed 5-byte chunk and go again.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	// CX is reused as scratch for bits 16-23 of the value.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	// DX = n*4 + 1, then OR in (offset>>8)<<5; byte 1 is the low offset byte.
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

	// Unreachable: follows an unconditional JMP with no label in between.
	// Kept as emitted by the generator.
	JMP four_bytes_loop_back_standalone

four_bytes_remain_standalone:
	TESTL DX, DX
	JZ gen_emit_copy_end

	// Only DL is stored: low byte of 3 + (length-1)*4.
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// emitRepeat
emit_repeat_again_standalone_emit_copy_short:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy_short

	// Too long for one element: emit a fixed 5-byte chunk and go again.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	// CX is reused as scratch for bits 16-23 of the value.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	// DX = n*4 + 1, then OR in (offset>>8)<<5; byte 1 is the low offset byte.
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

	// Unreachable: follows an unconditional JMP with no label in between.
	// Kept as emitted by the generator.
	JMP two_byte_offset_standalone

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone

	// Only DL is stored: low byte of 1 + (length-4)*4, OR (offset>>8)<<5.
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

emit_copy_three_standalone:
	// Only DL is stored: low byte of 2 + (length-1)*4.
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes an encoding of (offset, length) at dst and returns
// the number of bytes written. Unlike the repeat-emitting variant it loops,
// emitting further elements for long lengths. Only dst_base is read from dst;
// no length or capacity check is made here. All tests use the low 32 bits of
// offset and length as signed values.
//
//   offset >= 65536:
//     while length > 64 : 0xff + 32-bit offset (5 bytes), length -= 64;
//                         leaves the loop early once fewer than 4 remain
//     remain            : nothing when length == 0, else byte
//                         ((length-1)<<2 | 3) + 32-bit offset (5 bytes)
//   offset < 65536:
//     while length > 64 : 0xee + 16-bit offset (3 bytes), length -= 60
//     length < 12 and offset < 2048 :
//                         2 bytes - ((offset>>8)<<5 | (length-4)<<2 | 1),
//                         then the low byte of offset
//     otherwise         : 3 bytes - ((length-1)<<2 | 2), then 16-bit offset
// NOTE(review): presumably Snappy-compatible copy elements, going by the
// "_snappy" label suffix - confirm against the format description.
//
// Registers: AX = write pointer, BX = bytes written, CX = offset,
// DX = remaining length, SI = scratch.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone_snappy
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone_snappy
	JMP four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	TESTL DX, DX
	JZ gen_emit_copy_end_snappy

	// Only DL is stored: low byte of 3 + (length-1)*4.
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone_snappy
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone_snappy
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone_snappy

	// Only DL is stored: low byte of 1 + (length-4)*4, OR (offset>>8)<<5.
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	// Only DL is stored: low byte of 2 + (length-1)*4.
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes on which a and b agree,
// examining at most len(a) bytes. The count is kept in SI and the remaining
// budget in DX (low 32 bits of a_len, signed compares).
// b_len is never read.
// NOTE(review): presumably callers guarantee len(b) >= len(a) - confirm.
//
// Registers: AX = a base, CX = b base, DX = bytes left, SI = match length,
// BX = scratch.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX

	// matchLen
	XORL SI, SI
	CMPL DX, $0x08
	JL matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at once; a zero XOR means all 8 match.
	MOVQ (AX)(SI*1), BX
	XORQ (CX)(SI*1), BX
	TESTQ BX, BX
	JZ matchlen_loop_standalone

	// Lowest set bit / 8 = index of the first differing byte in this word.
	BSFQ BX, BX
	SARQ $0x03, BX
	LEAL (SI)(BX*1), SI
	JMP gen_match_len_end

matchlen_loop_standalone:
	LEAL -8(DX), DX
	LEAL 8(SI), SI
	CMPL DX, $0x08
	JGE matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: finish one byte at a time.
	TESTL DX, DX
	JZ gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE gen_match_len_end
	LEAL 1(SI), SI
	DECL DX
	JNZ matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET