1 /* 2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.asm.amd64; 26 27 import static jdk.vm.ci.amd64.AMD64.rax; 28 import static jdk.vm.ci.amd64.AMD64.rcx; 29 import static jdk.vm.ci.amd64.AMD64.rdx; 30 import static jdk.vm.ci.amd64.AMD64.rsp; 31 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseIncDec; 32 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmLoadAndClearUpper; 33 import static org.graalvm.compiler.asm.amd64.AMD64AsmOptions.UseXmmRegToRegMoveAll; 34 35 import org.graalvm.compiler.asm.Label; 36 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 37 import org.graalvm.compiler.core.common.NumUtil; 38 39 import jdk.vm.ci.amd64.AMD64; 40 import jdk.vm.ci.amd64.AMD64Kind; 41 import jdk.vm.ci.code.Register; 42 import jdk.vm.ci.code.TargetDescription; 43 44 /** 45 * This class implements commonly used X86 code patterns. 46 */ 47 public class AMD64MacroAssembler extends AMD64Assembler { 48 AMD64MacroAssembler(TargetDescription target)49 public AMD64MacroAssembler(TargetDescription target) { 50 super(target); 51 } 52 decrementq(Register reg, int value)53 public final void decrementq(Register reg, int value) { 54 if (value == Integer.MIN_VALUE) { 55 subq(reg, value); 56 return; 57 } 58 if (value < 0) { 59 incrementq(reg, -value); 60 return; 61 } 62 if (value == 0) { 63 return; 64 } 65 if (value == 1 && UseIncDec) { 66 decq(reg); 67 } else { 68 subq(reg, value); 69 } 70 } 71 decrementq(AMD64Address dst, int value)72 public final void decrementq(AMD64Address dst, int value) { 73 if (value == Integer.MIN_VALUE) { 74 subq(dst, value); 75 return; 76 } 77 if (value < 0) { 78 incrementq(dst, -value); 79 return; 80 } 81 if (value == 0) { 82 return; 83 } 84 if (value == 1 && UseIncDec) { 85 decq(dst); 86 } else { 87 subq(dst, value); 88 } 89 } 90 incrementq(Register reg, int value)91 public void incrementq(Register reg, int value) { 92 if (value == Integer.MIN_VALUE) { 93 addq(reg, value); 94 return; 95 } 96 if (value < 0) { 97 decrementq(reg, -value); 98 return; 99 } 100 if (value == 0) { 101 return; 102 } 103 if (value == 1 && UseIncDec) { 104 incq(reg); 105 } else { 106 addq(reg, value); 107 } 108 } 109 incrementq(AMD64Address dst, int value)110 public final void incrementq(AMD64Address dst, int value) { 111 if (value == Integer.MIN_VALUE) { 112 addq(dst, value); 113 return; 114 } 115 if (value < 0) { 116 decrementq(dst, -value); 117 return; 118 } 119 if (value == 0) { 120 return; 121 } 122 if (value == 1 && UseIncDec) { 123 incq(dst); 124 } else { 125 addq(dst, value); 126 } 127 } 128 movptr(Register dst, AMD64Address src)129 public final void movptr(Register dst, AMD64Address src) { 130 movq(dst, src); 131 } 132 movptr(AMD64Address dst, Register src)133 public final void movptr(AMD64Address dst, Register src) { 134 movq(dst, src); 135 } 136 movptr(AMD64Address dst, int src)137 public final void movptr(AMD64Address dst, int src) { 138 movslq(dst, src); 139 } 140 cmpptr(Register src1, Register src2)141 public final void cmpptr(Register src1, Register src2) { 142 cmpq(src1, src2); 143 } 144 cmpptr(Register src1, AMD64Address src2)145 public final void cmpptr(Register src1, AMD64Address src2) { 146 cmpq(src1, src2); 147 } 148 decrementl(Register reg)149 public final void decrementl(Register reg) { 150 decrementl(reg, 1); 151 } 152 decrementl(Register reg, int value)153 public final void decrementl(Register reg, int value) { 154 if (value == Integer.MIN_VALUE) { 155 subl(reg, value); 156 return; 157 } 158 if (value < 0) { 159 incrementl(reg, -value); 160 return; 161 } 162 if (value == 0) { 163 return; 164 } 165 if (value == 1 && UseIncDec) { 166 decl(reg); 167 } else { 168 subl(reg, value); 169 } 170 } 171 decrementl(AMD64Address dst, int value)172 public final void decrementl(AMD64Address dst, int value) { 173 if (value == Integer.MIN_VALUE) { 174 subl(dst, value); 175 return; 176 } 177 if (value < 0) { 178 incrementl(dst, -value); 179 return; 180 } 181 if (value == 0) { 182 return; 183 } 184 if (value == 1 && UseIncDec) { 185 decl(dst); 186 } else { 187 subl(dst, value); 188 } 189 } 190 incrementl(Register reg, int value)191 public final void incrementl(Register reg, int value) { 192 if (value == Integer.MIN_VALUE) { 193 addl(reg, value); 194 return; 195 } 196 if (value < 0) { 197 decrementl(reg, -value); 198 return; 199 } 200 if (value == 0) { 201 return; 202 } 203 if (value == 1 && UseIncDec) { 204 incl(reg); 205 } else { 206 addl(reg, value); 207 } 208 } 209 incrementl(AMD64Address dst, int value)210 public final void incrementl(AMD64Address dst, int value) { 211 if (value == Integer.MIN_VALUE) { 212 addl(dst, value); 213 return; 214 } 215 if (value < 0) { 216 decrementl(dst, -value); 217 return; 218 } 219 if (value == 0) { 220 return; 221 } 222 if (value == 1 && UseIncDec) { 223 incl(dst); 224 } else { 225 addl(dst, value); 226 } 227 } 228 movflt(Register dst, Register src)229 public void movflt(Register dst, Register src) { 230 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 231 if (UseXmmRegToRegMoveAll) { 232 movaps(dst, src); 233 } else { 234 movss(dst, src); 235 } 236 } 237 movflt(Register dst, AMD64Address src)238 public void movflt(Register dst, AMD64Address src) { 239 assert dst.getRegisterCategory().equals(AMD64.XMM); 240 movss(dst, src); 241 } 242 movflt(AMD64Address dst, Register src)243 public void movflt(AMD64Address dst, Register src) { 244 assert src.getRegisterCategory().equals(AMD64.XMM); 245 movss(dst, src); 246 } 247 movdbl(Register dst, Register src)248 public void movdbl(Register dst, Register src) { 249 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 250 if (UseXmmRegToRegMoveAll) { 251 movapd(dst, src); 252 } else { 253 movsd(dst, src); 254 } 255 } 256 movdbl(Register dst, AMD64Address src)257 public void movdbl(Register dst, AMD64Address src) { 258 assert dst.getRegisterCategory().equals(AMD64.XMM); 259 if (UseXmmLoadAndClearUpper) { 260 movsd(dst, src); 261 } else { 262 movlpd(dst, src); 263 } 264 } 265 movdbl(AMD64Address dst, Register src)266 public void movdbl(AMD64Address dst, Register src) { 267 assert src.getRegisterCategory().equals(AMD64.XMM); 268 movsd(dst, src); 269 } 270 271 /** 272 * Non-atomic write of a 64-bit constant to memory. Do not use if the address might be a 273 * volatile field! 274 */ movlong(AMD64Address dst, long src)275 public final void movlong(AMD64Address dst, long src) { 276 if (NumUtil.isInt(src)) { 277 AMD64MIOp.MOV.emit(this, OperandSize.QWORD, dst, (int) src); 278 } else { 279 AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4); 280 movl(dst, (int) (src & 0xFFFFFFFF)); 281 movl(high, (int) (src >> 32)); 282 } 283 284 } 285 setl(ConditionFlag cc, Register dst)286 public final void setl(ConditionFlag cc, Register dst) { 287 setb(cc, dst); 288 movzbl(dst, dst); 289 } 290 setq(ConditionFlag cc, Register dst)291 public final void setq(ConditionFlag cc, Register dst) { 292 setb(cc, dst); 293 movzbq(dst, dst); 294 } 295 flog(Register dest, Register value, boolean base10)296 public final void flog(Register dest, Register value, boolean base10) { 297 if (base10) { 298 fldlg2(); 299 } else { 300 fldln2(); 301 } 302 AMD64Address tmp = trigPrologue(value); 303 fyl2x(); 304 trigEpilogue(dest, tmp); 305 } 306 fsin(Register dest, Register value)307 public final void fsin(Register dest, Register value) { 308 AMD64Address tmp = trigPrologue(value); 309 fsin(); 310 trigEpilogue(dest, tmp); 311 } 312 fcos(Register dest, Register value)313 public final void fcos(Register dest, Register value) { 314 AMD64Address tmp = trigPrologue(value); 315 fcos(); 316 trigEpilogue(dest, tmp); 317 } 318 ftan(Register dest, Register value)319 public final void ftan(Register dest, Register value) { 320 AMD64Address tmp = trigPrologue(value); 321 fptan(); 322 fstp(0); // ftan pushes 1.0 in addition to the actual result, pop 323 trigEpilogue(dest, tmp); 324 } 325 fpop()326 public final void fpop() { 327 ffree(0); 328 fincstp(); 329 } 330 trigPrologue(Register value)331 private AMD64Address trigPrologue(Register value) { 332 assert value.getRegisterCategory().equals(AMD64.XMM); 333 AMD64Address tmp = new AMD64Address(AMD64.rsp); 334 subq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); 335 movdbl(tmp, value); 336 fldd(tmp); 337 return tmp; 338 } 339 trigEpilogue(Register dest, AMD64Address tmp)340 private void trigEpilogue(Register dest, AMD64Address tmp) { 341 assert dest.getRegisterCategory().equals(AMD64.XMM); 342 fstpd(tmp); 343 movdbl(dest, tmp); 344 addq(AMD64.rsp, AMD64Kind.DOUBLE.getSizeInBytes()); 345 } 346 347 // IndexOf for constant substrings with size >= 8 chars 348 // which don't need to be loaded through stack. stringIndexofC8(Register str1, Register str2, Register cnt1, Register cnt2, int intCnt2, Register result, Register vec, Register tmp)349 public void stringIndexofC8(Register str1, Register str2, 350 Register cnt1, Register cnt2, 351 int intCnt2, Register result, 352 Register vec, Register tmp) { 353 // assert(UseSSE42Intrinsics, "SSE4.2 is required"); 354 355 // This method uses pcmpestri inxtruction with bound registers 356 // inputs: 357 // xmm - substring 358 // rax - substring length (elements count) 359 // mem - scanned string 360 // rdx - string length (elements count) 361 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 362 // outputs: 363 // rcx - matched index in string 364 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 365 366 Label reloadSubstr = new Label(); 367 Label scanToSubstr = new Label(); 368 Label scanSubstr = new Label(); 369 Label retFound = new Label(); 370 Label retNotFound = new Label(); 371 Label exit = new Label(); 372 Label foundSubstr = new Label(); 373 Label matchSubstrHead = new Label(); 374 Label reloadStr = new Label(); 375 Label foundCandidate = new Label(); 376 377 // Note, inline_string_indexOf() generates checks: 378 // if (substr.count > string.count) return -1; 379 // if (substr.count == 0) return 0; 380 assert intCnt2 >= 8 : "this code isused only for cnt2 >= 8 chars"; 381 382 // Load substring. 383 movdqu(vec, new AMD64Address(str2, 0)); 384 movl(cnt2, intCnt2); 385 movq(result, str1); // string addr 386 387 if (intCnt2 > 8) { 388 jmpb(scanToSubstr); 389 390 // Reload substr for rescan, this code 391 // is executed only for large substrings (> 8 chars) 392 bind(reloadSubstr); 393 movdqu(vec, new AMD64Address(str2, 0)); 394 negq(cnt2); // Jumped here with negative cnt2, convert to positive 395 396 bind(reloadStr); 397 // We came here after the beginning of the substring was 398 // matched but the rest of it was not so we need to search 399 // again. Start from the next element after the previous match. 400 401 // cnt2 is number of substring reminding elements and 402 // cnt1 is number of string reminding elements when cmp failed. 403 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 404 subl(cnt1, cnt2); 405 addl(cnt1, intCnt2); 406 movl(cnt2, intCnt2); // Now restore cnt2 407 408 decrementl(cnt1, 1); // Shift to next element 409 cmpl(cnt1, cnt2); 410 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 411 412 addq(result, 2); 413 414 } // (int_cnt2 > 8) 415 416 // Scan string for start of substr in 16-byte vectors 417 bind(scanToSubstr); 418 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 419 jccb(ConditionFlag.Below, foundCandidate); // CF == 1 420 subl(cnt1, 8); 421 jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 422 cmpl(cnt1, cnt2); 423 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 424 addq(result, 16); 425 jmpb(scanToSubstr); 426 427 // Found a potential substr 428 bind(foundCandidate); 429 // Matched whole vector if first element matched (tmp(rcx) == 0). 430 if (intCnt2 == 8) { 431 jccb(ConditionFlag.Overflow, retFound); // OF == 1 432 } else { // int_cnt2 > 8 433 jccb(ConditionFlag.Overflow, foundSubstr); 434 } 435 // After pcmpestri tmp(rcx) contains matched element index 436 // Compute start addr of substr 437 leaq(result, new AMD64Address(result, tmp, Scale.Times2, 0)); 438 439 // Make sure string is still long enough 440 subl(cnt1, tmp); 441 cmpl(cnt1, cnt2); 442 if (intCnt2 == 8) { 443 jccb(ConditionFlag.GreaterEqual, scanToSubstr); 444 } else { // int_cnt2 > 8 445 jccb(ConditionFlag.GreaterEqual, matchSubstrHead); 446 } 447 // Left less then substring. 448 449 bind(retNotFound); 450 movl(result, -1); 451 jmpb(exit); 452 453 if (intCnt2 > 8) { 454 // This code is optimized for the case when whole substring 455 // is matched if its head is matched. 456 bind(matchSubstrHead); 457 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 458 // Reload only string if does not match 459 jccb(ConditionFlag.NoOverflow, reloadStr); // OF == 0 460 461 Label contScanSubstr = new Label(); 462 // Compare the rest of substring (> 8 chars). 463 bind(foundSubstr); 464 // First 8 chars are already matched. 465 negq(cnt2); 466 addq(cnt2, 8); 467 468 bind(scanSubstr); 469 subl(cnt1, 8); 470 cmpl(cnt2, -8); // Do not read beyond substring 471 jccb(ConditionFlag.LessEqual, contScanSubstr); 472 // Back-up strings to avoid reading beyond substring: 473 // cnt1 = cnt1 - cnt2 + 8 474 addl(cnt1, cnt2); // cnt2 is negative 475 addl(cnt1, 8); 476 movl(cnt2, 8); 477 negq(cnt2); 478 bind(contScanSubstr); 479 if (intCnt2 < 1024 * 1024 * 1024) { 480 movdqu(vec, new AMD64Address(str2, cnt2, Scale.Times2, intCnt2 * 2)); 481 pcmpestri(vec, new AMD64Address(result, cnt2, Scale.Times2, intCnt2 * 2), 0x0d); 482 } else { 483 // calculate index in register to avoid integer overflow (int_cnt2*2) 484 movl(tmp, intCnt2); 485 addq(tmp, cnt2); 486 movdqu(vec, new AMD64Address(str2, tmp, Scale.Times2, 0)); 487 pcmpestri(vec, new AMD64Address(result, tmp, Scale.Times2, 0), 0x0d); 488 } 489 // Need to reload strings pointers if not matched whole vector 490 jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 491 addq(cnt2, 8); 492 jcc(ConditionFlag.Negative, scanSubstr); 493 // Fall through if found full substring 494 495 } // (int_cnt2 > 8) 496 497 bind(retFound); 498 // Found result if we matched full small substring. 499 // Compute substr offset 500 subq(result, str1); 501 shrl(result, 1); // index 502 bind(exit); 503 504 } // string_indexofC8 505 506 // Small strings are loaded through stack if they cross page boundary. stringIndexOf(Register str1, Register str2, Register cnt1, Register cnt2, int intCnt2, Register result, Register vec, Register tmp, int vmPageSize)507 public void stringIndexOf(Register str1, Register str2, 508 Register cnt1, Register cnt2, 509 int intCnt2, Register result, 510 Register vec, Register tmp, int vmPageSize) { 511 // 512 // int_cnt2 is length of small (< 8 chars) constant substring 513 // or (-1) for non constant substring in which case its length 514 // is in cnt2 register. 515 // 516 // Note, inline_string_indexOf() generates checks: 517 // if (substr.count > string.count) return -1; 518 // if (substr.count == 0) return 0; 519 // 520 assert intCnt2 == -1 || (0 < intCnt2 && intCnt2 < 8) : "should be != 0"; 521 522 // This method uses pcmpestri instruction with bound registers 523 // inputs: 524 // xmm - substring 525 // rax - substring length (elements count) 526 // mem - scanned string 527 // rdx - string length (elements count) 528 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 529 // outputs: 530 // rcx - matched index in string 531 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 532 533 Label reloadSubstr = new Label(); 534 Label scanToSubstr = new Label(); 535 Label scanSubstr = new Label(); 536 Label adjustStr = new Label(); 537 Label retFound = new Label(); 538 Label retNotFound = new Label(); 539 Label cleanup = new Label(); 540 Label foundSubstr = new Label(); 541 Label foundCandidate = new Label(); 542 543 int wordSize = 8; 544 // We don't know where these strings are located 545 // and we can't read beyond them. Load them through stack. 546 Label bigStrings = new Label(); 547 Label checkStr = new Label(); 548 Label copySubstr = new Label(); 549 Label copyStr = new Label(); 550 551 movq(tmp, rsp); // save old SP 552 553 if (intCnt2 > 0) { // small (< 8 chars) constant substring 554 if (intCnt2 == 1) { // One char 555 movzwl(result, new AMD64Address(str2, 0)); 556 movdl(vec, result); // move 32 bits 557 } else if (intCnt2 == 2) { // Two chars 558 movdl(vec, new AMD64Address(str2, 0)); // move 32 bits 559 } else if (intCnt2 == 4) { // Four chars 560 movq(vec, new AMD64Address(str2, 0)); // move 64 bits 561 } else { // cnt2 = { 3, 5, 6, 7 } 562 // Array header size is 12 bytes in 32-bit VM 563 // + 6 bytes for 3 chars == 18 bytes, 564 // enough space to load vec and shift. 565 movdqu(vec, new AMD64Address(str2, (intCnt2 * 2) - 16)); 566 psrldq(vec, 16 - (intCnt2 * 2)); 567 } 568 } else { // not constant substring 569 cmpl(cnt2, 8); 570 jccb(ConditionFlag.AboveEqual, bigStrings); // Both strings are big enough 571 572 // We can read beyond string if str+16 does not cross page boundary 573 // since heaps are aligned and mapped by pages. 574 assert vmPageSize < 1024 * 1024 * 1024 : "default page should be small"; 575 movl(result, str2); // We need only low 32 bits 576 andl(result, (vmPageSize - 1)); 577 cmpl(result, (vmPageSize - 16)); 578 jccb(ConditionFlag.BelowEqual, checkStr); 579 580 // Move small strings to stack to allow load 16 bytes into vec. 581 subq(rsp, 16); 582 int stackOffset = wordSize - 2; 583 push(cnt2); 584 585 bind(copySubstr); 586 movzwl(result, new AMD64Address(str2, cnt2, Scale.Times2, -2)); 587 movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 588 decrementl(cnt2, 1); 589 jccb(ConditionFlag.NotZero, copySubstr); 590 591 pop(cnt2); 592 movq(str2, rsp); // New substring address 593 } // non constant 594 595 bind(checkStr); 596 cmpl(cnt1, 8); 597 jccb(ConditionFlag.AboveEqual, bigStrings); 598 599 // Check cross page boundary. 600 movl(result, str1); // We need only low 32 bits 601 andl(result, (vmPageSize - 1)); 602 cmpl(result, (vmPageSize - 16)); 603 jccb(ConditionFlag.BelowEqual, bigStrings); 604 605 subq(rsp, 16); 606 int stackOffset = -2; 607 if (intCnt2 < 0) { // not constant 608 push(cnt2); 609 stackOffset += wordSize; 610 } 611 movl(cnt2, cnt1); 612 613 bind(copyStr); 614 movzwl(result, new AMD64Address(str1, cnt2, Scale.Times2, -2)); 615 movw(new AMD64Address(rsp, cnt2, Scale.Times2, stackOffset), result); 616 decrementl(cnt2, 1); 617 jccb(ConditionFlag.NotZero, copyStr); 618 619 if (intCnt2 < 0) { // not constant 620 pop(cnt2); 621 } 622 movq(str1, rsp); // New string address 623 624 bind(bigStrings); 625 // Load substring. 626 if (intCnt2 < 0) { // -1 627 movdqu(vec, new AMD64Address(str2, 0)); 628 push(cnt2); // substr count 629 push(str2); // substr addr 630 push(str1); // string addr 631 } else { 632 // Small (< 8 chars) constant substrings are loaded already. 633 movl(cnt2, intCnt2); 634 } 635 push(tmp); // original SP 636 // Finished loading 637 638 // ======================================================== 639 // Start search 640 // 641 642 movq(result, str1); // string addr 643 644 if (intCnt2 < 0) { // Only for non constant substring 645 jmpb(scanToSubstr); 646 647 // SP saved at sp+0 648 // String saved at sp+1*wordSize 649 // Substr saved at sp+2*wordSize 650 // Substr count saved at sp+3*wordSize 651 652 // Reload substr for rescan, this code 653 // is executed only for large substrings (> 8 chars) 654 bind(reloadSubstr); 655 movq(str2, new AMD64Address(rsp, 2 * wordSize)); 656 movl(cnt2, new AMD64Address(rsp, 3 * wordSize)); 657 movdqu(vec, new AMD64Address(str2, 0)); 658 // We came here after the beginning of the substring was 659 // matched but the rest of it was not so we need to search 660 // again. Start from the next element after the previous match. 661 subq(str1, result); // Restore counter 662 shrl(str1, 1); 663 addl(cnt1, str1); 664 decrementl(cnt1); // Shift to next element 665 cmpl(cnt1, cnt2); 666 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 667 668 addq(result, 2); 669 } // non constant 670 671 // Scan string for start of substr in 16-byte vectors 672 bind(scanToSubstr); 673 assert cnt1.equals(rdx) && cnt2.equals(rax) && tmp.equals(rcx) : "pcmpestri"; 674 pcmpestri(vec, new AMD64Address(result, 0), 0x0d); 675 jccb(ConditionFlag.Below, foundCandidate); // CF == 1 676 subl(cnt1, 8); 677 jccb(ConditionFlag.LessEqual, retNotFound); // Scanned full string 678 cmpl(cnt1, cnt2); 679 jccb(ConditionFlag.Negative, retNotFound); // Left less then substring 680 addq(result, 16); 681 682 bind(adjustStr); 683 cmpl(cnt1, 8); // Do not read beyond string 684 jccb(ConditionFlag.GreaterEqual, scanToSubstr); 685 // Back-up string to avoid reading beyond string. 686 leaq(result, new AMD64Address(result, cnt1, Scale.Times2, -16)); 687 movl(cnt1, 8); 688 jmpb(scanToSubstr); 689 690 // Found a potential substr 691 bind(foundCandidate); 692 // After pcmpestri tmp(rcx) contains matched element index 693 694 // Make sure string is still long enough 695 subl(cnt1, tmp); 696 cmpl(cnt1, cnt2); 697 jccb(ConditionFlag.GreaterEqual, foundSubstr); 698 // Left less then substring. 699 700 bind(retNotFound); 701 movl(result, -1); 702 jmpb(cleanup); 703 704 bind(foundSubstr); 705 // Compute start addr of substr 706 leaq(result, new AMD64Address(result, tmp, Scale.Times2)); 707 708 if (intCnt2 > 0) { // Constant substring 709 // Repeat search for small substring (< 8 chars) 710 // from new point without reloading substring. 711 // Have to check that we don't read beyond string. 712 cmpl(tmp, 8 - intCnt2); 713 jccb(ConditionFlag.Greater, adjustStr); 714 // Fall through if matched whole substring. 715 } else { // non constant 716 assert intCnt2 == -1 : "should be != 0"; 717 718 addl(tmp, cnt2); 719 // Found result if we matched whole substring. 720 cmpl(tmp, 8); 721 jccb(ConditionFlag.LessEqual, retFound); 722 723 // Repeat search for small substring (<= 8 chars) 724 // from new point 'str1' without reloading substring. 725 cmpl(cnt2, 8); 726 // Have to check that we don't read beyond string. 727 jccb(ConditionFlag.LessEqual, adjustStr); 728 729 Label checkNext = new Label(); 730 Label contScanSubstr = new Label(); 731 Label retFoundLong = new Label(); 732 // Compare the rest of substring (> 8 chars). 733 movq(str1, result); 734 735 cmpl(tmp, cnt2); 736 // First 8 chars are already matched. 737 jccb(ConditionFlag.Equal, checkNext); 738 739 bind(scanSubstr); 740 pcmpestri(vec, new AMD64Address(str1, 0), 0x0d); 741 // Need to reload strings pointers if not matched whole vector 742 jcc(ConditionFlag.NoOverflow, reloadSubstr); // OF == 0 743 744 bind(checkNext); 745 subl(cnt2, 8); 746 jccb(ConditionFlag.LessEqual, retFoundLong); // Found full substring 747 addq(str1, 16); 748 addq(str2, 16); 749 subl(cnt1, 8); 750 cmpl(cnt2, 8); // Do not read beyond substring 751 jccb(ConditionFlag.GreaterEqual, contScanSubstr); 752 // Back-up strings to avoid reading beyond substring. 753 leaq(str2, new AMD64Address(str2, cnt2, Scale.Times2, -16)); 754 leaq(str1, new AMD64Address(str1, cnt2, Scale.Times2, -16)); 755 subl(cnt1, cnt2); 756 movl(cnt2, 8); 757 addl(cnt1, 8); 758 bind(contScanSubstr); 759 movdqu(vec, new AMD64Address(str2, 0)); 760 jmpb(scanSubstr); 761 762 bind(retFoundLong); 763 movq(str1, new AMD64Address(rsp, wordSize)); 764 } // non constant 765 766 bind(retFound); 767 // Compute substr offset 768 subq(result, str1); 769 shrl(result, 1); // index 770 771 bind(cleanup); 772 pop(rsp); // restore SP 773 774 } 775 776 } 777