1 /*
2 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "opto/c2_MacroAssembler.hpp"
29 #include "opto/intrinsicnode.hpp"
30
31 #ifdef PRODUCT
32 #define BLOCK_COMMENT(str) /* nothing */
33 #define STOP(error) stop(error)
34 #else
35 #define BLOCK_COMMENT(str) block_comment(str)
36 #define STOP(error) block_comment(error); stop(error)
37 #endif
38
39 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
40
41 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
42
43 // Search for str1 in str2 and return index or -1
string_indexof(Register str2,Register str1,Register cnt2,Register cnt1,Register tmp1,Register tmp2,Register tmp3,Register tmp4,Register tmp5,Register tmp6,int icnt1,Register result,int ae)44 void C2_MacroAssembler::string_indexof(Register str2, Register str1,
45 Register cnt2, Register cnt1,
46 Register tmp1, Register tmp2,
47 Register tmp3, Register tmp4,
48 Register tmp5, Register tmp6,
49 int icnt1, Register result, int ae) {
50 // NOTE: tmp5, tmp6 can be zr depending on specific method version
51 Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
52
53 Register ch1 = rscratch1;
54 Register ch2 = rscratch2;
55 Register cnt1tmp = tmp1;
56 Register cnt2tmp = tmp2;
57 Register cnt1_neg = cnt1;
58 Register cnt2_neg = cnt2;
59 Register result_tmp = tmp4;
60
61 bool isL = ae == StrIntrinsicNode::LL;
62
63 bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
64 bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
65 int str1_chr_shift = str1_isL ? 0:1;
66 int str2_chr_shift = str2_isL ? 0:1;
67 int str1_chr_size = str1_isL ? 1:2;
68 int str2_chr_size = str2_isL ? 1:2;
69 chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
70 (chr_insn)&MacroAssembler::ldrh;
71 chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
72 (chr_insn)&MacroAssembler::ldrh;
73 chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
74 chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
75
76 // Note, inline_string_indexOf() generates checks:
77 // if (substr.count > string.count) return -1;
78 // if (substr.count == 0) return 0;
79
80 // We have two strings, a source string in str2, cnt2 and a pattern string
81 // in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
82
83 // For larger pattern and source we use a simplified Boyer Moore algorithm.
84 // With a small pattern and source we use linear scan.
85
86 if (icnt1 == -1) {
87 sub(result_tmp, cnt2, cnt1);
88 cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
89 br(LT, LINEARSEARCH);
90 dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
91 subs(zr, cnt1, 256);
92 lsr(tmp1, cnt2, 2);
93 ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
94 br(GE, LINEARSTUB);
95 }
96
97 // The Boyer Moore alogorithm is based on the description here:-
98 //
99 // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
100 //
101 // This describes and algorithm with 2 shift rules. The 'Bad Character' rule
102 // and the 'Good Suffix' rule.
103 //
104 // These rules are essentially heuristics for how far we can shift the
105 // pattern along the search string.
106 //
107 // The implementation here uses the 'Bad Character' rule only because of the
108 // complexity of initialisation for the 'Good Suffix' rule.
109 //
110 // This is also known as the Boyer-Moore-Horspool algorithm:-
111 //
112 // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
113 //
114 // This particular implementation has few java-specific optimizations.
115 //
116 // #define ASIZE 256
117 //
118 // int bm(unsigned char *x, int m, unsigned char *y, int n) {
119 // int i, j;
120 // unsigned c;
121 // unsigned char bc[ASIZE];
122 //
123 // /* Preprocessing */
124 // for (i = 0; i < ASIZE; ++i)
125 // bc[i] = m;
126 // for (i = 0; i < m - 1; ) {
127 // c = x[i];
128 // ++i;
129 // // c < 256 for Latin1 string, so, no need for branch
130 // #ifdef PATTERN_STRING_IS_LATIN1
131 // bc[c] = m - i;
132 // #else
133 // if (c < ASIZE) bc[c] = m - i;
134 // #endif
135 // }
136 //
137 // /* Searching */
138 // j = 0;
139 // while (j <= n - m) {
140 // c = y[i+j];
141 // if (x[m-1] == c)
142 // for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
143 // if (i < 0) return j;
144 // // c < 256 for Latin1 string, so, no need for branch
145 // #ifdef SOURCE_STRING_IS_LATIN1
146 // // LL case: (c< 256) always true. Remove branch
147 // j += bc[y[j+m-1]];
148 // #endif
149 // #ifndef PATTERN_STRING_IS_UTF
150 // // UU case: need if (c<ASIZE) check. Skip 1 character if not.
151 // if (c < ASIZE)
152 // j += bc[y[j+m-1]];
153 // else
154 // j += 1
155 // #endif
156 // #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
157 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
158 // if (c < ASIZE)
159 // j += bc[y[j+m-1]];
160 // else
161 // j += m
162 // #endif
163 // }
164 // }
165
166 if (icnt1 == -1) {
167 Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
168 BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
169 Register cnt1end = tmp2;
170 Register str2end = cnt2;
171 Register skipch = tmp2;
172
173 // str1 length is >=8, so, we can read at least 1 register for cases when
174 // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
175 // UL case. We'll re-read last character in inner pre-loop code to have
176 // single outer pre-loop load
177 const int firstStep = isL ? 7 : 3;
178
179 const int ASIZE = 256;
180 const int STORED_BYTES = 32; // amount of bytes stored per instruction
181 sub(sp, sp, ASIZE);
182 mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
183 mov(ch1, sp);
184 BIND(BM_INIT_LOOP);
185 stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
186 subs(tmp5, tmp5, 1);
187 br(GT, BM_INIT_LOOP);
188
189 sub(cnt1tmp, cnt1, 1);
190 mov(tmp5, str2);
191 add(str2end, str2, result_tmp, LSL, str2_chr_shift);
192 sub(ch2, cnt1, 1);
193 mov(tmp3, str1);
194 BIND(BCLOOP);
195 (this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
196 if (!str1_isL) {
197 subs(zr, ch1, ASIZE);
198 br(HS, BCSKIP);
199 }
200 strb(ch2, Address(sp, ch1));
201 BIND(BCSKIP);
202 subs(ch2, ch2, 1);
203 br(GT, BCLOOP);
204
205 add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
206 if (str1_isL == str2_isL) {
207 // load last 8 bytes (8LL/4UU symbols)
208 ldr(tmp6, Address(tmp6, -wordSize));
209 } else {
210 ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
211 // convert Latin1 to UTF. We'll have to wait until load completed, but
212 // it's still faster than per-character loads+checks
213 lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
214 ubfx(ch1, tmp6, 8, 8); // str1[N-2]
215 ubfx(ch2, tmp6, 16, 8); // str1[N-3]
216 andr(tmp6, tmp6, 0xFF); // str1[N-4]
217 orr(ch2, ch1, ch2, LSL, 16);
218 orr(tmp6, tmp6, tmp3, LSL, 48);
219 orr(tmp6, tmp6, ch2, LSL, 16);
220 }
221 BIND(BMLOOPSTR2);
222 (this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
223 sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
224 if (str1_isL == str2_isL) {
225 // re-init tmp3. It's for free because it's executed in parallel with
226 // load above. Alternative is to initialize it before loop, but it'll
227 // affect performance on in-order systems with 2 or more ld/st pipelines
228 lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
229 }
230 if (!isL) { // UU/UL case
231 lsl(ch2, cnt1tmp, 1); // offset in bytes
232 }
233 cmp(tmp3, skipch);
234 br(NE, BMSKIP);
235 ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
236 mov(ch1, tmp6);
237 if (isL) {
238 b(BMLOOPSTR1_AFTER_LOAD);
239 } else {
240 sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
241 b(BMLOOPSTR1_CMP);
242 }
243 BIND(BMLOOPSTR1);
244 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
245 (this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
246 BIND(BMLOOPSTR1_AFTER_LOAD);
247 subs(cnt1tmp, cnt1tmp, 1);
248 br(LT, BMLOOPSTR1_LASTCMP);
249 BIND(BMLOOPSTR1_CMP);
250 cmp(ch1, ch2);
251 br(EQ, BMLOOPSTR1);
252 BIND(BMSKIP);
253 if (!isL) {
254 // if we've met UTF symbol while searching Latin1 pattern, then we can
255 // skip cnt1 symbols
256 if (str1_isL != str2_isL) {
257 mov(result_tmp, cnt1);
258 } else {
259 mov(result_tmp, 1);
260 }
261 subs(zr, skipch, ASIZE);
262 br(HS, BMADV);
263 }
264 ldrb(result_tmp, Address(sp, skipch)); // load skip distance
265 BIND(BMADV);
266 sub(cnt1tmp, cnt1, 1);
267 add(str2, str2, result_tmp, LSL, str2_chr_shift);
268 cmp(str2, str2end);
269 br(LE, BMLOOPSTR2);
270 add(sp, sp, ASIZE);
271 b(NOMATCH);
272 BIND(BMLOOPSTR1_LASTCMP);
273 cmp(ch1, ch2);
274 br(NE, BMSKIP);
275 BIND(BMMATCH);
276 sub(result, str2, tmp5);
277 if (!str2_isL) lsr(result, result, 1);
278 add(sp, sp, ASIZE);
279 b(DONE);
280
281 BIND(LINEARSTUB);
282 cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
283 br(LT, LINEAR_MEDIUM);
284 mov(result, zr);
285 RuntimeAddress stub = NULL;
286 if (isL) {
287 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
288 assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
289 } else if (str1_isL) {
290 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
291 assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
292 } else {
293 stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
294 assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
295 }
296 trampoline_call(stub);
297 b(DONE);
298 }
299
300 BIND(LINEARSEARCH);
301 {
302 Label DO1, DO2, DO3;
303
304 Register str2tmp = tmp2;
305 Register first = tmp3;
306
307 if (icnt1 == -1)
308 {
309 Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
310
311 cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
312 br(LT, DOSHORT);
313 BIND(LINEAR_MEDIUM);
314 (this->*str1_load_1chr)(first, Address(str1));
315 lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
316 sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
317 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
318 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
319
320 BIND(FIRST_LOOP);
321 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
322 cmp(first, ch2);
323 br(EQ, STR1_LOOP);
324 BIND(STR2_NEXT);
325 adds(cnt2_neg, cnt2_neg, str2_chr_size);
326 br(LE, FIRST_LOOP);
327 b(NOMATCH);
328
329 BIND(STR1_LOOP);
330 adds(cnt1tmp, cnt1_neg, str1_chr_size);
331 add(cnt2tmp, cnt2_neg, str2_chr_size);
332 br(GE, MATCH);
333
334 BIND(STR1_NEXT);
335 (this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
336 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
337 cmp(ch1, ch2);
338 br(NE, STR2_NEXT);
339 adds(cnt1tmp, cnt1tmp, str1_chr_size);
340 add(cnt2tmp, cnt2tmp, str2_chr_size);
341 br(LT, STR1_NEXT);
342 b(MATCH);
343
344 BIND(DOSHORT);
345 if (str1_isL == str2_isL) {
346 cmp(cnt1, (u1)2);
347 br(LT, DO1);
348 br(GT, DO3);
349 }
350 }
351
352 if (icnt1 == 4) {
353 Label CH1_LOOP;
354
355 (this->*load_4chr)(ch1, str1);
356 sub(result_tmp, cnt2, 4);
357 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
358 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
359
360 BIND(CH1_LOOP);
361 (this->*load_4chr)(ch2, Address(str2, cnt2_neg));
362 cmp(ch1, ch2);
363 br(EQ, MATCH);
364 adds(cnt2_neg, cnt2_neg, str2_chr_size);
365 br(LE, CH1_LOOP);
366 b(NOMATCH);
367 }
368
369 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
370 Label CH1_LOOP;
371
372 BIND(DO2);
373 (this->*load_2chr)(ch1, str1);
374 if (icnt1 == 2) {
375 sub(result_tmp, cnt2, 2);
376 }
377 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
378 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
379 BIND(CH1_LOOP);
380 (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
381 cmp(ch1, ch2);
382 br(EQ, MATCH);
383 adds(cnt2_neg, cnt2_neg, str2_chr_size);
384 br(LE, CH1_LOOP);
385 b(NOMATCH);
386 }
387
388 if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
389 Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
390
391 BIND(DO3);
392 (this->*load_2chr)(first, str1);
393 (this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
394 if (icnt1 == 3) {
395 sub(result_tmp, cnt2, 3);
396 }
397 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
398 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
399 BIND(FIRST_LOOP);
400 (this->*load_2chr)(ch2, Address(str2, cnt2_neg));
401 cmpw(first, ch2);
402 br(EQ, STR1_LOOP);
403 BIND(STR2_NEXT);
404 adds(cnt2_neg, cnt2_neg, str2_chr_size);
405 br(LE, FIRST_LOOP);
406 b(NOMATCH);
407
408 BIND(STR1_LOOP);
409 add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
410 (this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
411 cmp(ch1, ch2);
412 br(NE, STR2_NEXT);
413 b(MATCH);
414 }
415
416 if (icnt1 == -1 || icnt1 == 1) {
417 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
418
419 BIND(DO1);
420 (this->*str1_load_1chr)(ch1, str1);
421 cmp(cnt2, (u1)8);
422 br(LT, DO1_SHORT);
423
424 sub(result_tmp, cnt2, 8/str2_chr_size);
425 sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
426 mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
427 lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
428
429 if (str2_isL) {
430 orr(ch1, ch1, ch1, LSL, 8);
431 }
432 orr(ch1, ch1, ch1, LSL, 16);
433 orr(ch1, ch1, ch1, LSL, 32);
434 BIND(CH1_LOOP);
435 ldr(ch2, Address(str2, cnt2_neg));
436 eor(ch2, ch1, ch2);
437 sub(tmp1, ch2, tmp3);
438 orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
439 bics(tmp1, tmp1, tmp2);
440 br(NE, HAS_ZERO);
441 adds(cnt2_neg, cnt2_neg, 8);
442 br(LT, CH1_LOOP);
443
444 cmp(cnt2_neg, (u1)8);
445 mov(cnt2_neg, 0);
446 br(LT, CH1_LOOP);
447 b(NOMATCH);
448
449 BIND(HAS_ZERO);
450 rev(tmp1, tmp1);
451 clz(tmp1, tmp1);
452 add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
453 b(MATCH);
454
455 BIND(DO1_SHORT);
456 mov(result_tmp, cnt2);
457 lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
458 sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
459 BIND(DO1_LOOP);
460 (this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
461 cmpw(ch1, ch2);
462 br(EQ, MATCH);
463 adds(cnt2_neg, cnt2_neg, str2_chr_size);
464 br(LT, DO1_LOOP);
465 }
466 }
467 BIND(NOMATCH);
468 mov(result, -1);
469 b(DONE);
470 BIND(MATCH);
471 add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
472 BIND(DONE);
473 }
474
475 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
476 typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
477
string_indexof_char(Register str1,Register cnt1,Register ch,Register result,Register tmp1,Register tmp2,Register tmp3)478 void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
479 Register ch, Register result,
480 Register tmp1, Register tmp2, Register tmp3)
481 {
482 Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
483 Register cnt1_neg = cnt1;
484 Register ch1 = rscratch1;
485 Register result_tmp = rscratch2;
486
487 cbz(cnt1, NOMATCH);
488
489 cmp(cnt1, (u1)4);
490 br(LT, DO1_SHORT);
491
492 orr(ch, ch, ch, LSL, 16);
493 orr(ch, ch, ch, LSL, 32);
494
495 sub(cnt1, cnt1, 4);
496 mov(result_tmp, cnt1);
497 lea(str1, Address(str1, cnt1, Address::uxtw(1)));
498 sub(cnt1_neg, zr, cnt1, LSL, 1);
499
500 mov(tmp3, 0x0001000100010001);
501
502 BIND(CH1_LOOP);
503 ldr(ch1, Address(str1, cnt1_neg));
504 eor(ch1, ch, ch1);
505 sub(tmp1, ch1, tmp3);
506 orr(tmp2, ch1, 0x7fff7fff7fff7fff);
507 bics(tmp1, tmp1, tmp2);
508 br(NE, HAS_ZERO);
509 adds(cnt1_neg, cnt1_neg, 8);
510 br(LT, CH1_LOOP);
511
512 cmp(cnt1_neg, (u1)8);
513 mov(cnt1_neg, 0);
514 br(LT, CH1_LOOP);
515 b(NOMATCH);
516
517 BIND(HAS_ZERO);
518 rev(tmp1, tmp1);
519 clz(tmp1, tmp1);
520 add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
521 b(MATCH);
522
523 BIND(DO1_SHORT);
524 mov(result_tmp, cnt1);
525 lea(str1, Address(str1, cnt1, Address::uxtw(1)));
526 sub(cnt1_neg, zr, cnt1, LSL, 1);
527 BIND(DO1_LOOP);
528 ldrh(ch1, Address(str1, cnt1_neg));
529 cmpw(ch, ch1);
530 br(EQ, MATCH);
531 adds(cnt1_neg, cnt1_neg, 2);
532 br(LT, DO1_LOOP);
533 BIND(NOMATCH);
534 mov(result, -1);
535 b(DONE);
536 BIND(MATCH);
537 add(result, result_tmp, cnt1_neg, ASR, 1);
538 BIND(DONE);
539 }
540
541 // Compare strings.
string_compare(Register str1,Register str2,Register cnt1,Register cnt2,Register result,Register tmp1,Register tmp2,FloatRegister vtmp1,FloatRegister vtmp2,FloatRegister vtmp3,int ae)542 void C2_MacroAssembler::string_compare(Register str1, Register str2,
543 Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
544 FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
545 Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
546 DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
547 SHORT_LOOP_START, TAIL_CHECK;
548
549 bool isLL = ae == StrIntrinsicNode::LL;
550 bool isLU = ae == StrIntrinsicNode::LU;
551 bool isUL = ae == StrIntrinsicNode::UL;
552
553 // The stub threshold for LL strings is: 72 (64 + 8) chars
554 // UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
555 // LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
556 const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
557
558 bool str1_isL = isLL || isLU;
559 bool str2_isL = isLL || isUL;
560
561 int str1_chr_shift = str1_isL ? 0 : 1;
562 int str2_chr_shift = str2_isL ? 0 : 1;
563 int str1_chr_size = str1_isL ? 1 : 2;
564 int str2_chr_size = str2_isL ? 1 : 2;
565 int minCharsInWord = isLL ? wordSize : wordSize/2;
566
567 FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
568 chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
569 (chr_insn)&MacroAssembler::ldrh;
570 chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
571 (chr_insn)&MacroAssembler::ldrh;
572 uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
573 (uxt_insn)&MacroAssembler::uxthw;
574
575 BLOCK_COMMENT("string_compare {");
576
577 // Bizzarely, the counts are passed in bytes, regardless of whether they
578 // are L or U strings, however the result is always in characters.
579 if (!str1_isL) asrw(cnt1, cnt1, 1);
580 if (!str2_isL) asrw(cnt2, cnt2, 1);
581
582 // Compute the minimum of the string lengths and save the difference.
583 subsw(result, cnt1, cnt2);
584 cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
585
586 // A very short string
587 cmpw(cnt2, minCharsInWord);
588 br(Assembler::LE, SHORT_STRING);
589
590 // Compare longwords
591 // load first parts of strings and finish initialization while loading
592 {
593 if (str1_isL == str2_isL) { // LL or UU
594 ldr(tmp1, Address(str1));
595 cmp(str1, str2);
596 br(Assembler::EQ, DONE);
597 ldr(tmp2, Address(str2));
598 cmp(cnt2, stub_threshold);
599 br(GE, STUB);
600 subsw(cnt2, cnt2, minCharsInWord);
601 br(EQ, TAIL_CHECK);
602 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
603 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
604 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
605 } else if (isLU) {
606 ldrs(vtmp, Address(str1));
607 ldr(tmp2, Address(str2));
608 cmp(cnt2, stub_threshold);
609 br(GE, STUB);
610 subw(cnt2, cnt2, 4);
611 eor(vtmpZ, T16B, vtmpZ, vtmpZ);
612 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
613 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
614 zip1(vtmp, T8B, vtmp, vtmpZ);
615 sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
616 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
617 add(cnt1, cnt1, 4);
618 fmovd(tmp1, vtmp);
619 } else { // UL case
620 ldr(tmp1, Address(str1));
621 ldrs(vtmp, Address(str2));
622 cmp(cnt2, stub_threshold);
623 br(GE, STUB);
624 subw(cnt2, cnt2, 4);
625 lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
626 eor(vtmpZ, T16B, vtmpZ, vtmpZ);
627 lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
628 sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
629 zip1(vtmp, T8B, vtmp, vtmpZ);
630 sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
631 add(cnt1, cnt1, 8);
632 fmovd(tmp2, vtmp);
633 }
634 adds(cnt2, cnt2, isUL ? 4 : 8);
635 br(GE, TAIL);
636 eor(rscratch2, tmp1, tmp2);
637 cbnz(rscratch2, DIFFERENCE);
638 // main loop
639 bind(NEXT_WORD);
640 if (str1_isL == str2_isL) {
641 ldr(tmp1, Address(str1, cnt2));
642 ldr(tmp2, Address(str2, cnt2));
643 adds(cnt2, cnt2, 8);
644 } else if (isLU) {
645 ldrs(vtmp, Address(str1, cnt1));
646 ldr(tmp2, Address(str2, cnt2));
647 add(cnt1, cnt1, 4);
648 zip1(vtmp, T8B, vtmp, vtmpZ);
649 fmovd(tmp1, vtmp);
650 adds(cnt2, cnt2, 8);
651 } else { // UL
652 ldrs(vtmp, Address(str2, cnt2));
653 ldr(tmp1, Address(str1, cnt1));
654 zip1(vtmp, T8B, vtmp, vtmpZ);
655 add(cnt1, cnt1, 8);
656 fmovd(tmp2, vtmp);
657 adds(cnt2, cnt2, 4);
658 }
659 br(GE, TAIL);
660
661 eor(rscratch2, tmp1, tmp2);
662 cbz(rscratch2, NEXT_WORD);
663 b(DIFFERENCE);
664 bind(TAIL);
665 eor(rscratch2, tmp1, tmp2);
666 cbnz(rscratch2, DIFFERENCE);
667 // Last longword. In the case where length == 4 we compare the
668 // same longword twice, but that's still faster than another
669 // conditional branch.
670 if (str1_isL == str2_isL) {
671 ldr(tmp1, Address(str1));
672 ldr(tmp2, Address(str2));
673 } else if (isLU) {
674 ldrs(vtmp, Address(str1));
675 ldr(tmp2, Address(str2));
676 zip1(vtmp, T8B, vtmp, vtmpZ);
677 fmovd(tmp1, vtmp);
678 } else { // UL
679 ldrs(vtmp, Address(str2));
680 ldr(tmp1, Address(str1));
681 zip1(vtmp, T8B, vtmp, vtmpZ);
682 fmovd(tmp2, vtmp);
683 }
684 bind(TAIL_CHECK);
685 eor(rscratch2, tmp1, tmp2);
686 cbz(rscratch2, DONE);
687
688 // Find the first different characters in the longwords and
689 // compute their difference.
690 bind(DIFFERENCE);
691 rev(rscratch2, rscratch2);
692 clz(rscratch2, rscratch2);
693 andr(rscratch2, rscratch2, isLL ? -8 : -16);
694 lsrv(tmp1, tmp1, rscratch2);
695 (this->*ext_chr)(tmp1, tmp1);
696 lsrv(tmp2, tmp2, rscratch2);
697 (this->*ext_chr)(tmp2, tmp2);
698 subw(result, tmp1, tmp2);
699 b(DONE);
700 }
701
702 bind(STUB);
703 RuntimeAddress stub = NULL;
704 switch(ae) {
705 case StrIntrinsicNode::LL:
706 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
707 break;
708 case StrIntrinsicNode::UU:
709 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
710 break;
711 case StrIntrinsicNode::LU:
712 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
713 break;
714 case StrIntrinsicNode::UL:
715 stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
716 break;
717 default:
718 ShouldNotReachHere();
719 }
720 assert(stub.target() != NULL, "compare_long_string stub has not been generated");
721 trampoline_call(stub);
722 b(DONE);
723
724 bind(SHORT_STRING);
725 // Is the minimum length zero?
726 cbz(cnt2, DONE);
727 // arrange code to do most branches while loading and loading next characters
728 // while comparing previous
729 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
730 subs(cnt2, cnt2, 1);
731 br(EQ, SHORT_LAST_INIT);
732 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
733 b(SHORT_LOOP_START);
734 bind(SHORT_LOOP);
735 subs(cnt2, cnt2, 1);
736 br(EQ, SHORT_LAST);
737 bind(SHORT_LOOP_START);
738 (this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
739 (this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
740 cmp(tmp1, cnt1);
741 br(NE, SHORT_LOOP_TAIL);
742 subs(cnt2, cnt2, 1);
743 br(EQ, SHORT_LAST2);
744 (this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
745 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
746 cmp(tmp2, rscratch1);
747 br(EQ, SHORT_LOOP);
748 sub(result, tmp2, rscratch1);
749 b(DONE);
750 bind(SHORT_LOOP_TAIL);
751 sub(result, tmp1, cnt1);
752 b(DONE);
753 bind(SHORT_LAST2);
754 cmp(tmp2, rscratch1);
755 br(EQ, DONE);
756 sub(result, tmp2, rscratch1);
757
758 b(DONE);
759 bind(SHORT_LAST_INIT);
760 (this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
761 bind(SHORT_LAST);
762 cmp(tmp1, cnt1);
763 br(EQ, DONE);
764 sub(result, tmp1, cnt1);
765
766 bind(DONE);
767
768 BLOCK_COMMENT("} string_compare");
769 }
770