1 /*
2  * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "opto/c2_MacroAssembler.hpp"
29 #include "opto/intrinsicnode.hpp"
30 
// BLOCK_COMMENT(str): forwards str to block_comment() in non-PRODUCT builds
// and expands to nothing in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
// BIND(label): binds label and passes "<label name>:" to BLOCK_COMMENT.
// The two statements are wrapped in do { } while (0) so that the macro acts as
// a single statement, e.g. when used as the body of an unbraced if/else.
// Use it like a function call, followed by a semicolon.
#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
37 
38 // Intrinsics for CompactStrings
39 
40 // Compress char[] to byte[] by compressing 16 bytes at once.
string_compress_16(Register src,Register dst,Register cnt,Register tmp1,Register tmp2,Register tmp3,Register tmp4,Register tmp5,Label & Lfailure)41 void C2_MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
42                                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
43                                            Label& Lfailure) {
44 
45   const Register tmp0 = R0;
46   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
47   Label Lloop, Lslow;
48 
49   // Check if cnt >= 8 (= 16 bytes)
50   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
51   srwi_(tmp2, cnt, 3);
52   beq(CCR0, Lslow);
53   ori(tmp1, tmp1, 0xFF);
54   rldimi(tmp1, tmp1, 32, 0);
55   mtctr(tmp2);
56 
57   // 2x unrolled loop
58   bind(Lloop);
59   ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
60   ld(tmp4, 8, src);               // _4_5_6_7
61 
62   orr(tmp0, tmp2, tmp4);
63   rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
64   rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
65   rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
66   rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
67 
68   andc_(tmp0, tmp0, tmp1);
69   bne(CCR0, Lfailure);            // Not latin1.
70   addi(src, src, 16);
71 
72   rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
73   srdi(tmp2, tmp2, 3*8);          // ____0_2_
74   rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
75   srdi(tmp4, tmp4, 3*8);          // ____4_6_
76 
77   orr(tmp2, tmp2, tmp3);          // ____0123
78   orr(tmp4, tmp4, tmp5);          // ____4567
79 
80   stw(tmp2, 0, dst);
81   stw(tmp4, 4, dst);
82   addi(dst, dst, 8);
83   bdnz(Lloop);
84 
85   bind(Lslow);                    // Fallback to slow version
86 }
87 
88 // Compress char[] to byte[]. cnt must be positive int.
string_compress(Register src,Register dst,Register cnt,Register tmp,Label & Lfailure)89 void C2_MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
90   Label Lloop;
91   mtctr(cnt);
92 
93   bind(Lloop);
94   lhz(tmp, 0, src);
95   cmplwi(CCR0, tmp, 0xff);
96   bgt(CCR0, Lfailure);            // Not latin1.
97   addi(src, src, 2);
98   stb(tmp, 0, dst);
99   addi(dst, dst, 1);
100   bdnz(Lloop);
101 }
102 
103 // Inflate byte[] to char[] by inflating 16 bytes at once.
string_inflate_16(Register src,Register dst,Register cnt,Register tmp1,Register tmp2,Register tmp3,Register tmp4,Register tmp5)104 void C2_MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
105                                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
106   const Register tmp0 = R0;
107   assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
108   Label Lloop, Lslow;
109 
110   // Check if cnt >= 8
111   srwi_(tmp2, cnt, 3);
112   beq(CCR0, Lslow);
113   lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
114   ori(tmp1, tmp1, 0xFF);
115   mtctr(tmp2);
116 
117   // 2x unrolled loop
118   bind(Lloop);
119   lwz(tmp2, 0, src);              // ____0123 (Big Endian)
120   lwz(tmp4, 4, src);              // ____4567
121   addi(src, src, 8);
122 
123   rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
124   rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
125   rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
126   rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
127 
128   andc(tmp0, tmp2, tmp1);         // ____0_1_
129   rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
130   andc(tmp3, tmp4, tmp1);         // ____4_5_
131   rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
132 
133   rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
134   rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
135 
136   std(tmp2, 0, dst);
137   std(tmp4, 8, dst);
138   addi(dst, dst, 16);
139   bdnz(Lloop);
140 
141   bind(Lslow);                    // Fallback to slow version
142 }
143 
144 // Inflate byte[] to char[]. cnt must be positive int.
string_inflate(Register src,Register dst,Register cnt,Register tmp)145 void C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
146   Label Lloop;
147   mtctr(cnt);
148 
149   bind(Lloop);
150   lbz(tmp, 0, src);
151   addi(src, src, 1);
152   sth(tmp, 0, dst);
153   addi(dst, dst, 2);
154   bdnz(Lloop);
155 }
156 
string_compare(Register str1,Register str2,Register cnt1,Register cnt2,Register tmp1,Register result,int ae)157 void C2_MacroAssembler::string_compare(Register str1, Register str2,
158                                        Register cnt1, Register cnt2,
159                                        Register tmp1, Register result, int ae) {
160   const Register tmp0 = R0,
161                  diff = tmp1;
162 
163   assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
164   Label Ldone, Lslow, Lloop, Lreturn_diff;
165 
166   // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
167   // we interchange str1 and str2 in the UL case and negate the result.
168   // Like this, str1 is always latin1 encoded, except for the UU case.
169   // In addition, we need 0 (or sign which is 0) extend.
170 
171   if (ae == StrIntrinsicNode::UU) {
172     srwi(cnt1, cnt1, 1);
173   } else {
174     clrldi(cnt1, cnt1, 32);
175   }
176 
177   if (ae != StrIntrinsicNode::LL) {
178     srwi(cnt2, cnt2, 1);
179   } else {
180     clrldi(cnt2, cnt2, 32);
181   }
182 
183   // See if the lengths are different, and calculate min in cnt1.
184   // Save diff in case we need it for a tie-breaker.
185   subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
186   // if (diff > 0) { cnt1 = cnt2; }
187   if (VM_Version::has_isel()) {
188     isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
189   } else {
190     Label Lskip;
191     blt(CCR0, Lskip);
192     mr(cnt1, cnt2);
193     bind(Lskip);
194   }
195 
196   // Rename registers
197   Register chr1 = result;
198   Register chr2 = tmp0;
199 
200   // Compare multiple characters in fast loop (only implemented for same encoding).
201   int stride1 = 8, stride2 = 8;
202   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
203     int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
204     Label Lfastloop, Lskipfast;
205 
206     srwi_(tmp0, cnt1, log2_chars_per_iter);
207     beq(CCR0, Lskipfast);
208     rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
209     li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
210     mtctr(tmp0);
211 
212     bind(Lfastloop);
213     ld(chr1, 0, str1);
214     ld(chr2, 0, str2);
215     cmpd(CCR0, chr1, chr2);
216     bne(CCR0, Lslow);
217     addi(str1, str1, stride1);
218     addi(str2, str2, stride2);
219     bdnz(Lfastloop);
220     mr(cnt1, cnt2); // Remaining characters.
221     bind(Lskipfast);
222   }
223 
224   // Loop which searches the first difference character by character.
225   cmpwi(CCR0, cnt1, 0);
226   beq(CCR0, Lreturn_diff);
227   bind(Lslow);
228   mtctr(cnt1);
229 
230   switch (ae) {
231     case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
232     case StrIntrinsicNode::UL: // fallthru (see comment above)
233     case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
234     case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
235     default: ShouldNotReachHere(); break;
236   }
237 
238   bind(Lloop);
239   if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
240   if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
241   subf_(result, chr2, chr1); // result = chr1 - chr2
242   bne(CCR0, Ldone);
243   addi(str1, str1, stride1);
244   addi(str2, str2, stride2);
245   bdnz(Lloop);
246 
247   // If strings are equal up to min length, return the length difference.
248   bind(Lreturn_diff);
249   mr(result, diff);
250 
251   // Otherwise, return the difference between the first mismatched chars.
252   bind(Ldone);
253   if (ae == StrIntrinsicNode::UL) {
254     neg(result, result); // Negate result (see note above).
255   }
256 }
257 
array_equals(bool is_array_equ,Register ary1,Register ary2,Register limit,Register tmp1,Register result,bool is_byte)258 void C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
259                                      Register limit, Register tmp1, Register result, bool is_byte) {
260   const Register tmp0 = R0;
261   assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
262   Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
263   bool limit_needs_shift = false;
264 
265   if (is_array_equ) {
266     const int length_offset = arrayOopDesc::length_offset_in_bytes();
267     const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
268 
269     // Return true if the same array.
270     cmpd(CCR0, ary1, ary2);
271     beq(CCR0, Lskiploop);
272 
273     // Return false if one of them is NULL.
274     cmpdi(CCR0, ary1, 0);
275     cmpdi(CCR1, ary2, 0);
276     li(result, 0);
277     cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
278     beq(CCR0, Ldone);
279 
280     // Load the lengths of arrays.
281     lwz(limit, length_offset, ary1);
282     lwz(tmp0, length_offset, ary2);
283 
284     // Return false if the two arrays are not equal length.
285     cmpw(CCR0, limit, tmp0);
286     bne(CCR0, Ldone);
287 
288     // Load array addresses.
289     addi(ary1, ary1, base_offset);
290     addi(ary2, ary2, base_offset);
291   } else {
292     limit_needs_shift = !is_byte;
293     li(result, 0); // Assume not equal.
294   }
295 
296   // Rename registers
297   Register chr1 = tmp0;
298   Register chr2 = tmp1;
299 
300   // Compare 8 bytes per iteration in fast loop.
301   const int log2_chars_per_iter = is_byte ? 3 : 2;
302 
303   srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
304   beq(CCR0, Lskipfast);
305   mtctr(tmp0);
306 
307   bind(Lfastloop);
308   ld(chr1, 0, ary1);
309   ld(chr2, 0, ary2);
310   addi(ary1, ary1, 8);
311   addi(ary2, ary2, 8);
312   cmpd(CCR0, chr1, chr2);
313   bne(CCR0, Ldone);
314   bdnz(Lfastloop);
315 
316   bind(Lskipfast);
317   rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
318   beq(CCR0, Lskiploop);
319   mtctr(limit);
320 
321   // Character by character.
322   bind(Lloop);
323   if (is_byte) {
324     lbz(chr1, 0, ary1);
325     lbz(chr2, 0, ary2);
326     addi(ary1, ary1, 1);
327     addi(ary2, ary2, 1);
328   } else {
329     lhz(chr1, 0, ary1);
330     lhz(chr2, 0, ary2);
331     addi(ary1, ary1, 2);
332     addi(ary2, ary2, 2);
333   }
334   cmpw(CCR0, chr1, chr2);
335   bne(CCR0, Ldone);
336   bdnz(Lloop);
337 
338   bind(Lskiploop);
339   li(result, 1); // All characters are equal.
340   bind(Ldone);
341 }
342 
string_indexof(Register result,Register haystack,Register haycnt,Register needle,ciTypeArray * needle_values,Register needlecnt,int needlecntval,Register tmp1,Register tmp2,Register tmp3,Register tmp4,int ae)343 void C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
344                                        Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
345                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
346 
347   // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
348   Label L_TooShort, L_Found, L_NotFound, L_End;
349   Register last_addr = haycnt, // Kill haycnt at the beginning.
350   addr      = tmp1,
351   n_start   = tmp2,
352   ch1       = tmp3,
353   ch2       = R0;
354 
355   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
356   const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
357   const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
358 
359   // **************************************************************************************************
360   // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
361   // **************************************************************************************************
362 
363   // Compute last haystack addr to use if no match gets found.
364   clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
365   addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
366   if (needlecntval == 0) { // variable needlecnt
367    cmpwi(CCR6, needlecnt, 2);
368    clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
369    blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
370   }
371 
372   if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
373 
374   if (needlecntval == 0) { // variable needlecnt
375    subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
376    addi(needlecnt, needlecnt, -2);    // Rest of needle.
377   } else { // constant needlecnt
378   guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
379   assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
380    addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
381    if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
382   }
383 
384   if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
385 
386   if (ae ==StrIntrinsicNode::UL) {
387    srwi(tmp4, n_start, 1*8);          // ___0
388    rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
389   }
390 
391   add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
392 
393   // Main Loop (now we have at least 2 characters).
394   Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
395   bind(L_OuterLoop); // Search for 1st 2 characters.
396   Register addr_diff = tmp4;
397    subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
398    addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
399    srdi_(ch2, addr_diff, h_csize);
400    beq(CCR0, L_FinalCheck);           // 2 characters left?
401    mtctr(ch2);                        // num of characters / 2
402   bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
403    if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
404     lwz(ch1, 0, addr);
405     lwz(ch2, 2, addr);
406    } else {
407     lhz(ch1, 0, addr);
408     lhz(ch2, 1, addr);
409    }
410    cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
411    cmpw(CCR1, ch2, n_start);
412    beq(CCR0, L_Comp1);                // Did we find the needle start?
413    beq(CCR1, L_Comp2);
414    addi(addr, addr, 2 * h_csize);
415    bdnz(L_InnerLoop);
416   bind(L_FinalCheck);
417    andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
418    beq(CCR0, L_NotFound);
419    if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
420    cmpw(CCR1, ch1, n_start);
421    beq(CCR1, L_Comp1);
422   bind(L_NotFound);
423    li(result, -1);                    // not found
424    b(L_End);
425 
426    // **************************************************************************************************
427    // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
428    // **************************************************************************************************
429   if (needlecntval == 0) {           // We have to handle these cases separately.
430   Label L_OneCharLoop;
431   bind(L_TooShort);
432    mtctr(haycnt);
433    if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
434   bind(L_OneCharLoop);
435    if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
436    cmpw(CCR1, ch1, n_start);
437    beq(CCR1, L_Found);               // Did we find the one character needle?
438    bdnz(L_OneCharLoop);
439    li(result, -1);                   // Not found.
440    b(L_End);
441   }
442 
443   // **************************************************************************************************
444   // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
445   // **************************************************************************************************
446 
447   // Compare the rest
448   bind(L_Comp2);
449    addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
450   bind(L_Comp1);                     // Addr points to possible needle start.
451   if (needlecntval != 2) {           // Const needlecnt==2?
452    if (needlecntval != 3) {
453     if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
454     Register n_ind = tmp4,
455              h_ind = n_ind;
456     li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
457     mtctr(needlecnt);                // Decremented by 2, still > 0.
458    Label L_CompLoop;
459    bind(L_CompLoop);
460     if (ae ==StrIntrinsicNode::UL) {
461       h_ind = ch1;
462       sldi(h_ind, n_ind, 1);
463     }
464     if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
465     if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
466     cmpw(CCR1, ch1, ch2);
467     bne(CCR1, L_OuterLoop);
468     addi(n_ind, n_ind, n_csize);
469     bdnz(L_CompLoop);
470    } else { // No loop required if there's only one needle character left.
471     if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
472     if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
473     cmpw(CCR1, ch1, ch2);
474     bne(CCR1, L_OuterLoop);
475    }
476   }
477   // Return index ...
478   bind(L_Found);
479    subf(result, haystack, addr);     // relative to haystack, ...
480    if (h_csize == 2) { srdi(result, result, 1); } // in characters.
481   bind(L_End);
482 } // string_indexof
483 
string_indexof_char(Register result,Register haystack,Register haycnt,Register needle,jchar needleChar,Register tmp1,Register tmp2,bool is_byte)484 void C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
485                                             Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
486   assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
487 
488   Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
489   Register addr = tmp1,
490            ch1 = tmp2,
491            ch2 = R0;
492 
493   const int h_csize = is_byte ? 1 : 2;
494 
495 //4:
496    srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
497    mr(addr, haystack);
498    beq(CCR0, L_FinalCheck);
499    mtctr(tmp2);              // Move to count register.
500 //8:
501   bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
502    if (!is_byte) {
503     lhz(ch1, 0, addr);
504     lhz(ch2, 2, addr);
505    } else {
506     lbz(ch1, 0, addr);
507     lbz(ch2, 1, addr);
508    }
509    (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
510    (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
511    beq(CCR0, L_Found1);      // Did we find the needle?
512    beq(CCR1, L_Found2);
513    addi(addr, addr, 2 * h_csize);
514    bdnz(L_InnerLoop);
515 //16:
516   bind(L_FinalCheck);
517    andi_(R0, haycnt, 1);
518    beq(CCR0, L_NotFound);
519    if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
520    (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
521    beq(CCR1, L_Found1);
522 //21:
523   bind(L_NotFound);
524    li(result, -1);           // Not found.
525    b(L_End);
526 
527   bind(L_Found2);
528    addi(addr, addr, h_csize);
529 //24:
530   bind(L_Found1);            // Return index ...
531    subf(result, haystack, addr); // relative to haystack, ...
532    if (!is_byte) { srdi(result, result, 1); } // in characters.
533   bind(L_End);
534 } // string_indexof_char
535 
536 
has_negatives(Register src,Register cnt,Register result,Register tmp1,Register tmp2)537 void C2_MacroAssembler::has_negatives(Register src, Register cnt, Register result,
538                                       Register tmp1, Register tmp2) {
539   const Register tmp0 = R0;
540   assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
541   Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
542 
543   // Check if cnt >= 8 (= 16 bytes)
544   lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080
545   srwi_(tmp2, cnt, 4);
546   li(result, 1);                  // Assume there's a negative byte.
547   beq(CCR0, Lslow);
548   ori(tmp1, tmp1, 0x8080);
549   rldimi(tmp1, tmp1, 32, 0);
550   mtctr(tmp2);
551 
552   // 2x unrolled loop
553   bind(Lfastloop);
554   ld(tmp2, 0, src);
555   ld(tmp0, 8, src);
556 
557   orr(tmp0, tmp2, tmp0);
558 
559   and_(tmp0, tmp0, tmp1);
560   bne(CCR0, Ldone);               // Found negative byte.
561   addi(src, src, 16);
562 
563   bdnz(Lfastloop);
564 
565   bind(Lslow);                    // Fallback to slow version
566   rldicl_(tmp0, cnt, 0, 64-4);
567   beq(CCR0, Lnoneg);
568   mtctr(tmp0);
569   bind(Lloop);
570   lbz(tmp0, 0, src);
571   addi(src, src, 1);
572   andi_(tmp0, tmp0, 0x80);
573   bne(CCR0, Ldone);               // Found negative byte.
574   bdnz(Lloop);
575   bind(Lnoneg);
576   li(result, 0);
577 
578   bind(Ldone);
579 }
580 
581