1 /*
2 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016, 2018, SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/codeBuffer.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "compiler/disassembler.hpp"
30 #include "gc/shared/barrierSet.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/collectedHeap.inline.hpp"
33 #include "interpreter/interpreter.hpp"
34 #include "gc/shared/cardTableBarrierSet.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "memory/universe.hpp"
37 #include "oops/accessDecorators.hpp"
38 #include "oops/compressedOops.inline.hpp"
39 #include "oops/klass.inline.hpp"
40 #ifdef COMPILER2
41 #include "opto/compile.hpp"
42 #include "opto/intrinsicnode.hpp"
43 #include "opto/matcher.hpp"
44 #endif
45 #include "prims/methodHandles.hpp"
46 #include "registerSaver_s390.hpp"
47 #include "runtime/biasedLocking.hpp"
48 #include "runtime/icache.hpp"
49 #include "runtime/interfaceSupport.inline.hpp"
50 #include "runtime/objectMonitor.hpp"
51 #include "runtime/os.hpp"
52 #include "runtime/safepoint.hpp"
53 #include "runtime/safepointMechanism.hpp"
54 #include "runtime/sharedRuntime.hpp"
55 #include "runtime/stubRoutines.hpp"
56 #include "utilities/events.hpp"
57 #include "utilities/macros.hpp"
58
59 #include <ucontext.h>
60
61 #define BLOCK_COMMENT(str) block_comment(str)
62 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
63
64 // Move 32-bit register if destination and source are different.
lr_if_needed(Register rd,Register rs)65 void MacroAssembler::lr_if_needed(Register rd, Register rs) {
66 if (rs != rd) { z_lr(rd, rs); }
67 }
68
69 // Move register if destination and source are different.
lgr_if_needed(Register rd,Register rs)70 void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
71 if (rs != rd) { z_lgr(rd, rs); }
72 }
73
74 // Zero-extend 32-bit register into 64-bit register if destination and source are different.
llgfr_if_needed(Register rd,Register rs)75 void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
76 if (rs != rd) { z_llgfr(rd, rs); }
77 }
78
79 // Move float register if destination and source are different.
ldr_if_needed(FloatRegister rd,FloatRegister rs)80 void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
81 if (rs != rd) { z_ldr(rd, rs); }
82 }
83
// Move integer register if destination and source are different.
// It is assumed that shorter-than-int types are already
// appropriately sign-extended.
//
// Selects the exact sign-/zero-extension instruction for each (dst_type, src_type)
// pair; pointer-sized types (T_OBJECT, T_ARRAY, T_VOID, T_ADDRESS) are treated
// like 64-bit integers here.
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
                                        BasicType src_type) {
  assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
  assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");

  if (dst_type == src_type) {
    lgr_if_needed(dst, src); // Just move all 64 bits.
    return;
  }

  switch (dst_type) {
    // Do not support these types for now.
    //  case T_BOOLEAN:
    case T_BYTE:  // signed byte
      switch (src_type) {
        case T_INT:
          z_lgbr(dst, src); // Sign-extend lowest byte to 64 bits.
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_CHAR:
    case T_SHORT:
      switch (src_type) {
        case T_INT:
          if (dst_type == T_CHAR) {
            z_llghr(dst, src); // char is unsigned: zero-extend halfword.
          } else {
            z_lghr(dst, src);  // short is signed: sign-extend halfword.
          }
          break;
        default:
          ShouldNotReachHere();
      }
      return;

    case T_INT:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          // Low 32 bits suffice; shorter types are assumed pre-extended (see header comment).
          lr_if_needed(dst, src);
          // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug).
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
    case T_LONG:
      switch (src_type) {
        case T_BOOLEAN:
        case T_BYTE:
        case T_CHAR:
        case T_SHORT:
        case T_INT:
          z_lgfr(dst, src); // sign extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    case T_OBJECT:
    case T_ARRAY:
    case T_VOID:
    case T_ADDRESS:
      switch (src_type) {
        // These types don't make sense to be converted to pointers:
        //   case T_BOOLEAN:
        //   case T_BYTE:
        //   case T_CHAR:
        //   case T_SHORT:

        case T_INT:
          z_llgfr(dst, src); // zero extension
          return;

        case T_LONG:
        case T_OBJECT:
        case T_ARRAY:
        case T_VOID:
        case T_ADDRESS:
          lgr_if_needed(dst, src);
          return;

        default:
          assert(false, "non-integer src type");
          return;
      }
      return;
    default:
      assert(false, "non-integer dst type");
      return;
  }
}
201
202 // Move float register if destination and source are different.
move_freg_if_needed(FloatRegister dst,BasicType dst_type,FloatRegister src,BasicType src_type)203 void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
204 FloatRegister src, BasicType src_type) {
205 assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
206 assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
207 if (dst_type == src_type) {
208 ldr_if_needed(dst, src); // Just move all 64 bits.
209 } else {
210 switch (dst_type) {
211 case T_FLOAT:
212 assert(src_type == T_DOUBLE, "invalid float type combination");
213 z_ledbr(dst, src);
214 return;
215 case T_DOUBLE:
216 assert(src_type == T_FLOAT, "invalid float type combination");
217 z_ldebr(dst, src);
218 return;
219 default:
220 assert(false, "non-float dst type");
221 return;
222 }
223 }
224 }
225
// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::freg2mem_opt(FloatRegister reg,
                                  int64_t disp,
                                  Register index,
                                  Register base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    // disp fits in 12 bits unsigned: the (shorter) classic form is usable.
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      // disp fits in 20 bits signed: need the long-displacement (modern) form.
      (this->*modern)(reg, disp, index, base);
    } else {
      // disp is not directly encodable; try to fold it into a register first.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base); // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          // Z_R0 cannot act as address base; temporarily adjust base instead
          // and keep its old value in scratch.
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}
270
freg2mem_opt(FloatRegister reg,const Address & a,bool is_double)271 void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
272 if (is_double) {
273 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
274 } else {
275 freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
276 }
277 }
278
// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::mem2freg_opt(FloatRegister reg,
                                  int64_t disp,
                                  Register index,
                                  Register base,
                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
                                  Register scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    // disp fits in 12 bits unsigned: the (shorter) classic form is usable.
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      // disp fits in 20 bits signed: need the long-displacement (modern) form.
      (this->*modern)(reg, disp, index, base);
    } else {
      // disp is not directly encodable; try to fold it into a register first.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base); // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          // Z_R0 cannot act as address base; temporarily adjust base instead
          // and keep its old value in scratch. (reg is a float register, so it
          // can never alias base here.)
          z_lgr(scratch, base);
          add2reg(base, disp);
          (this->*classic)(reg, 0, index, base);
          z_lgr(base, scratch); // Restore base.
        }
      }
    }
  }
}
323
mem2freg_opt(FloatRegister reg,const Address & a,bool is_double)324 void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
325 if (is_double) {
326 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
327 } else {
328 mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
329 }
330 }
331
// Optimized emitter for reg to mem operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) cannot be used as work register.
//
// Don't rely on register locking, instead pass a scratch register
// (Z_R0 by default)
// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
void MacroAssembler::reg2mem_opt(Register reg,
                                 int64_t disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
                                 Register scratch) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    // disp fits in 12 bits unsigned: the (shorter) classic form is usable.
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      // disp fits in 20 bits signed: need the long-displacement (modern) form.
      (this->*modern)(reg, disp, index, base);
    } else {
      // disp is not directly encodable; try to fold it into a register first.
      if (scratch != Z_R0 && scratch != Z_R1) {
        (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
      } else {
        if (scratch != Z_R0) { // scratch == Z_R1
          if ((scratch == index) || (index == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            add2reg(scratch, disp, base); // scratch := base + disp.
            (this->*classic)(reg, 0, index, scratch);
            if (base == scratch) {
              add2reg(base, -disp); // Restore base.
            }
          }
        } else { // scratch == Z_R0
          // Unlike the float variant, reg is an integer register and may alias
          // scratch or base; in that case the base-adjust trick is unusable.
          if ((scratch == reg) || (scratch == base) || (reg == base)) {
            (this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
          } else {
            z_lgr(scratch, base);
            add2reg(base, disp);
            (this->*classic)(reg, 0, index, base);
            z_lgr(base, scratch); // Restore base.
          }
        }
      }
    }
  }
}
381
reg2mem_opt(Register reg,const Address & a,bool is_double)382 int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
383 int store_offset = offset();
384 if (is_double) {
385 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
386 } else {
387 reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
388 }
389 return store_offset;
390 }
391
// Optimized emitter for mem to reg operations.
// Uses modern instructions if running on modern hardware, classic instructions
// otherwise. Prefers (usually shorter) classic instructions if applicable.
// Data register (reg) will be used as work register where possible.
void MacroAssembler::mem2reg_opt(Register reg,
                                 int64_t disp,
                                 Register index,
                                 Register base,
                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
  index = (index == noreg) ? Z_R0 : index; // Z_R0 as index means "no index" in address generation.
  if (Displacement::is_shortDisp(disp)) {
    // disp fits in 12 bits unsigned: the (shorter) classic form is usable.
    (this->*classic)(reg, disp, index, base);
  } else {
    if (Displacement::is_validDisp(disp)) {
      // disp fits in 20 bits signed: need the long-displacement (modern) form.
      (this->*modern)(reg, disp, index, base);
    } else {
      // disp not encodable. reg is about to be overwritten by the load anyway,
      // so it can serve as the work register; the cases differ by aliasing.
      if ((reg == index) && (reg == base)) {
        // reg holds both index and base: index+base == 2*reg, fold into reg.
        z_sllg(reg, reg, 1);
        add2reg(reg, disp);
        (this->*classic)(reg, 0, noreg, reg);
      } else if ((reg == index) && (reg != Z_R0)) {
        add2reg(reg, disp);                 // reg := index + disp.
        (this->*classic)(reg, 0, reg, base);
      } else if (reg == base) {
        add2reg(reg, disp);                 // reg := base + disp.
        (this->*classic)(reg, 0, index, reg);
      } else if (reg != Z_R0) {
        add2reg(reg, disp, base);           // reg := base + disp.
        (this->*classic)(reg, 0, index, reg);
      } else { // reg == Z_R0 && reg != base here
        // Z_R0 cannot act as address base; temporarily adjust base and undo after.
        add2reg(base, disp);
        (this->*classic)(reg, 0, index, base);
        add2reg(base, -disp);
      }
    }
  }
}
430
mem2reg_opt(Register reg,const Address & a,bool is_double)431 void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
432 if (is_double) {
433 z_lg(reg, a);
434 } else {
435 mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
436 }
437 }
438
// Load a 32-bit value from memory, sign-extended into the full 64-bit register.
// z_lgf serves as both the "modern" and "classic" emitter; mem2reg_opt handles
// out-of-range displacements.
void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
}
442
and_imm(Register r,long mask,Register tmp,bool wide)443 void MacroAssembler::and_imm(Register r, long mask,
444 Register tmp /* = Z_R0 */,
445 bool wide /* = false */) {
446 assert(wide || Immediate::is_simm32(mask), "mask value too large");
447
448 if (!wide) {
449 z_nilf(r, mask);
450 return;
451 }
452
453 assert(r != tmp, " need a different temporary register !");
454 load_const_optimized(tmp, mask);
455 z_ngr(r, tmp);
456 }
457
458 // Calculate the 1's complement.
459 // Note: The condition code is neither preserved nor correctly set by this code!!!
460 // Note: (wide == false) does not protect the high order half of the target register
461 // from alteration. It only serves as optimization hint for 32-bit results.
not_(Register r1,Register r2,bool wide)462 void MacroAssembler::not_(Register r1, Register r2, bool wide) {
463
464 if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
465 z_xilf(r1, -1);
466 if (wide) {
467 z_xihf(r1, -1);
468 }
469 } else { // Distinct src and dst registers.
470 load_const_optimized(r1, -1);
471 z_xgr(r1, r2);
472 }
473 }
474
create_mask(int lBitPos,int rBitPos)475 unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
476 assert(lBitPos >= 0, "zero is leftmost bit position");
477 assert(rBitPos <= 63, "63 is rightmost bit position");
478 assert(lBitPos <= rBitPos, "inverted selection interval");
479 return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
480 }
481
// Helper function for the "Rotate_then_<logicalOP>" emitters.
// Rotate src, then mask register contents such that only bits in range survive.
// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
// For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range.
// The caller must ensure that the selected range only contains bits with defined value.
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
  // If the rotate amount can't carry bits across the word boundary into the
  // selected range, a plain shift is equivalent to (and cheaper than) a rotate.
  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
  bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos);      // Substitute SRL(G) for RLL(G).
  // Pre-determine which parts of dst will be zero after shift/rotate.
  // Naming: l/h = low/high 32-bit word, second letter = halfword within it,
  // f = full word. Known-zero parts let us skip the corresponding mask insn.
  bool llZero = sll4rll && (nRotate >= 16);
  bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
  bool lfZero = llZero && lhZero;
  bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
  bool hhZero = (srl4rll && (nRotate <= -16));
  bool hfZero = hlZero && hhZero;

  // rotate then mask src operand.
  // if oneBits == true,  all bits outside selected range are 1s.
  // if oneBits == false, all bits outside selected range are 0s.
  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
    if (dst32bit) {
      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
    } else {
      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
      else              { z_rllg(dst, src,  nRotate); }
    }
  } else {
    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
    else              { z_rllg(dst, src,  nRotate); }
  }

  unsigned long  range_mask    = create_mask(lBitPos, rBitPos);
  unsigned int   range_mask_h  = (unsigned int)(range_mask >> 32);
  unsigned int   range_mask_l  = (unsigned int)range_mask;
  unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
  unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
  unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
  unsigned short range_mask_ll = (unsigned short)range_mask;
  // Works for z9 and newer H/W.
  if (oneBits) {
    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
  } else {
    // All bits outside range become 0s. Skip an AND whose affected word is
    // already known to be zero from the shift analysis above.
    if (((~range_mask_l) != 0) &&              !lfZero) {
      z_nilf(dst, range_mask_l);
    }
    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
      z_nihf(dst, range_mask_h);
    }
  }
}
538
539 // Rotate src, then insert selected range from rotated src into dst.
540 // Clear dst before, if requested.
rotate_then_insert(Register dst,Register src,int lBitPos,int rBitPos,int nRotate,bool clear_dst)541 void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
542 int nRotate, bool clear_dst) {
543 // This version does not depend on src being zero-extended int2long.
544 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
545 z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
546 }
547
548 // Rotate src, then and selected range from rotated src into dst.
549 // Set condition code only if so requested. Otherwise it is unpredictable.
550 // See performance note in macroAssembler_s390.hpp for important information.
rotate_then_and(Register dst,Register src,int lBitPos,int rBitPos,int nRotate,bool test_only)551 void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
552 int nRotate, bool test_only) {
553 guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
554 // This version does not depend on src being zero-extended int2long.
555 nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
556 z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
557 }
558
// Rotate src, then or selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
                                    int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For rosbg, pretend it's an unsigned value.
  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
}
569
// Rotate src, then xor selected range from rotated src into dst.
// Set condition code only if so requested. Otherwise it is unpredictable.
// See performance note in macroAssembler_s390.hpp for important information.
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
                                     int nRotate, bool test_only) {
  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
  // This version does not depend on src being zero-extended int2long.
  nRotate &= 0x003f; // For rxsbg, pretend it's an unsigned value.
  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
}
580
add64(Register r1,RegisterOrConstant inc)581 void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
582 if (inc.is_register()) {
583 z_agr(r1, inc.as_register());
584 } else { // constant
585 intptr_t imm = inc.as_constant();
586 add2reg(r1, imm);
587 }
588 }
// Helper function to multiply the 64bit contents of a register by a 16bit constant.
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
// calculation and is thus rather slow.
//
// There is no handling for special cases, e.g. cval==0 or cval==1.
//
// Returns len of generated code block.
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
  int block_start = offset();

  bool sign_flip = cval < 0;
  cval = sign_flip ? -cval : cval; // Work on the magnitude; negate the result at the end.

  BLOCK_COMMENT("Reg64*Con16 {");

  int bit1 = cval & -cval;         // Isolate the lowest set bit.
  if (bit1 == cval) {
    // cval is a power of two: a single shift suffices.
    z_sllg(rval, rval, exact_log2(bit1));
    if (sign_flip) { z_lcgr(rval, rval); }
  } else {
    int bit2 = (cval-bit1) & -(cval-bit1); // Isolate the second-lowest set bit.
    if ((bit1+bit2) == cval) {
      // cval is the sum of two powers of two: shift twice and add.
      z_sllg(work, rval, exact_log2(bit1));
      z_sllg(rval, rval, exact_log2(bit2));
      z_agr(rval, work);
      if (sign_flip) { z_lcgr(rval, rval); }
    } else {
      // General case: use the multiply instruction with the original
      // (possibly negative) constant; -cval restores the sign here.
      if (sign_flip) { z_mghi(rval, -cval); }
      else           { z_mghi(rval,  cval); }
    }
  }
  BLOCK_COMMENT("} Reg64*Con16");

  int block_end = offset();
  return block_end - block_start;
}
625
// Generic operation r1 := r2 + imm.
//
// Should produce the best code for each supported CPU version.
// r2 == noreg yields r1 := r1 + imm
// imm == 0 emits either no instruction or r1 := r2 !
// NOTES: 1) Don't use this function where fixed sized
//           instruction sequences are required!!!
//        2) Don't use this function if condition code
//           setting is required!
//        3) Despite being declared as int64_t, the parameter imm
//           must be a simm_32 value (= signed 32-bit integer).
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");

  if (r2 == noreg) { r2 = r1; }

  // Handle special case imm == 0.
  if (imm == 0) {
    lgr_if_needed(r1, r2);
    // Nothing else to do.
    return;
  }

  // LA/LAY treat Z_R0 as "no base", so with r2 == Z_R0 we must use real adds.
  if (!PreferLAoverADD || (r2 == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();

    // Can we encode imm in 16 bits signed?
    if (Immediate::is_simm16(imm)) {
      if (r1 == r2) {
        z_aghi(r1, imm);
        return;
      }
      if (distinctOpnds) {
        z_aghik(r1, r2, imm); // Three-operand add saves the extra LGR.
        return;
      }
      z_lgr(r1, r2);
      z_aghi(r1, imm);
      return;
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(imm)) {
      z_la(r1, imm, r2);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(imm)) {
      // Always use LAY instruction, so we don't need the tmp register.
      z_lay(r1, imm, r2);
      return;
    }

  }

  // Can handle it (all possible values) with long immediates.
  lgr_if_needed(r1, r2);
  z_agfi(r1, imm);
}
685
// Generic operation r := b + x + d
//
// Addition of several operands with address generation semantics - sort of:
//  - no restriction on the registers. Any register will do for any operand.
//  - x == noreg: operand will be disregarded.
//  - b == noreg: will use (contents of) result reg as operand (r := r + d).
//  - x == Z_R0:  just disregard
//  - b == Z_R0:  use as operand. This is not address generation semantics!!!
//
// The same restrictions as on add2reg() are valid!!!
void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");

  if (x == noreg) { x = Z_R0; }
  if (b == noreg) { b = r; }

  // Handle special case x == R0.
  if (x == Z_R0) {
    // Can simply add the immediate value to the base register.
    add2reg(r, d, b);
    return;
  }

  if (!PreferLAoverADD || (b == Z_R0)) {
    bool distinctOpnds = VM_Version::has_DistinctOpnds();
    // Handle special case d == 0.
    if (d == 0) {
      // b == x means b + x == 2*b, which one shift-left-by-1 computes.
      if (b == x)        { z_sllg(r, b, 1); return; }
      if (r == x)        { z_agr(r, b);     return; }
      if (r == b)        { z_agr(r, x);     return; }
      if (distinctOpnds) { z_agrk(r, x, b); return; }
      z_lgr(r, b);
      z_agr(r, x);
    } else {
      // Same aliasing analysis as above, then fold in the displacement.
      if      (x == b)          { z_sllg(r, x, 1); }
      else if (r == x)          { z_agr(r, b); }
      else if (r == b)          { z_agr(r, x); }
      else if (distinctOpnds)   { z_agrk(r, x, b); }
      else {
        z_lgr(r, b);
        z_agr(r, x);
      }
      add2reg(r, d);
    }
  } else {
    // Can we encode imm in 12 bits unsigned?
    if (Displacement::is_shortDisp(d)) {
      z_la(r, d, x, b);
      return;
    }
    // Can we encode imm in 20 bits signed?
    if (Displacement::is_validDisp(d)) {
      z_lay(r, d, x, b);
      return;
    }
    z_la(r, 0, x, b); // r := b + x first, then add the large displacement.
    add2reg(r, d);
  }
}
745
746 // Generic emitter (32bit) for direct memory increment.
747 // For optimal code, do not specify Z_R0 as temp register.
add2mem_32(const Address & a,int64_t imm,Register tmp)748 void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
749 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
750 z_asi(a, imm);
751 } else {
752 z_lgf(tmp, a);
753 add2reg(tmp, imm);
754 z_st(tmp, a);
755 }
756 }
757
add2mem_64(const Address & a,int64_t imm,Register tmp)758 void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
759 if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
760 z_agsi(a, imm);
761 } else {
762 z_lg(tmp, a);
763 add2reg(tmp, imm);
764 z_stg(tmp, a);
765 }
766 }
767
load_sized_value(Register dst,Address src,size_t size_in_bytes,bool is_signed)768 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
769 switch (size_in_bytes) {
770 case 8: z_lg(dst, src); break;
771 case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
772 case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
773 case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
774 default: ShouldNotReachHere();
775 }
776 }
777
store_sized_value(Register src,Address dst,size_t size_in_bytes)778 void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
779 switch (size_in_bytes) {
780 case 8: z_stg(src, dst); break;
781 case 4: z_st(src, dst); break;
782 case 2: z_sth(src, dst); break;
783 case 1: z_stc(src, dst); break;
784 default: ShouldNotReachHere();
785 }
786 }
787
// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
// a high-order summand in register tmp.
//
// return value: <  0: No split required, si20 actually has property uimm12.
//               >= 0: Split performed. Use return value as uimm12 displacement and
//                     tmp as index register.
int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
  assert(Immediate::is_simm20(si20_offset), "sanity");
  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
  assert((lg_off+ll_off) == si20_offset, "offset splitup error");

  // With accumulate, the high part is folded into tmp at the end, so the
  // intermediate result can live in Z_R0; otherwise tmp is built directly.
  Register work = accumulate? Z_R0 : tmp;

  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
    // Unconditional two-instruction sequence: load shifted high part, shift into place.
    z_lghi(work, ll_off>>12);   // Implicit sign extension.
    z_slag(work, work, 12);
  } else {                      // Len of code = 0..10.
    if (ll_off == 0) { return -1; } // No high part: caller can use lg_off alone.
    // ll_off has 8 significant bits (at most) plus sign.
    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
      z_llilh(work, ll_off >> 16);
      if (ll_off < 0) {                  // Sign-extension required.
        z_lgfr(work, work);
      }
    } else {
      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
        z_llill(work, ll_off);
      } else {                           // Non-zero bits in both halfbytes.
        z_lghi(work, ll_off>>12);        // Implicit sign extension.
        z_slag(work, work, 12);
      }
    }
  }
  if (accumulate) { z_algr(tmp, work); } // len of code += 4
  return lg_off;
}
827
// Load a float from [a + si20], where si20 may exceed the 20-bit signed
// displacement range of a single instruction. tmp may be destroyed.
void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ley(t, si20, a);  // Offset fits: single instruction.
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    // Split the offset: high part is added into work, low part becomes disp12.
    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_le(t, si20, work);       // No split was necessary.
    } else {
      if (accumulate) {
        z_le(t, disp12, work);   // Base already contains the high part.
      } else {
        z_le(t, disp12, work, a);
      }
    }
  }
}
858
// Load a double from [a + si20], where si20 may exceed the 20-bit signed
// displacement range of a single instruction. tmp may be destroyed.
void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
  if (Displacement::is_validDisp(si20)) {
    z_ldy(t, si20, a);  // Offset fits: single instruction.
  } else {
    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
    // pool loads).
    bool accumulate    = true;
    bool fixed_codelen = true;
    Register work;

    if (fixed_codelen) {
      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
    } else {
      accumulate = (a == tmp);
    }
    work = tmp;

    // Split the offset: high part is added into work, low part becomes disp12.
    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
    if (disp12 < 0) {
      z_ld(t, si20, work);       // No split was necessary.
    } else {
      if (accumulate) {
        z_ld(t, disp12, work);   // Base already contains the high part.
      } else {
        z_ld(t, disp12, work, a);
      }
    }
  }
}
889
890 // PCrelative TOC access.
891 // Returns distance (in bytes) from current position to start of consts section.
892 // Returns 0 (zero) if no consts section exists or if it has size zero.
toc_distance()893 long MacroAssembler::toc_distance() {
894 CodeSection* cs = code()->consts();
895 return (long)((cs != NULL) ? cs->start()-pc() : 0);
896 }
897
// Implementation on x86/sparc assumes that constant and instruction section are
// adjacent, but this doesn't hold. Two special situations may occur, that we must
// be able to handle:
//   1. const section may be located apart from the inst section.
//   2. const section may be empty
// In both cases, we use the const section's start address to compute the "TOC",
// this seems to occur only temporarily; in the final step we always seem to end up
// with the pc-relative variant.
//
// PC-relative offset could be +/-2**32 -> use long for disp
// Furthermore: makes no sense to have special code for
// adjacent const and inst sections.
void MacroAssembler::load_toc(Register Rtoc) {
  // Simply use distance from start of const section (should be patched in the end).
  long disp = toc_distance();

  // Register the load target so it gets fixed up when sections move.
  RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
  relocate(rspec);
  z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
}
918
919 // PCrelative TOC access.
920 // Load from anywhere pcrelative (with relocation of load instr)
load_long_pcrelative(Register Rdst,address dataLocation)921 void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
922 address pc = this->pc();
923 ptrdiff_t total_distance = dataLocation - pc;
924 RelocationHolder rspec = internal_word_Relocation::spec(dataLocation);
925
926 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
927 assert(total_distance != 0, "sanity");
928
929 // Some extra safety net.
930 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
931 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
932 }
933
934 (this)->relocate(rspec, relocInfo::pcrel_addr_format);
935 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
936 }
937
938
939 // PCrelative TOC access.
940 // Load from anywhere pcrelative (with relocation of load instr)
941 // loaded addr has to be relocated when added to constant pool.
load_addr_pcrelative(Register Rdst,address addrLocation)942 void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
943 address pc = this->pc();
944 ptrdiff_t total_distance = addrLocation - pc;
945 RelocationHolder rspec = internal_word_Relocation::spec(addrLocation);
946
947 assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
948
949 // Some extra safety net.
950 if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
951 guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
952 }
953
954 (this)->relocate(rspec, relocInfo::pcrel_addr_format);
955 z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
956 }
957
// Generic operation: load a value from memory and test.
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
  z_lb(dst, a);       // Load byte, sign-extended.
  z_ltr(dst, dst);    // Set CC from the loaded value.
}
964
// Load a sign-extended halfword and set CC (<0, ==0, >0) from its value.
void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
  int64_t disp = a.disp20();
  if (Displacement::is_shortDisp(disp)) {
    z_lh(dst, a);     // 12-bit unsigned displacement form.
  } else if (Displacement::is_longDisp(disp)) {
    z_lhy(dst, a);    // 20-bit signed displacement form.
  } else {
    guarantee(false, "displacement out of range");
  }
  z_ltr(dst, dst);    // Set CC from the loaded value.
}
976
// Load a 32-bit value and set CC (<0, ==0, >0) in a single instruction.
void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
  z_lt(dst, a);
}
980
// Load a 32-bit value sign-extended to 64 bits and set CC in a single instruction.
void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
  z_ltgf(dst, a);
}
984
// Load a 64-bit value and set CC (<0, ==0, >0) in a single instruction.
void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
  z_ltg(dst, a);
}
988
// Test a bit in memory.
// 'bit' counts from the least significant bit of the 32-bit word at 'a'.
// Only base+displacement addressing is supported (TM takes no index register).
void MacroAssembler::testbit(const Address &a, unsigned int bit) {
  assert(a.index() == noreg, "no index reg allowed in testbit");
  // TM tests bits within one byte. Pick the byte that holds the requested bit
  // (byte offsets reflect big-endian storage order) and the bit's in-byte mask.
  if (bit <= 7) {
    z_tm(a.disp() + 3, a.base(), 1 << bit);
  } else if (bit <= 15) {
    z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
  } else if (bit <= 23) {
    z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
  } else if (bit <= 31) {
    z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
  } else {
    ShouldNotReachHere();
  }
}
1004
// Test a bit in a register. Result is reflected in CC.
// The TM{ll,lh,hl,hh} instructions each test one 16-bit quarter of the register,
// so the bit position is reduced modulo 16 within its quarter.
void MacroAssembler::testbit(Register r, unsigned int bitPos) {
  if (bitPos < 16) {
    z_tmll(r, 1U<<bitPos);
  } else if (bitPos < 32) {
    z_tmlh(r, 1U<<(bitPos-16));
  } else if (bitPos < 48) {
    z_tmhl(r, 1U<<(bitPos-32));
  } else if (bitPos < 64) {
    z_tmhh(r, 1U<<(bitPos-48));
  } else {
    ShouldNotReachHere();
  }
}
1019
// Prefetch the storage at 'a' for reading (PFD with access-intent code 1).
void MacroAssembler::prefetch_read(Address a) {
  z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
}
// Prefetch the storage at 'a' for update (PFD with access-intent code 2).
void MacroAssembler::prefetch_update(Address a) {
  z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
}
1026
// Clear a register, i.e. load const zero into reg.
// Return len (in bytes) of generated instruction(s).
// whole_reg: Clear 64 bits if true, 32 bits otherwise.
// set_cc:    Use instruction that sets the condition code, if true.
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
  unsigned int start_off = offset();
  if (whole_reg) {
    set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);  // XOR sets CC; LAZ preserves it.
  } else {  // Only 32bit register.
    set_cc ? z_xr(r, r) : z_lhi(r, 0);         // XOR sets CC; LHI preserves it.
  }
  return offset() - start_off;
}
1040
#ifdef ASSERT
// Debug-only: fill register r with a repeating pattern.
// pattern_len gives the width (1, 2, 4, or 8 bytes) of the basic pattern,
// which is replicated to fill all 64 bits of the register.
// Returns the length (in bytes) of the generated code.
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
  switch (pattern_len) {
    case 1:
      pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8);
      // Fall through: widen the 2-byte pattern to 4 bytes.
    case 2:
      pattern = (pattern & 0x0000ffff) | ((pattern & 0x0000ffff)<<16);
      // Fall through: widen the 4-byte pattern to 8 bytes.
    case 4:
      pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
      // Fall through: emit the load of the replicated 8-byte pattern.
    case 8:
      return load_const_optimized_rtn_len(r, pattern, true);
      // Unreachable "break" after the return was removed.
    default:
      guarantee(false, "preset_reg: bad len");
  }
  return 0;
}
#endif
1059
1060 // addr: Address descriptor of memory to clear index register will not be used !
1061 // size: Number of bytes to clear.
1062 // !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
1063 // !!! Use store_const() instead !!!
clear_mem(const Address & addr,unsigned size)1064 void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
1065 guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");
1066
1067 if (size == 1) {
1068 z_mvi(addr, 0);
1069 return;
1070 }
1071
1072 switch (size) {
1073 case 2: z_mvhhi(addr, 0);
1074 return;
1075 case 4: z_mvhi(addr, 0);
1076 return;
1077 case 8: z_mvghi(addr, 0);
1078 return;
1079 default: ; // Fallthru to xc.
1080 }
1081
1082 z_xc(addr, size, addr);
1083 }
1084
// Pad with no-ops until the code buffer offset is a multiple of modulus.
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) z_nop();
}
1088
// Special version for non-relocateable code if required alignment
// is larger than CodeEntryAlignment.
// Aligns on the absolute pc value instead of the section-relative offset.
void MacroAssembler::align_address(int modulus) {
  while ((uintptr_t)pc() % modulus != 0) z_nop();
}
1094
// Compute the address of an interpreter stack argument slot.
//   arg_slot:          slot index, given as constant or in a register.
//   temp_reg:          scratch register, needed only for the register case.
//   extra_slot_offset: additional offset, in slots.
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         Register temp_reg,
                                         int64_t extra_slot_offset) {
  // On Z, we can have index and disp in an Address. So don't call argument_offset,
  // which issues an unnecessary add instruction.
  int stackElementSize = Interpreter::stackElementSize;
  int64_t offset = extra_slot_offset * stackElementSize;
  const Register argbase = Z_esp;
  if (arg_slot.is_constant()) {
    // Constant slot: fold everything into the displacement.
    offset += arg_slot.as_constant() * stackElementSize;
    return Address(argbase, offset);
  }
  // else
  assert(temp_reg != noreg, "must specify");
  assert(temp_reg != Z_ARG1, "base and index are conflicting");
  z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
  return Address(argbase, temp_reg, offset);
}
1113
1114
1115 //===================================================================
1116 //=== START C O N S T A N T S I N C O D E S T R E A M ===
1117 //===================================================================
//===              P A T C H A B L E   C O N S T A N T S          ===
1119 //===================================================================
1120
1121
1122 //---------------------------------------------------
1123 // Load (patchable) constant into register
1124 //---------------------------------------------------
1125
1126
// Load absolute address (and try to optimize).
//   Note: This method is usable only for position-fixed code,
//         referring to a position-fixed target location.
//         If not so, relocations and patching must be used.
void MacroAssembler::load_absolute_address(Register d, address addr) {
  assert(addr != NULL, "should not happen");
  BLOCK_COMMENT("load_absolute_address:");
  if (addr == NULL) {
    z_larl(d, pc()); // Dummy emit for size calc.
    return;
  }

  // Preferred form: a single pc-relative LARL when the target is in 32-bit range.
  if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
    z_larl(d, addr);
    return;
  }

  // Out of pc-relative range: materialize the address as a constant.
  load_const_optimized(d, (long)addr);
}
1146
// Load a 64bit constant.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const(Register t, long x) {
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  Assembler::z_iihf(t, (long)((unsigned long)x >> 32));          // Insert high 32 bits.
  Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL)); // Insert low 32 bits.
}
1157
// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
// Patchable code sequence, but not atomically patchable.
// Make sure to keep code size constant -> no value-dependent optimizations.
// Do not kill condition code.
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
  if (sign_extend) { Assembler::z_lgfi(t, x); }   // Sign-extending load.
  else             { Assembler::z_llilf(t, x); }  // Zero-extending load.
}
1166
// Load narrow oop constant, no decompression.
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
  assert(UseCompressedOops, "must be on to call this method");
  load_const_32to64(t, a, false /*sign_extend*/);  // Narrow oops are zero-extended.
}
1172
// Load narrow klass constant, compression required.
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = Klass::encode_klass(k);   // Compress before loading.
  load_const_32to64(t, encoded_k, false /*sign_extend*/);
}
1179
//------------------------------------------------------
//  Compare (patchable) constant with register.
//------------------------------------------------------

// Compare narrow oop in reg with narrow oop constant, no decompression.
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
  assert(UseCompressedOops, "must be on to call this method");

  // 32-bit logical (unsigned) compare against the immediate.
  Assembler::z_clfi(oop1, oop2);
}
1190
// Compare narrow klass in reg with narrow klass constant, no decompression.
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
  assert(UseCompressedClassPointers, "must be on to call this method");
  narrowKlass encoded_k = Klass::encode_klass(klass2);  // Compress before comparing.

  // 32-bit logical (unsigned) compare against the immediate.
  Assembler::z_clfi(klass1, encoded_k);
}
1198
1199 //----------------------------------------------------------
1200 // Check which kind of load_constant we have here.
1201 //----------------------------------------------------------
1202
1203 // Detection of CPU version dependent load_const sequence.
1204 // The detection is valid only for code sequences generated by load_const,
1205 // not load_const_optimized.
is_load_const(address a)1206 bool MacroAssembler::is_load_const(address a) {
1207 unsigned long inst1, inst2;
1208 unsigned int len1, len2;
1209
1210 len1 = get_instruction(a, &inst1);
1211 len2 = get_instruction(a + len1, &inst2);
1212
1213 return is_z_iihf(inst1) && is_z_iilf(inst2);
1214 }
1215
1216 // Detection of CPU version dependent load_const_32to64 sequence.
1217 // Mostly used for narrow oops and narrow Klass pointers.
1218 // The detection is valid only for code sequences generated by load_const_32to64.
is_load_const_32to64(address pos)1219 bool MacroAssembler::is_load_const_32to64(address pos) {
1220 unsigned long inst1, inst2;
1221 unsigned int len1;
1222
1223 len1 = get_instruction(pos, &inst1);
1224 return is_z_llilf(inst1);
1225 }
1226
// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate32(address pos) {
  // Match a CLFI instruction (RIL format).
  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
}
1232
// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
  return is_compare_immediate32(pos);  // Same CLFI encoding as the generic 32-bit case.
}
1238
// Detection of compare_immediate_narrow sequence.
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
  return is_compare_immediate32(pos);  // Same CLFI encoding as the generic 32-bit case.
}
1244
//-----------------------------------
//  patch the load_constant
//-----------------------------------

// CPU-version dependent patching of load_const (IIHF/IILF pair).
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const(a), "not a load of a constant");
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  set_imm32((address)a, (long)((unsigned long)x >> 32));                // Patch IIHF immediate.
  set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL)); // Patch IILF immediate (6 bytes after IIHF).
}
1257
// Patching the value of CPU version dependent load_const_32to64 sequence.
// The passed ptr MUST be in compressed format!
// Returns the instruction length (in bytes) of the patched sequence.
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");

  set_imm32(pos, np);
  return 6;
}
1266
// Patching the value of CPU version dependent compare_immediate_narrow sequence.
// The passed ptr MUST be in compressed format!
// Returns the instruction length (in bytes) of the patched sequence.
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
  assert(is_compare_immediate32(pos), "not a compressed ptr compare");

  set_imm32(pos, np);
  return 6;
}
1275
// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");

  narrowOop no = CompressedOops::encode(o);  // Compress before patching.
  return patch_load_const_32to64(pos, no);
}
1284
// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = Klass::encode_klass(k);   // Compress before patching.
  return patch_load_const_32to64(pos, nk);
}
1293
// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
  assert(UseCompressedOops, "Can only patch compressed oops");

  narrowOop no = CompressedOops::encode(o);  // Compress before patching.
  return patch_compare_immediate_32(pos, no);
}
1302
// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
// The passed ptr must NOT be in compressed format!
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");

  narrowKlass nk = Klass::encode_klass(k);   // Compress before patching.
  return patch_compare_immediate_32(pos, nk);
}
1311
1312 //------------------------------------------------------------------------
1313 // Extract the constant from a load_constant instruction stream.
1314 //------------------------------------------------------------------------
1315
1316 // Get constant from a load_const sequence.
get_const(address a)1317 long MacroAssembler::get_const(address a) {
1318 assert(is_load_const(a), "not a load of a constant");
1319 unsigned long x;
1320 x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
1321 x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
1322 return (long) x;
1323 }
1324
//--------------------------------------
//  Store a constant in memory.
//--------------------------------------

// General emitter to move a constant to memory.
// The store is atomic.
//  o Address must be given in RS format (no index register)
//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
//  o Memory slot must be at least as wide as constant, will assert otherwise.
//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
// Returns the code offset of the emitted store instruction (usable for patching).
int MacroAssembler::store_const(const Address &dest, long imm,
                                unsigned int lm, unsigned int lc,
                                Register scratch) {
  int64_t  disp = dest.disp();
  Register base = dest.base();
  assert(!dest.has_index(), "not supported");
  assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported");
  assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
  assert(lm>=lc, "memory slot too small");
  assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
  assert(Displacement::is_validDisp(disp), "displacement out of range");

  bool is_shortDisp = Displacement::is_shortDisp(disp);
  int store_offset = -1;

  // For target len == 1 it's easy: MVI/MVIY store the immediate byte directly.
  if (lm == 1) {
    store_offset = offset();
    if (is_shortDisp) {
      z_mvi(disp, base, imm);
      return store_offset;
    } else {
      z_mviy(disp, base, imm);
      return store_offset;
    }
  }

  // All the "good stuff" (the move-immediate family) takes an unsigned displacement.
  if (is_shortDisp) {
    // NOTE: Cannot use clear_mem for imm==0, because it is not atomic.

    store_offset = offset();
    switch (lm) {
      case 2:  // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
        z_mvhhi(disp, base, imm);
        return store_offset;
      case 4:
        if (Immediate::is_simm16(imm)) {  // MVHI takes only a 16-bit immediate.
          z_mvhi(disp, base, imm);
          return store_offset;
        }
        break;
      case 8:
        if (Immediate::is_simm16(imm)) {  // MVGHI takes only a 16-bit immediate.
          z_mvghi(disp, base, imm);
          return store_offset;
        }
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  }

  // Can't optimize, so load value and store it.
  guarantee(scratch != noreg, " need a scratch register here !");
  if (imm != 0) {
    load_const_optimized(scratch, imm);  // Preserves CC anyway.
  } else {
    // Leave CC alone!!
    (void) clear_reg(scratch, true, false); // Indicate unused result.
  }

  store_offset = offset();
  if (is_shortDisp) {
    switch (lm) {
      case 2:
        z_sth(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_st(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  } else {
    switch (lm) {
      case 2:
        z_sthy(scratch, disp, Z_R0, base);
        return store_offset;
      case 4:
        z_sty(scratch, disp, Z_R0, base);
        return store_offset;
      case 8:
        z_stg(scratch, disp, Z_R0, base);  // STG handles long displacements natively.
        return store_offset;
      default:
        ShouldNotReachHere();
        break;
    }
  }
  return -1; // should not reach here
}
1434
//===================================================================
//===       N O T   P A T C H A B L E   C O N S T A N T S         ===
//===================================================================

// Load constant x into register t with a fast instruction sequence
// depending on the bits in x. Preserves CC under all circumstances.
// Emits code only if emit == true; in either case the return value is
// the length (in bytes) of the (would-be) emitted sequence.
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
  if (x == 0) {
    int len;
    if (emit) {
      len = clear_reg(t, true, false);  // Must not set CC.
    } else {
      len = 4;
    }
    return len;
  }

  if (Immediate::is_simm16(x)) {
    if (emit) { z_lghi(t, x); }
    return 4;
  }

  // 64 bit value: | part1 | part2 | part3 | part4 |
  // At least one part is not zero!
  // Note: Right shift is only cleanly defined for unsigned types
  //       or for signed types with nonnegative values.
  int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff;
  int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff;
  int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff;
  int part4 = (int)x & 0x0000ffff;
  int part12 = (int)((unsigned long)x >> 32);
  int part34 = (int)x;

  // Lower word only (unsigned).
  if (part12 == 0) {
    if (part3 == 0) {
      if (emit) z_llill(t, part4);
      return 4;
    }
    if (part4 == 0) {
      if (emit) z_llilh(t, part3);
      return 4;
    }
    if (emit) z_llilf(t, part34);
    return 6;
  }

  // Upper word only.
  if (part34 == 0) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      return 4;
    }
    if (part2 == 0) {
      if (emit) z_llihh(t, part1);
      return 4;
    }
    if (emit) z_llihf(t, part12);
    return 6;
  }

  // Lower word only (signed): the upper word is all ones and bit 31 of the
  // lower word is set, so LGFI's sign extension reproduces the upper word.
  if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
    if (emit) z_lgfi(t, part34);
    return 6;
  }

  int len = 0;

  // General case: first load the upper word...
  if ((part1 == 0) || (part2 == 0)) {
    if (part1 == 0) {
      if (emit) z_llihl(t, part2);
      len += 4;
    } else {
      if (emit) z_llihh(t, part1);
      len += 4;
    }
  } else {
    if (emit) z_llihf(t, part12);
    len += 6;
  }

  // ...then insert the lower word without disturbing the upper word.
  if ((part3 == 0) || (part4 == 0)) {
    if (part3 == 0) {
      if (emit) z_iill(t, part4);
      len += 4;
    } else {
      if (emit) z_iilh(t, part3);
      len += 4;
    }
  } else {
    if (emit) z_iilf(t, part34);
    len += 6;
  }
  return len;
}
1531
1532 //=====================================================================
1533 //=== H I G H E R L E V E L B R A N C H E M I T T E R S ===
1534 //=====================================================================
1535
1536 // Note: In the worst case, one of the scratch registers is destroyed!!!
compare32_and_branch(Register r1,RegisterOrConstant x2,branch_condition cond,Label & lbl)1537 void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1538 // Right operand is constant.
1539 if (x2.is_constant()) {
1540 jlong value = x2.as_constant();
1541 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true);
1542 return;
1543 }
1544
1545 // Right operand is in register.
1546 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
1547 }
1548
1549 // Note: In the worst case, one of the scratch registers is destroyed!!!
compareU32_and_branch(Register r1,RegisterOrConstant x2,branch_condition cond,Label & lbl)1550 void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1551 // Right operand is constant.
1552 if (x2.is_constant()) {
1553 jlong value = x2.as_constant();
1554 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false);
1555 return;
1556 }
1557
1558 // Right operand is in register.
1559 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
1560 }
1561
1562 // Note: In the worst case, one of the scratch registers is destroyed!!!
compare64_and_branch(Register r1,RegisterOrConstant x2,branch_condition cond,Label & lbl)1563 void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1564 // Right operand is constant.
1565 if (x2.is_constant()) {
1566 jlong value = x2.as_constant();
1567 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true);
1568 return;
1569 }
1570
1571 // Right operand is in register.
1572 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
1573 }
1574
compareU64_and_branch(Register r1,RegisterOrConstant x2,branch_condition cond,Label & lbl)1575 void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1576 // Right operand is constant.
1577 if (x2.is_constant()) {
1578 jlong value = x2.as_constant();
1579 compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false);
1580 return;
1581 }
1582
1583 // Right operand is in register.
1584 compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
1585 }
1586
// Generate an optimal branch to the branch target.
// Optimal means that a relative branch (brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Used registers:
//   Z_R1 - work reg. Holds branch target address.
//          Used in fallback case only.
//
// This version of branch_optimized is good for cases where the target address is known
// and constant, i.e. is never changed (no relocation, no patching).
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
  address branch_origin = pc();

  if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
    z_brc(cond, branch_addr);    // Shortest form: 16-bit relative branch.
  } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
    z_brcl(cond, branch_addr);   // 32-bit relative branch.
  } else {
    load_const_optimized(Z_R1, branch_addr);  // CC must not get killed by load_const_optimized.
    z_bcr(cond, Z_R1);           // Fallback: indirect branch via Z_R1.
  }
}
1610
// This version of branch_optimized is good for cases where the target address
// is potentially not yet known at the time the code is emitted.
//
// One very common case is a branch to an unbound label which is handled here.
// The caller might know (or hope) that the branch distance is short enough
// to be encoded in a 16bit relative address. In this case he will pass a
// NearLabel branch_target.
// Care must be taken with unbound labels. Each call to target(label) creates
// an entry in the patch queue for that label to patch all references of the label
// once it gets bound. Those recorded patch locations must be patchable. Otherwise,
// an assertion fires at patch time.
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
  if (branch_target.is_bound()) {
    // Target known: pick the optimal encoding for the actual distance.
    address branch_addr = target(branch_target);
    branch_optimized(cond, branch_addr);
  } else if (branch_target.is_near()) {
    z_brc(cond, branch_target);  // Caller assures that the target will be in range for z_brc.
  } else {
    z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
  }
}
1632
// Generate an optimal compare and branch to the branch target.
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Input:
//   r1 - left compare operand
//   r2 - right compare operand
// len64/has_sign select among the four compare flavors (32/64 bit, signed/unsigned).
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  address  branch_addr,
                                                  bool     len64,
                                                  bool     has_sign) {
  // Encode the variant: bit 1 = 64-bit operands, bit 0 = unsigned compare.
  unsigned int casenum = (len64?2:0)+(has_sign?0:1);

  address branch_origin = pc();
  // Fused compare-and-branch requires hardware support and a 16-bit relative target.
  if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
    switch (casenum) {
      case 0: z_crj( r1, r2, cond, branch_addr); break;
      case 1: z_clrj (r1, r2, cond, branch_addr); break;
      case 2: z_cgrj(r1, r2, cond, branch_addr); break;
      case 3: z_clgrj(r1, r2, cond, branch_addr); break;
      default: ShouldNotReachHere(); break;
    }
  } else {
    // Fallback: separate compare to set the CC, then an optimized branch.
    switch (casenum) {
      case 0: z_cr( r1, r2); break;
      case 1: z_clr(r1, r2); break;
      case 2: z_cgr(r1, r2); break;
      case 3: z_clgr(r1, r2); break;
      default: ShouldNotReachHere(); break;
    }
    branch_optimized(cond, branch_addr);
  }
}
1669
1670 // Generate an optimal compare and branch to the branch target.
1671 // Optimal means that a relative branch (clgij, brc or brcl) is used if the
1672 // branch distance is short enough. Loading the target address into a
1673 // register and branching via reg is used as fallback only.
1674 //
1675 // Input:
1676 // r1 - left compare operand (in register)
1677 // x2 - right compare operand (immediate)
compare_and_branch_optimized(Register r1,jlong x2,Assembler::branch_condition cond,Label & branch_target,bool len64,bool has_sign)1678 void MacroAssembler::compare_and_branch_optimized(Register r1,
1679 jlong x2,
1680 Assembler::branch_condition cond,
1681 Label& branch_target,
1682 bool len64,
1683 bool has_sign) {
1684 address branch_origin = pc();
1685 bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
1686 bool is_RelAddr16 = branch_target.is_near() ||
1687 (branch_target.is_bound() &&
1688 RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
1689 unsigned int casenum = (len64?2:0)+(has_sign?0:1);
1690
1691 if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
1692 switch (casenum) {
1693 case 0: z_cij( r1, x2, cond, branch_target); break;
1694 case 1: z_clij(r1, x2, cond, branch_target); break;
1695 case 2: z_cgij(r1, x2, cond, branch_target); break;
1696 case 3: z_clgij(r1, x2, cond, branch_target); break;
1697 default: ShouldNotReachHere(); break;
1698 }
1699 return;
1700 }
1701
1702 if (x2 == 0) {
1703 switch (casenum) {
1704 case 0: z_ltr(r1, r1); break;
1705 case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1706 case 2: z_ltgr(r1, r1); break;
1707 case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1708 default: ShouldNotReachHere(); break;
1709 }
1710 } else {
1711 if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
1712 switch (casenum) {
1713 case 0: z_chi(r1, x2); break;
1714 case 1: z_chi(r1, x2); break; // positive immediate < 2**15
1715 case 2: z_cghi(r1, x2); break;
1716 case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
1717 default: break;
1718 }
1719 } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
1720 switch (casenum) {
1721 case 0: z_cfi( r1, x2); break;
1722 case 1: z_clfi(r1, x2); break;
1723 case 2: z_cgfi(r1, x2); break;
1724 case 3: z_clgfi(r1, x2); break;
1725 default: ShouldNotReachHere(); break;
1726 }
1727 } else {
1728 // No instruction with immediate operand possible, so load into register.
1729 Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
1730 load_const_optimized(scratch, x2);
1731 switch (casenum) {
1732 case 0: z_cr( r1, scratch); break;
1733 case 1: z_clr(r1, scratch); break;
1734 case 2: z_cgr(r1, scratch); break;
1735 case 3: z_clgr(r1, scratch); break;
1736 default: ShouldNotReachHere(); break;
1737 }
1738 }
1739 }
1740 branch_optimized(cond, branch_target);
1741 }
1742
// Generate an optimal compare and branch to the branch target.
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
// branch distance is short enough. Loading the target address into a
// register and branching via reg is used as fallback only.
//
// Input:
//   r1 - left compare operand
//   r2 - right compare operand
// len64/has_sign select among the four compare flavors (32/64 bit, signed/unsigned).
void MacroAssembler::compare_and_branch_optimized(Register r1,
                                                  Register r2,
                                                  Assembler::branch_condition cond,
                                                  Label&   branch_target,
                                                  bool     len64,
                                                  bool     has_sign) {
  // Encode the variant: bit 1 = 64-bit operands, bit 0 = unsigned compare.
  unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);

  if (branch_target.is_bound()) {
    // Bound label: delegate to the fixed-address version which knows the distance.
    address branch_addr = target(branch_target);
    compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
  } else {
    // Unbound label: fused compare-and-branch only if the caller promised nearness.
    if (VM_Version::has_CompareBranch() && branch_target.is_near()) {
      switch (casenum) {
        case 0: z_crj(  r1, r2, cond, branch_target); break;
        case 1: z_clrj( r1, r2, cond, branch_target); break;
        case 2: z_cgrj( r1, r2, cond, branch_target); break;
        case 3: z_clgrj(r1, r2, cond, branch_target); break;
        default: ShouldNotReachHere(); break;
      }
    } else {
      // Fallback: separate compare, then an optimized branch to the label.
      switch (casenum) {
        case 0: z_cr( r1, r2); break;
        case 1: z_clr(r1, r2); break;
        case 2: z_cgr(r1, r2); break;
        case 3: z_clgr(r1, r2); break;
        default: ShouldNotReachHere(); break;
      }
      branch_optimized(cond, branch_target);
    }
  }
}
1783
1784 //===========================================================================
1785 //=== END H I G H E R L E V E L B R A N C H E M I T T E R S ===
1786 //===========================================================================
1787
allocate_metadata_address(Metadata * obj)1788 AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
1789 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1790 int index = oop_recorder()->allocate_metadata_index(obj);
1791 RelocationHolder rspec = metadata_Relocation::spec(index);
1792 return AddressLiteral((address)obj, rspec);
1793 }
1794
constant_metadata_address(Metadata * obj)1795 AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
1796 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1797 int index = oop_recorder()->find_index(obj);
1798 RelocationHolder rspec = metadata_Relocation::spec(index);
1799 return AddressLiteral((address)obj, rspec);
1800 }
1801
allocate_oop_address(jobject obj)1802 AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
1803 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1804 int oop_index = oop_recorder()->allocate_oop_index(obj);
1805 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1806 }
1807
constant_oop_address(jobject obj)1808 AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
1809 assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1810 int oop_index = oop_recorder()->find_index(obj);
1811 return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1812 }
1813
// Normalize r to a C-style boolean: 0 stays 0, any nonzero value becomes 1.
// t is a scratch register.
// NOTE: destroys r
void MacroAssembler::c2bool(Register r, Register t) {
  z_lcr(t, r);   // t = -r
  z_or(r, t);    // r = -r OR r  (sign bit set iff r was nonzero)
  z_srl(r, 31);  // Yields 0 if r was 0, 1 otherwise.
}
1820
// Return (*delayed_value_addr + offset), either as an immediate constant (if the
// value is already known, i.e. nonzero) or as code that loads it at run time into tmp.
// Used to reference values that are not yet known at code-generation time.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0) {
    // Value already resolved: no code needed, fold offset into the constant.
    return RegisterOrConstant(value + offset);
  }

  BLOCK_COMMENT("delayed_value {");
  // Load indirectly to solve generation ordering problem.
  load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
  z_lg(tmp, 0, tmp);                                        // tmp = *tmp;

#ifdef ASSERT
  // The value must have been filled in by run time; trap if it is still zero.
  NearLabel L;
  compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L);
  z_illtrap();
  bind(L);
#endif

  if (offset != 0) {
    z_agfi(tmp, offset); // tmp = tmp + offset;
  }

  BLOCK_COMMENT("} delayed_value");
  return RegisterOrConstant(tmp);
}
1848
// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
// and return the resulting instruction.
// Dest_pos and inst_pos are 32 bit only. These parms can only designate
// relative positions.
// Use correct argument types. Do not pre-calculate distance.
unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
  int c = 0;                       // Case number, for the error report below only.
  unsigned long patched_inst = 0;
  // Each case masks out the relative-offset field of the instruction and ORs in
  // the newly computed offset. Field position/width depend on the instruction format.
  if (is_call_pcrelative_short(inst) ||
      is_branch_pcrelative_short(inst) ||
      is_branchoncount_pcrelative_short(inst) ||
      is_branchonindex32_pcrelative_short(inst)) {
    // 4-byte instructions: 16-bit offset in bits 16..31.
    c = 1;
    int m = fmask(15, 0);    // simm16(-1, 16, 32);
    int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
    patched_inst = (inst & ~m) | v;
  } else if (is_compareandbranch_pcrelative_short(inst)) {
    // 6-byte compare-and-branch: 16-bit offset "in the middle" (bits 16..31 of 48).
    c = 2;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_branchonindex64_pcrelative_short(inst)) {
    c = 3;
    long m = fmask(31, 16);  // simm16(-1, 16, 48);
    long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
    // 6-byte instructions with a 32-bit offset.
    c = 4;
    long m = fmask(31, 0);   // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
    c = 5;
    long m = fmask(31, 0);   // simm32(-1, 16, 48);
    long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
    patched_inst = (inst & ~m) | v;
  } else {
    print_dbg_msg(tty, inst, "not a relative branch", 0);
    dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
    ShouldNotReachHere();
  }

  // Cross-check: decoding the patched instruction must yield the intended distance.
  long new_off = get_pcrel_offset(patched_inst);
  if (new_off != (dest_pos-inst_pos)) {
    tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
    print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0);
    print_dbg_msg(tty, patched_inst, "<- patched  instruction: branch patching error", 0);
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#endif
    ShouldNotReachHere();
  }
  return patched_inst;
}
1903
1904 // Only called when binding labels (share/vm/asm/assembler.cpp)
1905 // Pass arguments as intended. Do not pre-calculate distance.
pd_patch_instruction(address branch,address target)1906 void MacroAssembler::pd_patch_instruction(address branch, address target) {
1907 unsigned long stub_inst;
1908 int inst_len = get_instruction(branch, &stub_inst);
1909
1910 set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len);
1911 }
1912
1913
// Extract relative address (aka offset).
// inv_simm16 works for 4-byte instructions only.
// compare and branch instructions are 6-byte and have a 16bit offset "in the middle".
long MacroAssembler::get_pcrel_offset(unsigned long inst) {

  if (MacroAssembler::is_pcrelative_short(inst)) {
    // Distinguish 4-byte from 6-byte encodings: a 4-byte instruction has an
    // empty high word, and its 16-bit offset sits in bits 16..31.
    if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
      return RelAddr::inv_pcrel_off16(inv_simm16(inst));    // 4-byte format.
    } else {
      return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); // 6-byte format, offset in the middle.
    }
  }

  if (MacroAssembler::is_pcrelative_long(inst)) {
    return RelAddr::inv_pcrel_off32(inv_simm32(inst));
  }

  // Not a pc-relative instruction at all: report and die (or return -1 in debug builds).
  print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
#ifdef LUCY_DBG
  VM_Version::z_SIGSEGV();
#else
  ShouldNotReachHere();
#endif
  return -1;
}
1939
// Extract the relative offset of the (pc-relative) instruction located at pc.
// In ASSERT builds, verify first that the instruction really is pc-relative
// and dump diagnostics if it is not.
long MacroAssembler::get_pcrel_offset(address pc) {
  unsigned long inst;
  unsigned int  len = get_instruction(pc, &inst);

#ifdef ASSERT
  long offset;
  if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
    offset = get_pcrel_offset(inst);
  } else {
    offset = -1; // Sentinel: not a pc-relative instruction.
  }

  if (offset == -1) {
    dump_code_range(tty, pc, 32, "not a pcrelative instruction");
#ifdef LUCY_DBG
    VM_Version::z_SIGSEGV();
#else
    ShouldNotReachHere();
#endif
  }
  return offset;
#else
  return get_pcrel_offset(inst);
#endif // ASSERT
}
1965
1966 // Get target address from pc-relative instructions.
get_target_addr_pcrel(address pc)1967 address MacroAssembler::get_target_addr_pcrel(address pc) {
1968 assert(is_pcrelative_long(pc), "not a pcrelative instruction");
1969 return pc + get_pcrel_offset(pc);
1970 }
1971
1972 // Patch pc relative load address.
patch_target_addr_pcrel(address pc,address con)1973 void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
1974 unsigned long inst;
1975 // Offset is +/- 2**32 -> use long.
1976 ptrdiff_t distance = con - pc;
1977
1978 get_instruction(pc, &inst);
1979
1980 if (is_pcrelative_short(inst)) {
1981 *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required.
1982
1983 // Some extra safety net.
1984 if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
1985 print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
1986 dump_code_range(tty, pc, 32, "distance out of range (16bit)");
1987 guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16");
1988 }
1989 return;
1990 }
1991
1992 if (is_pcrelative_long(inst)) {
1993 *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc);
1994
1995 // Some Extra safety net.
1996 if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
1997 print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
1998 dump_code_range(tty, pc, 32, "distance out of range (32bit)");
1999 guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32");
2000 }
2001 return;
2002 }
2003
2004 guarantee(false, "not a pcrelative instruction to patch!");
2005 }
2006
// Save the current PC into result and return it (as a code-buffer address).
// "Current PC" here means the address just behind the basr instruction.
address MacroAssembler::get_PC(Register result) {
  z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
  return pc();
}
2012
// Get current PC + offset.
// Offset given in bytes, must be even!
// "Current PC" here means the address of the larl instruction plus the given offset.
address MacroAssembler::get_PC(Register result, int64_t offset) {
  address here = pc();
  z_larl(result, offset/2); // Save target instruction address in result. larl takes a halfword count.
  return here + offset;
}
2021
// Compute, into register size, the byte length (2, 4, or 6) of the instruction
// located at the address in register pc.
void MacroAssembler::instr_size(Register size, Register pc) {
  // Extract 2 most significant bits of current instruction.
  z_llgc(size, Address(pc));
  z_srl(size, 6);
  // Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6.
  z_ahi(size, 3);
  z_nill(size, 6);
}
2030
// Resize_frame with SP(new) = SP(old) - [offset].
// fp receives (if load_fp) and re-stores the caller's SP as stack linkage.
void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
{
  assert_different_registers(offset, fp, Z_SP);
  // Pick up the frame pointer (caller's SP) before moving Z_SP.
  if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }

  z_sgr(Z_SP, offset);                    // Grow the frame downwards.
  z_stg(fp, _z_abi(callers_sp), Z_SP);    // Re-establish the back chain.
}
2040
// Resize_frame with SP(new) = [newSP] + offset.
// This emitter is useful if we already have calculated a pointer
// into the to-be-allocated stack space, e.g. with special alignment properties,
// but need some additional space, e.g. for spilling.
// newSP is the pre-calculated pointer. It must not be modified.
// fp holds, or is filled with, the frame pointer.
// offset is the additional increment which is added to addr to form the new SP.
// Note: specify a negative value to reserve more space!
// load_fp == true only indicates that fp is not pre-filled with the frame pointer.
//   It does not guarantee that fp contains the frame pointer at the end.
void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
  assert_different_registers(newSP, fp, Z_SP);

  // Pick up the frame pointer (caller's SP) before moving Z_SP.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP);
  }

  add2reg(Z_SP, offset, newSP);           // Z_SP = newSP + offset.
  z_stg(fp, _z_abi(callers_sp), Z_SP);    // Re-establish the back chain.
}
2061
// Resize_frame with SP(new) = [newSP].
// load_fp == true only indicates that fp is not pre-filled with the frame pointer.
//   It does not guarantee that fp contains the frame pointer at the end.
void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
  assert_different_registers(newSP, fp, Z_SP);

  // Pick up the frame pointer (caller's SP) before moving Z_SP.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
  }

  z_lgr(Z_SP, newSP);
  // Re-establish the back chain. Store via newSP when possible: if newSP is
  // Z_R0 it cannot serve as a base register, so fall back to Z_SP (same value).
  if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
    z_stg(fp, _z_abi(callers_sp), newSP);
  } else {
    z_stg(fp, _z_abi(callers_sp), Z_SP);
  }
}
2079
// Resize_frame with SP(new) = SP(old) + offset.
// fp receives (if load_fp) and re-stores the caller's SP as stack linkage.
void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
  assert_different_registers(fp, Z_SP);

  // Pick up the frame pointer (caller's SP) before moving Z_SP.
  if (load_fp) {
    z_lg(fp, _z_abi(callers_sp), Z_SP);
  }
  add64(Z_SP, offset);                    // Adjust the frame.
  z_stg(fp, _z_abi(callers_sp), Z_SP);    // Re-establish the back chain.
}
2090
// Push a stack frame of `bytes' size (register-sized value).
// old_sp receives (if copy_sp) or must already hold the current SP; it is
// stored as the back chain of the new frame.
// bytes_with_inverted_sign == true means `bytes' holds the negated frame size.
void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
#ifdef ASSERT
  assert_different_registers(bytes, old_sp, Z_SP);
  if (!copy_sp) {
    // Caller claimed old_sp already holds the SP; verify.
    z_cgr(old_sp, Z_SP);
    asm_assert_eq("[old_sp]!=[Z_SP]", 0x211);
  }
#endif
  if (copy_sp) { z_lgr(old_sp, Z_SP); }
  if (bytes_with_inverted_sign) {
    z_agr(Z_SP, bytes);   // bytes is negative already: add.
  } else {
    z_sgr(Z_SP, bytes);   // Z_sgfr sufficient, but probably not faster.
  }
  z_stg(old_sp, _z_abi(callers_sp), Z_SP); // Establish the back chain.
}
2107
// Push a stack frame of `bytes' size, rounded up to the frame alignment.
// scratch is used to hold the old SP while establishing the back chain.
// Returns the actual (aligned) frame size pushed.
unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
  long offset = Assembler::align(bytes, frame::alignment_in_bytes);
  assert(offset > 0, "should push a frame with positive size, size = %ld.", offset);
  assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset);

  // We must not write outside the current stack bounds (given by Z_SP).
  // Thus, we have to first update Z_SP and then store the previous SP as stack linkage.
  // We rely on Z_R0 by default to be available as scratch.
  z_lgr(scratch, Z_SP);
  add2reg(Z_SP, -offset);
  z_stg(scratch, _z_abi(callers_sp), Z_SP);
#ifdef ASSERT
  // Just make sure nobody uses the value in the default scratch register.
  // When another register is used, the caller might rely on it containing the frame pointer.
  if (scratch == Z_R0) {
    z_iihf(scratch, 0xbaadbabe);
    z_iilf(scratch, 0xdeadbeef);
  }
#endif
  return offset;
}
2129
2130 // Push a frame of size `bytes' plus abi160 on top.
push_frame_abi160(unsigned int bytes)2131 unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
2132 BLOCK_COMMENT("push_frame_abi160 {");
2133 unsigned int res = push_frame(bytes + frame::z_abi_160_size);
2134 BLOCK_COMMENT("} push_frame_abi160");
2135 return res;
2136 }
2137
// Pop current C frame.
// Restores Z_SP from the back chain stored in the current frame.
void MacroAssembler::pop_frame() {
  BLOCK_COMMENT("pop_frame:");
  Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
}
2143
// Pop current C frame and restore return PC register (Z_R14).
// frame_size_in_bytes is the size of the frame being popped.
void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
  BLOCK_COMMENT("pop_frame_restore_retPC:");
  int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes;
  // If possible, pop frame by add instead of load (a penny saved is a penny got :-).
  if (Displacement::is_validDisp(retPC_offset)) {
    // Fetch return PC directly from the caller's frame, then release ours.
    z_lg(Z_R14, retPC_offset, Z_SP);
    add2reg(Z_SP, frame_size_in_bytes);
  } else {
    // Offset too large for a displacement: pop first, then restore via helper.
    add2reg(Z_SP, frame_size_in_bytes);
    restore_return_pc();
  }
}
2157
call_VM_leaf_base(address entry_point,bool allow_relocation)2158 void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
2159 if (allow_relocation) {
2160 call_c(entry_point);
2161 } else {
2162 call_c_static(entry_point);
2163 }
2164 }
2165
call_VM_leaf_base(address entry_point)2166 void MacroAssembler::call_VM_leaf_base(address entry_point) {
2167 bool allow_relocation = true;
2168 call_VM_leaf_base(entry_point, allow_relocation);
2169 }
2170
// Central VM call emitter: sets up the last_Java_frame, passes the thread as
// ARG1, performs the C call, and afterwards handles popframe/earlyret,
// pending exceptions, and the oop result.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
                                  address  entry_point,
                                  bool     allow_relocation,
                                  bool     check_exceptions) { // Defaults to true.
  // Allow_relocation indicates, if true, that the generated code shall
  // be fit for code relocation or referenced data relocation. In other
  // words: all addresses must be considered variable. PC-relative addressing
  // is not possible then.
  // On the other hand, if (allow_relocation == false), addresses and offsets
  // may be considered stable, enabling us to take advantage of some PC-relative
  // addressing tweaks. These might improve performance and reduce code size.

  // Determine last_java_sp register.
  if (!last_java_sp->is_valid()) {
    last_java_sp = Z_SP;  // Load Z_SP as SP.
  }

  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);

  // ARG1 must hold thread address.
  z_lgr(Z_ARG1, Z_thread);

  address return_pc = NULL;
  if (allow_relocation) {
    return_pc = call_c(entry_point);         // Patchable call site.
  } else {
    return_pc = call_c_static(entry_point);  // Immovable call site.
  }

  reset_last_Java_frame(allow_relocation);

  // C++ interp handles this in the interpreter.
  check_and_handle_popframe(Z_thread);
  check_and_handle_earlyret(Z_thread);

  // Check for pending exceptions.
  if (check_exceptions) {
    // Check for pending exceptions (java_thread is set upon return).
    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));

    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach.

    Label ok;
    z_bre(ok); // Bcondequal is the same as bcondZero.
    call_stub(StubRoutines::forward_exception_entry());
    bind(ok);
  }

  // Get oop result if there is one and reset the value in the thread.
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }

  _last_calls_return_pc = return_pc;  // Wipe out other (error handling) calls.
}
2229
call_VM_base(Register oop_result,Register last_java_sp,address entry_point,bool check_exceptions)2230 void MacroAssembler::call_VM_base(Register oop_result,
2231 Register last_java_sp,
2232 address entry_point,
2233 bool check_exceptions) { // Defaults to true.
2234 bool allow_relocation = true;
2235 call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
2236 }
2237
2238 // VM calls without explicit last_java_sp.
2239
call_VM(Register oop_result,address entry_point,bool check_exceptions)2240 void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
2241 // Call takes possible detour via InterpreterMacroAssembler.
2242 call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
2243 }
2244
// VM call with one Java-level argument (placed in Z_ARG2).
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  call_VM(oop_result, entry_point, check_exceptions);
}
2250
// VM call with two Java-level arguments (Z_ARG2, Z_ARG3).
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  call_VM(oop_result, entry_point, check_exceptions);
}
2258
// VM call with three Java-level arguments (Z_ARG2..Z_ARG4).
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                             Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM(oop_result, entry_point, check_exceptions);
}
2269
// VM static calls without explicit last_java_sp.
// "Static" here means the call site is never relocated (allow_relocation = false).

void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
}
2276
// Static VM call with three Java-level arguments (Z_ARG2..Z_ARG4).
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                                    Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM_static(oop_result, entry_point, check_exceptions);
}
2287
// VM calls with explicit last_java_sp.

void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
}
2294
// VM call with explicit last_java_sp and one Java-level argument (Z_ARG2).
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2300
// VM call with explicit last_java_sp and two Java-level arguments (Z_ARG2, Z_ARG3).
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
                             Register arg_2, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2309
// VM call with explicit last_java_sp and three Java-level arguments (Z_ARG2..Z_ARG4).
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
                             Register arg_2, Register arg_3, bool check_exceptions) {
  // Z_ARG1 is reserved for the thread.
  lgr_if_needed(Z_ARG2, arg_1);
  assert(arg_2 != Z_ARG2, "smashed argument");
  lgr_if_needed(Z_ARG3, arg_2);
  assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
  lgr_if_needed(Z_ARG4, arg_3);
  call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
}
2320
// VM leaf calls.

void MacroAssembler::call_VM_leaf(address entry_point) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_leaf_base(entry_point, true);
}
2327
// Leaf call with one argument (Z_ARG1). noreg means "already in place".
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  call_VM_leaf(entry_point);
}
2332
// Leaf call with two arguments (Z_ARG1, Z_ARG2). noreg means "already in place".
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  call_VM_leaf(entry_point);
}
2339
// Leaf call with three arguments (Z_ARG1..Z_ARG3). noreg means "already in place".
// Asserts guard against an earlier move clobbering a later source register.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
  call_VM_leaf(entry_point);
}
2348
// Static VM leaf calls.
// Really static VM leaf calls are never patched.

void MacroAssembler::call_VM_leaf_static(address entry_point) {
  // Call takes possible detour via InterpreterMacroAssembler.
  call_VM_leaf_base(entry_point, false);
}
2356
// One-argument static VM leaf call. noreg means "argument already in place".
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  call_VM_leaf_static(entry_point);
}
2361
// Two-argument static VM leaf call. noreg means "argument already in place".
// The assert guards against arg_2 having been clobbered by the arg_1 move.
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  call_VM_leaf_static(entry_point);
}
2368
// Three-argument static VM leaf call. noreg means "argument already in place".
// Asserts guard against later source registers being clobbered by
// earlier argument moves.
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
  assert(arg_2 != Z_ARG1, "smashed argument");
  if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
  assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
  if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
  call_VM_leaf_static(entry_point);
}
2377
2378 // Don't use detour via call_c(reg).
// Call a C function at an absolute address.
// Loads the target into scratch register Z_R1 (relocatable load) and
// performs a register-indirect call. Returns the pc after the call.
address MacroAssembler::call_c(address function_entry) {
  load_const(Z_R1, function_entry);
  return call(Z_R1);
}
2383
2384 // Variant for really static (non-relocatable) calls which are never patched.
// Variant for really static (non-relocatable) calls which are never patched.
// Uses a pc-relative absolute-address load, so the code must not move
// after emission. Returns the pc after the call.
address MacroAssembler::call_c_static(address function_entry) {
  load_absolute_address(Z_R1, function_entry);
#if 0 // def ASSERT
  // Verify that call site did not move.
  load_const_optimized(Z_R0, function_entry);
  z_cgr(Z_R1, Z_R0);
  z_brc(bcondEqual, 3);
  z_illtrap(0xba);
#endif
  return call(Z_R1);
}
2396
// Optimized C call via a far-patchable call sequence.
// tocOffset == -2 instructs call_far_patchable() to emit the target to the
// constant pool together with a runtime_call relocation.
// Returns the pc after the call, or NULL if no constant pool entry
// could be allocated.
address MacroAssembler::call_c_opt(address function_entry) {
  bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
  _last_calls_return_pc = success ? pc() : NULL;
  return _last_calls_return_pc;
}
2402
2403 // Identify a call_far_patchable instruction: LARL + LG + BASR
2404 //
2405 // nop ; optionally, if required for alignment
2406 // lgrl rx,A(TOC entry) ; PC-relative access into constant pool
2407 // basr Z_R14,rx ; end of this instruction must be aligned to a word boundary
2408 //
2409 // Code pattern will eventually get patched into variant2 (see below for detection code).
2410 //
is_call_far_patchable_variant0_at(address instruction_addr)2411 bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
2412 address iaddr = instruction_addr;
2413
2414 // Check for the actual load instruction.
2415 if (!is_load_const_from_toc(iaddr)) { return false; }
2416 iaddr += load_const_from_toc_size();
2417
2418 // Check for the call (BASR) instruction, finally.
2419 assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
2420 return is_call_byregister(iaddr);
2421 }
2422
2423 // Identify a call_far_patchable instruction: BRASL
2424 //
2425 // Code pattern to suits atomic patching:
2426 // nop ; Optionally, if required for alignment.
2427 // nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer).
2428 // nop ; For code pattern detection: Prepend each BRASL with a nop.
2429 // brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned !
// Recognize the short (BRASL) flavor of a call_far_patchable sequence:
// filler nops up to the call position, then nop + BRASL.
bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
  // The call itself sits at the end of the fixed-size slot.
  const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());

  // Check for correct number of leading nops.
  address iaddr;
  for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) {
    if (!is_z_nop(iaddr)) { return false; }
  }
  assert(iaddr == call_addr, "sanity");

  // --> Check for call instruction.
  if (is_call_far_pcrelative(call_addr)) {
    assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
    return true;
  }

  return false;
}
2448
2449 // Emit a NOT mt-safely patchable 64 bit absolute call.
2450 // If toc_offset == -2, then the destination of the call (= target) is emitted
2451 // to the constant pool and a runtime_call relocation is added
2452 // to the code buffer.
2453 // If toc_offset != -2, target must already be in the constant pool at
2454 // _ctableStart+toc_offset (a caller can retrieve toc_offset
2455 // from the runtime_call relocation).
2456 // Special handling of emitting to scratch buffer when there is no constant pool.
2457 // Slightly changed code pattern. We emit an additional nop if we would
2458 // not end emitting at a word aligned address. This is to ensure
2459 // an atomically patchable displacement in brasl instructions.
2460 //
2461 // A call_far_patchable comes in different flavors:
2462 // - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
2463 // - LGRL(CP) / BR (address in constant pool, pc-relative accesss)
2464 // - BRASL (relative address of call target coded in instruction)
2465 // All flavors occupy the same amount of space. Length differences are compensated
2466 // by leading nops, such that the instruction sequence always ends at the same
2467 // byte offset. This is required to keep the return offset constant.
2468 // Furthermore, the return address (the end of the instruction sequence) is forced
2469 // to be on a 4-byte boundary. This is required for atomic patching, should we ever
2470 // need to patch the call target of the BRASL flavor.
2471 // RETURN value: false, if no constant pool entry could be allocated, true otherwise.
// Emit a far-patchable call to target (see the large header comment above
// for the flavors and the tocOffset == -2 convention).
// All flavors occupy call_far_patchable_size() bytes; shorter encodings are
// padded with leading nops so that the return offset stays constant.
// Returns false if no constant pool entry could be allocated, true otherwise.
bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
  // Get current pc and ensure word alignment for end of instr sequence.
  const address start_pc = pc();
  const intptr_t start_off = offset();
  assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
  const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
  const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
  // Use the short BRASL encoding only if the target is reachable with a
  // 32-bit relative address and call-sequence reoptimization is enabled.
  const bool emit_relative_call  = !emit_target_to_pool &&
                                   RelAddr::is_in_range_of_RelAddr32(dist) &&
                                   ReoptimizeCallSequences &&
                                   !code_section()->scratch_emit();

  if (emit_relative_call) {
    // Add padding to get the same size as below.
    const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
    unsigned int current_padding;
    for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
    assert(current_padding == padding, "sanity");

    // relative call: len = 2(nop) + 6 (brasl)
    // CodeBlob resize cannot occur in this case because
    // this call is emitted into pre-existing space.
    z_nop(); // Prepend each BRASL with a nop (pattern-detection marker).
    z_brasl(Z_R14, target);
  } else {
    // absolute call: Get address from TOC.
    // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
    if (emit_target_to_pool) {
      // When emitting the call for the first time, we do not need to use
      // the pc-relative version. It will be patched anyway, when the code
      // buffer is copied.
      // Relocation is not needed when !ReoptimizeCallSequences.
      relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
      AddressLiteral dest(target, rt);
      // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills
      // inst_mark(). Reset if possible.
      bool reset_mark = (inst_mark() == pc());
      tocOffset = store_oop_in_toc(dest);
      if (reset_mark) { set_inst_mark(); }
      if (tocOffset == -1) {
        return false; // Couldn't create constant pool entry.
      }
    }
    assert(offset() == start_off, "emit no code before this point!");

    // Resolve the TOC slot address: either relative to the current pc
    // (caller-supplied tocOffset) or relative to the constants section.
    address tocPos = pc() + tocOffset;
    if (emit_target_to_pool) {
      tocPos = code()->consts()->start() + tocOffset;
    }
    load_long_pcrelative(Z_R14, tocPos);
    z_basr(Z_R14, Z_R14); // Call through Z_R14; it then holds the return pc.
  }

#ifdef ASSERT
  // Assert that we can identify the emitted call.
  assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
  assert(offset() == start_off+call_far_patchable_size(), "wrong size");

  if (emit_target_to_pool) {
    assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
           "wrong encoding of dest address");
  }
#endif
  return true; // success
}
2537
2538 // Identify a call_far_patchable instruction.
2539 // For more detailed information see header comment of call_far_patchable.
is_call_far_patchable_at(address instruction_addr)2540 bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
2541 return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL
2542 is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR
2543 }
2544
2545 // Does the call_far_patchable instruction use a pc-relative encoding
2546 // of the call destination?
// Does the call_far_patchable instruction at instruction_addr use a
// pc-relative encoding of the call destination?
bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
  // Variant 2 (BRASL) is the only pc-relative flavor.
  return is_call_far_patchable_variant2_at(instruction_addr);
}
2551
// Recognize the far pc-relative call pattern: one marker nop directly
// followed by a BRASL (every emitted BRASL is prepended with a nop).
bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
  // Prepend each BRASL with a nop.
  return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required.
}
2556
2557 // Set destination address of a call_far_patchable instruction.
// Set destination address of a call_far_patchable instruction by
// re-emitting the whole fixed-size call sequence in place.
// NOTE: this is not mt-safe (see the call_far_patchable header comment).
void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
  ResourceMark rm;

  // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
  int code_size = MacroAssembler::call_far_patchable_size();
  // Assemble directly over the existing instruction bytes.
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  masm.call_far_patchable(dest, tocOffset);
  ICache::invalidate_range(instruction_addr, code_size); // Empty on z.
}
2568
2569 // Get dest address of a call_far_patchable instruction.
// Get the destination address of a call_far_patchable instruction.
// For the BRASL flavor the target is decoded from the instruction itself;
// for the TOC flavor it is read from the constant pool slot.
// Calls ShouldNotReachHere() (after dumping the bytes) if the pattern
// cannot be identified.
address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
  // Dynamic TOC: absolute address in constant pool.
  // Check variant2 first, it is more frequent.

  // Relative address encoded in call instruction.
  if (is_call_far_patchable_variant2_at(instruction_addr)) {
    return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop.

  // Absolute address in constant pool.
  } else if (is_call_far_patchable_variant0_at(instruction_addr)) {
    address iaddr = instruction_addr;

    // The load instruction encodes the (pc-relative) offset of the TOC slot;
    // dereference that slot to obtain the call target.
    long    tocOffset = get_load_const_from_toc_offset(iaddr);
    address tocLoc    = iaddr + tocOffset;
    return *(address *)(tocLoc);
  } else {
    fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
    fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
            *(unsigned long*)instruction_addr,
            *(unsigned long*)(instruction_addr+8),
            call_far_patchable_size());
    Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
    ShouldNotReachHere();
    return NULL;
  }
}
2596
// Emit one alignment nop if required so that a subsequently emitted
// call_far_patchable sequence ends on the required boundary.
void MacroAssembler::align_call_far_patchable(address pc) {
  if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); }
}
2600
// Intentionally empty: no platform-specific earlyret handling is emitted
// on s390.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
2603
// Intentionally empty: no platform-specific popframe handling is emitted
// on s390.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
2606
2607 // Read from the polling page.
2608 // Use TM or TMY instruction, depending on read offset.
2609 // offset = 0: Use TM, safepoint polling.
2610 // offset < 0: Use TMY, profiling safepoint polling.
load_from_polling_page(Register polling_page_address,int64_t offset)2611 void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
2612 if (Immediate::is_uimm12(offset)) {
2613 z_tm(offset, polling_page_address, mask_safepoint);
2614 } else {
2615 z_tmy(offset, polling_page_address, mask_profiling);
2616 }
2617 }
2618
2619 // Check whether z_instruction is a read access to the polling page
2620 // which was emitted by load_from_polling_page(..).
is_load_from_polling_page(address instr_loc)2621 bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
2622 unsigned long z_instruction;
2623 unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2624
2625 if (ilen == 2) { return false; } // It's none of the allowed instructions.
2626
2627 if (ilen == 4) {
2628 if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
2629
2630 int ms = inv_mask(z_instruction,8,32); // mask
2631 int ra = inv_reg(z_instruction,16,32); // base register
2632 int ds = inv_uimm12(z_instruction); // displacement
2633
2634 if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) {
2635 return false; // It's not a z_tm(0, ra, mask_safepoint). Fail.
2636 }
2637
2638 } else { /* if (ilen == 6) */
2639
2640 assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
2641
2642 if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail.
2643
2644 int ms = inv_mask(z_instruction,8,48); // mask
2645 int ra = inv_reg(z_instruction,16,48); // base register
2646 int ds = inv_simm20(z_instruction); // displacement
2647 }
2648
2649 return true;
2650 }
2651
2652 // Extract poll address from instruction and ucontext.
// Extract the effective polling address from a TM/TMY polling instruction
// and the register state captured in the ucontext at fault time:
// base register content (from the ucontext) plus the encoded displacement.
address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
  assert(ucontext != NULL, "must have ucontext");
  ucontext_t* uc = (ucontext_t*) ucontext;
  unsigned long z_instruction;
  unsigned int ilen = get_instruction(instr_loc, &z_instruction);

  if (ilen == 4 && is_z_tm(z_instruction)) {
    int ra = inv_reg(z_instruction, 16, 32);  // base register
    int ds = inv_uimm12(z_instruction);       // displacement
    address addr = (address)uc->uc_mcontext.gregs[ra];
    return addr + ds;
  } else if (ilen == 6 && is_z_tmy(z_instruction)) {
    int ra = inv_reg(z_instruction, 16, 48);  // base register
    int ds = inv_simm20(z_instruction);       // displacement
    address addr = (address)uc->uc_mcontext.gregs[ra];
    return addr + ds;
  }

  // Not a recognized polling instruction: caller contract violated.
  ShouldNotReachHere();
  return NULL;
}
2674
2675 // Extract poll register from instruction.
get_poll_register(address instr_loc)2676 uint MacroAssembler::get_poll_register(address instr_loc) {
2677 unsigned long z_instruction;
2678 unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2679
2680 if (ilen == 4 && is_z_tm(z_instruction)) {
2681 return (uint)inv_reg(z_instruction, 16, 32); // base register
2682 } else if (ilen == 6 && is_z_tmy(z_instruction)) {
2683 return (uint)inv_reg(z_instruction, 16, 48); // base register
2684 }
2685
2686 ShouldNotReachHere();
2687 return 0;
2688 }
2689
// Not supported on s390: this query must never be reached on this platform.
bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
  ShouldNotCallThis();
  return false;
}
2694
2695 // Write serialization page so VM thread can do a pseudo remote membar
2696 // We use the current thread pointer to calculate a thread specific
2697 // offset to write to within the page. This minimizes bus traffic
2698 // due to cache line collision.
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
// Kills tmp1 and tmp2; thread is only read.
void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
  assert_different_registers(tmp1, tmp2);
  // tmp2 = thread-specific page offset, tmp1 = serialization page base.
  z_sllg(tmp2, thread, os::get_serialize_page_shift_count());
  load_const_optimized(tmp1, (long) os::get_memory_serialize_page());

  // Mask the offset into the page; use the 16-bit immediate form when the
  // mask fits, the 32-bit form otherwise. The llg?r zero-extends the result.
  int mask = os::get_serialize_page_mask();
  if (Immediate::is_uimm16(mask)) {
    z_nill(tmp2, mask);
    z_llghr(tmp2, tmp2);
  } else {
    z_nilf(tmp2, mask);
    z_llgfr(tmp2, tmp2);
  }

  // Release barrier, then store into the page at base+offset.
  z_release();
  z_st(Z_R0, 0, tmp2, tmp1);
}
2716
// Emit a safepoint poll; branches to slow_path if a safepoint is pending.
// temp_reg is only used (clobbered) in the global-poll case.
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
  if (SafepointMechanism::uses_thread_local_poll()) {
    // Thread-local poll: test the poll bit in the thread's polling page word.
    // +7 selects the least significant byte of the 8-byte word (Big Endian).
    const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
    // Armed page has poll_bit set.
    z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
    z_brnaz(slow_path);
  } else {
    // Global poll: compare the low-order byte of the 4-byte safepoint state
    // (Big Endian, hence offset 4-1) against _not_synchronized.
    load_const_optimized(temp_reg, SafepointSynchronize::address_of_state());
    z_cli(/*SafepointSynchronize::sz_state()*/4-1, temp_reg, SafepointSynchronize::_not_synchronized);
    z_brne(slow_path);
  }
}
2729
2730 // Don't rely on register locking, always use Z_R1 as scratch register instead.
// Touch the stack page at SP - offset (stack banging for stack-overflow
// detection). Don't rely on register locking, always use Z_R1 as scratch
// register instead.
void MacroAssembler::bang_stack_with_offset(int offset) {
  // Stack grows down, caller passes positive offset.
  assert(offset > 0, "must bang with positive offset");
  if (Displacement::is_validDisp(-offset)) {
    // Offset fits in the instruction's displacement field: single TMY.
    z_tmy(-offset, Z_SP, mask_stackbang);
  } else {
    // Otherwise compute the address in Z_R1 first.
    add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!!
    z_tm(0, Z_R1, mask_stackbang);  // Just banging.
  }
}
2741
// Check whether SP has grown into the reserved stack zone; if so,
// re-enable the zone via the runtime and jump to the delayed
// StackOverflowError stub. Falls through when the zone is untouched.
void MacroAssembler::reserved_stack_check(Register return_pc) {
  // Test if reserved zone needs to be enabled.
  Label no_reserved_zone_enabling;
  assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub.");
  BLOCK_COMMENT("reserved_stack_check {");

  // SP below the reserved-zone activation boundary? Then nothing to do.
  z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset()));
  z_brl(no_reserved_zone_enabling);

  // Enable reserved zone again, throw stack overflow exception.
  save_return_pc();
  push_frame_abi160(0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread);
  pop_frame();
  restore_return_pc();

  load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry());
  // Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc.
  z_br(Z_R1);

  should_not_reach_here();

  bind(no_reserved_zone_enabling);
  BLOCK_COMMENT("} reserved_stack_check");
}
2767
2768 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Allocate an object of the given size in the current thread's TLAB.
// On success, obj holds the start of the new object and the TLAB top is
// bumped; on failure, branches to slow_case.
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Size is either the constant con_size_in_bytes (var_size_in_bytes == noreg)
// or the value in var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t1;
  Register thread = Z_thread;

  // obj = current TLAB top; end = obj + size.
  z_lg(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    z_lay(end, Address(obj, con_size_in_bytes));
  } else {
    z_lay(end, Address(obj, var_size_in_bytes));
  }
  // Does the new end exceed the TLAB limit? Then take the slow path.
  z_cg(end, Address(thread, JavaThread::tlab_end_offset()));
  branch_optimized(bcondHigh, slow_case);

  // Update the tlab top pointer.
  z_stg(end, Address(thread, JavaThread::tlab_top_offset()));

  // Recover var_size_in_bytes if necessary.
  if (var_size_in_bytes == end) {
    z_sgr(var_size_in_bytes, obj);
  }
}
2795
2796 // Emitter for interface method lookup.
2797 // input: recv_klass, intf_klass, itable_index
2798 // output: method_result
2799 // kills: itable_index, temp1_reg, Z_R0, Z_R1
2800 // TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs.
2801 // If the register is still not needed then, remove it.
// Emitter for interface method lookup (itable walk).
//   input:  recv_klass, intf_klass, itable_index
//   output: method_result (only written if return_method is true)
//   kills:  itable_index, temp1_reg, Z_R0, Z_R1
// Branches to no_such_interface if the receiver class does not implement
// the requested interface (IncompatibleClassChangeError case).
// TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs.
//       If the register is still not needed then, remove it.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register temp1_reg,
                                             Label& no_such_interface,
                                             bool return_method) {

  const Register vtable_len = temp1_reg;    // Used to compute itable_entry_addr.
  const Register itable_entry_addr = Z_R1_scratch;
  const Register itable_interface = Z_R0_scratch;

  BLOCK_COMMENT("lookup_interface_method {");

  // Load start of itable entries into itable_entry_addr.
  // The itable starts right behind the vtable, so the vtable length (scaled
  // to bytes) is part of the itable's base offset.
  z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset()));
  z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));

  // Loop over all itable entries until desired interfaceOop(Rinterface) found.
  const int vtable_base_offset = in_bytes(Klass::vtable_start_offset());

  add2reg_with_index(itable_entry_addr,
                     vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(),
                     recv_klass, vtable_len);

  const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
  Label     search;

  bind(search);

  // Handle IncompatibleClassChangeError.
  // If the entry is NULL then we've reached the end of the table
  // without finding the expected interface, so throw an exception.
  load_and_test_long(itable_interface, Address(itable_entry_addr));
  z_bre(no_such_interface);

  // Advance to the next entry; loop while the interface does not match.
  add2reg(itable_entry_addr, itable_offset_search_inc);
  z_cgr(itable_interface, intf_klass);
  z_brne(search);

  // Entry found and itable_entry_addr points to it, get offset of vtable for interface.
  if (return_method) {
    // itable_entry_addr already advanced past the matching entry, hence the
    // -itable_offset_search_inc correction in the displacement below.
    const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
                                      itableOffsetEntry::interface_offset_in_bytes()) -
                                     itable_offset_search_inc;

    // Compute itableMethodEntry and get method and entry point
    // we use addressing with index and displacement, since the formula
    // for computing the entry's offset has a fixed and a dynamic part,
    // the latter depending on the matched interface entry and on the case,
    // that the itable index has been passed as a register, not a constant value.
    int method_offset = itableMethodEntry::method_offset_in_bytes();
                             // Fixed part (displacement), common operand.
    Register itable_offset = method_result;  // Dynamic part (index register).

    if (itable_index.is_register()) {
       // Compute the method's offset in that register, for the formula, see the
       // else-clause below.
       z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize));
       z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
    } else {
      // Displacement increases.
      method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();

      // Load index from itable.
      z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
    }

    // Finally load the method's oop.
    z_lg(method_result, method_offset, itable_offset, recv_klass);
  }
  BLOCK_COMMENT("} lookup_interface_method");
}
2875
2876 // Lookup for virtual method invocation.
// Emitter for virtual method lookup: loads the Method* at the given vtable
// slot of recv_klass into method_result.
// If vtable_index is a register, that register is clobbered (shifted to a
// byte offset).
void MacroAssembler::lookup_virtual_method(Register           recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register           method_result) {
  assert_different_registers(recv_klass, vtable_index.register_or_noreg());
  assert(vtableEntry::size() * wordSize == wordSize,
         "else adjust the scaling in the code below");

  BLOCK_COMMENT("lookup_virtual_method {");

  const int base = in_bytes(Klass::vtable_start_offset());

  if (vtable_index.is_constant()) {
    // Load with base + disp.
    Address vtable_entry_addr(recv_klass,
                              vtable_index.as_constant() * wordSize +
                              base +
                              vtableEntry::method_offset_in_bytes());

    z_lg(method_result, vtable_entry_addr);
  } else {
    // Shift index properly and load with base + index + disp.
    Register vindex = vtable_index.as_register();
    Address  vtable_entry_addr(recv_klass, vindex,
                               base + vtableEntry::method_offset_in_bytes());

    z_sllg(vindex, vindex, exact_log2(wordSize)); // Scale index to byte offset.
    z_lg(method_result, vtable_entry_addr);
  }
  BLOCK_COMMENT("} lookup_virtual_method");
}
2907
2908 // Factor out code to call ic_miss_handler.
2909 // Generate code to call the inline cache miss handler.
2910 //
2911 // In most cases, this code will be generated out-of-line.
2912 // The method parameters are intended to provide some variability.
2913 // ICM - Label which has to be bound to the start of useful code (past any traps).
2914 // trapMarker - Marking byte for the generated illtrap instructions (if any).
2915 // Any value except 0x00 is supported.
2916 // = 0x00 - do not generate illtrap instructions.
2917 // use nops to fill ununsed space.
2918 // requiredSize - required size of the generated code. If the actually
2919 // generated code is smaller, use padding instructions to fill up.
2920 // = 0 - no size requirement, no padding.
2921 // scratch - scratch register to hold branch target address.
2922 //
2923 // The method returns the code offset of the bound label.
// Factor out code to call ic_miss_handler.
// Generate code to call the inline cache miss handler.
//
// In most cases, this code will be generated out-of-line.
// The method parameters are intended to provide some variability.
//   ICM          - Label which has to be bound to the start of useful code (past any traps).
//   trapMarker   - Marking byte for the generated illtrap instructions (if any).
//                  Any value except 0x00 is supported.
//                  = 0x00 - do not generate illtrap instructions.
//                         use nops to fill unused space.
//   requiredSize - required size of the generated code. If the actually
//                  generated code is smaller, use padding instructions to fill up.
//                  = 0 - no size requirement, no padding.
//   scratch      - scratch register to hold branch target address.
//
// The method returns the code offset of the bound label.
unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
  intptr_t startOffset = offset();

  // Prevent entry at content_begin().
  if (trapMarker != 0) {
    z_illtrap(trapMarker);
  }

  // Load address of inline cache miss code into scratch register
  // and branch to cache miss handler.
  BLOCK_COMMENT("IC miss handler {");
  BIND(ICM);
  unsigned int   labelOffset = offset();
  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());

  load_const_optimized(scratch, icmiss);
  z_br(scratch);

  // Fill unused space (with illtraps or nops, see trapMarker above).
  if (requiredSize > 0) {
    while ((offset() - startOffset) < requiredSize) {
      if (trapMarker == 0) {
        z_nop();
      } else {
        z_illtrap(trapMarker);
      }
    }
  }
  BLOCK_COMMENT("} IC miss handler");
  return labelOffset;
}
2955
// Emit the unverified entry point (UEP) of an nmethod: check the inline
// cache (cached klass in Z_inline_cache) against the receiver's klass
// (receiver in Z_ARG1) and branch to ic_miss on mismatch or NULL receiver.
void MacroAssembler::nmethod_UEP(Label& ic_miss) {
  Register ic_reg       = Z_inline_cache;
  int      klass_offset = oopDesc::klass_offset_in_bytes();
  // Explicit NULL check of the receiver is only needed when the implicit
  // check via the klass load below cannot be relied upon.
  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
    if (VM_Version::has_CompareBranch()) {
      z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
    } else {
      z_ltgr(Z_ARG1, Z_ARG1);
      z_bre(ic_miss);
    }
  }
  // Compare cached class against klass from receiver.
  compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false);
  z_brne(ic_miss);
}
2971
// Fast path of the subtype check (sub_klass instanceof super_klass):
// self-check, then the supertype-display probe. Branches to L_success /
// L_failure / L_slow_path as the outcome becomes known; a NULL label
// means "fall through". Kills temp1_reg when must_load_sco.
void MacroAssembler::check_klass_subtype_fast_path(Register   sub_klass,
                                                   Register   super_klass,
                                                   Register   temp1_reg,
                                                   Label*     L_success,
                                                   Label*     L_failure,
                                                   Label*     L_slow_path,
                                                   RegisterOrConstant super_check_offset) {

  const int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
  const int sco_offset = in_bytes(Klass::super_check_offset_offset());

  // sco == -1 means "not supplied": it must be loaded from super_klass.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  bool need_slow_path = (must_load_sco ||
                         super_check_offset.constant_or_zero() == sc_offset);

  // Input registers must not overlap.
  assert_different_registers(sub_klass, super_klass, temp1_reg);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp1_reg != noreg, "supply either a temp or a register offset");
  }

  const Register Rsuper_check_offset = temp1_reg;

  NearLabel L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1 ||
         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
         "at most one NULL in the batch, usually");

  BLOCK_COMMENT("check_klass_subtype_fast_path {");
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);

  // Check the supertype display, which is uint.
  if (must_load_sco) {
    z_llgf(Rsuper_check_offset, sco_offset, super_klass);
    super_check_offset = RegisterOrConstant(Rsuper_check_offset);
  }
  Address super_check_addr(sub_klass, super_check_offset, 0);
  z_cg(super_klass, super_check_addr); // compare w/ displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/

  if (super_check_offset.is_register()) {
    branch_optimized(Assembler::bcondEqual, *L_success);
    // Was the probe at the secondary-super-cache slot? Then it is
    // inconclusive -> slow path, otherwise it is a definitive failure.
    z_cfi(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_slow_path);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_success);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      branch_optimized(Assembler::bcondEqual, *L_success);
    } else {
      branch_optimized(Assembler::bcondNotEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);
#undef local_brc
#undef final_jmp
  BLOCK_COMMENT("} check_klass_subtype_fast_path");
  // fallthru (to slow path)
}
3074
// Slow path of the subtype check: linearly scan the secondary supers array
// of Rsubklass for an entry equal to Rsuperklass. On a hit, the result is
// recorded in the secondary super cache and control transfers to L_success;
// otherwise control transfers to L_failure. Either label (but not both) may
// be NULL, meaning that outcome falls through to the code following this
// emitter. Rarray_ptr and Rlength are killed as temps; Z_R1 is used
// explicitly as the loop increment/end register.
void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
                                                   Register Rsuperklass,
                                                   Register Rarray_ptr,  // tmp
                                                   Register Rlength,     // tmp
                                                   Label* L_success,
                                                   Label* L_failure) {
  // Input registers must not overlap.
  // Also check for R1 which is explicitely used here.
  assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
  NearLabel L_fallthrough, L_loop;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  const int ss_offset = in_bytes(Klass::secondary_supers_offset());
  const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());

  const int length_offset = Array<Klass*>::length_offset_in_bytes();
  const int base_offset   = Array<Klass*>::base_offset_in_bytes();

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                  \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }      \
  else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/

  NearLabel loop_iterate, loop_count, match;

  BLOCK_COMMENT("check_klass_subtype_slow_path {");
  // Load the address of the secondary supers array.
  z_lg(Rarray_ptr, ss_offset, Rsubklass);

  // An empty array cannot contain a match -> fail immediately.
  load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
  branch_optimized(Assembler::bcondZero, *L_failure);

  // Oops in table are NO MORE compressed.
  z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
  z_bre(match);                               // Shortcut for array length = 1.

  // No match yet, so we must walk the array's elements.
  // The loop is driven by BRXLG: Rlength is set to the negative byte span
  // of the remaining elements and stepped by BytesPerWord (kept in Z_R1)
  // until it crosses the end value.
  z_lngfr(Rlength, Rlength);
  z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
  z_llill(Z_R1, BytesPerWord);               // Set increment/end index.
  add2reg(Rlength, 2 * BytesPerWord);        // start index  = -(n-2)*BytesPerWord
  z_slgr(Rarray_ptr, Rlength);               // start addr: +=  (n-2)*BytesPerWord
  z_bru(loop_count);

  BIND(loop_iterate);
  z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
  z_bre(match);
  BIND(loop_count);
  z_brxlg(Rlength, Z_R1, loop_iterate);

  // Rsuperklass not found among secondary super classes -> failure.
  branch_optimized(Assembler::bcondAlways, *L_failure);

  // Got a hit. Return success (zero result). Set cache.
  // Cache load doesn't happen here. For speed it is directly emitted by the compiler.

  BIND(match);

  z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.

  final_jmp(*L_success);

  // Exit to the surrounding code.
  BIND(L_fallthrough);
#undef local_brc
#undef final_jmp
  BLOCK_COMMENT("} check_klass_subtype_slow_path");
}
3145
// Emitter for combining fast and slow path.
// Branches to L_success if sub_klass is a subtype of super_klass;
// falls through otherwise. temp1_reg and temp2_reg are killed.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp1_reg,
                                         Register temp2_reg,
                                         Label& L_success) {
  NearLabel failure;
  BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
  // Fast path: decides trivial cases; a NULL slow-path label makes
  // undecided cases fall through into the slow path emitted below.
  check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
                                &L_success, &failure, NULL);
  // Slow path: scans the secondary supers array. The NULL failure label
  // makes the no-match case fall through to 'failure' below.
  check_klass_subtype_slow_path(sub_klass, super_klass,
                                temp1_reg, temp2_reg, &L_success, NULL);
  BIND(failure);
  BLOCK_COMMENT("} check_klass_subtype");
}
3161
// Increment a counter at counter_address when the eq condition code is
// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
  Label l;
  z_brne(l);                                   // Skip the increment if cc != eq.
  load_const(tmp1_reg, counter_address);       // Materialize the counter's address.
  add2mem_32(Address(tmp1_reg), 1, tmp2_reg);  // 32-bit in-memory increment.
  z_cr(tmp1_reg, tmp1_reg);                    // Set cc to eq.
  bind(l);
}
3172
3173 // Semantics are dependent on the slow_case label:
3174 // If the slow_case label is not NULL, failure to biased-lock the object
3175 // transfers control to the location of the slow_case label. If the
3176 // object could be biased-locked, control is transferred to the done label.
3177 // The condition code is unpredictable.
3178 //
3179 // If the slow_case label is NULL, failure to biased-lock the object results
3180 // in a transfer of control to the done label with a condition code of not_equal.
3181 // If the biased-lock could be successfully obtained, control is transfered to
3182 // the done label with a condition code of equal.
3183 // It is mandatory to react on the condition code At the done label.
3184 //
void MacroAssembler::biased_locking_enter(Register  obj_reg,
                                          Register  mark_reg,
                                          Register  temp_reg,
                                          Register  temp2_reg, // May be Z_RO!
                                          Label    &done,
                                          Label    *slow_case) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);

  Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise.

  BLOCK_COMMENT("biased_locking_enter {");

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
         "biased locking makes assumptions about bit layout");
  // Isolate the lock bits of the mark word and test for the biased pattern.
  z_lr(temp_reg, mark_reg);
  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);
  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
  z_brne(cas_label);  // Try cas if object is not biased, i.e. cannot be biased locked.

  // Compute ((prototype_header | thread) XOR mark) with the age bits
  // masked out. A zero result means the object is already biased toward
  // this thread with a valid epoch.
  load_prototype_header(temp_reg, obj_reg);
  load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place));

  z_ogr(temp_reg, Z_thread);
  z_xgr(temp_reg, mark_reg);
  z_ngr(temp_reg, temp2_reg);
  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg);
    // Restore mark_reg.
    z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
  }
  branch_optimized(Assembler::bcondEqual, done);  // Biased lock obtained, return success.

  Label try_revoke_bias;
  Label try_rebias;
  Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());

  //----------------------------------------------------------------------------
  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  z_tmll(temp_reg, markOopDesc::biased_lock_mask_in_place);
  z_brnaz(try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  z_tmll(temp_reg, markOopDesc::epoch_mask_in_place);
  z_brnaz(try_rebias);

  //----------------------------------------------------------------------------
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place |
         markOopDesc::epoch_mask_in_place);
  z_lgr(temp_reg, Z_thread);
  z_llgfr(mark_reg, mark_reg);
  z_ogr(temp_reg, mark_reg);

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: unbiased header -> unbiased header | thread.
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(),
                         temp_reg, temp2_reg);
  }
  if (slow_case != NULL) {
    branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
  }
  branch_optimized(Assembler::bcondAlways, done); // Biased lock status given in condition code.

  //----------------------------------------------------------------------------
  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.

  z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  load_prototype_header(temp_reg, obj_reg);
  z_llgfr(mark_reg, mark_reg);

  z_ogr(temp_reg, Z_thread);

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: current (epoch-expired) header -> prototype | thread.
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.

  if (PrintBiasedLockingStatistics) {
    increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg);
  }
  if (slow_case != NULL) {
    branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
  }
  z_bru(done);           // Biased lock status given in condition code.

  //----------------------------------------------------------------------------
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  load_prototype_header(temp_reg, obj_reg);

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // CAS: current header -> unbiased prototype header.
  z_csg(mark_reg, temp_reg, 0, obj_reg);

  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    // z_cgr(mark_reg, temp2_reg);
    increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg);
  }

  bind(cas_label);
  BLOCK_COMMENT("} biased_locking_enter");
}
3342
// Emits the biased-locking unlock check: if the object's mark word still
// carries the biased-lock pattern, unlocking is a no-op and control
// branches to 'done' with cc = EQ. 'mark_addr' holds the address of the
// mark word (the object, since _mark is at offset 0); temp_reg is killed.
void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) {
  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  BLOCK_COMMENT("biased_locking_exit {");

  z_lg(temp_reg, 0, mark_addr);
  z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place);

  z_chi(temp_reg, markOopDesc::biased_lock_pattern);
  z_bre(done);
  BLOCK_COMMENT("} biased_locking_exit");
}
3359
// Emits the compiler's fast path for monitorenter. 'box' is the on-stack
// BasicLock; temp1 and temp2 are killed. On exit, CR = EQ indicates the
// lock was acquired, NE means the caller must take the slow path
// (_complete_monitor_locking_Java).
void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
  Register displacedHeader = temp1;
  Register currentHeader = temp1;
  Register temp = temp2;
  NearLabel done, object_has_monitor;

  BLOCK_COMMENT("compiler_fast_lock_object {");

  // Load markOop from oop into mark.
  z_lg(displacedHeader, 0, oop);

  if (try_bias) {
    biased_locking_enter(oop, displacedHeader, temp, Z_R0, done);
  }

  // Handle existing monitor.
  if ((EmitSync & 0x01) == 0) {
    // The object has an existing monitor iff (mark & monitor_value) != 0.
    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
    z_lr(temp, displacedHeader);
    z_nill(temp, markOopDesc::monitor_value);
    z_brne(object_has_monitor);
  }

  // Set mark to markOop | markOopDesc::unlocked_value.
  z_oill(displacedHeader, markOopDesc::unlocked_value);

  // Load Compare Value application register.

  // Initialize the box (must happen before we update the object mark).
  z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);

  // Memory Fence (in cmpxchgd)
  // Compare object markOop with mark and if equal exchange scratch1 with object markOop.

  // If the compare-and-swap succeeded, then we found an unlocked object and we
  // have now locked it.
  z_csg(displacedHeader, box, 0, oop);
  assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture.
  z_bre(done);

  // We did not see an unlocked object so try the fast recursive case.

  // (mark - SP), masked with the page mask plus the lock bits, is zero
  // iff the mark points into our own stack, i.e. this is a recursive lock.
  z_sgr(currentHeader, Z_SP);
  load_const_optimized(temp, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));

  z_ngr(currentHeader, temp);
  //   z_brne(done);
  //   z_release();
  // Store the masked result (0 on recursion) into the box; it also
  // leaves the condition code set for the caller.
  z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);

  z_bru(done);

  if ((EmitSync & 0x01) == 0) {
    Register zero = temp;
    Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
    bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    z_lghi(zero, 0);
    // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
    z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
    // Store a non-null value into the box.
    z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
#ifdef ASSERT
    z_brne(done);
    // We've acquired the monitor, check some invariants.
    // Invariant 1: _recursions should be 0.
    asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
                            "monitor->_recursions should be 0", -1);
    z_ltgr(zero, zero); // Set CR=EQ.
#endif
  }
  bind(done);

  BLOCK_COMMENT("} compiler_fast_lock_object");
  // If locking was successful, CR should indicate 'EQ'.
  // The compiler or the native wrapper generates a branch to the runtime call
  // _complete_monitor_locking_Java.
}
3442
// Emits the compiler's fast path for monitorexit. 'box' is the on-stack
// BasicLock; temp1 and temp2 are killed. On exit, CR = EQ indicates
// success, NE indicates failure (the caller then takes the slow path).
void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
  Register displacedHeader = temp1;
  Register currentHeader = temp2;
  Register temp = temp1;
  Register monitor = temp2;

  Label done, object_has_monitor;

  BLOCK_COMMENT("compiler_fast_unlock_object {");

  if (try_bias) {
    biased_locking_exit(oop, currentHeader, done);
  }

  // Find the lock address and load the displaced header from the stack.
  // if the displaced header is zero, we have a recursive unlock.
  load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
  z_bre(done);

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    // The object has an existing monitor iff (mark & monitor_value) != 0.
    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
    z_nill(currentHeader, markOopDesc::monitor_value);
    z_brne(object_has_monitor);
  }

  // Check if it is still a light weight lock, this is true if we see
  // the stack address of the basicLock in the markOop of the object
  // copy box to currentHeader such that csg does not kill it.
  z_lgr(currentHeader, box);
  // CAS: box -> displaced header; restores the original mark word.
  z_csg(currentHeader, displacedHeader, 0, oop);
  z_bru(done); // Csg sets CR as desired.

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    bind(object_has_monitor);
    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);    // CurrentHeader is tagged with monitor_value set.
    // The monitor can only be released here if it has no recursions,
    // no owner mismatch, and no queued waiters; each test branches to
    // 'done' with CR = NE on failure.
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    z_brne(done);
    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    z_brne(done);
    // Release the monitor: barrier, then clear the owner field.
    z_release();
    z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
  }

  bind(done);

  BLOCK_COMMENT("} compiler_fast_unlock_object");
  // flag == EQ indicates success
  // flag == NE indicates failure
}
3500
resolve_jobject(Register value,Register tmp1,Register tmp2)3501 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3502 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3503 bs->resolve_jobject(this, value, tmp1, tmp2);
3504 }
3505
// Last_Java_sp must comply to the rules in frame_s390.hpp.
// Publishes the frame anchor in the current thread: stores last_Java_pc
// (if a register is given) and then last_Java_sp. The SP store must come
// last, because once last_Java_sp is visible, has_last_Java_frame is true
// and readers will look at the other anchor fields.
void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
  BLOCK_COMMENT("set_last_Java_frame {");

  // Always set last_Java_pc and flags first because once last_Java_sp
  // is visible has_last_Java_frame is true and users will look at the
  // rest of the fields. (Note: flags should always be zero before we
  // get here so doesn't need to be set.)

  // Verify that last_Java_pc was zeroed on return to Java.
  if (allow_relocation) {
    asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
                            Z_thread,
                            "last_Java_pc not zeroed before leaving Java",
                            0x200);
  } else {
    asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
                                   Z_thread,
                                   "last_Java_pc not zeroed before leaving Java",
                                   0x200);
  }

  // When returning from calling out from Java mode the frame anchor's
  // last_Java_pc will always be set to NULL. It is set here so that
  // if we are doing a call to native (not VM) that we capture the
  // known pc and don't have to rely on the native call having a
  // standard frame linkage where we can find the pc.
  if (last_Java_pc!=noreg) {
    z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
  }

  // This membar release is not required on z/Architecture, since the sequence of stores
  // is maintained. Nevertheless, we leave it in to document the required ordering.
  // The implementation of z_release() should be empty.
  // z_release();

  z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
  BLOCK_COMMENT("} set_last_Java_frame");
}
3545
reset_last_Java_frame(bool allow_relocation)3546 void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3547 BLOCK_COMMENT("reset_last_Java_frame {");
3548
3549 if (allow_relocation) {
3550 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3551 Z_thread,
3552 "SP was not set, still zero",
3553 0x202);
3554 } else {
3555 asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3556 Z_thread,
3557 "SP was not set, still zero",
3558 0x202);
3559 }
3560
3561 // _last_Java_sp = 0
3562 // Clearing storage must be atomic here, so don't use clear_mem()!
3563 store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3564
3565 // _last_Java_pc = 0
3566 store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3567
3568 BLOCK_COMMENT("} reset_last_Java_frame");
3569 return;
3570 }
3571
// Publishes the current code position and the given SP as the frame
// anchor. tmp1 receives the current PC and is killed.
void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
  assert_different_registers(sp, tmp1);

  // We cannot trust that code generated by the C++ compiler saves R14
  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
  // Therefore we load the PC into tmp1 and let set_last_Java_frame() save
  // it into the frame anchor.
  get_PC(tmp1);
  set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation);
}
3583
// Stores new_state into the current thread's _thread_state field,
// preceded by a release barrier so that preceding stores are visible
// before the state change.
void MacroAssembler::set_thread_state(JavaThreadState new_state) {
  z_release();

  // The state is stored as a 32-bit constant; both asserts guard the
  // assumptions the store instruction relies on.
  assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
  assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
  store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false);
}
3591
get_vm_result(Register oop_result)3592 void MacroAssembler::get_vm_result(Register oop_result) {
3593 verify_thread();
3594
3595 z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
3596 clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*));
3597
3598 verify_oop(oop_result);
3599 }
3600
get_vm_result_2(Register result)3601 void MacroAssembler::get_vm_result_2(Register result) {
3602 verify_thread();
3603
3604 z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset()));
3605 clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*));
3606 }
3607
// We require that C code which does not return a value in vm_result will
// leave it undisturbed.
// Stores oop_result into the current thread's vm_result slot.
void MacroAssembler::set_vm_result(Register oop_result) {
  z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
}
3613
// Explicit null checks (used for method handle code).
// With ImplicitNullChecks disabled, emits an explicit compare and jumps
// to the interpreter's NullPointerException entry (clobbering reg with
// the entry address). Otherwise, for offsets too large for the OS trap
// mechanism, provokes the OS null exception by touching M[reg].
void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
  if (!ImplicitNullChecks) {
    NearLabel ok;

    compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok);

    // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address).
    address exception_entry = Interpreter::throw_NullPointerException_entry();
    load_absolute_address(reg, exception_entry);
    z_br(reg);

    bind(ok);
  } else {
    if (needs_explicit_null_check((intptr_t)offset)) {
      // Provoke OS NULL exception if reg = NULL by
      // accessing M[reg] w/o changing any registers.
      z_lg(tmp, 0, reg);
    }
    // else
    //   Nothing to do, (later) access of M[reg + offset]
    //   will provoke OS NULL exception if reg = NULL.
  }
}
3638
3639 //-------------------------------------
3640 // Compressed Klass Pointers
3641 //-------------------------------------
3642
3643 // Klass oop manipulations if compressed.
// Klass oop manipulations if compressed.
// Encodes the klass pointer in src (or dst if src == noreg) into its
// narrow form in dst: subtract the encoding base (if any), then shift
// right. The klass must not be NULL. Z_R0 may be clobbered when the
// base has set bits in both halves.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
  address  base    = Universe::narrow_klass_base();
  int      shift   = Universe::narrow_klass_shift();
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass encoder {");

#ifdef ASSERT
  Label ok;
  z_tmll(current, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xee);
  z_illtrap(0xee);
  bind(ok);
#endif

  if (base != NULL) {
    // Subtract the base with the cheapest instruction the base's bit
    // pattern allows: high-half-only, low-half-only, or full 64-bit.
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      lgr_if_needed(dst, current);
      z_aih(dst, -((int)base_h));     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      lgr_if_needed(dst, current);
      z_agfi(dst, -(int)base_l);
    } else {
      load_const(Z_R0, base);
      lgr_if_needed(dst, current);
      z_sgr(dst, Z_R0);
    }
    current = dst;
  }
  if (shift != 0) {
    assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
    z_srlg(dst, current, shift);
    current = dst;
  }
  lgr_if_needed(dst, current); // Move may be required (if neither base nor shift != 0).

  BLOCK_COMMENT("} cKlass encoder");
}
3689
3690 // This function calculates the size of the code generated by
3691 // decode_klass_not_null(register dst, Register src)
3692 // when (Universe::heap() != NULL). Hence, if the instructions
3693 // it generates change, then this method needs to be updated.
instr_size_for_decode_klass_not_null()3694 int MacroAssembler::instr_size_for_decode_klass_not_null() {
3695 address base = Universe::narrow_klass_base();
3696 int shift_size = Universe::narrow_klass_shift() == 0 ? 0 : 6; /* sllg */
3697 int addbase_size = 0;
3698 assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3699
3700 if (base != NULL) {
3701 unsigned int base_h = ((unsigned long)base)>>32;
3702 unsigned int base_l = (unsigned int)((unsigned long)base);
3703 if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3704 addbase_size += 6; /* aih */
3705 } else if ((base_h == 0) && (base_l != 0)) {
3706 addbase_size += 6; /* algfi */
3707 } else {
3708 addbase_size += load_const_size();
3709 addbase_size += 4; /* algr */
3710 }
3711 }
3712 #ifdef ASSERT
3713 addbase_size += 10;
3714 addbase_size += 2; // Extra sigill.
3715 #endif
3716 return addbase_size + shift_size;
3717 }
3718
// !!! If the instructions that get generated here change
// then function instr_size_for_decode_klass_not_null()
// needs to get updated.
// This variant of decode_klass_not_null() must generate predictable code!
// The code must only depend on globally known parameters.
// Decodes the narrow klass in dst in place: shift left, then add the
// encoding base. The narrow klass must not be 0. Z_R0 may be clobbered.
void MacroAssembler::decode_klass_not_null(Register dst) {
  address base    = Universe::narrow_klass_base();
  int     shift   = Universe::narrow_klass_shift();
  int     beg_off = offset();
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass decoder (const size) {");

  if (shift != 0) { // Shift required?
    z_sllg(dst, dst, shift);
  }
  if (base != NULL) {
    // Add the base with the cheapest instruction the base's bit pattern
    // allows. Must match instr_size_for_decode_klass_not_null().
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      z_aih(dst, base_h);     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      z_algfi(dst, base_l);   // Base has no set bits in upper half.
    } else {
      load_const(Z_R0, base); // Base has set bits everywhere.
      z_algr(dst, Z_R0);
    }
  }

#ifdef ASSERT
  Label ok;
  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xd1);
  z_illtrap(0xd1);
  bind(ok);
#endif
  assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");

  BLOCK_COMMENT("} cKlass decoder (const size)");
}
3763
// This variant of decode_klass_not_null() is for cases where
//  1) the size of the generated instructions may vary
//  2) the result is (potentially) stored in a register different from the source.
// Decodes the narrow klass in src (or dst if src == noreg) into dst:
// shift left, then add the encoding base. The narrow klass must not be 0.
// Z_R0 may be clobbered.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  address base  = Universe::narrow_klass_base();
  int     shift = Universe::narrow_klass_shift();
  assert(UseCompressedClassPointers, "only for compressed klass ptrs");

  BLOCK_COMMENT("cKlass decoder {");

  if (src == noreg) src = dst;

  if (shift != 0) { // Shift or at least move required?
    z_sllg(dst, src, shift);
  } else {
    lgr_if_needed(dst, src);
  }

  if (base != NULL) {
    // Add the base with the cheapest instruction the base's bit pattern allows.
    unsigned int base_h = ((unsigned long)base)>>32;
    unsigned int base_l = (unsigned int)((unsigned long)base);
    if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
      z_aih(dst, base_h);     // Base has no set bits in lower half.
    } else if ((base_h == 0) && (base_l != 0)) {
      z_algfi(dst, base_l);   // Base has no set bits in upper half.
    } else {
      load_const_optimized(Z_R0, base); // Base has set bits everywhere.
      z_algr(dst, Z_R0);
    }
  }

#ifdef ASSERT
  Label ok;
  z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
  z_brc(Assembler::bcondAllZero, ok);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(0xd2);
  z_illtrap(0xd2);
  bind(ok);
#endif
  BLOCK_COMMENT("} cKlass decoder");
}
3808
// Load the klass of the object located at `mem` into `klass`.
// With compressed class pointers, the 32-bit narrow klass is loaded
// zero-extended and then decoded. The oop is NOT null-checked here.
void MacroAssembler::load_klass(Register klass, Address mem) {
  if (UseCompressedClassPointers) {
    z_llgf(klass, mem);
    // Attention: no null check here!
    decode_klass_not_null(klass);
  } else {
    z_lg(klass, mem);
  }
}
3818
// Load the klass of the object pointed to by `src_oop` into `klass`.
// With compressed class pointers, the 32-bit narrow klass is loaded
// zero-extended and then decoded. The oop is NOT null-checked here.
void MacroAssembler::load_klass(Register klass, Register src_oop) {
  if (UseCompressedClassPointers) {
    z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
    // Attention: no null check here!
    decode_klass_not_null(klass);
  } else {
    z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
  }
}
3828
// Load the prototype mark word of Rsrc_oop's klass into Rheader
// (Rheader = Rsrc_oop->klass()->prototype_header()). Used by biased locking.
void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) {
  assert_different_registers(Rheader, Rsrc_oop);
  load_klass(Rheader, Rsrc_oop);
  z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset()));
}
3834
store_klass(Register klass,Register dst_oop,Register ck)3835 void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
3836 if (UseCompressedClassPointers) {
3837 assert_different_registers(dst_oop, klass, Z_R0);
3838 if (ck == noreg) ck = klass;
3839 encode_klass_not_null(ck, klass);
3840 z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3841 } else {
3842 z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3843 }
3844 }
3845
store_klass_gap(Register s,Register d)3846 void MacroAssembler::store_klass_gap(Register s, Register d) {
3847 if (UseCompressedClassPointers) {
3848 assert(s != d, "not enough registers");
3849 // Support s = noreg.
3850 if (s != noreg) {
3851 z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
3852 } else {
3853 z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
3854 }
3855 }
3856 }
3857
3858 // Compare klass ptr in memory against klass ptr in register.
3859 //
3860 // Rop1 - klass in register, always uncompressed.
3861 // disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
3862 // Rbase - Base address of cKlass in memory.
3863 // maybeNULL - True if Rop1 possibly is a NULL.
void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) {

  BLOCK_COMMENT("compare klass ptr {");

  if (UseCompressedClassPointers) {
    const int shift = Universe::narrow_klass_shift();
    address base = Universe::narrow_klass_base();

    assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift");
    assert_different_registers(Rop1, Z_R0);
    assert_different_registers(Rop1, Rbase, Z_R1);

    // First encode register oop and then compare with cOop in memory.
    // This sequence saves an unnecessary cOop load and decode.
    if (base == NULL) {
      if (shift == 0) {
        z_cl(Rop1, disp, Rbase);    // Unscaled
      } else {
        z_srlg(Z_R0, Rop1, shift);  // ZeroBased
        z_cl(Z_R0, disp, Rbase);
      }
    } else {                        // HeapBased
#ifdef ASSERT
      bool used_R0 = true;
      bool used_R1 = true;
#endif
      Register current = Rop1;
      Label done;

      if (maybeNULL) {      // NULL ptr must be preserved!
        z_ltgr(Z_R0, current);  // NULL encodes to NULL: skip the encoding below.
        z_bre(done);
        current = Z_R0;
      }

      // Subtract the klass base; choose the cheapest sequence depending on
      // which 32-bit halves of the base address contain set bits.
      unsigned int base_h = ((unsigned long)base)>>32;
      unsigned int base_l = (unsigned int)((unsigned long)base);
      if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
        lgr_if_needed(Z_R0, current);
        z_aih(Z_R0, -((int)base_h));    // Base has no set bits in lower half.
      } else if ((base_h == 0) && (base_l != 0)) {
        lgr_if_needed(Z_R0, current);
        z_agfi(Z_R0, -(int)base_l);     // Base has no set bits in upper half.
      } else {
        // Base has set bits everywhere: subtract by adding its complement.
        int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
        add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
      }

      if (shift != 0) {
        z_srlg(Z_R0, Z_R0, shift);
      }
      bind(done);
      z_cl(Z_R0, disp, Rbase);
#ifdef ASSERT
      // Debug builds: clobber the scratch regs to expose hidden dependencies.
      if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
      if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
#endif
    }
  } else {
    z_clg(Rop1, disp, Z_R0, Rbase);  // Z_R0 as index means "no index register".
  }
  BLOCK_COMMENT("} compare klass ptr");
}
3927
3928 //---------------------------
3929 // Compressed oops
3930 //---------------------------
3931
// Compress the oop in place; the oop may be NULL (NULL encodes to NULL).
void MacroAssembler::encode_heap_oop(Register oop) {
  oop_encoder(oop, oop, true /*maybe null*/);
}
3935
// Compress the oop in place; the caller guarantees the oop is not NULL,
// which allows a shorter code sequence (no NULL-preserving branch).
void MacroAssembler::encode_heap_oop_not_null(Register oop) {
  oop_encoder(oop, oop, false /*not null*/);
}
3939
3940 // Called with something derived from the oop base. e.g. oop_base>>3.
get_oop_base_pow2_offset(uint64_t oop_base)3941 int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
3942 unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff;
3943 unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff;
3944 unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff;
3945 unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff;
3946 unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1)
3947 + (oop_base_lh == 0 ? 0:1)
3948 + (oop_base_hl == 0 ? 0:1)
3949 + (oop_base_hh == 0 ? 0:1);
3950
3951 assert(oop_base != 0, "This is for HeapBased cOops only");
3952
3953 if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2.
3954 uint64_t pow2_offset = 0x10000 - oop_base_ll;
3955 if (pow2_offset < 0x8000) { // This might not be necessary.
3956 uint64_t oop_base2 = oop_base + pow2_offset;
3957
3958 oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff;
3959 oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff;
3960 oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff;
3961 oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff;
3962 n_notzero_parts = (oop_base_ll == 0 ? 0:1) +
3963 (oop_base_lh == 0 ? 0:1) +
3964 (oop_base_hl == 0 ? 0:1) +
3965 (oop_base_hh == 0 ? 0:1);
3966 if (n_notzero_parts == 1) {
3967 assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
3968 return -pow2_offset;
3969 }
3970 }
3971 }
3972 return 0;
3973 }
3974
// If base address is offset from a straight power of two by just a few pages,
// return this offset to the caller for a possible later composite add.
// TODO/FIX: will only work correctly for 4k pages.
//
// Loads Rbase = oop_base - pow2_offset and returns pow2_offset, which the
// caller must add back (see add2reg_with_index() uses at the call sites).
int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
  int pow2_offset = get_oop_base_pow2_offset(oop_base);

  load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.

  return pow2_offset;
}
3985
// Loads Rbase = -(oop_base - offset), i.e. the two's complement of the
// adjusted base, so that a subsequent ADD subtracts the base. Returns the
// negated pow2_offset for the caller's composite add.
int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
  int offset = get_oop_base(Rbase, oop_base);
  z_lcgr(Rbase, Rbase);  // Negate: Rbase = -(oop_base - offset).
  return -offset;
}
3991
// Compare compressed oop in memory against oop in register.
// Rop1 - Oop in register.
// mem  - Address of the cOop in memory (base, index, displacement).
// maybeNULL - True if Rop1 possibly is a NULL.
// Note: a maybeNULLtarget (branch target for Rop1 == NULL, if flow control shall
// NOT continue with the compare instruction) is not supported by this variant.
void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) {
  Register Rbase = mem.baseOrR0();
  Register Rindex = mem.indexOrR0();
  int64_t disp = mem.disp();

  const int shift = Universe::narrow_oop_shift();
  address base = Universe::narrow_oop_base();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this method");
  assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
  assert_different_registers(Rop1, Z_R0);
  assert_different_registers(Rop1, Rbase, Z_R1);
  assert_different_registers(Rop1, Rindex, Z_R1);

  BLOCK_COMMENT("compare heap oop {");

  // First encode register oop and then compare with cOop in memory.
  // This sequence saves an unnecessary cOop load and decode.
  if (base == NULL) {
    if (shift == 0) {
      z_cl(Rop1, disp, Rindex, Rbase);  // Unscaled
    } else {
      z_srlg(Z_R0, Rop1, shift);        // ZeroBased
      z_cl(Z_R0, disp, Rindex, Rbase);
    }
  } else {                              // HeapBased
#ifdef ASSERT
    bool used_R0 = true;
    bool used_R1 = true;
#endif
    Label done;
    // Z_R1 = -(base - pow2_offset): adding it subtracts the heap base.
    int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));

    if (maybeNULL) {       // NULL ptr must be preserved!
      z_ltgr(Z_R0, Rop1);  // NULL encodes to NULL: skip the encoding below.
      z_bre(done);
    }

    add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1);  // Subtract base by adding complement.
    z_srlg(Z_R0, Z_R0, shift);                          // No-op when shift == 0.

    bind(done);
    z_cl(Z_R0, disp, Rindex, Rbase);
#ifdef ASSERT
    // Debug builds: clobber the scratch regs to expose hidden dependencies.
    if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
    if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
#endif
  }
  BLOCK_COMMENT("} compare heap oop");
}
4049
access_store_at(BasicType type,DecoratorSet decorators,const Address & addr,Register val,Register tmp1,Register tmp2,Register tmp3)4050 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
4051 const Address& addr, Register val,
4052 Register tmp1, Register tmp2, Register tmp3) {
4053 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4054 ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator");
4055 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4056 decorators = AccessInternal::decorator_fixup(decorators);
4057 bool as_raw = (decorators & AS_RAW) != 0;
4058 if (as_raw) {
4059 bs->BarrierSetAssembler::store_at(this, decorators, type,
4060 addr, val,
4061 tmp1, tmp2, tmp3);
4062 } else {
4063 bs->store_at(this, decorators, type,
4064 addr, val,
4065 tmp1, tmp2, tmp3);
4066 }
4067 }
4068
access_load_at(BasicType type,DecoratorSet decorators,const Address & addr,Register dst,Register tmp1,Register tmp2,Label * is_null)4069 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
4070 const Address& addr, Register dst,
4071 Register tmp1, Register tmp2, Label *is_null) {
4072 assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4073 ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator");
4074 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4075 decorators = AccessInternal::decorator_fixup(decorators);
4076 bool as_raw = (decorators & AS_RAW) != 0;
4077 if (as_raw) {
4078 bs->BarrierSetAssembler::load_at(this, decorators, type,
4079 addr, dst,
4080 tmp1, tmp2, is_null);
4081 } else {
4082 bs->load_at(this, decorators, type,
4083 addr, dst,
4084 tmp1, tmp2, is_null);
4085 }
4086 }
4087
// Load a (possibly compressed) heap oop from `a` into `dest`, applying the
// GC load barrier. Convenience wrapper around access_load_at() with IN_HEAP.
void MacroAssembler::load_heap_oop(Register dest, const Address &a,
                                   Register tmp1, Register tmp2,
                                   DecoratorSet decorators, Label *is_null) {
  access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null);
}
4093
// Store the oop in `Roop` to heap location `a`, applying the GC store
// barrier. Convenience wrapper around access_store_at() with IN_HEAP.
void MacroAssembler::store_heap_oop(Register Roop, const Address &a,
                                    Register tmp1, Register tmp2, Register tmp3,
                                    DecoratorSet decorators) {
  access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3);
}
4099
4100 //-------------------------------------------------
4101 // Encode compressed oop. Generally usable encoder.
4102 //-------------------------------------------------
4103 // Rsrc - contains regular oop on entry. It remains unchanged.
4104 // Rdst - contains compressed oop on exit.
4105 // Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged.
4106 //
4107 // Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
4108 // Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
4109 //
4110 // only32bitValid is set, if later code only uses the lower 32 bits. In this
4111 // case we must not fix the upper 32 bits.
void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
                                 Register Rbase, int pow2_offset, bool only32bitValid) {

  const address oop_base = Universe::narrow_oop_base();
  const int oop_shift = Universe::narrow_oop_shift();
  const bool disjoint = Universe::narrow_oop_base_disjoint();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder");
  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");

  // Easy case: no base to subtract (zero-based or disjoint base);
  // a shift (or plain move) is all it takes.
  if (disjoint || (oop_base == NULL)) {
    BLOCK_COMMENT("cOop encoder zeroBase {");
    if (oop_shift == 0) {
      if (oop_base != NULL && !only32bitValid) {
        z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
      } else {
        lgr_if_needed(Rdst, Rsrc);
      }
    } else {
      z_srlg(Rdst, Rsrc, oop_shift);
      if (oop_base != NULL && !only32bitValid) {
        z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
      }
    }
    BLOCK_COMMENT("} cOop encoder zeroBase");
    return;
  }

  bool used_R0 = false;
  bool used_R1 = false;

  BLOCK_COMMENT("cOop encoder general {");
  assert_different_registers(Rdst, Z_R1);
  assert_different_registers(Rsrc, Rbase);
  if (maybeNULL) {
    Label done;
    // We reorder shifting and subtracting, so that we can compare
    // and shift in parallel:
    //
    // cycle 0: potential LoadN, base = <const>
    // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0)
    // cycle 2: if (cr) br, dst = dst + base + offset

    // Get oop_base components.
    if (pow2_offset == -1) {  // -1 signals: base not preloaded by caller.
      if (Rdst == Rbase) {
        // Rdst and Rbase clash; redirect one of them to a scratch register.
        if (Rdst == Z_R1 || Rsrc == Z_R1) {
          Rbase = Z_R0;
          used_R0 = true;
        } else {
          Rdst = Z_R1;
          used_R1 = true;
        }
      }
      if (Rbase == Z_R1) {
        used_R1 = true;
      }
      pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
    }
    assert_different_registers(Rdst, Rbase);

    // Check for NULL oop (must be left alone) and shift.
    if (oop_shift != 0) { // Shift out alignment bits
      if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set.
        z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
      } else {
        z_srlg(Rdst, Rsrc, oop_shift);
        z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero.
        // This probably is faster, as it does not write a register. No!
        // z_cghi(Rsrc, 0);
      }
    } else {
      z_ltgr(Rdst, Rsrc); // Move NULL to result register.
    }
    z_bre(done);  // NULL encodes to NULL; skip base subtraction.

    // Subtract oop_base components.
    if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
      // LAY (via add2reg_with_index) cannot use R0 as base/index; add in two steps.
      z_algr(Rdst, Rbase);
      if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
    } else {
      add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
    }
    if (!only32bitValid) {
      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
    }
    bind(done);

  } else { // not null
    // Get oop_base components.
    if (pow2_offset == -1) {  // -1 signals: base not preloaded by caller.
      pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
    }

    // Subtract oop_base components and shift.
    if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
      // Don't use lay instruction.
      if (Rdst == Rsrc) {
        z_algr(Rdst, Rbase);
      } else {
        lgr_if_needed(Rdst, Rbase);
        z_algr(Rdst, Rsrc);
      }
      if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
    } else {
      add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
    }
    if (oop_shift != 0) { // Shift out alignment bits.
      z_srlg(Rdst, Rdst, oop_shift);
    }
    if (!only32bitValid) {
      z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
    }
  }
#ifdef ASSERT
  // Debug builds: clobber used scratch regs to expose hidden dependencies.
  if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
  if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
#endif
  BLOCK_COMMENT("} cOop encoder general");
}
4233
4234 //-------------------------------------------------
4235 // decode compressed oop. Generally usable decoder.
4236 //-------------------------------------------------
4237 // Rsrc - contains compressed oop on entry.
4238 // Rdst - contains regular oop on exit.
4239 // Rdst and Rsrc may indicate same register.
4240 // Rdst must not be the same register as Rbase, if Rbase was preloaded (before call).
4241 // Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch.
4242 // Rbase - register to use for the base
4243 // pow2_offset - offset of base to nice value. If -1, base must be loaded.
4244 // For performance, it is good to
4245 // - avoid Z_R0 for any of the argument registers.
4246 // - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
4247 // - avoid Z_R1 for Rdst if Rdst == Rbase.
void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) {

  const address oop_base = Universe::narrow_oop_base();
  const int oop_shift = Universe::narrow_oop_shift();
  const bool disjoint = Universe::narrow_oop_base_disjoint();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder");
  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
         "cOop encoder detected bad shift");

  // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.

  if (oop_base != NULL) {
    unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff;
    unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff;
    unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff;
    if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
      BLOCK_COMMENT("cOop decoder disjointBase {");
      // We do not need to load the base. Instead, we can install the upper bits
      // with an OR instead of an ADD.
      Label done;

      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) {  // NULL ptr must be preserved!
        z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
        z_bre(done);                   // NULL decodes to NULL; skip base OR.
      } else {
        z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone.
      }
      // OR in the non-zero high halfword(s) of the base.
      if ((oop_base_hl != 0) && (oop_base_hh != 0)) {
        z_oihf(Rdst, oop_base_hf);
      } else if (oop_base_hl != 0) {
        z_oihl(Rdst, oop_base_hl);
      } else {
        assert(oop_base_hh != 0, "not heapbased mode");
        z_oihh(Rdst, oop_base_hh);
      }
      bind(done);
      BLOCK_COMMENT("} cOop decoder disjointBase");
    } else {
      BLOCK_COMMENT("cOop decoder general {");
      // There are three decode steps:
      //   scale oop offset (shift left)
      //   get base (in reg) and pow2_offset (constant)
      //   add base, pow2_offset, and oop offset
      // The following register overlap situations may exist:
      // Rdst == Rsrc,  Rbase any other
      //   not a problem. Scaling in-place leaves Rbase undisturbed.
      //   Loading Rbase does not impact the scaled offset.
      // Rdst == Rbase, Rsrc  any other
      //   scaling would destroy a possibly preloaded Rbase. Loading Rbase
      //   would destroy the scaled offset.
      //   Remedy: use Rdst_tmp if Rbase has been preloaded.
      //           use Rbase_tmp if base has to be loaded.
      // Rsrc == Rbase, Rdst  any other
      //   Only possible without preloaded Rbase.
      //   Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
      // Rsrc == Rbase, Rdst == Rbase
      //   Only possible without preloaded Rbase.
      //   Loading Rbase would destroy compressed oop. Scaling in-place is ok.
      //   Remedy: use Rbase_tmp.
      //
      Label done;
      Register Rdst_tmp = Rdst;
      Register Rbase_tmp = Rbase;
      bool used_R0 = false;
      bool used_R1 = false;
      bool base_preloaded = pow2_offset >= 0;  // pow2_offset == -1 means: load base here.
      guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
      assert(oop_shift != 0, "room for optimization");

      // Check if we need to use scratch registers.
      if (Rdst == Rbase) {
        assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
        if (Rdst != Rsrc) {
          if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
          else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
        } else {
          Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
        }
      }
      if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase);

      // Scale oop and check for NULL.
      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) {  // NULL ptr must be preserved!
        z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
        z_bre(done);                       // NULL decodes to NULL; skip base add.
      } else {
        z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone.
      }

      // Get oop_base components.
      if (!base_preloaded) {
        pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
      }

      // Add up all components.
      if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
        // LAY (via add2reg_with_index) cannot use R0 as base/index; add in two steps.
        z_algr(Rdst_tmp, Rbase_tmp);
        if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
      } else {
        add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
      }

      bind(done);
      lgr_if_needed(Rdst, Rdst_tmp);
#ifdef ASSERT
      // Debug builds: clobber used scratch regs to expose hidden dependencies.
      if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
      if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
#endif
      BLOCK_COMMENT("} cOop decoder general");
    }
  } else {
    // Zero-based compressed oops: decoding is just an (optional) shift.
    BLOCK_COMMENT("cOop decoder zeroBase {");
    if (oop_shift == 0) {
      lgr_if_needed(Rdst, Rsrc);
    } else {
      z_sllg(Rdst, Rsrc, oop_shift);
    }
    BLOCK_COMMENT("} cOop decoder zeroBase");
  }
}
4372
4373 // ((OopHandle)result).resolve();
// Dereference the OopHandle in `result`, leaving the referenced oop in
// `result` (in place).
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  z_lg(result, 0, result);
}
4378
// Load the java mirror (java.lang.Class instance) of the class that
// declares `method` into `mirror`. Follows the chain:
//   method->constMethod()->constants()->pool_holder()->java_mirror(), resolved.
void MacroAssembler::load_mirror(Register mirror, Register method) {
  mem2reg_opt(mirror, Address(method, Method::const_offset()));
  mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
  mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
  mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
  resolve_oop_handle(mirror);  // Mirror is stored behind an OopHandle.
}
4386
4387 //---------------------------------------------------------------
4388 //--- Operations on arrays.
4389 //---------------------------------------------------------------
4390
4391 // Compiler ensures base is doubleword aligned and cnt is #doublewords.
4392 // Emitter does not KILL cnt and base arguments, since they need to be copied to
4393 // work registers anyway.
4394 // Actually, only r0, r1, and r5 are killed.
// Clear cnt_arg doublewords starting at base_pointer_arg.
// Short ranges (<= 256 bytes) use an EXECUTEd XC (exclusive-or of storage
// with itself zeroes it); longer ranges use MVCLE with source length zero,
// which pads (clears) the destination. Returns the emitted code size.
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
  // Src_addr is evenReg.
  // Src_len is odd_Reg.

  int block_start = offset();
  Register tmp_reg = src_len;  // Holds target instr addr for EX.
  Register dst_len = Z_R1;     // Holds dst len for MVCLE.
  Register dst_addr = Z_R0;    // Holds dst addr for MVCLE.

  Label doXC, doMVCLE, done;

  BLOCK_COMMENT("Clear_Array {");

  // Check for zero len and convert to long.
  z_ltgfr(src_len, cnt_arg);  // Sign-extend 32-bit count to 64 bits; sets CC.
  z_bre(done);                // Nothing to do if len == 0.

  // Prefetch data to be cleared.
  if (VM_Version::has_Prefetch()) {
    z_pfd(0x02, 0, Z_R0, base_pointer_arg);
    z_pfd(0x02, 256, Z_R0, base_pointer_arg);
  }

  z_sllg(dst_len, src_len, 3); // #bytes to clear.
  z_cghi(src_len, 32);         // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doXC);                // If so, use executed XC to clear.

  // MVCLE: initialize long arrays (general case).
  bind(doMVCLE);
  z_lgr(dst_addr, base_pointer_arg);
  // Src len of zero makes MVCLE pad the whole destination with the pad byte (0).
  clear_reg(src_len, true, false); // Src len of MVCLE is zero.

  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
  z_bru(done);

  // XC: initialize short arrays.
  Label XC_template; // Instr template, never exec directly!
  bind(XC_template);
  z_xc(0,0,base_pointer_arg,0,base_pointer_arg);  // XC storage with itself clears it.

  bind(doXC);
  add2reg(dst_len, -1);             // Get #bytes-1 for EXECUTE.
  if (VM_Version::has_ExecuteExtensions()) {
    z_exrl(dst_len, XC_template);   // Execute XC with var. len.
  } else {
    z_larl(tmp_reg, XC_template);
    z_ex(dst_len,0,Z_R0,tmp_reg);   // Execute XC with var. len.
  }
  // z_bru(done);                   // fallthru

  bind(done);

  BLOCK_COMMENT("} Clear_Array");

  int block_end = offset();
  return block_end - block_start;
}
4452
4453 // Compiler ensures base is doubleword aligned and cnt is count of doublewords.
4454 // Emitter does not KILL any arguments nor work registers.
4455 // Emitter generates up to 16 XC instructions, depending on the array length.
// Clear a compile-time-constant number of doublewords (up to 4KB total)
// with an unrolled sequence of XC instructions (up to 16), interleaved
// with cache-line prefetches. Returns the emitted code size.
unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
  int block_start = offset();
  int off;
  int lineSize_Bytes = AllocatePrefetchStepSize;
  int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord;
  bool doPrefetch = VM_Version::has_Prefetch();
  int XC_maxlen = 256;  // A single XC can process at most 256 bytes.
  int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0;

  BLOCK_COMMENT("Clear_Array_Const {");
  assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");

  // Do less prefetching for very short arrays.
  if (numXCInstr > 0) {
    // Prefetch only some cache lines, then begin clearing.
    if (doPrefetch) {
      if (cnt*BytesPerWord <= lineSize_Bytes/4) {  // If less than 1/4 of a cache line to clear,
        z_pfd(0x02, 0, Z_R0, base);                // prefetch just the first cache line.
      } else {
        assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
        for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) {
          z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
        }
      }
    }

    // Full-length XCs for all but the last chunk. XC of storage with itself clears it.
    for (off=0; off<(numXCInstr-1); off++) {
      z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);

      // Prefetch some cache lines in advance.
      if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
        z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
      }
    }
    // Last (possibly partial) chunk.
    if (off*XC_maxlen < cnt*BytesPerWord) {
      z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
    }
  }
  BLOCK_COMMENT("} Clear_Array_Const");

  int block_end = offset();
  return block_end - block_start;
}
4499
4500 // Compiler ensures base is doubleword aligned and cnt is #doublewords.
4501 // Emitter does not KILL cnt and base arguments, since they need to be copied to
4502 // work registers anyway.
4503 // Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
4504 //
4505 // For very large arrays, exploit MVCLE H/W support.
4506 // MVCLE instruction automatically exploits H/W-optimized page mover.
4507 // - Bytes up to next page boundary are cleared with a series of XC to self.
4508 // - All full pages are cleared with the page mover H/W assist.
4509 // - Remaining bytes are again cleared by a series of XC to self.
4510 //
// Clear a compile-time-constant (large) number of doublewords with MVCLE
// (source length zero => destination is padded/cleared, exploiting the
// H/W page mover). Returns the emitted code size.
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
  // Src_addr is evenReg.
  // Src_len is odd_Reg.

  int block_start = offset();
  Register dst_len = Z_R1;  // Holds dst len for MVCLE.
  Register dst_addr = Z_R0; // Holds dst addr for MVCLE.

  BLOCK_COMMENT("Clear_Array_Const_Big {");

  // Get len to clear.
  load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8

  // Prepare other args to MVCLE.
  z_lgr(dst_addr, base_pointer_arg);
  // Indicate unused result.
  (void) clear_reg(src_len, true, false); // Src len of MVCLE is zero.

  // Clear.
  MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
  BLOCK_COMMENT("} Clear_Array_Const_Big");

  int block_end = offset();
  return block_end - block_start;
}
4536
4537 // Allocator.
// Copy cnt_reg doublewords from src_reg to dst_reg (ranges are disjoint and
// doubleword aligned). Short copies (<= 256 bytes) use an EXECUTEd MVC;
// longer copies use MVCLE with cache-bypass hint. Returns emitted code size.
unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
                                                           Register cnt_reg,
                                                           Register tmp1_reg, Register tmp2_reg) {
  // Tmp1 is oddReg.
  // Tmp2 is evenReg.

  int block_start = offset();
  Label doMVC, doMVCLE, done, MVC_template;

  BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");

  // Check for zero len and convert to long.
  z_ltgfr(cnt_reg, cnt_reg);  // Sign-extend 32-bit count to 64 bits; sets CC.
  z_bre(done);                // Nothing to do if len == 0.

  z_sllg(Z_R1, cnt_reg, 3);   // Dst len in bytes. calc early to have the result ready.

  z_cghi(cnt_reg, 32);        // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doMVC);              // If so, use executed MVC to copy.

  bind(doMVCLE);              // A lot of data (more than 256 bytes).
  // Prep dest reg pair.
  z_lgr(Z_R0, dst_reg);       // dst addr
  // Dst len already in Z_R1.
  // Prep src reg pair.
  z_lgr(tmp2_reg, src_reg);   // src addr
  z_lgr(tmp1_reg, Z_R1);      // Src len same as dst len.

  // Do the copy.
  move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
  z_bru(done);                         // All done.

  bind(MVC_template);         // Instr template, never exec directly!
  z_mvc(0, 0, dst_reg, 0, src_reg);

  bind(doMVC);                // Just some data (not more than 256 bytes).

  // Prepare the EXECUTE length (#bytes-1) and, without EXRL, the template address.
  if (VM_Version::has_ExecuteExtensions()) {
    add2reg(Z_R1, -1);
  } else {
    add2reg(tmp1_reg, -1, Z_R1);
    z_larl(Z_R1, MVC_template);
  }

  if (VM_Version::has_Prefetch()) {
    z_pfd(1, 0,Z_R0,src_reg);
    z_pfd(2, 0,Z_R0,dst_reg);
    //    z_pfd(1,256,Z_R0,src_reg);    // Assume very short copy.
    //    z_pfd(2,256,Z_R0,dst_reg);
  }

  if (VM_Version::has_ExecuteExtensions()) {
    z_exrl(Z_R1, MVC_template);     // Execute MVC with variable length.
  } else {
    z_ex(tmp1_reg, 0, Z_R0, Z_R1);  // Execute MVC with variable length.
  }

  bind(done);

  BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");

  int block_end = offset();
  return block_end - block_start;
}
4602
4603 #ifdef COMPILER2
4604 //------------------------------------------------------
4605 // Special String Intrinsics. Implementation
4606 //------------------------------------------------------
4607
4608 // Intrinsics for CompactStrings
4609
4610 // Compress char[] to byte[].
4611 // Restores: src, dst
4612 // Uses: cnt
4613 // Kills: tmp, Z_R0, Z_R1.
4614 // Early clobber: result.
4615 // Note:
4616 // cnt is signed int. Do not rely on high word!
4617 // counts # characters, not bytes.
4618 // The result is the number of characters copied before the first incompatible character was found.
4619 // If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
4620 // by a few bytes. The result always indicates the number of copied characters.
4621 // When used as a character index, the returned value points to the first incompatible character.
4622 //
4623 // Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
4624 // - Different number of characters may have been written to dead array (if precise is false).
4625 // - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
// See the contract description in the comment block immediately preceding
// this function (result/src/dst/cnt/tmp usage, "precise" semantics).
// Processing cascade: vector loop (32 chars/iter, if vector facility is
// available) -> unrolled scalar loop (8 chars/iter) -> scalar loop
// (1 char/iter). Z_R0 tracks the total number of characters already
// processed across stages; each stage's break handler fixes it up.
unsigned int MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
                                             Register tmp,    bool precise) {
  assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);

  if (precise) {
    BLOCK_COMMENT("encode_iso_array {");
  } else {
    BLOCK_COMMENT("string_compress {");
  }
  int block_start = offset();

  Register Rsrc  = src;
  Register Rdst  = dst;
  Register Rix   = tmp;
  Register Rcnt  = cnt;
  Register Rmask = result;  // holds incompatibility check mask until result value is stored.
  Label    ScalarShortcut, AllDone;

  // Build 0xFF00FF00_FF00FF00: selects the high byte of each 2-byte char.
  // A char is compressible iff its high byte is zero.
  z_iilf(Rmask, 0xFF00FF00);
  z_iihf(Rmask, 0xFF00FF00);

#if 0  // Sacrifice shortcuts for code compactness
  {
    //---<  shortcuts for short strings (very frequent)   >---
    //  Strings with 4 and 8 characters were fond to occur very frequently.
    //  Therefore, we handle them right away with minimal overhead.
    Label     skipShortcut, skip4Shortcut, skip8Shortcut;
    Register  Rout = Z_R0;
    z_chi(Rcnt, 4);
    z_brne(skip4Shortcut);                 // 4 characters are very frequent
      z_lg(Z_R0, 0, Rsrc);                 // Treat exactly 4 characters specially.
      if (VM_Version::has_DistinctOpnds()) {
        Rout = Z_R0;
        z_ngrk(Rix, Z_R0, Rmask);
      } else {
        Rout = Rix;
        z_lgr(Rix, Z_R0);
        z_ngr(Z_R0, Rmask);
      }
      z_brnz(skipShortcut);
      z_stcmh(Rout, 5, 0, Rdst);
      z_stcm(Rout,  5, 2, Rdst);
      z_lgfr(result, Rcnt);
      z_bru(AllDone);
    bind(skip4Shortcut);

    z_chi(Rcnt, 8);
    z_brne(skip8Shortcut);                 // There's more to do...
      z_lmg(Z_R0, Z_R1, 0, Rsrc);          // Treat exactly 8 characters specially.
      if (VM_Version::has_DistinctOpnds()) {
        Rout = Z_R0;
        z_ogrk(Rix, Z_R0, Z_R1);
        z_ngr(Rix, Rmask);
      } else {
        Rout = Rix;
        z_lgr(Rix, Z_R0);
        z_ogr(Z_R0, Z_R1);
        z_ngr(Z_R0, Rmask);
      }
      z_brnz(skipShortcut);
      z_stcmh(Rout, 5, 0, Rdst);
      z_stcm(Rout,  5, 2, Rdst);
      z_stcmh(Z_R1, 5, 4, Rdst);
      z_stcm(Z_R1,  5, 6, Rdst);
      z_lgfr(result, Rcnt);
      z_bru(AllDone);

    bind(skip8Shortcut);
    clear_reg(Z_R0, true, false);          // #characters already processed (none). Precond for scalar loop.
    z_brl(ScalarShortcut);                 // Just a few characters

    bind(skipShortcut);
  }
#endif
  clear_reg(Z_R0);                         // make sure register is properly initialized.

  if (VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
                                           // Otherwise just do nothing in vector mode.
                                           // Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    Label      VectorLoop, VectorDone, VectorBreak;

    VectorRegister Vtmp1      = Z_V16;
    VectorRegister Vtmp2      = Z_V17;
    VectorRegister Vmask      = Z_V18;
    VectorRegister Vzero      = Z_V19;
    VectorRegister Vsrc_first = Z_V20;
    VectorRegister Vsrc_last  = Z_V23;

    assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
    assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
    z_srak(Rix, Rcnt, log_min_vcnt);       // # vector loop iterations
    z_brz(VectorDone);                     // not enough data for vector loop

    z_vzero(Vzero);                        // all zeroes
    z_vgmh(Vmask, 0, 7);                   // generate 0xff00 mask for all 2-byte elements
    z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop

    bind(VectorLoop);
      z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc);
      add2reg(Rsrc, min_vcnt*2);

      //---<  check for incompatible character  >---
      z_vo(Vtmp1, Z_V20, Z_V21);
      z_vo(Vtmp2, Z_V22, Z_V23);
      z_vo(Vtmp1, Vtmp1, Vtmp2);
      z_vn(Vtmp1, Vtmp1, Vmask);
      z_vceqhs(Vtmp1, Vtmp1, Vzero);       // high half of all chars must be zero for successful compress.
      z_bvnt(VectorBreak);                 // break vector loop if not all vector elements compare eq -> incompatible character found.
                                           // re-process data from current iteration in break handler.

      //---<  pack & store characters  >---
      z_vpkh(Vtmp1, Z_V20, Z_V21);         // pack (src1, src2) -> tmp1
      z_vpkh(Vtmp2, Z_V22, Z_V23);         // pack (src3, src4) -> tmp2
      z_vstm(Vtmp1, Vtmp2, 0, Rdst);       // store packed string
      add2reg(Rdst, min_vcnt);

      z_brct(Rix, VectorLoop);

    z_bru(VectorDone);

    bind(VectorBreak);
      add2reg(Rsrc, -min_vcnt*2);          // Fix Rsrc. Rsrc was already updated, but Rdst and Rix are not.
      z_sll(Rix, log_min_vcnt);            // # chars processed so far in VectorLoop, excl. current iteration.
      z_sr(Z_R0, Rix);                     // correct # chars processed in total.

    bind(VectorDone);
  }

  {
    const int  min_cnt     = 8;            // Minimum #characters required to use unrolled loop.
                                           // Otherwise just do nothing in unrolled loop.
                                           // Must be multiple of 8.
    const int  log_min_cnt = exact_log2(min_cnt);
    Label      UnrolledLoop, UnrolledDone, UnrolledBreak;

    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_sra(Rix, log_min_cnt);               // unrolled loop count
    z_brz(UnrolledDone);

    bind(UnrolledLoop);
      z_lmg(Z_R0, Z_R1, 0, Rsrc);
      if (precise) {
        z_ogr(Z_R1, Z_R0);                 // check all 8 chars for incompatibility
        z_ngr(Z_R1, Rmask);
        z_brnz(UnrolledBreak);

        z_lg(Z_R1, 8, Rsrc);               // reload destroyed register
        z_stcmh(Z_R0, 5, 0, Rdst);
        z_stcm(Z_R0,  5, 2, Rdst);
      } else {
        // Imprecise: store first 4 chars before checking. On failure, the
        // result may be off (documented behavior), saving the reload above.
        z_stcmh(Z_R0, 5, 0, Rdst);
        z_stcm(Z_R0,  5, 2, Rdst);

        z_ogr(Z_R0, Z_R1);
        z_ngr(Z_R0, Rmask);
        z_brnz(UnrolledBreak);
      }
      z_stcmh(Z_R1, 5, 4, Rdst);
      z_stcm(Z_R1,  5, 6, Rdst);

      add2reg(Rsrc, min_cnt*2);
      add2reg(Rdst, min_cnt);
      z_brct(Rix, UnrolledLoop);

    z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop.
    z_nilf(Z_R0, ~(min_cnt-1));
    z_tmll(Rcnt, min_cnt-1);
    z_brnaz(ScalarShortcut);               // if all bits zero, there is nothing left to do for scalar loop.
                                           // Rix == 0 in all cases.
    z_sllg(Z_R1, Rcnt, 1);                 // # src bytes already processed. Only lower 32 bits are valid!
                                           //   Z_R1 contents must be treated as unsigned operand! For huge strings,
                                           //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
    z_lgfr(result, Rcnt);                  // all characters processed.
    z_slgfr(Rdst, Rcnt);                   // restore ptr
    z_slgfr(Rsrc, Z_R1);                   // restore ptr, double the element count for Rsrc restore
    z_bru(AllDone);

    bind(UnrolledBreak);
    z_lgfr(Z_R0, Rcnt);                    // # chars processed in total after unrolled loop
    z_nilf(Z_R0, ~(min_cnt-1));
    z_sll(Rix, log_min_cnt);               // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
    z_sr(Z_R0, Rix);                       // fix # chars processed OK so far.
    if (!precise) {
      z_lgfr(result, Z_R0);
      z_sllg(Z_R1, Z_R0, 1);               // # src bytes already processed. Only lower 32 bits are valid!
                                           //   Z_R1 contents must be treated as unsigned operand! For huge strings,
                                           //   (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
      z_aghi(result, min_cnt/2);           // min_cnt/2 characters have already been written
                                           // but ptrs were not updated yet.
      z_slgfr(Rdst, Z_R0);                 // restore ptr
      z_slgfr(Rsrc, Z_R1);                 // restore ptr, double the element count for Rsrc restore
      z_bru(AllDone);
    }
    bind(UnrolledDone);
  }

  {
    Label     ScalarLoop, ScalarDone, ScalarBreak;

    bind(ScalarShortcut);
    z_ltgfr(result, Rcnt);
    z_brz(AllDone);

#if 0  // Sacrifice shortcuts for code compactness
    {
      //---<  Special treatment for very short strings (one or two characters)  >---
      //   For these strings, we are sure that the above code was skipped.
      //   Thus, no registers were modified, register restore is not required.
      Label     ScalarDoit, Scalar2Char;
      z_chi(Rcnt, 2);
      z_brh(ScalarDoit);
      z_llh(Z_R1,  0, Z_R0, Rsrc);
      z_bre(Scalar2Char);
      z_tmll(Z_R1, 0xff00);
      z_lghi(result, 0);                   // cnt == 1, first char invalid, no chars successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R1,  0, Z_R0, Rdst);
      z_lghi(result, 1);
      z_bru(AllDone);

      bind(Scalar2Char);
      z_llh(Z_R0,  2, Z_R0, Rsrc);
      z_tmll(Z_R1, 0xff00);
      z_lghi(result, 0);                   // cnt == 2, first char invalid, no chars successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R1,  0, Z_R0, Rdst);
      z_tmll(Z_R0, 0xff00);
      z_lghi(result, 1);                   // cnt == 2, second char invalid, one char successfully processed
      z_brnaz(AllDone);
      z_stc(Z_R0,  1, Z_R0, Rdst);
      z_lghi(result, 2);
      z_bru(AllDone);

      bind(ScalarDoit);
    }
#endif

    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to compress in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_lgfr(result, Rcnt);                  // # processed characters (if all runs ok).
    z_brz(ScalarDone);                     // uses CC from Rix calculation

    bind(ScalarLoop);
      z_llh(Z_R1, 0, Z_R0, Rsrc);
      z_tmll(Z_R1, 0xff00);
      z_brnaz(ScalarBreak);
      z_stc(Z_R1, 0, Z_R0, Rdst);
      add2reg(Rsrc, 2);
      add2reg(Rdst, 1);
      z_brct(Rix, ScalarLoop);

    z_bru(ScalarDone);

    bind(ScalarBreak);
    z_sr(result, Rix);                     // result = chars processed before the failing one.

    bind(ScalarDone);
    z_sgfr(Rdst, result);                  // restore ptr
    z_sgfr(Rsrc, result);                  // restore ptr, double the element count for Rsrc restore
    z_sgfr(Rsrc, result);
  }
  bind(AllDone);

  if (precise) {
    BLOCK_COMMENT("} encode_iso_array");
  } else {
    BLOCK_COMMENT("} string_compress");
  }
  return offset() - block_start;
}
4907
4908 // Inflate byte[] to char[].
// Inflate byte[] to char[] using the TROT (translate one to two) instruction
// with an identity-style translation table provided by the stub routines.
//   src - source byte array address (preserved via copy into tmp).
//   dst - destination char array address; must be the even register of an
//         even/odd pair with cnt (TROT operates on such a pair).
//   cnt - # characters to inflate; zero-extended to a positive simm32.
//   tmp - scratch, receives a copy of src.
// Kills: Z_R0 (stop char), Z_R1 (table addr), tmp, cnt.
// Returns the size (in bytes) of the code emitted for this block.
unsigned int MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
  int block_start = offset();

  BLOCK_COMMENT("string_inflate {");

  Register stop_char = Z_R0;
  Register table     = Z_R1;
  Register src_addr  = tmp;

  assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
  assert(dst->encoding()%2 == 0, "must be even reg");
  assert(cnt->encoding()%2 == 1, "must be odd reg");
  assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");

  StubRoutines::zarch::generate_load_trot_table_addr(this, table);  // kills Z_R0 (if ASSERT)
  clear_reg(stop_char);  // Stop character. Not used here, but initialized to have a defined value.
  lgr_if_needed(src_addr, src);
  z_llgfr(cnt, cnt);     // # src characters, must be a positive simm32.

  translate_ot(dst, src_addr, /* mask = */ 0x0001);

  BLOCK_COMMENT("} string_inflate");

  return offset() - block_start;
}
4934
4935 // Inflate byte[] to char[].
4936 // Restores: src, dst
4937 // Uses: cnt
4938 // Kills: tmp, Z_R0, Z_R1.
4939 // Note:
4940 // cnt is signed int. Do not rely on high word!
4941 // counts # characters, not bytes.
// See the contract description in the comment block immediately preceding
// this function (restores src/dst, uses cnt, kills tmp/Z_R0/Z_R1).
// Processing cascade: vector loop (32 chars/iter, if vector facility is
// available) -> unrolled scalar loop (8 chars/iter) -> computed-branch
// scalar tail for the remaining 0..7 characters. Z_R0 tracks the number
// of characters already processed.
unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
  assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);

  BLOCK_COMMENT("string_inflate {");
  int block_start = offset();

  Register   Rcnt = cnt;   // # characters (src: bytes, dst: char (2-byte)), remaining after current loop.
  Register   Rix  = tmp;   // loop index
  Register   Rsrc = src;   // addr(src array)
  Register   Rdst = dst;   // addr(dst array)
  Label      ScalarShortcut, AllDone;

#if 0  // Sacrifice shortcuts for code compactness
  {
    //---<  shortcuts for short strings (very frequent)   >---
    Label   skipShortcut, skip4Shortcut;
    z_ltr(Rcnt, Rcnt);                     // absolutely nothing to do for strings of len == 0.
    z_brz(AllDone);
    clear_reg(Z_R0);                       // make sure registers are properly initialized.
    clear_reg(Z_R1);
    z_chi(Rcnt, 4);
    z_brne(skip4Shortcut);                 // 4 characters are very frequent
      z_icm(Z_R0, 5,    0, Rsrc);          // Treat exactly 4 characters specially.
      z_icm(Z_R1, 5,    2, Rsrc);
      z_stm(Z_R0, Z_R1, 0, Rdst);
      z_bru(AllDone);
    bind(skip4Shortcut);

    z_chi(Rcnt, 8);
    z_brh(skipShortcut);                   // There's a lot to do...
    z_lgfr(Z_R0, Rcnt);                    // remaining #characters (<= 8). Precond for scalar loop.
                                           // This does not destroy the "register cleared" state of Z_R0.
    z_brl(ScalarShortcut);                 // Just a few characters
      z_icmh(Z_R0, 5, 0, Rsrc);            // Treat exactly 8 characters specially.
      z_icmh(Z_R1, 5, 4, Rsrc);
      z_icm(Z_R0,  5, 2, Rsrc);
      z_icm(Z_R1,  5, 6, Rsrc);
      z_stmg(Z_R0, Z_R1, 0, Rdst);
      z_bru(AllDone);
    bind(skipShortcut);
  }
#endif
  clear_reg(Z_R0);                         // make sure register is properly initialized.

  if (VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
                                           // Otherwise just do nothing in vector mode.
                                           // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    Label      VectorLoop, VectorDone;

    assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
    z_srak(Rix, Rcnt, log_min_vcnt);       // calculate # vector loop iterations
    z_brz(VectorDone);                     // skip if none

    z_sllg(Z_R0, Rix, log_min_vcnt);       // remember #chars that will be processed by vector loop

    bind(VectorLoop);
      z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
      add2reg(Rsrc, min_vcnt);

      z_vuplhb(Z_V22, Z_V20);              // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20);              // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21);              // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21);              // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 bytes
      add2reg(Rdst, min_vcnt*2);

      z_brct(Rix, VectorLoop);

    bind(VectorDone);
  }

  const int  min_cnt     = 8;              // Minimum #characters required to use unrolled scalar loop.
                                           // Otherwise just do nothing in unrolled scalar mode.
                                           // Must be multiple of 8.
  {
    const int  log_min_cnt = exact_log2(min_cnt);
    Label      UnrolledLoop, UnrolledDone;


    if (VM_Version::has_DistinctOpnds()) {
      z_srk(Rix, Rcnt, Z_R0);              // remaining # chars to process in unrolled loop
    } else {
      z_lr(Rix, Rcnt);
      z_sr(Rix, Z_R0);
    }
    z_sra(Rix, log_min_cnt);               // unrolled loop count
    z_brz(UnrolledDone);

    clear_reg(Z_R0);                       // ICM only fills the selected bytes; the
    clear_reg(Z_R1);                       // remaining byte positions must be zero.

    bind(UnrolledLoop);
      z_icmh(Z_R0, 5, 0, Rsrc);
      z_icmh(Z_R1, 5, 4, Rsrc);
      z_icm(Z_R0,  5, 2, Rsrc);
      z_icm(Z_R1,  5, 6, Rsrc);
      add2reg(Rsrc, min_cnt);

      z_stmg(Z_R0, Z_R1, 0, Rdst);

      add2reg(Rdst, min_cnt*2);
      z_brct(Rix, UnrolledLoop);

    bind(UnrolledDone);
    z_lgfr(Z_R0, Rcnt);                    // # chars left over after unrolled loop.
    z_nilf(Z_R0, min_cnt-1);
    z_brnz(ScalarShortcut);                // if zero, there is nothing left to do for scalar loop.
                                           // Rix == 0 in all cases.
    z_sgfr(Z_R0, Rcnt);                    // negative # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0);                     // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0);                     // restore ptr.
    z_bru(AllDone);
  }

  {
    bind(ScalarShortcut);
    // Z_R0 must contain remaining # characters as 64-bit signed int here.
    // register contents is preserved over scalar processing (for register fixup).

#if 0  // Sacrifice shortcuts for code compactness
    {
      Label      ScalarDefault;
      z_chi(Rcnt, 2);
      z_brh(ScalarDefault);
      z_llc(Z_R0,  0, Z_R0, Rsrc);     // 6 bytes
      z_sth(Z_R0,  0, Z_R0, Rdst);     // 4 bytes
      z_brl(AllDone);
      z_llc(Z_R0,  1, Z_R0, Rsrc);     // 6 bytes
      z_sth(Z_R0,  2, Z_R0, Rdst);     // 4 bytes
      z_bru(AllDone);
      bind(ScalarDefault);
    }
#endif

    Label      CodeTable;
    // The 0..7 remaining characters are handled by branching backwards into a
    // sequence of eight llc/sth pairs ("CodeTable"), executing exactly the
    // last <remaining> pairs and falling through to CodeTable.
    // Some comments on Rix calculation:
    //  - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions).
    //  - high word of both Rix and Rcnt may contain garbage
    //  - the final lngfr takes care of that garbage, extending the sign to high word
    z_sllg(Rix, Z_R0, 2);                // calculate 10*Rix = (4*Rix + Rix)*2
    z_ar(Rix, Z_R0);
    z_larl(Z_R1, CodeTable);
    z_sll(Rix, 1);
    z_lngfr(Rix, Rix);                   // ix range: [0..7], after inversion & mult: [-(7*10)..(0*10)].
                                         // (each llc/sth pair occupies 10 code bytes: llc 6 + sth 4 —
                                         //  NOTE(review): original comment said 12; verify against encodings)
    z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);

    z_llc(Z_R1,  6, Z_R0, Rsrc);         // 6 bytes
    z_sth(Z_R1, 12, Z_R0, Rdst);         // 4 bytes

    z_llc(Z_R1,  5, Z_R0, Rsrc);
    z_sth(Z_R1, 10, Z_R0, Rdst);

    z_llc(Z_R1,  4, Z_R0, Rsrc);
    z_sth(Z_R1,  8, Z_R0, Rdst);

    z_llc(Z_R1,  3, Z_R0, Rsrc);
    z_sth(Z_R1,  6, Z_R0, Rdst);

    z_llc(Z_R1,  2, Z_R0, Rsrc);
    z_sth(Z_R1,  4, Z_R0, Rdst);

    z_llc(Z_R1,  1, Z_R0, Rsrc);
    z_sth(Z_R1,  2, Z_R0, Rdst);

    z_llc(Z_R1,  0, Z_R0, Rsrc);
    z_sth(Z_R1,  0, Z_R0, Rdst);
    bind(CodeTable);

    z_chi(Rcnt, 8);                      // no fixup for small strings. Rdst, Rsrc were not modified.
    z_brl(AllDone);

    z_sgfr(Z_R0, Rcnt);                  // # characters the ptrs have been advanced previously.
    z_agr(Rdst, Z_R0);                   // restore ptr, double the element count for Rdst restore.
    z_agr(Rdst, Z_R0);
    z_agr(Rsrc, Z_R0);                   // restore ptr.
  }
  bind(AllDone);

  BLOCK_COMMENT("} string_inflate");
  return offset() - block_start;
}
5126
5127 // Inflate byte[] to char[], length known at compile time.
5128 // Restores: src, dst
5129 // Kills: tmp, Z_R0, Z_R1.
5130 // Note:
5131 // len is signed int. Counts # characters, not bytes.
// See the contract description in the comment block immediately preceding
// this function (restores src/dst, kills tmp/Z_R0/Z_R1; len is a
// compile-time constant # of characters).
// With len known at assembly time, loop counts and residues are computed
// here rather than at run time; ptr advancement is folded into immediate
// displacements (src_off/dst_off) wherever a loop is not needed, and the
// final fixup is only emitted when ptrs were actually advanced.
unsigned int MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) {
  assert_different_registers(Z_R0, Z_R1, src, dst, tmp);

  BLOCK_COMMENT("string_inflate_const {");
  int block_start = offset();

  Register   Rix  = tmp;   // loop index
  Register   Rsrc = src;   // addr(src array)
  Register   Rdst = dst;   // addr(dst array)
  Label      ScalarShortcut, AllDone;
  int        nprocessed = 0;
  int        src_off    = 0;  // compensate for saved (optimized away) ptr advancement.
  int        dst_off    = 0;  // compensate for saved (optimized away) ptr advancement.
  bool       restore_inputs = false;  // true once Rsrc/Rdst were advanced by a loop.
  bool       workreg_clear  = false;  // true once Z_R0/Z_R1 are known to be zero.

  if ((len >= 32) && VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 32;          // Minimum #characters required to use vector instructions.
                                           // Otherwise just do nothing in vector mode.
                                           // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    const int  iterations   = (len - nprocessed) >> log_min_vcnt;
    nprocessed             += iterations << log_min_vcnt;
    Label      VectorLoop;

    if (iterations == 1) {
      z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc);  // get next 32 characters (single-byte)
      z_vuplhb(Z_V22, Z_V20);                // V2 <- (expand) V0(high)
      z_vupllb(Z_V23, Z_V20);                // V3 <- (expand) V0(low)
      z_vuplhb(Z_V24, Z_V21);                // V4 <- (expand) V1(high)
      z_vupllb(Z_V25, Z_V21);                // V5 <- (expand) V1(low)
      z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store next 32 bytes

      src_off += min_vcnt;
      dst_off += min_vcnt*2;
    } else {
      restore_inputs = true;

      z_lgfi(Rix, len>>log_min_vcnt);
      bind(VectorLoop);
        z_vlm(Z_V20, Z_V21, 0, Rsrc);        // get next 32 characters (single-byte)
        add2reg(Rsrc, min_vcnt);

        z_vuplhb(Z_V22, Z_V20);              // V2 <- (expand) V0(high)
        z_vupllb(Z_V23, Z_V20);              // V3 <- (expand) V0(low)
        z_vuplhb(Z_V24, Z_V21);              // V4 <- (expand) V1(high)
        z_vupllb(Z_V25, Z_V21);              // V5 <- (expand) V1(low)
        z_vstm(Z_V22, Z_V25, 0, Rdst);       // store next 32 bytes
        add2reg(Rdst, min_vcnt*2);

        z_brct(Rix, VectorLoop);
    }
  }

  if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
    const int  min_vcnt     = 16;          // Minimum #characters required to use vector instructions.
                                           // Otherwise just do nothing in vector mode.
                                           // Must be multiple of vector register length (16 bytes = 128 bits).
    const int  log_min_vcnt = exact_log2(min_vcnt);
    const int  iterations   = (len - nprocessed) >> log_min_vcnt;
    nprocessed             += iterations << log_min_vcnt;
    assert(iterations == 1, "must be!");

    z_vl(Z_V20, 0+src_off, Z_R0, Rsrc);    // get next 16 characters (single-byte)
    z_vuplhb(Z_V22, Z_V20);                // V2 <- (expand) V0(high)
    z_vupllb(Z_V23, Z_V20);                // V3 <- (expand) V0(low)
    z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 32 bytes

    src_off += min_vcnt;
    dst_off += min_vcnt*2;
  }

  if ((len-nprocessed) > 8) {
    const int  min_cnt     = 8;            // Minimum #characters required to use unrolled scalar loop.
                                           // Otherwise just do nothing in unrolled scalar mode.
                                           // Must be multiple of 8.
    const int  log_min_cnt = exact_log2(min_cnt);
    const int  iterations  = (len - nprocessed) >> log_min_cnt;
    nprocessed            += iterations << log_min_cnt;

    //---<  avoid loop overhead/ptr increment for small # iterations  >---
    if (iterations <= 2) {
      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0,  5, 2+src_off, Rsrc);
      z_icm(Z_R1,  5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations == 2) {
      z_icmh(Z_R0, 5, 0+src_off, Rsrc);
      z_icmh(Z_R1, 5, 4+src_off, Rsrc);
      z_icm(Z_R0,  5, 2+src_off, Rsrc);
      z_icm(Z_R1,  5, 6+src_off, Rsrc);
      z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);

      src_off += min_cnt;
      dst_off += min_cnt*2;
    }

    if (iterations > 2) {
      Label      UnrolledLoop;
      restore_inputs  = true;

      clear_reg(Z_R0);
      clear_reg(Z_R1);
      workreg_clear = true;

      z_lgfi(Rix, iterations);
      bind(UnrolledLoop);
        z_icmh(Z_R0, 5, 0, Rsrc);
        z_icmh(Z_R1, 5, 4, Rsrc);
        z_icm(Z_R0,  5, 2, Rsrc);
        z_icm(Z_R1,  5, 6, Rsrc);
        add2reg(Rsrc, min_cnt);

        z_stmg(Z_R0, Z_R1, 0, Rdst);
        add2reg(Rdst, min_cnt*2);

        z_brct(Rix, UnrolledLoop);
    }
  }

  // Handle the remaining 0..8 characters with a straight-line sequence
  // specialized per residue. Z_R0/Z_R1 are cleared on demand because ICM
  // only fills the byte positions selected by its mask.
  if ((len-nprocessed) > 0) {
    switch (len-nprocessed) {
      case 8:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icmh(Z_R0, 5, 0+src_off, Rsrc);
        z_icmh(Z_R1, 5, 4+src_off, Rsrc);
        z_icm(Z_R0,  5, 2+src_off, Rsrc);
        z_icm(Z_R1,  5, 6+src_off, Rsrc);
        z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 7:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0,  5, 0+src_off, Rsrc);
        z_icm(Z_R1,  5, 2+src_off, Rsrc);
        z_icm(Rix,   5, 4+src_off, Rsrc);
        z_stm(Z_R0,  Z_R1, 0+dst_off, Rdst);
        z_llc(Z_R0,  6+src_off, Z_R0, Rsrc);
        z_st(Rix,    8+dst_off, Z_R0, Rdst);
        z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
        break;
      case 6:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        clear_reg(Rix);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_icm(Rix,  5, 4+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_st(Rix,   8+dst_off, Z_R0, Rdst);
        break;
      case 5:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_llc(Rix,  4+src_off, Z_R0, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        z_sth(Rix,  8+dst_off, Z_R0, Rdst);
        break;
      case 4:
        if (!workreg_clear) {
          clear_reg(Z_R0);
          clear_reg(Z_R1);
        }
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_icm(Z_R1, 5, 2+src_off, Rsrc);
        z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
        break;
      case 3:
        if (!workreg_clear) {
          clear_reg(Z_R0);
        }
        z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
        z_icm(Z_R0, 5, 0+src_off, Rsrc);
        z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
        z_st(Z_R0,  0+dst_off, Rdst);
        break;
      case 2:
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
        break;
      case 1:
        z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
        z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
        break;
      default:
        guarantee(false, "Impossible");
        break;
    }
    src_off   +=  len-nprocessed;
    dst_off   += (len-nprocessed)*2;
    nprocessed = len;
  }

  //---< restore modified input registers  >---
  if ((nprocessed > 0) && restore_inputs) {
    z_agfi(Rsrc, -(nprocessed-src_off));
    if (nprocessed < 1000000000) {  // avoid int overflow
      z_agfi(Rdst, -(nprocessed*2-dst_off));
    } else {
      z_agfi(Rdst, -(nprocessed-dst_off));
      z_agfi(Rdst, -nprocessed);
    }
  }

  BLOCK_COMMENT("} string_inflate_const");
  return offset() - block_start;
}
5363
5364 // Kills src.
// Scan cnt bytes starting at src and set result to 1 if any byte has its
// top bit set (i.e., is negative when interpreted as a signed byte), else 0.
// Fast loop tests 16 bytes per iteration against mask 0x8080...80; the
// remainder (and short inputs, cnt < 16) is handled one byte at a time.
//   src     - start address; advanced during the scan (killed, see header note).
//   cnt     - # bytes; zero-extended to a positive simm32.
//   odd_reg - scratch: loop address limit.
//   even_reg- scratch: address increment for BRXLG.
//   tmp     - scratch: the 0x80 mask.
// Kills: Z_R0, Z_R1, src, odd_reg, even_reg, tmp. Early clobber: result.
// Returns the size (in bytes) of the code emitted for this block.
unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt,
                                           Register odd_reg, Register even_reg, Register tmp) {
  int block_start = offset();
  Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
  const Register addr = src, mask = tmp;

  BLOCK_COMMENT("has_negatives {");

  z_llgfr(Z_R1, cnt);      // Number of bytes to read. (Must be a positive simm32.)
  z_llilf(mask, 0x80808080);
  z_lhi(result, 1);        // Assume true.
  // Last possible addr for fast loop.
  z_lay(odd_reg, -16, Z_R1, src);
  z_chi(cnt, 16);
  z_brl(Lslow);

  // ind1: index, even_reg: index increment, odd_reg: index limit
  z_iihf(mask, 0x80808080);  // Complete the 64-bit 0x8080...80 mask.
  z_lghi(even_reg, 16);

  bind(Lloop1); // 16 bytes per iteration.
  z_lg(Z_R0, Address(addr));
  z_lg(Z_R1, Address(addr, 8));
  z_ogr(Z_R0, Z_R1);
  z_ngr(Z_R0, mask);
  z_brne(Ldone);           // If found return 1.
  z_brxlg(addr, even_reg, Lloop1);

  bind(Lslow);
  z_aghi(odd_reg, 16-1);   // Last possible addr for slow loop.
  z_lghi(even_reg, 1);
  z_cgr(addr, odd_reg);
  z_brh(Lnotfound);

  bind(Lloop2); // 1 byte per iteration.
  z_cli(Address(addr), 0x80);
  z_brnl(Ldone);           // If found return 1.
  z_brxlg(addr, even_reg, Lloop2);

  bind(Lnotfound);
  z_lhi(result, 0);

  bind(Ldone);

  BLOCK_COMMENT("} has_negatives");

  return offset() - block_start;
}
5413
5414 // kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
// Compare two strings and return the usual compareTo-style result:
// the difference of the first mismatching characters, or (if equal up to
// the shorter length) the length difference.
//   ae - encoding combination (StrIntrinsicNode::LL/UU/LU/UL).
//        For UL, callers are expected to have swapped str1/str2 (see note
//        below); the result is negated at the end to compensate.
// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
// Returns the size (in bytes) of the code emitted for this block.
unsigned int MacroAssembler::string_compare(Register str1, Register str2,
                                            Register cnt1, Register cnt2,
                                            Register odd_reg, Register even_reg, Register result, int ae) {
  int block_start = offset();

  assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
  assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);

  // If strings are equal up to min length, return the length difference.
  const Register diff = result, // Pre-set result with length difference.
                 min  = cnt1,   // min number of bytes
                 tmp  = cnt2;

  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
  // we interchange str1 and str2 in the UL case and negate the result.
  // Like this, str1 is always latin1 encoded, except for the UU case.
  // In addition, we need 0 (or sign which is 0) extend when using 64 bit register.
  const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);

  BLOCK_COMMENT("string_compare {");

  if (used_as_LU) {
    z_srl(cnt2, 1);       // cnt2 is byte count of the UTF16 string; halve to chars.
  }

  // See if the lengths are different, and calculate min in cnt1.
  // Save diff in case we need it for a tie-breaker.

  // diff = cnt1 - cnt2
  if (VM_Version::has_DistinctOpnds()) {
    z_srk(diff, cnt1, cnt2);
  } else {
    z_lr(diff, cnt1);
    z_sr(diff, cnt2);
  }
  if (str1 != str2) {
    if (VM_Version::has_LoadStoreConditional()) {
      z_locr(min, cnt2, Assembler::bcondHigh);
    } else {
      Label Lskip;
      z_brl(Lskip);    // min ok if cnt1 < cnt2
      z_lr(min, cnt2); // min = cnt2
      bind(Lskip);
    }
  }

  if (ae == StrIntrinsicNode::UU) {
    z_sra(diff, 1);    // diff was computed in bytes; convert to chars.
  }
  if (str1 != str2) {
    Label Ldone;
    if (used_as_LU) {
      // Loop which searches the first difference character by character.
      Label Lloop;
      const Register ind1 = Z_R1,
                     ind2 = min;
      int stride1 = 1, stride2 = 2; // See comment above.

      // ind1: index, even_reg: index increment, odd_reg: index limit
      z_llilf(ind1, (unsigned int)(-stride1));  // start at -stride1; BRXH pre-increments.
      z_lhi(even_reg, stride1);
      add2reg(odd_reg, -stride1, min);
      clear_reg(ind2); // kills min

      bind(Lloop);
      z_brxh(ind1, even_reg, Ldone);
      z_llc(tmp, Address(str1, ind1));
      z_llh(Z_R0, Address(str2, ind2));
      z_ahi(ind2, stride2);
      z_sr(tmp, Z_R0);
      z_bre(Lloop);

      z_lr(result, tmp);

    } else {
      // Use clcle in fast loop (only for same encoding).
      z_lgr(Z_R0, str1);
      z_lgr(even_reg, str2);
      z_llgfr(Z_R1, min);
      z_llgfr(odd_reg, min);

      if (ae == StrIntrinsicNode::LL) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_bre(Ldone);
      z_lgr(Z_R1, Z_R0);  // CLCLE/CLCLU left the mismatch addresses in the reg pairs.
      if (ae == StrIntrinsicNode::LL) {
        z_llc(Z_R0, Address(even_reg));
        z_llc(result, Address(Z_R1));
      } else {
        z_llh(Z_R0, Address(even_reg));
        z_llh(result, Address(Z_R1));
      }
      z_sr(result, Z_R0);
    }

    // Otherwise, return the difference between the first mismatched chars.
    bind(Ldone);
  }

  if (ae == StrIntrinsicNode::UL) {
    z_lcr(result, result); // Negate result (see note above).
  }

  BLOCK_COMMENT("} string_compare");

  return offset() - block_start;
}
5525
// Compare two arrays (is_array_equ == true) or two raw memory regions of
// 'limit' bytes (is_array_equ == false) for equality.
// On exit, result == 1 if all elements compared equal, result == 0 otherwise.
// For the array case, lengths are read from the array headers and element data
// starts at base_offset; T_BYTE vs T_CHAR is selected by is_byte.
// Uses Z_R0 and Z_R1 as scratch; odd_reg/even_reg are killed.
// Returns the size (in bytes) of the emitted code.
unsigned int MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
                                          Register odd_reg, Register even_reg, Register result, bool is_byte) {
  int block_start = offset();

  BLOCK_COMMENT("array_equals {");

  assert_different_registers(ary1, limit, odd_reg, even_reg);
  assert_different_registers(ary2, limit, odd_reg, even_reg);

  Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
  int base_offset = 0;

  if (ary1 != ary2) {
    if (is_array_equ) {
      base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

      // Return true if the same array.
      compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);

      // Return false if one of them is NULL.
      compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
      compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);

      // Load the lengths of arrays.
      z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));

      // Return false if the two arrays are not equal length.
      z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
      z_brne(Ldone_false);

      // string len in bytes (right operand)
      if (!is_byte) {
        z_chi(odd_reg, 128);   // Compare BEFORE scaling: 128 chars == 256 bytes.
        z_sll(odd_reg, 1);     // preserves flags
        z_brh(Lclcle);
      } else {
        compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
      }
    } else {
      z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
      compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
    }


    // Use clc instruction for up to 256 bytes.
    {
      Register str1_reg = ary1,
               str2_reg = ary2;
      if (is_array_equ) {
        str1_reg = Z_R1;
        str2_reg = even_reg;
        add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
        add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
      }
      z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
      z_brl(Ldone_true);  // Zero-length region: trivially equal.
      // Note: We could jump to the template if equal.

      // EXRL patches the decremented length into the CLC template below.
      assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
      z_exrl(odd_reg, CLC_template);
      z_bre(Ldone_true);
      // fall through

      bind(Ldone_false);
      clear_reg(result);
      z_bru(Ldone);

      // Never executed directly; only via EXRL above.
      bind(CLC_template);
      z_clc(0, 0, str1_reg, 0, str2_reg);
    }

    // Use clcle instruction.
    {
      bind(Lclcle);
      add2reg(even_reg, base_offset, ary2); // string addr (right operand)
      add2reg(Z_R0, base_offset, ary1);     // string addr (left operand)

      z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)
      if (is_byte) {
        compare_long_ext(Z_R0, even_reg, 0);
      } else {
        compare_long_uni(Z_R0, even_reg, 0);
      }
      z_lghi(result, 0); // Preserve flags.
      z_brne(Ldone);
    }
  }
  // fall through

  bind(Ldone_true);
  z_lghi(result, 1); // All characters are equal.
  bind(Ldone);

  BLOCK_COMMENT("} array_equals");

  return offset() - block_start;
}
5623
// Emit code searching for 'needle' within 'haystack', returning the index of
// the first occurrence in 'result', or -1 if not found.
// needlecnt == noreg means the needle length is the compile-time constant
// needlecntval; otherwise needlecnt holds the (variable) needle length.
// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result
// Also uses Z_R0 (first needle char for the SRST/SRSTU instructions) and Z_R1.
// Returns the size (in bytes) of the emitted code.
unsigned int MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
                                            Register needle, Register needlecnt, int needlecntval,
                                            Register odd_reg, Register even_reg, int ae) {
  int block_start = offset();

  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;  // haystack char size in bytes
  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;  // needle char size in bytes
  Label L_needle1, L_Found, L_NotFound;

  BLOCK_COMMENT("string_indexof {");

  if (needle == haystack) {
    // Degenerate register assignment: a string trivially contains itself at index 0.
    z_lhi(result, 0);
  } else {

    // Load first character of needle (R0 used by search_string instructions).
    if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }

    // Compute last haystack addr to use if no match gets found.
    if (needlecnt != noreg) { // variable needlecnt
      z_ahi(needlecnt, -1); // Remaining characters after first one.
      z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.
      if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
    } else { // constant needlecnt
      assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");
      // Compute index succeeding last element to compare.
      if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
    }

    z_llgfr(haycnt, haycnt); // Clear high half.
    z_lgr(result, haystack); // Final result will be computed from needle start pointer.
    if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
    z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).

    if (h_csize != n_csize) {
      // Mixed-encoding case: UTF-16 haystack, Latin1 needle.
      assert(ae == StrIntrinsicNode::UL, "Invalid encoding");

      if (needlecnt != noreg || needlecntval != 1) {
        if (needlecnt != noreg) {
          compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);
        }

        // Main Loop: UL version (now we have at least 2 characters).
        Label L_OuterLoop, L_InnerLoop, L_Skip;
        bind(L_OuterLoop); // Search for 1st 2 characters.
        z_lgr(Z_R1, haycnt);
        MacroAssembler::search_string_uni(Z_R1, result);
        z_brc(Assembler::bcondNotFound, L_NotFound);
        z_lgr(result, Z_R1);

        // Compare the remaining needle chars one by one (zero-extended Latin1
        // needle byte vs. UTF-16 haystack halfword).
        z_lghi(Z_R1, n_csize);
        z_lghi(even_reg, h_csize);
        bind(L_InnerLoop);
        z_llgc(odd_reg, Address(needle, Z_R1));
        z_ch(odd_reg, Address(result, even_reg));
        z_brne(L_Skip);
        if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }
        z_brnl(L_Found);
        z_aghi(Z_R1, n_csize);
        z_aghi(even_reg, h_csize);
        z_bru(L_InnerLoop);

        bind(L_Skip);
        z_aghi(result, h_csize); // This is the new address we want to use for comparing.
        z_bru(L_OuterLoop);
      }

    } else {
      // Same-encoding case: compare needle tails with CLC (short) or CLCLE (long).
      const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1);
      Label L_clcle;

      if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {
        if (needlecnt != noreg) {
          compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);
          z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)
          z_brl(L_needle1);
        }

        // Main Loop: clc version (now we have at least 2 characters).
        Label L_OuterLoop, CLC_template;
        bind(L_OuterLoop); // Search for 1st 2 characters.
        z_lgr(Z_R1, haycnt);
        if (h_csize == 1) {
          MacroAssembler::search_string(Z_R1, result);
        } else {
          MacroAssembler::search_string_uni(Z_R1, result);
        }
        z_brc(Assembler::bcondNotFound, L_NotFound);
        z_lgr(result, Z_R1);

        if (needlecnt != noreg) {
          // Variable length: patch length into the CLC template via EXRL.
          assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
          z_exrl(needlecnt, CLC_template);
        } else {
          // Constant length: emit the CLC directly.
          z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
        }
        z_bre(L_Found);
        z_aghi(result, h_csize); // This is the new address we want to use for comparing.
        z_bru(L_OuterLoop);

        if (needlecnt != noreg) {
          // Never executed directly; only via EXRL above.
          bind(CLC_template);
          z_clc(h_csize, 0, Z_R1, n_csize, needle);
        }
      }

      if (needlecnt != noreg || needle_bytes > 256) {
        bind(L_clcle);

        // Main Loop: clcle version (now we have at least 256 bytes).
        Label L_OuterLoop, CLC_template;
        bind(L_OuterLoop); // Search for 1st 2 characters.
        z_lgr(Z_R1, haycnt);
        if (h_csize == 1) {
          MacroAssembler::search_string(Z_R1, result);
        } else {
          MacroAssembler::search_string_uni(Z_R1, result);
        }
        z_brc(Assembler::bcondNotFound, L_NotFound);

        add2reg(Z_R0, n_csize, needle);
        add2reg(even_reg, h_csize, Z_R1);
        z_lgr(result, Z_R1);
        if (needlecnt != noreg) {
          z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)
          z_llgfr(odd_reg, needlecnt);
        } else {
          load_const_optimized(Z_R1, needle_bytes);
          if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }
        }
        if (h_csize == 1) {
          compare_long_ext(Z_R0, even_reg, 0);
        } else {
          compare_long_uni(Z_R0, even_reg, 0);
        }
        z_bre(L_Found);

        if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.
        z_aghi(result, h_csize); // This is the new address we want to use for comparing.
        z_bru(L_OuterLoop);
      }
    }

    if (needlecnt != noreg || needlecntval == 1) {
      bind(L_needle1);

      // Single needle character version.
      if (h_csize == 1) {
        MacroAssembler::search_string(haycnt, result);
      } else {
        MacroAssembler::search_string_uni(haycnt, result);
      }
      z_lgr(result, haycnt);
      z_brc(Assembler::bcondFound, L_Found);
    }

    bind(L_NotFound);
    add2reg(result, -1, haystack); // Return -1.

    bind(L_Found); // Return index (or -1 in fallthrough case).
    z_sgr(result, haystack);
    if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); } // Bytes -> char index.
  }
  BLOCK_COMMENT("} string_indexof");

  return offset() - block_start;
}
5794
// Emit code searching for a single character 'needle' (or the immediate
// needleChar if needle == noreg) in 'haystack' of haycnt characters.
// On exit, result holds the character index of the first occurrence, or -1.
// Uses Z_R0 (search character for SRST/SRSTU); kills odd_reg/even_reg.
// early clobber: result
// Returns the size (in bytes) of the emitted code.
unsigned int MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
                                                 Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {
  int block_start = offset();

  BLOCK_COMMENT("string_indexof_char {");

  if (needle == haystack) {
    // Degenerate register assignment: trivially found at index 0.
    z_lhi(result, 0);
  } else {

    Label Ldone;

    z_llgfr(odd_reg, haycnt); // Preset loop ctr/searchrange end.
    if (needle == noreg) {
      load_const_optimized(Z_R0, (unsigned long)needleChar);
    } else {
      if (is_byte) {
        z_llgcr(Z_R0, needle); // First (and only) needle char.
      } else {
        z_llghr(Z_R0, needle); // First (and only) needle char.
      }
    }

    if (!is_byte) {
      z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.
    }

    z_lgr(even_reg, haystack); // haystack addr
    z_agr(odd_reg, haystack);  // First char after range end.
    z_lghi(result, -1);        // Preset "not found".

    if (is_byte) {
      MacroAssembler::search_string(odd_reg, even_reg);
    } else {
      MacroAssembler::search_string_uni(odd_reg, even_reg);
    }
    z_brc(Assembler::bcondNotFound, Ldone);
    // Found: odd_reg holds the match address; convert to an index.
    if (is_byte) {
      if (VM_Version::has_DistinctOpnds()) {
        z_sgrk(result, odd_reg, haystack);
      } else {
        z_sgr(odd_reg, haystack);
        z_lgr(result, odd_reg);
      }
    } else {
      z_slgr(odd_reg, haystack);
      z_srlg(result, odd_reg, exact_log2(sizeof(jchar))); // Bytes -> char index.
    }

    bind(Ldone);
  }
  BLOCK_COMMENT("} string_indexof_char");

  return offset() - block_start;
}
5851 #endif
5852
5853 //-------------------------------------------------
5854 // Constants (scalar and oop) in constant pool
5855 //-------------------------------------------------
5856
5857 // Add a non-relocated constant to the CP.
store_const_in_toc(AddressLiteral & val)5858 int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
5859 long value = val.value();
5860 address tocPos = long_constant(value);
5861
5862 if (tocPos != NULL) {
5863 int tocOffset = (int)(tocPos - code()->consts()->start());
5864 return tocOffset;
5865 }
5866 // Address_constant returned NULL, so no constant entry has been created.
5867 // In that case, we return a "fatal" offset, just in case that subsequently
5868 // generated access code is executed.
5869 return -1;
5870 }
5871
// Returns the TOC offset where the address is stored.
// Add a relocated constant to the CP.
int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
  // Use RelocationHolder::none for the constant pool entry.
  // Otherwise we will end up with a failing NativeCall::verify(x),
  // where x is the address of the constant pool entry.
  address tocPos = address_constant((address)oop.value(), RelocationHolder::none);

  if (tocPos != NULL) {
    int tocOffset = (int)(tocPos - code()->consts()->start());
    RelocationHolder rsp = oop.rspec();
    Relocation *rel = rsp.reloc();

    // Store toc_offset in relocation, used by call_far_patchable.
    if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) {
      ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset);
    }
    // Relocate at the load's pc.
    relocate(rsp);

    return tocOffset;
  }
  // address_constant returned NULL, so no constant entry has been created.
  // In that case, we return a "fatal" offset, just in case that subsequently
  // generated access code is executed.
  return -1;
}
5899
load_const_from_toc(Register dst,AddressLiteral & a,Register Rtoc)5900 bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
5901 int tocOffset = store_const_in_toc(a);
5902 if (tocOffset == -1) return false;
5903 address tocPos = tocOffset + code()->consts()->start();
5904 assert((address)code()->consts()->start() != NULL, "Please add CP address");
5905 relocate(a.rspec());
5906 load_long_pcrelative(dst, tocPos);
5907 return true;
5908 }
5909
load_oop_from_toc(Register dst,AddressLiteral & a,Register Rtoc)5910 bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
5911 int tocOffset = store_oop_in_toc(a);
5912 if (tocOffset == -1) return false;
5913 address tocPos = tocOffset + code()->consts()->start();
5914 assert((address)code()->consts()->start() != NULL, "Please add CP address");
5915
5916 load_addr_pcrelative(dst, tocPos);
5917 return true;
5918 }
5919
// If the instruction sequence at the given pc is a load_const_from_toc
// sequence, return the value currently stored at the referenced position
// in the TOC.
intptr_t MacroAssembler::get_const_from_toc(address pc) {

  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");

  long offset = get_load_const_from_toc_offset(pc);
  address dataLoc = NULL;
  if (is_load_const_from_toc_pcrelative(pc)) {
    dataLoc = pc + offset;  // pc-relative form: offset is relative to the load instruction.
  } else {
    // TOC-register form: offset is relative to the nmethod's constant table.
    CodeBlob* cb = CodeCache::find_blob_unsafe(pc); // Else we get assertion if nmethod is zombie.
    assert(cb && cb->is_nmethod(), "sanity");
    nmethod* nm = (nmethod*)cb;
    dataLoc = nm->ctable_begin() + offset;
  }
  return *(intptr_t *)dataLoc;
}
5939
// If the instruction sequence at the given pc is a load_const_from_toc
// sequence, copy the passed-in new_data value into the referenced
// position in the TOC.
void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");

  long offset = MacroAssembler::get_load_const_from_toc_offset(pc);
  address dataLoc = NULL;
  if (is_load_const_from_toc_pcrelative(pc)) {
    dataLoc = pc+offset;  // pc-relative form: offset is relative to the load instruction.
  } else {
    // TOC-register form: offset is relative to the nmethod's constant table.
    nmethod* nm = CodeCache::find_nmethod(pc);
    assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
    dataLoc = nm->ctable_begin() + offset;
  }
  if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary.
    *(unsigned long *)dataLoc = new_data;
  }
}
5959
5960 // Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
5961 // site. Verify by calling is_load_const_from_toc() before!!
5962 // Offset is +/- 2**32 -> use long.
get_load_const_from_toc_offset(address a)5963 long MacroAssembler::get_load_const_from_toc_offset(address a) {
5964 assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
5965 // expected code sequence:
5966 // z_lgrl(t, simm32); len = 6
5967 unsigned long inst;
5968 unsigned int len = get_instruction(a, &inst);
5969 return get_pcrel_offset(inst);
5970 }
5971
5972 //**********************************************************************************
5973 // inspection of generated instruction sequences for a particular pattern
5974 //**********************************************************************************
5975
// Does the code at "a" look like a pc-relative load from the constant pool
// (currently only z_lgrl is recognized)?
bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
#ifdef ASSERT
  // Sanity check: detect a (mis-)overlapping pattern where the halfword at a+2
  // decodes as a pc-relative call; dump the code range and trap if it does.
  unsigned long inst;
  unsigned int len = get_instruction(a+2, &inst);
  if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
    const int range = 128;
    Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
    VM_Version::z_SIGSEGV();
  }
#endif
  // expected code sequence:
  //   z_lgrl(t, relAddr32);    len = 6
  //TODO: verify accessed data is in CP, if possible.
  return is_load_pcrelative_long(a);  // TODO: might be too general. Currently, only lgrl is used.
}
5991
is_load_const_from_toc_call(address a)5992 bool MacroAssembler::is_load_const_from_toc_call(address a) {
5993 return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
5994 }
5995
is_load_const_call(address a)5996 bool MacroAssembler::is_load_const_call(address a) {
5997 return is_load_const(a) && is_call_byregister(a + load_const_size());
5998 }
5999
6000 //-------------------------------------------------
// Emitters for some really CISC instructions
6002 //-------------------------------------------------
6003
// Emit MVCLE (move long extended) wrapped in a retry loop: the instruction
// may be interrupted (CC==3) and must be re-issued to continue the move.
void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
  assert(dst->encoding()%2==0, "must be an even/odd register pair");
  assert(src->encoding()%2==0, "must be an even/odd register pair");
  assert(pad<256, "must be a padding BYTE");

  Label retry;
  bind(retry);
  Assembler::z_mvcle(dst, src, pad);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6014
// Emit CLCLE (compare logical long extended) wrapped in a retry loop:
// the instruction may be interrupted (CC==3) and must be re-issued.
// Condition code on exit reflects the comparison result.
void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(pad<256, "must be a padding BYTE");

  Label retry;
  bind(retry);
  Assembler::z_clcle(left, right, pad, Z_R0);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6025
// Emit CLCLU (compare logical long unicode) wrapped in a retry loop:
// the instruction may be interrupted (CC==3) and must be re-issued.
// Requires the ETF2 facility.
void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(pad<=0xfff, "must be a padding HALFWORD");
  assert(VM_Version::has_ETF2(), "instruction must be available");

  Label retry;
  bind(retry);
  Assembler::z_clclu(left, right, pad, Z_R0);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6037
// Emit SRST (search string) wrapped in a retry loop: the instruction may
// be interrupted (CC==3) and must be re-issued. The search character is
// implicitly taken from Z_R0, which is why neither operand may use R0.
void MacroAssembler::search_string(Register end, Register start) {
  assert(end->encoding() != 0, "end address must not be in R0");
  assert(start->encoding() != 0, "start address must not be in R0");

  Label retry;
  bind(retry);
  Assembler::z_srst(end, start);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6047
// Emit SRSTU (search string unicode) wrapped in a retry loop: the
// instruction may be interrupted (CC==3) and must be re-issued. The search
// character is implicitly taken from Z_R0. Requires the ETF3 facility.
void MacroAssembler::search_string_uni(Register end, Register start) {
  assert(end->encoding() != 0, "end address must not be in R0");
  assert(start->encoding() != 0, "start address must not be in R0");
  assert(VM_Version::has_ETF3(), "instruction must be available");

  Label retry;
  bind(retry);
  Assembler::z_srstu(end, start);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6058
// Emit KMAC (compute message authentication code) wrapped in a retry loop:
// the instruction is partially-completing (CC==3) and must be re-issued.
// Function code and parameter block address are implicitly in Z_R0/Z_R1.
void MacroAssembler::kmac(Register srcBuff) {
  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kmac(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6068
// Emit KIMD (compute intermediate message digest) wrapped in a retry loop:
// the instruction is partially-completing (CC==3) and must be re-issued.
void MacroAssembler::kimd(Register srcBuff) {
  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kimd(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6078
// Emit KLMD (compute last message digest) wrapped in a retry loop:
// the instruction is partially-completing (CC==3) and must be re-issued.
void MacroAssembler::klmd(Register srcBuff) {
  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_klmd(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6088
// Emit KM (cipher message) wrapped in a retry loop: the instruction is
// partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::km(Register dstBuff, Register srcBuff) {
  // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
  // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_km(dstBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6101
// Emit KMC (cipher message with chaining) wrapped in a retry loop: the
// instruction is partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
  // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
  // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
  assert(srcBuff->encoding()     != 0, "src buffer address can't be in Z_R0");
  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kmc(dstBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6114
// Emit CKSM (checksum) wrapped in a retry loop: the instruction is
// partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_cksm(crcBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6123
// Emit TROO (translate one to one) wrapped in a retry loop: the instruction
// is partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_troo(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6133
// Emit TROT (translate one to two) wrapped in a retry loop: the instruction
// is partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trot(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6143
// Emit TRTO (translate two to one) wrapped in a retry loop: the instruction
// is partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trto(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6153
// Emit TRTT (translate two to two) wrapped in a retry loop: the instruction
// is partially-completing (CC==3) and must be re-issued until done.
void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trtt(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
6163
6164 //---------------------------------------
6165 // Helpers for Intrinsic Emitters
6166 //---------------------------------------
6167
/**
 * Fold one byte of 'val' into the CRC:
 *   uint32_t crc;
 *   crc = timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 *
 * tmp receives the scaled table index; tmp may alias val.
 * The statement order differs for crc == val so the table index is
 * extracted from the still-unmodified value.
 */
void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
  assert_different_registers(crc, table, tmp);
  assert_different_registers(val, table);
  if (crc == val) {      // Must rotate first to use the unmodified value.
    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
  } else {
    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true);  // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
  }
  z_x(crc, Address(table, tmp, 0));  // XOR table entry indexed by (byte << 2).
}
6184
/**
 * Fold the low byte of crc itself into the CRC:
 *   uint32_t crc;
 *   crc = timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 * Convenience wrapper around fold_byte_crc32 with val == crc.
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  fold_byte_crc32(crc, crc, table, tmp);
}
6192
/**
 * Emits code to update CRC-32 with a byte value according to constants in table.
 *
 * @param [in,out]crc Register containing the crc.
 * @param [in]val     Register containing the byte to fold into the CRC; killed
 *                    (used as scratch by fold_byte_crc32).
 * @param [in]table   Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  z_xr(val, crc);
  fold_byte_crc32(crc, val, table, val);
}
6208
6209
/**
 * Update the CRC byte-by-byte over the buffer (slow path / tail loop).
 *
 * @param crc   register containing existing CRC (32-bit); updated in place
 * @param buf   register pointing to input byte buffer (byte*); advanced past the data
 * @param len   register containing number of bytes; counted down to 0 (killed)
 * @param table register pointing to CRC table
 * @param data  scratch register for the current input byte
 */
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
  assert_different_registers(crc, buf, len, table, data);

  Label L_mainLoop, L_done;
  const int mainLoop_stepping = 1;

  // Process all bytes in a single-byte loop. Nothing to do for len <= 0.
  z_ltr(len, len);
  z_brnh(L_done);

  bind(L_mainLoop);
  z_llgc(data, Address(buf, (intptr_t)0));  // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
  add2reg(buf, mainLoop_stepping);          // Advance buffer position.
  update_byte_crc32(crc, data, table);
  z_brct(len, L_mainLoop);                  // Iterate.

  bind(L_done);
}
6234
/**
 * Emits code to update CRC-32 with a 4-byte value according to constants in table.
 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
 *
 * @param crc     register containing existing CRC (32-bit); updated in place
 * @param buf     register pointing to input byte buffer; advanced by bufInc if nonzero
 * @param table   register pointing to the (column-major) CRC table
 * @param bufDisp displacement of the 4-byte word within the buffer
 * @param bufInc  buffer advance after the load (0 = don't advance)
 * @param t0..t3  scratch registers; t0 may alias crc (see lgr_if_needed)
 */
void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                                        Register t0,  Register t1,  Register t2,  Register t3) {
  // This is what we implement (the DOBIG4 part):
  //
  // #define DOBIG4 c ^= *++buf4; \
  //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
  //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
  // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
  // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
  const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
  const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
  const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
  const int ix3 = 7*(4*CRC32_COLUMN_SIZE);

  // XOR crc with next four bytes of buffer.
  lgr_if_needed(t0, crc);
  z_x(t0, Address(buf, bufDisp));
  if (bufInc != 0) {
    add2reg(buf, bufInc);
  }

  // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
  rotate_then_insert(t3, t0, 56-2, 63-2, 2,    true);  // ((c >>  0) & 0xff) << 2
  rotate_then_insert(t2, t0, 56-2, 63-2, 2-8,  true);  // ((c >>  8) & 0xff) << 2
  rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true);  // ((c >> 16) & 0xff) << 2
  rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true);  // ((c >> 24) & 0xff) << 2

  // XOR indexed table values to calculate updated crc.
  z_ly(t2, Address(table, t2, (intptr_t)ix1));
  z_ly(t0, Address(table, t0, (intptr_t)ix3));
  z_xy(t2, Address(table, t3, (intptr_t)ix0));
  z_xy(t0, Address(table, t1, (intptr_t)ix2));
  z_xr(t0, t2);           // Now t0 contains the updated CRC value.
  lgr_if_needed(crc, t0);
}
6275
6276 /**
6277 * @param crc register containing existing CRC (32-bit)
6278 * @param buf register pointing to input byte buffer (byte*)
6279 * @param len register containing number of bytes
6280 * @param table register pointing to CRC table
6281 *
6282 * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
6283 */
// CRC32 kernel processing the buffer one 4-byte word per iteration.
// The main loop works on byte-reversed data (z_lrvr before/after) so the
// big-endian table columns used by update_1word_crc32 apply; the remaining
// tail bytes (len % 4) are handled bytewise by update_byteLoop_crc32.
// If invertCRC, the crc is 1s-complemented on entry and exit (zlib convention).
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register  data = t0;
  Register  ctr  = Z_R0;
  const int mainLoop_stepping = 4;
  const int log_stepping      = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  // Check for short (<4 bytes) buffer.
  z_srag(ctr, len, log_stepping);      // ctr = number of full words.
  z_brnh(L_tail);                      // No full word: all bytes go to the tail loop.

  z_lrvr(crc, crc);          // Revert byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop (len % 4)

  BIND(L_mainLoop);
    update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
    z_brct(ctr, L_mainLoop); // Iterate.

  z_lrvr(crc, crc);          // Revert byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6325
6326 /**
6327 * @param crc register containing existing CRC (32-bit)
6328 * @param buf register pointing to input byte buffer (byte*)
6329 * @param len register containing number of bytes
6330 * @param table register pointing to CRC table
6331 */
// CRC32 kernel processing the buffer one byte at a time (fallback variant,
// delegates entirely to update_byteLoop_crc32).
// If invertCRC, the crc is 1s-complemented on entry and exit (zlib convention).
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
                                        Register t0,  Register t1,  Register t2,  Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);
  Register data = t0;

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6348
// Update crc with the single byte at *buf.
// Note: len is only checked for register distinctness; it is not read here.
// If invertCRC, the crc is 1s-complemented on entry and exit (zlib convention).
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
                                             bool invertCRC) {
  assert_different_registers(crc, buf, len, table, tmp);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  z_llgc(tmp, Address(buf, (intptr_t)0));  // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
  update_byte_crc32(crc, tmp, table);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6364
// Update crc with a single byte that is already in register val
// (no memory access, unlike kernel_crc32_singleByte).
// If invertCRC, the crc is 1s-complemented on entry and exit (zlib convention).
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                                bool invertCRC) {
  assert_different_registers(crc, val, table);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }

  update_byte_crc32(crc, val, table);

  if (invertCRC) {
    not_(crc, noreg, false);           // 1s complement of crc
  }
}
6379
6380 //
6381 // Code for BigInteger::multiplyToLen() intrinsic.
6382 //
6383
6384 // dest_lo += src1 + src2
6385 // dest_hi += carry1 + carry2
6386 // Z_R7 is destroyed !
// 128-bit add with carry propagation:
//   dest_lo += src1 + src2
//   dest_hi += carry-out of each low-word addition
// Z_R7 is destroyed! (Used as a zero register for the add-with-carry steps.)
void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
                                     Register src1,    Register src2) {
  clear_reg(Z_R7);
  z_algr(dest_lo, src1);   // Unsigned add, sets carry.
  z_alcgr(dest_hi, Z_R7);  // dest_hi += 0 + carry.
  z_algr(dest_lo, src2);
  z_alcgr(dest_hi, Z_R7);
}
6395
6396 // Multiply 64 bit by 64 bit first loop.
// First loop of BigInteger::multiplyToLen(): multiply all of y by x[xstart],
// two 32-bit ints (one 64-bit chunk) at a time. Odd-length operands load a
// single zero-extended 32-bit portion instead (L_one_x / L_one_y).
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
                                           Register x_xstart,
                                           Register y, Register y_idx,
                                           Register z,
                                           Register carry,
                                           Register product,
                                           Register idx, Register kdx) {
  // This code emulates the following Java fragment:
  // jlong carry, x[], y[], z[];
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //   huge_128 product = y[idx] * x[xstart] + carry;
  //   z[kdx] = (jlong)product;
  //   carry  = (jlong)(product >>> 64);
  // }
  // z[xstart] = carry;

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  z_aghi(xstart, -1);
  z_brl(L_one_x);   // Special case: length of x is 1.

  // Load next two integers of x.
  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  bind(L_first_loop);

  z_aghi(idx, -1);
  z_brl(L_first_loop_exit);
  z_aghi(idx, -1);
  z_brl(L_one_y);

  // Load next two integers of y.
  z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
  mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));


  bind(L_multiply);

  // MLGR writes its 128-bit result to the even/odd register pair
  // product::product->successor().
  Register multiplicand = product->successor();
  Register product_low  = multiplicand;

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, y_idx);     // multiplicand * y_idx -> product::multiplicand
  clear_reg(Z_R7);
  z_algr(product_low, carry); // Add carry to result.
  z_alcgr(product, Z_R7);     // Add carry of the last addition.
  add2reg(kdx, -2);

  // Store result.
  z_sllg(Z_R7, kdx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, 0));
  lgr_if_needed(carry, product);
  z_bru(L_first_loop);


  bind(L_one_y); // Load one 32 bit portion of y as (0,value).

  clear_reg(y_idx);
  mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);
  z_bru(L_multiply);


  bind(L_one_x); // Load one 32 bit portion of x as (0,value).

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
  z_bru(L_first_loop);

  bind(L_first_loop_exit);
}
6469
6470 // Multiply 64 bit by 64 bit and add 128 bit.
// Multiply 64 bit by 64 bit and add 128 bit:
//   product::product_low = y[idx] * x_xstart + z[idx] + carry; z[idx] = low 64 bits.
// offset selects which 8-byte element relative to idx is processed.
// Clobbers Z_R7 (index scratch and via add2_with_carry).
void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
                                            Register z,
                                            Register yz_idx, Register idx,
                                            Register carry,  Register product,
                                            int offset) {
  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
  // z[kdx] = (jlong)product;

  // MLGR writes its 128-bit result to the even/odd pair product::multiplicand.
  Register multiplicand = product->successor();
  Register product_low  = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);
  mem2reg_opt(yz_idx, Address(y, Z_R7, offset));

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx);  // multiplicand * yz_idx -> product::multiplicand
  mem2reg_opt(yz_idx, Address(z, Z_R7, offset));  // yz_idx now holds z[kdx].

  add2_with_carry(product, product_low, carry, yz_idx);

  z_sllg(Z_R7, idx, LogBytesPerInt);  // Recompute; Z_R7 was clobbered by add2_with_carry.
  reg2mem_opt(product_low, Address(z, Z_R7, offset));

}
6495
6496 // Multiply 128 bit by 128 bit. Unrolled inner loop.
// Inner (third) loop of BigInteger::multiplyToLen(), unrolled by a factor
// of two: each iteration multiplies x_xstart with two 64-bit chunks of y
// and accumulates into z. Left-over parts (idx % 4) are handled after the
// loop, including a final single 32-bit portion.
void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
                                             Register y, Register z,
                                             Register yz_idx, Register idx,
                                             Register jdx,
                                             Register carry, Register product,
                                             Register carry2) {
  // This code emulates the following Java fragment:
  // jlong carry, x[], y[], z[];
  // int kdx = ystart+1;
  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
  //   z[kdx+idx+1] = (jlong)product;
  //   jlong carry2 = (jlong)(product >>> 64);
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // idx += 2;
  // if (idx > 0) {
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;

  // scale the index: jdx = number of unrolled (4-int) iterations.
  lgr_if_needed(jdx, idx);
  and_imm(jdx, 0xfffffffffffffffcL);
  rshift(jdx, 2);


  bind(L_third_loop);

  z_aghi(jdx, -1);
  z_brl(L_third_loop_exit);
  add2reg(idx, -4);

  // Upper chunk first (offset 8), its carry feeds the lower chunk.
  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
  lgr_if_needed(carry2, product);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
  lgr_if_needed(carry, product);
  z_bru(L_third_loop);


  bind(L_third_loop_exit);  // Handle any left-over operand parts.

  and_imm(idx, 0x3);
  z_brz(L_post_third_loop_done);

  Label L_check_1;

  z_aghi(idx, -2);
  z_brl(L_check_1);         // Fewer than 2 ints left.

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
  lgr_if_needed(carry, product);


  bind(L_check_1);

  add2reg(idx, 0x2);
  and_imm(idx, 0x1);
  z_aghi(idx, -1);
  z_brl(L_post_third_loop_done);  // No single int left over.

  // Process the final single 32-bit portion: 64x32 multiply-accumulate,
  // store low 32 bits, keep the remaining 64 bits as carry.
  Register multiplicand = product->successor();
  Register product_low  = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx);  // multiplicand * yz_idx -> product::multiplicand
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);

  add2_with_carry(product, product_low, yz_idx, carry);

  z_sllg(Z_R7, idx, LogBytesPerInt);  // Recompute; Z_R7 clobbered by add2_with_carry.
  reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
  rshift(product_low, 32);

  lshift(product, 32);
  z_ogr(product_low, product);  // carry = (product >>> 32), assembled from both halves.
  lgr_if_needed(carry, product_low);

  bind(L_post_third_loop_done);
}
6586
// Code for the BigInteger::multiplyToLen() intrinsic: z = x * y, where
// x/xlen and y/ylen are int arrays with their lengths, and z receives the
// product. zlen is read from its stack argument slot. tmp1..tmp5 are
// scratch registers; Z_R7..Z_R13 are saved/restored around the computation.
void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
                                     Register tmp5) {
  ShortBranchVerifier sbv(this);

  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);

  z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);  // Save non-volatile regs we clobber.

  // In openJdk, we store the argument as 32-bit value to slot.
  Address zlen(Z_SP, _z_abi(remaining_cargs));  // Int in long on big endian.

  const Register idx    = tmp1;
  const Register kdx    = tmp2;
  const Register xstart = tmp3;

  const Register y_idx  = tmp4;
  const Register carry  = tmp5;
  const Register product   = Z_R0_scratch;
  const Register x_xstart  = Z_R8;

  // First Loop.
  //
  //   final static long LONG_MASK = 0xffffffffL;
  //   int xstart = xlen - 1;
  //   int ystart = ylen - 1;
  //   long carry = 0;
  //   for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //     long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //     z[kdx] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[xstart] = (int)carry;
  //

  lgr_if_needed(idx, ylen);  // idx = ylen
  z_llgf(kdx, zlen);         // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
  clear_reg(carry);          // carry = 0

  Label L_done;

  lgr_if_needed(xstart, xlen);
  z_aghi(xstart, -1);
  z_brl(L_done);             // Nothing to do for empty x.

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);

  NearLabel L_second_loop;
  compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);

  NearLabel L_carry;
  z_aghi(kdx, -1);
  z_brz(L_carry);            // Only one z slot left: store upper half only.

  // Store lower 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
  rshift(carry, 32);
  z_aghi(kdx, -1);


  bind(L_carry);

  // Store upper 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);

  // Second and third (nested) loops.
  //
  //  for (int i = xstart-1; i >= 0; i--) { // Second loop
  //    carry = 0;
  //    for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
  //      long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                     (z[k] & LONG_MASK) + carry;
  //      z[k] = (int)product;
  //      carry = product >>> 32;
  //    }
  //    z[i] = (int)carry;
  //  }
  //
  //  i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx

  const Register jdx = tmp1;

  bind(L_second_loop);

  clear_reg(carry);           // carry = 0;
  lgr_if_needed(jdx, ylen);   // j = ystart+1

  z_aghi(xstart, -1);         // i = xstart-1;
  z_brl(L_done);

  // Use free slots in the current stackframe instead of push/pop.
  Address zsave(Z_SP, _z_abi(carg_1));
  reg2mem_opt(z, zsave);


  Label L_last_x;

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
  z_aghi(xstart, -1);         // i = xstart-1;
  z_brl(L_last_x);            // Only a single 32-bit portion of x left.

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  Label L_third_loop_prologue;

  bind(L_third_loop_prologue);

  // Spill registers that multiply_128_x_128_loop would clobber.
  Address xsave(Z_SP, _z_abi(carg_2));
  Address xlensave(Z_SP, _z_abi(carg_3));
  Address ylensave(Z_SP, _z_abi(carg_4));

  reg2mem_opt(x, xsave);
  reg2mem_opt(xstart, xlensave);
  reg2mem_opt(ylen, ylensave);


  multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);

  mem2reg_opt(z, zsave);
  mem2reg_opt(x, xsave);
  mem2reg_opt(xlen, xlensave);   // This is the decrement of the loop counter!
  mem2reg_opt(ylen, ylensave);

  add2reg(tmp3, 1, xlen);
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);  // z[i] = low 32 bits of carry.
  z_aghi(tmp3, -1);
  z_brl(L_done);

  rshift(carry, 32);
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);  // Store upper 32 bits of carry.
  z_bru(L_second_loop);

  // Next infrequent code is moved outside loops.
  bind(L_last_x);

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);  // (0, x[0])
  z_bru(L_third_loop_prologue);

  bind(L_done);

  z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);  // Restore saved regs.
}
6743
6744 #ifndef PRODUCT
6745 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
// The condition code must have been set by the caller before invoking this.
void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
  Label ok;
  if (check_equal) {
    z_bre(ok);   // Branch around the stop if condition holds.
  } else {
    z_brne(ok);
  }
  stop(msg, id);
  bind(ok);
}
6756
6757 // Assert if CC indicates "low".
asm_assert_low(const char * msg,int id)6758 void MacroAssembler::asm_assert_low(const char *msg, int id) {
6759 Label ok;
6760 z_brnl(ok);
6761 stop(msg, id);
6762 bind(ok);
6763 }
6764
6765 // Assert if CC indicates "high".
asm_assert_high(const char * msg,int id)6766 void MacroAssembler::asm_assert_high(const char *msg, int id) {
6767 Label ok;
6768 z_brnh(ok);
6769 stop(msg, id);
6770 bind(ok);
6771 }
6772
6773 // Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false)
6774 // generate non-relocatable code.
// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
// Generates non-relocatable code (uses stop_static instead of stop).
void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
  Label ok;
  if (check_equal) { z_bre(ok); }
  else             { z_brne(ok); }
  stop_static(msg, id);
  bind(ok);
}
6782
// Assert on the contents of a memory location being zero (check_equal==true)
// or non-zero (check_equal==false). size selects a 4- or 8-byte compare.
// allow_relocation chooses between relocatable (asm_assert) and static
// (asm_assert_static) stop code. Clobbers Z_R0 and the condition code.
void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
                                          Register mem_base, const char* msg, int id) {
  switch (size) {
    case 4:
      load_and_test_int(Z_R0, Address(mem_base, mem_offset));
      break;
    case 8:
      load_and_test_long(Z_R0, Address(mem_base, mem_offset));
      break;
    default:
      ShouldNotReachHere();
  }
  if (allow_relocation) { asm_assert(check_equal, msg, id); }
  else                  { asm_assert_static(check_equal, msg, id); }
}
6798
6799 // Check the condition
6800 // expected_size == FP - SP
6801 // after transformation:
6802 // expected_size - FP + SP == 0
6803 // Destroys Register expected_size if no tmp register is passed.
asm_assert_frame_size(Register expected_size,Register tmp,const char * msg,int id)6804 void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
6805 if (tmp == noreg) {
6806 tmp = expected_size;
6807 } else {
6808 if (tmp != expected_size) {
6809 z_lgr(tmp, expected_size);
6810 }
6811 z_algr(tmp, Z_SP);
6812 z_slg(tmp, 0, Z_R0, Z_SP);
6813 asm_assert_eq(msg, id);
6814 }
6815 }
6816 #endif // !PRODUCT
6817
verify_thread()6818 void MacroAssembler::verify_thread() {
6819 if (VerifyThread) {
6820 unimplemented("", 117);
6821 }
6822 }
6823
6824 // Plausibility check for oops.
// Plausibility check for oops. No-op unless VerifyOops is set.
// Calls the verify_oop stub routine with the oop and a message string;
// saves/restores return pc and volatile registers Z_R1..Z_R5 around the call.
void MacroAssembler::verify_oop(Register oop, const char* msg) {
  if (!VerifyOops) return;

  BLOCK_COMMENT("verify_oop {");
  Register tmp = Z_R0;
  unsigned int nbytes_save = 5*BytesPerWord;
  address entry = StubRoutines::verify_oop_subroutine_entry_address();

  save_return_pc();
  push_frame_abi160(nbytes_save);
  z_stmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);  // Preserve caller's registers.

  z_lgr(Z_ARG2, oop);
  load_const(Z_ARG1, (address) msg);
  load_const(Z_R1, entry);
  z_lg(Z_R1, 0, Z_R1);   // Dereference: entry is the *address of* the stub entry point.
  call_c(Z_R1);

  z_lmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP);
  pop_frame();
  restore_return_pc();

  BLOCK_COMMENT("} verify_oop ");
}
6849
// Printable names for the stop types, indexed by the stop type constants
// (see stop()/stop_chain(), which select via type % stop_end).
const char* MacroAssembler::stop_types[] = {
  "stop",
  "untested",
  "unimplemented",
  "shouldnotreachhere"
};
6856
stop_on_request(const char * tp,const char * msg)6857 static void stop_on_request(const char* tp, const char* msg) {
6858 tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
6859 guarantee(false, "Z assembly code requires stop: %s", msg);
6860 }
6861
// Emit code that halts execution with a diagnostic message: sets up the
// type string and message as arguments and calls stop_on_request(), which
// does not return. type indexes stop_types[]; id is for diagnostics only.
void MacroAssembler::stop(int type, const char* msg, int id) {
  BLOCK_COMMENT(err_msg("stop: %s {", msg));

  // Setup arguments.
  load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
  load_const(Z_ARG2, (void*) msg);
  get_PC(Z_R14);     // Following code pushes a frame without entering a new function. Use current pc as return address.
  save_return_pc();  // Saves return pc Z_R14.
  push_frame_abi160(0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issueing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(); // Illegal instruction.
  z_illtrap(); // Illegal instruction.

  BLOCK_COMMENT(" } stop");
}
6880
6881 // Special version of stop() for code size reduction.
6882 // Reuses the previously generated call sequence, if any.
6883 // Generates the call sequence on its own, if necessary.
6884 // Note: This code will work only in non-relocatable code!
6885 // The relative address of the data elements (arg1, arg2) must not change.
6886 // The reentry point must not move relative to it's users. This prerequisite
6887 // should be given for "hand-written" code, if all chain calls are in the same code blob.
6888 // Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
// Special version of stop() for code size reduction.
// Reuses the previously generated call sequence, if any.
// Generates the call sequence on its own, if necessary.
// Note: This code will work only in non-relocatable code!
//       The relative address of the data elements (arg1, arg2) must not change.
//       The reentry point must not move relative to it's users. This prerequisite
//       should be given for "hand-written" code, if all chain calls are in the same code blob.
//       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
//
// Returns the (possibly new) reentry point for subsequent chained stop calls,
// or NULL if chaining is not possible (relocation allowed).
address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
  BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));

  // Setup arguments.
  if (allow_relocation) {
    // Relocatable version (for comparison purposes). Remove after some time.
    load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
    load_const(Z_ARG2, (void*) msg);
  } else {
    load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
    load_absolute_address(Z_ARG2, (address)msg);
  }
  if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
    // An earlier call sequence is close enough: just branch to it.
    BLOCK_COMMENT("branch to reentry point:");
    z_brc(bcondAlways, reentry);
  } else {
    // Generate a fresh call sequence and remember its start as reentry point.
    BLOCK_COMMENT("reentry point:");
    reentry = pc();      // Re-entry point for subsequent stop calls.
    save_return_pc();    // Saves return pc Z_R14.
    push_frame_abi160(0);
    if (allow_relocation) {
      reentry = NULL;    // Prevent reentry if code relocation is allowed.
      call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    } else {
      call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    }
    z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
  }
  BLOCK_COMMENT(" } stop_chain");

  return reentry;
}
6921
6922 // Special version of stop() for code size reduction.
6923 // Assumes constant relative addresses for data and runtime call.
// Special version of stop() for code size reduction.
// Assumes constant relative addresses for data and runtime call.
// Delegates to stop_chain() without chaining (reentry == NULL).
void MacroAssembler::stop_static(int type, const char* msg, int id) {
  stop_chain(NULL, type, msg, id, false);
}
6927
// Not implemented on this platform; traps via unimplemented().
void MacroAssembler::stop_subroutine() {
  unimplemented("stop_subroutine", 710);
}
6931
6932 // Prints msg to stdout from within generated code..
warn(const char * msg)6933 void MacroAssembler::warn(const char* msg) {
6934 RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
6935 load_absolute_address(Z_R1, (address) warning);
6936 load_absolute_address(Z_ARG1, (address) msg);
6937 (void) call(Z_R1);
6938 RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
6939 }
6940
6941 #ifndef PRODUCT
6942
6943 // Write pattern 0x0101010101010101 in region [low-before, high+after].
// Write pattern 0x0101010101010101 in region [low-before, high+after].
// Debug-only stack zapping; no-op unless ZapEmptyStackFields is set.
// For small fixed-size regions (low == high, 1..4 words) the stores are
// unrolled; otherwise a loop walks addr from low-before to high+after.
// val and addr are scratch registers; before/after are in words.
void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
  if (!ZapEmptyStackFields) return;
  BLOCK_COMMENT("zap memory region {");
  load_const_optimized(val, 0x0101010101010101);
  int size = before + after;
  if (low == high && size < 5 && size > 0) {
    // Small constant-size region: unrolled stores.
    int offset = -before*BytesPerWord;
    for (int i = 0; i < size; ++i) {
      z_stg(val, Address(low, offset));
      offset += (1*BytesPerWord);
    }
  } else {
    // General case: loop from low-before up to (temporarily extended) high.
    add2reg(addr, -before*BytesPerWord, low);
    if (after) {
#ifdef ASSERT
      jlong check = after * BytesPerWord;
      assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
#endif
      add2reg(high, after * BytesPerWord);
    }
    NearLabel loop;
    bind(loop);
    z_stg(val, Address(addr));
    add2reg(addr, 8);
    compare64_and_branch(addr, high, bcondNotHigh, loop);
    if (after) {
      add2reg(high, -after * BytesPerWord);  // Restore high to its original value.
    }
  }
  BLOCK_COMMENT("} zap memory region");
}
6975 #endif // !PRODUCT
6976
SkipIfEqual(MacroAssembler * masm,const bool * flag_addr,bool value,Register _rscratch)6977 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
6978 _masm = masm;
6979 _masm->load_absolute_address(_rscratch, (address)flag_addr);
6980 _masm->load_and_test_int(_rscratch, Address(_rscratch));
6981 if (value) {
6982 _masm->z_brne(_label); // Skip if true, i.e. != 0.
6983 } else {
6984 _masm->z_bre(_label); // Skip if false, i.e. == 0.
6985 }
6986 }
6987
// Bind the skip target emitted by the constructor's conditional branch.
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
6991