1 /*
2 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "code/debugInfoRec.hpp"
29 #include "code/icBuffer.hpp"
30 #include "code/vtableStubs.hpp"
31 #include "gc/shared/gcLocker.hpp"
32 #include "interpreter/interpreter.hpp"
33 #include "interpreter/interp_masm.hpp"
34 #include "memory/resourceArea.hpp"
35 #include "nativeInst_s390.hpp"
36 #include "oops/compiledICHolder.hpp"
37 #include "registerSaver_s390.hpp"
38 #include "runtime/safepointMechanism.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/vframeArray.hpp"
41 #include "utilities/align.hpp"
42 #include "vmreg_s390.inline.hpp"
43 #ifdef COMPILER1
44 #include "c1/c1_Runtime1.hpp"
45 #endif
46 #ifdef COMPILER2
47 #include "opto/ad.hpp"
48 #include "opto/runtime.hpp"
49 #endif
50
// In non-product builds with -XX:+Verbose, prefix each emitted instruction
// with a block comment naming file and line (helps when reading disassembly).
#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

#define BLOCK_COMMENT(str) __ block_comment(str)
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Table entry for an integer register which is saved/restored and owns a frame slot.
#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() }

// Table entry for a float register which is saved/restored and owns a frame slot.
#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
75
// Register table for RegisterSet::all_registers: floats F0,F2..F15 and
// ints R2..R13. Float entries precede int entries (the save/restore loops
// assert that ints and floats are not interleaved).
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
116
// Register table for RegisterSet::all_integer_registers: same layout (and
// thus same frame size) as RegisterSaver_LiveRegs, but the float slots are
// marked excluded so only R2..R13 are actually saved/restored.
static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: All excluded, but still they get a stack slot to get same frame size.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
157
// Register table for RegisterSet::all_registers_except_r2: identical to
// RegisterSaver_LiveRegs except that R2's slot is excluded (not written),
// keeping the frame layout/size unchanged.
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
198
// Live argument registers which get spilled to the stack.
// Used with RegisterSet::arg_registers. Float args come first; the
// save/restore loops require floats not to be interleaved with ints.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};
211
// Register table for RegisterSet::all_volatile_registers: only the
// volatile (caller-saved) subset — F0,F2..F7 and R2..R5; the non-volatile
// registers are commented out and get no frame slot here.
static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
252
live_reg_save_size(RegisterSet reg_set)253 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
254 int reg_space = -1;
255 switch (reg_set) {
256 case all_registers: reg_space = sizeof(RegisterSaver_LiveRegs); break;
257 case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
258 case all_integer_registers: reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
259 case all_volatile_registers: reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
260 case arg_registers: reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
261 default: ShouldNotReachHere();
262 }
263 return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
264 }
265
266
live_reg_frame_size(RegisterSet reg_set)267 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
268 return live_reg_save_size(reg_set) + frame::z_abi_160_size;
269 }
270
271
272 // return_pc: Specify the register that should be stored as the return pc in the current frame.
save_live_registers(MacroAssembler * masm,RegisterSet reg_set,Register return_pc)273 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
274 // Record volatile registers as callee-save values in an OopMap so
275 // their save locations will be propagated to the caller frame's
276 // RegisterMap during StackFrameStream construction (needed for
277 // deoptimization; see compiledVFrame::create_stack_value).
278
279 // Calculate frame size.
280 const int frame_size_in_bytes = live_reg_frame_size(reg_set);
281 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
282 const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
283
284 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
285 OopMap* map = new OopMap(frame_size_in_slots, 0);
286
287 int regstosave_num = 0;
288 const RegisterSaver::LiveRegType* live_regs = NULL;
289
290 switch (reg_set) {
291 case all_registers:
292 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
293 live_regs = RegisterSaver_LiveRegs;
294 break;
295 case all_registers_except_r2:
296 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
297 live_regs = RegisterSaver_LiveRegsWithoutR2;
298 break;
299 case all_integer_registers:
300 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
301 live_regs = RegisterSaver_LiveIntRegs;
302 break;
303 case all_volatile_registers:
304 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
305 live_regs = RegisterSaver_LiveVolatileRegs;
306 break;
307 case arg_registers:
308 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
309 live_regs = RegisterSaver_LiveArgRegs;
310 break;
311 default: ShouldNotReachHere();
312 }
313
314 // Save return pc in old frame.
315 __ save_return_pc(return_pc);
316
317 // Push a new frame (includes stack linkage).
318 // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
319 // illegally used to pass parameters by RangeCheckStub::emit_code().
320 __ push_frame(frame_size_in_bytes, return_pc);
321 // We have to restore return_pc right away.
322 // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
323 // Nobody else knows which register we saved.
324 __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
325
326 // Register save area in new frame starts above z_abi_160 area.
327 int offset = register_save_offset;
328
329 Register first = noreg;
330 Register last = noreg;
331 int first_offset = -1;
332 bool float_spilled = false;
333
334 for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
335 int reg_num = live_regs[i].reg_num;
336 int reg_type = live_regs[i].reg_type;
337
338 switch (reg_type) {
339 case RegisterSaver::int_reg: {
340 Register reg = as_Register(reg_num);
341 if (last != reg->predecessor()) {
342 if (first != noreg) {
343 __ z_stmg(first, last, first_offset, Z_SP);
344 }
345 first = reg;
346 first_offset = offset;
347 DEBUG_ONLY(float_spilled = false);
348 }
349 last = reg;
350 assert(last != Z_R0, "r0 would require special treatment");
351 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
352 break;
353 }
354
355 case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
356 continue; // Continue with next loop iteration.
357
358 case RegisterSaver::float_reg: {
359 FloatRegister freg = as_FloatRegister(reg_num);
360 __ z_std(freg, offset, Z_SP);
361 DEBUG_ONLY(float_spilled = true);
362 break;
363 }
364
365 default:
366 ShouldNotReachHere();
367 break;
368 }
369
370 // Second set_callee_saved is really a waste but we'll keep things as they were for now
371 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
372 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
373 }
374 assert(first != noreg, "Should spill at least one int reg.");
375 __ z_stmg(first, last, first_offset, Z_SP);
376
377 // And we're done.
378 return map;
379 }
380
381
382 // Generate the OopMap (again, regs where saved before).
generate_oop_map(MacroAssembler * masm,RegisterSet reg_set)383 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
384 // Calculate frame size.
385 const int frame_size_in_bytes = live_reg_frame_size(reg_set);
386 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
387 const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
388
389 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
390 OopMap* map = new OopMap(frame_size_in_slots, 0);
391
392 int regstosave_num = 0;
393 const RegisterSaver::LiveRegType* live_regs = NULL;
394
395 switch (reg_set) {
396 case all_registers:
397 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
398 live_regs = RegisterSaver_LiveRegs;
399 break;
400 case all_registers_except_r2:
401 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
402 live_regs = RegisterSaver_LiveRegsWithoutR2;
403 break;
404 case all_integer_registers:
405 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
406 live_regs = RegisterSaver_LiveIntRegs;
407 break;
408 case all_volatile_registers:
409 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
410 live_regs = RegisterSaver_LiveVolatileRegs;
411 break;
412 case arg_registers:
413 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
414 live_regs = RegisterSaver_LiveArgRegs;
415 break;
416 default: ShouldNotReachHere();
417 }
418
419 // Register save area in new frame starts above z_abi_160 area.
420 int offset = register_save_offset;
421 for (int i = 0; i < regstosave_num; i++) {
422 if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
423 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
424 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
425 }
426 offset += reg_size;
427 }
428 return map;
429 }
430
431
432 // Pop the current frame and restore all the registers that we saved.
restore_live_registers(MacroAssembler * masm,RegisterSet reg_set)433 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
434 int offset;
435 const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
436
437 Register first = noreg;
438 Register last = noreg;
439 int first_offset = -1;
440 bool float_spilled = false;
441
442 int regstosave_num = 0;
443 const RegisterSaver::LiveRegType* live_regs = NULL;
444
445 switch (reg_set) {
446 case all_registers:
447 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
448 live_regs = RegisterSaver_LiveRegs;
449 break;
450 case all_registers_except_r2:
451 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
452 live_regs = RegisterSaver_LiveRegsWithoutR2;
453 break;
454 case all_integer_registers:
455 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
456 live_regs = RegisterSaver_LiveIntRegs;
457 break;
458 case all_volatile_registers:
459 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
460 live_regs = RegisterSaver_LiveVolatileRegs;
461 break;
462 case arg_registers:
463 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
464 live_regs = RegisterSaver_LiveArgRegs;
465 break;
466 default: ShouldNotReachHere();
467 }
468
469 // Restore all registers (ints and floats).
470
471 // Register save area in new frame starts above z_abi_160 area.
472 offset = register_save_offset;
473
474 for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
475 int reg_num = live_regs[i].reg_num;
476 int reg_type = live_regs[i].reg_type;
477
478 switch (reg_type) {
479 case RegisterSaver::excluded_reg:
480 continue; // Continue with next loop iteration.
481
482 case RegisterSaver::int_reg: {
483 Register reg = as_Register(reg_num);
484 if (last != reg->predecessor()) {
485 if (first != noreg) {
486 __ z_lmg(first, last, first_offset, Z_SP);
487 }
488 first = reg;
489 first_offset = offset;
490 DEBUG_ONLY(float_spilled = false);
491 }
492 last = reg;
493 assert(last != Z_R0, "r0 would require special treatment");
494 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
495 break;
496 }
497
498 case RegisterSaver::float_reg: {
499 FloatRegister freg = as_FloatRegister(reg_num);
500 __ z_ld(freg, offset, Z_SP);
501 DEBUG_ONLY(float_spilled = true);
502 break;
503 }
504
505 default:
506 ShouldNotReachHere();
507 }
508 }
509 assert(first != noreg, "Should spill at least one int reg.");
510 __ z_lmg(first, last, first_offset, Z_SP);
511
512 // Pop the frame.
513 __ pop_frame();
514
515 // Restore the flags.
516 __ restore_return_pc();
517 }
518
519
520 // Pop the current frame and restore the registers that might be holding a result.
restore_result_registers(MacroAssembler * masm)521 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
522 int i;
523 int offset;
524 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
525 sizeof(RegisterSaver::LiveRegType);
526 const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
527
528 // Restore all result registers (ints and floats).
529 offset = register_save_offset;
530 for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
531 int reg_num = RegisterSaver_LiveRegs[i].reg_num;
532 int reg_type = RegisterSaver_LiveRegs[i].reg_type;
533 switch (reg_type) {
534 case RegisterSaver::excluded_reg:
535 continue; // Continue with next loop iteration.
536 case RegisterSaver::int_reg: {
537 if (as_Register(reg_num) == Z_RET) { // int result_reg
538 __ z_lg(as_Register(reg_num), offset, Z_SP);
539 }
540 break;
541 }
542 case RegisterSaver::float_reg: {
543 if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
544 __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
545 }
546 break;
547 }
548 default:
549 ShouldNotReachHere();
550 }
551 }
552 }
553
trampoline_size()554 size_t SharedRuntime::trampoline_size() {
555 return MacroAssembler::load_const_size() + 2;
556 }
557
// Emit a trampoline: load the destination address into Z_R1_scratch and
// branch to it. Size must match trampoline_size().
void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  // Think about using pc-relative branch.
  __ load_const(Z_R1_scratch, destination);
  __ z_br(Z_R1_scratch);
}
563
564 // ---------------------------------------------------------------------------
save_native_result(MacroAssembler * masm,BasicType ret_type,int frame_slots)565 void SharedRuntime::save_native_result(MacroAssembler * masm,
566 BasicType ret_type,
567 int frame_slots) {
568 Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
569
570 switch (ret_type) {
571 case T_BOOLEAN: // Save shorter types as int. Do we need sign extension at restore??
572 case T_BYTE:
573 case T_CHAR:
574 case T_SHORT:
575 case T_INT:
576 __ reg2mem_opt(Z_RET, memaddr, false);
577 break;
578 case T_OBJECT: // Save pointer types as long.
579 case T_ARRAY:
580 case T_ADDRESS:
581 case T_VOID:
582 case T_LONG:
583 __ reg2mem_opt(Z_RET, memaddr);
584 break;
585 case T_FLOAT:
586 __ freg2mem_opt(Z_FRET, memaddr, false);
587 break;
588 case T_DOUBLE:
589 __ freg2mem_opt(Z_FRET, memaddr);
590 break;
591 }
592 }
593
restore_native_result(MacroAssembler * masm,BasicType ret_type,int frame_slots)594 void SharedRuntime::restore_native_result(MacroAssembler *masm,
595 BasicType ret_type,
596 int frame_slots) {
597 Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
598
599 switch (ret_type) {
600 case T_BOOLEAN: // Restore shorter types as int. Do we need sign extension at restore??
601 case T_BYTE:
602 case T_CHAR:
603 case T_SHORT:
604 case T_INT:
605 __ mem2reg_opt(Z_RET, memaddr, false);
606 break;
607 case T_OBJECT: // Restore pointer types as long.
608 case T_ARRAY:
609 case T_ADDRESS:
610 case T_VOID:
611 case T_LONG:
612 __ mem2reg_opt(Z_RET, memaddr);
613 break;
614 case T_FLOAT:
615 __ mem2freg_opt(Z_FRET, memaddr, false);
616 break;
617 case T_DOUBLE:
618 __ mem2freg_opt(Z_FRET, memaddr);
619 break;
620 }
621 }
622
623 // ---------------------------------------------------------------------------
624 // Read the array of BasicTypes from a signature, and compute where the
625 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
626 // quantities. Values less than VMRegImpl::stack0 are registers, those above
627 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
628 // as framesizes are fixed.
629 // VMRegImpl::stack0 refers to the first slot 0(sp).
630 // VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers
631 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
632
633 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
634 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
635 // units regardless of build.
636
637 // The Java calling convention is a "shifted" version of the C ABI.
638 // By skipping the first C ABI register we can call non-static jni methods
639 // with small numbers of arguments without having to shuffle the arguments
640 // at all. Since we control the java ABI we ought to at least get some
641 // advantage out of it.
java_calling_convention(const BasicType * sig_bt,VMRegPair * regs,int total_args_passed,int is_outgoing)642 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
643 VMRegPair *regs,
644 int total_args_passed,
645 int is_outgoing) {
646 // c2c calling conventions for compiled-compiled calls.
647
648 // An int/float occupies 1 slot here.
649 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats.
650 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
651
652 const VMReg z_iarg_reg[5] = {
653 Z_R2->as_VMReg(),
654 Z_R3->as_VMReg(),
655 Z_R4->as_VMReg(),
656 Z_R5->as_VMReg(),
657 Z_R6->as_VMReg()
658 };
659 const VMReg z_farg_reg[4] = {
660 Z_F0->as_VMReg(),
661 Z_F2->as_VMReg(),
662 Z_F4->as_VMReg(),
663 Z_F6->as_VMReg()
664 };
665 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
666 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
667
668 assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
669 assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
670
671 int i;
672 int stk = 0;
673 int ireg = 0;
674 int freg = 0;
675
676 for (int i = 0; i < total_args_passed; ++i) {
677 switch (sig_bt[i]) {
678 case T_BOOLEAN:
679 case T_CHAR:
680 case T_BYTE:
681 case T_SHORT:
682 case T_INT:
683 if (ireg < z_num_iarg_registers) {
684 // Put int/ptr in register.
685 regs[i].set1(z_iarg_reg[ireg]);
686 ++ireg;
687 } else {
688 // Put int/ptr on stack.
689 regs[i].set1(VMRegImpl::stack2reg(stk));
690 stk += inc_stk_for_intfloat;
691 }
692 break;
693 case T_LONG:
694 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
695 if (ireg < z_num_iarg_registers) {
696 // Put long in register.
697 regs[i].set2(z_iarg_reg[ireg]);
698 ++ireg;
699 } else {
700 // Put long on stack and align to 2 slots.
701 if (stk & 0x1) { ++stk; }
702 regs[i].set2(VMRegImpl::stack2reg(stk));
703 stk += inc_stk_for_longdouble;
704 }
705 break;
706 case T_OBJECT:
707 case T_ARRAY:
708 case T_ADDRESS:
709 if (ireg < z_num_iarg_registers) {
710 // Put ptr in register.
711 regs[i].set2(z_iarg_reg[ireg]);
712 ++ireg;
713 } else {
714 // Put ptr on stack and align to 2 slots, because
715 // "64-bit pointers record oop-ishness on 2 aligned adjacent
716 // registers." (see OopFlow::build_oop_map).
717 if (stk & 0x1) { ++stk; }
718 regs[i].set2(VMRegImpl::stack2reg(stk));
719 stk += inc_stk_for_longdouble;
720 }
721 break;
722 case T_FLOAT:
723 if (freg < z_num_farg_registers) {
724 // Put float in register.
725 regs[i].set1(z_farg_reg[freg]);
726 ++freg;
727 } else {
728 // Put float on stack.
729 regs[i].set1(VMRegImpl::stack2reg(stk));
730 stk += inc_stk_for_intfloat;
731 }
732 break;
733 case T_DOUBLE:
734 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
735 if (freg < z_num_farg_registers) {
736 // Put double in register.
737 regs[i].set2(z_farg_reg[freg]);
738 ++freg;
739 } else {
740 // Put double on stack and align to 2 slots.
741 if (stk & 0x1) { ++stk; }
742 regs[i].set2(VMRegImpl::stack2reg(stk));
743 stk += inc_stk_for_longdouble;
744 }
745 break;
746 case T_VOID:
747 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
748 // Do not count halves.
749 regs[i].set_bad();
750 break;
751 default:
752 ShouldNotReachHere();
753 }
754 }
755 return align_up(stk, 2);
756 }
757
c_calling_convention(const BasicType * sig_bt,VMRegPair * regs,VMRegPair * regs2,int total_args_passed)758 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
759 VMRegPair *regs,
760 VMRegPair *regs2,
761 int total_args_passed) {
762 assert(regs2 == NULL, "second VMRegPair array not used on this platform");
763
764 // Calling conventions for C runtime calls and calls to JNI native methods.
765 const VMReg z_iarg_reg[5] = {
766 Z_R2->as_VMReg(),
767 Z_R3->as_VMReg(),
768 Z_R4->as_VMReg(),
769 Z_R5->as_VMReg(),
770 Z_R6->as_VMReg()
771 };
772 const VMReg z_farg_reg[4] = {
773 Z_F0->as_VMReg(),
774 Z_F2->as_VMReg(),
775 Z_F4->as_VMReg(),
776 Z_F6->as_VMReg()
777 };
778 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
779 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
780
781 // Check calling conventions consistency.
782 assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
783 assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
784
785 // Avoid passing C arguments in the wrong stack slots.
786
787 // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
788 // 2 such slots, like 64 bit values do.
789 const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats.
790 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
791
792 int i;
793 // Leave room for C-compatible ABI
794 int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
795 int freg = 0;
796 int ireg = 0;
797
798 // We put the first 5 arguments into registers and the rest on the
799 // stack. Float arguments are already in their argument registers
800 // due to c2c calling conventions (see calling_convention).
801 for (int i = 0; i < total_args_passed; ++i) {
802 switch (sig_bt[i]) {
803 case T_BOOLEAN:
804 case T_CHAR:
805 case T_BYTE:
806 case T_SHORT:
807 case T_INT:
808 // Fall through, handle as long.
809 case T_LONG:
810 case T_OBJECT:
811 case T_ARRAY:
812 case T_ADDRESS:
813 case T_METADATA:
814 // Oops are already boxed if required (JNI).
815 if (ireg < z_num_iarg_registers) {
816 regs[i].set2(z_iarg_reg[ireg]);
817 ++ireg;
818 } else {
819 regs[i].set2(VMRegImpl::stack2reg(stk));
820 stk += inc_stk_for_longdouble;
821 }
822 break;
823 case T_FLOAT:
824 if (freg < z_num_farg_registers) {
825 regs[i].set1(z_farg_reg[freg]);
826 ++freg;
827 } else {
828 regs[i].set1(VMRegImpl::stack2reg(stk+1));
829 stk += inc_stk_for_intfloat;
830 }
831 break;
832 case T_DOUBLE:
833 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
834 if (freg < z_num_farg_registers) {
835 regs[i].set2(z_farg_reg[freg]);
836 ++freg;
837 } else {
838 // Put double on stack.
839 regs[i].set2(VMRegImpl::stack2reg(stk));
840 stk += inc_stk_for_longdouble;
841 }
842 break;
843 case T_VOID:
844 // Do not count halves.
845 regs[i].set_bad();
846 break;
847 default:
848 ShouldNotReachHere();
849 }
850 }
851 return align_up(stk, 2);
852 }
853
854 ////////////////////////////////////////////////////////////////////////
855 //
856 // Argument shufflers
857 //
858 ////////////////////////////////////////////////////////////////////////
859
860 //----------------------------------------------------------------------
861 // The java_calling_convention describes stack locations as ideal slots on
862 // a frame with no abi restrictions. Since we must observe abi restrictions
863 // (like the placement of the register window) the slots must be biased by
864 // the following value.
865 //----------------------------------------------------------------------
reg2slot(VMReg r)866 static int reg2slot(VMReg r) {
867 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
868 }
869
reg2offset(VMReg r)870 static int reg2offset(VMReg r) {
871 return reg2slot(r) * VMRegImpl::stack_slot_size;
872 }
873
verify_oop_args(MacroAssembler * masm,int total_args_passed,const BasicType * sig_bt,const VMRegPair * regs)874 static void verify_oop_args(MacroAssembler *masm,
875 int total_args_passed,
876 const BasicType *sig_bt,
877 const VMRegPair *regs) {
878 if (!VerifyOops) { return; }
879
880 for (int i = 0; i < total_args_passed; i++) {
881 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
882 VMReg r = regs[i].first();
883 assert(r->is_valid(), "bad oop arg");
884
885 if (r->is_stack()) {
886 __ z_lg(Z_R0_scratch,
887 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
888 __ verify_oop(Z_R0_scratch);
889 } else {
890 __ verify_oop(r->as_Register());
891 }
892 }
893 }
894 }
895
// Generate the compiled entry for a MethodHandle intrinsic
// (vmIntrinsics::_invokeBasic or one of the linkTo* linkers).
// Loads the receiver and/or the trailing MemberName argument into
// registers and jumps to the target chosen by
// MethodHandles::generate_method_handle_dispatch.
static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool has_receiver = false;
  Register receiver_reg = noreg;
  int member_arg_pos = -1;
  Register member_reg = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    // linkTo* intrinsic: the appended MemberName carries the real target.
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else {
    // Only invokeBasic has no ref_kind; it always has a receiver (the MH itself).
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}
961
962 ////////////////////////////////////////////////////////////////////////
963 //
964 // Argument shufflers
965 //
966 ////////////////////////////////////////////////////////////////////////
967
// Does a vector of the given size (in bytes) exceed the size that is
// saved by default? 8-byte registers are saved by default on z/Architecture.
is_wide_vector(int size)970 bool SharedRuntime::is_wide_vector(int size) {
971 // Note, MaxVectorSize == 8 on this platform.
972 assert(size <= 8, "%d bytes vectors are not supported", size);
973 return size > 8;
974 }
975
976 //----------------------------------------------------------------------
977 // An oop arg. Must pass a handle not the oop itself
978 //----------------------------------------------------------------------
// Convert an oop argument into the JNI form: a handle, i.e. the address
// of a stack slot that contains the oop, or NULL when the oop itself is
// NULL. Records the oop's location in 'map' for GC, and for the receiver
// reports the save offset via '*receiver_offset'.
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label skip;
    int slot_in_older_frame = reg2slot(src.first());

    // A stack-passed receiver is unexpected here (compiled conventions
    // pass the receiver in a register).
    guarantee(!is_receiver, "expecting receiver in register");
    // The oop stays in the caller's frame; record that slot for GC.
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    // Handle = address of the oop's slot in the older frame.
    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    // Peek at the oop itself to detect NULL.
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a NULL handle if oop is NULL.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      //   nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    // Each inbound oop register gets its own slot in the handle area,
    // indexed by the register's distance from Z_ARG1.
    int oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop == NULL, use a NULL handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      //   nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}
1044
1045 //----------------------------------------------------------------------
1046 // A float arg. May have to do float reg to int reg conversion
1047 //----------------------------------------------------------------------
float_move(MacroAssembler * masm,VMRegPair src,VMRegPair dst,int framesize_in_slots,int workspace_slot_offset)1048 static void float_move(MacroAssembler *masm,
1049 VMRegPair src,
1050 VMRegPair dst,
1051 int framesize_in_slots,
1052 int workspace_slot_offset) {
1053 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1054 int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1055
1056 // We do not accept an argument in a VMRegPair to be spread over two slots,
1057 // no matter what physical location (reg or stack) the slots may have.
1058 // We just check for the unaccepted slot to be invalid.
1059 assert(!src.second()->is_valid(), "float in arg spread over two slots");
1060 assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1061
1062 if (src.first()->is_stack()) {
1063 if (dst.first()->is_stack()) {
1064 // stack -> stack. The easiest of the bunch.
1065 __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1066 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1067 } else {
1068 // stack to reg
1069 Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1070 if (dst.first()->is_Register()) {
1071 __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1072 } else {
1073 __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1074 }
1075 }
1076 } else if (src.first()->is_Register()) {
1077 if (dst.first()->is_stack()) {
1078 // gpr -> stack
1079 __ reg2mem_opt(src.first()->as_Register(),
1080 Address(Z_SP, reg2offset(dst.first()), false ));
1081 } else {
1082 if (dst.first()->is_Register()) {
1083 // gpr -> gpr
1084 __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1085 src.first()->as_Register(), T_INT);
1086 } else {
1087 if (VM_Version::has_FPSupportEnhancements()) {
1088 // gpr -> fpr. Exploit z10 capability of direct transfer.
1089 __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1090 } else {
1091 // gpr -> fpr. Use work space on stack to transfer data.
1092 Address stackaddr(Z_SP, workspace_offset);
1093
1094 __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1095 __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1096 }
1097 }
1098 }
1099 } else {
1100 if (dst.first()->is_stack()) {
1101 // fpr -> stack
1102 __ freg2mem_opt(src.first()->as_FloatRegister(),
1103 Address(Z_SP, reg2offset(dst.first())), false);
1104 } else {
1105 if (dst.first()->is_Register()) {
1106 if (VM_Version::has_FPSupportEnhancements()) {
1107 // fpr -> gpr.
1108 __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1109 } else {
1110 // fpr -> gpr. Use work space on stack to transfer data.
1111 Address stackaddr(Z_SP, workspace_offset);
1112
1113 __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1114 __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1115 }
1116 } else {
1117 // fpr -> fpr
1118 __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1119 src.first()->as_FloatRegister(), T_FLOAT);
1120 }
1121 }
1122 }
1123 }
1124
1125 //----------------------------------------------------------------------
1126 // A double arg. May have to do double reg to long reg conversion
1127 //----------------------------------------------------------------------
// Shuffle a 64-bit double argument between stack slots, GPRs and FPRs.
// Source stack slots live in the caller's frame and are biased by
// framesize_in_slots; GPR<->FPR transfers on pre-z10 hardware go through
// the workspace stack slot.
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}
1202
1203 //----------------------------------------------------------------------
1204 // A long arg.
1205 //----------------------------------------------------------------------
long_move(MacroAssembler * masm,VMRegPair src,VMRegPair dst,int framesize_in_slots)1206 static void long_move(MacroAssembler *masm,
1207 VMRegPair src,
1208 VMRegPair dst,
1209 int framesize_in_slots) {
1210 int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1211
1212 if (src.first()->is_stack()) {
1213 if (dst.first()->is_stack()) {
1214 // stack -> stack. The easiest of the bunch.
1215 __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1216 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1217 } else {
1218 // stack to reg
1219 assert(dst.first()->is_Register(), "long dst value must be in GPR");
1220 __ mem2reg_opt(dst.first()->as_Register(),
1221 Address(Z_SP, reg2offset(src.first()) + frame_offset));
1222 }
1223 } else {
1224 // reg to reg
1225 assert(src.first()->is_Register(), "long src value must be in GPR");
1226 if (dst.first()->is_stack()) {
1227 // reg -> stack
1228 __ reg2mem_opt(src.first()->as_Register(),
1229 Address(Z_SP, reg2offset(dst.first())));
1230 } else {
1231 // reg -> reg
1232 assert(dst.first()->is_Register(), "long dst value must be in GPR");
1233 __ move_reg_if_needed(dst.first()->as_Register(),
1234 T_LONG, src.first()->as_Register(), T_LONG);
1235 }
1236 }
1237 }
1238
1239
1240 //----------------------------------------------------------------------
// An int-like arg.
1242 //----------------------------------------------------------------------
1243 // On z/Architecture we will store integer like items to the stack as 64 bit
1244 // items, according to the z/Architecture ABI, even though Java would only store
1245 // 32 bits for a parameter.
1246 // We do sign extension for all base types. That is ok since the only
1247 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1248 // Sign extension 32->64 bit will thus not affect the value.
1249 //----------------------------------------------------------------------
move32_64(MacroAssembler * masm,VMRegPair src,VMRegPair dst,int framesize_in_slots)1250 static void move32_64(MacroAssembler *masm,
1251 VMRegPair src,
1252 VMRegPair dst,
1253 int framesize_in_slots) {
1254 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1255
1256 if (src.first()->is_stack()) {
1257 Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1258 if (dst.first()->is_stack()) {
1259 // stack -> stack. MVC not posible due to sign extension.
1260 Address firstaddr(Z_SP, reg2offset(dst.first()));
1261 __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1262 __ reg2mem_opt(Z_R0_scratch, firstaddr);
1263 } else {
1264 // stack -> reg, sign extended
1265 __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1266 }
1267 } else {
1268 if (dst.first()->is_stack()) {
1269 // reg -> stack, sign extended
1270 Address firstaddr(Z_SP, reg2offset(dst.first()));
1271 __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1272 __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1273 } else {
1274 // reg -> reg, sign extended
1275 __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1276 }
1277 }
1278 }
1279
// Save (map != NULL) or restore (map == NULL) all register-passed Java
// arguments to/from the stack area starting at slot 'arg_save_area'.
// While saving, T_ARRAY oops are additionally recorded in 'map' so the
// GC can find them.
static void save_or_restore_arguments(MacroAssembler *masm,
                                      const int stack_slots,
                                      const int total_in_args,
                                      const int arg_save_area,
                                      OopMap *map,
                                      VMRegPair *in_regs,
                                      BasicType *in_sig_bt) {

  // If map is non-NULL then the code should store the values,
  // otherwise it should load them.
  int slot = arg_save_area;
  // Handle double words first.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
      // Doubles occupy a full word (two 32-bit slots).
      int offset = slot * VMRegImpl::stack_slot_size;
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
      const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
      Address stackaddr(Z_SP, offset);
      if (map != NULL) {
        __ freg2mem_opt(freg, stackaddr);
      } else {
        __ mem2freg_opt(freg, stackaddr);
      }
    } else if (in_regs[i].first()->is_Register() &&
               (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
      int offset = slot * VMRegImpl::stack_slot_size;
      const Register reg = in_regs[i].first()->as_Register();
      if (map != NULL) {
        __ z_stg(reg, offset, Z_SP);
        if (in_sig_bt[i] == T_ARRAY) {
          // The saved array oop must be visible to the GC.
          map->set_oop(VMRegImpl::stack2reg(slot));
        }
      } else {
        __ z_lg(reg, offset, Z_SP);
      }
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
    }
  }

  // Save or restore single word registers.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_Register()) {
      int offset = slot * VMRegImpl::stack_slot_size;
      // Value lives in an input register. Save it on stack.
      switch (in_sig_bt[i]) {
        case T_BOOLEAN:
        case T_CHAR:
        case T_BYTE:
        case T_SHORT:
        case T_INT: {
          const Register reg = in_regs[i].first()->as_Register();
          Address stackaddr(Z_SP, offset);
          if (map != NULL) {
            // Store only the low 32 bits ...
            __ z_st(reg, stackaddr);
          } else {
            // ... and restore with sign extension to 64 bits.
            __ z_lgf(reg, stackaddr);
          }
          slot++;
          assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
          break;
        }
        case T_ARRAY:
        case T_LONG:
          // handled above
          break;
        case T_OBJECT:
        default: ShouldNotReachHere();
      }
    } else if (in_regs[i].first()->is_FloatRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int offset = slot * VMRegImpl::stack_slot_size;
        slot++;
        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
        const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
        Address stackaddr(Z_SP, offset);
        if (map != NULL) {
          __ freg2mem_opt(freg, stackaddr, false);
        } else {
          __ mem2freg_opt(freg, stackaddr, false);
        }
      }
    } else if (in_regs[i].first()->is_stack() &&
               in_sig_bt[i] == T_ARRAY && map != NULL) {
      // Stack-passed array oops need no copying, only an oop-map entry
      // pointing at their slot in the caller's frame.
      int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
      map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
    }
  }
}
1370
1371 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1372 // keeps a new JNI critical region from starting until a GC has been
1373 // forced. Save down any oops in registers and describe them in an OopMap.
// See the block comment above: if GCLocker::needs_gc() is set, save all
// live argument registers, call SharedRuntime::block_for_jni_critical to
// wait for the pending GC, then restore the arguments.
static void check_needs_gc_for_critical_native(MacroAssembler *masm,
                                               const int stack_slots,
                                               const int total_in_args,
                                               const int arg_save_area,
                                               OopMapSet *oop_maps,
                                               VMRegPair *in_regs,
                                               BasicType *in_sig_bt) {
  __ block_comment("check GCLocker::needs_gc");
  Label cont;

  // Check GCLocker::_needs_gc flag.
  __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
  __ z_cli(0, Z_R1_scratch, 0);
  __ z_bre(cont);  // Fast path: no GC pending.

  // Save down any values that are live in registers and call into the
  // runtime to halt for a GC.
  OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);

  save_or_restore_arguments(masm, stack_slots, total_in_args,
                            arg_save_area, map, in_regs, in_sig_bt);
  address the_pc = __ pc();
  __ set_last_Java_frame(Z_SP, noreg);

  __ block_comment("block_for_jni_critical");
  // The runtime call takes the current thread as its only argument.
  __ z_lgr(Z_ARG1, Z_thread);

  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
  __ call_c(entry_point);
  // Register the oop map at the call's return offset.
  oop_maps->add_gc_map(__ offset(), map);

  __ reset_last_Java_frame();

  // Reload all the register arguments.
  save_or_restore_arguments(masm, stack_slots, total_in_args,
                            arg_save_area, NULL, in_regs, in_sig_bt);

  __ bind(cont);

  if (StressCriticalJNINatives) {
    // Stress register saving
    OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
    save_or_restore_arguments(masm, stack_slots, total_in_args,
                              arg_save_area, map, in_regs, in_sig_bt);

    // Destroy argument registers.
    for (int i = 0; i < total_in_args; i++) {
      if (in_regs[i].first()->is_Register()) {
        // Don't set CC.
        __ clear_reg(in_regs[i].first()->as_Register(), true, false);
      } else {
        if (in_regs[i].first()->is_FloatRegister()) {
          FloatRegister fr = in_regs[i].first()->as_FloatRegister();
          __ z_lcdbr(fr, fr);  // Negate to clobber the value.
        }
      }
    }

    // Restore everything; any save/restore bug now shows up as a
    // clobbered argument.
    save_or_restore_arguments(masm, stack_slots, total_in_args,
                              arg_save_area, NULL, in_regs, in_sig_bt);
  }
}
1436
move_ptr(MacroAssembler * masm,VMRegPair src,VMRegPair dst,int framesize_in_slots)1437 static void move_ptr(MacroAssembler *masm,
1438 VMRegPair src,
1439 VMRegPair dst,
1440 int framesize_in_slots) {
1441 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1442
1443 if (src.first()->is_stack()) {
1444 if (dst.first()->is_stack()) {
1445 // stack to stack
1446 __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1447 __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1448 } else {
1449 // stack to reg
1450 __ mem2reg_opt(dst.first()->as_Register(),
1451 Address(Z_SP, reg2offset(src.first()) + frame_offset));
1452 }
1453 } else {
1454 if (dst.first()->is_stack()) {
1455 // reg to stack
1456 __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1457 } else {
1458 __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1459 }
1460 }
1461 }
1462
1463 // Unpack an array argument into a pointer to the body and the length
1464 // if the array is non-null, otherwise pass 0 for both.
// Expand one T_ARRAY argument of a critical native into the (length,
// body pointer) pair the C function expects; both become 0 when the
// array reference is NULL.
static void unpack_array_argument(MacroAssembler *masm,
                                  VMRegPair reg,
                                  BasicType in_elem_type,
                                  VMRegPair body_arg,
                                  VMRegPair length_arg,
                                  int framesize_in_slots) {
  Register tmp_reg = Z_tmp_2;
  Register tmp2_reg = Z_tmp_1;

  // The temps must not alias the outgoing body/length locations, or the
  // final move_ptr/move32_64 below would read clobbered values.
  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
         "possible collision");
  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
         "possible collision");

  // Pass the length, ptr pair.
  NearLabel set_out_args;
  VMRegPair tmp, tmp2;

  tmp.set_ptr(tmp_reg->as_VMReg());
  tmp2.set_ptr(tmp2_reg->as_VMReg());
  if (reg.first()->is_stack()) {
    // Load the arg up from the stack.
    move_ptr(masm, reg, tmp, framesize_in_slots);
    reg = tmp;
  }

  const Register first = reg.first()->as_Register();

  // Pre-set both outputs to 0 (the NULL-array answer).
  // Don't set CC, indicate unused result.
  (void) __ clear_reg(tmp2_reg, true, false);
  if (tmp_reg != first) {
    // Only clear tmp_reg if it does not already hold the array reference.
    __ clear_reg(tmp_reg, true, false);  // Don't set CC.
  }
  // NULL array: skip straight to emitting the (0, 0) pair.
  __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
  // Non-NULL: length from the arrayOop header, body = start of elements.
  __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
  __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);

  __ bind(set_out_args);
  move_ptr(masm, tmp, body_arg, framesize_in_slots);
  move32_64(masm, tmp2, length_arg, framesize_in_slots);
}
1506
1507 //----------------------------------------------------------------------
1508 // Wrap a JNI call.
1509 //----------------------------------------------------------------------
1510 #undef USE_RESIZE_FRAME
generate_native_wrapper(MacroAssembler * masm,const methodHandle & method,int compile_id,BasicType * in_sig_bt,VMRegPair * in_regs,BasicType ret_type,address critical_entry)1511 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1512 const methodHandle& method,
1513 int compile_id,
1514 BasicType *in_sig_bt,
1515 VMRegPair *in_regs,
1516 BasicType ret_type,
1517 address critical_entry) {
1518 int total_in_args = method->size_of_parameters();
1519 if (method->is_method_handle_intrinsic()) {
1520 vmIntrinsics::ID iid = method->intrinsic_id();
1521 intptr_t start = (intptr_t) __ pc();
1522 int vep_offset = ((intptr_t) __ pc()) - start;
1523
1524 gen_special_dispatch(masm, total_in_args,
1525 method->intrinsic_id(), in_sig_bt, in_regs);
1526
1527 int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1528
1529 __ flush();
1530
1531 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // No out slots at all, actually.
1532
1533 return nmethod::new_native_nmethod(method,
1534 compile_id,
1535 masm->code(),
1536 vep_offset,
1537 frame_complete,
1538 stack_slots / VMRegImpl::slots_per_word,
1539 in_ByteSize(-1),
1540 in_ByteSize(-1),
1541 (OopMapSet *) NULL);
1542 }
1543
1544
1545 ///////////////////////////////////////////////////////////////////////
1546 //
1547 // Precalculations before generating any code
1548 //
1549 ///////////////////////////////////////////////////////////////////////
1550
1551 bool is_critical_native = true;
1552 address native_func = critical_entry;
1553 if (native_func == NULL) {
1554 native_func = method->native_function();
1555 is_critical_native = false;
1556 }
1557 assert(native_func != NULL, "must have function");
1558
1559 //---------------------------------------------------------------------
1560 // We have received a description of where all the java args are located
1561 // on entry to the wrapper. We need to convert these args to where
1562 // the jni function will expect them. To figure out where they go
1563 // we convert the java signature to a C signature by inserting
1564 // the hidden arguments as arg[0] and possibly arg[1] (static method).
1565 //
1566 // The first hidden argument arg[0] is a pointer to the JNI environment.
1567 // It is generated for every call.
1568 // The second argument arg[1] to the JNI call, which is hidden for static
1569 // methods, is the boxed lock object. For static calls, the lock object
1570 // is the static method itself. The oop is constructed here. for instance
1571 // calls, the lock is performed on the object itself, the pointer of
1572 // which is passed as the first visible argument.
1573 //---------------------------------------------------------------------
1574
1575 // Additionally, on z/Architecture we must convert integers
1576 // to longs in the C signature. We do this in advance in order to have
1577 // no trouble with indexes into the bt-arrays.
1578 // So convert the signature and registers now, and adjust the total number
1579 // of in-arguments accordingly.
1580 bool method_is_static = method->is_static();
1581 int total_c_args = total_in_args;
1582
1583 if (!is_critical_native) {
1584 int n_hidden_args = method_is_static ? 2 : 1;
1585 total_c_args += n_hidden_args;
1586 } else {
1587 // No JNIEnv*, no this*, but unpacked arrays (base+length).
1588 for (int i = 0; i < total_in_args; i++) {
1589 if (in_sig_bt[i] == T_ARRAY) {
1590 total_c_args ++;
1591 }
1592 }
1593 }
1594
1595 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1596 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1597 BasicType* in_elem_bt = NULL;
1598
1599 // Create the signature for the C call:
1600 // 1) add the JNIEnv*
1601 // 2) add the class if the method is static
1602 // 3) copy the rest of the incoming signature (shifted by the number of
1603 // hidden arguments)
1604
1605 int argc = 0;
1606 if (!is_critical_native) {
1607 out_sig_bt[argc++] = T_ADDRESS;
1608 if (method->is_static()) {
1609 out_sig_bt[argc++] = T_OBJECT;
1610 }
1611
1612 for (int i = 0; i < total_in_args; i++) {
1613 out_sig_bt[argc++] = in_sig_bt[i];
1614 }
1615 } else {
1616 Thread* THREAD = Thread::current();
1617 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1618 SignatureStream ss(method->signature());
1619 int o = 0;
1620 for (int i = 0; i < total_in_args; i++, o++) {
1621 if (in_sig_bt[i] == T_ARRAY) {
1622 // Arrays are passed as tuples (int, elem*).
1623 Symbol* atype = ss.as_symbol(CHECK_NULL);
1624 const char* at = atype->as_C_string();
1625 if (strlen(at) == 2) {
1626 assert(at[0] == '[', "must be");
1627 switch (at[1]) {
1628 case 'B': in_elem_bt[o] = T_BYTE; break;
1629 case 'C': in_elem_bt[o] = T_CHAR; break;
1630 case 'D': in_elem_bt[o] = T_DOUBLE; break;
1631 case 'F': in_elem_bt[o] = T_FLOAT; break;
1632 case 'I': in_elem_bt[o] = T_INT; break;
1633 case 'J': in_elem_bt[o] = T_LONG; break;
1634 case 'S': in_elem_bt[o] = T_SHORT; break;
1635 case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
1636 default: ShouldNotReachHere();
1637 }
1638 }
1639 } else {
1640 in_elem_bt[o] = T_VOID;
1641 }
1642 if (in_sig_bt[i] != T_VOID) {
1643 assert(in_sig_bt[i] == ss.type(), "must match");
1644 ss.next();
1645 }
1646 }
1647 assert(total_in_args == o, "must match");
1648
1649 for (int i = 0; i < total_in_args; i++) {
1650 if (in_sig_bt[i] == T_ARRAY) {
1651 // Arrays are passed as tuples (int, elem*).
1652 out_sig_bt[argc++] = T_INT;
1653 out_sig_bt[argc++] = T_ADDRESS;
1654 } else {
1655 out_sig_bt[argc++] = in_sig_bt[i];
1656 }
1657 }
1658 }
1659
1660 ///////////////////////////////////////////////////////////////////////
1661 // Now figure out where the args must be stored and how much stack space
1662 // they require (neglecting out_preserve_stack_slots but providing space
1663 // for storing the first five register arguments).
1664 // It's weird, see int_stk_helper.
1665 ///////////////////////////////////////////////////////////////////////
1666
1667 //---------------------------------------------------------------------
1668 // Compute framesize for the wrapper.
1669 //
1670 // - We need to handlize all oops passed in registers.
1671 // - We must create space for them here that is disjoint from the save area.
1672 // - We always just allocate 5 words for storing down these objects.
1673 // This allows us to simply record the base and use the Ireg number to
1674 // decide which slot to use.
1675 // - Note that the reg number used to index the stack slot is the inbound
1676 // number, not the outbound number.
1677 // - We must shuffle args to match the native convention,
1678 // and to include var-args space.
1679 //---------------------------------------------------------------------
1680
1681 //---------------------------------------------------------------------
1682 // Calculate the total number of stack slots we will need:
1683 // - 1) abi requirements
1684 // - 2) outgoing args
1685 // - 3) space for inbound oop handle area
1686 // - 4) space for handlizing a klass if static method
1687 // - 5) space for a lock if synchronized method
1688 // - 6) workspace (save rtn value, int<->float reg moves, ...)
1689 // - 7) filler slots for alignment
1690 //---------------------------------------------------------------------
1691 // Here is how the space we have allocated will look like.
1692 // Since we use resize_frame, we do not create a new stack frame,
1693 // but just extend the one we got with our own data area.
1694 //
1695 // If an offset or pointer name points to a separator line, it is
1696 // assumed that addressing with offset 0 selects storage starting
1697 // at the first byte above the separator line.
1698 //
1699 //
1700 // ... ...
1701 // | caller's frame |
1702 // FP-> |---------------------|
1703 // | filler slots, if any|
1704 // 7| #slots == mult of 2 |
1705 // |---------------------|
1706 // | work space |
1707 // 6| 2 slots = 8 bytes |
1708 // |---------------------|
1709 // 5| lock box (if sync) |
1710 // |---------------------| <- lock_slot_offset
1711 // 4| klass (if static) |
1712 // |---------------------| <- klass_slot_offset
1713 // 3| oopHandle area |
1714 // | (save area for |
1715 // | critical natives) |
1716 // | |
1717 // | |
1718 // |---------------------| <- oop_handle_offset
1719 // 2| outbound memory |
1720 // ... ...
1721 // | based arguments |
1722 // |---------------------|
1723 // | vararg |
1724 // ... ...
1725 // | area |
1726 // |---------------------| <- out_arg_slot_offset
1727 // 1| out_preserved_slots |
1728 // ... ...
1729 // | (z_abi spec) |
1730 // SP-> |---------------------| <- FP_slot_offset (back chain)
1731 // ... ...
1732 //
1733 //---------------------------------------------------------------------
1734
1735 // *_slot_offset indicates offset from SP in #stack slots
1736 // *_offset indicates offset from SP in #bytes
1737
1738 int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1739 SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1740
1741 // Now the space for the inbound oop handle area.
1742 int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1743 if (is_critical_native) {
1744 // Critical natives may have to call out so they need a save area
1745 // for register arguments.
1746 int double_slots = 0;
1747 int single_slots = 0;
1748 for (int i = 0; i < total_in_args; i++) {
1749 if (in_regs[i].first()->is_Register()) {
1750 const Register reg = in_regs[i].first()->as_Register();
1751 switch (in_sig_bt[i]) {
1752 case T_BOOLEAN:
1753 case T_BYTE:
1754 case T_SHORT:
1755 case T_CHAR:
1756 case T_INT:
1757 // Fall through.
1758 case T_ARRAY:
1759 case T_LONG: double_slots++; break;
1760 default: ShouldNotReachHere();
1761 }
1762 } else {
1763 if (in_regs[i].first()->is_FloatRegister()) {
1764 switch (in_sig_bt[i]) {
1765 case T_FLOAT: single_slots++; break;
1766 case T_DOUBLE: double_slots++; break;
1767 default: ShouldNotReachHere();
1768 }
1769 }
1770 }
1771 } // for
1772 total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
1773 }
1774
1775 int oop_handle_slot_offset = stack_slots;
1776 stack_slots += total_save_slots; // 3)
1777
1778 int klass_slot_offset = 0;
1779 int klass_offset = -1;
1780 if (method_is_static && !is_critical_native) { // 4)
1781 klass_slot_offset = stack_slots;
1782 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1783 stack_slots += VMRegImpl::slots_per_word;
1784 }
1785
1786 int lock_slot_offset = 0;
1787 int lock_offset = -1;
1788 if (method->is_synchronized()) { // 5)
1789 lock_slot_offset = stack_slots;
1790 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
1791 stack_slots += VMRegImpl::slots_per_word;
1792 }
1793
1794 int workspace_slot_offset= stack_slots; // 6)
1795 stack_slots += 2;
1796
1797 // Now compute actual number of stack words we need.
1798 // Round to align stack properly.
1799 stack_slots = align_up(stack_slots, // 7)
1800 frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1801 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1802
1803
1804 ///////////////////////////////////////////////////////////////////////
1805 // Now we can start generating code
1806 ///////////////////////////////////////////////////////////////////////
1807
1808 unsigned int wrapper_CodeStart = __ offset();
1809 unsigned int wrapper_UEPStart;
1810 unsigned int wrapper_VEPStart;
1811 unsigned int wrapper_FrameDone;
1812 unsigned int wrapper_CRegsSet;
1813 Label handle_pending_exception;
1814 Label ic_miss;
1815
1816 //---------------------------------------------------------------------
1817 // Unverified entry point (UEP)
1818 //---------------------------------------------------------------------
1819 wrapper_UEPStart = __ offset();
1820
1821 // check ic: object class <-> cached class
1822 if (!method_is_static) __ nmethod_UEP(ic_miss);
1823 // Fill with nops (alignment of verified entry point).
1824 __ align(CodeEntryAlignment);
1825
1826 //---------------------------------------------------------------------
1827 // Verified entry point (VEP)
1828 //---------------------------------------------------------------------
1829 wrapper_VEPStart = __ offset();
1830
1831 __ save_return_pc();
1832 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
1833 #ifndef USE_RESIZE_FRAME
1834 __ push_frame(frame_size_in_bytes); // Create a new frame for the wrapper.
1835 #else
1836 __ resize_frame(-frame_size_in_bytes, Z_R0_scratch); // No new frame for the wrapper.
1837 // Just resize the existing one.
1838 #endif
1839
1840 wrapper_FrameDone = __ offset();
1841
1842 __ verify_thread();
1843
1844 // Native nmethod wrappers never take possession of the oop arguments.
1845 // So the caller will gc the arguments.
1846 // The only thing we need an oopMap for is if the call is static.
1847 //
1848 // An OopMap for lock (and class if static), and one for the VM call itself
1849 OopMapSet *oop_maps = new OopMapSet();
1850 OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1851
1852 if (is_critical_native) {
1853 check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1854 oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1855 }
1856
1857
1858 //////////////////////////////////////////////////////////////////////
1859 //
1860 // The Grand Shuffle
1861 //
1862 //////////////////////////////////////////////////////////////////////
1863 //
1864 // We immediately shuffle the arguments so that for any vm call we have
1865 // to make from here on out (sync slow path, jvmti, etc.) we will have
1866 // captured the oops from our caller and have a valid oopMap for them.
1867 //
1868 //--------------------------------------------------------------------
1869 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1870 // (derived from JavaThread* which is in Z_thread) and, if static,
1871 // the class mirror instead of a receiver. This pretty much guarantees that
1872 // register layout will not match. We ignore these extra arguments during
1873 // the shuffle. The shuffle is described by the two calling convention
1874 // vectors we have in our possession. We simply walk the java vector to
1875 // get the source locations and the c vector to get the destinations.
1876 //
1877 // This is a trick. We double the stack slots so we can claim
1878 // the oops in the caller's frame. Since we are sure to have
1879 // more args than the caller doubling is enough to make
1880 // sure we can capture all the incoming oop args from the caller.
1881 //--------------------------------------------------------------------
1882
1883 // Record sp-based slot for receiver on stack for non-static methods.
1884 int receiver_offset = -1;
1885
1886 //--------------------------------------------------------------------
1887 // We move the arguments backwards because the floating point registers
1888 // destination will always be to a register with a greater or equal
1889 // register number or the stack.
1890 // jix is the index of the incoming Java arguments.
1891 // cix is the index of the outgoing C arguments.
1892 //--------------------------------------------------------------------
1893
1894 #ifdef ASSERT
1895 bool reg_destroyed[RegisterImpl::number_of_registers];
1896 bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1897 for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1898 reg_destroyed[r] = false;
1899 }
1900 for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1901 freg_destroyed[f] = false;
1902 }
1903 #endif // ASSERT
1904
1905 for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1906 #ifdef ASSERT
1907 if (in_regs[jix].first()->is_Register()) {
1908 assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1909 } else {
1910 if (in_regs[jix].first()->is_FloatRegister()) {
1911 assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1912 }
1913 }
1914 if (out_regs[cix].first()->is_Register()) {
1915 reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1916 } else {
1917 if (out_regs[cix].first()->is_FloatRegister()) {
1918 freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1919 }
1920 }
1921 #endif // ASSERT
1922
1923 switch (in_sig_bt[jix]) {
1924 // Due to casting, small integers should only occur in pairs with type T_LONG.
1925 case T_BOOLEAN:
1926 case T_CHAR:
1927 case T_BYTE:
1928 case T_SHORT:
1929 case T_INT:
1930 // Move int and do sign extension.
1931 move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1932 break;
1933
1934 case T_LONG :
1935 long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1936 break;
1937
1938 case T_ARRAY:
1939 if (is_critical_native) {
1940 int body_arg = cix;
1941 cix -= 1; // Point to length arg.
1942 unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1943 break;
1944 }
1945 // else fallthrough
1946 case T_OBJECT:
1947 assert(!is_critical_native, "no oop arguments");
1948 object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1949 ((jix == 0) && (!method_is_static)),
1950 &receiver_offset);
1951 break;
1952 case T_VOID:
1953 break;
1954
1955 case T_FLOAT:
1956 float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1957 break;
1958
1959 case T_DOUBLE:
1960 assert(jix+1 < total_in_args && in_sig_bt[jix+1] == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1961 double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1962 break;
1963
1964 case T_ADDRESS:
1965 assert(false, "found T_ADDRESS in java args");
1966 break;
1967
1968 default:
1969 ShouldNotReachHere();
1970 }
1971 }
1972
1973 //--------------------------------------------------------------------
1974 // Pre-load a static method's oop into ARG2.
1975 // Used both by locking code and the normal JNI call code.
1976 //--------------------------------------------------------------------
1977 if (method_is_static && !is_critical_native) {
1978 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1979
1980 // Now handlize the static class mirror in ARG2. It's known not-null.
1981 __ z_stg(Z_ARG2, klass_offset, Z_SP);
1982 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1983 __ add2reg(Z_ARG2, klass_offset, Z_SP);
1984 }
1985
1986 // Get JNIEnv* which is first argument to native.
1987 if (!is_critical_native) {
1988 __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1989 }
1990
1991 //////////////////////////////////////////////////////////////////////
1992 // We have all of the arguments setup at this point.
1993 // We MUST NOT touch any outgoing regs from this point on.
1994 // So if we must call out we must push a new frame.
1995 //////////////////////////////////////////////////////////////////////
1996
1997
1998 // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1999 // Both values represent the same position.
2000 __ get_PC(Z_R10); // PC into register
2001 wrapper_CRegsSet = __ offset(); // and into variable.
2002
2003 // Z_R10 now has the pc loaded that we will use when we finally call to native.
2004
2005 // We use the same pc/oopMap repeatedly when we call out.
2006 oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
2007
2008 // Lock a synchronized method.
2009
2010 if (method->is_synchronized()) {
2011 assert(!is_critical_native, "unhandled");
2012
2013 // ATTENTION: args and Z_R10 must be preserved.
2014 Register r_oop = Z_R11;
2015 Register r_box = Z_R12;
2016 Register r_tmp1 = Z_R13;
2017 Register r_tmp2 = Z_R7;
2018 Label done;
2019
2020 // Load the oop for the object or class. R_carg2_classorobject contains
2021 // either the handlized oop from the incoming arguments or the handlized
2022 // class mirror (if the method is static).
2023 __ z_lg(r_oop, 0, Z_ARG2);
2024
2025 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2026 // Get the lock box slot's address.
2027 __ add2reg(r_box, lock_offset, Z_SP);
2028
2029 #ifdef ASSERT
2030 if (UseBiasedLocking)
2031 // Making the box point to itself will make it clear it went unused
2032 // but also be obviously invalid.
2033 __ z_stg(r_box, 0, r_box);
2034 #endif // ASSERT
2035
2036 // Try fastpath for locking.
2037 // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
2038 __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2039 __ z_bre(done);
2040
2041 //-------------------------------------------------------------------------
2042 // None of the above fast optimizations worked so we have to get into the
2043 // slow case of monitor enter. Inline a special case of call_VM that
2044 // disallows any pending_exception.
2045 //-------------------------------------------------------------------------
2046
2047 Register oldSP = Z_R11;
2048
2049 __ z_lgr(oldSP, Z_SP);
2050
2051 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2052
2053 // Prepare arguments for call.
2054 __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2055 __ add2reg(Z_ARG2, lock_offset, oldSP);
2056 __ z_lgr(Z_ARG3, Z_thread);
2057
2058 __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2059
2060 // Do the call.
2061 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2062 __ call(Z_R1_scratch);
2063
2064 __ reset_last_Java_frame();
2065
2066 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2067 #ifdef ASSERT
2068 { Label L;
2069 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2070 __ z_bre(L);
2071 __ stop("no pending exception allowed on exit from IR::monitorenter");
2072 __ bind(L);
2073 }
2074 #endif
2075 __ bind(done);
2076 } // lock for synchronized methods
2077
2078
2079 //////////////////////////////////////////////////////////////////////
2080 // Finally just about ready to make the JNI call.
2081 //////////////////////////////////////////////////////////////////////
2082
2083 // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2084 __ set_last_Java_frame(Z_SP, Z_R10);
2085
2086 // Transition from _thread_in_Java to _thread_in_native.
2087 __ set_thread_state(_thread_in_native);
2088
2089
2090 //////////////////////////////////////////////////////////////////////
2091 // This is the JNI call.
2092 //////////////////////////////////////////////////////////////////////
2093
2094 __ call_c(native_func);
2095
2096
2097 //////////////////////////////////////////////////////////////////////
2098 // We have survived the call once we reach here.
2099 //////////////////////////////////////////////////////////////////////
2100
2101
2102 //--------------------------------------------------------------------
2103 // Unpack native results.
2104 //--------------------------------------------------------------------
2105 // For int-types, we do any needed sign-extension required.
2106 // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2107 // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2108 // blocking or unlocking.
2109 // An OOP result (handle) is done specially in the slow-path code.
2110 //--------------------------------------------------------------------
2111 switch (ret_type) {
2112 case T_VOID: break; // Nothing to do!
2113 case T_FLOAT: break; // Got it where we want it (unless slow-path)
2114 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
2115 case T_LONG: break; // Got it where we want it (unless slow-path)
2116 case T_OBJECT: break; // Really a handle.
2117 // Cannot de-handlize until after reclaiming jvm_lock.
2118 case T_ARRAY: break;
2119
2120 case T_BOOLEAN: // 0 -> false(0); !0 -> true(1)
2121 __ z_lngfr(Z_RET, Z_RET); // Force sign bit on except for zero.
2122 __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2123 break;
2124 case T_BYTE: __ z_lgbr(Z_RET, Z_RET); break; // sign extension
2125 case T_CHAR: __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2126 case T_SHORT: __ z_lghr(Z_RET, Z_RET); break; // sign extension
2127 case T_INT: __ z_lgfr(Z_RET, Z_RET); break; // sign-extend for beauty.
2128
2129 default:
2130 ShouldNotReachHere();
2131 break;
2132 }
2133
2134
2135 // Switch thread to "native transition" state before reading the synchronization state.
2136 // This additional state is necessary because reading and testing the synchronization
2137 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2138 // - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2139 // - VM thread changes sync state to synchronizing and suspends threads for GC.
2140 // - Thread A is resumed to finish this native method, but doesn't block here since it
2141 // didn't see any synchronization in progress, and escapes.
2142
2143 // Transition from _thread_in_native to _thread_in_native_trans.
2144 __ set_thread_state(_thread_in_native_trans);
2145
2146 // Safepoint synchronization
2147 //--------------------------------------------------------------------
2148 // Must we block?
2149 //--------------------------------------------------------------------
2150 // Block, if necessary, before resuming in _thread_in_Java state.
2151 // In order for GC to work, don't clear the last_Java_sp until after blocking.
2152 //--------------------------------------------------------------------
2153 Label after_transition;
2154 {
2155 Label no_block, sync;
2156
2157 save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2158
2159 if (os::is_MP()) {
2160 if (UseMembar) {
2161 // Force this write out before the read below.
2162 __ z_fence();
2163 } else {
2164 // Write serialization page so VM thread can do a pseudo remote membar.
2165 // We use the current thread pointer to calculate a thread specific
2166 // offset to write to within the page. This minimizes bus traffic
2167 // due to cache line collision.
2168 __ serialize_memory(Z_thread, Z_R1, Z_R2);
2169 }
2170 }
2171 __ safepoint_poll(sync, Z_R1);
2172
2173 __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2174 __ z_bre(no_block);
2175
2176 // Block. Save any potential method result value before the operation and
2177 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2178 // lets us share the oopMap we used when we went native rather than create
2179 // a distinct one for this pc.
2180 //
2181 __ bind(sync);
2182 __ z_acquire();
2183
2184 address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2185 : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2186
2187 __ call_VM_leaf(entry_point, Z_thread);
2188
2189 if (is_critical_native) {
2190 restore_native_result(masm, ret_type, workspace_slot_offset);
2191 __ z_bru(after_transition); // No thread state transition here.
2192 }
2193 __ bind(no_block);
2194 restore_native_result(masm, ret_type, workspace_slot_offset);
2195 }
2196
2197 //--------------------------------------------------------------------
2198 // Thread state is thread_in_native_trans. Any safepoint blocking has
2199 // already happened so we can now change state to _thread_in_Java.
2200 //--------------------------------------------------------------------
2201 // Transition from _thread_in_native_trans to _thread_in_Java.
2202 __ set_thread_state(_thread_in_Java);
2203 __ bind(after_transition);
2204
2205
2206 //--------------------------------------------------------------------
2207 // Reguard any pages if necessary.
2208 // Protect native result from being destroyed.
2209 //--------------------------------------------------------------------
2210
2211 Label no_reguard;
2212
2213 __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2214 JavaThread::stack_guard_yellow_reserved_disabled);
2215
2216 __ z_bre(no_reguard);
2217
2218 save_native_result(masm, ret_type, workspace_slot_offset);
2219 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2220 restore_native_result(masm, ret_type, workspace_slot_offset);
2221
2222 __ bind(no_reguard);
2223
2224
2225 // Synchronized methods (slow path only)
2226 // No pending exceptions for now.
2227 //--------------------------------------------------------------------
2228 // Handle possibly pending exception (will unlock if necessary).
2229 // Native result is, if any is live, in Z_FRES or Z_RES.
2230 //--------------------------------------------------------------------
2231 // Unlock
2232 //--------------------------------------------------------------------
2233 if (method->is_synchronized()) {
2234 const Register r_oop = Z_R11;
2235 const Register r_box = Z_R12;
2236 const Register r_tmp1 = Z_R13;
2237 const Register r_tmp2 = Z_R7;
2238 Label done;
2239
2240 // Get unboxed oop of class mirror or object ...
2241 int offset = method_is_static ? klass_offset : receiver_offset;
2242
2243 assert(offset != -1, "");
2244 __ z_lg(r_oop, offset, Z_SP);
2245
2246 // ... and address of lock object box.
2247 __ add2reg(r_box, lock_offset, Z_SP);
2248
2249 // Try fastpath for unlocking.
2250 __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2251 __ z_bre(done);
2252
2253 // Slow path for unlocking.
2254 // Save and restore any potential method result value around the unlocking operation.
2255 const Register R_exc = Z_R11;
2256
2257 save_native_result(masm, ret_type, workspace_slot_offset);
2258
2259 // Must save pending exception around the slow-path VM call. Since it's a
2260 // leaf call, the pending exception (if any) can be kept in a register.
2261 __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2262 assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2263
2264 // Must clear pending-exception before re-entering the VM. Since this is
2265 // a leaf call, pending-exception-oop can be safely kept in a register.
2266 __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2267
2268 // Inline a special case of call_VM that disallows any pending_exception.
2269
2270 // Get locked oop from the handle we passed to jni.
2271 __ z_lg(Z_ARG1, offset, Z_SP);
2272 __ add2reg(Z_ARG2, lock_offset, Z_SP);
2273 __ z_lgr(Z_ARG3, Z_thread);
2274
2275 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2276
2277 __ call(Z_R1_scratch);
2278
2279 #ifdef ASSERT
2280 {
2281 Label L;
2282 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2283 __ z_bre(L);
2284 __ stop("no pending exception allowed on exit from IR::monitorexit");
2285 __ bind(L);
2286 }
2287 #endif
2288
2289 // Check_forward_pending_exception jump to forward_exception if any pending
2290 // exception is set. The forward_exception routine expects to see the
2291 // exception in pending_exception and not in a register. Kind of clumsy,
2292 // since all folks who branch to forward_exception must have tested
2293 // pending_exception first and hence have it in a register already.
2294 __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2295 restore_native_result(masm, ret_type, workspace_slot_offset);
2296 __ z_bru(done);
2297 __ z_illtrap(0x66);
2298
2299 __ bind(done);
2300 }
2301
2302
2303 //--------------------------------------------------------------------
2304 // Clear "last Java frame" SP and PC.
2305 //--------------------------------------------------------------------
2306 __ verify_thread(); // Z_thread must be correct.
2307
2308 __ reset_last_Java_frame();
2309
2310 // Unpack oop result, e.g. JNIHandles::resolve result.
2311 if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2312 __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2313 }
2314
2315 if (CheckJNICalls) {
2316 // clear_pending_jni_exception_check
2317 __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2318 }
2319
2320 // Reset handle block.
2321 if (!is_critical_native) {
2322 __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2323 __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2324
2325 // Check for pending exceptions.
2326 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2327 __ z_brne(handle_pending_exception);
2328 }
2329
2330
2331 //////////////////////////////////////////////////////////////////////
2332 // Return
2333 //////////////////////////////////////////////////////////////////////
2334
2335
2336 #ifndef USE_RESIZE_FRAME
2337 __ pop_frame(); // Pop wrapper frame.
2338 #else
2339 __ resize_frame(frame_size_in_bytes, Z_R0_scratch); // Revert stack extension.
2340 #endif
2341 __ restore_return_pc(); // This is the way back to the caller.
2342 __ z_br(Z_R14);
2343
2344
2345 //////////////////////////////////////////////////////////////////////
2346 // Out-of-line calls to the runtime.
2347 //////////////////////////////////////////////////////////////////////
2348
2349
2350 if (!is_critical_native) {
2351
2352 //---------------------------------------------------------------------
2353 // Handler for pending exceptions (out-of-line).
2354 //---------------------------------------------------------------------
2355 // Since this is a native call, we know the proper exception handler
2356 // is the empty function. We just pop this frame and then jump to
2357 // forward_exception_entry. Z_R14 will contain the native caller's
2358 // return PC.
2359 __ bind(handle_pending_exception);
2360 __ pop_frame();
2361 __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2362 __ restore_return_pc();
2363 __ z_br(Z_R1_scratch);
2364
2365 //---------------------------------------------------------------------
2366 // Handler for a cache miss (out-of-line)
2367 //---------------------------------------------------------------------
2368 __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2369 }
2370 __ flush();
2371
2372
2373 //////////////////////////////////////////////////////////////////////
2374 // end of code generation
2375 //////////////////////////////////////////////////////////////////////
2376
2377
2378 nmethod *nm = nmethod::new_native_nmethod(method,
2379 compile_id,
2380 masm->code(),
2381 (int)(wrapper_VEPStart-wrapper_CodeStart),
2382 (int)(wrapper_FrameDone-wrapper_CodeStart),
2383 stack_slots / VMRegImpl::slots_per_word,
2384 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2385 in_ByteSize(lock_offset),
2386 oop_maps);
2387
2388 if (is_critical_native) {
2389 nm->set_lazy_critical_native(true);
2390 }
2391
2392 return nm;
2393 }
2394
2395 static address gen_c2i_adapter(MacroAssembler *masm,
2396 int total_args_passed,
2397 int comp_args_on_stack,
2398 const BasicType *sig_bt,
2399 const VMRegPair *regs,
2400 Label &skip_fixup) {
2401 // Before we get into the guts of the C2I adapter, see if we should be here
2402 // at all. We've come from compiled code and are attempting to jump to the
2403 // interpreter, which means the caller made a static call to get here
2404 // (vcalls always get a compiled target if there is one). Check for a
2405 // compiled target. If there is one, we need to patch the caller's call.
2406
2407 // These two defs MUST MATCH code in gen_i2c2i_adapter!
2408 const Register ientry = Z_R11;
2409 const Register code = Z_R11;
2410
2411 address c2i_entrypoint;
2412 Label patch_callsite;
2413
2414 // Regular (verified) c2i entry point.
2415 c2i_entrypoint = __ pc();
2416
2417 // Call patching needed?
2418 __ load_and_test_long(Z_R0_scratch, method_(code));
2419 __ z_lg(ientry, method_(interpreter_entry)); // Preload interpreter entry (also if patching).
2420 __ z_brne(patch_callsite); // Patch required if code != NULL (compiled target exists).
2421
2422 __ bind(skip_fixup); // Return point from patch_callsite.
2423
2424 // Since all args are passed on the stack, total_args_passed*wordSize is the
2425 // space we need. We need ABI scratch area but we use the caller's since
2426 // it has already been allocated.
2427
2428 const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2429 int extraspace = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2430 Register sender_SP = Z_R10;
2431 Register value = Z_R12;
2432
2433 // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2434 // In addition, frame manager expects initial_caller_sp in Z_R10.
2435 __ z_lgr(sender_SP, Z_SP);
2436
2437 // This should always fit in 14 bit immediate.
2438 __ resize_frame(-extraspace, Z_R0_scratch);
2439
2440 // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2441 // args. This essentially moves the callers ABI scratch area from the top to the
2442 // bottom of the arg area.
2443
2444 int st_off = extraspace - wordSize;
2445
2446 // Now write the args into the outgoing interpreter space.
2447 for (int i = 0; i < total_args_passed; i++) {
2448 VMReg r_1 = regs[i].first();
2449 VMReg r_2 = regs[i].second();
2450 if (!r_1->is_valid()) {
2451 assert(!r_2->is_valid(), "");
2452 continue;
2453 }
2454 if (r_1->is_stack()) {
2455 // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2456 // We must account for it here.
2457 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2458
2459 if (!r_2->is_valid()) {
2460 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2461 } else {
2462 // longs are given 2 64-bit slots in the interpreter,
2463 // but the data is passed in only 1 slot.
2464 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2465 #ifdef ASSERT
2466 __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2467 #endif
2468 st_off -= wordSize;
2469 }
2470 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2471 }
2472 } else {
2473 if (r_1->is_Register()) {
2474 if (!r_2->is_valid()) {
2475 __ z_st(r_1->as_Register(), st_off, Z_SP);
2476 } else {
2477 // longs are given 2 64-bit slots in the interpreter, but the
2478 // data is passed in only 1 slot.
2479 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2480 #ifdef ASSERT
2481 __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2482 #endif
2483 st_off -= wordSize;
2484 }
2485 __ z_stg(r_1->as_Register(), st_off, Z_SP);
2486 }
2487 } else {
2488 assert(r_1->is_FloatRegister(), "");
2489 if (!r_2->is_valid()) {
2490 __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2491 } else {
2492 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2493 // data is passed in only 1 slot.
2494 // One of these should get known junk...
2495 #ifdef ASSERT
2496 __ z_lzdr(Z_F1);
2497 __ z_std(Z_F1, st_off, Z_SP);
2498 #endif
2499 st_off-=wordSize;
2500 __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2501 }
2502 }
2503 }
2504 st_off -= wordSize;
2505 }
2506
2507
2508 // Jump to the interpreter just as if interpreter was doing it.
2509 __ add2reg(Z_esp, st_off, Z_SP);
2510
2511 // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2512 __ z_br(ientry);
2513
2514
2515 // Prevent illegal entry to out-of-line code.
2516 __ z_illtrap(0x22);
2517
2518 // Generate out-of-line runtime call to patch caller,
2519 // then continue as interpreted.
2520
2521 // IF you lose the race you go interpreted.
2522 // We don't see any possible endless c2i -> i2c -> c2i ...
2523 // transitions no matter how rare.
2524 __ bind(patch_callsite);
2525
2526 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2527 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2528 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2529 __ z_bru(skip_fixup);
2530
2531 // end of out-of-line code
2532
2533 return c2i_entrypoint;
2534 }
2535
2536 // On entry, the following registers are set
2537 //
2538 // Z_thread r8 - JavaThread*
2539 // Z_method r9 - callee's method (method to be invoked)
2540 // Z_esp r7 - operand (or expression) stack pointer of caller. one slot above last arg.
2541 // Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top
2542 //
// Generate the interpreted-to-compiled (i2c) argument shuffle.
// Arguments arrive laid out on the interpreter's expression stack (Z_esp
// points one slot above the last argument, see comment block above) and are
// moved into the registers / outgoing stack slots demanded by the compiled
// calling convention described by sig_bt/regs. Finally control transfers to
// the callee's from_compiled entry point.
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed,
                                    int comp_args_on_stack,
                                    const BasicType *sig_bt,
                                    const VMRegPair *regs) {
  const Register value = Z_R12;
  const Register ld_ptr= Z_esp;   // Source pointer: interpreter expression stack.

  // Interpreter arguments are addressed top-down; start at the highest slot.
  int ld_offset = total_args_passed * wordSize;

  // Cut-out for having no stack args.
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // Convert VMRegImpl (4 byte) stack slots to words.
    int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = align_up(comp_words_on_stack, 2);

    // Extend our frame to make room for the compiled convention's stack args.
    __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through register value=Z_R12.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID marks the unused second half of a long/double pair.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        // Single-slot float value.
        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
        ld_offset -= 2 * wordSize;
      }
    } else {
      if (r_1->is_stack()) {
        // Must do a memory to memory move.
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;

        if (!r_2->is_valid()) {
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        }
      } else {
        if (!r_2->is_valid()) {
          // Not sure we need to do this but it shouldn't hurt.
          if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
          } else {
            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
          }
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
        }
      }
      ld_offset -= wordSize;
    }
  }

  // Jump to the compiled code just as if compiled code was doing it.
  // load target address from method oop:
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));

  // Store method oop into thread->callee_target.
  // 6243940: We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately, if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ z_stg(Z_method, thread_(callee_target));

  __ z_br(Z_R1_scratch);
}
2646
// Generate the combined i2c/c2i adapter for one signature fingerprint.
// Layout emitted into the code buffer, in order:
//   1. i2c entry (interpreted caller -> compiled callee, see gen_i2c_adapter),
//   2. out-of-line ic_miss handler call,
//   3. c2i unverified entry (UEP): inline-cache check, falls through to
//   4. c2i verified entry (gen_c2i_adapter).
// Returns a new AdapterHandlerEntry recording the three entry points.
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  __ align(CodeEntryAlignment);
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry;

  Label skip_fixup;
  {
    Label ic_miss;
    const int klass_offset           = oopDesc::klass_offset_in_bytes();
    const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
    const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();

    // Out-of-line call to ic_miss handler.
    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);

    // Unverified Entry Point UEP
    __ align(CodeEntryAlignment);
    c2i_unverified_entry = __ pc();

    // Check the pointers.
    if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
      // Receiver null check must be explicit: branch to ic_miss on null.
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_bre(ic_miss);
    }
    __ verify_oop(Z_ARG1);

    // Check ic: object class <-> cached class
    // Compress cached class for comparison. That's more efficient.
    if (UseCompressedClassPointers) {
      __ z_lg(Z_R11, holder_klass_offset, Z_method); // Z_R11 is overwritten a few instructions down anyway.
      __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
    } else {
      __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
    }
    __ z_brne(ic_miss); // Cache miss: call runtime to handle this.

    // This def MUST MATCH code in gen_c2i_adapter!
    const Register code = Z_R11;

    // Cache hit: load the real Method* from the CompiledICHolder.
    __ z_lg(Z_method, holder_metadata_offset, Z_method);
    __ load_and_test_long(Z_R0, method_(code));
    __ z_brne(ic_miss); // Cache miss: call runtime to handle this.

    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
  }

  address c2i_entry;
  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}
2705
// This function returns the adjustment size (in number of words) of a c2i
// adapter activation for use during deoptimization.
//
// Actually only compiled frames need to be adjusted, but it
// does no harm to adjust entry and interpreter frames, too.
2711 //
last_frame_adjust(int callee_parameters,int callee_locals)2712 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2713 assert(callee_locals >= callee_parameters,
2714 "test and remove; got more parms than locals");
2715 // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2716 return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2717 frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2718 }
2719
out_preserve_stack_slots()2720 uint SharedRuntime::out_preserve_stack_slots() {
2721 return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2722 }
2723
2724 //
2725 // Frame generation for deopt and uncommon trap blobs.
2726 //
// Push one skeletal interpreter frame: reads the frame size and pc for the
// current frame from *frame_sizes_reg / *pcs_reg, stores the pc as return_pc
// of the frame on top of stack, then pushes the new frame.
// frame_size_reg and pc_reg are clobbered (pc_reg doubles as the new fp).
static void push_skeleton_frame(MacroAssembler* masm,
                          /* Unchanged */
                          Register frame_sizes_reg,
                          Register pcs_reg,
                          /* Invalidate */
                          Register frame_size_reg,
                          Register pc_reg) {
  BLOCK_COMMENT(" push_skeleton_frame {");
   __ z_lg(pc_reg, 0, pcs_reg);
   __ z_lg(frame_size_reg, 0, frame_sizes_reg);
   __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
   Register fp = pc_reg; // push_frame will load the new fp here; pc is consumed above.
   __ push_frame(frame_size_reg, fp);
#ifdef ASSERT
   // The magic is required for successful walking skeletal frames.
   __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
   // Fill other slots that are supposedly not necessary with eye catchers.
   __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
   // The sender_sp of the bottom frame is set before pushing it.
   // The sender_sp of non bottom frames is their caller's top_frame_sp, which
   // is unknown here. Luckily it is not needed before filling the frame in
   // layout_activation(), we assert this by setting an eye catcher (see
   // comments on sender_sp in frame_s390.hpp).
   __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
#endif // ASSERT
  BLOCK_COMMENT(" } push_skeleton_frame");
}
2756
2757 // Loop through the UnrollBlock info and create new frames.
// Loop through the UnrollBlock info and create new frames.
// Reads number_of_frames, frame_pcs and frame_sizes from the UnrollBlock,
// resizes the caller's frame by caller_adjustment, then pushes one skeletal
// interpreter frame per entry (see push_skeleton_frame) and finally stores
// the top frame's return pc. All registers except unroll_block_reg are
// clobbered.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                                  /* read */
                                  Register unroll_block_reg,
                                  /* invalidate */
                                  Register frame_sizes_reg,
                                  Register number_of_frames_reg,
                                  Register pcs_reg,
                                  Register tmp1,
                                  Register tmp2) {
  BLOCK_COMMENT("push_skeleton_frames {");
  // _number_of_frames is of type int (deoptimization.hpp).
  __ z_lgf(number_of_frames_reg,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
  __ z_lg(pcs_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
  __ z_lg(frame_sizes_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

  // stack: (caller_of_deoptee, ...).

  // If caller_of_deoptee is a compiled frame, then we extend it to make
  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
  // See also Deoptimization::last_frame_adjust() above.
  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.

  __ z_lgf(Z_R1_scratch,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
  // The oldest skeletal frame requires a valid sender_sp to make it walkable
  // (it is required to find the original pc of caller_of_deoptee if it is marked
  // for deoptimization - see nmethod::orig_pc_addr()).
  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);

  // Now push the new interpreter frames.
  Label loop, loop_entry;

  // Make sure that there is at least one entry in the array.
  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
  __ asm_assert_ne("array_size must be > 0", 0x205);

  // Bottom-tested loop: first iteration uses the unadvanced pointers.
  __ z_bru(loop_entry);

  __ bind(loop);

  // Advance to the next frame size / pc entry.
  __ add2reg(frame_sizes_reg, wordSize);
  __ add2reg(pcs_reg, wordSize);

  __ bind(loop_entry);

  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);

  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
  __ z_brne(loop);

  // Set the top frame's return pc.
  __ add2reg(pcs_reg, wordSize);
  __ z_lg(Z_R0_scratch, 0, pcs_reg);
  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
  BLOCK_COMMENT("} push_skeleton_frames");
}
2820
2821 //------------------------------generate_deopt_blob----------------------------
// Generate the deoptimization blob: three entry points (normal deopt,
// reexecute, exception) that save all registers, call
// Deoptimization::fetch_unroll_info, replace the deoptee frame with skeletal
// interpreter frames (push_skeleton_frames), let
// Deoptimization::unpack_frames fill them in, and finally resume in the
// interpreter.
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  CodeBuffer buffer("deopt_blob", 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = NULL;
  OopMapSet *oop_maps = new OopMapSet();

  unsigned int start_off = __ offset();
  Label cont;

  // --------------------------------------------------------------------------
  // Normal entry (non-exception case)
  //
  // We have been called from the deopt handler of the deoptee.
  // Z_R14 points behind the call in the deopt handler. We adjust
  // it such that it points to the start of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.
  // The (int) cast is necessary, because -((unsigned int)14)
  // is an unsigned int.
  __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());

  const Register exec_mode_reg = Z_tmp_1;

  // stack: (deoptee, caller of deoptee, ...)

  // pushes an "unpack" frame
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // save R14 into the deoptee frame.  the `fetch_unroll_info'
  // procedure called below will read it from there.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // note the entry point.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
  __ z_bru(exec_mode_initialized);

#ifndef COMPILER1
  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
#else
  // --------------------------------------------------------------------------
  // Reexecute entry
  // - Z_R14 = Deopt Handler in nmethod

  int reexecute_offset = __ offset() - start_off;

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
  __ z_bru(exec_mode_initialized);
#endif


  // --------------------------------------------------------------------------
  // Exception entry. We reached here via a branch. Registers on entry:
  // - Z_EXC_OOP (Z_ARG1) = exception oop
  // - Z_EXC_PC  (Z_ARG2) = the exception pc.

  int exception_offset = __ offset() - start_off;

  // all registers are dead at this entry point, except for Z_EXC_OOP, and
  // Z_EXC_PC which contain the exception oop and exception pc
  // respectively.  Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  // Store exception oop and pc in thread (location known to GC).
  // Need this since the call to "fetch_unroll_info()" may safepoint.
  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));

  // fall through

  int exception_in_tls_offset = __ offset() - start_off;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread

  // load throwing pc from JavaThread and use it as the return address of the current frame.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));

  // Save everything in sight.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);

  // Now it is safe to overwrite any register

  // Clear the exception pc field in JavaThread
  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);


#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ verify_oop(Z_ARG1);

  // verify that there is no pending exception
  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
                             "must not have pending exception here", __LINE__);
#endif

  // --------------------------------------------------------------------------
  // At this point, the live registers are saved and
  // the exec_mode_reg has been set up correctly.
  __ bind(exec_mode_initialized);

  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).

  {
    const Register unroll_block_reg  = Z_tmp_2;

    // we need to set `last_Java_frame' because `fetch_unroll_info' will
    // call `last_Java_frame()'.  however we can't block and no gc will
    // occur so we don't need an oopmap. the value of the pc in the
    // frame is not particularly important.  it just needs to identify the blob.

    // Don't set last_Java_pc anymore here (is implicitly NULL then).
    // the correct PC is retrieved in pd_last_frame() in that case.
    __ set_last_Java_frame(/*sp*/Z_SP, noreg);
    // With EscapeAnalysis turned on, this call may safepoint
    // despite it's marked as "leaf call"!
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
    // Set an oopmap for the call site this describes all our saved volatile registers
    int offs = __ offset();
    oop_maps->add_gc_map(offs, map);

    __ reset_last_Java_frame();
    // save the return value.
    __ z_lgr(unroll_block_reg, Z_RET);
    // restore the return registers that have been saved
    // (among other registers) by save_live_registers(...).
    RegisterSaver::restore_result_registers(masm);

    // reload the exec mode from the UnrollBlock (it might have changed)
    __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));

    // In excp_deopt_mode, restore and clear exception oop which we
    // stored in the thread during exception entry above. The exception
    // oop will be the return value of this stub.
    NearLabel skip_restore_excp;
    __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
    __ z_lg(Z_RET, thread_(exception_oop));
    __ clear_mem(thread_(exception_oop), 8);
    __ bind(skip_restore_excp);

    // remove the "unpack" frame
    __ pop_frame();

    // stack: (deoptee, caller of deoptee, ...).

    // pop the deoptee's frame
    __ pop_frame();

    // stack: (caller_of_deoptee, ...).

    // loop through the `UnrollBlock' info and create interpreter frames.
    push_skeleton_frames(masm, true/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4,
                  Z_ARG3);

    // stack: (skeletal interpreter frame, ..., optional skeletal
    // interpreter frame, caller of deoptee, ...).
  }

  // push an "unpack" frame taking care of float / int return values.
  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, caller of deoptee, ...).

  // spill live volatile registers since we'll do a call.
  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // let the unpacker layout information in the skeletal frames just allocated.
  __ get_PC(Z_RET);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);

  __ reset_last_Java_frame();

  // restore the volatiles saved above.
  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // pop the "unpack" frame.
  __ pop_frame();
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // caller of deoptee, ...).

  // Restore the interpreter state registers before resuming execution.
  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point.
  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
3042
3043
3044 #ifdef COMPILER2
3045 //------------------------------generate_uncommon_trap_blob--------------------
// Generate the uncommon-trap blob: called from compiled code when an
// uncommon trap is hit. Calls Deoptimization::uncommon_trap to produce an
// UnrollBlock, replaces the deoptee frame with skeletal interpreter frames,
// lets Deoptimization::unpack_frames fill them in, and resumes in the
// interpreter. On entry, Z_ARG1 holds the klass/trap-request index.
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);

  Register unroll_block_reg = Z_tmp_1;
  Register klass_index_reg  = Z_ARG2;
  Register unc_trap_reg     = Z_ARG2;

  // stack: (deoptee, caller_of_deoptee, ...).

  // push a dummy "unpack" frame and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // save R14 to compiled frame.
  __ save_return_pc();
  // push the "unpack_frame".
  __ push_frame_abi160(0);

  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).

  // set the "unpack" frame as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);

  __ reset_last_Java_frame();

  // pop the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller_of_deoptee, ...).

  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);

  // pop the deoptee frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
  // unpack_kind is an int; compare only its least significant byte
  // (big-endian targets need a +3 byte offset).
  const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
#ifndef VM_LITTLE_ENDIAN
  + 3
#endif
  ;
  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  } else {
    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  }
  __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
#endif

  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);

  // allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters !)
  push_skeleton_frames(masm, false/*deopt*/,
                unroll_block_reg,
                Z_tmp_2,
                Z_tmp_3,
                Z_tmp_4,
                Z_ARG5,
                Z_ARG4);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, (resized) caller of deoptee, ...).

  // push a dummy "unpack" frame taking care of float return values.
  // call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created

  // push the "unpack" frame
  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, (resized) caller of deoptee, ...).

  // set the "unpack" frame as last_Java_frame
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  // indicate it is the uncommon trap case
  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // let the unpacker layout information in the skeletal frames just allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);

  __ reset_last_Java_frame();
  // pop the "unpack" frame
  __ pop_frame();
  // restore LR from top interpreter frame
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // (resized) caller of deoptee, ...).

  // Restore the interpreter state registers before resuming execution.
  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point
  __ z_br(Z_R14);

  masm->flush();
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
}
3167 #endif // COMPILER2
3168
3169
3170 //------------------------------generate_handler_blob------
3171 //
3172 // Generate a special Compile2Runtime blob that saves all registers,
3173 // and setup oopmap.
// Generate the safepoint handler blob: saves all registers, calls the given
// runtime safepoint handler (call_ptr), and either forwards a pending
// exception or restores registers and returns. For non-return polls with
// thread-local polling, the return pc may be advanced past the poll
// instruction before returning.
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  assert(StubRoutines::forward_exception_entry() != NULL,
         "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  CodeBuffer buffer("handler_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  unsigned int start_off = __ offset();
  address call_pc = NULL;
  int frame_size_in_bytes;

  bool cause_return = (poll_type == POLL_AT_RETURN);
  // Make room for return address (or push it again)
  if (!cause_return) {
    // Poll was not at a return: the real return pc was stashed in the thread.
    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
  }

  // Save registers, fpu state, and flags
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
    // Keep a copy of the return pc to detect if it gets modified.
    __ z_lgr(Z_R6, Z_R14);
  }

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(Z_SP, noreg);

  // call into the runtime to handle the safepoint poll
  __ call_VM_leaf(call_ptr, Z_thread);


  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map((int)(__ offset()-start_off), map);

  Label noException;

  __ reset_last_Java_frame();

  __ load_and_test_long(Z_R1, thread_(pending_exception));
  __ z_bre(noException);

  // Pending exception case, used (sporadically) by
  // api/java_lang/Thread.State/index#ThreadState et al.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // Jump to forward_exception_entry, with the issuing PC in Z_R14
  // so it looks like the original nmethod called forward_exception_entry.
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // No exception case
  __ bind(noException);

  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
    __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
    __ z_brne(no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    __ instr_size(Z_R1_scratch, Z_R6);
    __ z_agr(Z_R6, Z_R1_scratch);
    __ z_stg(Z_R6, offset_of_return_pc, Z_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
}
3265
3266
3267 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3269 //
3270 // Generate a stub that calls into vm to find out the proper destination
3271 // of a Java call. All the argument registers are live at this point
3272 // but since this is generic code we don't know what they are and the caller
3273 // must do any gc of the args.
3274 //
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  // The pending-exception path below jumps to the forward_exception stub,
  // so that stub must already have been generated.
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  unsigned int start_off = __ offset();

  // All argument registers are live here; save everything so the resolver
  // (and any GC that happens at the safepoint) sees a consistent state.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // We must save a PC from within the stub as return PC
  // C code doesn't store the LR where we expect the PC,
  // so we would run into trouble upon stack walking.
  __ get_PC(Z_R1_scratch);

  unsigned int frame_complete = __ offset();

  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);

  // Call the resolver in the VM; Z_thread and Z_method are its arguments.
  __ call_VM_leaf(destination, Z_thread, Z_method);


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map((int)(frame_complete-start_off), map);

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // check for pending exceptions
  Label pending;
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(pending);

  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // get the returned method
  __ get_vm_result_2(Z_method);

  // We are back to the original state on entry and ready to go.
  __ z_br(Z_R1_scratch);

  // Pending exception after the safepoint.

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // exception pending => remove activation and forward to exception handler

  __ z_lgr(Z_R2, Z_R0); // pending_exception
  __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
                                       oop_maps, true);

}
3349
3350 //------------------------------Montgomery multiplication------------------------
3351 //
3352
3353 // Subtract 0:b from carry:a. Return carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  // Multi-precision subtract: a[0..len-1] -= b[0..len-1], one 64-bit word at
  // a time, then fold the left-over borrow into the incoming carry word and
  // return it.
  // i is the running byte index; c first holds the byte-index limit
  // (8*(len-1)) and is then reused as the data/borrow register in the loop.
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR %[i], %[i] \n" // initialize to 0 and pre-set carry
    "LGHI 0, 8 \n" // index increment (for BRXLG)
    "LGR 1, %[c] \n" // index limit (for BRXLG)
    "0: \n"
    "LG %[c], 0(%[i],%[a]) \n"
    "SLBG %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG %[c], 0(%[i],%[a]) \n"
    "BRXLG %[i], 0, 0b \n" // while ((i+=8)<limit);
    "SLBGR %[c], %[c] \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1"
  );
  // After SLBGR, c is 0 if no borrow remained and -1 (all ones) otherwise,
  // so carry + c is the propagated carry for the caller.
  return carry + c;
}
3373
3374 // Multiply (unsigned) Long A by Long B, accumulating the double-
3375 // length result into the accumulator formed of T0, T1, and T2.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  // Scale the word indices to byte offsets for the base+index address forms.
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  // MLG puts the 128-bit product into the even/odd register pair r0:r1; the
  // add-logical-with-carry chain then ripples it into T0, T1 and T2.
  __asm__ __volatile__ (
    "LG 1, 0(%[A_si],%[A]) \n" // r1 = A[A_ind]
    "MLG 0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR %[T0], 1 \n" // T0 += low(product), sets carry
    "LGHI 1, 0 \n" // r1 = 0
    "ALCGR %[T1], 0 \n" // T1 += high(product) + carry
    "ALCGR %[T2], 1 \n" // T2 += carry
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1"
  );
}
3393
3394 // As above, but add twice the double-length result into the
3395 // accumulator.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  // Dedicated zero operand: r1 still holds the product's low word for the
  // second accumulation, so the T2 carry must be added from %[zero] instead.
  const unsigned long zero = 0;
  // Scale the word indices to byte offsets for the base+index address forms.
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG 1, 0(%[A_si],%[A]) \n" // r1 = A[A_ind]
    "MLG 0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    // First accumulation of the 128-bit product.
    "ALGR %[T0], 1 \n"
    "ALCGR %[T1], 0 \n"
    "ALCGR %[T2], %[zero] \n"
    // Second accumulation: the product is added twice in total.
    "ALGR %[T0], 1 \n"
    "ALCGR %[T1], 0 \n"
    "ALCGR %[T2], %[zero] \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1"
  );
}
3416
3417 // Fast Montgomery multiplication. The derivation of the algorithm is
3418 // in "A Cryptographic Library for the Motorola DSP56000,
3419 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3420 static void
montgomery_multiply(unsigned long a[],unsigned long b[],unsigned long n[],unsigned long m[],unsigned long inv,int len)3421 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3422 unsigned long m[], unsigned long inv, int len) {
3423 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3424 int i;
3425
3426 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3427
3428 for (i = 0; i < len; i++) {
3429 int j;
3430 for (j = 0; j < i; j++) {
3431 MACC(a, j, b, i-j, t0, t1, t2);
3432 MACC(m, j, n, i-j, t0, t1, t2);
3433 }
3434 MACC(a, i, b, 0, t0, t1, t2);
3435 m[i] = t0 * inv;
3436 MACC(m, i, n, 0, t0, t1, t2);
3437
3438 assert(t0 == 0, "broken Montgomery multiply");
3439
3440 t0 = t1; t1 = t2; t2 = 0;
3441 }
3442
3443 for (i = len; i < 2 * len; i++) {
3444 int j;
3445 for (j = i - len + 1; j < len; j++) {
3446 MACC(a, j, b, i-j, t0, t1, t2);
3447 MACC(m, j, n, i-j, t0, t1, t2);
3448 }
3449 m[i-len] = t0;
3450 t0 = t1; t1 = t2; t2 = 0;
3451 }
3452
3453 while (t0) {
3454 t0 = sub(m, n, t0, len);
3455 }
3456 }
3457
3458 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3459 // multiplies so it should be up to 25% faster than Montgomery
3460 // multiplication. However, its loop control is more complex and it
3461 // may actually run slower on some machines.
3462 static void
montgomery_square(unsigned long a[],unsigned long n[],unsigned long m[],unsigned long inv,int len)3463 montgomery_square(unsigned long a[], unsigned long n[],
3464 unsigned long m[], unsigned long inv, int len) {
3465 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3466 int i;
3467
3468 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3469
3470 for (i = 0; i < len; i++) {
3471 int j;
3472 int end = (i+1)/2;
3473 for (j = 0; j < end; j++) {
3474 MACC2(a, j, a, i-j, t0, t1, t2);
3475 MACC(m, j, n, i-j, t0, t1, t2);
3476 }
3477 if ((i & 1) == 0) {
3478 MACC(a, j, a, j, t0, t1, t2);
3479 }
3480 for (; j < i; j++) {
3481 MACC(m, j, n, i-j, t0, t1, t2);
3482 }
3483 m[i] = t0 * inv;
3484 MACC(m, i, n, 0, t0, t1, t2);
3485
3486 assert(t0 == 0, "broken Montgomery square");
3487
3488 t0 = t1; t1 = t2; t2 = 0;
3489 }
3490
3491 for (i = len; i < 2*len; i++) {
3492 int start = i-len+1;
3493 int end = start + (len - start)/2;
3494 int j;
3495 for (j = start; j < end; j++) {
3496 MACC2(a, j, a, i-j, t0, t1, t2);
3497 MACC(m, j, n, i-j, t0, t1, t2);
3498 }
3499 if ((i & 1) == 0) {
3500 MACC(a, j, a, j, t0, t1, t2);
3501 }
3502 for (; j < len; j++) {
3503 MACC(m, j, n, i-j, t0, t1, t2);
3504 }
3505 m[i-len] = t0;
3506 t0 = t1; t1 = t2; t2 = 0;
3507 }
3508
3509 while (t0) {
3510 t0 = sub(m, n, t0, len);
3511 }
3512 }
3513
3514 // The threshold at which squaring is advantageous was determined
3515 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3516 // Value seems to be ok for other platforms, too.
3517 #define MONTGOMERY_SQUARING_THRESHOLD 64
3518
3519 // Copy len longwords from s to d, word-swapping as we go. The
3520 // destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  // Copy len longwords from s into d so that d ends up in reversed word
  // order: d[len-1-k] = s[k].
  const unsigned long *src = s;
  unsigned long *dst = d + len;
  for (int k = 0; k < len; k++) {
    unsigned long value = *src++;
    // Word-swapping within a longword would be needed on little-endian
    // machines; this port does not support that configuration.
#ifdef VM_LITTLE_ENDIAN
    Unimplemented();
#endif
    *--dst = value;
  }
}
3534
montgomery_multiply(jint * a_ints,jint * b_ints,jint * n_ints,jint len,jlong inv,jint * m_ints)3535 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3536 jint len, jlong inv,
3537 jint *m_ints) {
3538 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3539 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3540 int longwords = len/2;
3541
3542 // Make very sure we don't use so much space that the stack might
3543 // overflow. 512 jints corresponds to an 16384-bit integer and
3544 // will use here a total of 8k bytes of stack space.
3545 int total_allocation = longwords * sizeof (unsigned long) * 4;
3546 guarantee(total_allocation <= 8192, "must be");
3547 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3548
3549 // Local scratch arrays
3550 unsigned long
3551 *a = scratch + 0 * longwords,
3552 *b = scratch + 1 * longwords,
3553 *n = scratch + 2 * longwords,
3554 *m = scratch + 3 * longwords;
3555
3556 reverse_words((unsigned long *)a_ints, a, longwords);
3557 reverse_words((unsigned long *)b_ints, b, longwords);
3558 reverse_words((unsigned long *)n_ints, n, longwords);
3559
3560 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3561
3562 reverse_words(m, (unsigned long *)m_ints, longwords);
3563 }
3564
montgomery_square(jint * a_ints,jint * n_ints,jint len,jlong inv,jint * m_ints)3565 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3566 jint len, jlong inv,
3567 jint *m_ints) {
3568 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3569 assert(len % 2 == 0, "array length in montgomery_square must be even");
3570 int longwords = len/2;
3571
3572 // Make very sure we don't use so much space that the stack might
3573 // overflow. 512 jints corresponds to an 16384-bit integer and
3574 // will use here a total of 6k bytes of stack space.
3575 int total_allocation = longwords * sizeof (unsigned long) * 3;
3576 guarantee(total_allocation <= 8192, "must be");
3577 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3578
3579 // Local scratch arrays
3580 unsigned long
3581 *a = scratch + 0 * longwords,
3582 *n = scratch + 1 * longwords,
3583 *m = scratch + 2 * longwords;
3584
3585 reverse_words((unsigned long *)a_ints, a, longwords);
3586 reverse_words((unsigned long *)n_ints, n, longwords);
3587
3588 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3589 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3590 } else {
3591 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3592 }
3593
3594 reverse_words(m, (unsigned long *)m_ints, longwords);
3595 }
3596
// Spin-wait hint for lock back-off; a no-op on this platform (returns 0).
extern "C" int SpinPause() {
  return 0;
}
3601