/*
 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "gc/shared/gcLocker.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_s390.hpp"
#include "oops/compiledICHolder.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "vmreg_s390.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/ad.hpp"
#include "opto/runtime.hpp"
#endif

#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

#define BLOCK_COMMENT(str) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but which still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but which still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

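// Illustrative expansion (not part of the build): RegisterSaver_LiveIntReg(Z_R2)
// produces the initializer
//   { RegisterSaver::int_reg, Z_R2->encoding(), Z_R2->as_VMReg() }
// so every entry in the arrays below records the save policy, the hardware
// encoding, and the VMReg of one register.
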
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: all excluded, but each still gets a stack slot to keep the frame size the same.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

// Live argument registers which get spilled to the stack.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
  int reg_space = -1;
  switch (reg_set) {
    case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
    case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
    case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
    case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
    case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
    default: ShouldNotReachHere();
  }
  return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
}


int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
  return live_reg_save_size(reg_set) + frame::z_abi_160_size;
}
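
// Worked size example (illustrative, assuming reg_size == 8 and
// frame::z_abi_160_size == 160): RegisterSaver_LiveRegs has 15 float + 12 int
// entries, so live_reg_save_size(all_registers) = 27 * 8 = 216 bytes and
// live_reg_frame_size(all_registers) = 216 + 160 = 376 bytes.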


// return_pc: Specify the register that should be stored as the return pc in the current frame.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
  // Record volatile registers as callee-save values in an OopMap so
  // their save locations will be propagated to the caller frame's
  // RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).

  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Save return pc in old frame.
  __ save_return_pc(return_pc);

  // Push a new frame (includes stack linkage).
  // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
  // illegally used to pass parameters by RangeCheckStub::emit_code().
  __ push_frame(frame_size_in_bytes, return_pc);
  // We have to restore return_pc right away.
  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
  // Nobody else knows which register we saved.
  __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;

  Register first = noreg;
  Register last  = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_stmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_std(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
        break;
    }

    // Second set_callee_saved is really a waste but we'll keep things as they were for now
    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_stmg(first, last, first_offset, Z_SP);

  // And we're done.
  return map;
}
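
// Frame layout produced above (sketch; offsets grow upward from the new Z_SP):
//   [Z_SP + 0 .. Z_SP + z_abi_160_size)                          ABI/linkage area
//   [Z_SP + register_save_offset .. Z_SP + frame_size_in_bytes)  one reg_size slot
//                                                                per live-reg entry
// The return pc was stored into the caller's frame before push_frame, at
// _z_abi16(return_pc) + frame_size_in_bytes relative to the new Z_SP.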


// Generate the OopMap (again, regs were saved before).
OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
    }
    offset += reg_size;
  }
  return map;
}


// Pop the current frame and restore all the registers that we saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
  int offset;
  const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);

  Register first = noreg;
  Register last = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Restore all registers (ints and floats).

  // Register save area in new frame starts above z_abi_160 area.
  offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.

      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_lmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_ld(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
    }
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_lmg(first, last, first_offset, Z_SP);

  // Pop the frame.
  __ pop_frame();

  // Restore the return pc.
  __ restore_return_pc();
}
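
// Note on the first/last coalescing above (sketch): because Z_R2..Z_R13 are
// consecutive in the live-reg arrays, the loop typically folds all integer
// restores into a single LMG first,last,disp(Z_SP); a gap in the register
// sequence would merely split the range into two LMG instructions.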


// Pop the current frame and restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);

  // Restore all result registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == Z_RET) { // int result_reg
          __ z_lg(as_Register(reg_num), offset, Z_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
}

size_t SharedRuntime::trampoline_size() {
  return MacroAssembler::load_const_size() + 2;
}

void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  // Think about using pc-relative branch.
  __ load_const(Z_R1_scratch, destination);
  __ z_br(Z_R1_scratch);
}
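
// Size check (illustrative, assuming load_const_size() == 12, i.e. a 6-byte IIHF
// plus a 6-byte IILF): the trampoline is that 12-byte constant load followed by
// the 2-byte BCR emitted by z_br, matching trampoline_size() = load_const_size() + 2.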

// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler * masm,
                                       BasicType ret_type,
                                       int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ reg2mem_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Save pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ reg2mem_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ freg2mem_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ freg2mem_opt(Z_FRET, memaddr);
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm,
                                          BasicType       ret_type,
                                          int             frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ mem2reg_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Restore pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ mem2reg_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ mem2freg_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ mem2freg_opt(Z_FRET, memaddr);
      break;
  }
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off the stack pointer,
// as frame sizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
// up to RegisterImpl::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
  // c2c calling conventions for compiled-compiled calls.

  // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  int stk = 0;
  int ireg = 0;
  int freg = 0;

  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (ireg < z_num_iarg_registers) {
          // Put int/ptr in register.
          regs[i].set1(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put int/ptr on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_LONG:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (ireg < z_num_iarg_registers) {
          // Put long in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put long on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (ireg < z_num_iarg_registers) {
          // Put ptr in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put ptr on stack and align to 2 slots, because
          // "64-bit pointers record oop-ishness on 2 aligned adjacent
          // registers." (see OopFlow::build_oop_map).
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          // Put float in register.
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put float on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          // Put double in register.
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}
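
// Worked example (illustrative): for the signature (jint, jlong, jobject, jfloat,
// jdouble) the loop above assigns
//   T_INT    -> Z_R2 (set1),   T_LONG  -> Z_R3 (set2, trailing T_VOID is set_bad),
//   T_OBJECT -> Z_R4 (set2),   T_FLOAT -> Z_F0 (set1), T_DOUBLE -> Z_F2 (set2),
// and returns align_up(0, 2) = 0 because nothing spilled to the stack.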

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "second VMRegPair array not used on this platform");

  // Calling conventions for C runtime calls and calls to JNI native methods.
  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  // Check calling conventions consistency.
  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  // Avoid passing C arguments in the wrong stack slots.

  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  // Leave room for C-compatible ABI
  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int freg = 0;
  int ireg = 0;

  // We put the first 5 arguments into registers and the rest on the
  // stack. Float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        // Fall through, handle as long.
      case T_LONG:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        // Oops are already boxed if required (JNI).
        if (ireg < z_num_iarg_registers) {
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk+1));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack.
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}
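
// Worked example (illustrative): with six jint arguments, the first five land in
// Z_R2..Z_R6 (each widened to a full 64-bit slot via set2); the sixth goes to the
// stack and, like every int-sized C argument here, consumes two 4-byte slots.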

////////////////////////////////////////////////////////////////////////
//
//  Argument shufflers
//
////////////////////////////////////////////////////////////////////////

//----------------------------------------------------------------------
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
//----------------------------------------------------------------------
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return reg2slot(r) * VMRegImpl::stack_slot_size;
}
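
// In other words (illustrative): reg2offset(r) =
// (r->reg2stack() + out_preserve_stack_slots()) * VMRegImpl::stack_slot_size,
// i.e. the slot index biased by the ABI preserve area, scaled by the 4-byte slot size.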

static void verify_oop_args(MacroAssembler *masm,
                            int total_args_passed,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  if (!VerifyOops) { return; }

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
      VMReg r = regs[i].first();
      assert(r->is_valid(), "bad oop arg");

      if (r->is_stack()) {
        __ z_lg(Z_R0_scratch,
                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
        __ verify_oop(Z_R0_scratch);
      } else {
        __ verify_oop(r->as_Register());
      }
    }
  }
}

static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else {
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch);
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}

// Does a vector of the given size (in bytes) exceed the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on this platform.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  return size > 8;
}

//----------------------------------------------------------------------
// An oop arg. Must pass a handle not the oop itself
//----------------------------------------------------------------------
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a NULL handle if oop is NULL.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop == NULL, use a NULL handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}
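
// Net effect (sketch): the callee receives either NULL (for a NULL oop) or the
// address of a stack slot holding the oop; the map->set_oop() calls above let the
// GC find and update that slot while the native call is in progress.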

//----------------------------------------------------------------------
// A float arg. May have to do float reg to int reg conversion
//----------------------------------------------------------------------
static void float_move(MacroAssembler *masm,
                       VMRegPair src,
                       VMRegPair dst,
                       int framesize_in_slots,
                       int workspace_slot_offset) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;

  // We do not accept an argument in a VMRegPair to be spread over two slots,
  // no matter what physical location (reg or stack) the slots may have.
  // We just check for the unaccepted slot to be invalid.
  assert(!src.second()->is_valid(), "float in arg spread over two slots");
  assert(!dst.second()->is_valid(), "float out arg spread over two slots");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
    } else {
      // stack to reg
      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
                              src.first()->as_Register(), T_INT);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
        }
      } else {
        // fpr -> fpr
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
                               src.first()->as_FloatRegister(), T_FLOAT);
      }
    }
  }
}

//----------------------------------------------------------------------
// A double arg. May have to do double reg to long reg conversion
//----------------------------------------------------------------------
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}

//----------------------------------------------------------------------
// A long arg.
//----------------------------------------------------------------------
static void long_move(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
    } else {
      // stack to reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ mem2reg_opt(dst.first()->as_Register(),
                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
    }
  } else {
    // reg to reg
    assert(src.first()->is_Register(), "long src value must be in GPR");
    if (dst.first()->is_stack()) {
      // reg -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      // reg -> reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ move_reg_if_needed(dst.first()->as_Register(),
                            T_LONG, src.first()->as_Register(), T_LONG);
    }
  }
}


//----------------------------------------------------------------------
// An int-like arg.
//----------------------------------------------------------------------
// On z/Architecture we will store integer-like items to the stack as 64 bit
// items, according to the z/Architecture ABI, even though Java would only store
// 32 bits for a parameter.
// We do sign extension for all base types. That is ok since the only
// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
// Sign extension 32->64 bit will thus not affect the value.
//----------------------------------------------------------------------
static void move32_64(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
    if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
      __ reg2mem_opt(Z_R0_scratch, firstaddr);
    } else {
      // stack -> reg, sign extended
      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
    }
  } else {
    if (dst.first()->is_stack()) {
      // reg -> stack, sign extended
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
    } else {
      // reg -> reg, sign extended
      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
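
// Example of the sign-extension claim above (illustrative): a jchar 0xFFFF arrives
// as the 32-bit int 0x0000FFFF; LGFR sign-extends from bit 31, which is 0 here,
// yielding 0x000000000000FFFF, so the value is unchanged, as promised for T_CHAR.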

static void save_or_restore_arguments(MacroAssembler *masm,
                                      const int stack_slots,
                                      const int total_in_args,
                                      const int arg_save_area,
                                      OopMap *map,
                                      VMRegPair *in_regs,
                                      BasicType *in_sig_bt) {

  // If map is non-NULL then the code should store the values,
  // otherwise it should load them.
  int slot = arg_save_area;
  // Handle double words first.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
      int offset = slot * VMRegImpl::stack_slot_size;
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
      const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
      Address   stackaddr(Z_SP, offset);
      if (map != NULL) {
        __ freg2mem_opt(freg, stackaddr);
      } else {
        __ mem2freg_opt(freg, stackaddr);
      }
    } else if (in_regs[i].first()->is_Register() &&
               (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
      int offset = slot * VMRegImpl::stack_slot_size;
      const Register   reg = in_regs[i].first()->as_Register();
      if (map != NULL) {
        __ z_stg(reg, offset, Z_SP);
        if (in_sig_bt[i] == T_ARRAY) {
          map->set_oop(VMRegImpl::stack2reg(slot));
        }
      } else {
        __ z_lg(reg, offset, Z_SP);
      }
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
    }
  }

  // Save or restore single word registers.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_Register()) {
      int offset = slot * VMRegImpl::stack_slot_size;
      // Value lives in an input register. Save it on stack.
      switch (in_sig_bt[i]) {
        case T_BOOLEAN:
        case T_CHAR:
        case T_BYTE:
        case T_SHORT:
        case T_INT: {
          const Register   reg = in_regs[i].first()->as_Register();
          Address   stackaddr(Z_SP, offset);
          if (map != NULL) {
            __ z_st(reg, stackaddr);
          } else {
            __ z_lgf(reg, stackaddr);
          }
          slot++;
          assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
          break;
        }
        case T_ARRAY:
        case T_LONG:
          // handled above
          break;
        case T_OBJECT:
        default: ShouldNotReachHere();
      }
    } else if (in_regs[i].first()->is_FloatRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int offset = slot * VMRegImpl::stack_slot_size;
        slot++;
        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
        const FloatRegister   freg = in_regs[i].first()->as_FloatRegister();
        Address   stackaddr(Z_SP, offset);
        if (map != NULL) {
          __ freg2mem_opt(freg, stackaddr, false);
        } else {
          __ mem2freg_opt(freg, stackaddr, false);
        }
      }
    } else if (in_regs[i].first()->is_stack() &&
               in_sig_bt[i] == T_ARRAY && map != NULL) {
      int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
      map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
    }
  }
}
1370 
1371 // Check GCLocker::needs_gc and enter the runtime if it's true. This
1372 // keeps a new JNI critical region from starting until a GC has been
1373 // forced. Save down any oops in registers and describe them in an OopMap.
1374 static void check_needs_gc_for_critical_native(MacroAssembler   *masm,
1375                                                 const int stack_slots,
1376                                                 const int total_in_args,
1377                                                 const int arg_save_area,
1378                                                 OopMapSet *oop_maps,
1379                                                 VMRegPair *in_regs,
1380                                                 BasicType *in_sig_bt) {
1381   __ block_comment("check GCLocker::needs_gc");
1382   Label cont;
1383 
1384   // Check GCLocker::_needs_gc flag.
1385   __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
1386   __ z_cli(0, Z_R1_scratch, 0);
1387   __ z_bre(cont);
1388 
1389   // Save down any values that are live in registers and call into the
1390   // runtime to halt for a GC.
1391   OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1392 
1393   save_or_restore_arguments(masm, stack_slots, total_in_args,
1394                             arg_save_area, map, in_regs, in_sig_bt);
1395   address the_pc = __ pc();
1396   __ set_last_Java_frame(Z_SP, noreg);
1397 
1398   __ block_comment("block_for_jni_critical");
1399   __ z_lgr(Z_ARG1, Z_thread);
1400 
1401   address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1402   __ call_c(entry_point);
1403   oop_maps->add_gc_map(__ offset(), map);
1404 
1405   __ reset_last_Java_frame();
1406 
1407   // Reload all the register arguments.
1408   save_or_restore_arguments(masm, stack_slots, total_in_args,
1409                             arg_save_area, NULL, in_regs, in_sig_bt);
1410 
1411   __ bind(cont);
1412 
1413   if (StressCriticalJNINatives) {
1414     // Stress register saving
1415     OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1416     save_or_restore_arguments(masm, stack_slots, total_in_args,
1417                               arg_save_area, map, in_regs, in_sig_bt);
1418 
1419     // Destroy argument registers.
1420     for (int i = 0; i < total_in_args; i++) {
1421       if (in_regs[i].first()->is_Register()) {
1422         // Don't set CC.
1423         __ clear_reg(in_regs[i].first()->as_Register(), true, false);
1424       } else {
1425         if (in_regs[i].first()->is_FloatRegister()) {
1426           FloatRegister fr = in_regs[i].first()->as_FloatRegister();
1427           __ z_lcdbr(fr, fr);
1428         }
1429       }
1430     }
1431 
1432     save_or_restore_arguments(masm, stack_slots, total_in_args,
1433                               arg_save_area, NULL, in_regs, in_sig_bt);
1434   }
1435 }
1436 
1437 static void move_ptr(MacroAssembler *masm,
1438                      VMRegPair src,
1439                      VMRegPair dst,
1440                      int framesize_in_slots) {
1441   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1442 
1443   if (src.first()->is_stack()) {
1444     if (dst.first()->is_stack()) {
1445       // stack to stack
1446       __ mem2reg_opt(Z_R0_scratch, Address(Z_SP, reg2offset(src.first()) + frame_offset));
1447       __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
1448     } else {
1449       // stack to reg
1450       __ mem2reg_opt(dst.first()->as_Register(),
1451                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
1452     }
1453   } else {
1454     if (dst.first()->is_stack()) {
1455       // reg to stack
1456       __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
1457     } else {
1458       __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
1459     }
1460   }
1461 }
1462 
1463 // Unpack an array argument into a pointer to the body and the length
1464 // if the array is non-null, otherwise pass 0 for both.
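// The effect, expressed as an illustrative C sketch (names hypothetical):
//   if (array == NULL) { body = NULL; length = 0; }
//   else { body = (char*)array + base_offset(elem_type); length = array->length(); }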
1465 static void unpack_array_argument(MacroAssembler *masm,
1466                                    VMRegPair reg,
1467                                    BasicType in_elem_type,
1468                                    VMRegPair body_arg,
1469                                    VMRegPair length_arg,
1470                                    int framesize_in_slots) {
1471   Register tmp_reg = Z_tmp_2;
1472   Register tmp2_reg = Z_tmp_1;
1473 
1474   assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1475          "possible collision");
1476   assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1477          "possible collision");
1478 
1479   // Pass the length, ptr pair.
1480   NearLabel set_out_args;
1481   VMRegPair tmp, tmp2;
1482 
1483   tmp.set_ptr(tmp_reg->as_VMReg());
1484   tmp2.set_ptr(tmp2_reg->as_VMReg());
1485   if (reg.first()->is_stack()) {
1486     // Load the arg up from the stack.
1487     move_ptr(masm, reg, tmp, framesize_in_slots);
1488     reg = tmp;
1489   }
1490 
1491   const Register first = reg.first()->as_Register();
1492 
1493   // Don't set CC, indicate unused result.
1494   (void) __ clear_reg(tmp2_reg, true, false);
1495   if (tmp_reg != first) {
1496     __ clear_reg(tmp_reg, true, false);  // Don't set CC.
1497   }
1498   __ compare64_and_branch(first, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
1499   __ z_lgf(tmp2_reg, Address(first, arrayOopDesc::length_offset_in_bytes()));
1500   __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), first);
1501 
1502   __ bind(set_out_args);
1503   move_ptr(masm, tmp, body_arg, framesize_in_slots);
1504   move32_64(masm, tmp2, length_arg, framesize_in_slots);
1505 }
1506 
1507 //----------------------------------------------------------------------
1508 // Wrap a JNI call.
1509 //----------------------------------------------------------------------
1510 #undef USE_RESIZE_FRAME
1511 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1512                                                 const methodHandle& method,
1513                                                 int compile_id,
1514                                                 BasicType *in_sig_bt,
1515                                                 VMRegPair *in_regs,
1516                                                 BasicType ret_type,
1517                                                 address critical_entry) {
1518   int total_in_args = method->size_of_parameters();
1519   if (method->is_method_handle_intrinsic()) {
1520     vmIntrinsics::ID iid = method->intrinsic_id();
1521     intptr_t start = (intptr_t) __ pc();
1522     int vep_offset = ((intptr_t) __ pc()) - start;
1523 
1524     gen_special_dispatch(masm, total_in_args,
1525                          method->intrinsic_id(), in_sig_bt, in_regs);
1526 
1527     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1528 
1529     __ flush();
1530 
1531     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1532 
1533     return nmethod::new_native_nmethod(method,
1534                                        compile_id,
1535                                        masm->code(),
1536                                        vep_offset,
1537                                        frame_complete,
1538                                        stack_slots / VMRegImpl::slots_per_word,
1539                                        in_ByteSize(-1),
1540                                        in_ByteSize(-1),
1541                                        (OopMapSet *) NULL);
1542   }
1543 
1544 
1545   ///////////////////////////////////////////////////////////////////////
1546   //
1547   //  Precalculations before generating any code
1548   //
1549   ///////////////////////////////////////////////////////////////////////
1550 
1551   bool is_critical_native = true;
1552   address native_func = critical_entry;
1553   if (native_func == NULL) {
1554     native_func = method->native_function();
1555     is_critical_native = false;
1556   }
1557   assert(native_func != NULL, "must have function");
1558 
1559   //---------------------------------------------------------------------
1560   // We have received a description of where all the java args are located
1561   // on entry to the wrapper. We need to convert these args to where
1562   // the jni function will expect them. To figure out where they go
1563   // we convert the java signature to a C signature by inserting
1564   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1565   //
1566   // The first hidden argument arg[0] is a pointer to the JNI environment.
1567   // It is generated for every call.
1568   // The second argument arg[1] to the JNI call, which is hidden for static
1569   // methods, is the boxed lock object. For static calls, the lock object
1570   // is the class mirror of the method's holder. The oop is constructed
1571   // here. For instance calls, the lock is performed on the object itself,
1572   // the pointer of which is passed as the first visible argument.
1573   //---------------------------------------------------------------------
1574 
1575   // Additionally, on z/Architecture we must convert integers
1576   // to longs in the C signature. We do this in advance in order to have
1577   // no trouble with indexes into the bt-arrays.
1578   // So convert the signature and registers now, and adjust the total number
1579   // of in-arguments accordingly.
1580   bool method_is_static = method->is_static();
1581   int  total_c_args     = total_in_args;
1582 
1583   if (!is_critical_native) {
1584     int n_hidden_args = method_is_static ? 2 : 1;
1585     total_c_args += n_hidden_args;
1586   } else {
1587     // No JNIEnv*, no this*, but unpacked arrays (base+length).
1588     for (int i = 0; i < total_in_args; i++) {
1589       if (in_sig_bt[i] == T_ARRAY) {
1590         total_c_args ++;
1591       }
1592     }
1593   }
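  // Worked example (illustrative): for a static native method foo(int, byte[]),
  // total_in_args == 2. The regular path adds JNIEnv* and the class mirror,
  // giving total_c_args == 4; the critical path instead adds one slot for the
  // unpacked array (base+length), giving total_c_args == 3.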
1594 
1595   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1596   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1597   BasicType* in_elem_bt = NULL;
1598 
1599   // Create the signature for the C call:
1600   //   1) add the JNIEnv*
1601   //   2) add the class if the method is static
1602   //   3) copy the rest of the incoming signature (shifted by the number of
1603   //      hidden arguments)
1604 
1605   int argc = 0;
1606   if (!is_critical_native) {
1607     out_sig_bt[argc++] = T_ADDRESS;
1608     if (method->is_static()) {
1609       out_sig_bt[argc++] = T_OBJECT;
1610     }
1611 
1612     for (int i = 0; i < total_in_args; i++) {
1613       out_sig_bt[argc++] = in_sig_bt[i];
1614     }
1615   } else {
1616     Thread* THREAD = Thread::current();
1617     in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1618     SignatureStream ss(method->signature());
1619     int o = 0;
1620     for (int i = 0; i < total_in_args; i++, o++) {
1621       if (in_sig_bt[i] == T_ARRAY) {
1622         // Arrays are passed as tuples (int, elem*).
1623         Symbol* atype = ss.as_symbol(CHECK_NULL);
1624         const char* at = atype->as_C_string();
1625         if (strlen(at) == 2) {
1626           assert(at[0] == '[', "must be");
1627           switch (at[1]) {
1628             case 'B': in_elem_bt[o]  = T_BYTE; break;
1629             case 'C': in_elem_bt[o]  = T_CHAR; break;
1630             case 'D': in_elem_bt[o]  = T_DOUBLE; break;
1631             case 'F': in_elem_bt[o]  = T_FLOAT; break;
1632             case 'I': in_elem_bt[o]  = T_INT; break;
1633             case 'J': in_elem_bt[o]  = T_LONG; break;
1634             case 'S': in_elem_bt[o]  = T_SHORT; break;
1635             case 'Z': in_elem_bt[o]  = T_BOOLEAN; break;
1636             default: ShouldNotReachHere();
1637           }
1638         }
1639       } else {
1640         in_elem_bt[o] = T_VOID;
1641       }
1642       if (in_sig_bt[i] != T_VOID) {
1643         assert(in_sig_bt[i] == ss.type(), "must match");
1644         ss.next();
1645       }
1646     }
1647     assert(total_in_args == o, "must match");
1648 
1649     for (int i = 0; i < total_in_args; i++) {
1650       if (in_sig_bt[i] == T_ARRAY) {
1651         // Arrays are passed as tuples (int, elem*).
1652         out_sig_bt[argc++] = T_INT;
1653         out_sig_bt[argc++] = T_ADDRESS;
1654       } else {
1655         out_sig_bt[argc++] = in_sig_bt[i];
1656       }
1657     }
1658   }
1659 
1660   ///////////////////////////////////////////////////////////////////////
1661   // Now figure out where the args must be stored and how much stack space
1662   // they require (neglecting out_preserve_stack_slots but providing space
1663   // for storing the first five register arguments).
1664   // It's weird, see int_stk_helper.
1665   ///////////////////////////////////////////////////////////////////////
1666 
1667   //---------------------------------------------------------------------
1668   // Compute framesize for the wrapper.
1669   //
1670   // - We need to handlize all oops passed in registers.
1671   // - We must create space for them here that is disjoint from the save area.
1672   // - We always just allocate 5 words for storing down these objects.
1673   //   This allows us to simply record the base and use the Ireg number to
1674   //   decide which slot to use.
1675   // - Note that the reg number used to index the stack slot is the inbound
1676   //   number, not the outbound number.
1677   // - We must shuffle args to match the native convention,
1678   //   and to include var-args space.
1679   //---------------------------------------------------------------------
1680 
1681   //---------------------------------------------------------------------
1682   // Calculate the total number of stack slots we will need:
1683   // - 1) abi requirements
1684   // - 2) outgoing args
1685   // - 3) space for inbound oop handle area
1686   // - 4) space for handlizing a klass if static method
1687   // - 5) space for a lock if synchronized method
1688   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1689   // - 7) filler slots for alignment
1690   //---------------------------------------------------------------------
1691   // Here is what the space we have allocated will look like.
1692   // Since we use resize_frame, we do not create a new stack frame,
1693   // but just extend the one we got with our own data area.
1694   //
1695   // If an offset or pointer name points to a separator line, it is
1696   // assumed that addressing with offset 0 selects storage starting
1697   // at the first byte above the separator line.
1698   //
1699   //
1700   //     ...                   ...
1701   //      | caller's frame      |
1702   // FP-> |---------------------|
1703   //      | filler slots, if any|
1704   //     7| #slots == mult of 2 |
1705   //      |---------------------|
1706   //      | work space          |
1707   //     6| 2 slots = 8 bytes   |
1708   //      |---------------------|
1709   //     5| lock box (if sync)  |
1710   //      |---------------------| <- lock_slot_offset
1711   //     4| klass (if static)   |
1712   //      |---------------------| <- klass_slot_offset
1713   //     3| oopHandle area      |
1714   //      | (save area for      |
1715   //      |  critical natives)  |
1716   //      |                     |
1717   //      |                     |
1718   //      |---------------------| <- oop_handle_offset
1719   //     2| outbound memory     |
1720   //     ...                   ...
1721   //      | based arguments     |
1722   //      |---------------------|
1723   //      | vararg              |
1724   //     ...                   ...
1725   //      | area                |
1726   //      |---------------------| <- out_arg_slot_offset
1727   //     1| out_preserved_slots |
1728   //     ...                   ...
1729   //      | (z_abi spec)        |
1730   // SP-> |---------------------| <- FP_slot_offset (back chain)
1731   //     ...                   ...
1732   //
1733   //---------------------------------------------------------------------
1734 
1735   // *_slot_offset indicates offset from SP in #stack slots
1736   // *_offset      indicates offset from SP in #bytes
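  // Example (illustrative, with VMRegImpl::stack_slot_size == 4): a
  // *_slot_offset of 6 corresponds to a byte offset of 6 * 4 == 24 from SP.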
1737 
1738   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1739                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1740 
1741   // Now the space for the inbound oop handle area.
1742   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1743   if (is_critical_native) {
1744     // Critical natives may have to call out so they need a save area
1745     // for register arguments.
1746     int double_slots = 0;
1747     int single_slots = 0;
1748     for (int i = 0; i < total_in_args; i++) {
1749       if (in_regs[i].first()->is_Register()) {
1750         const Register reg = in_regs[i].first()->as_Register();
1751         switch (in_sig_bt[i]) {
1752           case T_BOOLEAN:
1753           case T_BYTE:
1754           case T_SHORT:
1755           case T_CHAR:
1756           case T_INT:
1757           // Fall through.
1758           case T_ARRAY:
1759           case T_LONG: double_slots++; break;
1760           default:  ShouldNotReachHere();
1761         }
1762       } else {
1763         if (in_regs[i].first()->is_FloatRegister()) {
1764           switch (in_sig_bt[i]) {
1765             case T_FLOAT:  single_slots++; break;
1766             case T_DOUBLE: double_slots++; break;
1767             default:  ShouldNotReachHere();
1768           }
1769         }
1770       }
1771     }  // for
1772     total_save_slots = double_slots * 2 + align_up(single_slots, 2); // Round to even.
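    // Worked example (illustrative): two T_DOUBLE and three T_FLOAT register
    // arguments yield double_slots == 2 and single_slots == 3, hence
    // total_save_slots == 2 * 2 + align_up(3, 2) == 4 + 4 == 8.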
1773   }
1774 
1775   int oop_handle_slot_offset = stack_slots;
1776   stack_slots += total_save_slots;                                        // 3)
1777 
1778   int klass_slot_offset = 0;
1779   int klass_offset      = -1;
1780   if (method_is_static && !is_critical_native) {                          // 4)
1781     klass_slot_offset  = stack_slots;
1782     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1783     stack_slots       += VMRegImpl::slots_per_word;
1784   }
1785 
1786   int lock_slot_offset = 0;
1787   int lock_offset      = -1;
1788   if (method->is_synchronized()) {                                        // 5)
1789     lock_slot_offset   = stack_slots;
1790     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1791     stack_slots       += VMRegImpl::slots_per_word;
1792   }
1793 
1794   int workspace_slot_offset = stack_slots;                                // 6)
1795   stack_slots         += 2;
1796 
1797   // Now compute actual number of stack words we need.
1798   // Round to align stack properly.
1799   stack_slots = align_up(stack_slots,                                     // 7)
1800                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1801   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
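  // Example (illustrative, assuming frame::alignment_in_bytes == 8 and
  // VMRegImpl::stack_slot_size == 4): 45 raw slots round up to 46 slots,
  // giving frame_size_in_bytes == 46 * 4 == 184.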
1802 
1803 
1804   ///////////////////////////////////////////////////////////////////////
1805   // Now we can start generating code
1806   ///////////////////////////////////////////////////////////////////////
1807 
1808   unsigned int wrapper_CodeStart  = __ offset();
1809   unsigned int wrapper_UEPStart;
1810   unsigned int wrapper_VEPStart;
1811   unsigned int wrapper_FrameDone;
1812   unsigned int wrapper_CRegsSet;
1813   Label     handle_pending_exception;
1814   Label     ic_miss;
1815 
1816   //---------------------------------------------------------------------
1817   // Unverified entry point (UEP)
1818   //---------------------------------------------------------------------
1819   wrapper_UEPStart = __ offset();
1820 
1821   // check ic: object class <-> cached class
1822   if (!method_is_static) __ nmethod_UEP(ic_miss);
1823   // Fill with nops (alignment of verified entry point).
1824   __ align(CodeEntryAlignment);
1825 
1826   //---------------------------------------------------------------------
1827   // Verified entry point (VEP)
1828   //---------------------------------------------------------------------
1829   wrapper_VEPStart = __ offset();
1830 
1831   __ save_return_pc();
1832   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1833 #ifndef USE_RESIZE_FRAME
1834   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1835 #else
1836   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1837                                                           // Just resize the existing one.
1838 #endif
1839 
1840   wrapper_FrameDone = __ offset();
1841 
1842   __ verify_thread();
1843 
1844   // Native nmethod wrappers never take possession of the oop arguments.
1845   // So the caller will gc the arguments.
1846   // The only thing we need an oopMap for is if the call is static.
1847   //
1848   // An OopMap for lock (and class if static), and one for the VM call itself
1849   OopMapSet  *oop_maps        = new OopMapSet();
1850   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1851 
1852   if (is_critical_native) {
1853     check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
1854                                        oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
1855   }
1856 
1857 
1858   //////////////////////////////////////////////////////////////////////
1859   //
1860   // The Grand Shuffle
1861   //
1862   //////////////////////////////////////////////////////////////////////
1863   //
1864   // We immediately shuffle the arguments so that for any vm call we have
1865   // to make from here on out (sync slow path, jvmti, etc.) we will have
1866   // captured the oops from our caller and have a valid oopMap for them.
1867   //
1868   //--------------------------------------------------------------------
1869   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1870   // (derived from JavaThread* which is in Z_thread) and, if static,
1871   // the class mirror instead of a receiver. This pretty much guarantees that
1872   // register layout will not match. We ignore these extra arguments during
1873   // the shuffle. The shuffle is described by the two calling convention
1874   // vectors we have in our possession. We simply walk the java vector to
1875   // get the source locations and the c vector to get the destinations.
1876   //
1877   // This is a trick. We double the stack slots so we can claim
1878   // the oops in the caller's frame. Since we are sure to have
1879   // more args than the caller, doubling is enough to make
1880   // sure we can capture all the incoming oop args from the caller.
1881   //--------------------------------------------------------------------
1882 
1883   // Record sp-based slot for receiver on stack for non-static methods.
1884   int receiver_offset = -1;
1885 
1886   //--------------------------------------------------------------------
1887   // We move the arguments backwards because a floating point argument's
1888   // destination will always be a register with a greater or equal
1889   // register number, or the stack.
1890   //   jix is the index of the incoming Java arguments.
1891   //   cix is the index of the outgoing C arguments.
1892   //--------------------------------------------------------------------
1893 
1894 #ifdef ASSERT
1895   bool reg_destroyed[RegisterImpl::number_of_registers];
1896   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1897   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1898     reg_destroyed[r] = false;
1899   }
1900   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1901     freg_destroyed[f] = false;
1902   }
1903 #endif // ASSERT
1904 
1905   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1906 #ifdef ASSERT
1907     if (in_regs[jix].first()->is_Register()) {
1908       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1909     } else {
1910       if (in_regs[jix].first()->is_FloatRegister()) {
1911         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1912       }
1913     }
1914     if (out_regs[cix].first()->is_Register()) {
1915       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1916     } else {
1917       if (out_regs[cix].first()->is_FloatRegister()) {
1918         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1919       }
1920     }
1921 #endif // ASSERT
1922 
1923     switch (in_sig_bt[jix]) {
1924       // Due to casting, small integers should only occur in pairs with type T_LONG.
1925       case T_BOOLEAN:
1926       case T_CHAR:
1927       case T_BYTE:
1928       case T_SHORT:
1929       case T_INT:
1930         // Move int and do sign extension.
1931         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1932         break;
1933 
1934       case T_LONG :
1935         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1936         break;
1937 
1938       case T_ARRAY:
1939         if (is_critical_native) {
1940           int body_arg = cix;
1941           cix -= 1; // Point to length arg.
1942           unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots);
1943           break;
1944         }
1945         // else fallthrough
1946       case T_OBJECT:
1947         assert(!is_critical_native, "no oop arguments");
1948         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1949                     ((jix == 0) && (!method_is_static)),
1950                     &receiver_offset);
1951         break;
1952       case T_VOID:
1953         break;
1954 
1955       case T_FLOAT:
1956         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1957         break;
1958 
1959       case T_DOUBLE:
1960         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1961         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1962         break;
1963 
1964       case T_ADDRESS:
1965         assert(false, "found T_ADDRESS in java args");
1966         break;
1967 
1968       default:
1969         ShouldNotReachHere();
1970     }
1971   }
1972 
1973   //--------------------------------------------------------------------
1974   // Pre-load a static method's oop into ARG2.
1975   // Used both by locking code and the normal JNI call code.
1976   //--------------------------------------------------------------------
1977   if (method_is_static && !is_critical_native) {
1978     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1979 
1980     // Now handlize the static class mirror in ARG2. It's known not-null.
1981     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1982     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1983     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1984   }
1985 
1986   // Get JNIEnv* which is first argument to native.
1987   if (!is_critical_native) {
1988     __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1989   }
1990 
1991   //////////////////////////////////////////////////////////////////////
1992   // We have all of the arguments setup at this point.
1993   // We MUST NOT touch any outgoing regs from this point on.
1994   // So if we must call out we must push a new frame.
1995   //////////////////////////////////////////////////////////////////////
1996 
1997 
1998   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1999   // Both values represent the same position.
2000   __ get_PC(Z_R10);                // PC into register
2001   wrapper_CRegsSet = __ offset();  // and into the variable.
2002 
2003   // Z_R10 now has the pc loaded that we will use when we finally call to native.
2004 
2005   // We use the same pc/oopMap repeatedly when we call out.
2006   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
2007 
2008   // Lock a synchronized method.
2009 
2010   if (method->is_synchronized()) {
2011     assert(!is_critical_native, "unhandled");
2012 
2013     // ATTENTION: args and Z_R10 must be preserved.
2014     Register r_oop  = Z_R11;
2015     Register r_box  = Z_R12;
2016     Register r_tmp1 = Z_R13;
2017     Register r_tmp2 = Z_R7;
2018     Label done;
2019 
2020     // Load the oop for the object or class. R_carg2_classorobject contains
2021     // either the handlized oop from the incoming arguments or the handlized
2022     // class mirror (if the method is static).
2023     __ z_lg(r_oop, 0, Z_ARG2);
2024 
2025     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
2026     // Get the lock box slot's address.
2027     __ add2reg(r_box, lock_offset, Z_SP);
2028 
2029 #ifdef ASSERT
2030     if (UseBiasedLocking)
2031       // Making the box point to itself will make it clear it went unused
2032       // but also be obviously invalid.
2033       __ z_stg(r_box, 0, r_box);
2034 #endif // ASSERT
2035 
2036     // Try fastpath for locking.
2037     // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
2038     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
2039     __ z_bre(done);
2040 
2041     //-------------------------------------------------------------------------
2042     // None of the above fast optimizations worked so we have to get into the
2043     // slow case of monitor enter. Inline a special case of call_VM that
2044     // disallows any pending_exception.
2045     //-------------------------------------------------------------------------
2046 
2047     Register oldSP = Z_R11;
2048 
2049     __ z_lgr(oldSP, Z_SP);
2050 
2051     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2052 
2053     // Prepare arguments for call.
2054     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
2055     __ add2reg(Z_ARG2, lock_offset, oldSP);
2056     __ z_lgr(Z_ARG3, Z_thread);
2057 
2058     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
2059 
2060     // Do the call.
2061     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
2062     __ call(Z_R1_scratch);
2063 
2064     __ reset_last_Java_frame();
2065 
2066     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2067 #ifdef ASSERT
2068     { Label L;
2069       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2070       __ z_bre(L);
2071       __ stop("no pending exception allowed on exit from IR::monitorenter");
2072       __ bind(L);
2073     }
2074 #endif
2075     __ bind(done);
2076   } // lock for synchronized methods
2077 
2078 
2079   //////////////////////////////////////////////////////////////////////
2080   // Finally just about ready to make the JNI call.
2081   //////////////////////////////////////////////////////////////////////
2082 
2083   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
2084   __ set_last_Java_frame(Z_SP, Z_R10);
2085 
2086   // Transition from _thread_in_Java to _thread_in_native.
2087   __ set_thread_state(_thread_in_native);
2088 
2089 
2090   //////////////////////////////////////////////////////////////////////
2091   // This is the JNI call.
2092   //////////////////////////////////////////////////////////////////////
2093 
2094   __ call_c(native_func);
2095 
2096 
2097   //////////////////////////////////////////////////////////////////////
2098   // We have survived the call once we reach here.
2099   //////////////////////////////////////////////////////////////////////
2100 
2101 
2102   //--------------------------------------------------------------------
2103   // Unpack native results.
2104   //--------------------------------------------------------------------
2105   // For int-types, we do any sign-extension required.
2106   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
2107   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
2108   // blocking or unlocking.
2109   // An OOP result (handle) is done specially in the slow-path code.
2110   //--------------------------------------------------------------------
2111   switch (ret_type) {
2112     case T_VOID:    break;         // Nothing to do!
2113     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
2114     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
2115     case T_LONG:    break;         // Got it where we want it (unless slow-path)
2116     case T_OBJECT:  break;         // Really a handle.
2117                                    // Cannot de-handlize until after reclaiming jvm_lock.
2118     case T_ARRAY:   break;
2119 
2120     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
2121       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
2122       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
2123       break;
2124     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
2125     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
2126     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
2127     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
2128 
2129     default:
2130       ShouldNotReachHere();
2131       break;
2132   }
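  // Illustrative trace of the T_BOOLEAN normalization above:
  //   Z_RET == 0: LNGFR yields 0 (sign bit clear), SRLG by 63 -> 0 (false).
  //   Z_RET != 0: LNGFR yields a negative value (sign bit set),
  //               SRLG by 63 -> 1 (true).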
2133 
2134 
2135   // Switch thread to "native transition" state before reading the synchronization state.
2136   // This additional state is necessary because reading and testing the synchronization
2137   // state is not atomic w.r.t. GC, as this scenario demonstrates:
2138   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2139   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
2140   //   - Thread A is resumed to finish this native method, but doesn't block here since it
2141   //     didn't see any synchronization in progress, and escapes.
2142 
2143   // Transition from _thread_in_native to _thread_in_native_trans.
2144   __ set_thread_state(_thread_in_native_trans);
2145 
2146   // Safepoint synchronization
2147   //--------------------------------------------------------------------
2148   // Must we block?
2149   //--------------------------------------------------------------------
2150   // Block, if necessary, before resuming in _thread_in_Java state.
2151   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2152   //--------------------------------------------------------------------
2153   Label after_transition;
2154   {
2155     Label no_block, sync;
2156 
2157     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
2158 
2159     if (os::is_MP()) {
2160       if (UseMembar) {
2161         // Force this write out before the read below.
2162         __ z_fence();
2163       } else {
2164         // Write serialization page so VM thread can do a pseudo remote membar.
2165         // We use the current thread pointer to calculate a thread specific
2166         // offset to write to within the page. This minimizes bus traffic
2167         // due to cache line collision.
2168         __ serialize_memory(Z_thread, Z_R1, Z_R2);
2169       }
2170     }
2171     __ safepoint_poll(sync, Z_R1);
2172 
2173     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
2174     __ z_bre(no_block);
2175 
2176     // Block. Save any potential method result value before the operation and
2177     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2178     // lets us share the oopMap we used when we went native rather than create
2179     // a distinct one for this pc.
2180     //
2181     __ bind(sync);
2182     __ z_acquire();
2183 
2184     address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2185                                              : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2186 
2187     __ call_VM_leaf(entry_point, Z_thread);
2188 
2189     if (is_critical_native) {
2190       restore_native_result(masm, ret_type, workspace_slot_offset);
2191       __ z_bru(after_transition); // No thread state transition here.
2192     }
2193     __ bind(no_block);
2194     restore_native_result(masm, ret_type, workspace_slot_offset);
2195   }
2196 
2197   //--------------------------------------------------------------------
2198   // Thread state is thread_in_native_trans. Any safepoint blocking has
2199   // already happened so we can now change state to _thread_in_Java.
2200   //--------------------------------------------------------------------
2201   // Transition from _thread_in_native_trans to _thread_in_Java.
2202   __ set_thread_state(_thread_in_Java);
2203   __ bind(after_transition);
2204 
2205 
2206   //--------------------------------------------------------------------
2207   // Reguard any pages if necessary.
2208   // Protect native result from being destroyed.
2209   //--------------------------------------------------------------------
2210 
2211   Label no_reguard;
2212 
2213   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)),
2214            JavaThread::stack_guard_yellow_reserved_disabled);
2215 
2216   __ z_bre(no_reguard);
2217 
2218   save_native_result(masm, ret_type, workspace_slot_offset);
2219   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
2220   restore_native_result(masm, ret_type, workspace_slot_offset);
2221 
2222   __ bind(no_reguard);
2223 
2224 
2225   // Synchronized methods (slow path only)
2226   // No pending exceptions for now.
2227   //--------------------------------------------------------------------
2228   // Handle possibly pending exception (will unlock if necessary).
2229   // Native result is, if any is live, in Z_FRES or Z_RES.
2230   //--------------------------------------------------------------------
2231   // Unlock
2232   //--------------------------------------------------------------------
2233   if (method->is_synchronized()) {
2234     const Register r_oop        = Z_R11;
2235     const Register r_box        = Z_R12;
2236     const Register r_tmp1       = Z_R13;
2237     const Register r_tmp2       = Z_R7;
2238     Label done;
2239 
2240     // Get unboxed oop of class mirror or object ...
2241     int   offset = method_is_static ? klass_offset : receiver_offset;
2242 
2243     assert(offset != -1, "");
2244     __ z_lg(r_oop, offset, Z_SP);
2245 
2246     // ... and address of lock object box.
2247     __ add2reg(r_box, lock_offset, Z_SP);
2248 
2249     // Try fastpath for unlocking.
2250     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
2251     __ z_bre(done);
2252 
2253     // Slow path for unlocking.
2254     // Save and restore any potential method result value around the unlocking operation.
2255     const Register R_exc = Z_R11;
2256 
2257     save_native_result(masm, ret_type, workspace_slot_offset);
2258 
2259     // Must save pending exception around the slow-path VM call. Since it's a
2260     // leaf call, the pending exception (if any) can be kept in a register.
2261     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2262     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
2263 
2264     // Must clear pending-exception before re-entering the VM. Since this is
2265     // a leaf call, pending-exception-oop can be safely kept in a register.
2266     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
2267 
2268     // Inline a special case of call_VM that disallows any pending_exception.
2269 
2270     // Get locked oop from the handle we passed to jni.
2271     __ z_lg(Z_ARG1, offset, Z_SP);
2272     __ add2reg(Z_ARG2, lock_offset, Z_SP);
2273     __ z_lgr(Z_ARG3, Z_thread);
2274 
2275     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2276 
2277     __ call(Z_R1_scratch);
2278 
2279 #ifdef ASSERT
2280     {
2281       Label L;
2282       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2283       __ z_bre(L);
2284       __ stop("no pending exception allowed on exit from IR::monitorexit");
2285       __ bind(L);
2286     }
2287 #endif
2288 
2289     // Check_forward_pending_exception jump to forward_exception if any pending
2290     // exception is set. The forward_exception routine expects to see the
2291     // exception in pending_exception and not in a register. Kind of clumsy,
2292     // since all folks who branch to forward_exception must have tested
2293     // pending_exception first and hence have it in a register already.
2294     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2295     restore_native_result(masm, ret_type, workspace_slot_offset);
2296     __ z_bru(done);
2297     __ z_illtrap(0x66);
2298 
2299     __ bind(done);
2300   }
2301 
2302 
2303   //--------------------------------------------------------------------
2304   // Clear "last Java frame" SP and PC.
2305   //--------------------------------------------------------------------
2306   __ verify_thread(); // Z_thread must be correct.
2307 
2308   __ reset_last_Java_frame();
2309 
2310   // Unpack oop result, e.g. JNIHandles::resolve result.
2311   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2312     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2313   }
2314 
2315   if (CheckJNICalls) {
2316     // clear_pending_jni_exception_check
2317     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2318   }
2319 
2320   // Reset handle block.
2321   if (!is_critical_native) {
2322     __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2323     __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
2324 
2325     // Check for pending exceptions.
2326     __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2327     __ z_brne(handle_pending_exception);
2328   }
2329 
2330 
2331   //////////////////////////////////////////////////////////////////////
2332   // Return
2333   //////////////////////////////////////////////////////////////////////
2334 
2335 
2336 #ifndef USE_RESIZE_FRAME
2337   __ pop_frame();                     // Pop wrapper frame.
2338 #else
2339   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2340 #endif
2341   __ restore_return_pc();             // This is the way back to the caller.
2342   __ z_br(Z_R14);
2343 
2344 
2345   //////////////////////////////////////////////////////////////////////
2346   // Out-of-line calls to the runtime.
2347   //////////////////////////////////////////////////////////////////////
2348 
2349 
2350   if (!is_critical_native) {
2351 
2352     //---------------------------------------------------------------------
2353     // Handler for pending exceptions (out-of-line).
2354     //---------------------------------------------------------------------
2355     // Since this is a native call, we know the proper exception handler
2356     // is the empty function. We just pop this frame and then jump to
2357     // forward_exception_entry. Z_R14 will contain the native caller's
2358     // return PC.
2359     __ bind(handle_pending_exception);
2360     __ pop_frame();
2361     __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2362     __ restore_return_pc();
2363     __ z_br(Z_R1_scratch);
2364 
2365     //---------------------------------------------------------------------
2366     // Handler for a cache miss (out-of-line)
2367     //---------------------------------------------------------------------
2368     __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2369   }
2370   __ flush();
2371 
2372 
2373   //////////////////////////////////////////////////////////////////////
2374   // end of code generation
2375   //////////////////////////////////////////////////////////////////////
2376 
2377 
2378   nmethod *nm = nmethod::new_native_nmethod(method,
2379                                             compile_id,
2380                                             masm->code(),
2381                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2382                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2383                                             stack_slots / VMRegImpl::slots_per_word,
2384                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2385                                             in_ByteSize(lock_offset),
2386                                             oop_maps);
2387 
2388   if (is_critical_native) {
2389     nm->set_lazy_critical_native(true);
2390   }
2391 
2392   return nm;
2393 }
2394 
2395 static address gen_c2i_adapter(MacroAssembler  *masm,
2396                                int total_args_passed,
2397                                int comp_args_on_stack,
2398                                const BasicType *sig_bt,
2399                                const VMRegPair *regs,
2400                                Label &skip_fixup) {
2401   // Before we get into the guts of the C2I adapter, see if we should be here
2402   // at all. We've come from compiled code and are attempting to jump to the
2403   // interpreter, which means the caller made a static call to get here
2404   // (vcalls always get a compiled target if there is one). Check for a
2405   // compiled target. If there is one, we need to patch the caller's call.
2406 
2407   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2408   const Register ientry = Z_R11;
2409   const Register code   = Z_R11;
2410 
2411   address c2i_entrypoint;
2412   Label   patch_callsite;
2413 
2414   // Regular (verified) c2i entry point.
2415   c2i_entrypoint = __ pc();
2416 
2417   // Call patching needed?
2418   __ load_and_test_long(Z_R0_scratch, method_(code));
2419   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2420   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2421 
2422   __ bind(skip_fixup);  // Return point from patch_callsite.
2423 
2424   // Since all args are passed on the stack, total_args_passed*wordSize is the
2425   // space we need. We need an ABI scratch area, but we use the caller's since
2426   // it has already been allocated.
2427 
2428   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2429   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
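  // Example (illustrative, with a 64-bit wordSize of 8): total_args_passed == 3
  // gives align_up(3, 2) == 4, so extraspace == 4 * 8 + abi_scratch bytes.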
2430   Register  sender_SP   = Z_R10;
2431   Register  value       = Z_R12;
2432 
2433   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2434   // In addition, frame manager expects initial_caller_sp in Z_R10.
2435   __ z_lgr(sender_SP, Z_SP);
2436 
2437   // This should always fit in a 14-bit immediate.
2438   __ resize_frame(-extraspace, Z_R0_scratch);
2439 
2440   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2441   // args. This essentially moves the caller's ABI scratch area from the top to the
2442   // bottom of the arg area.
2443 
2444   int st_off =  extraspace - wordSize;
2445 
2446   // Now write the args into the outgoing interpreter space.
2447   for (int i = 0; i < total_args_passed; i++) {
2448     VMReg r_1 = regs[i].first();
2449     VMReg r_2 = regs[i].second();
2450     if (!r_1->is_valid()) {
2451       assert(!r_2->is_valid(), "");
2452       continue;
2453     }
2454     if (r_1->is_stack()) {
2455       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2456       // We must account for it here.
2457       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2458 
2459       if (!r_2->is_valid()) {
2460         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2461       } else {
2462         // longs are given 2 64-bit slots in the interpreter,
2463         // but the data is passed in only 1 slot.
2464         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2465 #ifdef ASSERT
2466           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2467 #endif
2468           st_off -= wordSize;
2469         }
2470         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2471       }
2472     } else {
2473       if (r_1->is_Register()) {
2474         if (!r_2->is_valid()) {
2475           __ z_st(r_1->as_Register(), st_off, Z_SP);
2476         } else {
2477           // longs are given 2 64-bit slots in the interpreter, but the
2478           // data is passed in only 1 slot.
2479           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2480 #ifdef ASSERT
2481             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2482 #endif
2483             st_off -= wordSize;
2484           }
2485           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2486         }
2487       } else {
2488         assert(r_1->is_FloatRegister(), "");
2489         if (!r_2->is_valid()) {
2490           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2491         } else {
2492           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2493           // data is passed in only 1 slot.
2494           // One of these should get known junk...
2495 #ifdef ASSERT
2496           __ z_lzdr(Z_F1);
2497           __ z_std(Z_F1, st_off, Z_SP);
2498 #endif
2499           st_off -= wordSize;
2500           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2501         }
2502       }
2503     }
2504     st_off -= wordSize;
2505   }
2506 
2507 
2508   // Jump to the interpreter just as if interpreter was doing it.
2509   __ add2reg(Z_esp, st_off, Z_SP);
2510 
2511   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2512   __ z_br(ientry);
2513 
2514 
2515   // Prevent illegal entry to out-of-line code.
2516   __ z_illtrap(0x22);
2517 
2518   // Generate out-of-line runtime call to patch caller,
2519   // then continue as interpreted.
2520 
2521   // IF you lose the race you go interpreted.
2522   // We don't see any possible endless c2i -> i2c -> c2i ...
2523   // transitions no matter how rare.
2524   __ bind(patch_callsite);
2525 
2526   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2527   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2528   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2529   __ z_bru(skip_fixup);
2530 
2531   // end of out-of-line code
2532 
2533   return c2i_entrypoint;
2534 }
2535 
2536 // On entry, the following registers are set
2537 //
2538 //    Z_thread  r8  - JavaThread*
2539 //    Z_method  r9  - callee's method (method to be invoked)
2540 //    Z_esp     r7  - operand (or expression) stack pointer of caller. One slot above last arg.
2541 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2542 //
2543 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2544                                     int total_args_passed,
2545                                     int comp_args_on_stack,
2546                                     const BasicType *sig_bt,
2547                                     const VMRegPair *regs) {
2548   const Register value = Z_R12;
2549   const Register ld_ptr= Z_esp;
2550 
2551   int ld_offset = total_args_passed * wordSize;
2552 
2553   // Cut-out for having no stack args.
2554   if (comp_args_on_stack) {
2555     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2556     // registers are below. By subtracting stack0, we either get a negative
2557     // number (all values in registers) or the maximum stack slot accessed.
2558     // Convert VMRegImpl (4 byte) stack slots to words.
2559     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2560     // Round up to minimum stack alignment, in units of wordSize.
2561     comp_words_on_stack = align_up(comp_words_on_stack, 2);
2562 
2563     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2564   }
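  // Worked example (a sketch, assuming 8-byte words and 4-byte stack slots):
  // comp_args_on_stack = 5 slots -> 5 * 4 = 20 bytes -> align_up to 8 bytes
  // = 24 bytes -> 3 words -> align_up(3, 2) = 4 words, so the frame is
  // extended by 32 bytes to keep the required 2-word stack alignment.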

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through register value=Z_R12.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset -= wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
        ld_offset -= 2 * wordSize;
      }
    } else {
      if (r_1->is_stack()) {
        // Must do a memory to memory move.
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;

        if (!r_2->is_valid()) {
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        }
      } else {
        if (!r_2->is_valid()) {
          // Not sure we need to do this but it shouldn't hurt.
          if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
          } else {
            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
          }
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
        }
      }
      ld_offset -= wordSize;
    }
  }

  // Jump to the compiled code just as if compiled code was doing it.
  // Load target address from method oop:
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));

  // Store method oop into thread->callee_target.
  // 6243940: We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately, if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ z_stg(Z_method, thread_(callee_target));

  __ z_br(Z_R1_scratch);
}

AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  __ align(CodeEntryAlignment);
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry;

  Label skip_fixup;
  {
    Label ic_miss;
    const int klass_offset           = oopDesc::klass_offset_in_bytes();
    const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
    const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();

    // Out-of-line call to ic_miss handler.
    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);

    // Unverified Entry Point (UEP).
    __ align(CodeEntryAlignment);
    c2i_unverified_entry = __ pc();

    // Check the pointers.
    if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_bre(ic_miss);
    }
    __ verify_oop(Z_ARG1);

    // Check ic: object class <-> cached class.
    // Compress the cached class for comparison. That's more efficient.
    if (UseCompressedClassPointers) {
      __ z_lg(Z_R11, holder_klass_offset, Z_method);            // Z_R11 is overwritten a few instructions down anyway.
      __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
    } else {
      __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
    }
    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.

    // This def MUST MATCH code in gen_c2i_adapter!
    const Register code = Z_R11;

    __ z_lg(Z_method, holder_metadata_offset, Z_method);
    __ load_and_test_long(Z_R0, method_(code));
    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.

    // Fallthru to VEP. Duplicates the LTG, but saves a taken branch.
  }
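  // Runtime effect of the unverified entry check above, as a hedged C++
  // sketch (illustrative pseudo-helpers, not compiled):
  //
  //   if (receiver == NULL)                                 goto ic_miss;
  //   if (receiver->klass() != ic_holder->holder_klass())   goto ic_miss;
  //   Z_method = ic_holder->holder_metadata();              // the Method*
  //   if (Z_method->code() != NULL /* has compiled code */) goto ic_miss;
  //   // fall through to the verified c2i entry below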

  address c2i_entry;
  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}

// This function returns the adjustment size (in number of words) of a c2i
// adapter activation for use during deoptimization.
//
// Actually only compiled frames need to be adjusted, but it
// doesn't harm to adjust entry and interpreter frames, too.
//
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  assert(callee_locals >= callee_parameters,
         "test and remove; got more parms than locals");
  // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
  return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
         frame::z_parent_ijava_frame_abi_size / BytesPerWord;
}
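// Worked example (a sketch; the concrete ABI size is an assumption): with
// callee_parameters = 3, callee_locals = 10, and stackElementWords = 1, the
// locals need (10 - 3) * 1 = 7 extra words. If z_parent_ijava_frame_abi_size
// were 160 bytes, the ABI part would add 160 / 8 = 20 words, for a total
// adjustment of 27 words.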

uint SharedRuntime::out_preserve_stack_slots() {
  return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
}

//
// Frame generation for deopt and uncommon trap blobs.
//
static void push_skeleton_frame(MacroAssembler* masm,
                          /* Unchanged */
                          Register frame_sizes_reg,
                          Register pcs_reg,
                          /* Invalidate */
                          Register frame_size_reg,
                          Register pc_reg) {
  BLOCK_COMMENT("  push_skeleton_frame {");
  __ z_lg(pc_reg, 0, pcs_reg);
  __ z_lg(frame_size_reg, 0, frame_sizes_reg);
  __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
  Register fp = pc_reg;
  __ push_frame(frame_size_reg, fp);
#ifdef ASSERT
  // The magic is required for successfully walking skeletal frames.
  __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
  __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
  // Fill other slots that are supposedly not necessary with eye catchers.
  __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
  __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
  // The sender_sp of the bottom frame is set before pushing it.
  // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
  // is unknown here. Luckily it is not needed before filling the frame in
  // layout_activation(); we assert this by setting an eye catcher (see
  // comments on sender_sp in frame_s390.hpp).
  __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
#endif // ASSERT
  BLOCK_COMMENT("  } push_skeleton_frame");
}

// Loop through the UnrollBlock info and create new frames.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                            /* read */
                            Register unroll_block_reg,
                            /* invalidate */
                            Register frame_sizes_reg,
                            Register number_of_frames_reg,
                            Register pcs_reg,
                            Register tmp1,
                            Register tmp2) {
  BLOCK_COMMENT("push_skeleton_frames {");
  // _number_of_frames is of type int (deoptimization.hpp).
  __ z_lgf(number_of_frames_reg,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
  __ z_lg(pcs_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
  __ z_lg(frame_sizes_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

  // stack: (caller_of_deoptee, ...).

  // If caller_of_deoptee is a compiled frame, then we extend it to make
  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
  // See also Deoptimization::last_frame_adjust() above.
  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.

  __ z_lgf(Z_R1_scratch,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
  // The oldest skeletal frame requires a valid sender_sp to make it walkable
  // (it is required to find the original pc of caller_of_deoptee if it is marked
  // for deoptimization - see nmethod::orig_pc_addr()).
  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);

  // Now push the new interpreter frames.
  Label loop, loop_entry;

  // Make sure that there is at least one entry in the array.
  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
  __ asm_assert_ne("array_size must be > 0", 0x205);

  __ z_bru(loop_entry);

  __ bind(loop);

  __ add2reg(frame_sizes_reg, wordSize);
  __ add2reg(pcs_reg, wordSize);

  __ bind(loop_entry);

  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);

  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code.
  __ z_brne(loop);

  // Set the top frame's return pc.
  __ add2reg(pcs_reg, wordSize);
  __ z_lg(Z_R0_scratch, 0, pcs_reg);
  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
  BLOCK_COMMENT("} push_skeleton_frames");
}
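// What the emitted loop computes at runtime, as a hedged C++ sketch
// (illustrative names, not compiled):
//
//   for (int k = 0; k < number_of_frames; k++) {
//     current_top_frame.return_pc = pcs[k];  // caller's pc of frame k
//     push_new_frame(frame_sizes[k]);        // becomes the new top frame
//   }
//   top_frame.return_pc = pcs[number_of_frames];  // pc to resume at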

//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  CodeBuffer buffer("deopt_blob", 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = NULL;
  OopMapSet *oop_maps = new OopMapSet();

  unsigned int start_off = __ offset();
  Label cont;

  // --------------------------------------------------------------------------
  // Normal entry (non-exception case)
  //
  // We have been called from the deopt handler of the deoptee.
  // Z_R14 points behind the call in the deopt handler. We adjust
  // it such that it points to the start of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.
  // The (int) cast is necessary, because -((unsigned int)14)
  // is an unsigned int.
  __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());

  const Register   exec_mode_reg = Z_tmp_1;

  // stack: (deoptee, caller of deoptee, ...)

  // Push an "unpack" frame.
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // Save R14 into the deoptee frame. The `fetch_unroll_info'
  // procedure called below will read it from there.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // Note the entry point.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
  __ z_bru(exec_mode_initialized);

#ifndef COMPILER1
  int reexecute_offset = 1; // An odd offset will produce an odd pc, which triggers a hardware trap.
#else
  // --------------------------------------------------------------------------
  // Reexecute entry
  // - Z_R14 = Deopt Handler in nmethod

  int reexecute_offset = __ offset() - start_off;

  // No need to update map as each call to save_live_registers will produce an identical oopmap.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
  __ z_bru(exec_mode_initialized);
#endif


  // --------------------------------------------------------------------------
  // Exception entry. We reached here via a branch. Registers on entry:
  // - Z_EXC_OOP (Z_ARG1) = exception oop
  // - Z_EXC_PC  (Z_ARG2) = the exception pc.

  int exception_offset = __ offset() - start_off;

  // All registers are dead at this entry point, except for Z_EXC_OOP and
  // Z_EXC_PC, which contain the exception oop and exception pc,
  // respectively. Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  // Store exception oop and pc in thread (location known to GC).
  // Need this since the call to "fetch_unroll_info()" may safepoint.
  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));

  // fall through

  int exception_in_tls_offset = __ offset() - start_off;

  // New implementation because the exception oop is now passed in JavaThread.

  // Prolog for exception case.
  // All registers must be preserved because they might be used by LinearScan.
  // Exception oop and throwing PC are passed in JavaThread.

  // Load the throwing pc from JavaThread and use it as the return address of the current frame.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));

  // Save everything in sight.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);

  // Now it is safe to overwrite any register.

  // Clear the exception pc field in JavaThread.
  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);


#ifdef ASSERT
  // Verify that there is really an exception oop in JavaThread.
  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ verify_oop(Z_ARG1);

  // Verify that there is no pending exception.
  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
                             "must not have pending exception here", __LINE__);
#endif

  // --------------------------------------------------------------------------
  // At this point, the live registers are saved and
  // the exec_mode_reg has been set up correctly.
  __ bind(exec_mode_initialized);

  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).

  {
  const Register unroll_block_reg  = Z_tmp_2;

  // We need to set `last_Java_frame', because `fetch_unroll_info' will
  // call `last_Java_frame()'. However, we can't block and no GC will
  // occur, so we don't need an oopmap. The value of the pc in the
  // frame is not particularly important: it just needs to identify the blob.

  // Don't set last_Java_pc here anymore (it is implicitly NULL then);
  // the correct PC is retrieved in pd_last_frame() in that case.
  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint,
  // despite being marked as a "leaf call"!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site. It describes all our saved volatile registers.
  int offs = __ offset();
  oop_maps->add_gc_map(offs, map);

  __ reset_last_Java_frame();
  // Save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);
  // Restore the return registers that have been saved
  // (among other registers) by save_live_registers(...).
  RegisterSaver::restore_result_registers(masm);

  // Reload the exec mode from the UnrollBlock (it might have changed).
  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));

  // In excp_deopt_mode, restore and clear the exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  NearLabel skip_restore_excp;
  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
  __ z_lg(Z_RET, thread_(exception_oop));
  __ clear_mem(thread_(exception_oop), 8);
  __ bind(skip_restore_excp);

  // Remove the "unpack" frame.
  __ pop_frame();

  // stack: (deoptee, caller of deoptee, ...).

  // Pop the deoptee's frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // Loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4,
                  Z_ARG3);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, caller of deoptee, ...).
  }

  // Push an "unpack" frame, taking care of float / int return values.
  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, caller of deoptee, ...).

  // Spill live volatile registers since we'll do a call.
  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // Let the unpacker layout information in the skeletal frames just allocated.
  __ get_PC(Z_RET);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);

  __ reset_last_Java_frame();

  // Restore the volatiles saved above.
  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // Pop the "unpack" frame.
  __ pop_frame();
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // Return to the interpreter entry point.
  __ z_br(Z_R14);

  // Make sure all code is generated.
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}


#ifdef COMPILER2
//------------------------------generate_uncommon_trap_blob--------------------
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);

  Register unroll_block_reg = Z_tmp_1;
  Register klass_index_reg  = Z_ARG2;
  Register unc_trap_reg     = Z_ARG2;

  // stack: (deoptee, caller_of_deoptee, ...).

  // Push a dummy "unpack" frame and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // Save R14 to the compiled frame.
  __ save_return_pc();
  // Push the "unpack" frame.
  __ push_frame_abi160(0);

  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).

  // Set the "unpack" frame as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);

  __ reset_last_Java_frame();

  // Pop the "unpack" frame.
  __ pop_frame();

  // stack: (deoptee, caller_of_deoptee, ...).

  // Save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);

  // Pop the deoptee frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
  const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
#ifndef VM_LITTLE_ENDIAN
  + 3
#endif
  ;
  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  } else {
    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  }
  __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
#endif

  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);

  // Allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters!).
  push_skeleton_frames(masm, false/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_2,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, (resized) caller of deoptee, ...).

  // Push a dummy "unpack" frame, taking care of float return values, and
  // call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created.

  // Push the "unpack" frame.
  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, (resized) caller of deoptee, ...).

  // Set the "unpack" frame as last_Java_frame.
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  // Indicate that it is the uncommon trap case.
  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker layout information in the skeletal frames just allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);

  __ reset_last_Java_frame();
  // Pop the "unpack" frame.
  __ pop_frame();
  // Restore the return pc from the top interpreter frame.
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // (resized) caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer.
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // Return to the interpreter entry point.
  __ z_br(Z_R14);

  masm->flush();
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
}
#endif // COMPILER2


//------------------------------generate_handler_blob------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and sets up the oopmap.
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  assert(StubRoutines::forward_exception_entry() != NULL,
         "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  CodeBuffer buffer("handler_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  unsigned int start_off = __ offset();
  address call_pc = NULL;
  int frame_size_in_bytes;

  bool cause_return = (poll_type == POLL_AT_RETURN);
  // Make room for return address (or push it again).
  if (!cause_return) {
    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
  }

  // Save registers, fpu state, and flags.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
    // Keep a copy of the return pc to detect if it gets modified.
    __ z_lgr(Z_R6, Z_R14);
  }

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(Z_SP, noreg);

  // Call into the runtime to handle the safepoint poll.
  __ call_VM_leaf(call_ptr, Z_thread);


  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map((int)(__ offset()-start_off), map);

  Label noException;

  __ reset_last_Java_frame();

  __ load_and_test_long(Z_R1, thread_(pending_exception));
  __ z_bre(noException);

  // Pending exception case, used (sporadically) by
  // api/java_lang/Thread.State/index#ThreadState et al.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // Jump to forward_exception_entry, with the issuing PC in Z_R14
  // so it looks like the original nmethod called forward_exception_entry.
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // No exception case.
  __ bind(noException);

  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime, we avoid touching it.
    const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
    __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
    __ z_brne(no_adjust);

    // Adjust the return pc forward to step over the safepoint poll instruction.
    __ instr_size(Z_R1_scratch, Z_R6);
    __ z_agr(Z_R6, Z_R1_scratch);
    __ z_stg(Z_R6, offset_of_return_pc, Z_SP);

    __ bind(no_adjust);
  }
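  // Condensed runtime effect of the adjustment above, as a hedged sketch
  // (instr_size_at is an illustrative helper, not a real one):
  //
  //   if (frame.return_pc == stashed_pc) {             // runtime left it alone
  //     frame.return_pc += instr_size_at(stashed_pc);  // step over the poll
  //   }                                                // else: runtime redirected us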

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  __ z_br(Z_R14);

  // Make sure all code is generated.
  masm->flush();

  // Fill-out other meta info.
  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
}


//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the vm to find out the proper destination
// of a Java call. All the argument registers are live at this point,
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // Allocate space for the code.
  ResourceMark rm;

  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  unsigned int start_off = __ offset();

  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // We must save a PC from within the stub as return PC.
  // C code doesn't store the LR where we expect the PC,
  // so we would run into trouble upon stack walking.
  __ get_PC(Z_R1_scratch);

  unsigned int frame_complete = __ offset();

  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);

  __ call_VM_leaf(destination, Z_thread, Z_method);


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map((int)(frame_complete-start_off), map);

  // Clear last_Java_sp.
  __ reset_last_Java_frame();

  // Check for pending exceptions.
  Label pending;
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(pending);

  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // Get the returned method.
  __ get_vm_result_2(Z_method);

  // We are back to the original state on entry and ready to go.
  __ z_br(Z_R1_scratch);

  // Pending exception after the safepoint.

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // Exception pending => remove activation and forward to exception handler.

  __ z_lgr(Z_R2, Z_R0); // pending_exception
  __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // -------------
  // Make sure all code is generated.
  masm->flush();

  // Return the blob.
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
                                       oop_maps, true);

}

//------------------------------Montgomery multiplication------------------------
//

// Subtract 0:b from carry:a. Return carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
    "LGHI   0, 8               \n" // index increment (for BRXLG)
    "LGR    1, %[c]            \n" // index limit (for BRXLG)
    "0:                        \n"
    "LG     %[c], 0(%[i],%[a]) \n"
    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG    %[c], 0(%[i],%[a]) \n"
    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
    "SLBGR  %[c], %[c]         \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1"
 );
  return carry + c;
}
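
// A portable C++ sketch of what sub() computes (an assumption-labeled
// illustration using GCC's unsigned __int128; the real code above relies on
// the SLBG/SLBGR borrow chain instead):
//
//   static unsigned long sub_portable(unsigned long a[], unsigned long b[],
//                                     unsigned long carry, long len) {
//     unsigned long borrow = 0;
//     for (long i = 0; i < len; i++) {  // words are least-significant first
//       unsigned __int128 d = (unsigned __int128)a[i] - b[i] - borrow;
//       a[i]   = (unsigned long)d;
//       borrow = (unsigned long)(d >> 64) & 1;  // 1 iff the subtraction wrapped
//     }
//     return carry - borrow;
//   }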

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "LGHI   1, 0               \n" // r1 = 0
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], 1           \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1"
 );
}
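
// The same accumulation in portable C++ (a sketch assuming unsigned
// __int128; the asm above gets this done with one MLG plus three
// add-with-carry instructions):
//
//   inline void MACC_portable(unsigned long A[], long A_ind,
//                             unsigned long B[], long B_ind,
//                             unsigned long &T0, unsigned long &T1,
//                             unsigned long &T2) {
//     unsigned __int128 p = (unsigned __int128)A[A_ind] * B[B_ind];
//     unsigned __int128 s = (unsigned __int128)T0 + (unsigned long)p;
//     T0 = (unsigned long)s;
//     s  = (unsigned __int128)T1 + (unsigned long)(p >> 64) + (unsigned long)(s >> 64);
//     T1 = (unsigned long)s;
//     T2 += (unsigned long)(s >> 64);
//   }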

// As above, but add twice the double-length result into the
// accumulator.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  const unsigned long zero = 0;
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1"
 );
}

// Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000",
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
static void
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    MACC(a, i, b, 0, t0, t1, t2);
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2 * len; i++) {
    int j;
    for (j = i - len + 1; j < len; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}
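
// Contract of montgomery_multiply(), stated as a hedged sketch (n odd,
// R = 2^(64*len), inv such that inv * n[0] == -1 mod 2^64; the caller is
// responsible for providing inv):
//
//   // On return: m == a * b * R^-1 (mod n), with 0 <= m < n.
//
// The word-inverse can be derived by Newton iteration, e.g. (illustrative,
// not part of this file; the JDK passes inv in from the Java side):
//
//   unsigned long x = n0;                         // correct mod 2^3 for odd n0
//   for (int k = 0; k < 5; k++) x *= 2 - n0 * x;  // each step doubles the precision
//   unsigned long inv = -x;                       // now inv * n0 == -1 mod 2^64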

// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies, so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
static void
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// The value seems to be ok for other platforms, too.
#define MONTGOMERY_SQUARING_THRESHOLD 64

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  d += len;
  while (len-- > 0) {
    d--;
    unsigned long s_val = *s;
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
    Unimplemented();
#endif
    *d = s_val;
    s++;
  }
}
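
// Example (big-endian s390; a sketch, not compiled): the jint array
// {0x00000001, 0x00000002, 0x00000003, 0x00000004}, read as longwords, is
// {0x0000000100000002, 0x0000000300000004} with the most significant word
// first; reverse_words() turns that into {0x0000000300000004,
// 0x0000000100000002}, i.e. least significant word first, as the Montgomery
// routines above expect.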

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 8k bytes of stack space here.
  int total_allocation = longwords * sizeof (unsigned long) * 4;
  guarantee(total_allocation <= 8192, "must be");
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)b_ints, b, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 6k bytes of stack space here.
  int total_allocation = longwords * sizeof (unsigned long) * 3;
  guarantee(total_allocation <= 8192, "must be");
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
  }

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

extern "C"
int SpinPause() {
  return 0;
}