1 /*
2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "compiler/disassembler.hpp"
28 #include "interpreter/interp_masm.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "interpreter/interpreterRuntime.hpp"
31 #include "interpreter/templateInterpreterGenerator.hpp"
32 #include "runtime/arguments.hpp"
33 #include "runtime/sharedRuntime.hpp"
34 
35 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
36 
37 #ifdef _WIN64
generate_slow_signature_handler()38 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
39   address entry = __ pc();
40 
41   // rbx: method
42   // r14: pointer to locals
43   // c_rarg3: first stack arg - wordSize
44   __ mov(c_rarg3, rsp);
45   // adjust rsp
46   __ subptr(rsp, 4 * wordSize);
47   __ call_VM(noreg,
48              CAST_FROM_FN_PTR(address,
49                               InterpreterRuntime::slow_signature_handler),
50              rbx, r14, c_rarg3);
51 
52   // rax: result handler
53 
54   // Stack layout:
55   // rsp: 3 integer or float args (if static first is unused)
56   //      1 float/double identifiers
57   //        return address
58   //        stack args
59   //        garbage
60   //        expression stack bottom
61   //        bcp (NULL)
62   //        ...
63 
64   // Do FP first so we can use c_rarg3 as temp
65   __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers
66 
67   for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
68     XMMRegister floatreg = as_XMMRegister(i+1);
69     Label isfloatordouble, isdouble, next;
70 
71     __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
72     __ jcc(Assembler::notZero, isfloatordouble);
73 
74     // Do Int register here
75     switch ( i ) {
76       case 0:
77         __ movl(rscratch1, Address(rbx, Method::access_flags_offset()));
78         __ testl(rscratch1, JVM_ACC_STATIC);
79         __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
80         break;
81       case 1:
82         __ movptr(c_rarg2, Address(rsp, wordSize));
83         break;
84       case 2:
85         __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
86         break;
87       default:
88         break;
89     }
90 
91     __ jmp (next);
92 
93     __ bind(isfloatordouble);
94     __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
95     __ jcc(Assembler::notZero, isdouble);
96 
97 // Do Float Here
98     __ movflt(floatreg, Address(rsp, i * wordSize));
99     __ jmp(next);
100 
101 // Do Double here
102     __ bind(isdouble);
103     __ movdbl(floatreg, Address(rsp, i * wordSize));
104 
105     __ bind(next);
106   }
107 
108 
109   // restore rsp
110   __ addptr(rsp, 4 * wordSize);
111 
112   __ ret(0);
113 
114   return entry;
115 }
116 #else
generate_slow_signature_handler()117 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
118   address entry = __ pc();
119 
120   // rbx: method
121   // r14: pointer to locals
122   // c_rarg3: first stack arg - wordSize
123   __ mov(c_rarg3, rsp);
124   // adjust rsp
125   __ subptr(rsp, 14 * wordSize);
126   __ call_VM(noreg,
127              CAST_FROM_FN_PTR(address,
128                               InterpreterRuntime::slow_signature_handler),
129              rbx, r14, c_rarg3);
130 
131   // rax: result handler
132 
133   // Stack layout:
134   // rsp: 5 integer args (if static first is unused)
135   //      1 float/double identifiers
136   //      8 double args
137   //        return address
138   //        stack args
139   //        garbage
140   //        expression stack bottom
141   //        bcp (NULL)
142   //        ...
143 
144   // Do FP first so we can use c_rarg3 as temp
145   __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers
146 
147   for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
148     const XMMRegister r = as_XMMRegister(i);
149 
150     Label d, done;
151 
152     __ testl(c_rarg3, 1 << i);
153     __ jcc(Assembler::notZero, d);
154     __ movflt(r, Address(rsp, (6 + i) * wordSize));
155     __ jmp(done);
156     __ bind(d);
157     __ movdbl(r, Address(rsp, (6 + i) * wordSize));
158     __ bind(done);
159   }
160 
161   // Now handle integrals.  Only do c_rarg1 if not static.
162   __ movl(c_rarg3, Address(rbx, Method::access_flags_offset()));
163   __ testl(c_rarg3, JVM_ACC_STATIC);
164   __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
165 
166   __ movptr(c_rarg2, Address(rsp, wordSize));
167   __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
168   __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
169   __ movptr(c_rarg5, Address(rsp, 4 * wordSize));
170 
171   // restore rsp
172   __ addptr(rsp, 14 * wordSize);
173 
174   __ ret(0);
175 
176   return entry;
177 }
#endif  // _WIN64
179 
180 /**
181  * Method entry for static native methods:
182  *   int java.util.zip.CRC32.update(int crc, int b)
183  */
generate_CRC32_update_entry()184 address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
185   if (UseCRC32Intrinsics) {
186     address entry = __ pc();
187 
188     // rbx,: Method*
189     // r13: senderSP must preserved for slow path, set SP to it on fast path
190     // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
191     // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
192 
193     Label slow_path;
194     __ safepoint_poll(slow_path, r15_thread, rscratch1);
195 
196     // We don't generate local frame and don't align stack because
197     // we call stub code and there is no safepoint on this path.
198 
199     // Load parameters
200     const Register crc = rax;  // crc
201     const Register val = c_rarg0;  // source java byte value
202     const Register tbl = c_rarg1;  // scratch
203 
204     // Arguments are reversed on java expression stack
205     __ movl(val, Address(rsp,   wordSize)); // byte value
206     __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
207 
208     __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
209     __ notl(crc); // ~crc
210     __ update_byte_crc32(crc, val, tbl);
211     __ notl(crc); // ~crc
212     // result in rax
213 
214     // _areturn
215     __ pop(rdi);                // get return address
216     __ mov(rsp, r13);           // set sp to sender sp
217     __ jmp(rdi);
218 
219     // generate a vanilla native entry as the slow path
220     __ bind(slow_path);
221     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
222     return entry;
223   }
224   return NULL;
225 }
226 
227 /**
228  * Method entry for static native methods:
229  *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
230  *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
231  */
generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind)232 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
233   if (UseCRC32Intrinsics) {
234     address entry = __ pc();
235 
236     // rbx,: Method*
237     // r13: senderSP must preserved for slow path, set SP to it on fast path
238 
239     Label slow_path;
240     __ safepoint_poll(slow_path, r15_thread, rscratch1);
241 
242     // We don't generate local frame and don't align stack because
243     // we call stub code and there is no safepoint on this path.
244 
245     // Load parameters
246     const Register crc = c_rarg0;  // crc
247     const Register buf = c_rarg1;  // source java byte array address
248     const Register len = c_rarg2;  // length
249     const Register off = len;      // offset (never overlaps with 'len')
250 
251     // Arguments are reversed on java expression stack
252     // Calculate address of start element
253     if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
254       __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
255       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
256       __ addq(buf, off); // + offset
257       __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
258     } else {
259       __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
260       __ resolve(IS_NOT_NULL | ACCESS_READ, buf);
261       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
262       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
263       __ addq(buf, off); // + offset
264       __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
265     }
266     // Can now load 'len' since we're finished with 'off'
267     __ movl(len, Address(rsp, wordSize)); // Length
268 
269     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
270     // result in rax
271 
272     // _areturn
273     __ pop(rdi);                // get return address
274     __ mov(rsp, r13);           // set sp to sender sp
275     __ jmp(rdi);
276 
277     // generate a vanilla native entry as the slow path
278     __ bind(slow_path);
279     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
280     return entry;
281   }
282   return NULL;
283 }
284 
285 /**
286 * Method entry for static (non-native) methods:
287 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
288 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
289 */
generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind)290 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
291   if (UseCRC32CIntrinsics) {
292     address entry = __ pc();
293     // Load parameters
294     const Register crc = c_rarg0;  // crc
295     const Register buf = c_rarg1;  // source java byte array address
296     const Register len = c_rarg2;
297     const Register off = c_rarg3;  // offset
298     const Register end = len;
299 
300     // Arguments are reversed on java expression stack
301     // Calculate address of start element
302     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
303       __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
304       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
305       __ addq(buf, off); // + offset
306       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
307       // Note on 5 * wordSize vs. 4 * wordSize:
308       // *   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
309       //                                                   4         2,3          1        0
310       // end starts at SP + 8
311       // The Java(R) Virtual Machine Specification Java SE 7 Edition
312       // 4.10.2.3. Values of Types long and double
313       //    "When calculating operand stack length, values of type long and double have length two."
314     } else {
315       __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
316       __ resolve(IS_NOT_NULL | ACCESS_READ, buf);
317       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
318       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
319       __ addq(buf, off); // + offset
320       __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
321     }
322     __ movl(end, Address(rsp, wordSize)); // end
323     __ subl(end, off); // end - off
324     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
325     // result in rax
326     // _areturn
327     __ pop(rdi);                // get return address
328     __ mov(rsp, r13);           // set sp to sender sp
329     __ jmp(rdi);
330 
331     return entry;
332   }
333 
334   return NULL;
335 }
336 
337 //
338 // Various method entries
339 //
340 
generate_math_entry(AbstractInterpreter::MethodKind kind)341 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
342 
343   // rbx,: Method*
344   // rcx: scratrch
345   // r13: sender sp
346 
347   if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
348 
349   address entry_point = __ pc();
350 
351   // These don't need a safepoint check because they aren't virtually
352   // callable. We won't enter these intrinsics from compiled code.
353   // If in the future we added an intrinsic which was virtually callable
354   // we'd have to worry about how to safepoint so that this code is used.
355 
356   // mathematical functions inlined by compiler
357   // (interpreter must provide identical implementation
358   // in order to avoid monotonicity bugs when switching
359   // from interpreter to compiler in the middle of some
360   // computation)
361   //
362   // stack: [ ret adr ] <-- rsp
363   //        [ lo(arg) ]
364   //        [ hi(arg) ]
365   //
366 
367   if (kind == Interpreter::java_lang_math_fmaD) {
368     if (!UseFMA) {
369       return NULL; // Generate a vanilla entry
370     }
371     __ movdbl(xmm0, Address(rsp, wordSize));
372     __ movdbl(xmm1, Address(rsp, 3 * wordSize));
373     __ movdbl(xmm2, Address(rsp, 5 * wordSize));
374     __ fmad(xmm0, xmm1, xmm2, xmm0);
375   } else if (kind == Interpreter::java_lang_math_fmaF) {
376     if (!UseFMA) {
377       return NULL; // Generate a vanilla entry
378     }
379     __ movflt(xmm0, Address(rsp, wordSize));
380     __ movflt(xmm1, Address(rsp, 2 * wordSize));
381     __ movflt(xmm2, Address(rsp, 3 * wordSize));
382     __ fmaf(xmm0, xmm1, xmm2, xmm0);
383   } else if (kind == Interpreter::java_lang_math_sqrt) {
384     __ sqrtsd(xmm0, Address(rsp, wordSize));
385   } else if (kind == Interpreter::java_lang_math_exp) {
386     __ movdbl(xmm0, Address(rsp, wordSize));
387     if (StubRoutines::dexp() != NULL) {
388       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
389     } else {
390       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
391     }
392   } else if (kind == Interpreter::java_lang_math_log) {
393     __ movdbl(xmm0, Address(rsp, wordSize));
394     if (StubRoutines::dlog() != NULL) {
395       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
396     } else {
397       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
398     }
399   } else if (kind == Interpreter::java_lang_math_log10) {
400     __ movdbl(xmm0, Address(rsp, wordSize));
401     if (StubRoutines::dlog10() != NULL) {
402       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
403     } else {
404       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
405     }
406   } else if (kind == Interpreter::java_lang_math_sin) {
407     __ movdbl(xmm0, Address(rsp, wordSize));
408     if (StubRoutines::dsin() != NULL) {
409       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
410     } else {
411       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
412     }
413   } else if (kind == Interpreter::java_lang_math_cos) {
414     __ movdbl(xmm0, Address(rsp, wordSize));
415     if (StubRoutines::dcos() != NULL) {
416       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
417     } else {
418       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
419     }
420   } else if (kind == Interpreter::java_lang_math_pow) {
421     __ movdbl(xmm1, Address(rsp, wordSize));
422     __ movdbl(xmm0, Address(rsp, 3 * wordSize));
423     if (StubRoutines::dpow() != NULL) {
424       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
425     } else {
426       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
427     }
428   } else if (kind == Interpreter::java_lang_math_tan) {
429     __ movdbl(xmm0, Address(rsp, wordSize));
430     if (StubRoutines::dtan() != NULL) {
431       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
432     } else {
433       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
434     }
435   } else {
436     __ fld_d(Address(rsp, wordSize));
437     switch (kind) {
438     case Interpreter::java_lang_math_abs:
439       __ fabs();
440       break;
441     default:
442       ShouldNotReachHere();
443     }
444 
445     // return double result in xmm0 for interpreter and compilers.
446     __ subptr(rsp, 2*wordSize);
447     // Round to 64bit precision
448     __ fstp_d(Address(rsp, 0));
449     __ movdbl(xmm0, Address(rsp, 0));
450     __ addptr(rsp, 2*wordSize);
451   }
452 
453 
454   __ pop(rax);
455   __ mov(rsp, r13);
456   __ jmp(rax);
457 
458   return entry_point;
459 }
460 
461