#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

#ifndef __APPLE__
# Everything except Darwin: exported names are emitted unchanged and
# each entry point is tagged through the .type directive.
#define SYMBOL(name) name
#define ELF_TYPE(name, kind) .type name,kind
#else
# Darwin: global symbols carry a leading underscore, and the type
# annotation is dropped (the macro expands to nothing).
#define SYMBOL(name) _ ## name
#define ELF_TYPE(name, kind)
#endif

33        # NOTE WELL!  The _Copy functions are called directly
34	# from server-compiler-generated code via CallLeafNoFP,
35	# which means that they *must* either not use floating
36	# point or use it in the same manner as does the server
37	# compiler.
38
39        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
40	.globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
41        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
42        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
43        .globl SYMBOL(_Copy_conjoint_jints_atomic)
44        .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
45        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
46
47	.text
48
49        .globl SYMBOL(SpinPause)
50        .p2align 4,,15
51        ELF_TYPE(SpinPause,@function)
52SYMBOL(SpinPause):
53        rep
54        nop
55        movq   $1, %rax
56        ret
57
58        # Support for void Copy::arrayof_conjoint_bytes(void* from,
59        #                                               void* to,
60        #                                               size_t count)
61        # rdi - from
62        # rsi - to
63        # rdx - count, treated as ssize_t
64        #
65        .p2align 4,,15
66	ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
67SYMBOL(_Copy_arrayof_conjoint_bytes):
68        movq     %rdx,%r8             # byte count
69        shrq     $3,%rdx              # qword count
70        cmpq     %rdi,%rsi
71        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
72        jbe      acb_CopyRight
73        cmpq     %rax,%rsi
74        jbe      acb_CopyLeft
75acb_CopyRight:
76        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
77        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
78        negq     %rdx
79        jmp      7f
80        .p2align 4,,15
811:      movq     8(%rax,%rdx,8),%rsi
82        movq     %rsi,8(%rcx,%rdx,8)
83        addq     $1,%rdx
84        jnz      1b
852:      testq    $4,%r8               # check for trailing dword
86        jz       3f
87        movl     8(%rax),%esi         # copy trailing dword
88        movl     %esi,8(%rcx)
89        addq     $4,%rax
90        addq     $4,%rcx              # original %rsi is trashed, so we
91                                      #  can't use it as a base register
923:      testq    $2,%r8               # check for trailing word
93        jz       4f
94        movw     8(%rax),%si          # copy trailing word
95        movw     %si,8(%rcx)
96        addq     $2,%rcx
974:      testq    $1,%r8               # check for trailing byte
98        jz       5f
99        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
100        movb     %al,8(%rcx)
1015:      ret
102        .p2align 4,,15
1036:      movq     -24(%rax,%rdx,8),%rsi
104        movq     %rsi,-24(%rcx,%rdx,8)
105        movq     -16(%rax,%rdx,8),%rsi
106        movq     %rsi,-16(%rcx,%rdx,8)
107        movq     -8(%rax,%rdx,8),%rsi
108        movq     %rsi,-8(%rcx,%rdx,8)
109        movq     (%rax,%rdx,8),%rsi
110        movq     %rsi,(%rcx,%rdx,8)
1117:      addq     $4,%rdx
112        jle      6b
113        subq     $4,%rdx
114        jl       1b
115        jmp      2b
116acb_CopyLeft:
117        testq    $1,%r8               # check for trailing byte
118        jz       1f
119        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
120        movb     %cl,-1(%rsi,%r8,1)
121        subq     $1,%r8               # adjust for possible trailing word
1221:      testq    $2,%r8               # check for trailing word
123        jz       2f
124        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
125        movw     %cx,-2(%rsi,%r8,1)
1262:      testq    $4,%r8               # check for trailing dword
127        jz       5f
128        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
129        movl     %ecx,(%rsi,%rdx,8)
130        jmp      5f
131        .p2align 4,,15
1323:      movq     -8(%rdi,%rdx,8),%rcx
133        movq     %rcx,-8(%rsi,%rdx,8)
134        subq     $1,%rdx
135        jnz      3b
136        ret
137        .p2align 4,,15
1384:      movq     24(%rdi,%rdx,8),%rcx
139        movq     %rcx,24(%rsi,%rdx,8)
140        movq     16(%rdi,%rdx,8),%rcx
141        movq     %rcx,16(%rsi,%rdx,8)
142        movq     8(%rdi,%rdx,8),%rcx
143        movq     %rcx,8(%rsi,%rdx,8)
144        movq     (%rdi,%rdx,8),%rcx
145        movq     %rcx,(%rsi,%rdx,8)
1465:      subq     $4,%rdx
147        jge      4b
148        addq     $4,%rdx
149        jg       3b
150        ret
151
152        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
153        #                                                 void* to,
154        #                                                 size_t count)
155        # Equivalent to
156        #   conjoint_jshorts_atomic
157        #
158        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
159        # let the hardware handle it.  The tow or four words within dwords
160        # or qwords that span cache line boundaries will still be loaded
161        # and stored atomically.
162        #
163        # rdi - from
164        # rsi - to
165        # rdx - count, treated as ssize_t
166        #
167        .p2align 4,,15
168	ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
169	ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
170SYMBOL(_Copy_arrayof_conjoint_jshorts):
171SYMBOL(_Copy_conjoint_jshorts_atomic):
172        movq     %rdx,%r8             # word count
173        shrq     $2,%rdx              # qword count
174        cmpq     %rdi,%rsi
175        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
176        jbe      acs_CopyRight
177        cmpq     %rax,%rsi
178        jbe      acs_CopyLeft
179acs_CopyRight:
180        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
181        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
182        negq     %rdx
183        jmp      6f
1841:      movq     8(%rax,%rdx,8),%rsi
185        movq     %rsi,8(%rcx,%rdx,8)
186        addq     $1,%rdx
187        jnz      1b
1882:      testq    $2,%r8               # check for trailing dword
189        jz       3f
190        movl     8(%rax),%esi         # copy trailing dword
191        movl     %esi,8(%rcx)
192        addq     $4,%rcx              # original %rsi is trashed, so we
193                                      #  can't use it as a base register
1943:      testq    $1,%r8               # check for trailing word
195        jz       4f
196        movw     -2(%rdi,%r8,2),%si   # copy trailing word
197        movw     %si,8(%rcx)
1984:      ret
199        .p2align 4,,15
2005:      movq     -24(%rax,%rdx,8),%rsi
201        movq     %rsi,-24(%rcx,%rdx,8)
202        movq     -16(%rax,%rdx,8),%rsi
203        movq     %rsi,-16(%rcx,%rdx,8)
204        movq     -8(%rax,%rdx,8),%rsi
205        movq     %rsi,-8(%rcx,%rdx,8)
206        movq     (%rax,%rdx,8),%rsi
207        movq     %rsi,(%rcx,%rdx,8)
2086:      addq     $4,%rdx
209        jle      5b
210        subq     $4,%rdx
211        jl       1b
212        jmp      2b
213acs_CopyLeft:
214        testq    $1,%r8               # check for trailing word
215        jz       1f
216        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
217        movw     %cx,-2(%rsi,%r8,2)
2181:      testq    $2,%r8               # check for trailing dword
219        jz       4f
220        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
221        movl     %ecx,(%rsi,%rdx,8)
222        jmp      4f
2232:      movq     -8(%rdi,%rdx,8),%rcx
224        movq     %rcx,-8(%rsi,%rdx,8)
225        subq     $1,%rdx
226        jnz      2b
227        ret
228        .p2align 4,,15
2293:      movq     24(%rdi,%rdx,8),%rcx
230        movq     %rcx,24(%rsi,%rdx,8)
231        movq     16(%rdi,%rdx,8),%rcx
232        movq     %rcx,16(%rsi,%rdx,8)
233        movq     8(%rdi,%rdx,8),%rcx
234        movq     %rcx,8(%rsi,%rdx,8)
235        movq     (%rdi,%rdx,8),%rcx
236        movq     %rcx,(%rsi,%rdx,8)
2374:      subq     $4,%rdx
238        jge      3b
239        addq     $4,%rdx
240        jg       2b
241        ret
242
243        # Support for void Copy::arrayof_conjoint_jints(jint* from,
244        #                                               jint* to,
245        #                                               size_t count)
246        # Equivalent to
247        #   conjoint_jints_atomic
248        #
249        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
250        # the hardware handle it.  The two dwords within qwords that span
251        # cache line boundaries will still be loaded and stored atomically.
252        #
253        # rdi - from
254        # rsi - to
255        # rdx - count, treated as ssize_t
256        #
257        .p2align 4,,15
258	ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
259	ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
260SYMBOL(_Copy_arrayof_conjoint_jints):
261SYMBOL(_Copy_conjoint_jints_atomic):
262        movq     %rdx,%r8             # dword count
263        shrq     %rdx                 # qword count
264        cmpq     %rdi,%rsi
265        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
266        jbe      aci_CopyRight
267        cmpq     %rax,%rsi
268        jbe      aci_CopyLeft
269aci_CopyRight:
270        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
271        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
272        negq     %rdx
273        jmp      5f
274        .p2align 4,,15
2751:      movq     8(%rax,%rdx,8),%rsi
276        movq     %rsi,8(%rcx,%rdx,8)
277        addq     $1,%rdx
278        jnz       1b
2792:      testq    $1,%r8               # check for trailing dword
280        jz       3f
281        movl     8(%rax),%esi         # copy trailing dword
282        movl     %esi,8(%rcx)
2833:      ret
284        .p2align 4,,15
2854:      movq     -24(%rax,%rdx,8),%rsi
286        movq     %rsi,-24(%rcx,%rdx,8)
287        movq     -16(%rax,%rdx,8),%rsi
288        movq     %rsi,-16(%rcx,%rdx,8)
289        movq     -8(%rax,%rdx,8),%rsi
290        movq     %rsi,-8(%rcx,%rdx,8)
291        movq     (%rax,%rdx,8),%rsi
292        movq     %rsi,(%rcx,%rdx,8)
2935:      addq     $4,%rdx
294        jle      4b
295        subq     $4,%rdx
296        jl       1b
297        jmp      2b
298aci_CopyLeft:
299        testq    $1,%r8               # check for trailing dword
300        jz       3f
301        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
302        movl     %ecx,-4(%rsi,%r8,4)
303        jmp      3f
3041:      movq     -8(%rdi,%rdx,8),%rcx
305        movq     %rcx,-8(%rsi,%rdx,8)
306        subq     $1,%rdx
307        jnz      1b
308        ret
309        .p2align 4,,15
3102:      movq     24(%rdi,%rdx,8),%rcx
311        movq     %rcx,24(%rsi,%rdx,8)
312        movq     16(%rdi,%rdx,8),%rcx
313        movq     %rcx,16(%rsi,%rdx,8)
314        movq     8(%rdi,%rdx,8),%rcx
315        movq     %rcx,8(%rsi,%rdx,8)
316        movq     (%rdi,%rdx,8),%rcx
317        movq     %rcx,(%rsi,%rdx,8)
3183:      subq     $4,%rdx
319        jge      2b
320        addq     $4,%rdx
321        jg       1b
322        ret
323
324        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
325        #                                                jlong* to,
326        #                                                size_t count)
327        # Equivalent to
328        #   conjoint_jlongs_atomic
329        #   arrayof_conjoint_oops
330        #   conjoint_oops_atomic
331        #
332        # rdi - from
333        # rsi - to
334        # rdx - count, treated as ssize_t
335        #
336        .p2align 4,,15
337	ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
338	ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
339SYMBOL(_Copy_arrayof_conjoint_jlongs):
340SYMBOL(_Copy_conjoint_jlongs_atomic):
341        cmpq     %rdi,%rsi
342        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
343        jbe      acl_CopyRight
344        cmpq     %rax,%rsi
345        jbe      acl_CopyLeft
346acl_CopyRight:
347        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
348        negq     %rdx
349        jmp      3f
3501:      movq     8(%rax,%rdx,8),%rsi
351        movq     %rsi,8(%rcx,%rdx,8)
352        addq     $1,%rdx
353        jnz      1b
354        ret
355        .p2align 4,,15
3562:      movq     -24(%rax,%rdx,8),%rsi
357        movq     %rsi,-24(%rcx,%rdx,8)
358        movq     -16(%rax,%rdx,8),%rsi
359        movq     %rsi,-16(%rcx,%rdx,8)
360        movq     -8(%rax,%rdx,8),%rsi
361        movq     %rsi,-8(%rcx,%rdx,8)
362        movq     (%rax,%rdx,8),%rsi
363        movq     %rsi,(%rcx,%rdx,8)
3643:      addq     $4,%rdx
365        jle      2b
366        subq     $4,%rdx
367        jl       1b
368        ret
3694:      movq     -8(%rdi,%rdx,8),%rcx
370        movq     %rcx,-8(%rsi,%rdx,8)
371        subq     $1,%rdx
372        jnz      4b
373        ret
374        .p2align 4,,15
3755:      movq     24(%rdi,%rdx,8),%rcx
376        movq     %rcx,24(%rsi,%rdx,8)
377        movq     16(%rdi,%rdx,8),%rcx
378        movq     %rcx,16(%rsi,%rdx,8)
379        movq     8(%rdi,%rdx,8),%rcx
380        movq     %rcx,8(%rsi,%rdx,8)
381        movq     (%rdi,%rdx,8),%rcx
382        movq     %rcx,(%rsi,%rdx,8)
383acl_CopyLeft:
384        subq     $4,%rdx
385        jge      5b
386        addq     $4,%rdx
387        jg       4b
388        ret
389