1#
2# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
3# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4#
5# This code is free software; you can redistribute it and/or modify it
6# under the terms of the GNU General Public License version 2 only, as
7# published by the Free Software Foundation.
8#
9# This code is distributed in the hope that it will be useful, but WITHOUT
10# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12# version 2 for more details (a copy is included in the LICENSE file that
13# accompanied this code).
14#
15# You should have received a copy of the GNU General Public License version
16# 2 along with this work; if not, write to the Free Software Foundation,
17# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18#
19# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20# or visit www.oracle.com if you need additional information or have any
21# questions.
22#
23
24
# Platform glue so that the rest of the file can be written once:
#   SYMBOL   - spelling of a global symbol name on this platform
#   ELF_TYPE - expands to a .type directive, or to nothing under __APPLE__
25#ifdef __APPLE__
26# Darwin uses _ prefixed global symbols
27#define SYMBOL(s) _ ## s
28#define ELF_TYPE(name, description)
29#else
30#define SYMBOL(s) s
31#define ELF_TYPE(name, description) .type name,description
32#endif
33
        # Exported entry points.  SpinPause is exported next to its
        # definition further down rather than in this list.
34        .globl SYMBOL(fixcw)
35
36        # NOTE WELL!  The _Copy functions are called directly
37        # from server-compiler-generated code via CallLeafNoFP,
38        # which means that they *must* either not use floating
39        # point or use it in the same manner as does the server
40        # compiler.
41
42        .globl SYMBOL(_Copy_conjoint_bytes)
43        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
44        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
45        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
46        .globl SYMBOL(_Copy_conjoint_jints_atomic)
47        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
48        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
49        .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)
50
51        .globl SYMBOL(_Atomic_cmpxchg_long)
52        .globl SYMBOL(_Atomic_move_long)
53
        # Everything below is emitted into the code section.
54        .text
55
# Set fpu to 53 bit precision.  This happens too early to use a stub.
# Ported from solaris_x86_32.s, where it backs
# void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp.
#
# Takes no arguments.  Loads the x87 control word with 0x27f:
#   precision control (bits 9:8)  = 10b -> 53 bit significand
#   rounding control (bits 11:10) = 00b -> round to nearest
#   exception mask bits 5:0 all set     -> x87 exceptions masked
# Clobbers %eax (it receives the popped constant).  The stack pointer
# is the same on return as on entry.
        .p2align 4,,15
        # Mark the symbol as a function on ELF targets, like every other
        # exported routine in this file (expands to nothing under __APPLE__).
        ELF_TYPE(fixcw,@function)
SYMBOL(fixcw):
        pushl    $0x27f               # fldcw needs a memory operand: stage the word on the stack
        fldcw    0(%esp)              # load it into the x87 control register
        popl     %eax                 # drop the temporary
        ret
65
66        .globl  SYMBOL(SpinPause)
67        ELF_TYPE(SpinPause,@function)
68        .p2align 4,,15
69SYMBOL(SpinPause):
70        rep
71        nop
72        movl    $1, %eax
73        ret
74
75        # Support for void Copy::conjoint_bytes(void* from,
76        #                                       void* to,
77        #                                       size_t count)
        #
        # Byte copy between regions that may overlap.
        # Arguments are read from the stack.  %esi and %edi are saved and
        # restored; %eax, %ecx, %edx and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                 -> copy low to high
        #   from < to <= from+count-1  -> copy high to low (DF set, cleared on exit)
        #   anything else              -> copy low to high
        # Numeric labels are reused between the two directions; each
        # reference binds to the nearest definition in the stated direction.
78        .p2align 4,,15
79        ELF_TYPE(_Copy_conjoint_bytes,@function)
80SYMBOL(_Copy_conjoint_bytes):
81        pushl    %esi
82        movl     4+12(%esp),%ecx      # count
83        pushl    %edi
84        movl     8+ 4(%esp),%esi      # from
85        movl     8+ 8(%esp),%edi      # to
86        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
87        leal     -1(%esi,%ecx),%eax   # from + count - 1
88        jbe      cb_CopyRight
89        cmpl     %eax,%edi
90        jbe      cb_CopyLeft
91        # copy from low to high
92cb_CopyRight:
93        cmpl     $3,%ecx
94        jbe      5f                   # <= 3 bytes
95        # align source address at dword address boundary
96        movl     %ecx,%eax            # original count
97        movl     $4,%ecx
98        subl     %esi,%ecx
99        andl     $3,%ecx              # prefix byte count
100        jz       1f                   # no prefix
101        subl     %ecx,%eax            # byte count less prefix
102        # copy prefix
        # %edi temporarily holds (to - from), so (%edi,%esi,1) addresses the
        # destination and only %esi has to be advanced inside the loop
103        subl     %esi,%edi
1040:      movb     (%esi),%dl
105        movb     %dl,(%edi,%esi,1)
106        addl     $1,%esi
107        subl     $1,%ecx
108        jnz      0b
109        addl     %esi,%edi            # back to an absolute destination pointer
1101:      movl     %eax,%ecx            # byte count less prefix
111        shrl     $2,%ecx              # dword count
112        jz       4f                   # no dwords to move
113        cmpl     $32,%ecx
114        jbe      2f                   # <= 32 dwords
115        # copy aligned dwords
116        rep;     smovl
117        jmp      4f
118        # copy aligned dwords
        # short runs use an explicit loop; %edi again holds (to - from)
1192:      subl     %esi,%edi
120        .p2align 4,,15
1213:      movl     (%esi),%edx
122        movl     %edx,(%edi,%esi,1)
123        addl     $4,%esi
124        subl     $1,%ecx
125        jnz      3b
126        addl     %esi,%edi
1274:      movl     %eax,%ecx            # byte count less prefix
        # label 5 is also the entry for counts <= 3, where %ecx is the full count
1285:      andl     $3,%ecx              # suffix byte count
129        jz       7f                   # no suffix
130        # copy suffix
131        xorl     %eax,%eax            # %eax becomes the byte index
1326:      movb     (%esi,%eax,1),%dl
133        movb     %dl,(%edi,%eax,1)
134        addl     $1,%eax
135        subl     $1,%ecx
136        jnz      6b
1377:      popl     %edi
138        popl     %esi
139        ret
140        # copy from high to low
141cb_CopyLeft:
        # DF is set so that rep smovl walks downwards; every exit from this
        # path goes through the cld at label 7
142        std
143        leal     -4(%edi,%ecx),%edi   # to + count - 4
144        movl     %eax,%esi            # from + count - 1
145        movl     %ecx,%eax
146        subl     $3,%esi              # from + count - 4
147        cmpl     $3,%ecx
148        jbe      5f                   # <= 3 bytes
1491:      shrl     $2,%ecx              # dword count
150        jz       4f                   # no dwords to move
151        cmpl     $32,%ecx
152        ja       3f                   # > 32 dwords
153        # copy dwords, aligned or not
154        subl     %esi,%edi
155        .p2align 4,,15
1562:      movl     (%esi),%edx
157        movl     %edx,(%edi,%esi,1)
158        subl     $4,%esi
159        subl     $1,%ecx
160        jnz      2b
161        addl     %esi,%edi
162        jmp      4f
163        # copy dwords, aligned or not
1643:      rep;     smovl
1654:      movl     %eax,%ecx            # byte count
1665:      andl     $3,%ecx              # suffix byte count
167        jz       7f                   # no suffix
168        # copy suffix
        # %esi points 4 bytes below the last dword handled; +3 moves it to
        # the highest byte still to be copied, then bytes go downwards
169        subl     %esi,%edi
170        addl     $3,%esi
1716:      movb     (%esi),%dl
172        movb     %dl,(%edi,%esi,1)
173        subl     $1,%esi
174        subl     $1,%ecx
175        jnz      6b
1767:      cld
177        popl     %edi
178        popl     %esi
179        ret
180
181        # Support for void Copy::arrayof_conjoint_bytes(void* from,
182        #                                               void* to,
183        #                                               size_t count)
184        #
185        # Same as _Copy_conjoint_bytes, except no source alignment check.
        #
        # Arguments are read from the stack.  %esi and %edi are saved and
        # restored; %eax, %ecx, %edx and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                 -> copy low to high
        #   from < to <= from+count-1  -> copy high to low (DF set, cleared on exit)
        #   anything else              -> copy low to high
186        .p2align 4,,15
187        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
188SYMBOL(_Copy_arrayof_conjoint_bytes):
189        pushl    %esi
190        movl     4+12(%esp),%ecx      # count
191        pushl    %edi
192        movl     8+ 4(%esp),%esi      # from
193        movl     8+ 8(%esp),%edi      # to
194        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
195        leal     -1(%esi,%ecx),%eax   # from + count - 1
196        jbe      acb_CopyRight
197        cmpl     %eax,%edi
198        jbe      acb_CopyLeft
199        # copy from low to high
200acb_CopyRight:
201        cmpl     $3,%ecx
202        jbe      5f                   # <= 3 bytes: byte loop only
2031:      movl     %ecx,%eax            # keep the byte count for the suffix
204        shrl     $2,%ecx              # dword count
205        jz       4f
206        cmpl     $32,%ecx
207        ja       3f                   # > 32 dwords: use the string move
208        # copy aligned dwords
        # %edi temporarily holds (to - from), so (%edi,%esi,1) addresses the
        # destination and only %esi has to be advanced inside the loop
209        subl     %esi,%edi
210        .p2align 4,,15
2112:      movl     (%esi),%edx
212        movl     %edx,(%edi,%esi,1)
213        addl     $4,%esi
214        subl     $1,%ecx
215        jnz      2b
216        addl     %esi,%edi            # back to an absolute destination pointer
217        jmp      4f
218        # copy aligned dwords
2193:      rep;     smovl
2204:      movl     %eax,%ecx            # byte count
2215:      andl     $3,%ecx              # suffix byte count
222        jz       7f
223        # copy suffix
224        xorl     %eax,%eax            # %eax becomes the byte index
2256:      movb     (%esi,%eax,1),%dl
226        movb     %dl,(%edi,%eax,1)
227        addl     $1,%eax
228        subl     $1,%ecx
229        jnz      6b
2307:      popl     %edi
231        popl     %esi
232        ret
        # copy from high to low
233acb_CopyLeft:
        # DF is set so that rep smovl walks downwards; every exit from this
        # path goes through the cld at label 7
234        std
235        leal     -4(%edi,%ecx),%edi   # to + count - 4
236        movl     %eax,%esi            # from + count - 1
237        movl     %ecx,%eax
238        subl     $3,%esi              # from + count - 4
239        cmpl     $3,%ecx
240        jbe      5f                   # <= 3 bytes: byte loop only
2411:      shrl     $2,%ecx              # dword count
242        jz       4f
243        cmpl     $32,%ecx
244        jbe      2f                   # <= 32 dwords
245        rep;     smovl
246        jmp      4f
        # 8 bytes of fill that are never executed (the jmp above skips them).
        # NOTE(review): presumably placed to tune the position of the loop
        # below - confirm before removing.
247        .space 8
2482:      subl     %esi,%edi
249        .p2align 4,,15
2503:      movl     (%esi),%edx
251        movl     %edx,(%edi,%esi,1)
252        subl     $4,%esi
253        subl     $1,%ecx
254        jnz      3b
255        addl     %esi,%edi
2564:      movl     %eax,%ecx            # byte count
2575:      andl     $3,%ecx              # suffix byte count
258        jz       7f
        # %esi points 4 bytes below the last dword handled; +3 moves it to
        # the highest byte still to be copied, then bytes go downwards
259        subl     %esi,%edi
260        addl     $3,%esi
2616:      movb     (%esi),%dl
262        movb     %dl,(%edi,%esi,1)
263        subl     $1,%esi
264        subl     $1,%ecx
265        jnz      6b
2667:      cld
267        popl     %edi
268        popl     %esi
269        ret
270
271        # Support for void Copy::conjoint_jshorts_atomic(void* from,
272        #                                                void* to,
273        #                                                size_t count)
        #
        # Copies count 16-bit elements between regions that may overlap.
        # Data is only ever moved with movw, movl or rep smovl, never
        # with byte moves.
        # Arguments are read from the stack.  %esi and %edi are saved and
        # restored; %eax, %ecx, %edx and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                   -> copy low to high
        #   from < to <= from+count*2-2  -> copy high to low (DF set, cleared on exit)
        #   anything else                -> copy low to high
274        .p2align 4,,15
275        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
276SYMBOL(_Copy_conjoint_jshorts_atomic):
277        pushl    %esi
278        movl     4+12(%esp),%ecx      # count
279        pushl    %edi
280        movl     8+ 4(%esp),%esi      # from
281        movl     8+ 8(%esp),%edi      # to
282        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
283        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
284        jbe      cs_CopyRight
285        cmpl     %eax,%edi
286        jbe      cs_CopyLeft
287        # copy from low to high
288cs_CopyRight:
289        # align source address at dword address boundary
        # NOTE(review): the "0 or 2" below assumes from is 2-byte aligned;
        # nothing in this routine checks that.
290        movl     %esi,%eax            # original from
291        andl     $3,%eax              # either 0 or 2
292        jz       1f                   # no prefix
293        # copy prefix
294        subl     $1,%ecx
295        jl       5f                   # zero count
296        movw     (%esi),%dx
297        movw     %dx,(%edi)
298        addl     %eax,%esi            # %eax == 2
299        addl     %eax,%edi
3001:      movl     %ecx,%eax            # word count less prefix
301        sarl     %ecx                 # dword count
302        jz       4f                   # no dwords to move
303        cmpl     $32,%ecx
304        jbe      2f                   # <= 32 dwords
305        # copy aligned dwords
306        rep;     smovl
307        jmp      4f
308        # copy aligned dwords
        # %edi temporarily holds (to - from), so (%edi,%esi,1) addresses the
        # destination and only %esi has to be advanced inside the loop
3092:      subl     %esi,%edi
310        .p2align 4,,15
3113:      movl     (%esi),%edx
312        movl     %edx,(%edi,%esi,1)
313        addl     $4,%esi
314        subl     $1,%ecx
315        jnz      3b
316        addl     %esi,%edi            # back to an absolute destination pointer
3174:      andl     $1,%eax              # suffix count
318        jz       5f                   # no suffix
319        # copy suffix
320        movw     (%esi),%dx
321        movw     %dx,(%edi)
3225:      popl     %edi
323        popl     %esi
324        ret
325        # copy from high to low
326cs_CopyLeft:
        # DF is set so that rep smovl walks downwards; every exit from this
        # path goes through the cld at label 5
327        std
328        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
329        movl     %eax,%esi            # from + count*2 - 2
330        movl     %ecx,%eax
331        subl     $2,%esi              # from + count*2 - 4
3321:      sarl     %ecx                 # dword count
333        jz       4f                   # no dwords to move
334        cmpl     $32,%ecx
335        ja       3f                   # > 32 dwords
336        subl     %esi,%edi
337        .p2align 4,,15
3382:      movl     (%esi),%edx
339        movl     %edx,(%edi,%esi,1)
340        subl     $4,%esi
341        subl     $1,%ecx
342        jnz      2b
343        addl     %esi,%edi
344        jmp      4f
3453:      rep;     smovl
3464:      andl     $1,%eax              # suffix count
347        jz       5f                   # no suffix
348        # copy suffix
        # both pointers sit 4 bytes below the last dword handled; +2 moves
        # them to the one remaining (lowest) element
349        addl     $2,%esi
350        addl     $2,%edi
351        movw     (%esi),%dx
352        movw     %dx,(%edi)
3535:      cld
354        popl     %edi
355        popl     %esi
356        ret
357
358        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
359        #                                                 void* to,
360        #                                                 size_t count)
        #
        # Copies count 16-bit elements between regions that may overlap.
        # Unlike _Copy_conjoint_jshorts_atomic there is no prefix step that
        # aligns the source before the dword moves.
        # Arguments are read from the stack.  %esi and %edi are saved and
        # restored; %eax, %ecx, %edx and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                   -> copy low to high
        #   from < to <= from+count*2-2  -> copy high to low (DF set, cleared on exit)
        #   anything else                -> copy low to high
361        .p2align 4,,15
362        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
363SYMBOL(_Copy_arrayof_conjoint_jshorts):
364        pushl    %esi
365        movl     4+12(%esp),%ecx      # count
366        pushl    %edi
367        movl     8+ 4(%esp),%esi      # from
368        movl     8+ 8(%esp),%edi      # to
369        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
370        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
371        jbe      acs_CopyRight
372        cmpl     %eax,%edi
373        jbe      acs_CopyLeft
        # copy from low to high
374acs_CopyRight:
375        movl     %ecx,%eax            # word count
376        sarl     %ecx                 # dword count
377        jz       4f                   # no dwords to move
378        cmpl     $32,%ecx
379        jbe      2f                   # <= 32 dwords
380        # copy aligned dwords
381        rep;     smovl
382        jmp      4f
383        # copy aligned dwords
        # 5 bytes of fill that are never executed (the jmp above skips them).
        # NOTE(review): presumably placed to tune the position of the loop
        # below - confirm before removing.
384        .space 5
        # %edi temporarily holds (to - from), so (%edi,%esi,1) addresses the
        # destination and only %esi has to be advanced inside the loop
3852:      subl     %esi,%edi
386        .p2align 4,,15
3873:      movl     (%esi),%edx
388        movl     %edx,(%edi,%esi,1)
389        addl     $4,%esi
390        subl     $1,%ecx
391        jnz      3b
392        addl     %esi,%edi            # back to an absolute destination pointer
3934:      andl     $1,%eax              # suffix count
394        jz       5f                   # no suffix
395        # copy suffix
396        movw     (%esi),%dx
397        movw     %dx,(%edi)
3985:      popl     %edi
399        popl     %esi
400        ret
        # copy from high to low
401acs_CopyLeft:
        # DF is set so that rep smovl walks downwards; every exit from this
        # path goes through the cld at label 5
402        std
403        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
404        movl     %eax,%esi            # from + count*2 - 2
405        movl     %ecx,%eax
406        subl     $2,%esi              # from + count*2 - 4
407        sarl     %ecx                 # dword count
408        jz       4f                   # no dwords to move
409        cmpl     $32,%ecx
410        ja       3f                   # > 32 dwords
411        subl     %esi,%edi
412        .p2align 4,,15
4132:      movl     (%esi),%edx
414        movl     %edx,(%edi,%esi,1)
415        subl     $4,%esi
416        subl     $1,%ecx
417        jnz      2b
418        addl     %esi,%edi
419        jmp      4f
4203:      rep;     smovl
4214:      andl     $1,%eax              # suffix count
422        jz       5f                   # no suffix
423        # copy suffix
        # both pointers sit 4 bytes below the last dword handled; +2 moves
        # them to the one remaining (lowest) element
424        addl     $2,%esi
425        addl     $2,%edi
426        movw     (%esi),%dx
427        movw     %dx,(%edi)
4285:      cld
429        popl     %edi
430        popl     %esi
431        ret
432
433        # Support for void Copy::conjoint_jints_atomic(void* from,
434        #                                              void* to,
435        #                                              size_t count)
436        # Equivalent to
437        #   arrayof_conjoint_jints
        #
        # Both symbols below label the same code.  Copies count 32-bit
        # elements between regions that may overlap, using only movl or
        # rep smovl to move data.
        # Arguments are read from the stack.  %esi and %edi are saved and
        # restored; %eax, %ecx, %edx and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                   -> copy low to high
        #   from < to <= from+count*4-4  -> copy high to low (DF set, cleared on exit)
        #   anything else                -> copy low to high
438        .p2align 4,,15
439        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
440        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
441SYMBOL(_Copy_conjoint_jints_atomic):
442SYMBOL(_Copy_arrayof_conjoint_jints):
443        pushl    %esi
444        movl     4+12(%esp),%ecx      # count
445        pushl    %edi
446        movl     8+ 4(%esp),%esi      # from
447        movl     8+ 8(%esp),%edi      # to
448        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
449        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
450        jbe      ci_CopyRight
451        cmpl     %eax,%edi
452        jbe      ci_CopyLeft
        # copy from low to high
453ci_CopyRight:
454        cmpl     $32,%ecx
455        jbe      2f                   # <= 32 dwords
456        rep;     smovl
457        popl     %edi
458        popl     %esi
459        ret
        # 10 bytes of fill that are never executed (they follow a ret).
        # NOTE(review): presumably placed to tune the position of the loop
        # below - confirm before removing.
460        .space 10
        # %edi holds (to - from), so (%edi,%esi,1) addresses the destination.
        # The loop is entered at its test (label 4), so a zero count copies
        # nothing; the count is decremented first and compared as signed.
4612:      subl     %esi,%edi
462        jmp      4f
463        .p2align 4,,15
4643:      movl     (%esi),%edx
465        movl     %edx,(%edi,%esi,1)
466        addl     $4,%esi
4674:      subl     $1,%ecx
468        jge      3b
469        popl     %edi
470        popl     %esi
471        ret
        # copy from high to low
472ci_CopyLeft:
        # DF is set before the size test, so it is also set on the movl
        # path that does not use string instructions; both exits issue cld
473        std
474        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
475        cmpl     $32,%ecx
476        ja       4f                   # > 32 dwords
        # after the subtraction %edi holds (to - from); %eax is the source
        # pointer and walks downwards from the last element
477        subl     %eax,%edi            # eax == from + count*4 - 4
478        jmp      3f
479        .p2align 4,,15
4802:      movl     (%eax),%edx
481        movl     %edx,(%edi,%eax,1)
482        subl     $4,%eax
4833:      subl     $1,%ecx
484        jge      2b
485        cld
486        popl     %edi
487        popl     %esi
488        ret
4894:      movl     %eax,%esi            # from + count*4 - 4
490        rep;     smovl
491        cld
492        popl     %edi
493        popl     %esi
494        ret
495
496        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
497        #                                               jlong* to,
498        #                                               size_t count)
499        #
500        # 32-bit
501        #
502        # count treated as signed
503        #
        # NOTE(review): the leading // on the next line presumably keeps the
        # preprocessor from reading it as a conditional directive - keep it.
504        # // if (from > to) {
505        #   while (--count >= 0) {
506        #     *to++ = *from++;
507        #   }
508        # } else {
509        #   while (--count >= 0) {
510        #     to[count] = from[count];
511        #   }
512        # }
        #
        # Each element is moved with one 64-bit x87 load (fildll) followed
        # by one 64-bit x87 store-and-pop (fistpll), so the x87 stack depth
        # is unchanged on return.
        # Arguments are read from the stack.  No registers are saved;
        # %eax, %ecx, %edx and the flags are clobbered.
513        .p2align 4,,15
514        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
515SYMBOL(_Copy_conjoint_jlongs_atomic):
516        movl     4+8(%esp),%ecx       # count
517        movl     4+0(%esp),%eax       # from
518        movl     4+4(%esp),%edx       # to
519        cmpl     %eax,%edx
520        jae      cla_CopyLeft         # to >= from (unsigned): go from the last element down
        # to < from: walk upwards
521cla_CopyRight:
        # %edx holds (to - from), so (%edx,%eax,1) addresses the destination
522        subl     %eax,%edx
523        jmp      2f                   # enter the loop at its test
524        .p2align 4,,15
5251:      fildll   (%eax)
526        fistpll  (%edx,%eax,1)
527        addl     $8,%eax
5282:      subl     $1,%ecx
529        jge      1b
530        ret
531        .p2align 4,,15
        # %ecx is the element index, counting down from count-1 to 0
5323:      fildll   (%eax,%ecx,8)
533        fistpll  (%edx,%ecx,8)
        # entry point of the downward loop is its test
534cla_CopyLeft:
535        subl     $1,%ecx
536        jge      3b
537        ret
538
539        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
540        #                                                 void* to,
541        #                                                 size_t count)
        #
        # Variant whose low-to-high path moves long runs through the MMX
        # registers %mm0-%mm2, 64 bytes per loop iteration, and issues emms
        # once that loop is done.  The high-to-low path uses movl or
        # rep smovl only.
        # Arguments are read from the stack (count is in 16-bit elements).
        # %esi and %edi are saved and restored; %eax, %ecx, %edx, %mm0-%mm2
        # and the flags are clobbered.
        # Direction is chosen with unsigned address comparisons:
        #   to <= from                   -> copy low to high
        #   from < to <= from+count*2-2  -> copy high to low (DF set, cleared on exit)
        #   anything else                -> copy low to high
542        .p2align 4,,15
543        ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
544SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
545        pushl    %esi
546        movl     4+12(%esp),%ecx      # count
547        pushl    %edi
548        movl     8+ 4(%esp),%esi      # from
549        movl     8+ 8(%esp),%edi      # to
550        cmpl     %esi,%edi
        # leal leaves the flags alone, so the jbe below still tests to <= from
551        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
552        jbe      mmx_acs_CopyRight
553        cmpl     %eax,%edi
554        jbe      mmx_acs_CopyLeft
        # copy from low to high
555mmx_acs_CopyRight:
556        movl     %ecx,%eax            # word count, kept for the suffix test
557        sarl     %ecx                 # dword count
558        je       5f                   # no dwords to move
559        cmpl     $33,%ecx
560        jae      3f                   # >= 33 dwords: MMX path
        # scalar dword loop; also used for what is left after the MMX loop.
        # %edi temporarily holds (to - from) while only %esi advances.
5611:      subl     %esi,%edi
562        .p2align 4,,15
5632:      movl     (%esi),%edx
564        movl     %edx,(%edi,%esi,1)
565        addl     $4,%esi
566        subl     $1,%ecx
567        jnz      2b
568        addl     %esi,%edi            # back to an absolute destination pointer
569        jmp      5f
5703:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
571        subl     $1,%ecx              # account for the dword moved above
        # Main loop: 8 quadword loads and stores, i.e. 16 dwords per pass.
        # %edi is advanced first, hence the negative store displacements.
        # NOTE(review): label 4 sits before the .p2align, so the back edge
        # lands ahead of any alignment padding - confirm this is intended.
5724:      .p2align 4,,15
573        movq     0(%esi),%mm0
574        addl     $64,%edi
575        movq     8(%esi),%mm1
576        subl     $16,%ecx
577        movq     16(%esi),%mm2
578        movq     %mm0,-64(%edi)
579        movq     24(%esi),%mm0
580        movq     %mm1,-56(%edi)
581        movq     32(%esi),%mm1
582        movq     %mm2,-48(%edi)
583        movq     40(%esi),%mm2
584        movq     %mm0,-40(%edi)
585        movq     48(%esi),%mm0
586        movq     %mm1,-32(%edi)
587        movq     56(%esi),%mm1
588        movq     %mm2,-24(%edi)
589        movq     %mm0,-16(%edi)
590        addl     $64,%esi
591        movq     %mm1,-8(%edi)
592        cmpl     $16,%ecx
593        jge      4b                   # at least 16 dwords left: another pass
594        emms                          # leave MMX state before returning to x87 users
        # testl clears CF, so ja is taken exactly when %ecx is nonzero
595        testl    %ecx,%ecx
596        ja       1b                   # finish leftover dwords in the scalar loop
5975:      andl     $1,%eax              # suffix count
598        je       7f                   # no suffix
5996:      movw     (%esi),%dx
600        movw     %dx,(%edi)
6017:      popl     %edi
602        popl     %esi
603        ret
        # copy from high to low
604mmx_acs_CopyLeft:
        # DF is set so that rep smovl walks downwards; every exit from this
        # path goes through the cld at label 6
605        std
606        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
607        movl     %eax,%esi            # from + count*2 - 2
608        movl     %ecx,%eax
609        subl     $2,%esi              # from + count*2 - 4
610        sarl     %ecx                 # dword count
611        je       4f                   # no dwords to move
612        cmpl     $32,%ecx
613        ja       3f                   # > 32 dwords
614        subl     %esi,%edi
615        .p2align 4,,15
6162:      movl     (%esi),%edx
617        movl     %edx,(%edi,%esi,1)
618        subl     $4,%esi
619        subl     $1,%ecx
620        jnz      2b
621        addl     %esi,%edi
622        jmp      4f
6233:      rep;     smovl
6244:      andl     $1,%eax              # suffix count
625        je       6f                   # no suffix
        # both pointers sit 4 bytes below the last dword handled; +2 moves
        # them to the one remaining (lowest) element
626        addl     $2,%esi
627        addl     $2,%edi
6285:      movw     (%esi),%dx
629        movw     %dx,(%edi)
6306:      cld
631        popl     %edi
632        popl     %esi
633        ret
634
635
636        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
637        #                                   volatile jlong* dest,
638        #                                   jlong compare_value,
639        #                                   bool is_MP)
640        #
641        .p2align 4,,15
642        ELF_TYPE(_Atomic_cmpxchg_long,@function)
643SYMBOL(_Atomic_cmpxchg_long):
644                                   #  8(%esp) : return PC
645        pushl    %ebx              #  4(%esp) : old %ebx
646        pushl    %edi              #  0(%esp) : old %edi
647        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
648        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
649        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
650        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
651        movl     20(%esp), %edi    # 20(%esp) : dest
652        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
653        je       1f
654        lock
6551:      cmpxchg8b (%edi)
656        popl     %edi
657        popl     %ebx
658        ret
659
660
661        # Support for jlong Atomic::load and Atomic::store.
662        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
663        .p2align 4,,15
664        ELF_TYPE(_Atomic_move_long,@function)
665SYMBOL(_Atomic_move_long):
666        movl     4(%esp), %eax   # src
667        fildll    (%eax)
668        movl     8(%esp), %eax   # dest
669        fistpll   (%eax)
670        ret
671
672