#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

# Syntax: GNU as, AT&T operand order, 32-bit x86.  The file is run through
# the C preprocessor first, so the macros below are expanded before assembly.
# Routines that take arguments read them from the stack relative to %esp.
#
# Platform glue:
#   SYMBOL   - spelling of a global symbol; Darwin prepends an underscore.
#   ELF_TYPE - emits a .type directive; expands to nothing on Darwin.

#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif

        .globl SYMBOL(fixcw)

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.
# Global (externally visible) entry points defined in this file.

        .globl SYMBOL(_Copy_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)
        .globl SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts)

        .globl SYMBOL(_Atomic_cmpxchg_long)
        .globl SYMBOL(_Atomic_move_long)

        .text

# Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
# Set fpu to 53 bit precision.  This happens too early to use a stub.
# ported from solaris_x86_32.s
# NOTE(review): the Solaris names above were carried over by the port; the
# caller on this platform is presumably the matching os-specific routine.
#
# Takes no arguments.  Loads 0x27f into the x87 control word through a
# temporary stack slot: precision-control field 10b (53-bit mantissa),
# rounding field 00b (nearest), all six exception mask bits set.
# Clobbers %eax, which receives the popped temporary.
        .p2align 4,,15
SYMBOL(fixcw):
        pushl    $0x27f               # temporary slot holding the control word
        fldcw    0(%esp)
        popl     %eax                 # release the slot
        ret

        # Spin-wait hint.  Takes no arguments and always returns 1 in %eax.
        .globl SYMBOL(SpinPause)
        ELF_TYPE(SpinPause,@function)
        .p2align 4,,15
SYMBOL(SpinPause):
        rep                           # rep + nop is the byte encoding of PAUSE
        nop
        movl     $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Byte copy that tolerates overlapping ranges.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Preserves %esi and %edi; clobbers %eax, %ecx, %edx and the flags.
        # The forward path assumes the direction flag is clear on entry; the
        # backward path sets it with std and clears it again before returning.
        #
        # Direction choice:
        #   to <= from                  -> forward  (cb_CopyRight)
        #   from < to <= from+count-1   -> backward (cb_CopyLeft)
        #   otherwise (no overlap)      -> forward
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_bytes,@function)
SYMBOL(_Copy_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight         # flags still from cmpl; leal leaves them alone
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        # %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses
        # the destination while only %esi is advanced.
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # back to an absolute destination pointer
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # same (to - from) trick as the prefix loop
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # %eax becomes the byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                           # string moves now walk downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi            # %edi = to - from
        addl     $3,%esi              # last byte still to be copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
# Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count (bytes).
# Preserves %esi and %edi; clobbers %eax, %ecx, %edx and the flags.
# The forward path assumes the direction flag is clear on entry; the backward
# path sets it with std and clears it again before returning.
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight        # to <= from: forward copy
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # to lies inside the source: backward copy
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f
        # copy aligned dwords
        # %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses
        # the destination while only %esi is advanced.
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # back to an absolute destination pointer
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy suffix
        xorl     %eax,%eax            # %eax becomes the byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
acb_CopyLeft:
        std                           # string moves now walk downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .space 8                      # explicit filler bytes, never executed
2:      subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        subl     %esi,%edi            # %edi = to - from
        addl     $3,%esi              # last byte still to be copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # count is a number of 16-bit elements.  Data is moved only as whole
        # 16-bit or 32-bit units, never byte by byte.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Preserves %esi and %edi; clobbers %eax, %ecx, %edx and the flags.
        # The forward path assumes the direction flag is clear on entry.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_conjoint_jshorts_atomic):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step from the dword slot to the odd word
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Same structure as _Copy_conjoint_jshorts_atomic but without the
        # source alignment prefix.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Preserves %esi and %edi; clobbers %eax, %ecx, %edx and the flags.
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        .space 5                      # explicit filler bytes, never executed
2:      subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # count is a number of 32-bit elements; both entry labels share one body.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Preserves %esi and %edi; clobbers %eax, %ecx, %edx and the flags.
        # The short loops test the count with a signed jge, so a count that is
        # zero or negative copies nothing.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
SYMBOL(_Copy_conjoint_jints_atomic):
SYMBOL(_Copy_arrayof_conjoint_jints):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        .space 10                     # explicit filler bytes, never executed
2:      subl     %esi,%edi            # %edi = to - from while the loop runs
        jmp      4f                   # enter the loop at its test
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # enter the loop at its test
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # // if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Each element is moved with one 64-bit x87 integer load and one
        # 64-bit x87 integer store, so the x87 stack is balanced on return.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Uses only %eax, %ecx, %edx, the flags and one x87 stack slot.
        # NOTE(review): presumably relies on 8-byte aligned elements for the
        # single-access atomicity implied by the name - confirm with callers.
        .p2align 4,,15
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_conjoint_jlongs_atomic):
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the top down
cla_CopyRight:
        subl     %eax,%edx            # %edx = to - from
        jmp      2f
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)
        fistpll  (%edx,%ecx,8)
cla_CopyLeft:
        subl     $1,%ecx              # loop is entered here, at its test
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # MMX variant: a forward copy of 33 or more dwords goes through a
        # loop that moves 64 bytes per iteration in %mm0-%mm2.  Every other
        # case mirrors _Copy_arrayof_conjoint_jshorts.
        # Stack arguments on entry: 4(%esp) from, 8(%esp) to, 12(%esp) count.
        # Preserves %esi and %edi; clobbers %eax, %ecx, %edx, %mm0-%mm2 and
        # the flags.  emms is executed before the MMX path rejoins the rest.
        .p2align 4,,15
        ELF_TYPE(_mmx_Copy_arrayof_conjoint_jshorts,@function)
SYMBOL(_mmx_Copy_arrayof_conjoint_jshorts):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count (16-bit elements)
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
mmx_acs_CopyRight:
        movl     %ecx,%eax            # keep the word count for the suffix
        sarl     %ecx                 # dword count
        je       5f
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
1:      subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      5f
3:      smovl                         # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx
        # Pad ahead of the loop label so that the back edge lands directly
        # on the first movq and the padding is executed at most once.
        .p2align 4,,15
4:      movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords == 64 bytes per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b
        emms                          # leave MMX state before any x87 use
        testl    %ecx,%ecx            # clears CF, so ja below means non-zero
        ja       1b                   # finish leftover dwords in the plain loop
5:      andl     $1,%eax              # odd trailing word
        je       7f
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f
        cmpl     $32,%ecx
        ja       3f
        subl     %esi,%edi            # %edi = to - from while the loop runs
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # odd trailing word
        je       6f
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # Compares the 64-bit value at dest with compare_value and, when they
        # match, stores exchange_value there.  cmpxchg8b leaves the value that
        # was found at dest in %edx:%eax in both cases, and that pair is what
        # the caller receives.  The lock prefix is skipped when is_MP is zero.
        # Preserves %ebx and %edi; clobbers %ecx and the flags.
        .p2align 4,,15
        ELF_TYPE(_Atomic_cmpxchg_long,@function)
SYMBOL(_Atomic_cmpxchg_long):
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f                # jump past the prefix byte only
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
        #
        # Moves 64 bits with one x87 integer load and one x87 integer store.
        # Clobbers %eax; the x87 stack is balanced on return.
        .p2align 4,,15
        ELF_TYPE(_Atomic_move_long,@function)
SYMBOL(_Atomic_move_long):
        movl     4(%esp), %eax     # src
        fildll   (%eax)
        movl     8(%esp), %eax     # dest
        fistpll  (%eax)
        ret
