163d1a8abSmrg# ieee754 sf routines for FT32
263d1a8abSmrg
3*ec02198aSmrg/* Copyright (C) 1995-2020 Free Software Foundation, Inc.
463d1a8abSmrg
563d1a8abSmrgThis file is free software; you can redistribute it and/or modify it
663d1a8abSmrgunder the terms of the GNU General Public License as published by the
763d1a8abSmrgFree Software Foundation; either version 3, or (at your option) any
863d1a8abSmrglater version.
963d1a8abSmrg
1063d1a8abSmrgThis file is distributed in the hope that it will be useful, but
1163d1a8abSmrgWITHOUT ANY WARRANTY; without even the implied warranty of
1263d1a8abSmrgMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1363d1a8abSmrgGeneral Public License for more details.
1463d1a8abSmrg
1563d1a8abSmrgUnder Section 7 of GPL version 3, you are granted additional
1663d1a8abSmrgpermissions described in the GCC Runtime Library Exception, version
1763d1a8abSmrg3.1, as published by the Free Software Foundation.
1863d1a8abSmrg
1963d1a8abSmrgYou should have received a copy of the GNU General Public License and
2063d1a8abSmrga copy of the GCC Runtime Library Exception along with this program;
2163d1a8abSmrgsee the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2263d1a8abSmrg<http://www.gnu.org/licenses/>.  */
2363d1a8abSmrg
2463d1a8abSmrg# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
2563d1a8abSmrg# for implementation details of all except division which is detailed below
2663d1a8abSmrg#
2763d1a8abSmrg
2863d1a8abSmrg#ifdef L_fp_tools
# Shared read-only constants and the lookup table used by the sf routines
# in this file; the routines fetch them with lpm / lpmi.b.
2963d1a8abSmrg// .global __cmpsf2_
3063d1a8abSmrgnan:            .long 0x7FFFFFFF    # loaded as the NaN result; also abs mask (every bit but bit 31)
3163d1a8abSmrginf:            .long 0x7F800000    # exponent field all ones, fraction zero
3263d1a8abSmrgsign_mask:      .long 0x80000000    # bit 31 only
3363d1a8abSmrgm_mask:         .long 0x007FFFFF    # low 23 bits (fraction field)
3463d1a8abSmrgexp_bias:       .long 127
3563d1a8abSmrgedge_case:      .long 0x00FFFFFF    # pattern that __mulsf3 promotes to smallest_norm
3663d1a8abSmrgsmallest_norm:  .long 0x00800000    # implicit bit
3763d1a8abSmrghigh_FF:        .long 0xFF000000
3863d1a8abSmrghigh_uint:      .long 0xFFFFFFFF
3963d1a8abSmrg
# Byte table indexed by the 6-bit value computed in the ntz macro.
# Entry 0 holds 32, which is the value ntz produces for an input of zero.
4063d1a8abSmrgntz_table:
4163d1a8abSmrg    .byte   32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
4263d1a8abSmrg    .byte   10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
4363d1a8abSmrg    .byte   31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
4463d1a8abSmrg    .byte   30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0
4563d1a8abSmrg
4663d1a8abSmrg#endif
4763d1a8abSmrg
4863d1a8abSmrg# Supply a few 'missing' instructions
4963d1a8abSmrg
5063d1a8abSmrg# not: rd = bitwise complement of r1 (xor with the all-ones immediate -1)
5163d1a8abSmrg.macro      not rd,r1
5263d1a8abSmrg    xor     \rd,\r1,-1
5363d1a8abSmrg.endm
5463d1a8abSmrg
5563d1a8abSmrg# negate: x = -x, computed in place as (~x) + 1
5663d1a8abSmrg.macro      neg x
5763d1a8abSmrg    not     \x, \x
5863d1a8abSmrg    add     \x, \x, 1
5963d1a8abSmrg.endm
6063d1a8abSmrg
6163d1a8abSmrg# set $cc from the result of "ashl reg,dist"
# The instruction is emitted as a raw word with .long: reg is placed in the
# word at bit 15 and dist at bit 4, so both arguments must be plain numbers
# (this file writes "ashlcc 2,10", presumably meaning register $r2).
# NOTE(review): the base word 0x5de04008 is not checked here - confirm it
# against the FT32 instruction encoding before changing it.
6263d1a8abSmrg.macro  ashlcc reg,dist
6363d1a8abSmrg    .long   0x5de04008 | (\reg << 15) | (\dist << 4)
6463d1a8abSmrg.endm
6563d1a8abSmrg
6663d1a8abSmrg
6763d1a8abSmrg# converts an unsigned number x to a signed rep based on the bits in sign
6863d1a8abSmrg# sign should be 0x00000000 or 0xffffffff.
# With sign = 0xffffffff the add/xor pair yields ~(x - 1), which is -x;
# with sign = 0 both steps leave x unchanged.  Only x is written.
6963d1a8abSmrg.macro      to_signed x, sign
7063d1a8abSmrg    add     \x,\x,\sign    # conditionally decrement x
7163d1a8abSmrg    xor     \x,\x,\sign    # conditionally complement x
7263d1a8abSmrg.endm
7363d1a8abSmrg
7463d1a8abSmrg
# ld32: build the constant v in register r from two immediates:
# ldk is given v>>10 and ldl is given the low 10 bits (v & 1023).
# NOTE(review): assumes ldl shifts r left by 10 and ORs in its immediate - confirm.
7563d1a8abSmrg.macro  ld32    r,v
7663d1a8abSmrg    ldk     \r,(\v>>10)
7763d1a8abSmrg    ldl     \r,\r,(\v & 1023)
7863d1a8abSmrg.endm
7963d1a8abSmrg
8063d1a8abSmrg# calculate trailing zero count in x, also uses scr.
8163d1a8abSmrg# Using Seal's algorithm
# The count is left in x and scr is clobbered.  The lowest set bit is
# isolated, multiplied by 17 * 65 * 65535 with shifts and adds, and the top
# six bits of that product index ntz_table (defined under L_fp_tools).
# An input of zero indexes entry 0 of the table, which holds 32.
8263d1a8abSmrg.macro      ntz x, scr
8363d1a8abSmrg    not     \scr, \x
8463d1a8abSmrg    add     \scr, \scr, 1           # scr = -x
8563d1a8abSmrg    and     \x, \x, \scr            # x & -x: keep only the lowest set bit
8663d1a8abSmrg    ashl    \scr, \x, 4
8763d1a8abSmrg    add     \x, \scr, \x            # x = x * 17
8863d1a8abSmrg    ashl    \scr, \x, 6
8963d1a8abSmrg    add     \x, \scr, \x            # x = x * 65
9063d1a8abSmrg    ashl    \scr, \x, 16
9163d1a8abSmrg    sub     \x, \scr, \x            # x = x * 65535
9263d1a8abSmrg    lshr    \x, \x, 26              # top six bits become the table index
9363d1a8abSmrg    ldk     \scr, ntz_table
9463d1a8abSmrg    add     \x, \x, \scr            # address of the table entry
9563d1a8abSmrg    lpmi.b  \x, \x, 0               # x = byte at that address
9663d1a8abSmrg.endm
9763d1a8abSmrg
9863d1a8abSmrg# calculate leading zero count
# The count is left in x and scr is clobbered: x is passed through flip with
# operand 31 and the ntz macro then counts trailing zeros of that value.
# NOTE(review): presumably "flip x,x,31" reverses the bit order of x - confirm.
9963d1a8abSmrg.macro      nlz x, scr
10063d1a8abSmrg    flip    \x, \x, 31
10163d1a8abSmrg    ntz     \x, \scr
10263d1a8abSmrg.endm
10363d1a8abSmrg
10463d1a8abSmrg
10563d1a8abSmrg# Round 26 bit mantissa to nearest
10663d1a8abSmrg# | 23 bits frac | G | R | S |
# m is shifted right by 2 and then incremented by 0 or 1; s1 and s2 are
# scratch.  The increment is bit (m & 7) of the constant 0xc8, so it is 1
# when the low three bits of m are 3, 6 or 7 and 0 otherwise.
10763d1a8abSmrg.macro      round m,  s1, s2
10863d1a8abSmrg    ldk     \s1,0xc8                # 8-entry truth table, one bit per entry
10963d1a8abSmrg    and     \s2,\m,7                # table index = low three bits of m
11063d1a8abSmrg    lshr    \s1,\s1,\s2
11163d1a8abSmrg    and     \s1,\s1,1               # s1 = increment (0 or 1)
11263d1a8abSmrg    lshr    \m,\m,2
11363d1a8abSmrg    add     \m,\m,\s1
11463d1a8abSmrg.endm
11563d1a8abSmrg
11663d1a8abSmrg# If NZ, set the LSB of reg
# The Z condition tested here comes from an earlier instruction (tst, btst
# or ashlcc at the call sites in this file), so nothing that changes the
# condition codes may sit between that instruction and this macro.
# Defines the numeric local label 1.
11763d1a8abSmrg.macro      sticky reg
11863d1a8abSmrg    jmpc    z,1f
11963d1a8abSmrg    or      \reg,\reg,1             # set the sticky bit to 1
12063d1a8abSmrg1:
12163d1a8abSmrg.endm
12263d1a8abSmrg
12363d1a8abSmrg##########################################################################
12463d1a8abSmrg##########################################################################
12563d1a8abSmrg## addition & subtraction
12663d1a8abSmrg
12763d1a8abSmrg#if defined(L_subsf3) || defined(L_addsub_sf)
# __subsf3: single-precision x - y.
# Inputs: x in $r0, y in $r1.  The sign bit of y is flipped and control
# passes to __addsf3, which leaves the result in $r0.  $r2 is scratch here.
12863d1a8abSmrg.global __subsf3
12963d1a8abSmrg__subsf3:
13063d1a8abSmrg    # this is subtraction, so we just change the sign of r1
13163d1a8abSmrg    lpm     $r2,sign_mask
13263d1a8abSmrg    xor     $r1,$r1,$r2
13363d1a8abSmrg    jmp     __addsf3            # tail jump: __addsf3 returns to our caller
13463d1a8abSmrg#endif
13563d1a8abSmrg
13663d1a8abSmrg#if defined(L_addsf3) || defined(L_addsub_sf)
# __addsf3: single-precision x + y.
# Inputs: x in $r0, y in $r1.  Output: sum in $r0.
# $r1 to $r8 are overwritten.
# An operand whose exponent field is zero is treated as zero (its fraction
# bits are ignored), and a sum whose exponent ends at 0 or below is returned
# as a signed zero.  An exponent of 255 or above yields a signed infinity.
13763d1a8abSmrg.global __addsf3
13863d1a8abSmrg__addsf3:
13963d1a8abSmrg    # x in $r0, y in $r1, result z in $r0       --||| 100 instructions +/- |||--
14063d1a8abSmrg    # unpack e, calc d
14163d1a8abSmrg    bextu   $r2,$r0,(8<<5)|23   # ex in r2
14263d1a8abSmrg    bextu   $r3,$r1,(8<<5)|23   # ey in r3
14363d1a8abSmrg    sub     $r5,$r2,$r3         # d = ex - ey
14463d1a8abSmrg
14563d1a8abSmrg    # Special values are 0x00 and 0xff in ex and ey.
14663d1a8abSmrg    # If (ex&ey) == 0 or (ex|ey) == 255 then there may be
14763d1a8abSmrg    # a special value, so the slow path below is taken.
14863d1a8abSmrg    tst     $r2,$r3
14963d1a8abSmrg    jmpc    nz,1f
15063d1a8abSmrg    jmp     slow
15163d1a8abSmrg1:  or      $r4,$r2,$r3
15263d1a8abSmrg    cmp     $r4,255
15363d1a8abSmrg    jmpc    nz,no_special_vals
15463d1a8abSmrgslow:
15563d1a8abSmrg    # Check for early exit: at least one exponent is 0 and neither is 255
15663d1a8abSmrg    cmp     $r2,0
15763d1a8abSmrg    jmpc    z,test_if_not_255
15863d1a8abSmrg    cmp     $r3,0
15963d1a8abSmrg    jmpc    nz,no_early_exit
16063d1a8abSmrgtest_if_not_255:
16163d1a8abSmrg    cmp     $r2,255
16263d1a8abSmrg    jmpc    z,no_early_exit
16363d1a8abSmrg    cmp     $r3,255
16463d1a8abSmrg    jmpc    z,no_early_exit
16563d1a8abSmrg    or      $r6,$r2,$r3
16663d1a8abSmrg    cmp     $r6,0
16763d1a8abSmrg    jmpc    nz,was_not_zero
16863d1a8abSmrg    and     $r0,$r0,$r1         # both exponents are 0: result is a zero ...
16963d1a8abSmrg    lpm     $r1,sign_mask
17063d1a8abSmrg    and     $r0,$r0,$r1         # ... whose sign bit is set only when both inputs have it set
17163d1a8abSmrg    return
17263d1a8abSmrgwas_not_zero:
17363d1a8abSmrg    cmp     $r2,0
17463d1a8abSmrg    jmpc    nz,ret_x
17563d1a8abSmrg    move    $r0,$r1             # ex is 0, so the result is y
17663d1a8abSmrg    return
17763d1a8abSmrgret_x:
17863d1a8abSmrg    return                      # x is already in r0
17963d1a8abSmrgno_early_exit:
18063d1a8abSmrg    # setup to test for special values
18163d1a8abSmrg    sub     $r6,$r2,1
18263d1a8abSmrg    and     $r6,$r6,0xFE        # 0xFE exactly when ex is 0 or 255
18363d1a8abSmrg    sub     $r7,$r3,1
18463d1a8abSmrg    and     $r7,$r7,0xFE        # 0xFE exactly when ey is 0 or 255
18563d1a8abSmrg    # test for special values
18663d1a8abSmrg    cmp     $r6,$r7
18763d1a8abSmrg    jmpc    gte,ex_spec_is_gte
18863d1a8abSmrg    move    $r6,$r7
18963d1a8abSmrgex_spec_is_gte:
19063d1a8abSmrg    cmp     $r6,0xFE            # r6 = larger of the two values above
19163d1a8abSmrg    jmpc    nz,no_special_vals
19263d1a8abSmrg    cmp     $r5,0
19363d1a8abSmrg    jmpc    ns,d_gte_0
19463d1a8abSmrg    cmp     $r3,0xFF
19563d1a8abSmrg    jmpc    z,ret_y
19663d1a8abSmrg    cmp     $r2,0
19763d1a8abSmrg    jmpc    z,ret_y             # NOTE(review): both branches target the next label, so ret_y is reached either way
19863d1a8abSmrgret_y:
19963d1a8abSmrg    move    $r0,$r1
20063d1a8abSmrg    return
20163d1a8abSmrgd_gte_0:
20263d1a8abSmrg    cmp     $r5,0
20363d1a8abSmrg    jmpc    z,d_is_0
20463d1a8abSmrg    cmp     $r2,0xFF
20563d1a8abSmrg    jmpc    z,ret_x
20663d1a8abSmrg    cmp     $r3,0
20763d1a8abSmrg    jmpc    z,ret_x
20863d1a8abSmrgd_is_0:
20963d1a8abSmrg    cmp     $r2,0xFF
21063d1a8abSmrg    jmpc    nz,no_special_vals
21163d1a8abSmrg    ashl    $r6,$r0,9           # clear all except x frac
21263d1a8abSmrg    ashl    $r7,$r1,9           # clear all except y frac
21363d1a8abSmrg    or      $r6,$r6,$r7
21463d1a8abSmrg    cmp     $r6,0
21563d1a8abSmrg    jmpc    nz,ret_nan          # a fraction bit is set in x or y
21663d1a8abSmrg    lshr    $r4,$r0,31          # sx in r4
21763d1a8abSmrg    lshr    $r5,$r1,31          # sy in r5
21863d1a8abSmrg    cmp     $r4,$r5
21963d1a8abSmrg    jmpc    nz,ret_nan          # infinities of opposite sign
22063d1a8abSmrg    return                      # same sign: x is returned unchanged
22163d1a8abSmrgret_nan:
22263d1a8abSmrg    lpm     $r0,nan
22363d1a8abSmrg    return
22463d1a8abSmrgno_special_vals:
22563d1a8abSmrg    ldk     $r8,(1<<10)|(9<<5)|26   # setup implicit bit and mask for e
22663d1a8abSmrg    #----------------------
22763d1a8abSmrg    ashr    $r4,$r0,31              # sx in r4 (0 or all ones)
22863d1a8abSmrg    ashl    $r0,$r0,3               # shift mx 3 for GRS bits
22963d1a8abSmrg    bins    $r0,$r0,$r8             # clear sx, ex and add implicit bit mx
23063d1a8abSmrg    # change mx to signed mantissa
23163d1a8abSmrg    to_signed $r0,$r4
23263d1a8abSmrg    #----------------------
23363d1a8abSmrg    ashr    $r4,$r1,31              # sy in r4 (0 or all ones)
23463d1a8abSmrg    ashl    $r1,$r1,3               # shift my 3 for GRS bits
23563d1a8abSmrg    bins    $r1,$r1,$r8             # clear sy, ey and add implicit bit my
23663d1a8abSmrg    # change my to signed mantissa
23763d1a8abSmrg    to_signed $r1,$r4
23863d1a8abSmrg    #----------------------
23963d1a8abSmrg    # test if we swap ms based on d sign
24063d1a8abSmrg    cmp     $r5,0
24163d1a8abSmrg    jmpc    gte,noswap
24263d1a8abSmrg    # swap mx & my (three-xor exchange, no scratch register)
24363d1a8abSmrg    xor     $r0,$r0,$r1
24463d1a8abSmrg    xor     $r1,$r0,$r1
24563d1a8abSmrg    xor     $r0,$r0,$r1
24663d1a8abSmrg    # d positive means that ex>=ey, so ez = ex
24763d1a8abSmrg    # d negative means that ey>ex, so ez = ey
24863d1a8abSmrg    move    $r2,$r3
24963d1a8abSmrg    # |d|
25063d1a8abSmrg    neg     $r5
25163d1a8abSmrgnoswap:
25263d1a8abSmrg                                    # now $r2 = ez = max(ex,ey)
25363d1a8abSmrg    cmp     $r5,26                  # max necessary alignment shift is 26
25463d1a8abSmrg    jmpc    lt,under_26
25563d1a8abSmrg    ldk     $r5,26
25663d1a8abSmrgunder_26:
25763d1a8abSmrg    ldk     $r7,-1
25863d1a8abSmrg    ashl    $r7,$r7,$r5             # create inverse of mask for test of S bit value in discarded my
25963d1a8abSmrg    not     $r7,$r7
26063d1a8abSmrg    tst     $r1,$r7                 # determine value of sticky bit
26163d1a8abSmrg    # shift my >> |d|
26263d1a8abSmrg    ashr    $r1,$r1,$r5
26363d1a8abSmrg    sticky  $r1                     # consumes the condition set by the tst above
26463d1a8abSmrg
26563d1a8abSmrg    # add ms
26663d1a8abSmrg    add     $r0,$r0,$r1
26763d1a8abSmrg
26863d1a8abSmrg    # $r4 = sign(mx), mx = |mx|
26963d1a8abSmrg    ashr    $r4,$r0,31
27063d1a8abSmrg    xor     $r0,$r0,$r4
27163d1a8abSmrg    sub     $r0,$r0,$r4
27263d1a8abSmrg
27363d1a8abSmrg    # realign mantissa using leading zero count
27463d1a8abSmrg    flip    $r7,$r0,31
27563d1a8abSmrg    ntz     $r7,$r8                 # r7 = count used as the left shift below
27663d1a8abSmrg    ashl    $r0,$r0,$r7
27763d1a8abSmrg    btst    $r0,(6<<5)|0            # test low bits for sticky again
27863d1a8abSmrg    lshr    $r0,$r0,6
27963d1a8abSmrg    sticky  $r0                     # consumes the condition set by the btst above
28063d1a8abSmrg
28163d1a8abSmrg    # update exponent
28263d1a8abSmrg    add     $r2,$r2,5
28363d1a8abSmrg    sub     $r2,$r2,$r7
28463d1a8abSmrg
28563d1a8abSmrg    # Round to nearest
28663d1a8abSmrg    round   $r0,$r7,$r6
28763d1a8abSmrg
28863d1a8abSmrg    # detect_exp_update: bit 24 set means rounding carried out of the mantissa
28963d1a8abSmrg    lshr    $r6,$r0,24
29063d1a8abSmrg    add     $r2,$r2,$r6
29163d1a8abSmrg
29263d1a8abSmrg    # final tests
29363d1a8abSmrg    # mz == 0? if so, we just bail with a +0
29463d1a8abSmrg    cmp     $r0,0
29563d1a8abSmrg    jmpc    nz,msum_not_zero
29663d1a8abSmrg    ldk     $r0,0
29763d1a8abSmrg    return
29863d1a8abSmrgmsum_not_zero:
29963d1a8abSmrg    # Combined check that (1 <= ez <= 254)
30063d1a8abSmrg    sub     $r3,$r2,1
30163d1a8abSmrg    cmp     $r3,254
30263d1a8abSmrg    jmpc    b,no_special_ret
30363d1a8abSmrg    # underflow?
30463d1a8abSmrg    cmp     $r2,0
30563d1a8abSmrg    jmpc    gt,no_under
30663d1a8abSmrg    ldk     $r0,0                   # ez <= 0: magnitude becomes zero, sign is packed below
30763d1a8abSmrg    jmp     pack_sz
30863d1a8abSmrgno_under:
30963d1a8abSmrg    # overflow?
31063d1a8abSmrg    cmp     $r2,255
31163d1a8abSmrg    jmpc    lt,no_special_ret
31263d1a8abSmrg    ldk     $r0,0x7F8
31363d1a8abSmrg    ashl    $r0,$r0,20              # r0 = 0x7F800000 (infinity)
31463d1a8abSmrg    jmp     pack_sz
31563d1a8abSmrgno_special_ret:
31663d1a8abSmrg    # Pack ez
31763d1a8abSmrg    ldl     $r2,$r2,(8<<5)|23
31863d1a8abSmrg    bins    $r0,$r0,$r2             # width = 8, pos = 23 pack ez
31963d1a8abSmrg    # Pack sz
32063d1a8abSmrgpack_sz:
32163d1a8abSmrg    ldl     $r4,$r4,(1<<5)|31
32263d1a8abSmrg    bins    $r0,$r0,$r4             # width = 1, pos = 31 set sz from the sign of the sum held in r4
32363d1a8abSmrg    return
32463d1a8abSmrg#endif
32563d1a8abSmrg
32663d1a8abSmrg##########################################################################
32763d1a8abSmrg##########################################################################
32863d1a8abSmrg## multiplication
32963d1a8abSmrg
33063d1a8abSmrg#ifdef  L_mulsf3
# __mulsf3: single-precision x * y.
# Inputs: x in $r0, y in $r1.  Output: product in $r0.
# $r1 to $r7 are overwritten.
# A product whose exponent ends at 0 or below is returned as a signed zero,
# except for the single border pattern described at special_ret_mul.
33163d1a8abSmrg.global __mulsf3
33263d1a8abSmrg__mulsf3:
33363d1a8abSmrg    # x in $r0, y in $r1, result z in $r0       --||| 61 instructions +/- |||--
33463d1a8abSmrg
33563d1a8abSmrg    # unpack e
33663d1a8abSmrg    bextu   $r2,$r0,(8<<5)|23   # ex in r2
33763d1a8abSmrg    bextu   $r3,$r1,(8<<5)|23   # ey in r3
33863d1a8abSmrg    # calc result sign
33963d1a8abSmrg    xor     $r4,$r0,$r1
34063d1a8abSmrg    lpm     $r5,sign_mask
34163d1a8abSmrg    and     $r4,$r4,$r5         # sz in r4
34263d1a8abSmrg
34363d1a8abSmrg    # unpack m add implicit bit
34463d1a8abSmrg    ldk     $r5,(1<<10)|(9<<5)|23   # setup implicit bit and mask for e
34563d1a8abSmrg    #----------------------
34663d1a8abSmrg    bins    $r0,$r0,$r5             # clear sx, ex and add implicit bit mx
34763d1a8abSmrg
34863d1a8abSmrg    # fast path when both exponents are in 1..254
34963d1a8abSmrg    sub     $r6,$r2,1
35063d1a8abSmrg    cmp     $r6,254
35163d1a8abSmrg    jmpc    b,1f
35263d1a8abSmrg    jmp     slow_mul
35363d1a8abSmrg1:  sub     $r6,$r3,1
35463d1a8abSmrg    cmp     $r6,254
35563d1a8abSmrg    jmpc    b,no_special_vals_mul
35663d1a8abSmrg
35763d1a8abSmrgslow_mul:
35863d1a8abSmrg    # Check for early exit: an exponent is 0 and neither is 255
35963d1a8abSmrg    cmp     $r2,0
36063d1a8abSmrg    jmpc    z,op_is_zero
36163d1a8abSmrg    cmp     $r3,0
36263d1a8abSmrg    jmpc    nz,no_early_exit_mul
36363d1a8abSmrgop_is_zero:
36463d1a8abSmrg    cmp     $r2,255
36563d1a8abSmrg    jmpc    z,no_early_exit_mul
36663d1a8abSmrg    cmp     $r3,255
36763d1a8abSmrg    jmpc    z,no_early_exit_mul
36863d1a8abSmrg    move    $r0,$r4             # signed zero
36963d1a8abSmrg    return
37063d1a8abSmrgno_early_exit_mul:
37163d1a8abSmrg    # setup to test for special values
37263d1a8abSmrg    sub     $r6,$r2,1
37363d1a8abSmrg    and     $r6,$r6,0xFE        # 0xFE exactly when ex is 0 or 255
37463d1a8abSmrg    sub     $r7,$r3,1
37563d1a8abSmrg    and     $r7,$r7,0xFE        # 0xFE exactly when ey is 0 or 255
37663d1a8abSmrg    # test for special values
37763d1a8abSmrg    cmp     $r6,$r7
37863d1a8abSmrg    jmpc    gte,ex_spec_is_gte_ey_mul
37963d1a8abSmrg    move    $r6,$r7
38063d1a8abSmrgex_spec_is_gte_ey_mul:
38163d1a8abSmrg    cmp     $r6,0xFE
38263d1a8abSmrg    jmpc    nz,no_special_vals_mul
38363d1a8abSmrg    cmp     $r2,0xFF
38463d1a8abSmrg    jmpc    nz,ex_not_FF_mul
38563d1a8abSmrg    ashl    $r6,$r0,9           # x fraction bits only
38663d1a8abSmrg    cmp     $r6,0
38763d1a8abSmrg    jmpc    nz,ret_nan          # x has ex = 0xFF and a nonzero fraction
38863d1a8abSmrg    cmp     $r3,0
38963d1a8abSmrg    jmpc    z,ret_nan           # x has ex = 0xFF, y has ey = 0
39063d1a8abSmrg    ashl    $r6,$r1,1           # y without its sign bit
39163d1a8abSmrg    lpm     $r7,high_FF
39263d1a8abSmrg    cmp     $r6,$r7
39363d1a8abSmrg    jmpc    a,ret_nan           # above 0xFF000000: ey = 0xFF with a nonzero fraction
39463d1a8abSmrg    cmp     $r6,0
39563d1a8abSmrg    jmpc    z,ret_nan
39663d1a8abSmrg    # infinity
39763d1a8abSmrg    lpm     $r0,inf
39863d1a8abSmrg    or      $r0,$r0,$r4
39963d1a8abSmrg    return
40063d1a8abSmrgex_not_FF_mul:
40163d1a8abSmrg    cmp     $r2,0
40263d1a8abSmrg    jmpc    nz,no_nan_mul
40363d1a8abSmrg    cmp     $r3,0xFF
40463d1a8abSmrg    jmpc    nz,no_nan_mul
40563d1a8abSmrg    jmp     ret_nan             # ex = 0 and ey = 0xFF
40663d1a8abSmrgno_nan_mul:
40763d1a8abSmrg    # result is y with its sign bit replaced by sz
40863d1a8abSmrg    lpm     $r0,nan             # used here as the abs mask
40963d1a8abSmrg    and     $r0,$r0,$r1
41063d1a8abSmrg    or      $r0,$r0,$r4
41163d1a8abSmrg    return
41263d1a8abSmrg
41363d1a8abSmrgret_nan:
41463d1a8abSmrg    lpm     $r0,nan
41563d1a8abSmrg    return
41663d1a8abSmrg
41763d1a8abSmrgno_special_vals_mul:
41863d1a8abSmrg    bins    $r1,$r1,$r5         # clear sy, ey and add implicit bit my
41963d1a8abSmrg    # calc ez
42063d1a8abSmrg    add     $r3,$r2,$r3
42163d1a8abSmrg    sub     $r3,$r3,127         # ez in r3
42263d1a8abSmrg
42363d1a8abSmrg    # (r1,r2) = r0 * r1: r2 gets the mul result, r1 the muluh result
42463d1a8abSmrg    mul     $r2,$r0,$r1
42563d1a8abSmrg    muluh   $r1,$r0,$r1
42663d1a8abSmrg
42763d1a8abSmrg    btst    $r1,(1<<5)|15       # XXX use jmpx
42863d1a8abSmrg    jmpc    z,mul_z0
42963d1a8abSmrg
43063d1a8abSmrg    # mz is 1X.XX...X
43163d1a8abSmrg    # 48-bit product is in (r1,r2). The low 22 bits of r2
43263d1a8abSmrg    # are discarded.
43363d1a8abSmrg    lshr    $r0,$r2,22
43463d1a8abSmrg    ashl    $r1,$r1,10
43563d1a8abSmrg    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 22
43663d1a8abSmrg    ashlcc  2,10                # condition codes from the discarded bits (see ashlcc macro)
43763d1a8abSmrg    sticky  $r0
43863d1a8abSmrg    add     $r3,$r3,1           # bump exponent
43963d1a8abSmrg
44063d1a8abSmrg    # Round to nearest
44163d1a8abSmrg    round   $r0, $r1, $r2
44263d1a8abSmrg    lshr    $r6,$r0,24          # 1 when rounding carried out of the mantissa
44363d1a8abSmrg    add     $r3,$r3,$r6
44463d1a8abSmrg
44563d1a8abSmrg    sub     $r6,$r3,1
44663d1a8abSmrg    cmp     $r6,254
44763d1a8abSmrg    jmpc    b,no_special_ret_mul
44863d1a8abSmrg
44963d1a8abSmrgspecial_ret_mul:
45063d1a8abSmrg    # When the final exponent <= 0, result is flushed to 0 except
45163d1a8abSmrg    # for the border case 0x00FFFFFF which is promoted to next higher
45263d1a8abSmrg    # FP no., that is, the smallest "normalized" number.
45363d1a8abSmrg    cmp     $r3,0
45463d1a8abSmrg    jmpc    gt,exp_normal
45563d1a8abSmrg    # Pack ez
45663d1a8abSmrg    ldl     $r3,$r3,(8<<5)|23
45763d1a8abSmrg    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
45863d1a8abSmrg    lpm     $r2,edge_case
45963d1a8abSmrg    cmp     $r0,$r2
46063d1a8abSmrg    jmpc    nz,no_edge_case
46163d1a8abSmrg    lpm     $r0,smallest_norm
46263d1a8abSmrg    jmp     pack_sz_mul
46363d1a8abSmrgno_edge_case:
46463d1a8abSmrg    ldk     $r0,0
46563d1a8abSmrg    jmp     pack_sz_mul
46663d1a8abSmrgexp_normal:
46763d1a8abSmrg    # overflow?
46863d1a8abSmrg    cmp     $r3,255
46963d1a8abSmrg    jmpc    lt,no_special_ret_mul
47063d1a8abSmrg    ldk     $r0,0x7F8
47163d1a8abSmrg    ashl    $r0,$r0,20          # r0 = 0x7F800000 (infinity)
47263d1a8abSmrg    jmp     pack_sz_mul
47363d1a8abSmrgno_special_ret_mul:
47463d1a8abSmrg    # Pack ez
47563d1a8abSmrg    ldl     $r3,$r3,(8<<5)|23
47663d1a8abSmrg    bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
47763d1a8abSmrg    # Pack sz
47863d1a8abSmrgpack_sz_mul:
47963d1a8abSmrg    or    $r0,$r0,$r4
48063d1a8abSmrg    return
48163d1a8abSmrg
48263d1a8abSmrgmul_z0:
48363d1a8abSmrg    # mz is 0X.XX...X
48463d1a8abSmrg    # 48-bit product is in (r1,r2). The low 21 bits of r2
48563d1a8abSmrg    # are discarded.
48663d1a8abSmrg    lshr    $r0,$r2,21
48763d1a8abSmrg    ashl    $r1,$r1,11
48863d1a8abSmrg    or      $r0,$r0,$r1         # r0 = (r1,r2) >> 21
48963d1a8abSmrg    ashlcc  2,11                # condition codes from the discarded bits (see ashlcc macro)
49063d1a8abSmrg    sticky  $r0
49163d1a8abSmrg    # Round to nearest
49263d1a8abSmrg    round   $r0, $r1, $r2
49363d1a8abSmrg    lshr    $r6,$r0,24          # 1 when rounding carried out of the mantissa
49463d1a8abSmrg    add     $r3,$r3,$r6
49563d1a8abSmrg
49663d1a8abSmrg    sub     $r6,$r3,1
49763d1a8abSmrg    cmp     $r6,254
49863d1a8abSmrg    jmpc    b,no_special_ret_mul
49963d1a8abSmrg    jmp     special_ret_mul
50063d1a8abSmrg#endif
49963d1a8abSmrg
50063d1a8abSmrg##########################################################################
50163d1a8abSmrg##########################################################################
50263d1a8abSmrg## division
50363d1a8abSmrg
50463d1a8abSmrg## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
50563d1a8abSmrg## for implementation details
50663d1a8abSmrg
50763d1a8abSmrg
50863d1a8abSmrg
50963d1a8abSmrg
51063d1a8abSmrg#ifdef  L_divsf3
# __divsf3: single-precision x / y.
# Inputs: x in $r0, y in $r1.  Output: quotient in $r0.
# $r13 is pushed on entry and popped on every return path; $r2 to $r12 are
# overwritten.
# dc_1 to dc_11 are the constants named in the i0..i25 comments of the
# polynomial evaluation below; dc_12 is the mask that produces W.
51163d1a8abSmrgdc_1: .long             0xffffe7d7
51263d1a8abSmrgdc_2: .long             0xffffffe8
51363d1a8abSmrgdc_3: .long             0xffbad86f
51463d1a8abSmrgdc_4: .long             0xfffbece7
51563d1a8abSmrgdc_5: .long             0xf3672b51
51663d1a8abSmrgdc_6: .long             0xfd9d3a3e
51763d1a8abSmrgdc_7: .long             0x9a3c4390
51863d1a8abSmrgdc_8: .long             0xd4d2ce9b
51963d1a8abSmrgdc_9: .long             0x1bba92b3
52063d1a8abSmrgdc_10: .long            0x525a1a8b
52163d1a8abSmrgdc_11: .long            0x0452b1bf
52263d1a8abSmrgdc_12: .long            0xFFFFFFC0
52363d1a8abSmrgspec_val_test:  .long   0x7F7FFFFF
52463d1a8abSmrg
52563d1a8abSmrg.global __divsf3
52663d1a8abSmrg__divsf3:
52763d1a8abSmrg    push    $r13
52863d1a8abSmrg    # x in $r0, y in $r1, result z in $r0       --||| 73 instructions +/- |||-
52963d1a8abSmrg    bextu   $r10,$r0,(8<<5)|23   # ex in r10
53063d1a8abSmrg    bextu   $r11,$r1,(8<<5)|23   # ey in r11
53163d1a8abSmrg    lpm     $r6, m_mask
53263d1a8abSmrg    and     $r2, $r0, $r6        # mx
53363d1a8abSmrg    and     $r3, $r1, $r6        # my
53463d1a8abSmrg    cmp     $r2,$r3
53563d1a8abSmrg    bextu   $r2,$r30,(1<<5)|4   # c = (mx >= my): 1-bit field at bit 4 of $r30 after the cmp  NOTE(review): confirm flag bit
53663d1a8abSmrg    ashl    $r3,$r3,9           # T = my << 9;
53763d1a8abSmrg    lpm     $r13, sign_mask
53863d1a8abSmrg    ashl    $r4,$r0,8           # X8 = X << 8;
53963d1a8abSmrg    or      $r4,$r4,$r13        # Mx = X8 | 0x80000000;
54063d1a8abSmrg    lshr    $r5,$r4,$r2         # S = Mx >> c;
54163d1a8abSmrg    # calc D
54263d1a8abSmrg    sub     $r2, $r11, $r2
54363d1a8abSmrg    add     $r12, $r10, 125
54463d1a8abSmrg    sub     $r2, $r12, $r2      # int D = (Ex + 125) - (Ey - c);
54563d1a8abSmrg    # calc result sign
54663d1a8abSmrg    xor     $r12,$r0,$r1
54763d1a8abSmrg    and     $r12,$r12,$r13      # Sr = ( X ^ Y ) & 0x80000000;
54863d1a8abSmrg    # check early exit: ex = 0 and ey in 1..254 gives a signed zero
54963d1a8abSmrg    cmp     $r10, 0
55063d1a8abSmrg    jmpc    nz, no_early_ret_dev
55163d1a8abSmrg    cmp     $r11, 0
55263d1a8abSmrg    jmpc    z, no_early_ret_dev
55363d1a8abSmrg    cmp     $r11, 255
55463d1a8abSmrg    jmpc    z, no_early_ret_dev
55563d1a8abSmrg    move    $r0, $r12
55663d1a8abSmrg    pop     $r13
55763d1a8abSmrg    return
55863d1a8abSmrgno_early_ret_dev:
55963d1a8abSmrg # setup to test for special values
56063d1a8abSmrg    sub     $r8,$r10,1
56163d1a8abSmrg    and     $r8,$r8,0xFE        # 0xFE exactly when ex is 0 or 255
56263d1a8abSmrg    sub     $r9,$r11,1
56363d1a8abSmrg    and     $r9,$r9,0xFE        # 0xFE exactly when ey is 0 or 255
56463d1a8abSmrg    # test for special values
56563d1a8abSmrg    cmp     $r8, $r9
56663d1a8abSmrg    jmpc    gte, absXm1_gte_absYm1
56763d1a8abSmrg    move    $r8, $r9
56863d1a8abSmrgabsXm1_gte_absYm1:
56963d1a8abSmrg    cmp     $r8, 0xFE
57063d1a8abSmrg    jmpc    nz, no_spec_ret_div
57163d1a8abSmrg    cmp     $r10, 0xFF
57263d1a8abSmrg    jmpc    nz, ex_not_FF_div
57363d1a8abSmrg    lpm     $r6, m_mask
57463d1a8abSmrg    and     $r2, $r0, $r6        # mx
57563d1a8abSmrg    cmp     $r2, 0
57663d1a8abSmrg    jmpc    nz, ret_nan_div      # x has ex = 0xFF and a nonzero fraction
57763d1a8abSmrg    cmp     $r11, 0xFF
57863d1a8abSmrg    jmpc    z, ret_nan_div       # both exponents are 0xFF
57963d1a8abSmrg    jmp     ret_inf_div
58063d1a8abSmrgex_not_FF_div:
58163d1a8abSmrg    cmp     $r11, 0xFF
58263d1a8abSmrg    jmpc    nz, ey_not_FF_div
58363d1a8abSmrg    ashl    $r13, $r1, 9         # y fraction bits only
58463d1a8abSmrg    cmp     $r13, 0
58563d1a8abSmrg    jmpc    nz, ret_nan_div
58663d1a8abSmrg    move    $r0, $r12            # ey = 0xFF with zero fraction: signed zero
58763d1a8abSmrg    pop     $r13
58863d1a8abSmrg    return
58963d1a8abSmrgey_not_FF_div:
59063d1a8abSmrg    or      $r10, $r10, $r11
59163d1a8abSmrg    cmp     $r10, 0
59263d1a8abSmrg    jmpc    z, ret_nan_div       # both exponents are 0
59363d1a8abSmrgret_inf_div:
59463d1a8abSmrg    lpm     $r6, inf
59563d1a8abSmrg    move    $r0, $r6
59663d1a8abSmrg    or      $r0, $r0, $r12
59763d1a8abSmrg    pop     $r13
59863d1a8abSmrg    return
59963d1a8abSmrgret_nan_div:
60063d1a8abSmrg    lpm     $r0, nan
60163d1a8abSmrg    pop     $r13
60263d1a8abSmrg    return
60363d1a8abSmrg
60463d1a8abSmrgno_spec_ret_div:
60563d1a8abSmrg# check for overflow
60663d1a8abSmrg    ldk     $r6, 0xFE
60763d1a8abSmrg    cmp     $r2, $r6
60863d1a8abSmrg    jmpc    lt, no_overflow_div
60963d1a8abSmrg    lpm     $r6, inf
61063d1a8abSmrg    or      $r0, $r12, $r6
61163d1a8abSmrg    pop     $r13
61263d1a8abSmrg    return
61363d1a8abSmrgno_overflow_div:
61463d1a8abSmrg# check for underflow
# Negative D returns a signed zero, except for the one border case
# D == -1, Mx == 0xFFFFFF00, My == 0x80000000, which returns the smallest
# normal number with the result sign.
61563d1a8abSmrg    cmp     $r2, 0
61663d1a8abSmrg    jmpc    ns, no_underflow_div
61763d1a8abSmrg    xnor    $r6, $r6, $r6       # -1
61863d1a8abSmrg    cmp     $r2, $r6
61963d1a8abSmrg    jmpc    nz, ret_sr_div
62063d1a8abSmrg    ldk     $r7, 0xFF
62163d1a8abSmrg    xor     $r6, $r6, $r7       # 0xFF ^ -1 = 0xFFFFFF00
62263d1a8abSmrg    cmp     $r4, $r6            # Mx == 0xFFFFFF00 ?
62363d1a8abSmrg    jmpc    nz, ret_sr_div
62463d1a8abSmrg    lpm     $r6, sign_mask
62563d1a8abSmrg    cmp     $r3, 0              # T = my << 9 is zero exactly when My == 0x80000000
62663d1a8abSmrg    jmpc    nz, ret_sr_div
62763d1a8abSmrg    lshr    $r0, $r6, 8         # 0x00800000
62863d1a8abSmrg    or      $r0, $r0, $r12
62963d1a8abSmrg    pop     $r13
63063d1a8abSmrg    return
63163d1a8abSmrgret_sr_div:
63263d1a8abSmrg    move    $r0, $r12
63363d1a8abSmrg    pop     $r13
63463d1a8abSmrg    return
63563d1a8abSmrgno_underflow_div:
63663d1a8abSmrg    lpm     $r6, dc_1
63763d1a8abSmrg    muluh   $r7, $r3, $r6       # i0 = mul( T , 0xffffe7d7 );
63863d1a8abSmrg    lpm     $r6, dc_2
63963d1a8abSmrg    sub     $r7, $r6, $r7       # i1 = 0xffffffe8 - i0;
64063d1a8abSmrg    muluh   $r7, $r5, $r7       # i2 = mul( S , i1 );
64163d1a8abSmrg    add     $r7, $r7, 0x20      # i3 = 0x00000020 + i2;
64263d1a8abSmrg    muluh   $r8, $r3, $r3       # i4 = mul( T , T );
64363d1a8abSmrg    muluh   $r9, $r5, $r8       # i5 = mul( S , i4 );
64463d1a8abSmrg    lpm     $r6, dc_3
64563d1a8abSmrg    muluh   $r10, $r3, $r6      # i6 = mul( T , 0xffbad86f );
64663d1a8abSmrg    lpm     $r6, dc_4
64763d1a8abSmrg    sub     $r10, $r6, $r10     # i7 = 0xfffbece7 - i6;
64863d1a8abSmrg    muluh   $r10, $r9, $r10     # i8 = mul( i5 , i7 );
64963d1a8abSmrg    add     $r7, $r7, $r10      # i9 = i3 + i8;
65063d1a8abSmrg    muluh   $r9, $r8, $r9       # i10 = mul( i4 , i5 );
65163d1a8abSmrg    lpm     $r6, dc_5
65263d1a8abSmrg    muluh   $r10, $r3, $r6      # i11 = mul( T , 0xf3672b51 );
65363d1a8abSmrg    lpm     $r6, dc_6
65463d1a8abSmrg    sub     $r10, $r6, $r10     # i12 = 0xfd9d3a3e - i11;
65563d1a8abSmrg    lpm     $r6, dc_7
65663d1a8abSmrg    muluh   $r11, $r3, $r6      # i13 = mul( T , 0x9a3c4390 );
65763d1a8abSmrg    lpm     $r6, dc_8
65863d1a8abSmrg    sub     $r11, $r6, $r11     # i14 = 0xd4d2ce9b - i13
65963d1a8abSmrg    muluh   $r11, $r8, $r11     # i15 = mul( i4 , i14 );
66063d1a8abSmrg    add     $r10, $r10, $r11    # i16 = i12 + i15;
66163d1a8abSmrg    muluh   $r10, $r9, $r10     # i17 = mul( i10 , i16 )
66263d1a8abSmrg    add     $r7, $r7, $r10      # i18 = i9 + i17;
66363d1a8abSmrg    muluh   $r10, $r8, $r8      # i19 = mul( i4 , i4 );
66463d1a8abSmrg    lpm     $r6, dc_9
66563d1a8abSmrg    muluh   $r11, $r3, $r6      # i20 = mul( T , 0x1bba92b3 );
66663d1a8abSmrg    lpm     $r6, dc_10
66763d1a8abSmrg    sub     $r11, $r6, $r11     # i21 = 0x525a1a8b - i20;
66863d1a8abSmrg    lpm     $r6, dc_11
66963d1a8abSmrg    muluh   $r8, $r8, $r6       # i22 = mul( i4 , 0x0452b1bf );
67063d1a8abSmrg    add     $r8, $r11, $r8      # i23 = i21 + i22;
67163d1a8abSmrg    muluh   $r8, $r10, $r8      # i24 = mul( i19 , i23 );
67263d1a8abSmrg    muluh   $r8, $r9, $r8       # i25 = mul( i10 , i24 );
67363d1a8abSmrg    add     $r3, $r7, $r8       # V = i18 + i25;
67463d1a8abSmrg# W = V & 0xFFFFFFC0;
67563d1a8abSmrg    lpm     $r6, dc_12
67663d1a8abSmrg    and     $r3, $r3, $r6   # W
67763d1a8abSmrg# round and pack final values
67863d1a8abSmrg    ashl    $r0, $r2, 23        # pack D
67963d1a8abSmrg    or      $r0, $r0, $r12      # pack Sr
68063d1a8abSmrg    ashl    $r12, $r1, 8
68163d1a8abSmrg    or      $r12, $r12, $r13    # My
68263d1a8abSmrg    muluh   $r10, $r3, $r12     # high word of W * My
68363d1a8abSmrg    lshr    $r11, $r5, 1        # S >> 1
68463d1a8abSmrg    cmp     $r10, $r11
68563d1a8abSmrg    jmpc    gte, div_ret_1
68663d1a8abSmrg    add     $r3, $r3, 0x40      # W * My fell short of S >> 1: step W up by 0x40
68763d1a8abSmrgdiv_ret_1:
68863d1a8abSmrg    lshr    $r3, $r3, 7
68963d1a8abSmrg    add     $r0, $r0, $r3       # add, not or: the mantissa carries into the packed exponent
69063d1a8abSmrg    pop     $r13
69163d1a8abSmrg    return
69263d1a8abSmrg#endif
69363d1a8abSmrg
69463d1a8abSmrg##########################################################################
69563d1a8abSmrg##########################################################################
69663d1a8abSmrg## Negate
69763d1a8abSmrg
69863d1a8abSmrg#ifdef L_negsf
# __negsf: flip bit 31 of the value in $r0 and return it in $r0.
# $r1 is overwritten with sign_mask.
69963d1a8abSmrg.global __negsf
70063d1a8abSmrg__negsf:
70163d1a8abSmrg    lpm     $r1, sign_mask
70263d1a8abSmrg    xor     $r0, $r0, $r1
70363d1a8abSmrg    return
70463d1a8abSmrg#endif
70563d1a8abSmrg
70663d1a8abSmrg##########################################################################
70763d1a8abSmrg##########################################################################
70863d1a8abSmrg## float to int & unsigned int
70963d1a8abSmrg
71063d1a8abSmrg#ifdef L_fixsfsi
# __fixsfsi: convert the float in $r0 to a signed 32-bit integer in $r0.
# Fraction bits below the integer part are shifted out and dropped.
# Results for the edge cases handled below:
#   exponent 0xFF with nonzero fraction -> 0
#   exponent below 127                  -> 0
#   exponent 158 or above               -> 0x7FFFFFFF when the sign bit is
#                                          clear, 0x80000000 when it is set
# $r1 to $r3 are overwritten.
71163d1a8abSmrg.global __fixsfsi
71263d1a8abSmrg__fixsfsi: # 20 instructions
71363d1a8abSmrg    bextu   $r1,$r0,(8<<5)|23   # e in r1
71463d1a8abSmrg    lshr    $r2,$r0,31          # s in r2
71563d1a8abSmrg    lpm     $r3, m_mask
71663d1a8abSmrg    and     $r0,$r0,$r3         # m in r0
71763d1a8abSmrg    # test nan
71863d1a8abSmrg    cmp     $r1,0xFF
71963d1a8abSmrg    jmpc    nz, int_not_nan
72063d1a8abSmrg    cmp     $r0,0
72163d1a8abSmrg    jmpc    z, int_not_nan
72263d1a8abSmrg    ldk     $r0,0
72363d1a8abSmrg    return
72463d1a8abSmrgint_not_nan:
72563d1a8abSmrg    # test edges
72663d1a8abSmrg    cmp     $r1, 127
72763d1a8abSmrg    jmpc    gte, int_not_zero   # lower limit
72863d1a8abSmrg    ldk     $r0,0
72963d1a8abSmrg    return
73063d1a8abSmrgint_not_zero:
73163d1a8abSmrg    cmp     $r1, 158
73263d1a8abSmrg    jmpc    lt, int_not_max    # upper limit
73363d1a8abSmrg    lpm     $r0, nan            # 0x7FFFFFFF, used here as the largest positive result
73463d1a8abSmrg    cmp     $r2, 0
73563d1a8abSmrg    jmpc    z, int_positive
73663d1a8abSmrg    xnor    $r0, $r0, 0         # complement: 0x80000000
73763d1a8abSmrg    return
73863d1a8abSmrgint_not_max:
73963d1a8abSmrg    lpm     $r3, smallest_norm
74063d1a8abSmrg    or      $r0, $r0, $r3       # set implicit bit
74163d1a8abSmrg    sub     $r1, $r1, 150       # shift distance: e - 127 - 23
74263d1a8abSmrg    cmp     $r1, 0
74363d1a8abSmrg    jmpc    s, shift_right
74463d1a8abSmrg    ashl    $r0, $r0, $r1
74563d1a8abSmrg    jmp     set_int_sign
74663d1a8abSmrgshift_right:
74763d1a8abSmrg    xnor    $r1, $r1, 0
74863d1a8abSmrg    add     $r1, $r1, 1         # r1 = -r1
74963d1a8abSmrg    lshr    $r0, $r0, $r1
75063d1a8abSmrgset_int_sign:
75163d1a8abSmrg    cmp     $r2, 0
75263d1a8abSmrg    jmpc    z, int_positive
75363d1a8abSmrg    xnor    $r0, $r0, 0
75463d1a8abSmrg    add     $r0, $r0, 1         # r0 = -r0
75563d1a8abSmrgint_positive:
75663d1a8abSmrg    return
75763d1a8abSmrg#endif
75863d1a8abSmrg
75963d1a8abSmrg#ifdef L_fixunssfsi
# __fixunssfsi: convert the float in $r0 to an unsigned 32-bit integer in $r0.
# Fraction bits below the integer part are shifted out and dropped.
# Results for the edge cases handled below:
#   sign bit set                        -> 0
#   exponent 0xFF with nonzero fraction -> 0
#   exponent below 127                  -> 0
# $r1 to $r3 are overwritten.
# NOTE(review): there is no saturation for unbiased exponents of 32 and above
# (including exponent 0xFF with zero fraction); the result then depends on how
# ashl treats large shift counts.
76063d1a8abSmrg.global __fixunssfsi
76163d1a8abSmrg__fixunssfsi: # 19 instructions
76263d1a8abSmrg    lshr    $r2, $r0, 31          # s in r2
76363d1a8abSmrg    cmp     $r2, 0
76463d1a8abSmrg    jmpc    z, uint_not_neg
76563d1a8abSmrg    ldk     $r0, 0
76663d1a8abSmrg    return
76763d1a8abSmrguint_not_neg:
76863d1a8abSmrg    bextu   $r1, $r0, (8<<5)|23   # e in r1
76963d1a8abSmrg    sub     $r1, $r1, 127         # r1 = e - 127 from here on
77063d1a8abSmrg    lpm     $r3, m_mask
77163d1a8abSmrg    and     $r0, $r0, $r3         # m in r0
77263d1a8abSmrg    # test nan
77363d1a8abSmrg    cmp     $r1, 128              # e was unbiased above, so an all-ones exponent field reads 255 - 127 here
77463d1a8abSmrg    jmpc    nz, uint_not_nan
77563d1a8abSmrg    cmp     $r0, 0
77663d1a8abSmrg    jmpc    z, uint_not_nan
77763d1a8abSmrg    ldk     $r0, 0
77863d1a8abSmrg    return
77963d1a8abSmrguint_not_nan:
78063d1a8abSmrg    # test edges
78163d1a8abSmrg    cmp     $r1, 0
78263d1a8abSmrg    jmpc    ns, uint_not_zero   # lower limit
78363d1a8abSmrg    ldk     $r0, 0
78463d1a8abSmrg    return
78563d1a8abSmrguint_not_zero:
78663d1a8abSmrg    lpm     $r3, smallest_norm
78763d1a8abSmrg    or      $r0, $r0, $r3       # set implicit bit
78863d1a8abSmrg    cmp     $r1, 23
78963d1a8abSmrg    jmpc    lt, shift_uint_right
79063d1a8abSmrg    sub     $r1, $r1, 23
79163d1a8abSmrg    ashl    $r0, $r0, $r1       # shift left by (e - 127) - 23
79263d1a8abSmrg    return
79363d1a8abSmrgshift_uint_right:
79463d1a8abSmrg    ldk     $r3, 23
79563d1a8abSmrg    sub     $r1, $r3, $r1
79663d1a8abSmrg    lshr    $r0, $r0, $r1       # shift right by 23 - (e - 127)
79763d1a8abSmrg    return
79863d1a8abSmrg#endif
79963d1a8abSmrg
80063d1a8abSmrg##########################################################################
80163d1a8abSmrg##########################################################################
80263d1a8abSmrg## int & unsigned int to float
80363d1a8abSmrg
80463d1a8abSmrg
# i2f x, s1, s2, s3, lbl
#
# Expand to code that turns the unsigned integer magnitude held in register
# x into the exponent and fraction fields of a single-precision float,
# leaving the result in x.  Magnitudes that need more than 24 significant
# bits are rounded to nearest, with ties going to even.
#
#   x        in: integer to convert; out: packed exponent and fraction.
#            Both users in this file test for zero before expanding the
#            macro, so a zero input is not catered for here.
#   s1       scratch: leading-zero count, later the biased exponent
#   s2, s3   scratch
#   lbl      suffix appended to every label so that expansions do not clash
#
# NOTE(review): relies on the nlz macro defined elsewhere in this file.  It
# is assumed to replace its first operand with the count of leading zeros
# of that operand, using the second operand as scratch - confirm there.
.macro  i2f x, s1, s2, s3, lbl
    move    \s1, \x
    nlz     \s1, \s2                # s1 = leading zeros of x (see note above)
    cmp     \s1, 8
    jmpc    s, i2f_round\lbl        # fewer than 8: more than 24 significant bits
    # Exact case: the value fits in 24 bits, so move its top set bit to bit 23.
    sub     \s2, \s1, 8
    ashl    \x, \x, \s2
    jmp     i2f_pack\lbl
i2f_round\lbl:
    # Rounding case: place the top set bit at bit 25.  That leaves two bits
    # below the 24-bit significand: a round bit (bit 1) and a sticky bit (bit 0).
    cmp     \s1, 6
    jmpc    s, i2f_shift_right\lbl
    sub     \s2, \s1, 6             # 6 or 7 leading zeros: shift left by 0 or 1
    ashl    \x, \x, \s2
    jmp     i2f_round_and_pack\lbl
i2f_shift_right\lbl:
    ldk     \s2, 6
    sub     \s2, \s2, \s1           # right-shift distance
    ldk     \s3, -1                 # 0xFFFFFFFF
    ashl    \s3, \s3, \s2
    xnor    \s3, \s3, 0             # NOT: mask of the bits about to be discarded
    tst     \x, \s3                 # is any discarded bit set?
    lshr    \x, \x, \s2
    jmpc    z, i2f_round_and_pack\lbl   # acts on the flags left by tst
    or      \x, \x, 1               # record the lost bits in the sticky bit
i2f_round_and_pack\lbl:
    bextu   \s2, \x, (1<<5)|2       # lowest bit that survives the rounding
    or      \x, \x, \s2             # fold it into the sticky bit: ties go to even
    add     \x, \x, 1
    lshr    \x, \x, 2               # drop the round and sticky bits
    btst    \x, (1<<5)|24           # did the rounding carry into bit 24?
    jmpc    z, i2f_pack\lbl
    sub     \s1, \s1, 1             # one leading zero fewer, so exponent goes up by one
    lshr    \x, \x, 1
i2f_pack\lbl:
    ldk     \s2, 158
    sub     \s1, \s2, \s1           # biased exponent = 158 - leading zeros
    # Insert the exponent as an 8-bit field starting at bit 23.
    ldl     \s1, \s1, (8<<5)|23
    bins    \x, \x, \s1
.endm
84563d1a8abSmrg
84663d1a8abSmrg
#ifdef L_floatsisf
# float __floatsisf (int i)
#
# Convert the signed 32-bit integer in $r0 to a single-precision float.
#
#   In:       $r0 = i
#   Out:      $r0 = bit pattern of the result
#   Scratch:  $r1 (sign mask), $r2, $r3 and $r4 (handed to the i2f macro)
.global __floatsisf
__floatsisf:
    cmp     $r0, 0
    jmpc    z, floatsisf_done       # zero is returned unchanged
    ashr    $r1, $r0, 31            # r1 = all ones for negative i, else zero
    xor     $r0, $r0, $r1           # conditional negate, step 1: complement
    sub     $r0, $r0, $r1           # step 2: add one, r0 now holds the magnitude
    i2f     $r0, $r2, $r3, $r4, 1   # exponent and fraction of the magnitude
    ldl     $r1, $r1, (1<<5)|31     # one-bit field at bit 31 taken from the sign mask
    bins    $r0, $r0, $r1           # put the sign in place
floatsisf_done:
    return
#endif
86263d1a8abSmrg
#ifdef L_floatunsisf
# float __floatunsisf (unsigned int u)
#
# Convert the unsigned 32-bit integer in $r0 to a single-precision float.
#
#   In:       $r0 = u
#   Out:      $r0 = bit pattern of the result
#   Scratch:  $r1, $r2 and $r3 (handed to the i2f macro)
.global __floatunsisf
__floatunsisf:
    cmp     $r0, 0
    jmpc    z, floatunsisf_done     # zero is returned unchanged
    i2f     $r0, $r1, $r2, $r3, 2
floatunsisf_done:
    return
#endif
87363d1a8abSmrg
#if 0
##########################################################################
##########################################################################
## float compare
##
## This routine is compiled out.
##
##   In:       $r0 = bit pattern of a, $r1 = bit pattern of b
##   Out:      $r0 = 1 when a is greater than b or when either is a NaN,
##                   0 when they compare equal (both zero counts as equal),
##                   the word stored at high_uint when a is less than b
##   Scratch:  $r2, $r3, $r4


__cmpsf2_:
    # magnitudes of both operands
    lpm     $r3, nan                # this constant doubles as the abs mask
    and     $r2, $r0, $r3
    and     $r3, $r1, $r3
    # a magnitude above that of infinity means NaN
    lpm     $r4, inf
    cmp     $r2, $r4
    jmpc    gt, cmp_is_gt
    cmp     $r3, $r4
    jmpc    gt, cmp_is_gt
    # both magnitudes zero: equal whatever the signs
    or      $r2, $r2, $r3
    cmp     $r2, 0
    jmpc    z, cmp_is_eq
    # identical bit patterns: equal
    cmp     $r0, $r1
    jmpc    z, cmp_is_eq
    # bit 31 of the AND is set only when both operands are negative
    and     $r2, $r0, $r1
    cmp     $r2, 0
    jmpc    s, cmp_both_neg
    # at least one operand is positive: signed integer order is float order
    cmp     $r0, $r1
    jmpc    gt, cmp_is_gt
cmp_is_lt:
    lpm     $r0, high_uint
    return
cmp_both_neg:
    # both negative: the integer order is the reverse of the float order
    cmp     $r0, $r1
    jmpc    gte, cmp_is_lt
cmp_is_gt:
    ldk     $r0, 1
    return
cmp_is_eq:
    ldk     $r0, 0
    return
#endif
92063d1a8abSmrg
#ifdef  L_udivsi3
# unsigned int __udivsi3 (unsigned int dividend, unsigned int divisor)
#
# Shift-and-subtract division, one quotient bit per pass, 32 passes.
#
#   In:       $r0 = dividend, $r1 = divisor
#   Out:      $r0 = quotient, $r2 = remainder
#   Scratch:  $r3; $r28 is used as the pass counter and is saved and
#             restored around the loop
#
# No check is made for a zero divisor.
.global __udivsi3
__udivsi3:
	push	$r28
	ldk	$r28,-32		# counts up from -32 to 0
	ldk	$r2,0			# running remainder
1:
	# Shift the 64-bit pair $r2:$r0 left by one place.
	lshr	$r3,$r0,31		# bit leaving the top of $r0
	ashl	$r2,$r2,1
	or	$r2,$r2,$r3
	ashl	$r0,$r0,1		# bit 0 of $r0 is now free for the quotient bit
	cmp	$r2,$r1
	jmpc	b,2f			# remainder below divisor: quotient bit stays 0
	sub	$r2,$r2,$r1
	add	$r0,$r0,1		# quotient bit is 1
2:
	add	$r28,$r28,1
	jmpx	31,$r28,1,1b		# go round again while the counter is negative
	pop	$r28
	return
#endif
94763d1a8abSmrg
#ifdef	L_umodsi3
# unsigned int __umodsi3 (unsigned int dividend, unsigned int divisor)
#
#   In:   $r0 = dividend, $r1 = divisor
#   Out:  $r0 = remainder
#
# All of the work is done by __udivsi3; this wrapper only moves the result.
.global	__umodsi3
__umodsi3:
	call	__udivsi3
	move	$r0,$r2		# the value __udivsi3 leaves in $r2 becomes the result
	return
#endif
95563d1a8abSmrg
#ifdef	L_divsi3
# int __divsi3 (int dividend, int divisor)
#
# Signed division built on __udivsi3: divide the magnitudes, then negate
# the quotient when the operand signs differ.
#
#   In:       $r0 = dividend, $r1 = divisor
#   Out:      $r0 = quotient
#   Scratch:  $r2, $r5, plus whatever __udivsi3 uses.  $r5 has to hold its
#             value across the call to __udivsi3.
.global	__divsi3
__divsi3:
	xor	$r5,$r0,$r1	# bit 31 of $r5 = sign of the quotient
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	ashr	$r2,$r0,31	# $r0 = abs($r0)
	xor	$r0,$r0,$r2
	sub	$r0,$r0,$r2
	call	__udivsi3
	ashr	$r5,$r5,31	# all ones for a negative quotient, else zero
	xor	$r0,$r0,$r5	# negate the quotient when the mask is all ones
	sub	$r0,$r0,$r5
	return

#endif
97363d1a8abSmrg
#ifdef	L_modsi3
# int __modsi3 (int dividend, int divisor)
#
# Signed remainder built on __umodsi3: take the remainder of the
# magnitudes, then give it the sign of the dividend.
#
#   In:       $r0 = dividend, $r1 = divisor
#   Out:      $r0 = remainder
#   Scratch:  $r2, $r5, plus whatever __umodsi3 uses.  $r5 has to hold its
#             value across the call to __umodsi3.
.global	__modsi3
__modsi3:
	ashr	$r5,$r0,31	# $r5 = all ones for a negative dividend, else zero
	xor	$r0,$r0,$r5	# $r0 = abs($r0)
	sub	$r0,$r0,$r5
	ashr	$r2,$r1,31	# $r1 = abs($r1)
	xor	$r1,$r1,$r2
	sub	$r1,$r1,$r2
	call	__umodsi3
	xor	$r0,$r0,$r5	# negate the remainder when the dividend was negative
	sub	$r0,$r0,$r5
	return
#endif
990