1module Float where
2
3primitive type ValidFloat : # -> # -> Prop
4
5/** IEEE-754 floating point numbers. */
6primitive type { exponent : #, precision : #}
7  ValidFloat exponent precision => Float exponent precision : *
8
9/** An abbreviation for common 16-bit floating point numbers. */
10type Float16  = Float 5 11
11
12/** An abbreviation for common 32-bit floating point numbers. */
13type Float32  = Float 8 24
14
15/** An abbreviation for common 64-bit floating point numbers. */
16type Float64  = Float 11 53
17
18/** An abbreviation for common 128-bit floating point numbers. */
19type Float128 = Float 15 113
20
21/** An abbreviation for common 256-bit floating point numbers. */
22type Float256 = Float 19 237
23
24
25
26/* ----------------------------------------------------------------------
27 * Rounding modes (this should be an enumeration type, when we add these)
28 *---------------------------------------------------------------------- */
29
30/**
31 * A 'RoundingMode' is used to specify the precise behavior of some
32 * floating point primitives.
33 *
34 * There are five valid 'RoundingMode' values:
35 *  * roundNearestEven
36 *  * roundNearestAway
37 *  * roundPositive
38 *  * roundNegative
39 *  * roundZero
40 */
41type RoundingMode = [3]
42
43/** Round toward nearest, ties go to even. */
44roundNearestEven, rne : RoundingMode
45roundNearestEven = 0
46rne              = roundNearestEven
47
48/** Round toward nearest, ties away from zero. */
49roundNearestAway, rna : RoundingMode
50roundNearestAway  = 1
51rna               = roundNearestAway
52
53/** Round toward positive infinity. */
54roundPositive, rtp : RoundingMode
55roundPositive     = 2
56rtp               = roundPositive
57
58/** Round toward negative infinity. */
59roundNegative, rtn : RoundingMode
60roundNegative     = 3
61rtn               = roundNegative
62
63/** Round toward zero. */
64roundZero, rtz : RoundingMode
65roundZero         = 4
66rtz               = roundZero
67
68
69
70/* ----------------------------------------------------------------------
71 * Constants
72 * ---------------------------------------------------------------------- */
73
74/** Not a number. */
75primitive
76  fpNaN : {e,p} ValidFloat e p => Float e p
77
78/** Positive infinity. */
79primitive
80  fpPosInf : {e,p} ValidFloat e p => Float e p
81
82/** Negative infinity. */
83fpNegInf : {e,p} ValidFloat e p => Float e p
84fpNegInf = - fpPosInf
85
86/** Positive zero. */
87fpPosZero : {e,p} ValidFloat e p => Float e p
88fpPosZero = zero
89
90/** Negative zero. */
91fpNegZero : {e,p} ValidFloat e p => Float e p
92fpNegZero = - fpPosZero
93
94
95// Binary representations
96
97/** A floating point number using the exact bit pattern,
98in IEEE interchange format with layout:
99
100  (sign : [1]) # (biased_exponent : [e]) # (significand : [p-1])
101*/
102primitive
103  fpFromBits : {e,p} ValidFloat e p => [e + p] -> Float e p
104
105/** Export a floating point number in IEEE interchange format with layout:
106
107  (sign : [1]) # (biased_exponent : [e]) # (significand : [p-1])
108
109NaN is represented as:
110  * positive:           sign        == 0
111  * quiet with no info: significand == 0b1 # 0
112*/
113primitive
114  fpToBits : {e,p} ValidFloat e p => Float e p -> [e + p]
115
116
117
118
119
120/* ----------------------------------------------------------------------
121 * Predicates
122 * ----------------------------------------------------------------------
123 */
124
125// Operations in `Cmp` use IEEE reasoning.
126
127/** Check if two floating point numbers are representationally the same.
128In particular, the following hold:
129    *    NaN       =.= NaN
130    * ~ (pfNegZero =.= fpPosZero)
131*/
132primitive
133  (=.=) : {e,p} ValidFloat e p => Float e p -> Float e p -> Bool
134
135infix 20 =.=
136
137/** Test if this value is not-a-number (NaN). */
138primitive fpIsNaN : {e,p} ValidFloat e p => Float e p -> Bool
139
140/** Test if this value is positive or negative infinity. */
141primitive fpIsInf : {e,p} ValidFloat e p => Float e p -> Bool
142
143/** Test if this value is positive or negative zero. */
144primitive fpIsZero : {e,p} ValidFloat e p => Float e p -> Bool
145
146/** Test if this value is negative. */
147primitive fpIsNeg : {e,p} ValidFloat e p => Float e p -> Bool
148
149/** Test if this value is normal (not NaN, not infinite, not zero, and not subnormal). */
150primitive fpIsNormal : {e,p} ValidFloat e p => Float e p -> Bool
151
152/**
153 * Test if this value is subnormal.  Subnormal values are nonzero
154 * values with magnitudes smaller than can be represented with the
155 * normal implicit leading bit convention.
156 */
157primitive fpIsSubnormal : {e,p} ValidFloat e p => Float e p -> Bool
158
159/* Returns true for numbers that are not an infinity or NaN. */
160fpIsFinite : {e,p} ValidFloat e p => Float e p -> Bool
161fpIsFinite f = ~ (fpIsNaN f \/ fpIsInf f )
162
163
164/* ----------------------------------------------------------------------
165 * Arithmetic
166 * ---------------------------------------------------------------------- */
167
168
169/** Add floating point numbers using the given rounding mode. */
170primitive
171  fpAdd : {e,p} ValidFloat e p =>
172    RoundingMode -> Float e p -> Float e p -> Float e p
173
174/** Subtract floating point numbers using the given rounding mode. */
175primitive
176  fpSub : {e,p} ValidFloat e p =>
177    RoundingMode -> Float e p -> Float e p -> Float e p
178
179/** Multiply floating point numbers using the given rounding mode. */
180primitive
181  fpMul : {e,p} ValidFloat e p =>
182    RoundingMode -> Float e p -> Float e p -> Float e p
183
184/** Divide floating point numbers using the given rounding mode. */
185primitive
186  fpDiv : {e,p} ValidFloat e p =>
187    RoundingMode -> Float e p -> Float e p -> Float e p
188
189/**
190 * Fused-multiply-add.  'fpFMA r x y z' computes the value '(x*y)+z',
191 * rounding the result according to mode 'r' only after performing both
192 * operations.
193 */
194primitive
195  fpFMA : {e,p} ValidFloat e p =>
196    RoundingMode -> Float e p -> Float e p -> Float e p -> Float e p
197
198/**
199 * Absolute value of a floating-point value.
200 */
201primitive
202  fpAbs : {e,p} ValidFloat e p =>
203    Float e p -> Float e p
204
205/**
206 * Square root of a floating-point value.  The square root of
207 * a negative value yiels NaN, except that the sqaure root of
208 * '-0.0' is '-0.0'.
209 */
210primitive
211  fpSqrt : {e,p} ValidFloat e p =>
212    RoundingMode -> Float e p -> Float e p
213
214/* ------------------------------------------------------------ *
215 * Rationals                                                    *
216 * ------------------------------------------------------------ */
217
218/** Convert a floating point number to a rational.
219It is an error to use this with infinity or NaN **/
220primitive
221  fpToRational : {e,p} ValidFloat e p =>
222    Float e p -> Rational
223
224/** Convert a rational to a floating point number, using the
225given rounding mode, if the number cannot be represented exactly. */
226primitive
227  fpFromRational : {e,p} ValidFloat e p =>
228    RoundingMode -> Rational -> Float e p
229