1module Float where 2 3primitive type ValidFloat : # -> # -> Prop 4 5/** IEEE-754 floating point numbers. */ 6primitive type { exponent : #, precision : #} 7 ValidFloat exponent precision => Float exponent precision : * 8 9/** An abbreviation for common 16-bit floating point numbers. */ 10type Float16 = Float 5 11 11 12/** An abbreviation for common 32-bit floating point numbers. */ 13type Float32 = Float 8 24 14 15/** An abbreviation for common 64-bit floating point numbers. */ 16type Float64 = Float 11 53 17 18/** An abbreviation for common 128-bit floating point numbers. */ 19type Float128 = Float 15 113 20 21/** An abbreviation for common 256-bit floating point numbers. */ 22type Float256 = Float 19 237 23 24 25 26/* ---------------------------------------------------------------------- 27 * Rounding modes (this should be an enumeration type, when we add these) 28 *---------------------------------------------------------------------- */ 29 30/** 31 * A 'RoundingMode' is used to specify the precise behavior of some 32 * floating point primitives. 33 * 34 * There are five valid 'RoundingMode' values: 35 * * roundNearestEven 36 * * roundNearestAway 37 * * roundPositive 38 * * roundNegative 39 * * roundZero 40 */ 41type RoundingMode = [3] 42 43/** Round toward nearest, ties go to even. */ 44roundNearestEven, rne : RoundingMode 45roundNearestEven = 0 46rne = roundNearestEven 47 48/** Round toward nearest, ties away from zero. */ 49roundNearestAway, rna : RoundingMode 50roundNearestAway = 1 51rna = roundNearestAway 52 53/** Round toward positive infinity. */ 54roundPositive, rtp : RoundingMode 55roundPositive = 2 56rtp = roundPositive 57 58/** Round toward negative infinity. */ 59roundNegative, rtn : RoundingMode 60roundNegative = 3 61rtn = roundNegative 62 63/** Round toward zero. */ 64roundZero, rtz : RoundingMode 65roundZero = 4 66rtz = roundZero 67 68 69 70/* ---------------------------------------------------------------------- 71 * Constants 72 * ---------------------------------------------------------------------- */ 73 74/** Not a number. */ 75primitive 76 fpNaN : {e,p} ValidFloat e p => Float e p 77 78/** Positive infinity. */ 79primitive 80 fpPosInf : {e,p} ValidFloat e p => Float e p 81 82/** Negative infinity. */ 83fpNegInf : {e,p} ValidFloat e p => Float e p 84fpNegInf = - fpPosInf 85 86/** Positive zero. */ 87fpPosZero : {e,p} ValidFloat e p => Float e p 88fpPosZero = zero 89 90/** Negative zero. */ 91fpNegZero : {e,p} ValidFloat e p => Float e p 92fpNegZero = - fpPosZero 93 94 95// Binary representations 96 97/** A floating point number using the exact bit pattern, 98in IEEE interchange format with layout: 99 100 (sign : [1]) # (biased_exponent : [e]) # (significand : [p-1]) 101*/ 102primitive 103 fpFromBits : {e,p} ValidFloat e p => [e + p] -> Float e p 104 105/** Export a floating point number in IEEE interchange format with layout: 106 107 (sign : [1]) # (biased_exponent : [e]) # (significand : [p-1]) 108 109NaN is represented as: 110 * positive: sign == 0 111 * quiet with no info: significand == 0b1 # 0 112*/ 113primitive 114 fpToBits : {e,p} ValidFloat e p => Float e p -> [e + p] 115 116 117 118 119 120/* ---------------------------------------------------------------------- 121 * Predicates 122 * ---------------------------------------------------------------------- 123 */ 124 125// Operations in `Cmp` use IEEE reasoning. 126 127/** Check if two floating point numbers are representationally the same. 128In particular, the following hold: 129 * NaN =.= NaN 130 * ~ (pfNegZero =.= fpPosZero) 131*/ 132primitive 133 (=.=) : {e,p} ValidFloat e p => Float e p -> Float e p -> Bool 134 135infix 20 =.= 136 137/** Test if this value is not-a-number (NaN). */ 138primitive fpIsNaN : {e,p} ValidFloat e p => Float e p -> Bool 139 140/** Test if this value is positive or negative infinity. */ 141primitive fpIsInf : {e,p} ValidFloat e p => Float e p -> Bool 142 143/** Test if this value is positive or negative zero. */ 144primitive fpIsZero : {e,p} ValidFloat e p => Float e p -> Bool 145 146/** Test if this value is negative. */ 147primitive fpIsNeg : {e,p} ValidFloat e p => Float e p -> Bool 148 149/** Test if this value is normal (not NaN, not infinite, not zero, and not subnormal). */ 150primitive fpIsNormal : {e,p} ValidFloat e p => Float e p -> Bool 151 152/** 153 * Test if this value is subnormal. Subnormal values are nonzero 154 * values with magnitudes smaller than can be represented with the 155 * normal implicit leading bit convention. 156 */ 157primitive fpIsSubnormal : {e,p} ValidFloat e p => Float e p -> Bool 158 159/* Returns true for numbers that are not an infinity or NaN. */ 160fpIsFinite : {e,p} ValidFloat e p => Float e p -> Bool 161fpIsFinite f = ~ (fpIsNaN f \/ fpIsInf f ) 162 163 164/* ---------------------------------------------------------------------- 165 * Arithmetic 166 * ---------------------------------------------------------------------- */ 167 168 169/** Add floating point numbers using the given rounding mode. */ 170primitive 171 fpAdd : {e,p} ValidFloat e p => 172 RoundingMode -> Float e p -> Float e p -> Float e p 173 174/** Subtract floating point numbers using the given rounding mode. */ 175primitive 176 fpSub : {e,p} ValidFloat e p => 177 RoundingMode -> Float e p -> Float e p -> Float e p 178 179/** Multiply floating point numbers using the given rounding mode. */ 180primitive 181 fpMul : {e,p} ValidFloat e p => 182 RoundingMode -> Float e p -> Float e p -> Float e p 183 184/** Divide floating point numbers using the given rounding mode. */ 185primitive 186 fpDiv : {e,p} ValidFloat e p => 187 RoundingMode -> Float e p -> Float e p -> Float e p 188 189/** 190 * Fused-multiply-add. 'fpFMA r x y z' computes the value '(x*y)+z', 191 * rounding the result according to mode 'r' only after performing both 192 * operations. 193 */ 194primitive 195 fpFMA : {e,p} ValidFloat e p => 196 RoundingMode -> Float e p -> Float e p -> Float e p -> Float e p 197 198/** 199 * Absolute value of a floating-point value. 200 */ 201primitive 202 fpAbs : {e,p} ValidFloat e p => 203 Float e p -> Float e p 204 205/** 206 * Square root of a floating-point value. The square root of 207 * a negative value yiels NaN, except that the sqaure root of 208 * '-0.0' is '-0.0'. 209 */ 210primitive 211 fpSqrt : {e,p} ValidFloat e p => 212 RoundingMode -> Float e p -> Float e p 213 214/* ------------------------------------------------------------ * 215 * Rationals * 216 * ------------------------------------------------------------ */ 217 218/** Convert a floating point number to a rational. 219It is an error to use this with infinity or NaN **/ 220primitive 221 fpToRational : {e,p} ValidFloat e p => 222 Float e p -> Rational 223 224/** Convert a rational to a floating point number, using the 225given rounding mode, if the number cannot be represented exactly. */ 226primitive 227 fpFromRational : {e,p} ValidFloat e p => 228 RoundingMode -> Rational -> Float e p 229