1module Data.Csv.Conversion.Internal 2 ( decimal 3 , scientific 4 , realFloat 5 ) where 6 7import Data.ByteString.Builder (Builder, toLazyByteString, word8, char8, 8 string8, byteString) 9import qualified Data.ByteString.Builder.Prim as BP 10import Data.ByteString.Builder.Scientific (scientificBuilder) 11import Data.Array.Base (unsafeAt) 12import Data.Array.IArray 13import qualified Data.ByteString as B 14import Data.Char (ord) 15import Data.Int 16import qualified Data.Monoid as Mon 17import Data.Scientific (Scientific) 18import Data.Word 19 20import Data.Csv.Util (toStrict) 21 22------------------------------------------------------------------------ 23-- Integers 24 25decimal :: Integral a => a -> B.ByteString 26decimal = toStrict . toLazyByteString . formatDecimal 27{-# INLINE decimal #-} 28 29-- TODO: Add an optimized version for Integer. 30 31formatDecimal :: Integral a => a -> Builder 32{-# RULES "formatDecimal/Int" formatDecimal = formatBoundedSigned 33 :: Int -> Builder #-} 34{-# RULES "formatDecimal/Int8" formatDecimal = formatBoundedSigned 35 :: Int8 -> Builder #-} 36{-# RULES "formatDecimal/Int16" formatDecimal = formatBoundedSigned 37 :: Int16 -> Builder #-} 38{-# RULES "formatDecimal/Int32" formatDecimal = formatBoundedSigned 39 :: Int32 -> Builder #-} 40{-# RULES "formatDecimal/Int64" formatDecimal = formatBoundedSigned 41 :: Int64 -> Builder #-} 42{-# RULES "formatDecimal/Word" formatDecimal = formatPositive 43 :: Word -> Builder #-} 44{-# RULES "formatDecimal/Word8" formatDecimal = formatPositive 45 :: Word8 -> Builder #-} 46{-# RULES "formatDecimal/Word16" formatDecimal = formatPositive 47 :: Word16 -> Builder #-} 48{-# RULES "formatDecimal/Word32" formatDecimal = formatPositive 49 :: Word32 -> Builder #-} 50{-# RULES "formatDecimal/Word64" formatDecimal = formatPositive 51 :: Word64 -> Builder #-} 52{-# NOINLINE formatDecimal #-} 53formatDecimal i 54 | i < 0 = minus Mon.<> 55 if i <= -128 56 then formatPositive (-(i `quot` 10)) Mon.<> digit (-(i `rem` 10)) 57 else formatPositive (-i) 58 | otherwise = formatPositive i 59 60formatBoundedSigned :: (Integral a, Bounded a) => a -> Builder 61{-# SPECIALIZE formatBoundedSigned :: Int -> Builder #-} 62{-# SPECIALIZE formatBoundedSigned :: Int8 -> Builder #-} 63{-# SPECIALIZE formatBoundedSigned :: Int16 -> Builder #-} 64{-# SPECIALIZE formatBoundedSigned :: Int32 -> Builder #-} 65{-# SPECIALIZE formatBoundedSigned :: Int64 -> Builder #-} 66formatBoundedSigned i 67 | i < 0 = minus Mon.<> 68 if i == minBound 69 then formatPositive (-(i `quot` 10)) Mon.<> digit (-(i `rem` 10)) 70 else formatPositive (-i) 71 | otherwise = formatPositive i 72 73formatPositive :: Integral a => a -> Builder 74{-# SPECIALIZE formatPositive :: Int -> Builder #-} 75{-# SPECIALIZE formatPositive :: Int8 -> Builder #-} 76{-# SPECIALIZE formatPositive :: Int16 -> Builder #-} 77{-# SPECIALIZE formatPositive :: Int32 -> Builder #-} 78{-# SPECIALIZE formatPositive :: Int64 -> Builder #-} 79{-# SPECIALIZE formatPositive :: Word -> Builder #-} 80{-# SPECIALIZE formatPositive :: Word8 -> Builder #-} 81{-# SPECIALIZE formatPositive :: Word16 -> Builder #-} 82{-# SPECIALIZE formatPositive :: Word32 -> Builder #-} 83{-# SPECIALIZE formatPositive :: Word64 -> Builder #-} 84formatPositive = go 85 where go n | n < 10 = digit n 86 | otherwise = go (n `quot` 10) Mon.<> digit (n `rem` 10) 87 88minus :: Builder 89minus = word8 45 90 91zero :: Word8 92zero = 48 93 94digit :: Integral a => a -> Builder 95digit n = word8 $! i2w (fromIntegral n) 96{-# INLINE digit #-} 97 98i2w :: Int -> Word8 99i2w i = zero + fromIntegral i 100{-# INLINE i2w #-} 101 102------------------------------------------------------------------------ 103-- Floating point numbers 104 105scientific :: Scientific -> B.ByteString 106scientific = toStrict . toLazyByteString . scientificBuilder 107{-# INLINE scientific #-} 108 109realFloat :: RealFloat a => a -> B.ByteString 110{-# SPECIALIZE realFloat :: Float -> B.ByteString #-} 111{-# SPECIALIZE realFloat :: Double -> B.ByteString #-} 112realFloat = toStrict . toLazyByteString . formatRealFloat Generic 113 114-- | Control the rendering of floating point numbers. 115data FPFormat = Exponent 116 -- ^ Scientific notation (e.g. @2.3e123@). 117 | Fixed 118 -- ^ Standard decimal notation. 119 | Generic 120 -- ^ Use decimal notation for values between @0.1@ and 121 -- @9,999,999@, and scientific notation otherwise. 122 deriving (Enum, Read, Show) 123 124formatRealFloat :: RealFloat a => FPFormat -> a -> Builder 125{-# SPECIALIZE formatRealFloat :: FPFormat -> Float -> Builder #-} 126{-# SPECIALIZE formatRealFloat :: FPFormat -> Double -> Builder #-} 127formatRealFloat fmt x 128 | isNaN x = string8 "NaN" 129 | isInfinite x = if x < 0 130 then string8 "-Infinity" 131 else string8 "Infinity" 132 | x < 0 || isNegativeZero x = minus Mon.<> doFmt fmt (floatToDigits (-x)) 133 | otherwise = doFmt fmt (floatToDigits x) 134 where 135 doFmt format (is, e) = 136 let ds = map i2d is in 137 case format of 138 Generic -> 139 doFmt (if e < 0 || e > 7 then Exponent else Fixed) 140 (is,e) 141 Exponent -> 142 let show_e' = formatDecimal (e-1) in 143 case ds of 144 [48] -> string8 "0.0e0" 145 [d] -> word8 d Mon.<> string8 ".0e" Mon.<> show_e' 146 (d:ds') -> word8 d Mon.<> char8 '.' Mon.<> word8s ds' Mon.<> 147 char8 'e' Mon.<> show_e' 148 [] -> error "formatRealFloat/doFmt/Exponent: []" 149 Fixed 150 | e <= 0 -> string8 "0." Mon.<> 151 byteString (B.replicate (-e) zero) Mon.<> 152 word8s ds 153 | otherwise -> 154 let 155 f 0 s rs = mk0 (reverse s) Mon.<> char8 '.' Mon.<> mk0 rs 156 f n s [] = f (n-1) (zero:s) [] 157 f n s (r:rs) = f (n-1) (r:s) rs 158 in 159 f e [] ds 160 where mk0 ls = case ls of { [] -> word8 zero ; _ -> word8s ls} 161 162-- Based on "Printing Floating-Point Numbers Quickly and Accurately" 163-- by R.G. Burger and R.K. Dybvig in PLDI 96. 164-- This version uses a much slower logarithm estimator. It should be improved. 165 166-- | 'floatToDigits' takes a base and a non-negative 'RealFloat' number, 167-- and returns a list of digits and an exponent. 168-- In particular, if @x>=0@, and 169-- 170-- > floatToDigits base x = ([d1,d2,...,dn], e) 171-- 172-- then 173-- 174-- (1) @n >= 1@ 175-- 176-- (2) @x = 0.d1d2...dn * (base**e)@ 177-- 178-- (3) @0 <= di <= base-1@ 179 180floatToDigits :: (RealFloat a) => a -> ([Int], Int) 181{-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-} 182{-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-} 183floatToDigits 0 = ([0], 0) 184floatToDigits x = 185 let 186 (f0, e0) = decodeFloat x 187 (minExp0, _) = floatRange x 188 p = floatDigits x 189 b = floatRadix x 190 minExp = minExp0 - p -- the real minimum exponent 191 -- Haskell requires that f be adjusted so denormalized numbers 192 -- will have an impossibly low exponent. Adjust for this. 193 (f, e) = 194 let n = minExp - e0 in 195 if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0) 196 (r, s, mUp, mDn) = 197 if e >= 0 then 198 let be = expt b e in 199 if f == expt b (p-1) then 200 (f*be*b*2, 2*b, be*b, be) -- according to Burger and Dybvig 201 else 202 (f*be*2, 2, be, be) 203 else 204 if e > minExp && f == expt b (p-1) then 205 (f*b*2, expt b (-e+1)*2, b, 1) 206 else 207 (f*2, expt b (-e)*2, 1, 1) 208 k :: Int 209 k = 210 let 211 k0 :: Int 212 k0 = 213 if b == 2 then 214 -- logBase 10 2 is very slightly larger than 8651/28738 215 -- (about 5.3558e-10), so if log x >= 0, the approximation 216 -- k1 is too small, hence we add one and need one fixup step less. 217 -- If log x < 0, the approximation errs rather on the high side. 218 -- That is usually more than compensated for by ignoring the 219 -- fractional part of logBase 2 x, but when x is a power of 1/2 220 -- or slightly larger and the exponent is a multiple of the 221 -- denominator of the rational approximation to logBase 10 2, 222 -- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x, 223 -- we get a leading zero-digit we don't want. 224 -- With the approximation 3/10, this happened for 225 -- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above. 226 -- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x 227 -- for IEEE-ish floating point types with exponent fields 228 -- <= 17 bits and mantissae of several thousand bits, earlier 229 -- convergents to logBase 10 2 would fail for long double. 230 -- Using quot instead of div is a little faster and requires 231 -- fewer fixup steps for negative lx. 232 let lx = p - 1 + e0 233 k1 = (lx * 8651) `quot` 28738 234 in if lx >= 0 then k1 + 1 else k1 235 else 236 -- f :: Integer, log :: Float -> Float, 237 -- ceiling :: Float -> Int 238 ceiling ((log (fromInteger (f+1) :: Float) + 239 fromIntegral e * log (fromInteger b)) / 240 log 10) 241--WAS: fromInt e * log (fromInteger b)) 242 243 fixup n = 244 if n >= 0 then 245 if r + mUp <= expt 10 n * s then n else fixup (n+1) 246 else 247 if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1) 248 in 249 fixup k0 250 251 gen ds rn sN mUpN mDnN = 252 let 253 (dn, rn') = (rn * 10) `quotRem` sN 254 mUpN' = mUpN * 10 255 mDnN' = mDnN * 10 256 in 257 case (rn' < mDnN', rn' + mUpN' > sN) of 258 (True, False) -> dn : ds 259 (False, True) -> dn+1 : ds 260 (True, True) -> if rn' * 2 < sN then dn : ds else dn+1 : ds 261 (False, False) -> gen (dn:ds) rn' sN mUpN' mDnN' 262 263 rds = 264 if k >= 0 then 265 gen [] r (s * expt 10 k) mUp mDn 266 else 267 let bk = expt 10 (-k) in 268 gen [] (r * bk) s (mUp * bk) (mDn * bk) 269 in 270 (map fromIntegral (reverse rds), k) 271 272-- Exponentiation with a cache for the most common numbers. 273minExpt, maxExpt :: Int 274minExpt = 0 275maxExpt = 1100 276 277expt :: Integer -> Int -> Integer 278expt base n 279 | base == 2 && n >= minExpt && n <= maxExpt = expts `unsafeAt` n 280 | base == 10 && n <= maxExpt10 = expts10 `unsafeAt` n 281 | otherwise = base^n 282 283expts :: Array Int Integer 284expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]] 285 286maxExpt10 :: Int 287maxExpt10 = 324 288 289expts10 :: Array Int Integer 290expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]] 291 292-- | Unsafe conversion for decimal digits. 293{-# INLINE i2d #-} 294i2d :: Int -> Word8 295i2d i = fromIntegral (ord '0' + i) 296 297-- | Word8 list rendering 298word8s :: [Word8] -> Builder 299word8s = BP.primMapListFixed BP.word8 300