1module Data.Csv.Conversion.Internal
2    ( decimal
3    , scientific
4    , realFloat
5    ) where
6
7import Data.ByteString.Builder (Builder, toLazyByteString, word8, char8,
8                                string8, byteString)
9import qualified Data.ByteString.Builder.Prim as BP
10import Data.ByteString.Builder.Scientific (scientificBuilder)
11import Data.Array.Base (unsafeAt)
12import Data.Array.IArray
13import qualified Data.ByteString as B
14import Data.Char (ord)
15import Data.Int
16import qualified Data.Monoid as Mon
17import Data.Scientific (Scientific)
18import Data.Word
19
20import Data.Csv.Util (toStrict)
21
22------------------------------------------------------------------------
23-- Integers
24
25decimal :: Integral a => a -> B.ByteString
26decimal = toStrict . toLazyByteString . formatDecimal
27{-# INLINE decimal #-}
28
29-- TODO: Add an optimized version for Integer.
30
31formatDecimal :: Integral a => a -> Builder
32{-# RULES "formatDecimal/Int" formatDecimal = formatBoundedSigned
33    :: Int -> Builder #-}
34{-# RULES "formatDecimal/Int8" formatDecimal = formatBoundedSigned
35    :: Int8 -> Builder #-}
36{-# RULES "formatDecimal/Int16" formatDecimal = formatBoundedSigned
37    :: Int16 -> Builder #-}
38{-# RULES "formatDecimal/Int32" formatDecimal = formatBoundedSigned
39    :: Int32 -> Builder #-}
40{-# RULES "formatDecimal/Int64" formatDecimal = formatBoundedSigned
41    :: Int64 -> Builder #-}
42{-# RULES "formatDecimal/Word" formatDecimal = formatPositive
43    :: Word -> Builder #-}
44{-# RULES "formatDecimal/Word8" formatDecimal = formatPositive
45    :: Word8 -> Builder #-}
46{-# RULES "formatDecimal/Word16" formatDecimal = formatPositive
47    :: Word16 -> Builder #-}
48{-# RULES "formatDecimal/Word32" formatDecimal = formatPositive
49    :: Word32 -> Builder #-}
50{-# RULES "formatDecimal/Word64" formatDecimal = formatPositive
51    :: Word64 -> Builder #-}
52{-# NOINLINE formatDecimal #-}
53formatDecimal i
54    | i < 0     = minus Mon.<>
55                  if i <= -128
56                  then formatPositive (-(i `quot` 10)) Mon.<> digit (-(i `rem` 10))
57                  else formatPositive (-i)
58    | otherwise = formatPositive i
59
60formatBoundedSigned :: (Integral a, Bounded a) => a -> Builder
61{-# SPECIALIZE formatBoundedSigned :: Int -> Builder #-}
62{-# SPECIALIZE formatBoundedSigned :: Int8 -> Builder #-}
63{-# SPECIALIZE formatBoundedSigned :: Int16 -> Builder #-}
64{-# SPECIALIZE formatBoundedSigned :: Int32 -> Builder #-}
65{-# SPECIALIZE formatBoundedSigned :: Int64 -> Builder #-}
66formatBoundedSigned i
67    | i < 0     = minus Mon.<>
68                  if i == minBound
69                  then formatPositive (-(i `quot` 10)) Mon.<> digit (-(i `rem` 10))
70                  else formatPositive (-i)
71    | otherwise = formatPositive i
72
73formatPositive :: Integral a => a -> Builder
74{-# SPECIALIZE formatPositive :: Int -> Builder #-}
75{-# SPECIALIZE formatPositive :: Int8 -> Builder #-}
76{-# SPECIALIZE formatPositive :: Int16 -> Builder #-}
77{-# SPECIALIZE formatPositive :: Int32 -> Builder #-}
78{-# SPECIALIZE formatPositive :: Int64 -> Builder #-}
79{-# SPECIALIZE formatPositive :: Word -> Builder #-}
80{-# SPECIALIZE formatPositive :: Word8 -> Builder #-}
81{-# SPECIALIZE formatPositive :: Word16 -> Builder #-}
82{-# SPECIALIZE formatPositive :: Word32 -> Builder #-}
83{-# SPECIALIZE formatPositive :: Word64 -> Builder #-}
84formatPositive = go
85  where go n | n < 10    = digit n
86             | otherwise = go (n `quot` 10) Mon.<> digit (n `rem` 10)
87
88minus :: Builder
89minus = word8 45
90
91zero :: Word8
92zero = 48
93
94digit :: Integral a => a -> Builder
95digit n = word8 $! i2w (fromIntegral n)
96{-# INLINE digit #-}
97
98i2w :: Int -> Word8
99i2w i = zero + fromIntegral i
100{-# INLINE i2w #-}
101
102------------------------------------------------------------------------
103-- Floating point numbers
104
105scientific :: Scientific -> B.ByteString
106scientific = toStrict . toLazyByteString . scientificBuilder
107{-# INLINE scientific #-}
108
109realFloat :: RealFloat a => a -> B.ByteString
110{-# SPECIALIZE realFloat :: Float -> B.ByteString #-}
111{-# SPECIALIZE realFloat :: Double -> B.ByteString #-}
112realFloat = toStrict . toLazyByteString . formatRealFloat Generic
113
114-- | Control the rendering of floating point numbers.
115data FPFormat = Exponent
116              -- ^ Scientific notation (e.g. @2.3e123@).
117              | Fixed
118              -- ^ Standard decimal notation.
119              | Generic
120              -- ^ Use decimal notation for values between @0.1@ and
121              -- @9,999,999@, and scientific notation otherwise.
122                deriving (Enum, Read, Show)
123
124formatRealFloat :: RealFloat a => FPFormat -> a -> Builder
125{-# SPECIALIZE formatRealFloat :: FPFormat -> Float -> Builder #-}
126{-# SPECIALIZE formatRealFloat :: FPFormat -> Double -> Builder #-}
127formatRealFloat fmt x
128   | isNaN x                   = string8 "NaN"
129   | isInfinite x              = if x < 0
130                                 then string8 "-Infinity"
131                                 else string8 "Infinity"
132   | x < 0 || isNegativeZero x = minus Mon.<> doFmt fmt (floatToDigits (-x))
133   | otherwise                 = doFmt fmt (floatToDigits x)
134 where
135  doFmt format (is, e) =
136    let ds = map i2d is in
137    case format of
138     Generic ->
139      doFmt (if e < 0 || e > 7 then Exponent else Fixed)
140            (is,e)
141     Exponent ->
142        let show_e' = formatDecimal (e-1) in
143        case ds of
144          [48]    -> string8 "0.0e0"
145          [d]     -> word8 d Mon.<> string8 ".0e" Mon.<> show_e'
146          (d:ds') -> word8 d Mon.<> char8 '.' Mon.<> word8s ds' Mon.<>
147                     char8 'e' Mon.<> show_e'
148          []      -> error "formatRealFloat/doFmt/Exponent: []"
149     Fixed
150          | e <= 0    -> string8 "0." Mon.<>
151                         byteString (B.replicate (-e) zero) Mon.<>
152                         word8s ds
153          | otherwise ->
154             let
155                f 0 s    rs  = mk0 (reverse s) Mon.<> char8 '.' Mon.<> mk0 rs
156                f n s    []  = f (n-1) (zero:s) []
157                f n s (r:rs) = f (n-1) (r:s) rs
158             in
159                f e [] ds
160       where mk0 ls = case ls of { [] -> word8 zero ; _ -> word8s ls}
161
162-- Based on "Printing Floating-Point Numbers Quickly and Accurately"
163-- by R.G. Burger and R.K. Dybvig in PLDI 96.
164-- This version uses a much slower logarithm estimator. It should be improved.
165
166-- | 'floatToDigits' takes a base and a non-negative 'RealFloat' number,
167-- and returns a list of digits and an exponent.
168-- In particular, if @x>=0@, and
169--
170-- > floatToDigits base x = ([d1,d2,...,dn], e)
171--
172-- then
173--
174--      (1) @n >= 1@
175--
176--      (2) @x = 0.d1d2...dn * (base**e)@
177--
178--      (3) @0 <= di <= base-1@
179
180floatToDigits :: (RealFloat a) => a -> ([Int], Int)
181{-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-}
182{-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-}
183floatToDigits 0 = ([0], 0)
184floatToDigits x =
185 let
186  (f0, e0) = decodeFloat x
187  (minExp0, _) = floatRange x
188  p = floatDigits x
189  b = floatRadix x
190  minExp = minExp0 - p -- the real minimum exponent
191  -- Haskell requires that f be adjusted so denormalized numbers
192  -- will have an impossibly low exponent.  Adjust for this.
193  (f, e) =
194   let n = minExp - e0 in
195   if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0)
196  (r, s, mUp, mDn) =
197   if e >= 0 then
198    let be = expt b e in
199    if f == expt b (p-1) then
200      (f*be*b*2, 2*b, be*b, be)     -- according to Burger and Dybvig
201    else
202      (f*be*2, 2, be, be)
203   else
204    if e > minExp && f == expt b (p-1) then
205      (f*b*2, expt b (-e+1)*2, b, 1)
206    else
207      (f*2, expt b (-e)*2, 1, 1)
208  k :: Int
209  k =
210   let
211    k0 :: Int
212    k0 =
213     if b == 2 then
214        -- logBase 10 2 is very slightly larger than 8651/28738
215        -- (about 5.3558e-10), so if log x >= 0, the approximation
216        -- k1 is too small, hence we add one and need one fixup step less.
217        -- If log x < 0, the approximation errs rather on the high side.
218        -- That is usually more than compensated for by ignoring the
219        -- fractional part of logBase 2 x, but when x is a power of 1/2
220        -- or slightly larger and the exponent is a multiple of the
221        -- denominator of the rational approximation to logBase 10 2,
222        -- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x,
223        -- we get a leading zero-digit we don't want.
224        -- With the approximation 3/10, this happened for
225        -- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above.
226        -- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x
227        -- for IEEE-ish floating point types with exponent fields
228        -- <= 17 bits and mantissae of several thousand bits, earlier
229        -- convergents to logBase 10 2 would fail for long double.
230        -- Using quot instead of div is a little faster and requires
231        -- fewer fixup steps for negative lx.
232        let lx = p - 1 + e0
233            k1 = (lx * 8651) `quot` 28738
234        in if lx >= 0 then k1 + 1 else k1
235     else
236        -- f :: Integer, log :: Float -> Float,
237        --               ceiling :: Float -> Int
238        ceiling ((log (fromInteger (f+1) :: Float) +
239                 fromIntegral e * log (fromInteger b)) /
240                   log 10)
241--WAS:            fromInt e * log (fromInteger b))
242
243    fixup n =
244      if n >= 0 then
245        if r + mUp <= expt 10 n * s then n else fixup (n+1)
246      else
247        if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1)
248   in
249   fixup k0
250
251  gen ds rn sN mUpN mDnN =
252   let
253    (dn, rn') = (rn * 10) `quotRem` sN
254    mUpN' = mUpN * 10
255    mDnN' = mDnN * 10
256   in
257   case (rn' < mDnN', rn' + mUpN' > sN) of
258    (True,  False) -> dn : ds
259    (False, True)  -> dn+1 : ds
260    (True,  True)  -> if rn' * 2 < sN then dn : ds else dn+1 : ds
261    (False, False) -> gen (dn:ds) rn' sN mUpN' mDnN'
262
263  rds =
264   if k >= 0 then
265      gen [] r (s * expt 10 k) mUp mDn
266   else
267     let bk = expt 10 (-k) in
268     gen [] (r * bk) s (mUp * bk) (mDn * bk)
269 in
270 (map fromIntegral (reverse rds), k)
271
272-- Exponentiation with a cache for the most common numbers.
273minExpt, maxExpt :: Int
274minExpt = 0
275maxExpt = 1100
276
277expt :: Integer -> Int -> Integer
278expt base n
279    | base == 2 && n >= minExpt && n <= maxExpt = expts `unsafeAt` n
280    | base == 10 && n <= maxExpt10              = expts10 `unsafeAt` n
281    | otherwise                                 = base^n
282
283expts :: Array Int Integer
284expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]]
285
286maxExpt10 :: Int
287maxExpt10 = 324
288
289expts10 :: Array Int Integer
290expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]]
291
292-- | Unsafe conversion for decimal digits.
293{-# INLINE i2d #-}
294i2d :: Int -> Word8
295i2d i = fromIntegral (ord '0' + i)
296
297-- | Word8 list rendering
298word8s :: [Word8] -> Builder
299word8s = BP.primMapListFixed BP.word8
300