1{-# LANGUAGE CPP #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE BangPatterns #-}
4{-# LANGUAGE ScopedTypeVariables #-}
5{-# LANGUAGE UnboxedTuples #-}
6{-# LANGUAGE PatternGuards #-}
7
8-- |
9-- Module      :  Data.Scientific
10-- Copyright   :  Bas van Dijk 2013
11-- License     :  BSD3
12-- Maintainer  :  Bas van Dijk <v.dijk.bas@gmail.com>
13--
14-- This module provides the number type 'Scientific'. Scientific numbers are
15-- arbitrary precision and space efficient. They are represented using
16-- <http://en.wikipedia.org/wiki/Scientific_notation scientific notation>. The
17-- implementation uses an 'Integer' 'coefficient' @c@ and an 'Int'
18-- 'base10Exponent' @e@. A scientific number corresponds to the 'Fractional'
19-- number: @'fromInteger' c * 10 '^^' e@.
20--
21-- Note that since we're using an 'Int' to represent the exponent these numbers
22-- aren't truly arbitrary precision. I intend to change the type of the exponent
23-- to 'Integer' in a future release.
24--
25-- /WARNING:/ Although @Scientific@ has instances for all numeric classes the
26-- methods should be used with caution when applied to scientific numbers coming
27-- from untrusted sources. See the warnings of the instances belonging to
28-- 'Scientific'.
29--
30-- The main application of 'Scientific' is to be used as the target of parsing
31-- arbitrary precision numbers coming from an untrusted source. The advantages
32-- over using 'Rational' for this are that:
33--
34-- * A 'Scientific' is more efficient to construct. Rational numbers need to be
35-- constructed using '%' which has to compute the 'gcd' of the 'numerator' and
36-- 'denominator'.
37--
38-- * 'Scientific' is safe against numbers with huge exponents. For example:
39-- @1e1000000000 :: 'Rational'@ will fill up all space and crash your
40-- program. Scientific works as expected:
41--
42--  > > read "1e1000000000" :: Scientific
43--  > 1.0e1000000000
44--
45-- * Also, the space usage of converting scientific numbers with huge exponents
46-- to @'Integral's@ (like: 'Int') or @'RealFloat's@ (like: 'Double' or 'Float')
47-- will always be bounded by the target type.
48--
49-- This module is designed to be imported qualified:
50--
51-- @import Data.Scientific as Scientific@
52module Data.Scientific
53    ( Scientific
54
55      -- * Construction
56    , scientific
57
58      -- * Projections
59    , coefficient
60    , base10Exponent
61
62      -- * Predicates
63    , isFloating
64    , isInteger
65
66      -- * Conversions
67      -- ** Rational
68    , unsafeFromRational
69    , fromRationalRepetend
70    , fromRationalRepetendLimited
71    , fromRationalRepetendUnlimited
72    , toRationalRepetend
73
74      -- ** Floating & integer
75    , floatingOrInteger
76    , toRealFloat
77    , toBoundedRealFloat
78    , toBoundedInteger
79    , fromFloatDigits
80
81      -- * Parsing
82    , scientificP
83
84      -- * Pretty printing
85    , formatScientific
86    , FPFormat(..)
87
88    , toDecimalDigits
89
90      -- * Normalization
91    , normalize
92    ) where
93
94
95----------------------------------------------------------------------
96-- Imports
97----------------------------------------------------------------------
98
99import           Control.Exception            (throw, ArithException(DivideByZero))
100import           Control.Monad                (mplus)
101import           Control.Monad.ST             (runST)
102import           Control.DeepSeq              (NFData, rnf)
103import           Data.Binary                  (Binary, get, put)
104import           Data.Char                    (intToDigit, ord)
105import           Data.Data                    (Data)
106import           Data.Hashable                (Hashable(..))
107import           Data.Int                     (Int8, Int16, Int32, Int64)
108import qualified Data.Map            as M     (Map, empty, insert, lookup)
109import           Data.Ratio                   ((%), numerator, denominator)
110import           Data.Typeable                (Typeable)
111import qualified Data.Primitive.Array as Primitive
112import           Data.Word                    (Word8, Word16, Word32, Word64)
113import           Math.NumberTheory.Logarithms (integerLog10')
114import qualified Numeric                      (floatToDigits)
115import qualified Text.Read                       as Read
116import           Text.Read                        (readPrec)
117import qualified Text.ParserCombinators.ReadPrec as ReadPrec
118import qualified Text.ParserCombinators.ReadP    as ReadP
119import           Text.ParserCombinators.ReadP     ( ReadP )
120import           Data.Text.Lazy.Builder.RealFloat (FPFormat(..))
121
122#if !MIN_VERSION_base(4,9,0)
123import           Control.Applicative          ((*>))
124#endif
125
126#if !MIN_VERSION_base(4,8,0)
127import           Data.Functor                 ((<$>))
128import           Data.Word                    (Word)
129import           Control.Applicative          ((<*>))
130#endif
131
132#if MIN_VERSION_base(4,5,0)
133import           Data.Bits                    (unsafeShiftR)
134#else
135import           Data.Bits                    (shiftR)
136#endif
137
138import GHC.Integer        (quotRemInteger, quotInteger)
139import GHC.Integer.Compat (divInteger)
140import Utils              (roundTo)
141
142
143----------------------------------------------------------------------
144-- Type
145----------------------------------------------------------------------
146
147-- | An arbitrary-precision number represented using
148-- <http://en.wikipedia.org/wiki/Scientific_notation scientific notation>.
149--
150-- This type describes the set of all @'Real's@ which have a finite
151-- decimal expansion.
152--
153-- A scientific number with 'coefficient' @c@ and 'base10Exponent' @e@
154-- corresponds to the 'Fractional' number: @'fromInteger' c * 10 '^^' e@
155data Scientific = Scientific
156    { coefficient :: !Integer
157      -- ^ The coefficient of a scientific number.
158      --
159      -- Note that this number is not necessarily normalized, i.e.
160      -- it could contain trailing zeros.
161      --
162      -- Scientific numbers are automatically normalized when pretty printed or
163      -- in 'toDecimalDigits'.
164      --
165      -- Use 'normalize' to do manual normalization.
166
167    , base10Exponent :: {-# UNPACK #-} !Int
168      -- ^ The base-10 exponent of a scientific number.
169    } deriving (Typeable, Data)
170
171-- | @scientific c e@ constructs a scientific number which corresponds
172-- to the 'Fractional' number: @'fromInteger' c * 10 '^^' e@.
173scientific :: Integer -> Int -> Scientific
174scientific = Scientific
175
176
177----------------------------------------------------------------------
178-- Instances
179----------------------------------------------------------------------
180
181instance NFData Scientific where
182    rnf (Scientific _ _) = ()
183
184-- | A hash can be safely calculated from a @Scientific@. No magnitude @10^e@ is
185-- calculated so there's no risk of a blowup in space or time when hashing
186-- scientific numbers coming from untrusted sources.
187instance Hashable Scientific where
188    hashWithSalt salt s = salt `hashWithSalt` c `hashWithSalt` e
189      where
190        Scientific c e = normalize s
191
192-- | Note that in the future I intend to change the type of the 'base10Exponent'
193-- from @Int@ to @Integer@. To be forward compatible the @Binary@ instance
194-- already encodes the exponent as 'Integer'.
195instance Binary Scientific where
196    put (Scientific c e) = put c *> put (toInteger e)
197    get = Scientific <$> get <*> (fromInteger <$> get)
198
199-- | Scientific numbers can be safely compared for equality. No magnitude @10^e@
200-- is calculated so there's no risk of a blowup in space or time when comparing
201-- scientific numbers coming from untrusted sources.
202instance Eq Scientific where
203    s1 == s2 = c1 == c2 && e1 == e2
204      where
205        Scientific c1 e1 = normalize s1
206        Scientific c2 e2 = normalize s2
207
208-- | Scientific numbers can be safely compared for ordering. No magnitude @10^e@
209-- is calculated so there's no risk of a blowup in space or time when comparing
210-- scientific numbers coming from untrusted sources.
211instance Ord Scientific where
212    compare s1 s2
213        | c1 == c2 && e1 == e2 = EQ
214        | c1 < 0    = if c2 < 0 then cmp (-c2) e2 (-c1) e1 else LT
215        | c1 > 0    = if c2 > 0 then cmp   c1  e1   c2  e2 else GT
216        | otherwise = if c2 > 0 then LT else GT
217      where
218        Scientific c1 e1 = normalize s1
219        Scientific c2 e2 = normalize s2
220
221        cmp cx ex cy ey
222            | log10sx < log10sy = LT
223            | log10sx > log10sy = GT
224            | d < 0     = if cx <= (cy `quotInteger` magnitude (-d)) then LT else GT
225            | d > 0     = if cy >  (cx `quotInteger` magnitude   d)  then LT else GT
226            | otherwise = if cx < cy                                 then LT else GT
227          where
228            log10sx = log10cx + ex
229            log10sy = log10cy + ey
230
231            log10cx = integerLog10' cx
232            log10cy = integerLog10' cy
233
234            d = log10cx - log10cy
235
236-- | /WARNING:/ '+' and '-' compute the 'Integer' magnitude: @10^e@ where @e@ is
237-- the difference between the @'base10Exponent's@ of the arguments. If these
238-- methods are applied to arguments which have huge exponents this could fill up
239-- all space and crash your program! So don't apply these methods to scientific
240-- numbers coming from untrusted sources. The other methods can be used safely.
241instance Num Scientific where
242    Scientific c1 e1 + Scientific c2 e2
243       | e1 < e2   = Scientific (c1   + c2*l) e1
244       | otherwise = Scientific (c1*r + c2  ) e2
245         where
246           l = magnitude (e2 - e1)
247           r = magnitude (e1 - e2)
248    {-# INLINABLE (+) #-}
249
250    Scientific c1 e1 - Scientific c2 e2
251       | e1 < e2   = Scientific (c1   - c2*l) e1
252       | otherwise = Scientific (c1*r - c2  ) e2
253         where
254           l = magnitude (e2 - e1)
255           r = magnitude (e1 - e2)
256    {-# INLINABLE (-) #-}
257
258    Scientific c1 e1 * Scientific c2 e2 =
259        Scientific (c1 * c2) (e1 + e2)
260    {-# INLINABLE (*) #-}
261
262    abs (Scientific c e) = Scientific (abs c) e
263    {-# INLINABLE abs #-}
264
265    negate (Scientific c e) = Scientific (negate c) e
266    {-# INLINABLE negate #-}
267
268    signum (Scientific c _) = Scientific (signum c) 0
269    {-# INLINABLE signum #-}
270
271    fromInteger i = Scientific i 0
272    {-# INLINABLE fromInteger #-}
273
274-- | /WARNING:/ 'toRational' needs to compute the 'Integer' magnitude:
275-- @10^e@. If applied to a huge exponent this could fill up all space
276-- and crash your program!
277--
278-- Avoid applying 'toRational' (or 'realToFrac') to scientific numbers
279-- coming from an untrusted source and use 'toRealFloat' instead. The
280-- latter guards against excessive space usage.
281instance Real Scientific where
282    toRational (Scientific c e)
283      | e < 0     =  c % magnitude (-e)
284      | otherwise = (c * magnitude   e) % 1
285    {-# INLINABLE toRational #-}
286
287{-# RULES
288  "realToFrac_toRealFloat_Double"
289   realToFrac = toRealFloat :: Scientific -> Double #-}
290
291{-# RULES
292  "realToFrac_toRealFloat_Float"
293   realToFrac = toRealFloat :: Scientific -> Float #-}
294
295-- | /WARNING:/ 'recip' and '/' will throw an error when their outputs are
296-- <https://en.wikipedia.org/wiki/Repeating_decimal repeating decimals>.
297--
298-- 'fromRational' will throw an error when the input 'Rational' is a repeating
299-- decimal.  Consider using 'fromRationalRepetend' for these rationals which
300-- will detect the repetition and indicate where it starts.
301instance Fractional Scientific where
302    recip = fromRational . recip . toRational
303    {-# INLINABLE recip #-}
304
305    x / y = fromRational $ toRational x / toRational y
306    {-# INLINABLE (/) #-}
307
308    fromRational rational =
309        case mbRepetendIx of
310          Nothing -> s
311          Just _ix -> error $
312            "fromRational has been applied to a repeating decimal " ++
313            "which can't be represented as a Scientific! " ++
314            "It's better to avoid performing fractional operations on Scientifics " ++
315            "and convert them to other fractional types like Double as early as possible."
316      where
317        (s, mbRepetendIx) = fromRationalRepetendUnlimited rational
318
319-- | Although 'fromRational' is unsafe because it will throw errors on
320-- <https://en.wikipedia.org/wiki/Repeating_decimal repeating decimals>,
321-- @unsafeFromRational@ is even more unsafe because it will diverge instead (i.e
322-- loop and consume all space). Though it will be more efficient because it
323-- doesn't need to consume space linear in the number of digits in the resulting
324-- scientific to detect the repetition.
325--
326-- Consider using 'fromRationalRepetend' for these rationals which will detect
327-- the repetition and indicate where it starts.
328unsafeFromRational :: Rational -> Scientific
329unsafeFromRational rational
330    | d == 0    = throw DivideByZero
331    | otherwise = positivize (longDiv 0 0) (numerator rational)
332  where
333    -- Divide the numerator by the denominator using long division.
334    longDiv :: Integer -> Int -> (Integer -> Scientific)
335    longDiv !c !e  0 = Scientific c e
336    longDiv !c !e !n
337                      -- TODO: Use a logarithm here!
338        | n < d     = longDiv (c * 10) (e - 1) (n * 10)
339        | otherwise = case n `quotRemInteger` d of
340                        (#q, r#) -> longDiv (c + q) e r
341
342    d = denominator rational
343
344-- | Like 'fromRational' and 'unsafeFromRational', this function converts a
345-- `Rational` to a `Scientific` but instead of failing or diverging (i.e loop
346-- and consume all space) on
347-- <https://en.wikipedia.org/wiki/Repeating_decimal repeating decimals>
348-- it detects the repeating part, the /repetend/, and returns where it starts.
349--
350-- To detect the repetition this function consumes space linear in the number of
351-- digits in the resulting scientific. In order to bound the space usage an
352-- optional limit can be specified. If the number of digits reaches this limit
353-- @Left (s, r)@ will be returned. Here @s@ is the 'Scientific' constructed so
354-- far and @r@ is the remaining 'Rational'. @toRational s + r@ yields the
355-- original 'Rational'
356--
357-- If the limit is not reached or no limit was specified @Right (s,
358-- mbRepetendIx)@ will be returned. Here @s@ is the 'Scientific' without any
359-- repetition and @mbRepetendIx@ specifies if and where in the fractional part
360-- the repetend begins.
361--
362-- For example:
363--
364-- @fromRationalRepetend Nothing (1 % 28) == Right (3.571428e-2, Just 2)@
365--
366-- This represents the repeating decimal: @0.03571428571428571428...@
367-- which is sometimes also unambiguously denoted as @0.03(571428)@.
368-- Here the repetend is enclosed in parentheses and starts at the 3rd digit (index 2)
369-- in the fractional part. Specifying a limit results in the following:
370--
371-- @fromRationalRepetend (Just 4) (1 % 28) == Left (3.5e-2, 1 % 1400)@
372--
373-- You can expect the following property to hold.
374--
375-- @ forall (mbLimit :: Maybe Int) (r :: Rational).
376-- r == (case 'fromRationalRepetend' mbLimit r of
377--        Left (s, r') -> toRational s + r'
378--        Right (s, mbRepetendIx) ->
379--          case mbRepetendIx of
380--            Nothing         -> toRational s
381--            Just repetendIx -> 'toRationalRepetend' s repetendIx)
382-- @
383fromRationalRepetend
384    :: Maybe Int -- ^ Optional limit
385    -> Rational
386    -> Either (Scientific, Rational)
387              (Scientific, Maybe Int)
388fromRationalRepetend mbLimit rational =
389    case mbLimit of
390      Nothing -> Right $ fromRationalRepetendUnlimited rational
391      Just l  -> fromRationalRepetendLimited l rational
392
393-- | Like 'fromRationalRepetend' but always accepts a limit.
394fromRationalRepetendLimited
395    :: Int -- ^ limit
396    -> Rational
397    -> Either (Scientific, Rational)
398              (Scientific, Maybe Int)
399fromRationalRepetendLimited l rational
400        | d == 0    = throw DivideByZero
401        | num < 0   = case longDiv (-num) of
402                        Left  (s, r)  -> Left  (-s, -r)
403                        Right (s, mb) -> Right (-s, mb)
404        | otherwise = longDiv num
405      where
406        num = numerator rational
407
408        longDiv :: Integer -> Either (Scientific, Rational) (Scientific, Maybe Int)
409        longDiv = longDivWithLimit 0 0 M.empty
410
411        longDivWithLimit
412            :: Integer
413            -> Int
414            -> M.Map Integer Int
415            -> (Integer -> Either (Scientific, Rational)
416                                  (Scientific, Maybe Int))
417        longDivWithLimit !c !e _ns 0 = Right (Scientific c e, Nothing)
418        longDivWithLimit !c !e  ns !n
419            | Just e' <- M.lookup n ns = Right (Scientific c e, Just (-e'))
420            | e <= (-l) = Left (Scientific c e, n % (d * magnitude (-e)))
421            | n < d = let !ns' = M.insert n e ns
422                      in longDivWithLimit (c * 10) (e - 1) ns' (n * 10)
423            | otherwise = case n `quotRemInteger` d of
424                            (#q, r#) -> longDivWithLimit (c + q) e ns r
425
426        d = denominator rational
427
428-- | Like 'fromRationalRepetend' but doesn't accept a limit.
429fromRationalRepetendUnlimited :: Rational -> (Scientific, Maybe Int)
430fromRationalRepetendUnlimited rational
431        | d == 0    = throw DivideByZero
432        | num < 0   = case longDiv (-num) of
433                        (s, mb) -> (-s, mb)
434        | otherwise = longDiv num
435      where
436        num = numerator rational
437
438        longDiv :: Integer -> (Scientific, Maybe Int)
439        longDiv = longDivNoLimit 0 0 M.empty
440
441        longDivNoLimit :: Integer
442                       -> Int
443                       -> M.Map Integer Int
444                       -> (Integer -> (Scientific, Maybe Int))
445        longDivNoLimit !c !e _ns 0 = (Scientific c e, Nothing)
446        longDivNoLimit !c !e  ns !n
447            | Just e' <- M.lookup n ns = (Scientific c e, Just (-e'))
448            | n < d     = let !ns' = M.insert n e ns
449                          in longDivNoLimit (c * 10) (e - 1) ns' (n * 10)
450            | otherwise = case n `quotRemInteger` d of
451                            (#q, r#) -> longDivNoLimit (c + q) e ns r
452
453        d = denominator rational
454
455-- |
456-- Converts a `Scientific` with a /repetend/ (a repeating part in the fraction),
457-- which starts at the given index, into its corresponding 'Rational'.
458--
459-- For example to convert the repeating decimal @0.03(571428)@ you would use:
460-- @toRationalRepetend 0.03571428 2 == 1 % 28@
461--
462-- Preconditions for @toRationalRepetend s r@:
463--
464-- * @r >= 0@
465--
466-- * @r < -(base10Exponent s)@
467--
468-- /WARNING:/ @toRationalRepetend@ needs to compute the 'Integer' magnitude:
469-- @10^^n@. Where @n@ is based on the 'base10Exponent` of the scientific. If
470-- applied to a huge exponent this could fill up all space and crash your
471-- program! So don't apply this function to untrusted input.
472--
473-- The formula to convert the @Scientific@ @s@
474-- with a repetend starting at index @r@ is described in the paper:
475-- <http://fiziko.bureau42.com/teaching_tidbits/turning_repeating_decimals_into_fractions.pdf turning_repeating_decimals_into_fractions.pdf>
476-- and is defined as follows:
477--
478-- @
479--   (fromInteger nonRepetend + repetend % nines) /
480--   fromInteger (10^^r)
481-- where
482--   c  = coefficient s
483--   e  = base10Exponent s
484--
485--   -- Size of the fractional part.
486--   f = (-e)
487--
488--   -- Size of the repetend.
489--   n = f - r
490--
491--   m = 10^^n
492--
493--   (nonRepetend, repetend) = c \`quotRem\` m
494--
495--   nines = m - 1
496-- @
497-- Also see: 'fromRationalRepetend'.
498toRationalRepetend
499    :: Scientific
500    -> Int -- ^ Repetend index
501    -> Rational
502toRationalRepetend s r
503    | r < 0  = error "toRationalRepetend: Negative repetend index!"
504    | r >= f = error "toRationalRepetend: Repetend index >= than number of digits in the fractional part!"
505    | otherwise = (fromInteger nonRepetend + repetend % nines) /
506                  fromInteger (magnitude r)
507  where
508    c  = coefficient s
509    e  = base10Exponent s
510
511    -- Size of the fractional part.
512    f = (-e)
513
514    -- Size of the repetend.
515    n = f - r
516
517    m = magnitude n
518
519    (#nonRepetend, repetend#) = c `quotRemInteger` m
520
521    nines = m - 1
522
523-- | /WARNING:/ the methods of the @RealFrac@ instance need to compute the
524-- magnitude @10^e@. If applied to a huge exponent this could take a long
525-- time. Even worse, when the destination type is unbounded (i.e. 'Integer') it
526-- could fill up all space and crash your program!
527instance RealFrac Scientific where
528    -- | The function 'properFraction' takes a Scientific number @s@
529    -- and returns a pair @(n,f)@ such that @s = n+f@, and:
530    --
531    -- * @n@ is an integral number with the same sign as @s@; and
532    --
533    -- * @f@ is a fraction with the same type and sign as @s@,
534    --   and with absolute value less than @1@.
535    properFraction s@(Scientific c e)
536        | e < 0     = if dangerouslySmall c e
537                      then (0, s)
538                      else case c `quotRemInteger` magnitude (-e) of
539                             (#q, r#) -> (fromInteger q, Scientific r e)
540        | otherwise = (toIntegral s, 0)
541    {-# INLINABLE properFraction #-}
542
543    -- | @'truncate' s@ returns the integer nearest @s@
544    -- between zero and @s@
545    truncate = whenFloating $ \c e ->
546                 if dangerouslySmall c e
547                 then 0
548                 else fromInteger $ c `quotInteger` magnitude (-e)
549    {-# INLINABLE truncate #-}
550
551    -- | @'round' s@ returns the nearest integer to @s@;
552    --   the even integer if @s@ is equidistant between two integers
553    round = whenFloating $ \c e ->
554              if dangerouslySmall c e
555              then 0
556              else let (#q, r#) = c `quotRemInteger` magnitude (-e)
557                       n = fromInteger q
558                       m | r < 0     = n - 1
559                         | otherwise = n + 1
560                       f = Scientific r e
561                   in case signum $ coefficient $ abs f - 0.5 of
562                        -1 -> n
563                        0  -> if even n then n else m
564                        1  -> m
565                        _  -> error "round default defn: Bad value"
566    {-# INLINABLE round #-}
567
568    -- | @'ceiling' s@ returns the least integer not less than @s@
569    ceiling = whenFloating $ \c e ->
570                if dangerouslySmall c e
571                then if c <= 0
572                     then 0
573                     else 1
574                else case c `quotRemInteger` magnitude (-e) of
575                       (#q, r#) | r <= 0    -> fromInteger q
576                                | otherwise -> fromInteger (q + 1)
577    {-# INLINABLE ceiling #-}
578
579    -- | @'floor' s@ returns the greatest integer not greater than @s@
580    floor = whenFloating $ \c e ->
581              if dangerouslySmall c e
582              then if c < 0
583                   then -1
584                   else 0
585              else fromInteger (c `divInteger` magnitude (-e))
586    {-# INLINABLE floor #-}
587
588
589----------------------------------------------------------------------
590-- Internal utilities
591----------------------------------------------------------------------
592
593-- | This function is used in the 'RealFrac' methods to guard against
594-- computing a huge magnitude (-e) which could take up all space.
595--
596-- Think about parsing a scientific number from an untrusted
597-- string. An attacker could supply 1e-1000000000. Lets say we want to
598-- 'floor' that number to an 'Int'. When we naively try to floor it
599-- using:
600--
601-- @
602-- floor = whenFloating $ \c e ->
603--           fromInteger (c `div` magnitude (-e))
604-- @
605--
606-- We will compute the huge Integer: @magnitude 1000000000@. This
607-- computation will quickly fill up all space and crash the program.
608--
609-- Note that for large /positive/ exponents there is no risk of a
610-- space-leak since 'whenFloating' will compute:
611--
612-- @fromInteger c * magnitude e :: a@
613--
614-- where @a@ is the target type (Int in this example). So here the
615-- space usage is bounded by the target type.
616--
617-- For large negative exponents we check if the exponent is smaller
618-- than some limit (currently -324). In that case we know that the
619-- scientific number is really small (unless the coefficient has many
620-- digits) so we can immediately return -1 for negative scientific
621-- numbers or 0 for positive numbers.
622--
623-- More precisely if @dangerouslySmall c e@ returns 'True' the
624-- scientific number @s@ is guaranteed to be between:
625-- @-0.1 > s < 0.1@.
626--
627-- Note that we avoid computing the number of decimal digits in c
628-- (log10 c) if the exponent is not below the limit.
629dangerouslySmall :: Integer -> Int -> Bool
630dangerouslySmall c e = e < (-limit) && e < (-integerLog10' (abs c)) - 1
631{-# INLINE dangerouslySmall #-}
632
633limit :: Int
634limit = maxExpt
635
636positivize :: (Ord a, Num a, Num b) => (a -> b) -> (a -> b)
637positivize f x | x < 0     = -(f (-x))
638               | otherwise =   f   x
639{-# INLINE positivize #-}
640
641whenFloating :: (Num a) => (Integer -> Int -> a) -> Scientific -> a
642whenFloating f s@(Scientific c e)
643    | e < 0     = f c e
644    | otherwise = toIntegral s
645{-# INLINE whenFloating #-}
646
647-- | Precondition: the 'Scientific' @s@ needs to be an integer:
648-- @base10Exponent (normalize s) >= 0@
649toIntegral :: (Num a) => Scientific -> a
650toIntegral (Scientific c e) = fromInteger c * magnitude e
651{-# INLINE toIntegral #-}
652
653
654----------------------------------------------------------------------
655-- Exponentiation with a cache for the most common numbers.
656----------------------------------------------------------------------
657
658-- | The same limit as in GHC.Float.
659maxExpt :: Int
660maxExpt = 324
661
662expts10 :: Primitive.Array Integer
663expts10 = runST $ do
664    ma <- Primitive.newArray maxExpt uninitialised
665    Primitive.writeArray ma 0  1
666    Primitive.writeArray ma 1 10
667    let go !ix
668          | ix == maxExpt = Primitive.unsafeFreezeArray ma
669          | otherwise = do
670              Primitive.writeArray ma  ix        xx
671              Primitive.writeArray ma (ix+1) (10*xx)
672              go (ix+2)
673          where
674            xx = x * x
675            x  = Primitive.indexArray expts10 half
676#if MIN_VERSION_base(4,5,0)
677            !half = ix `unsafeShiftR` 1
678#else
679            !half = ix `shiftR` 1
680#endif
681    go 2
682
683uninitialised :: error
684uninitialised = error "Data.Scientific: uninitialised element"
685
686-- | @magnitude e == 10 ^ e@
687magnitude :: Num a => Int -> a
688magnitude e | e < maxExpt = cachedPow10 e
689            | otherwise   = cachedPow10 hi * 10 ^ (e - hi)
690    where
691      cachedPow10 = fromInteger . Primitive.indexArray expts10
692
693      hi = maxExpt - 1
694
695
696----------------------------------------------------------------------
697-- Conversions
698----------------------------------------------------------------------
699
700-- | Convert a 'RealFloat' (like a 'Double' or 'Float') into a 'Scientific'
701-- number.
702--
703-- Note that this function uses 'Numeric.floatToDigits' to compute the digits
704-- and exponent of the 'RealFloat' number. Be aware that the algorithm used in
705-- 'Numeric.floatToDigits' doesn't work as expected for some numbers, e.g. as
706-- the 'Double' @1e23@ is converted to @9.9999999999999991611392e22@, and that
707-- value is shown as @9.999999999999999e22@ rather than the shorter @1e23@; the
708-- algorithm doesn't take the rounding direction for values exactly half-way
709-- between two adjacent representable values into account, so if you have a
710-- value with a short decimal representation exactly half-way between two
711-- adjacent representable values, like @5^23*2^e@ for @e@ close to 23, the
712-- algorithm doesn't know in which direction the short decimal representation
713-- would be rounded and computes more digits
714fromFloatDigits :: (RealFloat a) => a -> Scientific
715fromFloatDigits 0  = 0
716fromFloatDigits rf = positivize fromPositiveRealFloat rf
717    where
718      fromPositiveRealFloat r = go digits 0 0
719        where
720          (digits, e) = Numeric.floatToDigits 10 r
721
722          go :: [Int] -> Integer -> Int -> Scientific
723          go []     !c !n = Scientific c (e - n)
724          go (d:ds) !c !n = go ds (c * 10 + toInteger d) (n + 1)
725
726{-# INLINABLE fromFloatDigits #-}
727
728{-# SPECIALIZE fromFloatDigits :: Double -> Scientific #-}
729{-# SPECIALIZE fromFloatDigits :: Float  -> Scientific #-}
730
731-- | Safely convert a 'Scientific' number into a 'RealFloat' (like a 'Double' or a
732-- 'Float').
733--
734-- Note that this function uses 'realToFrac' (@'fromRational' . 'toRational'@)
735-- internally but it guards against computing huge Integer magnitudes (@10^e@)
736-- that could fill up all space and crash your program. If the 'base10Exponent'
737-- of the given 'Scientific' is too big or too small to be represented in the
738-- target type, Infinity or 0 will be returned respectively. Use
739-- 'toBoundedRealFloat' which explicitly handles this case by returning 'Left'.
740--
741-- Always prefer 'toRealFloat' over 'realToFrac' when converting from scientific
742-- numbers coming from an untrusted source.
743toRealFloat :: (RealFloat a) => Scientific -> a
744toRealFloat = either id id . toBoundedRealFloat
745
746{-# INLINABLE toRealFloat #-}
747{-# INLINABLE toBoundedRealFloat #-}
748
749{-# SPECIALIZE toRealFloat        :: Scientific -> Double #-}
750{-# SPECIALIZE toRealFloat        :: Scientific -> Float  #-}
751{-# SPECIALIZE toBoundedRealFloat :: Scientific -> Either Double Double #-}
752{-# SPECIALIZE toBoundedRealFloat :: Scientific -> Either Float  Float  #-}
753
754-- | Preciser version of `toRealFloat`. If the 'base10Exponent' of the given
755-- 'Scientific' is too big or too small to be represented in the target type,
756-- Infinity or 0 will be returned as 'Left'.
757toBoundedRealFloat :: forall a. (RealFloat a) => Scientific -> Either a a
758toBoundedRealFloat s@(Scientific c e)
759    | c == 0     = Right 0
760    | e >  limit = if e > hiLimit then Left $ sign (1/0) -- Infinity
761                   else Right $ fromRational ((c * magnitude e) % 1)
762    | e < -limit = if e < loLimit && e + d < loLimit then Left $ sign 0
763                   else Right $ fromRational (c % magnitude (-e))
764    | otherwise = Right $ fromRational (toRational s)
765                       -- We can't use realToFrac here
766                       -- because that will cause an infinite loop
767                       -- when the function is specialized for Double and Float
768                       -- caused by the realToFrac_toRealFloat_Double/Float rewrite RULEs.
769  where
770    hiLimit, loLimit :: Int
771    hiLimit = ceiling (fromIntegral hi     * log10Radix)
772    loLimit = floor   (fromIntegral lo     * log10Radix) -
773              ceiling (fromIntegral digits * log10Radix)
774
775    log10Radix :: Double
776    log10Radix = logBase 10 $ fromInteger radix
777
778    radix    = floatRadix  (undefined :: a)
779    digits   = floatDigits (undefined :: a)
780    (lo, hi) = floatRange  (undefined :: a)
781
782    d = integerLog10' (abs c)
783
784    sign x | c < 0     = -x
785           | otherwise =  x
786
787-- | Convert a `Scientific` to a bounded integer.
788--
789-- If the given `Scientific` doesn't fit in the target representation, it will
790-- return `Nothing`.
791--
792-- This function also guards against computing huge Integer magnitudes (@10^e@)
793-- that could fill up all space and crash your program.
794toBoundedInteger :: forall i. (Integral i, Bounded i) => Scientific -> Maybe i
795toBoundedInteger s
796    | c == 0    = fromIntegerBounded 0
797    | integral  = if dangerouslyBig
798                  then Nothing
799                  else fromIntegerBounded n
800    | otherwise = Nothing
801  where
802    c = coefficient s
803
804    integral = e >= 0 || e' >= 0
805
806    e  = base10Exponent s
807    e' = base10Exponent s'
808
809    s' = normalize s
810
811    dangerouslyBig = e > limit &&
812                     e > integerLog10' (max (abs iMinBound) (abs iMaxBound))
813
814    fromIntegerBounded :: Integer -> Maybe i
815    fromIntegerBounded i
816        | i < iMinBound || i > iMaxBound = Nothing
817        | otherwise                      = Just $ fromInteger i
818
819    iMinBound = toInteger (minBound :: i)
820    iMaxBound = toInteger (maxBound :: i)
821
822    -- This should not be evaluated if the given Scientific is dangerouslyBig
823    -- since it could consume all space and crash the process:
824    n :: Integer
825    n = toIntegral s'
826
827{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Int #-}
828{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Int8 #-}
829{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Int16 #-}
830{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Int32 #-}
831{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Int64 #-}
832{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Word #-}
833{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Word8 #-}
834{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Word16 #-}
835{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Word32 #-}
836{-# SPECIALIZE toBoundedInteger :: Scientific -> Maybe Word64 #-}
837
838-- | @floatingOrInteger@ determines if the scientific is floating point or
839-- integer.
840--
841-- In case it's floating-point the scientific is converted to the desired
842-- 'RealFloat' using 'toRealFloat' and wrapped in 'Left'.
843--
844-- In case it's integer to scientific is converted to the desired 'Integral' and
845-- wrapped in 'Right'.
846--
847-- /WARNING:/ To convert the scientific to an integral the magnitude @10^e@
848-- needs to be computed. If applied to a huge exponent this could take a long
849-- time. Even worse, when the destination type is unbounded (i.e. 'Integer') it
850-- could fill up all space and crash your program! So don't apply this function
851-- to untrusted input but use 'toBoundedInteger' instead.
852--
853-- Also see: 'isFloating' or 'isInteger'.
854floatingOrInteger :: (RealFloat r, Integral i) => Scientific -> Either r i
855floatingOrInteger s
856    | base10Exponent s  >= 0 = Right (toIntegral   s)
857    | base10Exponent s' >= 0 = Right (toIntegral   s')
858    | otherwise              = Left  (toRealFloat  s')
859  where
860    s' = normalize s
861
862
863----------------------------------------------------------------------
864-- Predicates
865----------------------------------------------------------------------
866
867-- | Return 'True' if the scientific is a floating point, 'False' otherwise.
868--
869-- Also see: 'floatingOrInteger'.
870isFloating :: Scientific -> Bool
871isFloating = not . isInteger
872
873-- | Return 'True' if the scientific is an integer, 'False' otherwise.
874--
875-- Also see: 'floatingOrInteger'.
876isInteger :: Scientific -> Bool
877isInteger s = base10Exponent s  >= 0 ||
878              base10Exponent s' >= 0
879  where
880    s' = normalize s
881
882
883----------------------------------------------------------------------
884-- Parsing
885----------------------------------------------------------------------
886
887-- | Supports the skipping of parentheses and whitespaces. Example:
888--
889-- > > read " ( ((  -1.0e+3 ) ))" :: Scientific
890-- > -1000.0
891--
892-- (Note: This @Read@ instance makes internal use of
893-- 'scientificP' to parse the floating-point number.)
894instance Read Scientific where
895    readPrec = Read.parens $ ReadPrec.lift (ReadP.skipSpaces >> scientificP)
896
897-- A strict pair
898data SP = SP !Integer {-# UNPACK #-}!Int
899
900-- | A parser for parsing a floating-point
901-- number into a 'Scientific' value. Example:
902--
903-- > > import Text.ParserCombinators.ReadP (readP_to_S)
904-- > > readP_to_S scientificP "3"
905-- > [(3.0,"")]
906-- > > readP_to_S scientificP "3.0e2"
907-- > [(3.0,"e2"),(300.0,"")]
908-- > > readP_to_S scientificP "+3.0e+2"
909-- > [(3.0,"e+2"),(300.0,"")]
910-- > > readP_to_S scientificP "-3.0e-2"
911-- > [(-3.0,"e-2"),(-3.0e-2,"")]
912--
913-- Note: This parser only parses the number itself; it does
914-- not parse any surrounding parentheses or whitespaces.
915scientificP :: ReadP Scientific
916scientificP = do
917  let positive = (('+' ==) <$> ReadP.satisfy isSign) `mplus` return True
918  pos <- positive
919
920  let step :: Num a => a -> Int -> a
921      step a digit = a * 10 + fromIntegral digit
922      {-# INLINE step #-}
923
924  n <- foldDigits step 0
925
926  let s = SP n 0
927      fractional = foldDigits (\(SP a e) digit ->
928                                 SP (step a digit) (e-1)) s
929
930  SP coeff expnt <- (ReadP.satisfy (== '.') >> fractional)
931                    ReadP.<++ return s
932
933  let signedCoeff | pos       =   coeff
934                  | otherwise = (-coeff)
935
936      eP = do posE <- positive
937              e <- foldDigits step 0
938              if posE
939                then return   e
940                else return (-e)
941
942  (ReadP.satisfy isE >>
943           ((Scientific signedCoeff . (expnt +)) <$> eP)) `mplus`
944     return (Scientific signedCoeff    expnt)
945
946
947foldDigits :: (a -> Int -> a) -> a -> ReadP a
948foldDigits f z = do
949    c <- ReadP.satisfy isDecimal
950    let digit = ord c - 48
951        a = f z digit
952
953    ReadP.look >>= go a
954  where
955    go !a [] = return a
956    go !a (c:cs)
957        | isDecimal c = do
958            _ <- ReadP.get
959            let digit = ord c - 48
960            go (f a digit) cs
961        | otherwise = return a
962
963isDecimal :: Char -> Bool
964isDecimal c = c >= '0' && c <= '9'
965{-# INLINE isDecimal #-}
966
967isSign :: Char -> Bool
968isSign c = c == '-' || c == '+'
969{-# INLINE isSign #-}
970
971isE :: Char -> Bool
972isE c = c == 'e' || c == 'E'
973{-# INLINE isE #-}
974
975
976----------------------------------------------------------------------
977-- Pretty Printing
978----------------------------------------------------------------------
979
980-- | See 'formatScientific' if you need more control over the rendering.
981instance Show Scientific where
982    show s | coefficient s < 0 = '-':showPositive (-s)
983           | otherwise         =     showPositive   s
984      where
985        showPositive :: Scientific -> String
986        showPositive = fmtAsGeneric . toDecimalDigits
987
988        fmtAsGeneric :: ([Int], Int) -> String
989        fmtAsGeneric x@(_is, e)
990            | e < 0 || e > 7 = fmtAsExponent x
991            | otherwise      = fmtAsFixed    x
992
993fmtAsExponent :: ([Int], Int) -> String
994fmtAsExponent (is, e) =
995    case ds of
996      "0"     -> "0.0e0"
997      [d]     -> d : '.' :'0' : 'e' : show_e'
998      (d:ds') -> d : '.' : ds' ++ ('e' : show_e')
999      []      -> error "formatScientific/doFmt/FFExponent: []"
1000  where
1001    show_e' = show (e-1)
1002
1003    ds = map intToDigit is
1004
1005fmtAsFixed :: ([Int], Int) -> String
1006fmtAsFixed (is, e)
1007    | e <= 0    = '0':'.':(replicate (-e) '0' ++ ds)
1008    | otherwise =
1009        let
1010           f 0 s    rs  = mk0 (reverse s) ++ '.':mk0 rs
1011           f n s    ""  = f (n-1) ('0':s) ""
1012           f n s (r:rs) = f (n-1) (r:s) rs
1013        in
1014           f e "" ds
1015  where
1016    mk0 "" = "0"
1017    mk0 ls = ls
1018
1019    ds = map intToDigit is
1020
1021-- | Like 'show' but provides rendering options.
1022formatScientific :: FPFormat
1023                 -> Maybe Int  -- ^ Number of decimal places to render.
1024                 -> Scientific
1025                 -> String
1026formatScientific format mbDecs s
1027    | coefficient s < 0 = '-':formatPositiveScientific (-s)
1028    | otherwise         =     formatPositiveScientific   s
1029  where
1030    formatPositiveScientific :: Scientific -> String
1031    formatPositiveScientific s' = case format of
1032        Generic  -> fmtAsGeneric        $ toDecimalDigits s'
1033        Exponent -> fmtAsExponentMbDecs $ toDecimalDigits s'
1034        Fixed    -> fmtAsFixedMbDecs    $ toDecimalDigits s'
1035
1036    fmtAsGeneric :: ([Int], Int) -> String
1037    fmtAsGeneric x@(_is, e)
1038        | e < 0 || e > 7 = fmtAsExponentMbDecs x
1039        | otherwise      = fmtAsFixedMbDecs x
1040
1041    fmtAsExponentMbDecs :: ([Int], Int) -> String
1042    fmtAsExponentMbDecs x = case mbDecs of
1043                              Nothing  -> fmtAsExponent x
1044                              Just dec -> fmtAsExponentDecs dec x
1045
1046    fmtAsFixedMbDecs :: ([Int], Int) -> String
1047    fmtAsFixedMbDecs x = case mbDecs of
1048                           Nothing  -> fmtAsFixed x
1049                           Just dec -> fmtAsFixedDecs dec x
1050
1051    fmtAsExponentDecs :: Int -> ([Int], Int) -> String
1052    fmtAsExponentDecs dec (is, e) =
1053        let dec' = max dec 1 in
1054            case is of
1055             [0] -> '0' :'.' : take dec' (repeat '0') ++ "e0"
1056             _ ->
1057              let
1058               (ei,is') = roundTo (dec'+1) is
1059               (d:ds') = map intToDigit (if ei > 0 then init is' else is')
1060              in
1061              d:'.':ds' ++ 'e':show (e-1+ei)
1062
1063    fmtAsFixedDecs :: Int -> ([Int], Int) -> String
1064    fmtAsFixedDecs dec (is, e) =
1065        let dec' = max dec 0 in
1066        if e >= 0 then
1067         let
1068          (ei,is') = roundTo (dec' + e) is
1069          (ls,rs)  = splitAt (e+ei) (map intToDigit is')
1070         in
1071         mk0 ls ++ (if null rs then "" else '.':rs)
1072        else
1073         let
1074          (ei,is') = roundTo dec' (replicate (-e) 0 ++ is)
1075          d:ds' = map intToDigit (if ei > 0 then is' else 0:is')
1076         in
1077         d : (if null ds' then "" else '.':ds')
1078      where
1079        mk0 ls = case ls of { "" -> "0" ; _ -> ls}
1080
1081----------------------------------------------------------------------
1082
1083-- | Similar to 'Numeric.floatToDigits', @toDecimalDigits@ takes a
1084-- positive 'Scientific' number, and returns a list of digits and
1085-- a base-10 exponent. In particular, if @x>=0@, and
1086--
1087-- > toDecimalDigits x = ([d1,d2,...,dn], e)
1088--
1089-- then
1090--
1091--     1. @n >= 1@
1092--     2. @x = 0.d1d2...dn * (10^^e)@
1093--     3. @0 <= di <= 9@
1094--     4. @null $ takeWhile (==0) $ reverse [d1,d2,...,dn]@
1095--
1096-- The last property means that the coefficient will be normalized, i.e. doesn't
1097-- contain trailing zeros.
1098toDecimalDigits :: Scientific -> ([Int], Int)
1099toDecimalDigits (Scientific 0  _)  = ([0], 0)
1100toDecimalDigits (Scientific c' e') =
1101    case normalizePositive c' e' of
1102      Scientific c e -> go c 0 []
1103        where
1104          go :: Integer -> Int -> [Int] -> ([Int], Int)
1105          go 0 !n ds = (ds, ne) where !ne = n + e
1106          go i !n ds = case i `quotRemInteger` 10 of
1107                         (# q, r #) -> go q (n+1) (d:ds)
1108                           where
1109                             !d = fromIntegral r
1110
1111
1112----------------------------------------------------------------------
1113-- Normalization
1114----------------------------------------------------------------------
1115
1116-- | Normalize a scientific number by dividing out powers of 10 from the
1117-- 'coefficient' and incrementing the 'base10Exponent' each time.
1118--
1119-- You should rarely have a need for this function since scientific numbers are
1120-- automatically normalized when pretty-printed and in 'toDecimalDigits'.
1121normalize :: Scientific -> Scientific
1122normalize (Scientific c e)
1123    | c > 0 =   normalizePositive   c  e
1124    | c < 0 = -(normalizePositive (-c) e)
1125    | otherwise {- c == 0 -} = Scientific 0 0
1126
1127normalizePositive :: Integer -> Int -> Scientific
1128normalizePositive !c !e = case quotRemInteger c 10 of
1129                            (# c', r #)
1130                                | r == 0    -> normalizePositive c' (e+1)
1131                                | otherwise -> Scientific c e
1132