1{-# LANGUAGE CPP, DeriveDataTypeable #-}
2#if __GLASGOW_HASKELL__ >= 704
3{-# LANGUAGE Safe #-}
4#elif __GLASGOW_HASKELL__ >= 702
5{-# LANGUAGE Trustworthy #-}
6#endif
7-- |
8-- Module      : Data.Text.Encoding.Error
9-- Copyright   : (c) Bryan O'Sullivan 2009
10--
11-- License     : BSD-style
12-- Maintainer  : bos@serpentine.com
13-- Portability : GHC
14--
15-- Types and functions for dealing with encoding and decoding errors
16-- in Unicode text.
17--
18-- The standard functions for encoding and decoding text are strict,
19-- which is to say that they throw exceptions on invalid input.  This
20-- is often unhelpful on real world input, so alternative functions
21-- exist that accept custom handlers for dealing with invalid inputs.
22-- These 'OnError' handlers are normal Haskell functions.  You can use
23-- one of the presupplied functions in this module, or you can write a
24-- custom handler of your own.
25
26module Data.Text.Encoding.Error
27    (
28    -- * Error handling types
29      UnicodeException(..)
30    , OnError
31    , OnDecodeError
32    , OnEncodeError
33    -- * Useful error handling functions
34    , lenientDecode
35    , strictDecode
36    , strictEncode
37    , ignore
38    , replace
39    ) where
40
41import Control.DeepSeq (NFData (..))
42import Control.Exception (Exception, throw)
43import Data.Typeable (Typeable)
44import Data.Word (Word8)
45import Numeric (showHex)
46
47-- | Function type for handling a coding error.  It is supplied with
48-- two inputs:
49--
50-- * A 'String' that describes the error.
51--
52-- * The input value that caused the error.  If the error arose
53--   because the end of input was reached or could not be identified
54--   precisely, this value will be 'Nothing'.
55--
56-- If the handler returns a value wrapped with 'Just', that value will
57-- be used in the output as the replacement for the invalid input.  If
58-- it returns 'Nothing', no value will be used in the output.
59--
60-- Should the handler need to abort processing, it should use 'error'
61-- or 'throw' an exception (preferably a 'UnicodeException').  It may
62-- use the description provided to construct a more helpful error
63-- report.
64type OnError a b = String -> Maybe a -> Maybe b
65
66-- | A handler for a decoding error.
67type OnDecodeError = OnError Word8 Char
68
69-- | A handler for an encoding error.
70{-# DEPRECATED OnEncodeError "This exception is never used in practice, and will be removed." #-}
71type OnEncodeError = OnError Char Word8
72
73-- | An exception type for representing Unicode encoding errors.
74data UnicodeException =
75    DecodeError String (Maybe Word8)
76    -- ^ Could not decode a byte sequence because it was invalid under
77    -- the given encoding, or ran out of input in mid-decode.
78  | EncodeError String (Maybe Char)
79    -- ^ Tried to encode a character that could not be represented
80    -- under the given encoding, or ran out of input in mid-encode.
81    deriving (Eq, Typeable)
82
83{-# DEPRECATED EncodeError "This constructor is never used, and will be removed." #-}
84
85showUnicodeException :: UnicodeException -> String
86showUnicodeException (DecodeError desc (Just w))
87    = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc)
88showUnicodeException (DecodeError desc Nothing)
89    = "Cannot decode input: " ++ desc
90showUnicodeException (EncodeError desc (Just c))
91    = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
92showUnicodeException (EncodeError desc Nothing)
93    = "Cannot encode input: " ++ desc
94
95instance Show UnicodeException where
96    show = showUnicodeException
97
98instance Exception UnicodeException
99
100instance NFData UnicodeException where
101    rnf (DecodeError desc w) = rnf desc `seq` rnf w `seq` ()
102    rnf (EncodeError desc c) = rnf desc `seq` rnf c `seq` ()
103
104-- | Throw a 'UnicodeException' if decoding fails.
105strictDecode :: OnDecodeError
106strictDecode desc c = throw (DecodeError desc c)
107
108-- | Replace an invalid input byte with the Unicode replacement
109-- character U+FFFD.
110lenientDecode :: OnDecodeError
111lenientDecode _ _ = Just '\xfffd'
112
113-- | Throw a 'UnicodeException' if encoding fails.
114{-# DEPRECATED strictEncode "This function always throws an exception, and will be removed." #-}
115strictEncode :: OnEncodeError
116strictEncode desc c = throw (EncodeError desc c)
117
118-- | Ignore an invalid input, substituting nothing in the output.
119ignore :: OnError a b
120ignore _ _ = Nothing
121
122-- | Replace an invalid input with a valid output.
123replace :: b -> OnError a b
124replace c _ _ = Just c
125