1{-# LANGUAGE CPP, DeriveDataTypeable #-} 2#if __GLASGOW_HASKELL__ >= 704 3{-# LANGUAGE Safe #-} 4#elif __GLASGOW_HASKELL__ >= 702 5{-# LANGUAGE Trustworthy #-} 6#endif 7-- | 8-- Module : Data.Text.Encoding.Error 9-- Copyright : (c) Bryan O'Sullivan 2009 10-- 11-- License : BSD-style 12-- Maintainer : bos@serpentine.com 13-- Portability : GHC 14-- 15-- Types and functions for dealing with encoding and decoding errors 16-- in Unicode text. 17-- 18-- The standard functions for encoding and decoding text are strict, 19-- which is to say that they throw exceptions on invalid input. This 20-- is often unhelpful on real world input, so alternative functions 21-- exist that accept custom handlers for dealing with invalid inputs. 22-- These 'OnError' handlers are normal Haskell functions. You can use 23-- one of the presupplied functions in this module, or you can write a 24-- custom handler of your own. 25 26module Data.Text.Encoding.Error 27 ( 28 -- * Error handling types 29 UnicodeException(..) 30 , OnError 31 , OnDecodeError 32 , OnEncodeError 33 -- * Useful error handling functions 34 , lenientDecode 35 , strictDecode 36 , strictEncode 37 , ignore 38 , replace 39 ) where 40 41import Control.DeepSeq (NFData (..)) 42import Control.Exception (Exception, throw) 43import Data.Typeable (Typeable) 44import Data.Word (Word8) 45import Numeric (showHex) 46 47-- | Function type for handling a coding error. It is supplied with 48-- two inputs: 49-- 50-- * A 'String' that describes the error. 51-- 52-- * The input value that caused the error. If the error arose 53-- because the end of input was reached or could not be identified 54-- precisely, this value will be 'Nothing'. 55-- 56-- If the handler returns a value wrapped with 'Just', that value will 57-- be used in the output as the replacement for the invalid input. If 58-- it returns 'Nothing', no value will be used in the output. 59-- 60-- Should the handler need to abort processing, it should use 'error' 61-- or 'throw' an exception (preferably a 'UnicodeException'). It may 62-- use the description provided to construct a more helpful error 63-- report. 64type OnError a b = String -> Maybe a -> Maybe b 65 66-- | A handler for a decoding error. 67type OnDecodeError = OnError Word8 Char 68 69-- | A handler for an encoding error. 70{-# DEPRECATED OnEncodeError "This exception is never used in practice, and will be removed." #-} 71type OnEncodeError = OnError Char Word8 72 73-- | An exception type for representing Unicode encoding errors. 74data UnicodeException = 75 DecodeError String (Maybe Word8) 76 -- ^ Could not decode a byte sequence because it was invalid under 77 -- the given encoding, or ran out of input in mid-decode. 78 | EncodeError String (Maybe Char) 79 -- ^ Tried to encode a character that could not be represented 80 -- under the given encoding, or ran out of input in mid-encode. 81 deriving (Eq, Typeable) 82 83{-# DEPRECATED EncodeError "This constructor is never used, and will be removed." #-} 84 85showUnicodeException :: UnicodeException -> String 86showUnicodeException (DecodeError desc (Just w)) 87 = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc) 88showUnicodeException (DecodeError desc Nothing) 89 = "Cannot decode input: " ++ desc 90showUnicodeException (EncodeError desc (Just c)) 91 = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc) 92showUnicodeException (EncodeError desc Nothing) 93 = "Cannot encode input: " ++ desc 94 95instance Show UnicodeException where 96 show = showUnicodeException 97 98instance Exception UnicodeException 99 100instance NFData UnicodeException where 101 rnf (DecodeError desc w) = rnf desc `seq` rnf w `seq` () 102 rnf (EncodeError desc c) = rnf desc `seq` rnf c `seq` () 103 104-- | Throw a 'UnicodeException' if decoding fails. 105strictDecode :: OnDecodeError 106strictDecode desc c = throw (DecodeError desc c) 107 108-- | Replace an invalid input byte with the Unicode replacement 109-- character U+FFFD. 110lenientDecode :: OnDecodeError 111lenientDecode _ _ = Just '\xfffd' 112 113-- | Throw a 'UnicodeException' if encoding fails. 114{-# DEPRECATED strictEncode "This function always throws an exception, and will be removed." #-} 115strictEncode :: OnEncodeError 116strictEncode desc c = throw (EncodeError desc c) 117 118-- | Ignore an invalid input, substituting nothing in the output. 119ignore :: OnError a b 120ignore _ _ = Nothing 121 122-- | Replace an invalid input with a valid output. 123replace :: b -> OnError a b 124replace c _ _ = Just c 125