1{-# LANGUAGE Trustworthy #-} 2{-# LANGUAGE NoImplicitPrelude 3 , BangPatterns 4 , NondecreasingIndentation 5 #-} 6{-# OPTIONS_GHC -funbox-strict-fields #-} 7 8----------------------------------------------------------------------------- 9-- | 10-- Module : GHC.IO.Encoding.Latin1 11-- Copyright : (c) The University of Glasgow, 2009 12-- License : see libraries/base/LICENSE 13-- 14-- Maintainer : libraries@haskell.org 15-- Stability : internal 16-- Portability : non-portable 17-- 18-- Single-byte encodings that map directly to Unicode code points. 19-- 20-- Portions Copyright : (c) Tom Harper 2008-2009, 21-- (c) Bryan O'Sullivan 2009, 22-- (c) Duncan Coutts 2009 23-- 24----------------------------------------------------------------------------- 25 26module GHC.IO.Encoding.Latin1 ( 27 latin1, mkLatin1, 28 latin1_checked, mkLatin1_checked, 29 ascii, mkAscii, 30 latin1_decode, 31 ascii_decode, 32 latin1_encode, 33 latin1_checked_encode, 34 ascii_encode, 35 ) where 36 37import GHC.Base 38import GHC.Real 39import GHC.Num 40-- import GHC.IO 41import GHC.IO.Buffer 42import GHC.IO.Encoding.Failure 43import GHC.IO.Encoding.Types 44 45-- ----------------------------------------------------------------------------- 46-- Latin1 47 48latin1 :: TextEncoding 49latin1 = mkLatin1 ErrorOnCodingFailure 50 51-- | @since 4.4.0.0 52mkLatin1 :: CodingFailureMode -> TextEncoding 53mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1", 54 mkTextDecoder = latin1_DF cfm, 55 mkTextEncoder = latin1_EF cfm } 56 57latin1_DF :: CodingFailureMode -> IO (TextDecoder ()) 58latin1_DF cfm = 59 return (BufferCodec { 60 encode = latin1_decode, 61 recover = recoverDecode cfm, 62 close = return (), 63 getState = return (), 64 setState = const $ return () 65 }) 66 67latin1_EF :: CodingFailureMode -> IO (TextEncoder ()) 68latin1_EF cfm = 69 return (BufferCodec { 70 encode = latin1_encode, 71 recover = recoverEncode cfm, 72 close = return (), 73 getState = return (), 74 setState = const $ return () 75 }) 76 77latin1_checked :: TextEncoding 78latin1_checked = mkLatin1_checked ErrorOnCodingFailure 79 80-- | @since 4.4.0.0 81mkLatin1_checked :: CodingFailureMode -> TextEncoding 82mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1", 83 mkTextDecoder = latin1_DF cfm, 84 mkTextEncoder = latin1_checked_EF cfm } 85 86latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ()) 87latin1_checked_EF cfm = 88 return (BufferCodec { 89 encode = latin1_checked_encode, 90 recover = recoverEncode cfm, 91 close = return (), 92 getState = return (), 93 setState = const $ return () 94 }) 95 96-- ----------------------------------------------------------------------------- 97-- ASCII 98 99-- | @since 4.9.0.0 100ascii :: TextEncoding 101ascii = mkAscii ErrorOnCodingFailure 102 103-- | @since 4.9.0.0 104mkAscii :: CodingFailureMode -> TextEncoding 105mkAscii cfm = TextEncoding { textEncodingName = "ASCII", 106 mkTextDecoder = ascii_DF cfm, 107 mkTextEncoder = ascii_EF cfm } 108 109ascii_DF :: CodingFailureMode -> IO (TextDecoder ()) 110ascii_DF cfm = 111 return (BufferCodec { 112 encode = ascii_decode, 113 recover = recoverDecode cfm, 114 close = return (), 115 getState = return (), 116 setState = const $ return () 117 }) 118 119ascii_EF :: CodingFailureMode -> IO (TextEncoder ()) 120ascii_EF cfm = 121 return (BufferCodec { 122 encode = ascii_encode, 123 recover = recoverEncode cfm, 124 close = return (), 125 getState = return (), 126 setState = const $ return () 127 }) 128 129 130 131-- ----------------------------------------------------------------------------- 132-- The actual decoders and encoders 133 134-- TODO: Eliminate code duplication between the checked and unchecked 135-- versions of the decoder or encoder (but don't change the Core!) 136 137latin1_decode :: DecodeBuffer 138latin1_decode 139 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } 140 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } 141 = let 142 loop !ir !ow 143 | ow >= os = done OutputUnderflow ir ow 144 | ir >= iw = done InputUnderflow ir ow 145 | otherwise = do 146 c0 <- readWord8Buf iraw ir 147 ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0)) 148 loop (ir+1) ow' 149 150 -- lambda-lifted, to avoid thunks being built in the inner-loop: 151 done why !ir !ow = return (why, 152 if ir == iw then input{ bufL=0, bufR=0 } 153 else input{ bufL=ir }, 154 output{ bufR=ow }) 155 in 156 loop ir0 ow0 157 158ascii_decode :: DecodeBuffer 159ascii_decode 160 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } 161 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } 162 = let 163 loop !ir !ow 164 | ow >= os = done OutputUnderflow ir ow 165 | ir >= iw = done InputUnderflow ir ow 166 | otherwise = do 167 c0 <- readWord8Buf iraw ir 168 if c0 > 0x7f then invalid else do 169 ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0)) 170 loop (ir+1) ow' 171 where 172 invalid = done InvalidSequence ir ow 173 174 -- lambda-lifted, to avoid thunks being built in the inner-loop: 175 done why !ir !ow = return (why, 176 if ir == iw then input{ bufL=0, bufR=0 } 177 else input{ bufL=ir }, 178 output{ bufR=ow }) 179 in 180 loop ir0 ow0 181 182latin1_encode :: EncodeBuffer 183latin1_encode 184 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } 185 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } 186 = let 187 done why !ir !ow = return (why, 188 if ir == iw then input{ bufL=0, bufR=0 } 189 else input{ bufL=ir }, 190 output{ bufR=ow }) 191 loop !ir !ow 192 | ow >= os = done OutputUnderflow ir ow 193 | ir >= iw = done InputUnderflow ir ow 194 | otherwise = do 195 (c,ir') <- readCharBuf iraw ir 196 writeWord8Buf oraw ow (fromIntegral (ord c)) 197 loop ir' (ow+1) 198 in 199 loop ir0 ow0 200 201latin1_checked_encode :: EncodeBuffer 202latin1_checked_encode input output 203 = single_byte_checked_encode 0xff input output 204 205ascii_encode :: EncodeBuffer 206ascii_encode input output 207 = single_byte_checked_encode 0x7f input output 208 209single_byte_checked_encode :: Int -> EncodeBuffer 210single_byte_checked_encode max_legal_char 211 input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ } 212 output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os } 213 = let 214 done why !ir !ow = return (why, 215 if ir == iw then input{ bufL=0, bufR=0 } 216 else input{ bufL=ir }, 217 output{ bufR=ow }) 218 loop !ir !ow 219 | ow >= os = done OutputUnderflow ir ow 220 | ir >= iw = done InputUnderflow ir ow 221 | otherwise = do 222 (c,ir') <- readCharBuf iraw ir 223 if ord c > max_legal_char then invalid else do 224 writeWord8Buf oraw ow (fromIntegral (ord c)) 225 loop ir' (ow+1) 226 where 227 invalid = done InvalidSequence ir ow 228 in 229 loop ir0 ow0 230{-# INLINE single_byte_checked_encode #-} 231