1{-# LANGUAGE Trustworthy #-}
2{-# LANGUAGE NoImplicitPrelude
3           , BangPatterns
4           , NondecreasingIndentation
5  #-}
6{-# OPTIONS_GHC  -funbox-strict-fields #-}
7
8-----------------------------------------------------------------------------
9-- |
10-- Module      :  GHC.IO.Encoding.Latin1
11-- Copyright   :  (c) The University of Glasgow, 2009
12-- License     :  see libraries/base/LICENSE
13--
14-- Maintainer  :  libraries@haskell.org
15-- Stability   :  internal
16-- Portability :  non-portable
17--
18-- Single-byte encodings that map directly to Unicode code points.
19--
20-- Portions Copyright   : (c) Tom Harper 2008-2009,
21--                        (c) Bryan O'Sullivan 2009,
22--                        (c) Duncan Coutts 2009
23--
24-----------------------------------------------------------------------------
25
26module GHC.IO.Encoding.Latin1 (
27  latin1, mkLatin1,
28  latin1_checked, mkLatin1_checked,
29  ascii, mkAscii,
30  latin1_decode,
31  ascii_decode,
32  latin1_encode,
33  latin1_checked_encode,
34  ascii_encode,
35  ) where
36
37import GHC.Base
38import GHC.Real
39import GHC.Num
40-- import GHC.IO
41import GHC.IO.Buffer
42import GHC.IO.Encoding.Failure
43import GHC.IO.Encoding.Types
44
45-- -----------------------------------------------------------------------------
46-- Latin1
47
48latin1 :: TextEncoding
49latin1 = mkLatin1 ErrorOnCodingFailure
50
51-- | @since 4.4.0.0
52mkLatin1 :: CodingFailureMode -> TextEncoding
53mkLatin1 cfm = TextEncoding { textEncodingName = "ISO-8859-1",
54                              mkTextDecoder = latin1_DF cfm,
55                              mkTextEncoder = latin1_EF cfm }
56
57latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
58latin1_DF cfm =
59  return (BufferCodec {
60             encode   = latin1_decode,
61             recover  = recoverDecode cfm,
62             close    = return (),
63             getState = return (),
64             setState = const $ return ()
65          })
66
67latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
68latin1_EF cfm =
69  return (BufferCodec {
70             encode   = latin1_encode,
71             recover  = recoverEncode cfm,
72             close    = return (),
73             getState = return (),
74             setState = const $ return ()
75          })
76
77latin1_checked :: TextEncoding
78latin1_checked = mkLatin1_checked ErrorOnCodingFailure
79
80-- | @since 4.4.0.0
81mkLatin1_checked :: CodingFailureMode -> TextEncoding
82mkLatin1_checked cfm = TextEncoding { textEncodingName = "ISO-8859-1",
83                                      mkTextDecoder = latin1_DF cfm,
84                                      mkTextEncoder = latin1_checked_EF cfm }
85
86latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
87latin1_checked_EF cfm =
88  return (BufferCodec {
89             encode   = latin1_checked_encode,
90             recover  = recoverEncode cfm,
91             close    = return (),
92             getState = return (),
93             setState = const $ return ()
94          })
95
96-- -----------------------------------------------------------------------------
97-- ASCII
98
99-- | @since 4.9.0.0
100ascii :: TextEncoding
101ascii = mkAscii ErrorOnCodingFailure
102
103-- | @since 4.9.0.0
104mkAscii :: CodingFailureMode -> TextEncoding
105mkAscii cfm = TextEncoding { textEncodingName = "ASCII",
106                             mkTextDecoder = ascii_DF cfm,
107                             mkTextEncoder = ascii_EF cfm }
108
109ascii_DF :: CodingFailureMode -> IO (TextDecoder ())
110ascii_DF cfm =
111  return (BufferCodec {
112             encode   = ascii_decode,
113             recover  = recoverDecode cfm,
114             close    = return (),
115             getState = return (),
116             setState = const $ return ()
117          })
118
119ascii_EF :: CodingFailureMode -> IO (TextEncoder ())
120ascii_EF cfm =
121  return (BufferCodec {
122             encode   = ascii_encode,
123             recover  = recoverEncode cfm,
124             close    = return (),
125             getState = return (),
126             setState = const $ return ()
127          })
128
129
130
131-- -----------------------------------------------------------------------------
132-- The actual decoders and encoders
133
134-- TODO: Eliminate code duplication between the checked and unchecked
135-- versions of the decoder or encoder (but don't change the Core!)
136
137latin1_decode :: DecodeBuffer
138latin1_decode
139  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
140  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
141 = let
142       loop !ir !ow
143         | ow >= os = done OutputUnderflow ir ow
144         | ir >= iw = done InputUnderflow ir ow
145         | otherwise = do
146              c0 <- readWord8Buf iraw ir
147              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
148              loop (ir+1) ow'
149
150       -- lambda-lifted, to avoid thunks being built in the inner-loop:
151       done why !ir !ow = return (why,
152                                  if ir == iw then input{ bufL=0, bufR=0 }
153                                              else input{ bufL=ir },
154                                  output{ bufR=ow })
155    in
156    loop ir0 ow0
157
158ascii_decode :: DecodeBuffer
159ascii_decode
160  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
161  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
162 = let
163       loop !ir !ow
164         | ow >= os = done OutputUnderflow ir ow
165         | ir >= iw = done InputUnderflow ir ow
166         | otherwise = do
167              c0 <- readWord8Buf iraw ir
168              if c0 > 0x7f then invalid else do
169              ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral c0))
170              loop (ir+1) ow'
171         where
172           invalid = done InvalidSequence ir ow
173
174       -- lambda-lifted, to avoid thunks being built in the inner-loop:
175       done why !ir !ow = return (why,
176                                  if ir == iw then input{ bufL=0, bufR=0 }
177                                              else input{ bufL=ir },
178                                  output{ bufR=ow })
179    in
180    loop ir0 ow0
181
182latin1_encode :: EncodeBuffer
183latin1_encode
184  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
185  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
186 = let
187      done why !ir !ow = return (why,
188                                 if ir == iw then input{ bufL=0, bufR=0 }
189                                             else input{ bufL=ir },
190                                 output{ bufR=ow })
191      loop !ir !ow
192        | ow >= os = done OutputUnderflow ir ow
193        | ir >= iw = done InputUnderflow ir ow
194        | otherwise = do
195           (c,ir') <- readCharBuf iraw ir
196           writeWord8Buf oraw ow (fromIntegral (ord c))
197           loop ir' (ow+1)
198    in
199    loop ir0 ow0
200
201latin1_checked_encode :: EncodeBuffer
202latin1_checked_encode input output
203 = single_byte_checked_encode 0xff input output
204
205ascii_encode :: EncodeBuffer
206ascii_encode input output
207 = single_byte_checked_encode 0x7f input output
208
209single_byte_checked_encode :: Int -> EncodeBuffer
210single_byte_checked_encode max_legal_char
211  input@Buffer{  bufRaw=iraw, bufL=ir0, bufR=iw,  bufSize=_  }
212  output@Buffer{ bufRaw=oraw, bufL=_,   bufR=ow0, bufSize=os }
213 = let
214      done why !ir !ow = return (why,
215                                 if ir == iw then input{ bufL=0, bufR=0 }
216                                             else input{ bufL=ir },
217                                 output{ bufR=ow })
218      loop !ir !ow
219        | ow >= os = done OutputUnderflow ir ow
220        | ir >= iw = done InputUnderflow ir ow
221        | otherwise = do
222           (c,ir') <- readCharBuf iraw ir
223           if ord c > max_legal_char then invalid else do
224           writeWord8Buf oraw ow (fromIntegral (ord c))
225           loop ir' (ow+1)
226        where
227           invalid = done InvalidSequence ir ow
228    in
229    loop ir0 ow0
230{-# INLINE single_byte_checked_encode #-}
231