1{-# LANGUAGE OverloadedStrings, CPP #-}
2module Network.HTTP.Types.URI
3(
4  -- * Query string
5  QueryItem
6, Query
7, SimpleQueryItem
8, SimpleQuery
9, simpleQueryToQuery
10, renderQuery
11, renderQueryBuilder
12, renderSimpleQuery
13, parseQuery
14, parseQueryReplacePlus
15, parseSimpleQuery
16  -- **Escape only parts
17, renderQueryPartialEscape
18, renderQueryBuilderPartialEscape
19, EscapeItem(..)
20, PartialEscapeQueryItem
21, PartialEscapeQuery
22  -- ** Text query string (UTF8 encoded)
23, QueryText
24, queryTextToQuery
25, queryToQueryText
26, renderQueryText
27, parseQueryText
28  -- * Path segments
29, encodePathSegments
30, decodePathSegments
31, encodePathSegmentsRelative
32  -- * Path (segments + query string)
33, extractPath
34, encodePath
35, decodePath
36  -- * URL encoding / decoding
37, urlEncodeBuilder
38, urlEncode
39, urlDecode
40)
41where
42
43import           Control.Arrow
44import           Data.Bits
45import           Data.Char
46import           Data.List
47import           Data.Maybe
48#if __GLASGOW_HASKELL__ < 710
49import           Data.Monoid
50#endif
51import           Data.Text                      (Text)
52import           Data.Text.Encoding             (encodeUtf8, decodeUtf8With)
53import           Data.Text.Encoding.Error       (lenientDecode)
54import           Data.Word
55import qualified Data.ByteString                as B
56import qualified Data.ByteString.Builder        as B
57import qualified Data.ByteString.Lazy           as BL
58import           Data.ByteString.Char8          () {-IsString-}
59
60-- | Query item
61type QueryItem = (B.ByteString, Maybe B.ByteString)
62
63-- | Query.
64--
65-- General form: @a=b&c=d@, but if the value is Nothing, it becomes
66-- @a&c=d@.
67type Query = [QueryItem]
68
69-- | Like Query, but with 'Text' instead of 'B.ByteString' (UTF8-encoded).
70type QueryText = [(Text, Maybe Text)]
71
72-- | Convert 'QueryText' to 'Query'.
73queryTextToQuery :: QueryText -> Query
74queryTextToQuery = map $ encodeUtf8 *** fmap encodeUtf8
75
76-- | Convert 'QueryText' to a 'B.Builder'.
77renderQueryText :: Bool -- ^ prepend a question mark?
78                -> QueryText
79                -> B.Builder
80renderQueryText b = renderQueryBuilder b . queryTextToQuery
81
82-- | Convert 'Query' to 'QueryText' (leniently decoding the UTF-8).
83queryToQueryText :: Query -> QueryText
84queryToQueryText =
85    map $ go *** fmap go
86  where
87    go = decodeUtf8With lenientDecode
88
89-- | Parse 'QueryText' from a 'B.ByteString'. See 'parseQuery' for details.
90parseQueryText :: B.ByteString -> QueryText
91parseQueryText = queryToQueryText . parseQuery
92
93-- | Simplified Query item type without support for parameter-less items.
94type SimpleQueryItem = (B.ByteString, B.ByteString)
95
96-- | Simplified Query type without support for parameter-less items.
97type SimpleQuery = [SimpleQueryItem]
98
99-- | Convert 'SimpleQuery' to 'Query'.
100simpleQueryToQuery :: SimpleQuery -> Query
101simpleQueryToQuery = map (second Just)
102
103-- | Convert 'Query' to a 'Builder'.
104renderQueryBuilder :: Bool -- ^ prepend a question mark?
105                   -> Query
106                   -> B.Builder
107renderQueryBuilder _ [] = mempty
108-- FIXME replace mconcat + map with foldr
109renderQueryBuilder qmark' (p:ps) = mconcat
110    $ go (if qmark' then qmark else mempty) p
111    : map (go amp) ps
112  where
113    qmark = B.byteString "?"
114    amp = B.byteString "&"
115    equal = B.byteString "="
116    go sep (k, mv) = mconcat [
117                      sep
118                     , urlEncodeBuilder True k
119                     , case mv of
120                         Nothing -> mempty
121                         Just v -> equal `mappend` urlEncodeBuilder True v
122                     ]
123
124-- | Convert 'Query' to 'ByteString'.
125renderQuery :: Bool -- ^ prepend question mark?
126            -> Query -> B.ByteString
127renderQuery qm = BL.toStrict . B.toLazyByteString . renderQueryBuilder qm
128
129-- | Convert 'SimpleQuery' to 'ByteString'.
130renderSimpleQuery :: Bool -- ^ prepend question mark?
131                  -> SimpleQuery -> B.ByteString
132renderSimpleQuery useQuestionMark = renderQuery useQuestionMark . simpleQueryToQuery
133
134-- | Split out the query string into a list of keys and values. A few
135-- importants points:
136--
137-- * The result returned is still bytestrings, since we perform no character
138-- decoding here. Most likely, you will want to use UTF-8 decoding, but this is
139-- left to the user of the library.
140--
141-- * Percent decoding errors are ignored. In particular, @"%Q"@ will be output as
142-- @"%Q"@.
143--
144-- * It decodes @\'+\'@ characters to @\' \'@
145parseQuery :: B.ByteString -> Query
146parseQuery = parseQueryReplacePlus True
147
148-- | Same functionality as 'parseQuery' with the option to decode @\'+\'@ characters to @\' \'@
149-- or preserve @\'+\'@
150parseQueryReplacePlus :: Bool -> B.ByteString -> Query
151parseQueryReplacePlus replacePlus bs = parseQueryString' $ dropQuestion bs
152  where
153    dropQuestion q =
154        case B.uncons q of
155            Just (63, q') -> q'
156            _ -> q
157    parseQueryString' q | B.null q = []
158    parseQueryString' q =
159        let (x, xs) = breakDiscard queryStringSeparators q
160         in parsePair x : parseQueryString' xs
161      where
162        parsePair x =
163            let (k, v) = B.break (== 61) x -- equal sign
164                v'' =
165                    case B.uncons v of
166                        Just (_, v') -> Just $ urlDecode replacePlus v'
167                        _ -> Nothing
168             in (urlDecode replacePlus k, v'')
169
170queryStringSeparators :: B.ByteString
171queryStringSeparators = B.pack [38,59] -- ampersand, semicolon
172
173-- | Break the second bytestring at the first occurrence of any bytes from
174-- the first bytestring, discarding that byte.
175breakDiscard :: B.ByteString -> B.ByteString -> (B.ByteString, B.ByteString)
176breakDiscard seps s =
177    let (x, y) = B.break (`B.elem` seps) s
178     in (x, B.drop 1 y)
179
180-- | Parse 'SimpleQuery' from a 'ByteString'.
181parseSimpleQuery :: B.ByteString -> SimpleQuery
182parseSimpleQuery = map (second $ fromMaybe B.empty) . parseQuery
183
184ord8 :: Char -> Word8
185ord8 = fromIntegral . ord
186
187unreservedQS, unreservedPI :: [Word8]
188unreservedQS = map ord8 "-_.~"
189unreservedPI = map ord8 "-_.~:@&=+$,"
190
191-- | Percent-encoding for URLs.
192urlEncodeBuilder' :: [Word8] -> B.ByteString -> B.Builder
193urlEncodeBuilder' extraUnreserved = mconcat . map encodeChar . B.unpack
194    where
195      encodeChar ch | unreserved ch = B.word8 ch
196                    | otherwise     = h2 ch
197
198      unreserved ch | ch >= 65 && ch <= 90  = True -- A-Z
199                    | ch >= 97 && ch <= 122 = True -- a-z
200                    | ch >= 48 && ch <= 57  = True -- 0-9
201      unreserved c = c `elem` extraUnreserved
202
203      -- must be upper-case
204      h2 v = B.word8 37 `mappend` B.word8 (h a) `mappend` B.word8 (h b) -- 37 = %
205          where (a, b) = v `divMod` 16
206      h i | i < 10    = 48 + i -- zero (0)
207          | otherwise = 65 + i - 10 -- 65: A
208
209-- | Percent-encoding for URLs (using 'B.Builder').
210urlEncodeBuilder
211    :: Bool -- ^ Whether input is in query string. True: Query string, False: Path element
212    -> B.ByteString
213    -> B.Builder
214urlEncodeBuilder True  = urlEncodeBuilder' unreservedQS
215urlEncodeBuilder False = urlEncodeBuilder' unreservedPI
216
217-- | Percent-encoding for URLs.
218urlEncode :: Bool -- ^ Whether to decode @\'+\'@ to @\' \'@
219          -> B.ByteString -- ^ The ByteString to encode as URL
220          -> B.ByteString -- ^ The encoded URL
221urlEncode q = BL.toStrict . B.toLazyByteString . urlEncodeBuilder q
222
223-- | Percent-decoding.
224urlDecode :: Bool -- ^ Whether to decode @\'+\'@ to @\' \'@
225          -> B.ByteString -> B.ByteString
226urlDecode replacePlus z = fst $ B.unfoldrN (B.length z) go z
227  where
228    go bs =
229        case B.uncons bs of
230            Nothing -> Nothing
231            Just (43, ws) | replacePlus -> Just (32, ws) -- plus to space
232            Just (37, ws) -> Just $ fromMaybe (37, ws) $ do -- percent
233                (x, xs) <- B.uncons ws
234                x' <- hexVal x
235                (y, ys) <- B.uncons xs
236                y' <- hexVal y
237                Just (combine x' y', ys)
238            Just (w, ws) -> Just (w, ws)
239    hexVal w
240        | 48 <= w && w <= 57  = Just $ w - 48 -- 0 - 9
241        | 65 <= w && w <= 70  = Just $ w - 55 -- A - F
242        | 97 <= w && w <= 102 = Just $ w - 87 -- a - f
243        | otherwise = Nothing
244    combine :: Word8 -> Word8 -> Word8
245    combine a b = shiftL a 4 .|. b
246
247-- | Encodes a list of path segments into a valid URL fragment.
248--
249-- This function takes the following three steps:
250--
251-- * UTF-8 encodes the characters.
252--
253-- * Performs percent encoding on all unreserved characters, as well as @\:\@\=\+\$@,
254--
255-- * Prepends each segment with a slash.
256--
257-- For example:
258--
259-- > encodePathSegments [\"foo\", \"bar\", \"baz\"]
260-- \"\/foo\/bar\/baz\"
261--
262-- > encodePathSegments [\"foo bar\", \"baz\/bin\"]
263-- \"\/foo\%20bar\/baz\%2Fbin\"
264--
265-- > encodePathSegments [\"שלום\"]
266-- \"\/%D7%A9%D7%9C%D7%95%D7%9D\"
267--
268-- Huge thanks to Jeremy Shaw who created the original implementation of this
269-- function in web-routes and did such thorough research to determine all
270-- correct escaping procedures.
271encodePathSegments :: [Text] -> B.Builder
272encodePathSegments = foldr (\x -> mappend (B.byteString "/" `mappend` encodePathSegment x)) mempty
273
274-- | Like encodePathSegments, but without the initial slash.
275encodePathSegmentsRelative :: [Text] -> B.Builder
276encodePathSegmentsRelative xs = mconcat $ intersperse (B.byteString "/") (map encodePathSegment xs)
277
278encodePathSegment :: Text -> B.Builder
279encodePathSegment = urlEncodeBuilder False . encodeUtf8
280
281-- | Parse a list of path segments from a valid URL fragment.
282decodePathSegments :: B.ByteString -> [Text]
283decodePathSegments "" = []
284decodePathSegments "/" = []
285decodePathSegments a =
286    go $ drop1Slash a
287  where
288    drop1Slash bs =
289        case B.uncons bs of
290            Just (47, bs') -> bs' -- 47 == /
291            _ -> bs
292    go bs =
293        let (x, y) = B.break (== 47) bs
294         in decodePathSegment x :
295            if B.null y
296                then []
297                else go $ B.drop 1 y
298
299decodePathSegment :: B.ByteString -> Text
300decodePathSegment = decodeUtf8With lenientDecode . urlDecode False
301
302-- | Extract whole path (path segments + query) from a
303-- <http://tools.ietf.org/html/rfc2616#section-5.1.2 RFC 2616 Request-URI>.
304--
305-- >>> extractPath "/path"
306-- "/path"
307--
308-- >>> extractPath "http://example.com:8080/path"
309-- "/path"
310--
311-- >>> extractPath "http://example.com"
312-- "/"
313--
314-- >>> extractPath ""
315-- "/"
316extractPath :: B.ByteString -> B.ByteString
317extractPath = ensureNonEmpty . extract
318  where
319    extract path
320      | "http://"  `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 7) path
321      | "https://" `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 8) path
322      | otherwise                      = path
323    breakOnSlash = B.break (== 47)
324    ensureNonEmpty "" = "/"
325    ensureNonEmpty p  = p
326
327-- | Encode a whole path (path segments + query).
328encodePath :: [Text] -> Query -> B.Builder
329encodePath x [] = encodePathSegments x
330encodePath x y = encodePathSegments x `mappend` renderQueryBuilder True y
331
332-- | Decode a whole path (path segments + query).
333decodePath :: B.ByteString -> ([Text], Query)
334decodePath b =
335    let (x, y) = B.break (== 63) b -- question mark
336    in (decodePathSegments x, parseQuery y)
337
338-----------------------------------------------------------------------------------------
339
340-- | For some URIs characters must not be URI encoded,
341-- e.g. @\'+\'@ or @\':\'@ in @q=a+language:haskell+created:2009-01-01..2009-02-01&sort=stars@
342-- The character list unreservedPI instead of unreservedQS would solve this.
343-- But we explicitly decide what part to encode.
344-- This is mandatory when searching for @\'+\'@: @q=%2B+language:haskell@.
345data EscapeItem = QE B.ByteString -- will be URL encoded
346                | QN B.ByteString -- will not be url encoded, e.g. @\'+\'@ or @\':\'@
347    deriving (Show, Eq, Ord)
348
349-- | Query item
350type PartialEscapeQueryItem = (B.ByteString, [EscapeItem])
351
352-- | Query with some chars that should not be escaped.
353--
354-- General form: @a=b&c=d:e+f&g=h@
355type PartialEscapeQuery = [PartialEscapeQueryItem]
356
357-- | Convert 'PartialEscapeQuery' to 'ByteString'.
358renderQueryPartialEscape :: Bool -- ^ prepend question mark?
359            -> PartialEscapeQuery -> B.ByteString
360renderQueryPartialEscape qm = BL.toStrict . B.toLazyByteString . renderQueryBuilderPartialEscape qm
361
362-- | Convert 'PartialEscapeQuery' to a 'Builder'.
363renderQueryBuilderPartialEscape :: Bool -- ^ prepend a question mark?
364                   -> PartialEscapeQuery
365                   -> B.Builder
366renderQueryBuilderPartialEscape _ [] = mempty
367-- FIXME replace mconcat + map with foldr
368renderQueryBuilderPartialEscape qmark' (p:ps) = mconcat
369    $ go (if qmark' then qmark else mempty) p
370    : map (go amp) ps
371  where
372    qmark = B.byteString "?"
373    amp = B.byteString "&"
374    equal = B.byteString "="
375    go sep (k, mv) = mconcat [
376                      sep
377                     , urlEncodeBuilder True k
378                     , case mv of
379                         [] -> mempty
380                         vs -> equal `mappend` (mconcat (map encode vs))
381                     ]
382    encode (QE v) = urlEncodeBuilder True v
383    encode (QN v) = B.byteString v
384
385