1{-# LANGUAGE OverloadedStrings, CPP #-} 2module Network.HTTP.Types.URI 3( 4 -- * Query string 5 QueryItem 6, Query 7, SimpleQueryItem 8, SimpleQuery 9, simpleQueryToQuery 10, renderQuery 11, renderQueryBuilder 12, renderSimpleQuery 13, parseQuery 14, parseQueryReplacePlus 15, parseSimpleQuery 16 -- **Escape only parts 17, renderQueryPartialEscape 18, renderQueryBuilderPartialEscape 19, EscapeItem(..) 20, PartialEscapeQueryItem 21, PartialEscapeQuery 22 -- ** Text query string (UTF8 encoded) 23, QueryText 24, queryTextToQuery 25, queryToQueryText 26, renderQueryText 27, parseQueryText 28 -- * Path segments 29, encodePathSegments 30, decodePathSegments 31, encodePathSegmentsRelative 32 -- * Path (segments + query string) 33, extractPath 34, encodePath 35, decodePath 36 -- * URL encoding / decoding 37, urlEncodeBuilder 38, urlEncode 39, urlDecode 40) 41where 42 43import Control.Arrow 44import Data.Bits 45import Data.Char 46import Data.List 47import Data.Maybe 48#if __GLASGOW_HASKELL__ < 710 49import Data.Monoid 50#endif 51import Data.Text (Text) 52import Data.Text.Encoding (encodeUtf8, decodeUtf8With) 53import Data.Text.Encoding.Error (lenientDecode) 54import Data.Word 55import qualified Data.ByteString as B 56import qualified Data.ByteString.Builder as B 57import qualified Data.ByteString.Lazy as BL 58import Data.ByteString.Char8 () {-IsString-} 59 60-- | Query item 61type QueryItem = (B.ByteString, Maybe B.ByteString) 62 63-- | Query. 64-- 65-- General form: @a=b&c=d@, but if the value is Nothing, it becomes 66-- @a&c=d@. 67type Query = [QueryItem] 68 69-- | Like Query, but with 'Text' instead of 'B.ByteString' (UTF8-encoded). 70type QueryText = [(Text, Maybe Text)] 71 72-- | Convert 'QueryText' to 'Query'. 73queryTextToQuery :: QueryText -> Query 74queryTextToQuery = map $ encodeUtf8 *** fmap encodeUtf8 75 76-- | Convert 'QueryText' to a 'B.Builder'. 77renderQueryText :: Bool -- ^ prepend a question mark? 78 -> QueryText 79 -> B.Builder 80renderQueryText b = renderQueryBuilder b . queryTextToQuery 81 82-- | Convert 'Query' to 'QueryText' (leniently decoding the UTF-8). 83queryToQueryText :: Query -> QueryText 84queryToQueryText = 85 map $ go *** fmap go 86 where 87 go = decodeUtf8With lenientDecode 88 89-- | Parse 'QueryText' from a 'B.ByteString'. See 'parseQuery' for details. 90parseQueryText :: B.ByteString -> QueryText 91parseQueryText = queryToQueryText . parseQuery 92 93-- | Simplified Query item type without support for parameter-less items. 94type SimpleQueryItem = (B.ByteString, B.ByteString) 95 96-- | Simplified Query type without support for parameter-less items. 97type SimpleQuery = [SimpleQueryItem] 98 99-- | Convert 'SimpleQuery' to 'Query'. 100simpleQueryToQuery :: SimpleQuery -> Query 101simpleQueryToQuery = map (second Just) 102 103-- | Convert 'Query' to a 'Builder'. 104renderQueryBuilder :: Bool -- ^ prepend a question mark? 105 -> Query 106 -> B.Builder 107renderQueryBuilder _ [] = mempty 108-- FIXME replace mconcat + map with foldr 109renderQueryBuilder qmark' (p:ps) = mconcat 110 $ go (if qmark' then qmark else mempty) p 111 : map (go amp) ps 112 where 113 qmark = B.byteString "?" 114 amp = B.byteString "&" 115 equal = B.byteString "=" 116 go sep (k, mv) = mconcat [ 117 sep 118 , urlEncodeBuilder True k 119 , case mv of 120 Nothing -> mempty 121 Just v -> equal `mappend` urlEncodeBuilder True v 122 ] 123 124-- | Convert 'Query' to 'ByteString'. 125renderQuery :: Bool -- ^ prepend question mark? 126 -> Query -> B.ByteString 127renderQuery qm = BL.toStrict . B.toLazyByteString . renderQueryBuilder qm 128 129-- | Convert 'SimpleQuery' to 'ByteString'. 130renderSimpleQuery :: Bool -- ^ prepend question mark? 131 -> SimpleQuery -> B.ByteString 132renderSimpleQuery useQuestionMark = renderQuery useQuestionMark . simpleQueryToQuery 133 134-- | Split out the query string into a list of keys and values. A few 135-- importants points: 136-- 137-- * The result returned is still bytestrings, since we perform no character 138-- decoding here. Most likely, you will want to use UTF-8 decoding, but this is 139-- left to the user of the library. 140-- 141-- * Percent decoding errors are ignored. In particular, @"%Q"@ will be output as 142-- @"%Q"@. 143-- 144-- * It decodes @\'+\'@ characters to @\' \'@ 145parseQuery :: B.ByteString -> Query 146parseQuery = parseQueryReplacePlus True 147 148-- | Same functionality as 'parseQuery' with the option to decode @\'+\'@ characters to @\' \'@ 149-- or preserve @\'+\'@ 150parseQueryReplacePlus :: Bool -> B.ByteString -> Query 151parseQueryReplacePlus replacePlus bs = parseQueryString' $ dropQuestion bs 152 where 153 dropQuestion q = 154 case B.uncons q of 155 Just (63, q') -> q' 156 _ -> q 157 parseQueryString' q | B.null q = [] 158 parseQueryString' q = 159 let (x, xs) = breakDiscard queryStringSeparators q 160 in parsePair x : parseQueryString' xs 161 where 162 parsePair x = 163 let (k, v) = B.break (== 61) x -- equal sign 164 v'' = 165 case B.uncons v of 166 Just (_, v') -> Just $ urlDecode replacePlus v' 167 _ -> Nothing 168 in (urlDecode replacePlus k, v'') 169 170queryStringSeparators :: B.ByteString 171queryStringSeparators = B.pack [38,59] -- ampersand, semicolon 172 173-- | Break the second bytestring at the first occurrence of any bytes from 174-- the first bytestring, discarding that byte. 175breakDiscard :: B.ByteString -> B.ByteString -> (B.ByteString, B.ByteString) 176breakDiscard seps s = 177 let (x, y) = B.break (`B.elem` seps) s 178 in (x, B.drop 1 y) 179 180-- | Parse 'SimpleQuery' from a 'ByteString'. 181parseSimpleQuery :: B.ByteString -> SimpleQuery 182parseSimpleQuery = map (second $ fromMaybe B.empty) . parseQuery 183 184ord8 :: Char -> Word8 185ord8 = fromIntegral . ord 186 187unreservedQS, unreservedPI :: [Word8] 188unreservedQS = map ord8 "-_.~" 189unreservedPI = map ord8 "-_.~:@&=+$," 190 191-- | Percent-encoding for URLs. 192urlEncodeBuilder' :: [Word8] -> B.ByteString -> B.Builder 193urlEncodeBuilder' extraUnreserved = mconcat . map encodeChar . B.unpack 194 where 195 encodeChar ch | unreserved ch = B.word8 ch 196 | otherwise = h2 ch 197 198 unreserved ch | ch >= 65 && ch <= 90 = True -- A-Z 199 | ch >= 97 && ch <= 122 = True -- a-z 200 | ch >= 48 && ch <= 57 = True -- 0-9 201 unreserved c = c `elem` extraUnreserved 202 203 -- must be upper-case 204 h2 v = B.word8 37 `mappend` B.word8 (h a) `mappend` B.word8 (h b) -- 37 = % 205 where (a, b) = v `divMod` 16 206 h i | i < 10 = 48 + i -- zero (0) 207 | otherwise = 65 + i - 10 -- 65: A 208 209-- | Percent-encoding for URLs (using 'B.Builder'). 210urlEncodeBuilder 211 :: Bool -- ^ Whether input is in query string. True: Query string, False: Path element 212 -> B.ByteString 213 -> B.Builder 214urlEncodeBuilder True = urlEncodeBuilder' unreservedQS 215urlEncodeBuilder False = urlEncodeBuilder' unreservedPI 216 217-- | Percent-encoding for URLs. 218urlEncode :: Bool -- ^ Whether to decode @\'+\'@ to @\' \'@ 219 -> B.ByteString -- ^ The ByteString to encode as URL 220 -> B.ByteString -- ^ The encoded URL 221urlEncode q = BL.toStrict . B.toLazyByteString . urlEncodeBuilder q 222 223-- | Percent-decoding. 224urlDecode :: Bool -- ^ Whether to decode @\'+\'@ to @\' \'@ 225 -> B.ByteString -> B.ByteString 226urlDecode replacePlus z = fst $ B.unfoldrN (B.length z) go z 227 where 228 go bs = 229 case B.uncons bs of 230 Nothing -> Nothing 231 Just (43, ws) | replacePlus -> Just (32, ws) -- plus to space 232 Just (37, ws) -> Just $ fromMaybe (37, ws) $ do -- percent 233 (x, xs) <- B.uncons ws 234 x' <- hexVal x 235 (y, ys) <- B.uncons xs 236 y' <- hexVal y 237 Just (combine x' y', ys) 238 Just (w, ws) -> Just (w, ws) 239 hexVal w 240 | 48 <= w && w <= 57 = Just $ w - 48 -- 0 - 9 241 | 65 <= w && w <= 70 = Just $ w - 55 -- A - F 242 | 97 <= w && w <= 102 = Just $ w - 87 -- a - f 243 | otherwise = Nothing 244 combine :: Word8 -> Word8 -> Word8 245 combine a b = shiftL a 4 .|. b 246 247-- | Encodes a list of path segments into a valid URL fragment. 248-- 249-- This function takes the following three steps: 250-- 251-- * UTF-8 encodes the characters. 252-- 253-- * Performs percent encoding on all unreserved characters, as well as @\:\@\=\+\$@, 254-- 255-- * Prepends each segment with a slash. 256-- 257-- For example: 258-- 259-- > encodePathSegments [\"foo\", \"bar\", \"baz\"] 260-- \"\/foo\/bar\/baz\" 261-- 262-- > encodePathSegments [\"foo bar\", \"baz\/bin\"] 263-- \"\/foo\%20bar\/baz\%2Fbin\" 264-- 265-- > encodePathSegments [\"שלום\"] 266-- \"\/%D7%A9%D7%9C%D7%95%D7%9D\" 267-- 268-- Huge thanks to Jeremy Shaw who created the original implementation of this 269-- function in web-routes and did such thorough research to determine all 270-- correct escaping procedures. 271encodePathSegments :: [Text] -> B.Builder 272encodePathSegments = foldr (\x -> mappend (B.byteString "/" `mappend` encodePathSegment x)) mempty 273 274-- | Like encodePathSegments, but without the initial slash. 275encodePathSegmentsRelative :: [Text] -> B.Builder 276encodePathSegmentsRelative xs = mconcat $ intersperse (B.byteString "/") (map encodePathSegment xs) 277 278encodePathSegment :: Text -> B.Builder 279encodePathSegment = urlEncodeBuilder False . encodeUtf8 280 281-- | Parse a list of path segments from a valid URL fragment. 282decodePathSegments :: B.ByteString -> [Text] 283decodePathSegments "" = [] 284decodePathSegments "/" = [] 285decodePathSegments a = 286 go $ drop1Slash a 287 where 288 drop1Slash bs = 289 case B.uncons bs of 290 Just (47, bs') -> bs' -- 47 == / 291 _ -> bs 292 go bs = 293 let (x, y) = B.break (== 47) bs 294 in decodePathSegment x : 295 if B.null y 296 then [] 297 else go $ B.drop 1 y 298 299decodePathSegment :: B.ByteString -> Text 300decodePathSegment = decodeUtf8With lenientDecode . urlDecode False 301 302-- | Extract whole path (path segments + query) from a 303-- <http://tools.ietf.org/html/rfc2616#section-5.1.2 RFC 2616 Request-URI>. 304-- 305-- >>> extractPath "/path" 306-- "/path" 307-- 308-- >>> extractPath "http://example.com:8080/path" 309-- "/path" 310-- 311-- >>> extractPath "http://example.com" 312-- "/" 313-- 314-- >>> extractPath "" 315-- "/" 316extractPath :: B.ByteString -> B.ByteString 317extractPath = ensureNonEmpty . extract 318 where 319 extract path 320 | "http://" `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 7) path 321 | "https://" `B.isPrefixOf` path = (snd . breakOnSlash . B.drop 8) path 322 | otherwise = path 323 breakOnSlash = B.break (== 47) 324 ensureNonEmpty "" = "/" 325 ensureNonEmpty p = p 326 327-- | Encode a whole path (path segments + query). 328encodePath :: [Text] -> Query -> B.Builder 329encodePath x [] = encodePathSegments x 330encodePath x y = encodePathSegments x `mappend` renderQueryBuilder True y 331 332-- | Decode a whole path (path segments + query). 333decodePath :: B.ByteString -> ([Text], Query) 334decodePath b = 335 let (x, y) = B.break (== 63) b -- question mark 336 in (decodePathSegments x, parseQuery y) 337 338----------------------------------------------------------------------------------------- 339 340-- | For some URIs characters must not be URI encoded, 341-- e.g. @\'+\'@ or @\':\'@ in @q=a+language:haskell+created:2009-01-01..2009-02-01&sort=stars@ 342-- The character list unreservedPI instead of unreservedQS would solve this. 343-- But we explicitly decide what part to encode. 344-- This is mandatory when searching for @\'+\'@: @q=%2B+language:haskell@. 345data EscapeItem = QE B.ByteString -- will be URL encoded 346 | QN B.ByteString -- will not be url encoded, e.g. @\'+\'@ or @\':\'@ 347 deriving (Show, Eq, Ord) 348 349-- | Query item 350type PartialEscapeQueryItem = (B.ByteString, [EscapeItem]) 351 352-- | Query with some chars that should not be escaped. 353-- 354-- General form: @a=b&c=d:e+f&g=h@ 355type PartialEscapeQuery = [PartialEscapeQueryItem] 356 357-- | Convert 'PartialEscapeQuery' to 'ByteString'. 358renderQueryPartialEscape :: Bool -- ^ prepend question mark? 359 -> PartialEscapeQuery -> B.ByteString 360renderQueryPartialEscape qm = BL.toStrict . B.toLazyByteString . renderQueryBuilderPartialEscape qm 361 362-- | Convert 'PartialEscapeQuery' to a 'Builder'. 363renderQueryBuilderPartialEscape :: Bool -- ^ prepend a question mark? 364 -> PartialEscapeQuery 365 -> B.Builder 366renderQueryBuilderPartialEscape _ [] = mempty 367-- FIXME replace mconcat + map with foldr 368renderQueryBuilderPartialEscape qmark' (p:ps) = mconcat 369 $ go (if qmark' then qmark else mempty) p 370 : map (go amp) ps 371 where 372 qmark = B.byteString "?" 373 amp = B.byteString "&" 374 equal = B.byteString "=" 375 go sep (k, mv) = mconcat [ 376 sep 377 , urlEncodeBuilder True k 378 , case mv of 379 [] -> mempty 380 vs -> equal `mappend` (mconcat (map encode vs)) 381 ] 382 encode (QE v) = urlEncodeBuilder True v 383 encode (QN v) = B.byteString v 384 385