1{-# LANGUAGE RecordWildCards, CPP #-}
2#if __GLASGOW_HASKELL__ >= 800
3{-# LANGUAGE DeriveLift, StandaloneDeriving #-}
4#else
5{-# LANGUAGE TemplateHaskell #-}
6#endif
7#if MIN_VERSION_template_haskell(2,12,0) && MIN_VERSION_parsec(3,13,0)
8{-# LANGUAGE Safe #-}
9#elif __GLASGOW_HASKELL__ >= 702
10{-# LANGUAGE Trustworthy #-}
11#endif
12
13--------------------------------------------------------------------------------
14-- |
15--  Module      :  Network.URI
16--  Copyright   :  (c) 2004, Graham Klyne
17--  License     :  BSD-style (see end of this file)
18--
19--  Maintainer  :  Graham Klyne <gk@ninebynine.org>
20--  Stability   :  provisional
21--  Portability :  portable
22--
23--  This module defines functions for handling URIs.  It presents
24--  substantially the same interface as the older GHC Network.URI module, but
25--  is implemented using Parsec rather than a Regex library that is not
26--  available with Hugs.  The internal representation of URI has been changed
27--  so that URI strings are more completely preserved when round-tripping to a
28--  URI value and back.
29--
30--  In addition, four methods are provided for parsing different
31--  kinds of URI string (as noted in RFC3986):
32--      'parseURI',
33--      'parseURIReference',
34--      'parseRelativeReference' and
35--      'parseAbsoluteURI'.
36--
37--  Further, four methods are provided for classifying different
38--  kinds of URI string (as noted in RFC3986):
39--      'isURI',
40--      'isURIReference',
41--      'isRelativeReference' and
42--      'isAbsoluteURI'.
43--
44--  The long-standing official reference for URI handling was RFC2396 [1],
45--  as updated by RFC 2732 [2], but this was replaced by a new specification,
46--  RFC3986 [3] in January 2005.  This latter specification has been used
47--  as the primary reference for constructing the URI parser implemented
48--  here, and it is intended that there is a direct relationship between
49--  the syntax definition in that document and this parser implementation.
50--
51--  RFC 1808 [4] contains a number of test cases for relative URI handling.
52--  Dan Connolly's Python module @uripath.py@ [5] also contains useful details
53--  and test cases.
54--
55--  Some of the code has been copied from the previous GHC implementation,
56--  but the parser is replaced with one that performs more complete
57--  syntax checking of the URI itself, according to RFC3986 [3].
58--
59--  References
60--
61--  (1) <http://www.ietf.org/rfc/rfc2396.txt>
62--
63--  (2) <http://www.ietf.org/rfc/rfc2732.txt>
64--
65--  (3) <http://www.ietf.org/rfc/rfc3986.txt>
66--
67--  (4) <http://www.ietf.org/rfc/rfc1808.txt>
68--
69--  (5) <http://www.w3.org/2000/10/swap/uripath.py>
70--
71--------------------------------------------------------------------------------
72
73module Network.URI
74    (
75    -- * The URI type
76      URI(..)
77    , URIAuth(..)
78    , nullURI
79    , nullURIAuth
80
81    , rectify, rectifyAuth
82
83    -- * Parsing
84    , parseURI
85    , parseURIReference
86    , parseRelativeReference
87    , parseAbsoluteURI
88
89    -- * Test for strings containing various kinds of URI
90    , isURI
91    , isURIReference
92    , isRelativeReference
93    , isAbsoluteURI
94    , isIPv6address
95    , isIPv4address
96
97    -- * Predicates
98    , uriIsAbsolute
99    , uriIsRelative
100
101    -- * Relative URIs
102    , relativeTo
103    , nonStrictRelativeTo
104    , relativeFrom
105
106    -- * Operations on URI strings
107    -- | Support for putting strings into URI-friendly
108    --   escaped format and getting them back again.
109    --   This can't be done transparently in all cases, because certain
110    --   characters have different meanings in different kinds of URI.
111    --   The URI spec [3], section 2.4, indicates that all URI components
112    --   should be escaped before they are assembled as a URI:
113    --   \"Once produced, a URI is always in its percent-encoded form\"
114    , uriToString, uriAuthToString
115    , isReserved, isUnreserved
116    , isAllowedInURI, isUnescapedInURI
117    , isUnescapedInURIComponent
118    , escapeURIChar
119    , escapeURIString
120    , unEscapeString
121    , pathSegments
122
123    -- * URI Normalization functions
124    , normalizeCase
125    , normalizeEscape
126    , normalizePathSegments
127
128    -- * Deprecated functions
129    , parseabsoluteURI
130    , escapeString
131    , reserved, unreserved
132    , scheme, authority, path, query, fragment
133    ) where
134
135import Text.ParserCombinators.Parsec
136    ( GenParser, ParseError
137    , parse, (<?>), try
138    , option, many1, count, notFollowedBy
139    , char, satisfy, oneOf, string, eof
140    , unexpected
141    )
142
143import Control.Applicative
144import Control.Monad (MonadPlus(..))
145import Control.DeepSeq (NFData(rnf), deepseq)
146import Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
147import Data.Bits ((.|.),(.&.),shiftL,shiftR)
148import Data.List (unfoldr, isPrefixOf, isSuffixOf)
149import Numeric (showIntAtBase)
150
151import Language.Haskell.TH.Syntax (Lift(..))
152
153#if !MIN_VERSION_base(4,8,0)
154import Data.Traversable (sequenceA)
155#endif
156
157import Data.Typeable (Typeable)
158#if MIN_VERSION_base(4,0,0)
159import Data.Data (Data)
160#else
161import Data.Generics (Data)
162#endif
163
164#if __GLASGOW_HASKELL__ >= 702
165import GHC.Generics (Generic)
166#endif
167
168------------------------------------------------------------
169--  The URI datatype
170------------------------------------------------------------
171
172-- |Represents a general universal resource identifier using
173--  its component parts.
174--
175--  For example, for the URI
176--
177--  >   foo://anonymous@www.haskell.org:42/ghc?query#frag
178--
179--  the components are:
180--
181data URI = URI
182    { uriScheme     :: String           -- ^ @foo:@
183    , uriAuthority  :: Maybe URIAuth    -- ^ @\/\/anonymous\@www.haskell.org:42@
184    , uriPath       :: String           -- ^ @\/ghc@
185    , uriQuery      :: String           -- ^ @?query@
186    , uriFragment   :: String           -- ^ @#frag@
187#if __GLASGOW_HASKELL__ >= 702
188    } deriving (Eq, Ord, Typeable, Data, Generic)
189#else
190    } deriving (Eq, Ord, Typeable, Data)
191#endif
192
193-- | Add a prefix to a string, unless it already has it.
194ensurePrefix :: String -> String -> String
195ensurePrefix p s = if isPrefixOf p s then s else p ++ s
196
197-- | Add a suffix to a string, unless it already has it.
198ensureSuffix :: String -> String -> String
199ensureSuffix p s = if isSuffixOf p s then s else s ++ p
200
201-- | Given a URIAuth in "nonstandard" form (lacking required separator characters),
202-- return one that is standard.
203rectifyAuth :: URIAuth -> URIAuth
204rectifyAuth a = URIAuth {
205  uriUserInfo = unlessEmpty (ensureSuffix "@") (uriUserInfo a),
206  uriRegName = uriRegName a,
207  uriPort = unlessEmpty (ensurePrefix ":") (uriPort a)
208  }
209
210-- | Given a URI in "nonstandard" form (lacking required separator characters),
211-- return one that is standard.
212rectify :: URI -> URI
213rectify u = URI {
214  uriScheme = ensureSuffix ":" (uriScheme u),
215  uriAuthority = fmap rectifyAuth (uriAuthority u),
216  uriPath = uriPath u,
217  uriQuery = unlessEmpty (ensurePrefix "?") (uriQuery u),
218  uriFragment = unlessEmpty (ensurePrefix "#") (uriFragment u)
219  }
220
221-- | Apply the function to the list, unless that list is empty, in
222-- which case leave it alone.
223unlessEmpty :: ([a] -> [a]) -> [a] -> [a]
224unlessEmpty _f [] = []
225unlessEmpty  f  x = f x
226
227instance NFData URI where
228    rnf (URI s a p q f)
229        = s `deepseq` a `deepseq` p `deepseq` q `deepseq` f `deepseq` ()
230
231-- |Type for authority value within a URI
232data URIAuth = URIAuth
233    { uriUserInfo   :: String           -- ^ @anonymous\@@
234    , uriRegName    :: String           -- ^ @www.haskell.org@
235    , uriPort       :: String           -- ^ @:42@
236#if __GLASGOW_HASKELL__ >= 702
237    } deriving (Eq, Ord, Show, Typeable, Data, Generic)
238#else
239    } deriving (Eq, Ord, Show, Typeable, Data)
240#endif
241
242instance NFData URIAuth where
243    rnf (URIAuth ui rn p) = ui `deepseq` rn `deepseq` p `deepseq` ()
244
245-- |Blank URI
246nullURI :: URI
247nullURI = URI
248    { uriScheme     = ""
249    , uriAuthority  = Nothing
250    , uriPath       = ""
251    , uriQuery      = ""
252    , uriFragment   = ""
253    }
254
255-- |Blank URIAuth.
256nullURIAuth :: URIAuth
257nullURIAuth = URIAuth
258    { uriUserInfo   = ""
259    , uriRegName    = ""
260    , uriPort       = ""
261    }
262
263--  URI as instance of Show.  Note that for security reasons, the default
264--  behaviour is to suppress any userinfo field (see RFC3986, section 7.5).
265--  This can be overridden by using uriToString directly with first
266--  argument @id@ (noting that this returns a ShowS value rather than a string).
267--
268--  [[[Another design would be to embed the userinfo mapping function in
269--  the URIAuth value, with the default value suppressing userinfo formatting,
270--  but providing a function to return a new URI value with userinfo
271--  data exposed by show.]]]
272--
273instance Show URI where
274    showsPrec _ = uriToString defaultUserInfoMap
275
276defaultUserInfoMap :: String -> String
277defaultUserInfoMap uinf = user++newpass
278    where
279        (user,pass) = break (==':') uinf
280        newpass     = if null pass || (pass == "@")
281                                   || (pass == ":@")
282                        then pass
283                        else ":...@"
284
285------------------------------------------------------------
286--  Parse a URI
287------------------------------------------------------------
288
289-- |Turn a string containing a URI into a 'URI'.
290--  Returns 'Nothing' if the string is not a valid URI;
291--  (an absolute URI with optional fragment identifier).
292--
293--  NOTE: this is different from the previous network.URI,
294--  whose @parseURI@ function works like 'parseURIReference'
295--  in this module.
296--
297parseURI :: String -> Maybe URI
298parseURI = parseURIAny uri
299
300-- |Parse a URI reference to a 'URI' value.
301--  Returns 'Nothing' if the string is not a valid URI reference.
302--  (an absolute or relative URI with optional fragment identifier).
303--
304parseURIReference :: String -> Maybe URI
305parseURIReference = parseURIAny uriReference
306
307-- |Parse a relative URI to a 'URI' value.
308--  Returns 'Nothing' if the string is not a valid relative URI.
309--  (a relative URI with optional fragment identifier).
310--
311parseRelativeReference :: String -> Maybe URI
312parseRelativeReference = parseURIAny relativeRef
313
314-- |Parse an absolute URI to a 'URI' value.
315--  Returns 'Nothing' if the string is not a valid absolute URI.
316--  (an absolute URI without a fragment identifier).
317--
318parseAbsoluteURI :: String -> Maybe URI
319parseAbsoluteURI = parseURIAny absoluteURI
320
321-- |Test if string contains a valid URI
322--  (an absolute URI with optional fragment identifier).
323--
324isURI :: String -> Bool
325isURI = isValidParse uri
326
327-- |Test if string contains a valid URI reference
328--  (an absolute or relative URI with optional fragment identifier).
329--
330isURIReference :: String -> Bool
331isURIReference = isValidParse uriReference
332
333-- |Test if string contains a valid relative URI
334--  (a relative URI with optional fragment identifier).
335--
336isRelativeReference :: String -> Bool
337isRelativeReference = isValidParse relativeRef
338
339-- |Test if string contains a valid absolute URI
340--  (an absolute URI without a fragment identifier).
341--
342isAbsoluteURI :: String -> Bool
343isAbsoluteURI = isValidParse absoluteURI
344
345-- |Test if string contains a valid IPv6 address
346--
347isIPv6address :: String -> Bool
348isIPv6address = isValidParse ipv6address
349
350-- |Test if string contains a valid IPv4 address
351--
352isIPv4address :: String -> Bool
353isIPv4address = isValidParse ipv4address
354
355--  Helper function for turning a string into a URI
356--
357parseURIAny :: URIParser URI -> String -> Maybe URI
358parseURIAny parser uristr = case parseAll parser "" uristr of
359        Left  _ -> Nothing
360        Right u -> Just u
361
362--  Helper function to test a string match to a parser
363--
364isValidParse :: URIParser a -> String -> Bool
365isValidParse parser uristr = case parseAll parser "" uristr of
366        -- Left  e -> error (show e)
367        Left  _ -> False
368        Right _ -> True
369
370parseAll :: URIParser a -> String -> String -> Either ParseError a
371parseAll parser filename uristr = parse newparser filename uristr
372    where
373        newparser =
374            do  { res <- parser
375                ; eof
376                ; return res
377                }
378
379------------------------------------------------------------
380--  Predicates
381------------------------------------------------------------
382
383uriIsAbsolute :: URI -> Bool
384uriIsAbsolute (URI {uriScheme = scheme'}) = scheme' /= ""
385
386uriIsRelative :: URI -> Bool
387uriIsRelative = not . uriIsAbsolute
388
389------------------------------------------------------------
390--  URI parser body based on Parsec elements and combinators
391------------------------------------------------------------
392
393--  Parser parser type.
394--  Currently
395type URIParser a = GenParser Char () a
396
397--  RFC3986, section 2.1
398--
399--  Parse and return a 'pct-encoded' sequence
400--
401escaped :: URIParser String
402escaped = sequenceA [char '%', hexDigitChar, hexDigitChar]
403
404--  RFC3986, section 2.2
405--
406-- |Returns 'True' if the character is a \"reserved\" character in a
407--  URI.  To include a literal instance of one of these characters in a
408--  component of a URI, it must be escaped.
409--
410isReserved :: Char -> Bool
411isReserved c = isGenDelims c || isSubDelims c
412
413-- As per https://github.com/haskell/network-uri/pull/46, it was found
414-- that the explicit case statement was noticably faster than a nicer
415-- expression in terms of `elem`.
416isGenDelims :: Char -> Bool
417isGenDelims c =
418  case c of
419    ':' -> True
420    '/' -> True
421    '?' -> True
422    '#' -> True
423    '[' -> True
424    ']' -> True
425    '@' -> True
426    _ -> False
427
428-- As per https://github.com/haskell/network-uri/pull/46, it was found
429-- that the explicit case statement was noticably faster than a nicer
430-- expression in terms of `elem`.
431isSubDelims :: Char -> Bool
432isSubDelims c =
433  case c of
434    '!' -> True
435    '$' -> True
436    '&' -> True
437    '\'' -> True
438    '(' -> True
439    ')' -> True
440    '*' -> True
441    '+' -> True
442    ',' -> True
443    ';' -> True
444    '=' -> True
445    _ -> False
446
447subDelims :: URIParser String
448subDelims = (:[]) <$> oneOf "!$&'()*+,;="
449
450--  RFC3986, section 2.3
451--
452-- |Returns 'True' if the character is an \"unreserved\" character in
453--  a URI.  These characters do not need to be escaped in a URI.  The
454--  only characters allowed in a URI are either \"reserved\",
455--  \"unreserved\", or an escape sequence (@%@ followed by two hex digits).
456--
457isUnreserved :: Char -> Bool
458isUnreserved c = isAlphaNumChar c || (c `elem` "-_.~")
459
460unreservedChar :: URIParser String
461unreservedChar = (:[]) <$> satisfy isUnreserved
462
463--  RFC3986, section 3
464--
465--   URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
466--
467--   hier-part   = "//" authority path-abempty
468--               / path-abs
469--               / path-rootless
470--               / path-empty
471
472uri :: URIParser URI
473uri =
474    do  { us <- try uscheme
475        -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
476        -- ; up <- upath
477        ; (ua,up) <- hierPart
478        ; uq <- option "" ( do { _ <- char '?' ; uquery    } )
479        ; uf <- option "" ( do { _ <- char '#' ; ufragment } )
480        ; return $ URI
481            { uriScheme    = us
482            , uriAuthority = ua
483            , uriPath      = up
484            , uriQuery     = uq
485            , uriFragment  = uf
486            }
487        }
488
489hierPart :: URIParser ((Maybe URIAuth),String)
490hierPart =
491        do  { _ <- try (string "//")
492            ; ua <- uauthority
493            ; up <- pathAbEmpty
494            ; return (ua,up)
495            }
496    <|> do  { up <- pathAbs
497            ; return (Nothing,up)
498            }
499    <|> do  { up <- pathRootLess
500            ; return (Nothing,up)
501            }
502    <|> do  { return (Nothing,"")
503            }
504
505--  RFC3986, section 3.1
506
507uscheme :: URIParser String
508uscheme =
509    do  { s <- oneThenMany alphaChar (satisfy isSchemeChar)
510        ; _ <- char ':'
511        ; return $ s++":"
512        }
513
514--  RFC3986, section 3.2
515
516uauthority :: URIParser (Maybe URIAuth)
517uauthority =
518    do  { uu <- option "" (try userinfo)
519        ; uh <- host
520        ; up <- option "" port
521        ; return $ Just $ URIAuth
522            { uriUserInfo = uu
523            , uriRegName  = uh
524            , uriPort     = up
525            }
526        }
527
528--  RFC3986, section 3.2.1
529
530userinfo :: URIParser String
531userinfo =
532    do  { uu <- many (uchar ";:&=+$,")
533        ; _ <- char '@'
534        ; return (concat uu ++"@")
535        }
536
537--  RFC3986, section 3.2.2
538--  RFC6874, section 2
539
540host :: URIParser String
541host = ipLiteral <|> try ipv4address <|> regName
542
543ipLiteral :: URIParser String
544ipLiteral =
545    do  { _ <- char '['
546        ; ua <- ( ipv6addrz <|> ipvFuture )
547        ; _ <- char ']'
548        ; return $ "[" ++ ua ++ "]"
549        }
550    <?> "IP address literal"
551
552ipvFuture :: URIParser String
553ipvFuture =
554    do  { _ <- char 'v'
555        ; h <- hexDigitChar
556        ; _ <- char '.'
557        ; a <- many1 (satisfy isIpvFutureChar)
558        ; return $ 'v':h:'.':a
559        }
560
561isIpvFutureChar :: Char -> Bool
562isIpvFutureChar c = isUnreserved c || isSubDelims c || (c==';')
563
564zoneid :: URIParser String
565zoneid = concat <$> many1 (unreservedChar <|> escaped)
566
567ipv6addrz :: URIParser String
568ipv6addrz = (++) <$> ipv6address <*> option "" (try $ (++) <$> string "%25" <*> zoneid)
569
570ipv6address :: URIParser String
571ipv6address =
572        try ( do
573                { a2 <- count 6 h4c
574                ; a3 <- ls32
575                ; return $ concat a2 ++ a3
576                } )
577    <|> try ( do
578                { _ <- string "::"
579                ; a2 <- count 5 h4c
580                ; a3 <- ls32
581                ; return $ "::" ++ concat a2 ++ a3
582                } )
583    <|> try ( do
584                { a1 <- opt_n_h4c_h4 0
585                ; _ <- string "::"
586                ; a2 <- count 4 h4c
587                ; a3 <- ls32
588                ; return $ a1 ++ "::" ++ concat a2 ++ a3
589                } )
590    <|> try ( do
591                { a1 <- opt_n_h4c_h4 1
592                ; _ <- string "::"
593                ; a2 <- count 3 h4c
594                ; a3 <- ls32
595                ; return $ a1 ++ "::" ++ concat a2 ++ a3
596                } )
597    <|> try ( do
598                { a1 <- opt_n_h4c_h4 2
599                ; _ <- string "::"
600                ; a2 <- count 2 h4c
601                ; a3 <- ls32
602                ; return $ a1 ++ "::" ++ concat a2 ++ a3
603                } )
604    <|> try ( do
605                { a1 <- opt_n_h4c_h4 3
606                ; _ <- string "::"
607                ; a2 <- h4c
608                ; a3 <- ls32
609                ; return $ a1 ++ "::" ++ a2 ++ a3
610                } )
611    <|> try ( do
612                { a1 <- opt_n_h4c_h4 4
613                ; _ <- string "::"
614                ; a3 <- ls32
615                ; return $ a1 ++ "::" ++ a3
616                } )
617    <|> try ( do
618                { a1 <- opt_n_h4c_h4 5
619                ; _ <- string "::"
620                ; a3 <- h4
621                ; return $ a1 ++ "::" ++ a3
622                } )
623    <|> try ( do
624                { a1 <- opt_n_h4c_h4 6
625                ; _ <- string "::"
626                ; return $ a1 ++ "::"
627                } )
628    <?> "IPv6 address"
629
630opt_n_h4c_h4 :: Int -> URIParser String
631opt_n_h4c_h4 n = option "" $
632    do  { a1 <- countMinMax 0 n h4c
633        ; a2 <- h4
634        ; return $ concat a1 ++ a2
635        }
636
637ls32 :: URIParser String
638ls32 =  try ( do
639                { a1 <- h4c
640                ; a2 <- h4
641                ; return (a1++a2)
642                } )
643    <|> ipv4address
644
645h4c :: URIParser String
646h4c = try $
647    do  { a1 <- h4
648        ; _ <- char ':'
649        ; _ <- notFollowedBy (char ':')
650        ; return $ a1 ++ ":"
651        }
652
653h4 :: URIParser String
654h4 = countMinMax 1 4 hexDigitChar
655
656ipv4address :: URIParser String
657ipv4address =
658    do  { a1 <- decOctet ; _ <- char '.'
659        ; a2 <- decOctet ; _ <- char '.'
660        ; a3 <- decOctet ; _ <- char '.'
661        ; a4 <- decOctet
662        ; _ <- notFollowedBy nameChar
663        ; return $ a1++"."++a2++"."++a3++"."++a4
664        }
665    <?> "IPv4 Address"
666
667decOctet :: URIParser String
668decOctet =
669    do  { a1 <- countMinMax 1 3 digitChar
670        ; if (read a1 :: Integer) > 255 then
671            fail "Decimal octet value too large"
672          else
673            return a1
674        }
675
676regName :: URIParser String
677regName =
678    do  { ss <- countMinMax 0 255 nameChar
679        ; return $ concat ss
680        }
681    <?> "Registered name"
682
683
684nameChar :: URIParser String
685nameChar = (unreservedChar <|> escaped <|> subDelims)
686    <?> "Name character"
687
688--  RFC3986, section 3.2.3
689
690port :: URIParser String
691port =
692    do  { _ <- char ':'
693        ; p <- many digitChar
694        ; return (':':p)
695        }
696
697--
698--  RFC3986, section 3.3
699--
700--   path          = path-abempty    ; begins with "/" or is empty
701--                 / path-abs        ; begins with "/" but not "//"
702--                 / path-noscheme   ; begins with a non-colon segment
703--                 / path-rootless   ; begins with a segment
704--                 / path-empty      ; zero characters
705--
706--   path-abempty  = *( "/" segment )
707--   path-abs      = "/" [ segment-nz *( "/" segment ) ]
708--   path-noscheme = segment-nzc *( "/" segment )
709--   path-rootless = segment-nz *( "/" segment )
710--   path-empty    = 0<pchar>
711--
712--   segment       = *pchar
713--   segment-nz    = 1*pchar
714--   segment-nzc   = 1*( unreserved / pct-encoded / sub-delims / "@" )
715--
716--   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
717
718{-
719upath :: URIParser String
720upath = pathAbEmpty
721    <|> pathAbs
722    <|> pathNoScheme
723    <|> pathRootLess
724    <|> pathEmpty
725-}
726
727pathAbEmpty :: URIParser String
728pathAbEmpty =
729    do  { ss <- many slashSegment
730        ; return $ concat ss
731        }
732
733pathAbs :: URIParser String
734pathAbs =
735    do  { _ <- char '/'
736        ; ss <- option "" pathRootLess
737        ; return $ '/':ss
738        }
739
740pathNoScheme :: URIParser String
741pathNoScheme =
742    do  { s1 <- segmentNzc
743        ; ss <- many slashSegment
744        ; return $ concat (s1:ss)
745        }
746
747pathRootLess :: URIParser String
748pathRootLess =
749    do  { s1 <- segmentNz
750        ; ss <- many slashSegment
751        ; return $ concat (s1:ss)
752        }
753
754slashSegment :: URIParser String
755slashSegment =
756    do  { _ <- char '/'
757        ; s <- segment
758        ; return ('/':s)
759        }
760
761segment :: URIParser String
762segment =
763    do  { ps <- many pchar
764        ; return $ concat ps
765        }
766
767segmentNz :: URIParser String
768segmentNz =
769    do  { ps <- many1 pchar
770        ; return $ concat ps
771        }
772
773segmentNzc :: URIParser String
774segmentNzc =
775    do  { ps <- many1 (uchar "@")
776        ; return $ concat ps
777        }
778
779pchar :: URIParser String
780pchar = uchar ":@"
781
782-- helper function for pchar and friends
783uchar :: String -> URIParser String
784uchar extras =
785        unreservedChar
786    <|> escaped
787    <|> subDelims
788    <|> do { c <- oneOf extras ; return [c] }
789
790--  RFC3986, section 3.4
791
792uquery :: URIParser String
793uquery =
794    do  { ss <- many $ uchar (":@"++"/?")
795        ; return $ '?':concat ss
796        }
797
798--  RFC3986, section 3.5
799
800ufragment :: URIParser String
801ufragment =
802    do  { ss <- many $ uchar (":@"++"/?")
803        ; return $ '#':concat ss
804        }
805
806--  Reference, Relative and Absolute URI forms
807--
808--  RFC3986, section 4.1
809
810uriReference :: URIParser URI
811uriReference = uri <|> relativeRef
812
813--  RFC3986, section 4.2
814--
815--   relative-URI  = relative-part [ "?" query ] [ "#" fragment ]
816--
817--   relative-part = "//" authority path-abempty
818--                 / path-abs
819--                 / path-noscheme
820--                 / path-empty
821
822relativeRef :: URIParser URI
823relativeRef =
824    do  { notMatching uscheme
825        -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
826        -- ; up <- upath
827        ; (ua,up) <- relativePart
828        ; uq <- option "" ( do { _ <- char '?' ; uquery    } )
829        ; uf <- option "" ( do { _ <- char '#' ; ufragment } )
830        ; return $ URI
831            { uriScheme    = ""
832            , uriAuthority = ua
833            , uriPath      = up
834            , uriQuery     = uq
835            , uriFragment  = uf
836            }
837        }
838
839relativePart :: URIParser ((Maybe URIAuth),String)
840relativePart =
841        do  { _ <- try (string "//")
842            ; ua <- uauthority
843            ; up <- pathAbEmpty
844            ; return (ua,up)
845            }
846    <|> do  { up <- pathAbs
847            ; return (Nothing,up)
848            }
849    <|> do  { up <- pathNoScheme
850            ; return (Nothing,up)
851            }
852    <|> do  { return (Nothing,"")
853            }
854
855--  RFC3986, section 4.3
856
857absoluteURI :: URIParser URI
858absoluteURI =
859    do  { us <- uscheme
860        -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
861        -- ; up <- upath
862        ; (ua,up) <- hierPart
863        ; uq <- option "" ( do { _ <- char '?' ; uquery    } )
864        ; return $ URI
865            { uriScheme    = us
866            , uriAuthority = ua
867            , uriPath      = up
868            , uriQuery     = uq
869            , uriFragment  = ""
870            }
871        }
872
873--  Imports from RFC 2234
874
875    -- NOTE: can't use isAlphaNum etc. because these deal with ISO 8859
876    -- (and possibly Unicode!) chars.
877    -- [[[Above was a comment originally in GHC Network/URI.hs:
878    --    when IRIs are introduced then most codepoints above 128(?) should
879    --    be treated as unreserved, and higher codepoints for letters should
880    --    certainly be allowed.
881    -- ]]]
882
883isAlphaChar :: Char -> Bool
884isAlphaChar c    = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
885
886isDigitChar :: Char -> Bool
887isDigitChar c    = (c >= '0' && c <= '9')
888
889isAlphaNumChar :: Char -> Bool
890isAlphaNumChar c = isAlphaChar c || isDigitChar c
891
892isHexDigitChar :: Char -> Bool
893isHexDigitChar c = isHexDigit c
894
895isSchemeChar :: Char -> Bool
896isSchemeChar c   = (isAlphaNumChar c) || (c `elem` "+-.")
897
898alphaChar :: URIParser Char
899alphaChar = satisfy isAlphaChar         -- or: Parsec.letter ?
900
901digitChar :: URIParser Char
902digitChar = satisfy isDigitChar         -- or: Parsec.digit ?
903
904hexDigitChar :: URIParser Char
905hexDigitChar = satisfy isHexDigitChar   -- or: Parsec.hexDigit ?
906
907--  Additional parser combinators for common patterns
908
909oneThenMany :: GenParser t s a -> GenParser t s a -> GenParser t s [a]
910oneThenMany p1 pr =
911    do  { a1 <- p1
912        ; ar <- many pr
913        ; return (a1:ar)
914        }
915
916countMinMax :: Int -> Int -> GenParser t s a -> GenParser t s [a]
917countMinMax m n p | m > 0 =
918    do  { a1 <- p
919        ; ar <- countMinMax (m-1) (n-1) p
920        ; return (a1:ar)
921        }
922countMinMax _ n _ | n <= 0 = return []
923countMinMax _ n p = option [] $
924    do  { a1 <- p
925        ; ar <- countMinMax 0 (n-1) p
926        ; return (a1:ar)
927        }
928
929notMatching :: Show a => GenParser tok st a -> GenParser tok st ()
930notMatching p = do { a <- try p ; unexpected (show a) } <|> return ()
931
932------------------------------------------------------------
933--  Reconstruct a URI string
934------------------------------------------------------------
935--
936-- |Turn a 'URI' into a string.
937--
938--  Uses a supplied function to map the userinfo part of the URI.
939--
940--  The Show instance for URI uses a mapping that hides any password
941--  that may be present in the URI.  Use this function with argument @id@
942--  to preserve the password in the formatted output.
943--
944uriToString :: (String->String) -> URI -> ShowS
945uriToString userinfomap URI { uriScheme=myscheme
946                            , uriAuthority=myauthority
947                            , uriPath=mypath
948                            , uriQuery=myquery
949                            , uriFragment=myfragment
950                            } =
951    (myscheme++) . (uriAuthToString userinfomap myauthority)
952               . (mypath++) . (myquery++) . (myfragment++)
953
954uriAuthToString :: (String->String) -> (Maybe URIAuth) -> ShowS
955uriAuthToString _           Nothing   = id          -- shows ""
956uriAuthToString userinfomap
957        (Just URIAuth { uriUserInfo = myuinfo
958                      , uriRegName  = myregname
959                      , uriPort     = myport
960                      } ) =
961    ("//"++) . (if null myuinfo then id else ((userinfomap myuinfo)++))
962             . (myregname++)
963             . (myport++)
964
965------------------------------------------------------------
966--  Character classes
967------------------------------------------------------------
968
969-- | Returns 'True' if the character is allowed in a URI.
970--
971isAllowedInURI :: Char -> Bool
972isAllowedInURI c = isReserved c || isUnreserved c || c == '%' -- escape char
973
974-- | Returns 'True' if the character is allowed unescaped in a URI.
975--
976-- >>> escapeURIString isUnescapedInURI "http://haskell.org:80?some_param=true&other_param=їґ"
977-- "http://haskell.org:80?some_param=true&other_param=%D1%97%D2%91"
978isUnescapedInURI :: Char -> Bool
979isUnescapedInURI c = isReserved c || isUnreserved c
980
981-- | Returns 'True' if the character is allowed unescaped in a URI component.
982--
983-- >>> escapeURIString isUnescapedInURIComponent "http://haskell.org:80?some_param=true&other_param=їґ"
984-- "http%3A%2F%2Fhaskell.org%3A80%3Fsome_param%3Dtrue%26other_param%3D%D1%97%D2%91"
985isUnescapedInURIComponent :: Char -> Bool
986isUnescapedInURIComponent c = not (isReserved c || not (isUnescapedInURI c))
987
988------------------------------------------------------------
989--  Escape sequence handling
990------------------------------------------------------------
991
992-- |Escape character if supplied predicate is not satisfied,
993--  otherwise return character as singleton string.
994--
995escapeURIChar :: (Char->Bool) -> Char -> String
996escapeURIChar p c
997    | p c       = [c]
998    | otherwise = concatMap (\i -> '%' : myShowHex i "") (utf8EncodeChar c)
999    where
1000        myShowHex :: Int -> ShowS
1001        myShowHex n r =  case showIntAtBase 16 (toChrHex) n r of
1002            []  -> "00"
1003            [x] -> ['0',x]
1004            cs  -> cs
1005        toChrHex d
1006            | d < 10    = chr (ord '0' + fromIntegral d)
1007            | otherwise = chr (ord 'A' + fromIntegral (d - 10))
1008
1009-- From http://hackage.haskell.org/package/utf8-string
1010-- by Eric Mertens, BSD3
1011-- Returns [Int] for use with showIntAtBase
1012utf8EncodeChar :: Char -> [Int]
1013utf8EncodeChar = map fromIntegral . go . ord
1014 where
1015  go oc
1016   | oc <= 0x7f       = [oc]
1017
1018   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
1019                        , 0x80 + oc .&. 0x3f
1020                        ]
1021
1022   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
1023                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
1024                        , 0x80 + oc .&. 0x3f
1025                        ]
1026   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
1027                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
1028                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
1029                        , 0x80 + oc .&. 0x3f
1030                        ]
1031
1032-- |Can be used to make a string valid for use in a URI.
1033--
1034escapeURIString
1035    :: (Char->Bool)     -- ^ a predicate which returns 'False'
1036                        --   if the character should be escaped
1037    -> String           -- ^ the string to process
1038    -> String           -- ^ the resulting URI string
1039escapeURIString p s = concatMap (escapeURIChar p) s
1040
1041-- |Turns all instances of escaped characters in the string back
1042--  into literal characters.
1043--
1044unEscapeString :: String -> String
1045unEscapeString [] = ""
1046unEscapeString s@(c:cs) = case unEscapeByte s of
1047    Just (byte, rest) -> unEscapeUtf8 byte rest
1048    Nothing -> c : unEscapeString cs
1049
1050unEscapeByte :: String -> Maybe (Int, String)
1051unEscapeByte ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
1052    Just (digitToInt x1 * 16 + digitToInt x2, s)
1053unEscapeByte _ = Nothing
1054
1055-- Adapted from http://hackage.haskell.org/package/utf8-string
1056-- by Eric Mertens, BSD3
1057unEscapeUtf8 :: Int -> String -> String
1058unEscapeUtf8 c rest
1059    | c < 0x80 = chr c : unEscapeString rest
1060    | c < 0xc0 = replacement_character : unEscapeString rest
1061    | c < 0xe0 = multi1
1062    | c < 0xf0 = multi_byte 2 0xf 0x800
1063    | c < 0xf8 = multi_byte 3 0x7 0x10000
1064    | c < 0xfc = multi_byte 4 0x3 0x200000
1065    | c < 0xfe = multi_byte 5 0x1 0x4000000
1066    | otherwise    = replacement_character : unEscapeString rest
1067    where
1068    replacement_character = '\xfffd'
1069    multi1 = case unEscapeByte rest of
1070      Just (c1, ds) | c1 .&. 0xc0 == 0x80 ->
1071        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
1072        in if d >= 0x000080 then toEnum d : unEscapeString ds
1073                            else replacement_character : unEscapeString ds
1074      _ -> replacement_character : unEscapeString rest
1075
1076    multi_byte :: Int -> Int -> Int -> String
1077    multi_byte i mask overlong =
1078      aux i rest (unEscapeByte rest) (c .&. mask)
1079      where
1080        aux 0 rs _ acc
1081          | overlong <= acc && acc <= 0x10ffff &&
1082            (acc < 0xd800 || 0xdfff < acc)     &&
1083            (acc < 0xfffe || 0xffff < acc)      = chr acc : unEscapeString rs
1084          | otherwise = replacement_character : unEscapeString rs
1085
1086        aux n _ (Just (r, rs)) acc
1087          | r .&. 0xc0 == 0x80 = aux (n-1) rs (unEscapeByte rs)
1088                               $! shiftL acc 6 .|. (r .&. 0x3f)
1089
1090        aux _ rs _ _ = replacement_character : unEscapeString rs
1091
1092------------------------------------------------------------
1093-- Resolving a relative URI relative to a base URI
1094------------------------------------------------------------
1095
1096-- |Returns a new 'URI' which represents the value of the
1097--  first 'URI' interpreted as relative to the second 'URI'.
1098--  For example:
1099--
1100--  > "foo" `relativeTo` "http://bar.org/" = "http://bar.org/foo"
1101--  > "http:foo" `nonStrictRelativeTo` "http://bar.org/" = "http://bar.org/foo"
1102--
1103--  Algorithm from RFC3986 [3], section 5.2.2
1104--
1105
1106nonStrictRelativeTo :: URI -> URI -> URI
1107nonStrictRelativeTo ref base = relativeTo ref' base
1108    where
1109        ref' = if uriScheme ref == uriScheme base
1110               then ref { uriScheme="" }
1111               else ref
1112
1113isDefined :: ( MonadPlus m, Eq (m a) ) => m a -> Bool
1114isDefined a = a /= mzero
1115
1116-- | Returns a new 'URI' which represents the value of the first 'URI'
1117-- interpreted as relative to the second 'URI'.
1118--
1119-- Algorithm from RFC3986 [3], section 5.2
1120relativeTo :: URI -> URI -> URI
1121relativeTo ref base
1122    | isDefined ( uriScheme ref ) =
1123        just_segments ref
1124    | isDefined ( uriAuthority ref ) =
1125        just_segments ref { uriScheme = uriScheme base }
1126    | isDefined ( uriPath ref ) =
1127        if (head (uriPath ref) == '/') then
1128            just_segments ref
1129                { uriScheme    = uriScheme base
1130                , uriAuthority = uriAuthority base
1131                }
1132        else
1133            just_segments ref
1134                { uriScheme    = uriScheme base
1135                , uriAuthority = uriAuthority base
1136                , uriPath      = mergePaths base ref
1137                }
1138    | isDefined ( uriQuery ref ) =
1139        just_segments ref
1140            { uriScheme    = uriScheme base
1141            , uriAuthority = uriAuthority base
1142            , uriPath      = uriPath base
1143            }
1144    | otherwise =
1145        just_segments ref
1146            { uriScheme    = uriScheme base
1147            , uriAuthority = uriAuthority base
1148            , uriPath      = uriPath base
1149            , uriQuery     = uriQuery base
1150            }
1151    where
1152        just_segments u =
1153            u { uriPath = removeDotSegments (uriPath u) }
1154        mergePaths b r
1155            | isDefined (uriAuthority b) && null pb = '/':pr
1156            | otherwise                             = dropLast pb ++ pr
1157            where
1158                pb = uriPath b
1159                pr = uriPath r
1160        dropLast = fst . splitLast -- reverse . dropWhile (/='/') . reverse
1161
1162--  Remove dot segments, but protect leading '/' character
1163removeDotSegments :: String -> String
1164removeDotSegments ('/':ps) = '/':elimDots ps []
1165removeDotSegments ps       = elimDots ps []
1166
1167--  Second arg accumulates segments processed so far in reverse order
1168elimDots :: String -> [String] -> String
1169-- elimDots ps rs | traceVal "\nps " ps $ traceVal "rs " rs $ False = error ""
1170elimDots [] [] = ""
1171elimDots [] rs = concat (reverse rs)
1172elimDots (    '.':'/':ps)     rs = elimDots ps rs
1173elimDots (    '.':[]    )     rs = elimDots [] rs
1174elimDots (    '.':'.':'/':ps) rs = elimDots ps (drop 1 rs)
1175elimDots (    '.':'.':[]    ) rs = elimDots [] (drop 1 rs)
1176elimDots ps rs = elimDots ps1 (r:rs)
1177    where
1178        (r,ps1) = nextSegment ps
1179
1180--  Returns the next segment and the rest of the path from a path string.
1181--  Each segment ends with the next '/' or the end of string.
1182--
1183nextSegment :: String -> (String,String)
1184nextSegment ps =
1185    case break (=='/') ps of
1186        (r,'/':ps1) -> (r++"/",ps1)
1187        (r,_)       -> (r,[])
1188
1189-- | The segments of the path component of a URI. E.g.,
1190segments :: String -> [String]
1191segments str = dropLeadingEmpty $ unfoldr nextSegmentMaybe str
1192    where
1193        nextSegmentMaybe "" = Nothing
1194        nextSegmentMaybe ps =
1195            case break (=='/') ps of
1196                (seg, '/':ps1) -> Just (seg, ps1)
1197                (seg, _)       -> Just (seg, "")
1198        dropLeadingEmpty ("":xs) = xs
1199        dropLeadingEmpty xs      = xs
1200
1201-- | Returns the segments of the path component. E.g.,
1202--    pathSegments <$> parseURI "http://example.org/foo/bar/baz"
1203-- == ["foo", "bar", "baz"]
1204pathSegments :: URI -> [String]
1205pathSegments = segments . uriPath
1206
1207-- | Split last (name) segment from path, returning (path,name)
1208splitLast :: String -> (String,String)
1209splitLast p = (reverse revpath,reverse revname)
1210    where
1211        (revname,revpath) = break (=='/') $ reverse p
1212
1213------------------------------------------------------------
1214-- Finding a URI relative to a base URI
1215------------------------------------------------------------
1216
1217-- |Returns a new 'URI' which represents the relative location of
1218--  the first 'URI' with respect to the second 'URI'.  Thus, the
1219--  values supplied are expected to be absolute URIs, and the result
1220--  returned may be a relative URI.
1221--
1222--  Example:
1223--
1224--  > "http://example.com/Root/sub1/name2#frag"
1225--  >   `relativeFrom` "http://example.com/Root/sub2/name2#frag"
1226--  >   == "../sub1/name2#frag"
1227--
1228--  There is no single correct implementation of this function,
1229--  but any acceptable implementation must satisfy the following:
1230--
1231--  > (uabs `relativeFrom` ubase) `relativeTo` ubase == uabs
1232--
1233--  For any valid absolute URI.
1234--  (cf. <http://lists.w3.org/Archives/Public/uri/2003Jan/0008.html>
1235--       <http://lists.w3.org/Archives/Public/uri/2003Jan/0005.html>)
1236--
1237relativeFrom :: URI -> URI -> URI
1238relativeFrom uabs base
1239    | diff uriScheme    uabs base = uabs
1240    | diff uriAuthority uabs base = uabs { uriScheme = "" }
1241    | diff uriPath      uabs base = uabs
1242        { uriScheme    = ""
1243        , uriAuthority = Nothing
1244        , uriPath      = relPathFrom (removeBodyDotSegments $ uriPath uabs)
1245                                     (removeBodyDotSegments $ uriPath base)
1246        }
1247    | diff uriQuery     uabs base = uabs
1248        { uriScheme    = ""
1249        , uriAuthority = Nothing
1250        , uriPath      = ""
1251        }
1252    | otherwise = uabs          -- Always carry fragment from uabs
1253        { uriScheme    = ""
1254        , uriAuthority = Nothing
1255        , uriPath      = ""
1256        , uriQuery     = ""
1257        }
1258    where
1259        diff :: Eq b => (a -> b) -> a -> a -> Bool
1260        diff sel u1 u2 = sel u1 /= sel u2
1261        -- Remove dot segments except the final segment
1262        removeBodyDotSegments p = removeDotSegments p1 ++ p2
1263            where
1264                (p1,p2) = splitLast p
1265
1266-- | Calculate the path to the first argument, from the second argument.
1267relPathFrom :: String -> String -> String
1268relPathFrom []   _    = "/"
1269relPathFrom pabs []   = pabs
1270relPathFrom pabs base =
1271    if sa1 == sb1                       -- If the first segments are equal
1272        then if (sa1 == "/")              -- and they're absolute,
1273            then if (sa2 == sb2)            -- then if the 2nd segs are equal,
1274                then relPathFrom1 ra2 rb2   -- relativize from there.
1275                else
1276                   pabs                     -- Otherwise it's not worth trying.
1277            else relPathFrom1 ra1 rb1     -- If same & relative, relativize.
1278        else pabs                       -- If 1st segs not equal, just use pabs.
1279    where
1280        (sa1,ra1) = nextSegment pabs
1281        (sb1,rb1) = nextSegment base
1282        (sa2,ra2) = nextSegment ra1
1283        (sb2,rb2) = nextSegment rb1
1284
1285--  relPathFrom1 strips off trailing names from the supplied paths, and finds
1286--  the relative path from base to target.
1287relPathFrom1 :: String -> String -> String
1288relPathFrom1 pabs base = relName
1289    where
1290        -- Relative paths are reckoned without the basename, so split those off.
1291        (sa,na) = splitLast pabs
1292        (sb,nb) = splitLast base
1293        rp      = relSegsFrom sa sb
1294        relName = if null rp then
1295                      -- If the relative path is empty, and the basenames are
1296                      -- the same, then the paths must be exactly the same.
1297                      if (na == nb) then ""
1298                      -- If the name is vulnerable to being misinterpreted,
1299                      -- add a dot segment in advance to protect it.
1300                      else if protect na then "./"++na
1301                      else na
1302                  else
1303                      rp++na
1304        -- If a single-segment path is null or contains a ':', it needs
1305        -- "protection" from being interpreted as a different kind of URL.
1306        protect s = null s || ':' `elem` s
1307
1308--  relSegsFrom discards any equal leading segments from two *directory*
1309--  paths, then invokes difSegsFrom to calculate a relative path from the end
1310--  of the base path to the end of the target path.
1311relSegsFrom :: String -> String -> String
1312{-
1313relSegsFrom sabs base
1314    | traceVal "\nrelSegsFrom\nsabs " sabs $ traceVal "base " base $
1315      False = error ""
1316-}
1317relSegsFrom []   []   = ""      -- paths are identical
1318relSegsFrom sabs base =
1319    if sa1 == sb1
1320        then relSegsFrom ra1 rb1
1321        else difSegsFrom sabs base
1322    where
1323        (sa1,ra1) = nextSegment sabs
1324        (sb1,rb1) = nextSegment base
1325
1326-- Given two paths @a@, @b@, count out the necessary number of ".." segments
1327-- to get from the depth of @b@ to the path @a@.
1328difSegsFrom :: String -> String -> String
1329{-
1330difSegsFrom sabs base
1331    | traceVal "\ndifSegsFrom\nsabs " sabs $ traceVal "base " base $
1332      False = error ""
1333-}
1334difSegsFrom sabs ""   = sabs
1335difSegsFrom sabs base = difSegsFrom ("../"++sabs) (snd $ nextSegment base)
1336
1337------------------------------------------------------------
1338--  Other normalization functions
1339------------------------------------------------------------
1340
1341-- |Case normalization; cf. RFC3986 section 6.2.2.1
1342--  NOTE:  authority case normalization is not performed
1343--
1344normalizeCase :: String -> String
1345normalizeCase uristr = ncScheme uristr
1346    where
1347        ncScheme (':':cs)                = ':':ncEscape cs
1348        ncScheme (c:cs) | isSchemeChar c = toLower c:ncScheme cs
1349        ncScheme _                       = ncEscape uristr -- no scheme present
1350        ncEscape ('%':h1:h2:cs) = '%':toUpper h1:toUpper h2:ncEscape cs
1351        ncEscape (c:cs)         = c:ncEscape cs
1352        ncEscape []             = []
1353
1354-- |Encoding normalization; cf. RFC3986 section 6.2.2.2
1355--
1356normalizeEscape :: String -> String
1357normalizeEscape ('%':h1:h2:cs)
1358    | isHexDigit h1 && isHexDigit h2 && isUnreserved escval =
1359        escval:normalizeEscape cs
1360    where
1361        escval = chr (digitToInt h1*16+digitToInt h2)
1362normalizeEscape (c:cs)         = c:normalizeEscape cs
1363normalizeEscape []             = []
1364
1365-- |Path segment normalization; cf. RFC3986 section 6.2.2.3
1366--
1367normalizePathSegments :: String -> String
1368normalizePathSegments uristr = normstr juri
1369    where
1370        juri = parseURI uristr
1371        normstr Nothing  = uristr
1372        normstr (Just u) = show (normuri u)
1373        normuri u = u { uriPath = removeDotSegments (uriPath u) }
1374
1375------------------------------------------------------------
1376--  Lift instances to support Network.URI.Static
1377------------------------------------------------------------
1378
1379#if __GLASGOW_HASKELL__ >= 800
1380deriving instance Lift URI
1381deriving instance Lift URIAuth
1382#else
1383instance Lift URI where
1384    lift (URI {..}) = [| URI {..} |]
1385
1386instance Lift URIAuth where
1387    lift (URIAuth {..}) = [| URIAuth {..} |]
1388#endif
1389
1390------------------------------------------------------------
1391--  Deprecated functions
1392------------------------------------------------------------
1393
1394{-# DEPRECATED parseabsoluteURI "use parseAbsoluteURI" #-}
1395parseabsoluteURI :: String -> Maybe URI
1396parseabsoluteURI = parseAbsoluteURI
1397
1398{-# DEPRECATED escapeString "use escapeURIString, and note the flipped arguments" #-}
1399escapeString :: String -> (Char->Bool) -> String
1400escapeString = flip escapeURIString
1401
1402{-# DEPRECATED reserved "use isReserved" #-}
1403reserved :: Char -> Bool
1404reserved = isReserved
1405
1406{-# DEPRECATED unreserved "use isUnreserved" #-}
1407unreserved :: Char -> Bool
1408unreserved = isUnreserved
1409
1410--  Additional component access functions for backward compatibility
1411
1412{-# DEPRECATED scheme "use uriScheme" #-}
1413scheme :: URI -> String
1414scheme = orNull init . uriScheme
1415
1416{-# DEPRECATED authority "use uriAuthority, and note changed functionality" #-}
1417authority :: URI -> String
1418authority = dropss . ($"") . uriAuthToString id . uriAuthority
1419    where
1420        -- Old-style authority component does not include leading '//'
1421        dropss ('/':'/':s) = s
1422        dropss s           = s
1423
1424{-# DEPRECATED path "use uriPath" #-}
1425path :: URI -> String
1426path = uriPath
1427
1428{-# DEPRECATED query "use uriQuery, and note changed functionality" #-}
1429query :: URI -> String
1430query = orNull tail . uriQuery
1431
1432{-# DEPRECATED fragment "use uriFragment, and note changed functionality" #-}
1433fragment :: URI -> String
1434fragment = orNull tail . uriFragment
1435
1436orNull :: ([a]->[a]) -> [a] -> [a]
1437orNull _ [] = []
1438orNull f as = f as
1439
1440--------------------------------------------------------------------------------
1441--
1442--  Copyright (c) 2004, G. KLYNE.  All rights reserved.
1443--  Distributed as free software under the following license.
1444--
1445--  Redistribution and use in source and binary forms, with or without
1446--  modification, are permitted provided that the following conditions
1447--  are met:
1448--
1449--  - Redistributions of source code must retain the above copyright notice,
1450--  this list of conditions and the following disclaimer.
1451--
1452--  - Redistributions in binary form must reproduce the above copyright
1453--  notice, this list of conditions and the following disclaimer in the
1454--  documentation and/or other materials provided with the distribution.
1455--
1456--  - Neither name of the copyright holders nor the names of its
1457--  contributors may be used to endorse or promote products derived from
1458--  this software without specific prior written permission.
1459--
1460--  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
1461--  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1462--  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1463--  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1464--  HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
1465--  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
1466--  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
1467--  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
1468--  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
1469--  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
1470--  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1471--
1472--------------------------------------------------------------------------------
1473