1{-# LANGUAGE DeriveDataTypeable #-}
2
3-- ------------------------------------------------------------
4
5{- |
6   Module     : Text.XML.HXT.DOM.QualifiedName
7   Copyright  : Copyright (C) 2011 Uwe Schmidt
8   License    : MIT
9
10   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
11   Stability  : stable
12   Portability: portable
13
14   The types and functions for qualified names
15
16-}
17
18-- ------------------------------------------------------------
19
20module Text.XML.HXT.DOM.QualifiedName
21    ( QName
22    , XName(unXN)
23    , NsEnv
24
25    , mkQName
26    , mkName
27    , mkNsName
28    , mkSNsName
29    , mkPrefixLocalPart
30
31    , equivQName
32    , equivUri
33    , equalQNameBy
34
35    , namePrefix
36    , localPart
37    , namespaceUri
38
39    , newXName
40    , nullXName
41    , isNullXName
42    , newQName
43
44    , mkQName'
45    , namePrefix'
46    , localPart'
47    , namespaceUri'
48
49    , setNamePrefix'
50    , setLocalPart'
51    , setNamespaceUri'
52
53    , qualifiedName
54    , qualifiedName'
55    , universalName
56    , universalUri
57    , buildUniversalName
58
59    , normalizeNsUri
60
61    , setNamespace                      -- namespace related functions
62    , isNCName
63    , isWellformedQualifiedName
64    , isWellformedQName
65    , isWellformedNSDecl
66    , isWellformedNameSpaceName
67    , isNameSpaceName
68    , isDeclaredNamespace
69
70    , xmlNamespaceXName
71    , xmlXName
72    , xmlnsNamespaceXName
73    , xmlnsXName
74    , xmlnsQN
75
76    , toNsEnv
77    )
78
79where
80
81{-
82import           Debug.Trace
83 -}
84
85import           Control.Arrow                     ((***))
86
87import           Control.DeepSeq
88import           Control.FlatSeq
89
90import           Data.AssocList
91import           Data.Binary
92import           Data.Char                         (toLower)
93import           Data.IORef
94import           Data.List                         (isPrefixOf)
95import qualified Data.Map                          as M
96import           Data.Typeable
97
98import           System.IO.Unsafe                  (unsafePerformIO)
99
100import           Text.XML.HXT.DOM.XmlKeywords      (a_xml, a_xmlns,
101                                                    xmlNamespace,
102                                                    xmlnsNamespace)
103
104import           Data.Char.Properties.XMLCharProps (isXmlNCNameChar,
105                                                    isXmlNCNameStartChar)
106
107-- -----------------------------------------------------------------------------
108
109-- | XML names are represented by Strings, but these strings do not mix up with normal strings.
110-- Names are always reduced to normal form, and they are stored internally in a name cache
111-- for sharing equal names by the same data structure
112
113data XName                      = XN { _idXN :: ! Int        -- for optimization of equality test, see Eq instance
114                                     ,  unXN ::   String
115                                     }
116                                  deriving (Typeable)
117
118instance Eq XName where
119    (XN id1 _) == (XN id2 _)    = id1 == id2
120
121instance Ord XName where
122    compare (XN _ n1) (XN _ n2) = compare n1 n2
123{-
124instance Read XName where
125    readsPrec p str             = [ (newXName x, y) | (x, y) <- readsPrec p str ]
126
127instance Show XName where
128    show (XN _ s)               = show s
129-}
130instance NFData XName where
131    rnf (XN _ s)                = rnf s
132
133instance WNFData XName where
134    rwnf (XN _ s)               = rnf s
135
136instance Binary XName where
137    put (XN _ s)                = put s
138    get                         = do
139                                  s <- get
140                                  return $! newXName s
141
142-----------------------------------------------------------------------------
143
144-- |
145-- Type for the namespace association list, used when propagating namespaces by
146-- modifying the 'QName' values in a tree
147
148type NsEnv              = AssocList XName XName
149
150-----------------------------------------------------------------------------
151
152-- |
153-- Namespace support for element and attribute names.
154--
155-- A qualified name consists of a name prefix, a local name
156-- and a namespace uri.
157-- All modules, which are not namespace aware, use only the 'localPart' component.
158-- When dealing with namespaces, the document tree must be processed by 'Text.XML.HXT.Arrow.Namespace.propagateNamespaces'
159-- to split names of structure \"prefix:localPart\" and label the name with the apropriate namespace uri
160
161data QName      = QN { localPart'    :: ! XName
162                     , namePrefix'   :: ! XName
163                     , namespaceUri' :: ! XName
164                     }
165             deriving (Typeable)
166
167-- -----------------------------------------------------------------------------
168
169-- | Two QNames are equal if (1. case) namespaces are both empty and the qualified names
170-- (prefix:localpart) are the same or (2. case) namespaces are set and namespaces and
171-- local parts are equal
172
173instance Eq QName where
174    (QN lp1 px1 ns1) == (QN lp2 px2 ns2)
175        | ns1 /= ns2            = False                 -- namespaces are set and differ
176        | not (isNullXName ns1) = lp1 == lp2            -- namespaces are set and are equal: local parts must be equal
177        | otherwise             = lp1 == lp2            -- no namespaces are set: local parts must be equal
178                                  &&                    -- and prefixes are not set or they are equal
179                                  px1 == px2
180
181instance Ord QName where
182  compare (QN lp1 px1 ns1) (QN lp2 px2 ns2)
183      | isNullXName ns1 && isNullXName ns2              -- no namespaces set: px is significant
184          = compare (px1, lp1) (px2, lp2)
185      | otherwise                                       -- namespace aware cmp: ns is significant, px is irrelevant
186          = compare (lp1, ns1) (lp2, ns2)
187
188instance NFData  QName where
189    rnf x = seq x ()
190
191instance WNFData QName
192
193instance Show QName where
194    show = showQN
195
196-- -----------------------------------------------------------------------------
197
198instance Binary QName where
199    put (QN lp px ns)   = put (unXN px) >>
200                          put (unXN lp) >>
201                          put (unXN ns)
202    get                 = do
203                          px <- get
204                          lp <- get
205                          ns <- get
206                          return $! newNsName lp px ns
207                          --     ^^
208                          -- strict apply !!!
209                          -- build the QNames strict, else the name sharing optimization will not be in effect
210
211-- -----------------------------------------------------------------------------
212
213isNullXName             :: XName -> Bool
214isNullXName             = (== nullXName)
215{-# INLINE isNullXName #-}
216
217namePrefix              :: QName -> String
218namePrefix              = unXN . namePrefix'
219{-# INLINE namePrefix #-}
220
221localPart               :: QName -> String
222localPart               = unXN . localPart'
223{-# INLINE localPart #-}
224
225namespaceUri            :: QName -> String
226namespaceUri            = unXN . namespaceUri'
227{-# INLINE namespaceUri #-}
228
229-- ------------------------------------------------------------
230
231-- | set name prefix
232
233setNamespaceUri'                        :: XName -> QName -> QName
234setNamespaceUri' ns (QN lp px _ns)      = newQName lp px ns
235
236-- | set local part
237
238setLocalPart'                           :: XName -> QName -> QName
239setLocalPart' lp (QN _lp px ns)         = newQName lp px ns
240
241-- | set name prefix
242
243setNamePrefix'                          :: XName -> QName -> QName
244setNamePrefix' px (QN lp _px ns)        = newQName lp px ns
245
246-- ------------------------------------------------------------
247
248-- |
249-- builds the full name \"prefix:localPart\", if prefix is not null, else the local part is the result
250
251qualifiedName                   :: QName -> String
252qualifiedName (QN lp px _ns)
253    | isNullXName px            = unXN lp
254    | otherwise                 = unXN px ++ (':' : unXN lp)
255
256-- | functional list version of qualifiedName used in xshow
257
258qualifiedName'                   :: QName -> String -> String
259qualifiedName' (QN lp px _ns)
260    | isNullXName px            = (unXN lp ++)
261    | otherwise                 = (unXN px ++) . (':' :) . (unXN lp ++)
262
263-- |
264-- builds the \"universal\" name, that is the namespace uri surrounded with \"{\" and \"}\" followed by the local part
265-- (specialisation of 'buildUniversalName')
266
267universalName                   :: QName -> String
268universalName                   = buildUniversalName (\ ns lp -> '{' : ns ++ '}' : lp)
269
270-- |
271-- builds an \"universal\" uri, that is the namespace uri followed by the local part. This is usefull for RDF applications,
272-- where the subject, predicate and object often are concatenated from namespace uri and local part
273-- (specialisation of 'buildUniversalName')
274
275universalUri                    :: QName -> String
276universalUri                    = buildUniversalName (++)
277
278-- |
279-- builds a string from the namespace uri and the local part. If the namespace uri is empty, the local part is returned, else
280-- namespace uri and local part are combined with the combining function given by the first parameter
281
282buildUniversalName              :: (String -> String -> String) -> QName -> String
283buildUniversalName bf n@(QN _lp _px ns)
284    | isNullXName ns            = localPart n
285    | otherwise                 = unXN ns `bf` localPart n
286
287showQN                          :: QName -> String
288showQN n
289    | null ns                   = show $ qualifiedName n
290    | otherwise                 = show $ "{" ++ ns ++ "}" ++ qualifiedName n
291    where
292    ns = namespaceUri n
293
294-- ------------------------------------------------------------
295--
296-- internal XName functions
297
298mkQName'                        :: XName -> XName -> XName -> QName
299mkQName' px lp ns               = newQName lp px ns
300{-# DEPRECATED mkQName' "use newQName instead with lp px ns param seq " #-}
301
302-- ------------------------------------------------------------
303
304-- |
305-- constructs a simple name, with prefix and localPart but without a namespace uri.
306--
307-- see also 'mkQName', 'mkName'
308
309mkPrefixLocalPart               :: String -> String -> QName
310mkPrefixLocalPart px lp
311    | null px                   = newLpName lp
312    | otherwise                 = newPxName lp px
313
314-- |
315-- constructs a simple, namespace unaware name.
316-- If the name is in @prefix:localpart@ form and the prefix is not empty
317-- the name is split internally into
318-- a prefix and a local part.
319
320mkName                          :: String -> QName
321mkName n
322    | (':' `elem` n)
323      &&
324      not (null px)                     -- more restrictive: isWellformedQualifiedName n
325                                = newPxName lp px
326    | otherwise                 = newLpName n
327    where
328    (px, (_ : lp)) = span (/= ':') n
329
330-- |
331-- constructs a complete qualified name with 'namePrefix', 'localPart' and 'namespaceUri'.
332-- This function can be used to build not wellformed prefix:localpart names.
333-- The XPath module uses wildcard names like @xxx:*@. These must be build with 'mkQName'
334-- and not with mkName.
335
336mkQName                         :: String -> String -> String -> QName
337mkQName px lp ns
338    | null ns                   = mkPrefixLocalPart px lp
339    | otherwise                 = newNsName lp px ns
340
341-- ------------------------------------------------------------
342
343-- |
344-- old name for 'mkName'
345
346mkSNsName                       :: String -> QName
347mkSNsName                       = mkName
348{-# DEPRECATED mkSNsName "use mkName instead" #-}
349
350-- |
351-- constructs a simple, namespace aware name, with prefix:localPart as first parameter,
352-- namspace uri as second.
353--
354-- see also 'mkName', 'mkPrefixLocalPart'
355
356{-
357mkNsName                          :: String -> String -> QName
358mkNsName n ns                     = trace ("mkNsName: " ++ show n ++ " " ++ show ns) (mkNsName' n ns)
359-}
360
361mkNsName                          :: String -> String -> QName
362mkNsName n ns
363    | null ns                   = qn
364    | otherwise                 = setNamespaceUri' ns' qn
365    where
366    qn                          = mkName n
367    ns'                         = newXName ns
368
369-- ------------------------------------------------------------
370
371-- | Equivalent QNames are defined as follows: The URIs are normalized before comparison.
372-- Comparison is done with 'equalQNameBy' and 'equivUri'
373
374equivQName                      :: QName -> QName -> Bool
375equivQName                      = equalQNameBy equivUri
376
377-- | Comparison of normalized namespace URIs using 'normalizeNsUri'
378
379equivUri                        :: String -> String -> Bool
380equivUri x y                    = normalizeNsUri x == normalizeNsUri y
381
382-- | Sometimes a weaker equality relation than 'equalQName' is appropriate, e.g no case significance in names, ...
383-- a name normalization function can be applied to the strings before comparing. Called by 'equalQName' and
384-- 'equivQName'
385
386equalQNameBy                    :: (String -> String -> Bool) -> QName -> QName -> Bool
387equalQNameBy equiv q1 q2        = localPart q1 == localPart q2
388                                  &&
389                                  (namespaceUri q1 `equiv` namespaceUri q2)
390
391-- |  Normalization of URIs: Normalization is done by conversion into lowercase letters. A trailing \"\/\" is ignored
392
393normalizeNsUri                  :: String -> String
394normalizeNsUri                  = map toLower . stripSlash
395    where
396    stripSlash ""               = ""
397    stripSlash s
398        | last s == '/'         = init s
399        | otherwise             = s
400
401-- -----------------------------------------------------------------------------
402
403-- Namespace predicates
404
405-- |
406-- Compute the name prefix and the namespace uri for a qualified name.
407--
408-- This function does not test whether the name is a wellformed qualified name.
409-- see Namespaces in XML Rule [6] to [8]. Error checking is done with separate functions,
410-- see 'isWellformedQName' and 'isWellformedQualifiedName' for error checking.
411
412setNamespace                    :: NsEnv -> QName -> QName
413setNamespace env n@(QN lp px _ns)
414                                = maybe n (\ ns -> newQName lp px ns) . lookup px $ env
415
416-- -----------------------------------------------------------------------------
417--
418
419-- |
420-- test for wellformed NCName, rule [4] XML Namespaces
421
422isNCName                        :: String -> Bool
423isNCName []                     = False
424isNCName n                      = and ( zipWith ($)
425                                        (isXmlNCNameStartChar : repeat isXmlNCNameChar)
426                                        n
427                                      )
428
429-- |
430-- test for wellformed QName, rule [6] XML Namespaces
431-- predicate is used in filter 'valdateNamespaces'.
432
433isWellformedQualifiedName       :: String -> Bool
434isWellformedQualifiedName s
435    | null lp                   = isNCName px
436    | otherwise                 = isNCName px && isNCName (tail lp)
437    where
438    (px, lp)                    = span (/= ':') s
439
440-- |
441-- test for wellformed QName values.
442-- A QName is wellformed, if the local part is a NCName, the namePrefix, if not empty, is also a NCName.
443-- predicate is used in filter 'valdateNamespaces'.
444
445isWellformedQName               :: QName -> Bool
446isWellformedQName (QN lp px _ns)
447                                = (isNCName . unXN) lp                          -- rule [8] XML Namespaces
448                                  &&
449                                  ( isNullXName px
450                                    ||
451                                    (isNCName . unXN) px                        -- rule [7] XML Namespaces
452                                  )
453
454-- |
455-- test whether an attribute name is a namesapce declaration name.
456-- If this is not the case True is the result, else
457-- the name must be a well formed namespace name:
458-- All namespace prefixes starting with \"xml\" are reserved for XML related definitions.
459-- predicate is used in filter 'valdateNamespaces'.
460
461isWellformedNSDecl              :: QName -> Bool
462isWellformedNSDecl n
463                                = not (isNameSpaceName n)
464                                  ||
465                                  isWellformedNameSpaceName n
466
467-- |
468-- test for a namespace name to be well formed
469
470isWellformedNameSpaceName       :: QName -> Bool
471isWellformedNameSpaceName n@(QN lp px _ns)
472    | isNullXName px            = lp == xmlnsXName
473    | otherwise                 = px == xmlnsXName
474                                  &&
475                                  not (null lp')
476                                  &&
477                                  not (a_xml `isPrefixOf` lp')
478    where
479    lp'                         = localPart n
480
481
482-- |
483-- test whether a name is a namespace declaration attribute name
484
485isNameSpaceName                         :: QName -> Bool
486isNameSpaceName (QN lp px _ns)
487    | isNullXName px                    = lp == xmlnsXName
488    | otherwise                         = px == xmlnsXName
489
490-- |
491--
492-- predicate is used in filter 'valdateNamespaces'.
493
494isDeclaredNamespace                     :: QName -> Bool
495isDeclaredNamespace (QN _lp px ns)
496    | isNullXName px                    = True                          -- no namespace used
497    | px == xmlnsXName                  = ns == xmlnsNamespaceXName     -- "xmlns" has a predefined namespace uri
498    | px == xmlXName                    = ns == xmlNamespaceXName       -- "xml" has a predefiend namespace"
499    | otherwise                         = not (isNullXName ns)          -- namespace values are not empty
500
501-- -----------------------------------------------------------------------------
502
503toNsEnv                         :: AssocList String String -> NsEnv
504toNsEnv                         = map (newXName *** newXName)
505
506-- -----------------------------------------------------------------------------
507
508-- the name and string cache
509
510data NameCache          = NC { _newXN   :: ! Int                                       -- next free name id
511                             , _xnCache :: ! (M.Map String XName)
512                             , _qnCache :: ! (M.Map (XName, XName, XName) QName)       -- we need another type than QName
513                             }                                                          -- for the key because of the unusable
514                                                                                        -- Eq instance of QName
515type ChangeNameCache r  = NameCache -> (NameCache, r)
516
517-- ------------------------------------------------------------
518
519-- | the internal cache for QNames (and name strings)
520
521theNameCache            :: IORef NameCache
522theNameCache            = unsafePerformIO (newIORef $ initialCache)
523{-# NOINLINE theNameCache #-}
524
525initialXNames           :: [XName]
526
527nullXName
528 , xmlnsNamespaceXName
529 , xmlnsXName
530 , xmlNamespaceXName
531 , xmlXName             :: XName
532
533initialXNames@
534 [ nullXName
535 , xmlnsNamespaceXName
536 , xmlnsXName
537 , xmlNamespaceXName
538 , xmlXName
539 ]                      = zipWith XN [0..] $
540                          [ ""
541                          , xmlnsNamespace
542                          , a_xmlns
543                          , xmlNamespace
544                          , a_xml
545                          ]
546
547initialQNames           :: [QName]
548
549xmlnsQN                 :: QName
550
551initialQNames@
552 [xmlnsQN]              = [QN xmlnsXName nullXName xmlnsNamespaceXName]
553
554initialCache            :: NameCache
555initialCache            = NC
556                          (length initialXNames)
557                          (M.fromList $ map (\ xn -> (unXN xn, xn)) initialXNames)
558                          (M.fromList $ map (\ qn@(QN lp px ns) -> ((lp, px, ns), qn)) initialQNames)
559
560-- ------------------------------------------------------------
561
562changeNameCache         :: NFData r => ChangeNameCache r -> r
563changeNameCache action  = unsafePerformIO changeNameCache'
564    where
565    action' c =
566      let r = action c
567      in
568       fst r `seq` r    -- eval name cache to whnf
569
570    changeNameCache' =
571      do
572      -- putStrLn "modify cache"
573      res <- atomicModifyIORef theNameCache action'
574      -- putStrLn "cache modified"
575      return res
576
577{-# NOINLINE changeNameCache #-}
578
579newXName'               :: String -> ChangeNameCache XName
580newXName' n c@(NC nxn xm qm)
581                        = case M.lookup n xm of
582                          Just xn       -> (c, xn)
583                          Nothing       -> let nxn' = nxn + 1 in
584                                           let xn   = (XN nxn n) in
585                                           let xm'  = M.insert n xn xm in
586                                           -- trace ("newXName: XN " ++ show nxn ++ " " ++ show n) $
587                                           rnf xn `seq` (NC nxn' xm' qm, xn)
588
589newQName'               :: XName -> XName -> XName -> ChangeNameCache QName
590newQName' lp px ns c@(NC nxn xm qm)
591                        = case M.lookup q' qm of
592                          Just qn       -> -- trace ("oldQName: " ++ show qn) $                 -- log evaluation sequence
593                                           (c, qn)
594                          Nothing       -> let qm'  = M.insert q' q qm in
595                                           -- trace ("newQName: " ++ show q) $                  -- log insertion of a new QName
596                                           q `seq` (NC nxn xm qm', q)
597    where
598    q'                  = (lp, px, ns)
599    q                   = QN lp px ns
600
601andThen                 :: ChangeNameCache r1 ->
602                           (r1 -> ChangeNameCache r2) -> ChangeNameCache r2
603andThen a1 a2 c0        = let (c1, r1) = a1 c0 in
604                          (a2 r1) c1
605
606newXName                :: String -> XName
607newXName n              = changeNameCache $
608                          newXName' n
609
610newQName                :: XName -> XName -> XName -> QName
611newQName lp px ns       = lp `seq` px `seq` ns `seq`            -- XNames must be evaluated, else MVar blocks
612                          ( changeNameCache $
613                            newQName' lp px ns
614                          )
615
616newLpName               :: String -> QName
617newLpName lp            = changeNameCache $
618                          newXName' lp `andThen` \ lp' ->
619                          newQName' lp' nullXName nullXName
620
621newPxName               :: String -> String -> QName
622newPxName lp px         = changeNameCache $
623                          newXName' lp `andThen` \ lp' ->
624                          newXName' px `andThen` \ px' ->
625                          newQName' lp' px' nullXName
626
627newNsName               :: String -> String -> String -> QName
628newNsName lp px ns      = changeNameCache $
629                          newXName' lp `andThen` \ lp' ->
630                          newXName' px `andThen` \ px' ->
631                          newXName' ns `andThen` \ ns' ->
632                          newQName' lp' px' ns'
633
634-----------------------------------------------------------------------------
635