1{-# LANGUAGE DeriveDataTypeable #-} 2 3-- ------------------------------------------------------------ 4 5{- | 6 Module : Text.XML.HXT.DOM.QualifiedName 7 Copyright : Copyright (C) 2011 Uwe Schmidt 8 License : MIT 9 10 Maintainer : Uwe Schmidt (uwe@fh-wedel.de) 11 Stability : stable 12 Portability: portable 13 14 The types and functions for qualified names 15 16-} 17 18-- ------------------------------------------------------------ 19 20module Text.XML.HXT.DOM.QualifiedName 21 ( QName 22 , XName(unXN) 23 , NsEnv 24 25 , mkQName 26 , mkName 27 , mkNsName 28 , mkSNsName 29 , mkPrefixLocalPart 30 31 , equivQName 32 , equivUri 33 , equalQNameBy 34 35 , namePrefix 36 , localPart 37 , namespaceUri 38 39 , newXName 40 , nullXName 41 , isNullXName 42 , newQName 43 44 , mkQName' 45 , namePrefix' 46 , localPart' 47 , namespaceUri' 48 49 , setNamePrefix' 50 , setLocalPart' 51 , setNamespaceUri' 52 53 , qualifiedName 54 , qualifiedName' 55 , universalName 56 , universalUri 57 , buildUniversalName 58 59 , normalizeNsUri 60 61 , setNamespace -- namespace related functions 62 , isNCName 63 , isWellformedQualifiedName 64 , isWellformedQName 65 , isWellformedNSDecl 66 , isWellformedNameSpaceName 67 , isNameSpaceName 68 , isDeclaredNamespace 69 70 , xmlNamespaceXName 71 , xmlXName 72 , xmlnsNamespaceXName 73 , xmlnsXName 74 , xmlnsQN 75 76 , toNsEnv 77 ) 78 79where 80 81{- 82import Debug.Trace 83 -} 84 85import Control.Arrow ((***)) 86 87import Control.DeepSeq 88import Control.FlatSeq 89 90import Data.AssocList 91import Data.Binary 92import Data.Char (toLower) 93import Data.IORef 94import Data.List (isPrefixOf) 95import qualified Data.Map as M 96import Data.Typeable 97 98import System.IO.Unsafe (unsafePerformIO) 99 100import Text.XML.HXT.DOM.XmlKeywords (a_xml, a_xmlns, 101 xmlNamespace, 102 xmlnsNamespace) 103 104import Data.Char.Properties.XMLCharProps (isXmlNCNameChar, 105 isXmlNCNameStartChar) 106 107-- ----------------------------------------------------------------------------- 108 109-- | XML names are represented by Strings, but these strings do not mix up with normal strings. 110-- Names are always reduced to normal form, and they are stored internally in a name cache 111-- for sharing equal names by the same data structure 112 113data XName = XN { _idXN :: ! Int -- for optimization of equality test, see Eq instance 114 , unXN :: String 115 } 116 deriving (Typeable) 117 118instance Eq XName where 119 (XN id1 _) == (XN id2 _) = id1 == id2 120 121instance Ord XName where 122 compare (XN _ n1) (XN _ n2) = compare n1 n2 123{- 124instance Read XName where 125 readsPrec p str = [ (newXName x, y) | (x, y) <- readsPrec p str ] 126 127instance Show XName where 128 show (XN _ s) = show s 129-} 130instance NFData XName where 131 rnf (XN _ s) = rnf s 132 133instance WNFData XName where 134 rwnf (XN _ s) = rnf s 135 136instance Binary XName where 137 put (XN _ s) = put s 138 get = do 139 s <- get 140 return $! newXName s 141 142----------------------------------------------------------------------------- 143 144-- | 145-- Type for the namespace association list, used when propagating namespaces by 146-- modifying the 'QName' values in a tree 147 148type NsEnv = AssocList XName XName 149 150----------------------------------------------------------------------------- 151 152-- | 153-- Namespace support for element and attribute names. 154-- 155-- A qualified name consists of a name prefix, a local name 156-- and a namespace uri. 157-- All modules, which are not namespace aware, use only the 'localPart' component. 158-- When dealing with namespaces, the document tree must be processed by 'Text.XML.HXT.Arrow.Namespace.propagateNamespaces' 159-- to split names of structure \"prefix:localPart\" and label the name with the apropriate namespace uri 160 161data QName = QN { localPart' :: ! XName 162 , namePrefix' :: ! XName 163 , namespaceUri' :: ! XName 164 } 165 deriving (Typeable) 166 167-- ----------------------------------------------------------------------------- 168 169-- | Two QNames are equal if (1. case) namespaces are both empty and the qualified names 170-- (prefix:localpart) are the same or (2. case) namespaces are set and namespaces and 171-- local parts are equal 172 173instance Eq QName where 174 (QN lp1 px1 ns1) == (QN lp2 px2 ns2) 175 | ns1 /= ns2 = False -- namespaces are set and differ 176 | not (isNullXName ns1) = lp1 == lp2 -- namespaces are set and are equal: local parts must be equal 177 | otherwise = lp1 == lp2 -- no namespaces are set: local parts must be equal 178 && -- and prefixes are not set or they are equal 179 px1 == px2 180 181instance Ord QName where 182 compare (QN lp1 px1 ns1) (QN lp2 px2 ns2) 183 | isNullXName ns1 && isNullXName ns2 -- no namespaces set: px is significant 184 = compare (px1, lp1) (px2, lp2) 185 | otherwise -- namespace aware cmp: ns is significant, px is irrelevant 186 = compare (lp1, ns1) (lp2, ns2) 187 188instance NFData QName where 189 rnf x = seq x () 190 191instance WNFData QName 192 193instance Show QName where 194 show = showQN 195 196-- ----------------------------------------------------------------------------- 197 198instance Binary QName where 199 put (QN lp px ns) = put (unXN px) >> 200 put (unXN lp) >> 201 put (unXN ns) 202 get = do 203 px <- get 204 lp <- get 205 ns <- get 206 return $! newNsName lp px ns 207 -- ^^ 208 -- strict apply !!! 209 -- build the QNames strict, else the name sharing optimization will not be in effect 210 211-- ----------------------------------------------------------------------------- 212 213isNullXName :: XName -> Bool 214isNullXName = (== nullXName) 215{-# INLINE isNullXName #-} 216 217namePrefix :: QName -> String 218namePrefix = unXN . namePrefix' 219{-# INLINE namePrefix #-} 220 221localPart :: QName -> String 222localPart = unXN . localPart' 223{-# INLINE localPart #-} 224 225namespaceUri :: QName -> String 226namespaceUri = unXN . namespaceUri' 227{-# INLINE namespaceUri #-} 228 229-- ------------------------------------------------------------ 230 231-- | set name prefix 232 233setNamespaceUri' :: XName -> QName -> QName 234setNamespaceUri' ns (QN lp px _ns) = newQName lp px ns 235 236-- | set local part 237 238setLocalPart' :: XName -> QName -> QName 239setLocalPart' lp (QN _lp px ns) = newQName lp px ns 240 241-- | set name prefix 242 243setNamePrefix' :: XName -> QName -> QName 244setNamePrefix' px (QN lp _px ns) = newQName lp px ns 245 246-- ------------------------------------------------------------ 247 248-- | 249-- builds the full name \"prefix:localPart\", if prefix is not null, else the local part is the result 250 251qualifiedName :: QName -> String 252qualifiedName (QN lp px _ns) 253 | isNullXName px = unXN lp 254 | otherwise = unXN px ++ (':' : unXN lp) 255 256-- | functional list version of qualifiedName used in xshow 257 258qualifiedName' :: QName -> String -> String 259qualifiedName' (QN lp px _ns) 260 | isNullXName px = (unXN lp ++) 261 | otherwise = (unXN px ++) . (':' :) . (unXN lp ++) 262 263-- | 264-- builds the \"universal\" name, that is the namespace uri surrounded with \"{\" and \"}\" followed by the local part 265-- (specialisation of 'buildUniversalName') 266 267universalName :: QName -> String 268universalName = buildUniversalName (\ ns lp -> '{' : ns ++ '}' : lp) 269 270-- | 271-- builds an \"universal\" uri, that is the namespace uri followed by the local part. This is usefull for RDF applications, 272-- where the subject, predicate and object often are concatenated from namespace uri and local part 273-- (specialisation of 'buildUniversalName') 274 275universalUri :: QName -> String 276universalUri = buildUniversalName (++) 277 278-- | 279-- builds a string from the namespace uri and the local part. If the namespace uri is empty, the local part is returned, else 280-- namespace uri and local part are combined with the combining function given by the first parameter 281 282buildUniversalName :: (String -> String -> String) -> QName -> String 283buildUniversalName bf n@(QN _lp _px ns) 284 | isNullXName ns = localPart n 285 | otherwise = unXN ns `bf` localPart n 286 287showQN :: QName -> String 288showQN n 289 | null ns = show $ qualifiedName n 290 | otherwise = show $ "{" ++ ns ++ "}" ++ qualifiedName n 291 where 292 ns = namespaceUri n 293 294-- ------------------------------------------------------------ 295-- 296-- internal XName functions 297 298mkQName' :: XName -> XName -> XName -> QName 299mkQName' px lp ns = newQName lp px ns 300{-# DEPRECATED mkQName' "use newQName instead with lp px ns param seq " #-} 301 302-- ------------------------------------------------------------ 303 304-- | 305-- constructs a simple name, with prefix and localPart but without a namespace uri. 306-- 307-- see also 'mkQName', 'mkName' 308 309mkPrefixLocalPart :: String -> String -> QName 310mkPrefixLocalPart px lp 311 | null px = newLpName lp 312 | otherwise = newPxName lp px 313 314-- | 315-- constructs a simple, namespace unaware name. 316-- If the name is in @prefix:localpart@ form and the prefix is not empty 317-- the name is split internally into 318-- a prefix and a local part. 319 320mkName :: String -> QName 321mkName n 322 | (':' `elem` n) 323 && 324 not (null px) -- more restrictive: isWellformedQualifiedName n 325 = newPxName lp px 326 | otherwise = newLpName n 327 where 328 (px, (_ : lp)) = span (/= ':') n 329 330-- | 331-- constructs a complete qualified name with 'namePrefix', 'localPart' and 'namespaceUri'. 332-- This function can be used to build not wellformed prefix:localpart names. 333-- The XPath module uses wildcard names like @xxx:*@. These must be build with 'mkQName' 334-- and not with mkName. 335 336mkQName :: String -> String -> String -> QName 337mkQName px lp ns 338 | null ns = mkPrefixLocalPart px lp 339 | otherwise = newNsName lp px ns 340 341-- ------------------------------------------------------------ 342 343-- | 344-- old name for 'mkName' 345 346mkSNsName :: String -> QName 347mkSNsName = mkName 348{-# DEPRECATED mkSNsName "use mkName instead" #-} 349 350-- | 351-- constructs a simple, namespace aware name, with prefix:localPart as first parameter, 352-- namspace uri as second. 353-- 354-- see also 'mkName', 'mkPrefixLocalPart' 355 356{- 357mkNsName :: String -> String -> QName 358mkNsName n ns = trace ("mkNsName: " ++ show n ++ " " ++ show ns) (mkNsName' n ns) 359-} 360 361mkNsName :: String -> String -> QName 362mkNsName n ns 363 | null ns = qn 364 | otherwise = setNamespaceUri' ns' qn 365 where 366 qn = mkName n 367 ns' = newXName ns 368 369-- ------------------------------------------------------------ 370 371-- | Equivalent QNames are defined as follows: The URIs are normalized before comparison. 372-- Comparison is done with 'equalQNameBy' and 'equivUri' 373 374equivQName :: QName -> QName -> Bool 375equivQName = equalQNameBy equivUri 376 377-- | Comparison of normalized namespace URIs using 'normalizeNsUri' 378 379equivUri :: String -> String -> Bool 380equivUri x y = normalizeNsUri x == normalizeNsUri y 381 382-- | Sometimes a weaker equality relation than 'equalQName' is appropriate, e.g no case significance in names, ... 383-- a name normalization function can be applied to the strings before comparing. Called by 'equalQName' and 384-- 'equivQName' 385 386equalQNameBy :: (String -> String -> Bool) -> QName -> QName -> Bool 387equalQNameBy equiv q1 q2 = localPart q1 == localPart q2 388 && 389 (namespaceUri q1 `equiv` namespaceUri q2) 390 391-- | Normalization of URIs: Normalization is done by conversion into lowercase letters. A trailing \"\/\" is ignored 392 393normalizeNsUri :: String -> String 394normalizeNsUri = map toLower . stripSlash 395 where 396 stripSlash "" = "" 397 stripSlash s 398 | last s == '/' = init s 399 | otherwise = s 400 401-- ----------------------------------------------------------------------------- 402 403-- Namespace predicates 404 405-- | 406-- Compute the name prefix and the namespace uri for a qualified name. 407-- 408-- This function does not test whether the name is a wellformed qualified name. 409-- see Namespaces in XML Rule [6] to [8]. Error checking is done with separate functions, 410-- see 'isWellformedQName' and 'isWellformedQualifiedName' for error checking. 411 412setNamespace :: NsEnv -> QName -> QName 413setNamespace env n@(QN lp px _ns) 414 = maybe n (\ ns -> newQName lp px ns) . lookup px $ env 415 416-- ----------------------------------------------------------------------------- 417-- 418 419-- | 420-- test for wellformed NCName, rule [4] XML Namespaces 421 422isNCName :: String -> Bool 423isNCName [] = False 424isNCName n = and ( zipWith ($) 425 (isXmlNCNameStartChar : repeat isXmlNCNameChar) 426 n 427 ) 428 429-- | 430-- test for wellformed QName, rule [6] XML Namespaces 431-- predicate is used in filter 'valdateNamespaces'. 432 433isWellformedQualifiedName :: String -> Bool 434isWellformedQualifiedName s 435 | null lp = isNCName px 436 | otherwise = isNCName px && isNCName (tail lp) 437 where 438 (px, lp) = span (/= ':') s 439 440-- | 441-- test for wellformed QName values. 442-- A QName is wellformed, if the local part is a NCName, the namePrefix, if not empty, is also a NCName. 443-- predicate is used in filter 'valdateNamespaces'. 444 445isWellformedQName :: QName -> Bool 446isWellformedQName (QN lp px _ns) 447 = (isNCName . unXN) lp -- rule [8] XML Namespaces 448 && 449 ( isNullXName px 450 || 451 (isNCName . unXN) px -- rule [7] XML Namespaces 452 ) 453 454-- | 455-- test whether an attribute name is a namesapce declaration name. 456-- If this is not the case True is the result, else 457-- the name must be a well formed namespace name: 458-- All namespace prefixes starting with \"xml\" are reserved for XML related definitions. 459-- predicate is used in filter 'valdateNamespaces'. 460 461isWellformedNSDecl :: QName -> Bool 462isWellformedNSDecl n 463 = not (isNameSpaceName n) 464 || 465 isWellformedNameSpaceName n 466 467-- | 468-- test for a namespace name to be well formed 469 470isWellformedNameSpaceName :: QName -> Bool 471isWellformedNameSpaceName n@(QN lp px _ns) 472 | isNullXName px = lp == xmlnsXName 473 | otherwise = px == xmlnsXName 474 && 475 not (null lp') 476 && 477 not (a_xml `isPrefixOf` lp') 478 where 479 lp' = localPart n 480 481 482-- | 483-- test whether a name is a namespace declaration attribute name 484 485isNameSpaceName :: QName -> Bool 486isNameSpaceName (QN lp px _ns) 487 | isNullXName px = lp == xmlnsXName 488 | otherwise = px == xmlnsXName 489 490-- | 491-- 492-- predicate is used in filter 'valdateNamespaces'. 493 494isDeclaredNamespace :: QName -> Bool 495isDeclaredNamespace (QN _lp px ns) 496 | isNullXName px = True -- no namespace used 497 | px == xmlnsXName = ns == xmlnsNamespaceXName -- "xmlns" has a predefined namespace uri 498 | px == xmlXName = ns == xmlNamespaceXName -- "xml" has a predefiend namespace" 499 | otherwise = not (isNullXName ns) -- namespace values are not empty 500 501-- ----------------------------------------------------------------------------- 502 503toNsEnv :: AssocList String String -> NsEnv 504toNsEnv = map (newXName *** newXName) 505 506-- ----------------------------------------------------------------------------- 507 508-- the name and string cache 509 510data NameCache = NC { _newXN :: ! Int -- next free name id 511 , _xnCache :: ! (M.Map String XName) 512 , _qnCache :: ! (M.Map (XName, XName, XName) QName) -- we need another type than QName 513 } -- for the key because of the unusable 514 -- Eq instance of QName 515type ChangeNameCache r = NameCache -> (NameCache, r) 516 517-- ------------------------------------------------------------ 518 519-- | the internal cache for QNames (and name strings) 520 521theNameCache :: IORef NameCache 522theNameCache = unsafePerformIO (newIORef $ initialCache) 523{-# NOINLINE theNameCache #-} 524 525initialXNames :: [XName] 526 527nullXName 528 , xmlnsNamespaceXName 529 , xmlnsXName 530 , xmlNamespaceXName 531 , xmlXName :: XName 532 533initialXNames@ 534 [ nullXName 535 , xmlnsNamespaceXName 536 , xmlnsXName 537 , xmlNamespaceXName 538 , xmlXName 539 ] = zipWith XN [0..] $ 540 [ "" 541 , xmlnsNamespace 542 , a_xmlns 543 , xmlNamespace 544 , a_xml 545 ] 546 547initialQNames :: [QName] 548 549xmlnsQN :: QName 550 551initialQNames@ 552 [xmlnsQN] = [QN xmlnsXName nullXName xmlnsNamespaceXName] 553 554initialCache :: NameCache 555initialCache = NC 556 (length initialXNames) 557 (M.fromList $ map (\ xn -> (unXN xn, xn)) initialXNames) 558 (M.fromList $ map (\ qn@(QN lp px ns) -> ((lp, px, ns), qn)) initialQNames) 559 560-- ------------------------------------------------------------ 561 562changeNameCache :: NFData r => ChangeNameCache r -> r 563changeNameCache action = unsafePerformIO changeNameCache' 564 where 565 action' c = 566 let r = action c 567 in 568 fst r `seq` r -- eval name cache to whnf 569 570 changeNameCache' = 571 do 572 -- putStrLn "modify cache" 573 res <- atomicModifyIORef theNameCache action' 574 -- putStrLn "cache modified" 575 return res 576 577{-# NOINLINE changeNameCache #-} 578 579newXName' :: String -> ChangeNameCache XName 580newXName' n c@(NC nxn xm qm) 581 = case M.lookup n xm of 582 Just xn -> (c, xn) 583 Nothing -> let nxn' = nxn + 1 in 584 let xn = (XN nxn n) in 585 let xm' = M.insert n xn xm in 586 -- trace ("newXName: XN " ++ show nxn ++ " " ++ show n) $ 587 rnf xn `seq` (NC nxn' xm' qm, xn) 588 589newQName' :: XName -> XName -> XName -> ChangeNameCache QName 590newQName' lp px ns c@(NC nxn xm qm) 591 = case M.lookup q' qm of 592 Just qn -> -- trace ("oldQName: " ++ show qn) $ -- log evaluation sequence 593 (c, qn) 594 Nothing -> let qm' = M.insert q' q qm in 595 -- trace ("newQName: " ++ show q) $ -- log insertion of a new QName 596 q `seq` (NC nxn xm qm', q) 597 where 598 q' = (lp, px, ns) 599 q = QN lp px ns 600 601andThen :: ChangeNameCache r1 -> 602 (r1 -> ChangeNameCache r2) -> ChangeNameCache r2 603andThen a1 a2 c0 = let (c1, r1) = a1 c0 in 604 (a2 r1) c1 605 606newXName :: String -> XName 607newXName n = changeNameCache $ 608 newXName' n 609 610newQName :: XName -> XName -> XName -> QName 611newQName lp px ns = lp `seq` px `seq` ns `seq` -- XNames must be evaluated, else MVar blocks 612 ( changeNameCache $ 613 newQName' lp px ns 614 ) 615 616newLpName :: String -> QName 617newLpName lp = changeNameCache $ 618 newXName' lp `andThen` \ lp' -> 619 newQName' lp' nullXName nullXName 620 621newPxName :: String -> String -> QName 622newPxName lp px = changeNameCache $ 623 newXName' lp `andThen` \ lp' -> 624 newXName' px `andThen` \ px' -> 625 newQName' lp' px' nullXName 626 627newNsName :: String -> String -> String -> QName 628newNsName lp px ns = changeNameCache $ 629 newXName' lp `andThen` \ lp' -> 630 newXName' px `andThen` \ px' -> 631 newXName' ns `andThen` \ ns' -> 632 newQName' lp' px' ns' 633 634----------------------------------------------------------------------------- 635