1{-# LANGUAGE DeriveDataTypeable #-} 2 3-- ------------------------------------------------------------ 4 5{- | 6 Module : Data.Atom 7 Copyright : Copyright (C) 2008 Uwe Schmidt 8 License : MIT 9 10 Maintainer : Uwe Schmidt (uwe\@fh-wedel.de) 11 Stability : experimental 12 Portability: non-portable 13 14 Unique Atoms generated from Strings and 15 managed as flyweights 16 17 Data.Atom can be used for caching and storage optimisation 18 of frequently used strings. An @Atom@ is constructed from a @String@. 19 For two equal strings the identical atom is returned. 20 21 This module can be used for optimizing memory usage when working with 22 strings or names. Many applications use data types like 23 @Map String SomeAttribute@ where a rather fixed set of keys is used. 24 Especially XML applications often work with a limited set of element and attribute names. 25 For these applications it becomes more memory efficient when working with types like 26 @Map Atom SomeAttribute@ and convert the keys into atoms before operating 27 on such a map. 28 29 Internally this module manages a map of atoms. The atoms are internally represented 30 by @ByteString@s. When creating a new atom from a string, the string is first converted 31 into an UTF8 @Word8@ sequence, which is packed into a @ByteString@. This @ByteString@ is looked 32 up in the table of atoms. If it is already there, the value in the map is used as atom, else 33 the new @ByteString@ is inserted into the map. 34 35 Of course the implementation of this name cache uses @unsavePerformIO@. 36 The global cache is managed by ue of an @IORef@ and atomicModifyIORef. 37 38 The following laws hold for atoms 39 40 > 41 > s == t => newAtom s == newAtom t 42 > s `compare` t => newAtom s `compare` newAtom t 43 > show . newAtom == id 44 45 Equality test for @Atom@s runs in /O(1)/, it is just a pointer comarison. 46 The @Ord@ comparisons have the same runtime like the @ByteString@ comparisons. 47 Internally there is an UTF8 comparison, but UTF8 encoding preserves the total order. 48 49 Warning: The internal cache never shrinks during execution. So using it in a 50 undisciplined way can lead to memory leaks. 51-} 52 53----------------------------------------------------------------------------- 54 55module Data.Atom ( 56 -- * Atom objects 57 Atom, -- instance (Eq, Ord, Read, Show) 58 newAtom, -- :: String -> Atom 59 share -- :: String -> String 60 ) where 61 62import Control.DeepSeq 63 64import Data.ByteString (ByteString, pack, unpack) 65import Data.ByteString.Internal (c2w, toForeignPtr, w2c) 66import Data.IORef 67import qualified Data.Map as M 68import Data.String.Unicode (unicodeToUtf8) 69import Data.String.UTF8Decoding (decodeUtf8) 70import Data.Typeable 71 72import System.IO.Unsafe (unsafePerformIO) 73 74-- ------------------------------------------------------------ 75 76type Atoms = M.Map ByteString ByteString 77 78newtype Atom = A { bs :: ByteString } 79 deriving (Typeable) 80 81-- ------------------------------------------------------------ 82 83-- | the internal cache for the strings 84 85theAtoms :: IORef Atoms 86theAtoms = unsafePerformIO (newIORef M.empty) 87{-# NOINLINE theAtoms #-} 88 89-- | insert a bytestring into the atom cache 90 91insertAtom :: ByteString -> Atoms -> (Atoms, Atom) 92insertAtom s m = maybe (M.insert s s m, A s) 93 (\ s' -> (m, A s')) 94 . 95 M.lookup s $ m 96 97-- | creation of an @Atom@ from a @String@ 98 99newAtom :: String -> Atom 100newAtom = unsafePerformIO . newAtom' 101{-# NOINLINE newAtom #-} 102 103-- | The internal operation running in the IO monad 104newAtom' :: String -> IO Atom 105newAtom' s = do 106 -- putStrLn "insert atom into cache" 107 res <- atomicModifyIORef theAtoms insert 108 -- putStrLn "atom cache updated" 109 return res 110 where 111 insert m = let r = insertAtom (pack. map c2w . unicodeToUtf8 $ s) m 112 in 113 fst r `seq` r 114 115-- | Insert a @String@ into the atom cache and convert the atom back into a @String@. 116-- 117-- locically @share == id@ holds, but internally equal strings share the same memory. 118 119share :: String -> String 120share = show . newAtom 121 122instance Eq Atom where 123 a1 == a2 = fp1 == fp2 124 where 125 (fp1, _, _) = toForeignPtr . bs $ a1 126 (fp2, _, _) = toForeignPtr . bs $ a2 127 128instance Ord Atom where 129 compare a1 a2 130 | a1 == a2 = EQ 131 | otherwise = compare (bs a1) (bs a2) 132 133instance Read Atom where 134 readsPrec p str = [ (newAtom x, y) | (x, y) <- readsPrec p str ] 135 136instance Show Atom where 137 show = fst . decodeUtf8 . map w2c . unpack . bs 138 -- show = show . toForeignPtr . bs -- for debug only 139 140instance NFData Atom where 141 rnf x = seq x () 142 143----------------------------------------------------------------------------- 144