1module Language.Haskell.HsColour.Anchors 2 ( insertAnchors 3 ) where 4 5import Language.Haskell.HsColour.Classify 6import Language.Haskell.HsColour.General 7import Data.List 8import Data.Char (isUpper, isLower, isDigit, ord, intToDigit) 9 10-- This is an attempt to find the first defining occurrence of an 11-- identifier (function, datatype, class) in a Haskell source file. 12-- Rather than parse the module properly, we try to get by with just 13-- a finite state automaton. Keeping a record of identifiers we 14-- have already seen, we look at the beginning of every line to see 15-- if it starts with the right tokens to introduce a defn. If so, 16-- we look a little bit further until we can be certain. Then plonk 17-- (or not) an anchor at the beginning of the line. 18 19type Anchor = String 20 21-- | 'insertAnchors' places an anchor marker in the token stream before the 22-- first defining occurrence of any identifier. Here, /before/ means 23-- immediately preceding its type signature, or preceding a (haddock) 24-- comment that comes immediately before the type signature, or failing 25-- either of those, before the first equation. 26insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)] 27insertAnchors = anchor emptyST 28 29-- looks at first token in the left-most position of each line 30-- precondition: have just seen a newline token. 31anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)] 32anchor st s = case identifier st s of 33 Nothing -> emit st s 34 Just v -> Left (escape v): emit (insertST v st) s 35 36-- some chars are not valid in anchor URIs: http://www.ietf.org/rfc/rfc3986 37-- NOTE: This code assumes characters are 8-bit. 38-- Ideally, it should transcode to utf8 octets first. 39escape :: String -> String 40escape = concatMap enc 41 where enc x | isDigit x 42 || isURIFragmentValid x 43 || isLower x 44 || isUpper x = [x] 45 | ord x >= 256 = [x] -- not correct, but better than nothing 46 | otherwise = ['%',hexHi (ord x), hexLo (ord x)] 47 hexHi d = intToDigit (d`div`16) 48 hexLo d = intToDigit (d`mod`16) 49 isURIFragmentValid x = x `elem` "!$&'()*+,;=/?-._~:@" 50 51-- emit passes stuff through until the next newline has been encountered, 52-- then jumps back into the anchor function 53-- pre-condition: newlines are explicitly single tokens 54emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)] 55emit st (t@(Space,"\n"):stream) = Right t: anchor st stream 56emit st (t:stream) = Right t: emit st stream 57emit _ [] = [] 58 59-- Given that we are at the beginning of a line, determine whether there 60-- is an identifier defined here, and if so, return it. 61-- precondition: have just seen a newline token. 62identifier :: ST -> [(TokenType, String)] -> Maybe String 63identifier st t@((kind,v):stream) | kind`elem`[Varid,Definition] = 64 case skip stream of 65 ((Varop,v):_) | not (v`inST`st) -> Just (fix v) 66 notVarop -- | typesig stream -> Nothing -- not a defn 67 | v `inST` st -> Nothing -- already defined 68 | otherwise -> Just v 69identifier st t@((Layout,"("):stream) = 70 case stream of 71 ((Varop,v):(Layout,")"):_) 72 -- | typesig stream -> Nothing 73 | v `inST` st -> Nothing 74 | otherwise -> Just (fix v) 75 notVarop -> case skip (munchParens stream) of 76 ((Varop,v):_) | not (v`inST`st) -> Just (fix v) 77 _ -> Nothing 78identifier st t@((Keyword,"foreign"):stream) = Nothing -- not yet implemented 79identifier st t@((Keyword,"data"):(Space,_):(Keyword,"family"):stream) 80 = getConid stream 81identifier st t@((Keyword,"data"):stream) = getConid stream 82identifier st t@((Keyword,"newtype"):stream) = getConid stream 83identifier st t@((Keyword,"type"):(Space,_):(Keyword,"family"):stream) 84 = getConid stream 85identifier st t@((Keyword,"type"):stream) = getConid stream 86identifier st t@((Keyword,"class"):stream) = getConid stream 87identifier st t@((Keyword,"instance"):stream)= getInstance stream 88identifier st t@((Comment,_):(Space,"\n"):stream) = identifier st stream 89identifier st stream = Nothing 90 91-- Is this really a type signature? (no longer used) 92typesig :: [(TokenType,String)] -> Bool 93typesig ((Keyglyph,"::"):_) = True 94typesig ((Varid,_):stream) = typesig stream 95typesig ((Layout,"("):(Varop,_):(Layout,")"):stream) = typesig stream 96typesig ((Layout,","):stream) = typesig stream 97typesig ((Space,_):stream) = typesig stream 98typesig ((Comment,_):stream) = typesig stream 99typesig _ = False 100 101-- throw away everything from opening paren to matching close 102munchParens :: [(TokenType, String)] -> [(TokenType, String)] 103munchParens = munch (0::Int) -- already seen open paren 104 where munch 0 ((Layout,")"):rest) = rest 105 munch n ((Layout,")"):rest) = munch (n-1) rest 106 munch n ((Layout,"("):rest) = munch (n+1) rest 107 munch n (_:rest) = munch n rest 108 munch _ [] = [] -- source is ill-formed 109 110-- ensure anchor name is correct for a Varop 111fix :: String -> String 112fix ('`':v) = dropLast '`' v 113fix v = v 114 115-- look past whitespace and comments to next "real" token 116skip :: [(TokenType, t)] -> [(TokenType, t)] 117skip ((Space,_):stream) = skip stream 118skip ((Comment,_):stream) = skip stream 119skip stream = stream 120 121-- skip possible context up to and including "=>", returning next Conid token 122-- (this function is highly partial - relies on source being parse-correct) 123getConid :: [(TokenType, String)] -> Maybe String 124getConid stream = 125 case skip stream of 126 ((Conid,c):rest) -> case context rest of 127 ((Keyglyph,"="):_) -> Just c 128 ((Keyglyph,"=>"):more) -> 129 case skip more of 130 ((Conid,c'):_) -> Just c' 131 v -> debug v ("Conid "++c++" =>") 132 v -> debug v ("Conid "++c++" no = or =>") 133 ((Layout,"("):rest) -> case context rest of 134 ((Keyglyph,"=>"):more) -> 135 case skip more of 136 ((Conid,c'):_) -> Just c' 137 v -> debug v ("(...) =>") 138 v -> debug v ("(...) no =>") 139 v -> debug v ("no Conid or (...)") 140 where debug _ _ = Nothing 141 -- debug (s:t) c = error ("HsColour: getConid failed: "++show s 142 -- ++"\n in the context of: "++c) 143 144-- jump past possible class context 145context :: [(TokenType, String)] -> [(TokenType, String)] 146context stream@((Keyglyph,"="):_) = stream 147context stream@((Keyglyph,"=>"):_) = stream 148context stream@((Keyglyph,"⇒"):_) = stream 149context (_:stream) = context stream 150context [] = [] 151 152-- the anchor name for an instance is just the entire instance head, minus 153-- any extra context clause 154getInstance = Just . unwords . ("instance":) . words . concat . map snd 155 . trimContext . takeWhile (not . terminator) 156 where 157 trimContext ts = if (Keyglyph,"=>") `elem` ts 158 || (Keyglyph,"⇒") `elem` ts 159 then tail . dropWhile (`notElem`[(Keyglyph,"=>") 160 ,(Keyglyph,"⇒")]) $ ts 161 else ts 162 terminator (Keyword, _) = True 163 terminator (Comment, _) = True 164 terminator (Cpp, _) = True 165 terminator (Keyglyph,"|") = True 166 terminator (Layout, ";") = True 167 terminator (Layout, "{") = True 168 terminator (Layout, "}") = True 169 terminator _ = False 170 171-- simple implementation of a string lookup table. 172-- replace this with something more sophisticated if needed. 173type ST = [String] 174 175emptyST :: ST 176emptyST = [] 177 178insertST :: String -> ST -> ST 179insertST k st = insert k st 180 181inST :: String -> ST -> Bool 182inST k st = k `elem` st 183