1module Language.Haskell.HsColour.Anchors
2  ( insertAnchors
3  ) where
4
5import Language.Haskell.HsColour.Classify
6import Language.Haskell.HsColour.General
7import Data.List
8import Data.Char (isUpper, isLower, isDigit, ord, intToDigit)
9
10-- This is an attempt to find the first defining occurrence of an
11-- identifier (function, datatype, class) in a Haskell source file.
12-- Rather than parse the module properly, we try to get by with just
13-- a finite state automaton.  Keeping a record of identifiers we
14-- have already seen, we look at the beginning of every line to see
15-- if it starts with the right tokens to introduce a defn.  If so,
16-- we look a little bit further until we can be certain.  Then plonk
17-- (or not) an anchor at the beginning of the line.
18
19type Anchor = String
20
21-- | 'insertAnchors' places an anchor marker in the token stream before the
22--   first defining occurrence of any identifier.  Here, /before/ means
23--   immediately preceding its type signature, or preceding a (haddock)
24--   comment that comes immediately before the type signature, or failing
25--   either of those, before the first equation.
26insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)]
27insertAnchors = anchor emptyST
28
29-- looks at first token in the left-most position of each line
30-- precondition: have just seen a newline token.
31anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
32anchor st s = case identifier st s of
33                Nothing -> emit st s
34                Just v  -> Left (escape v): emit (insertST v st) s
35
36-- some chars are not valid in anchor URIs: http://www.ietf.org/rfc/rfc3986
37-- NOTE: This code assumes characters are 8-bit.
38--       Ideally, it should transcode to utf8 octets first.
39escape :: String -> String
40escape = concatMap enc
41    where enc x | isDigit x
42                || isURIFragmentValid x
43                || isLower x
44                || isUpper x = [x]
45                | ord x >= 256 = [x] -- not correct, but better than nothing
46                | otherwise  = ['%',hexHi (ord x), hexLo (ord x)]
47          hexHi d = intToDigit (d`div`16)
48          hexLo d = intToDigit (d`mod`16)
49          isURIFragmentValid x = x `elem` "!$&'()*+,;=/?-._~:@"
50
51-- emit passes stuff through until the next newline has been encountered,
52-- then jumps back into the anchor function
53-- pre-condition: newlines are explicitly single tokens
54emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
55emit st (t@(Space,"\n"):stream) = Right t: anchor st stream
56emit st (t:stream)              = Right t: emit st stream
57emit _  []                      = []
58
59-- Given that we are at the beginning of a line, determine whether there
60-- is an identifier defined here, and if so, return it.
61-- precondition: have just seen a newline token.
62identifier ::  ST -> [(TokenType, String)] -> Maybe String
63identifier st t@((kind,v):stream) | kind`elem`[Varid,Definition] =
64    case skip stream of
65        ((Varop,v):_) | not (v`inST`st) -> Just (fix v)
66        notVarop  --  | typesig stream  -> Nothing    -- not a defn
67                      | v `inST` st     -> Nothing    -- already defined
68                      | otherwise       -> Just v
69identifier st t@((Layout,"("):stream) =
70    case stream of
71      ((Varop,v):(Layout,")"):_)
72                  --  | typesig stream  -> Nothing
73	              | v `inST` st     -> Nothing
74	              | otherwise	-> Just (fix v)
75      notVarop -> case skip (munchParens stream) of
76          ((Varop,v):_) | not (v`inST`st) -> Just (fix v)
77          _             -> Nothing
78identifier st t@((Keyword,"foreign"):stream) = Nothing -- not yet implemented
79identifier st t@((Keyword,"data"):(Space,_):(Keyword,"family"):stream)
80                                             = getConid stream
81identifier st t@((Keyword,"data"):stream)    = getConid stream
82identifier st t@((Keyword,"newtype"):stream) = getConid stream
83identifier st t@((Keyword,"type"):(Space,_):(Keyword,"family"):stream)
84                                             = getConid stream
85identifier st t@((Keyword,"type"):stream)    = getConid stream
86identifier st t@((Keyword,"class"):stream)   = getConid stream
87identifier st t@((Keyword,"instance"):stream)= getInstance stream
88identifier st t@((Comment,_):(Space,"\n"):stream) = identifier st stream
89identifier st stream = Nothing
90
91-- Is this really a type signature?  (no longer used)
92typesig :: [(TokenType,String)] -> Bool
93typesig ((Keyglyph,"::"):_)   = True
94typesig ((Varid,_):stream)    = typesig stream
95typesig ((Layout,"("):(Varop,_):(Layout,")"):stream)    = typesig stream
96typesig ((Layout,","):stream) = typesig stream
97typesig ((Space,_):stream)    = typesig stream
98typesig ((Comment,_):stream)  = typesig stream
99typesig _                     = False
100
101-- throw away everything from opening paren to matching close
102munchParens ::  [(TokenType, String)] -> [(TokenType, String)]
103munchParens =  munch (0::Int)	-- already seen open paren
104  where munch 0 ((Layout,")"):rest) = rest
105        munch n ((Layout,")"):rest) = munch (n-1) rest
106        munch n ((Layout,"("):rest) = munch (n+1) rest
107        munch n (_:rest)            = munch n rest
108        munch _ []                  = []	-- source is ill-formed
109
110-- ensure anchor name is correct for a Varop
111fix ::  String -> String
112fix ('`':v) = dropLast '`' v
113fix v       = v
114
115-- look past whitespace and comments to next "real" token
116skip ::  [(TokenType, t)] -> [(TokenType, t)]
117skip ((Space,_):stream)   = skip stream
118skip ((Comment,_):stream) = skip stream
119skip stream               = stream
120
121-- skip possible context up to and including "=>", returning next Conid token
122-- (this function is highly partial - relies on source being parse-correct)
123getConid ::  [(TokenType, String)] -> Maybe String
124getConid stream =
125    case skip stream of
126        ((Conid,c):rest) -> case context rest of
127                              ((Keyglyph,"="):_)     -> Just c
128                              ((Keyglyph,"=>"):more) ->
129                                  case skip more of
130                                      ((Conid,c'):_) -> Just c'
131                                      v -> debug v ("Conid "++c++" =>")
132                              v -> debug v ("Conid "++c++" no = or =>")
133        ((Layout,"("):rest) -> case context rest of
134                                   ((Keyglyph,"=>"):more) ->
135                                       case skip more of
136                                           ((Conid,c'):_) -> Just c'
137                                           v -> debug v ("(...) =>")
138                                   v -> debug v ("(...) no =>")
139        v -> debug v ("no Conid or (...)")
140    where debug   _   _ = Nothing
141       -- debug (s:t) c = error ("HsColour: getConid failed: "++show s
142       --                       ++"\n  in the context of: "++c)
143
144-- jump past possible class context
145context ::  [(TokenType, String)] -> [(TokenType, String)]
146context stream@((Keyglyph,"="):_) = stream
147context stream@((Keyglyph,"=>"):_) = stream
148context stream@((Keyglyph,"⇒"):_) = stream
149context (_:stream) = context stream
150context [] = []
151
152-- the anchor name for an instance is just the entire instance head, minus
153-- any extra context clause
154getInstance = Just . unwords . ("instance":) . words . concat . map snd
155              . trimContext . takeWhile (not . terminator)
156  where
157    trimContext ts = if (Keyglyph,"=>") `elem` ts
158                     ||  (Keyglyph,"⇒") `elem` ts
159                     then tail . dropWhile (`notElem`[(Keyglyph,"=>")
160                                                     ,(Keyglyph,"⇒")]) $ ts
161                     else ts
162    terminator (Keyword, _)   = True
163    terminator (Comment, _)   = True
164    terminator (Cpp,     _)   = True
165    terminator (Keyglyph,"|") = True
166    terminator (Layout,  ";") = True
167    terminator (Layout,  "{") = True
168    terminator (Layout,  "}") = True
169    terminator _              = False
170
171-- simple implementation of a string lookup table.
172-- replace this with something more sophisticated if needed.
173type ST = [String]
174
175emptyST :: ST
176emptyST = []
177
178insertST :: String -> ST -> ST
179insertST k st = insert k st
180
181inST :: String -> ST -> Bool
182inST k st = k `elem` st
183