1-- A rather crude asm parser.
2--
3--
4-- we only handle a subset of AT&T assembly
5-- right now.  This is what gcc and clang can
6-- emit.  For clang using llvm-ir might be
7-- even better.  For gcc gimple if that can
8-- be consumed reliably somehow.
9--
10-- For now we'll rely on the at&t assembly
11-- to be sufficient for constants.
12--
13
14
15module ATTParser where
16
17import Control.Applicative ((<|>))
18import Data.Word (Word32, Word64)
19import Data.Int (Int64)
20import Data.Char (isDigit, isSpace)
21import Data.Bits (shiftL, shiftR, (.|.))
22import Data.Maybe (fromMaybe)
23
24data Inst = Ident String
25          | Long Word32
26          | Quad Word64
27          | Ref String
28          | Ascii String
29          deriving Show
30
31mkLong :: Word32 -> Inst
32mkLong = Long
33mkQuad :: Word64 -> Inst
34mkQuad = Quad
35-- | turn @x@ and @(x)@ into @Ref x@.
36-- The (x) syntax can be found in mips assembly.
37mkRef :: String -> Inst
38mkRef ('(':r) | (')':r') <- reverse r = Ref $ reverse r'
39mkRef r = Ref r
40
41mkAscii :: String -> Inst
42mkAscii = Ascii
43
44type ASM = [(String, Inst)]
45
46isIdent :: Inst -> Bool
47isIdent (Ident _) = True
48isIdent _ = False
49
50trim :: String -> String
51trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t")
52-- | generalized @words@.
53words' :: (a -> Bool) -> [a] -> [[a]]
54words' p s = case dropWhile p s of
55             [] -> []
56             s' -> w : words' p s''
57                   where (w, s'') = break p s'
58
59isNumber :: String -> Bool
60isNumber ('-':x) = all isDigit x
61isNumber ('+':x) = all isDigit x
62isNumber x       = all isDigit x
63
64-- | process the assembly instructions, filtering out
65-- identifiers and constant values.
66preprocess :: String -> [Inst]
67preprocess [] = []
68preprocess ('\t':attr) = let (h, t) = break isSpace attr
69                         in case h:words' (=='\t') t of
70                         -- 8 byte values
71                         (".quad":x:_) | isNumber (w x) -> [mkQuad $ read (w x)]
72                                       | otherwise      -> [mkRef  $ (w x)]
73                         (".xword":x:_)| isNumber (w x) -> [mkQuad $ read (w x)]
74                                       | otherwise      -> [mkRef  $ (w x)]
75                         (".8byte":x:_)| isNumber (w x) -> [mkQuad $ read (w x)]
76                                       | otherwise      -> [mkRef  $ (w x)]
77                         ("data8":x:_) | isNumber (w x) -> [mkQuad $ read (w x)]
78                                       | otherwise      -> [mkRef  $ (w x)]
79
80                         -- 4 byte values
81                         (".long":x:_) | isNumber (w x) -> [mkLong $ read (w x)]
82                                       | otherwise      -> [mkRef  $ (w x)]
83                         (".word":x:_) | isNumber (w x) -> [mkLong $ read (w x)]
84                                       | otherwise      -> [mkRef  $ (w x)]
85                         (".4byte":x:_)| isNumber (w x) -> [mkLong $ read (w x)]
86                                       | otherwise      -> [mkRef  $ (w x)]
87
88                         (".space":x:_)| (w x) == "4"   -> [mkLong 0]
89                                       | (w x) == "8"   -> [mkQuad 0]
90                         (".skip":x:_) | (w x) == "4"   -> [mkLong 0]
91                                       | (w x) == "8"   -> [mkQuad 0]
92
93                         (".ascii":x:_)             -> [mkAscii $ read x]
94                         (".asciz":x:_)             -> [mkAscii $ read x ++ "\0"]
95                         -- found on nios, sh4, alpha, mk68k; all without \0.
96                         (".string":x:_)            -> [mkAscii $ read x ++ "\0"]
97                         -- found on hppa
98                         (".stringz":x:_)           -> [mkAscii $ read x ++ "\0"]
99                         -- ia64
100                         ("stringz":x:_)            -> [mkAscii $ read x ++ "\0"]
101                         _                          -> []
102  where w = head . words
103preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of
104      (_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, mkLong 0]
105                              | size == "8" -> [Ident sym, mkQuad 0]
106      _ -> []
107preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)]
108                  | otherwise       = []
109
110-- | turn the list of instructions into an associated list
111parseInsts :: [Inst] -> [(String, Inst)]
112parseInsts [] = []
113parseInsts (Ident name:xs) = case break isIdent xs of
114  ([], xs') -> parseInsts xs'
115  (is, xs') -> (name, combineInst is):parseInsts xs'
116parseInsts _ = error "Invalid instructions"
117
118-- | combine instructions (e.g. two long into a quad)
119combineInst :: [Inst] -> Inst
120combineInst [Quad i] = Quad i
121combineInst [Long i] = Quad (fromIntegral i)
122combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l
123combineInst [Ref s]  = Ref s
124combineInst [Ascii s] = Ascii s
125combineInst is = error $ "Cannot combine instructions: " ++ show is
126
127-- | inline references
128inlineRef :: [(String, Inst)] -> [(String, Inst)]
129inlineRef xs = map go xs
130  where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs)
131        go x = x
132
133fixWordOrder :: [(String, Inst)] -> [(String, Inst)]
134fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of
135  Just 1 -> map go xs
136  _ -> xs
137  where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32)
138        go x = x
139
140parse :: FilePath -> IO [(String, Inst)]
141parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f
142
143-- | lookup a symbol without or with underscore prefix
144lookup_ :: String -> [(String,b)] -> Maybe b
145lookup_ k l = lookup k l <|> lookup ("_" ++ k) l
146
147lookupString :: String -> [(String, Inst)] -> Maybe String
148lookupString k l = case (lookup_ k l) of
149  Just (Ascii s) -> Just s
150  _ -> Nothing
151
152lookupInteger :: String -> [(String, Inst)] -> Maybe Integer
153lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of
154  (Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64))
155  (Just (Quad i), _) -> Just (fromIntegral i)
156  _ -> Nothing
157