1-- A rather crude asm parser. 2-- 3-- 4-- we only handle a subset of AT&T assembly 5-- right now. This is what gcc and clang can 6-- emit. For clang using llvm-ir might be 7-- even better. For gcc gimple if that can 8-- be consumed reliably somehow. 9-- 10-- For now we'll rely on the at&t assembly 11-- to be sufficient for constants. 12-- 13 14 15module ATTParser where 16 17import Control.Applicative ((<|>)) 18import Data.Word (Word32, Word64) 19import Data.Int (Int64) 20import Data.Char (isDigit, isSpace) 21import Data.Bits (shiftL, shiftR, (.|.)) 22import Data.Maybe (fromMaybe) 23 24data Inst = Ident String 25 | Long Word32 26 | Quad Word64 27 | Ref String 28 | Ascii String 29 deriving Show 30 31mkLong :: Word32 -> Inst 32mkLong = Long 33mkQuad :: Word64 -> Inst 34mkQuad = Quad 35-- | turn @x@ and @(x)@ into @Ref x@. 36-- The (x) syntax can be found in mips assembly. 37mkRef :: String -> Inst 38mkRef ('(':r) | (')':r') <- reverse r = Ref $ reverse r' 39mkRef r = Ref r 40 41mkAscii :: String -> Inst 42mkAscii = Ascii 43 44type ASM = [(String, Inst)] 45 46isIdent :: Inst -> Bool 47isIdent (Ident _) = True 48isIdent _ = False 49 50trim :: String -> String 51trim = reverse . dropWhile (`elem` " \t") . reverse . dropWhile (`elem` " \t") 52-- | generalized @words@. 53words' :: (a -> Bool) -> [a] -> [[a]] 54words' p s = case dropWhile p s of 55 [] -> [] 56 s' -> w : words' p s'' 57 where (w, s'') = break p s' 58 59isNumber :: String -> Bool 60isNumber ('-':x) = all isDigit x 61isNumber ('+':x) = all isDigit x 62isNumber x = all isDigit x 63 64-- | process the assembly instructions, filtering out 65-- identifiers and constant values. 66preprocess :: String -> [Inst] 67preprocess [] = [] 68preprocess ('\t':attr) = let (h, t) = break isSpace attr 69 in case h:words' (=='\t') t of 70 -- 8 byte values 71 (".quad":x:_) | isNumber (w x) -> [mkQuad $ read (w x)] 72 | otherwise -> [mkRef $ (w x)] 73 (".xword":x:_)| isNumber (w x) -> [mkQuad $ read (w x)] 74 | otherwise -> [mkRef $ (w x)] 75 (".8byte":x:_)| isNumber (w x) -> [mkQuad $ read (w x)] 76 | otherwise -> [mkRef $ (w x)] 77 ("data8":x:_) | isNumber (w x) -> [mkQuad $ read (w x)] 78 | otherwise -> [mkRef $ (w x)] 79 80 -- 4 byte values 81 (".long":x:_) | isNumber (w x) -> [mkLong $ read (w x)] 82 | otherwise -> [mkRef $ (w x)] 83 (".word":x:_) | isNumber (w x) -> [mkLong $ read (w x)] 84 | otherwise -> [mkRef $ (w x)] 85 (".4byte":x:_)| isNumber (w x) -> [mkLong $ read (w x)] 86 | otherwise -> [mkRef $ (w x)] 87 88 (".space":x:_)| (w x) == "4" -> [mkLong 0] 89 | (w x) == "8" -> [mkQuad 0] 90 (".skip":x:_) | (w x) == "4" -> [mkLong 0] 91 | (w x) == "8" -> [mkQuad 0] 92 93 (".ascii":x:_) -> [mkAscii $ read x] 94 (".asciz":x:_) -> [mkAscii $ read x ++ "\0"] 95 -- found on nios, sh4, alpha, mk68k; all without \0. 96 (".string":x:_) -> [mkAscii $ read x ++ "\0"] 97 -- found on hppa 98 (".stringz":x:_) -> [mkAscii $ read x ++ "\0"] 99 -- ia64 100 ("stringz":x:_) -> [mkAscii $ read x ++ "\0"] 101 _ -> [] 102 where w = head . words 103preprocess ('.':'z':'e':'r':'o':'f':'i':'l':'l':' ':x) = case words' (==',') x of 104 (_seg:_sect:sym:size:_) | size == "4" -> [Ident sym, mkLong 0] 105 | size == "8" -> [Ident sym, mkQuad 0] 106 _ -> [] 107preprocess (c:cs) | not (isSpace c) = [Ident $ takeWhile (/= ':') (c:cs)] 108 | otherwise = [] 109 110-- | turn the list of instructions into an associated list 111parseInsts :: [Inst] -> [(String, Inst)] 112parseInsts [] = [] 113parseInsts (Ident name:xs) = case break isIdent xs of 114 ([], xs') -> parseInsts xs' 115 (is, xs') -> (name, combineInst is):parseInsts xs' 116parseInsts _ = error "Invalid instructions" 117 118-- | combine instructions (e.g. two long into a quad) 119combineInst :: [Inst] -> Inst 120combineInst [Quad i] = Quad i 121combineInst [Long i] = Quad (fromIntegral i) 122combineInst [Long h, Long l] = Quad $ (shiftL (fromIntegral h) 32) .|. fromIntegral l 123combineInst [Ref s] = Ref s 124combineInst [Ascii s] = Ascii s 125combineInst is = error $ "Cannot combine instructions: " ++ show is 126 127-- | inline references 128inlineRef :: [(String, Inst)] -> [(String, Inst)] 129inlineRef xs = map go xs 130 where go (k, Ref name) = (k, fromMaybe (error $ "failed to find reference " ++ show name) $ lookup name xs) 131 go x = x 132 133fixWordOrder :: [(String, Inst)] -> [(String, Inst)] 134fixWordOrder xs = case lookupInteger "___hsc2hs_BOM___" xs of 135 Just 1 -> map go xs 136 _ -> xs 137 where go (k, Quad w) = (k, Quad $ shiftL w 32 .|. shiftR w 32) 138 go x = x 139 140parse :: FilePath -> IO [(String, Inst)] 141parse f = (fixWordOrder . inlineRef . parseInsts . concatMap preprocess . lines) `fmap` readFile f 142 143-- | lookup a symbol without or with underscore prefix 144lookup_ :: String -> [(String,b)] -> Maybe b 145lookup_ k l = lookup k l <|> lookup ("_" ++ k) l 146 147lookupString :: String -> [(String, Inst)] -> Maybe String 148lookupString k l = case (lookup_ k l) of 149 Just (Ascii s) -> Just s 150 _ -> Nothing 151 152lookupInteger :: String -> [(String, Inst)] -> Maybe Integer 153lookupInteger k l = case (lookup_ k l, lookup_ (k ++ "___hsc2hs_sign___") l) of 154 (Just (Quad i), Just (Quad 1)) -> Just (fromIntegral (fromIntegral i :: Int64)) 155 (Just (Quad i), _) -> Just (fromIntegral i) 156 _ -> Nothing 157