1{-# LANGUAGE DeriveGeneric #-} 2{-# LANGUAGE DerivingStrategies #-} 3{-# LANGUAGE GeneralizedNewtypeDeriving #-} 4{-# LANGUAGE BangPatterns #-} 5{-# LANGUAGE ScopedTypeVariables #-} 6{-# LANGUAGE LambdaCase #-} 7 8module HiFileParser 9 ( Interface(..) 10 , List(..) 11 , Dictionary(..) 12 , Module(..) 13 , Usage(..) 14 , Dependencies(..) 15 , getInterface 16 , fromFile 17 ) where 18 19{- HLINT ignore "Reduce duplication" -} 20 21import Control.Monad (replicateM, replicateM_) 22import Data.Binary (Word64,Word32,Word8) 23import qualified Data.Binary.Get as G (Get, Decoder (..), bytesRead, 24 getByteString, getInt64be, 25 getWord32be, getWord64be, 26 getWord8, lookAhead, 27 runGetIncremental, skip) 28import Data.Bool (bool) 29import Data.ByteString.Lazy.Internal (defaultChunkSize) 30import Data.Char (chr) 31import Data.Functor (void, ($>)) 32import Data.List (find) 33import Data.Maybe (catMaybes) 34import Data.Semigroup ((<>)) 35import qualified Data.Vector as V 36import GHC.IO.IOMode (IOMode (..)) 37import Numeric (showHex) 38import RIO.ByteString as B (ByteString, hGetSome, null) 39import RIO (Int64,Generic, NFData) 40import System.IO (withBinaryFile) 41import Data.Bits (FiniteBits(..),testBit, 42 unsafeShiftL,(.|.),clearBit, 43 complement) 44import Control.Monad.State 45import qualified Debug.Trace 46 47newtype IfaceGetState = IfaceGetState 48 { useLEB128 :: Bool -- ^ Use LEB128 encoding for numbers 49 } 50 51type Get a = StateT IfaceGetState G.Get a 52 53enableDebug :: Bool 54enableDebug = False 55 56traceGet :: String -> Get () 57traceGet s 58 | enableDebug = Debug.Trace.trace s (return ()) 59 | otherwise = return () 60 61traceShow :: Show a => String -> Get a -> Get a 62traceShow s g 63 | not enableDebug = g 64 | otherwise = do 65 a <- g 66 traceGet (s ++ " " ++ show a) 67 return a 68 69runGetIncremental :: Get a -> G.Decoder a 70runGetIncremental g = G.runGetIncremental (evalStateT g emptyState) 71 where 72 emptyState = IfaceGetState False 73 74getByteString :: Int -> Get ByteString 75getByteString i = lift (G.getByteString i) 76 77getWord8 :: Get Word8 78getWord8 = lift G.getWord8 79 80bytesRead :: Get Int64 81bytesRead = lift G.bytesRead 82 83skip :: Int -> Get () 84skip = lift . G.skip 85 86uleb :: Get a -> Get a -> Get a 87uleb f g = do 88 c <- gets useLEB128 89 if c then f else g 90 91getWord32be :: Get Word32 92getWord32be = uleb getULEB128 (lift G.getWord32be) 93 94getWord64be :: Get Word64 95getWord64be = uleb getULEB128 (lift G.getWord64be) 96 97getInt64be :: Get Int64 98getInt64be = uleb getSLEB128 (lift G.getInt64be) 99 100lookAhead :: Get b -> Get b 101lookAhead g = do 102 s <- get 103 lift $ G.lookAhead (evalStateT g s) 104 105getPtr :: Get Word32 106getPtr = lift G.getWord32be 107 108type IsBoot = Bool 109 110type ModuleName = ByteString 111 112newtype List a = List 113 { unList :: [a] 114 } deriving newtype (Show, NFData) 115 116newtype Dictionary = Dictionary 117 { unDictionary :: V.Vector ByteString 118 } deriving newtype (Show, NFData) 119 120newtype Module = Module 121 { unModule :: ModuleName 122 } deriving newtype (Show, NFData) 123 124newtype Usage = Usage 125 { unUsage :: FilePath 126 } deriving newtype (Show, NFData) 127 128data Dependencies = Dependencies 129 { dmods :: List (ModuleName, IsBoot) 130 , dpkgs :: List (ModuleName, Bool) 131 , dorphs :: List Module 132 , dfinsts :: List Module 133 , dplugins :: List ModuleName 134 } deriving (Show, Generic) 135instance NFData Dependencies 136 137data Interface = Interface 138 { deps :: Dependencies 139 , usage :: List Usage 140 } deriving (Show, Generic) 141instance NFData Interface 142 143-- | Read a block prefixed with its length 144withBlockPrefix :: Get a -> Get a 145withBlockPrefix f = getPtr *> f 146 147getBool :: Get Bool 148getBool = toEnum . fromIntegral <$> getWord8 149 150getString :: Get String 151getString = fmap (chr . fromIntegral) . unList <$> getList getWord32be 152 153getMaybe :: Get a -> Get (Maybe a) 154getMaybe f = bool (pure Nothing) (Just <$> f) =<< getBool 155 156getList :: Get a -> Get (List a) 157getList f = do 158 use_uleb <- gets useLEB128 159 if use_uleb 160 then do 161 l <- (getSLEB128 :: Get Int64) 162 List <$> replicateM (fromIntegral l) f 163 else do 164 i <- getWord8 165 l <- 166 if i == 0xff 167 then getWord32be 168 else pure (fromIntegral i :: Word32) 169 List <$> replicateM (fromIntegral l) f 170 171getTuple :: Get a -> Get b -> Get (a, b) 172getTuple f g = (,) <$> f <*> g 173 174getByteStringSized :: Get ByteString 175getByteStringSized = do 176 size <- getInt64be 177 getByteString (fromIntegral size) 178 179getDictionary :: Int -> Get Dictionary 180getDictionary ptr = do 181 offset <- bytesRead 182 skip $ ptr - fromIntegral offset 183 size <- fromIntegral <$> getInt64be 184 traceGet ("Dictionary size: " ++ show size) 185 dict <- Dictionary <$> V.replicateM size getByteStringSized 186 traceGet ("Dictionary: " ++ show dict) 187 return dict 188 189getCachedBS :: Dictionary -> Get ByteString 190getCachedBS d = go =<< traceShow "Dict index:" getWord32be 191 where 192 go i = 193 case unDictionary d V.!? fromIntegral i of 194 Just bs -> pure bs 195 Nothing -> fail $ "Invalid dictionary index: " <> show i 196 197-- | Get Fingerprint 198getFP' :: Get String 199getFP' = do 200 x <- getWord64be 201 y <- getWord64be 202 return (showHex x (showHex y "")) 203 204getFP :: Get () 205getFP = void getFP' 206 207getInterface721 :: Dictionary -> Get Interface 208getInterface721 d = do 209 void getModule 210 void getBool 211 replicateM_ 2 getFP 212 void getBool 213 void getBool 214 Interface <$> getDependencies <*> getUsage 215 where 216 getModule = getCachedBS d *> (Module <$> getCachedBS d) 217 getDependencies = 218 withBlockPrefix $ 219 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 220 getList (getTuple (getCachedBS d) getBool) <*> 221 getList getModule <*> 222 getList getModule <*> 223 pure (List []) 224 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 225 where 226 go :: Get (Maybe Usage) 227 go = do 228 usageType <- getWord8 229 case usageType of 230 0 -> getModule *> getFP *> getBool $> Nothing 231 1 -> 232 getCachedBS d *> getFP *> getMaybe getFP *> 233 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 234 getBool $> Nothing 235 _ -> fail $ "Invalid usageType: " <> show usageType 236 237getInterface741 :: Dictionary -> Get Interface 238getInterface741 d = do 239 void getModule 240 void getBool 241 replicateM_ 3 getFP 242 void getBool 243 void getBool 244 Interface <$> getDependencies <*> getUsage 245 where 246 getModule = getCachedBS d *> (Module <$> getCachedBS d) 247 getDependencies = 248 withBlockPrefix $ 249 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 250 getList (getTuple (getCachedBS d) getBool) <*> 251 getList getModule <*> 252 getList getModule <*> 253 pure (List []) 254 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 255 where 256 go :: Get (Maybe Usage) 257 go = do 258 usageType <- getWord8 259 case usageType of 260 0 -> getModule *> getFP *> getBool $> Nothing 261 1 -> 262 getCachedBS d *> getFP *> getMaybe getFP *> 263 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 264 getBool $> Nothing 265 2 -> Just . Usage <$> getString <* getWord64be <* getWord64be 266 _ -> fail $ "Invalid usageType: " <> show usageType 267 268getInterface761 :: Dictionary -> Get Interface 269getInterface761 d = do 270 void getModule 271 void getBool 272 replicateM_ 3 getFP 273 void getBool 274 void getBool 275 Interface <$> getDependencies <*> getUsage 276 where 277 getModule = getCachedBS d *> (Module <$> getCachedBS d) 278 getDependencies = 279 withBlockPrefix $ 280 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 281 getList (getTuple (getCachedBS d) getBool) <*> 282 getList getModule <*> 283 getList getModule <*> 284 pure (List []) 285 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 286 where 287 go :: Get (Maybe Usage) 288 go = do 289 usageType <- getWord8 290 case usageType of 291 0 -> getModule *> getFP *> getBool $> Nothing 292 1 -> 293 getCachedBS d *> getFP *> getMaybe getFP *> 294 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 295 getBool $> Nothing 296 2 -> Just . Usage <$> getString <* getWord64be <* getWord64be 297 _ -> fail $ "Invalid usageType: " <> show usageType 298 299getInterface781 :: Dictionary -> Get Interface 300getInterface781 d = do 301 void getModule 302 void getBool 303 replicateM_ 3 getFP 304 void getBool 305 void getBool 306 Interface <$> getDependencies <*> getUsage 307 where 308 getModule = getCachedBS d *> (Module <$> getCachedBS d) 309 getDependencies = 310 withBlockPrefix $ 311 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 312 getList (getTuple (getCachedBS d) getBool) <*> 313 getList getModule <*> 314 getList getModule <*> 315 pure (List []) 316 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 317 where 318 go :: Get (Maybe Usage) 319 go = do 320 usageType <- getWord8 321 case usageType of 322 0 -> getModule *> getFP *> getBool $> Nothing 323 1 -> 324 getCachedBS d *> getFP *> getMaybe getFP *> 325 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 326 getBool $> Nothing 327 2 -> Just . Usage <$> getString <* getFP 328 _ -> fail $ "Invalid usageType: " <> show usageType 329 330getInterface801 :: Dictionary -> Get Interface 331getInterface801 d = do 332 void getModule 333 void getWord8 334 replicateM_ 3 getFP 335 void getBool 336 void getBool 337 Interface <$> getDependencies <*> getUsage 338 where 339 getModule = getCachedBS d *> (Module <$> getCachedBS d) 340 getDependencies = 341 withBlockPrefix $ 342 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 343 getList (getTuple (getCachedBS d) getBool) <*> 344 getList getModule <*> 345 getList getModule <*> 346 pure (List []) 347 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 348 where 349 go :: Get (Maybe Usage) 350 go = do 351 usageType <- getWord8 352 case usageType of 353 0 -> getModule *> getFP *> getBool $> Nothing 354 1 -> 355 getCachedBS d *> getFP *> getMaybe getFP *> 356 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 357 getBool $> Nothing 358 2 -> Just . Usage <$> getString <* getFP 359 3 -> getModule *> getFP $> Nothing 360 _ -> fail $ "Invalid usageType: " <> show usageType 361 362getInterface821 :: Dictionary -> Get Interface 363getInterface821 d = do 364 void getModule 365 void $ getMaybe getModule 366 void getWord8 367 replicateM_ 3 getFP 368 void getBool 369 void getBool 370 Interface <$> getDependencies <*> getUsage 371 where 372 getModule = do 373 idType <- getWord8 374 case idType of 375 0 -> void $ getCachedBS d 376 _ -> 377 void $ 378 getCachedBS d *> getList (getTuple (getCachedBS d) getModule) 379 Module <$> getCachedBS d 380 getDependencies = 381 withBlockPrefix $ 382 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 383 getList (getTuple (getCachedBS d) getBool) <*> 384 getList getModule <*> 385 getList getModule <*> 386 pure (List []) 387 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 388 where 389 go :: Get (Maybe Usage) 390 go = do 391 usageType <- getWord8 392 case usageType of 393 0 -> getModule *> getFP *> getBool $> Nothing 394 1 -> 395 getCachedBS d *> getFP *> getMaybe getFP *> 396 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 397 getBool $> Nothing 398 2 -> Just . Usage <$> getString <* getFP 399 3 -> getModule *> getFP $> Nothing 400 _ -> fail $ "Invalid usageType: " <> show usageType 401 402getInterface841 :: Dictionary -> Get Interface 403getInterface841 d = do 404 void getModule 405 void $ getMaybe getModule 406 void getWord8 407 replicateM_ 5 getFP 408 void getBool 409 void getBool 410 Interface <$> getDependencies <*> getUsage 411 where 412 getModule = do 413 idType <- getWord8 414 case idType of 415 0 -> void $ getCachedBS d 416 _ -> 417 void $ 418 getCachedBS d *> getList (getTuple (getCachedBS d) getModule) 419 Module <$> getCachedBS d 420 getDependencies = 421 withBlockPrefix $ 422 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 423 getList (getTuple (getCachedBS d) getBool) <*> 424 getList getModule <*> 425 getList getModule <*> 426 pure (List []) 427 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 428 where 429 go :: Get (Maybe Usage) 430 go = do 431 usageType <- getWord8 432 case usageType of 433 0 -> getModule *> getFP *> getBool $> Nothing 434 1 -> 435 getCachedBS d *> getFP *> getMaybe getFP *> 436 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 437 getBool $> Nothing 438 2 -> Just . Usage <$> getString <* getFP 439 3 -> getModule *> getFP $> Nothing 440 _ -> fail $ "Invalid usageType: " <> show usageType 441 442getInterface861 :: Dictionary -> Get Interface 443getInterface861 d = do 444 void getModule 445 void $ getMaybe getModule 446 void getWord8 447 replicateM_ 6 getFP 448 void getBool 449 void getBool 450 Interface <$> getDependencies <*> getUsage 451 where 452 getModule = do 453 idType <- getWord8 454 case idType of 455 0 -> void $ getCachedBS d 456 _ -> 457 void $ 458 getCachedBS d *> getList (getTuple (getCachedBS d) getModule) 459 Module <$> getCachedBS d 460 getDependencies = 461 withBlockPrefix $ 462 Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*> 463 getList (getTuple (getCachedBS d) getBool) <*> 464 getList getModule <*> 465 getList getModule <*> 466 getList (getCachedBS d) 467 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 468 where 469 go :: Get (Maybe Usage) 470 go = do 471 usageType <- getWord8 472 case usageType of 473 0 -> getModule *> getFP *> getBool $> Nothing 474 1 -> 475 getCachedBS d *> getFP *> getMaybe getFP *> 476 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 477 getBool $> Nothing 478 2 -> Just . Usage <$> getString <* getFP 479 3 -> getModule *> getFP $> Nothing 480 _ -> fail $ "Invalid usageType: " <> show usageType 481 482getInterface8101 :: Dictionary -> Get Interface 483getInterface8101 d = do 484 void $ traceShow "Module:" getModule 485 void $ traceShow "Sig:" $ getMaybe getModule 486 void getWord8 487 replicateM_ 6 getFP 488 void getBool 489 void getBool 490 Interface <$> traceShow "Dependencies:" getDependencies <*> traceShow "Usage:" getUsage 491 where 492 getModule = do 493 idType <- traceShow "Unit type:" getWord8 494 case idType of 495 0 -> void $ getCachedBS d 496 1 -> 497 void $ 498 getCachedBS d *> getList (getTuple (getCachedBS d) getModule) 499 _ -> fail $ "Invalid unit type: " <> show idType 500 Module <$> getCachedBS d 501 getDependencies = 502 withBlockPrefix $ 503 Dependencies 504 <$> getList (getTuple (getCachedBS d) getBool) 505 <*> getList (getTuple (getCachedBS d) getBool) 506 <*> getList getModule 507 <*> getList getModule 508 <*> getList (getCachedBS d) 509 getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go 510 where 511 go :: Get (Maybe Usage) 512 go = do 513 usageType <- traceShow "Usage type:" getWord8 514 case usageType of 515 0 -> traceShow "Module:" getModule *> getFP *> getBool $> Nothing 516 1 -> 517 traceShow "Home module:" (getCachedBS d) *> getFP *> getMaybe getFP *> 518 getList (getTuple (getWord8 *> getCachedBS d) getFP) *> 519 getBool $> Nothing 520 2 -> Just . Usage <$> traceShow "File:" getString <* traceShow "FP:" getFP' 521 3 -> getModule *> getFP $> Nothing 522 _ -> fail $ "Invalid usageType: " <> show usageType 523 524getInterface :: Get Interface 525getInterface = do 526 let enableLEB128 = modify (\c -> c { useLEB128 = True}) 527 528 magic <- lookAhead getWord32be >>= \case 529 -- normal magic 530 0x1face -> getWord32be 531 0x1face64 -> getWord32be 532 m -> do 533 -- GHC 8.10 mistakenly encoded header fields with LEB128 534 -- so it gets special treatment 535 lookAhead (enableLEB128 >> getWord32be) >>= \case 536 0x1face -> enableLEB128 >> getWord32be 537 0x1face64 -> enableLEB128 >> getWord32be 538 _ -> fail $ "Invalid magic: " <> showHex m "" 539 540 traceGet ("Magic: " ++ showHex magic "") 541 542 -- empty field (removed in 9.0...) 543 case magic of 544 0x1face -> do 545 e <- lookAhead getWord32be 546 if e == 0 547 then void getWord32be 548 else enableLEB128 -- > 9.0 549 0x1face64 -> do 550 e <- lookAhead getWord64be 551 if e == 0 552 then void getWord64be 553 else enableLEB128 -- > 9.0 554 _ -> return () 555 556 -- ghc version 557 version <- getString 558 traceGet ("Version: " ++ version) 559 560 -- way 561 way <- getString 562 traceGet ("Ways: " ++ show way) 563 564 -- extensible fields (GHC > 9.0) 565 when (version >= "9001") $ void getPtr 566 567 -- dict_ptr 568 dictPtr <- getPtr 569 traceGet ("Dict ptr: " ++ show dictPtr) 570 571 -- dict 572 dict <- lookAhead $ getDictionary $ fromIntegral dictPtr 573 574 -- symtable_ptr 575 void getPtr 576 let versions = 577 [ ("8101", getInterface8101) 578 , ("8061", getInterface861) 579 , ("8041", getInterface841) 580 , ("8021", getInterface821) 581 , ("8001", getInterface801) 582 , ("7081", getInterface781) 583 , ("7061", getInterface761) 584 , ("7041", getInterface741) 585 , ("7021", getInterface721) 586 ] 587 case snd <$> find ((version >=) . fst) versions of 588 Just f -> f dict 589 Nothing -> fail $ "Unsupported version: " <> version 590 591 592fromFile :: FilePath -> IO (Either String Interface) 593fromFile fp = withBinaryFile fp ReadMode go 594 where 595 go h = 596 let feed (G.Done _ _ iface) = pure $ Right iface 597 feed (G.Fail _ _ msg) = pure $ Left msg 598 feed (G.Partial k) = do 599 chunk <- hGetSome h defaultChunkSize 600 feed $ k $ if B.null chunk then Nothing else Just chunk 601 in feed $ runGetIncremental getInterface 602 603 604getULEB128 :: forall a. (Integral a, FiniteBits a) => Get a 605getULEB128 = 606 go 0 0 607 where 608 go :: Int -> a -> Get a 609 go shift w = do 610 b <- getWord8 611 let !hasMore = testBit b 7 612 let !val = w .|. (clearBit (fromIntegral b) 7 `unsafeShiftL` shift) :: a 613 if hasMore 614 then do 615 go (shift+7) val 616 else 617 return $! val 618 619getSLEB128 :: forall a. (Integral a, FiniteBits a) => Get a 620getSLEB128 = do 621 (val,shift,signed) <- go 0 0 622 if signed && (shift < finiteBitSize val ) 623 then return $! ((complement 0 `unsafeShiftL` shift) .|. val) 624 else return val 625 where 626 go :: Int -> a -> Get (a,Int,Bool) 627 go shift val = do 628 byte <- getWord8 629 let !byteVal = fromIntegral (clearBit byte 7) :: a 630 let !val' = val .|. (byteVal `unsafeShiftL` shift) 631 let !more = testBit byte 7 632 let !shift' = shift+7 633 if more 634 then go shift' val' 635 else do 636 let !signed = testBit byte 6 637 return (val',shift',signed) 638