1{-# LANGUAGE DeriveGeneric              #-}
2{-# LANGUAGE DerivingStrategies         #-}
3{-# LANGUAGE GeneralizedNewtypeDeriving #-}
4{-# LANGUAGE BangPatterns               #-}
5{-# LANGUAGE ScopedTypeVariables        #-}
6{-# LANGUAGE LambdaCase                 #-}
7
8module HiFileParser
9    ( Interface(..)
10    , List(..)
11    , Dictionary(..)
12    , Module(..)
13    , Usage(..)
14    , Dependencies(..)
15    , getInterface
16    , fromFile
17    ) where
18
19{- HLINT ignore "Reduce duplication" -}
20
21import           Control.Monad                 (replicateM, replicateM_)
22import           Data.Binary                   (Word64,Word32,Word8)
23import qualified Data.Binary.Get as G          (Get, Decoder (..), bytesRead,
24                                                getByteString, getInt64be,
25                                                getWord32be, getWord64be,
26                                                getWord8, lookAhead,
27                                                runGetIncremental, skip)
28import           Data.Bool                     (bool)
29import           Data.ByteString.Lazy.Internal (defaultChunkSize)
30import           Data.Char                     (chr)
31import           Data.Functor                  (void, ($>))
32import           Data.List                     (find)
33import           Data.Maybe                    (catMaybes)
34import           Data.Semigroup                ((<>))
35import qualified Data.Vector                   as V
36import           GHC.IO.IOMode                 (IOMode (..))
37import           Numeric                       (showHex)
38import           RIO.ByteString                as B (ByteString, hGetSome, null)
39import           RIO                           (Int64,Generic, NFData)
40import           System.IO                     (withBinaryFile)
41import           Data.Bits                     (FiniteBits(..),testBit,
42                                                unsafeShiftL,(.|.),clearBit,
43                                                complement)
44import           Control.Monad.State
45import qualified Debug.Trace
46
47newtype IfaceGetState = IfaceGetState
48  { useLEB128 :: Bool -- ^ Use LEB128 encoding for numbers
49  }
50
51type Get a = StateT IfaceGetState G.Get a
52
53enableDebug :: Bool
54enableDebug = False
55
56traceGet :: String -> Get ()
57traceGet s
58  | enableDebug = Debug.Trace.trace s (return ())
59  | otherwise    = return ()
60
61traceShow :: Show a => String -> Get a -> Get a
62traceShow s g
63  | not enableDebug = g
64  | otherwise = do
65    a <- g
66    traceGet (s ++ " " ++ show a)
67    return a
68
69runGetIncremental :: Get a -> G.Decoder a
70runGetIncremental g = G.runGetIncremental (evalStateT g emptyState)
71  where
72    emptyState = IfaceGetState False
73
74getByteString :: Int -> Get ByteString
75getByteString i = lift (G.getByteString i)
76
77getWord8 :: Get Word8
78getWord8 = lift G.getWord8
79
80bytesRead :: Get Int64
81bytesRead = lift G.bytesRead
82
83skip :: Int -> Get ()
84skip = lift . G.skip
85
86uleb :: Get a -> Get a -> Get a
87uleb f g = do
88  c <- gets useLEB128
89  if c then f else g
90
91getWord32be :: Get Word32
92getWord32be = uleb getULEB128 (lift G.getWord32be)
93
94getWord64be :: Get Word64
95getWord64be = uleb getULEB128 (lift G.getWord64be)
96
97getInt64be :: Get Int64
98getInt64be = uleb getSLEB128 (lift G.getInt64be)
99
100lookAhead :: Get b -> Get b
101lookAhead g = do
102  s <- get
103  lift $ G.lookAhead (evalStateT g s)
104
105getPtr :: Get Word32
106getPtr = lift G.getWord32be
107
108type IsBoot = Bool
109
110type ModuleName = ByteString
111
112newtype List a = List
113    { unList :: [a]
114    } deriving newtype (Show, NFData)
115
116newtype Dictionary = Dictionary
117    { unDictionary :: V.Vector ByteString
118    } deriving newtype (Show, NFData)
119
120newtype Module = Module
121    { unModule :: ModuleName
122    } deriving newtype (Show, NFData)
123
124newtype Usage = Usage
125    { unUsage :: FilePath
126    } deriving newtype (Show, NFData)
127
128data Dependencies = Dependencies
129    { dmods    :: List (ModuleName, IsBoot)
130    , dpkgs    :: List (ModuleName, Bool)
131    , dorphs   :: List Module
132    , dfinsts  :: List Module
133    , dplugins :: List ModuleName
134    } deriving (Show, Generic)
135instance NFData Dependencies
136
137data Interface = Interface
138    { deps  :: Dependencies
139    , usage :: List Usage
140    } deriving (Show, Generic)
141instance NFData Interface
142
143-- | Read a block prefixed with its length
144withBlockPrefix :: Get a -> Get a
145withBlockPrefix f = getPtr *> f
146
147getBool :: Get Bool
148getBool = toEnum . fromIntegral <$> getWord8
149
150getString :: Get String
151getString = fmap (chr . fromIntegral) . unList <$> getList getWord32be
152
153getMaybe :: Get a -> Get (Maybe a)
154getMaybe f = bool (pure Nothing) (Just <$> f) =<< getBool
155
156getList :: Get a -> Get (List a)
157getList f = do
158  use_uleb <- gets useLEB128
159  if use_uleb
160    then do
161      l <- (getSLEB128 :: Get Int64)
162      List <$> replicateM (fromIntegral l) f
163    else do
164      i <- getWord8
165      l <-
166          if i == 0xff
167              then getWord32be
168              else pure (fromIntegral i :: Word32)
169      List <$> replicateM (fromIntegral l) f
170
171getTuple :: Get a -> Get b -> Get (a, b)
172getTuple f g = (,) <$> f <*> g
173
174getByteStringSized :: Get ByteString
175getByteStringSized = do
176    size <- getInt64be
177    getByteString (fromIntegral size)
178
179getDictionary :: Int -> Get Dictionary
180getDictionary ptr = do
181    offset <- bytesRead
182    skip $ ptr - fromIntegral offset
183    size <- fromIntegral <$> getInt64be
184    traceGet ("Dictionary size: " ++ show size)
185    dict <- Dictionary <$> V.replicateM size getByteStringSized
186    traceGet ("Dictionary: " ++ show dict)
187    return dict
188
189getCachedBS :: Dictionary -> Get ByteString
190getCachedBS d = go =<< traceShow "Dict index:" getWord32be
191  where
192    go i =
193        case unDictionary d V.!? fromIntegral i of
194            Just bs -> pure bs
195            Nothing -> fail $ "Invalid dictionary index: " <> show i
196
197-- | Get Fingerprint
198getFP' :: Get String
199getFP' = do
200  x <- getWord64be
201  y <- getWord64be
202  return (showHex x (showHex y ""))
203
204getFP :: Get ()
205getFP = void getFP'
206
207getInterface721 :: Dictionary -> Get Interface
208getInterface721 d = do
209    void getModule
210    void getBool
211    replicateM_ 2 getFP
212    void getBool
213    void getBool
214    Interface <$> getDependencies <*> getUsage
215  where
216    getModule = getCachedBS d *> (Module <$> getCachedBS d)
217    getDependencies =
218        withBlockPrefix $
219        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
220        getList (getTuple (getCachedBS d) getBool) <*>
221        getList getModule <*>
222        getList getModule <*>
223        pure (List [])
224    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
225      where
226        go :: Get (Maybe Usage)
227        go = do
228            usageType <- getWord8
229            case usageType of
230                0 -> getModule *> getFP *> getBool $> Nothing
231                1 ->
232                    getCachedBS d *> getFP *> getMaybe getFP *>
233                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
234                    getBool $> Nothing
235                _ -> fail $ "Invalid usageType: " <> show usageType
236
237getInterface741 :: Dictionary -> Get Interface
238getInterface741 d = do
239    void getModule
240    void getBool
241    replicateM_ 3 getFP
242    void getBool
243    void getBool
244    Interface <$> getDependencies <*> getUsage
245  where
246    getModule = getCachedBS d *> (Module <$> getCachedBS d)
247    getDependencies =
248        withBlockPrefix $
249        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
250        getList (getTuple (getCachedBS d) getBool) <*>
251        getList getModule <*>
252        getList getModule <*>
253        pure (List [])
254    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
255      where
256        go :: Get (Maybe Usage)
257        go = do
258            usageType <- getWord8
259            case usageType of
260                0 -> getModule *> getFP *> getBool $> Nothing
261                1 ->
262                    getCachedBS d *> getFP *> getMaybe getFP *>
263                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
264                    getBool $> Nothing
265                2 -> Just . Usage <$> getString <* getWord64be <* getWord64be
266                _ -> fail $ "Invalid usageType: " <> show usageType
267
268getInterface761 :: Dictionary -> Get Interface
269getInterface761 d = do
270    void getModule
271    void getBool
272    replicateM_ 3 getFP
273    void getBool
274    void getBool
275    Interface <$> getDependencies <*> getUsage
276  where
277    getModule = getCachedBS d *> (Module <$> getCachedBS d)
278    getDependencies =
279        withBlockPrefix $
280        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
281        getList (getTuple (getCachedBS d) getBool) <*>
282        getList getModule <*>
283        getList getModule <*>
284        pure (List [])
285    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
286      where
287        go :: Get (Maybe Usage)
288        go = do
289            usageType <- getWord8
290            case usageType of
291                0 -> getModule *> getFP *> getBool $> Nothing
292                1 ->
293                    getCachedBS d *> getFP *> getMaybe getFP *>
294                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
295                    getBool $> Nothing
296                2 -> Just . Usage <$> getString <* getWord64be <* getWord64be
297                _ -> fail $ "Invalid usageType: " <> show usageType
298
299getInterface781 :: Dictionary -> Get Interface
300getInterface781 d = do
301    void getModule
302    void getBool
303    replicateM_ 3 getFP
304    void getBool
305    void getBool
306    Interface <$> getDependencies <*> getUsage
307  where
308    getModule = getCachedBS d *> (Module <$> getCachedBS d)
309    getDependencies =
310        withBlockPrefix $
311        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
312        getList (getTuple (getCachedBS d) getBool) <*>
313        getList getModule <*>
314        getList getModule <*>
315        pure (List [])
316    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
317      where
318        go :: Get (Maybe Usage)
319        go = do
320            usageType <- getWord8
321            case usageType of
322                0 -> getModule *> getFP *> getBool $> Nothing
323                1 ->
324                    getCachedBS d *> getFP *> getMaybe getFP *>
325                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
326                    getBool $> Nothing
327                2 -> Just . Usage <$> getString <* getFP
328                _ -> fail $ "Invalid usageType: " <> show usageType
329
330getInterface801 :: Dictionary -> Get Interface
331getInterface801 d = do
332    void getModule
333    void getWord8
334    replicateM_ 3 getFP
335    void getBool
336    void getBool
337    Interface <$> getDependencies <*> getUsage
338  where
339    getModule = getCachedBS d *> (Module <$> getCachedBS d)
340    getDependencies =
341        withBlockPrefix $
342        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
343        getList (getTuple (getCachedBS d) getBool) <*>
344        getList getModule <*>
345        getList getModule <*>
346        pure (List [])
347    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
348      where
349        go :: Get (Maybe Usage)
350        go = do
351            usageType <- getWord8
352            case usageType of
353                0 -> getModule *> getFP *> getBool $> Nothing
354                1 ->
355                    getCachedBS d *> getFP *> getMaybe getFP *>
356                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
357                    getBool $> Nothing
358                2 -> Just . Usage <$> getString <* getFP
359                3 -> getModule *> getFP $> Nothing
360                _ -> fail $ "Invalid usageType: " <> show usageType
361
362getInterface821 :: Dictionary -> Get Interface
363getInterface821 d = do
364    void getModule
365    void $ getMaybe getModule
366    void getWord8
367    replicateM_ 3 getFP
368    void getBool
369    void getBool
370    Interface <$> getDependencies <*> getUsage
371  where
372    getModule = do
373        idType <- getWord8
374        case idType of
375            0 -> void $ getCachedBS d
376            _ ->
377                void $
378                getCachedBS d *> getList (getTuple (getCachedBS d) getModule)
379        Module <$> getCachedBS d
380    getDependencies =
381        withBlockPrefix $
382        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
383        getList (getTuple (getCachedBS d) getBool) <*>
384        getList getModule <*>
385        getList getModule <*>
386        pure (List [])
387    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
388      where
389        go :: Get (Maybe Usage)
390        go = do
391            usageType <- getWord8
392            case usageType of
393                0 -> getModule *> getFP *> getBool $> Nothing
394                1 ->
395                    getCachedBS d *> getFP *> getMaybe getFP *>
396                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
397                    getBool $> Nothing
398                2 -> Just . Usage <$> getString <* getFP
399                3 -> getModule *> getFP $> Nothing
400                _ -> fail $ "Invalid usageType: " <> show usageType
401
402getInterface841 :: Dictionary -> Get Interface
403getInterface841 d = do
404    void getModule
405    void $ getMaybe getModule
406    void getWord8
407    replicateM_ 5 getFP
408    void getBool
409    void getBool
410    Interface <$> getDependencies <*> getUsage
411  where
412    getModule = do
413        idType <- getWord8
414        case idType of
415            0 -> void $ getCachedBS d
416            _ ->
417                void $
418                getCachedBS d *> getList (getTuple (getCachedBS d) getModule)
419        Module <$> getCachedBS d
420    getDependencies =
421        withBlockPrefix $
422        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
423        getList (getTuple (getCachedBS d) getBool) <*>
424        getList getModule <*>
425        getList getModule <*>
426        pure (List [])
427    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
428      where
429        go :: Get (Maybe Usage)
430        go = do
431            usageType <- getWord8
432            case usageType of
433                0 -> getModule *> getFP *> getBool $> Nothing
434                1 ->
435                    getCachedBS d *> getFP *> getMaybe getFP *>
436                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
437                    getBool $> Nothing
438                2 -> Just . Usage <$> getString <* getFP
439                3 -> getModule *> getFP $> Nothing
440                _ -> fail $ "Invalid usageType: " <> show usageType
441
442getInterface861 :: Dictionary -> Get Interface
443getInterface861 d = do
444    void getModule
445    void $ getMaybe getModule
446    void getWord8
447    replicateM_ 6 getFP
448    void getBool
449    void getBool
450    Interface <$> getDependencies <*> getUsage
451  where
452    getModule = do
453        idType <- getWord8
454        case idType of
455            0 -> void $ getCachedBS d
456            _ ->
457                void $
458                getCachedBS d *> getList (getTuple (getCachedBS d) getModule)
459        Module <$> getCachedBS d
460    getDependencies =
461        withBlockPrefix $
462        Dependencies <$> getList (getTuple (getCachedBS d) getBool) <*>
463        getList (getTuple (getCachedBS d) getBool) <*>
464        getList getModule <*>
465        getList getModule <*>
466        getList (getCachedBS d)
467    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
468      where
469        go :: Get (Maybe Usage)
470        go = do
471            usageType <- getWord8
472            case usageType of
473                0 -> getModule *> getFP *> getBool $> Nothing
474                1 ->
475                    getCachedBS d *> getFP *> getMaybe getFP *>
476                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
477                    getBool $> Nothing
478                2 -> Just . Usage <$> getString <* getFP
479                3 -> getModule *> getFP $> Nothing
480                _ -> fail $ "Invalid usageType: " <> show usageType
481
482getInterface8101 :: Dictionary -> Get Interface
483getInterface8101 d = do
484    void $ traceShow "Module:" getModule
485    void $ traceShow "Sig:" $ getMaybe getModule
486    void getWord8
487    replicateM_ 6 getFP
488    void getBool
489    void getBool
490    Interface <$> traceShow "Dependencies:" getDependencies <*> traceShow "Usage:" getUsage
491  where
492    getModule = do
493        idType <- traceShow "Unit type:" getWord8
494        case idType of
495            0 -> void $ getCachedBS d
496            1 ->
497                void $
498                getCachedBS d *> getList (getTuple (getCachedBS d) getModule)
499            _ -> fail $ "Invalid unit type: " <> show idType
500        Module <$> getCachedBS d
501    getDependencies =
502        withBlockPrefix $
503        Dependencies
504          <$> getList (getTuple (getCachedBS d) getBool)
505          <*> getList (getTuple (getCachedBS d) getBool)
506          <*> getList getModule
507          <*> getList getModule
508          <*> getList (getCachedBS d)
509    getUsage = withBlockPrefix $ List . catMaybes . unList <$> getList go
510      where
511        go :: Get (Maybe Usage)
512        go = do
513            usageType <- traceShow "Usage type:" getWord8
514            case usageType of
515                0 -> traceShow "Module:" getModule *> getFP *> getBool $> Nothing
516                1 ->
517                    traceShow "Home module:" (getCachedBS d) *> getFP *> getMaybe getFP *>
518                    getList (getTuple (getWord8 *> getCachedBS d) getFP) *>
519                    getBool $> Nothing
520                2 -> Just . Usage <$> traceShow "File:" getString <* traceShow "FP:" getFP'
521                3 -> getModule *> getFP $> Nothing
522                _ -> fail $ "Invalid usageType: " <> show usageType
523
524getInterface :: Get Interface
525getInterface = do
526    let enableLEB128 = modify (\c -> c { useLEB128 = True})
527
528    magic <- lookAhead getWord32be >>= \case
529        -- normal magic
530        0x1face      -> getWord32be
531        0x1face64    -> getWord32be
532        m            -> do
533          -- GHC 8.10 mistakenly encoded header fields with LEB128
534          -- so it gets special treatment
535          lookAhead (enableLEB128 >> getWord32be) >>= \case
536            0x1face      -> enableLEB128 >> getWord32be
537            0x1face64    -> enableLEB128 >> getWord32be
538            _            -> fail $ "Invalid magic: " <> showHex m ""
539
540    traceGet ("Magic: " ++ showHex magic "")
541
542    -- empty field (removed in 9.0...)
543    case magic of
544        0x1face      -> do
545          e <- lookAhead getWord32be
546          if e == 0
547            then void getWord32be
548            else enableLEB128 -- > 9.0
549        0x1face64    -> do
550          e <- lookAhead getWord64be
551          if e == 0
552            then void getWord64be
553            else enableLEB128 -- > 9.0
554        _            -> return ()
555
556    -- ghc version
557    version <- getString
558    traceGet ("Version: " ++ version)
559
560    -- way
561    way <- getString
562    traceGet ("Ways: " ++ show way)
563
564    -- extensible fields (GHC > 9.0)
565    when (version >= "9001") $ void getPtr
566
567    -- dict_ptr
568    dictPtr <- getPtr
569    traceGet ("Dict ptr: " ++ show dictPtr)
570
571    -- dict
572    dict <- lookAhead $ getDictionary $ fromIntegral dictPtr
573
574    -- symtable_ptr
575    void getPtr
576    let versions =
577            [ ("8101", getInterface8101)
578            , ("8061", getInterface861)
579            , ("8041", getInterface841)
580            , ("8021", getInterface821)
581            , ("8001", getInterface801)
582            , ("7081", getInterface781)
583            , ("7061", getInterface761)
584            , ("7041", getInterface741)
585            , ("7021", getInterface721)
586            ]
587    case snd <$> find ((version >=) . fst) versions of
588        Just f  -> f dict
589        Nothing -> fail $ "Unsupported version: " <> version
590
591
592fromFile :: FilePath -> IO (Either String Interface)
593fromFile fp = withBinaryFile fp ReadMode go
594  where
595    go h =
596      let feed (G.Done _ _ iface) = pure $ Right iface
597          feed (G.Fail _ _ msg) = pure $ Left msg
598          feed (G.Partial k) = do
599            chunk <- hGetSome h defaultChunkSize
600            feed $ k $ if B.null chunk then Nothing else Just chunk
601      in feed $ runGetIncremental getInterface
602
603
604getULEB128 :: forall a. (Integral a, FiniteBits a) => Get a
605getULEB128 =
606    go 0 0
607  where
608    go :: Int -> a -> Get a
609    go shift w = do
610        b <- getWord8
611        let !hasMore = testBit b 7
612        let !val = w .|. (clearBit (fromIntegral b) 7 `unsafeShiftL` shift) :: a
613        if hasMore
614            then do
615                go (shift+7) val
616            else
617                return $! val
618
619getSLEB128 :: forall a. (Integral a, FiniteBits a) => Get a
620getSLEB128 = do
621    (val,shift,signed) <- go 0 0
622    if signed && (shift < finiteBitSize val )
623        then return $! ((complement 0 `unsafeShiftL` shift) .|. val)
624        else return val
625    where
626        go :: Int -> a -> Get (a,Int,Bool)
627        go shift val = do
628            byte <- getWord8
629            let !byteVal = fromIntegral (clearBit byte 7) :: a
630            let !val' = val .|. (byteVal `unsafeShiftL` shift)
631            let !more = testBit byte 7
632            let !shift' = shift+7
633            if more
634                then go shift' val'
635                else do
636                    let !signed = testBit byte 6
637                    return (val',shift',signed)
638