1{-# LANGUAGE CPP #-}
2{-# LANGUAGE ScopedTypeVariables #-}
3{-# LANGUAGE FlexibleContexts #-}
4{-# LANGUAGE RankNTypes #-}
5{-# LANGUAGE DeriveGeneric #-}
6{-# LANGUAGE BangPatterns #-}
7
8-----------------------------------------------------------------------------
9-- |
10-- Module      :  Distribution.Simple.Utils
11-- Copyright   :  Isaac Jones, Simon Marlow 2003-2004
12-- License     :  BSD3
13--                portions Copyright (c) 2007, Galois Inc.
14--
15-- Maintainer  :  cabal-devel@haskell.org
16-- Portability :  portable
17--
18-- A large and somewhat miscellaneous collection of utility functions used
19-- throughout the rest of the Cabal lib and in other tools that use the Cabal
20-- lib like @cabal-install@. It has a very simple set of logging actions. It
21-- has low level functions for running programs, a bunch of wrappers for
22-- various directory and file functions that do extra logging.
23
24module Distribution.Utils.Generic (
25        -- * reading and writing files safely
26        withFileContents,
27        writeFileAtomic,
28
29        -- * Unicode
30
31        -- ** Conversions
32        fromUTF8BS,
33        fromUTF8LBS,
34
35        toUTF8BS,
36        toUTF8LBS,
37
38        validateUTF8,
39
40        -- ** File I/O
41        readUTF8File,
42        withUTF8FileContents,
43        writeUTF8File,
44
45        -- ** BOM
46        ignoreBOM,
47
48        -- ** Misc
49        normaliseLineEndings,
50
51        -- * generic utils
52        dropWhileEndLE,
53        takeWhileEndLE,
54        equating,
55        comparing,
56        isInfixOf,
57        intercalate,
58        lowercase,
59        isAscii,
60        isAsciiAlpha,
61        isAsciiAlphaNum,
62        listUnion,
63        listUnionRight,
64        ordNub,
65        ordNubBy,
66        ordNubRight,
67        safeHead,
68        safeTail,
69        safeLast,
70        safeInit,
71        unintersperse,
72        wrapText,
73        wrapLine,
74        unfoldrM,
75        spanMaybe,
76        breakMaybe,
77        unsnoc,
78        unsnocNE,
79
80        -- * Triples
81        fstOf3,
82        sndOf3,
83        trdOf3,
84
85        -- * FilePath stuff
86        isAbsoluteOnAnyPlatform,
87        isRelativeOnAnyPlatform,
88  ) where
89
90import Prelude ()
91import Distribution.Compat.Prelude
92
93import Distribution.Utils.String
94
95import Data.Bits ((.&.), (.|.), shiftL)
96import Data.List
97    ( isInfixOf )
98import qualified Data.Set as Set
99import qualified Data.ByteString as SBS
100import qualified Data.ByteString.Lazy as LBS
101
102import System.Directory
103    ( removeFile, renameFile )
104import System.FilePath
105    ( (<.>), splitFileName )
106import System.IO
107    ( withFile, withBinaryFile
108    , openBinaryTempFileWithDefaultPermissions
109    , IOMode(ReadMode), hGetContents, hClose )
110import qualified Control.Exception as Exception
111
112-- -----------------------------------------------------------------------------
113-- Helper functions
114
115-- | Wraps text to the default line width. Existing newlines are preserved.
116wrapText :: String -> String
117wrapText = unlines
118         . map (intercalate "\n"
119              . map unwords
120              . wrapLine 79
121              . words)
122         . lines
123
124-- | Wraps a list of words to a list of lines of words of a particular width.
125wrapLine :: Int -> [String] -> [[String]]
126wrapLine width = wrap 0 []
127  where wrap :: Int -> [String] -> [String] -> [[String]]
128        wrap 0   []   (w:ws)
129          | length w + 1 > width
130          = wrap (length w) [w] ws
131        wrap col line (w:ws)
132          | col + length w + 1 > width
133          = reverse line : wrap 0 [] (w:ws)
134        wrap col line (w:ws)
135          = let col' = col + length w + 1
136             in wrap col' (w:line) ws
137        wrap _ []   [] = []
138        wrap _ line [] = [reverse line]
139
140-----------------------------------
141-- Safely reading and writing files
142
143-- | Gets the contents of a file, but guarantee that it gets closed.
144--
145-- The file is read lazily but if it is not fully consumed by the action then
146-- the remaining input is truncated and the file is closed.
147--
148withFileContents :: FilePath -> (String -> IO a) -> IO a
149withFileContents name action =
150  withFile name ReadMode
151           (\hnd -> hGetContents hnd >>= action)
152
153-- | Writes a file atomically.
154--
155-- The file is either written successfully or an IO exception is raised and
156-- the original file is left unchanged.
157--
158-- On windows it is not possible to delete a file that is open by a process.
159-- This case will give an IO exception but the atomic property is not affected.
160--
161writeFileAtomic :: FilePath -> LBS.ByteString -> IO ()
162writeFileAtomic targetPath content = do
163  let (targetDir, targetFile) = splitFileName targetPath
164  Exception.bracketOnError
165    (openBinaryTempFileWithDefaultPermissions targetDir $ targetFile <.> "tmp")
166    (\(tmpPath, handle) -> hClose handle >> removeFile tmpPath)
167    (\(tmpPath, handle) -> do
168        LBS.hPut handle content
169        hClose handle
170        renameFile tmpPath targetPath)
171
172-- ------------------------------------------------------------
173-- * Unicode stuff
174-- ------------------------------------------------------------
175
176-- | Decode 'String' from UTF8-encoded 'BS.ByteString'
177--
178-- Invalid data in the UTF8 stream (this includes code-points @U+D800@
179-- through @U+DFFF@) will be decoded as the replacement character (@U+FFFD@).
180--
181fromUTF8BS :: SBS.ByteString -> String
182fromUTF8BS = decodeStringUtf8 . SBS.unpack
183
184-- | Variant of 'fromUTF8BS' for lazy 'BS.ByteString's
185--
186fromUTF8LBS :: LBS.ByteString -> String
187fromUTF8LBS = decodeStringUtf8 . LBS.unpack
188
189-- | Encode 'String' to UTF8-encoded 'SBS.ByteString'
190--
191-- Code-points in the @U+D800@-@U+DFFF@ range will be encoded
192-- as the replacement character (i.e. @U+FFFD@).
193--
194toUTF8BS :: String -> SBS.ByteString
195toUTF8BS = SBS.pack . encodeStringUtf8
196
197-- | Variant of 'toUTF8BS' for lazy 'BS.ByteString's
198--
199toUTF8LBS :: String -> LBS.ByteString
200toUTF8LBS = LBS.pack . encodeStringUtf8
201
202-- | Check that strict 'ByteString' is valid UTF8. Returns 'Just offset' if it's not.
203validateUTF8 :: SBS.ByteString -> Maybe Int
204validateUTF8 = go 0 where
205    go off bs = case SBS.uncons bs of
206        Nothing -> Nothing
207        Just (c, bs')
208            | c <= 0x7F -> go (off + 1) bs'
209            | c <= 0xBF -> Just off
210            | c <= 0xDF -> twoBytes off c bs'
211            | c <= 0xEF -> moreBytes off 3 0x800     bs' (fromIntegral $ c .&. 0xF)
212            | c <= 0xF7 -> moreBytes off 4 0x10000   bs' (fromIntegral $ c .&. 0x7)
213            | c <= 0xFB -> moreBytes off 5 0x200000  bs' (fromIntegral $ c .&. 0x3)
214            | c <= 0xFD -> moreBytes off 6 0x4000000 bs' (fromIntegral $ c .&. 0x1)
215            | otherwise -> Just off
216
217    twoBytes off c0 bs = case SBS.uncons bs of
218        Nothing        -> Just off
219        Just (c1, bs')
220            | c1 .&. 0xC0 == 0x80 ->
221                if d >= (0x80 :: Int)
222                then go (off + 2) bs'
223                else Just off
224            | otherwise -> Just off
225          where
226            d = (fromIntegral (c0 .&. 0x1F) `shiftL` 6) .|. fromIntegral (c1 .&. 0x3F)
227
228    moreBytes :: Int -> Int -> Int -> SBS.ByteString -> Int -> Maybe Int
229    moreBytes off 1 overlong cs' acc
230      | overlong <= acc, acc <= 0x10FFFF, acc < 0xD800 || 0xDFFF < acc
231      = go (off + 1) cs'
232
233      | otherwise
234      = Just off
235
236    moreBytes off byteCount overlong bs acc = case SBS.uncons bs of
237        Just (cn, bs') | cn .&. 0xC0 == 0x80 ->
238            moreBytes (off + 1) (byteCount-1) overlong bs' ((acc `shiftL` 6) .|. fromIntegral cn .&. 0x3F)
239        _ -> Just off
240
241
242-- | Ignore a Unicode byte order mark (BOM) at the beginning of the input
243--
244ignoreBOM :: String -> String
245ignoreBOM ('\xFEFF':string) = string
246ignoreBOM string            = string
247
248-- | Reads a UTF8 encoded text file as a Unicode String
249--
250-- Reads lazily using ordinary 'readFile'.
251--
252readUTF8File :: FilePath -> IO String
253readUTF8File f = (ignoreBOM . fromUTF8LBS) <$> LBS.readFile f
254
255-- | Reads a UTF8 encoded text file as a Unicode String
256--
257-- Same behaviour as 'withFileContents'.
258--
259withUTF8FileContents :: FilePath -> (String -> IO a) -> IO a
260withUTF8FileContents name action =
261  withBinaryFile name ReadMode
262    (\hnd -> LBS.hGetContents hnd >>= action . ignoreBOM . fromUTF8LBS)
263
264-- | Writes a Unicode String as a UTF8 encoded text file.
265--
266-- Uses 'writeFileAtomic', so provides the same guarantees.
267--
268writeUTF8File :: FilePath -> String -> IO ()
269writeUTF8File path = writeFileAtomic path . toUTF8LBS
270
271-- | Fix different systems silly line ending conventions
272normaliseLineEndings :: String -> String
273normaliseLineEndings [] = []
274normaliseLineEndings ('\r':'\n':s) = '\n' : normaliseLineEndings s -- windows
275normaliseLineEndings ('\r':s)      = '\n' : normaliseLineEndings s -- old OS X
276normaliseLineEndings (  c :s)      =   c  : normaliseLineEndings s
277
278-- ------------------------------------------------------------
279-- * Common utils
280-- ------------------------------------------------------------
281
282-- | @dropWhileEndLE p@ is equivalent to @reverse . dropWhile p . reverse@, but
283-- quite a bit faster. The difference between "Data.List.dropWhileEnd" and this
284-- version is that the one in "Data.List" is strict in elements, but spine-lazy,
285-- while this one is spine-strict but lazy in elements. That's what @LE@ stands
286-- for - "lazy in elements".
287--
288-- Example:
289--
290-- >>> safeTail $ Data.List.dropWhileEnd (<3) [undefined, 5, 4, 3, 2, 1]
291-- *** Exception: Prelude.undefined
292-- ...
293--
294-- >>> safeTail $ dropWhileEndLE (<3) [undefined, 5, 4, 3, 2, 1]
295-- [5,4,3]
296--
297-- >>> take 3 $ Data.List.dropWhileEnd (<3) [5, 4, 3, 2, 1, undefined]
298-- [5,4,3]
299--
300-- >>> take 3 $ dropWhileEndLE (<3) [5, 4, 3, 2, 1, undefined]
301-- *** Exception: Prelude.undefined
302-- ...
303--
304dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
305dropWhileEndLE p = foldr (\x r -> if null r && p x then [] else x:r) []
306
307-- | @takeWhileEndLE p@ is equivalent to @reverse . takeWhile p . reverse@, but
308-- is usually faster (as well as being easier to read).
309takeWhileEndLE :: (a -> Bool) -> [a] -> [a]
310takeWhileEndLE p = fst . foldr go ([], False)
311  where
312    go x (rest, done)
313      | not done && p x = (x:rest, False)
314      | otherwise = (rest, True)
315
316-- | Like 'Data.List.nub', but has @O(n log n)@ complexity instead of
317-- @O(n^2)@. Code for 'ordNub' and 'listUnion' taken from Niklas Hambüchen's
318-- <http://github.com/nh2/haskell-ordnub ordnub> package.
319ordNub :: Ord a => [a] -> [a]
320ordNub = ordNubBy id
321
322-- | Like 'ordNub' and 'Data.List.nubBy'. Selects a key for each element and
323-- takes the nub based on that key.
324ordNubBy :: Ord b => (a -> b) -> [a] -> [a]
325ordNubBy f l = go Set.empty l
326  where
327    go !_ [] = []
328    go !s (x:xs)
329      | y `Set.member` s = go s xs
330      | otherwise        = let !s' = Set.insert y s
331                            in x : go s' xs
332      where
333        y = f x
334
335-- | Like "Data.List.union", but has @O(n log n)@ complexity instead of
336-- @O(n^2)@.
337listUnion :: (Ord a) => [a] -> [a] -> [a]
338listUnion a b = a ++ ordNub (filter (`Set.notMember` aSet) b)
339  where
340    aSet = Set.fromList a
341
342-- | A right-biased version of 'ordNub'.
343--
344-- Example:
345--
346-- >>> ordNub [1,2,1] :: [Int]
347-- [1,2]
348--
349-- >>> ordNubRight [1,2,1] :: [Int]
350-- [2,1]
351--
352ordNubRight :: (Ord a) => [a] -> [a]
353ordNubRight = fst . foldr go ([], Set.empty)
354  where
355    go x p@(l, s) = if x `Set.member` s then p
356                                        else (x:l, Set.insert x s)
357
358-- | A right-biased version of 'listUnion'.
359--
360-- Example:
361--
362-- >>> listUnion [1,2,3,4,3] [2,1,1]
363-- [1,2,3,4,3]
364--
365-- >>> listUnionRight [1,2,3,4,3] [2,1,1]
366-- [4,3,2,1,1]
367--
368listUnionRight :: (Ord a) => [a] -> [a] -> [a]
369listUnionRight a b = ordNubRight (filter (`Set.notMember` bSet) a) ++ b
370  where
371    bSet = Set.fromList b
372
373-- | A total variant of 'head'.
374--
375-- @since 3.2.0.0
376safeHead :: [a] -> Maybe a
377safeHead []    = Nothing
378safeHead (x:_) = Just x
379
380-- | A total variant of 'tail'.
381--
382-- @since 3.2.0.0
383safeTail :: [a] -> [a]
384safeTail []     = []
385safeTail (_:xs) = xs
386
387-- | A total variant of 'last'.
388--
389-- @since 3.2.0.0
390safeLast :: [a] -> Maybe a
391safeLast []     = Nothing
392safeLast (x:xs) = Just (foldl (\_ a -> a) x xs)
393
394-- | A total variant of 'init'.
395--
396-- @since 3.2.0.0
397safeInit :: [a] -> [a]
398safeInit []     = []
399safeInit [_]    = []
400safeInit (x:xs) = x : safeInit xs
401
402equating :: Eq a => (b -> a) -> b -> b -> Bool
403equating p x y = p x == p y
404
405-- | Lower case string
406--
407-- >>> lowercase "Foobar"
408-- "foobar"
409lowercase :: String -> String
410lowercase = map toLower
411
412-- | Ascii characters
413isAscii :: Char -> Bool
414isAscii c = fromEnum c < 0x80
415
416-- | Ascii letters.
417isAsciiAlpha :: Char -> Bool
418isAsciiAlpha c = ('a' <= c && c <= 'z')
419    || ('A' <= c && c <= 'Z')
420
421-- | Ascii letters and digits.
422--
423-- >>> isAsciiAlphaNum 'a'
424-- True
425--
426-- >>> isAsciiAlphaNum 'ä'
427-- False
428--
429isAsciiAlphaNum :: Char -> Bool
430isAsciiAlphaNum c = isAscii c && isAlphaNum c
431
432unintersperse :: Char -> String -> [String]
433unintersperse mark = unfoldr unintersperse1 where
434  unintersperse1 str
435    | null str = Nothing
436    | otherwise =
437        let (this, rest) = break (== mark) str in
438        Just (this, safeTail rest)
439
440-- | Like 'break', but with 'Maybe' predicate
441--
442-- >>> breakMaybe (readMaybe :: String -> Maybe Int) ["foo", "bar", "1", "2", "quu"]
443-- (["foo","bar"],Just (1,["2","quu"]))
444--
445-- >>> breakMaybe (readMaybe :: String -> Maybe Int) ["foo", "bar"]
446-- (["foo","bar"],Nothing)
447--
448-- @since 2.2
449--
450breakMaybe :: (a -> Maybe b) -> [a] -> ([a], Maybe (b, [a]))
451breakMaybe f = go id where
452    go !acc []     = (acc [], Nothing)
453    go !acc (x:xs) = case f x of
454        Nothing -> go (acc . (x:)) xs
455        Just b  -> (acc [], Just (b, xs))
456
457-- | Like 'span' but with 'Maybe' predicate
458--
459-- >>> spanMaybe listToMaybe [[1,2],[3],[],[4,5],[6,7]]
460-- ([1,3],[[],[4,5],[6,7]])
461--
462-- >>> spanMaybe (readMaybe :: String -> Maybe Int) ["1", "2", "foo"]
463-- ([1,2],["foo"])
464--
465-- @since 2.2
466--
467spanMaybe :: (a -> Maybe b) -> [a] -> ([b],[a])
468spanMaybe _ xs@[] =  ([], xs)
469spanMaybe p xs@(x:xs') = case p x of
470    Just y  -> let (ys, zs) = spanMaybe p xs' in (y : ys, zs)
471    Nothing -> ([], xs)
472
473-- | 'unfoldr' with monadic action.
474--
475-- >>> take 5 $ unfoldrM (\b r -> Just (r + b, b + 1)) (1 :: Int) 2
476-- [3,4,5,6,7]
477--
478-- @since 2.2
479--
480unfoldrM :: Monad m => (b -> m (Maybe (a, b))) -> b -> m [a]
481unfoldrM f = go where
482    go b = do
483        m <- f b
484        case m of
485            Nothing      -> return []
486            Just (a, b') -> liftM (a :) (go b')
487
488-- | The opposite of 'snoc', which is the reverse of 'cons'
489--
490-- Example:
491--
492-- >>> unsnoc [1, 2, 3]
493-- Just ([1,2],3)
494--
495-- >>> unsnoc []
496-- Nothing
497--
498-- @since 3.2.0.0
499--
500unsnoc :: [a] -> Maybe ([a], a)
501unsnoc []     = Nothing
502unsnoc (x:xs) = Just (unsnocNE (x :| xs))
503
504-- | Like 'unsnoc', but for 'NonEmpty' so without the 'Maybe'
505--
506-- Example:
507--
508-- >>> unsnocNE (1 :| [2, 3])
509-- ([1,2],3)
510--
511-- >>> unsnocNE (1 :| [])
512-- ([],1)
513--
514-- @since 3.2.0.0
515--
516unsnocNE :: NonEmpty a -> ([a], a)
517unsnocNE (x:|xs) = go x xs where
518    go y []     = ([], y)
519    go y (z:zs) = let ~(ws, w) = go z zs in (y : ws, w)
520
521-------------------------------------------------------------------------------
522-- Triples
523-------------------------------------------------------------------------------
524
525-- | @since 3.4.0.0
526fstOf3 :: (a,b,c) -> a
527fstOf3 (a,_,_) = a
528
529-- | @since 3.4.0.0
530sndOf3 :: (a,b,c) -> b
531sndOf3 (_,b,_) = b
532
533-- | @since 3.4.0.0
534trdOf3 :: (a,b,c) -> c
535trdOf3 (_,_,c) = c
536
537-- ------------------------------------------------------------
538-- * FilePath stuff
539-- ------------------------------------------------------------
540
541-- | 'isAbsoluteOnAnyPlatform' and 'isRelativeOnAnyPlatform' are like
542-- 'System.FilePath.isAbsolute' and 'System.FilePath.isRelative' but have
543-- platform independent heuristics.
544-- The System.FilePath exists in two versions, Windows and Posix. The two
545-- versions don't agree on what is a relative path and we don't know if we're
546-- given Windows or Posix paths.
547-- This results in false positives when running on Posix and inspecting
548-- Windows paths, like the hackage server does.
549-- System.FilePath.Posix.isAbsolute \"C:\\hello\" == False
550-- System.FilePath.Windows.isAbsolute \"/hello\" == False
551-- This means that we would treat paths that start with \"/\" to be absolute.
552-- On Posix they are indeed absolute, while on Windows they are not.
553--
554-- The portable versions should be used when we might deal with paths that
555-- are from another OS than the host OS. For example, the Hackage Server
556-- deals with both Windows and Posix paths while performing the
557-- PackageDescription checks. In contrast, when we run 'cabal configure' we
558-- do expect the paths to be correct for our OS and we should not have to use
559-- the platform independent heuristics.
560isAbsoluteOnAnyPlatform :: FilePath -> Bool
561-- C:\\directory
562isAbsoluteOnAnyPlatform (drive:':':'\\':_) = isAlpha drive
563isAbsoluteOnAnyPlatform (drive:':':'/':_)  = isAlpha drive
564-- UNC
565isAbsoluteOnAnyPlatform ('\\':'\\':_) = True
566-- Posix root
567isAbsoluteOnAnyPlatform ('/':_) = True
568isAbsoluteOnAnyPlatform _ = False
569
570-- | @isRelativeOnAnyPlatform = not . 'isAbsoluteOnAnyPlatform'@
571isRelativeOnAnyPlatform :: FilePath -> Bool
572isRelativeOnAnyPlatform = not . isAbsoluteOnAnyPlatform
573
574-- $setup
575-- >>> import Data.Maybe
576-- >>> import Text.Read
577