1{-
2    Copyright 2012-2021 Vidar Holen
3
4    This file is part of ShellCheck.
5    https://www.shellcheck.net
6
7    ShellCheck is free software: you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation, either version 3 of the License, or
10    (at your option) any later version.
11
12    ShellCheck is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <https://www.gnu.org/licenses/>.
19-}
20{-# LANGUAGE TemplateHaskell #-}
21module ShellCheck.ASTLib where
22
23import ShellCheck.AST
24import ShellCheck.Regex
25
26import Control.Monad.Writer
27import Control.Monad
28import Data.Char
29import Data.Functor
30import Data.Functor.Identity
31import Data.List
32import Data.Maybe
33import qualified Data.Map as Map
34import Numeric (showHex)
35
36import Test.QuickCheck
37
38arguments (T_SimpleCommand _ _ (cmd:args)) = args
39
40-- Is this a type of loop?
41isLoop t = case t of
42        T_WhileExpression {} -> True
43        T_UntilExpression {} -> True
44        T_ForIn {} -> True
45        T_ForArithmetic {} -> True
46        T_SelectIn {}  -> True
47        _ -> False
48
49-- Will this split into multiple words when used as an argument?
50willSplit x =
51  case x of
52    T_DollarBraced {} -> True
53    T_DollarExpansion {} -> True
54    T_Backticked {} -> True
55    T_BraceExpansion {} -> True
56    T_Glob {} -> True
57    T_Extglob {} -> True
58    T_DoubleQuoted _ l -> any willBecomeMultipleArgs l
59    T_NormalWord _ l -> any willSplit l
60    _ -> False
61
62isGlob t = case t of
63    T_Extglob {} -> True
64    T_Glob {} -> True
65    T_NormalWord _ l -> any isGlob l || hasSplitRange l
66    _ -> False
67  where
68    -- foo[x${var}y] gets parsed as foo,[,x,$var,y],
69    -- so check if there's such an interval
70    hasSplitRange l =
71        let afterBracket = dropWhile (not . isHalfOpenRange) l
72        in any isClosingRange afterBracket
73
74    isHalfOpenRange t =
75        case t of
76            T_Literal _ "[" -> True
77            _ -> False
78
79    isClosingRange t =
80        case t of
81            T_Literal _ str -> ']' `elem` str
82            _ -> False
83
84
85-- Is this shell word a constant?
86isConstant token =
87    case token of
88        -- This ignores some cases like ~"foo":
89        T_NormalWord _ (T_Literal _ ('~':_) : _)  -> False
90        T_NormalWord _ l   -> all isConstant l
91        T_DoubleQuoted _ l -> all isConstant l
92        T_SingleQuoted _ _ -> True
93        T_Literal _ _ -> True
94        _ -> False
95
96-- Is this an empty literal?
97isEmpty token =
98    case token of
99        T_NormalWord _ l   -> all isEmpty l
100        T_DoubleQuoted _ l -> all isEmpty l
101        T_SingleQuoted _ "" -> True
102        T_Literal _ "" -> True
103        _ -> False
104
105-- Quick&lazy oversimplification of commands, throwing away details
106-- and returning a list like  ["find", ".", "-name", "${VAR}*" ].
107oversimplify token =
108    case token of
109        (T_NormalWord _ l) -> [concat (concatMap oversimplify l)]
110        (T_DoubleQuoted _ l) -> [concat (concatMap oversimplify l)]
111        (T_SingleQuoted _ s) -> [s]
112        (T_DollarBraced _ _ _) -> ["${VAR}"]
113        (T_DollarArithmetic _ _) -> ["${VAR}"]
114        (T_DollarExpansion _ _) -> ["${VAR}"]
115        (T_Backticked _ _) -> ["${VAR}"]
116        (T_Glob _ s) -> [s]
117        (T_Pipeline _ _ [x]) -> oversimplify x
118        (T_Literal _ x) -> [x]
119        (T_ParamSubSpecialChar _ x) -> [x]
120        (T_SimpleCommand _ vars words) -> concatMap oversimplify words
121        (T_Redirecting _ _ foo) -> oversimplify foo
122        (T_DollarSingleQuoted _ s) -> [s]
123        (T_Annotation _ _ s) -> oversimplify s
124        -- Workaround for let "foo = bar" parsing
125        (TA_Sequence _ [TA_Expansion _ v]) -> concatMap oversimplify v
126        _ -> []
127
128
129-- Turn a SimpleCommand foo -avz --bar=baz into args "a", "v", "z", "bar",
130-- each in a tuple of (token, stringFlag). Non-flag arguments are added with
131-- stringFlag == "".
132getFlagsUntil stopCondition (T_SimpleCommand _ _ (_:args)) =
133    let tokenAndText = map (\x -> (x, concat $ oversimplify x)) args
134        (flagArgs, rest) = break (stopCondition . snd) tokenAndText
135    in
136        concatMap flag flagArgs ++ map (\(t, _) -> (t, "")) rest
137  where
138    flag (x, '-':'-':arg) = [ (x, takeWhile (/= '=') arg) ]
139    flag (x, '-':args) = map (\v -> (x, [v])) args
140    flag (x, _) = [ (x, "") ]
141getFlagsUntil _ _ = error "Internal shellcheck error, please report! (getFlags on non-command)"
142
143-- Get all flags in a GNU way, up until --
144getAllFlags :: Token -> [(Token, String)]
145getAllFlags = getFlagsUntil (== "--")
146-- Get all flags in a BSD way, up until first non-flag argument or --
147getLeadingFlags = getFlagsUntil (\x -> x == "--" || (not $ "-" `isPrefixOf` x))
148
149-- Check if a command has a flag.
150hasFlag cmd str = str `elem` (map snd $ getAllFlags cmd)
151
152-- Is this token a word that starts with a dash?
153isFlag token =
154    case getWordParts token of
155        T_Literal _ ('-':_) : _ -> True
156        _ -> False
157
158-- Is this token a flag where the - is unquoted?
159isUnquotedFlag token = fromMaybe False $ do
160    str <- getLeadingUnquotedString token
161    return $ "-" `isPrefixOf` str
162
163-- getGnuOpts "erd:u:" will parse a list of arguments tokens like `read`
164--     -re -d : -u 3 bar
165-- into
166--     Just [("r", (-re, -re)), ("e", (-re, -re)), ("d", (-d,:)), ("u", (-u,3)), ("", (bar,bar))]
167--
168-- Each string flag maps to a tuple of (flag, argument), where argument=flag if it
169-- doesn't take a specific one.
170--
171-- Any unrecognized flag will result in Nothing. The exception is if arbitraryLongOpts
172-- is set, in which case --anything will map to "anything".
173getGnuOpts :: String -> [Token] -> Maybe [(String, (Token, Token))]
174getGnuOpts str args = getOpts (True, False) str [] args
175
176-- As above, except the first non-arg string will treat the rest as arguments
177getBsdOpts :: String -> [Token] -> Maybe [(String, (Token, Token))]
178getBsdOpts str args = getOpts (False, False) str [] args
179
180-- Tests for this are in Commands.hs where it's more frequently used
181getOpts ::
182    -- Behavioral config: gnu style, allow arbitrary long options
183    (Bool, Bool)
184    -- A getopts style string
185    -> String
186    -- List of long options and whether they take arguments
187    -> [(String, Bool)]
188    -- List of arguments (excluding command)
189    -> [Token]
190    -- List of flags to tuple of (optionToken, valueToken)
191    -> Maybe [(String, (Token, Token))]
192
193getOpts (gnu, arbitraryLongOpts) string longopts args = process args
194  where
195    flagList (c:':':rest) = ([c], True) : flagList rest
196    flagList (c:rest)     = ([c], False) : flagList rest
197    flagList []           = longopts
198    flagMap = Map.fromList $ ("", False) : flagList string
199
200    process [] = return []
201    process (token:rest) = do
202        case getLiteralStringDef "\0" token of
203            "--" -> return $ listToArgs rest
204            '-':'-':word -> do
205                let (name, arg) = span (/= '=') word
206                needsArg <-
207                    if arbitraryLongOpts
208                    then return $ Map.findWithDefault False name flagMap
209                    else Map.lookup name flagMap
210
211                if needsArg && null arg
212                  then
213                    case rest of
214                        (arg:rest2) -> do
215                            more <- process rest2
216                            return $ (name, (token, arg)) : more
217                        _ -> fail "Missing arg"
218                  else do
219                    more <- process rest
220                    -- Consider splitting up token to get arg
221                    return $ (name, (token, token)) : more
222            '-':opts -> shortToOpts opts token rest
223            arg ->
224                if gnu
225                then do
226                    more <- process rest
227                    return $ ("", (token, token)):more
228                else return $ listToArgs (token:rest)
229
230    shortToOpts opts token args =
231        case opts of
232            c:rest -> do
233                needsArg <- Map.lookup [c] flagMap
234                case () of
235                    _ | needsArg && null rest -> do
236                        (next:restArgs) <- return args
237                        more <- process restArgs
238                        return $ ([c], (token, next)):more
239                    _ | needsArg -> do
240                        more <- process args
241                        return $ ([c], (token, token)):more
242                    _ -> do
243                        more <- shortToOpts rest token args
244                        return $ ([c], (token, token)):more
245            [] -> process args
246
247    listToArgs = map (\x -> ("", (x, x)))
248
249
250-- Generic getOpts that doesn't rely on a format string, but may also be inaccurate.
251-- This provides a best guess interpretation instead of failing when new options are added.
252--
253--    "--" is treated as end of arguments
254--    "--anything[=foo]" is treated as a long option without argument
255--    "-any" is treated as -a -n -y, with the next arg as an option to -y unless it starts with -
256--    anything else is an argument
257getGenericOpts :: [Token] -> [(String, (Token, Token))]
258getGenericOpts = process
259  where
260    process (token:rest) =
261        case getLiteralStringDef "\0" token of
262            "--" -> map (\c -> ("", (c,c))) rest
263            '-':'-':word -> (takeWhile (`notElem` "\0=") word, (token, token)) : process rest
264            '-':optString ->
265                let opts = takeWhile (/= '\0') optString
266                in
267                    case rest of
268                        next:_ | "-" `isPrefixOf` getLiteralStringDef "\0" next  ->
269                            map (\c -> ([c], (token, token))) opts ++ process rest
270                        next:remainder ->
271                            case reverse opts of
272                                last:initial ->
273                                    map (\c -> ([c], (token, token))) (reverse initial)
274                                        ++ [([last], (token, next))]
275                                        ++ process remainder
276                                [] -> process remainder
277                        [] -> map (\c -> ([c], (token, token))) opts
278            _ -> ("", (token, token)) : process rest
279    process [] = []
280
281
282-- Is this an expansion of multiple items of an array?
283isArrayExpansion (T_DollarBraced _ _ l) =
284    let string = concat $ oversimplify l in
285        "@" `isPrefixOf` string ||
286            not ("#" `isPrefixOf` string) && "[@]" `isInfixOf` string
287isArrayExpansion _ = False
288
289-- Is it possible that this arg becomes multiple args?
290mayBecomeMultipleArgs t = willBecomeMultipleArgs t || f False t
291  where
292    f quoted (T_DollarBraced _ _ l) =
293        let string = concat $ oversimplify l in
294            not quoted || "!" `isPrefixOf` string
295    f quoted (T_DoubleQuoted _ parts) = any (f True) parts
296    f quoted (T_NormalWord _ parts) = any (f quoted) parts
297    f _ _ = False
298
299-- Is it certain that this word will becomes multiple words?
300willBecomeMultipleArgs t = willConcatInAssignment t || f t
301  where
302    f T_Extglob {} = True
303    f T_Glob {} = True
304    f T_BraceExpansion {} = True
305    f (T_NormalWord _ parts) = any f parts
306    f _ = False
307
308-- This does token cause implicit concatenation in assignments?
309willConcatInAssignment token =
310    case token of
311        t@T_DollarBraced {} -> isArrayExpansion t
312        (T_DoubleQuoted _ parts) -> any willConcatInAssignment parts
313        (T_NormalWord _ parts) -> any willConcatInAssignment parts
314        _ -> False
315
316-- Maybe get the literal string corresponding to this token
317getLiteralString :: Token -> Maybe String
318getLiteralString = getLiteralStringExt (const Nothing)
319
320-- Definitely get a literal string, with a given default for all non-literals
321getLiteralStringDef :: String -> Token -> String
322getLiteralStringDef x = runIdentity . getLiteralStringExt (const $ return x)
323
324-- Definitely get a literal string, skipping over all non-literals
325onlyLiteralString :: Token -> String
326onlyLiteralString = getLiteralStringDef ""
327
328-- Maybe get a literal string, but only if it's an unquoted argument.
329getUnquotedLiteral (T_NormalWord _ list) =
330    concat <$> mapM str list
331  where
332    str (T_Literal _ s) = return s
333    str _ = Nothing
334getUnquotedLiteral _ = Nothing
335
336isQuotes t =
337    case t of
338        T_DoubleQuoted {} -> True
339        T_SingleQuoted {} -> True
340        _ -> False
341
342-- Get the last unquoted T_Literal in a word like "${var}foo"THIS
343-- or nothing if the word does not end in an unquoted literal.
344getTrailingUnquotedLiteral :: Token -> Maybe Token
345getTrailingUnquotedLiteral t =
346    case t of
347        (T_NormalWord _ list@(_:_)) ->
348            from (last list)
349        _ -> Nothing
350  where
351    from t =
352        case t of
353            T_Literal {} -> return t
354            _ -> Nothing
355
356-- Get the leading, unquoted, literal string of a token (if any).
357getLeadingUnquotedString :: Token -> Maybe String
358getLeadingUnquotedString t =
359    case t of
360        T_NormalWord _ ((T_Literal _ s) : rest) -> return $ s ++ from rest
361        _ -> Nothing
362  where
363    from ((T_Literal _ s):rest) = s ++ from rest
364    from _ = ""
365
366-- Maybe get the literal string of this token and any globs in it.
367getGlobOrLiteralString = getLiteralStringExt f
368  where
369    f (T_Glob _ str) = return str
370    f _ = Nothing
371
372-- Maybe get the literal value of a token, using a custom function
373-- to map unrecognized Tokens into strings.
374getLiteralStringExt :: Monad m => (Token -> m String) -> Token -> m String
375getLiteralStringExt more = g
376  where
377    allInList = fmap concat . mapM g
378    g (T_DoubleQuoted _ l) = allInList l
379    g (T_DollarDoubleQuoted _ l) = allInList l
380    g (T_NormalWord _ l) = allInList l
381    g (TA_Expansion _ l) = allInList l
382    g (T_SingleQuoted _ s) = return s
383    g (T_Literal _ s) = return s
384    g (T_ParamSubSpecialChar _ s) = return s
385    g (T_DollarSingleQuoted _ s) = return $ decodeEscapes s
386    g x = more x
387
388    -- Bash style $'..' decoding
389    decodeEscapes ('\\':c:cs) =
390        case c of
391            'a' -> '\a' : rest
392            'b' -> '\b' : rest
393            'e' -> '\x1B' : rest
394            'f' -> '\f' : rest
395            'n' -> '\n' : rest
396            'r' -> '\r' : rest
397            't' -> '\t' : rest
398            'v' -> '\v' : rest
399            '\'' -> '\'' : rest
400            '"' -> '"' : rest
401            '\\' -> '\\' : rest
402            'x' ->
403                case cs of
404                    (x:y:more) ->
405                        if isHexDigit x && isHexDigit y
406                        then chr (16*(digitToInt x) + (digitToInt y)) : rest
407                        else '\\':c:rest
408            _ | isOctDigit c ->
409                let digits = take 3 $ takeWhile isOctDigit (c:cs)
410                    num = parseOct digits
411                in (if num < 256 then chr num else '?') : rest
412            _ -> '\\' : c : rest
413      where
414        rest = decodeEscapes cs
415        parseOct = f 0
416          where
417            f n "" = n
418            f n (c:rest) = f (n * 8 + digitToInt c) rest
419    decodeEscapes (c:cs) = c : decodeEscapes cs
420    decodeEscapes [] = []
421
422-- Is this token a string literal?
423isLiteral t = isJust $ getLiteralString t
424
425-- Escape user data for messages.
426-- Messages generally avoid repeating user data, but sometimes it's helpful.
427e4m = escapeForMessage
428escapeForMessage :: String -> String
429escapeForMessage str = concatMap f str
430  where
431    f '\\' = "\\\\"
432    f '\n' = "\\n"
433    f '\r' = "\\r"
434    f '\t' = "\\t"
435    f '\x1B' = "\\e"
436    f c =
437        if shouldEscape c
438        then
439            if ord c < 256
440            then "\\x" ++ (pad0 2 $ toHex c)
441            else "\\U" ++ (pad0 4 $ toHex c)
442        else [c]
443
444    shouldEscape c =
445        (not $ isPrint c)
446        || (not (isAscii c) && not (isLetter c))
447
448    pad0 :: Int -> String -> String
449    pad0 n s =
450        let l = length s in
451            if l < n
452            then (replicate (n-l) '0') ++ s
453            else s
454    toHex :: Char -> String
455    toHex c = map toUpper $ showHex (ord c) ""
456
457-- Turn a NormalWord like foo="bar $baz" into a series of constituent elements like [foo=,bar ,$baz]
458getWordParts (T_NormalWord _ l)   = concatMap getWordParts l
459getWordParts (T_DoubleQuoted _ l) = l
460-- TA_Expansion is basically T_NormalWord for arithmetic expressions
461getWordParts (TA_Expansion _ l)   = concatMap getWordParts l
462getWordParts other                = [other]
463
464-- Return a list of NormalWords that would result from brace expansion
465braceExpand (T_NormalWord id list) = take 1000 $ do
466    items <- mapM part list
467    return $ T_NormalWord id items
468  where
469    part (T_BraceExpansion id items) = do
470        item <- items
471        braceExpand item
472    part x = return x
473
474-- Maybe get a SimpleCommand from immediate wrappers like T_Redirections
475getCommand t =
476    case t of
477        T_Redirecting _ _ w -> getCommand w
478        T_SimpleCommand _ _ (w:_) -> return t
479        T_Annotation _ _ t -> getCommand t
480        _ -> Nothing
481
482-- Maybe get the command name string of a token representing a command
483getCommandName :: Token -> Maybe String
484getCommandName = fst . getCommandNameAndToken False
485
486-- Maybe get the name+arguments of a command.
487getCommandArgv t = do
488    (T_SimpleCommand _ _ args@(_:_)) <- getCommand t
489    return args
490
491-- Get the command name token from a command, i.e.
492-- the token representing 'ls' in 'ls -la 2> foo'.
493-- If it can't be determined, return the original token.
494getCommandTokenOrThis = snd . getCommandNameAndToken False
495
496-- Given a command, get the string and token that represents the command name.
497-- If direct, return the actual command (e.g. exec in 'exec ls')
498-- If not, return the logical command (e.g. 'ls' in 'exec ls')
499
500getCommandNameAndToken :: Bool -> Token -> (Maybe String, Token)
501getCommandNameAndToken direct t = fromMaybe (Nothing, t) $ do
502    cmd@(T_SimpleCommand _ _ (w:rest)) <- getCommand t
503    s <- getLiteralString w
504    return $ fromMaybe (Just s, w) $ do
505        guard $ not direct
506        actual <- getEffectiveCommandToken s cmd rest
507        return (getLiteralString actual, actual)
508  where
509    getEffectiveCommandToken str cmd args =
510        let
511            firstArg = do
512                arg <- listToMaybe args
513                guard . not $ isFlag arg
514                return arg
515        in
516            case str of
517                "busybox" -> firstArg
518                "builtin" -> firstArg
519                "command" -> firstArg
520                "run" -> firstArg -- Used by bats
521                "exec" -> do
522                    opts <- getBsdOpts "cla:" args
523                    (_, (t, _)) <- find (null . fst) opts
524                    return t
525                _ -> fail ""
526
527-- If a command substitution is a single command, get its name.
528--  $(date +%s) = Just "date"
529getCommandNameFromExpansion :: Token -> Maybe String
530getCommandNameFromExpansion t =
531    case t of
532        T_DollarExpansion _ [c] -> extract c
533        T_Backticked _ [c] -> extract c
534        T_DollarBraceCommandExpansion _ [c] -> extract c
535        _ -> Nothing
536  where
537    extract (T_Pipeline _ _ [cmd]) = getCommandName cmd
538    extract _ = Nothing
539
540-- Get the basename of a token representing a command
541getCommandBasename = fmap basename . getCommandName
542
543basename = reverse . takeWhile (/= '/') . reverse
544
545isAssignment t =
546    case t of
547        T_Redirecting _ _ w -> isAssignment w
548        T_SimpleCommand _ (w:_) [] -> True
549        T_Assignment {} -> True
550        T_Annotation _ _ w -> isAssignment w
551        _ -> False
552
553isOnlyRedirection t =
554    case t of
555        T_Pipeline _ _ [x] -> isOnlyRedirection x
556        T_Annotation _ _ w -> isOnlyRedirection w
557        T_Redirecting _ (_:_) c -> isOnlyRedirection c
558        T_SimpleCommand _ [] [] -> True
559        _ -> False
560
561isFunction t = case t of T_Function {} -> True; _ -> False
562
563-- Bats tests are functions for the purpose of 'local' and such
564isFunctionLike t =
565    case t of
566        T_Function {} -> True
567        T_BatsTest {} -> True
568        _ -> False
569
570
571isBraceExpansion t = case t of T_BraceExpansion {} -> True; _ -> False
572
573-- Get the lists of commands from tokens that contain them, such as
574-- the conditions and bodies of while loops or branches of if statements.
575getCommandSequences :: Token -> [[Token]]
576getCommandSequences t =
577    case t of
578        T_Script _ _ cmds -> [cmds]
579        T_BraceGroup _ cmds -> [cmds]
580        T_Subshell _ cmds -> [cmds]
581        T_WhileExpression _ cond cmds -> [cond, cmds]
582        T_UntilExpression _ cond cmds -> [cond, cmds]
583        T_ForIn _ _ _ cmds -> [cmds]
584        T_ForArithmetic _ _ _ _ cmds -> [cmds]
585        T_IfExpression _ thens elses -> (concatMap (\(a,b) -> [a,b]) thens) ++ [elses]
586        T_Annotation _ _ t -> getCommandSequences t
587
588        T_DollarExpansion _ cmds -> [cmds]
589        T_DollarBraceCommandExpansion _ cmds -> [cmds]
590        T_Backticked _ cmds -> [cmds]
591        _ -> []
592
593-- Get a list of names of associative arrays
594getAssociativeArrays t =
595    nub . execWriter $ doAnalysis f t
596  where
597    f :: Token -> Writer [String] ()
598    f t@T_SimpleCommand {} = sequence_ $ do
599        name <- getCommandName t
600        let assocNames = ["declare","local","typeset"]
601        guard $ name `elem` assocNames
602        let flags = getAllFlags t
603        guard $ "A" `elem` map snd flags
604        let args = [arg | (arg, "") <- flags]
605        let names = mapMaybe (getLiteralStringExt nameAssignments) args
606        return $ tell names
607    f _ = return ()
608
609    nameAssignments t =
610        case t of
611            T_Assignment _ _ name _ _ -> return name
612            _ -> Nothing
613
614-- A Pseudoglob is a wildcard pattern used for checking if a match can succeed.
615-- For example, [[ $(cmd).jpg == [a-z] ]] will give the patterns *.jpg and ?, which
616-- can be proven never to match.
617data PseudoGlob = PGAny | PGMany | PGChar Char
618    deriving (Eq, Show)
619
620-- Turn a word into a PG pattern, replacing all unknown/runtime values with
621-- PGMany.
622wordToPseudoGlob :: Token -> [PseudoGlob]
623wordToPseudoGlob = fromMaybe [PGMany] . wordToPseudoGlob' False
624
625-- Turn a word into a PG pattern, but only if we can preserve
626-- exact semantics.
627wordToExactPseudoGlob :: Token -> Maybe [PseudoGlob]
628wordToExactPseudoGlob = wordToPseudoGlob' True
629
630wordToPseudoGlob' :: Bool -> Token -> Maybe [PseudoGlob]
631wordToPseudoGlob' exact word =
632    simplifyPseudoGlob <$> toGlob word
633  where
634    toGlob :: Token -> Maybe [PseudoGlob]
635    toGlob word =
636        case word of
637            T_NormalWord _ (T_Literal _ ('~':str):rest) -> do
638                guard $ not exact
639                let this = (PGMany : (map PGChar $ dropWhile (/= '/') str))
640                tail <- concat <$> (mapM f $ concatMap getWordParts rest)
641                return $ this ++ tail
642            _ -> concat <$> (mapM f $ getWordParts word)
643
644    f x = case x of
645        T_Literal _ s      -> return $ map PGChar s
646        T_SingleQuoted _ s -> return $ map PGChar s
647        T_Glob _ "?"       -> return [PGAny]
648        T_Glob _ "*"       -> return [PGMany]
649        T_Glob _ ('[':_) | not exact -> return [PGAny]
650        _ -> if exact then fail "" else return [PGMany]
651
652
653-- Reorder a PseudoGlob for more efficient matching, e.g.
654-- f?*?**g -> f??*g
655simplifyPseudoGlob :: [PseudoGlob] -> [PseudoGlob]
656simplifyPseudoGlob = f
657  where
658    f [] = []
659    f (x@(PGChar _) : rest ) = x : f rest
660    f list =
661        let (anys, rest) = span (\x -> x == PGMany || x == PGAny) list in
662            order anys ++ f rest
663
664    order s = let (any, many) = partition (== PGAny) s in
665        any ++ take 1 many
666
667-- Check whether the two patterns can ever overlap.
668pseudoGlobsCanOverlap :: [PseudoGlob] -> [PseudoGlob] -> Bool
669pseudoGlobsCanOverlap = matchable
670  where
671    matchable x@(xf:xs) y@(yf:ys) =
672        case (xf, yf) of
673            (PGMany, _) -> matchable x ys || matchable xs y
674            (_, PGMany) -> matchable x ys || matchable xs y
675            (PGAny, _) -> matchable xs ys
676            (_, PGAny) -> matchable xs ys
677            (_, _) -> xf == yf && matchable xs ys
678
679    matchable [] [] = True
680    matchable (PGMany : rest) [] = matchable rest []
681    matchable (_:_) [] = False
682    matchable [] r = matchable r []
683
684-- Check whether the first pattern always overlaps the second.
685pseudoGlobIsSuperSetof :: [PseudoGlob] -> [PseudoGlob] -> Bool
686pseudoGlobIsSuperSetof = matchable
687  where
688    matchable x@(xf:xs) y@(yf:ys) =
689        case (xf, yf) of
690            (PGMany, PGMany) -> matchable x ys
691            (PGMany, _) -> matchable x ys || matchable xs y
692            (_, PGMany) -> False
693            (PGAny, _) -> matchable xs ys
694            (_, PGAny) -> False
695            (_, _) -> xf == yf && matchable xs ys
696
697    matchable [] [] = True
698    matchable (PGMany : rest) [] = matchable rest []
699    matchable _ _ = False
700
701wordsCanBeEqual x y = pseudoGlobsCanOverlap (wordToPseudoGlob x) (wordToPseudoGlob y)
702
703-- Is this an expansion that can be quoted,
704-- e.g. $(foo) `foo` $foo (but not {foo,})?
705isQuoteableExpansion t = case t of
706    T_DollarBraced {} -> True
707    _ -> isCommandSubstitution t
708
709isCommandSubstitution t = case t of
710    T_DollarExpansion {} -> True
711    T_DollarBraceCommandExpansion {} -> True
712    T_Backticked {} -> True
713    _ -> False
714
715-- Is this an expansion that results in a simple string?
716isStringExpansion t = isCommandSubstitution t || case t of
717    T_DollarArithmetic {} -> True
718    T_DollarBraced {} -> not (isArrayExpansion t)
719    _ -> False
720
721-- Is this a T_Annotation that ignores a specific code?
722isAnnotationIgnoringCode code t =
723    case t of
724        T_Annotation _ anns _ -> any hasNum anns
725        _ -> False
726  where
727    hasNum (DisableComment from to) = code >= from && code < to
728    hasNum _                   = False
729
730prop_executableFromShebang1 = executableFromShebang "/bin/sh" == "sh"
731prop_executableFromShebang2 = executableFromShebang "/bin/bash" == "bash"
732prop_executableFromShebang3 = executableFromShebang "/usr/bin/env ksh" == "ksh"
733prop_executableFromShebang4 = executableFromShebang "/usr/bin/env -S foo=bar bash -x" == "bash"
734prop_executableFromShebang5 = executableFromShebang "/usr/bin/env --split-string=bash -x" == "bash"
735prop_executableFromShebang6 = executableFromShebang "/usr/bin/env --split-string=foo=bar bash -x" == "bash"
736prop_executableFromShebang7 = executableFromShebang "/usr/bin/env --split-string bash -x" == "bash"
737prop_executableFromShebang8 = executableFromShebang "/usr/bin/env --split-string foo=bar bash -x" == "bash"
738prop_executableFromShebang9 = executableFromShebang "/usr/bin/env foo=bar dash" == "dash"
739prop_executableFromShebang10 = executableFromShebang "/bin/busybox sh" == "ash"
740prop_executableFromShebang11 = executableFromShebang "/bin/busybox ash" == "ash"
741
742-- Get the shell executable from a string like '/usr/bin/env bash'
743executableFromShebang :: String -> String
744executableFromShebang = shellFor
745  where
746    re = mkRegex "/env +(-S|--split-string=?)? *(.*)"
747    shellFor s | s `matches` re =
748        case matchRegex re s of
749            Just [flag, shell] -> fromEnvArgs (words shell)
750            _ -> ""
751    shellFor sb =
752        case words sb of
753            [] -> ""
754            [x] -> basename x
755            (first:second:args) | basename first == "busybox" ->
756                case basename second of
757                   "sh" -> "ash" -- busybox sh is ash
758                   x -> x
759            (first:args) | basename first == "env" ->
760                fromEnvArgs args
761            (first:_) -> basename first
762
763    fromEnvArgs args = fromMaybe "" $ find (notElem '=') $ skipFlags args
764    basename s = reverse . takeWhile (/= '/') . reverse $ s
765    skipFlags = dropWhile ("-" `isPrefixOf`)
766
767return []
768runTests = $quickCheckAll
769