1--
2-- Copyright (c) 2013-2019 Nicola Bonelli <nicola@pfq.io>
3--
4-- This program is free software; you can redistribute it and/or modify
5-- it under the terms of the GNU General Public License as published by
6-- the Free Software Foundation; either version 2 of the License, or
7-- (at your option) any later version.
8--
9-- This program is distributed in the hope that it will be useful,
10-- but WITHOUT ANY WARRANTY; without even the implied warranty of
11-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12-- GNU General Public License for more details.
13--
14-- You should have received a copy of the GNU General Public License
15-- along with this program; if not, write to the Free Software
16-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17--
18
19{-# LANGUAGE RecordWildCards #-}
20
21module Main where
22
23import Data.List
24import Data.List.Split (chunksOf)
25import qualified Data.Map as M
26import Data.Maybe
27import Data.Char
28import Data.Data()
29
30import Data.IORef
31import Data.Version(showVersion)
32import Data.Function
33import qualified Data.Set as Set
34import Paths_cgrep
35
36import Control.Exception as E
37import Control.Concurrent
38import Control.Concurrent.Async
39import Control.Monad.STM
40import Control.Concurrent.STM.TChan
41
42import Control.Monad
43import Control.Monad.Trans
44import Control.Monad.Trans.Except
45import Control.Monad.Trans.Reader
46import Control.Applicative
47
48import System.Console.CmdArgs
49import System.Directory
50import System.FilePath ((</>))
51import System.Environment
52import System.PosixCompat.Files as PosixCompat
53import System.IO
54import System.Exit
55import System.Process (readProcess, runProcess, waitForProcess)
56
57import CGrep.CGrep
58import CGrep.Lang
59import CGrep.Output
60import CGrep.Common
61import CGrep.Parser.WildCard
62
63import CmdOptions
64import Options
65import Util
66import Debug
67import Config
68import Reader
69
70import qualified Data.ByteString as B
71import qualified Data.ByteString.Char8 as C
72import qualified Codec.Binary.UTF8.String as UC
73
74import Data.Tuple.Extra
75
76fileFilter :: Options -> [Lang] -> FilePath -> Bool
77fileFilter opts langs filename = maybe False (liftA2 (||) (const $ null langs) (`elem` langs)) (getFileLang opts filename)
78
79
80getFilesMagic :: [FilePath] -> IO [String]
81getFilesMagic filenames = lines <$> readProcess "/usr/bin/file" ("-b" : filenames) []
82
83
84-- push file names in Chan...
85
86withRecursiveContents :: Options -> FilePath -> [Lang] -> [String] -> Set.Set FilePath -> ([FilePath] -> IO ()) -> IO ()
87withRecursiveContents opts dir langs pdirs visited action = do
88    isDir <-  doesDirectoryExist dir
89    if isDir then do
90               xs <- getDirectoryContents dir
91
92               (dirs,files) <- partitionM doesDirectoryExist [dir </> x | x <- xs, x `notElem` [".", ".."]]
93
94               magics <- if null (magic_filter opts) || null files
95                          then return []
96                          else getFilesMagic files
97
98               -- filter the list of files
99               --
100               let files' = if null magics
101                              then  filter (fileFilter opts langs) files
102                              else  catMaybes $ zipWith (\f m ->  if any (`isInfixOf` m) (magic_filter opts) then Just f else Nothing ) files magics
103
104               unless (null files') $
105                    let chunks = chunksOf (Options.chunk opts) files' in
106                    forM_ chunks $ \b -> action b
107
108               -- process dirs
109               --
110               forM_ dirs $ \path -> do
111                    lstatus <- getSymbolicLinkStatus path
112                    when ( deference_recursive opts || not (PosixCompat.isSymbolicLink lstatus)) $
113                        unless (isPruneableDir path pdirs) $ do -- this is a good directory (unless already visited)!
114                            cpath <- canonicalizePath path
115                            unless (cpath `Set.member` visited) $
116                                withRecursiveContents opts path langs pdirs (Set.insert cpath visited) action
117             else action [dir]
118
119
120isPruneableDir:: FilePath -> [FilePath] -> Bool
121isPruneableDir dir = any (`isSuffixOf` pdir)
122    where pdir = mkPrunableDirName dir
123
124mkPrunableDirName :: FilePath -> FilePath
125mkPrunableDirName xs | "/" `isSuffixOf` xs = xs
126                     | otherwise           = xs ++ "/"
127
128-- read patterns from file
129
130readPatternsFromFile :: FilePath -> IO [C.ByteString]
131readPatternsFromFile f =
132    if null f then return []
133              else map trim8 . C.lines <$> C.readFile f
134
135getFilePaths :: Bool        ->     -- pattern(s) from file
136                [String]    ->     -- list of patterns and files
137                [String]
138getFilePaths False xs = if length xs == 1 then [] else tail xs
139getFilePaths True  xs = xs
140
141
142parallelSearch :: [FilePath] -> [C.ByteString] -> [Lang] -> (Bool, Bool) -> OptionT IO ()
143parallelSearch paths patterns langs (isTermIn, _) = do
144
145    (conf@Config{..}, opts@Options{..}) <- ask
146
147    -- create Transactional Chan and Vars...
148
149    in_chan  <- liftIO newTChanIO
150    out_chan <- liftIO newTChanIO
151
152
153    -- launch worker threads...
154
155    forM_ [1 .. jobs] $ \_ -> liftIO . forkIO $
156        void $ runExceptT . forever $ do
157            fs <- lift $ atomically $ readTChan in_chan
158            lift $
159                E.catch (
160                    case fs of
161                        [] -> atomically $ writeTChan out_chan []
162                        xs -> void $ (if asynch then flip mapConcurrently
163                                                else forM) xs $ \x -> do
164                                out <- fmap (take max_count ) (runReaderT (runCgrep conf opts x patterns) (conf, sanitizeOptions x opts))
165                                unless (null out) $ atomically $ writeTChan out_chan out)
166                       (\e -> let msg = show (e :: SomeException) in
167                            hPutStrLn stderr (showFileName conf opts (getTargetName (head fs))
168                                ++ ": exception: " ++ takeN 80 msg))
169            when (null fs) $ throwE ()
170
171
172    -- push the files to grep for...
173
174    _ <- liftIO . forkIO $ do
175
176        if recursive || deference_recursive
177            then forM_ (if null paths then ["."] else paths) $ \p ->
178                    withRecursiveContents opts p langs
179                        (mkPrunableDirName <$> configPruneDirs ++ prune_dir) (Set.singleton p) (atomically . writeTChan in_chan)
180
181            else forM_ (if null paths && not isTermIn then [""] else paths) (atomically . writeTChan in_chan . (:[]))
182
183        -- enqueue EOF messages:
184
185        replicateM_ jobs ((atomically . writeTChan in_chan) [])
186
187    -- dump output until workers are done
188
189    putPrettyHeader
190
191    let stop = jobs
192
193    matchingFiles <- liftIO $ newIORef Set.empty
194
195    fix (\action n m ->
196         unless (n == stop) $ do
197                 out <- liftIO $ atomically $ readTChan out_chan
198                 case out of
199                      [] -> action (n+1) m
200                      _  -> do
201                          case () of
202                            _ | json -> when m $ liftIO $ putStrLn ","
203                              | otherwise -> return ()
204                          let out' = map (\p -> p {outTokens = map (\(off, s) -> (length $ UC.decode $ B.unpack $ C.take off $ outLine p, UC.decodeString s)) $ outTokens p}) out
205                          prettyOutput out' >>= mapM_ (liftIO . putStrLn)
206                          liftIO $ when (vim || editor) $
207                                    mapM_ (modifyIORef matchingFiles . Set.insert . (outFilePath &&& outLineNo)) out
208                          action n True
209        )  0 False
210
211
212    putPrettyFooter
213
214    -- run editor...
215
216    when (vim || editor ) $ liftIO $ do
217
218        editor' <- if vim
219                    then return (Just "vim")
220                    else lookupEnv "EDITOR"
221
222        files   <- Set.toList <$> readIORef matchingFiles
223
224        let editFiles = (if fileline || configFileLine
225                            then fmap (\(a,b) -> a ++ ":" ++ show b)
226                            else nub . sort . fmap fst) files
227
228        putStrLn $ "cgrep: open files " ++ unwords editFiles ++ "..."
229        void $ runProcess (fromJust $ editor' <|> Just "vi")
230                          editFiles
231                          Nothing
232                          Nothing
233                          (Just stdin)
234                          (Just stdout)
235                          (Just stderr) >>= waitForProcess
236
237
238main :: IO ()
239main = do
240    -- check whether this is a terminal device
241
242    isTermIn  <- hIsTerminalDevice stdin
243    isTermOut <- hIsTerminalDevice stdout
244
245    -- read Cgrep config options
246
247    (conf, _)  <- getConfig
248
249    -- read command-line options
250
251    opts  <- (if isTermOut
252                then \o -> o { color = color o || configColors conf }
253                else id) <$> cmdArgsRun options
254
255    -- check for multiple backends...
256
257    when (length (catMaybes [
258#ifdef ENABLE_HINT
259                hint opts,
260#endif
261                format opts,
262                if xml opts  then Just "" else Nothing,
263                if json opts then Just "" else Nothing
264               ]) > 1)
265        $ error "you can use one back-end at time!"
266
267
268    -- display lang-map and exit...
269
270    when (language_map opts) $
271        dumpLangMap langMap >> exitSuccess
272
273    -- check whether the pattern list is empty, display help message if it's the case
274
275    when (null (others opts) && isTermIn && null (file opts)) $
276        withArgs ["--help"] $ void (cmdArgsRun options)
277
278    -- load patterns:
279
280    patterns <- if null (file opts) then return $ map (C.pack . UC.encodeString) (((:[]).head.others) opts)
281                                    else readPatternsFromFile $ file opts
282
283    let patterns' = map (if ignore_case opts then ic else id) patterns
284            where ic | (not . isRegexp) opts && semantic opts = C.unwords . map (\p -> if C.unpack p `elem` wildCardTokens then p else C.map toLower p) . C.words
285                     | otherwise = C.map toLower
286                        where wildCardTokens = "OR" : M.keys wildCardMap   -- "OR" is not included in wildCardMap
287
288    -- display the configuration in use
289
290    -- when (isJust confpath) $
291    --    hPutStrLn stderr $ showBold opts ("Using '" ++ fromJust confpath ++ "' configuration file...")
292
293    -- load files to parse:
294
295    let paths = getFilePaths (notNull (file opts)) (others opts)
296
297    -- parse cmd line language list:
298
299    let (l0, l1, l2) = splitLangList (language_filter opts)
300
301    -- language enabled:
302
303    let langs = (if null l0 then configLanguages conf else l0 `union` l1) \\ l2
304
305    runReaderT (do putStrLevel1 $ "Cgrep " ++ showVersion version ++ "!"
306                   putStrLevel1 $ "options   : " ++ show opts
307                   putStrLevel1 $ "config    : " ++ show conf
308                   putStrLevel1 $ "languages : " ++ show langs
309                   putStrLevel1 $ "pattern   : " ++ show patterns'
310                   putStrLevel1 $ "files     : " ++ show paths
311                   putStrLevel1 $ "isTermIn  : " ++ show isTermIn
312                   putStrLevel1 $ "isTermOut : " ++ show isTermOut
313        ) (conf, opts)
314
315    -- specify number of cores
316
317    when (cores opts /= 0) $ setNumCapabilities (cores opts)
318
319    -- run search
320
321    runReaderT (parallelSearch paths patterns' langs (isTermIn, isTermOut)) (conf, opts)
322
323
324