1{-# LANGUAGE Rank2Types, CPP, FlexibleContexts #-}
2module Categorize where
3
4import Data
5
6import qualified Text.Regex.PCRE.Light.Text as RE
7import qualified Data.MyText as T
8import Data.MyText (Text)
9import Control.Applicative (empty, (<*))
10import Control.Monad
11import Control.Monad.Instances()
12import Control.Monad.Trans.Reader hiding (local)
13import Control.Monad.Reader.Class (local)
14import Control.Monad.Trans.Class
15import Data.Functor.Identity
16
17import Control.Applicative ((<$>))
18import Control.DeepSeq
19import Data.Char
20import Data.List
21import Data.Map.Strict (Map)
22import qualified Data.Map.Strict as Map
23import Data.Maybe
24import Data.Time.Calendar (toGregorian, fromGregorian)
25import Data.Time.Calendar.WeekDate (toWeekDate)
26import Data.Time.Clock
27import Data.Time.Format (formatTime)
28import Data.Time.LocalTime
29import System.Exit
30import System.IO
31import Text.Show.Functions
32import Text.Parsec
33import Text.Parsec.ExprFail
34import Text.Parsec.Token
35#if MIN_VERSION_time(1,5,0)
36import Data.Time.Format(defaultTimeLocale, iso8601DateFormat)
37#else
38import System.Locale (defaultTimeLocale, iso8601DateFormat)
39#endif
40import Debug.Trace
41import Text.Printf
42
43type Categorizer = TimeLog CaptureData -> TimeLog (Ctx, ActivityData)
44type Rule = Ctx -> ActivityData
45type Environment = Map String Cond
46
47type Parser = ParsecT String () (ReaderT (TimeZone, Environment) Identity)
48
49data Ctx = Ctx
50        { cNow :: TimeLogEntry CaptureData
51        , cCurrentWindow :: Maybe (Bool, Text, Text)
52        , cWindowInScope :: Maybe (Bool, Text, Text)
53        , cSubsts :: [Text]
54        , cCurrentTime :: UTCTime
55        , cTimeZone :: TimeZone
56        , conditionBindings :: Map String Cond
57        } deriving Show
58
59instance NFData Ctx where
60    rnf (Ctx a b c d e f g) = a `deepseq` b `deepseq` c `deepseq` d `deepseq` e `deepseq` f `deepseq` g `deepseq` ()
61
62type Cond = CtxFun [Text]
63
64type CtxFun a = Ctx -> Maybe a
65
66data CondPrim
67        = CondString (CtxFun Text)
68        | CondRegex (CtxFun RE.Regex)
69        | CondInteger (CtxFun Integer)
70        | CondTime (CtxFun NominalDiffTime)
71        | CondDate (CtxFun UTCTime)
72        | CondCond (CtxFun [Text])
73        | CondStringList (CtxFun [Text])
74        | CondRegexList (CtxFun [RE.Regex])
75
76newtype Cmp = Cmp (forall a. Ord a => a -> a -> Bool)
77
78data DateVar = DvDate | DvNow
79
80data TimeVar = TvTime | TvSampleAge
81
82data NumVar = NvIdle
83
84runParserStack :: Stream s (ReaderT r Identity) t
85               => r
86               -> ParsecT s () (ReaderT r Identity) a
87               -> SourceName
88               -> s
89               -> Either ParseError a
90runParserStack env p filename =
91  runIdentity . flip runReaderT env . runParserT p () filename
92
93readCategorizer :: FilePath -> IO Categorizer
94readCategorizer filename = withFile filename ReadMode $ \h -> do
95        hSetEncoding h utf8
96        content <- hGetContents h
97        time <- getCurrentTime
98        tz <- getCurrentTimeZone
99        case runParserStack (tz, Map.empty) (between (return ()) eof parseRules) filename content of
100          Left err -> do
101                putStrLn "Parser error:"
102                print err
103                exitFailure
104          Right cat -> return
105                (map (fmap (mkSecond (postpare . cat))) . prepare time tz)
106
107applyCond :: String -> TimeZone -> Environment -> TimeLogEntry (Ctx, ActivityData) -> Bool
108applyCond s tz env =
109        case runParserStack (tz, env) (parseCond <* eof) "command line parameter" s of
110          Left err -> error (show err)
111          Right c  -> isJust . c . fst . tlData
112
113prepare :: UTCTime -> TimeZone -> TimeLog CaptureData -> TimeLog Ctx
114prepare time tz = map go
115  where go now  = now {tlData = Ctx now (findActive (cWindows (tlData now))) Nothing [] time tz Map.empty }
116
117-- | Here, we filter out tags appearing twice, and make sure that only one of
118--   each category survives
119postpare :: ActivityData -> ActivityData
120postpare = nubBy go
121  where go (Activity (Just c1) _) (Activity (Just c2) _) = c1 == c2
122        go a1                     a2                     = a1 == a2
123
124lang :: GenTokenParser String () (ReaderT (TimeZone, Environment) Identity)
125lang = makeTokenParser LanguageDef
126                { commentStart   = "{-"
127                , commentEnd     = "-}"
128                , commentLine    = "--"
129                , nestedComments = True
130                , identStart     = letter
131                , identLetter    = alphaNum <|> oneOf "_'"
132                , opStart        = oneOf ":!#$%&*+./<=>?@\\^|-~"
133                , opLetter       = oneOf ":!#$%&*+./<=>?@\\^|-~"
134                , reservedOpNames= []
135                , reservedNames  = [ "title"
136                                   , "program"
137                                   , "active"
138                                   , "idle"
139                                   , "time"
140                                   , "sampleage"
141                                   , "date"
142                                   , "now"
143                                   , "desktop"
144                                   ]
145                , caseSensitive  = True
146                }
147
148parseRules :: Parser Rule
149parseRules = do
150        whiteSpace lang
151        a <- option id (reserved lang "aliases" >> parens lang parseAliasSpecs)
152        rb <- parseRulesBody
153        return (a . rb)
154
155parseAliasSpecs :: Parser (ActivityData -> ActivityData)
156parseAliasSpecs = do as <- sepEndBy1 parseAliasSpec (comma lang)
157                     return $ \ad -> foldr doAlias ad as
158
159doAlias :: (Text, Text) -> ActivityData -> ActivityData
160doAlias (s1,s2) = map go
161  where go (Activity cat tag) = Activity (if cat == Just s1 then Just s2 else cat)
162                                         (if tag == s1 then s2 else tag)
163
164parseAliasSpec :: Parser (Text, Text)
165parseAliasSpec = do s1 <- T.pack <$> stringLiteral lang
166                    reservedOp lang "->"
167                    s2 <- T.pack <$> stringLiteral lang
168                    return (s1,s2)
169
170parseRulesBody :: Parser Rule
171parseRulesBody = do
172        x <- parseRule
173        choice [ do _ <- comma lang
174                    xs <- parseRule `sepEndBy1` comma lang
175                    return (matchAny (x:xs))
176               , do _ <- semi lang
177                    xs <- parseRule `sepEndBy1` semi lang
178                    return (matchFirst (x:xs))
179               ,    return x
180               ]
181
182withBinding :: String -> Cond -> Parser a -> Parser a
183withBinding k v = local (\(tz,env) -> (tz, Map.insert k v env))
184
185parseConditionBinding :: Parser Rule
186parseConditionBinding = do
187  _ <- reserved lang "condition"
188  varname <- identifier lang
189  _ <- reservedOp lang "="
190  cond <- parseCond
191  _ <- reserved lang "in"
192  withBinding varname cond parseRule
193
194parseRule :: Parser Rule
195parseRule = choice
196        [    braces lang parseRules
197        , do cond <- parseCond
198             reservedOp lang "==>"
199             rule <- parseRule
200             return (ifThenElse cond rule matchNone)
201        , do reserved lang "if"
202             cond <- parseCond
203             reserved lang "then"
204             rule1 <- parseRule
205             reserved lang "else"
206             rule2 <- parseRule
207             return (ifThenElse cond rule1 rule2)
208        , do reserved lang "tag"
209             parseSetTag
210        , parseConditionBinding
211        ]
212
213parseCond :: Parser Cond
214parseCond = do cp <- parseCondExpr
215               case cp of
216                CondCond c -> return c
217                _          -> fail $ printf "Expression of type %s" (cpType cp)
218
219parseCondExpr :: Parser CondPrim
220parseCondExpr = buildExpressionParser [
221                [ Prefix (reservedOp lang "!" >> return checkNot) ],
222                [ Prefix (reserved lang "day of week" >> return evalDayOfWeek)
223                , Prefix (reserved lang "day of month" >> return evalDayOfMonth)
224                , Prefix (reserved lang "month" >> return evalMonth)
225                , Prefix (reserved lang "year" >> return evalYear)
226                , Prefix (reserved lang "format" >> return formatDate) ],
227                [ Infix (reservedOp lang "=~" >> return checkRegex) AssocNone
228                , Infix (checkCmp <$> parseCmp) AssocNone
229                ],
230                [ Prefix (reserved lang "current window" >> return checkCurrentwindow)
231                , Prefix (reserved lang "any window" >> return checkAnyWindow)
232                ],
233                [ Infix (reservedOp lang "&&" >> return checkAnd) AssocRight ],
234                [ Infix (reservedOp lang "||" >> return checkOr) AssocRight ]
235            ] parseCondPrim
236
237cpType :: CondPrim -> String
238cpType (CondString _) = "String"
239cpType (CondRegex _) = "Regex"
240cpType (CondInteger _) = "Integer"
241cpType (CondTime _) = "Time"
242cpType (CondDate _) = "Date"
243cpType (CondCond _) = "Condition"
244cpType (CondStringList _) = "List of Strings"
245cpType (CondRegexList _) = "List of regular expressions"
246
247checkRegex :: CondPrim -> CondPrim -> Erring CondPrim
248checkRegex (CondString getStr) (CondRegex getRegex) = Right $ CondCond $ \ctx -> do
249        str <- getStr ctx
250        regex <- getRegex ctx
251        tail <$> RE.match regex str [RE.exec_no_utf8_check]
252checkRegex (CondString getStr) (CondRegexList getRegexList) = Right $ CondCond $ \ctx -> do
253        str <- getStr ctx
254        regexes <- getRegexList ctx
255        tail <$> msum (map (\regex -> RE.match regex str [RE.exec_no_utf8_check]) regexes)
256checkRegex cp1 cp2 = Left $
257        printf "Cannot apply =~ to an expression of type %s and type %s"
258               (cpType cp1) (cpType cp2)
259
260checkAnd :: CondPrim-> CondPrim -> Erring CondPrim
261checkAnd (CondCond c1) (CondCond c2) = Right $ CondCond $ do
262        res1 <- c1
263        res2 <- c2
264        return $ res1 >> res2
265checkAnd cp1 cp2 = Left $
266        printf "Cannot apply && to an expression of type %s and type %s"
267               (cpType cp1) (cpType cp2)
268
269checkOr :: CondPrim-> CondPrim -> Erring CondPrim
270checkOr (CondCond c1) (CondCond c2) = Right $ CondCond $ do
271        res1 <- c1
272        res2 <- c2
273        return $ res1 `mplus` res2
274checkOr cp1 cp2 = Left $
275        printf "Cannot apply && to an expression of type %s and type %s"
276               (cpType cp1) (cpType cp2)
277
278checkNot :: CondPrim -> Erring CondPrim
279checkNot (CondCond getCnd) = Right . CondCond $ fmap (maybe (Just []) (const Nothing)) getCnd
280checkNot cp = Left $
281        printf "Cannot apply ! to an expression of type %s"
282               (cpType cp)
283
284checkCmp :: Cmp -> CondPrim -> CondPrim -> Erring CondPrim
285checkCmp (Cmp (?)) (CondInteger getN1) (CondInteger getN2) = Right $ CondCond $ \ctx -> do
286        n1 <- getN1 ctx
287        n2 <- getN2 ctx
288        guard (n1 ? n2)
289        return []
290checkCmp (Cmp (?)) (CondTime getT1) (CondTime getT2) = Right $ CondCond $ \ctx -> do
291        t1 <- getT1 ctx
292        t2 <- getT2 ctx
293        guard (t1 ? t2)
294        return []
295checkCmp (Cmp (?)) (CondDate getT1) (CondDate getT2) = Right $ CondCond $ \ctx -> do
296        t1 <- getT1 ctx
297        t2 <- getT2 ctx
298        guard (t1 ? t2)
299        return []
300checkCmp (Cmp (?)) (CondString getS1) (CondString getS2) = Right $ CondCond $ \ctx -> do
301        s1 <- getS1 ctx
302        s2 <- getS2 ctx
303        guard (s1 ? s2)
304        return []
305checkCmp (Cmp (?)) (CondString getS1) (CondStringList getS2) = Right $ CondCond $ \ctx -> do
306        s1 <- getS1 ctx
307        sl <- getS2 ctx
308        guard (any (s1 ?) sl)
309        return []
310checkCmp _ cp1 cp2 = Left $
311        printf "Cannot compare expressions of type %s and type %s"
312               (cpType cp1) (cpType cp2)
313
314checkCurrentwindow :: CondPrim -> Erring CondPrim
315checkCurrentwindow (CondCond cond) = Right $ CondCond $ \ctx ->
316        cond (ctx { cWindowInScope = cCurrentWindow ctx })
317checkCurrentwindow cp = Left $
318        printf "Cannot apply current window to an expression of type %s"
319               (cpType cp)
320
321checkAnyWindow :: CondPrim -> Erring CondPrim
322checkAnyWindow (CondCond cond) = Right $ CondCond $ \ctx ->
323        msum $ map (\w -> cond (ctx { cWindowInScope = Just w }))
324                                     (cWindows (tlData (cNow ctx)))
325checkAnyWindow cp = Left $
326        printf "Cannot apply current window to an expression of type %s"
327               (cpType cp)
328
329fst3 :: (a,b,c) -> a
330fst3 (a,_,_) = a
331
332snd3 :: (a,b,c) -> b
333snd3 (_,b,_) = b
334
335trd3 :: (a,b,c) -> c
336trd3 (_,_,c) = c
337
338-- Day of week is an integer in [1..7].
339evalDayOfWeek :: CondPrim -> Erring CondPrim
340evalDayOfWeek (CondDate df) = Right $ CondInteger $ \ctx ->
341  let tz = cTimeZone ctx in
342  (toInteger . trd3 . toWeekDate . localDay . utcToLocalTime tz) `fmap` df ctx
343evalDayOfWeek cp = Left $ printf
344  "Cannot apply day of week to an expression of type %s, only to $date."
345  (cpType cp)
346
347-- Day of month is an integer in [1..31].
348evalDayOfMonth :: CondPrim -> Erring CondPrim
349evalDayOfMonth (CondDate df) = Right $ CondInteger $ \ctx ->
350  let tz = cTimeZone ctx in
351  (toInteger . trd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
352evalDayOfMonth cp = Left $ printf
353  "Cannot apply day of month to an expression of type %s, only to $date."
354  (cpType cp)
355
356-- Month is an integer in [1..12].
357evalMonth :: CondPrim -> Erring CondPrim
358evalMonth (CondDate df) = Right $ CondInteger $ \ctx ->
359  let tz = cTimeZone ctx in
360  (toInteger . snd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
361evalMonth cp = Left $ printf
362  "Cannot apply month to an expression of type %s, only to $date."
363  (cpType cp)
364
365evalYear :: CondPrim -> Erring CondPrim
366evalYear (CondDate df) = Right $ CondInteger $ \ctx ->
367  let tz = cTimeZone ctx in
368  (fst3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx
369evalYear cp = Left $ printf
370  "Cannot apply year to an expression of type %s, only to $date."
371  (cpType cp)
372
373-- format date according to ISO 8601 (YYYY-MM-DD)
374formatDate :: CondPrim -> Erring CondPrim
375formatDate (CondDate df) = Right $ CondString $ \ctx ->
376  let tz = cTimeZone ctx
377      local = utcToLocalTime tz `fmap` df ctx
378   in T.pack . formatTime defaultTimeLocale (iso8601DateFormat Nothing) <$> local
379formatDate cp = Left $ printf
380  "Cannot format an expression of type %s, only $date." (cpType cp)
381
382parseCmp :: Parser Cmp
383parseCmp = choice $ map (\(s,o) -> reservedOp lang s >> return o)
384                        [(">=",Cmp (>=)),
385                         (">", Cmp (>)),
386                         ("==",Cmp (==)),
387                         ("=", Cmp (==)),
388                         ("!=",Cmp (/=)),
389                         ("<", Cmp (<)),
390                         ("<=",Cmp (<=))]
391
392parseCondPrim :: Parser CondPrim
393parseCondPrim = choice
394        [ parens lang parseCondExpr
395        , brackets lang (choice [
396            (do list <- commaSep1 lang (stringLiteral lang)
397                return $ CondStringList (const (Just (map T.pack list)))
398            ) <?> "list of strings",
399            (do list <- commaSep1 lang parseRegex
400                return $ CondRegexList (const (Just list))
401            ) <?> "list of regular expressions"
402            ])
403        , char '$' >> choice
404             [ do backref <- read <$> many1 digit
405                  return $ CondString (getBackref backref)
406             , choice [ reserved lang "title" >> return (CondString (getVar "title"))
407                      , reserved lang "program" >> return (CondString (getVar "program"))
408                      , reserved lang "active" >> return (CondCond checkActive)
409                      , reserved lang "idle" >> return (CondInteger (getNumVar NvIdle))
410                      , reserved lang "time" >> return (CondTime (getTimeVar TvTime))
411                      , reserved lang "sampleage" >> return (CondTime (getTimeVar TvSampleAge))
412                      , reserved lang "date" >> return (CondDate (getDateVar DvDate))
413                      , reserved lang "now" >> return (CondDate (getDateVar DvNow))
414                      , reserved lang "desktop" >> return (CondString (getVar "desktop"))
415                      , do varname <- identifier lang
416                           inEnvironment <- (lift (asks (Map.lookup varname . snd)))
417                           case inEnvironment of
418                             Nothing -> fail ("Reference to unbound variable: '" ++ varname ++ "'")
419                             Just cond -> return (CondCond cond)
420                      ]
421              ] <?> "variable"
422        , do regex <- parseRegex <?> "regular expression"
423             return $ CondRegex (const (Just regex))
424        , do str <- T.pack <$> stringLiteral lang <?> "string"
425             return $ CondString (const (Just str))
426        , try $ do time <- parseTime <?> "time" -- backtrack here, it might have been a number
427                   return $ CondTime (const (Just time))
428        , try $ do date <- parseDate <?> "date" -- backtrack here, it might have been a number
429                   return $ CondDate (const (Just date))
430        , do num <- natural lang <?> "number"
431             return $ CondInteger (const (Just num))
432        ]
433
434parseRegex :: Parser RE.Regex
435parseRegex = fmap (flip RE.compile [] . T.pack) $ lexeme lang $ choice
436        [ between (char '/') (char '/') (many1 (noneOf "/"))
437        , do _ <- char 'm'
438             c <- anyChar
439             str <- many1 (noneOf [c])
440             _ <- char c
441             return str
442        ]
443
444-- | Parses a day-of-time specification (hh:mm)
445parseTime :: Parser NominalDiffTime
446parseTime = fmap fromIntegral $ lexeme lang $ do
447               hour <- read <$> many1 digit
448               _ <- char ':'
449               minute <- read <$> count 2 digit
450               return $ (hour * 60 + minute) * 60
451
452parseDate :: Parser UTCTime
453parseDate = lexeme lang $ do
454    tz <- lift (asks fst)
455    year <- read <$> count 4 digit
456    _ <- char '-'
457    month <- read <$> count 2 digit
458    _ <- char '-'
459    day <- read <$> count 2 digit
460    time <- option 0 parseTime
461    let date = LocalTime (fromGregorian year month day) (TimeOfDay 0 0 0)
462    return $ addUTCTime time $ localTimeToUTC tz date
463
464
465parseSetTag :: Parser Rule
466parseSetTag = lexeme lang $ do
467                 firstPart <- parseTagPart
468                 choice [ do char ':'
469                             secondPart <- parseTagPart
470                             return $ do cat <- firstPart
471                                         tag <- secondPart
472                                         return $ maybeToList $ do
473                                            cat <- cat
474                                            tag <- tag
475                                            return $ Activity (Just cat) tag
476                        ,    return $ do tag <- firstPart
477                                         return $ maybeToList $ do
478                                            tag <- tag
479                                            return $ Activity Nothing tag
480                        ]
481
482replaceForbidden :: Maybe Text -> Maybe Text
483replaceForbidden = fmap $ T.map go
484  where
485    go c | isAlphaNum c  = c
486         | c `elem` "-_" = c
487         | otherwise     = '_'
488
489parseTagPart :: Parser (Ctx -> Maybe Text)
490parseTagPart = do parts <- many1 (choice
491                        [ do char '$'
492                             (replaceForbidden . ) <$> choice
493                               [ do num <- read <$> many1 digit
494                                    return $ getBackref num
495                               , do varname <- many1 (letter <|> oneOf ".")
496                                    return $ getVar varname
497                               ] <?> "variable"
498                        , do s <- many1 (alphaNum <|> oneOf "-_")
499                             return $ const (Just (T.pack s))
500                        ])
501                  return $ (fmap T.concat . sequence) <$> sequence parts
502
503ifThenElse :: Cond -> Rule -> Rule -> Rule
504ifThenElse cond r1 r2 = do res <- cond
505                           case res of
506                            Just substs -> r1 . setSubsts substs
507                            Nothing -> r2
508  where setSubsts :: [Text] -> Ctx -> Ctx
509        setSubsts substs ctx = ctx { cSubsts = substs }
510
511
512matchAny :: [Rule] -> Rule
513matchAny rules = concat <$> sequence rules
514matchFirst :: [Rule] -> Rule
515matchFirst rules = takeFirst <$> sequence rules
516  where takeFirst [] = []
517        takeFirst ([]:xs) = takeFirst xs
518        takeFirst (x:_) = x
519
520
521getBackref :: Integer -> CtxFun Text
522getBackref n ctx = listToMaybe (drop (fromIntegral n-1) (cSubsts ctx))
523
524getVar :: String -> CtxFun Text
525getVar v ctx | "current" `isPrefixOf` v = do
526                let var = drop (length "current.") v
527                win <- cCurrentWindow ctx
528                getVar var (ctx { cWindowInScope = Just win })
529getVar "title"   ctx = do
530                (_,t,_) <- cWindowInScope ctx
531                return t
532getVar "program" ctx = do
533                (_,_,p) <- cWindowInScope ctx
534                return p
535getVar "desktop" ctx = return $ cDesktop (tlData (cNow ctx))
536getVar v _ = error $ "Unknown variable " ++ v
537
538getNumVar :: NumVar -> CtxFun Integer
539getNumVar NvIdle ctx = Just $ cLastActivity (tlData (cNow ctx)) `div` 1000
540
541getTimeVar :: TimeVar -> CtxFun NominalDiffTime
542getTimeVar TvTime ctx = Just $
543   let utc = tlTime . cNow $ ctx
544       tz = cTimeZone ctx
545       local = utcToLocalTime tz utc
546       midnightUTC = localTimeToUTC tz $ local { localTimeOfDay = midnight }
547    in utc `diffUTCTime` midnightUTC
548getTimeVar TvSampleAge ctx = Just $ cCurrentTime ctx `diffUTCTime` tlTime (cNow ctx)
549
550getDateVar :: DateVar -> CtxFun UTCTime
551getDateVar DvDate = Just . tlTime . cNow
552getDateVar DvNow = Just . cCurrentTime
553
554findActive :: [(Bool, t, t1)] -> Maybe (Bool, t, t1)
555findActive = find (\(a,_,_) -> a)
556
557checkActive :: Cond
558checkActive ctx = do (a,_,_) <- cWindowInScope ctx
559                     guard a
560                     return []
561
562matchNone :: Rule
563matchNone = const []
564
565justIf :: a -> Bool -> Maybe a
566justIf x True = Just x
567justIf _ False = Nothing
568
569mkSecond :: (a -> b) -> a -> (a, b)
570mkSecond f a = (a, f a)
571