1{-# LANGUAGE Rank2Types, CPP, FlexibleContexts #-} 2module Categorize where 3 4import Data 5 6import qualified Text.Regex.PCRE.Light.Text as RE 7import qualified Data.MyText as T 8import Data.MyText (Text) 9import Control.Applicative (empty, (<*)) 10import Control.Monad 11import Control.Monad.Instances() 12import Control.Monad.Trans.Reader hiding (local) 13import Control.Monad.Reader.Class (local) 14import Control.Monad.Trans.Class 15import Data.Functor.Identity 16 17import Control.Applicative ((<$>)) 18import Control.DeepSeq 19import Data.Char 20import Data.List 21import Data.Map.Strict (Map) 22import qualified Data.Map.Strict as Map 23import Data.Maybe 24import Data.Time.Calendar (toGregorian, fromGregorian) 25import Data.Time.Calendar.WeekDate (toWeekDate) 26import Data.Time.Clock 27import Data.Time.Format (formatTime) 28import Data.Time.LocalTime 29import System.Exit 30import System.IO 31import Text.Show.Functions 32import Text.Parsec 33import Text.Parsec.ExprFail 34import Text.Parsec.Token 35#if MIN_VERSION_time(1,5,0) 36import Data.Time.Format(defaultTimeLocale, iso8601DateFormat) 37#else 38import System.Locale (defaultTimeLocale, iso8601DateFormat) 39#endif 40import Debug.Trace 41import Text.Printf 42 43type Categorizer = TimeLog CaptureData -> TimeLog (Ctx, ActivityData) 44type Rule = Ctx -> ActivityData 45type Environment = Map String Cond 46 47type Parser = ParsecT String () (ReaderT (TimeZone, Environment) Identity) 48 49data Ctx = Ctx 50 { cNow :: TimeLogEntry CaptureData 51 , cCurrentWindow :: Maybe (Bool, Text, Text) 52 , cWindowInScope :: Maybe (Bool, Text, Text) 53 , cSubsts :: [Text] 54 , cCurrentTime :: UTCTime 55 , cTimeZone :: TimeZone 56 , conditionBindings :: Map String Cond 57 } deriving Show 58 59instance NFData Ctx where 60 rnf (Ctx a b c d e f g) = a `deepseq` b `deepseq` c `deepseq` d `deepseq` e `deepseq` f `deepseq` g `deepseq` () 61 62type Cond = CtxFun [Text] 63 64type CtxFun a = Ctx -> Maybe a 65 66data CondPrim 67 = CondString (CtxFun Text) 68 | CondRegex (CtxFun RE.Regex) 69 | CondInteger (CtxFun Integer) 70 | CondTime (CtxFun NominalDiffTime) 71 | CondDate (CtxFun UTCTime) 72 | CondCond (CtxFun [Text]) 73 | CondStringList (CtxFun [Text]) 74 | CondRegexList (CtxFun [RE.Regex]) 75 76newtype Cmp = Cmp (forall a. Ord a => a -> a -> Bool) 77 78data DateVar = DvDate | DvNow 79 80data TimeVar = TvTime | TvSampleAge 81 82data NumVar = NvIdle 83 84runParserStack :: Stream s (ReaderT r Identity) t 85 => r 86 -> ParsecT s () (ReaderT r Identity) a 87 -> SourceName 88 -> s 89 -> Either ParseError a 90runParserStack env p filename = 91 runIdentity . flip runReaderT env . runParserT p () filename 92 93readCategorizer :: FilePath -> IO Categorizer 94readCategorizer filename = withFile filename ReadMode $ \h -> do 95 hSetEncoding h utf8 96 content <- hGetContents h 97 time <- getCurrentTime 98 tz <- getCurrentTimeZone 99 case runParserStack (tz, Map.empty) (between (return ()) eof parseRules) filename content of 100 Left err -> do 101 putStrLn "Parser error:" 102 print err 103 exitFailure 104 Right cat -> return 105 (map (fmap (mkSecond (postpare . cat))) . prepare time tz) 106 107applyCond :: String -> TimeZone -> Environment -> TimeLogEntry (Ctx, ActivityData) -> Bool 108applyCond s tz env = 109 case runParserStack (tz, env) (parseCond <* eof) "command line parameter" s of 110 Left err -> error (show err) 111 Right c -> isJust . c . fst . tlData 112 113prepare :: UTCTime -> TimeZone -> TimeLog CaptureData -> TimeLog Ctx 114prepare time tz = map go 115 where go now = now {tlData = Ctx now (findActive (cWindows (tlData now))) Nothing [] time tz Map.empty } 116 117-- | Here, we filter out tags appearing twice, and make sure that only one of 118-- each category survives 119postpare :: ActivityData -> ActivityData 120postpare = nubBy go 121 where go (Activity (Just c1) _) (Activity (Just c2) _) = c1 == c2 122 go a1 a2 = a1 == a2 123 124lang :: GenTokenParser String () (ReaderT (TimeZone, Environment) Identity) 125lang = makeTokenParser LanguageDef 126 { commentStart = "{-" 127 , commentEnd = "-}" 128 , commentLine = "--" 129 , nestedComments = True 130 , identStart = letter 131 , identLetter = alphaNum <|> oneOf "_'" 132 , opStart = oneOf ":!#$%&*+./<=>?@\\^|-~" 133 , opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~" 134 , reservedOpNames= [] 135 , reservedNames = [ "title" 136 , "program" 137 , "active" 138 , "idle" 139 , "time" 140 , "sampleage" 141 , "date" 142 , "now" 143 , "desktop" 144 ] 145 , caseSensitive = True 146 } 147 148parseRules :: Parser Rule 149parseRules = do 150 whiteSpace lang 151 a <- option id (reserved lang "aliases" >> parens lang parseAliasSpecs) 152 rb <- parseRulesBody 153 return (a . rb) 154 155parseAliasSpecs :: Parser (ActivityData -> ActivityData) 156parseAliasSpecs = do as <- sepEndBy1 parseAliasSpec (comma lang) 157 return $ \ad -> foldr doAlias ad as 158 159doAlias :: (Text, Text) -> ActivityData -> ActivityData 160doAlias (s1,s2) = map go 161 where go (Activity cat tag) = Activity (if cat == Just s1 then Just s2 else cat) 162 (if tag == s1 then s2 else tag) 163 164parseAliasSpec :: Parser (Text, Text) 165parseAliasSpec = do s1 <- T.pack <$> stringLiteral lang 166 reservedOp lang "->" 167 s2 <- T.pack <$> stringLiteral lang 168 return (s1,s2) 169 170parseRulesBody :: Parser Rule 171parseRulesBody = do 172 x <- parseRule 173 choice [ do _ <- comma lang 174 xs <- parseRule `sepEndBy1` comma lang 175 return (matchAny (x:xs)) 176 , do _ <- semi lang 177 xs <- parseRule `sepEndBy1` semi lang 178 return (matchFirst (x:xs)) 179 , return x 180 ] 181 182withBinding :: String -> Cond -> Parser a -> Parser a 183withBinding k v = local (\(tz,env) -> (tz, Map.insert k v env)) 184 185parseConditionBinding :: Parser Rule 186parseConditionBinding = do 187 _ <- reserved lang "condition" 188 varname <- identifier lang 189 _ <- reservedOp lang "=" 190 cond <- parseCond 191 _ <- reserved lang "in" 192 withBinding varname cond parseRule 193 194parseRule :: Parser Rule 195parseRule = choice 196 [ braces lang parseRules 197 , do cond <- parseCond 198 reservedOp lang "==>" 199 rule <- parseRule 200 return (ifThenElse cond rule matchNone) 201 , do reserved lang "if" 202 cond <- parseCond 203 reserved lang "then" 204 rule1 <- parseRule 205 reserved lang "else" 206 rule2 <- parseRule 207 return (ifThenElse cond rule1 rule2) 208 , do reserved lang "tag" 209 parseSetTag 210 , parseConditionBinding 211 ] 212 213parseCond :: Parser Cond 214parseCond = do cp <- parseCondExpr 215 case cp of 216 CondCond c -> return c 217 _ -> fail $ printf "Expression of type %s" (cpType cp) 218 219parseCondExpr :: Parser CondPrim 220parseCondExpr = buildExpressionParser [ 221 [ Prefix (reservedOp lang "!" >> return checkNot) ], 222 [ Prefix (reserved lang "day of week" >> return evalDayOfWeek) 223 , Prefix (reserved lang "day of month" >> return evalDayOfMonth) 224 , Prefix (reserved lang "month" >> return evalMonth) 225 , Prefix (reserved lang "year" >> return evalYear) 226 , Prefix (reserved lang "format" >> return formatDate) ], 227 [ Infix (reservedOp lang "=~" >> return checkRegex) AssocNone 228 , Infix (checkCmp <$> parseCmp) AssocNone 229 ], 230 [ Prefix (reserved lang "current window" >> return checkCurrentwindow) 231 , Prefix (reserved lang "any window" >> return checkAnyWindow) 232 ], 233 [ Infix (reservedOp lang "&&" >> return checkAnd) AssocRight ], 234 [ Infix (reservedOp lang "||" >> return checkOr) AssocRight ] 235 ] parseCondPrim 236 237cpType :: CondPrim -> String 238cpType (CondString _) = "String" 239cpType (CondRegex _) = "Regex" 240cpType (CondInteger _) = "Integer" 241cpType (CondTime _) = "Time" 242cpType (CondDate _) = "Date" 243cpType (CondCond _) = "Condition" 244cpType (CondStringList _) = "List of Strings" 245cpType (CondRegexList _) = "List of regular expressions" 246 247checkRegex :: CondPrim -> CondPrim -> Erring CondPrim 248checkRegex (CondString getStr) (CondRegex getRegex) = Right $ CondCond $ \ctx -> do 249 str <- getStr ctx 250 regex <- getRegex ctx 251 tail <$> RE.match regex str [RE.exec_no_utf8_check] 252checkRegex (CondString getStr) (CondRegexList getRegexList) = Right $ CondCond $ \ctx -> do 253 str <- getStr ctx 254 regexes <- getRegexList ctx 255 tail <$> msum (map (\regex -> RE.match regex str [RE.exec_no_utf8_check]) regexes) 256checkRegex cp1 cp2 = Left $ 257 printf "Cannot apply =~ to an expression of type %s and type %s" 258 (cpType cp1) (cpType cp2) 259 260checkAnd :: CondPrim-> CondPrim -> Erring CondPrim 261checkAnd (CondCond c1) (CondCond c2) = Right $ CondCond $ do 262 res1 <- c1 263 res2 <- c2 264 return $ res1 >> res2 265checkAnd cp1 cp2 = Left $ 266 printf "Cannot apply && to an expression of type %s and type %s" 267 (cpType cp1) (cpType cp2) 268 269checkOr :: CondPrim-> CondPrim -> Erring CondPrim 270checkOr (CondCond c1) (CondCond c2) = Right $ CondCond $ do 271 res1 <- c1 272 res2 <- c2 273 return $ res1 `mplus` res2 274checkOr cp1 cp2 = Left $ 275 printf "Cannot apply && to an expression of type %s and type %s" 276 (cpType cp1) (cpType cp2) 277 278checkNot :: CondPrim -> Erring CondPrim 279checkNot (CondCond getCnd) = Right . CondCond $ fmap (maybe (Just []) (const Nothing)) getCnd 280checkNot cp = Left $ 281 printf "Cannot apply ! to an expression of type %s" 282 (cpType cp) 283 284checkCmp :: Cmp -> CondPrim -> CondPrim -> Erring CondPrim 285checkCmp (Cmp (?)) (CondInteger getN1) (CondInteger getN2) = Right $ CondCond $ \ctx -> do 286 n1 <- getN1 ctx 287 n2 <- getN2 ctx 288 guard (n1 ? n2) 289 return [] 290checkCmp (Cmp (?)) (CondTime getT1) (CondTime getT2) = Right $ CondCond $ \ctx -> do 291 t1 <- getT1 ctx 292 t2 <- getT2 ctx 293 guard (t1 ? t2) 294 return [] 295checkCmp (Cmp (?)) (CondDate getT1) (CondDate getT2) = Right $ CondCond $ \ctx -> do 296 t1 <- getT1 ctx 297 t2 <- getT2 ctx 298 guard (t1 ? t2) 299 return [] 300checkCmp (Cmp (?)) (CondString getS1) (CondString getS2) = Right $ CondCond $ \ctx -> do 301 s1 <- getS1 ctx 302 s2 <- getS2 ctx 303 guard (s1 ? s2) 304 return [] 305checkCmp (Cmp (?)) (CondString getS1) (CondStringList getS2) = Right $ CondCond $ \ctx -> do 306 s1 <- getS1 ctx 307 sl <- getS2 ctx 308 guard (any (s1 ?) sl) 309 return [] 310checkCmp _ cp1 cp2 = Left $ 311 printf "Cannot compare expressions of type %s and type %s" 312 (cpType cp1) (cpType cp2) 313 314checkCurrentwindow :: CondPrim -> Erring CondPrim 315checkCurrentwindow (CondCond cond) = Right $ CondCond $ \ctx -> 316 cond (ctx { cWindowInScope = cCurrentWindow ctx }) 317checkCurrentwindow cp = Left $ 318 printf "Cannot apply current window to an expression of type %s" 319 (cpType cp) 320 321checkAnyWindow :: CondPrim -> Erring CondPrim 322checkAnyWindow (CondCond cond) = Right $ CondCond $ \ctx -> 323 msum $ map (\w -> cond (ctx { cWindowInScope = Just w })) 324 (cWindows (tlData (cNow ctx))) 325checkAnyWindow cp = Left $ 326 printf "Cannot apply current window to an expression of type %s" 327 (cpType cp) 328 329fst3 :: (a,b,c) -> a 330fst3 (a,_,_) = a 331 332snd3 :: (a,b,c) -> b 333snd3 (_,b,_) = b 334 335trd3 :: (a,b,c) -> c 336trd3 (_,_,c) = c 337 338-- Day of week is an integer in [1..7]. 339evalDayOfWeek :: CondPrim -> Erring CondPrim 340evalDayOfWeek (CondDate df) = Right $ CondInteger $ \ctx -> 341 let tz = cTimeZone ctx in 342 (toInteger . trd3 . toWeekDate . localDay . utcToLocalTime tz) `fmap` df ctx 343evalDayOfWeek cp = Left $ printf 344 "Cannot apply day of week to an expression of type %s, only to $date." 345 (cpType cp) 346 347-- Day of month is an integer in [1..31]. 348evalDayOfMonth :: CondPrim -> Erring CondPrim 349evalDayOfMonth (CondDate df) = Right $ CondInteger $ \ctx -> 350 let tz = cTimeZone ctx in 351 (toInteger . trd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx 352evalDayOfMonth cp = Left $ printf 353 "Cannot apply day of month to an expression of type %s, only to $date." 354 (cpType cp) 355 356-- Month is an integer in [1..12]. 357evalMonth :: CondPrim -> Erring CondPrim 358evalMonth (CondDate df) = Right $ CondInteger $ \ctx -> 359 let tz = cTimeZone ctx in 360 (toInteger . snd3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx 361evalMonth cp = Left $ printf 362 "Cannot apply month to an expression of type %s, only to $date." 363 (cpType cp) 364 365evalYear :: CondPrim -> Erring CondPrim 366evalYear (CondDate df) = Right $ CondInteger $ \ctx -> 367 let tz = cTimeZone ctx in 368 (fst3 . toGregorian . localDay . utcToLocalTime tz) `fmap` df ctx 369evalYear cp = Left $ printf 370 "Cannot apply year to an expression of type %s, only to $date." 371 (cpType cp) 372 373-- format date according to ISO 8601 (YYYY-MM-DD) 374formatDate :: CondPrim -> Erring CondPrim 375formatDate (CondDate df) = Right $ CondString $ \ctx -> 376 let tz = cTimeZone ctx 377 local = utcToLocalTime tz `fmap` df ctx 378 in T.pack . formatTime defaultTimeLocale (iso8601DateFormat Nothing) <$> local 379formatDate cp = Left $ printf 380 "Cannot format an expression of type %s, only $date." (cpType cp) 381 382parseCmp :: Parser Cmp 383parseCmp = choice $ map (\(s,o) -> reservedOp lang s >> return o) 384 [(">=",Cmp (>=)), 385 (">", Cmp (>)), 386 ("==",Cmp (==)), 387 ("=", Cmp (==)), 388 ("!=",Cmp (/=)), 389 ("<", Cmp (<)), 390 ("<=",Cmp (<=))] 391 392parseCondPrim :: Parser CondPrim 393parseCondPrim = choice 394 [ parens lang parseCondExpr 395 , brackets lang (choice [ 396 (do list <- commaSep1 lang (stringLiteral lang) 397 return $ CondStringList (const (Just (map T.pack list))) 398 ) <?> "list of strings", 399 (do list <- commaSep1 lang parseRegex 400 return $ CondRegexList (const (Just list)) 401 ) <?> "list of regular expressions" 402 ]) 403 , char '$' >> choice 404 [ do backref <- read <$> many1 digit 405 return $ CondString (getBackref backref) 406 , choice [ reserved lang "title" >> return (CondString (getVar "title")) 407 , reserved lang "program" >> return (CondString (getVar "program")) 408 , reserved lang "active" >> return (CondCond checkActive) 409 , reserved lang "idle" >> return (CondInteger (getNumVar NvIdle)) 410 , reserved lang "time" >> return (CondTime (getTimeVar TvTime)) 411 , reserved lang "sampleage" >> return (CondTime (getTimeVar TvSampleAge)) 412 , reserved lang "date" >> return (CondDate (getDateVar DvDate)) 413 , reserved lang "now" >> return (CondDate (getDateVar DvNow)) 414 , reserved lang "desktop" >> return (CondString (getVar "desktop")) 415 , do varname <- identifier lang 416 inEnvironment <- (lift (asks (Map.lookup varname . snd))) 417 case inEnvironment of 418 Nothing -> fail ("Reference to unbound variable: '" ++ varname ++ "'") 419 Just cond -> return (CondCond cond) 420 ] 421 ] <?> "variable" 422 , do regex <- parseRegex <?> "regular expression" 423 return $ CondRegex (const (Just regex)) 424 , do str <- T.pack <$> stringLiteral lang <?> "string" 425 return $ CondString (const (Just str)) 426 , try $ do time <- parseTime <?> "time" -- backtrack here, it might have been a number 427 return $ CondTime (const (Just time)) 428 , try $ do date <- parseDate <?> "date" -- backtrack here, it might have been a number 429 return $ CondDate (const (Just date)) 430 , do num <- natural lang <?> "number" 431 return $ CondInteger (const (Just num)) 432 ] 433 434parseRegex :: Parser RE.Regex 435parseRegex = fmap (flip RE.compile [] . T.pack) $ lexeme lang $ choice 436 [ between (char '/') (char '/') (many1 (noneOf "/")) 437 , do _ <- char 'm' 438 c <- anyChar 439 str <- many1 (noneOf [c]) 440 _ <- char c 441 return str 442 ] 443 444-- | Parses a day-of-time specification (hh:mm) 445parseTime :: Parser NominalDiffTime 446parseTime = fmap fromIntegral $ lexeme lang $ do 447 hour <- read <$> many1 digit 448 _ <- char ':' 449 minute <- read <$> count 2 digit 450 return $ (hour * 60 + minute) * 60 451 452parseDate :: Parser UTCTime 453parseDate = lexeme lang $ do 454 tz <- lift (asks fst) 455 year <- read <$> count 4 digit 456 _ <- char '-' 457 month <- read <$> count 2 digit 458 _ <- char '-' 459 day <- read <$> count 2 digit 460 time <- option 0 parseTime 461 let date = LocalTime (fromGregorian year month day) (TimeOfDay 0 0 0) 462 return $ addUTCTime time $ localTimeToUTC tz date 463 464 465parseSetTag :: Parser Rule 466parseSetTag = lexeme lang $ do 467 firstPart <- parseTagPart 468 choice [ do char ':' 469 secondPart <- parseTagPart 470 return $ do cat <- firstPart 471 tag <- secondPart 472 return $ maybeToList $ do 473 cat <- cat 474 tag <- tag 475 return $ Activity (Just cat) tag 476 , return $ do tag <- firstPart 477 return $ maybeToList $ do 478 tag <- tag 479 return $ Activity Nothing tag 480 ] 481 482replaceForbidden :: Maybe Text -> Maybe Text 483replaceForbidden = fmap $ T.map go 484 where 485 go c | isAlphaNum c = c 486 | c `elem` "-_" = c 487 | otherwise = '_' 488 489parseTagPart :: Parser (Ctx -> Maybe Text) 490parseTagPart = do parts <- many1 (choice 491 [ do char '$' 492 (replaceForbidden . ) <$> choice 493 [ do num <- read <$> many1 digit 494 return $ getBackref num 495 , do varname <- many1 (letter <|> oneOf ".") 496 return $ getVar varname 497 ] <?> "variable" 498 , do s <- many1 (alphaNum <|> oneOf "-_") 499 return $ const (Just (T.pack s)) 500 ]) 501 return $ (fmap T.concat . sequence) <$> sequence parts 502 503ifThenElse :: Cond -> Rule -> Rule -> Rule 504ifThenElse cond r1 r2 = do res <- cond 505 case res of 506 Just substs -> r1 . setSubsts substs 507 Nothing -> r2 508 where setSubsts :: [Text] -> Ctx -> Ctx 509 setSubsts substs ctx = ctx { cSubsts = substs } 510 511 512matchAny :: [Rule] -> Rule 513matchAny rules = concat <$> sequence rules 514matchFirst :: [Rule] -> Rule 515matchFirst rules = takeFirst <$> sequence rules 516 where takeFirst [] = [] 517 takeFirst ([]:xs) = takeFirst xs 518 takeFirst (x:_) = x 519 520 521getBackref :: Integer -> CtxFun Text 522getBackref n ctx = listToMaybe (drop (fromIntegral n-1) (cSubsts ctx)) 523 524getVar :: String -> CtxFun Text 525getVar v ctx | "current" `isPrefixOf` v = do 526 let var = drop (length "current.") v 527 win <- cCurrentWindow ctx 528 getVar var (ctx { cWindowInScope = Just win }) 529getVar "title" ctx = do 530 (_,t,_) <- cWindowInScope ctx 531 return t 532getVar "program" ctx = do 533 (_,_,p) <- cWindowInScope ctx 534 return p 535getVar "desktop" ctx = return $ cDesktop (tlData (cNow ctx)) 536getVar v _ = error $ "Unknown variable " ++ v 537 538getNumVar :: NumVar -> CtxFun Integer 539getNumVar NvIdle ctx = Just $ cLastActivity (tlData (cNow ctx)) `div` 1000 540 541getTimeVar :: TimeVar -> CtxFun NominalDiffTime 542getTimeVar TvTime ctx = Just $ 543 let utc = tlTime . cNow $ ctx 544 tz = cTimeZone ctx 545 local = utcToLocalTime tz utc 546 midnightUTC = localTimeToUTC tz $ local { localTimeOfDay = midnight } 547 in utc `diffUTCTime` midnightUTC 548getTimeVar TvSampleAge ctx = Just $ cCurrentTime ctx `diffUTCTime` tlTime (cNow ctx) 549 550getDateVar :: DateVar -> CtxFun UTCTime 551getDateVar DvDate = Just . tlTime . cNow 552getDateVar DvNow = Just . cCurrentTime 553 554findActive :: [(Bool, t, t1)] -> Maybe (Bool, t, t1) 555findActive = find (\(a,_,_) -> a) 556 557checkActive :: Cond 558checkActive ctx = do (a,_,_) <- cWindowInScope ctx 559 guard a 560 return [] 561 562matchNone :: Rule 563matchNone = const [] 564 565justIf :: a -> Bool -> Maybe a 566justIf x True = Just x 567justIf _ False = Nothing 568 569mkSecond :: (a -> b) -> a -> (a, b) 570mkSecond f a = (a, f a) 571