# Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

from abc import ABC, abstractmethod
from calendar import timegm
from enum import Enum
import glob
import os
import re
import time


# Column family label used for logs that mention no known column family.
NO_COL_FAMILY = 'DB_WIDE'


class DataSource(ABC):
    """Abstract base class for a data source that conditions are checked on.

    Subclasses implement check_and_trigger_conditions().
    """

    class Type(Enum):
        LOG = 1
        DB_OPTIONS = 2
        TIME_SERIES = 3

    def __init__(self, type):
        # NOTE: 'type' shadows the builtin; the parameter name is kept so
        # that callers passing it by keyword keep working.
        self.type = type

    @abstractmethod
    def check_and_trigger_conditions(self, conditions):
        pass


class Log:
    """One log entry: a timestamp, a context token and a message.

    The message may span several lines; continuation lines are added with
    append_message().
    """

    # The assumption is that a new log will start with a date printed in
    # this format, e.g. '2018/07/25-17:29:05.176080'. Raw string so that
    # '\d' and '\.' reach the regex engine instead of being (invalid)
    # string escapes; compiled once because it is used for every line.
    _DATE_REGEX = re.compile(r'\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}')

    @staticmethod
    def is_new_log(log_line):
        """Return a match object if log_line starts with a date, else None."""
        return Log._DATE_REGEX.match(log_line)

    def __init__(self, log_line, column_families):
        """Parse the first line of a log entry.

        log_line: the raw line; its first whitespace-separated token is
            stored as the time, the second as the context and the rest
            (joined by single spaces) as the message.
        column_families: iterable of column family names; the first one
            that appears in the message as '[name]' is recorded, otherwise
            NO_COL_FAMILY is used.

        Raises IndexError if log_line has fewer than two tokens.
        """
        token_list = log_line.strip().split()
        self.time = token_list[0]
        self.context = token_list[1]
        self.message = " ".join(token_list[2:])
        self.column_family = None
        # example log for 'default' column family:
        # "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
        # [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
        for col_fam in column_families:
            # Literal substring test: the column family name must not be
            # interpreted as a regex (names may contain '+', '(', '.', ...).
            if '[' + col_fam + ']' in self.message:
                self.column_family = col_fam
                break
        if not self.column_family:
            self.column_family = NO_COL_FAMILY

    def get_human_readable_time(self):
        """Return the time token exactly as it appeared in the log line."""
        # example from a log line: '2018/07/25-11:25:45.782710'
        return self.time

    def get_column_family(self):
        """Return the matched column family name, or NO_COL_FAMILY."""
        return self.column_family

    def get_context(self):
        """Return the second token of the log line."""
        return self.context

    def get_message(self):
        """Return the message, including any appended continuation lines."""
        return self.message

    def append_message(self, remaining_log):
        """Append a stripped continuation line to the message after '\\n'."""
        self.message = self.message + '\n' + remaining_log.strip()

    def get_timestamp(self):
        """Return the log time as an integer Unix timestamp."""
        # example: '2018/07/25-11:25:45.782710' will be converted to the GMT
        # Unix timestamp 1532517945 (note: this method assumes that self.time
        # is in GMT)
        hr_time = self.time + 'GMT'
        timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
        return timestamp

    def __repr__(self):
        return (
            'time: ' + self.time + '; context: ' + self.context +
            '; col_fam: ' + self.column_family +
            '; message: ' + self.message
        )


class DatabaseLogs(DataSource):
    """Data source backed by the log files matching a path prefix."""

    def __init__(self, logs_path_prefix, column_families):
        """Store the glob prefix of the log files and the column families."""
        super().__init__(DataSource.Type.LOG)
        self.logs_path_prefix = logs_path_prefix
        self.column_families = column_families

    def trigger_conditions_for_log(self, conditions, log):
        """Record 'log' on every condition whose regex matches its message.

        Matching is case-insensitive. Each condition is expected to expose
        a 'regex' attribute plus get_trigger()/set_trigger().
        """
        # For a LogCondition object, trigger is:
        # Dict[column_family_name, List[Log]]. This explains why the condition
        # was triggered and for which column families.
        for cond in conditions:
            if re.search(cond.regex, log.get_message(), re.IGNORECASE):
                trigger = cond.get_trigger()
                if not trigger:
                    trigger = {}
                trigger.setdefault(log.get_column_family(), []).append(log)
                cond.set_trigger(trigger)

    def check_and_trigger_conditions(self, conditions):
        """Parse every matching log file and trigger the given conditions.

        Files whose name contains 'old' (case-insensitive) are skipped.
        """
        for file_name in glob.glob(self.logs_path_prefix + '*'):
            # TODO(poojam23): find a way to distinguish between log files
            # - generated in the current experiment but are labeled 'old'
            # because they LOGs exceeded the file size limit  AND
            # - generated in some previous experiment that are also labeled
            # 'old' and were not deleted for some reason
            # Only the file name is inspected: a directory that merely
            # contains 'old' in its name (e.g. '/folder/LOG') must not cause
            # every log inside it to be ignored.
            if re.search('old', os.path.basename(file_name), re.IGNORECASE):
                continue
            with open(file_name, 'r') as db_logs:
                new_log = None
                for line in db_logs:
                    if Log.is_new_log(line):
                        if new_log:
                            self.trigger_conditions_for_log(
                                conditions, new_log
                            )
                        new_log = Log(line, self.column_families)
                    elif new_log:
                        # To account for logs split into multiple lines
                        new_log.append_message(line)
                    # else: a line that precedes the first dated log entry
                    # has no log to attach to, so it is ignored.
                # Check for the last log in the file.
                if new_log:
                    self.trigger_conditions_for_log(conditions, new_log)