#!/usr/bin/env python3
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
'''\
Generate Sysdig event dissector sections from the sysdig sources.

Reads driver/event_table.c and driver/ppm_events_public.h and generates
corresponding dissection code in packet-sysdig-event.c. Updates are
performed in-place in the dissector code.

Requires an Internet connection. Assets are loaded from GitHub over HTTPS, from falcosecurity/libs master.
'''

import logging
import os
import os.path
import re
import sys
import urllib.error
import urllib.parse
import urllib.request

sysdig_repo_pfx = 'https://raw.githubusercontent.com/falcosecurity/libs/master/'

# Paths, relative to sysdig_repo_pfx, of the two upstream sources we parse.
ppm_ev_pub_path = 'driver/ppm_events_public.h'
ppm_ev_table_path = 'driver/event_table.c'

# Downloaded sources keyed by repository path, so that each file is fetched
# at most once and only when a parser actually needs it.
_source_lines_d = {}


def exit_msg(msg=None, status=1):
    '''Print an optional message and the module help to stderr, then exit.

    msg: text written before the help, or None to print the help only.
    status: process exit status handed to sys.exit().
    '''
    if msg is not None:
        sys.stderr.write(msg + '\n\n')
    # __doc__ is None when docstrings are stripped (python -OO).
    sys.stderr.write((__doc__ or '') + '\n')
    sys.exit(status)


def get_url_lines(url):
    '''Open a URL.

    Returns the URL body as a list of lines.
    Any failure is reported through exit_msg(), which terminates the process.
    '''
    req_headers = { 'User-Agent': 'Wireshark generate-sysdig-event' }
    try:
        req = urllib.request.Request(url, headers=req_headers)
        # The context manager closes the response on error paths as well.
        with urllib.request.urlopen(req) as response:
            lines = response.read().decode().splitlines()
    # HTTPError derives from URLError, which derives from OSError, so the
    # handlers must stay ordered from most to least specific.
    except urllib.error.HTTPError as err:
        exit_msg("HTTP error fetching {0}: {1}".format(url, err.reason))
    except urllib.error.URLError as err:
        exit_msg("URL error fetching {0}: {1}".format(url, err.reason))
    except OSError as err:
        # strerror can be None; fall back to the exception's own text.
        exit_msg("OS error fetching {0}: {1}".format(url, err.strerror or err))
    except Exception as err:
        exit_msg("Unexpected error fetching {0}: {1!r}".format(url, err))

    return lines


def _get_source_lines(path):
    '''Return the lines of the upstream file at `path`, downloading it once.'''
    if path not in _source_lines_d:
        _source_lines_d[path] = get_url_lines(sysdig_repo_pfx + path)
    return _source_lines_d[path]


ppme_re = re.compile(r'^\s+PPME_([A-Z0-9_]+_[EX])\s*=\s*([0-9]+)\s*,')

# Not referenced elsewhere in this file.
event_info_d = {}


def get_event_defines(lines=None):
    '''Return a dictionary mapping event numbers to PPME_ names.

    lines: lines of ppm_events_public.h. When None they are fetched from
        the upstream repository.
    The "PPME_" prefix is not part of the returned names.
    '''
    if lines is None:
        lines = _get_source_lines(ppm_ev_pub_path)
    event_d = {}
    for line in lines:
        m = ppme_re.match(line)
        if m:
            event_d[int(m.group(2))] = m.group(1)
    return event_d


# Not referenced elsewhere in this file; main() builds its own hf_d.
hf_d = {}

event_info_re = re.compile(r'^\s+/\*\s*PPME_.*\*\/\s*{\s*"([A-Za-z0-9_]+)"\s*,[^,]+,[^,]+,\s*([0-9]+)\s*[,{}]')
event_param_re = re.compile(r'{\s*"([A-Za-z0-9_ ]+)"\s*,\s*PT_([A-Z0-9_]+)\s*,\s*PF_([A-Z0-9_]+)\s*[,}]')


def get_event_names(lines=None):
    '''Return the list of event names, one per event table entry.

    lines: lines of event_table.c. When None they are fetched from the
        upstream repository.
    Names are returned in table order, exactly as spelled in the table.
    '''
    if lines is None:
        lines = _get_source_lines(ppm_ev_table_path)
    event_name_l = []
    for line in lines:
        ei = event_info_re.match(line)
        if ei:
            event_name_l.append(ei.group(1))
    return event_name_l


# PT_xxx to FT_xxx
pt_to_ft = {
    'BYTEBUF': 'BYTES',
    'CHARBUF': 'STRING',
    'FD': 'INT64',
    'FSPATH': 'STRING',
}

# FT_xxx to BASE_xxx
force_param_formats = {
    'STRING': 'NONE',
    'INT.*': 'DEC',
}


def get_event_params(lines=None):
    '''Return a list of dictionaries containing event names and parameter info.

    lines: lines of event_table.c. When None they are fetched from the
        upstream repository.
    Each dictionary has the keys event_name, event_num, param_name,
    param_type and param_format. event_num is the zero-based position of
    the event's entry in the table.
    Raises NameError when an entry lists fewer parameters than it declares.
    '''
    if lines is None:
        lines = _get_source_lines(ppm_ev_table_path)
    event_param_l = []
    event_num = 0
    force_string_l = ['args', 'env']
    for line in lines:
        ei = event_info_re.match(line)
        ep = event_param_re.findall(line)
        if ei and ep:
            event_name = ei.group(1)
            src_param_count = int(ei.group(2))
            if len(ep) != src_param_count:
                err_msg = '{}: found {} parameters. Expected {}. Params: {}'.format(
                    event_name, len(ep), src_param_count, repr(ep))
                if len(ep) > src_param_count:
                    # Too many matches: keep only the declared number.
                    logging.warning(err_msg)
                    del ep[src_param_count:]
                else:
                    raise NameError(err_msg)
            for p in ep:
                # p is a (name, PT_ suffix, PF_ suffix) tuple.
                if p[0] in force_string_l:
                    param_type = 'STRING'
                elif p[1] in pt_to_ft:
                    param_type = pt_to_ft[p[1]]
                elif p[0] == 'flags' and p[1].startswith('INT') and 'HEX' in p[2]:
                    param_type = 'U' + p[1]
                elif 'INT' in p[1]:
                    # Ints
                    param_type = p[1]
                else:
                    # Fall back to bytes
                    param_type = 'BYTES'

                if p[2] == 'NA':
                    if 'INT' in param_type:
                        param_format = 'DEC'
                    else:
                        param_format = 'NONE'
                elif param_type == 'BYTES':
                    param_format = 'NONE'
                else:
                    param_format = p[2]

                # re.match() anchors at the start, so 'INT.*' covers the
                # signed INTxx types but not UINTxx.
                for pt_pat, force_pf in force_param_formats.items():
                    if re.match(pt_pat, param_type) and param_format != force_pf:
                        err_msg = 'Forcing {} {} format to {}. Params: {}'.format(
                            event_name, param_type, force_pf, repr(ep))
                        logging.warning(err_msg)
                        param_format = force_pf

                param_d = {
                    'event_name': event_name,
                    'event_num': event_num,
                    # use replace() to account for "plugin ID" param name (ie: param names with space)
                    'param_name': p[0].replace(" ", "_"),
                    'param_type': param_type,
                    'param_format': param_format,
                }
                event_param_l.append(param_d)
        # Entries without parameters still consume an event number.
        if ei:
            event_num += 1
    return event_param_l


def param_to_hf_name(param):
    '''Return the header field variable name for a parameter dictionary.'''
    return 'hf_param_{}_{}'.format(param['param_name'], param['param_type'].lower())


def param_to_value_string_name(param):
    '''Return the value_string array name for a parameter dictionary.'''
    return '{}_{}_vals'.format(param['param_name'], param['param_type'].lower())


def get_param_desc(param):
    '''Return a display description for a parameter dictionary.

    Looks for an "event.param" specific description first, then for a
    per-event prefix, and finally falls back to the bare parameter name.
    '''
    # Try to coerce event names and parameters into human-friendly
    # strings.
    # XXX This could use some work.

    # Specific descriptions. Event name + parameter name.
    param_descs = {
        'accept.queuepct': 'Accept queue per connection',
        'execve.args': 'Program arguments',
        'execve.comm': 'Command',
        'execve.cwd': 'Current working directory',
    }
    # General descriptions. Event name only.
    event_descs = {
        'ioctl': 'I/O control',
    }

    event_name = param['event_name']
    param_id = '{}.{}'.format(event_name, param['param_name'])
    if param_id in param_descs:
        param_desc = param_descs[param_id]
    elif event_name in event_descs:
        param_desc = '{}: {}'.format(event_descs[event_name], param['param_name'])
    else:
        param_desc = param['param_name']
    return param_desc


def _section_re(comment):
    '''Return a regex matching the start of a section marker comment.'''
    return re.compile(r'/\*\s+' + comment, flags=re.IGNORECASE)


def main():
    '''Regenerate the generated sections of packet-sysdig-event.c in place.

    The dissector is read completely, previously generated lines are
    dropped, and fresh content is inserted after each section marker
    comment. The file is rewritten only after all content has been built.
    '''
    logging.basicConfig(format='%(levelname)s: %(message)s')

    # Event list
    event_d = get_event_defines()
    event_nums = sorted(event_d)

    event_name_l = get_event_names()
    event_param_l = get_event_params()

    # Later parameters win when several map to the same hf name.
    hf_d = {param_to_hf_name(param): param for param in event_param_l}

    # Group parameters by event number once, preserving table order.
    params_by_event = {}
    for param in event_param_l:
        params_by_event.setdefault(param['event_num'], []).append(param)

    # Maps a parameter signature to the first event variable that used it.
    # The empty signature (no parameters) maps to "no_indexes".
    idx_id_to_name = { '': 'no' }
    parameter_index_l = []

    # Indexing event_d by position assumes event numbers run 0..N-1.
    for en in range(len(event_nums)):
        param_id = ''
        param_l = []
        event_var = event_d[en].lower()
        for param in params_by_event.get(en, []):
            param_l.append(param_to_hf_name(param))
            param_id += ':' + param['param_name'] + '_' + param['param_type']

        if param_id not in idx_id_to_name:
            idx_id_to_name[param_id] = event_var
            ei_str = 'static int * const {}_indexes[] = {{ &{}, NULL }};'.format(
                event_var,
                ', &'.join(param_l)
            )
        else:
            # Same parameter list as an earlier event: alias its array.
            ei_str = '#define {}_indexes {}_indexes'.format(event_var, idx_id_to_name[param_id])

        parameter_index_l.append(ei_str)

    dissector_path = os.path.join(os.path.dirname(__file__),
        '..', 'epan', 'dissectors', 'packet-sysdig-event.c')
    with open(dissector_path, 'r') as dissector_f:
        dissector_lines = list(dissector_f)

    # Strip out old content
    strip_re_l = [
        re.compile(r'^static\s+int\s+hf_param_.*;'),
        re.compile(r'^#define\s+EVT_STR_[A-Z0-9_]+\s+"[A-Za-z0-9_]+"'),
        re.compile(r'^#define\s+EVT_[A-Z0-9_]+\s+[0-9]+'),
        re.compile(r'^\s*{\s*EVT_[A-Z0-9_]+\s*,\s*EVT_STR_[A-Z0-9_]+\s*}'),
        re.compile(r'^static\s+const\s+int\s+\*\s*[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^static\s+int\s*\*\s+const\s+[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^\s*#define\s+[a-z0-9_]+_[ex]_indexes\s+[a-z0-9_]+_indexes'),
        re.compile(r'^\s*\{\s*EVT_[A-Z0-9_]+_[EX]\s*,\s*[a-z0-9_]+_[ex]_indexes\s*}\s*,'),
        re.compile(r'^\s*{\s*&hf_param_.*},'), # Must all be on one line
    ]

    dissector_lines = [
        l for l in dissector_lines
        if not any(strip_re.search(l) for strip_re in strip_re_l)
    ]

    # Find our value strings
    value_string_re = re.compile(r'static\s+const\s+value_string\s+([A-Za-z0-9_]+_vals)')
    value_string_l = []
    for line in dissector_lines:
        vs = value_string_re.match(line)
        if vs:
            value_string_l.append(vs.group(1))

    # Add in new content after comments.

    header_fields_l = [
        'static int {} = -1;'.format(hf_name) for hf_name in sorted(hf_d)
    ]

    event_names_l = [
        '#define EVT_STR_{0:24s} "{1:s}"'.format(evt_str.upper(), evt_str)
        for evt_str in sorted(set(event_name_l))
    ]

    event_definitions_l = [
        '#define EVT_{0:24s} {1:3d}'.format(event_d[evt], evt)
        for evt in event_nums
    ]

    value_strings_l = []
    for evt in event_nums:
        evt_num = 'EVT_{},'.format(event_d[evt])
        evt_str = 'EVT_STR_' + event_name_l[evt].upper()
        value_strings_l.append(' {{ {0:<32s} {1:s} }},'.format(evt_num, evt_str))

    # parameter_index_l defined above.

    event_tree_l = []
    for evt in event_nums:
        evt_num = 'EVT_{}'.format(event_d[evt])
        evt_idx = '{}_indexes'.format(event_d[evt].lower())
        event_tree_l.append(' {{ {}, {} }},'.format(evt_num, evt_idx))

    header_field_reg_l = []
    for hf_name in sorted(hf_d):
        param = hf_d[hf_name]
        param_type = param['param_type']
        fieldconvert = 'NULL'
        vs_name = param_to_value_string_name(param)
        # Only integer fields can reference a value_string found in the dissector.
        if vs_name in value_string_l and 'INT' in param_type:
            fieldconvert = 'VALS({})'.format(vs_name)
        header_field_reg_l.append(' {{ &{}, {{ "{}", "sysdig.param.{}.{}", FT_{}, BASE_{}, {}, 0, NULL, HFILL }} }},'.format(
            hf_name,
            get_param_desc(param),
            param['event_name'],
            param['param_name'],
            param_type,
            param['param_format'],
            fieldconvert
            ))

    # Section marker text and the lines to emit after it. The first
    # matching entry wins, so the order is significant.
    sections = [
        (comment, _section_re(comment), fill_l)
        for comment, fill_l in (
            ('Header fields', header_fields_l),
            ('Event names', event_names_l),
            ('Event definitions', event_definitions_l),
            ('Value strings', value_strings_l),
            ('Parameter indexes', parameter_index_l),
            ('Event tree', event_tree_l),
            ('Header field registration', header_field_reg_l),
        )
    ]

    tool_name = os.path.basename(__file__)
    out_lines = []
    for line in dissector_lines:
        for fill_comment, fill_re, fill_l in sections:
            if fill_re.match(line):
                # Write our comment followed by the content
                print(('Generating {}, {:d} lines'.format(fill_comment, len(fill_l))))
                out_lines.append('/* {}. Automatically generated by tools/{} */\n'.format(
                    fill_comment,
                    tool_name
                    ))
                out_lines.extend('{}\n'.format(fill_line) for fill_line in fill_l)
                # Fill each section only once
                del fill_l[:]
                break
        else:
            # Existing content
            out_lines.append(line)

    # Rewrite the dissector only now that every line has been generated, so
    # a failure above cannot leave a truncated file behind.
    with open(dissector_path, 'w') as dissector_f:
        dissector_f.writelines(out_lines)

#
# On with the show
#

if __name__ == "__main__":
    sys.exit(main())