1#!/usr/bin/env python3
2#
3# Wireshark - Network traffic analyzer
4# By Gerald Combs <gerald@wireshark.org>
5# Copyright 1998 Gerald Combs
6#
7# SPDX-License-Identifier: GPL-2.0-or-later
8#
9'''\
10Generate Sysdig event dissector sections from the sysdig sources.
11
12Reads driver/event_table.c and driver/ppm_events_public.h and generates
13corresponding dissection code in packet-sysdig-event.c. Updates are
14performed in-place in the dissector code.
15
16Requires an Internet connection. Assets are loaded from GitHub over HTTPS, from falcosecurity/libs master.
17'''
18
19import logging
20import os
21import os.path
22import re
23import urllib.request, urllib.error, urllib.parse
24import sys
25
# Base URL for raw file downloads from the master branch of the
# falcosecurity/libs repository on GitHub. Relative source paths such as
# 'driver/event_table.c' are appended to it.
sysdig_repo_pfx = 'https://raw.githubusercontent.com/falcosecurity/libs/master/'
27
def exit_msg(msg=None, status=1):
    '''Print an optional message plus the module usage text, then exit.

    msg:    text written to stderr, followed by a blank line. Skipped
            when None.
    status: value handed to sys.exit().

    Never returns; sys.exit() raises SystemExit.
    '''
    if msg is not None:
        sys.stderr.write(msg + '\n\n')
    # __doc__ is None when the module has no docstring (for example when
    # run with "python -OO"). Fall back to an empty string so that we
    # still exit with the requested status instead of a TypeError.
    sys.stderr.write((__doc__ or '') + '\n')
    sys.exit(status)
33
def get_url_lines(url):
    '''Fetch a URL and return its body as a list of lines.

    The body is decoded with the default codec of bytes.decode() and
    split with str.splitlines(), so the returned strings carry no line
    terminators.

    Every failure is reported through exit_msg(), which terminates the
    script, so this function only returns on success.
    '''
    req_headers = { 'User-Agent': 'Wireshark generate-sysdig-event' }
    try:
        req = urllib.request.Request(url, headers=req_headers)
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(req) as response:
            lines = response.read().decode().splitlines()
    # Handlers go from most to least specific: HTTPError derives from
    # URLError, which derives from OSError.
    except urllib.error.HTTPError as err:
        exit_msg("HTTP error fetching {0}: {1}".format(url, err.reason))
    except urllib.error.URLError as err:
        exit_msg("URL error fetching {0}: {1}".format(url, err.reason))
    except OSError as err:
        exit_msg("OS error fetching {0}: {1}".format(url, err.strerror))
    except Exception as err:
        # Anything else, e.g. a malformed URL or undecodable body.
        exit_msg("Unexpected error fetching {0}: {1!r}".format(url, err))

    return lines
54
55
# Lines of driver/ppm_events_public.h. This runs at module load time, so
# merely importing this file performs a network request (and exits via
# exit_msg() if the download fails).
ppm_ev_pub_lines = get_url_lines(sysdig_repo_pfx + 'driver/ppm_events_public.h')
57
# Matches an indented "PPME_<NAME>_<E|X> = <number>," enumerator line.
# Group 1 is the name without the "PPME_" prefix, group 2 the decimal value.
# Written as a raw string: "\s" is not a valid escape in a normal string
# literal and triggers an invalid-escape warning on current Python versions.
ppme_re = re.compile(r'^\s+PPME_([A-Z0-9_]+_[EX])\s*=\s*([0-9]+)\s*,')

# Not referenced by any other code visible in this file; kept so the
# module-level name stays available.
event_info_d = {}
61
def get_event_defines():
    '''Map event numbers to event names found in ppm_ev_pub_lines.

    Each line matching ppme_re contributes one entry: the integer value
    of group 2 is the key and group 1 (the name without "PPME_") is the
    value. If a number occurs more than once, the last name wins.
    '''
    matches = map(ppme_re.match, ppm_ev_pub_lines)
    return {int(hit.group(2)): hit.group(1) for hit in matches if hit}
69
# Lines of driver/event_table.c. Like ppm_ev_pub_lines, this is downloaded
# at module load time.
ppm_ev_table_lines = get_url_lines(sysdig_repo_pfx + 'driver/event_table.c')
71
# Module-level placeholder; main() builds and uses its own local hf_d.
hf_d = {}

# Both patterns are raw strings: "\s", "\*" and "\/" are not valid escapes
# in a normal string literal and trigger invalid-escape warnings on current
# Python versions. The compiled patterns are unchanged.

# Matches an event table entry that starts with a "/* PPME_... */" comment.
# Group 1 is the quoted event name, group 2 the number in the fourth field
# (used by get_event_params() as the expected parameter count).
event_info_re = re.compile(r'^\s+/\*\s*PPME_.*\*\/\s*{\s*"([A-Za-z0-9_]+)"\s*,[^,]+,[^,]+,\s*([0-9]+)\s*[,{}]')
# Matches one '{"name", PT_<type>, PF_<format>' parameter entry.
# Groups: parameter name, PT_ suffix, PF_ suffix.
event_param_re = re.compile(r'{\s*"([A-Za-z0-9_ ]+)"\s*,\s*PT_([A-Z0-9_]+)\s*,\s*PF_([A-Z0-9_]+)\s*[,}]')
76
def get_event_names():
    '''Return the event names found in ppm_ev_table_lines, in file order.

    One entry is produced for every line matching event_info_re, so the
    list position equals the number of matching lines that precede it.
    Names are returned exactly as captured by group 1 of the pattern.
    '''
    hits = map(event_info_re.match, ppm_ev_table_lines)
    return [hit.group(1) for hit in hits if hit]
85
# Sysdig parameter type (PT_ suffix) to Wireshark field type (FT_ suffix).
# Types not listed here are handled by the fallbacks in get_event_params().
pt_to_ft = dict(
    BYTEBUF='BYTES',
    CHARBUF='STRING',
    FD='INT64',
    FSPATH='STRING',
)

# Field type pattern (matched with re.match against the FT_ suffix) to the
# display base (BASE_ suffix) that is always enforced for that type.
force_param_formats = dict([
    ('STRING', 'NONE'),
    ('INT.*', 'DEC'),
])
99
def get_event_params():
    '''Build one record per event parameter found in ppm_ev_table_lines.

    Every line matching event_info_re counts as one event; its zero-based
    position among those matches becomes 'event_num'. For each parameter
    matched by event_param_re on such a line a dictionary is appended with
    the keys 'event_name', 'event_num', 'param_name' (spaces replaced by
    underscores), 'param_type' (FT_ suffix) and 'param_format' (BASE_
    suffix).

    If more parameters are found than the entry declares, a warning is
    logged and the extras are dropped. Raises NameError if fewer are found.
    '''
    string_only_names = ['args', 'env']
    records = []
    event_idx = 0

    for table_line in ppm_ev_table_lines:
        info = event_info_re.match(table_line)
        if not info:
            continue

        found = event_param_re.findall(table_line)
        if found:
            event_name = info.group(1)
            declared = int(info.group(2))
            if len(found) != declared:
                err_msg = '{}: found {} parameters. Expected {}. Params: {}'.format(
                    event_name, len(found), declared, repr(found))
                if len(found) < declared:
                    raise NameError(err_msg)
                logging.warning(err_msg)
                del found[declared:]

            for p_name, p_type, p_fmt in found:
                # Pick the field type.
                if p_name in string_only_names:
                    field_type = 'STRING'
                elif p_type in pt_to_ft:
                    field_type = pt_to_ft[p_type]
                elif 'INT' not in p_type:
                    # Fall back to bytes
                    field_type = 'BYTES'
                elif p_name == 'flags' and p_type.startswith('INT') and 'HEX' in p_fmt:
                    # Signed hex flags are turned into the unsigned type.
                    field_type = 'U' + p_type
                else:
                    # Ints
                    field_type = p_type

                # Pick the display base.
                if field_type == 'BYTES':
                    field_base = 'NONE'
                elif p_fmt != 'NA':
                    field_base = p_fmt
                else:
                    field_base = 'DEC' if 'INT' in field_type else 'NONE'

                # Apply the mandatory type -> base overrides.
                for type_pat, forced_base in force_param_formats.items():
                    if field_base != forced_base and re.match(type_pat, field_type):
                        err_msg = 'Forcing {} {} format to {}. Params: {}'.format(
                            event_name, field_type, forced_base, repr(found))
                        logging.warning(err_msg)
                        field_base = forced_base

                records.append({
                    'event_name': event_name,
                    'event_num': event_idx,
                    # use replace() to account for "plugin ID" param name (ie: param names with space)
                    'param_name': p_name.replace(" ", "_"),
                    'param_type': field_type,
                    'param_format': field_base,
                })

        event_idx += 1

    return records
162
def param_to_hf_name(param):
    '''Return the header field variable name for a parameter record.

    The name is "hf_param_<param_name>_<param_type in lower case>".
    '''
    type_suffix = param['param_type'].lower()
    return 'hf_param_{name}_{suffix}'.format(name=param['param_name'], suffix=type_suffix)
165
def param_to_value_string_name(param):
    '''Return the value_string array name for a parameter record.

    The name is "<param_name>_<param_type in lower case>_vals".
    '''
    type_suffix = param['param_type'].lower()
    return '{name}_{suffix}_vals'.format(name=param['param_name'], suffix=type_suffix)
168
def get_param_desc(param):
    '''Return a human-friendly description for a parameter record.

    Lookup order:
      1. A description for the exact "<event_name>.<param_name>" pair.
      2. A description of the event, followed by ": <param_name>".
      3. The bare parameter name.
    '''
    # XXX This could use some work.

    # Descriptions keyed on event name + parameter name.
    by_event_and_param = {
        'accept.queuepct': 'Accept queue per connection',
        'execve.args': 'Program arguments',
        'execve.comm': 'Command',
        'execve.cwd': 'Current working directory',
    }
    # Descriptions keyed on event name only.
    by_event = {
        'ioctl': 'I/O control',
    }

    evt = param['event_name']
    name = param['param_name']

    qualified = '{}.{}'.format(evt, name)
    if qualified in by_event_and_param:
        return by_event_and_param[qualified]
    if evt in by_event:
        return '{}: {}'.format(by_event[evt], name)
    return name
195
def main():
    '''Regenerate the generated sections of packet-sysdig-event.c in place.

    Steps:
      1. Collect event numbers, event names and parameter records from the
         downloaded sources.
      2. Read the dissector, drop the previously generated lines and note
         which value_string arrays it defines.
      3. Build the replacement text for every generated section.
      4. Write the file back, expanding each section marker comment
         ("/* Header fields", "/* Event names", ...) into the marker plus
         its freshly generated lines.

    The complete output is assembled in memory before the dissector is
    opened for writing, so an error while generating content leaves the
    existing file untouched instead of truncated.
    '''
    logging.basicConfig(format='%(levelname)s: %(message)s')

    # Event list
    event_d = get_event_defines()
    event_nums = sorted(event_d)

    event_name_l = get_event_names()
    event_param_l = get_event_params()

    # Header field variable name -> parameter record. Parameters sharing
    # a name and type collapse into one field; the last record wins.
    hf_d = {param_to_hf_name(param): param for param in event_param_l}

    # Group the parameter records by event number once, preserving their
    # order, instead of rescanning the whole list for every event.
    params_by_event = {}
    for param in event_param_l:
        params_by_event.setdefault(param['event_num'], []).append(param)

    # Parameter signature -> first event that used it. Events without
    # parameters share the hand-written "no_indexes" array.
    idx_id_to_name = { '': 'no' }
    parameter_index_l = []

    # NOTE: event_d is indexed by position here, so event numbers are
    # expected to be contiguous and to start at 0 (KeyError otherwise).
    for en in range(len(event_nums)):
        event_var = event_d[en].lower()
        event_params = params_by_event.get(en, [])
        param_l = [param_to_hf_name(param) for param in event_params]
        param_id = ''.join(
            ':' + param['param_name'] + '_' + param['param_type']
            for param in event_params
        )

        if param_id not in idx_id_to_name:
            idx_id_to_name[param_id] = event_var
            ei_str = 'static int * const {}_indexes[] = {{ &{}, NULL }};'.format(
                event_var,
                ', &'.join(param_l)
            )
        else:
            # Same signature as an earlier event: alias its index array.
            ei_str = '#define {}_indexes {}_indexes'.format(event_var, idx_id_to_name[param_id])

        parameter_index_l.append(ei_str)

    dissector_path = os.path.join(os.path.dirname(__file__),
        '..', 'epan', 'dissectors', 'packet-sysdig-event.c')
    with open(dissector_path, 'r') as dissector_f:
        dissector_lines = list(dissector_f)

    # Strip out old content
    strip_re_l = [
        re.compile(r'^static\s+int\s+hf_param_.*;'),
        re.compile(r'^#define\s+EVT_STR_[A-Z0-9_]+\s+"[A-Za-z0-9_]+"'),
        re.compile(r'^#define\s+EVT_[A-Z0-9_]+\s+[0-9]+'),
        re.compile(r'^\s*{\s*EVT_[A-Z0-9_]+\s*,\s*EVT_STR_[A-Z0-9_]+\s*}'),
        re.compile(r'^static\s+const\s+int\s+\*\s*[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^static\s+int\s*\*\s+const\s+[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^\s*#define\s+[a-z0-9_]+_[ex]_indexes\s+[a-z0-9_]+_indexes'),
        re.compile(r'^\s*\{\s*EVT_[A-Z0-9_]+_[EX]\s*,\s*[a-z0-9_]+_[ex]_indexes\s*}\s*,'),
        re.compile(r'^\s*{\s*&hf_param_.*},'), # Must all be on one line
    ]
    dissector_lines = [
        src_line for src_line in dissector_lines
        if not any(strip_re.search(src_line) for strip_re in strip_re_l)
    ]

    # Find our value strings
    value_string_re = re.compile(r'static\s+const\s+value_string\s+([A-Za-z0-9_]+_vals)')
    value_string_names = set()
    for src_line in dissector_lines:
        vs = value_string_re.match(src_line)
        if vs:
            value_string_names.add(vs.group(1))

    # New content, one list of lines per section marker comment.

    header_fields_l = ['static int {} = -1;'.format(hf_name) for hf_name in sorted(hf_d)]

    event_names_l = [
        '#define EVT_STR_{0:24s} "{1:s}"'.format(evt_str.upper(), evt_str)
        for evt_str in sorted(set(event_name_l))
    ]

    event_definitions_l = [
        '#define EVT_{0:24s} {1:3d}'.format(event_d[evt], evt)
        for evt in event_nums
    ]

    value_strings_l = []
    for evt in event_nums:
        evt_num = 'EVT_{},'.format(event_d[evt])
        # The event number doubles as the position in the event table.
        evt_str = 'EVT_STR_' + event_name_l[evt].upper()
        value_strings_l.append('    {{ {0:<32s} {1:s} }},'.format(evt_num, evt_str))

    event_tree_l = []
    for evt in event_nums:
        evt_num = 'EVT_{}'.format(event_d[evt])
        evt_idx = '{}_indexes'.format(event_d[evt].lower())
        event_tree_l.append('    {{ {}, {} }},'.format(evt_num, evt_idx))

    header_field_reg_l = []
    for hf_name in sorted(hf_d):
        param = hf_d[hf_name]
        param_type = param['param_type']
        fieldconvert = 'NULL'
        vs_name = param_to_value_string_name(param)
        # Only integer fields can reference a value_string array.
        if vs_name in value_string_names and 'INT' in param_type:
            fieldconvert = 'VALS({})'.format(vs_name)
        header_field_reg_l.append('        {{ &{}, {{ "{}", "sysdig.param.{}.{}", FT_{}, BASE_{}, {}, 0, NULL, HFILL }} }},'.format(
            hf_name,
            get_param_desc(param),
            param['event_name'],
            param['param_name'],
            param_type,
            param['param_format'],
            fieldconvert
            ))

    # (marker text, generated lines), checked in this order for each line.
    section_l = [
        ('Header fields', header_fields_l),
        ('Event names', event_names_l),
        ('Event definitions', event_definitions_l),
        ('Value strings', value_strings_l),
        ('Parameter indexes', parameter_index_l),
        ('Event tree', event_tree_l),
        ('Header field registration', header_field_reg_l),
    ]
    section_l = [
        (re.compile(r'/\*\s+' + fill_comment, flags = re.IGNORECASE), fill_comment, fill_l)
        for fill_comment, fill_l in section_l
    ]

    # Assemble the complete output before touching the file on disk.
    output_l = []
    for src_line in dissector_lines:
        for section_re, fill_comment, fill_l in section_l:
            if not section_re.match(src_line):
                continue
            # Write our comment followed by the content
            print('Generating {}, {:d} lines'.format(fill_comment, len(fill_l)))
            output_l.append('/* {}. Automatically generated by tools/{} */\n'.format(
                fill_comment,
                os.path.basename(__file__)
                ))
            output_l.extend('{}\n'.format(fill_line) for fill_line in fill_l)
            # Fill each section only once
            del fill_l[:]
            break
        else:
            # Existing content
            output_l.append(src_line)

    with open(dissector_path, 'w') as dissector_f:
        dissector_f.writelines(output_l)
374
375#
376# On with the show
377#
378
# Run the generator only when executed as a script. main() has no return
# statement, so on success sys.exit() receives None and the exit status is 0.
if __name__ == "__main__":
    sys.exit(main())
381