1#!/usr/bin/env python3
2#
# Tool to index protocols that appear in the given capture files
4#
5# The script list_protos_in_cap.sh does the same thing.
6#
7# Copyright 2009, Kovarththanan Rajaratnam <kovarththanan.rajaratnam@gmail.com>
8#
9# Wireshark - Network traffic analyzer
10# By Gerald Combs <gerald@wireshark.org>
11# Copyright 1998 Gerald Combs
12#
13# SPDX-License-Identifier: GPL-2.0-or-later
14#
15
16from optparse import OptionParser
17import multiprocessing
18import sys
19import os
20import subprocess
21import re
22import pickle
23import tempfile
24import filecmp
25import random
26
def extract_protos_from_file_proces(tshark, file):
    """Tally the protocols found in one capture file (pool worker).

    Runs ``tshark -Tfields -e frame.protocols -r file`` and counts every
    protocol name occurring in the colon-separated protocol stacks that
    tshark prints, one stack per output line.

    Returns a ``(file, counts)`` tuple, where ``counts`` maps a protocol name
    to its number of occurrences.  ``counts`` is empty when tshark exits with
    a non-zero status.  Returns ``None`` when interrupted by the user.
    """
    try:
        child = subprocess.Popen(
            [tshark, "-Tfields", "-e", "frame.protocols", "-r", file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (raw_out, _raw_err) = child.communicate()
        text = raw_out.decode('utf-8')
        if child.returncode != 0:
            return (file, {})

        counts = {}
        # Only lines that look like a protocol stack ("eth:ip:tcp") count.
        stacks = (ln for ln in text.splitlines() if re.match(r'^[\w:-]+$', ln))
        for stack in stacks:
            for name in stack.split(':'):
                counts[name] = counts.get(name, 0) + 1

        return (file, counts)
    except KeyboardInterrupt:
        return None
47
def extract_protos_from_file(tshark, num_procs, max_files, cap_files, cap_hash, index_file_name):
    """Index the protocols of ``cap_files`` and write the index to disk.

    Every capture file is handed to ``extract_protos_from_file_proces`` in a
    pool of ``num_procs`` worker processes.  Each result is reported on
    stdout and merged into ``cap_hash`` (file name -> protocol counts), which
    is then pickled to ``index_file_name``.

    ``max_files`` is only used for the ``[n/total]`` progress display.

    This function does not return: it ends the process with status 0 after
    writing the index, or with status 1 when interrupted by the user (the
    index file is not written in that case).
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(extract_protos_from_file_proces, [tshark, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            if file_result is None:
                # The worker returns None when it caught Ctrl-C itself;
                # handle that exactly like an interrupt of this process.
                raise KeyboardInterrupt
            (file, proto_hash) = file_result
            # An empty result means tshark failed on the file (or found no
            # protocols).  Compare by truthiness: "is {}" is never true.
            action = "PROCESSED" if proto_hash else "SKIPPED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file, os.path.getsize(file)))
            cap_hash[file] = proto_hash
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()

    with open(index_file_name, "wb") as index_file:
        pickle.dump(cap_hash, index_file)
    # sys.exit() rather than the site-provided exit(), which is meant for
    # interactive use and is missing when Python runs without "site".
    sys.exit(0)
66
def dissect_file_process(tshark, tmpdir, file):
    """Dissect one capture file with tshark (pool worker).

    Runs ``tshark -nxVr file`` with stdout and stderr redirected into two
    freshly created temporary files inside ``tmpdir``.

    Returns ``(file, ok, stdout_path, stderr_path)`` where ``ok`` is True
    when tshark exited with status 0.  The temporary files are left on disk
    for the caller to inspect or remove.  Returns ``False`` when interrupted
    by the user.

    Errors from creating the temporary files or from starting tshark
    (``OSError``) propagate to the caller.
    """
    # Pre-set the descriptors so the cleanup below cannot hit an unbound name
    # (and mask the real error) when mkstemp() itself fails.
    handle_o = handle_e = None
    try:
        (handle_o, tmpfile_o) = tempfile.mkstemp(suffix='_stdout', dir=tmpdir)
        (handle_e, tmpfile_e) = tempfile.mkstemp(suffix='_stderr', dir=tmpdir)
        cmd = [tshark, "-nxVr", file]
        p = subprocess.Popen(cmd, stdout=handle_o, stderr=handle_e)
        # Output goes straight to the temporary files; just wait for exit.
        p.communicate()
        return (file, p.returncode == 0, tmpfile_o, tmpfile_e)

    except KeyboardInterrupt:
        return False

    finally:
        for handle in (handle_o, handle_e):
            if handle is not None:
                os.close(handle)
85
def dissect_files(tshark, tmpdir, num_procs, max_files, cap_files):
    """Dissect every file in ``cap_files`` and report PASSED/FAILED for each.

    The files are dissected by ``dissect_file_process`` in a pool of
    ``num_procs`` worker processes; a file fails when tshark exits with a
    non-zero status.  The workers leave their captured stdout/stderr files
    in ``tmpdir``.

    ``max_files`` is only used for the ``[n/total]`` progress display.

    Ends the process with status 1 when interrupted by the user.
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(dissect_file_process, [tshark, tmpdir, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            if file_result is False:
                # The worker returns a bare False when it caught Ctrl-C
                # itself; handle that like an interrupt of this process.
                raise KeyboardInterrupt
            action = "FAILED" if file_result[1] is False else "PASSED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0])))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)
    else:
        pool.close()
        pool.join()
98
def compare_files(tshark_bin, tmpdir, tshark_cmp, num_procs, max_files, cap_files):
    """Dissect each capture file with two tshark builds and compare the output.

    Every file in ``cap_files`` is dissected once with ``tshark_bin`` and
    once with ``tshark_cmp`` by ``dissect_file_process`` in a pool of
    ``num_procs`` worker processes.  A file is reported as

    * ``FAILED (exitcode)`` when either tshark exited with a non-zero status,
    * ``FAILED (stdout)`` when the two stdout captures differ,
    * ``FAILED (stderr)`` when the two stderr captures differ,
    * ``PASSED`` otherwise.

    The captured output files in ``tmpdir`` are removed for files that pass
    and kept for files that fail, so the differences can be inspected.

    ``max_files`` is only used for the ``[n/total]`` progress display.

    Ends the process with status 1 when interrupted by the user.
    """
    pool = multiprocessing.Pool(num_procs)
    # Queue both dissections of a file back to back, so the first pair is
    # complete early instead of waiting for every "bin" job to finish.
    results = [(pool.apply_async(dissect_file_process, [tshark_bin, tmpdir, file]),
                pool.apply_async(dissect_file_process, [tshark_cmp, tmpdir, file]))
               for file in cap_files]
    try:
        for (cur_item_idx, (result_async_bin, result_async_cmp)) in enumerate(results):
            file_result_bin = result_async_bin.get()
            file_result_cmp = result_async_cmp.get()
            if file_result_bin is False or file_result_cmp is False:
                # The worker returns a bare False when it caught Ctrl-C
                # itself; handle that like an interrupt of this process.
                raise KeyboardInterrupt

            # One if/elif chain: the first failure found decides the verdict
            # and must not be overwritten by a later, passing check.
            # shallow=False forces a content comparison; the default only
            # compares size and mtime when those happen to be equal.
            if file_result_cmp[1] is False or file_result_bin[1] is False:
                action = "FAILED (exitcode)"
            elif not filecmp.cmp(file_result_bin[2], file_result_cmp[2], shallow=False):
                action = "FAILED (stdout)"
            elif not filecmp.cmp(file_result_bin[3], file_result_cmp[3], shallow=False):
                action = "FAILED (stderr)"
            else:
                action = "PASSED"
                os.remove(file_result_bin[2])
                os.remove(file_result_cmp[2])
                os.remove(file_result_bin[3])
                os.remove(file_result_cmp[3])

            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_bin[0], os.path.getsize(file_result_bin[0])))
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_cmp[0], os.path.getsize(file_result_cmp[0])))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)
    else:
        pool.close()
        pool.join()
126
def list_all_proto(cap_hash):
    """Merge the per-file protocol counts of the index into one dictionary.

    ``cap_hash`` maps a file name to a ``{protocol: count}`` dictionary.
    Returns a new dictionary mapping each protocol to the sum of its counts
    over all files.
    """
    totals = {}
    for per_file_counts in cap_hash.values():
        for name in per_file_counts:
            totals[name] = per_file_counts[name] + totals.get(name, 0)
    return totals
134
def list_all_files(cap_hash):
    """Return the file names stored in the index as a sorted list."""
    return sorted(cap_hash.keys())
140
def list_all_proto_files(cap_hash, proto_comma_delit):
    """Return the indexed files that contain at least one requested protocol.

    ``proto_comma_delit`` is a comma-separated list of protocol names;
    whitespace around each name is ignored.  The files are returned in the
    iteration order of ``cap_hash``, each at most once.
    """
    wanted = {name.strip() for name in proto_comma_delit.split(',')}
    return [
        file
        for (file, files_hash) in cap_hash.items()
        if any(proto in wanted for proto in files_hash)
    ]
151
def index_file_action(options):
    """Tell whether the options request an action that works on the index file.

    Returns the first truthy value among the ``list_all_proto``,
    ``list_all_files``, ``list_all_proto_files`` and ``dissect_files``
    options, or the value of ``dissect_files`` when none of them is set.
    """
    for attr in ("list_all_proto", "list_all_files", "list_all_proto_files"):
        value = getattr(options, attr)
        if value:
            return value
    return options.dissect_files
157
def find_capture_files(paths, cap_hash):
    """Collect the files under ``paths`` that are not in the index yet.

    Directories are normalized and walked recursively; every file found
    below them is a candidate.  Any other path is taken as a candidate as
    given.  Candidates that are already keys of ``cap_hash`` are left out.
    """
    found = []
    for path in paths:
        if not os.path.isdir(path):
            if path not in cap_hash:
                found.append(path)
            continue

        for root, _dirs, names in os.walk(os.path.normpath(path)):
            for name in names:
                candidate = os.path.join(root, name)
                if candidate not in cap_hash:
                    found.append(candidate)
    return found
168
def find_tshark_executable(bin_dir):
    """Locate the tshark executable inside ``bin_dir``.

    Looks for ``tshark.exe`` and then ``tshark``.  Returns the path of the
    first one that is an executable regular file, or ``None`` when neither
    is found.
    """
    for file in ["tshark.exe", "tshark"]:
        tshark = os.path.join(bin_dir, file)
        # os.access(X_OK) also succeeds for searchable directories, so a
        # directory named "tshark" must be ruled out explicitly.
        if os.path.isfile(tshark) and os.access(tshark, os.X_OK):
            return tshark

    return None
176
def main():
    """Command-line entry point.

    Parses the options, loads the index file given as first argument (a
    missing index file starts an empty index) and then performs one of:

    * ``--list-all-proto``: print the protocol totals of the index and exit.
    * ``--list-all-files`` / ``--list-all-proto-files``: print the selected
      indexed files; without ``--dissect-files`` or ``--compare-dir`` the
      program exits afterwards.
    * ``--dissect-files``: dissect the selected indexed files with tshark.
    * ``--compare-dir``: dissect the selected indexed files with two tshark
      builds and compare their output.
    * otherwise: index the protocols of the capture files/directories given
      after the index file and write the updated index.

    The process exits with status 1 when a required tshark executable cannot
    be found or when the run is interrupted by the user.
    """
    parser = OptionParser(usage="usage: %prog [options] index_file [file_1|dir_1 [.. file_n|dir_n]]")
    parser.add_option("-d", "--dissect-files", dest="dissect_files", default=False, action="store_true",
                      help="Dissect all matching files")
    parser.add_option("-m", "--max-files", dest="max_files", default=sys.maxsize, type="int",
                      help="Max number of files to process")
    parser.add_option("-b", "--binary-dir", dest="bin_dir", default=os.getcwd(),
                      help="Directory containing tshark executable")
    parser.add_option("-c", "--compare-dir", dest="compare_dir", default=None,
                      help="Directory containing tshark executable which is used for comparison")
    parser.add_option("-j", dest="num_procs", default=multiprocessing.cpu_count(), type=int,
                      help="Max number of processes to spawn")
    parser.add_option("-r", "--randomize", default=False, action="store_true",
                      help="Randomize the file list order")
    parser.add_option("", "--list-all-proto", dest="list_all_proto", default=False, action="store_true",
                      help="List all protocols in index file")
    parser.add_option("", "--list-all-files", dest="list_all_files", default=False, action="store_true",
                      help="List all files in index file")
    parser.add_option("", "--list-all-proto-files", dest="list_all_proto_files", default=False,
                      metavar="PROTO_1[, .. PROTO_N]",
                      help="List all files in index file containing the given protocol")

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("index_file is a required argument")

    if len(args) == 1 and not index_file_action(options):
        parser.error("one capture file/directory must be specified")

    if options.dissect_files and not options.list_all_files and not options.list_all_proto_files:
        parser.error("--list-all-files or --list-all-proto-files must be specified")

    if options.dissect_files and options.compare_dir is not None:
        parser.error("--dissect-files and --compare-dir cannot be specified at the same time")

    index_file_name = args.pop(0)
    paths = args
    cap_hash = {}
    try:
        # NOTE: unpickling can execute arbitrary code; only use index files
        # that were written by this tool and come from a trusted location.
        with open(index_file_name, "rb") as index_file:
            print("index file: %s [OPENED]" % index_file.name)
            cap_hash = pickle.load(index_file)
        print("%d files" % len(cap_hash))
    except IOError:
        print("index file: %s [NEW]" % index_file_name)

    if options.list_all_proto:
        print(list_all_proto(cap_hash))
        sys.exit(0)

    indexed_files = []
    if options.list_all_files:
        indexed_files = list_all_files(cap_hash)
        print(indexed_files)

    if options.list_all_proto_files:
        indexed_files = list_all_proto_files(cap_hash, options.list_all_proto_files)
        print(indexed_files)

    # Pure listing only reads the index, so finish before looking for tshark;
    # a missing tshark must not make a listing fail.
    uses_indexed_files = options.dissect_files or options.compare_dir
    if not uses_indexed_files and (options.list_all_proto_files or options.list_all_files):
        sys.exit(0)

    tshark_bin = find_tshark_executable(options.bin_dir)
    if tshark_bin is not None:
        print("tshark: %s [FOUND]" % tshark_bin)
    else:
        # Report the directory that was searched; the lookup result is None.
        print("tshark: %s [MISSING]" % options.bin_dir)
        sys.exit(1)

    if options.compare_dir is not None:
        tshark_cmp = find_tshark_executable(options.compare_dir)
        if tshark_cmp is not None:
            print("tshark: %s [FOUND]" % tshark_cmp)
        else:
            print("tshark: %s [MISSING]" % options.compare_dir)
            sys.exit(1)

    if uses_indexed_files:
        cap_files = indexed_files
    else:
        cap_files = find_capture_files(paths, cap_hash)

    if options.randomize:
        random.shuffle(cap_files)
    else:
        cap_files.sort()

    # Clamp to [0, number of files]: a negative limit would otherwise slice
    # from the end of the list instead of limiting the count.
    options.max_files = max(0, min(options.max_files, len(cap_files)))
    print("%u total files, %u working files" % (len(cap_files), options.max_files))
    cap_files = cap_files[:options.max_files]
    if options.compare_dir or options.dissect_files:
        tmpdir = tempfile.mkdtemp()
        print("Temporary working dir: %s" % tmpdir)
    try:
        if options.compare_dir:
            compare_files(tshark_bin, tmpdir, tshark_cmp, options.num_procs, options.max_files, cap_files)
        elif options.dissect_files:
            dissect_files(tshark_bin, tmpdir, options.num_procs, options.max_files, cap_files)
        else:
            extract_protos_from_file(tshark_bin, options.num_procs, options.max_files, cap_files, cap_hash, index_file_name)
    finally:
        # Dissection may result in a non-empty directory.
        if options.compare_dir:
            try:
                os.rmdir(tmpdir)
            except OSError:
                # Failed comparisons keep their output files, so the
                # directory is not empty; leave it for inspection.
                print("Temporary working dir kept: %s" % tmpdir)


if __name__ == "__main__":
    main()
284