1# calculate_distance script
2# Calculates and prints the distance between two or more MAEC Malware Subjects
3# NOTE: This code imports and uses the maec.analytics.distance module, which uses the external numpy library.
4# Numpy can be found here: https://pypi.python.org/pypi/numpy
5
6# Copyright (c) 2018, The MITRE Corporation. All rights reserved.
7# See LICENSE.txt for complete terms.
8
9import os
10import maec
11import argparse
12from maec.analytics.distance import Distance
13from maec.package.package import Package
14
def _xml_files_in(directory):
    """Return the sorted paths of the regular ``.xml`` files directly inside *directory*.

    The extension is matched case-insensitively at the end of the name, so
    files such as ``report.xml.bak`` and sub-directories are ignored. Sorting
    keeps the order of the loaded packages (and so of the output) stable
    across runs, since ``os.listdir`` returns entries in arbitrary order.
    """
    paths = []
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if filename.lower().endswith('.xml') and os.path.isfile(path):
            paths.append(path)
    return paths


def _load_package(path):
    """Parse the file at *path* and return its MAEC Package, or None.

    None is returned when the 'api' entry of the parse result is not a
    ``Package`` instance; such files are skipped by the caller.
    """
    api_obj = maec.parse_xml_instance(path)['api']
    if isinstance(api_obj, Package):
        return api_obj
    return None


def main():
    """Command-line entry point: compute distances and write them to a CSV file.

    Reads MAEC Package files either from an explicit list (``-l``) or from a
    directory (``-d``), runs the ``Distance`` calculation over the Packages
    found, and writes the result to the ``output`` path.

    Exits through ``argparse`` (status 2) when no input source is given or
    when ``--only_static`` and ``--only_dynamic`` are combined.
    """
    # Setup the argument parser
    parser = argparse.ArgumentParser(description="MAEC Distance Calculation script")
    # Exactly one input source is needed; without one there is nothing to compare.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("-l", "-list", dest="files", nargs="+", help="a space separated list of MAEC Package files to calculate the distances for")
    input_group.add_argument("-d", "-directory", dest="directory", help="the path to a directory of MAEC Package files to calculate the distances for")
    # Passing both flags would switch off static AND dynamic features, so reject that.
    feature_group = parser.add_mutually_exclusive_group()
    feature_group.add_argument("--only_static", help="use only static features in the distance calculation", action="store_true")
    feature_group.add_argument("--only_dynamic", help="use only dynamic features (Actions) in the distance calculation", action="store_true")
    parser.add_argument("output", help="the name of the CSV file to which the calculated distances will be written")
    args = parser.parse_args()

    # Collect the paths of the input files
    input_paths = []
    if args.files:
        input_paths = args.files
    elif args.directory:
        input_paths = _xml_files_in(args.directory)

    # Parse the input files, keeping only those that contain a MAEC Package
    package_list = []
    for path in input_paths:
        package = _load_package(path)
        if package is not None:
            package_list.append(package)

    # Perform the distance calculation
    dist = Distance(package_list)
    # Set the particular features that will be used
    if args.only_static:
        dist.options_dict['use_dynamic_features'] = False
    if args.only_dynamic:
        dist.options_dict['use_static_features'] = False
    dist.calculate()
    # Write the results to the specified CSV file; the context manager
    # closes the file even if printing the distances fails part-way.
    with open(args.output, mode='w') as out_file:
        dist.print_distances(out_file)
54
55
# Run the distance calculation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()