1#!/usr/bin/env python3
2#
3# Parses the CSV version of the IANA Service Name and Transport Protocol Port Number Registry
4# and generates a services(5) file.
5#
6# Wireshark - Network traffic analyzer
7# By Gerald Combs <gerald@wireshark.org>
8# Copyright 2013 Gerald Combs
9#
10# SPDX-License-Identifier: GPL-2.0-or-later
11
# Location of the CSV registry that is fetched when no url argument is given.
iana_svc_url = 'https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.csv'

# Usage text; exit_msg() prints it to stderr before exiting.
__doc__ = '''\
Usage: make-services.py [url]

url defaults to
    %s
''' % iana_svc_url
20
21import sys
22import getopt
23import csv
24import re
25import collections
26import urllib.request, urllib.error, urllib.parse
27import codecs
28
# Name of the generated output file (created in the current directory).
services_file = 'services'

# Regular expressions tested against each service name; registry entries
# whose name matches any of them are left out of the output.
exclude_services = ['^spr-itunes', '^spl-itunes', '^shilp']

# Sanity threshold: parsing fewer rows than this is treated as a failure.
# Size was ~ 14800 on 2017-07-20
min_source_lines = 14000
38
def parse_port(port_str):
    """Convert a registry port field into a tuple of integers.

    A single port such as "80" becomes (80,) and a range such as
    "8000-8010" becomes (8000, 8010). Anything else (empty field,
    non-numeric parts, more than one dash) yields an empty tuple.
    """
    pieces = port_str.split('-')
    if len(pieces) not in (1, 2):
        return ()
    try:
        return tuple(int(piece) for piece in pieces)
    except ValueError:
        # Non-numeric or empty component: report "no port".
        return ()
50
def port_to_str(port):
    """Format a port tuple: two elements give "low-high", otherwise the first element."""
    low = str(port[0])
    return low + '-' + str(port[1]) if len(port) == 2 else low
55
def _column_index(headers, name, default):
    """Return the position of column *name* in *headers*, or *default* if it is absent."""
    try:
        return headers.index(name)
    except ValueError:
        return default


def parse_rows(svc_fd):
    """Parse the registry CSV read from *svc_fd* into a services map.

    The first CSV row is taken as the header. Each following row
    contributes a service name, a port (or port range), a transport
    protocol and a description. Rows lacking a name, a parseable port or
    a protocol are ignored, as are rows whose name matches one of the
    ``exclude_services`` patterns.

    Returns a dict keyed by port tuple, ``(port,)`` or ``(low, high)``.
    Each value is an OrderedDict mapping the service name (cut to 15
    characters, blanks replaced by dashes) to a list of the form
    ``[description_or_None, proto, proto, ...]``. For a given port, only
    the first service seen for each protocol is kept.

    Calls exit_msg() (which terminates the program) when fewer than
    ``min_source_lines`` usable rows were read.
    """
    port_reader = csv.reader(svc_fd)
    count = 0

    # An empty input has no header row; fall through to the size check below.
    headers = next(port_reader, [])

    # Fallback positions are the header positions as of 2013-08-06.
    sn_pos = _column_index(headers, 'Service Name', 0)
    pn_pos = _column_index(headers, 'Port Number', 1)
    tp_pos = _column_index(headers, 'Transport Protocol', 2)
    desc_pos = _column_index(headers, 'Description', 3)
    min_columns = max(sn_pos, pn_pos, tp_pos, desc_pos) + 1

    # Build the exclusion pattern once. An empty list must exclude nothing;
    # joining it would give '' which matches every name.
    exclude_re = re.compile('|'.join(exclude_services)) if exclude_services else None
    boilerplate_re = re.compile("IANA assigned this well-formed service .+$")
    blanks_re = re.compile("  +")

    services_map = {}

    for row in port_reader:
        # csv.reader yields [] for blank lines; such rows (and truncated
        # ones) cannot be indexed, so skip them instead of crashing.
        if len(row) < min_columns:
            continue
        count += 1

        service = row[sn_pos]
        port = parse_port(row[pn_pos])
        proto = row[tp_pos]
        description = row[desc_pos]

        if not service or not port or not proto:
            continue

        if exclude_re is not None and exclude_re.search(service):
            continue

        # max 15 chars, then replace blanks (for some non-standard long names)
        service = service[:15].rstrip().replace(" ", "-")

        description = description.replace("\n", "")
        description = boilerplate_re.sub("", description)
        description = blanks_re.sub(" ", description)
        description = description.strip()
        # A description that merely repeats the name adds nothing.
        if description == service or description == service.replace("-", " "):
            description = None

        port_services = services_map.setdefault(port, collections.OrderedDict())

        # Remove some duplicates (first entry wins). Element 0 of each entry
        # is the description, so only the protocols after it are compared.
        if any(proto in entry[1:] for entry in port_services.values()):
            continue

        port_services.setdefault(service, [description]).append(proto)

    if count < min_source_lines:
        exit_msg('Not enough parsed data')

    return services_map
128
def write_body(d, f):
    """Write one services(5) line per service in *d* to *f*, in sorted port order.

    *d* maps a port tuple to a mapping of service name -> list whose
    first element is the description (or a false value) and whose
    remaining elements are protocol names. Each output line has the form
    ``name<TABs>port/proto[/proto...][<TABs># description]``.
    """
    for port in sorted(d):
        for serv, entry in d[port].items():
            port_str = port_to_str(port) + "/" + "/".join(entry[1:])
            # Shorter names get more tabs so the port column lines up.
            name_pad = "\t" * (1 + abs((15 - len(serv)) // 8))
            pieces = [serv, name_pad, port_str]

            description = entry[0]
            if description:
                # Short port fields need a second tab to reach the comment column.
                pieces.append("\t\t" if len(port_str) < 8 else "\t")
                pieces.append("# " + description)

            pieces.append("\n")
            f.write("".join(pieces))
146
def exit_msg(msg=None, status=1):
    """Print *msg* (if given) and the usage text to stderr, then exit with *status*."""
    err = sys.stderr
    if msg is not None:
        # The message is set apart from the usage text by a blank line.
        err.write(msg + '\n\n')
    err.write(__doc__ + '\n')
    sys.exit(status)
152
def main(argv):
    """Generate the services file from the IANA registry.

    *argv* is the command line without the program name. ``-h``/``--help``
    prints the usage text and exits with status 0. The first positional
    argument, when present, is the registry source: anything starting
    with ``http`` is fetched as a URL, anything else is opened as a local
    file. Without an argument ``iana_svc_url`` is used. Input is decoded
    as UTF-8 and the output file named by ``services_file`` is written as
    UTF-8.

    Exits through exit_msg() on bad options or when the source cannot be
    opened.
    """
    if sys.version_info[0] < 3:
        print("This requires Python 3")
        sys.exit(2)

    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        exit_msg()
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            exit_msg(None, 0)

    # Use getopt's positional list so an explicit "--" separator is not
    # mistaken for the url.
    svc_url = args[0] if args else iana_svc_url

    try:
        if not svc_url.startswith('http'):
            # Decode local files the same way as the downloaded registry.
            svc_fd = open(svc_url, encoding='utf-8')
        else:
            req = urllib.request.urlopen(svc_url)
            svc_fd = codecs.getreader('utf8')(req)
    except Exception:
        # Top-level boundary: any failure to open becomes a usage error.
        exit_msg('Error opening ' + svc_url)

    # Release the file or HTTP connection even if parsing exits early.
    try:
        body = parse_rows(svc_fd)
    finally:
        svc_fd.close()

    # Fixed encoding: descriptions may contain non-ASCII characters that a
    # locale-dependent default could fail to encode.
    with open(services_file, 'w', encoding='utf-8') as out:
        out.write('''\
# This is a local copy of the IANA port-numbers file.
#
# Wireshark uses it to resolve port numbers into human readable
# service names, e.g. TCP port 80 -> http.
#
# It is subject to copyright and being used with IANA's permission:
# https://www.wireshark.org/lists/wireshark-dev/200708/msg00160.html
#
# The original file can be found at:
# %s
#
# The format is the same as that used for services(5). It is allowed to merge
# identical protocols, for example:
#   foo 64/tcp
#   foo 64/udp
# becomes
#   foo 64/tcp/udp
#

''' % (iana_svc_url))

        write_body(body, out)
208
# Script entry point: run main() on the arguments after the program name
# and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
211