1#!/usr/bin/env python3
2#
3# make-usb - Creates a file containing vendor and product ids.
# It uses the databases from
5# https://usb-ids.gowdy.us/
6# to create our file epan/dissectors/usb.c
7#
8# It also uses the values culled out of libgphoto2 using usb-ptp-extract-models.pl
9
10import re
11import sys
12import urllib.request, urllib.error, urllib.parse
13
# States of the usb.ids line parser: outside / inside the vendor and product
# section of the file.
MODE_IDLE, MODE_VENDOR_PRODUCT = 0, 1

# Lower bounds for the parsed tables; the script exits with an error when
# fewer entries than this were collected.
MIN_VENDORS = 3400    # 3409 as of 2020-11-15
MIN_PRODUCTS = 20000  # 20361 as of 2020-11-15

# Parsing starts outside the vendor/product section.
mode = MODE_IDLE
20
# The canonical location for the usb.ids file is http://www.linux-usb.org/usb.ids.
# As of November 2020 that site isn't available over HTTPS. Use what appears to
# be the source code repository for the site.
req_headers = { 'User-Agent': 'Wireshark make-usb' }
req = urllib.request.Request('https://sourceforge.net/p/linux-usb/repo/HEAD/tree/trunk/htdocs/usb.ids?format=raw', headers=req_headers)

# Use the response as a context manager so the HTTP connection is closed as
# soon as the body has been read, even if reading or decoding raises.
with urllib.request.urlopen(req) as response:
    # Bytes that are not valid UTF-8 are replaced (U+FFFD) rather than
    # aborting the run; the result is the file content split into lines.
    lines = response.read().decode('UTF-8', 'replace').splitlines()
28
# Lookup tables filled in while parsing usb.ids: `vendors` is keyed by the
# four-hex-digit vendor id, `products` by vendor id followed by product id.
# Both map to the corresponding name.
vendors = {}
products = {}

# Opening lines of the two generated C tables; the per-entry rows are
# appended once all ids have been collected.
vendors_str = "static const value_string usb_vendors_vals[] = {\n"
products_str = "static const value_string usb_products_vals[] = {\n"
33
# Per-byte translation table for emitting C string literals, built in layers
# from lowest to highest priority:
#   1. every byte value defaults to a three-digit octal escape;
#   2. tab and the bytes 0x20..0x7f are passed through unchanged;
#   3. backslash and double quote get a leading backslash.
escapes = {code: '\\%03o' % code for code in range(256)}
escapes.update((code, chr(code)) for code in [0x09, *range(0x20, 0x80)])
escapes.update((code, '\\%c' % code) for code in b'\\"')
43
# Patterns are compiled once, outside the loop, and written as raw strings so
# that "\?" reaches the regex engine as an escape instead of being an invalid
# Python string escape (which triggers a warning on current interpreters).
question_marks_re = re.compile(r"\?+")
vendor_line_re = re.compile(r"^[0-9a-f]{4}")
product_line_re = re.compile(r"^\t[0-9a-f]{4}")

# Walk the downloaded usb.ids content and collect vendor and product names
# from the section delimited by the two marker comments below.
for utf8line in lines:
    # Convert single backslashes to double (escaped) backslashes, escape quotes, etc.
    utf8line = utf8line.rstrip()
    # Collapse every run of question marks into a single one.
    # NOTE(review): presumably this keeps C trigraph sequences out of the
    # generated string literals -- confirm.
    utf8line = question_marks_re.sub("?", utf8line)
    line = ''.join(escapes[byte] for byte in utf8line.encode('utf8'))

    # Section markers switch the parser state and carry no data themselves.
    if line == "# Vendors, devices and interfaces. Please keep sorted.":
        mode = MODE_VENDOR_PRODUCT
        continue
    elif line == "# List of known device classes, subclasses and protocols":
        mode = MODE_IDLE
        continue

    if mode == MODE_VENDOR_PRODUCT:
        if vendor_line_re.match(line):
            # Vendor line: four hex digits at column 0, then the name.
            last_vendor = line[:4]
            vendors[last_vendor] = line[4:].strip()
        elif product_line_re.match(line):
            # Product line: one tab, four hex digits, then the name. It is
            # keyed under the most recently seen vendor id.
            line = line.strip()
            product = "%s%s" % (last_vendor, line[:4])
            products[product] = line[4:].strip()
65
66
# Grab from libgphoto (indirectly through tools/usb-ptp-extract-models.pl)
# Each non-blank line is split on whitespace: the first field is the key and
# the remaining fields, re-joined with single spaces, are the product name.
# Entries read here overwrite any usb.ids entry that has the same key.
with open('tools/usb-ptp-extract-models.txt', 'r') as u:
    # The context manager closes the file even if a line fails to parse.
    for line in u:
        fields = line.split()
        if not fields:
            # Blank or whitespace-only line: there is no key to index.
            continue
        products[fields[0]] = ' '.join(fields[1:])
72
# Sanity check: refuse to generate usb.c from a truncated or empty download.
# Vendors are checked first; the first table that falls short is reported on
# stderr and the script exits with status 1.
for found, required, message in (
    (len(vendors), MIN_VENDORS, "Not enough vendors: %d\n"),
    (len(products), MIN_PRODUCTS, "Not enough products: %d\n"),
):
    if found < required:
        sys.stderr.write(message % found)
        sys.exit(1)
80
# Finish both C tables: one initializer row per id in sorted key order, then
# the terminating { 0, NULL } entry and the value_string_ext definition.
row_format = "    { 0x%s, \"%s\" },\n"

vendors_str += "".join(
    row_format % (vendor_id, vendors[vendor_id]) for vendor_id in sorted(vendors)
)
vendors_str += """    { 0, NULL }\n};
value_string_ext ext_usb_vendors_vals = VALUE_STRING_EXT_INIT(usb_vendors_vals);
"""

products_str += "".join(
    row_format % (product_id, products[product_id]) for product_id in sorted(products)
)
products_str += """    { 0, NULL }\n};
value_string_ext ext_usb_products_vals = VALUE_STRING_EXT_INIT(usb_products_vals);
"""
94
# Fixed preamble of the generated C file: the attribution/license comment
# block followed by the #include lines. It is written verbatim at the top of
# epan/dissectors/usb.c, before the vendor and product tables, so every
# character (including blank lines) ends up in the output.
# NOTE(review): "libghoto2" below looks like a typo for "libgphoto2"; it is
# left untouched here because changing it alters the generated file.
header="""/* usb.c
 * USB vendor id and product ids
 * This file was generated by running python ./tools/make-usb.py
 * Don't change it directly.
 *
 * Copyright 2012, Michal Labedzki for Tieto Corporation
 *
 * Other values imported from libghoto2/camlibs/ptp2/library.c, music-players.h
 *
 * Copyright (C) 2001-2005 Mariusz Woloszyn <emsi@ipartners.pl>
 * Copyright (C) 2003-2013 Marcus Meissner <marcus@jet.franken.de>
 * Copyright (C) 2005 Hubert Figuiere <hfiguiere@teaser.fr>
 * Copyright (C) 2009 Axel Waggershauser <awagger@web.de>
 * Copyright (C) 2005-2007 Richard A. Low <richard@wentnet.com>
 * Copyright (C) 2005-2012 Linus Walleij <triad@df.lth.se>
 * Copyright (C) 2007 Ted Bullock
 * Copyright (C) 2012 Sony Mobile Communications AB
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
 * XXX We should probably parse a USB ID file at program start instead
 * of generating this file.
 */

#include "config.h"
#include <epan/packet.h>
"""
128
# Write the generated C source: preamble, vendor table, product table, each
# separated by blank lines. The context manager guarantees the file is
# flushed and closed even if one of the writes raises.
with open('epan/dissectors/usb.c', 'w') as f:
    f.write(header)
    f.write("\n")
    f.write(vendors_str)
    f.write("\n\n")
    f.write(products_str)
    f.write("\n")

# Reported only after the output file has been written and closed.
print("Success!")
139