1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
4
5
6import bz2
7import os
8import sys
9from datetime import datetime
10from urllib.request import urlopen
11
12from setup import download_securely
13
14
def download_from_calibre_server(url):
    '''
    Fetch ``url`` and return the raw response body as bytes.

    The TLS connection is verified against the ``calibre-ebook-root-CA.crt``
    certificate located in ``sys.resources_location`` (an attribute that is
    not part of the standard ``sys`` module; presumably injected by the
    calibre runtime -- verify against the caller's environment).
    '''
    # Local import, matching the file's habit of importing on demand.
    import ssl

    ca = os.path.join(sys.resources_location, 'calibre-ebook-root-CA.crt')
    # urlopen()'s ``cafile`` argument is deprecated since Python 3.6 and was
    # removed in 3.13. Building the context explicitly is exactly what
    # urlopen() used to do internally when given ``cafile``.
    ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, cafile=ca)
    with urlopen(url, context=ctx) as f:
        return f.read()
19
20
def filter_ans(ans):
    '''
    Return a list of the whitespace-stripped items of ``ans``, dropping
    every item that is empty once stripped. Input order is preserved.
    '''
    stripped = (item.strip() for item in ans)
    return [text for text in stripped if text]
23
24
def common_user_agents():
    '''
    Download the user-agent popularity list from the calibre server.

    The payload is bz2-compressed UTF-8 text with one ``count:user agent``
    entry per line. Entries whose user agent string is 20 characters or
    shorter (after stripping) are ignored.

    Returns a 2-tuple ``(popularity, ranked)`` where ``popularity`` maps each
    user agent string to its integer count and ``ranked`` is the list of user
    agent strings ordered from highest to lowest count.
    '''
    print('Getting recent UAs...')
    payload = download_from_calibre_server('https://code.calibre-ebook.com/ua-popularity')
    text = bz2.decompress(payload).decode('utf-8')
    popularity = {}
    for entry in text.splitlines():
        # Split on the first colon only: the user agent itself may contain colons.
        raw_count, _, raw_agent = entry.partition(':')
        hits = int(raw_count.strip())
        agent = raw_agent.strip()
        if len(agent) > 20:
            popularity[agent] = hits
    ranked = sorted(popularity, key=popularity.__getitem__, reverse=True)
    return popularity, ranked
36
37
def firefox_versions():
    '''
    Scrape the list of Firefox release versions from mozilla.org.

    The releases page is parsed with html5lib (lxml tree builder) and the
    text of every ``li/strong/a[@href]`` link below the first ``<ol>`` inside
    ``<main id="main-content">`` is collected, stripped, and returned as a
    list with empty strings removed.

    Raises IndexError if the page contains no such ``<ol>`` and ValueError if
    no version strings could be extracted from it.
    '''
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    ol = root.xpath('//main[@id="main-content"]/ol')[0]
    # Only the link text is needed; the previous extra XPath evaluation whose
    # result was thrown away has been dropped.
    ans = filter_ans(ol.xpath('descendant::li/strong/a[@href]/text()'))
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans
50
51
def chrome_versions():
    '''
    Scrape Chrome release data from Wikipedia's version history page.

    Rows of the last ``<tbody>`` under ``#mw-content-text`` are walked in
    document order. Rows without ``<td>`` cells, or whose third cell has no
    text, are skipped. Scanning stops at the first remaining row whose first
    cell's ``style`` attribute mentions neither ``#a0e75a`` nor ``salmon``.
    Rows whose third cell has fewer than two whitespace separated tokens are
    skipped as well.

    Returns a list of dicts with the keys ``date`` (``YYYY-MM-DD``),
    ``chrome_version`` and ``webkit_version``, in the reverse of the order in
    which the rows appear on the page.
    '''
    print('Getting chrome versions...')
    import html5lib
    page = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history')
    doc = html5lib.parse(
        page.decode('utf-8'), treebuilder='lxml', namespaceHTMLElements=False)
    last_table = doc.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    releases = []
    for row in last_table.iterchildren('tr'):
        cells = tuple(row.iterchildren('td'))
        if not cells:
            continue
        engine_text = cells[2].text
        if not engine_text or not engine_text.strip():
            continue
        style = cells[0].get('style')
        if not ('#a0e75a' in style or 'salmon' in style):
            break
        version = cells[0].text.strip()
        # Only the first token of the date cell is the ISO date; parsing and
        # re-formatting it validates the value.
        date_token = cells[1].text.strip().split()[0]
        released = datetime.strptime(date_token, '%Y-%m-%d').date().strftime('%Y-%m-%d')
        engine_tokens = engine_text.strip().split()
        if len(engine_tokens) < 2:
            # No second token to use as the webkit version.
            continue
        releases.append({
            'date': released,
            'chrome_version': version,
            'webkit_version': engine_tokens[1],
        })
    return releases[::-1]
79
80
def all_desktop_platforms(user_agents):
    '''
    Collect the distinct platform strings of desktop Firefox/Chrome agents.

    A user agent is considered only if it contains neither ``' Mobile '`` nor
    ``'Mobile/'`` and mentions ``'Firefox/'`` or ``'Chrome/'``. The platform
    is the text between the first ``(`` and the following ``)``; for Firefox
    agents the last ``;``-separated field of that text is dropped.

    Returns a set of platform strings.
    '''
    platforms = set()
    for agent in user_agents:
        if ' Mobile ' in agent or 'Mobile/' in agent:
            continue
        is_firefox = 'Firefox/' in agent
        if not is_firefox and 'Chrome/' not in agent:
            continue
        _, _, after_paren = agent.partition('(')
        inside_parens = after_paren.partition(')')[0]
        fields = inside_parens.split(';')
        if is_firefox:
            # str.split always yields at least one field, so this drops
            # exactly the trailing one.
            fields = fields[:-1]
        platforms.add(';'.join(fields))
    return platforms
91
92
def get_data():
    '''
    Gather all scraped browser data into a single dict.

    Returns a dict with the keys ``chrome_versions``, ``firefox_versions``,
    ``common_user_agents`` (user agents ordered by popularity),
    ``user_agents_popularity`` (user agent -> count), ``timestamp`` (current
    UTC time in ISO 8601 format with a ``+00:00`` offset) and
    ``desktop_platforms`` (list of platform strings derived from the common
    user agents; built from a set, so its order is unspecified).
    '''
    # Local import so this block does not depend on changes to the module's
    # import section.
    from datetime import timezone

    ua_freq_map, common = common_user_agents()
    ans = {
        'chrome_versions': chrome_versions(),
        'firefox_versions': firefox_versions(),
        'common_user_agents': common,
        'user_agents_popularity': ua_freq_map,
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # datetime already serialises with the same '+00:00' suffix that was
        # previously appended by hand.
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    ans['desktop_platforms'] = list(all_desktop_platforms(ans['common_user_agents']))
    return ans
104