#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>


import bz2
import os
import ssl
import sys
from datetime import datetime, timezone
from urllib.request import urlopen

from setup import download_securely


def download_from_calibre_server(url):
    """Return the raw bytes served at *url*.

    The TLS connection is verified against the ``calibre-ebook-root-CA.crt``
    certificate found in ``sys.resources_location``.
    """
    ca = os.path.join(sys.resources_location, 'calibre-ebook-root-CA.crt')
    # urlopen()'s cafile= argument is deprecated and was removed in
    # Python 3.13; an explicit SSL context is the supported equivalent.
    context = ssl.create_default_context(cafile=ca)
    with urlopen(url, context=context) as f:
        return f.read()


def filter_ans(ans):
    """Strip every string in *ans* and return the non-empty results as a list."""
    return [x for x in (item.strip() for item in ans) if x]


def common_user_agents():
    """Download and parse the user agent popularity data.

    The payload is bz2 compressed UTF-8 text with one ``count:user agent``
    entry per line. User agents of 20 characters or fewer are discarded.

    Returns a tuple ``(popularity, ordered)`` where *popularity* maps each
    user agent string to its integer count and *ordered* lists the user
    agents sorted by descending count.
    """
    print('Getting recent UAs...')
    raw = download_from_calibre_server('https://code.calibre-ebook.com/ua-popularity')
    ans = {}
    for line in bz2.decompress(raw).decode('utf-8').splitlines():
        if not line.strip():
            # A stray blank line carries no data; skip it instead of
            # failing in int() below.
            continue
        count, _, ua = line.partition(':')
        count = int(count.strip())
        ua = ua.strip()
        if len(ua) > 20:
            ans[ua] = count
    return ans, sorted(ans, reverse=True, key=ans.__getitem__)


def firefox_versions():
    """Return the list of Firefox version strings from the Mozilla releases page.

    The versions are the non-empty link texts found inside the first ``<ol>``
    of the page's main content element.

    Raises ValueError if the list cannot be located or is empty.
    """
    print('Getting firefox versions...')
    import html5lib
    raw = download_securely(
        'https://www.mozilla.org/en-US/firefox/releases/').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    lists = root.xpath('//main[@id="main-content"]/ol')
    ans = filter_ans(lists[0].xpath('descendant::li/strong/a[@href]/text()')) if lists else []
    if not ans:
        raise ValueError('Failed to download list of firefox versions')
    return ans


def chrome_versions():
    """Return Chrome release data parsed from the Wikipedia version history page.

    Rows of the last ``<tbody>`` in the article content are read in order.
    Rows whose third cell has no text are skipped, and reading stops at the
    first remaining row whose first cell style contains neither ``#a0e75a``
    nor ``salmon``.

    Returns a list of dicts with the keys ``date`` (``YYYY-MM-DD``),
    ``chrome_version`` and ``webkit_version``, in the reverse of page order.
    """
    print('Getting chrome versions...')
    import html5lib
    raw = download_securely(
        'https://en.wikipedia.org/wiki/Google_Chrome_version_history').decode('utf-8')
    root = html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False)
    table = root.xpath('//*[@id="mw-content-text"]//tbody')[-1]
    ans = []
    for tr in table.iterchildren('tr'):
        cells = tuple(tr.iterchildren('td'))
        if len(cells) < 3:
            # Header rows have no <td>; rows with fewer than three cells
            # cannot supply version, date and engine.
            continue
        engine = (cells[2].text or '').strip()
        if not engine:
            continue
        # A cell without a style attribute yields None; treat it as unstyled
        # rather than failing on the membership tests.
        style = cells[0].get('style') or ''
        if '#a0e75a' not in style and 'salmon' not in style:
            break
        chrome_version = cells[0].text.strip()
        # Round-trip through strptime so that a malformed date is rejected.
        ts = datetime.strptime(
            cells[1].text.strip().split()[0], '%Y-%m-%d').date().strftime('%Y-%m-%d')
        try:
            webkit_version = engine.split()[1]
        except IndexError:
            continue
        ans.append({'date': ts, 'chrome_version': chrome_version,
                    'webkit_version': webkit_version})
    return list(reversed(ans))


def all_desktop_platforms(user_agents):
    """Return the set of platform strings used by desktop Firefox/Chrome UAs.

    A user agent is considered only if it contains neither ``' Mobile '`` nor
    ``'Mobile/'`` and contains ``'Firefox/'`` or ``'Chrome/'``. The platform is
    the text inside the first pair of parentheses; for Firefox user agents the
    last ``;`` separated token is dropped (presumably the ``rv:`` revision).
    """
    ans = set()
    for ua in user_agents:
        if ' Mobile ' in ua or 'Mobile/' in ua:
            continue
        is_firefox = 'Firefox/' in ua
        if not is_firefox and 'Chrome/' not in ua:
            continue
        plat = ua.partition('(')[2].partition(')')[0]
        parts = plat.split(';')
        if is_firefox:
            del parts[-1]
        ans.add(';'.join(parts))
    return ans


def get_data():
    """Collect all browser data into a single dict.

    The dict has the keys ``chrome_versions``, ``firefox_versions``,
    ``common_user_agents``, ``user_agents_popularity``, ``timestamp`` (the
    current UTC time in ISO 8601 format with a ``+00:00`` offset) and
    ``desktop_platforms``.
    """
    ua_freq_map, common = common_user_agents()
    ans = {
        'chrome_versions': chrome_versions(),
        'firefox_versions': firefox_versions(),
        'common_user_agents': common,
        'user_agents_popularity': ua_freq_map,
        # An aware UTC datetime serialises with the same '+00:00' suffix that
        # used to be appended by hand to the deprecated utcnow() value.
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    ans['desktop_platforms'] = list(all_desktop_platforms(ans['common_user_agents']))
    return ans