# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2018 The gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

#
#  gpodder.escapist - Escapist Videos download magic
#  somini <somini29@yandex.com>; 2014-09-14
#


import json
import logging
import re
import urllib.error
import urllib.parse
import urllib.request

import gpodder
from gpodder import registry, util

logger = logging.getLogger(__name__)


# NOTE: the dots in the host name are escaped so that they only match a
# literal '.', not any character.

# This matches the more reliable URL
ESCAPIST_NUMBER_RE = re.compile(r'http://www\.escapistmagazine\.com/videos/view/(\d+)', re.IGNORECASE)
# This matches regular URL, mainly those that come in the RSS feeds
ESCAPIST_REGULAR_RE = re.compile(r'http://www\.escapistmagazine\.com/videos/view/([\w-]+)/(\d+)-', re.IGNORECASE)
# This finds the RSS for a given URL
DATA_RSS_RE = re.compile(r'http://www\.escapistmagazine\.com/rss/videos/list/([1-9][0-9]*)\.xml')
# This matches the "configuration". The important part is the JSON between the parens
DATA_CONFIG_RE = re.compile(r'imsVideo\.play\((.*)\)\;\<\/script\>', re.IGNORECASE)
# This matches the cover art for an RSS. We shouldn't parse XML with regex.
DATA_COVERART_RE = re.compile(r'<url>(http:.+\.jpg)</url>')


class EscapistError(Exception):
    """Raised when a download or feed URL cannot be resolved on The Escapist.

    Derives from Exception (not BaseException) so that generic
    ``except Exception`` handlers treat it like any other error.
    """


def _fetch_text(url):
    """Download *url* through util.urlopen and return the body as text.

    The body is decoded as UTF-8. Undecodable bytes are replaced instead of
    raising, because the callers only run regular expressions (compiled from
    str patterns) or hex/JSON parsing over the result.
    """
    return util.urlopen(url).read().decode('utf-8', errors='replace')


@registry.download_url.register
def escapist_real_download_url(unused_config, episode, allow_partial):
    """Resolver registered with registry.download_url.

    Returns the resolved download URL for *episode*, or None when
    get_real_download_url() hands back episode.url unchanged (i.e. the URL
    is not an Escapist video link).
    """
    res = get_real_download_url(episode.url)
    return None if res == episode.url else res


def get_real_download_url(url):
    """Resolve an Escapist video page URL to the real media URL.

    Returns *url* unchanged when it does not match one of the Escapist URL
    patterns.

    Raises EscapistError when the player configuration cannot be found or
    parsed, when no media URL can be extracted, or when the returned URL
    indicates that the site blocked this IP.
    """
    video_id = get_escapist_id(url)
    if video_id is None:
        return url

    web_data = get_escapist_web(video_id)

    data_config_frag = DATA_CONFIG_RE.search(web_data)
    if data_config_frag is None:
        # The page has no 'imsVideo.play(...)' call to read the config from
        raise EscapistError('Cannot parse configuration from the site')

    # The JSON object passed to imsVideo.play(); it is used both to build
    # the config request and (its "hash" entry) to unscramble the response.
    config_json = data_config_frag.group(1)

    data_config_url = get_escapist_config_url(config_json)

    if data_config_url is None:
        raise EscapistError('Cannot parse configuration from the site')

    logger.debug('Config URL: %s', data_config_url)

    data_config_data = _fetch_text(data_config_url)

    real_url = get_escapist_real_url(data_config_data, config_json)

    if real_url is None:
        raise EscapistError('Cannot get MP4 URL from The Escapist')
    elif "sales-marketing/" in real_url:
        raise EscapistError('Oops, seems The Escapist blocked this IP. Wait a few days/weeks to get it unblocked')
    else:
        return real_url


def get_escapist_id(url):
    """Return the numeric video id (as a string) found in *url*, or None."""
    result = ESCAPIST_NUMBER_RE.match(url)
    if result is not None:
        return result.group(1)

    result = ESCAPIST_REGULAR_RE.match(url)
    if result is not None:
        return result.group(2)

    return None


def is_video_link(url):
    """Return True when *url* matches one of the Escapist video URL patterns."""
    return (get_escapist_id(url) is not None)


def get_real_channel_url(url):
    """Return the RSS feed URL referenced by the video page behind *url*.

    Returns *url* unchanged when it is not an Escapist video URL.

    Raises EscapistError when the page does not contain an RSS feed link.
    """
    video_id = get_escapist_id(url)
    if video_id is None:
        return url

    web_data = get_escapist_web(video_id)

    data_config_frag = DATA_RSS_RE.search(web_data)
    if data_config_frag is None:
        raise EscapistError('Cannot get RSS URL from The Escapist')
    return data_config_frag.group(0)


def get_real_cover(url):
    """Return the cover art URL found in the RSS feed for *url*, or None."""
    rss_url = get_real_channel_url(url)
    if rss_url is None:
        return None

    # Decoded to text so the (str) cover art pattern can be applied to it
    rss_data = _fetch_text(rss_url)
    rss_data_frag = DATA_COVERART_RE.search(rss_data)

    if rss_data_frag is None:
        return None

    return rss_data_frag.group(1)


def get_escapist_web(video_id):
    """Download the video page for *video_id* and return it as text.

    Returns None when *video_id* is None.
    """
    if video_id is None:
        return None

    # FIXME: must check if it's utf-8 (undecodable bytes are replaced)
    web_url = 'http://www.escapistmagazine.com/videos/view/%s' % video_id
    return _fetch_text(web_url)


def get_escapist_config_url(data):
    """Build the 'vidconfig.php' URL from the player configuration JSON.

    *data* is the JSON text captured from the 'imsVideo.play(...)' call; its
    entries become the query string of the returned URL.

    Returns None when *data* is None or cannot be turned into a query
    string (invalid JSON, or JSON that is not a mapping/sequence of pairs).
    """
    if data is None:
        return None

    try:
        query_string = urllib.parse.urlencode(json.loads(data))
    except (ValueError, TypeError):
        # ValueError: invalid JSON; TypeError: JSON value urlencode rejects
        return None

    return 'http://www.escapistmagazine.com/videos/vidconfig.php?%s' % query_string


def get_escapist_real_url(data, config_json):
    """Unscramble the config response and return the media URL.

    *data* is the response body of the config URL: a string of two-character
    hex values. *config_json* is the JSON text captured from the
    'imsVideo.play(...)' call; its "hash" entry is the XOR key.

    Returns the "src" of the third entry in ["files"]["videos"], or None
    when *data* is None or any step of the decoding fails.
    """
    if data is None:
        return None

    try:
        config_data = json.loads(config_json)
    except (ValueError, TypeError):
        return None
    if not isinstance(config_data, dict):
        return None

    # The data is scrambled, unscramble
    # Direct port from 'imsVideos.prototype.processRequest' from the file 'ims_videos.min.js'

    one_hash = config_data.get("hash")
    if not isinstance(one_hash, str) or not one_hash:
        # Without a non-empty key there is nothing to XOR against
        return None

    # Turn the key string into numbers
    hash_n = [ord(x) for x in one_hash]
    key_len = len(hash_n)

    # Surrounding whitespace (e.g. a trailing newline) is not part of the
    # hex payload and would otherwise break the two-character split.
    payload = data.strip()

    try:
        # Split the data into 2char strings, read each one as a hex number,
        # XOR it with the matching (cycled) key value and turn the result
        # back into a character.
        result = ''.join(
            chr(int(payload[pos:pos + 2], 16) ^ hash_n[(pos // 2) % key_len])
            for pos in range(0, len(payload), 2))
        # A wild JSON appears...
        escapist_cfg = json.loads(result)
    except ValueError:
        # Bad hex digits, an out-of-range character code or invalid JSON
        logger.warning('Cannot decode the Escapist video configuration')
        return None

    # TODO: There's a way to choose different video types, for now just pick MP4@480p
    try:
        return escapist_cfg["files"]["videos"][2]["src"]
    except (KeyError, IndexError, TypeError):
        return None