1# -*- coding: utf-8 -*-
2#
3# gPodder - A media aggregator and podcast client
4# Copyright (c) 2005-2018 The gPodder Team
5#
6# gPodder is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 3 of the License, or
9# (at your option) any later version.
10#
11# gPodder is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18#
19
20#
21#  gpodder.escapist - Escapist Videos download magic
22#  somini <somini29@yandex.com>; 2014-09-14
23#
24
25
26import json
27import logging
28import re
29import urllib.error
30import urllib.parse
31import urllib.request
32
33import gpodder
34from gpodder import registry, util
35
logger = logging.getLogger(__name__)


# Matches the short, more reliable video URL; group 1 is the numeric video id.
# The dots of the host name are escaped so they only match a literal '.'.
ESCAPIST_NUMBER_RE = re.compile(r'http://www\.escapistmagazine\.com/videos/view/(\d+)', re.IGNORECASE)
# Matches the regular URL, mainly those that come in the RSS feeds;
# group 1 is the path segment before the id, group 2 the numeric video id.
ESCAPIST_REGULAR_RE = re.compile(r'http://www\.escapistmagazine\.com/videos/view/([\w-]+)/(\d+)-', re.IGNORECASE)
# Finds the RSS feed URL inside the page of a given video URL.
DATA_RSS_RE = re.compile(r'http://www\.escapistmagazine\.com/rss/videos/list/([1-9][0-9]*)\.xml')
# Matches the "configuration". The important part is the JSON between the parens.
DATA_CONFIG_RE = re.compile(r'imsVideo\.play\((.*)\)\;\<\/script\>', re.IGNORECASE)
# Matches the cover art URL of an RSS feed. We shouldn't parse XML with regex.
DATA_COVERART_RE = re.compile(r'<url>(http:.+\.jpg)</url>')
49
50
class EscapistError(Exception):
    """Raised when data needed from The Escapist cannot be found or parsed.

    Derives from Exception (not BaseException) so that generic
    ``except Exception`` handlers treat it like any other runtime failure.
    """
52
53
@registry.download_url.register
def escapist_real_download_url(unused_config, episode, allow_partial):
    """Download-URL resolver registered with the gPodder registry.

    Returns the resolved video URL for the episode, or None when resolving
    leaves the episode URL unchanged.
    """
    resolved = get_real_download_url(episode.url)
    if resolved == episode.url:
        return None
    return resolved
58
59
def get_real_download_url(url):
    """Resolve an Escapist video page URL to the URL of the video file.

    Returns `url` unchanged when it is not recognised as an Escapist video
    link.

    Raises EscapistError when the player configuration cannot be found on
    the page, when no video URL can be extracted from the configuration, or
    when the extracted URL contains "sales-marketing/".
    """
    video_id = get_escapist_id(url)
    if video_id is None:
        return url

    web_data = get_escapist_web(video_id)

    data_config_frag = DATA_CONFIG_RE.search(web_data)
    if data_config_frag is None:
        # Without this guard a page lacking the player snippet would surface
        # as an AttributeError on `.group()` instead of an EscapistError.
        raise EscapistError('Cannot parse configuration from the site')

    # The JSON argument of the imsVideo.play(...) call found in the page
    config_json = data_config_frag.group(1)

    data_config_url = get_escapist_config_url(config_json)
    if data_config_url is None:
        raise EscapistError('Cannot parse configuration from the site')

    logger.debug('Config URL: %s', data_config_url)

    data_config_data = util.urlopen(data_config_url).read().decode('utf-8')

    # TODO: This second argument should get a real name
    real_url = get_escapist_real_url(data_config_data, config_json)

    if real_url is None:
        raise EscapistError('Cannot get MP4 URL from The Escapist')
    if "sales-marketing/" in real_url:
        raise EscapistError('Oops, seems The Escapist blocked this IP. Wait a few days/weeks to get it unblocked')
    return real_url
87
88
def get_escapist_id(url):
    """Return the numeric video id (as a string) found in `url`, or None.

    The numeric-only URL form is tried first, then the regular form used in
    the RSS feeds.
    """
    # (pattern, index of the group holding the video id), in priority order
    candidates = (
        (ESCAPIST_NUMBER_RE, 1),
        (ESCAPIST_REGULAR_RE, 2),
    )
    for pattern, id_group in candidates:
        found = pattern.match(url)
        if found is not None:
            return found.group(id_group)
    return None
99
100
def is_video_link(url):
    """Return True when a video id can be extracted from `url`."""
    video_id = get_escapist_id(url)
    return video_id is not None
103
104
def get_real_channel_url(url):
    """Return the RSS feed URL advertised on the page of an Escapist video.

    Returns `url` unchanged when no video id can be extracted from it.
    Raises EscapistError when the video page contains no RSS feed URL.
    """
    video_id = get_escapist_id(url)
    if video_id is None:
        return url

    page = get_escapist_web(video_id)
    rss_match = DATA_RSS_RE.search(page)
    if rss_match is not None:
        # group(0): the whole matched feed URL, not just its numeric id
        return rss_match.group(0)

    raise EscapistError('Cannot get RSS URL from The Escapist')
116
117
def get_real_cover(url):
    """Return the cover art URL found in the RSS feed for `url`, or None.

    None is returned when no feed URL is available or when the feed contains
    no matching ``<url>...jpg</url>`` element.
    """
    rss_url = get_real_channel_url(url)
    if rss_url is None:
        return None

    # read() yields bytes here, while DATA_COVERART_RE is a str pattern;
    # searching bytes with it raises TypeError. Undecodable bytes are
    # replaced instead of raising, since only the matched URL is used.
    rss_data = util.urlopen(rss_url).read().decode('utf-8', 'replace')
    rss_data_frag = DATA_COVERART_RE.search(rss_data)

    if rss_data_frag is None:
        return None

    return rss_data_frag.group(1)
131
132
def get_escapist_web(video_id):
    """Fetch the web page of the given video id and return it as text.

    Returns None when `video_id` is None.
    """
    if video_id is None:
        return None

    web_url = 'http://www.escapistmagazine.com/videos/view/%s' % video_id
    # The page is searched with str regexes, so hand back text rather than
    # the raw bytes from read(). Bytes that are not valid UTF-8 are replaced
    # instead of raising, as the encoding of the page is not guaranteed.
    return util.urlopen(web_url).read().decode('utf-8', 'replace')
140
141
def get_escapist_config_url(data):
    """Build the vidconfig.php URL from the JSON text `data`.

    The decoded JSON object is URL-encoded and used as the query string.
    Returns None when `data` is None.
    """
    if data is None:
        return None

    params = json.loads(data)
    return 'http://www.escapistmagazine.com/videos/vidconfig.php?%s' % urllib.parse.urlencode(params)
149
150
def get_escapist_real_url(data, config_json):
    """Unscramble the video configuration and return the video source URL.

    Direct port from 'imsVideos.prototype.processRequest' from the file
    'ims_videos.min.js'.

    data: hex-encoded text; each pair of characters is one value that has
        been XOR-ed with the hash. Unscrambled, it is JSON text.
    config_json: JSON text of an object whose "hash" entry is the XOR key.

    Returns the "src" of the third entry of files.videos in the unscrambled
    JSON. Returns None when `data` is None, when `config_json` decodes to
    None, or when the unscrambled JSON has no such entry.
    """
    if data is None:
        return None

    config_data = json.loads(config_json)
    if config_data is None:
        return None

    # The key: the code point of every character of the hash
    key = [ord(ch) for ch in config_data["hash"]]
    # Every two characters of the payload are one hex-encoded value
    scrambled = [int(data[pos:pos + 2], 16) for pos in range(0, len(data), 2)]
    # XOR each value with the key (repeated as often as needed) and turn the
    # numbers back into characters
    result = ''.join(chr(value ^ key[idx % len(key)])
                     for idx, value in enumerate(scrambled))
    escapist_cfg = json.loads(result)

    # TODO: There's a way to choose different video types, for now just pick MP4@480p
    try:
        return escapist_cfg["files"]["videos"][2]["src"]
    except (KeyError, IndexError, TypeError):
        # The expected entry is missing; None lets the caller report that
        # no video URL could be determined.
        return None
186