1# coding: utf-8
2from __future__ import unicode_literals
3
4import itertools
5import re
6
7from .common import (
8    InfoExtractor,
9    SearchInfoExtractor
10)
11from ..compat import (
12    compat_HTTPError,
13    compat_kwargs,
14    compat_str,
15    compat_urlparse,
16)
17from ..utils import (
18    error_to_compat_str,
19    ExtractorError,
20    float_or_none,
21    HEADRequest,
22    int_or_none,
23    KNOWN_EXTENSIONS,
24    mimetype2ext,
25    str_or_none,
26    try_get,
27    unified_timestamp,
28    update_url_query,
29    url_or_none,
30    urlhandle_detect_ext,
31)
32
33
class SoundcloudEmbedIE(InfoExtractor):
    """Extractor for SoundCloud player widget (embed) URLs.

    The widget URL carries the actual target in its ``url`` query
    parameter; extraction unwraps it and hands it back as a URL result.
    """

    _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
    _TEST = {
        # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
        'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
        'only_matching': True,
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every SoundCloud player iframe in *webpage*."""
        iframe_re = r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1'
        found = []
        for match in re.finditer(iframe_re, webpage):
            found.append(match.group('url'))
        return found

    def _real_extract(self, url):
        """Unwrap the embedded ``url`` parameter, keeping any secret token."""
        params = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        target = params['url'][0]
        tokens = params.get('secret_token')
        if not tokens:
            return self.url_result(target)
        # Carry the widget's secret token over to the unwrapped URL.
        return self.url_result(
            update_url_query(target, {'secret_token': tokens[0]}))
56
57
58class SoundcloudIE(InfoExtractor):
    """Information extractor for soundcloud.com.

       To access the media, the uid of the song and a stream token
       must be extracted from the page source, and the script must make
       a request to media.soundcloud.com/crossdomain.xml. The media can
       then be fetched by requesting a URL composed of the stream token
       and the uid.
    """
66
67    _VALID_URL = r'''(?x)^(?:https?://)?
68                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
69                            (?!stations/track)
70                            (?P<uploader>[\w\d-]+)/
71                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
72                            (?P<title>[\w\d-]+)/?
73                            (?P<token>[^?]+?)?(?:[?].*)?$)
74                       |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
75                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
76                    )
77                    '''
78    IE_NAME = 'soundcloud'
79    _TESTS = [
80        {
81            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
82            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
83            'info_dict': {
84                'id': '62986583',
85                'ext': 'mp3',
86                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
87                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
88                'uploader': 'E.T. ExTerrestrial Music',
89                'uploader_id': '1571244',
90                'timestamp': 1349920598,
91                'upload_date': '20121011',
92                'duration': 143.216,
93                'license': 'all-rights-reserved',
94                'view_count': int,
95                'like_count': int,
96                'comment_count': int,
97                'repost_count': int,
98            }
99        },
100        # geo-restricted
101        {
102            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
103            'info_dict': {
104                'id': '47127627',
105                'ext': 'mp3',
106                'title': 'Goldrushed',
107                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
108                'uploader': 'The Royal Concept',
109                'uploader_id': '9615865',
110                'timestamp': 1337635207,
111                'upload_date': '20120521',
112                'duration': 227.155,
113                'license': 'all-rights-reserved',
114                'view_count': int,
115                'like_count': int,
116                'comment_count': int,
117                'repost_count': int,
118            },
119        },
120        # private link
121        {
122            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
123            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
124            'info_dict': {
125                'id': '123998367',
126                'ext': 'mp3',
127                'title': 'Youtube - Dl Test Video \'\' Ä↭',
128                'description': 'test chars:  \"\'/\\ä↭',
129                'uploader': 'jaimeMF',
130                'uploader_id': '69767071',
131                'timestamp': 1386604920,
132                'upload_date': '20131209',
133                'duration': 9.927,
134                'license': 'all-rights-reserved',
135                'view_count': int,
136                'like_count': int,
137                'comment_count': int,
138                'repost_count': int,
139            },
140        },
141        # private link (alt format)
142        {
143            'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
144            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
145            'info_dict': {
146                'id': '123998367',
147                'ext': 'mp3',
148                'title': 'Youtube - Dl Test Video \'\' Ä↭',
149                'description': 'test chars:  \"\'/\\ä↭',
150                'uploader': 'jaimeMF',
151                'uploader_id': '69767071',
152                'timestamp': 1386604920,
153                'upload_date': '20131209',
154                'duration': 9.927,
155                'license': 'all-rights-reserved',
156                'view_count': int,
157                'like_count': int,
158                'comment_count': int,
159                'repost_count': int,
160            },
161        },
162        # downloadable song
163        {
164            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
165            'md5': '7624f2351f8a3b2e7cd51522496e7631',
166            'info_dict': {
167                'id': '128590877',
168                'ext': 'mp3',
169                'title': 'Bus Brakes',
170                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
171                'uploader': 'oddsamples',
172                'uploader_id': '73680509',
173                'timestamp': 1389232924,
174                'upload_date': '20140109',
175                'duration': 17.346,
176                'license': 'cc-by-sa',
177                'view_count': int,
178                'like_count': int,
179                'comment_count': int,
180                'repost_count': int,
181            },
182        },
183        # private link, downloadable format
184        {
185            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
186            'md5': '64a60b16e617d41d0bef032b7f55441e',
187            'info_dict': {
188                'id': '340344461',
189                'ext': 'wav',
190                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
191                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
192                'uploader': 'Ori Uplift Music',
193                'uploader_id': '12563093',
194                'timestamp': 1504206263,
195                'upload_date': '20170831',
196                'duration': 7449.096,
197                'license': 'all-rights-reserved',
198                'view_count': int,
199                'like_count': int,
200                'comment_count': int,
201                'repost_count': int,
202            },
203        },
204        # no album art, use avatar pic for thumbnail
205        {
206            'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
207            'md5': '59c7872bc44e5d99b7211891664760c2',
208            'info_dict': {
209                'id': '309699954',
210                'ext': 'mp3',
211                'title': 'Sideways (Prod. Mad Real)',
212                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
213                'uploader': 'garyvee',
214                'uploader_id': '2366352',
215                'timestamp': 1488152409,
216                'upload_date': '20170226',
217                'duration': 207.012,
218                'thumbnail': r're:https?://.*\.jpg',
219                'license': 'all-rights-reserved',
220                'view_count': int,
221                'like_count': int,
222                'comment_count': int,
223                'repost_count': int,
224            },
225            'params': {
226                'skip_download': True,
227            },
228        },
229        {
230            'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
231            'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
232            'info_dict': {
233                'id': '583011102',
234                'ext': 'mp3',
235                'title': 'Mezzo Valzer',
236                'description': 'md5:4138d582f81866a530317bae316e8b61',
237                'uploader': 'Micronie',
238                'uploader_id': '3352531',
239                'timestamp': 1551394171,
240                'upload_date': '20190228',
241                'duration': 180.157,
242                'thumbnail': r're:https?://.*\.jpg',
243                'license': 'all-rights-reserved',
244                'view_count': int,
245                'like_count': int,
246                'comment_count': int,
247                'repost_count': int,
248            },
249        },
250        {
251            # with AAC HQ format available via OAuth token
252            'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
253            'only_matching': True,
254        },
255    ]
256
    # Root of the API endpoints requested by this extractor.
    _API_V2_BASE = 'https://api-v2.soundcloud.com/'
    # Public site root; page paths are appended to it before being resolved.
    _BASE_URL = 'https://soundcloud.com/'
    # Matches the "-<size id>.jpg" suffix of an image URL so the size id can
    # be substituted when building thumbnail variants.
    _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'

    # Image size id -> edge length, used as both width and height of the
    # generated thumbnail entry. A falsy size ('original') means no
    # dimensions are attached to that entry.
    _ARTWORK_MAP = {
        'mini': 16,
        'tiny': 20,
        'small': 32,
        'badge': 47,
        't67x67': 67,
        'large': 100,
        't300x300': 300,
        'crop': 400,
        't500x500': 500,
        'original': 0,
    }
273
274    def _store_client_id(self, client_id):
275        self._downloader.cache.store('soundcloud', 'client_id', client_id)
276
277    def _update_client_id(self):
278        webpage = self._download_webpage('https://soundcloud.com/', None)
279        for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
280            script = self._download_webpage(src, None, fatal=False)
281            if script:
282                client_id = self._search_regex(
283                    r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
284                    script, 'client id', default=None)
285                if client_id:
286                    self._CLIENT_ID = client_id
287                    self._store_client_id(client_id)
288                    return
289        raise ExtractorError('Unable to extract client id')
290
291    def _download_json(self, *args, **kwargs):
292        non_fatal = kwargs.get('fatal') is False
293        if non_fatal:
294            del kwargs['fatal']
295        query = kwargs.get('query', {}).copy()
296        for _ in range(2):
297            query['client_id'] = self._CLIENT_ID
298            kwargs['query'] = query
299            try:
300                return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
301            except ExtractorError as e:
302                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
303                    self._store_client_id(None)
304                    self._update_client_id()
305                    continue
306                elif non_fatal:
307                    self._downloader.report_warning(error_to_compat_str(e))
308                    return False
309                raise
310
311    def _real_initialize(self):
312        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
313
314    @classmethod
315    def _resolv_url(cls, url):
316        return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
317
318    def _extract_info_dict(self, info, full_title=None, secret_token=None):
319        track_id = compat_str(info['id'])
320        title = info['title']
321
322        format_urls = set()
323        formats = []
324        query = {'client_id': self._CLIENT_ID}
325        if secret_token:
326            query['secret_token'] = secret_token
327
328        if info.get('downloadable') and info.get('has_downloads_left'):
329            download_url = update_url_query(
330                self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
331            redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
332            if redirect_url:
333                urlh = self._request_webpage(
334                    HEADRequest(redirect_url), track_id, fatal=False)
335                if urlh:
336                    format_url = urlh.geturl()
337                    format_urls.add(format_url)
338                    formats.append({
339                        'format_id': 'download',
340                        'ext': urlhandle_detect_ext(urlh) or 'mp3',
341                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
342                        'url': format_url,
343                        'preference': 10,
344                    })
345
346        def invalid_url(url):
347            return not url or url in format_urls
348
349        def add_format(f, protocol, is_preview=False):
350            mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
351            if mobj:
352                for k, v in mobj.groupdict().items():
353                    if not f.get(k):
354                        f[k] = v
355            format_id_list = []
356            if protocol:
357                format_id_list.append(protocol)
358            ext = f.get('ext')
359            if ext == 'aac':
360                f['abr'] = '256'
361            for k in ('ext', 'abr'):
362                v = f.get(k)
363                if v:
364                    format_id_list.append(v)
365            preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
366            if preview:
367                format_id_list.append('preview')
368            abr = f.get('abr')
369            if abr:
370                f['abr'] = int(abr)
371            if protocol == 'hls':
372                protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
373            else:
374                protocol = 'http'
375            f.update({
376                'format_id': '_'.join(format_id_list),
377                'protocol': protocol,
378                'preference': -10 if preview else None,
379            })
380            formats.append(f)
381
382        # New API
383        transcodings = try_get(
384            info, lambda x: x['media']['transcodings'], list) or []
385        for t in transcodings:
386            if not isinstance(t, dict):
387                continue
388            format_url = url_or_none(t.get('url'))
389            if not format_url:
390                continue
391            stream = self._download_json(
392                format_url, track_id, query=query, fatal=False)
393            if not isinstance(stream, dict):
394                continue
395            stream_url = url_or_none(stream.get('url'))
396            if invalid_url(stream_url):
397                continue
398            format_urls.add(stream_url)
399            stream_format = t.get('format') or {}
400            protocol = stream_format.get('protocol')
401            if protocol != 'hls' and '/hls' in format_url:
402                protocol = 'hls'
403            ext = None
404            preset = str_or_none(t.get('preset'))
405            if preset:
406                ext = preset.split('_')[0]
407            if ext not in KNOWN_EXTENSIONS:
408                ext = mimetype2ext(stream_format.get('mime_type'))
409            add_format({
410                'url': stream_url,
411                'ext': ext,
412            }, 'http' if protocol == 'progressive' else protocol,
413                t.get('snipped') or '/preview/' in format_url)
414
415        for f in formats:
416            f['vcodec'] = 'none'
417
418        if not formats and info.get('policy') == 'BLOCK':
419            self.raise_geo_restricted()
420        self._sort_formats(formats)
421
422        user = info.get('user') or {}
423
424        thumbnails = []
425        artwork_url = info.get('artwork_url')
426        thumbnail = artwork_url or user.get('avatar_url')
427        if isinstance(thumbnail, compat_str):
428            if re.search(self._IMAGE_REPL_RE, thumbnail):
429                for image_id, size in self._ARTWORK_MAP.items():
430                    i = {
431                        'id': image_id,
432                        'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
433                    }
434                    if image_id == 'tiny' and not artwork_url:
435                        size = 18
436                    elif image_id == 'original':
437                        i['preference'] = 10
438                    if size:
439                        i.update({
440                            'width': size,
441                            'height': size,
442                        })
443                    thumbnails.append(i)
444            else:
445                thumbnails = [{'url': thumbnail}]
446
447        def extract_count(key):
448            return int_or_none(info.get('%s_count' % key))
449
450        return {
451            'id': track_id,
452            'uploader': user.get('username'),
453            'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
454            'uploader_url': user.get('permalink_url'),
455            'timestamp': unified_timestamp(info.get('created_at')),
456            'title': title,
457            'description': info.get('description'),
458            'thumbnails': thumbnails,
459            'duration': float_or_none(info.get('duration'), 1000),
460            'webpage_url': info.get('permalink_url'),
461            'license': info.get('license'),
462            'view_count': extract_count('playback'),
463            'like_count': extract_count('favoritings') or extract_count('likes'),
464            'comment_count': extract_count('comment'),
465            'repost_count': extract_count('reposts'),
466            'genre': info.get('genre'),
467            'formats': formats
468        }
469
470    def _real_extract(self, url):
471        mobj = re.match(self._VALID_URL, url)
472
473        track_id = mobj.group('track_id')
474
475        query = {}
476        if track_id:
477            info_json_url = self._API_V2_BASE + 'tracks/' + track_id
478            full_title = track_id
479            token = mobj.group('secret_token')
480            if token:
481                query['secret_token'] = token
482        else:
483            full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
484            token = mobj.group('token')
485            if token:
486                resolve_title += '/%s' % token
487            info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
488
489        info = self._download_json(
490            info_json_url, full_title, 'Downloading info JSON', query=query)
491
492        return self._extract_info_dict(info, full_title, token)
493
494
class SoundcloudPlaylistBaseIE(SoundcloudIE):
    """Shared helper for extractors whose API response is a playlist dict."""

    def _extract_set(self, playlist, token=None):
        """Convert a playlist dict into a playlist result of track entries.

        playlist -- playlist metadata dict; must contain ``id``
        token -- optional secret token of the playlist

        When a token is given and some track lacks a ``permalink_url``, the
        tracks are requested again from the ``tracks`` endpoint with that
        token. Tracks having neither a permalink nor an id are skipped.
        """
        playlist_id = compat_str(playlist['id'])
        tracks = playlist.get('tracks') or []

        without_permalink = [t for t in tracks if not t.get('permalink_url')]
        if without_permalink and token:
            wanted_ids = ','.join([compat_str(t['id']) for t in tracks])
            tracks = self._download_json(
                self._API_V2_BASE + 'tracks', playlist_id,
                'Downloading tracks', query={
                    'ids': wanted_ids,
                    'playlistId': playlist_id,
                    'playlistSecretToken': token,
                })

        entries = []
        for track in tracks:
            track_id = str_or_none(track.get('id'))
            track_url = track.get('permalink_url')
            if not track_url and not track_id:
                continue
            if not track_url:
                # No public page known: point at the API track URL instead.
                track_url = self._API_V2_BASE + 'tracks/' + track_id
                if token:
                    track_url += '?secret_token=' + token
            entries.append(
                self.url_result(track_url, SoundcloudIE.ie_key(), track_id))

        return self.playlist_result(
            entries, playlist_id,
            playlist.get('title'), playlist.get('description'))
523
524
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    """Extractor for set pages (``<uploader>/sets/<slug>[/<token>]``)."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
    IE_NAME = 'soundcloud:set'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
        'info_dict': {
            'id': '2284613',
            'title': 'The Royal Concept EP',
            'description': 'md5:71d07087c7a449e8941a70a29e34671e',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the set page through the API and extract its tracks."""
        match = re.match(self._VALID_URL, url)
        token = match.group('token')

        path_parts = [match.group('uploader'), 'sets', match.group('slug_title')]
        if token:
            path_parts.append(token)
        full_title = '/'.join(path_parts)

        info = self._download_json(
            self._resolv_url(self._BASE_URL + full_title), full_title)

        if 'errors' in info:
            messages = ','.join(
                compat_str(error['error_message']) for error in info['errors'])
            raise ExtractorError(
                'unable to download video webpage: %s' % messages)

        return self._extract_set(info, token)
557
558
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
    """Base for extractors that walk a paged collection via ``next_href``."""

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Collect every entry of a paged collection into a playlist dict.

        base_url -- URL of the first page of the collection
        playlist_id -- id reported for the resulting playlist
        playlist_title -- title reported for the resulting playlist

        Pages are followed through ``next_href`` until a response has none.
        Collection items that are not dicts, or that offer no usable
        ``permalink_url``, are skipped.
        """
        # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
        # https://developers.soundcloud.com/blog/offset-pagination-deprecated
        COMMON_QUERY = {
            'limit': 200,
            'linked_partitioning': '1',
        }

        def resolve_entry(candidates):
            # Use the first candidate dict that carries a permalink URL.
            for cand in candidates:
                if not isinstance(cand, dict):
                    continue
                permalink_url = url_or_none(cand.get('permalink_url'))
                if not permalink_url:
                    continue
                return self.url_result(
                    permalink_url,
                    SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                    str_or_none(cand.get('id')), cand.get('title'))

        query = COMMON_QUERY.copy()
        query['offset'] = 0

        next_href = base_url

        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id,
                'Downloading track page %s' % (i + 1), query=query)

            collection = response['collection']

            if not isinstance(collection, list):
                collection = []

            # Empty collection may be returned, in this case we proceed
            # straight to next_href

            for e in collection:
                if not isinstance(e, dict):
                    # Malformed item: nothing to look up on it.
                    continue
                # The item may be the resource itself or wrap it under
                # 'track' / 'playlist'.
                entry = resolve_entry((e, e.get('track'), e.get('playlist')))
                if entry:
                    entries.append(entry)

            next_href = response.get('next_href')
            if not next_href:
                break

            # Reuse the paging parameters of next_href, but keep our own
            # page size and partitioning settings.
            parsed_next_href = compat_urlparse.urlparse(next_href)
            query = compat_urlparse.parse_qs(parsed_next_href.query)
            query.update(COMMON_QUERY)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
619
620
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for user pages and their per-resource sub-pages."""

    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Sets)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name from the URL -> API path template (filled with the user id).
    _BASE_URL_MAP = {
        'all': 'stream/users/%s',
        'tracks': 'users/%s/tracks',
        'albums': 'users/%s/albums',
        'sets': 'users/%s/playlists',
        'reposts': 'stream/users/%s/reposts',
        'likes': 'users/%s/likes',
        'spotlight': 'users/%s/spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user and page through the requested resource."""
        match = re.match(self._VALID_URL, url)
        uploader = match.group('user')
        # A bare user URL lists everything.
        resource = match.group('rsrc') or 'all'

        user = self._download_json(
            self._resolv_url(self._BASE_URL + uploader),
            uploader, 'Downloading user info')

        api_path = self._BASE_URL_MAP[resource] % user['id']
        playlist_id = str_or_none(user.get('id'))
        playlist_title = '%s (%s)' % (user['username'], resource.capitalize())

        return self._extract_playlist(
            self._API_V2_BASE + api_path, playlist_id, playlist_title)
707
708
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for track station pages (``/stations/track/...``)."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Resolve the station and page through its tracks."""
        track_name = self._match_id(url)

        station = self._download_json(self._resolv_url(url), track_name)
        station_key = station['id']
        # The numeric part of the station id becomes the playlist id.
        track_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', station_key, 'track id')

        tracks_url = self._API_V2_BASE + 'stations/%s/tracks' % station_key
        playlist_title = 'Track station: %s' % station['title']
        return self._extract_playlist(tracks_url, track_id, playlist_title)
731
732
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extractor for API playlist URLs (``api.soundcloud.com/playlists/<id>``)."""

    _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Download the playlist by id and extract its tracks."""
        match = re.match(self._VALID_URL, url)
        playlist_id, token = match.group('id', 'token')

        # The secret token, when present, must accompany the API request.
        params = {'secret_token': token} if token else {}

        data = self._download_json(
            self._API_V2_BASE + 'playlists/' + playlist_id,
            playlist_id, 'Downloading playlist', query=params)

        return self._extract_set(data, token)
760
761
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Search extractor for the ``scsearch`` key."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    # Largest page size requested from the API in one call.
    _MAX_RESULTS_PER_PAGE = 200
    # Number of results wanted when the caller passes no ``limit``.
    _DEFAULT_RESULTS_PER_PAGE = 50

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield URL results for the items of a paged API collection.

        endpoint -- API path relative to the API base URL
        collection_id -- id used when reporting download progress
        query -- extra query parameters; ``limit`` is the total number of
                 results wanted (default ``_DEFAULT_RESULTS_PER_PAGE``)

        The page size sent to the API is capped at ``_MAX_RESULTS_PER_PAGE``
        and further pages are followed through ``next_href``. Iteration
        stops once the wanted number of results has been yielded, or when a
        page has no usable items or no ``next_href``.
        """
        # Keep the wanted total apart from the per-page size: the page size
        # is capped, the total is not.
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        page_size = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query.update({
            'limit': page_size,
            'linked_partitioning': 1,
            'offset': 0,
        })
        next_url = update_url_query(self._API_V2_BASE + endpoint, query)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            # Drop empty placeholders from the page.
            collection = [item for item in response.get('collection') or [] if item]
            if not collection:
                break

            for item in collection:
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                collected_results += 1
                if collected_results >= wanted:
                    # Never hand out more than was asked for.
                    return

            next_url = response.get('next_href')
            if not next_url:
                break

    def _get_n_results(self, query, n):
        """Return a playlist of up to *n* tracks matching *query*."""
        tracks = self._get_collection('search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)
816