1# This file is part of pyacoustid.
2# Copyright 2014, Adrian Sampson.
3#
4# Permission is hereby granted, free of charge, to any person obtaining
5# a copy of this software and associated documentation files (the
6# "Software"), to deal in the Software without restriction, including
7# without limitation the rights to use, copy, modify, merge, publish,
8# distribute, sublicense, and/or sell copies of the Software, and to
9# permit persons to whom the Software is furnished to do so, subject to
10# the following conditions:
11#
12# The above copyright notice and this permission notice shall be
13# included in all copies or substantial portions of the Software.
14
15from __future__ import division
16from __future__ import absolute_import
17
18import os
19import json
20import requests
21import contextlib
22import errno
23try:
24    import audioread
25    have_audioread = True
26except ImportError:
27    have_audioread = False
28try:
29    import chromaprint
30    have_chromaprint = True
31except ImportError:
32    have_chromaprint = False
33import subprocess
34import threading
35import time
36import gzip
37from io import BytesIO
38
39
# Root URL of the Web service. The endpoint names ('lookup', 'submit',
# 'submission_status') are appended to it; change it with set_base_url().
API_BASE_URL = 'http://api.acoustid.org/v2/'
# Default value of the ``meta`` argument of lookup() and match().
DEFAULT_META = ['recordings']
# Minimum delay between two API requests, enforced by _rate_limit; set
# it to zero to disable rate limiting.
REQUEST_INTERVAL = 0.33  # 3 requests/second.
# Default value of the ``maxlength`` argument of the fingerprinting
# functions.
MAX_AUDIO_LENGTH = 120  # Seconds.
# Executable used to run fpcalc when the FPCALC_ENVVAR environment
# variable is not set.
FPCALC_COMMAND = 'fpcalc'
# Name of the environment variable that overrides FPCALC_COMMAND.
FPCALC_ENVVAR = 'FPCALC'
46
47
48# Exceptions.
49
class AcoustidError(Exception):
    """Root of the exception hierarchy raised by this module."""
52
53
class FingerprintGenerationError(AcoustidError):
    """Raised when a fingerprint could not be computed for the audio."""
56
57
class NoBackendError(FingerprintGenerationError):
    """Raised when no fingerprinting backend is available: neither the
    Chromaprint library nor the fpcalc command-line tool is installed.
    """
62
63
class FingerprintSubmissionError(AcoustidError):
    """Raised when a fingerprint submission lacks required data."""
66
67
class WebServiceError(AcoustidError):
    """The Web service request failed. The field ``message`` contains a
    description of the error. If this is an error that was specifically
    sent by the acoustid server, then the ``code`` field contains the
    acoustid error code; otherwise ``code`` is None.
    """
    # Class-level default so that ``exc.code`` can always be read, even
    # when the server did not send an error code.
    code = None

    def __init__(self, message, response=None):
        """Create an error for the given HTTP response body, if
        provided, with the ``message`` as a fallback.

        ``response`` is expected to be the JSON text of an error
        response. When it cannot be parsed, or does not have the
        expected ``{"error": {...}}`` shape, it is ignored and
        ``message`` is used unchanged.
        """
        if response:
            # Try to parse the JSON error response.
            try:
                data = json.loads(response)
            except (TypeError, ValueError):
                data = None

            # Valid JSON is not necessarily an object (it could be a
            # list or a scalar), so check before calling ``get``.
            error = data.get('error') if isinstance(data, dict) else None
            if isinstance(error, dict):
                message = error.get('message', message)
                if 'code' in error:
                    self.code = error['code']

        super(WebServiceError, self).__init__(message)
        self.message = message
94
95
96# Endpoint configuration.
97
def set_base_url(url):
    """Point the module at a different API server. A trailing slash is
    appended to ``url`` when it does not already end with one.
    """
    global API_BASE_URL
    API_BASE_URL = url if url.endswith('/') else url + '/'
104
105
def _get_lookup_url():
    """Return the full URL of the fingerprint lookup endpoint."""
    return ''.join((API_BASE_URL, 'lookup'))
109
110
def _get_submit_url():
    """Return the full URL of the fingerprint submission endpoint."""
    return ''.join((API_BASE_URL, 'submit'))
114
115
def _get_submission_status_url():
    """Return the full URL of the submission status endpoint."""
    return ''.join((API_BASE_URL, 'submission_status'))
119
120
121# Compressed HTTP request bodies.
122
def _compress(data):
    """Return the bytestring ``data`` packed as an in-memory gzip
    archive.
    """
    buf = BytesIO()
    archive = gzip.GzipFile(fileobj=buf, mode='wb')
    try:
        archive.write(data)
    finally:
        # Closing flushes the gzip trailer into the buffer.
        archive.close()
    return buf.getvalue()
129
130
class CompressedHTTPAdapter(requests.adapters.HTTPAdapter):
    """An `HTTPAdapter` that compresses request bodies with gzip. The
    Content-Encoding header is set accordingly.
    """
    def add_headers(self, request, **kwargs):
        """Gzip the body of the prepared ``request`` in place and set
        its Content-Encoding header to ``gzip``. Requests without a
        body, and requests already marked as gzip-encoded, are left
        untouched.
        """
        body = request.body
        if body is None:
            # Nothing to compress (e.g. a request that carries no
            # payload); calling ``encode`` on None would crash.
            return
        if request.headers.get('Content-Encoding') == 'gzip':
            # The body is already marked as compressed -- presumably
            # the same prepared request is passing through the adapter
            # again. Compressing twice would corrupt the payload.
            return

        if not isinstance(body, bytes):
            body = body.encode('utf8')
        # prepare_body stores the new body on the request.
        request.prepare_body(_compress(body), None)
        request.headers['Content-Encoding'] = 'gzip'
141
142
143# Utilities.
144
class _rate_limit(object):  # noqa: N801
    """Decorator that throttles calls to the wrapped function. Calls
    are spaced at least REQUEST_INTERVAL seconds apart (a module-level
    constant; zero turns throttling off). A lock is held for the whole
    call, so only one thread at a time can be inside the function.
    """
    def __init__(self, fun):
        self.lock = threading.Lock()
        self.last_call = 0.0
        self.fun = fun

    def __call__(self, *args, **kwargs):
        self.lock.acquire()
        try:
            # Sleep for whatever is left of the interval since the
            # previous call, then record the start of this one.
            elapsed = time.time() - self.last_call
            if elapsed < REQUEST_INTERVAL:
                time.sleep(REQUEST_INTERVAL - elapsed)
            self.last_call = time.time()

            # Run the wrapped function while still holding the lock.
            return self.fun(*args, **kwargs)
        finally:
            self.lock.release()
168
169
@_rate_limit
def _api_request(url, params, timeout=None):
    """Makes a POST request for the URL with the given form parameters,
    which are encoded as compressed form data, and returns a parsed JSON
    response. A ``meta`` parameter given as a list is sent as a single
    space-separated string; the ``params`` dictionary passed in is not
    modified.

    Raises a WebServiceError if the request fails, if the specified
    timeout passes, or if the response body is not valid JSON.
    """
    headers = {
        'Accept-Encoding': 'gzip',
        "Content-Type": "application/x-www-form-urlencoded"
    }

    # Work on a copy so that the caller's dictionary is left untouched.
    params = dict(params)
    if isinstance(params.get('meta'), list):
        params['meta'] = ' '.join(params['meta'])

    with requests.Session() as session:
        session.mount('http://', CompressedHTTPAdapter())
        try:
            response = session.post(url,
                                    data=params,
                                    headers=headers,
                                    timeout=timeout)
        except requests.exceptions.Timeout:
            # Timeout is a subclass of RequestException, so it has to
            # be handled first or this branch can never run.
            raise WebServiceError(
                "HTTP request timed out ({0}s)".format(timeout)
            )
        except requests.exceptions.RequestException as exc:
            raise WebServiceError("HTTP request failed: {0}".format(exc))

    try:
        return response.json()
    except ValueError:
        raise WebServiceError('response is not valid JSON')
202
203
204# Main API.
205
def fingerprint(samplerate, channels, pcmiter, maxlength=MAX_AUDIO_LENGTH):
    """Fingerprint audio data given its sample rate and number of
    channels.  pcmiter should be an iterable containing blocks of PCM
    data as byte strings. At most about ``maxlength`` seconds of audio
    are fed to the fingerprinter. Returns the value produced by the
    Chromaprint fingerprinter's ``finish`` method.

    Raises a NoBackendError if the chromaprint module could not be
    imported, and a FingerprintGenerationError if the fingerprint
    calculation itself fails.
    """
    if not have_chromaprint:
        # Without this check the missing module would surface as a
        # NameError rather than one of this module's exceptions.
        raise NoBackendError("chromaprint library not found")

    # Maximum number of samples to decode.
    endposition = samplerate * channels * maxlength

    try:
        fper = chromaprint.Fingerprinter()
        fper.start(samplerate, channels)

        position = 0  # Samples of audio fed to the fingerprinter.
        for block in pcmiter:
            fper.feed(block)
            position += len(block) // 2  # 2 bytes/sample.
            if position >= endposition:
                break

        return fper.finish()
    except chromaprint.FingerprintError:
        raise FingerprintGenerationError("fingerprint calculation failed")
229
230
def lookup(apikey, fingerprint, duration, meta=DEFAULT_META, timeout=None):
    """Query the Acoustid Web service for a fingerprint and return the
    Python object decoded from the JSON response. ``duration`` is sent
    as an integer. ``meta`` selects how much data comes back; it can be
    a list of keywords from this list: recordings, recordingids,
    releases, releaseids, releasegroups, releasegroupids, tracks,
    compress, usermeta, sources.
    """
    request_params = dict(
        format='json',
        client=apikey,
        duration=int(duration),
        fingerprint=fingerprint,
        meta=meta,
    )
    endpoint = _get_lookup_url()
    return _api_request(endpoint, request_params, timeout)
246
247
def parse_lookup_result(data):
    """Given a parsed JSON response, generate tuples containing the match
    score, the MusicBrainz recording ID, the title of the recording, and
    the names of the recording's artists joined with "; ". (If the
    title or the artists are not available, the corresponding item is
    None.) Results that have no recordings attached are skipped.

    This is a generator, so errors are raised when iteration starts. If
    the response status is missing or not "ok", or if the results are
    not included, raises a WebServiceError.
    """
    # Use ``get`` so that a response without a status is reported as a
    # WebServiceError, as documented, rather than as a KeyError.
    status = data.get('status')
    if status != 'ok':
        raise WebServiceError("status: %s" % status)
    if 'results' not in data:
        raise WebServiceError("results not included")

    for result in data['results']:
        score = result['score']
        if 'recordings' not in result:
            # No recording attached. This result is not very useful.
            continue

        for recording in result['recordings']:
            # Get the artists if available.
            artists = recording.get('artists')
            if artists:
                artist_name = '; '.join(artist['name'] for artist in artists)
            else:
                artist_name = None

            yield score, recording['id'], recording.get('title'), artist_name
275
276
def _fingerprint_file_audioread(path, maxlength):
    """Decode the file at ``path`` with audioread and fingerprint the
    decoded audio with chromaprint. Returns a (duration, fingerprint)
    pair; raises a FingerprintGenerationError if audioread reports a
    decoding error.
    """
    try:
        with audioread.audio_open(path) as audio:
            result = (
                audio.duration,
                fingerprint(audio.samplerate, audio.channels, iter(audio),
                            maxlength),
            )
    except audioread.DecodeError:
        raise FingerprintGenerationError("audio could not be decoded")
    return result
286
287
def _fingerprint_file_fpcalc(path, maxlength):
    """Fingerprint a file by calling the fpcalc application.

    The executable is taken from the environment variable named by
    FPCALC_ENVVAR, falling back to FPCALC_COMMAND. Returns a pair of
    the duration (a float) and the fingerprint (a bytestring) parsed
    from the ``DURATION=`` and ``FINGERPRINT=`` lines of the output.

    Raises a NoBackendError if the executable cannot be found and a
    FingerprintGenerationError if it cannot be run, exits with a
    nonzero status, or does not report both values.
    """
    fpcalc = os.environ.get(FPCALC_ENVVAR, FPCALC_COMMAND)
    command = [fpcalc, "-length", str(maxlength), path]
    try:
        with open(os.devnull, 'wb') as devnull:
            proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    stderr=devnull)
            output, _ = proc.communicate()
    except OSError as exc:
        if exc.errno == errno.ENOENT:
            raise NoBackendError("fpcalc not found")
        else:
            raise FingerprintGenerationError("fpcalc invocation failed: %s" %
                                             str(exc))
    except UnicodeEncodeError:
        # Due to a bug in Python 2's subprocess on Windows, Unicode
        # filenames can fail to encode on that platform. See:
        # http://bugs.python.org/issue1759845
        raise FingerprintGenerationError("argument encoding failed")
    retcode = proc.poll()
    if retcode:
        raise FingerprintGenerationError("fpcalc exited with status %i" %
                                         retcode)

    duration = fp = None
    for line in output.splitlines():
        key, sep, value = line.partition(b'=')
        if not sep:
            # Not a KEY=VALUE line. Skip it instead of indexing into a
            # value that is not there.
            continue
        if key == b'DURATION':
            try:
                duration = float(value)
            except ValueError:
                raise FingerprintGenerationError("fpcalc duration not numeric")
        elif key == b'FINGERPRINT':
            fp = value

    if duration is None or fp is None:
        raise FingerprintGenerationError("missing fpcalc output")
    return duration, fp
330
331
def fingerprint_file(path, maxlength=MAX_AUDIO_LENGTH, force_fpcalc=False):
    """Compute the fingerprint of the audio file at ``path`` and return
    the duration and the fingerprint. The Chromaprint dynamic library
    (together with audioread) is used when both are available and
    ``force_fpcalc`` is not set; otherwise the fpcalc command-line tool
    is used.
    """
    full_path = os.path.abspath(os.path.expanduser(path))
    use_library = have_audioread and have_chromaprint and not force_fpcalc
    if not use_library:
        return _fingerprint_file_fpcalc(full_path, maxlength)
    return _fingerprint_file_audioread(full_path, maxlength)
343
344
def match(apikey, path, meta=DEFAULT_META, parse=True, force_fpcalc=False,
          timeout=None):
    """Fingerprint an audio file and look up its metadata. When
    ``parse`` is true, the response is passed through
    ``parse_lookup_result``, giving an iterator over small tuples of
    relevant information; otherwise the full parsed JSON response is
    returned. Fingerprinting uses either the Chromaprint library or the
    fpcalc command-line tool; if ``force_fpcalc`` is true, only the
    latter will be used. To get more data back, ``meta`` can be a list
    of keywords from this list: recordings, recordingids, releases,
    releaseids, releasegroups, releasegroupids, tracks, compress,
    usermeta, sources.
    """
    duration, fp = fingerprint_file(path, force_fpcalc=force_fpcalc)
    response = lookup(apikey, fp, duration, meta, timeout)
    return parse_lookup_result(response) if parse else response
363
364
def submit(apikey, userkey, data, timeout=None):
    """Submit a fingerprint to the acoustid server. The ``apikey`` and
    ``userkey`` parameters are API keys for the application and the
    submitting user, respectively.

    ``data`` may be either a single dictionary or a list of
    dictionaries. In either case, each dictionary must contain a
    ``fingerprint`` key and a ``duration`` key and may include the
    following: ``puid``, ``mbid``, ``track``, ``artist``, ``album``,
    ``albumartist``, ``year``, ``trackno``, ``discno``, ``fileformat``,
    ``bitrate``

    The dictionaries passed in are not modified; the duration is
    converted to an integer only in the request that is sent.

    If the required keys are not present in a dictionary, a
    FingerprintSubmissionError is raised. If the server does not answer
    with an "ok" status, a WebServiceError is raised.

    Returns the parsed JSON response.
    """
    if isinstance(data, dict):
        data = [data]

    args = {
        'format': 'json',
        'client': apikey,
        'user': userkey,
    }

    # Build up "field.#" parameters corresponding to the parameters
    # given in each dictionary.
    for i, d in enumerate(data):
        if "duration" not in d or "fingerprint" not in d:
            raise FingerprintSubmissionError("missing required parameters")

        for k, v in d.items():
            if k == "duration":
                # The duration needs to be an integer. Convert it here
                # rather than writing it back into the caller's dict.
                v = int(v)
            args["%s.%s" % (k, i)] = v

    response = _api_request(_get_submit_url(), args, timeout)
    if response.get('status') != 'ok':
        try:
            code = response['error']['code']
            message = response['error']['message']
        except (KeyError, TypeError):
            # No usable error description (missing, or not a mapping);
            # report the whole response instead.
            raise WebServiceError("response: {0}".format(response))
        raise WebServiceError("error {0}: {1}".format(code, message))
    return response
412
413
def get_submission_status(apikey, submission_id, timeout=None):
    """Ask the acoustid server for the status of a fingerprint
    submission and return the parsed JSON response. ``submission_id``
    is the id of the submission, as returned in the response object of
    a call to the ``submit`` endpoint.
    """
    query = dict(format='json', client=apikey, id=submission_id)
    endpoint = _get_submission_status_url()
    return _api_request(endpoint, query, timeout)
425