1# This file is part of pyacoustid. 2# Copyright 2014, Adrian Sampson. 3# 4# Permission is hereby granted, free of charge, to any person obtaining 5# a copy of this software and associated documentation files (the 6# "Software"), to deal in the Software without restriction, including 7# without limitation the rights to use, copy, modify, merge, publish, 8# distribute, sublicense, and/or sell copies of the Software, and to 9# permit persons to whom the Software is furnished to do so, subject to 10# the following conditions: 11# 12# The above copyright notice and this permission notice shall be 13# included in all copies or substantial portions of the Software. 14 15from __future__ import division 16from __future__ import absolute_import 17 18import os 19import json 20import requests 21import contextlib 22import errno 23try: 24 import audioread 25 have_audioread = True 26except ImportError: 27 have_audioread = False 28try: 29 import chromaprint 30 have_chromaprint = True 31except ImportError: 32 have_chromaprint = False 33import subprocess 34import threading 35import time 36import gzip 37from io import BytesIO 38 39 40API_BASE_URL = 'http://api.acoustid.org/v2/' 41DEFAULT_META = ['recordings'] 42REQUEST_INTERVAL = 0.33 # 3 requests/second. 43MAX_AUDIO_LENGTH = 120 # Seconds. 44FPCALC_COMMAND = 'fpcalc' 45FPCALC_ENVVAR = 'FPCALC' 46 47 48# Exceptions. 49 50class AcoustidError(Exception): 51 """Base for exceptions in this module.""" 52 53 54class FingerprintGenerationError(AcoustidError): 55 """The audio could not be fingerprinted.""" 56 57 58class NoBackendError(FingerprintGenerationError): 59 """The audio could not be fingerprinted because neither the 60 Chromaprint library nor the fpcalc command-line tool is installed. 61 """ 62 63 64class FingerprintSubmissionError(AcoustidError): 65 """Missing required data for a fingerprint submission.""" 66 67 68class WebServiceError(AcoustidError): 69 """The Web service request failed. The field ``message`` contains a 70 description of the error. If this is an error that was specifically 71 sent by the acoustid server, then the ``code`` field contains the 72 acoustid error code. 73 """ 74 def __init__(self, message, response=None): 75 """Create an error for the given HTTP response body, if 76 provided, with the ``message`` as a fallback. 77 """ 78 if response: 79 # Try to parse the JSON error response. 80 try: 81 data = json.loads(response) 82 except ValueError: 83 pass 84 else: 85 if isinstance(data.get('error'), dict): 86 error = data['error'] 87 if 'message' in error: 88 message = error['message'] 89 if 'code' in error: 90 self.code = error['code'] 91 92 super(WebServiceError, self).__init__(message) 93 self.message = message 94 95 96# Endpoint configuration. 97 98def set_base_url(url): 99 """Set the URL of the API server to query.""" 100 if not url.endswith('/'): 101 url += '/' 102 global API_BASE_URL 103 API_BASE_URL = url 104 105 106def _get_lookup_url(): 107 """Get the URL of the lookup API endpoint.""" 108 return API_BASE_URL + 'lookup' 109 110 111def _get_submit_url(): 112 """Get the URL of the submission API endpoint.""" 113 return API_BASE_URL + 'submit' 114 115 116def _get_submission_status_url(): 117 """Get the URL of the submission status API endpoint.""" 118 return API_BASE_URL + 'submission_status' 119 120 121# Compressed HTTP request bodies. 122 123def _compress(data): 124 """Compress a bytestring to a gzip archive.""" 125 sio = BytesIO() 126 with contextlib.closing(gzip.GzipFile(fileobj=sio, mode='wb')) as f: 127 f.write(data) 128 return sio.getvalue() 129 130 131class CompressedHTTPAdapter(requests.adapters.HTTPAdapter): 132 """An `HTTPAdapter` that compresses request bodies with gzip. The 133 Content-Encoding header is set accordingly. 134 """ 135 def add_headers(self, request, **kwargs): 136 body = request.body 137 if not isinstance(body, bytes): 138 body = body.encode('utf8') 139 request.prepare_body(_compress(body), None) 140 request.headers['Content-Encoding'] = 'gzip' 141 142 143# Utilities. 144 145class _rate_limit(object): # noqa: N801 146 """A decorator that limits the rate at which the function may be 147 called. The rate is controlled by the REQUEST_INTERVAL module-level 148 constant; set the value to zero to disable rate limiting. The 149 limiting is thread-safe; only one thread may be in the function at a 150 time (acts like a monitor in this sense). 151 """ 152 def __init__(self, fun): 153 self.fun = fun 154 self.last_call = 0.0 155 self.lock = threading.Lock() 156 157 def __call__(self, *args, **kwargs): 158 with self.lock: 159 # Wait until request_rate time has passed since last_call, 160 # then update last_call. 161 since_last_call = time.time() - self.last_call 162 if since_last_call < REQUEST_INTERVAL: 163 time.sleep(REQUEST_INTERVAL - since_last_call) 164 self.last_call = time.time() 165 166 # Call the original function. 167 return self.fun(*args, **kwargs) 168 169 170@_rate_limit 171def _api_request(url, params, timeout=None): 172 """Makes a POST request for the URL with the given form parameters, 173 which are encoded as compressed form data, and returns a parsed JSON 174 response. May raise a WebServiceError if the request fails. 175 If the specified timeout passes, then raises a TimeoutError. 176 """ 177 headers = { 178 'Accept-Encoding': 'gzip', 179 "Content-Type": "application/x-www-form-urlencoded" 180 } 181 182 with requests.Session() as session: 183 session.mount('http://', CompressedHTTPAdapter()) 184 try: 185 if isinstance(params.get('meta'), list): 186 params['meta'] = ' '.join(params['meta']) 187 response = session.post(url, 188 data=params, 189 headers=headers, 190 timeout=timeout) 191 except requests.exceptions.RequestException as exc: 192 raise WebServiceError("HTTP request failed: {0}".format(exc)) 193 except requests.exceptions.ReadTimeout: 194 raise WebServiceError( 195 "HTTP request timed out ({0}s)".format(timeout) 196 ) 197 198 try: 199 return response.json() 200 except ValueError: 201 raise WebServiceError('response is not valid JSON') 202 203 204# Main API. 205 206def fingerprint(samplerate, channels, pcmiter, maxlength=MAX_AUDIO_LENGTH): 207 """Fingerprint audio data given its sample rate and number of 208 channels. pcmiter should be an iterable containing blocks of PCM 209 data as byte strings. Raises a FingerprintGenerationError if 210 anything goes wrong. 211 """ 212 # Maximum number of samples to decode. 213 endposition = samplerate * channels * maxlength 214 215 try: 216 fper = chromaprint.Fingerprinter() 217 fper.start(samplerate, channels) 218 219 position = 0 # Samples of audio fed to the fingerprinter. 220 for block in pcmiter: 221 fper.feed(block) 222 position += len(block) // 2 # 2 bytes/sample. 223 if position >= endposition: 224 break 225 226 return fper.finish() 227 except chromaprint.FingerprintError: 228 raise FingerprintGenerationError("fingerprint calculation failed") 229 230 231def lookup(apikey, fingerprint, duration, meta=DEFAULT_META, timeout=None): 232 """Look up a fingerprint with the Acoustid Web service. Returns the 233 Python object reflecting the response JSON data. To get more data 234 back, ``meta`` can be a list of keywords from this list: recordings, 235 recordingids, releases, releaseids, releasegroups, releasegroupids, 236 tracks, compress, usermeta, sources. 237 """ 238 params = { 239 'format': 'json', 240 'client': apikey, 241 'duration': int(duration), 242 'fingerprint': fingerprint, 243 'meta': meta, 244 } 245 return _api_request(_get_lookup_url(), params, timeout) 246 247 248def parse_lookup_result(data): 249 """Given a parsed JSON response, generate tuples containing the match 250 score, the MusicBrainz recording ID, the title of the recording, and 251 the name of the recording's first artist. (If an artist is not 252 available, the last item is None.) If the response is incomplete, 253 raises a WebServiceError. 254 """ 255 if data['status'] != 'ok': 256 raise WebServiceError("status: %s" % data['status']) 257 if 'results' not in data: 258 raise WebServiceError("results not included") 259 260 for result in data['results']: 261 score = result['score'] 262 if 'recordings' not in result: 263 # No recording attached. This result is not very useful. 264 continue 265 266 for recording in result['recordings']: 267 # Get the artist if available. 268 if recording.get('artists'): 269 names = [artist['name'] for artist in recording['artists']] 270 artist_name = '; '.join(names) 271 else: 272 artist_name = None 273 274 yield score, recording['id'], recording.get('title'), artist_name 275 276 277def _fingerprint_file_audioread(path, maxlength): 278 """Fingerprint a file by using audioread and chromaprint.""" 279 try: 280 with audioread.audio_open(path) as f: 281 duration = f.duration 282 fp = fingerprint(f.samplerate, f.channels, iter(f), maxlength) 283 except audioread.DecodeError: 284 raise FingerprintGenerationError("audio could not be decoded") 285 return duration, fp 286 287 288def _fingerprint_file_fpcalc(path, maxlength): 289 """Fingerprint a file by calling the fpcalc application.""" 290 fpcalc = os.environ.get(FPCALC_ENVVAR, FPCALC_COMMAND) 291 command = [fpcalc, "-length", str(maxlength), path] 292 try: 293 with open(os.devnull, 'wb') as devnull: 294 proc = subprocess.Popen(command, stdout=subprocess.PIPE, 295 stderr=devnull) 296 output, _ = proc.communicate() 297 except OSError as exc: 298 if exc.errno == errno.ENOENT: 299 raise NoBackendError("fpcalc not found") 300 else: 301 raise FingerprintGenerationError("fpcalc invocation failed: %s" % 302 str(exc)) 303 except UnicodeEncodeError: 304 # Due to a bug in Python 2's subprocess on Windows, Unicode 305 # filenames can fail to encode on that platform. See: 306 # http://bugs.python.org/issue1759845 307 raise FingerprintGenerationError("argument encoding failed") 308 retcode = proc.poll() 309 if retcode: 310 raise FingerprintGenerationError("fpcalc exited with status %i" % 311 retcode) 312 313 duration = fp = None 314 for line in output.splitlines(): 315 try: 316 parts = line.split(b'=', 1) 317 except ValueError: 318 raise FingerprintGenerationError("malformed fpcalc output") 319 if parts[0] == b'DURATION': 320 try: 321 duration = float(parts[1]) 322 except ValueError: 323 raise FingerprintGenerationError("fpcalc duration not numeric") 324 elif parts[0] == b'FINGERPRINT': 325 fp = parts[1] 326 327 if duration is None or fp is None: 328 raise FingerprintGenerationError("missing fpcalc output") 329 return duration, fp 330 331 332def fingerprint_file(path, maxlength=MAX_AUDIO_LENGTH, force_fpcalc=False): 333 """Fingerprint a file either using the Chromaprint dynamic library 334 or the fpcalc command-line tool, whichever is available (unless 335 ``force_fpcalc`` is specified). Returns the duration and the 336 fingerprint. 337 """ 338 path = os.path.abspath(os.path.expanduser(path)) 339 if have_audioread and have_chromaprint and not force_fpcalc: 340 return _fingerprint_file_audioread(path, maxlength) 341 else: 342 return _fingerprint_file_fpcalc(path, maxlength) 343 344 345def match(apikey, path, meta=DEFAULT_META, parse=True, force_fpcalc=False, 346 timeout=None): 347 """Look up the metadata for an audio file. If ``parse`` is true, 348 then ``parse_lookup_result`` is used to return an iterator over 349 small tuple of relevant information; otherwise, the full parsed JSON 350 response is returned. Fingerprinting uses either the Chromaprint 351 library or the fpcalc command-line tool; if ``force_fpcalc`` is 352 true, only the latter will be used. To get more data back, ``meta`` 353 can be a list of keywords from this list: recordings, recordingids, 354 releases, releaseids, releasegroups, releasegroupids, tracks, 355 compress, usermeta, sources. 356 """ 357 duration, fp = fingerprint_file(path, force_fpcalc=force_fpcalc) 358 response = lookup(apikey, fp, duration, meta, timeout) 359 if parse: 360 return parse_lookup_result(response) 361 else: 362 return response 363 364 365def submit(apikey, userkey, data, timeout=None): 366 """Submit a fingerprint to the acoustid server. The ``apikey`` and 367 ``userkey`` parameters are API keys for the application and the 368 submitting user, respectively. 369 370 ``data`` may be either a single dictionary or a list of 371 dictionaries. In either case, each dictionary must contain a 372 ``fingerprint`` key and a ``duration`` key and may include the 373 following: ``puid``, ``mbid``, ``track``, ``artist``, ``album``, 374 ``albumartist``, ``year``, ``trackno``, ``discno``, ``fileformat``, 375 ``bitrate`` 376 377 If the required keys are not present in a dictionary, a 378 FingerprintSubmissionError is raised. 379 380 Returns the parsed JSON response. 381 """ 382 if isinstance(data, dict): 383 data = [data] 384 385 args = { 386 'format': 'json', 387 'client': apikey, 388 'user': userkey, 389 } 390 391 # Build up "field.#" parameters corresponding to the parameters 392 # given in each dictionary. 393 for i, d in enumerate(data): 394 if "duration" not in d or "fingerprint" not in d: 395 raise FingerprintSubmissionError("missing required parameters") 396 397 # The duration needs to be an integer. 398 d["duration"] = int(d["duration"]) 399 400 for k, v in d.items(): 401 args["%s.%s" % (k, i)] = v 402 403 response = _api_request(_get_submit_url(), args, timeout) 404 if response.get('status') != 'ok': 405 try: 406 code = response['error']['code'] 407 message = response['error']['message'] 408 except KeyError: 409 raise WebServiceError("response: {0}".format(response)) 410 raise WebServiceError("error {0}: {1}".format(code, message)) 411 return response 412 413 414def get_submission_status(apikey, submission_id, timeout=None): 415 """Get the status of a submission to the acoustid server. 416 ``submission_id`` is the id of a fingerprint submission, as returned 417 in the response object of a call to the ``submit`` endpoint. 418 """ 419 params = { 420 'format': 'json', 421 'client': apikey, 422 'id': submission_id, 423 } 424 return _api_request(_get_submission_status_url(), params, timeout) 425