# -*- coding: utf-8 -*-

import datetime
import difflib
import time
import re

# Query-string suffix that is removed from the end of cover image URLs.
_PARTNER_SUFFIX = '?partner=allrovi.com'

# Trailing size parameter of a cover URL and the values it is swapped for.
# 0=largest / 1=75 / 2=150 / 3=250 / 4=400 / 5=500 / 6=1080
_SIZE_DEFAULT = 'f=5'
_SIZE_IMAGE = 'f=0'
_SIZE_PREVIEW = 'f=2'

# A search result is accepted only when both the artist and the album name
# have a difflib similarity ratio strictly above this value.
_MATCH_THRESHOLD = 0.90

# (input format, output format) pairs tried in order for long release dates.
_RELEASE_DATE_FORMATS = (
    ('%B %d, %Y', '%Y-%m-%d'),  # month day, year
    ('%B, %Y', '%Y-%m'),        # month, year
)


def _to_text(data):
    """Return *data* as text, decoding it as UTF-8 when it is a byte string."""
    if isinstance(data, bytes):
        return data.decode('utf-8')
    return data


def _similarity(first, second):
    """Return the case-insensitive difflib similarity ratio of two strings."""
    return difflib.SequenceMatcher(None, first.lower(), second.lower()).ratio()


def _parse_release_date(text):
    """Normalise the release date text scraped from an album page.

    Text of four characters or fewer is treated as a bare year and returned
    unchanged (after stripping surrounding whitespace). Longer text is parsed
    with each known format in turn and returned as 'YYYY-MM-DD' or 'YYYY-MM'.
    Returns None when no known format matches, so that one unparsable date
    does not abort the extraction of the remaining album details.
    """
    text = text.strip()
    if len(text) <= 4:
        # year
        return text
    for source_format, target_format in _RELEASE_DATE_FORMATS:
        try:
            parsed = time.strptime(text, source_format)
        except ValueError:
            continue
        return datetime.datetime(*parsed[0:3]).strftime(target_format)
    return None


def _joined_section_links(data, section):
    """Return the link texts of a 'class="<section>"' block joined by ' / '.

    Returns None when the section is not found or contains no link text.
    """
    pattern = r'class="%s">.*?div>\s*(.*?)\s*</div' % section
    sectiondata = re.search(pattern, data, re.S)
    if not sectiondata:
        return None
    names = re.findall('">(.*?)<', sectiondata.group(1))
    if not names:
        return None
    return ' / '.join(names)


def _build_thumbs(url):
    """Return the list of thumb dicts for a cover *url*, or None to skip it.

    The partner suffix is removed only when the url actually ends with it.
    Urls that do not start with 'http' (internal blank thumb) are ignored.
    """
    if url.endswith(_PARTNER_SUFFIX):
        # Remove the exact suffix; str.rstrip() would strip a character set
        # and could eat trailing characters that belong to the url.
        url = url[:-len(_PARTNER_SUFFIX)]
    # ignore internal blank thumb
    if not url.startswith('http'):
        return None
    thumbdata = {}
    if url.endswith(_SIZE_DEFAULT):
        # Swap only the trailing size parameter, not every 'f=5' in the url.
        base = url[:-len(_SIZE_DEFAULT)]
        thumbdata['image'] = base + _SIZE_IMAGE
        thumbdata['preview'] = base + _SIZE_PREVIEW
    else:
        thumbdata['image'] = url
        thumbdata['preview'] = url
    thumbdata['aspect'] = 'thumb'
    return [thumbdata]


def allmusic_albumfind(data, artist, album):
    """Find the first search result that closely matches an artist and album.

    data   -- search result page, as UTF-8 encoded bytes or as text
    artist -- artist name to look for
    album  -- album title to look for

    Returns a list holding at most one dict with the keys 'artist', 'album'
    and 'url'; the list is empty when no result is similar enough.
    """
    data = _to_text(data)
    albums = []
    for item in re.findall(r'class="album">\s*(.*?)\s*</li', data, re.S):
        albumartist = re.search('class="artist">.*?>(.*?)</a', item, re.S)
        if not albumartist:
            # classical album
            continue
        albumname = re.search('class="title">.*?>(.*?)</a', item, re.S)
        if not albumname:
            # not likely to happen, but just in case
            continue
        foundartist = albumartist.group(1)
        foundalbum = albumname.group(1)
        # filter inaccurate results
        if _similarity(artist, foundartist) <= _MATCH_THRESHOLD:
            continue
        if _similarity(album, foundalbum) <= _MATCH_THRESHOLD:
            continue
        albumurl = re.search(r'class="title">\s*<a href="(.*?)"', item)
        if not albumurl:
            # not likely to happen, but just in case
            continue
        albums.append({
            'artist': foundartist,
            'album': foundalbum,
            'url': albumurl.group(1),
        })
        # we are only interested in the top result
        break
    return albums


def allmusic_albumdetails(data):
    """Extract album details from an album page.

    data -- album page, as UTF-8 encoded bytes or as text

    Returns a dict that contains only the keys whose markup was found:
    'releasedate', 'year', 'genre', 'styles', 'moods', 'themes', 'rating',
    'votes', 'album', 'label', 'artist', 'artist_description' and 'thumb'.
    A release date in an unrecognised format is left out instead of raising.
    """
    data = _to_text(data)
    albumdata = {}
    releasedata = re.search('class="release-date">.*?<span>(.*?)<', data, re.S)
    if releasedata:
        releasedate = _parse_release_date(releasedata.group(1))
        if releasedate is not None:
            albumdata['releasedate'] = releasedate
    yeardata = re.search(r'class="year".*?>\s*(.*?)\s*<', data)
    if yeardata:
        albumdata['year'] = yeardata.group(1)
    genredata = re.search('class="genre">.*?">(.*?)<', data, re.S)
    if genredata:
        albumdata['genre'] = genredata.group(1)
    # styles, moods and themes share the same markup layout
    for section in ('styles', 'moods', 'themes'):
        joined = _joined_section_links(data, section)
        if joined is not None:
            albumdata[section] = joined
    ratingdata = re.search(r'itemprop="ratingValue">\s*(.*?)\s*</div', data)
    if ratingdata:
        albumdata['rating'] = ratingdata.group(1)
        # no vote count is extracted from the page; an empty value is stored
        albumdata['votes'] = ''
    titledata = re.search(r'class="album-title".*?>\s*(.*?)\s*<', data, re.S)
    if titledata:
        albumdata['album'] = titledata.group(1)
    labeldata = re.search('class="label-catalog".*?<.*?>(.*?)<', data, re.S)
    if labeldata:
        albumdata['label'] = labeldata.group(1)
    artistdata = re.search(r'class="album-artist".*?<span.*?>\s*(.*?)\s*</span', data, re.S)
    if artistdata:
        artistlist = re.findall('">(.*?)<', artistdata.group(1))
        artists = [{'artist': name} for name in artistlist]
        if artists:
            albumdata['artist'] = artists
            albumdata['artist_description'] = ' / '.join(artistlist)
    thumbsdata = re.search('class="album-contain".*?src="(.*?)"', data, re.S)
    if thumbsdata:
        thumbs = _build_thumbs(thumbsdata.group(1))
        if thumbs is not None:
            albumdata['thumb'] = thumbs
    return albumdata