1# -*- coding: utf-8 -*-
2
3import datetime
4import difflib
5import time
6import re
7
def allmusic_albumfind(data, artist, album):
    """Pick the best matching album out of an AllMusic search result page.

    Args:
        data: raw search result page as UTF-8 encoded bytes.
        artist: artist name that was searched for.
        album: album title that was searched for.

    Returns:
        A list holding at most one dict with the keys 'artist', 'album' and
        'url'. The list is empty when no entry matches closely enough.
    """
    data = data.decode('utf-8')
    albums = []
    # lowercase the search terms once instead of once per result
    wanted_artist = artist.lower()
    wanted_album = album.lower()
    # raw strings: '\s' inside a normal string literal is an invalid escape sequence
    albumlist = re.findall(r'class="album">\s*(.*?)\s*</li', data, re.S)
    for item in albumlist:
        albumartist = re.search(r'class="artist">.*?>(.*?)</a', item, re.S)
        if not albumartist: # classical album
            continue
        albumname = re.search(r'class="title">.*?>(.*?)</a', item, re.S)
        if not albumname: # not likely to happen, but just in case
            continue
        albumdata = {'artist': albumartist.group(1), 'album': albumname.group(1)}
        # filter inaccurate results
        artistmatch = difflib.SequenceMatcher(None, wanted_artist, albumdata['artist'].lower()).ratio()
        albummatch = difflib.SequenceMatcher(None, wanted_album, albumdata['album'].lower()).ratio()
        if artistmatch <= 0.90 or albummatch <= 0.90:
            continue
        albumurl = re.search(r'class="title">\s*<a href="(.*?)"', item)
        if not albumurl: # not likely to happen, but just in case
            continue
        albumdata['url'] = albumurl.group(1)
        albums.append(albumdata)
        # we are only interested in the top result
        break
    return albums
37
def _allmusic_releasedate(text):
    """Convert a release date as shown on the page to 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'.

    Returns None when the text is longer than a year and matches neither
    the 'month day, year' nor the 'month, year' layout.
    """
    text = text.strip()
    if len(text) <= 4:
        # year
        return text
    # (input layout, output layout): 'month day, year' first, then 'month, year'
    for informat, outformat in (('%B %d, %Y', '%Y-%m-%d'), ('%B, %Y', '%Y-%m')):
        try:
            parsed = time.strptime(text, informat)
        except ValueError:
            continue
        return datetime.datetime(*parsed[0:3]).strftime(outformat)
    return None


def _allmusic_taglist(data, section):
    """Return the link texts found inside the page section with the given css class."""
    sectiondata = re.search(r'class="%s">.*?div>\s*(.*?)\s*</div' % section, data, re.S)
    if not sectiondata:
        return []
    return re.findall(r'">(.*?)<', sectiondata.group(1))


def allmusic_albumdetails(data):
    """Extract album details from an AllMusic album page.

    Args:
        data: raw album page as UTF-8 encoded bytes.

    Returns:
        A dict that always contains 'votes' (empty string) and, for every
        piece of information found on the page, any of: 'releasedate',
        'year', 'genre', 'styles', 'moods', 'themes', 'rating', 'album',
        'label', 'artist' (list of {'artist': name} dicts),
        'artist_description' and 'thumb' (list with one image dict).
        A release date in an unrecognised layout is left out instead of
        raising.
    """
    data = data.decode('utf-8')
    albumdata = {}
    releasedata = re.search(r'class="release-date">.*?<span>(.*?)<', data, re.S)
    if releasedata:
        releasedate = _allmusic_releasedate(releasedata.group(1))
        if releasedate is not None:
            albumdata['releasedate'] = releasedate
    yeardata = re.search(r'class="year".*?>\s*(.*?)\s*<', data)
    if yeardata:
        albumdata['year'] = yeardata.group(1)
    genredata = re.search(r'class="genre">.*?">(.*?)<', data, re.S)
    if genredata:
        albumdata['genre'] = genredata.group(1)
    # styles, moods and themes share the same markup, only the css class differs
    for section in ('styles', 'moods', 'themes'):
        taglist = _allmusic_taglist(data, section)
        if taglist:
            albumdata[section] = ' / '.join(taglist)
    ratingdata = re.search(r'itemprop="ratingValue">\s*(.*?)\s*</div', data)
    if ratingdata:
        albumdata['rating'] = ratingdata.group(1)
    albumdata['votes'] = ''
    titledata = re.search(r'class="album-title".*?>\s*(.*?)\s*<', data, re.S)
    if titledata:
        albumdata['album'] = titledata.group(1)
    labeldata = re.search(r'class="label-catalog".*?<.*?>(.*?)<', data, re.S)
    if labeldata:
        albumdata['label'] = labeldata.group(1)
    artistdata = re.search(r'class="album-artist".*?<span.*?>\s*(.*?)\s*</span', data, re.S)
    if artistdata:
        artistlist = re.findall(r'">(.*?)<', artistdata.group(1))
        artists = [{'artist': item} for item in artistlist]
        if artists:
            albumdata['artist'] = artists
            albumdata['artist_description'] = ' / '.join(artistlist)
    thumbsdata = re.search(r'class="album-contain".*?src="(.*?)"', data, re.S)
    if thumbsdata:
        thumb = thumbsdata.group(1)
        # drop the partner suffix; str.rstrip() would strip a set of characters
        # and could eat the end of the actual url
        partner = '?partner=allrovi.com'
        if thumb.endswith(partner):
            thumb = thumb[:-len(partner)]
        # ignore internal blank thumb
        if thumb.startswith('http'):
            thumbdata = {}
            # 0=largest / 1=75 / 2=150 / 3=250 / 4=400 / 5=500 / 6=1080
            if thumb.endswith('f=5'):
                # only swap the trailing size selector, not every 'f=5' in the url
                base = thumb[:-len('f=5')]
                thumbdata['image'] = base + 'f=0'
                thumbdata['preview'] = base + 'f=2'
            else:
                thumbdata['image'] = thumb
                thumbdata['preview'] = thumb
            thumbdata['aspect'] = 'thumb'
            albumdata['thumb'] = [thumbdata]
    return albumdata
114