1 /*
2  * Cantata
3  *
4  * Copyright (c) 2011-2020 Craig Drummond <craig.p.drummond@gmail.com>
5  *
6  */
7 /* This file is part of Clementine.
8    Copyright 2010, David Sansome <me@davidsansome.com>
9 
10    Clementine is free software: you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation, either version 3 of the License, or
13    (at your option) any later version.
14 
15    Clementine is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with Clementine.  If not, see <http://www.gnu.org/licenses/>.
22 */
23 
24 #include "ultimatelyricsprovider.h"
25 #include "network/networkaccessmanager.h"
26 #include <QTextCodec>
27 #include <QXmlStreamReader>
28 #include <QUrl>
29 #include <QUrlQuery>
30 #include <QDebug>
// File-wide switch for diagnostic output; stays off until
// UltimateLyricsProvider::enableDebug() sets it.
static bool debugEnabled = false;
// Streams to qWarning(), prefixed with "Lyrics" and the name of the calling function, but only
// while debugEnabled is set. Expands to a bare `if`, so use it as a complete statement.
#define DBUG if (debugEnabled) qWarning() << "Lyrics" << __FUNCTION__
enableDebug()33 void UltimateLyricsProvider::enableDebug()
34 {
35     debugEnabled=true;
36 }
37 
38 static const QString constArtistArg=QLatin1String("{Artist}");
39 static const QString constArtistLowerArg=QLatin1String("{artist}");
40 static const QString constArtistLowerNoSpaceArg=QLatin1String("{artist2}");
41 static const QString constArtistFirstCharArg=QLatin1String("{a}");
42 static const QString constAlbumArg=QLatin1String("{Album}");
43 static const QString constAlbumLowerArg=QLatin1String("{album}");
44 static const QString constAlbumLowerNoSpaceArg=QLatin1String("{album2}");
45 static const QString constTitleLowerArg=QLatin1String("{title}");
46 static const QString constTitleArg=QLatin1String("{Title}");
47 static const QString constTitleCaseArg=QLatin1String("{Title2}");
48 static const QString constYearArg=QLatin1String("{year}");
49 static const QString constTrackNoArg=QLatin1String("{track}");
50 
/** Returns a copy of @p text with every space character (' ') removed. */
static QString noSpace(const QString &text)
{
    return QString(text).remove(' ');
}
57 
/**
 * Returns the first character of @p text, lower-cased, as a one-character string.
 * An empty @p text is returned unchanged.
 */
static QString firstChar(const QString &text)
{
    if (text.isEmpty()) {
        return text;
    }
    return text[0].toLower();
}
62 
/**
 * Returns @p text with its first character upper-cased and everything after it lower-cased.
 * An empty input yields a default-constructed QString.
 */
static QString titleCase(const QString &text)
{
    if (text.isEmpty()) {
        return QString();
    }

    const QChar initial = text[0].toUpper();
    if (text.length() < 2) {
        return initial;
    }
    return initial + text.mid(1).toLower();
}
73 
doTagReplace(QString str,const Song & song)74 static QString doTagReplace(QString str, const Song &song)
75 {
76     if (str.contains(QLatin1Char('{'))) {
77         QString artistFixed=song.basicArtist();
78         str.replace(constArtistArg, artistFixed);
79         str.replace(constArtistFirstCharArg, firstChar(artistFixed));
80         str.replace(constAlbumArg, song.album);
81         str.replace(constTitleArg, song.basicTitle());
82         str.replace(constYearArg, QString::number(song.year));
83         str.replace(constTrackNoArg, QString::number(song.track));
84     }
85     return str;
86 }
87 
/**
 * Returns the text of @p source lying between the first occurrence of @p begin and the
 * next occurrence of @p end. Both markers are matched case-insensitively and neither is
 * included in the result.
 *
 * @param source Text to search.
 * @param begin  Start marker.
 * @param end    End marker. The literal string "null" means "up to the end of @p source"
 *               when no such marker is found after @p begin.
 * @param isTag  When true and @p begin is not found verbatim, retry with every '>' removed
 *               from @p begin (so a tag with attributes can still match); the result then
 *               starts after the next '>' in @p source.
 * @return The enclosed text, or a null QString if a marker could not be found.
 */
static QString extract(const QString &source, const QString &begin, const QString &end, bool isTag=false)
{
    DBUG << "Looking for" << begin << end;
    int beginIdx = source.indexOf(begin, 0, Qt::CaseInsensitive);
    bool skipTagClose=false;

    if (-1==beginIdx && isTag) {
        // Exact tag not present - look for the tag opening without its closing '>'.
        beginIdx = source.indexOf(QString(begin).remove(">"), 0, Qt::CaseInsensitive);
        skipTagClose=true;
    }
    if (-1==beginIdx) {
        DBUG << "Failed to find begin";
        return QString();
    }

    if (skipTagClose) {
        // Matched only the tag opening, so the content starts after the tag's own '>'.
        const int closeIdx=source.indexOf(">", beginIdx);
        beginIdx = -1!=closeIdx ? closeIdx+1 : beginIdx+begin.length();
    } else {
        beginIdx += begin.length();
    }

    const int endIdx = source.indexOf(end, beginIdx, Qt::CaseInsensitive);
    if (-1==endIdx) {
        if (QLatin1String("null")!=end) {
            DBUG << "Failed to find end";
            return QString();
        }
        // "null" end marker: take everything that follows the begin marker.
        DBUG << "Found match";
        return source.mid(beginIdx);
    }

    DBUG << "Found match";
    // Exactly the characters in [beginIdx, endIdx). The length must not be reduced by one:
    // that would drop the final character and, for an empty match, produce a length of -1,
    // which mid() treats as "everything to the end of the string".
    return source.mid(beginIdx, endIdx - beginIdx);
}
122 
/**
 * Extracts the content of an XML/HTML element from @p source.
 *
 * The element name is taken from @p tag (e.g. "div" from "<div class=...>") and used to
 * build the matching closing tag; the actual extraction is delegated to extract() in tag mode.
 *
 * @return The element content, or a null QString if @p tag is not tag-shaped or nothing matched.
 */
static QString extractXmlTag(const QString &source, const QString &tag)
{
    DBUG << "Looking for" << tag;

    QRegExp tagPattern("<(\\w+).*>"); // ಠ_ಠ
    const bool looksLikeTag = -1!=tagPattern.indexIn(tag);
    if (!looksLikeTag) {
        DBUG << "Failed to find tag";
        return QString();
    }

    DBUG << "Found match";
    const QString closingTag = "</" + tagPattern.cap(1) + ">";
    return extract(source, tag, closingTag, true);
}
135 
/**
 * Removes the first region of @p source that starts with @p begin and finishes with the
 * next @p end, markers included. Markers are matched case-insensitively.
 *
 * @return @p source without that region, or @p source unchanged if either marker is missing.
 */
static QString exclude(const QString &source, const QString &begin, const QString &end)
{
    const int regionStart = source.indexOf(begin, 0, Qt::CaseInsensitive);
    if (regionStart < 0) {
        return source;
    }

    const int endMarkerPos = source.indexOf(end, regionStart + begin.length(), Qt::CaseInsensitive);
    if (endMarkerPos < 0) {
        return source;
    }

    // Keep what precedes the begin marker and what follows the end marker.
    const QString before = source.left(regionStart);
    const QString after = source.mid(endMarkerPos + end.length());
    return before + after;
}
150 
/**
 * Removes an XML/HTML element (opening tag, content and closing tag) from @p source.
 *
 * The closing tag is derived from the element name found in @p tag. If @p tag is not
 * tag-shaped, @p source is returned unchanged.
 */
static QString excludeXmlTag(const QString &source, const QString &tag)
{
    QRegExp tagPattern("<(\\w+).*>"); // ಠ_ಠ
    if (-1!=tagPattern.indexIn(tag)) {
        const QString closingTag = "</" + tagPattern.cap(1) + ">";
        return exclude(source, tag, closingTag);
    }
    return source;
}
160 
applyExtractRule(const UltimateLyricsProvider::Rule & rule,QString & content,const Song & song)161 static void applyExtractRule(const UltimateLyricsProvider::Rule &rule, QString &content, const Song &song)
162 {
163     for (const UltimateLyricsProvider::RuleItem &item: rule) {
164         if (item.second.isNull()) {
165             content = extractXmlTag(content, doTagReplace(item.first, song));
166         } else {
167             content = extract(content, doTagReplace(item.first, song), doTagReplace(item.second, song));
168         }
169     }
170 }
171 
applyExcludeRule(const UltimateLyricsProvider::Rule & rule,QString & content,const Song & song)172 static void applyExcludeRule(const UltimateLyricsProvider::Rule &rule, QString &content, const Song &song)
173 {
174     for (const UltimateLyricsProvider::RuleItem &item: rule) {
175         if (item.second.isNull()) {
176             content = excludeXmlTag(content, doTagReplace(item.first, song));
177         } else {
178             content = exclude(content, doTagReplace(item.first, song), doTagReplace(item.second, song));
179         }
180     }
181 }
182 
urlEncode(QString str)183 static QString urlEncode(QString str)
184 {
185     str.replace(QLatin1Char('&'), QLatin1String("%26"));
186     str.replace(QLatin1Char('?'), QLatin1String("%3f"));
187     str.replace(QLatin1Char('+'), QLatin1String("%2b"));
188     return str;
189 }
190 
UltimateLyricsProvider()191 UltimateLyricsProvider::UltimateLyricsProvider()
192     : enabled(true)
193     , relevance(0)
194 {
195 }
196 
~UltimateLyricsProvider()197 UltimateLyricsProvider::~UltimateLyricsProvider()
198 {
199     abort();
200 }
201 
displayName() const202 QString UltimateLyricsProvider::displayName() const
203 {
204     QString n(name);
205     n.replace("(POLISH)", tr("(Polish Translations)"));
206     n.replace("(PORTUGUESE)", tr("(Portuguese Translations)"));
207     return n;
208 }
209 
fetchInfo(int id,const Song & metadata)210 void UltimateLyricsProvider::fetchInfo(int id, const Song &metadata)
211 {
212     const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
213     if (!codec) {
214         emit lyricsReady(id, QString());
215         return;
216     }
217 
218     QString artistFixed=metadata.basicArtist();
219     QString titleFixed=metadata.basicTitle();
220     QString urlText(url);
221 
222     if (QLatin1String("lyrics.wikia.com")==name) {
223         QUrl url(urlText);
224         QUrlQuery query;
225 
226         query.addQueryItem(QLatin1String("artist"), artistFixed);
227         query.addQueryItem(QLatin1String("song"), titleFixed);
228         query.addQueryItem(QLatin1String("func"), QLatin1String("getSong"));
229         query.addQueryItem(QLatin1String("fmt"), QLatin1String("xml"));
230         url.setQuery(query);
231 
232         NetworkJob *reply = NetworkAccessManager::self()->get(url);
233         requests[reply] = id;
234         connect(reply, SIGNAL(finished()), this, SLOT(wikiMediaSearchResponse()));
235         return;
236     }
237 
238     songs.insert(id, metadata);
239 
240     // Fill in fields in the URL
241     bool urlContainsDetails=urlText.contains(QLatin1Char('{'));
242     if (urlContainsDetails) {
243         doUrlReplace(constArtistArg, artistFixed, urlText);
244         doUrlReplace(constArtistLowerArg, artistFixed.toLower(), urlText);
245         doUrlReplace(constArtistLowerNoSpaceArg, noSpace(artistFixed.toLower()), urlText);
246         doUrlReplace(constArtistFirstCharArg, firstChar(artistFixed), urlText);
247         doUrlReplace(constAlbumArg, metadata.album, urlText);
248         doUrlReplace(constAlbumLowerArg, metadata.album.toLower(), urlText);
249         doUrlReplace(constAlbumLowerNoSpaceArg, noSpace(metadata.album.toLower()), urlText);
250         doUrlReplace(constTitleArg, titleFixed, urlText);
251         doUrlReplace(constTitleLowerArg, titleFixed.toLower(), urlText);
252         doUrlReplace(constTitleCaseArg, titleCase(titleFixed), urlText);
253         doUrlReplace(constYearArg, QString::number(metadata.year), urlText);
254         doUrlReplace(constTrackNoArg, QString::number(metadata.track), urlText);
255     }
256 
257     // For some reason Qt messes up the ? -> %3F and & -> %26 conversions - by placing 25 after the %
258     // So, try and revert this...
259     QUrl url(urlText);
260 
261     if (urlContainsDetails) {
262         QByteArray data=url.toEncoded();
263         data.replace("%253F", "%3F");
264         data.replace("%253f", "%3f");
265         data.replace("%2526", "%26");
266         url=QUrl::fromEncoded(data, QUrl::StrictMode);
267     }
268 
269     QNetworkRequest req(url);
270     req.setRawHeader("User-Agent", "Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0");
271     NetworkJob *reply = NetworkAccessManager::self()->get(req);
272     requests[reply] = id;
273     connect(reply, SIGNAL(finished()), this, SLOT(lyricsFetched()));
274 }
275 
abort()276 void UltimateLyricsProvider::abort()
277 {
278     QHash<NetworkJob *, int>::ConstIterator it(requests.constBegin());
279     QHash<NetworkJob *, int>::ConstIterator end(requests.constEnd());
280 
281     for (; it!=end; ++it) {
282         it.key()->cancelAndDelete();
283     }
284     requests.clear();
285     songs.clear();
286 }
287 
wikiMediaSearchResponse()288 void UltimateLyricsProvider::wikiMediaSearchResponse()
289 {
290     NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
291     if (!reply) {
292         return;
293     }
294 
295     int id = requests.take(reply);
296     reply->deleteLater();
297 
298     if (!reply->ok()) {
299         emit lyricsReady(id, QString());
300         return;
301     }
302 
303     QUrl url;
304     QXmlStreamReader doc(reply->actualJob());
305     while (!doc.atEnd()) {
306         doc.readNext();
307         if (doc.isStartElement() && QLatin1String("url")==doc.name()) {
308             QString lyricsUrl=doc.readElementText();
309             if (!lyricsUrl.contains(QLatin1String("action=edit"))) {
310                 url=QUrl::fromEncoded(lyricsUrl.toUtf8()).toString();
311             }
312             break;
313         }
314     }
315 
316     if (url.isValid()) {
317         QString path=url.path();
318         QByteArray u=url.scheme().toLatin1()+"://"+url.host().toLatin1()+"/api.php?action=query&prop=revisions&rvprop=content&format=xml&titles=";
319         QByteArray titles=QUrl::toPercentEncoding(path.startsWith(QLatin1Char('/')) ? path.mid(1) : path).replace('+', "%2b");
320         NetworkJob *reply = NetworkAccessManager::self()->get(QUrl::fromEncoded(u+titles));
321         requests[reply] = id;
322         connect(reply, SIGNAL(finished()), this, SLOT(wikiMediaLyricsFetched()));
323     } else {
324         emit lyricsReady(id, QString());
325     }
326 }
327 
wikiMediaLyricsFetched()328 void UltimateLyricsProvider::wikiMediaLyricsFetched()
329 {
330     NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
331     if (!reply) {
332         return;
333     }
334 
335     int id = requests.take(reply);
336     reply->deleteLater();
337 
338     if (!reply->ok()) {
339         emit lyricsReady(id, QString());
340         return;
341     }
342 
343     const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
344     QString contents = codec->toUnicode(reply->readAll()).replace("<br />", "<br/>");
345     DBUG << name << "response" << contents;
346     emit lyricsReady(id, extract(contents, QLatin1String("&lt;lyrics&gt;"), QLatin1String("&lt;/lyrics&gt;")));
347 }
348 
lyricsFetched()349 void UltimateLyricsProvider::lyricsFetched()
350 {
351     NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
352     if (!reply) {
353         return;
354     }
355 
356     int id = requests.take(reply);
357     reply->deleteLater();
358     Song song=songs.take(id);
359 
360     if (!reply->ok()) {
361         //emit Finished(id);
362         emit lyricsReady(id, QString());
363         return;
364     }
365 
366     const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
367     const QString originalContent = codec->toUnicode(reply->readAll()).replace("<br />", "<br/>");
368 
369     DBUG << name << "response" << originalContent;
370     // Check for invalid indicators
371     for (const QString &indicator: invalidIndicators) {
372         if (originalContent.contains(indicator)) {
373             //emit Finished(id);
374             DBUG << name << "invalid";
375             emit lyricsReady(id, QString());
376             return;
377         }
378     }
379 
380     QString lyrics;
381 
382     // Apply extract rules
383     for (const Rule& rule: extractRules) {
384         QString content = originalContent;
385         applyExtractRule(rule, content, song);
386         #ifndef Q_OS_WIN
387         content.replace(QLatin1String("\r"), QLatin1String(""));
388         #endif
389         content=content.trimmed();
390 
391         if (!content.isEmpty()) {
392             lyrics = content;
393             break;
394         }
395     }
396 
397     // Apply exclude rules
398     for (const Rule& rule: excludeRules) {
399         applyExcludeRule(rule, lyrics, song);
400     }
401 
402     lyrics=lyrics.trimmed();
403     lyrics.replace("<br/>\n", "<br/>");
404     lyrics.replace("<br>\n", "<br/>");
405     DBUG << name << (lyrics.isEmpty() ? "empty" : "succeeded");
406     emit lyricsReady(id, lyrics);
407 }
408 
doUrlReplace(const QString & tag,const QString & value,QString & u) const409 void UltimateLyricsProvider::doUrlReplace(const QString &tag, const QString &value, QString &u) const
410 {
411     if (!u.contains(tag)) {
412         return;
413     }
414 
415     // Apply URL character replacement
416     QString valueCopy(value);
417     for (const UltimateLyricsProvider::UrlFormat& format: urlFormats) {
418         QRegExp re("[" + QRegExp::escape(format.first) + "]");
419         valueCopy.replace(re, format.second);
420     }
421     u.replace(tag, urlEncode(valueCopy), Qt::CaseInsensitive);
422 }
423 
424 #include "moc_ultimatelyricsprovider.cpp"
425