1 /*
2 * Cantata
3 *
4 * Copyright (c) 2011-2020 Craig Drummond <craig.p.drummond@gmail.com>
5 *
6 */
7 /* This file is part of Clementine.
8 Copyright 2010, David Sansome <me@davidsansome.com>
9
10 Clementine is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 Clementine is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with Clementine. If not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "ultimatelyricsprovider.h"
25 #include "network/networkaccessmanager.h"
26 #include <QTextCodec>
27 #include <QXmlStreamReader>
28 #include <QUrl>
29 #include <QUrlQuery>
30 #include <QDebug>
// File-wide switch for the lyrics diagnostics below; stays off until
// UltimateLyricsProvider::enableDebug() sets it.
static bool debugEnabled = false;

// Streams to qWarning(), prefixed with "Lyrics" and the name of the calling
// function, but only while debugEnabled is set. Note that this expands to a bare
// `if`, so it must not be used directly in front of an `else`.
#define DBUG if (debugEnabled) qWarning() << "Lyrics" << __FUNCTION__
enableDebug()33 void UltimateLyricsProvider::enableDebug()
34 {
35 debugEnabled=true;
36 }
37
38 static const QString constArtistArg=QLatin1String("{Artist}");
39 static const QString constArtistLowerArg=QLatin1String("{artist}");
40 static const QString constArtistLowerNoSpaceArg=QLatin1String("{artist2}");
41 static const QString constArtistFirstCharArg=QLatin1String("{a}");
42 static const QString constAlbumArg=QLatin1String("{Album}");
43 static const QString constAlbumLowerArg=QLatin1String("{album}");
44 static const QString constAlbumLowerNoSpaceArg=QLatin1String("{album2}");
45 static const QString constTitleLowerArg=QLatin1String("{title}");
46 static const QString constTitleArg=QLatin1String("{Title}");
47 static const QString constTitleCaseArg=QLatin1String("{Title2}");
48 static const QString constYearArg=QLatin1String("{year}");
49 static const QString constTrackNoArg=QLatin1String("{track}");
50
// Returns a copy of text with every ' ' character removed.
static QString noSpace(const QString &text)
{
    return QString(text).remove(QLatin1Char(' '));
}
57
// Returns the first character of text, lower-cased, as a one-character string.
// An empty input is handed back unchanged.
static QString firstChar(const QString &text)
{
    if (text.isEmpty()) {
        return text;
    }
    return QString(text.at(0).toLower());
}
62
// Returns text with its first character upper-cased and every following
// character lower-cased. An empty input yields a default-constructed QString.
static QString titleCase(const QString &text)
{
    if (text.isEmpty()) {
        return QString();
    }

    // The leading character is converted as a single QChar; the tail (which is
    // empty for a one-character input) is lower-cased as a string.
    QString result(text.at(0).toUpper());
    result += text.mid(1).toLower();
    return result;
}
73
doTagReplace(QString str,const Song & song)74 static QString doTagReplace(QString str, const Song &song)
75 {
76 if (str.contains(QLatin1Char('{'))) {
77 QString artistFixed=song.basicArtist();
78 str.replace(constArtistArg, artistFixed);
79 str.replace(constArtistFirstCharArg, firstChar(artistFixed));
80 str.replace(constAlbumArg, song.album);
81 str.replace(constTitleArg, song.basicTitle());
82 str.replace(constYearArg, QString::number(song.year));
83 str.replace(constTrackNoArg, QString::number(song.track));
84 }
85 return str;
86 }
87
// Returns the text of source that lies between the markers begin and end.
//
// Both markers are located case-insensitively, and end is only searched for after
// the end of begin. Parameters:
//   source - text to search.
//   begin  - opening marker; it is not part of the result.
//   end    - closing marker; it is not part of the result. The literal value
//            "null" makes the closing marker optional: if it cannot be found,
//            everything after begin is returned.
//   isTag  - begin is an XML/HTML opening tag. If it is not found verbatim, the
//            search is retried with its '>' characters removed (so a tag carrying
//            attributes still matches) and the content then starts after the next
//            '>' in source.
//
// Returns a default-constructed QString if begin, or a required end, is missing.
static QString extract(const QString &source, const QString &begin, const QString &end, bool isTag=false)
{
    DBUG << "Looking for" << begin << end;
    int beginIdx = source.indexOf(begin, 0, Qt::CaseInsensitive);
    bool skipTagClose=false;

    if (-1==beginIdx && isTag) {
        // Retry without the closing '>' so that "<tag attr=...>" matches "<tag>"
        beginIdx = source.indexOf(QString(begin).remove(">"), 0, Qt::CaseInsensitive);
        skipTagClose=true;
    }
    if (-1==beginIdx) {
        DBUG << "Failed to find begin";
        return QString();
    }
    if (skipTagClose) {
        // Content starts after the '>' that really closes the opening tag
        int closeIdx=source.indexOf(">", beginIdx);
        if (-1!=closeIdx) {
            beginIdx=closeIdx+1;
        } else {
            beginIdx += begin.length();
        }
    } else {
        beginIdx += begin.length();
    }

    int endIdx = source.indexOf(end, beginIdx, Qt::CaseInsensitive);
    if (-1==endIdx) {
        if (QLatin1String("null")!=end) {
            DBUG << "Failed to find end";
            return QString();
        }
        // Optional end marker that is absent: take everything up to the end of source
        DBUG << "Found match";
        return source.mid(beginIdx);
    }

    DBUG << "Found match";
    // The length is exactly the distance between the two markers. Subtracting one
    // more would drop the final character and, for adjacent markers, pass -1 to
    // mid(), which means "rest of the string" rather than "nothing".
    return source.mid(beginIdx, endIdx - beginIdx);
}
122
// Returns the content of the element in source that is opened by tag.
//
// The element name is taken from tag with a regular expression and used to build
// the matching closing tag; the actual lookup is delegated to extract() in tag
// mode. Returns a default-constructed QString if tag does not look like an
// opening tag.
static QString extractXmlTag(const QString &source, const QString &tag)
{
    DBUG << "Looking for" << tag;

    QRegExp tagNameRe("<(\\w+).*>");
    const bool looksLikeTag = -1!=tagNameRe.indexIn(tag);
    if (!looksLikeTag) {
        DBUG << "Failed to find tag";
        return QString();
    }

    DBUG << "Found match";
    const QString closingTag = "</" + tagNameRe.cap(1) + ">";
    return extract(source, tag, closingTag, true);
}
135
// Returns source with the first span running from begin up to and including the
// following end removed. Both markers are matched case-insensitively. If either
// marker is missing, source is returned unchanged.
static QString exclude(const QString &source, const QString &begin, const QString &end)
{
    const int cutFrom = source.indexOf(begin, 0, Qt::CaseInsensitive);
    if (-1==cutFrom) {
        return source;
    }

    const int endMarkerAt = source.indexOf(end, cutFrom + begin.length(), Qt::CaseInsensitive);
    if (-1==endMarkerAt) {
        return source;
    }

    // Keep what precedes the opening marker and what follows the closing marker
    const int tailLength = source.length() - endMarkerAt - end.length();
    const QString head = source.left(cutFrom);
    const QString tail = source.right(tailLength);
    return head + tail;
}
150
// Returns source with the element opened by tag removed, closing tag included.
//
// The element name is taken from tag with a regular expression to build the
// closing tag; the removal itself is done by exclude(). source is returned
// unchanged if tag does not look like an opening tag.
static QString excludeXmlTag(const QString &source, const QString &tag)
{
    QRegExp tagNameRe("<(\\w+).*>");
    const bool looksLikeTag = -1!=tagNameRe.indexIn(tag);
    if (!looksLikeTag) {
        return source;
    }

    const QString closingTag = "</" + tagNameRe.cap(1) + ">";
    return exclude(source, tag, closingTag);
}
160
applyExtractRule(const UltimateLyricsProvider::Rule & rule,QString & content,const Song & song)161 static void applyExtractRule(const UltimateLyricsProvider::Rule &rule, QString &content, const Song &song)
162 {
163 for (const UltimateLyricsProvider::RuleItem &item: rule) {
164 if (item.second.isNull()) {
165 content = extractXmlTag(content, doTagReplace(item.first, song));
166 } else {
167 content = extract(content, doTagReplace(item.first, song), doTagReplace(item.second, song));
168 }
169 }
170 }
171
applyExcludeRule(const UltimateLyricsProvider::Rule & rule,QString & content,const Song & song)172 static void applyExcludeRule(const UltimateLyricsProvider::Rule &rule, QString &content, const Song &song)
173 {
174 for (const UltimateLyricsProvider::RuleItem &item: rule) {
175 if (item.second.isNull()) {
176 content = excludeXmlTag(content, doTagReplace(item.first, song));
177 } else {
178 content = exclude(content, doTagReplace(item.first, song), doTagReplace(item.second, song));
179 }
180 }
181 }
182
urlEncode(QString str)183 static QString urlEncode(QString str)
184 {
185 str.replace(QLatin1Char('&'), QLatin1String("%26"));
186 str.replace(QLatin1Char('?'), QLatin1String("%3f"));
187 str.replace(QLatin1Char('+'), QLatin1String("%2b"));
188 return str;
189 }
190
UltimateLyricsProvider()191 UltimateLyricsProvider::UltimateLyricsProvider()
192 : enabled(true)
193 , relevance(0)
194 {
195 }
196
~UltimateLyricsProvider()197 UltimateLyricsProvider::~UltimateLyricsProvider()
198 {
199 abort();
200 }
201
displayName() const202 QString UltimateLyricsProvider::displayName() const
203 {
204 QString n(name);
205 n.replace("(POLISH)", tr("(Polish Translations)"));
206 n.replace("(PORTUGUESE)", tr("(Portuguese Translations)"));
207 return n;
208 }
209
fetchInfo(int id,const Song & metadata)210 void UltimateLyricsProvider::fetchInfo(int id, const Song &metadata)
211 {
212 const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
213 if (!codec) {
214 emit lyricsReady(id, QString());
215 return;
216 }
217
218 QString artistFixed=metadata.basicArtist();
219 QString titleFixed=metadata.basicTitle();
220 QString urlText(url);
221
222 if (QLatin1String("lyrics.wikia.com")==name) {
223 QUrl url(urlText);
224 QUrlQuery query;
225
226 query.addQueryItem(QLatin1String("artist"), artistFixed);
227 query.addQueryItem(QLatin1String("song"), titleFixed);
228 query.addQueryItem(QLatin1String("func"), QLatin1String("getSong"));
229 query.addQueryItem(QLatin1String("fmt"), QLatin1String("xml"));
230 url.setQuery(query);
231
232 NetworkJob *reply = NetworkAccessManager::self()->get(url);
233 requests[reply] = id;
234 connect(reply, SIGNAL(finished()), this, SLOT(wikiMediaSearchResponse()));
235 return;
236 }
237
238 songs.insert(id, metadata);
239
240 // Fill in fields in the URL
241 bool urlContainsDetails=urlText.contains(QLatin1Char('{'));
242 if (urlContainsDetails) {
243 doUrlReplace(constArtistArg, artistFixed, urlText);
244 doUrlReplace(constArtistLowerArg, artistFixed.toLower(), urlText);
245 doUrlReplace(constArtistLowerNoSpaceArg, noSpace(artistFixed.toLower()), urlText);
246 doUrlReplace(constArtistFirstCharArg, firstChar(artistFixed), urlText);
247 doUrlReplace(constAlbumArg, metadata.album, urlText);
248 doUrlReplace(constAlbumLowerArg, metadata.album.toLower(), urlText);
249 doUrlReplace(constAlbumLowerNoSpaceArg, noSpace(metadata.album.toLower()), urlText);
250 doUrlReplace(constTitleArg, titleFixed, urlText);
251 doUrlReplace(constTitleLowerArg, titleFixed.toLower(), urlText);
252 doUrlReplace(constTitleCaseArg, titleCase(titleFixed), urlText);
253 doUrlReplace(constYearArg, QString::number(metadata.year), urlText);
254 doUrlReplace(constTrackNoArg, QString::number(metadata.track), urlText);
255 }
256
257 // For some reason Qt messes up the ? -> %3F and & -> %26 conversions - by placing 25 after the %
258 // So, try and revert this...
259 QUrl url(urlText);
260
261 if (urlContainsDetails) {
262 QByteArray data=url.toEncoded();
263 data.replace("%253F", "%3F");
264 data.replace("%253f", "%3f");
265 data.replace("%2526", "%26");
266 url=QUrl::fromEncoded(data, QUrl::StrictMode);
267 }
268
269 QNetworkRequest req(url);
270 req.setRawHeader("User-Agent", "Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0");
271 NetworkJob *reply = NetworkAccessManager::self()->get(req);
272 requests[reply] = id;
273 connect(reply, SIGNAL(finished()), this, SLOT(lyricsFetched()));
274 }
275
abort()276 void UltimateLyricsProvider::abort()
277 {
278 QHash<NetworkJob *, int>::ConstIterator it(requests.constBegin());
279 QHash<NetworkJob *, int>::ConstIterator end(requests.constEnd());
280
281 for (; it!=end; ++it) {
282 it.key()->cancelAndDelete();
283 }
284 requests.clear();
285 songs.clear();
286 }
287
wikiMediaSearchResponse()288 void UltimateLyricsProvider::wikiMediaSearchResponse()
289 {
290 NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
291 if (!reply) {
292 return;
293 }
294
295 int id = requests.take(reply);
296 reply->deleteLater();
297
298 if (!reply->ok()) {
299 emit lyricsReady(id, QString());
300 return;
301 }
302
303 QUrl url;
304 QXmlStreamReader doc(reply->actualJob());
305 while (!doc.atEnd()) {
306 doc.readNext();
307 if (doc.isStartElement() && QLatin1String("url")==doc.name()) {
308 QString lyricsUrl=doc.readElementText();
309 if (!lyricsUrl.contains(QLatin1String("action=edit"))) {
310 url=QUrl::fromEncoded(lyricsUrl.toUtf8()).toString();
311 }
312 break;
313 }
314 }
315
316 if (url.isValid()) {
317 QString path=url.path();
318 QByteArray u=url.scheme().toLatin1()+"://"+url.host().toLatin1()+"/api.php?action=query&prop=revisions&rvprop=content&format=xml&titles=";
319 QByteArray titles=QUrl::toPercentEncoding(path.startsWith(QLatin1Char('/')) ? path.mid(1) : path).replace('+', "%2b");
320 NetworkJob *reply = NetworkAccessManager::self()->get(QUrl::fromEncoded(u+titles));
321 requests[reply] = id;
322 connect(reply, SIGNAL(finished()), this, SLOT(wikiMediaLyricsFetched()));
323 } else {
324 emit lyricsReady(id, QString());
325 }
326 }
327
wikiMediaLyricsFetched()328 void UltimateLyricsProvider::wikiMediaLyricsFetched()
329 {
330 NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
331 if (!reply) {
332 return;
333 }
334
335 int id = requests.take(reply);
336 reply->deleteLater();
337
338 if (!reply->ok()) {
339 emit lyricsReady(id, QString());
340 return;
341 }
342
343 const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
344 QString contents = codec->toUnicode(reply->readAll()).replace("<br />", "<br/>");
345 DBUG << name << "response" << contents;
346 emit lyricsReady(id, extract(contents, QLatin1String("<lyrics>"), QLatin1String("</lyrics>")));
347 }
348
lyricsFetched()349 void UltimateLyricsProvider::lyricsFetched()
350 {
351 NetworkJob *reply = qobject_cast<NetworkJob*>(sender());
352 if (!reply) {
353 return;
354 }
355
356 int id = requests.take(reply);
357 reply->deleteLater();
358 Song song=songs.take(id);
359
360 if (!reply->ok()) {
361 //emit Finished(id);
362 emit lyricsReady(id, QString());
363 return;
364 }
365
366 const QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1().constData());
367 const QString originalContent = codec->toUnicode(reply->readAll()).replace("<br />", "<br/>");
368
369 DBUG << name << "response" << originalContent;
370 // Check for invalid indicators
371 for (const QString &indicator: invalidIndicators) {
372 if (originalContent.contains(indicator)) {
373 //emit Finished(id);
374 DBUG << name << "invalid";
375 emit lyricsReady(id, QString());
376 return;
377 }
378 }
379
380 QString lyrics;
381
382 // Apply extract rules
383 for (const Rule& rule: extractRules) {
384 QString content = originalContent;
385 applyExtractRule(rule, content, song);
386 #ifndef Q_OS_WIN
387 content.replace(QLatin1String("\r"), QLatin1String(""));
388 #endif
389 content=content.trimmed();
390
391 if (!content.isEmpty()) {
392 lyrics = content;
393 break;
394 }
395 }
396
397 // Apply exclude rules
398 for (const Rule& rule: excludeRules) {
399 applyExcludeRule(rule, lyrics, song);
400 }
401
402 lyrics=lyrics.trimmed();
403 lyrics.replace("<br/>\n", "<br/>");
404 lyrics.replace("<br>\n", "<br/>");
405 DBUG << name << (lyrics.isEmpty() ? "empty" : "succeeded");
406 emit lyricsReady(id, lyrics);
407 }
408
doUrlReplace(const QString & tag,const QString & value,QString & u) const409 void UltimateLyricsProvider::doUrlReplace(const QString &tag, const QString &value, QString &u) const
410 {
411 if (!u.contains(tag)) {
412 return;
413 }
414
415 // Apply URL character replacement
416 QString valueCopy(value);
417 for (const UltimateLyricsProvider::UrlFormat& format: urlFormats) {
418 QRegExp re("[" + QRegExp::escape(format.first) + "]");
419 valueCopy.replace(re, format.second);
420 }
421 u.replace(tag, urlEncode(valueCopy), Qt::CaseInsensitive);
422 }
423
424 #include "moc_ultimatelyricsprovider.cpp"
425