1 /* StreamParser.cpp */
2
3 /* Copyright (C) 2011-2020 Michael Lugmair (Lucio Carreras)
4 *
5 * This file is part of sayonara player
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "StreamParser.h"
22 #include "Utils/Utils.h"
23 #include "Utils/Algorithm.h"
24 #include "Utils/MetaData/MetaData.h"
25 #include "Utils/MetaData/MetaDataList.h"
26 #include "Utils/FileUtils.h"
27 #include "Utils/WebAccess/AsyncWebAccess.h"
28 #include "Utils/WebAccess/IcyWebAccess.h"
29 #include "Utils/Parser/PlaylistParser.h"
30 #include "Utils/Parser/PodcastParser.h"
31 #include "Utils/Logger/Logger.h"
32 #include "Utils/StandardPaths.h"
33
34 #include <QFile>
35 #include <QDir>
36 #include <QUrl>
37
38 namespace Algorithm=Util::Algorithm;
39
40 struct StreamParser::Private
41 {
42 // If an url leads me to some website content and I have to parse it
43 // and this Url is found again during parsing, it cannot be a stream
44 // and so, it cannot be a metadata object
45 QStringList forbiddenUrls;
46 QString stationName;
47 QString lastUrl;
48 QString coverUrl;
49 MetaDataList tracks;
50 QStringList urls;
51 AsyncWebAccess* activeAwa=nullptr;
52 IcyWebAccess* activeIcy=nullptr;
53 const int MaxSizeUrls=5000;
54 int timeout;
55 bool stopped;
56
PrivateStreamParser::Private57 Private() :
58 timeout(5000),
59 stopped(false)
60 {}
61
isUrlForbiddenStreamParser::Private62 bool isUrlForbidden(const QUrl& url) const
63 {
64 for(const QString& fu : forbiddenUrls)
65 {
66 const QUrl forbiddenUrl(fu);
67 const QString forbidden_host = forbiddenUrl.host();
68
69 if ((forbidden_host.compare(url.host(), Qt::CaseInsensitive) == 0) &&
70 (forbiddenUrl.port(80) == url.port(80)) &&
71 (forbiddenUrl.path().compare(url.path()) == 0) &&
72 (forbiddenUrl.fileName().compare(url.path()) == 0))
73 {
74 return true;
75 }
76 }
77
78 return false;
79 }
80
writePlaylistFileStreamParser::Private81 QString writePlaylistFile(const QByteArray& data) const
82 {
83 QString extension = Util::File::getFileExtension(lastUrl);
84 QString filename = Util::tempPath("ParsedPlaylist");
85
86 if(!extension.isEmpty()) {
87 filename += "." + extension;
88 }
89
90 Util::File::writeFile(data, filename);
91
92 return filename;
93 }
94 };
95
StreamParser(QObject * parent)96 StreamParser::StreamParser(QObject* parent) :
97 QObject(parent)
98 {
99 m = Pimpl::make<Private>();
100 }
101
102 StreamParser::~StreamParser() = default;
103
parse(const QString & stationName,const QString & stationUrl,int timeout)104 void StreamParser::parse(const QString& stationName, const QString& stationUrl, int timeout)
105 {
106 m->stationName.clear();
107
108 if(!stationUrl.isEmpty())
109 {
110 m->stationName = stationName;
111 QStringList urls{ stationUrl };
112 parse(urls, timeout);
113 }
114 }
115
parse(const QStringList & urls,int timeout)116 void StreamParser::parse(const QStringList& urls, int timeout)
117 {
118 m->timeout = timeout;
119 m->stopped = false;
120 m->tracks.clear();
121
122 m->urls = urls;
123 m->urls.removeDuplicates();
124
125 if(m->urls.size() > m->MaxSizeUrls)
126 {
127 emit sigUrlCountExceeded(m->urls.size(), m->MaxSizeUrls);
128 }
129
130 else
131 {
132 parseNextUrl();
133 }
134 }
135
136
parseNextUrl()137 bool StreamParser::parseNextUrl()
138 {
139 if(m->stopped)
140 {
141 emit sigStopped();
142 return false;
143 }
144
145 if(m->urls.isEmpty())
146 {
147 spLog(Log::Develop, this) << "No more urls to parse";
148 emit sigFinished( !m->tracks.empty());
149 return false;
150 }
151
152 m->activeAwa = new AsyncWebAccess(this);
153 m->activeAwa->setBehavior(AsyncWebAccess::Behavior::AsSayonara);
154
155 connect(m->activeAwa, &AsyncWebAccess::sigFinished, this, &StreamParser::awaFinished);
156
157 const QString url = m->urls.takeFirst();
158 m->activeAwa->run(url, m->timeout);
159
160 return true;
161 }
162
163
awaFinished()164 void StreamParser::awaFinished()
165 {
166 auto* awa = dynamic_cast<AsyncWebAccess*>(sender());
167
168 AsyncWebAccess::Status status = awa->status();
169 m->lastUrl = awa->url();
170 m->activeAwa = nullptr;
171
172 if(m->stopped)
173 {
174 awa->deleteLater();
175 emit sigStopped();
176 return;
177 }
178
179 switch(status)
180 {
181 case AsyncWebAccess::Status::GotData:
182 {
183 m->forbiddenUrls << m->lastUrl;
184 spLog(Log::Develop, this) << "Got data. Try to parse content";
185
186 QPair<MetaDataList, PlaylistFiles> result = parseContent(awa->data());
187
188 m->tracks << result.first;
189 m->urls << result.second;
190
191 m->tracks.removeDuplicates();
192 m->urls.removeDuplicates();
193 } break;
194
195 case AsyncWebAccess::Status::NoHttp:
196 {
197 spLog(Log::Develop, this) << "No correct http was found. Maybe Icy?";
198
199 auto* iwa = new IcyWebAccess(this);
200 m->activeIcy = iwa;
201 connect(iwa, &IcyWebAccess::sigFinished, this, &StreamParser::icyFinished);
202 iwa->check(QUrl(m->lastUrl));
203
204 awa->deleteLater();
205 } return;
206
207 case AsyncWebAccess::Status::AudioStream:
208 {
209 spLog(Log::Develop, this) << "Found audio stream";
210 MetaData md;
211 setMetadataTag(md, m->lastUrl, m->coverUrl);
212
213 m->tracks << md;
214 m->tracks.removeDuplicates();
215 } break;
216
217 default:
218 spLog(Log::Develop, this) << "Web Access finished: " << int(status);
219 }
220
221 awa->deleteLater();
222
223 if(m->urls.size() > m->MaxSizeUrls){
224 emit sigUrlCountExceeded(m->urls.size(), m->MaxSizeUrls);
225 }
226
227 else {
228 parseNextUrl();
229 }
230 }
231
232
icyFinished()233 void StreamParser::icyFinished()
234 {
235 auto* iwa = dynamic_cast<IcyWebAccess*>(sender());
236 IcyWebAccess::Status status = iwa->status();
237 m->activeIcy = nullptr;
238
239 if(m->stopped){
240 iwa->deleteLater();
241 emit sigStopped();
242 return;
243 }
244
245 if(status == IcyWebAccess::Status::Success)
246 {
247 spLog(Log::Develop, this) << "Stream is icy stream";
248 MetaData md;
249 setMetadataTag(md, m->lastUrl, m->coverUrl);
250
251 m->tracks << md;
252 m->tracks.removeDuplicates();
253 }
254
255 else {
256 spLog(Log::Develop, this) << "Stream is no icy stream";
257 }
258
259 iwa->deleteLater();
260
261 parseNextUrl();
262 }
263
264
parseContent(const QByteArray & data) const265 QPair<MetaDataList, PlaylistFiles> StreamParser::parseContent(const QByteArray& data) const
266 {
267 QPair<MetaDataList, PlaylistFiles> result;
268
269 spLog(Log::Crazy, this) << QString::fromUtf8(data);
270
271 /** 1. try if podcast file **/
272 result.first = PodcastParser::parsePodcastXmlFile(data);
273
274 /** 2. try if playlist file **/
275 if(result.first.isEmpty())
276 {
277 const QString filename = m->writePlaylistFile(data);
278 result.first = PlaylistParser::parsePlaylist(filename);
279 QFile::remove(filename);
280 }
281
282 if(result.first.isEmpty())
283 {
284 result = parseWebsite(data);
285 }
286
287 else
288 {
289 for(MetaData& md : m->tracks)
290 {
291 setMetadataTag(md, m->lastUrl, m->coverUrl);
292 }
293 }
294
295 return result;
296 }
297
parseWebsite(const QByteArray & arr) const298 QPair<MetaDataList, PlaylistFiles> StreamParser::parseWebsite(const QByteArray& arr) const
299 {
300 MetaDataList tracks;
301 QStringList playlistFiles;
302
303 QStringList validExtensions;
304 validExtensions << Util::soundfileExtensions(false);
305 validExtensions << Util::playlistExtensions(false);
306
307 const QString rePrefix = "(http[s]*://|\"/|'/)";
308 const QString rePath = "\\S+\\.(" + validExtensions.join("|") + ")";
309 const QString reString = "(" + rePrefix + rePath + ")";
310
311 QRegExp regExp(reString);
312 const QUrl parentUrl(m->lastUrl);
313
314 const QString website = QString::fromUtf8(arr);
315 int idx = regExp.indexIn(website);
316
317 QStringList foundUrls;
318 while(idx >= 0)
319 {
320 const QStringList foundStrings = regExp.capturedTexts();
321 for(QString str : foundStrings)
322 {
323 if((str.size() > 7) && !m->isUrlForbidden(QUrl(str)))
324 {
325 if(str.startsWith("\"") || str.startsWith("'"))
326 {
327 str.remove(0, 1);
328 }
329
330 foundUrls << str;
331 }
332 }
333
334 idx = regExp.indexIn(website, idx + 1);
335 }
336
337 foundUrls.removeDuplicates();
338
339 for(const QString& foundUrl : Algorithm::AsConst(foundUrls))
340 {
341 QUrl url(foundUrl);
342 if(url.isRelative())
343 {
344 url.setScheme(parentUrl.scheme());
345 url.setHost(parentUrl.host());
346 }
347
348 if(Util::File::isPlaylistFile(foundUrl))
349 {
350 playlistFiles << foundUrl;
351 }
352
353 else if(Util::File::isSoundFile(foundUrl))
354 {
355 MetaData track;
356 setMetadataTag(track, url.toString());
357
358 const auto filename = Util::File::getFilenameOfPath(url.path());
359 if(!filename.trimmed().isEmpty())
360 {
361 track.setTitle(filename);
362 }
363
364 track.setCoverDownloadUrls({m->lastUrl});
365 tracks << track;
366 }
367 }
368
369 spLog(Log::Develop, this) << "Found " << m->urls.size() << " playlists and " << tracks.size() << " streams";
370
371 return QPair<MetaDataList, PlaylistFiles>(tracks, playlistFiles);
372 }
373
setMetadataTag(MetaData & md,const QString & streamUrl,const QString & coverUrl) const374 void StreamParser::setMetadataTag(MetaData& md, const QString& streamUrl, const QString& coverUrl) const
375 {
376 md.setRadioStation(streamUrl, m->stationName);
377
378 if(md.filepath().trimmed().isEmpty()) {
379 md.setFilepath(streamUrl);
380 }
381
382 if(!coverUrl.isEmpty()) {
383 md.setCoverDownloadUrls({coverUrl});
384 }
385 }
386
tracks() const387 MetaDataList StreamParser::tracks() const
388 {
389 return m->tracks;
390 }
391
setCoverUrl(const QString & coverUrl)392 void StreamParser::setCoverUrl(const QString& coverUrl)
393 {
394 m->coverUrl = coverUrl;
395
396 for(MetaData& md : m->tracks){
397 md.setCoverDownloadUrls({coverUrl});
398 }
399 }
400
stop()401 void StreamParser::stop()
402 {
403 m->stopped = true;
404
405 if(m->activeAwa)
406 {
407 AsyncWebAccess* awa = m->activeAwa;
408 m->activeAwa = nullptr;
409 awa->stop();
410 }
411
412 if(m->activeIcy)
413 {
414 IcyWebAccess* icy = m->activeIcy;
415 m->activeIcy = nullptr;
416 icy->stop();
417 }
418 }
419
isStopped() const420 bool StreamParser::isStopped() const
421 {
422 return m->stopped;
423 }
424