1 /* StreamParser.cpp */
2 
3 /* Copyright (C) 2011-2020 Michael Lugmair (Lucio Carreras)
4  *
5  * This file is part of sayonara player
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16 
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "StreamParser.h"
22 #include "Utils/Utils.h"
23 #include "Utils/Algorithm.h"
24 #include "Utils/MetaData/MetaData.h"
25 #include "Utils/MetaData/MetaDataList.h"
26 #include "Utils/FileUtils.h"
27 #include "Utils/WebAccess/AsyncWebAccess.h"
28 #include "Utils/WebAccess/IcyWebAccess.h"
29 #include "Utils/Parser/PlaylistParser.h"
30 #include "Utils/Parser/PodcastParser.h"
31 #include "Utils/Logger/Logger.h"
32 #include "Utils/StandardPaths.h"
33 
34 #include <QFile>
35 #include <QDir>
36 #include <QUrl>
37 
38 namespace Algorithm=Util::Algorithm;
39 
40 struct StreamParser::Private
41 {
42 	// If an url leads me to some website content and I have to parse it
43 	// and this Url is found again during parsing, it cannot be a stream
44 	// and so, it cannot be a metadata object
45 	QStringList		forbiddenUrls;
46 	QString			stationName;
47 	QString			lastUrl;
48 	QString			coverUrl;
49 	MetaDataList	tracks;
50 	QStringList 	urls;
51 	AsyncWebAccess* activeAwa=nullptr;
52 	IcyWebAccess*	activeIcy=nullptr;
53 	const int		MaxSizeUrls=5000;
54 	int				timeout;
55 	bool			stopped;
56 
PrivateStreamParser::Private57 	Private() :
58 		timeout(5000),
59 		stopped(false)
60 	{}
61 
isUrlForbiddenStreamParser::Private62 	bool isUrlForbidden(const QUrl& url) const
63 	{
64 		for(const QString& fu : forbiddenUrls)
65 		{
66 			const QUrl forbiddenUrl(fu);
67 			const QString forbidden_host = forbiddenUrl.host();
68 
69 			if ((forbidden_host.compare(url.host(), Qt::CaseInsensitive) == 0) &&
70 				(forbiddenUrl.port(80) == url.port(80)) &&
71 				(forbiddenUrl.path().compare(url.path()) == 0) &&
72 				(forbiddenUrl.fileName().compare(url.path()) == 0))
73 			{
74 				return true;
75 			}
76 		}
77 
78 		return false;
79 	}
80 
writePlaylistFileStreamParser::Private81 	QString writePlaylistFile(const QByteArray& data) const
82 	{
83 		QString extension = Util::File::getFileExtension(lastUrl);
84 		QString filename = Util::tempPath("ParsedPlaylist");
85 
86 		if(!extension.isEmpty()) {
87 			filename += "." + extension;
88 		}
89 
90 		Util::File::writeFile(data, filename);
91 
92 		return filename;
93 	}
94 };
95 
StreamParser(QObject * parent)96 StreamParser::StreamParser(QObject* parent) :
97 	QObject(parent)
98 {
99 	m = Pimpl::make<Private>();
100 }
101 
// Defaulted in this translation unit, where Private is a complete type.
StreamParser::~StreamParser() = default;
103 
parse(const QString & stationName,const QString & stationUrl,int timeout)104 void StreamParser::parse(const QString& stationName, const QString& stationUrl, int timeout)
105 {
106 	m->stationName.clear();
107 
108 	if(!stationUrl.isEmpty())
109 	{
110 		m->stationName = stationName;
111 		QStringList urls{ stationUrl };
112 		parse(urls, timeout);
113 	}
114 }
115 
parse(const QStringList & urls,int timeout)116 void StreamParser::parse(const QStringList& urls, int timeout)
117 {
118 	m->timeout = timeout;
119 	m->stopped = false;
120 	m->tracks.clear();
121 
122 	m->urls = urls;
123 	m->urls.removeDuplicates();
124 
125 	if(m->urls.size() > m->MaxSizeUrls)
126 	{
127 		emit sigUrlCountExceeded(m->urls.size(), m->MaxSizeUrls);
128 	}
129 
130 	else
131 	{
132 		parseNextUrl();
133 	}
134 }
135 
136 
parseNextUrl()137 bool StreamParser::parseNextUrl()
138 {
139 	if(m->stopped)
140 	{
141 		emit sigStopped();
142 		return false;
143 	}
144 
145 	if(m->urls.isEmpty())
146 	{
147 		spLog(Log::Develop, this) << "No more urls to parse";
148 		emit sigFinished( !m->tracks.empty());
149 		return false;
150 	}
151 
152 	m->activeAwa = new AsyncWebAccess(this);
153 	m->activeAwa->setBehavior(AsyncWebAccess::Behavior::AsSayonara);
154 
155 	connect(m->activeAwa, &AsyncWebAccess::sigFinished, this, &StreamParser::awaFinished);
156 
157 	const QString url = m->urls.takeFirst();
158 	m->activeAwa->run(url, m->timeout);
159 
160 	return true;
161 }
162 
163 
awaFinished()164 void StreamParser::awaFinished()
165 {
166 	auto* awa = dynamic_cast<AsyncWebAccess*>(sender());
167 
168 	AsyncWebAccess::Status status = awa->status();
169 	m->lastUrl = awa->url();
170 	m->activeAwa = nullptr;
171 
172 	if(m->stopped)
173 	{
174 		awa->deleteLater();
175 		emit sigStopped();
176 		return;
177 	}
178 
179 	switch(status)
180 	{
181 		case AsyncWebAccess::Status::GotData:
182 		{
183 			m->forbiddenUrls << m->lastUrl;
184 			spLog(Log::Develop, this) << "Got data. Try to parse content";
185 
186 			QPair<MetaDataList, PlaylistFiles> result = parseContent(awa->data());
187 
188 			m->tracks << result.first;
189 			m->urls << result.second;
190 
191 			m->tracks.removeDuplicates();
192 			m->urls.removeDuplicates();
193 		} break;
194 
195 		case AsyncWebAccess::Status::NoHttp:
196 		{
197 			spLog(Log::Develop, this) << "No correct http was found. Maybe Icy?";
198 
199 			auto* iwa = new IcyWebAccess(this);
200 			m->activeIcy = iwa;
201 			connect(iwa, &IcyWebAccess::sigFinished, this, &StreamParser::icyFinished);
202 			iwa->check(QUrl(m->lastUrl));
203 
204 			awa->deleteLater();
205 		} return;
206 
207 		case AsyncWebAccess::Status::AudioStream:
208 		{
209 			spLog(Log::Develop, this) << "Found audio stream";
210 			MetaData md;
211 			setMetadataTag(md, m->lastUrl, m->coverUrl);
212 
213 			m->tracks << md;
214 			m->tracks.removeDuplicates();
215 		} break;
216 
217 		default:
218 			spLog(Log::Develop, this) << "Web Access finished: " << int(status);
219 	}
220 
221 	awa->deleteLater();
222 
223 	if(m->urls.size() > m->MaxSizeUrls){
224 		emit sigUrlCountExceeded(m->urls.size(), m->MaxSizeUrls);
225 	}
226 
227 	else {
228 		parseNextUrl();
229 	}
230 }
231 
232 
icyFinished()233 void StreamParser::icyFinished()
234 {
235 	auto* iwa = dynamic_cast<IcyWebAccess*>(sender());
236 	IcyWebAccess::Status status = iwa->status();
237 	m->activeIcy = nullptr;
238 
239 	if(m->stopped){
240 		iwa->deleteLater();
241 		emit sigStopped();
242 		return;
243 	}
244 
245 	if(status == IcyWebAccess::Status::Success)
246 	{
247 		spLog(Log::Develop, this) << "Stream is icy stream";
248 		MetaData md;
249 		setMetadataTag(md, m->lastUrl, m->coverUrl);
250 
251 		m->tracks << md;
252 		m->tracks.removeDuplicates();
253 	}
254 
255 	else {
256 		spLog(Log::Develop, this) << "Stream is no icy stream";
257 	}
258 
259 	iwa->deleteLater();
260 
261 	parseNextUrl();
262 }
263 
264 
parseContent(const QByteArray & data) const265 QPair<MetaDataList, PlaylistFiles> StreamParser::parseContent(const QByteArray& data) const
266 {
267 	QPair<MetaDataList, PlaylistFiles> result;
268 
269 	spLog(Log::Crazy, this) << QString::fromUtf8(data);
270 
271 	/** 1. try if podcast file **/
272 	result.first = PodcastParser::parsePodcastXmlFile(data);
273 
274 	/** 2. try if playlist file **/
275 	if(result.first.isEmpty())
276 	{
277 		const QString filename = m->writePlaylistFile(data);
278 		result.first = PlaylistParser::parsePlaylist(filename);
279 		QFile::remove(filename);
280 	}
281 
282 	if(result.first.isEmpty())
283 	{
284 		result = parseWebsite(data);
285 	}
286 
287 	else
288 	{
289 		for(MetaData& md : m->tracks)
290 		{
291 			setMetadataTag(md, m->lastUrl, m->coverUrl);
292 		}
293 	}
294 
295 	return result;
296 }
297 
parseWebsite(const QByteArray & arr) const298 QPair<MetaDataList, PlaylistFiles> StreamParser::parseWebsite(const QByteArray& arr) const
299 {
300 	MetaDataList tracks;
301 	QStringList playlistFiles;
302 
303 	QStringList validExtensions;
304 	validExtensions << Util::soundfileExtensions(false);
305 	validExtensions << Util::playlistExtensions(false);
306 
307 	const QString rePrefix = "(http[s]*://|\"/|'/)";
308 	const QString rePath = "\\S+\\.(" + validExtensions.join("|") + ")";
309 	const QString reString = "(" + rePrefix + rePath + ")";
310 
311 	QRegExp regExp(reString);
312 	const QUrl parentUrl(m->lastUrl);
313 
314 	const QString website = QString::fromUtf8(arr);
315 	int idx = regExp.indexIn(website);
316 
317 	QStringList foundUrls;
318 	while(idx >= 0)
319 	{
320 		const QStringList foundStrings = regExp.capturedTexts();
321 		for(QString str : foundStrings)
322 		{
323 			if((str.size() > 7) && !m->isUrlForbidden(QUrl(str)))
324 			{
325 				if(str.startsWith("\"") || str.startsWith("'"))
326 				{
327 					str.remove(0, 1);
328 				}
329 
330 				foundUrls << str;
331 			}
332 		}
333 
334 		idx = regExp.indexIn(website, idx + 1);
335 	}
336 
337 	foundUrls.removeDuplicates();
338 
339 	for(const QString& foundUrl : Algorithm::AsConst(foundUrls))
340 	{
341 		QUrl url(foundUrl);
342 		if(url.isRelative())
343 		{
344 			url.setScheme(parentUrl.scheme());
345 			url.setHost(parentUrl.host());
346 		}
347 
348 		if(Util::File::isPlaylistFile(foundUrl))
349 		{
350 			playlistFiles << foundUrl;
351 		}
352 
353 		else if(Util::File::isSoundFile(foundUrl))
354 		{
355 			MetaData track;
356 			setMetadataTag(track, url.toString());
357 
358 			const auto filename = Util::File::getFilenameOfPath(url.path());
359 			if(!filename.trimmed().isEmpty())
360 			{
361 				track.setTitle(filename);
362 			}
363 
364 			track.setCoverDownloadUrls({m->lastUrl});
365 			tracks << track;
366 		}
367 	}
368 
369 	spLog(Log::Develop, this) << "Found " << m->urls.size() << " playlists and " << tracks.size() << " streams";
370 
371 	return QPair<MetaDataList, PlaylistFiles>(tracks, playlistFiles);
372 }
373 
setMetadataTag(MetaData & md,const QString & streamUrl,const QString & coverUrl) const374 void StreamParser::setMetadataTag(MetaData& md, const QString& streamUrl, const QString& coverUrl) const
375 {
376 	md.setRadioStation(streamUrl, m->stationName);
377 
378 	if(md.filepath().trimmed().isEmpty()) {
379 		md.setFilepath(streamUrl);
380 	}
381 
382 	if(!coverUrl.isEmpty()) {
383 		md.setCoverDownloadUrls({coverUrl});
384 	}
385 }
386 
tracks() const387 MetaDataList StreamParser::tracks() const
388 {
389 	return m->tracks;
390 }
391 
setCoverUrl(const QString & coverUrl)392 void StreamParser::setCoverUrl(const QString& coverUrl)
393 {
394 	m->coverUrl = coverUrl;
395 
396 	for(MetaData& md : m->tracks){
397 		md.setCoverDownloadUrls({coverUrl});
398 	}
399 }
400 
stop()401 void StreamParser::stop()
402 {
403 	m->stopped = true;
404 
405 	if(m->activeAwa)
406 	{
407 		AsyncWebAccess* awa = m->activeAwa;
408 		m->activeAwa = nullptr;
409 		awa->stop();
410 	}
411 
412 	if(m->activeIcy)
413 	{
414 		IcyWebAccess* icy = m->activeIcy;
415 		m->activeIcy = nullptr;
416 		icy->stop();
417 	}
418 }
419 
isStopped() const420 bool StreamParser::isStopped() const
421 {
422 	return m->stopped;
423 }
424