1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9 #include "Scraper.h"
10
11 #include "AddonManager.h"
12 #include "FileItem.h"
13 #include "ServiceBroker.h"
14 #include "URL.h"
15 #include "Util.h"
16 #include "addons/settings/AddonSettings.h"
17 #include "filesystem/CurlFile.h"
18 #include "filesystem/Directory.h"
19 #include "filesystem/File.h"
20 #include "filesystem/PluginDirectory.h"
21 #include "guilib/LocalizeStrings.h"
22 #include "music/Album.h"
23 #include "music/Artist.h"
24 #include "music/MusicDatabase.h"
25 #include "music/infoscanner/MusicAlbumInfo.h"
26 #include "music/infoscanner/MusicArtistInfo.h"
27 #include "settings/AdvancedSettings.h"
28 #include "settings/SettingsComponent.h"
29 #include "settings/SettingsValueFlatJsonSerializer.h"
30 #include "utils/CharsetConverter.h"
31 #include "utils/ScraperParser.h"
32 #include "utils/ScraperUrl.h"
33 #include "utils/StringUtils.h"
34 #include "utils/URIUtils.h"
35 #include "utils/XMLUtils.h"
36 #include "utils/log.h"
37 #include "video/VideoDatabase.h"
38
#include <algorithm>
#include <iterator>
#include <sstream>

#include <fstrcmp.h>
43
44 using namespace XFILE;
45 using namespace MUSIC_GRABBER;
46 using namespace VIDEO;
47
48 namespace ADDON
49 {
50
51 typedef struct
52 {
53 const char *name;
54 CONTENT_TYPE type;
55 int pretty;
56 } ContentMapping;
57
58 static const ContentMapping content[] = {{"unknown", CONTENT_NONE, 231},
59 {"albums", CONTENT_ALBUMS, 132},
60 {"music", CONTENT_ALBUMS, 132},
61 {"artists", CONTENT_ARTISTS, 133},
62 {"movies", CONTENT_MOVIES, 20342},
63 {"tvshows", CONTENT_TVSHOWS, 20343},
64 {"musicvideos", CONTENT_MUSICVIDEOS, 20389}};
65
TranslateContent(const CONTENT_TYPE & type,bool pretty)66 std::string TranslateContent(const CONTENT_TYPE &type, bool pretty /*=false*/)
67 {
68 for (const ContentMapping& map : content)
69 {
70 if (type == map.type)
71 {
72 if (pretty && map.pretty)
73 return g_localizeStrings.Get(map.pretty);
74 else
75 return map.name;
76 }
77 }
78 return "";
79 }
80
TranslateContent(const std::string & string)81 CONTENT_TYPE TranslateContent(const std::string &string)
82 {
83 for (const ContentMapping& map : content)
84 {
85 if (string == map.name)
86 return map.type;
87 }
88 return CONTENT_NONE;
89 }
90
ScraperTypeFromContent(const CONTENT_TYPE & content)91 TYPE ScraperTypeFromContent(const CONTENT_TYPE &content)
92 {
93 switch (content)
94 {
95 case CONTENT_ALBUMS:
96 return ADDON_SCRAPER_ALBUMS;
97 case CONTENT_ARTISTS:
98 return ADDON_SCRAPER_ARTISTS;
99 case CONTENT_MOVIES:
100 return ADDON_SCRAPER_MOVIES;
101 case CONTENT_MUSICVIDEOS:
102 return ADDON_SCRAPER_MUSICVIDEOS;
103 case CONTENT_TVSHOWS:
104 return ADDON_SCRAPER_TVSHOWS;
105 default:
106 return ADDON_UNKNOWN;
107 }
108 }
109
110 // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
CheckScraperError(const TiXmlElement * pxeRoot)111 static void CheckScraperError(const TiXmlElement *pxeRoot)
112 {
113 if (!pxeRoot || StringUtils::CompareNoCase(pxeRoot->Value(), "error"))
114 return;
115 std::string sTitle;
116 std::string sMessage;
117 XMLUtils::GetString(pxeRoot, "title", sTitle);
118 XMLUtils::GetString(pxeRoot, "message", sMessage);
119 throw CScraperError(sTitle, sMessage);
120 }
121
CScraper(const AddonInfoPtr & addonInfo,TYPE addonType)122 CScraper::CScraper(const AddonInfoPtr& addonInfo, TYPE addonType)
123 : CAddon(addonInfo, addonType)
124 , m_fLoaded(false)
125 , m_requiressettings(false)
126 , m_pathContent(CONTENT_NONE)
127 {
128 m_requiressettings = addonInfo->Type(addonType)->GetValue("@requiressettings").asBoolean();
129
130 CDateTimeSpan persistence;
131 std::string tmp = addonInfo->Type(addonType)->GetValue("@cachepersistence").asString();
132 if (!tmp.empty())
133 m_persistence.SetFromTimeString(tmp);
134
135 switch (addonType)
136 {
137 case ADDON_SCRAPER_ALBUMS:
138 m_pathContent = CONTENT_ALBUMS;
139 break;
140 case ADDON_SCRAPER_ARTISTS:
141 m_pathContent = CONTENT_ARTISTS;
142 break;
143 case ADDON_SCRAPER_MOVIES:
144 m_pathContent = CONTENT_MOVIES;
145 break;
146 case ADDON_SCRAPER_MUSICVIDEOS:
147 m_pathContent = CONTENT_MUSICVIDEOS;
148 break;
149 case ADDON_SCRAPER_TVSHOWS:
150 m_pathContent = CONTENT_TVSHOWS;
151 break;
152 default:
153 break;
154 }
155
156 m_isPython = URIUtils::GetExtension(addonInfo->Type(addonType)->LibPath()) == ".py";
157 }
158
Supports(const CONTENT_TYPE & content) const159 bool CScraper::Supports(const CONTENT_TYPE &content) const
160 {
161 return Type() == ScraperTypeFromContent(content);
162 }
163
SetPathSettings(CONTENT_TYPE content,const std::string & xml)164 bool CScraper::SetPathSettings(CONTENT_TYPE content, const std::string &xml)
165 {
166 m_pathContent = content;
167 if (!LoadSettings(false, false))
168 return false;
169
170 if (xml.empty())
171 return true;
172
173 CXBMCTinyXML doc;
174 doc.Parse(xml);
175 return SettingsFromXML(doc);
176 }
177
GetPathSettings()178 std::string CScraper::GetPathSettings()
179 {
180 if (!LoadSettings(false))
181 return "";
182
183 std::stringstream stream;
184 CXBMCTinyXML doc;
185 SettingsToXML(doc);
186 if (doc.RootElement())
187 stream << *doc.RootElement();
188
189 return stream.str();
190 }
191
ClearCache()192 void CScraper::ClearCache()
193 {
194 std::string strCachePath = URIUtils::AddFileToFolder(CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers");
195
196 // create scraper cache dir if needed
197 if (!CDirectory::Exists(strCachePath))
198 CDirectory::Create(strCachePath);
199
200 strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
201 URIUtils::AddSlashAtEnd(strCachePath);
202
203 if (CDirectory::Exists(strCachePath))
204 {
205 CFileItemList items;
206 CDirectory::GetDirectory(strCachePath, items, "", DIR_FLAG_DEFAULTS);
207 for (int i = 0; i < items.Size(); ++i)
208 {
209 // wipe cache
210 if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
211 CFile::Delete(items[i]->GetDynPath());
212 }
213 }
214 else
215 CDirectory::Create(strCachePath);
216 }
217
// returns a vector of strings: the first is the XML output by the function; the rest
// is XML output by chained functions, possibly recursively
// the CCurlFile object is passed in so that URL fetches can be canceled from other threads
// throws CScraperError on internal failures (e.g., parse errors)
std::vector<std::string> CScraper::Run(const std::string &function,
                                       const CScraperUrl &scrURL,
                                       CCurlFile &http,
                                       const std::vector<std::string> *extras)
{
  // make sure the XML scraper definition is loaded (no-op for Python scrapers)
  if (!Load())
    throw CScraperError();

  std::string strXML = InternalRun(function, scrURL, http, extras);
  if (strXML.empty())
  {
    // NfoUrl/ResolveIDToUrl legitimately return nothing, so don't log for them
    if (function != "NfoUrl" && function != "ResolveIDToUrl")
      CLog::Log(LOGERROR, "%s: Unable to parse web site", __FUNCTION__);
    throw CScraperError();
  }

  CLog::Log(LOGDEBUG, "scraper: %s returned %s", function.c_str(), strXML.c_str());

  CXBMCTinyXML doc;
  /* all data was converted to UTF-8 before being processed by scraper */
  doc.Parse(strXML, TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
    throw CScraperError();
  }

  std::vector<std::string> result;
  result.push_back(strXML);
  TiXmlElement *xchain = doc.RootElement()->FirstChildElement();
  // skip children of the root element until <url> or <chain>
  while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
    xchain = xchain->NextSiblingElement();
  while (xchain)
  {
    // <chain|url function="...">param</>
    const char *szFunction = xchain->Attribute("function");
    if (szFunction)
    {
      CScraperUrl scrURL2;
      std::vector<std::string> extras;
      // for <chain>, pass the contained text as a parameter; for <url>, as URL content
      if (strcmp(xchain->Value(), "chain") == 0)
      {
        if (xchain->FirstChild())
          extras.emplace_back(xchain->FirstChild()->Value());
      }
      else
        scrURL2.ParseAndAppendUrl(xchain);
      // Fix for empty chains. $$1 would still contain the
      // previous value as there is no child of the xml node.
      // since $$1 will always either contain the data from an
      // url or the parameters to a chain, we can safely clear it here
      // to fix this issue
      m_parser.m_param[0].clear();
      // recurse without throwing: a failed chained function contributes nothing
      std::vector<std::string> result2 = RunNoThrow(szFunction, scrURL2, http, &extras);
      result.insert(result.end(), result2.begin(), result2.end());
    }
    xchain = xchain->NextSiblingElement();
    // continue to skip past non-<url> or <chain> elements
    while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
      xchain = xchain->NextSiblingElement();
  }

  return result;
}
288
289 // just like Run, but returns an empty list instead of throwing in case of error
290 // don't use in new code; errors should be handled appropriately
RunNoThrow(const std::string & function,const CScraperUrl & url,XFILE::CCurlFile & http,const std::vector<std::string> * extras)291 std::vector<std::string> CScraper::RunNoThrow(const std::string &function,
292 const CScraperUrl &url,
293 XFILE::CCurlFile &http,
294 const std::vector<std::string> *extras)
295 {
296 std::vector<std::string> vcs;
297 try
298 {
299 vcs = Run(function, url, http, extras);
300 }
301 catch (const CScraperError &sce)
302 {
303 assert(sce.FAborted()); // the only kind we should get
304 }
305 return vcs;
306 }
307
InternalRun(const std::string & function,const CScraperUrl & scrURL,CCurlFile & http,const std::vector<std::string> * extras)308 std::string CScraper::InternalRun(const std::string &function,
309 const CScraperUrl &scrURL,
310 CCurlFile &http,
311 const std::vector<std::string> *extras)
312 {
313 // walk the list of input URLs and fetch each into parser parameters
314 const auto& urls = scrURL.GetUrls();
315 size_t i;
316 for (i = 0; i < urls.size(); ++i)
317 {
318 if (!CScraperUrl::Get(urls[i], m_parser.m_param[i], http, ID()) ||
319 m_parser.m_param[i].empty())
320 return "";
321 }
322 // put the 'extra' parameterts into the parser parameter list too
323 if (extras)
324 {
325 for (size_t j = 0; j < extras->size(); ++j)
326 m_parser.m_param[j + i] = (*extras)[j];
327 }
328
329 return m_parser.Parse(function, this);
330 }
331
GetPathSettingsAsJSON()332 std::string CScraper::GetPathSettingsAsJSON()
333 {
334 static const std::string EmptyPathSettings = "{}";
335
336 if (!LoadSettings(false))
337 return EmptyPathSettings;
338
339 CSettingsValueFlatJsonSerializer jsonSerializer;
340 auto json = jsonSerializer.SerializeValues(GetSettings()->GetSettingsManager());
341 if (json.empty())
342 return EmptyPathSettings;
343
344 return json;
345 }
346
Load()347 bool CScraper::Load()
348 {
349 if (m_fLoaded || m_isPython)
350 return true;
351
352 bool result = m_parser.Load(LibPath());
353 if (result)
354 {
355 //! @todo this routine assumes that deps are a single level, and assumes the dep is installed.
356 //! 1. Does it make sense to have recursive dependencies?
357 //! 2. Should we be checking the dep versions or do we assume it is ok?
358 auto deps = GetDependencies();
359 auto itr = deps.begin();
360 while (itr != deps.end())
361 {
362 if (itr->id == "xbmc.metadata")
363 {
364 ++itr;
365 continue;
366 }
367 AddonPtr dep;
368
369 bool bOptional = itr->optional;
370
371 if (CServiceBroker::GetAddonMgr().GetAddon((*itr).id, dep, ADDON::ADDON_UNKNOWN,
372 ADDON::OnlyEnabled::YES))
373 {
374 CXBMCTinyXML doc;
375 if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
376 m_parser.AddDocument(&doc);
377 }
378 else
379 {
380 if (!bOptional)
381 {
382 result = false;
383 break;
384 }
385 }
386 ++itr;
387 }
388 }
389
390 if (!result)
391 CLog::Log(LOGWARNING, "failed to load scraper XML from %s", LibPath().c_str());
392 return m_fLoaded = result;
393 }
394
IsInUse() const395 bool CScraper::IsInUse() const
396 {
397 if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
398 { // music scraper
399 CMusicDatabase db;
400 if (db.Open() && db.ScraperInUse(ID()))
401 return true;
402 }
403 else
404 { // video scraper
405 CVideoDatabase db;
406 if (db.Open() && db.ScraperInUse(ID()))
407 return true;
408 }
409 return false;
410 }
411
IsNoop()412 bool CScraper::IsNoop()
413 {
414 if (!Load())
415 throw CScraperError();
416
417 return !m_isPython && m_parser.IsNoop();
418 }
419
420 // pass in contents of .nfo file; returns URL (possibly empty if none found)
421 // and may populate strId, or throws CScraperError on error
NfoUrl(const std::string & sNfoContent)422 CScraperUrl CScraper::NfoUrl(const std::string &sNfoContent)
423 {
424 CScraperUrl scurlRet;
425
426 if (IsNoop())
427 return scurlRet;
428
429 if (m_isPython)
430 {
431 std::stringstream str;
432 str << "plugin://" << ID() << "?action=NfoUrl&nfo=" << CURL::Encode(sNfoContent)
433 << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());
434
435 CFileItemList items;
436 if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
437 return scurlRet;
438
439 if (items.Size() == 0)
440 return scurlRet;
441 if (items.Size() > 1)
442 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
443
444 CScraperUrl::SUrlEntry surl;
445 surl.m_type = CScraperUrl::UrlType::General;
446 surl.m_url = items[0]->GetDynPath();
447 scurlRet.AppendUrl(surl);
448 return scurlRet;
449 }
450
451 // scraper function takes contents of .nfo file, returns XML (see below)
452 std::vector<std::string> vcsIn;
453 vcsIn.push_back(sNfoContent);
454 CScraperUrl scurl;
455 CCurlFile fcurl;
456 std::vector<std::string> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
457 if (vcsOut.empty() || vcsOut[0].empty())
458 return scurlRet;
459 if (vcsOut.size() > 1)
460 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
461
462 // parse returned XML: either <error> element on error, blank on failure,
463 // or <url>...</url> or <url>...</url><id>...</id> on success
464 for (size_t i = 0; i < vcsOut.size(); ++i)
465 {
466 CXBMCTinyXML doc;
467 doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
468 CheckScraperError(doc.RootElement());
469
470 if (doc.RootElement())
471 {
472 /*
473 NOTE: Scrapers might return invalid xml with some loose
474 elements (eg. '<url>http://some.url</url><id>123</id>').
475 Since XMLUtils::GetString() is assuming well formed xml
476 with start and end-tags we're not able to use it.
477 Check for the desired Elements instead.
478 */
479 TiXmlElement *pxeUrl = NULL;
480 TiXmlElement *pId = NULL;
481 if (!strcmp(doc.RootElement()->Value(), "details"))
482 {
483 pxeUrl = doc.RootElement()->FirstChildElement("url");
484 pId = doc.RootElement()->FirstChildElement("id");
485 }
486 else
487 {
488 pId = doc.FirstChildElement("id");
489 pxeUrl = doc.FirstChildElement("url");
490 }
491 if (pId && pId->FirstChild())
492 scurlRet.SetId(pId->FirstChild()->ValueStr());
493
494 if (pxeUrl && pxeUrl->Attribute("function"))
495 continue;
496
497 if (pxeUrl)
498 scurlRet.ParseAndAppendUrl(pxeUrl);
499 else if (!strcmp(doc.RootElement()->Value(), "url"))
500 scurlRet.ParseAndAppendUrl(doc.RootElement());
501 else
502 continue;
503 break;
504 }
505 }
506 return scurlRet;
507 }
508
ResolveIDToUrl(const std::string & externalID)509 CScraperUrl CScraper::ResolveIDToUrl(const std::string &externalID)
510 {
511 CScraperUrl scurlRet;
512
513 if (m_isPython)
514 {
515 std::stringstream str;
516 str << "plugin://" << ID() << "?action=resolveid&key=" << CURL::Encode(externalID)
517 << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());
518
519 CFileItem item("resolve me", false);
520
521 if (XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
522 scurlRet.ParseFromData(item.GetDynPath());
523
524 return scurlRet;
525 }
526
527 // scraper function takes an external ID, returns XML (see below)
528 std::vector<std::string> vcsIn;
529 vcsIn.push_back(externalID);
530 CScraperUrl scurl;
531 CCurlFile fcurl;
532 std::vector<std::string> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn);
533 if (vcsOut.empty() || vcsOut[0].empty())
534 return scurlRet;
535 if (vcsOut.size() > 1)
536 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
537
538 // parse returned XML: either <error> element on error, blank on failure,
539 // or <url>...</url> or <url>...</url><id>...</id> on success
540 for (size_t i = 0; i < vcsOut.size(); ++i)
541 {
542 CXBMCTinyXML doc;
543 doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
544 CheckScraperError(doc.RootElement());
545
546 if (doc.RootElement())
547 {
548 /*
549 NOTE: Scrapers might return invalid xml with some loose
550 elements (eg. '<url>http://some.url</url><id>123</id>').
551 Since XMLUtils::GetString() is assuming well formed xml
552 with start and end-tags we're not able to use it.
553 Check for the desired Elements instead.
554 */
555 TiXmlElement *pxeUrl = NULL;
556 TiXmlElement *pId = NULL;
557 if (!strcmp(doc.RootElement()->Value(), "details"))
558 {
559 pxeUrl = doc.RootElement()->FirstChildElement("url");
560 pId = doc.RootElement()->FirstChildElement("id");
561 }
562 else
563 {
564 pId = doc.FirstChildElement("id");
565 pxeUrl = doc.FirstChildElement("url");
566 }
567 if (pId && pId->FirstChild())
568 scurlRet.SetId(pId->FirstChild()->ValueStr());
569
570 if (pxeUrl && pxeUrl->Attribute("function"))
571 continue;
572
573 if (pxeUrl)
574 scurlRet.ParseAndAppendUrl(pxeUrl);
575 else if (!strcmp(doc.RootElement()->Value(), "url"))
576 scurlRet.ParseAndAppendUrl(doc.RootElement());
577 else
578 continue;
579 break;
580 }
581 }
582 return scurlRet;
583 }
584
RelevanceSortFunction(const CScraperUrl & left,const CScraperUrl & right)585 static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
586 {
587 return left.GetRelevance() > right.GetRelevance();
588 }
589
590 template<class T>
591 static T FromFileItem(const CFileItem &item);
592
593 template<>
FromFileItem(const CFileItem & item)594 CScraperUrl FromFileItem<CScraperUrl>(const CFileItem &item)
595 {
596 CScraperUrl url;
597
598 url.SetTitle(item.GetLabel());
599 if (item.HasProperty("relevance"))
600 url.SetRelevance(item.GetProperty("relevance").asDouble());
601 CScraperUrl::SUrlEntry surl;
602 surl.m_type = CScraperUrl::UrlType::General;
603 surl.m_url = item.GetDynPath();
604 url.AppendUrl(surl);
605
606 return url;
607 }
608
609 template<>
FromFileItem(const CFileItem & item)610 CMusicAlbumInfo FromFileItem<CMusicAlbumInfo>(const CFileItem &item)
611 {
612 CMusicAlbumInfo info;
613 const std::string& sTitle = item.GetLabel();
614 std::string sArtist = item.GetProperty("album.artist").asString();
615 std::string sAlbumName;
616 if (!sArtist.empty())
617 sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
618 else
619 sAlbumName = sTitle;
620
621 CScraperUrl url;
622 url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));
623
624 info = CMusicAlbumInfo(sTitle, sArtist, sAlbumName, url);
625 if (item.HasProperty("relevance"))
626 info.SetRelevance(item.GetProperty("relevance").asFloat());
627
628 if (item.HasProperty("album.releasestatus"))
629 info.GetAlbum().strReleaseStatus = item.GetProperty("album.releasestatus").asString();
630 if (item.HasProperty("album.type"))
631 info.GetAlbum().strType = item.GetProperty("album.type").asString();
632 if (item.HasProperty("album.year"))
633 info.GetAlbum().strReleaseDate = item.GetProperty("album.year").asString();
634 if (item.HasProperty("album.label"))
635 info.GetAlbum().strLabel = item.GetProperty("album.label").asString();
636 info.GetAlbum().art = item.GetArt();
637
638 return info;
639 }
640
641 template<>
FromFileItem(const CFileItem & item)642 CMusicArtistInfo FromFileItem<CMusicArtistInfo>(const CFileItem &item)
643 {
644 CMusicArtistInfo info;
645 const std::string& sTitle = item.GetLabel();
646
647 CScraperUrl url;
648 url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));
649
650 info = CMusicArtistInfo(sTitle, url);
651 if (item.HasProperty("artist.genre"))
652 info.GetArtist().genre = StringUtils::Split(item.GetProperty("artist.genre").asString(),
653 CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
654 if (item.HasProperty("artist.disambiguation"))
655 info.GetArtist().strDisambiguation = item.GetProperty("artist.disambiguation").asString();
656 if (item.HasProperty("artist.type"))
657 info.GetArtist().strType = item.GetProperty("artist.type").asString();
658 if (item.HasProperty("artist.gender"))
659 info.GetArtist().strGender = item.GetProperty("artist.gender").asString();
660 if (item.HasProperty("artist.born"))
661 info.GetArtist().strBorn = item.GetProperty("artist.born").asString();
662
663 return info;
664 }
665
666 template<class T>
PythonFind(const std::string & ID,const std::map<std::string,std::string> & additionals)667 static std::vector<T> PythonFind(const std::string &ID,
668 const std::map<std::string, std::string> &additionals)
669 {
670 std::vector<T> result;
671 CFileItemList items;
672 std::stringstream str;
673 str << "plugin://" << ID << "?action=find";
674 for (const auto &it : additionals)
675 str << "&" << it.first << "=" << CURL::Encode(it.second);
676
677 if (XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
678 {
679 for (const auto& it : items)
680 result.emplace_back(std::move(FromFileItem<T>(*it)));
681 }
682
683 return result;
684 }
685
FromString(const CFileItem & item,const std::string & key)686 static std::string FromString(const CFileItem &item, const std::string &key)
687 {
688 return item.GetProperty(key).asString();
689 }
690
FromArray(const CFileItem & item,const std::string & key,int sep)691 static std::vector<std::string> FromArray(const CFileItem &item, const std::string &key, int sep)
692 {
693 return StringUtils::Split(item.GetProperty(key).asString(),
694 sep ? CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_videoItemSeparator
695 : CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
696 }
697
ParseThumbs(CScraperUrl & scurl,const CFileItem & item,int nThumbs,const std::string & tag)698 static void ParseThumbs(CScraperUrl &scurl,
699 const CFileItem &item,
700 int nThumbs,
701 const std::string &tag)
702 {
703 for (int i = 0; i < nThumbs; ++i)
704 {
705 std::stringstream prefix;
706 prefix << tag << i + 1;
707 std::string url = FromString(item, prefix.str() + ".url");
708 std::string aspect = FromString(item, prefix.str() + ".aspect");
709 std::string preview = FromString(item, prefix.str() + ".preview");
710 scurl.AddParsedUrl(url, aspect, preview);
711 }
712 }
713
ParseFanart(const CFileItem & item,int nFanart,const std::string & tag)714 static std::string ParseFanart(const CFileItem &item, int nFanart, const std::string &tag)
715 {
716 std::string result;
717 TiXmlElement fanart("fanart");
718 for (int i = 0; i < nFanart; ++i)
719 {
720 std::stringstream prefix;
721 prefix << tag << i + 1;
722 std::string url = FromString(item, prefix.str() + ".url");
723 std::string preview = FromString(item, prefix.str() + ".preview");
724 TiXmlElement thumb("thumb");
725 thumb.SetAttribute("preview", preview);
726 TiXmlText text(url);
727 thumb.InsertEndChild(text);
728 fanart.InsertEndChild(thumb);
729 }
730 result << fanart;
731
732 return result;
733 }
734
735 template<class T>
736 static void DetailsFromFileItem(const CFileItem &, T &);
737
738 template<>
DetailsFromFileItem(const CFileItem & item,CAlbum & album)739 void DetailsFromFileItem<CAlbum>(const CFileItem &item, CAlbum &album)
740 {
741 album.strAlbum = item.GetLabel();
742 album.strMusicBrainzAlbumID = FromString(item, "album.musicbrainzid");
743 album.strReleaseGroupMBID = FromString(item, "album.releasegroupid");
744
745 int nArtists = item.GetProperty("album.artists").asInteger32();
746 album.artistCredits.reserve(nArtists);
747 for (int i = 0; i < nArtists; ++i)
748 {
749 std::stringstream prefix;
750 prefix << "album.artist" << i + 1;
751 CArtistCredit artistCredit;
752 artistCredit.SetArtist(FromString(item, prefix.str() + ".name"));
753 artistCredit.SetMusicBrainzArtistID(FromString(item, prefix.str() + ".musicbrainzid"));
754 album.artistCredits.push_back(artistCredit);
755 }
756
757 album.strArtistDesc = FromString(item, "album.artist_description");
758 album.genre = FromArray(item, "album.genre", 0);
759 album.styles = FromArray(item, "album.styles", 0);
760 album.moods = FromArray(item, "album.moods", 0);
761 album.themes = FromArray(item, "album.themes", 0);
762 album.bCompilation = item.GetProperty("album.compilation").asBoolean();
763 album.strReview = FromString(item, "album.review");
764 album.strReleaseDate = FromString(item, "album.releasedate");
765 if (album.strReleaseDate.empty())
766 album.strReleaseDate = FromString(item, "album.year");
767 album.strOrigReleaseDate = FromString(item, "album.originaldate");
768 album.strLabel = FromString(item, "album.label");
769 album.strType = FromString(item, "album.type");
770 album.strReleaseStatus = FromString(item, "album.releasestatus");
771 album.fRating = item.GetProperty("album.rating").asFloat();
772 album.iUserrating = item.GetProperty("album.user_rating").asInteger32();
773 album.iVotes = item.GetProperty("album.votes").asInteger32();
774
775 /* Scrapers fetch a list of possible art but do not set the current images used because art
776 selection depends on other preferences so is handled by CMusicInfoScanner
777 album.art = item.GetArt();
778 */
779
780 int nThumbs = item.GetProperty("album.thumbs").asInteger32();
781 ParseThumbs(album.thumbURL, item, nThumbs, "album.thumb");
782 }
783
784 template<>
DetailsFromFileItem(const CFileItem & item,CArtist & artist)785 void DetailsFromFileItem<CArtist>(const CFileItem &item, CArtist &artist)
786 {
787 artist.strArtist = item.GetLabel();
788 artist.strMusicBrainzArtistID = FromString(item, "artist.musicbrainzid");
789 artist.strDisambiguation = FromString(item, "artist.disambiguation");
790 artist.strType = FromString(item, "artist.type");
791 artist.strGender = FromString(item, "artist.gender");
792 artist.genre = FromArray(item, "artist.genre", 0);
793 artist.styles = FromArray(item, "artist.styles", 0);
794 artist.moods = FromArray(item, "artist.moods", 0);
795 artist.yearsActive = FromArray(item, "artist.years_active", 0);
796 artist.instruments = FromArray(item, "artist.instruments", 0);
797 artist.strBorn = FromString(item, "artist.born");
798 artist.strFormed = FromString(item, "artist.formed");
799 artist.strBiography = FromString(item, "artist.biography");
800 artist.strDied = FromString(item, "artist.died");
801 artist.strDisbanded = FromString(item, "artist.disbanded");
802
803 /* Scrapers fetch a list of possible art but do not set the current images used because art
804 selection depends on other preferences so is handled by CMusicInfoScanner
805 artist.art = item.GetArt();
806 */
807
808 int nAlbums = item.GetProperty("artist.albums").asInteger32();
809 artist.discography.reserve(nAlbums);
810 for (int i = 0; i < nAlbums; ++i)
811 {
812 std::stringstream prefix;
813 prefix << "artist.album" << i + 1;
814 CDiscoAlbum discoAlbum;
815 discoAlbum.strAlbum = FromString(item, prefix.str() + ".title");
816 discoAlbum.strYear = FromString(item, prefix.str() + ".year");
817 discoAlbum.strReleaseGroupMBID = FromString(item, prefix.str() + ".musicbrainzreleasegroupid");
818 artist.discography.emplace_back(discoAlbum);
819 }
820
821 int nThumbs = item.GetProperty("artist.thumbs").asInteger32();
822 ParseThumbs(artist.thumbURL, item, nThumbs, "artist.thumb");
823
824 // Support deprecated fanarts property, add to artist.thumbURL
825 int nFanart = item.GetProperty("artist.fanarts").asInteger32();
826 if (nFanart > 0)
827 {
828 CFanart fanart;
829 fanart.m_xml = ParseFanart(item, nFanart, "artist.fanart");
830 fanart.Unpack();
831 for (unsigned int i = 0; i < fanart.GetNumFanarts(); i++)
832 artist.thumbURL.AddParsedUrl(fanart.GetImageURL(i), "fanart", fanart.GetPreviewURL(i));
833 }
834 }
835
836 template<>
DetailsFromFileItem(const CFileItem & item,CVideoInfoTag & tag)837 void DetailsFromFileItem<CVideoInfoTag>(const CFileItem &item, CVideoInfoTag &tag)
838 {
839 if (item.HasVideoInfoTag())
840 tag = *item.GetVideoInfoTag();
841 }
842
843 template<class T>
PythonDetails(const std::string & ID,const std::string & key,const std::string & url,const std::string & action,const std::string & pathSettings,T & result)844 static bool PythonDetails(const std::string &ID,
845 const std::string &key,
846 const std::string &url,
847 const std::string &action,
848 const std::string &pathSettings,
849 T &result)
850 {
851 std::stringstream str;
852 str << "plugin://" << ID << "?action=" << action << "&" << key << "=" << CURL::Encode(url);
853 str << "&pathSettings=" << CURL::Encode(pathSettings);
854
855 CFileItem item(url, false);
856
857 if (!XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
858 return false;
859
860 DetailsFromFileItem(item, result);
861 return true;
862 }
863
864 // fetch list of matching movies sorted by relevance (may be empty);
865 // throws CScraperError on error; first called with fFirst set, then unset if first try fails
FindMovie(XFILE::CCurlFile & fcurl,const std::string & movieTitle,int movieYear,bool fFirst)866 std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl,
867 const std::string &movieTitle, int movieYear,
868 bool fFirst)
869 {
870 // prepare parameters for URL creation
871 std::string sTitle, sYear;
872 if (movieYear < 0)
873 {
874 std::string sTitleYear;
875 CUtil::CleanString(movieTitle, sTitle, sTitleYear, sYear, true /*fRemoveExt*/, fFirst);
876 }
877 else
878 {
879 sTitle = movieTitle;
880 sYear = std::to_string( movieYear );
881 }
882
883 CLog::Log(LOGDEBUG,
884 "%s: Searching for '%s' using %s scraper "
885 "(path: '%s', content: '%s', version: '%s')",
886 __FUNCTION__, sTitle.c_str(), Name().c_str(), Path().c_str(),
887 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
888
889 std::vector<CScraperUrl> vcscurl;
890 if (IsNoop())
891 return vcscurl;
892
893 if (!fFirst)
894 StringUtils::Replace(sTitle, '-', ' ');
895
896 if (m_isPython)
897 {
898 std::map<std::string, std::string> additionals{{"title", sTitle}};
899 if (!sYear.empty())
900 additionals.insert({"year", sYear});
901 additionals.emplace("pathSettings", GetPathSettingsAsJSON());
902 return PythonFind<CScraperUrl>(ID(), additionals);
903 }
904
905 std::vector<std::string> vcsIn(1);
906 g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
907 vcsIn[0] = CURL::Encode(vcsIn[0]);
908 if (fFirst && !sYear.empty())
909 vcsIn.push_back(sYear);
910
911 // request a search URL from the title/filename/etc.
912 CScraperUrl scurl;
913 std::vector<std::string> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
914 if (vcsOut.empty())
915 {
916 CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
917 throw CScraperError();
918 }
919 scurl.ParseFromData(vcsOut[0]);
920
921 // do the search, and parse the result into a list
922 vcsIn.clear();
923 vcsIn.push_back(scurl.GetFirstThumbUrl());
924 vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);
925
926 bool fSort(true);
927 std::set<std::string> stsDupeCheck;
928 bool fResults(false);
929 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
930 {
931 CXBMCTinyXML doc;
932 doc.Parse(*i, TIXML_ENCODING_UTF8);
933 if (!doc.RootElement())
934 {
935 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
936 continue; // might have more valid results later
937 }
938
939 CheckScraperError(doc.RootElement());
940
941 TiXmlHandle xhDoc(&doc);
942 TiXmlHandle xhResults = xhDoc.FirstChild("results");
943 if (!xhResults.Element())
944 continue;
945 fResults = true; // even if empty
946
947 // we need to sort if returned results don't specify 'sorted="yes"'
948 if (fSort)
949 {
950 const char *sorted = xhResults.Element()->Attribute("sorted");
951 if (sorted != NULL)
952 fSort = !StringUtils::EqualsNoCase(sorted, "yes");
953 }
954
955 for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element(); pxeMovie;
956 pxeMovie = pxeMovie->NextSiblingElement())
957 {
958 TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
959 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
960 if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
961 {
962 CScraperUrl scurlMovie;
963 auto title = pxnTitle->FirstChild()->ValueStr();
964 std::string id;
965 if (XMLUtils::GetString(pxeMovie, "id", id))
966 scurlMovie.SetId(id);
967
968 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
969 scurlMovie.ParseAndAppendUrl(pxeLink);
970
971 // calculate the relevance of this hit
972 std::string sCompareTitle = scurlMovie.GetTitle();
973 StringUtils::ToLower(sCompareTitle);
974 std::string sMatchTitle = sTitle;
975 StringUtils::ToLower(sMatchTitle);
976
977 /*
978 * Identify the best match by performing a fuzzy string compare on the search term and
979 * the result. Additionally, use the year (if available) to further refine the best match.
980 * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
981 * countries), otherwise it scores 0.
982 */
983 std::string sCompareYear;
984 XMLUtils::GetString(pxeMovie, "year", sCompareYear);
985
986 double yearScore = 0;
987 if (!sYear.empty() && !sCompareYear.empty())
988 yearScore =
989 std::max(0.0, 1 - 0.5 * abs(atoi(sYear.c_str()) - atoi(sCompareYear.c_str())));
990
991 scurlMovie.SetRelevance(fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str()) + yearScore);
992
993 // reconstruct a title for the user
994 if (!sCompareYear.empty())
995 title += StringUtils::Format(" (%s)", sCompareYear.c_str());
996
997 std::string sLanguage;
998 if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty())
999 title += StringUtils::Format(" (%s)", sLanguage.c_str());
1000
1001 // filter for dupes from naughty scrapers
1002 if (stsDupeCheck.insert(scurlMovie.GetFirstThumbUrl() + " " + title).second)
1003 {
1004 scurlMovie.SetTitle(title);
1005 vcscurl.push_back(scurlMovie);
1006 }
1007 }
1008 }
1009 }
1010
1011 if (!fResults)
1012 throw CScraperError(); // scraper aborted
1013
1014 if (fSort)
1015 std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);
1016
1017 return vcscurl;
1018 }
1019
1020 // find album by artist, using fcurl for web fetches
1021 // returns a list of albums (empty if no match or failure)
FindAlbum(CCurlFile & fcurl,const std::string & sAlbum,const std::string & sArtist)1022 std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl,
1023 const std::string &sAlbum,
1024 const std::string &sArtist)
1025 {
1026 CLog::Log(LOGDEBUG,
1027 "%s: Searching for '%s - %s' using %s scraper "
1028 "(path: '%s', content: '%s', version: '%s')",
1029 __FUNCTION__, sArtist.c_str(), sAlbum.c_str(), Name().c_str(), Path().c_str(),
1030 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
1031
1032 std::vector<CMusicAlbumInfo> vcali;
1033 if (IsNoop())
1034 return vcali;
1035
1036 if (m_isPython)
1037 return PythonFind<CMusicAlbumInfo>(ID(),
1038 {{"title", sAlbum}, {"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});
1039
1040 // scraper function is given the album and artist as parameters and
1041 // returns an XML <url> element parseable by CScraperUrl
1042 std::vector<std::string> extras(2);
1043 g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
1044 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
1045 extras[0] = CURL::Encode(extras[0]);
1046 extras[1] = CURL::Encode(extras[1]);
1047 CScraperUrl scurl;
1048 std::vector<std::string> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
1049 if (vcsOut.size() > 1)
1050 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
1051
1052 if (vcsOut.empty() || vcsOut[0].empty())
1053 return vcali;
1054 scurl.ParseFromData(vcsOut[0]);
1055
1056 // the next function is passed the contents of the returned URL, and returns
1057 // an empty string on failure; on success, returns XML matches in the form:
1058 // <results>
1059 // <entity>
1060 // <title>...</title>
1061 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
1062 // <artist>...</artist>
1063 // <year>...</year>
1064 // <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
1065 // </entity>
1066 // ...
1067 // </results>
1068 vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);
1069
1070 // parse the returned XML into a vector of album objects
1071 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1072 {
1073 CXBMCTinyXML doc;
1074 doc.Parse(*i, TIXML_ENCODING_UTF8);
1075 TiXmlHandle xhDoc(&doc);
1076
1077 for (TiXmlElement *pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
1078 pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
1079 {
1080 std::string sTitle;
1081 if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty())
1082 {
1083 std::string sArtist;
1084 std::string sAlbumName;
1085 if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty())
1086 sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
1087 else
1088 sAlbumName = sTitle;
1089
1090 std::string sYear;
1091 if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty())
1092 sAlbumName = StringUtils::Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());
1093
1094 // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
1095 // (e.g., in case we only got one result back and were sent to the detail page)
1096 TiXmlElement *pxeLink = pxeAlbum->FirstChildElement("url");
1097 CScraperUrl scurlAlbum;
1098 if (!pxeLink)
1099 scurlAlbum.ParseFromData(scurl.GetData());
1100 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1101 scurlAlbum.ParseAndAppendUrl(pxeLink);
1102
1103 if (!scurlAlbum.HasUrls())
1104 continue;
1105
1106 CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);
1107
1108 TiXmlElement *pxeRel = pxeAlbum->FirstChildElement("relevance");
1109 if (pxeRel && pxeRel->FirstChild())
1110 {
1111 const char *szScale = pxeRel->Attribute("scale");
1112 float flScale = szScale ? float(atof(szScale)) : 1;
1113 ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
1114 }
1115
1116 vcali.push_back(ali);
1117 }
1118 }
1119 }
1120 return vcali;
1121 }
1122
1123 // find artist, using fcurl for web fetches
1124 // returns a list of artists (empty if no match or failure)
FindArtist(CCurlFile & fcurl,const std::string & sArtist)1125 std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl, const std::string &sArtist)
1126 {
1127 CLog::Log(LOGDEBUG,
1128 "%s: Searching for '%s' using %s scraper "
1129 "(file: '%s', content: '%s', version: '%s')",
1130 __FUNCTION__, sArtist.c_str(), Name().c_str(), Path().c_str(),
1131 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
1132
1133 std::vector<CMusicArtistInfo> vcari;
1134 if (IsNoop())
1135 return vcari;
1136
1137 if (m_isPython)
1138 return PythonFind<CMusicArtistInfo>(ID(),
1139 {{"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});
1140
1141 // scraper function is given the artist as parameter and
1142 // returns an XML <url> element parseable by CScraperUrl
1143 std::vector<std::string> extras(1);
1144 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
1145 extras[0] = CURL::Encode(extras[0]);
1146 CScraperUrl scurl;
1147 std::vector<std::string> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);
1148
1149 if (vcsOut.empty() || vcsOut[0].empty())
1150 return vcari;
1151 scurl.ParseFromData(vcsOut[0]);
1152
1153 // the next function is passed the contents of the returned URL, and returns
1154 // an empty string on failure; on success, returns XML matches in the form:
1155 // <results>
1156 // <entity>
1157 // <title>...</title>
1158 // <year>...</year>
1159 // <genre>...</genre>
1160 // <disambiguation>...</disambiguation>
1161 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
1162 // </entity>
1163 // ...
1164 // </results>
1165 vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);
1166
1167 // parse the returned XML into a vector of artist objects
1168 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1169 {
1170 CXBMCTinyXML doc;
1171 doc.Parse(*i, TIXML_ENCODING_UTF8);
1172 if (!doc.RootElement())
1173 {
1174 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1175 return vcari;
1176 }
1177 TiXmlHandle xhDoc(&doc);
1178 for (TiXmlElement *pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
1179 pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
1180 {
1181 TiXmlNode *pxnTitle = pxeArtist->FirstChild("title");
1182 if (pxnTitle && pxnTitle->FirstChild())
1183 {
1184 CScraperUrl scurlArtist;
1185
1186 TiXmlElement *pxeLink = pxeArtist->FirstChildElement("url");
1187 if (!pxeLink)
1188 scurlArtist.ParseFromData(scurl.GetData());
1189 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1190 scurlArtist.ParseAndAppendUrl(pxeLink);
1191
1192 if (!scurlArtist.HasUrls())
1193 continue;
1194
1195 CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
1196 std::string genre;
1197 XMLUtils::GetString(pxeArtist, "genre", genre);
1198 if (!genre.empty())
1199 ari.GetArtist().genre =
1200 StringUtils::Split(genre, CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
1201 XMLUtils::GetString(pxeArtist, "disambiguation", ari.GetArtist().strDisambiguation);
1202 XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);
1203
1204 vcari.push_back(ari);
1205 }
1206 }
1207 }
1208 return vcari;
1209 }
1210
1211 // fetch list of episodes from URL (from video database)
GetEpisodeList(XFILE::CCurlFile & fcurl,const CScraperUrl & scurl)1212 EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl)
1213 {
1214 EPISODELIST vcep;
1215 if (!scurl.HasUrls())
1216 return vcep;
1217
1218 CLog::Log(LOGDEBUG,
1219 "%s: Searching '%s' using %s scraper "
1220 "(file: '%s', content: '%s', version: '%s')",
1221 __FUNCTION__, scurl.GetFirstThumbUrl(), Name().c_str(), Path().c_str(),
1222 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
1223
1224 if (m_isPython)
1225 {
1226 std::stringstream str;
1227 str << "plugin://" << ID()
1228 << "?action=getepisodelist&url=" << CURL::Encode(scurl.GetFirstThumbUrl())
1229 << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());
1230
1231 CFileItemList items;
1232 if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
1233 return vcep;
1234
1235 for (int i = 0; i < items.Size(); ++i)
1236 {
1237 EPISODE ep;
1238 const auto& tag = *items[i]->GetVideoInfoTag();
1239 ep.strTitle = tag.m_strTitle;
1240 ep.iSeason = tag.m_iSeason;
1241 ep.iEpisode = tag.m_iEpisode;
1242 ep.cDate = tag.m_firstAired;
1243 ep.iSubepisode = items[i]->GetProperty("video.sub_episode").asInteger();
1244 CScraperUrl::SUrlEntry surl;
1245 surl.m_type = CScraperUrl::UrlType::General;
1246 surl.m_url = items[i]->GetURL().Get();
1247 ep.cScraperUrl.AppendUrl(surl);
1248 vcep.push_back(ep);
1249 }
1250
1251 return vcep;
1252 }
1253
1254 std::vector<std::string> vcsIn;
1255 vcsIn.push_back(scurl.GetFirstThumbUrl());
1256 std::vector<std::string> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);
1257
1258 // parse the XML response
1259 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1260 {
1261 CXBMCTinyXML doc;
1262 doc.Parse(*i);
1263 if (!doc.RootElement())
1264 {
1265 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1266 continue;
1267 }
1268
1269 TiXmlHandle xhDoc(&doc);
1270 for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").Element();
1271 pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
1272 {
1273 EPISODE ep;
1274 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
1275 std::string strEpNum;
1276 if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
1277 XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty())
1278 {
1279 CScraperUrl &scurlEp(ep.cScraperUrl);
1280 size_t dot = strEpNum.find('.');
1281 ep.iEpisode = atoi(strEpNum.c_str());
1282 ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0;
1283 std::string title;
1284 if (!XMLUtils::GetString(pxeMovie, "title", title) || title.empty())
1285 title = g_localizeStrings.Get(10005); // Not available
1286 scurlEp.SetTitle(title);
1287 std::string id;
1288 if (XMLUtils::GetString(pxeMovie, "id", id))
1289 scurlEp.SetId(id);
1290
1291 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1292 scurlEp.ParseAndAppendUrl(pxeLink);
1293
1294 // date must be the format of yyyy-mm-dd
1295 ep.cDate.SetValid(false);
1296 std::string sDate;
1297 if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
1298 {
1299 tm tm;
1300 if (strptime(sDate.c_str(), "%Y-%m-%d", &tm))
1301 ep.cDate.SetDate(1900 + tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
1302 }
1303 vcep.push_back(ep);
1304 }
1305 }
1306 }
1307
1308 return vcep;
1309 }
1310
1311 // takes URL; returns true and populates video details on success, false otherwise
GetVideoDetails(XFILE::CCurlFile & fcurl,const CScraperUrl & scurl,bool fMovie,CVideoInfoTag & video)1312 bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl,
1313 const CScraperUrl &scurl,
1314 bool fMovie /*else episode*/,
1315 CVideoInfoTag &video)
1316 {
1317 CLog::Log(LOGDEBUG,
1318 "%s: Reading %s '%s' using %s scraper "
1319 "(file: '%s', content: '%s', version: '%s')",
1320 __FUNCTION__, fMovie ? MediaTypeMovie : MediaTypeEpisode, scurl.GetFirstThumbUrl(),
1321 Name().c_str(), Path().c_str(), ADDON::TranslateContent(Content()).c_str(),
1322 Version().asString().c_str());
1323
1324 video.Reset();
1325
1326 if (m_isPython)
1327 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1328 fMovie ? "getdetails" : "getepisodedetails", GetPathSettingsAsJSON(), video);
1329
1330 std::string sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
1331 std::vector<std::string> vcsIn;
1332 vcsIn.push_back(scurl.GetId());
1333 vcsIn.push_back(scurl.GetFirstThumbUrl());
1334 std::vector<std::string> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);
1335
1336 // parse XML output
1337 bool fRet(false);
1338 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1339 {
1340 CXBMCTinyXML doc;
1341 doc.Parse(*i, TIXML_ENCODING_UTF8);
1342 if (!doc.RootElement())
1343 {
1344 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1345 continue;
1346 }
1347
1348 TiXmlHandle xhDoc(&doc);
1349 TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
1350 if (!pxeDetails)
1351 {
1352 CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__);
1353 continue;
1354 }
1355 video.Load(pxeDetails, true /*fChain*/);
1356 fRet = true; // but don't exit in case of chaining
1357 }
1358 return fRet;
1359 }
1360
1361 // takes a URL; returns true and populates album on success, false otherwise
GetAlbumDetails(CCurlFile & fcurl,const CScraperUrl & scurl,CAlbum & album)1362 bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
1363 {
1364 CLog::Log(LOGDEBUG,
1365 "%s: Reading '%s' using %s scraper "
1366 "(file: '%s', content: '%s', version: '%s')",
1367 __FUNCTION__, scurl.GetFirstThumbUrl(), Name().c_str(), Path().c_str(),
1368 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
1369
1370 if (m_isPython)
1371 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1372 "getdetails", GetPathSettingsAsJSON(), album);
1373
1374 std::vector<std::string> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);
1375
1376 // parse the returned XML into an album object (see CAlbum::Load for details)
1377 bool fRet(false);
1378 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1379 {
1380 CXBMCTinyXML doc;
1381 doc.Parse(*i, TIXML_ENCODING_UTF8);
1382 if (!doc.RootElement())
1383 {
1384 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1385 return false;
1386 }
1387 fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
1388 }
1389 return fRet;
1390 }
1391
1392 // takes a URL (one returned from FindArtist), the original search string, and
1393 // returns true and populates artist on success, false on failure
GetArtistDetails(CCurlFile & fcurl,const CScraperUrl & scurl,const std::string & sSearch,CArtist & artist)1394 bool CScraper::GetArtistDetails(CCurlFile &fcurl,
1395 const CScraperUrl &scurl,
1396 const std::string &sSearch,
1397 CArtist &artist)
1398 {
1399 if (!scurl.HasUrls())
1400 return false;
1401
1402 CLog::Log(LOGDEBUG,
1403 "%s: Reading '%s' ('%s') using %s scraper "
1404 "(file: '%s', content: '%s', version: '%s')",
1405 __FUNCTION__, scurl.GetFirstThumbUrl(), sSearch.c_str(), Name().c_str(),
1406 Path().c_str(), ADDON::TranslateContent(Content()).c_str(),
1407 Version().asString().c_str());
1408
1409 if (m_isPython)
1410 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1411 "getdetails", GetPathSettingsAsJSON(), artist);
1412
1413 // pass in the original search string for chaining to search other sites
1414 std::vector<std::string> vcIn;
1415 vcIn.push_back(sSearch);
1416 vcIn[0] = CURL::Encode(vcIn[0]);
1417
1418 std::vector<std::string> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);
1419
1420 // ok, now parse the xml file
1421 bool fRet(false);
1422 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1423 {
1424 CXBMCTinyXML doc;
1425 doc.Parse(*i, TIXML_ENCODING_UTF8);
1426 if (!doc.RootElement())
1427 {
1428 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1429 return false;
1430 }
1431
1432 fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());
1433 }
1434 return fRet;
1435 }
1436
GetArtwork(XFILE::CCurlFile & fcurl,CVideoInfoTag & details)1437 bool CScraper::GetArtwork(XFILE::CCurlFile &fcurl, CVideoInfoTag &details)
1438 {
1439 if (!details.HasUniqueID())
1440 return false;
1441
1442 CLog::Log(LOGDEBUG,
1443 "%s: Reading artwork for '%s' using %s scraper "
1444 "(file: '%s', content: '%s', version: '%s')",
1445 __FUNCTION__, details.GetUniqueID().c_str(), Name().c_str(), Path().c_str(),
1446 ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());
1447
1448 if (m_isPython)
1449 return PythonDetails(ID(), "id", details.GetUniqueID(),
1450 "getartwork", GetPathSettingsAsJSON(), details);
1451
1452 std::vector<std::string> vcsIn;
1453 CScraperUrl scurl;
1454 vcsIn.push_back(details.GetUniqueID());
1455 std::vector<std::string> vcsOut = RunNoThrow("GetArt", scurl, fcurl, &vcsIn);
1456
1457 bool fRet(false);
1458 for (std::vector<std::string>::const_iterator it = vcsOut.begin(); it != vcsOut.end(); ++it)
1459 {
1460 CXBMCTinyXML doc;
1461 doc.Parse(*it, TIXML_ENCODING_UTF8);
1462 if (!doc.RootElement())
1463 {
1464 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
1465 return false;
1466 }
1467 fRet = details.Load(doc.RootElement(), it != vcsOut.begin());
1468 }
1469 return fRet;
1470 }
1471 }
1472