/*
 *  Copyright (C) 2005-2018 Team Kodi
 *  This file is part of Kodi - https://kodi.tv
 *
 *  SPDX-License-Identifier: GPL-2.0-or-later
 *  See LICENSES/README.md for more information.
 */

#include "Scraper.h"

#include "AddonManager.h"
#include "FileItem.h"
#include "ServiceBroker.h"
#include "URL.h"
#include "Util.h"
#include "addons/settings/AddonSettings.h"
#include "filesystem/CurlFile.h"
#include "filesystem/Directory.h"
#include "filesystem/File.h"
#include "filesystem/PluginDirectory.h"
#include "guilib/LocalizeStrings.h"
#include "music/Album.h"
#include "music/Artist.h"
#include "music/MusicDatabase.h"
#include "music/infoscanner/MusicAlbumInfo.h"
#include "music/infoscanner/MusicArtistInfo.h"
#include "settings/AdvancedSettings.h"
#include "settings/SettingsComponent.h"
#include "settings/SettingsValueFlatJsonSerializer.h"
#include "utils/CharsetConverter.h"
#include "utils/ScraperParser.h"
#include "utils/ScraperUrl.h"
#include "utils/StringUtils.h"
#include "utils/URIUtils.h"
#include "utils/XMLUtils.h"
#include "utils/log.h"
#include "video/VideoDatabase.h"

#include <algorithm>
#include <sstream>

#include <fstrcmp.h>

using namespace XFILE;
using namespace MUSIC_GRABBER;
using namespace VIDEO;

namespace ADDON
{

typedef struct
{
  const char *name;
  CONTENT_TYPE type;
  int pretty;
} ContentMapping;

static const ContentMapping content[] = {{"unknown", CONTENT_NONE, 231},
                                         {"albums", CONTENT_ALBUMS, 132},
                                         {"music", CONTENT_ALBUMS, 132},
                                         {"artists", CONTENT_ARTISTS, 133},
                                         {"movies", CONTENT_MOVIES, 20342},
                                         {"tvshows", CONTENT_TVSHOWS, 20343},
                                         {"musicvideos", CONTENT_MUSICVIDEOS, 20389}};
std::string TranslateContent(const CONTENT_TYPE &type, bool pretty /*=false*/)
{
  for (const ContentMapping& map : content)
  {
    if (type == map.type)
    {
      if (pretty && map.pretty)
        return g_localizeStrings.Get(map.pretty);
      else
        return map.name;
    }
  }
  return "";
}

CONTENT_TYPE TranslateContent(const std::string &string)
{
  for (const ContentMapping& map : content)
  {
    if (string == map.name)
      return map.type;
  }
  return CONTENT_NONE;
}

TYPE ScraperTypeFromContent(const CONTENT_TYPE &content)
{
  switch (content)
  {
  case CONTENT_ALBUMS:
    return ADDON_SCRAPER_ALBUMS;
  case CONTENT_ARTISTS:
    return ADDON_SCRAPER_ARTISTS;
  case CONTENT_MOVIES:
    return ADDON_SCRAPER_MOVIES;
  case CONTENT_MUSICVIDEOS:
    return ADDON_SCRAPER_MUSICVIDEOS;
  case CONTENT_TVSHOWS:
    return ADDON_SCRAPER_TVSHOWS;
  default:
    return ADDON_UNKNOWN;
  }
}

// if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
static void CheckScraperError(const TiXmlElement *pxeRoot)
{
  if (!pxeRoot || StringUtils::CompareNoCase(pxeRoot->Value(), "error"))
    return;
  std::string sTitle;
  std::string sMessage;
  XMLUtils::GetString(pxeRoot, "title", sTitle);
  XMLUtils::GetString(pxeRoot, "message", sMessage);
  throw CScraperError(sTitle, sMessage);
}

CScraper::CScraper(const AddonInfoPtr& addonInfo, TYPE addonType)
    : CAddon(addonInfo, addonType)
    , m_fLoaded(false)
    , m_requiressettings(false)
    , m_pathContent(CONTENT_NONE)
{
  m_requiressettings = addonInfo->Type(addonType)->GetValue("@requiressettings").asBoolean();

  CDateTimeSpan persistence;
  std::string tmp = addonInfo->Type(addonType)->GetValue("@cachepersistence").asString();
  if (!tmp.empty())
    m_persistence.SetFromTimeString(tmp);

  switch (addonType)
  {
  case ADDON_SCRAPER_ALBUMS:
    m_pathContent = CONTENT_ALBUMS;
    break;
  case ADDON_SCRAPER_ARTISTS:
    m_pathContent = CONTENT_ARTISTS;
    break;
  case ADDON_SCRAPER_MOVIES:
    m_pathContent = CONTENT_MOVIES;
    break;
  case ADDON_SCRAPER_MUSICVIDEOS:
    m_pathContent = CONTENT_MUSICVIDEOS;
    break;
  case ADDON_SCRAPER_TVSHOWS:
    m_pathContent = CONTENT_TVSHOWS;
    break;
  default:
    break;
  }

  m_isPython = URIUtils::GetExtension(addonInfo->Type(addonType)->LibPath()) == ".py";
}

bool CScraper::Supports(const CONTENT_TYPE &content) const
{
  return Type() == ScraperTypeFromContent(content);
}

bool CScraper::SetPathSettings(CONTENT_TYPE content, const std::string &xml)
{
  m_pathContent = content;
  if (!LoadSettings(false, false))
    return false;

  if (xml.empty())
    return true;

  CXBMCTinyXML doc;
  doc.Parse(xml);
  return SettingsFromXML(doc);
}

std::string CScraper::GetPathSettings()
{
  if (!LoadSettings(false))
    return "";

  std::stringstream stream;
  CXBMCTinyXML doc;
  SettingsToXML(doc);
  if (doc.RootElement())
    stream << *doc.RootElement();

  return stream.str();
}

void CScraper::ClearCache()
{
  std::string strCachePath = URIUtils::AddFileToFolder(CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers");

  // create scraper cache dir if needed
  if (!CDirectory::Exists(strCachePath))
    CDirectory::Create(strCachePath);

  strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
  URIUtils::AddSlashAtEnd(strCachePath);

  if (CDirectory::Exists(strCachePath))
  {
    CFileItemList items;
    CDirectory::GetDirectory(strCachePath, items, "", DIR_FLAG_DEFAULTS);
    for (int i = 0; i < items.Size(); ++i)
    {
      // wipe cache
      if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
        CFile::Delete(items[i]->GetDynPath());
    }
  }
  else
    CDirectory::Create(strCachePath);
}

// returns a vector of strings: the first is the XML output by the function; the rest
// is XML output by chained functions, possibly recursively
// the CCurlFile object is passed in so that URL fetches can be canceled from other threads
// throws CScraperError on abort or internal failures (e.g., parse errors)
std::vector<std::string> CScraper::Run(const std::string &function,
                                       const CScraperUrl &scrURL,
                                       CCurlFile &http,
                                       const std::vector<std::string> *extras)
{
  if (!Load())
    throw CScraperError();

  std::string strXML = InternalRun(function, scrURL, http, extras);
  if (strXML.empty())
  {
    if (function != "NfoUrl" && function != "ResolveIDToUrl")
      CLog::Log(LOGERROR, "%s: Unable to parse web site", __FUNCTION__);
    throw CScraperError();
  }

  CLog::Log(LOGDEBUG, "scraper: %s returned %s", function.c_str(), strXML.c_str());

  CXBMCTinyXML doc;
  /* all data was converted to UTF-8 before being processed by scraper */
  doc.Parse(strXML, TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
    throw CScraperError();
  }

  std::vector<std::string> result;
  result.push_back(strXML);
  TiXmlElement *xchain = doc.RootElement()->FirstChildElement();
  // skip children of the root element until <url> or <chain>
  while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
    xchain = xchain->NextSiblingElement();
  while (xchain)
  {
    // <chain|url function="...">param</>
    const char *szFunction = xchain->Attribute("function");
    if (szFunction)
    {
      CScraperUrl scrURL2;
      std::vector<std::string> extras;
      // for <chain>, pass the contained text as a parameter; for <url>, as URL content
      if (strcmp(xchain->Value(), "chain") == 0)
      {
        if (xchain->FirstChild())
          extras.emplace_back(xchain->FirstChild()->Value());
      }
      else
        scrURL2.ParseAndAppendUrl(xchain);
      // Fix for empty chains. $$1 would still contain the
      // previous value as there is no child of the XML node.
      // Since $$1 will always contain either the data from a
      // URL or the parameters to a chain, we can safely clear it
      // here to fix this issue.
      m_parser.m_param[0].clear();
      std::vector<std::string> result2 = RunNoThrow(szFunction, scrURL2, http, &extras);
      result.insert(result.end(), result2.begin(), result2.end());
    }
    xchain = xchain->NextSiblingElement();
    // continue to skip past non-<url> or <chain> elements
    while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
      xchain = xchain->NextSiblingElement();
  }

  return result;
}

// just like Run, but returns an empty list instead of throwing in case of error
// don't use in new code; errors should be handled appropriately
std::vector<std::string> CScraper::RunNoThrow(const std::string &function,
                                              const CScraperUrl &url,
                                              XFILE::CCurlFile &http,
                                              const std::vector<std::string> *extras)
{
  std::vector<std::string> vcs;
  try
  {
    vcs = Run(function, url, http, extras);
  }
  catch (const CScraperError &sce)
  {
    assert(sce.FAborted()); // the only kind we should get
  }
  return vcs;
}

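// fetch each input URL into the successive parser parameters ($$1, $$2, ...),
// append any 'extra' parameters after them, then run the named scraper function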
std::string CScraper::InternalRun(const std::string &function,
                                  const CScraperUrl &scrURL,
                                  CCurlFile &http,
                                  const std::vector<std::string> *extras)
{
  // walk the list of input URLs and fetch each into parser parameters
  const auto& urls = scrURL.GetUrls();
  size_t i;
  for (i = 0; i < urls.size(); ++i)
  {
    if (!CScraperUrl::Get(urls[i], m_parser.m_param[i], http, ID()) ||
        m_parser.m_param[i].empty())
      return "";
  }
  // put the 'extra' parameters into the parser parameter list too
  if (extras)
  {
    for (size_t j = 0; j < extras->size(); ++j)
      m_parser.m_param[j + i] = (*extras)[j];
  }

  return m_parser.Parse(function, this);
}

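// serialize the scraper's current path settings to flat JSON (as passed to
// Python scrapers via the pathSettings parameter); returns "{}" if unavailable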
std::string CScraper::GetPathSettingsAsJSON()
{
  static const std::string EmptyPathSettings = "{}";

  if (!LoadSettings(false))
    return EmptyPathSettings;

  CSettingsValueFlatJsonSerializer jsonSerializer;
  auto json = jsonSerializer.SerializeValues(GetSettings()->GetSettingsManager());
  if (json.empty())
    return EmptyPathSettings;

  return json;
}

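// load the scraper XML and any scraper-library dependencies into the parser;
// Python scrapers have nothing to load here and always succeed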
bool CScraper::Load()
{
  if (m_fLoaded || m_isPython)
    return true;

  bool result = m_parser.Load(LibPath());
  if (result)
  {
    //! @todo this routine assumes that deps are a single level, and assumes the dep is installed.
    //!       1. Does it make sense to have recursive dependencies?
    //!       2. Should we be checking the dep versions or do we assume it is ok?
    auto deps = GetDependencies();
    auto itr = deps.begin();
    while (itr != deps.end())
    {
      if (itr->id == "xbmc.metadata")
      {
        ++itr;
        continue;
      }
      AddonPtr dep;

      bool bOptional = itr->optional;

      if (CServiceBroker::GetAddonMgr().GetAddon((*itr).id, dep, ADDON::ADDON_UNKNOWN,
                                                 ADDON::OnlyEnabled::YES))
      {
        CXBMCTinyXML doc;
        if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
          m_parser.AddDocument(&doc);
      }
      else
      {
        if (!bOptional)
        {
          result = false;
          break;
        }
      }
      ++itr;
    }
  }

  if (!result)
    CLog::Log(LOGWARNING, "failed to load scraper XML from %s", LibPath().c_str());
  return m_fLoaded = result;
}

bool CScraper::IsInUse() const
{
  if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
  { // music scraper
    CMusicDatabase db;
    if (db.Open() && db.ScraperInUse(ID()))
      return true;
  }
  else
  { // video scraper
    CVideoDatabase db;
    if (db.Open() && db.ScraperInUse(ID()))
      return true;
  }
  return false;
}

bool CScraper::IsNoop()
{
  if (!Load())
    throw CScraperError();

  return !m_isPython && m_parser.IsNoop();
}

// pass in contents of .nfo file; returns URL (possibly empty if none found)
// and may populate strId, or throws CScraperError on error
CScraperUrl CScraper::NfoUrl(const std::string &sNfoContent)
{
  CScraperUrl scurlRet;

  if (IsNoop())
    return scurlRet;

  if (m_isPython)
  {
    std::stringstream str;
    str << "plugin://" << ID() << "?action=NfoUrl&nfo=" << CURL::Encode(sNfoContent)
        << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());

    CFileItemList items;
    if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
      return scurlRet;

    if (items.Size() == 0)
      return scurlRet;
    if (items.Size() > 1)
      CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

    CScraperUrl::SUrlEntry surl;
    surl.m_type = CScraperUrl::UrlType::General;
    surl.m_url = items[0]->GetDynPath();
    scurlRet.AppendUrl(surl);
    return scurlRet;
  }

  // scraper function takes contents of .nfo file, returns XML (see below)
  std::vector<std::string> vcsIn;
  vcsIn.push_back(sNfoContent);
  CScraperUrl scurl;
  CCurlFile fcurl;
  std::vector<std::string> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (size_t i = 0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid XML with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() assumes well-formed XML
       with start and end tags, we're not able to use it.
       Check for the desired elements instead.
      */
      TiXmlElement *pxeUrl = NULL;
      TiXmlElement *pId = NULL;
      if (!strcmp(doc.RootElement()->Value(), "details"))
      {
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.SetId(pId->FirstChild()->ValueStr());

      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseAndAppendUrl(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseAndAppendUrl(doc.RootElement());
      else
        continue;
      break;
    }
  }
  return scurlRet;
}

CScraperUrl CScraper::ResolveIDToUrl(const std::string &externalID)
{
  CScraperUrl scurlRet;

  if (m_isPython)
  {
    std::stringstream str;
    str << "plugin://" << ID() << "?action=resolveid&key=" << CURL::Encode(externalID)
        << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());

    CFileItem item("resolve me", false);

    if (XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
      scurlRet.ParseFromData(item.GetDynPath());

    return scurlRet;
  }

  // scraper function takes an external ID, returns XML (see below)
  std::vector<std::string> vcsIn;
  vcsIn.push_back(externalID);
  CScraperUrl scurl;
  CCurlFile fcurl;
  std::vector<std::string> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (size_t i = 0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid XML with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() assumes well-formed XML
       with start and end tags, we're not able to use it.
       Check for the desired elements instead.
       */
      TiXmlElement *pxeUrl = NULL;
      TiXmlElement *pId = NULL;
      if (!strcmp(doc.RootElement()->Value(), "details"))
      {
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.SetId(pId->FirstChild()->ValueStr());

      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseAndAppendUrl(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseAndAppendUrl(doc.RootElement());
      else
        continue;
      break;
    }
  }
  return scurlRet;
}

static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
{
  return left.GetRelevance() > right.GetRelevance();
}

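// helpers that convert a CFileItem returned by a Python scraper plugin into the
// strongly typed search result used by the corresponding Find* function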
template<class T>
static T FromFileItem(const CFileItem &item);

template<>
CScraperUrl FromFileItem<CScraperUrl>(const CFileItem &item)
{
  CScraperUrl url;

  url.SetTitle(item.GetLabel());
  if (item.HasProperty("relevance"))
    url.SetRelevance(item.GetProperty("relevance").asDouble());
  CScraperUrl::SUrlEntry surl;
  surl.m_type = CScraperUrl::UrlType::General;
  surl.m_url = item.GetDynPath();
  url.AppendUrl(surl);

  return url;
}

template<>
CMusicAlbumInfo FromFileItem<CMusicAlbumInfo>(const CFileItem &item)
{
  CMusicAlbumInfo info;
  const std::string& sTitle = item.GetLabel();
  std::string sArtist = item.GetProperty("album.artist").asString();
  std::string sAlbumName;
  if (!sArtist.empty())
    sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
  else
    sAlbumName = sTitle;

  CScraperUrl url;
  url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));

  info = CMusicAlbumInfo(sTitle, sArtist, sAlbumName, url);
  if (item.HasProperty("relevance"))
    info.SetRelevance(item.GetProperty("relevance").asFloat());

  if (item.HasProperty("album.releasestatus"))
    info.GetAlbum().strReleaseStatus = item.GetProperty("album.releasestatus").asString();
  if (item.HasProperty("album.type"))
    info.GetAlbum().strType = item.GetProperty("album.type").asString();
  if (item.HasProperty("album.year"))
    info.GetAlbum().strReleaseDate = item.GetProperty("album.year").asString();
  if (item.HasProperty("album.label"))
    info.GetAlbum().strLabel = item.GetProperty("album.label").asString();
  info.GetAlbum().art = item.GetArt();

  return info;
}

template<>
CMusicArtistInfo FromFileItem<CMusicArtistInfo>(const CFileItem &item)
{
  CMusicArtistInfo info;
  const std::string& sTitle = item.GetLabel();

  CScraperUrl url;
  url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));

  info = CMusicArtistInfo(sTitle, url);
  if (item.HasProperty("artist.genre"))
    info.GetArtist().genre = StringUtils::Split(item.GetProperty("artist.genre").asString(),
                                                CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
  if (item.HasProperty("artist.disambiguation"))
    info.GetArtist().strDisambiguation = item.GetProperty("artist.disambiguation").asString();
  if (item.HasProperty("artist.type"))
    info.GetArtist().strType = item.GetProperty("artist.type").asString();
  if (item.HasProperty("artist.gender"))
    info.GetArtist().strGender = item.GetProperty("artist.gender").asString();
  if (item.HasProperty("artist.born"))
    info.GetArtist().strBorn = item.GetProperty("artist.born").asString();

  return info;
}

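// run a Python scraper's 'find' action via a plugin:// directory listing and
// convert every returned item with the matching FromFileItem specialization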
template<class T>
static std::vector<T> PythonFind(const std::string &ID,
                                 const std::map<std::string, std::string> &additionals)
{
  std::vector<T> result;
  CFileItemList items;
  std::stringstream str;
  str << "plugin://" << ID << "?action=find";
  for (const auto &it : additionals)
    str << "&" << it.first << "=" << CURL::Encode(it.second);

  if (XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
  {
    for (const auto& it : items)
      result.emplace_back(std::move(FromFileItem<T>(*it)));
  }

  return result;
}

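// property helpers: read a scraper-set item property as a string, or split it
// into a list using the advanced-settings video/music item separator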
static std::string FromString(const CFileItem &item, const std::string &key)
{
  return item.GetProperty(key).asString();
}

static std::vector<std::string> FromArray(const CFileItem &item, const std::string &key, int sep)
{
  return StringUtils::Split(item.GetProperty(key).asString(),
                            sep ? CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_videoItemSeparator
                                : CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
}

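// collect numbered thumb properties (<tag>1.url/.aspect/.preview, <tag>2..., ...)
// from the item into the given CScraperUrl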
static void ParseThumbs(CScraperUrl &scurl,
                        const CFileItem &item,
                        int nThumbs,
                        const std::string &tag)
{
  for (int i = 0; i < nThumbs; ++i)
  {
    std::stringstream prefix;
    prefix << tag << i + 1;
    std::string url = FromString(item, prefix.str() + ".url");
    std::string aspect = FromString(item, prefix.str() + ".aspect");
    std::string preview = FromString(item, prefix.str() + ".preview");
    scurl.AddParsedUrl(url, aspect, preview);
  }
}

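// build a <fanart> XML fragment from numbered fanart properties so it can be
// unpacked by CFanart (used for the deprecated fanarts property below)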
static std::string ParseFanart(const CFileItem &item, int nFanart, const std::string &tag)
{
  std::string result;
  TiXmlElement fanart("fanart");
  for (int i = 0; i < nFanart; ++i)
  {
    std::stringstream prefix;
    prefix << tag << i + 1;
    std::string url = FromString(item, prefix.str() + ".url");
    std::string preview = FromString(item, prefix.str() + ".preview");
    TiXmlElement thumb("thumb");
    thumb.SetAttribute("preview", preview);
    TiXmlText text(url);
    thumb.InsertEndChild(text);
    fanart.InsertEndChild(thumb);
  }
  result << fanart;

  return result;
}

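// convert a CFileItem returned by a Python scraper's getdetails/getartwork
// action into the corresponding album, artist or video details object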
template<class T>
static void DetailsFromFileItem(const CFileItem &, T &);

template<>
void DetailsFromFileItem<CAlbum>(const CFileItem &item, CAlbum &album)
{
  album.strAlbum = item.GetLabel();
  album.strMusicBrainzAlbumID = FromString(item, "album.musicbrainzid");
  album.strReleaseGroupMBID = FromString(item, "album.releasegroupid");

  int nArtists = item.GetProperty("album.artists").asInteger32();
  album.artistCredits.reserve(nArtists);
  for (int i = 0; i < nArtists; ++i)
  {
    std::stringstream prefix;
    prefix << "album.artist" << i + 1;
    CArtistCredit artistCredit;
    artistCredit.SetArtist(FromString(item, prefix.str() + ".name"));
    artistCredit.SetMusicBrainzArtistID(FromString(item, prefix.str() + ".musicbrainzid"));
    album.artistCredits.push_back(artistCredit);
  }

  album.strArtistDesc = FromString(item, "album.artist_description");
  album.genre = FromArray(item, "album.genre", 0);
  album.styles = FromArray(item, "album.styles", 0);
  album.moods = FromArray(item, "album.moods", 0);
  album.themes = FromArray(item, "album.themes", 0);
  album.bCompilation = item.GetProperty("album.compilation").asBoolean();
  album.strReview = FromString(item, "album.review");
  album.strReleaseDate = FromString(item, "album.releasedate");
  if (album.strReleaseDate.empty())
    album.strReleaseDate = FromString(item, "album.year");
  album.strOrigReleaseDate = FromString(item, "album.originaldate");
  album.strLabel = FromString(item, "album.label");
  album.strType = FromString(item, "album.type");
  album.strReleaseStatus = FromString(item, "album.releasestatus");
  album.fRating = item.GetProperty("album.rating").asFloat();
  album.iUserrating = item.GetProperty("album.user_rating").asInteger32();
  album.iVotes = item.GetProperty("album.votes").asInteger32();

  /* Scrapers fetch a list of possible art but do not set the current images used because art
     selection depends on other preferences so is handled by CMusicInfoScanner
     album.art = item.GetArt();
  */

  int nThumbs = item.GetProperty("album.thumbs").asInteger32();
  ParseThumbs(album.thumbURL, item, nThumbs, "album.thumb");
}

template<>
void DetailsFromFileItem<CArtist>(const CFileItem &item, CArtist &artist)
{
  artist.strArtist = item.GetLabel();
  artist.strMusicBrainzArtistID = FromString(item, "artist.musicbrainzid");
  artist.strDisambiguation = FromString(item, "artist.disambiguation");
  artist.strType = FromString(item, "artist.type");
  artist.strGender = FromString(item, "artist.gender");
  artist.genre = FromArray(item, "artist.genre", 0);
  artist.styles = FromArray(item, "artist.styles", 0);
  artist.moods = FromArray(item, "artist.moods", 0);
  artist.yearsActive = FromArray(item, "artist.years_active", 0);
  artist.instruments = FromArray(item, "artist.instruments", 0);
  artist.strBorn = FromString(item, "artist.born");
  artist.strFormed = FromString(item, "artist.formed");
  artist.strBiography = FromString(item, "artist.biography");
  artist.strDied = FromString(item, "artist.died");
  artist.strDisbanded = FromString(item, "artist.disbanded");

  /* Scrapers fetch a list of possible art but do not set the current images used because art
     selection depends on other preferences so is handled by CMusicInfoScanner
     artist.art = item.GetArt();
  */

  int nAlbums = item.GetProperty("artist.albums").asInteger32();
  artist.discography.reserve(nAlbums);
  for (int i = 0; i < nAlbums; ++i)
  {
    std::stringstream prefix;
    prefix << "artist.album" << i + 1;
    CDiscoAlbum discoAlbum;
    discoAlbum.strAlbum = FromString(item, prefix.str() + ".title");
    discoAlbum.strYear = FromString(item, prefix.str() + ".year");
    discoAlbum.strReleaseGroupMBID = FromString(item, prefix.str() + ".musicbrainzreleasegroupid");
    artist.discography.emplace_back(discoAlbum);
  }

  int nThumbs = item.GetProperty("artist.thumbs").asInteger32();
  ParseThumbs(artist.thumbURL, item, nThumbs, "artist.thumb");

  // Support deprecated fanarts property, add to artist.thumbURL
  int nFanart = item.GetProperty("artist.fanarts").asInteger32();
  if (nFanart > 0)
  {
    CFanart fanart;
    fanart.m_xml = ParseFanart(item, nFanart, "artist.fanart");
    fanart.Unpack();
    for (unsigned int i = 0; i < fanart.GetNumFanarts(); i++)
      artist.thumbURL.AddParsedUrl(fanart.GetImageURL(i), "fanart", fanart.GetPreviewURL(i));
  }
}

template<>
void DetailsFromFileItem<CVideoInfoTag>(const CFileItem &item, CVideoInfoTag &tag)
{
  if (item.HasVideoInfoTag())
    tag = *item.GetVideoInfoTag();
}

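// invoke a Python scraper action that returns a single plugin result item and
// convert it with DetailsFromFileItem; returns false if the plugin call fails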
template<class T>
static bool PythonDetails(const std::string &ID,
                          const std::string &key,
                          const std::string &url,
                          const std::string &action,
                          const std::string &pathSettings,
                          T &result)
{
  std::stringstream str;
  str << "plugin://" << ID << "?action=" << action << "&" << key << "=" << CURL::Encode(url);
  str << "&pathSettings=" << CURL::Encode(pathSettings);

  CFileItem item(url, false);

  if (!XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
    return false;

  DetailsFromFileItem(item, result);
  return true;
}

// fetch list of matching movies sorted by relevance (may be empty);
// throws CScraperError on error; first called with fFirst set, then unset if first try fails
std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl,
                                             const std::string &movieTitle, int movieYear,
                                             bool fFirst)
{
  // prepare parameters for URL creation
  std::string sTitle, sYear;
  if (movieYear < 0)
  {
    std::string sTitleYear;
    CUtil::CleanString(movieTitle, sTitle, sTitleYear, sYear, true /*fRemoveExt*/, fFirst);
  }
  else
  {
    sTitle = movieTitle;
    sYear = std::to_string( movieYear );
  }

  CLog::Log(LOGDEBUG,
            "%s: Searching for '%s' using %s scraper "
            "(path: '%s', content: '%s', version: '%s')",
            __FUNCTION__, sTitle.c_str(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  std::vector<CScraperUrl> vcscurl;
  if (IsNoop())
    return vcscurl;

  if (!fFirst)
    StringUtils::Replace(sTitle, '-', ' ');

  if (m_isPython)
  {
    std::map<std::string, std::string> additionals{{"title", sTitle}};
    if (!sYear.empty())
      additionals.insert({"year", sYear});
    additionals.emplace("pathSettings", GetPathSettingsAsJSON());
    return PythonFind<CScraperUrl>(ID(), additionals);
  }

  std::vector<std::string> vcsIn(1);
  g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
  vcsIn[0] = CURL::Encode(vcsIn[0]);
  if (fFirst && !sYear.empty())
    vcsIn.push_back(sYear);

  // request a search URL from the title/filename/etc.
  CScraperUrl scurl;
  std::vector<std::string> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty())
  {
    CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
    throw CScraperError();
  }
  scurl.ParseFromData(vcsOut[0]);

  // do the search, and parse the result into a list
  vcsIn.clear();
  vcsIn.push_back(scurl.GetFirstThumbUrl());
  vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);

  bool fSort(true);
  std::set<std::string> stsDupeCheck;
  bool fResults(false);
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      continue; // might have more valid results later
    }

    CheckScraperError(doc.RootElement());

    TiXmlHandle xhDoc(&doc);
    TiXmlHandle xhResults = xhDoc.FirstChild("results");
    if (!xhResults.Element())
      continue;
    fResults = true; // even if empty

    // we need to sort if returned results don't specify 'sorted="yes"'
    if (fSort)
    {
      const char *sorted = xhResults.Element()->Attribute("sorted");
      if (sorted != NULL)
        fSort = !StringUtils::EqualsNoCase(sorted, "yes");
    }

    for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element(); pxeMovie;
         pxeMovie = pxeMovie->NextSiblingElement())
    {
      TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
      TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
      if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
      {
        CScraperUrl scurlMovie;
        auto title = pxnTitle->FirstChild()->ValueStr();
        std::string id;
        if (XMLUtils::GetString(pxeMovie, "id", id))
          scurlMovie.SetId(id);

        for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlMovie.ParseAndAppendUrl(pxeLink);

        // calculate the relevance of this hit
        std::string sCompareTitle = scurlMovie.GetTitle();
        StringUtils::ToLower(sCompareTitle);
        std::string sMatchTitle = sTitle;
        StringUtils::ToLower(sMatchTitle);

        /*
         * Identify the best match by performing a fuzzy string compare on the search term and
         * the result. Additionally, use the year (if available) to further refine the best match.
         * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
         * countries), otherwise it scores 0.
         */
        std::string sCompareYear;
        XMLUtils::GetString(pxeMovie, "year", sCompareYear);

        double yearScore = 0;
        if (!sYear.empty() && !sCompareYear.empty())
          yearScore =
              std::max(0.0, 1 - 0.5 * abs(atoi(sYear.c_str()) - atoi(sCompareYear.c_str())));

        scurlMovie.SetRelevance(fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str()) + yearScore);

        // reconstruct a title for the user
        if (!sCompareYear.empty())
          title += StringUtils::Format(" (%s)", sCompareYear.c_str());

        std::string sLanguage;
        if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty())
          title += StringUtils::Format(" (%s)", sLanguage.c_str());

        // filter for dupes from naughty scrapers
        if (stsDupeCheck.insert(scurlMovie.GetFirstThumbUrl() + " " + title).second)
        {
          scurlMovie.SetTitle(title);
          vcscurl.push_back(scurlMovie);
        }
      }
    }
  }

  if (!fResults)
    throw CScraperError(); // scraper aborted

  if (fSort)
    std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);

  return vcscurl;
}

// find album by artist, using fcurl for web fetches
// returns a list of albums (empty if no match or failure)
std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl,
                                                 const std::string &sAlbum,
                                                 const std::string &sArtist)
{
  CLog::Log(LOGDEBUG,
            "%s: Searching for '%s - %s' using %s scraper "
            "(path: '%s', content: '%s', version: '%s')",
            __FUNCTION__, sArtist.c_str(), sAlbum.c_str(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  std::vector<CMusicAlbumInfo> vcali;
  if (IsNoop())
    return vcali;

  if (m_isPython)
    return PythonFind<CMusicAlbumInfo>(ID(),
      {{"title", sAlbum}, {"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});

  // scraper function is given the album and artist as parameters and
  // returns an XML <url> element parseable by CScraperUrl
  std::vector<std::string> extras(2);
  g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
  g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
  extras[0] = CURL::Encode(extras[0]);
  extras[1] = CURL::Encode(extras[1]);
  CScraperUrl scurl;
  std::vector<std::string> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);

  if (vcsOut.empty() || vcsOut[0].empty())
    return vcali;
  scurl.ParseFromData(vcsOut[0]);

  // the next function is passed the contents of the returned URL, and returns
  // an empty string on failure; on success, returns XML matches in the form:
  // <results>
  //  <entity>
  //   <title>...</title>
  //   <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
  //   <artist>...</artist>
  //   <year>...</year>
  //   <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
  //  </entity>
  //  ...
  // </results>
  vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);

  // parse the returned XML into a vector of album objects
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    TiXmlHandle xhDoc(&doc);

    for (TiXmlElement *pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
         pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
    {
      std::string sTitle;
      if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty())
      {
        std::string sArtist;
        std::string sAlbumName;
        if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty())
          sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
        else
          sAlbumName = sTitle;

        std::string sYear;
        if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty())
          sAlbumName = StringUtils::Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());

        // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
        // (e.g., in case we only got one result back and were sent to the detail page)
        TiXmlElement *pxeLink = pxeAlbum->FirstChildElement("url");
        CScraperUrl scurlAlbum;
        if (!pxeLink)
          scurlAlbum.ParseFromData(scurl.GetData());
        for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlAlbum.ParseAndAppendUrl(pxeLink);

        if (!scurlAlbum.HasUrls())
          continue;

        CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);

        TiXmlElement *pxeRel = pxeAlbum->FirstChildElement("relevance");
        if (pxeRel && pxeRel->FirstChild())
        {
          const char *szScale = pxeRel->Attribute("scale");
          float flScale = szScale ? float(atof(szScale)) : 1;
          ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
        }

        vcali.push_back(ali);
      }
    }
  }
  return vcali;
}

// find artist, using fcurl for web fetches
// returns a list of artists (empty if no match or failure)
std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl, const std::string &sArtist)
{
  CLog::Log(LOGDEBUG,
            "%s: Searching for '%s' using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, sArtist.c_str(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  std::vector<CMusicArtistInfo> vcari;
  if (IsNoop())
    return vcari;

  if (m_isPython)
    return PythonFind<CMusicArtistInfo>(ID(),
      {{"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});

  // scraper function is given the artist as parameter and
  // returns an XML <url> element parseable by CScraperUrl
  std::vector<std::string> extras(1);
  g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
  extras[0] = CURL::Encode(extras[0]);
  CScraperUrl scurl;
  std::vector<std::string> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);

  if (vcsOut.empty() || vcsOut[0].empty())
    return vcari;
  scurl.ParseFromData(vcsOut[0]);

  // the next function is passed the contents of the returned URL, and returns
  // an empty string on failure; on success, returns XML matches in the form:
  // <results>
  //  <entity>
  //   <title>...</title>
  //   <year>...</year>
  //   <genre>...</genre>
  //   <disambiguation>...</disambiguation>
  //   <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
  //  </entity>
  //  ...
  // </results>
  vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);

  // parse the returned XML into a vector of artist objects
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      return vcari;
    }
    TiXmlHandle xhDoc(&doc);
    for (TiXmlElement *pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
         pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
    {
      TiXmlNode *pxnTitle = pxeArtist->FirstChild("title");
      if (pxnTitle && pxnTitle->FirstChild())
      {
        CScraperUrl scurlArtist;

        TiXmlElement *pxeLink = pxeArtist->FirstChildElement("url");
        if (!pxeLink)
          scurlArtist.ParseFromData(scurl.GetData());
        for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlArtist.ParseAndAppendUrl(pxeLink);

        if (!scurlArtist.HasUrls())
          continue;

        CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
        std::string genre;
        XMLUtils::GetString(pxeArtist, "genre", genre);
        if (!genre.empty())
          ari.GetArtist().genre =
              StringUtils::Split(genre, CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
        XMLUtils::GetString(pxeArtist, "disambiguation", ari.GetArtist().strDisambiguation);
        XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);

        vcari.push_back(ari);
      }
    }
  }
  return vcari;
}

// fetch list of episodes from URL (from video database)
EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl)
{
  EPISODELIST vcep;
  if (!scurl.HasUrls())
    return vcep;

  CLog::Log(LOGDEBUG,
            "%s: Searching '%s' using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, scurl.GetFirstThumbUrl(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  if (m_isPython)
  {
    std::stringstream str;
    str << "plugin://" << ID()
        << "?action=getepisodelist&url=" << CURL::Encode(scurl.GetFirstThumbUrl())
        << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());

    CFileItemList items;
    if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
      return vcep;

    for (int i = 0; i < items.Size(); ++i)
    {
      EPISODE ep;
      const auto& tag = *items[i]->GetVideoInfoTag();
      ep.strTitle = tag.m_strTitle;
      ep.iSeason = tag.m_iSeason;
      ep.iEpisode = tag.m_iEpisode;
      ep.cDate = tag.m_firstAired;
      ep.iSubepisode = items[i]->GetProperty("video.sub_episode").asInteger();
      CScraperUrl::SUrlEntry surl;
      surl.m_type = CScraperUrl::UrlType::General;
      surl.m_url = items[i]->GetURL().Get();
      ep.cScraperUrl.AppendUrl(surl);
      vcep.push_back(ep);
    }

    return vcep;
  }

  std::vector<std::string> vcsIn;
  vcsIn.push_back(scurl.GetFirstThumbUrl());
  std::vector<std::string> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);

  // parse the XML response
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      continue;
    }

    TiXmlHandle xhDoc(&doc);
    for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").Element();
         pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
    {
      EPISODE ep;
      TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
      std::string strEpNum;
      if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
          XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty())
      {
        CScraperUrl &scurlEp(ep.cScraperUrl);
        size_t dot = strEpNum.find('.');
        ep.iEpisode = atoi(strEpNum.c_str());
        ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0;
        std::string title;
        if (!XMLUtils::GetString(pxeMovie, "title", title) || title.empty())
          title = g_localizeStrings.Get(10005); // Not available
        scurlEp.SetTitle(title);
        std::string id;
        if (XMLUtils::GetString(pxeMovie, "id", id))
          scurlEp.SetId(id);

        for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlEp.ParseAndAppendUrl(pxeLink);

        // date must be in the format yyyy-mm-dd
        ep.cDate.SetValid(false);
        std::string sDate;
        if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
        {
          tm tm;
          if (strptime(sDate.c_str(), "%Y-%m-%d", &tm))
            ep.cDate.SetDate(1900 + tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
        }
        vcep.push_back(ep);
      }
    }
  }

  return vcep;
}

// takes URL; returns true and populates video details on success, false otherwise
bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl,
                               const CScraperUrl &scurl,
                               bool fMovie /*else episode*/,
                               CVideoInfoTag &video)
{
  CLog::Log(LOGDEBUG,
            "%s: Reading %s '%s' using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, fMovie ? MediaTypeMovie : MediaTypeEpisode, scurl.GetFirstThumbUrl(),
            Name().c_str(), Path().c_str(), ADDON::TranslateContent(Content()).c_str(),
            Version().asString().c_str());

  video.Reset();

  if (m_isPython)
    return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
      fMovie ? "getdetails" : "getepisodedetails", GetPathSettingsAsJSON(), video);

  std::string sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
  std::vector<std::string> vcsIn;
  vcsIn.push_back(scurl.GetId());
  vcsIn.push_back(scurl.GetFirstThumbUrl());
  std::vector<std::string> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);

  // parse XML output
  bool fRet(false);
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      continue;
    }

    TiXmlHandle xhDoc(&doc);
    TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
    if (!pxeDetails)
    {
      CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__);
      continue;
    }
    video.Load(pxeDetails, true /*fChain*/);
    fRet = true; // but don't exit in case of chaining
  }
  return fRet;
}

// takes a URL; returns true and populates album on success, false otherwise
bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
{
  CLog::Log(LOGDEBUG,
            "%s: Reading '%s' using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, scurl.GetFirstThumbUrl(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  if (m_isPython)
    return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
      "getdetails", GetPathSettingsAsJSON(), album);

  std::vector<std::string> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);

  // parse the returned XML into an album object (see CAlbum::Load for details)
  bool fRet(false);
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      return false;
    }
    fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
  }
  return fRet;
}

// takes a URL (one returned from FindArtist), the original search string, and
// returns true and populates artist on success, false on failure
bool CScraper::GetArtistDetails(CCurlFile &fcurl,
                                const CScraperUrl &scurl,
                                const std::string &sSearch,
                                CArtist &artist)
{
  if (!scurl.HasUrls())
    return false;

  CLog::Log(LOGDEBUG,
            "%s: Reading '%s' ('%s') using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, scurl.GetFirstThumbUrl(), sSearch.c_str(), Name().c_str(),
            Path().c_str(), ADDON::TranslateContent(Content()).c_str(),
            Version().asString().c_str());

  if (m_isPython)
    return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
      "getdetails", GetPathSettingsAsJSON(), artist);

  // pass in the original search string for chaining to search other sites
  std::vector<std::string> vcIn;
  vcIn.push_back(sSearch);
  vcIn[0] = CURL::Encode(vcIn[0]);

  std::vector<std::string> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);

  // ok, now parse the xml file
  bool fRet(false);
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      return false;
    }

    fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());
  }
  return fRet;
}

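// fetch additional artwork for a video item identified by its unique ID;
// results are loaded back into the details tag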
bool CScraper::GetArtwork(XFILE::CCurlFile &fcurl, CVideoInfoTag &details)
{
  if (!details.HasUniqueID())
    return false;

  CLog::Log(LOGDEBUG,
            "%s: Reading artwork for '%s' using %s scraper "
            "(file: '%s', content: '%s', version: '%s')",
            __FUNCTION__, details.GetUniqueID().c_str(), Name().c_str(), Path().c_str(),
            ADDON::TranslateContent(Content()).c_str(), Version().asString().c_str());

  if (m_isPython)
    return PythonDetails(ID(), "id", details.GetUniqueID(),
      "getartwork", GetPathSettingsAsJSON(), details);

  std::vector<std::string> vcsIn;
  CScraperUrl scurl;
  vcsIn.push_back(details.GetUniqueID());
  std::vector<std::string> vcsOut = RunNoThrow("GetArt", scurl, fcurl, &vcsIn);

  bool fRet(false);
  for (std::vector<std::string>::const_iterator it = vcsOut.begin(); it != vcsOut.end(); ++it)
  {
    CXBMCTinyXML doc;
    doc.Parse(*it, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
      return false;
    }
    fRet = details.Load(doc.RootElement(), it != vcsOut.begin());
  }
  return fRet;
}
}