1 /**************************************************************************************** 2 * Copyright (c) 2007 Bart Cerneels <bart.cerneels@kde.org> * 3 * 2009 Mathias Panzenböck <grosser.meister.morti@gmx.net> * 4 * 2013 Ralf Engels <ralf-engels@gmx.de> * 5 * * 6 * This program is free software; you can redistribute it and/or modify it under * 7 * the terms of the GNU General Public License as published by the Free Software * 8 * Foundation; either version 2 of the License, or (at your option) any later * 9 * version. * 10 * * 11 * This program is distributed in the hope that it will be useful, but WITHOUT ANY * 12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A * 13 * PARTICULAR PURPOSE. See the GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License along with * 16 * this program. If not, see <http://www.gnu.org/licenses/>. * 17 ****************************************************************************************/ 18 19 #ifndef PODCASTREADER_H 20 #define PODCASTREADER_H 21 22 #include "core/podcasts/PodcastProvider.h" 23 #include "core/podcasts/PodcastMeta.h" 24 25 #include <QDateTime> 26 #include <QXmlStreamReader> 27 #include <QObject> 28 #include <QStack> 29 #include <QRegExp> 30 31 #include <KIO/TransferJob> 32 33 class QUrl; 34 class KJob; 35 36 namespace Podcasts { 37 38 /** Class that parses a podcast xml file and provides the results to a PodcastProvider. 39 40 @author Bart Cerneels <bart.cerneels@kde.org> 41 Mathias Panzenböck <grooser.meister.morti@gmx.net> 42 */ 43 class AMAROKCORE_EXPORT PodcastReader : public QObject 44 { 45 Q_OBJECT 46 public: 47 /** Create a new PodcastReader that delivers the result to the podcastProvider. 48 Note: the PodcastProvider pointer is not owned by the PodcastReader and 49 must remain valid throughout the lifetime of this object. 50 */ 51 explicit PodcastReader( PodcastProvider *podcastProvider, QObject *parent = nullptr ); 52 ~PodcastReader() override; 53 54 bool read( QIODevice *device ); 55 bool read( const QUrl &url ); 56 bool update(const PodcastChannelPtr &channel ); url()57 QUrl & url() { return m_url; } 58 channel()59 Podcasts::PodcastChannelPtr channel() { return m_channel; } 60 error()61 QXmlStreamReader::Error error () const { return m_xmlReader.error(); } errorString()62 QString errorString () const { return m_xmlReader.errorString(); } 63 64 Q_SIGNALS: 65 void finished( PodcastReader *podcastReader ); 66 void statusBarSorryMessage( const QString &message ); 67 void statusBarNewProgressOperation( KIO::TransferJob *, const QString &, Podcasts::PodcastReader* ); 68 69 public Q_SLOTS: 70 virtual void slotAbort(); 71 72 private Q_SLOTS: 73 void slotRedirection( KIO::Job *job, const QUrl &url ); 74 void slotPermanentRedirection ( KIO::Job * job, const QUrl &fromUrl, 75 const QUrl &toUrl ); 76 void slotAddData( KIO::Job *, const QByteArray & data ); 77 78 void downloadResult( KJob * ); 79 80 private: 81 /** these are the keys used by the automata */ 82 enum ElementType 83 { 84 Unknown = 0, 85 Any, 86 Document, 87 CharacterData, 88 Rss, 89 Rdf, 90 Feed, 91 Channel, 92 Item, 93 NewFeedUrl, 94 Image, 95 Link, 96 Author, 97 ItunesAuthor, 98 Url, 99 Title, 100 EnclosureElement, 101 Guid, 102 PubDate, 103 Description, 104 Body, 105 Html, 106 Entry, 107 Subtitle, 108 ItunesSubtitle, 109 Updated, 110 Published, 111 Summary, 112 ItunesSummary, 113 Keywords, 114 ItunesKeywords, 115 Content, 116 SupportedContent, 117 Name, 118 Id, 119 Logo, 120 Icon, 121 Creator, 122 Encoded 123 }; 124 125 class Action; 126 typedef void (PodcastReader::*ActionCallback)(); 127 typedef QHash<ElementType, Action*> ActionMap; 128 129 class Action 130 { 131 public: Action(ActionMap & actionMap)132 explicit Action( ActionMap &actionMap ) 133 : m_actionMap( actionMap ) 134 , m_begin( 0 ) 135 , m_end( 0 ) 136 , m_characters( 0 ) {} 137 Action(ActionMap & actionMap,ActionCallback begin)138 Action(ActionMap &actionMap, ActionCallback begin) 139 : m_actionMap( actionMap ) 140 , m_begin( begin ) 141 , m_end( 0 ) 142 , m_characters( 0 ) {} 143 Action(ActionMap & actionMap,ActionCallback begin,ActionCallback end)144 Action(ActionMap &actionMap, ActionCallback begin, ActionCallback end) 145 : m_actionMap( actionMap ) 146 , m_begin( begin ) 147 , m_end( end ) 148 , m_characters( 0 ) {} 149 Action(ActionMap & actionMap,ActionCallback begin,ActionCallback end,ActionCallback characters)150 Action(ActionMap &actionMap, ActionCallback begin, 151 ActionCallback end, ActionCallback characters) 152 : m_actionMap( actionMap ) 153 , m_begin( begin ) 154 , m_end( end ) 155 , m_characters( characters ) {} 156 157 void begin(PodcastReader *podcastReader) const; 158 void end(PodcastReader *podcastReader) const; 159 void characters(PodcastReader *podcastReader) const; 160 actionMap()161 const ActionMap &actionMap() const { return m_actionMap; } 162 163 private: 164 ActionMap &m_actionMap; 165 ActionCallback m_begin; 166 ActionCallback m_end; 167 ActionCallback m_characters; 168 }; 169 170 static bool mightBeHtml( const QString& text ); 171 172 ElementType elementType() const; 173 bool read(); 174 bool continueRead(); 175 void createChannel(); 176 177 // callback methods for feed parsing: 178 void beginRss(); 179 void beginRdf(); 180 void beginFeed(); 181 void beginHtml(); 182 void beginUnknownFeedType(); 183 void beginEnclosure(); 184 void beginText(); 185 void beginChannel(); 186 void beginItem(); 187 void beginImage(); 188 void beginXml(); 189 void beginNoElement(); 190 void beginAtomText(); 191 void beginAtomFeedLink(); 192 void beginAtomEntryLink(); 193 void beginAtomTextChild(); 194 195 void endDocument(); 196 void endTitle(); 197 void endSubtitle(); 198 void endDescription(); 199 void endEncoded(); 200 void endBody(); 201 void endLink(); 202 void endGuid(); 203 void endPubDate(); 204 void endItem(); 205 void endImageUrl(); 206 void endKeywords(); 207 void endNewFeedUrl(); 208 void endAuthor(); 209 void endCreator(); 210 void endXml(); 211 void endAtomLogo(); 212 void endAtomIcon(); 213 void endAtomTitle(); 214 void endAtomSubtitle(); 215 void endAtomPublished(); 216 void endAtomUpdated(); 217 void endAtomSummary(); 218 void endAtomContent(); 219 void endAtomTextChild(); 220 221 // TODO: maybe I can remove readCharacters() and readEscapedCharacters() 222 // and use readAtomTextCharacters() plus setting m_contentType even 223 // in Rss 1.0/2.0 parsers instead. 224 void readCharacters(); 225 void readNoCharacters(); 226 void readEscapedCharacters(); 227 void readAtomTextCharacters(); 228 229 QDateTime parsePubDate( const QString &datestring ); 230 231 void stopWithError(const QString &message); 232 233 static QString unescape( const QString &text ); 234 static QString textToHtml( const QString &text ); 235 236 QString atomTextAsText(); 237 QString atomTextAsHtml(); 238 239 QStringRef attribute(const char *namespaceUri, const char *name) const; 240 bool hasAttribute(const char *namespaceUri, const char *name) const; 241 242 void setDescription(const QString &description); 243 void setSummary(const QString &description); 244 245 /** podcastEpisodeCheck 246 * Check if this PodcastEpisode has been fetched before. Uses a scoring algorithm. 247 * @return A pointer to a PodcastEpisode that has been fetched before or the \ 248 * same pointer as the argument. 249 */ 250 Podcasts::PodcastEpisodePtr podcastEpisodeCheck( Podcasts::PodcastEpisodePtr episode ); 251 252 // TODO: move this to PodcastMeta and add a field 253 // descriptionType to PodcastCommonMeta. 254 enum ContentType 255 { 256 TextContent, 257 HtmlContent, 258 XHtmlContent 259 }; 260 261 class Enclosure 262 { 263 public: Enclosure(const QUrl & url,int filesize,const QString & mimeType)264 Enclosure(const QUrl &url, int filesize, const QString& mimeType) 265 : m_url( url ), m_filesize( filesize ), m_mimeType( mimeType ) {} 266 url()267 const QUrl &url() const { return m_url; } fileSize()268 int fileSize() const { return m_filesize; } mimeType()269 const QString &mimeType() const { return m_mimeType; } 270 271 private: 272 QUrl m_url; 273 int m_filesize; 274 QString m_mimeType; 275 }; 276 277 class StaticData { 278 public: 279 StaticData(); 280 281 // This here basically builds an automata. 282 // This way feed parsing can be paused after any token, 283 // thus enabling paralell download and parsing of multiple 284 // feeds without the need for threads. 285 286 QHash<QString, ElementType> knownElements; 287 QRegExp removeScripts; 288 QRegExp mightBeHtml; 289 QRegExp linkify; 290 291 // Actions 292 Action startAction; 293 294 Action docAction; 295 Action xmlAction; 296 Action skipAction; 297 Action noContentAction; 298 299 Action rdfAction; // RSS 1.0 300 Action rssAction; // RSS 2.0 301 Action feedAction; // Atom 302 Action htmlAction; 303 Action unknownFeedTypeAction; 304 305 // RSS 1.0+2.0 306 Action rss10ChannelAction; 307 Action rss20ChannelAction; 308 309 Action titleAction; 310 Action subtitleAction; 311 Action descriptionAction; 312 Action encodedAction; 313 Action bodyAction; 314 Action linkAction; 315 Action imageAction; 316 Action itemAction; 317 Action urlAction; 318 Action authorAction; 319 Action creatorAction; 320 Action enclosureAction; 321 Action guidAction; 322 Action pubDateAction; 323 Action keywordsAction; 324 Action newFeedUrlAction; 325 326 // Atom 327 Action atomLogoAction; 328 Action atomIconAction; 329 Action atomEntryAction; 330 Action atomTitleAction; 331 Action atomSubtitleAction; 332 Action atomAuthorAction; 333 Action atomFeedLinkAction; 334 Action atomEntryLinkAction; 335 Action atomIdAction; 336 Action atomPublishedAction; 337 Action atomUpdatedAction; 338 Action atomSummaryAction; 339 Action atomContentAction; 340 Action atomTextAction; 341 342 // ActionMaps 343 ActionMap rootMap; 344 ActionMap skipMap; 345 ActionMap noContentMap; 346 ActionMap xmlMap; 347 348 ActionMap docMap; 349 ActionMap rssMap; 350 ActionMap rdfMap; 351 ActionMap feedMap; 352 353 ActionMap rss10ChannelMap; 354 ActionMap rss20ChannelMap; 355 ActionMap imageMap; 356 ActionMap itemMap; 357 ActionMap textMap; 358 359 ActionMap atomEntryMap; 360 ActionMap atomAuthorMap; 361 ActionMap atomTextMap; 362 }; 363 364 static const StaticData sd; 365 366 QXmlStreamReader m_xmlReader; 367 368 QUrl m_url; 369 PodcastProvider *m_podcastProvider; 370 KIO::TransferJob *m_transferJob; 371 Podcasts::PodcastChannelPtr m_channel; 372 Podcasts::PodcastEpisodePtr m_item; 373 374 /** This points to the data of the current channel or (if parsing an item) 375 the data of the current item */ 376 Podcasts::PodcastMetaCommon *m_current; 377 378 // this somewhat emulates a callstack (without local variables): 379 QStack<const Action*> m_actionStack; 380 381 ContentType m_contentType; 382 QString m_buffer; 383 QList<Enclosure> m_enclosures; 384 385 }; 386 387 } //namespace Podcasts 388 389 #endif 390