1 /****************************************************************************************
2  * Copyright (c) 2010 Bart Cerneels <bart.cerneels@kde.org>                             *
3  *               2009 Mathias Panzenböck <grosser.meister.morti@gmx.net>                *
4  *                                                                                      *
5  * This program is free software; you can redistribute it and/or modify it under        *
6  * the terms of the GNU General Public License as published by the Free Software        *
7  * Foundation; either version 2 of the License, or (at your option) any later           *
8  * version.                                                                             *
9  *                                                                                      *
10  * This program is distributed in the hope that it will be useful, but WITHOUT ANY      *
11  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A      *
12  * PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
13  *                                                                                      *
14  * You should have received a copy of the GNU General Public License along with         *
15  * this program.  If not, see <http://www.gnu.org/licenses/>.                           *
16  ****************************************************************************************/
17 
18 #include "OpmlParser.h"
19 
20 #include "core/support/Amarok.h"
21 #include "core/support/Debug.h"
22 
23 #include <QFile>
24 #include <QXmlStreamReader>
25 
26 #include <KLocalizedString>
27 #include <kio/job.h>
28 
29 const QString OpmlParser::OPML_MIME = "text/x-opml+xml";
30 
31 const OpmlParser::StaticData OpmlParser::sd;
32 
OpmlParser(const QUrl & url)33 OpmlParser::OpmlParser( const QUrl &url )
34         : QObject()
35         , ThreadWeaver::Job()
36         , QXmlStreamReader()
37         , m_url( url )
38 {
39 }
40 
~OpmlParser()41 OpmlParser::~OpmlParser()
42 {
43 }
44 
45 void
run(ThreadWeaver::JobPointer self,ThreadWeaver::Thread * thread)46 OpmlParser::run(ThreadWeaver::JobPointer self, ThreadWeaver::Thread *thread)
47 {
48     Q_UNUSED(self);
49     Q_UNUSED(thread);
50     read( m_url );
51 }
52 
53 void
defaultBegin(const ThreadWeaver::JobPointer & self,ThreadWeaver::Thread * thread)54 OpmlParser::defaultBegin(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
55 {
56     Q_EMIT started(self);
57     ThreadWeaver::Job::defaultBegin(self, thread);
58 }
59 
60 void
defaultEnd(const ThreadWeaver::JobPointer & self,ThreadWeaver::Thread * thread)61 OpmlParser::defaultEnd(const ThreadWeaver::JobPointer& self, ThreadWeaver::Thread *thread)
62 {
63     ThreadWeaver::Job::defaultEnd(self, thread);
64     if (!self->success()) {
65         Q_EMIT failed(self);
66     }
67     Q_EMIT done(self);
68 }
69 
70 bool
read(const QUrl & url)71 OpmlParser::read( const QUrl &url )
72 {
73     m_url = url;
74     if( m_url.isLocalFile() )
75     {
76         //read directly from local file
77         QFile localFile( m_url.toLocalFile() );
78         if( !localFile.open( QIODevice::ReadOnly ) )
79         {
80             debug() << "failed to open local OPML file " << m_url.url();
81             return false;
82         }
83 
84         return read( &localFile );
85     }
86 
87     m_transferJob = KIO::get( m_url, KIO::Reload, KIO::HideProgressInfo );
88 
89     connect( m_transferJob, &KIO::TransferJob::data,
90              this, &OpmlParser::slotAddData );
91 
92     connect( m_transferJob, &KIO::TransferJob::result,
93              this, &OpmlParser::downloadResult );
94 
95     // parse data
96     return read();
97 }
98 
99 bool
read(QIODevice * device)100 OpmlParser::read( QIODevice *device )
101 {
102     setDevice( device );
103     return read();
104 }
105 
106 void
slotAddData(KIO::Job * job,const QByteArray & data)107 OpmlParser::slotAddData( KIO::Job *job, const QByteArray &data )
108 {
109     Q_UNUSED( job )
110 
111     QXmlStreamReader::addData( data );
112 
113     // parse more data
114     continueRead();
115 }
116 
117 void
downloadResult(KJob * job)118 OpmlParser::downloadResult( KJob *job )
119 {
120     // parse more data
121     continueRead();
122 
123     KIO::TransferJob *transferJob = dynamic_cast<KIO::TransferJob *>( job );
124     if( job->error() || ( transferJob && transferJob->isErrorPage() ) )
125     {
126         QString errorMessage =
127             i18n( "Reading OPML podcast from %1 failed with error:\n", m_url.url() );
128         errorMessage = errorMessage.append( job->errorString() );
129 
130 //        Q_EMIT statusBarSorryMessage( errorMessage );
131     }
132 
133     m_transferJob = 0;
134 }
135 
136 void
slotAbort()137 OpmlParser::slotAbort()
138 {
139     DEBUG_BLOCK
140 }
141 
142 void
begin(OpmlParser * opmlParser) const143 OpmlParser::Action::begin( OpmlParser *opmlParser ) const
144 {
145     if( m_begin )
146         (( *opmlParser ).*m_begin )();
147 }
148 
149 void
end(OpmlParser * opmlParser) const150 OpmlParser::Action::end( OpmlParser *opmlParser ) const
151 {
152     if( m_end )
153         (( *opmlParser ).*m_end )();
154 }
155 
156 void
characters(OpmlParser * opmlParser) const157 OpmlParser::Action::characters( OpmlParser *opmlParser ) const
158 {
159     if( m_characters )
160         (( *opmlParser ).*m_characters )();
161 }
162 
163 // initialization of the feed parser automata:
StaticData()164 OpmlParser::StaticData::StaticData()
165     : startAction( rootMap )
166     , docAction(
167         docMap,
168         0,
169         &OpmlParser::endDocument )
170     , skipAction( skipMap )
171     , noContentAction(
172             noContentMap,
173             &OpmlParser::beginNoElement,
174             0,
175             &OpmlParser::readNoCharacters )
176     , opmlAction(
177             opmlMap,
178             &OpmlParser::beginOpml )
179     , headAction(
180             headMap,
181             0,
182             &OpmlParser::endHead )
183     , titleAction(
184             textMap,
185             &OpmlParser::beginText,
186             &OpmlParser::endTitle,
187             &OpmlParser::readCharacters )
188     , bodyAction( bodyMap )
189     , outlineAction(
190             outlineMap,
191             &OpmlParser::beginOutline,
192             &OpmlParser::endOutline )
193 {
194     // known elements:
195     knownElements[ "opml" ] = Opml;
196     knownElements[ "html" ] = Html;
197     knownElements[ "HTML" ] = Html;
198     knownElements[ "head" ] = Head;
199     knownElements[ "title" ] = Title;
200     knownElements[ "dateCreated" ] = DateCreated;
201     knownElements[ "dateModified" ] = DateModified;
202     knownElements[ "ownerName" ] = OwnerName;
203     knownElements[ "ownerEmail" ] = OwnerEmail;
204     knownElements[ "ownerId" ] = OwnerId;
205     knownElements[ "docs" ] = Docs;
206     knownElements[ "expansionState" ] = ExpansionState;
207     knownElements[ "vertScrollState" ] = VertScrollState;
208     knownElements[ "windowTop" ] = WindowTop;
209     knownElements[ "windowLeft" ] = WindowLeft;
210     knownElements[ "windowBottom" ] = WindowBottom;
211     knownElements[ "windowRight" ] = WindowRight;
212     knownElements[ "body" ] = Body;
213     knownElements[ "outline" ] = Outline;
214 
215     // before start document/after end document
216     rootMap.insert( Document, &docAction );
217 
218     // parse document
219     docMap.insert( Opml, &opmlAction );
220 //    docMap.insert( Html, &htmlAction );
221 
222     // parse <opml>
223     opmlMap.insert( Head, &headAction );
224     opmlMap.insert( Body, &bodyAction );
225 
226     // parse <head>
227     headMap.insert( Title, &titleAction );
228     headMap.insert( DateCreated, &skipAction );
229     headMap.insert( DateModified, &skipAction );
230     headMap.insert( OwnerName, &skipAction );
231     headMap.insert( OwnerEmail, &skipAction );
232     headMap.insert( OwnerId, &skipAction );
233     headMap.insert( Docs, &skipAction );
234     headMap.insert( ExpansionState, &skipAction );
235     headMap.insert( VertScrollState, &skipAction );
236     headMap.insert( WindowTop, &skipAction );
237     headMap.insert( WindowLeft, &skipAction );
238     headMap.insert( WindowBottom, &skipAction );
239     headMap.insert( WindowRight, &skipAction );
240 
241     // parse <body>
242     bodyMap.insert( Outline, &outlineAction );
243 
244     // parse <outline> in case of sub-elements
245     outlineMap.insert( Outline, &outlineAction );
246 
247     // skip elements
248     skipMap.insert( Any, &skipAction );
249 
250 }
251 
252 OpmlParser::ElementType
elementType() const253 OpmlParser::elementType() const
254 {
255     if( isEndDocument() || isStartDocument() )
256         return Document;
257 
258     if( isCDATA() || isCharacters() )
259         return CharacterData;
260 
261     ElementType elementType = sd.knownElements[ QXmlStreamReader::name().toString()];
262 
263     return elementType;
264 }
265 
266 bool
read()267 OpmlParser::read()
268 {
269     m_buffer.clear();
270     m_actionStack.clear();
271     m_actionStack.push( &( OpmlParser::sd.startAction ) );
272     setNamespaceProcessing( false );
273 
274     return continueRead();
275 }
276 
277 bool
continueRead()278 OpmlParser::continueRead()
279 {
280     // this is some kind of pushdown automata
281     // with this it should be possible to parse feeds in parallel
282     // without using threads
283     DEBUG_BLOCK
284 
285     while( !atEnd() && error() != CustomError )
286     {
287         TokenType token = readNext();
288 
289         if( error() == PrematureEndOfDocumentError && m_transferJob )
290             return true;
291 
292         if( hasError() )
293         {
294             Q_EMIT doneParsing();
295             return false;
296         }
297 
298         if( m_actionStack.isEmpty() )
299         {
300             debug() << "expected element on stack!";
301             return false;
302         }
303 
304         const Action* action = m_actionStack.top();
305         const Action* subAction = 0;
306 
307         switch( token )
308         {
309             case Invalid:
310             {
311                 debug() << "invalid token received at line " << lineNumber();
312                 debug() << "Error:\n" << errorString();
313                 return false;
314             }
315 
316             case StartDocument:
317             case StartElement:
318                 subAction = action->actionMap()[ elementType() ];
319 
320                 if( !subAction )
321                     subAction = action->actionMap()[ Any ];
322 
323                 if( !subAction )
324                     subAction = &( OpmlParser::sd.skipAction );
325 
326                 m_actionStack.push( subAction );
327 
328                 subAction->begin( this );
329                 break;
330 
331             case EndDocument:
332             case EndElement:
333                 action->end( this );
334 
335                 if( m_actionStack.pop() != action )
336                 {
337                     debug() << "popped other element than expected!";
338                 }
339                 break;
340 
341             case Characters:
342                 if( !isWhitespace() || isCDATA() )
343                 {
344                     action->characters( this );
345                 }
346 
347                 // ignorable whitespaces
348             case Comment:
349             case EntityReference:
350             case ProcessingInstruction:
351             case DTD:
352             case NoToken:
353                 // ignore
354                 break;
355         }
356     }
357 
358     return !hasError();
359 }
360 
361 void
stopWithError(const QString & message)362 OpmlParser::stopWithError( const QString &message )
363 {
364     raiseError( message );
365 
366     if( m_transferJob )
367     {
368         m_transferJob->kill( KJob::EmitResult );
369         m_transferJob = 0;
370     }
371 
372     Q_EMIT doneParsing();
373 }
374 
375 void
beginOpml()376 OpmlParser::beginOpml()
377 {
378     m_outlineStack.clear();
379 }
380 
381 void
beginText()382 OpmlParser::beginText()
383 {
384     m_buffer.clear();
385 }
386 
387 void
beginOutline()388 OpmlParser::beginOutline()
389 {
390     OpmlOutline *parent = m_outlineStack.empty() ? 0 : m_outlineStack.top();
391     OpmlOutline *outline = new OpmlOutline( parent );
392     //adding outline to stack
393     m_outlineStack.push( outline );
394     if( parent )
395     {
396         parent->setHasChildren( true );
397         parent->addChild( outline );
398     }
399 
400     foreach( const QXmlStreamAttribute &attribute, attributes() )
401         outline->addAttribute( attribute.name().toString(), attribute.value().toString() );
402 
403     Q_EMIT outlineParsed( outline );
404 }
405 
406 void
beginNoElement()407 OpmlParser::beginNoElement()
408 {
409     debug() << "no element expected here, but got element: " << QXmlStreamReader::name();
410 }
411 
412 void
endDocument()413 OpmlParser::endDocument()
414 {
415     Q_EMIT doneParsing();
416 }
417 
418 void
endHead()419 OpmlParser::endHead()
420 {
421     Q_EMIT headerDone();
422 }
423 
424 void
endTitle()425 OpmlParser::endTitle()
426 {
427     m_headerData.insert( "title", m_buffer.trimmed() );
428 }
429 
430 void
endOutline()431 OpmlParser::endOutline()
432 {
433     OpmlOutline *outline = m_outlineStack.pop();
434     if( m_outlineStack.isEmpty() )
435         m_outlines << outline;
436 }
437 
438 void
readCharacters()439 OpmlParser::readCharacters()
440 {
441     m_buffer += text();
442 }
443 
444 void
readNoCharacters()445 OpmlParser::readNoCharacters()
446 {
447     DEBUG_BLOCK
448     debug() << "no characters expected here";
449 }
450