1 /* This file is part of the wvWare 2 project
2    Copyright (C) 2001-2003 Werner Trobin <trobin@kde.org>
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License version 2 as published by the Free Software Foundation.
7 
8    This library is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11    Library General Public License for more details.
12 
13    You should have received a copy of the GNU Library General Public License
14    along with this library; see the file COPYING.LIB.  If not, write to
15    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16    Boston, MA 02111-1307, USA.
17 */
18 
19 #include "parser9x.h"
20 #include "properties97.h"
21 #include "styles.h"
22 #include "word97_helper.h"
23 #include "lists.h"
24 #include "handlers.h"
25 #include "footnotes97.h"
26 #include "headers.h"
27 #include "fonts.h"
28 #include "textconverter.h"
29 #include "olestream.h"
30 #include "fields.h"
31 #include "graphics.h"
32 #include "associatedstrings.h"
33 #include "paragraphproperties.h"
34 #include "functor.h"
35 #include "functordata.h"
36 #include "word95_generated.h"
37 #include "convert.h"
38 #include "zcodec.hxx"
39 #include "wvlog.h"
40 
41 #include <gsf/gsf-input.h>
42 #include <gsf/gsf-output.h>
43 #include <gsf/gsf-input-memory.h>
44 #include <gsf/gsf-output-memory.h>
45 
46 #include <numeric>
47 #include <string.h>
48 
49 using namespace wvWare;
50 
51 
Position(U32 cp,const PLCF<Word97::PCD> * plcfpcd)52 Parser9x::Position::Position( U32 cp, const PLCF<Word97::PCD>* plcfpcd ) :
53     piece( 0 ), offset( cp )
54 {
55     PLCFIterator<Word97::PCD> it( *plcfpcd );
56     for ( ; it.current(); ++it, ++piece ) {
57         if ( it.currentLim() > cp && it.currentStart() <= cp )
58             break;
59         offset -= it.currentRun();
60     }
61 }
62 
63 
Parser9x(OLEStorage * storage,OLEStreamReader * wordDocument,const Word97::FIB & fib)64 Parser9x::Parser9x( OLEStorage* storage, OLEStreamReader* wordDocument, const Word97::FIB& fib ) :
65     Parser( storage, wordDocument ), m_fib( fib ), m_table( 0 ), m_data( 0 ), m_properties( 0 ),
66     m_headers( 0 ), m_lists( 0 ), m_textconverter( 0 ), m_fields( 0 ), m_footnotes( 0 ),
67     m_fonts( 0 ), m_drawings( 0 ), m_plcfpcd( 0 ), m_tableRowStart( 0 ), m_tableRowLength( 0 ),
68     m_cellMarkFound( false ), m_remainingCells( 0 ), m_currentParagraph( new Paragraph ),
69     m_remainingChars( 0 ), m_sectionNumber( 0 ), m_subDocument( None ), m_parsingMode( Default )
70 {
71     if ( !isOk() )
72         return;
73 
74     m_table = storage->createStreamReader( tableStream() );
75     if ( !m_table || !m_table->isValid() ) {
76         wvlog << "Error: Couldn't open the table stream (i.e. [0|1]Table or WordDocument)" << std::endl;
77         m_okay = false;
78         return;
79     }
80 
81     m_data = storage->createStreamReader( "Data" );
82     if ( !m_data || !m_data->isValid() ) {
83         wvlog << "Information: Couldn't open the Data stream, no big deal" << std::endl;
84         delete m_data;
85         m_data = 0;
86     }
87 
88 #ifdef WV2_DUMP_FIB
89     wvlog << "Dumping some parts of the FIB: " << std::endl;
90     wvlog << "   wIdent=" << m_fib.wIdent << std::endl;
91     wvlog << "   nFib=" << m_fib.nFib << std::endl;
92     wvlog << "   nFibBack=" << m_fib.nFibBack << std::endl;
93     wvlog << "   lid=0x" << std::hex << m_fib.lid << std::dec << std::endl;
94     wvlog << "   lidFE=0x" << std::hex << m_fib.lidFE << std::dec << std::endl;
95     wvlog << "   fEncrypted=" << m_fib.fEncrypted << std::endl;
96     wvlog << "   chs=" << m_fib.chs << std::endl;
97     wvlog << "   fcMin=" << m_fib.fcMin << std::endl;
98     wvlog << "   fcMac=" << m_fib.fcMac << std::endl;
99     wvlog << "   ccpText=" << m_fib.ccpText << std::endl;
100     wvlog << "   ccpFtn=" << m_fib.ccpFtn << std::endl;
101     wvlog << "   ccpHdd=" << m_fib.ccpHdd << std::endl;
102     wvlog << "   ccpMcr=" << m_fib.ccpMcr << std::endl;
103     wvlog << "   ccpAtn=" << m_fib.ccpAtn << std::endl;
104     wvlog << "   ccpEdn=" << m_fib.ccpEdn << std::endl;
105     wvlog << "   ccpTxbx=" << m_fib.ccpTxbx << std::endl;
106     wvlog << "   ccpHdrTxbx=" << m_fib.ccpHdrTxbx << std::endl;
107     wvlog << "   pnFbpChpFirst=" << m_fib.pnFbpChpFirst << std::endl;
108     wvlog << "   pnChpFirst=" << m_fib.pnChpFirst << std::endl;
109     wvlog << "   cpnBteChp=" << m_fib.cpnBteChp << std::endl;
110     wvlog << "   pnFbpPapFirst=" << m_fib.pnFbpPapFirst << std::endl;
111     wvlog << "   pnPapFirst=" << m_fib.pnPapFirst << std::endl;
112     wvlog << "   cpnBtePap=" << m_fib.cpnBtePap << std::endl;
113 #endif
114 
115     // Initialize all the cached data structures like stylesheets, fonts,
116     // textconverter,...
117     init();
118 }
119 
~Parser9x()120 Parser9x::~Parser9x()
121 {
122     // Sanity check
123     if ( !oldParsingStates.empty() || m_subDocument != None )
124         wvlog << "Bug: Someone messed up the save/restore stack!" << std::endl;
125 
126     delete m_currentParagraph;
127     delete m_tableRowStart;
128     delete m_drawings;
129     delete m_fonts;
130     delete m_plcfpcd;
131     delete m_headers;
132     delete m_footnotes;
133     delete m_fields;
134     delete m_textconverter;
135     delete m_properties;
136     delete m_lists;
137     delete m_data;
138     delete m_table;
139 }
140 
parse()141 bool Parser9x::parse()
142 {
143     if ( !isOk() )
144         return false;
145 
146     if ( m_fib.fEncrypted ) {
147         // There is some code out there to break this "encryption", do we want
148         // to implement that?
149         // We could either ask for a password or cheat a bit :-)
150         wvlog << "Error: The document is encrypted." << std::endl;
151         return false;
152     }
153 
154     if ( m_fib.lcbClx == 0 )
155         fakePieceTable();
156     else {
157         // Get the piece table
158         if ( !readPieceTable() )
159             return false;
160     }
161 
162     // start parsing the body
163     if ( !parseBody() )
164         return false;
165     return true;
166 }
167 
fib() const168 const Word97::FIB& Parser9x::fib() const
169 {
170     return m_fib;
171 }
172 
dop() const173 const Word97::DOP& Parser9x::dop() const
174 {
175     return m_properties->dop();
176 }
177 
font(S16 ftc) const178 const Word97::FFN& Parser9x::font( S16 ftc ) const
179 {
180     return m_fonts->font( ftc );
181 }
182 
associatedStrings()183 AssociatedStrings Parser9x::associatedStrings()
184 {
185     return AssociatedStrings( m_fib.fcSttbfAssoc, m_fib.lcbSttbfAssoc,
186                               m_fib.fFarEast ? m_fib.lidFE : m_fib.lid, m_table );
187 }
188 
styleSheet() const189 const StyleSheet& Parser9x::styleSheet() const
190 {
191     return m_properties->styleSheet();
192 }
193 
parseHeaders(const HeaderData & data)194 void Parser9x::parseHeaders( const HeaderData& data )
195 {
196     m_subDocumentHandler->headersStart();
197     for ( unsigned char mask = HeaderData::HeaderEven; mask <= HeaderData::FooterFirst; mask <<= 1 )
198         if ( mask & data.headerMask )
199             parseHeader( data, mask );
200     m_subDocumentHandler->headersEnd();
201 }
202 
parseFootnote(const FootnoteData & data)203 void Parser9x::parseFootnote( const FootnoteData& data )
204 {
205 #ifdef WV2_DEBUG_FOOTNOTES
206     wvlog << "Parser9x::parseFootnote() #####################" << std::endl;
207 #endif
208     if ( data.limCP - data.startCP == 0 ) // shouldn't happen, but well...
209         return;
210 
211     saveState( data.limCP - data.startCP, data.type == FootnoteData::Footnote ? Footnote : Endnote );
212     m_subDocumentHandler->footnoteStart();
213 
214     U32 offset = m_fib.ccpText + data.startCP;
215     if ( data.type == FootnoteData::Endnote )
216         offset += m_fib.ccpFtn + m_fib.ccpHdd + m_fib.ccpMcr + m_fib.ccpAtn;
217     parseHelper( Position( offset, m_plcfpcd ) );
218 
219     m_subDocumentHandler->footnoteEnd();
220     restoreState();
221 #ifdef WV2_DEBUG_FOOTNOTES
222     wvlog << "Parser9x::parseFootnote() done ################" << std::endl;
223 #endif
224 }
225 
parseTableRow(const TableRowData & data)226 void Parser9x::parseTableRow( const TableRowData& data )
227 {
228 #ifdef WV2_DEBUG_TABLES
229     wvlog << "Parser9x::parseTableRow(): startPiece=" << data.startPiece << " startOffset="
230           << data.startOffset << " length=" << data.length << std::endl;
231 #endif
232 
233     if ( data.length == 0 ) // idiot safe ;-)
234         return;
235 
236     saveState( data.length, static_cast<SubDocument>( data.subDocument ), Table );
237     m_remainingCells = data.tap->itcMac;
238     m_tableHandler->tableRowStart( data.tap );
239     m_tableHandler->tableCellStart();
240 
241     parseHelper( Position( data.startPiece, data.startOffset ) );
242 
243     m_tableHandler->tableRowEnd();
244     restoreState();
245 
246 #ifdef WV2_DEBUG_TABLES
247     wvlog << "Parser9x::parseTableRow() done #####################" << std::endl;
248 #endif
249 }
250 
parsePicture(const PictureData & data)251 void Parser9x::parsePicture( const PictureData& data )
252 {
253     wvlog << "Parser9x::parsePicture" << std::endl;
254     OLEStreamReader* stream = m_fib.nFib < Word8nFib ? m_wordDocument : m_data;
255     stream->push(); // saveState would be overkill
256 
257     //go to the position in the stream after the PICF, where the actual picture data/escher is
258     if ( !stream->seek( data.fcPic + data.picf->cbHeader, G_SEEK_SET ) ) {
259         wvlog << "Error: Parser9x::parsePicture couldn't seek properly" << std::endl;
260         stream->pop();
261         return;
262     }
263 
264     if ( data.picf->mfp.mm == 0x64 || data.picf->mfp.mm == 0x66 ) {
265         wvlog << "Linked graphic in Escher object" << std::endl;
266         parsePictureEscher( data, stream, data.picf->lcb, data.fcPic );
267     }
268     else {
269         switch ( data.picf->mfp.mm ) {
270             case 94: // A .bmp or a .gif name is stored after the PICF
271             case 98: // The .tiff name is stored after the PICF
272                 parsePictureExternalHelper( data, stream );
273                 break;
274             case 99: // A full bmp is stored after the PICF -- not handled in OOo??
275                 parsePictureBitmapHelper( data, stream );
276                 break;
277             default: // It has to be a .wmf or .emf file (right after the PICF)
278                 wvlog << "assuming WMF/EMF file... not sure this is correct" << std::endl;
279                 parsePictureWmfHelper( data, stream );
280                 break;
281         }
282     }
283     stream->pop();
284 }
285 
tableStream() const286 std::string Parser9x::tableStream() const
287 {
288     if ( m_fib.nFib < Word8nFib )
289         return "WordDocument";    // Word 6 or Word 7 (==95)
290     else
291         return m_fib.fWhichTblStm ? "1Table" : "0Table";  // Word 8 (==97) or newer
292 }
293 
init()294 void Parser9x::init()
295 {
296     if ( m_fib.fFarEast )
297         m_textconverter = new TextConverter( m_fib.lidFE );
298     else
299         m_textconverter = new TextConverter( m_fib.lid );
300 
301     // Get hold of all the SEP/PAP/CHP related structures and the StyleSheet
302     m_properties = new Properties97( m_wordDocument, m_table, m_fib );
303 
304     if ( m_fib.nFib < Word8nFib ) // Word67
305         m_lists = new ListInfoProvider( &styleSheet() );
306     else
307         m_lists = new ListInfoProvider( m_table, m_fib, &m_properties->styleSheet() );
308 
309     m_fonts = new FontCollection( m_table, m_fib );
310     m_fields = new Fields( m_table, m_fib );
311     m_drawings = new Drawings( m_table, m_fib );
312 
313     if ( m_fib.ccpFtn != 0 )
314         m_footnotes = new Footnotes97( m_table, m_fib );
315 }
316 
readPieceTable()317 bool Parser9x::readPieceTable()
318 {
319     m_table->seek( m_fib.fcClx );
320     // first skip the leading grpprl blocks, we'll re-read them
321     // if we need them later (no caching here)
322     U8 blockType = m_table->readU8();
323     while ( blockType == wvWare::clxtGrpprl ) {
324         U16 size = m_table->readU16();
325 #if WV2_DUMP_PIECE_TABLE > 0
326         wvlog << "Found a clxtGrpprl (size=" << size << ")" << std::endl;
327 #endif
328         m_table->seek( size, G_SEEK_CUR );
329         blockType = m_table->readU8();
330     }
331     if ( blockType == wvWare::clxtPlcfpcd ) {
332         U32 size = m_table->readU32();
333 #if WV2_DUMP_PIECE_TABLE > 0
334         wvlog << "Found the clxtPlcfpcd (size=" << size << ")" << std::endl;
335 #endif
336         m_plcfpcd = new PLCF<Word97::PCD>( size, m_table, false );
337 
338 #if WV2_DUMP_PIECE_TABLE > 1
339         PLCFIterator<Word97::PCD> it( *m_plcfpcd );
340         for ( int i = 0; it.current(); ++it, ++i ) {
341             wvlog << "Piece Table Entry(" << i << "): " << std::endl;
342             wvlog << "   start: " << it.currentStart() << std::endl;
343             wvlog << "   lim: " << it.currentLim() << std::endl;
344             wvlog << "   complex: " << it.current()->prm.fComplex << std::endl;
345             if ( it.current()->prm.fComplex )
346                 wvlog << "   igrpprl: " << it.current()->prm.toPRM2().igrpprl << std::endl;
347             else
348                 wvlog << "   isprm: " << it.current()->prm.isprm << std::endl;
349 
350             U32 fc = it.current()->fc;
351             U32 limit = it.currentRun() << 1;
352             wvlog << "   value: " << fc << std::endl;
353             if ( fc & 0x40000000 ) {
354                 fc = ( fc & 0xbfffffff ) >> 1;
355                 limit >>= 1;
356                 wvlog << "   value (cleared 2nd MSB, div. by 2): " << fc << std::endl;
357             }
358             m_wordDocument->seek( fc );
359             wvlog << "   position: " << m_wordDocument->tell() << ", limit: " << limit << std::endl;
360             for ( unsigned int j = 0; j < limit; ++j ) {
361                 U8 foo = m_wordDocument->readU8();
362                 if ( foo > 31 )
363                     wvlog << static_cast<char>( foo );
364                 else if ( foo == PARAGRAPH_MARK )
365                     wvlog << std::endl;
366                 else if ( foo > 0 )
367                     wvlog << "{" <<  static_cast<int>( foo ) << "}";
368                 else
369                     wvlog << "_";
370             }
371             wvlog << std::endl << "   position: " << m_wordDocument->tell() << ", limit: " << limit << std::endl;
372         }
373 #endif
374     }
375     else {
376         wvlog << "Oooops, couldn't find the piece table." << std::endl;
377         return false;
378     }
379     return true;
380 }
381 
fakePieceTable()382 void Parser9x::fakePieceTable()
383 {
384     U32 fakePlcfPCD[ 4 ];
385     // The first CP is 0 (endianness doesn't matter :-)
386     fakePlcfPCD[ 0 ] = 0;
387     // The second CP corresponds to the length of the document
388     fakePlcfPCD[ 1 ] = toLittleEndian( m_fib.ccpText + m_fib.ccpFtn + m_fib.ccpHdd + m_fib.ccpMcr +
389                                        m_fib.ccpAtn + m_fib.ccpEdn + m_fib.ccpTxbx + m_fib.ccpHdrTxbx );
390 
391     // Now fake a matching PCD
392     U8* tmp( reinterpret_cast<U8*>( &fakePlcfPCD[0] ) );
393     tmp += 8;
394     *tmp++ = 0;  // first the bitfields (unused)
395     *tmp++ = 0;
396     U32 fcMin = m_fib.fcMin << 1;
397     fcMin |= 0x40000000;
398     *tmp++ = static_cast<U8>( fcMin & 0x000000ff );
399     *tmp++ = static_cast<U8>( ( fcMin & 0x0000ff00 ) >> 8 );   // then store the
400     *tmp++ = static_cast<U8>( ( fcMin & 0x00ff0000 ) >> 16 );  // fc in little
401     *tmp++ = static_cast<U8>( ( fcMin & 0xff000000 ) >> 24 );  // endian style
402     *tmp++ = 0;  // then an empty PRM
403     *tmp++ = 0;
404 
405     tmp = reinterpret_cast<U8*>( &fakePlcfPCD[0] );
406     m_plcfpcd = new PLCF<Word97::PCD>( 16, tmp );
407 }
408 
parseBody()409 bool Parser9x::parseBody()
410 {
411     saveState( m_fib.ccpText, Main );
412     m_subDocumentHandler->bodyStart();
413 
414     SharedPtr<const Word97::SEP> sep( m_properties->sepForCP( 0 ) );
415     if ( !sep )
416         sep = new Word97::SEP(); // don't pass 0 pointers in any case
417     m_textHandler->sectionStart( sep ); // First section, starting at CP 0
418     emitHeaderData( sep );
419     sep = 0; // get rid of the huge SEP
420 
421     // Process all the pieces belonging to the main document text
422     parseHelper( Position( 0, static_cast<U32>( 0 ) ) );
423 
424     // Implicit end of the section
425     m_textHandler->sectionEnd();
426     m_subDocumentHandler->bodyEnd();
427     restoreState();
428     return true;
429 }
430 
parseHelper(Position startPos)431 void Parser9x::parseHelper( Position startPos )
432 {
433     PLCFIterator<Word97::PCD> it( m_plcfpcd->at( startPos.piece ) );
434 
435     while ( m_remainingChars > 0 && it.current() ) {
436         U32 fc = it.current()->fc;   // Start FC of this piece
437         bool unicode;
438         realFC( fc, unicode );
439 
440         U32 limit = it.currentRun(); // Number of characters in this piece
441 
442         // Check whether the text starts somewhere within the piece, reset at the end of the loop body
443         if ( startPos.offset != 0 ) {
444             fc += unicode ? startPos.offset * 2 : startPos.offset;
445             limit -= startPos.offset;
446         }
447 
448         limit = limit > m_remainingChars ? m_remainingChars : limit;
449         m_wordDocument->seek( fc );
450 
451         if ( unicode ) {
452             XCHAR* string = new XCHAR[ limit ];
453             // First read the whole piece
454             for ( unsigned int j = 0; j < limit; ++j ) {
455                 string[ j ] = m_wordDocument->readU16();
456                 if ( ( string[ j ] & 0xff00 ) == 0xf000 ) {
457                     // Microsoft uses a Private Unicode Area (PUA) to store the characters of the
458                     // Symbol and the Wingdings font. We simply clear these bits to shift the
459                     // characters to 0x00XX and hope the correct font is installed. If the font
460                     // isn't there, the user will get some ASCII text instead of symbols :}
461                     //wvlog << "private unicode area detected -- cropping" << std::endl;
462                     string[ j ] &= 0x00ff;
463                 }
464             }
465             processPiece<XCHAR>( string, fc, limit, startPos ); // also takes care to delete [] string
466         }
467         else {
468             U8* string = new U8[ limit ];
469             m_wordDocument->read( string, limit );
470             processPiece<U8>( string, fc, limit, startPos ); // also takes care to delete [] string
471         }
472         m_remainingChars -= limit;
473         ++it;
474         ++startPos.piece;
475         startPos.offset = 0; // just in case it was != 0 in the first iteration
476     }
477 }
478 
479 template<typename String>
processPiece(String * string,U32 fc,U32 limit,const Position & position)480 void Parser9x::processPiece( String* string, U32 fc, U32 limit, const Position& position )
481 {
482     // Take a closer look at the piece we just read. "start" and "index" are
483     // counted in character positions (take care!)
484     unsigned int start = 0;
485     unsigned int index = 0;
486     while ( index < limit ) {
487         switch( string[ index ] ) {
488             case SECTION_MARK:
489             {
490                 if ( !m_currentParagraph->empty() || start != index ) {
491                     // No "index - start + 1" here, as we don't want to copy the section mark!
492                     UString ustring( processPieceStringHelper( string, start, index ) );
493                     m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
494                                                           fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
495                     processParagraph( fc + index * sizeof( String ) );
496                 }
497                 start = ++index;
498 
499                 SharedPtr<const Word97::SEP> sep( m_properties->sepForCP( m_fib.ccpText - m_remainingChars + index ) );
500                 if ( sep ) {
501                     // It's not only a page break, it's a new section
502                     m_textHandler->sectionEnd();
503                     m_textHandler->sectionStart( sep );
504                     emitHeaderData( sep );
505                 }
506                 else
507                     m_textHandler->pageBreak();
508                 break;
509             }
510             case CELL_MARK: // same ASCII code as a ROW_MARK
511                 m_cellMarkFound = true;
512                 // Fall-through intended. A row/cell end is also a paragraph end.
513             case PARAGRAPH_MARK:
514             {
515                 // No "index - start + 1" here, as we don't want to copy the paragraph mark!
516                 UString ustring( processPieceStringHelper( string, start, index ) );
517                 m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
518                                                       fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
519                 processParagraph( fc + index * sizeof( String ) );
520                 m_cellMarkFound = false;
521                 start = ++index;
522                 break;
523             }
524             // "Special" characters
525             case TAB:
526                 string[ index ] = m_inlineHandler->tab();
527                 ++index;
528                 break;
529             case HARD_LINE_BREAK:
530                 string[ index ] = m_inlineHandler->hardLineBreak();
531                 ++index;
532                 break;
533             case COLUMN_BREAK:
534                 string[ index ] = m_inlineHandler->columnBreak();
535                 ++index;
536                 break;
537             case NON_BREAKING_HYPHEN:
538                 string[ index ] = m_inlineHandler->nonBreakingHyphen();
539                 ++index;
540                 break;
541             case NON_REQUIRED_HYPHEN:
542                 string[ index ] = m_inlineHandler->nonRequiredHyphen();
543                 ++index;
544                 break;
545             case NON_BREAKING_SPACE:
546                 string[ index ] = m_inlineHandler->nonBreakingSpace();
547                 ++index;
548                 break;
549             default:
550                 ++index;
551                 break;
552         }
553     }
554     if ( start < limit ) {
555         // Finally we have to add the remaining text to the current paragaph (if there is any)
556         UString ustring( processPieceStringHelper( string, start, limit ) );
557         m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
558                                               fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
559     }
560     delete [] string;
561 }
562 
processPieceStringHelper(XCHAR * string,unsigned int start,unsigned int index) const563 UString Parser9x::processPieceStringHelper( XCHAR* string, unsigned int start, unsigned int index ) const
564 {
565     return UString( reinterpret_cast<const wvWare::UChar *>( &string[ start ] ), index - start );
566 }
567 
processPieceStringHelper(U8 * string,unsigned int start,unsigned int index) const568 UString Parser9x::processPieceStringHelper( U8* string, unsigned int start, unsigned int index ) const
569 {
570     return m_textconverter->convert( reinterpret_cast<char*>( &string[ start ] ), index - start );
571 }
572 
processParagraph(U32 fc)573 void Parser9x::processParagraph( U32 fc )
574 {
575     // Get the PAP structure as it was at the last full-save
576     ParagraphProperties* props( m_properties->fullSavedPap( fc, m_data ) );
577     // ...and apply the latest changes, then the PAP is completely restored
578     m_properties->applyClxGrpprl( m_plcfpcd->at( m_currentParagraph->back().m_position.piece ).current(), m_fib.fcClx, props );
579 
580     // Skim the tables first, as soon as the functor is invoked we have to
581     // parse them and emit the text
582     if ( m_parsingMode == Default && props->pap().fInTable ) {
583         if ( !m_tableRowStart ) {
584             m_tableRowStart = new Position( m_currentParagraph->front().m_position );
585             m_tableRowLength = 0;
586 #ifdef WV2_DEBUG_TABLES
587             wvlog << "Start of a table row: piece=" << m_tableRowStart->piece << " offset="
588                   << m_tableRowStart->offset << std::endl;
589 #endif
590         }
591         m_tableRowLength += std::accumulate( m_currentParagraph->begin(), m_currentParagraph->end(),
592                                              1, &Parser9x::accumulativeLength ); // init == 1 because of the parag. mark!
593         if ( props->pap().fTtp ) {
594             // Restore the table properties of this row
595             Word97::TAP* tap = m_properties->fullSavedTap( fc, m_data );
596             m_properties->applyClxGrpprl( m_plcfpcd->at( m_currentParagraph->back().m_position.piece ).current(),
597                                           m_fib.fcClx, tap, m_properties->styleByIndex( props->pap().istd ) );
598 
599             SharedPtr<const Word97::TAP> sharedTap( tap );
600             // We decrement the length by 1 that the trailing row mark doesn't emit
601             // one empty paragraph during parsing.
602             m_textHandler->tableRowFound( make_functor( *this, &Parser9x::parseTableRow,
603                                                         TableRowData( m_tableRowStart->piece, m_tableRowStart->offset,
604                                                                       m_tableRowLength - 1, static_cast<int>( m_subDocument ),
605                                                                       sharedTap ) ),
606                                           sharedTap );
607             delete m_tableRowStart;
608             m_tableRowStart = 0;
609         }
610         delete props;
611     }
612     else {
613         // Now that we have the complete PAP, let's see if this paragraph belongs to a list
614         props->createListInfo( *m_lists );
615 
616         SharedPtr<const ParagraphProperties> sharedProps( props ); // keep it that way, else the ParagraphProperties get deleted!
617         m_textHandler->paragraphStart( sharedProps );
618 
619         // Get the appropriate style for this paragraph
620         const Style* style = m_properties->styleByIndex( props->pap().istd );
621         if ( !style ) {
622             wvlog << "Warning: Huh, really obscure error, couldn't find the Style for the current PAP -- skipping" << std::endl;
623             return;
624         }
625 
626         // Now walk the paragraph, chunk for chunk
627         std::list<Chunk>::const_iterator it = m_currentParagraph->begin();
628         std::list<Chunk>::const_iterator end = m_currentParagraph->end();
629         for ( ; it != end; ++it ) {
630             U32 index = 0;
631             const U32 limit = ( *it ).m_text.length();
632             const PLCFIterator<Word97::PCD> pcdIt( m_plcfpcd->at( ( *it ).m_position.piece ) );
633 
634             while ( index < limit ) {
635                 Word97::CHP* chp = new Word97::CHP( style->chp() );
636                 U32 length = m_properties->fullSavedChp( ( *it ).m_startFC + index * ( ( *it ).m_isUnicode ? 2 : 1 ), chp, style );
637                 if ( ( *it ).m_isUnicode )
638                     length >>= 1;
639                 length = length > limit - index ? limit - index : length;
640 
641                 m_properties->applyClxGrpprl( pcdIt.current(), m_fib.fcClx, chp, style );
642                 SharedPtr<const Word97::CHP> sharedChp( chp ); // keep it that way, else the CHP gets deleted!
643                 processChunk( *it, chp, length, index, pcdIt.currentStart() );
644                 index += length;
645             }
646         }
647         m_textHandler->paragraphEnd();
648 
649         if ( m_cellMarkFound ) {
650             m_tableHandler->tableCellEnd();
651             if ( --m_remainingCells )
652                 m_tableHandler->tableCellStart();
653         }
654     }
655     m_currentParagraph->clear();
656 }
657 
processChunk(const Chunk & chunk,SharedPtr<const Word97::CHP> chp,U32 length,U32 index,U32 currentStart)658 void Parser9x::processChunk( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
659                              U32 length, U32 index, U32 currentStart )
660 {
661     // Some characters have a special meaning (e.g. a footnote is anchored at some
662     // position inside the text) and they *don't* have the fSpec flag set. This means
663     // that we have to watch out for such characters even in plain text. Slooow :}
664     //
665     // For now we only have to handle footnote and endnote references that way. Due to that
666     // the code below is a bit simpler right now, but I fear we have to extend that later on.
667     // (We will have to keep track of the type of disruption, footnote() takes care of all now)
668     //
669     // A precondition for the footnote/endnote implementation below is, that footnote and
670     // endnote references only occur in the main body text. The reason is that we only check
671     // for the next footnote inside the PLCF and don't take subdocuments into account. If
672     // it turns out that this precondition is not satisfied we would have to change the
673     // O(1) nextFootnote() call to something like an O(n) containsFootnote( start, lim )
674     // Up to now Word 97, 2000, and 2002 seem to be bug compatible and fullfill that precondition.
675     //
676     while ( length > 0 ) {
677         U32 disruption = 0xffffffff; // "infinity"
678         if ( m_footnotes ) {
679             U32 nextFtn = m_footnotes->nextFootnote();
680             U32 nextEnd = m_footnotes->nextEndnote();
681             disruption = nextFtn < nextEnd ? nextFtn : nextEnd;
682 #ifdef WV2_DEBUG_FOOTNOTES
683             wvlog << "nextFtn=" << nextFtn << " nextEnd=" << nextEnd << " disruption="
684                   << disruption << " length=" << length << std::endl;
685 #endif
686         }
687         U32 startCP = currentStart + chunk.m_position.offset + index;
688 
689         if ( disruption >= startCP && disruption < startCP + length ) {
690 #ifdef WV2_DEBUG_FOOTNOTES
691             wvlog << "startCP=" << startCP << " len=" << length << " disruption=" << disruption << std::endl;
692 #endif
693             U32 disLen = disruption - startCP;
694             if ( disLen != 0 )
695                 processRun( chunk, chp, disLen, index, currentStart );
696             length -= disLen;
697             index += disLen;
698             processFootnote( chunk.m_text[ index ], disruption, chp );
699             --length;
700             ++index;
701         }
702         else {
703             // common case, no disruption at all (or the end of a disrupted chunk)
704             processRun( chunk, chp, length, index, currentStart );
705             break;   // should be faster than messing with length...
706         }
707     }
708 }
709 
processRun(const Chunk & chunk,SharedPtr<const Word97::CHP> chp,U32 length,U32 index,U32 currentStart)710 void Parser9x::processRun( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
711                            U32 length, U32 index, U32 currentStart )
712 {
713     if ( chp->fSpec ) {
714         U32 i = 0;
715         while ( i < length ) {
716             processSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp );
717             ++i;
718         }
719     }
720     else {
721         UConstString str( const_cast<UChar*>( chunk.m_text.data() ) + index, length );
722         m_textHandler->runOfText( str.string(), chp );
723     }
724 }
725 
processSpecialCharacter(UChar character,U32 globalCP,SharedPtr<const Word97::CHP> chp)726 void Parser9x::processSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
727 {
728     switch( character.unicode() ) {
729         // Is it one of the "simple" special characters?
730         case TextHandler::CurrentPageNumber:
731         case TextHandler::LineNumber:
732         case TextHandler::AbbreviatedDate:
733         case TextHandler::TimeHMS:
734         case TextHandler::CurrentSectionNumber:
735         case TextHandler::AbbreviatedDayOfWeek:
736         case TextHandler::DayOfWeek:
737         case TextHandler::DayShort:
738         case TextHandler::HourCurrentTime:
739         case TextHandler::HourCurrentTimeTwoDigits:
740         case TextHandler::MinuteCurrentTime:
741         case TextHandler::MinuteCurrentTimeTwoDigits:
742         case TextHandler::SecondsCurrentTime:
743         case TextHandler::AMPMCurrentTime:
744         case TextHandler::CurrentTimeHMSOld:
745         case TextHandler::DateM:
746         case TextHandler::DateShort:
747         case TextHandler::MonthShort:
748         case TextHandler::YearLong:
749         case TextHandler::YearShort:
750         case TextHandler::AbbreviatedMonth:
751         case TextHandler::MonthLong:
752         case TextHandler::CurrentTimeHMS:
753         case TextHandler::DateLong:
754             m_textHandler->specialCharacter( static_cast<TextHandler::SpecialCharacter>( character.unicode() ), chp );
755             break;
756 
757         // It has to be one of the very special characters...
758         case TextHandler::Picture:
759             emitPictureData( chp );
760             break;
761         case TextHandler::DrawnObject:
762             emitDrawnObject( chp );
763             break;
764         case TextHandler::FootnoteAuto:
765             if ( m_subDocument == Footnote || m_subDocument == Endnote )
766                 m_textHandler->footnoteAutoNumber( chp );
767             else
768                 processFootnote( character, globalCP, chp );
769             break;
770         case TextHandler::FieldBegin:
771         {
772             const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
773             if ( fld )
774                 m_textHandler->fieldStart( fld, chp );
775             break;
776         }
777         case TextHandler::FieldSeparator:
778         {
779             const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
780             if ( fld )
781                 m_textHandler->fieldSeparator( fld, chp );
782             break;
783         }
784         case TextHandler::FieldEnd:
785         {
786             const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
787             if ( fld )
788                 m_textHandler->fieldEnd( fld, chp );
789             break;
790         }
791         case TextHandler::FieldEscapeChar:
792             wvlog << "Found an escape character ++++++++++++++++++++?" << std::endl;
793             break;
794         default:
795             wvlog << "Parser9x::processSpecialCharacter(): Support for character " << character.unicode()
796                   << " not implemented yet." << std::endl;
797             break;
798     }
799 }
800 
processFootnote(UChar character,U32 globalCP,SharedPtr<const Word97::CHP> chp)801 void Parser9x::processFootnote( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
802 {
803     if ( !m_footnotes ) {
804         wvlog << "Bug: Found a footnote, but m_footnotes == 0!" << std::endl;
805         return;
806     }
807 #ifdef WV2_DEBUG_FOOTNOTES
808     wvlog << "######### Footnote found: CP=" << globalCP << std::endl;
809 #endif
810     bool ok;
811     FootnoteData data( m_footnotes->footnote( globalCP, ok ) );
812     if ( ok )
813         m_textHandler->footnoteFound( data.type, character, chp, make_functor( *this, &Parser9x::parseFootnote, data ) );
814 }
815 
emitHeaderData(SharedPtr<const Word97::SEP> sep)816 void Parser9x::emitHeaderData( SharedPtr<const Word97::SEP> sep )
817 {
818     // We don't care about non-existant headers
819     if ( !m_headers )
820         return;
821 
822     // MS Word stores headers in a very strange way, so we have to keep track
823     // of the section numbers. We use a 0-based index for convenience inside
824     // the header reading code. (Werner)
825     //
826     // Of course the file format has changed between Word 6/7 and Word 8, so
827     // I had to add a workaround... oh well.
828     HeaderData data( m_sectionNumber++ );
829 
830     if ( m_fib.nFib < Word8nFib ) {
831         data.headerMask = sep->grpfIhdt;
832         m_headers->headerMask( sep->grpfIhdt );
833     }
834     else {
835         if ( sep->fTitlePage )
836             data.headerMask |= HeaderData::HeaderFirst | HeaderData::FooterFirst;
837         if ( dop().fFacingPages )
838             data.headerMask |= HeaderData::HeaderEven | HeaderData::FooterEven;
839     }
840     m_textHandler->headersFound( make_functor( *this, &Parser9x::parseHeaders, data ) );
841 }
842 
emitDrawnObject(SharedPtr<const Word97::CHP> chp)843 void Parser9x::emitDrawnObject( SharedPtr<const Word97::CHP> chp )
844 {
845 #ifdef WV2_DEBUG_PICTURES
846     wvlog << "TODO: process 'Drawn object': " << static_cast<int> (chp->fSpec) << " "
847         << static_cast<int> (chp->fObj) << " " << static_cast<int> (chp->fOle2) << " "
848         << chp->fcPic_fcObj_lTagObj << std::endl;
849 #endif
850 
851 }
852 
emitPictureData(SharedPtr<const Word97::CHP> chp)853 void Parser9x::emitPictureData( SharedPtr<const Word97::CHP> chp )
854 {
855 #ifdef WV2_DEBUG_PICTURES
856     wvlog << "Found a picture; the fcPic is " << chp->fcPic_fcObj_lTagObj << std::endl;
857 #endif
858 
859     OLEStreamReader* stream( m_fib.nFib < Word8nFib ? m_wordDocument : m_data );
860     if ( !stream || static_cast<unsigned int>( chp->fcPic_fcObj_lTagObj ) >= stream->size() ) {
861         wvlog << "Error: Severe problems when trying to read an image. Skipping." << std::endl;
862         return;
863     }
864     stream->push();
865     stream->seek( chp->fcPic_fcObj_lTagObj, G_SEEK_SET );
866 
867     Word97::PICF* picf( 0 );
868     if ( m_fib.nFib < Word8nFib )
869         picf = new Word97::PICF( Word95::toWord97( Word95::PICF( stream, false ) ) );
870     else
871         picf = new Word97::PICF( stream, false );
872     stream->pop();
873 
874     if ( picf->cbHeader < 58 ) {
875         wvlog << "Error: Found an image with a PICF smaller than 58 bytes! Skipping the image." << std::endl;
876         delete picf;
877         return;
878     }
879     if ( picf->fError ) {
880         wvlog << "Information: Skipping the image, fError is set" << std::endl;
881         delete picf;
882         return;
883     }
884 
885 #ifdef WV2_DEBUG_PICTURES
886     wvlog << "picf:" << std::endl << " lcb=" << picf->lcb << " cbHeader=" << picf->cbHeader
887           <<  std::endl << " mfp.mm=" << picf->mfp.mm << " mfp.xExt=" << picf->mfp.xExt
888           << " mfp.yExt=" << picf->mfp.yExt << " mfp.hMF=" << picf->mfp.hMF << std::endl
889           << " dxaGoal=" << picf->dxaGoal << " dyaGoal=" << picf->dyaGoal << " mx="
890           << picf->mx << " my=" << picf->my << std::endl << " dxaCropLeft=" << picf->dxaCropLeft
891           << " dyaCropTop=" << picf->dyaCropTop << " dxaCropRight=" << picf->dxaCropRight
892           << " dyaCropBottom=" << picf->dyaCropBottom << std::endl << " fFrameEmpty="
893           << picf->fFrameEmpty << " fBitmap=" << picf->fBitmap << " fDrawHatch="
894           << picf->fDrawHatch << " fError=" << picf->fError << " bpp=" << picf->bpp
895           << std::endl << " dxaOrigin=" << picf->dxaOrigin << " dyaOrigin="
896           << picf->dyaOrigin << std::endl;
897 #endif
898 
899     SharedPtr<const Word97::PICF> sharedPicf( picf );
900     m_textHandler->pictureFound( make_functor( *this, &Parser9x::parsePicture,
901                                                PictureData( static_cast<U32>( chp->fcPic_fcObj_lTagObj ), sharedPicf ) ),
902                                  sharedPicf, chp );
903 }
904 
parseHeader(const HeaderData & data,unsigned char mask)905 void Parser9x::parseHeader( const HeaderData& data, unsigned char mask )
906 {
907 #ifdef WV2_DEBUG_HEADERS
908     wvlog << "parsing one header for section " << data.sectionNumber << ": mask=0x"
909           <<  std::hex << static_cast<int>( mask ) << std::dec << std::endl;
910 #endif
911 
912     // First we have to determine the CP start/lim for the header text. From what I
913     // found out Word 8 does it that way:
914     //    - At the begin of the plcfhdd there are always 6 "0 fields" (stoppers)
915     //    - The number of headers modulo 6 is always 0
916     // Word 6 does it completely different, of course :-}
917     std::pair<U32, U32> range( m_headers->findHeader( data.sectionNumber, mask ) );
918 
919     int length = range.second - range.first;
920 #ifdef WV2_DEBUG_HEADERS
921     wvlog << "found a range: start=" << range.first << " lim=" << range.second << std::endl
922           << "length: " << length << std::endl;
923 #endif
924     if ( length < 1 ) {
925 #ifdef WV2_DEBUG_HEADERS
926         wvlog << "Warning: Didn't find a valid CPs for this header -- faking it" << std::endl;
927 #endif
928         m_subDocumentHandler->headerStart( static_cast<HeaderData::Type>( mask ) );
929         SharedPtr<const ParagraphProperties> sharedProps( new ParagraphProperties );
930         m_textHandler->paragraphStart( sharedProps );
931         m_textHandler->paragraphEnd();
932         m_subDocumentHandler->headerEnd();
933         return;
934     }
935     else if ( length > 1 )
936         --length; // get rid of the trailing "end of header/footer" character
937 
938     saveState( length, Header );
939 
940     m_subDocumentHandler->headerStart( static_cast<HeaderData::Type>( mask ) );
941     parseHelper( Position( m_fib.ccpText + m_fib.ccpFtn + range.first, m_plcfpcd ) );
942     m_subDocumentHandler->headerEnd();
943 
944     restoreState();
945 }
946 
parsePictureEscher(const PictureData & data,OLEStreamReader * stream,int totalPicfSize,int picfStartPos)947 void Parser9x::parsePictureEscher( const PictureData& data, OLEStreamReader* stream,
948         int totalPicfSize, int picfStartPos )
949 {
950     int endOfPicf = picfStartPos + totalPicfSize;
951 #ifdef WV2_DEBUG_PICTURES
952     wvlog << "Parser9x::parsePictureEscher:\n  Total PICF size = " << totalPicfSize
953         << "\n  PICF start position = " << picfStartPos
954         << "\n  current stream position = " << stream->tell()
955         << "\n  endOfPicf = " << endOfPicf << std::endl;
956 #endif
957 
958     //from OOo code, looks like we have to process this type differently
959     //  read a byte in, and that's an offset before reading the image
960     if ( data.picf->mfp.mm == 102 )
961     {
962         U8 byte = stream->readU8();
963         int offset = static_cast<unsigned int> (byte);
964         wvlog << "  0x66 offset is " << offset << std::endl;
965         stream->seek( offset, G_SEEK_CUR );
966     }
967 
968     //now we do a big loop, just reading each record until we get to the end of the picf
969     do
970     {
971         //read header
972         EscherHeader header( stream );
973 #ifdef WV2_DEBUG_PICTURES
974         wvlog << "Starting new outer record: " << std::endl;
975         header.dump();
976 #endif
977         //process record
978         wvlog << header.getRecordType() << std::endl;
979         if( !header.isAtom() )
980         {
981             wvlog << "Reading container..." << std::endl;
982             //same process again with container
983             int endOfContainer = stream->tell() + header.recordSize();
984             do
985             {
986                 //read header
987                 EscherHeader h( stream );
988 #ifdef WV2_DEBUG_PICTURES
989                 wvlog << "  starting new inner record: " << std::endl;
990                 h.dump();
991                 wvlog << h.getRecordType() << std::endl;
992 #endif
993                 //process record
994                 if( h.isAtom() )
995                 {
996                     U8* s = new U8[ h.recordSize() ];
997                     stream->read( s, h.recordSize() );
998                     //clean up memory
999                     delete [] s;
1000                 }
1001                 else
1002                     wvlog << "  Error - container inside a container!" << std::endl;
1003             } while (stream->tell() != endOfContainer);
1004             wvlog << "End of container." << std::endl;
1005         } //finished processing a container
1006         else
1007         {
1008             wvlog << "Reading atom" << std::endl;
1009             if( header.getRecordType() == "msofbtBSE" )
1010             {
1011                 //process image
1012                 FBSE fbse( stream );
1013 #ifdef WV2_DEBUG_PICTURES
1014                 fbse.dump();
1015                 wvlog << "name length is " << fbse.getNameLength() << std::endl;
1016 #endif
1017                 //the data is actually in a new record!
1018                 EscherHeader h( stream );
1019 #ifdef WV2_DEBUG_PICTURES
1020                 wvlog << " reading data record after fbse record" << std::endl;
1021                 h.dump();
1022 #endif
1023                 string blipType = h.getRecordType();
1024                 Blip blip( stream, blipType );
1025 #ifdef WV2_DEBUG_PICTURES
1026                 wvlog << "  Blip record dump:" << std::endl;
1027                 blip.dump();
1028 #endif
1029                 //if Blip is compressed, we have to process differently
1030                 if( blip.isCompressed() )
1031                 {
1032                     wvlog << "Decompressing image data at " << stream->tell() << "..." << std::endl;
1033                     ZCodec z( 0x8000, 0x8000 );
1034                     z.BeginCompression();
1035                     z.SetBreak(blip.compressedImageSize());
1036                     std::vector<U8> outBuffer;
1037                     int err = z.Decompress( *stream, &outBuffer );
1038 #ifdef WV2_DEBUG_PICTURES
1039                     wvlog << "  err=" << err << std::endl;
1040                     wvlog << "  outBuffer size = " << outBuffer.size() << std::endl;
1041 #endif
1042                     z.EndCompression(&outBuffer);
1043                     //pass vector to escherData instead of OLEImageReader
1044                     m_pictureHandler->escherData(outBuffer, data.picf, fbse.getBlipType());
1045                 }
1046                 //normal data, just create an OLEImageReader to be read
1047                 else
1048                 {
1049                     int start = stream->tell();
1050                     int limit = endOfPicf; //TODO is it possible that it wouldn't go all the way to the end?
1051                     OLEImageReader reader( *stream, start, limit);
1052                     m_pictureHandler->escherData(reader, data.picf, fbse.getBlipType());
1053                     //we've read the data in OLEImageReader, so advance stream to the
1054                     //end of OLEImageReader
1055                     stream->seek( endOfPicf, G_SEEK_SET );
1056                 }
1057             }
1058             else
1059             {
1060                 //we can't really process this atom, because we don't recognize the type
1061                 //so just skip to the end of this picf
1062                 wvlog << "  unrecognized atom, so we'll skip this image" << std::endl;
1063                 stream->seek( endOfPicf );
1064                 //U8* string = new U8[ header.recordSize() ];
1065                 //stream->read( string, header.recordSize() );
1066                 //clean up memory
1067                 //delete [] string;
1068             }
1069             wvlog << "End of atom." << std::endl;
1070         } //finished processing an atom record
1071         wvlog << "current position: " << stream->tell() << ", endOfPicf:" << endOfPicf << std::endl;
1072         if( stream->tell() > endOfPicf )
1073             wvlog << "Error! We read past the end of the picture!" << std::endl;
1074     } while (stream->tell() != endOfPicf); //end of record
1075 }
1076 
parsePictureExternalHelper(const PictureData & data,OLEStreamReader * stream)1077 void Parser9x::parsePictureExternalHelper( const PictureData& data, OLEStreamReader* stream )
1078 {
1079 #ifdef WV2_DEBUG_PICTURES
1080     wvlog << "Parser9x::parsePictureExternalHelper" << std::endl;
1081 #endif
1082 
1083     // Guessing... some testing would be nice
1084     const U8 length( stream->readU8() );
1085     U8* string = new U8[ length ];
1086     stream->read( string, length );
1087     // Do we have to use the textconverter here?
1088     UString ustring( m_textconverter->convert( reinterpret_cast<char*>( string ),
1089                                                static_cast<unsigned int>( length ) ) );
1090     delete [] string;
1091 
1092     m_pictureHandler->externalImage( ustring, data.picf );
1093 }
1094 
parsePictureBitmapHelper(const PictureData & data,OLEStreamReader * stream)1095 void Parser9x::parsePictureBitmapHelper( const PictureData& data, OLEStreamReader* stream )
1096 {
1097 #ifdef WV2_DEBUG_PICTURES
1098     wvlog << "Parser9x::parsePictureBitmapHelper" << std::endl;
1099 #endif
1100     OLEImageReader reader( *stream, data.fcPic + data.picf->cbHeader, data.fcPic + data.picf->lcb );
1101     m_pictureHandler->bitmapData( reader, data.picf );
1102 }
1103 
parsePictureWmfHelper(const PictureData & data,OLEStreamReader * stream)1104 void Parser9x::parsePictureWmfHelper( const PictureData& data, OLEStreamReader* stream )
1105 {
1106 #ifdef WV2_DEBUG_PICTURES
1107     wvlog << "Parser9x::parsePictureWmfHelper" << std::endl;
1108 #endif
1109     // ###### TODO: Handle the Mac case (x-wmf + PICT)
1110     // ###### CHECK: Do we want to do anything about .emf files?
1111     OLEImageReader reader( *stream, data.fcPic + data.picf->cbHeader, data.fcPic + data.picf->lcb );
1112     m_pictureHandler->wmfData( reader, data.picf );
1113 }
1114 
saveState(U32 newRemainingChars,SubDocument newSubDocument,ParsingMode newParsingMode)1115 void Parser9x::saveState( U32 newRemainingChars, SubDocument newSubDocument, ParsingMode newParsingMode )
1116 {
1117     oldParsingStates.push( ParsingState( m_tableRowStart, m_tableRowLength, m_cellMarkFound, m_remainingCells,
1118                                          m_currentParagraph, m_remainingChars, m_sectionNumber, m_subDocument,
1119                                          m_parsingMode ) );
1120     m_tableRowStart = 0;
1121     m_cellMarkFound = false;
1122     m_currentParagraph = new Paragraph;
1123     m_remainingChars = newRemainingChars;
1124     m_subDocument = newSubDocument;
1125     m_parsingMode = newParsingMode;
1126 
1127     m_wordDocument->push();
1128     if ( m_data )
1129         m_data->push();
1130 }
1131 
restoreState()1132 void Parser9x::restoreState()
1133 {
1134     if ( oldParsingStates.empty() ) {
1135         wvlog << "Bug: You messed up the save/restore stack! The stack is empty" << std::endl;
1136         return;
1137     }
1138 
1139     if ( m_data )
1140         m_data->pop();
1141     m_wordDocument->pop();
1142 
1143     ParsingState ps( oldParsingStates.top() );
1144     oldParsingStates.pop();
1145 
1146     if ( m_tableRowStart )
1147         wvlog << "Bug: We still have to process the table row." << std::endl;
1148     delete m_tableRowStart;   // Should be a no-op, but I hate mem-leaks even for buggy code ;-)
1149     m_tableRowStart = ps.tableRowStart;
1150     m_tableRowLength = ps.tableRowLength;
1151     m_cellMarkFound = ps.cellMarkFound;
1152     m_remainingCells = ps.remainingCells;
1153 
1154     if ( !m_currentParagraph->empty() )
1155         wvlog << "Bug: The current paragraph isn't empty." << std::endl;
1156     delete m_currentParagraph;
1157     m_currentParagraph = ps.paragraph;
1158 
1159     if ( m_remainingChars != 0 )
1160         wvlog << "Bug: Still got " << m_remainingChars << " remaining chars." << std::endl;
1161     m_remainingChars = ps.remainingChars;
1162     m_sectionNumber = ps.sectionNumber;
1163 
1164     m_subDocument = ps.subDocument;
1165     m_parsingMode = ps.parsingMode;
1166 }
1167 
toLocalCP(U32 globalCP) const1168 U32 Parser9x::toLocalCP( U32 globalCP ) const
1169 {
1170     if ( globalCP < m_fib.ccpText )
1171         return globalCP;
1172     globalCP -= m_fib.ccpText;
1173 
1174     if ( globalCP < m_fib.ccpFtn )
1175         return globalCP;
1176     globalCP -= m_fib.ccpFtn;
1177 
1178     if ( globalCP < m_fib.ccpHdd )
1179         return globalCP;
1180     globalCP -= m_fib.ccpHdd;
1181 
1182     if ( globalCP < m_fib.ccpMcr )
1183         return globalCP;
1184     globalCP -= m_fib.ccpMcr;
1185 
1186     if ( globalCP < m_fib.ccpAtn )
1187         return globalCP;
1188     globalCP -= m_fib.ccpAtn;
1189 
1190     if ( globalCP < m_fib.ccpEdn )
1191         return globalCP;
1192     globalCP -= m_fib.ccpEdn;
1193 
1194     if ( globalCP < m_fib.ccpTxbx )
1195         return globalCP;
1196     globalCP -= m_fib.ccpTxbx;
1197 
1198     if ( globalCP < m_fib.ccpHdrTxbx )
1199         return globalCP;
1200     globalCP -= m_fib.ccpHdrTxbx;
1201 
1202     wvlog << "Warning: You aimed " << globalCP << " characters past the end of the text!" << std::endl;
1203     return globalCP;
1204 }
1205 
accumulativeLength(int len,const Parser9x::Chunk & chunk)1206 int Parser9x::accumulativeLength( int len, const Parser9x::Chunk& chunk )
1207 {
1208     return len + chunk.m_text.length();
1209 }
1210