1 /* This file is part of the wvWare 2 project
2 Copyright (C) 2001-2003 Werner Trobin <trobin@kde.org>
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License version 2 as published by the Free Software Foundation.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
12
13 You should have received a copy of the GNU Library General Public License
14 along with this library; see the file COPYING.LIB. If not, write to
15 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 Boston, MA 02111-1307, USA.
17 */
18
19 #include "parser9x.h"
20 #include "properties97.h"
21 #include "styles.h"
22 #include "word97_helper.h"
23 #include "lists.h"
24 #include "handlers.h"
25 #include "footnotes97.h"
26 #include "headers.h"
27 #include "fonts.h"
28 #include "textconverter.h"
29 #include "olestream.h"
30 #include "fields.h"
31 #include "graphics.h"
32 #include "associatedstrings.h"
33 #include "paragraphproperties.h"
34 #include "functor.h"
35 #include "functordata.h"
36 #include "word95_generated.h"
37 #include "convert.h"
38 #include "zcodec.hxx"
39 #include "wvlog.h"
40
41 #include <gsf/gsf-input.h>
42 #include <gsf/gsf-output.h>
43 #include <gsf/gsf-input-memory.h>
44 #include <gsf/gsf-output-memory.h>
45
46 #include <numeric>
47 #include <string.h>
48
49 using namespace wvWare;
50
51
Position(U32 cp,const PLCF<Word97::PCD> * plcfpcd)52 Parser9x::Position::Position( U32 cp, const PLCF<Word97::PCD>* plcfpcd ) :
53 piece( 0 ), offset( cp )
54 {
55 PLCFIterator<Word97::PCD> it( *plcfpcd );
56 for ( ; it.current(); ++it, ++piece ) {
57 if ( it.currentLim() > cp && it.currentStart() <= cp )
58 break;
59 offset -= it.currentRun();
60 }
61 }
62
63
Parser9x(OLEStorage * storage,OLEStreamReader * wordDocument,const Word97::FIB & fib)64 Parser9x::Parser9x( OLEStorage* storage, OLEStreamReader* wordDocument, const Word97::FIB& fib ) :
65 Parser( storage, wordDocument ), m_fib( fib ), m_table( 0 ), m_data( 0 ), m_properties( 0 ),
66 m_headers( 0 ), m_lists( 0 ), m_textconverter( 0 ), m_fields( 0 ), m_footnotes( 0 ),
67 m_fonts( 0 ), m_drawings( 0 ), m_plcfpcd( 0 ), m_tableRowStart( 0 ), m_tableRowLength( 0 ),
68 m_cellMarkFound( false ), m_remainingCells( 0 ), m_currentParagraph( new Paragraph ),
69 m_remainingChars( 0 ), m_sectionNumber( 0 ), m_subDocument( None ), m_parsingMode( Default )
70 {
71 if ( !isOk() )
72 return;
73
74 m_table = storage->createStreamReader( tableStream() );
75 if ( !m_table || !m_table->isValid() ) {
76 wvlog << "Error: Couldn't open the table stream (i.e. [0|1]Table or WordDocument)" << std::endl;
77 m_okay = false;
78 return;
79 }
80
81 m_data = storage->createStreamReader( "Data" );
82 if ( !m_data || !m_data->isValid() ) {
83 wvlog << "Information: Couldn't open the Data stream, no big deal" << std::endl;
84 delete m_data;
85 m_data = 0;
86 }
87
88 #ifdef WV2_DUMP_FIB
89 wvlog << "Dumping some parts of the FIB: " << std::endl;
90 wvlog << " wIdent=" << m_fib.wIdent << std::endl;
91 wvlog << " nFib=" << m_fib.nFib << std::endl;
92 wvlog << " nFibBack=" << m_fib.nFibBack << std::endl;
93 wvlog << " lid=0x" << std::hex << m_fib.lid << std::dec << std::endl;
94 wvlog << " lidFE=0x" << std::hex << m_fib.lidFE << std::dec << std::endl;
95 wvlog << " fEncrypted=" << m_fib.fEncrypted << std::endl;
96 wvlog << " chs=" << m_fib.chs << std::endl;
97 wvlog << " fcMin=" << m_fib.fcMin << std::endl;
98 wvlog << " fcMac=" << m_fib.fcMac << std::endl;
99 wvlog << " ccpText=" << m_fib.ccpText << std::endl;
100 wvlog << " ccpFtn=" << m_fib.ccpFtn << std::endl;
101 wvlog << " ccpHdd=" << m_fib.ccpHdd << std::endl;
102 wvlog << " ccpMcr=" << m_fib.ccpMcr << std::endl;
103 wvlog << " ccpAtn=" << m_fib.ccpAtn << std::endl;
104 wvlog << " ccpEdn=" << m_fib.ccpEdn << std::endl;
105 wvlog << " ccpTxbx=" << m_fib.ccpTxbx << std::endl;
106 wvlog << " ccpHdrTxbx=" << m_fib.ccpHdrTxbx << std::endl;
107 wvlog << " pnFbpChpFirst=" << m_fib.pnFbpChpFirst << std::endl;
108 wvlog << " pnChpFirst=" << m_fib.pnChpFirst << std::endl;
109 wvlog << " cpnBteChp=" << m_fib.cpnBteChp << std::endl;
110 wvlog << " pnFbpPapFirst=" << m_fib.pnFbpPapFirst << std::endl;
111 wvlog << " pnPapFirst=" << m_fib.pnPapFirst << std::endl;
112 wvlog << " cpnBtePap=" << m_fib.cpnBtePap << std::endl;
113 #endif
114
115 // Initialize all the cached data structures like stylesheets, fonts,
116 // textconverter,...
117 init();
118 }
119
~Parser9x()120 Parser9x::~Parser9x()
121 {
122 // Sanity check
123 if ( !oldParsingStates.empty() || m_subDocument != None )
124 wvlog << "Bug: Someone messed up the save/restore stack!" << std::endl;
125
126 delete m_currentParagraph;
127 delete m_tableRowStart;
128 delete m_drawings;
129 delete m_fonts;
130 delete m_plcfpcd;
131 delete m_headers;
132 delete m_footnotes;
133 delete m_fields;
134 delete m_textconverter;
135 delete m_properties;
136 delete m_lists;
137 delete m_data;
138 delete m_table;
139 }
140
parse()141 bool Parser9x::parse()
142 {
143 if ( !isOk() )
144 return false;
145
146 if ( m_fib.fEncrypted ) {
147 // There is some code out there to break this "encryption", do we want
148 // to implement that?
149 // We could either ask for a password or cheat a bit :-)
150 wvlog << "Error: The document is encrypted." << std::endl;
151 return false;
152 }
153
154 if ( m_fib.lcbClx == 0 )
155 fakePieceTable();
156 else {
157 // Get the piece table
158 if ( !readPieceTable() )
159 return false;
160 }
161
162 // start parsing the body
163 if ( !parseBody() )
164 return false;
165 return true;
166 }
167
fib() const168 const Word97::FIB& Parser9x::fib() const
169 {
170 return m_fib;
171 }
172
dop() const173 const Word97::DOP& Parser9x::dop() const
174 {
175 return m_properties->dop();
176 }
177
font(S16 ftc) const178 const Word97::FFN& Parser9x::font( S16 ftc ) const
179 {
180 return m_fonts->font( ftc );
181 }
182
associatedStrings()183 AssociatedStrings Parser9x::associatedStrings()
184 {
185 return AssociatedStrings( m_fib.fcSttbfAssoc, m_fib.lcbSttbfAssoc,
186 m_fib.fFarEast ? m_fib.lidFE : m_fib.lid, m_table );
187 }
188
styleSheet() const189 const StyleSheet& Parser9x::styleSheet() const
190 {
191 return m_properties->styleSheet();
192 }
193
parseHeaders(const HeaderData & data)194 void Parser9x::parseHeaders( const HeaderData& data )
195 {
196 m_subDocumentHandler->headersStart();
197 for ( unsigned char mask = HeaderData::HeaderEven; mask <= HeaderData::FooterFirst; mask <<= 1 )
198 if ( mask & data.headerMask )
199 parseHeader( data, mask );
200 m_subDocumentHandler->headersEnd();
201 }
202
parseFootnote(const FootnoteData & data)203 void Parser9x::parseFootnote( const FootnoteData& data )
204 {
205 #ifdef WV2_DEBUG_FOOTNOTES
206 wvlog << "Parser9x::parseFootnote() #####################" << std::endl;
207 #endif
208 if ( data.limCP - data.startCP == 0 ) // shouldn't happen, but well...
209 return;
210
211 saveState( data.limCP - data.startCP, data.type == FootnoteData::Footnote ? Footnote : Endnote );
212 m_subDocumentHandler->footnoteStart();
213
214 U32 offset = m_fib.ccpText + data.startCP;
215 if ( data.type == FootnoteData::Endnote )
216 offset += m_fib.ccpFtn + m_fib.ccpHdd + m_fib.ccpMcr + m_fib.ccpAtn;
217 parseHelper( Position( offset, m_plcfpcd ) );
218
219 m_subDocumentHandler->footnoteEnd();
220 restoreState();
221 #ifdef WV2_DEBUG_FOOTNOTES
222 wvlog << "Parser9x::parseFootnote() done ################" << std::endl;
223 #endif
224 }
225
parseTableRow(const TableRowData & data)226 void Parser9x::parseTableRow( const TableRowData& data )
227 {
228 #ifdef WV2_DEBUG_TABLES
229 wvlog << "Parser9x::parseTableRow(): startPiece=" << data.startPiece << " startOffset="
230 << data.startOffset << " length=" << data.length << std::endl;
231 #endif
232
233 if ( data.length == 0 ) // idiot safe ;-)
234 return;
235
236 saveState( data.length, static_cast<SubDocument>( data.subDocument ), Table );
237 m_remainingCells = data.tap->itcMac;
238 m_tableHandler->tableRowStart( data.tap );
239 m_tableHandler->tableCellStart();
240
241 parseHelper( Position( data.startPiece, data.startOffset ) );
242
243 m_tableHandler->tableRowEnd();
244 restoreState();
245
246 #ifdef WV2_DEBUG_TABLES
247 wvlog << "Parser9x::parseTableRow() done #####################" << std::endl;
248 #endif
249 }
250
parsePicture(const PictureData & data)251 void Parser9x::parsePicture( const PictureData& data )
252 {
253 wvlog << "Parser9x::parsePicture" << std::endl;
254 OLEStreamReader* stream = m_fib.nFib < Word8nFib ? m_wordDocument : m_data;
255 stream->push(); // saveState would be overkill
256
257 //go to the position in the stream after the PICF, where the actual picture data/escher is
258 if ( !stream->seek( data.fcPic + data.picf->cbHeader, G_SEEK_SET ) ) {
259 wvlog << "Error: Parser9x::parsePicture couldn't seek properly" << std::endl;
260 stream->pop();
261 return;
262 }
263
264 if ( data.picf->mfp.mm == 0x64 || data.picf->mfp.mm == 0x66 ) {
265 wvlog << "Linked graphic in Escher object" << std::endl;
266 parsePictureEscher( data, stream, data.picf->lcb, data.fcPic );
267 }
268 else {
269 switch ( data.picf->mfp.mm ) {
270 case 94: // A .bmp or a .gif name is stored after the PICF
271 case 98: // The .tiff name is stored after the PICF
272 parsePictureExternalHelper( data, stream );
273 break;
274 case 99: // A full bmp is stored after the PICF -- not handled in OOo??
275 parsePictureBitmapHelper( data, stream );
276 break;
277 default: // It has to be a .wmf or .emf file (right after the PICF)
278 wvlog << "assuming WMF/EMF file... not sure this is correct" << std::endl;
279 parsePictureWmfHelper( data, stream );
280 break;
281 }
282 }
283 stream->pop();
284 }
285
tableStream() const286 std::string Parser9x::tableStream() const
287 {
288 if ( m_fib.nFib < Word8nFib )
289 return "WordDocument"; // Word 6 or Word 7 (==95)
290 else
291 return m_fib.fWhichTblStm ? "1Table" : "0Table"; // Word 8 (==97) or newer
292 }
293
init()294 void Parser9x::init()
295 {
296 if ( m_fib.fFarEast )
297 m_textconverter = new TextConverter( m_fib.lidFE );
298 else
299 m_textconverter = new TextConverter( m_fib.lid );
300
301 // Get hold of all the SEP/PAP/CHP related structures and the StyleSheet
302 m_properties = new Properties97( m_wordDocument, m_table, m_fib );
303
304 if ( m_fib.nFib < Word8nFib ) // Word67
305 m_lists = new ListInfoProvider( &styleSheet() );
306 else
307 m_lists = new ListInfoProvider( m_table, m_fib, &m_properties->styleSheet() );
308
309 m_fonts = new FontCollection( m_table, m_fib );
310 m_fields = new Fields( m_table, m_fib );
311 m_drawings = new Drawings( m_table, m_fib );
312
313 if ( m_fib.ccpFtn != 0 )
314 m_footnotes = new Footnotes97( m_table, m_fib );
315 }
316
readPieceTable()317 bool Parser9x::readPieceTable()
318 {
319 m_table->seek( m_fib.fcClx );
320 // first skip the leading grpprl blocks, we'll re-read them
321 // if we need them later (no caching here)
322 U8 blockType = m_table->readU8();
323 while ( blockType == wvWare::clxtGrpprl ) {
324 U16 size = m_table->readU16();
325 #if WV2_DUMP_PIECE_TABLE > 0
326 wvlog << "Found a clxtGrpprl (size=" << size << ")" << std::endl;
327 #endif
328 m_table->seek( size, G_SEEK_CUR );
329 blockType = m_table->readU8();
330 }
331 if ( blockType == wvWare::clxtPlcfpcd ) {
332 U32 size = m_table->readU32();
333 #if WV2_DUMP_PIECE_TABLE > 0
334 wvlog << "Found the clxtPlcfpcd (size=" << size << ")" << std::endl;
335 #endif
336 m_plcfpcd = new PLCF<Word97::PCD>( size, m_table, false );
337
338 #if WV2_DUMP_PIECE_TABLE > 1
339 PLCFIterator<Word97::PCD> it( *m_plcfpcd );
340 for ( int i = 0; it.current(); ++it, ++i ) {
341 wvlog << "Piece Table Entry(" << i << "): " << std::endl;
342 wvlog << " start: " << it.currentStart() << std::endl;
343 wvlog << " lim: " << it.currentLim() << std::endl;
344 wvlog << " complex: " << it.current()->prm.fComplex << std::endl;
345 if ( it.current()->prm.fComplex )
346 wvlog << " igrpprl: " << it.current()->prm.toPRM2().igrpprl << std::endl;
347 else
348 wvlog << " isprm: " << it.current()->prm.isprm << std::endl;
349
350 U32 fc = it.current()->fc;
351 U32 limit = it.currentRun() << 1;
352 wvlog << " value: " << fc << std::endl;
353 if ( fc & 0x40000000 ) {
354 fc = ( fc & 0xbfffffff ) >> 1;
355 limit >>= 1;
356 wvlog << " value (cleared 2nd MSB, div. by 2): " << fc << std::endl;
357 }
358 m_wordDocument->seek( fc );
359 wvlog << " position: " << m_wordDocument->tell() << ", limit: " << limit << std::endl;
360 for ( unsigned int j = 0; j < limit; ++j ) {
361 U8 foo = m_wordDocument->readU8();
362 if ( foo > 31 )
363 wvlog << static_cast<char>( foo );
364 else if ( foo == PARAGRAPH_MARK )
365 wvlog << std::endl;
366 else if ( foo > 0 )
367 wvlog << "{" << static_cast<int>( foo ) << "}";
368 else
369 wvlog << "_";
370 }
371 wvlog << std::endl << " position: " << m_wordDocument->tell() << ", limit: " << limit << std::endl;
372 }
373 #endif
374 }
375 else {
376 wvlog << "Oooops, couldn't find the piece table." << std::endl;
377 return false;
378 }
379 return true;
380 }
381
fakePieceTable()382 void Parser9x::fakePieceTable()
383 {
384 U32 fakePlcfPCD[ 4 ];
385 // The first CP is 0 (endianness doesn't matter :-)
386 fakePlcfPCD[ 0 ] = 0;
387 // The second CP corresponds to the length of the document
388 fakePlcfPCD[ 1 ] = toLittleEndian( m_fib.ccpText + m_fib.ccpFtn + m_fib.ccpHdd + m_fib.ccpMcr +
389 m_fib.ccpAtn + m_fib.ccpEdn + m_fib.ccpTxbx + m_fib.ccpHdrTxbx );
390
391 // Now fake a matching PCD
392 U8* tmp( reinterpret_cast<U8*>( &fakePlcfPCD[0] ) );
393 tmp += 8;
394 *tmp++ = 0; // first the bitfields (unused)
395 *tmp++ = 0;
396 U32 fcMin = m_fib.fcMin << 1;
397 fcMin |= 0x40000000;
398 *tmp++ = static_cast<U8>( fcMin & 0x000000ff );
399 *tmp++ = static_cast<U8>( ( fcMin & 0x0000ff00 ) >> 8 ); // then store the
400 *tmp++ = static_cast<U8>( ( fcMin & 0x00ff0000 ) >> 16 ); // fc in little
401 *tmp++ = static_cast<U8>( ( fcMin & 0xff000000 ) >> 24 ); // endian style
402 *tmp++ = 0; // then an empty PRM
403 *tmp++ = 0;
404
405 tmp = reinterpret_cast<U8*>( &fakePlcfPCD[0] );
406 m_plcfpcd = new PLCF<Word97::PCD>( 16, tmp );
407 }
408
parseBody()409 bool Parser9x::parseBody()
410 {
411 saveState( m_fib.ccpText, Main );
412 m_subDocumentHandler->bodyStart();
413
414 SharedPtr<const Word97::SEP> sep( m_properties->sepForCP( 0 ) );
415 if ( !sep )
416 sep = new Word97::SEP(); // don't pass 0 pointers in any case
417 m_textHandler->sectionStart( sep ); // First section, starting at CP 0
418 emitHeaderData( sep );
419 sep = 0; // get rid of the huge SEP
420
421 // Process all the pieces belonging to the main document text
422 parseHelper( Position( 0, static_cast<U32>( 0 ) ) );
423
424 // Implicit end of the section
425 m_textHandler->sectionEnd();
426 m_subDocumentHandler->bodyEnd();
427 restoreState();
428 return true;
429 }
430
parseHelper(Position startPos)431 void Parser9x::parseHelper( Position startPos )
432 {
433 PLCFIterator<Word97::PCD> it( m_plcfpcd->at( startPos.piece ) );
434
435 while ( m_remainingChars > 0 && it.current() ) {
436 U32 fc = it.current()->fc; // Start FC of this piece
437 bool unicode;
438 realFC( fc, unicode );
439
440 U32 limit = it.currentRun(); // Number of characters in this piece
441
442 // Check whether the text starts somewhere within the piece, reset at the end of the loop body
443 if ( startPos.offset != 0 ) {
444 fc += unicode ? startPos.offset * 2 : startPos.offset;
445 limit -= startPos.offset;
446 }
447
448 limit = limit > m_remainingChars ? m_remainingChars : limit;
449 m_wordDocument->seek( fc );
450
451 if ( unicode ) {
452 XCHAR* string = new XCHAR[ limit ];
453 // First read the whole piece
454 for ( unsigned int j = 0; j < limit; ++j ) {
455 string[ j ] = m_wordDocument->readU16();
456 if ( ( string[ j ] & 0xff00 ) == 0xf000 ) {
457 // Microsoft uses a Private Unicode Area (PUA) to store the characters of the
458 // Symbol and the Wingdings font. We simply clear these bits to shift the
459 // characters to 0x00XX and hope the correct font is installed. If the font
460 // isn't there, the user will get some ASCII text instead of symbols :}
461 //wvlog << "private unicode area detected -- cropping" << std::endl;
462 string[ j ] &= 0x00ff;
463 }
464 }
465 processPiece<XCHAR>( string, fc, limit, startPos ); // also takes care to delete [] string
466 }
467 else {
468 U8* string = new U8[ limit ];
469 m_wordDocument->read( string, limit );
470 processPiece<U8>( string, fc, limit, startPos ); // also takes care to delete [] string
471 }
472 m_remainingChars -= limit;
473 ++it;
474 ++startPos.piece;
475 startPos.offset = 0; // just in case it was != 0 in the first iteration
476 }
477 }
478
479 template<typename String>
processPiece(String * string,U32 fc,U32 limit,const Position & position)480 void Parser9x::processPiece( String* string, U32 fc, U32 limit, const Position& position )
481 {
482 // Take a closer look at the piece we just read. "start" and "index" are
483 // counted in character positions (take care!)
484 unsigned int start = 0;
485 unsigned int index = 0;
486 while ( index < limit ) {
487 switch( string[ index ] ) {
488 case SECTION_MARK:
489 {
490 if ( !m_currentParagraph->empty() || start != index ) {
491 // No "index - start + 1" here, as we don't want to copy the section mark!
492 UString ustring( processPieceStringHelper( string, start, index ) );
493 m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
494 fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
495 processParagraph( fc + index * sizeof( String ) );
496 }
497 start = ++index;
498
499 SharedPtr<const Word97::SEP> sep( m_properties->sepForCP( m_fib.ccpText - m_remainingChars + index ) );
500 if ( sep ) {
501 // It's not only a page break, it's a new section
502 m_textHandler->sectionEnd();
503 m_textHandler->sectionStart( sep );
504 emitHeaderData( sep );
505 }
506 else
507 m_textHandler->pageBreak();
508 break;
509 }
510 case CELL_MARK: // same ASCII code as a ROW_MARK
511 m_cellMarkFound = true;
512 // Fall-through intended. A row/cell end is also a paragraph end.
513 case PARAGRAPH_MARK:
514 {
515 // No "index - start + 1" here, as we don't want to copy the paragraph mark!
516 UString ustring( processPieceStringHelper( string, start, index ) );
517 m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
518 fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
519 processParagraph( fc + index * sizeof( String ) );
520 m_cellMarkFound = false;
521 start = ++index;
522 break;
523 }
524 // "Special" characters
525 case TAB:
526 string[ index ] = m_inlineHandler->tab();
527 ++index;
528 break;
529 case HARD_LINE_BREAK:
530 string[ index ] = m_inlineHandler->hardLineBreak();
531 ++index;
532 break;
533 case COLUMN_BREAK:
534 string[ index ] = m_inlineHandler->columnBreak();
535 ++index;
536 break;
537 case NON_BREAKING_HYPHEN:
538 string[ index ] = m_inlineHandler->nonBreakingHyphen();
539 ++index;
540 break;
541 case NON_REQUIRED_HYPHEN:
542 string[ index ] = m_inlineHandler->nonRequiredHyphen();
543 ++index;
544 break;
545 case NON_BREAKING_SPACE:
546 string[ index ] = m_inlineHandler->nonBreakingSpace();
547 ++index;
548 break;
549 default:
550 ++index;
551 break;
552 }
553 }
554 if ( start < limit ) {
555 // Finally we have to add the remaining text to the current paragaph (if there is any)
556 UString ustring( processPieceStringHelper( string, start, limit ) );
557 m_currentParagraph->push_back( Chunk( ustring, Position( position.piece, position.offset + start ),
558 fc + start * sizeof( String ), sizeof( String ) == sizeof( XCHAR ) ) );
559 }
560 delete [] string;
561 }
562
processPieceStringHelper(XCHAR * string,unsigned int start,unsigned int index) const563 UString Parser9x::processPieceStringHelper( XCHAR* string, unsigned int start, unsigned int index ) const
564 {
565 return UString( reinterpret_cast<const wvWare::UChar *>( &string[ start ] ), index - start );
566 }
567
processPieceStringHelper(U8 * string,unsigned int start,unsigned int index) const568 UString Parser9x::processPieceStringHelper( U8* string, unsigned int start, unsigned int index ) const
569 {
570 return m_textconverter->convert( reinterpret_cast<char*>( &string[ start ] ), index - start );
571 }
572
processParagraph(U32 fc)573 void Parser9x::processParagraph( U32 fc )
574 {
575 // Get the PAP structure as it was at the last full-save
576 ParagraphProperties* props( m_properties->fullSavedPap( fc, m_data ) );
577 // ...and apply the latest changes, then the PAP is completely restored
578 m_properties->applyClxGrpprl( m_plcfpcd->at( m_currentParagraph->back().m_position.piece ).current(), m_fib.fcClx, props );
579
580 // Skim the tables first, as soon as the functor is invoked we have to
581 // parse them and emit the text
582 if ( m_parsingMode == Default && props->pap().fInTable ) {
583 if ( !m_tableRowStart ) {
584 m_tableRowStart = new Position( m_currentParagraph->front().m_position );
585 m_tableRowLength = 0;
586 #ifdef WV2_DEBUG_TABLES
587 wvlog << "Start of a table row: piece=" << m_tableRowStart->piece << " offset="
588 << m_tableRowStart->offset << std::endl;
589 #endif
590 }
591 m_tableRowLength += std::accumulate( m_currentParagraph->begin(), m_currentParagraph->end(),
592 1, &Parser9x::accumulativeLength ); // init == 1 because of the parag. mark!
593 if ( props->pap().fTtp ) {
594 // Restore the table properties of this row
595 Word97::TAP* tap = m_properties->fullSavedTap( fc, m_data );
596 m_properties->applyClxGrpprl( m_plcfpcd->at( m_currentParagraph->back().m_position.piece ).current(),
597 m_fib.fcClx, tap, m_properties->styleByIndex( props->pap().istd ) );
598
599 SharedPtr<const Word97::TAP> sharedTap( tap );
600 // We decrement the length by 1 that the trailing row mark doesn't emit
601 // one empty paragraph during parsing.
602 m_textHandler->tableRowFound( make_functor( *this, &Parser9x::parseTableRow,
603 TableRowData( m_tableRowStart->piece, m_tableRowStart->offset,
604 m_tableRowLength - 1, static_cast<int>( m_subDocument ),
605 sharedTap ) ),
606 sharedTap );
607 delete m_tableRowStart;
608 m_tableRowStart = 0;
609 }
610 delete props;
611 }
612 else {
613 // Now that we have the complete PAP, let's see if this paragraph belongs to a list
614 props->createListInfo( *m_lists );
615
616 SharedPtr<const ParagraphProperties> sharedProps( props ); // keep it that way, else the ParagraphProperties get deleted!
617 m_textHandler->paragraphStart( sharedProps );
618
619 // Get the appropriate style for this paragraph
620 const Style* style = m_properties->styleByIndex( props->pap().istd );
621 if ( !style ) {
622 wvlog << "Warning: Huh, really obscure error, couldn't find the Style for the current PAP -- skipping" << std::endl;
623 return;
624 }
625
626 // Now walk the paragraph, chunk for chunk
627 std::list<Chunk>::const_iterator it = m_currentParagraph->begin();
628 std::list<Chunk>::const_iterator end = m_currentParagraph->end();
629 for ( ; it != end; ++it ) {
630 U32 index = 0;
631 const U32 limit = ( *it ).m_text.length();
632 const PLCFIterator<Word97::PCD> pcdIt( m_plcfpcd->at( ( *it ).m_position.piece ) );
633
634 while ( index < limit ) {
635 Word97::CHP* chp = new Word97::CHP( style->chp() );
636 U32 length = m_properties->fullSavedChp( ( *it ).m_startFC + index * ( ( *it ).m_isUnicode ? 2 : 1 ), chp, style );
637 if ( ( *it ).m_isUnicode )
638 length >>= 1;
639 length = length > limit - index ? limit - index : length;
640
641 m_properties->applyClxGrpprl( pcdIt.current(), m_fib.fcClx, chp, style );
642 SharedPtr<const Word97::CHP> sharedChp( chp ); // keep it that way, else the CHP gets deleted!
643 processChunk( *it, chp, length, index, pcdIt.currentStart() );
644 index += length;
645 }
646 }
647 m_textHandler->paragraphEnd();
648
649 if ( m_cellMarkFound ) {
650 m_tableHandler->tableCellEnd();
651 if ( --m_remainingCells )
652 m_tableHandler->tableCellStart();
653 }
654 }
655 m_currentParagraph->clear();
656 }
657
processChunk(const Chunk & chunk,SharedPtr<const Word97::CHP> chp,U32 length,U32 index,U32 currentStart)658 void Parser9x::processChunk( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
659 U32 length, U32 index, U32 currentStart )
660 {
661 // Some characters have a special meaning (e.g. a footnote is anchored at some
662 // position inside the text) and they *don't* have the fSpec flag set. This means
663 // that we have to watch out for such characters even in plain text. Slooow :}
664 //
665 // For now we only have to handle footnote and endnote references that way. Due to that
666 // the code below is a bit simpler right now, but I fear we have to extend that later on.
667 // (We will have to keep track of the type of disruption, footnote() takes care of all now)
668 //
669 // A precondition for the footnote/endnote implementation below is, that footnote and
670 // endnote references only occur in the main body text. The reason is that we only check
671 // for the next footnote inside the PLCF and don't take subdocuments into account. If
672 // it turns out that this precondition is not satisfied we would have to change the
673 // O(1) nextFootnote() call to something like an O(n) containsFootnote( start, lim )
674 // Up to now Word 97, 2000, and 2002 seem to be bug compatible and fullfill that precondition.
675 //
676 while ( length > 0 ) {
677 U32 disruption = 0xffffffff; // "infinity"
678 if ( m_footnotes ) {
679 U32 nextFtn = m_footnotes->nextFootnote();
680 U32 nextEnd = m_footnotes->nextEndnote();
681 disruption = nextFtn < nextEnd ? nextFtn : nextEnd;
682 #ifdef WV2_DEBUG_FOOTNOTES
683 wvlog << "nextFtn=" << nextFtn << " nextEnd=" << nextEnd << " disruption="
684 << disruption << " length=" << length << std::endl;
685 #endif
686 }
687 U32 startCP = currentStart + chunk.m_position.offset + index;
688
689 if ( disruption >= startCP && disruption < startCP + length ) {
690 #ifdef WV2_DEBUG_FOOTNOTES
691 wvlog << "startCP=" << startCP << " len=" << length << " disruption=" << disruption << std::endl;
692 #endif
693 U32 disLen = disruption - startCP;
694 if ( disLen != 0 )
695 processRun( chunk, chp, disLen, index, currentStart );
696 length -= disLen;
697 index += disLen;
698 processFootnote( chunk.m_text[ index ], disruption, chp );
699 --length;
700 ++index;
701 }
702 else {
703 // common case, no disruption at all (or the end of a disrupted chunk)
704 processRun( chunk, chp, length, index, currentStart );
705 break; // should be faster than messing with length...
706 }
707 }
708 }
709
processRun(const Chunk & chunk,SharedPtr<const Word97::CHP> chp,U32 length,U32 index,U32 currentStart)710 void Parser9x::processRun( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
711 U32 length, U32 index, U32 currentStart )
712 {
713 if ( chp->fSpec ) {
714 U32 i = 0;
715 while ( i < length ) {
716 processSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp );
717 ++i;
718 }
719 }
720 else {
721 UConstString str( const_cast<UChar*>( chunk.m_text.data() ) + index, length );
722 m_textHandler->runOfText( str.string(), chp );
723 }
724 }
725
processSpecialCharacter(UChar character,U32 globalCP,SharedPtr<const Word97::CHP> chp)726 void Parser9x::processSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
727 {
728 switch( character.unicode() ) {
729 // Is it one of the "simple" special characters?
730 case TextHandler::CurrentPageNumber:
731 case TextHandler::LineNumber:
732 case TextHandler::AbbreviatedDate:
733 case TextHandler::TimeHMS:
734 case TextHandler::CurrentSectionNumber:
735 case TextHandler::AbbreviatedDayOfWeek:
736 case TextHandler::DayOfWeek:
737 case TextHandler::DayShort:
738 case TextHandler::HourCurrentTime:
739 case TextHandler::HourCurrentTimeTwoDigits:
740 case TextHandler::MinuteCurrentTime:
741 case TextHandler::MinuteCurrentTimeTwoDigits:
742 case TextHandler::SecondsCurrentTime:
743 case TextHandler::AMPMCurrentTime:
744 case TextHandler::CurrentTimeHMSOld:
745 case TextHandler::DateM:
746 case TextHandler::DateShort:
747 case TextHandler::MonthShort:
748 case TextHandler::YearLong:
749 case TextHandler::YearShort:
750 case TextHandler::AbbreviatedMonth:
751 case TextHandler::MonthLong:
752 case TextHandler::CurrentTimeHMS:
753 case TextHandler::DateLong:
754 m_textHandler->specialCharacter( static_cast<TextHandler::SpecialCharacter>( character.unicode() ), chp );
755 break;
756
757 // It has to be one of the very special characters...
758 case TextHandler::Picture:
759 emitPictureData( chp );
760 break;
761 case TextHandler::DrawnObject:
762 emitDrawnObject( chp );
763 break;
764 case TextHandler::FootnoteAuto:
765 if ( m_subDocument == Footnote || m_subDocument == Endnote )
766 m_textHandler->footnoteAutoNumber( chp );
767 else
768 processFootnote( character, globalCP, chp );
769 break;
770 case TextHandler::FieldBegin:
771 {
772 const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
773 if ( fld )
774 m_textHandler->fieldStart( fld, chp );
775 break;
776 }
777 case TextHandler::FieldSeparator:
778 {
779 const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
780 if ( fld )
781 m_textHandler->fieldSeparator( fld, chp );
782 break;
783 }
784 case TextHandler::FieldEnd:
785 {
786 const FLD* fld( m_fields->fldForCP( m_subDocument, toLocalCP( globalCP ) ) );
787 if ( fld )
788 m_textHandler->fieldEnd( fld, chp );
789 break;
790 }
791 case TextHandler::FieldEscapeChar:
792 wvlog << "Found an escape character ++++++++++++++++++++?" << std::endl;
793 break;
794 default:
795 wvlog << "Parser9x::processSpecialCharacter(): Support for character " << character.unicode()
796 << " not implemented yet." << std::endl;
797 break;
798 }
799 }
800
processFootnote(UChar character,U32 globalCP,SharedPtr<const Word97::CHP> chp)801 void Parser9x::processFootnote( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
802 {
803 if ( !m_footnotes ) {
804 wvlog << "Bug: Found a footnote, but m_footnotes == 0!" << std::endl;
805 return;
806 }
807 #ifdef WV2_DEBUG_FOOTNOTES
808 wvlog << "######### Footnote found: CP=" << globalCP << std::endl;
809 #endif
810 bool ok;
811 FootnoteData data( m_footnotes->footnote( globalCP, ok ) );
812 if ( ok )
813 m_textHandler->footnoteFound( data.type, character, chp, make_functor( *this, &Parser9x::parseFootnote, data ) );
814 }
815
emitHeaderData(SharedPtr<const Word97::SEP> sep)816 void Parser9x::emitHeaderData( SharedPtr<const Word97::SEP> sep )
817 {
818 // We don't care about non-existant headers
819 if ( !m_headers )
820 return;
821
822 // MS Word stores headers in a very strange way, so we have to keep track
823 // of the section numbers. We use a 0-based index for convenience inside
824 // the header reading code. (Werner)
825 //
826 // Of course the file format has changed between Word 6/7 and Word 8, so
827 // I had to add a workaround... oh well.
828 HeaderData data( m_sectionNumber++ );
829
830 if ( m_fib.nFib < Word8nFib ) {
831 data.headerMask = sep->grpfIhdt;
832 m_headers->headerMask( sep->grpfIhdt );
833 }
834 else {
835 if ( sep->fTitlePage )
836 data.headerMask |= HeaderData::HeaderFirst | HeaderData::FooterFirst;
837 if ( dop().fFacingPages )
838 data.headerMask |= HeaderData::HeaderEven | HeaderData::FooterEven;
839 }
840 m_textHandler->headersFound( make_functor( *this, &Parser9x::parseHeaders, data ) );
841 }
842
emitDrawnObject(SharedPtr<const Word97::CHP> chp)843 void Parser9x::emitDrawnObject( SharedPtr<const Word97::CHP> chp )
844 {
845 #ifdef WV2_DEBUG_PICTURES
846 wvlog << "TODO: process 'Drawn object': " << static_cast<int> (chp->fSpec) << " "
847 << static_cast<int> (chp->fObj) << " " << static_cast<int> (chp->fOle2) << " "
848 << chp->fcPic_fcObj_lTagObj << std::endl;
849 #endif
850
851 }
852
emitPictureData(SharedPtr<const Word97::CHP> chp)853 void Parser9x::emitPictureData( SharedPtr<const Word97::CHP> chp )
854 {
855 #ifdef WV2_DEBUG_PICTURES
856 wvlog << "Found a picture; the fcPic is " << chp->fcPic_fcObj_lTagObj << std::endl;
857 #endif
858
859 OLEStreamReader* stream( m_fib.nFib < Word8nFib ? m_wordDocument : m_data );
860 if ( !stream || static_cast<unsigned int>( chp->fcPic_fcObj_lTagObj ) >= stream->size() ) {
861 wvlog << "Error: Severe problems when trying to read an image. Skipping." << std::endl;
862 return;
863 }
864 stream->push();
865 stream->seek( chp->fcPic_fcObj_lTagObj, G_SEEK_SET );
866
867 Word97::PICF* picf( 0 );
868 if ( m_fib.nFib < Word8nFib )
869 picf = new Word97::PICF( Word95::toWord97( Word95::PICF( stream, false ) ) );
870 else
871 picf = new Word97::PICF( stream, false );
872 stream->pop();
873
874 if ( picf->cbHeader < 58 ) {
875 wvlog << "Error: Found an image with a PICF smaller than 58 bytes! Skipping the image." << std::endl;
876 delete picf;
877 return;
878 }
879 if ( picf->fError ) {
880 wvlog << "Information: Skipping the image, fError is set" << std::endl;
881 delete picf;
882 return;
883 }
884
885 #ifdef WV2_DEBUG_PICTURES
886 wvlog << "picf:" << std::endl << " lcb=" << picf->lcb << " cbHeader=" << picf->cbHeader
887 << std::endl << " mfp.mm=" << picf->mfp.mm << " mfp.xExt=" << picf->mfp.xExt
888 << " mfp.yExt=" << picf->mfp.yExt << " mfp.hMF=" << picf->mfp.hMF << std::endl
889 << " dxaGoal=" << picf->dxaGoal << " dyaGoal=" << picf->dyaGoal << " mx="
890 << picf->mx << " my=" << picf->my << std::endl << " dxaCropLeft=" << picf->dxaCropLeft
891 << " dyaCropTop=" << picf->dyaCropTop << " dxaCropRight=" << picf->dxaCropRight
892 << " dyaCropBottom=" << picf->dyaCropBottom << std::endl << " fFrameEmpty="
893 << picf->fFrameEmpty << " fBitmap=" << picf->fBitmap << " fDrawHatch="
894 << picf->fDrawHatch << " fError=" << picf->fError << " bpp=" << picf->bpp
895 << std::endl << " dxaOrigin=" << picf->dxaOrigin << " dyaOrigin="
896 << picf->dyaOrigin << std::endl;
897 #endif
898
899 SharedPtr<const Word97::PICF> sharedPicf( picf );
900 m_textHandler->pictureFound( make_functor( *this, &Parser9x::parsePicture,
901 PictureData( static_cast<U32>( chp->fcPic_fcObj_lTagObj ), sharedPicf ) ),
902 sharedPicf, chp );
903 }
904
parseHeader(const HeaderData & data,unsigned char mask)905 void Parser9x::parseHeader( const HeaderData& data, unsigned char mask )
906 {
907 #ifdef WV2_DEBUG_HEADERS
908 wvlog << "parsing one header for section " << data.sectionNumber << ": mask=0x"
909 << std::hex << static_cast<int>( mask ) << std::dec << std::endl;
910 #endif
911
912 // First we have to determine the CP start/lim for the header text. From what I
913 // found out Word 8 does it that way:
914 // - At the begin of the plcfhdd there are always 6 "0 fields" (stoppers)
915 // - The number of headers modulo 6 is always 0
916 // Word 6 does it completely different, of course :-}
917 std::pair<U32, U32> range( m_headers->findHeader( data.sectionNumber, mask ) );
918
919 int length = range.second - range.first;
920 #ifdef WV2_DEBUG_HEADERS
921 wvlog << "found a range: start=" << range.first << " lim=" << range.second << std::endl
922 << "length: " << length << std::endl;
923 #endif
924 if ( length < 1 ) {
925 #ifdef WV2_DEBUG_HEADERS
926 wvlog << "Warning: Didn't find a valid CPs for this header -- faking it" << std::endl;
927 #endif
928 m_subDocumentHandler->headerStart( static_cast<HeaderData::Type>( mask ) );
929 SharedPtr<const ParagraphProperties> sharedProps( new ParagraphProperties );
930 m_textHandler->paragraphStart( sharedProps );
931 m_textHandler->paragraphEnd();
932 m_subDocumentHandler->headerEnd();
933 return;
934 }
935 else if ( length > 1 )
936 --length; // get rid of the trailing "end of header/footer" character
937
938 saveState( length, Header );
939
940 m_subDocumentHandler->headerStart( static_cast<HeaderData::Type>( mask ) );
941 parseHelper( Position( m_fib.ccpText + m_fib.ccpFtn + range.first, m_plcfpcd ) );
942 m_subDocumentHandler->headerEnd();
943
944 restoreState();
945 }
946
parsePictureEscher(const PictureData & data,OLEStreamReader * stream,int totalPicfSize,int picfStartPos)947 void Parser9x::parsePictureEscher( const PictureData& data, OLEStreamReader* stream,
948 int totalPicfSize, int picfStartPos )
949 {
950 int endOfPicf = picfStartPos + totalPicfSize;
951 #ifdef WV2_DEBUG_PICTURES
952 wvlog << "Parser9x::parsePictureEscher:\n Total PICF size = " << totalPicfSize
953 << "\n PICF start position = " << picfStartPos
954 << "\n current stream position = " << stream->tell()
955 << "\n endOfPicf = " << endOfPicf << std::endl;
956 #endif
957
958 //from OOo code, looks like we have to process this type differently
959 // read a byte in, and that's an offset before reading the image
960 if ( data.picf->mfp.mm == 102 )
961 {
962 U8 byte = stream->readU8();
963 int offset = static_cast<unsigned int> (byte);
964 wvlog << " 0x66 offset is " << offset << std::endl;
965 stream->seek( offset, G_SEEK_CUR );
966 }
967
968 //now we do a big loop, just reading each record until we get to the end of the picf
969 do
970 {
971 //read header
972 EscherHeader header( stream );
973 #ifdef WV2_DEBUG_PICTURES
974 wvlog << "Starting new outer record: " << std::endl;
975 header.dump();
976 #endif
977 //process record
978 wvlog << header.getRecordType() << std::endl;
979 if( !header.isAtom() )
980 {
981 wvlog << "Reading container..." << std::endl;
982 //same process again with container
983 int endOfContainer = stream->tell() + header.recordSize();
984 do
985 {
986 //read header
987 EscherHeader h( stream );
988 #ifdef WV2_DEBUG_PICTURES
989 wvlog << " starting new inner record: " << std::endl;
990 h.dump();
991 wvlog << h.getRecordType() << std::endl;
992 #endif
993 //process record
994 if( h.isAtom() )
995 {
996 U8* s = new U8[ h.recordSize() ];
997 stream->read( s, h.recordSize() );
998 //clean up memory
999 delete [] s;
1000 }
1001 else
1002 wvlog << " Error - container inside a container!" << std::endl;
1003 } while (stream->tell() != endOfContainer);
1004 wvlog << "End of container." << std::endl;
1005 } //finished processing a container
1006 else
1007 {
1008 wvlog << "Reading atom" << std::endl;
1009 if( header.getRecordType() == "msofbtBSE" )
1010 {
1011 //process image
1012 FBSE fbse( stream );
1013 #ifdef WV2_DEBUG_PICTURES
1014 fbse.dump();
1015 wvlog << "name length is " << fbse.getNameLength() << std::endl;
1016 #endif
1017 //the data is actually in a new record!
1018 EscherHeader h( stream );
1019 #ifdef WV2_DEBUG_PICTURES
1020 wvlog << " reading data record after fbse record" << std::endl;
1021 h.dump();
1022 #endif
1023 string blipType = h.getRecordType();
1024 Blip blip( stream, blipType );
1025 #ifdef WV2_DEBUG_PICTURES
1026 wvlog << " Blip record dump:" << std::endl;
1027 blip.dump();
1028 #endif
1029 //if Blip is compressed, we have to process differently
1030 if( blip.isCompressed() )
1031 {
1032 wvlog << "Decompressing image data at " << stream->tell() << "..." << std::endl;
1033 ZCodec z( 0x8000, 0x8000 );
1034 z.BeginCompression();
1035 z.SetBreak(blip.compressedImageSize());
1036 std::vector<U8> outBuffer;
1037 int err = z.Decompress( *stream, &outBuffer );
1038 #ifdef WV2_DEBUG_PICTURES
1039 wvlog << " err=" << err << std::endl;
1040 wvlog << " outBuffer size = " << outBuffer.size() << std::endl;
1041 #endif
1042 z.EndCompression(&outBuffer);
1043 //pass vector to escherData instead of OLEImageReader
1044 m_pictureHandler->escherData(outBuffer, data.picf, fbse.getBlipType());
1045 }
1046 //normal data, just create an OLEImageReader to be read
1047 else
1048 {
1049 int start = stream->tell();
1050 int limit = endOfPicf; //TODO is it possible that it wouldn't go all the way to the end?
1051 OLEImageReader reader( *stream, start, limit);
1052 m_pictureHandler->escherData(reader, data.picf, fbse.getBlipType());
1053 //we've read the data in OLEImageReader, so advance stream to the
1054 //end of OLEImageReader
1055 stream->seek( endOfPicf, G_SEEK_SET );
1056 }
1057 }
1058 else
1059 {
1060 //we can't really process this atom, because we don't recognize the type
1061 //so just skip to the end of this picf
1062 wvlog << " unrecognized atom, so we'll skip this image" << std::endl;
1063 stream->seek( endOfPicf );
1064 //U8* string = new U8[ header.recordSize() ];
1065 //stream->read( string, header.recordSize() );
1066 //clean up memory
1067 //delete [] string;
1068 }
1069 wvlog << "End of atom." << std::endl;
1070 } //finished processing an atom record
1071 wvlog << "current position: " << stream->tell() << ", endOfPicf:" << endOfPicf << std::endl;
1072 if( stream->tell() > endOfPicf )
1073 wvlog << "Error! We read past the end of the picture!" << std::endl;
1074 } while (stream->tell() != endOfPicf); //end of record
1075 }
1076
parsePictureExternalHelper(const PictureData & data,OLEStreamReader * stream)1077 void Parser9x::parsePictureExternalHelper( const PictureData& data, OLEStreamReader* stream )
1078 {
1079 #ifdef WV2_DEBUG_PICTURES
1080 wvlog << "Parser9x::parsePictureExternalHelper" << std::endl;
1081 #endif
1082
1083 // Guessing... some testing would be nice
1084 const U8 length( stream->readU8() );
1085 U8* string = new U8[ length ];
1086 stream->read( string, length );
1087 // Do we have to use the textconverter here?
1088 UString ustring( m_textconverter->convert( reinterpret_cast<char*>( string ),
1089 static_cast<unsigned int>( length ) ) );
1090 delete [] string;
1091
1092 m_pictureHandler->externalImage( ustring, data.picf );
1093 }
1094
parsePictureBitmapHelper(const PictureData & data,OLEStreamReader * stream)1095 void Parser9x::parsePictureBitmapHelper( const PictureData& data, OLEStreamReader* stream )
1096 {
1097 #ifdef WV2_DEBUG_PICTURES
1098 wvlog << "Parser9x::parsePictureBitmapHelper" << std::endl;
1099 #endif
1100 OLEImageReader reader( *stream, data.fcPic + data.picf->cbHeader, data.fcPic + data.picf->lcb );
1101 m_pictureHandler->bitmapData( reader, data.picf );
1102 }
1103
parsePictureWmfHelper(const PictureData & data,OLEStreamReader * stream)1104 void Parser9x::parsePictureWmfHelper( const PictureData& data, OLEStreamReader* stream )
1105 {
1106 #ifdef WV2_DEBUG_PICTURES
1107 wvlog << "Parser9x::parsePictureWmfHelper" << std::endl;
1108 #endif
1109 // ###### TODO: Handle the Mac case (x-wmf + PICT)
1110 // ###### CHECK: Do we want to do anything about .emf files?
1111 OLEImageReader reader( *stream, data.fcPic + data.picf->cbHeader, data.fcPic + data.picf->lcb );
1112 m_pictureHandler->wmfData( reader, data.picf );
1113 }
1114
saveState(U32 newRemainingChars,SubDocument newSubDocument,ParsingMode newParsingMode)1115 void Parser9x::saveState( U32 newRemainingChars, SubDocument newSubDocument, ParsingMode newParsingMode )
1116 {
1117 oldParsingStates.push( ParsingState( m_tableRowStart, m_tableRowLength, m_cellMarkFound, m_remainingCells,
1118 m_currentParagraph, m_remainingChars, m_sectionNumber, m_subDocument,
1119 m_parsingMode ) );
1120 m_tableRowStart = 0;
1121 m_cellMarkFound = false;
1122 m_currentParagraph = new Paragraph;
1123 m_remainingChars = newRemainingChars;
1124 m_subDocument = newSubDocument;
1125 m_parsingMode = newParsingMode;
1126
1127 m_wordDocument->push();
1128 if ( m_data )
1129 m_data->push();
1130 }
1131
restoreState()1132 void Parser9x::restoreState()
1133 {
1134 if ( oldParsingStates.empty() ) {
1135 wvlog << "Bug: You messed up the save/restore stack! The stack is empty" << std::endl;
1136 return;
1137 }
1138
1139 if ( m_data )
1140 m_data->pop();
1141 m_wordDocument->pop();
1142
1143 ParsingState ps( oldParsingStates.top() );
1144 oldParsingStates.pop();
1145
1146 if ( m_tableRowStart )
1147 wvlog << "Bug: We still have to process the table row." << std::endl;
1148 delete m_tableRowStart; // Should be a no-op, but I hate mem-leaks even for buggy code ;-)
1149 m_tableRowStart = ps.tableRowStart;
1150 m_tableRowLength = ps.tableRowLength;
1151 m_cellMarkFound = ps.cellMarkFound;
1152 m_remainingCells = ps.remainingCells;
1153
1154 if ( !m_currentParagraph->empty() )
1155 wvlog << "Bug: The current paragraph isn't empty." << std::endl;
1156 delete m_currentParagraph;
1157 m_currentParagraph = ps.paragraph;
1158
1159 if ( m_remainingChars != 0 )
1160 wvlog << "Bug: Still got " << m_remainingChars << " remaining chars." << std::endl;
1161 m_remainingChars = ps.remainingChars;
1162 m_sectionNumber = ps.sectionNumber;
1163
1164 m_subDocument = ps.subDocument;
1165 m_parsingMode = ps.parsingMode;
1166 }
1167
toLocalCP(U32 globalCP) const1168 U32 Parser9x::toLocalCP( U32 globalCP ) const
1169 {
1170 if ( globalCP < m_fib.ccpText )
1171 return globalCP;
1172 globalCP -= m_fib.ccpText;
1173
1174 if ( globalCP < m_fib.ccpFtn )
1175 return globalCP;
1176 globalCP -= m_fib.ccpFtn;
1177
1178 if ( globalCP < m_fib.ccpHdd )
1179 return globalCP;
1180 globalCP -= m_fib.ccpHdd;
1181
1182 if ( globalCP < m_fib.ccpMcr )
1183 return globalCP;
1184 globalCP -= m_fib.ccpMcr;
1185
1186 if ( globalCP < m_fib.ccpAtn )
1187 return globalCP;
1188 globalCP -= m_fib.ccpAtn;
1189
1190 if ( globalCP < m_fib.ccpEdn )
1191 return globalCP;
1192 globalCP -= m_fib.ccpEdn;
1193
1194 if ( globalCP < m_fib.ccpTxbx )
1195 return globalCP;
1196 globalCP -= m_fib.ccpTxbx;
1197
1198 if ( globalCP < m_fib.ccpHdrTxbx )
1199 return globalCP;
1200 globalCP -= m_fib.ccpHdrTxbx;
1201
1202 wvlog << "Warning: You aimed " << globalCP << " characters past the end of the text!" << std::endl;
1203 return globalCP;
1204 }
1205
accumulativeLength(int len,const Parser9x::Chunk & chunk)1206 int Parser9x::accumulativeLength( int len, const Parser9x::Chunk& chunk )
1207 {
1208 return len + chunk.m_text.length();
1209 }
1210