1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwpd
3 * Version: MPL 2.0 / LGPLv2.1+
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * Major Contributor(s):
10 * Copyright (C) 2004 Marc Maurer (uwog@uwog.net)
11 * Copyright (C) 2004 Fridrich Strba (fridrich.strba@bluewin.ch)
12 *
13 * For minor contributions see the git repository.
14 *
15 * Alternatively, the contents of this file may be used under the terms
16 * of the GNU Lesser General Public License Version 2.1 or later
17 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
18 * applicable instead of those above.
19 *
20 * For further information visit http://libwpd.sourceforge.net
21 */
22
23 /* "This product is not manufactured, approved, or supported by
24 * Corel Corporation or Corel Corporation Limited."
25 */
26
27 #include "WP3Parser.h"
28
29 #include <memory>
30
31 #include "WPXHeader.h"
32 #include "WP3Part.h"
33 #include "WP3ContentListener.h"
34 #include "WP3StylesListener.h"
35 #include "WP3ResourceFork.h"
36 #include "libwpd_internal.h"
37 #include "WPXTable.h"
38 #include "WPXTableList.h"
39
WP3Parser(librevenge::RVNGInputStream * input,WPXHeader * header,WPXEncryption * encryption)40 WP3Parser::WP3Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption) :
41 WPXParser(input, header, encryption)
42 {
43 }
44
~WP3Parser()45 WP3Parser::~WP3Parser()
46 {
47 }
48
getResourceFork(librevenge::RVNGInputStream * input,WPXEncryption * encryption)49 WP3ResourceFork *WP3Parser::getResourceFork(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
50 {
51 // Certain WP2 documents actually don't contain resource fork, so check for its existence
52 if (!getHeader() || getHeader()->getDocumentOffset() <= 0x10)
53 {
54 WPD_DEBUG_MSG(("WP3Parser: Document does not contain resource fork\n"));
55 return nullptr;
56 }
57
58 return new WP3ResourceFork(input, encryption);
59 }
60
parse(librevenge::RVNGInputStream * input,WPXEncryption * encryption,WP3Listener * listener)61 void WP3Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
62 {
63 listener->startDocument();
64
65 input->seek(getHeader()->getDocumentOffset(), librevenge::RVNG_SEEK_SET);
66
67 WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell()));
68
69 parseDocument(input, encryption, listener);
70
71 listener->endDocument();
72 }
73
74 // parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
parseDocument(librevenge::RVNGInputStream * input,WPXEncryption * encryption,WP3Listener * listener)75 void WP3Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
76 {
77 while (!input->isEnd())
78 {
79 unsigned char readVal;
80 readVal = readU8(input, encryption);
81
82 if (readVal == 0 || readVal == 0x7F || readVal == 0xFF)
83 {
84 // FIXME: VERIFY: is this IF clause correct? (0xFF seems to be OK at least)
85 // do nothing: this token is meaningless and is likely just corruption
86 }
87 else if (readVal >= (unsigned char)0x01 && readVal <= (unsigned char)0x1F)
88 {
89 // control characters ?
90 }
91 else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7E)
92 {
93 listener->insertCharacter(readVal);
94 }
95 else
96 {
97 std::unique_ptr<WP3Part> part(WP3Part::constructPart(input, encryption, readVal));
98 if (part)
99 part->parse(listener);
100 }
101 }
102 }
103
parse(librevenge::RVNGTextInterface * textInterface)104 void WP3Parser::parse(librevenge::RVNGTextInterface *textInterface)
105 {
106 librevenge::RVNGInputStream *input = getInput();
107 WPXEncryption *encryption = getEncryption();
108 std::list<WPXPageSpan> pageList;
109 WPXTableList tableList;
110
111 try
112 {
113 const std::unique_ptr<WP3ResourceFork> resourceFork{getResourceFork(input, encryption)};
114
115 // do a "first-pass" parse of the document
116 // gather table border information, page properties (per-page)
117 WP3StylesListener stylesListener(pageList, tableList);
118 stylesListener.setResourceFork(resourceFork.get());
119 parse(input, encryption, &stylesListener);
120
121 // postprocess the pageList == remove duplicate page spans due to the page breaks
122 auto previousPage = pageList.begin();
123 for (auto Iter=pageList.begin(); Iter != pageList.end(); /* Iter++ */)
124 {
125 if ((Iter != previousPage) && (*previousPage==*Iter))
126 {
127 (*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan());
128 Iter = pageList.erase(Iter);
129 }
130 else
131 {
132 previousPage = Iter;
133 ++Iter;
134 }
135 }
136
137 // second pass: here is where we actually send the messages to the target app
138 // that are necessary to emit the body of the target document
139 WP3ContentListener listener(pageList, textInterface); // FIXME: SHOULD BE CONTENT_LISTENER, AND SHOULD BE PASSED TABLE DATA!
140 listener.setResourceFork(resourceFork.get());
141 parse(input, encryption, &listener);
142 }
143 catch (FileException)
144 {
145 WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
146 throw FileException();
147 }
148 }
149
parseSubDocument(librevenge::RVNGTextInterface * textInterface)150 void WP3Parser::parseSubDocument(librevenge::RVNGTextInterface *textInterface)
151 {
152 std::list<WPXPageSpan> pageList;
153 WPXTableList tableList;
154
155 librevenge::RVNGInputStream *input = getInput();
156
157 try
158 {
159 WP3StylesListener stylesListener(pageList, tableList);
160 stylesListener.startSubDocument();
161 parseDocument(input, nullptr, &stylesListener);
162 stylesListener.endSubDocument();
163
164 input->seek(0, librevenge::RVNG_SEEK_SET);
165
166 WP3ContentListener listener(pageList, textInterface);
167 listener.startSubDocument();
168 parseDocument(input, nullptr, &listener);
169 listener.endSubDocument();
170 }
171 catch (FileException)
172 {
173 WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
174 throw FileException();
175 }
176 }
177
178 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
179