1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
2 /* libwpd
3  * Version: MPL 2.0 / LGPLv2.1+
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * Major Contributor(s):
10  * Copyright (C) 2004 Marc Maurer (uwog@uwog.net)
11  * Copyright (C) 2004 Fridrich Strba (fridrich.strba@bluewin.ch)
12  *
13  * For minor contributions see the git repository.
14  *
15  * Alternatively, the contents of this file may be used under the terms
16  * of the GNU Lesser General Public License Version 2.1 or later
17  * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
18  * applicable instead of those above.
19  *
20  * For further information visit http://libwpd.sourceforge.net
21  */
22 
23 /* "This product is not manufactured, approved, or supported by
24  * Corel Corporation or Corel Corporation Limited."
25  */
26 
27 #include "WP3Parser.h"
28 
29 #include <memory>
30 
31 #include "WPXHeader.h"
32 #include "WP3Part.h"
33 #include "WP3ContentListener.h"
34 #include "WP3StylesListener.h"
35 #include "WP3ResourceFork.h"
36 #include "libwpd_internal.h"
37 #include "WPXTable.h"
38 #include "WPXTableList.h"
39 
WP3Parser(librevenge::RVNGInputStream * input,WPXHeader * header,WPXEncryption * encryption)40 WP3Parser::WP3Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption) :
41 	WPXParser(input, header, encryption)
42 {
43 }
44 
~WP3Parser()45 WP3Parser::~WP3Parser()
46 {
47 }
48 
getResourceFork(librevenge::RVNGInputStream * input,WPXEncryption * encryption)49 WP3ResourceFork *WP3Parser::getResourceFork(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
50 {
51 	// Certain WP2 documents actually don't contain resource fork, so check for its existence
52 	if (!getHeader() || getHeader()->getDocumentOffset() <= 0x10)
53 	{
54 		WPD_DEBUG_MSG(("WP3Parser: Document does not contain resource fork\n"));
55 		return nullptr;
56 	}
57 
58 	return new WP3ResourceFork(input, encryption);
59 }
60 
parse(librevenge::RVNGInputStream * input,WPXEncryption * encryption,WP3Listener * listener)61 void WP3Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
62 {
63 	listener->startDocument();
64 
65 	input->seek(getHeader()->getDocumentOffset(), librevenge::RVNG_SEEK_SET);
66 
67 	WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell()));
68 
69 	parseDocument(input, encryption, listener);
70 
71 	listener->endDocument();
72 }
73 
74 // parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
parseDocument(librevenge::RVNGInputStream * input,WPXEncryption * encryption,WP3Listener * listener)75 void WP3Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
76 {
77 	while (!input->isEnd())
78 	{
79 		unsigned char readVal;
80 		readVal = readU8(input, encryption);
81 
82 		if (readVal == 0 || readVal == 0x7F || readVal == 0xFF)
83 		{
84 			// FIXME: VERIFY: is this IF clause correct? (0xFF seems to be OK at least)
85 			// do nothing: this token is meaningless and is likely just corruption
86 		}
87 		else if (readVal >= (unsigned char)0x01 && readVal <= (unsigned char)0x1F)
88 		{
89 			// control characters ?
90 		}
91 		else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7E)
92 		{
93 			listener->insertCharacter(readVal);
94 		}
95 		else
96 		{
97 			std::unique_ptr<WP3Part> part(WP3Part::constructPart(input, encryption, readVal));
98 			if (part)
99 				part->parse(listener);
100 		}
101 	}
102 }
103 
parse(librevenge::RVNGTextInterface * textInterface)104 void WP3Parser::parse(librevenge::RVNGTextInterface *textInterface)
105 {
106 	librevenge::RVNGInputStream *input = getInput();
107 	WPXEncryption *encryption = getEncryption();
108 	std::list<WPXPageSpan> pageList;
109 	WPXTableList tableList;
110 
111 	try
112 	{
113 		const std::unique_ptr<WP3ResourceFork> resourceFork{getResourceFork(input, encryption)};
114 
115 		// do a "first-pass" parse of the document
116 		// gather table border information, page properties (per-page)
117 		WP3StylesListener stylesListener(pageList, tableList);
118 		stylesListener.setResourceFork(resourceFork.get());
119 		parse(input, encryption, &stylesListener);
120 
121 		// postprocess the pageList == remove duplicate page spans due to the page breaks
122 		auto previousPage = pageList.begin();
123 		for (auto Iter=pageList.begin(); Iter != pageList.end(); /* Iter++ */)
124 		{
125 			if ((Iter != previousPage) && (*previousPage==*Iter))
126 			{
127 				(*previousPage).setPageSpan((*previousPage).getPageSpan() + (*Iter).getPageSpan());
128 				Iter = pageList.erase(Iter);
129 			}
130 			else
131 			{
132 				previousPage = Iter;
133 				++Iter;
134 			}
135 		}
136 
137 		// second pass: here is where we actually send the messages to the target app
138 		// that are necessary to emit the body of the target document
139 		WP3ContentListener listener(pageList, textInterface); // FIXME: SHOULD BE CONTENT_LISTENER, AND SHOULD BE PASSED TABLE DATA!
140 		listener.setResourceFork(resourceFork.get());
141 		parse(input, encryption, &listener);
142 	}
143 	catch (FileException)
144 	{
145 		WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
146 		throw FileException();
147 	}
148 }
149 
parseSubDocument(librevenge::RVNGTextInterface * textInterface)150 void WP3Parser::parseSubDocument(librevenge::RVNGTextInterface *textInterface)
151 {
152 	std::list<WPXPageSpan> pageList;
153 	WPXTableList tableList;
154 
155 	librevenge::RVNGInputStream *input = getInput();
156 
157 	try
158 	{
159 		WP3StylesListener stylesListener(pageList, tableList);
160 		stylesListener.startSubDocument();
161 		parseDocument(input, nullptr, &stylesListener);
162 		stylesListener.endSubDocument();
163 
164 		input->seek(0, librevenge::RVNG_SEEK_SET);
165 
166 		WP3ContentListener listener(pageList, textInterface);
167 		listener.startSubDocument();
168 		parseDocument(input, nullptr, &listener);
169 		listener.endSubDocument();
170 	}
171 	catch (FileException)
172 	{
173 		WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
174 		throw FileException();
175 	}
176 }
177 
178 /* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
179