1 /*
2 * This file is part of Office 2007 Filters for Calligra
3 * Copyright (C) 2002 Laurent Montel <lmontel@mandrakesoft.com>
4 * Copyright (C) 2003 David Faure <faure@kde.org>
5 * Copyright (C) 2002, 2003, 2004 Nicolas GOUTTE <goutte@kde.org>
6 * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
7 *
8 * Contact: Suresh Chande suresh.chande@nokia.com
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * version 2.1 as published by the Free Software Foundation.
13 *
14 * This library is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA
23 *
24 */
25
26 #include "DocxImport.h"
27
28 #include <MsooXmlUtils.h>
29 #include <MsooXmlSchemas.h>
30 #include <MsooXmlContentTypes.h>
31 #include <MsooXmlRelationships.h>
32 #include "DocxXmlDocumentReader.h"
33 #include "DocxXmlStylesReader.h"
34 #include "DocxXmlNumberingReader.h"
35 #include "DocxXmlFootnoteReader.h"
36 #include "DocxXmlCommentsReader.h"
37 #include "DocxXmlEndnoteReader.h"
38 #include "DocxXmlFontTableReader.h"
39 #include "DocxXmlSettingsReader.h"
40 #include "DocxDebug.h"
41
42 #include <QColor>
43 #include <QFile>
44 #include <QFont>
45 #include <QPen>
46 #include <QRegExp>
47 #include <QImage>
48
49 #include <kpluginfactory.h>
50
51 #include <KoEmbeddedDocumentSaver.h>
52 #include <KoDocumentInfo.h>
53 #include <KoDocument.h>
54 #include <KoFilterChain.h>
55 #include <KoPageLayout.h>
56 #include <KoXmlWriter.h>
57
58 K_PLUGIN_FACTORY_WITH_JSON(DocxImportFactory, "calligra_filter_docx2odt.json",
59 registerPlugin<DocxImport>();)
60
//! Kind of Word package being imported; chosen from the source MIME type
//! in DocxImport::acceptsSourceMimeType().
enum DocxDocumentType {
    DocxDocument = 0, //!< "...document" MIME types (with or without macros)
    DocxTemplate = 1  //!< "...template" MIME types (with or without macros)
};
65
66 class DocxImport::Private
67 {
68 public:
Private()69 Private() : type(DocxDocument), macrosEnabled(false) {
70 }
71
mainDocumentContentType() const72 const char* mainDocumentContentType() const
73 {
74 if (type == DocxTemplate) {
75 return MSOOXML::ContentTypes::wordTemplate;
76 }
77 return MSOOXML::ContentTypes::wordDocument;
78 }
79
80 DocxDocumentType type;
81 bool macrosEnabled;
82 QMap<QString, QVariant> documentSettings;
83 QMap<QString, QString> colorMap;
84 };
85
DocxImport(QObject * parent,const QVariantList &)86 DocxImport::DocxImport(QObject* parent, const QVariantList &)
87 : MSOOXML::MsooXmlImport(QLatin1String("text"), parent), d(new Private)
88 {
89 }
90
~DocxImport()91 DocxImport::~DocxImport()
92 {
93 delete d;
94 }
95
documentSettings() const96 QMap<QString, QVariant> DocxImport::documentSettings() const
97 {
98 return d->documentSettings;
99 }
100
documentSetting(const QString & name) const101 QVariant DocxImport::documentSetting(const QString& name) const
102 {
103 return d->documentSettings.value(name);
104 }
105
acceptsSourceMimeType(const QByteArray & mime) const106 bool DocxImport::acceptsSourceMimeType(const QByteArray& mime) const
107 {
108 debugDocx << "Entering DOCX Import filter: from " << mime;
109 if (mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
110 d->type = DocxDocument;
111 d->macrosEnabled = false;
112 }
113 else if (mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.template") {
114 d->type = DocxTemplate;
115 d->macrosEnabled = false;
116 }
117 else if (mime == "application/vnd.ms-word.document.macroEnabled.12") {
118 d->type = DocxDocument;
119 d->macrosEnabled = true;
120 }
121 else if (mime == "application/vnd.ms-word.template.macroEnabled.12") {
122 d->type = DocxTemplate;
123 d->macrosEnabled = true;
124 }
125 else
126 return false;
127 return true;
128 }
129
acceptsDestinationMimeType(const QByteArray & mime) const130 bool DocxImport::acceptsDestinationMimeType(const QByteArray& mime) const
131 {
132 debugDocx << "Entering DOCX Import filter: to " << mime;
133 return mime == "application/vnd.oasis.opendocument.text";
134 }
135
parseParts(KoOdfWriters * writers,MSOOXML::MsooXmlRelationships * relationships,QString & errorMessage)136 KoFilter::ConversionStatus DocxImport::parseParts(KoOdfWriters *writers, MSOOXML::MsooXmlRelationships *relationships,
137 QString& errorMessage)
138 {
139 writers->body->addAttribute("text:use-soft-page-breaks", "true");
140
141 // 0. parse settings.xml
142 {
143 DocxXmlSettingsReaderContext context(d->documentSettings);
144 DocxXmlSettingsReader settingsReader(writers);
145 d->colorMap = context.colorMap;
146
147 RETURN_IF_ERROR( loadAndParseDocumentIfExists(
148 MSOOXML::ContentTypes::wordSettings, &settingsReader, writers, errorMessage, &context) )
149 }
150
151 reportProgress(5);
152
153 // 1. parse font table
154 {
155 DocxXmlFontTableReaderContext context(*writers->mainStyles);
156 DocxXmlFontTableReader fontTableReader(writers);
157 RETURN_IF_ERROR( loadAndParseDocumentIfExists(
158 MSOOXML::ContentTypes::wordFontTable, &fontTableReader, writers, errorMessage, &context) )
159 }
160
161 QList<QByteArray> partNames = this->partNames(d->mainDocumentContentType());
162 if (partNames.count() != 1) {
163 errorMessage = i18n("Unable to find part for type %1", d->mainDocumentContentType());
164 return KoFilter::WrongFormat;
165 }
166 const QString documentPathAndFile(partNames.first());
167 QString documentPath, documentFile;
168 MSOOXML::Utils::splitPathAndFile(documentPathAndFile, &documentPath, &documentFile);
169
170 // 2. parse theme for the document if a theme exists
171 MSOOXML::DrawingMLTheme themes;
172 const QString docThemePathAndFile(relationships->targetForType(
173 documentPath, documentFile,
174 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/theme"));
175 if (!docThemePathAndFile.isEmpty()) {
176 debugDocx << QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/theme";
177
178 // prepare the themes-reader
179 QString docThemePath, docThemeFile;
180 MSOOXML::Utils::splitPathAndFile(docThemePathAndFile, &docThemePath, &docThemeFile);
181
182 MSOOXML::MsooXmlThemesReader themesReader(writers);
183 MSOOXML::MsooXmlThemesReaderContext themecontext(themes, relationships, (MSOOXML::MsooXmlImport*)this,
184 docThemePath, docThemeFile);
185
186 KoFilter::ConversionStatus status
187 = loadAndParseDocument(&themesReader, docThemePathAndFile, errorMessage, &themecontext);
188
189 debugDocx << "Reading ThemePathAndFile:" << docThemePathAndFile << "status=" << status;
190 }
191
192 reportProgress(15);
193
194 // Main document context, to which we collect footnotes, endnotes,
195 // comments, numbering, tablestyles
196 DocxXmlDocumentReaderContext mainContext(*this, documentPath, documentFile, *relationships, &themes);
197
198 // 3. parse styles
199 {
200 // get styles path from document's relationships, not from content
201 // types; typically returns /word/styles.xml
202 //
203 // ECMA-376, 11.3.12 Style Definitions Part, p. 65
204 //
205 // An instance of this part type contains the definition for a set of
206 // styles used by this document. A package shall contain at most two
207 // Style Definitions parts. One instance of that part shall be the
208 // target of an implicit relationship from the Main Document (§11.3.10)
209 // part, and the other shall be the target of an implicit relationship
210 // in from the Glossary Document (§11.3.8) part.
211
212 const QString stylesPathAndFile(relationships->targetForType(documentPath, documentFile,
213 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/styles"));
214 DocxXmlStylesReader stylesReader(writers);
215 if (!stylesPathAndFile.isEmpty()) {
216 QString stylesPath, stylesFile;
217 MSOOXML::Utils::splitPathAndFile(stylesPathAndFile, &stylesPath, &stylesFile);
218 DocxXmlDocumentReaderContext context(*this, stylesPath, stylesFile, *relationships, &themes);
219
220 RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
221 stylesPathAndFile, &stylesReader, writers, errorMessage, &context) )
222
223 mainContext.m_tableStyles = context.m_tableStyles;
224 mainContext.m_namedDefaultStyles = context.m_namedDefaultStyles;
225 mainContext.m_defaultFontSizePt = context.m_defaultFontSizePt;
226 }
227 }
228
229 reportProgress(25);
230
231 // 4. parse numbering
232 const QString numberingPathAndFile(relationships->targetForType(documentPath, documentFile,
233 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/numbering"));
234 DocxXmlNumberingReader numberingReader(writers);
235 QString numberingPath, numberingFile;
236 MSOOXML::Utils::splitPathAndFile(numberingPathAndFile, &numberingPath, &numberingFile);
237 DocxXmlDocumentReaderContext numberingContext(*this, numberingPath, numberingFile, *relationships, &themes);
238
239 if (!numberingPathAndFile.isEmpty()) {
240 RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
241 numberingPathAndFile, &numberingReader, writers, errorMessage, &numberingContext) )
242 }
243 mainContext.m_bulletStyles = numberingContext.m_bulletStyles;
244 mainContext.m_abstractNumIDs = numberingContext.m_abstractNumIDs;
245
246 reportProgress(30);
247
248 {
249 // 5. parse footnotes
250 const QString footnotePathAndFile(relationships->targetForType(documentPath, documentFile,
251 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/footnotes"));
252 //! @todo use m_contentTypes.values() when multiple paths are expected, e.g. for ContentTypes::wordHeader
253 DocxXmlFootnoteReader footnoteReader(writers);
254 if (!footnotePathAndFile.isEmpty()) {
255 QString footnotePath, footnoteFile;
256 MSOOXML::Utils::splitPathAndFile(footnotePathAndFile, &footnotePath, &footnoteFile);
257 DocxXmlDocumentReaderContext context(*this, footnotePath, footnoteFile, *relationships, &themes);
258 context.m_tableStyles = mainContext.m_tableStyles;
259 context.m_bulletStyles = mainContext.m_bulletStyles;
260 context.m_namedDefaultStyles = mainContext.m_namedDefaultStyles;
261 context.m_abstractNumIDs = mainContext.m_abstractNumIDs;
262
263 RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
264 footnotePathAndFile, &footnoteReader, writers, errorMessage, &context) )
265 mainContext.m_footnotes = context.m_footnotes;
266 }
267 reportProgress(35);
268
269 // 6. parse comments
270 const QString commentPathAndFile(relationships->targetForType(documentPath, documentFile,
271 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/comments"));
272 DocxXmlCommentReader commentReader(writers);
273 if (!commentPathAndFile.isEmpty()) {
274 QString commentPath, commentFile;
275 MSOOXML::Utils::splitPathAndFile(commentPathAndFile, &commentPath, &commentFile);
276 DocxXmlDocumentReaderContext context(*this, commentPath, commentFile, *relationships, &themes);
277 context.m_tableStyles = mainContext.m_tableStyles;
278 context.m_bulletStyles = mainContext.m_bulletStyles;
279 //TODO: m_abstractNumIDs and m_namedDefaultStyles might be needed
280
281 RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
282 commentPathAndFile, &commentReader, writers, errorMessage, &context) )
283 mainContext.m_comments = context.m_comments;
284 }
285
286 reportProgress(40);
287
288 // 7. parse endnotes
289 const QString endnotePathAndFile(relationships->targetForType(documentPath, documentFile,
290 QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/endnotes"));
291 DocxXmlEndnoteReader endnoteReader(writers);
292 if (!endnotePathAndFile.isEmpty()) {
293 QString endnotePath, endnoteFile;
294 MSOOXML::Utils::splitPathAndFile(endnotePathAndFile, &endnotePath, &endnoteFile);
295 DocxXmlDocumentReaderContext context(*this, endnotePath, endnoteFile, *relationships, &themes);
296 context.m_tableStyles = mainContext.m_tableStyles;
297 context.m_bulletStyles = mainContext.m_bulletStyles;
298 context.m_namedDefaultStyles = mainContext.m_namedDefaultStyles;
299 context.m_abstractNumIDs = mainContext.m_abstractNumIDs;
300
301 RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
302 endnotePathAndFile, &endnoteReader, writers, errorMessage, &context) )
303 mainContext.m_endnotes = context.m_endnotes;
304 }
305 reportProgress(45);
306
307 // 8. parse document
308 // Some of the templates MIGHT be defined in numberingreader.
309 DocxXmlDocumentReader documentReader(writers);
310 documentReader.m_definedShapeTypes = numberingReader.m_definedShapeTypes;
311 RETURN_IF_ERROR( loadAndParseDocument(
312 d->mainDocumentContentType(), &documentReader, writers, errorMessage, &mainContext) )
313 }
314 reportProgress(100);
315
316 return KoFilter::OK;
317 }
318
writeConfigurationSettings(KoXmlWriter * settings) const319 void DocxImport::writeConfigurationSettings(KoXmlWriter* settings) const
320 {
321 MsooXmlImport::writeConfigurationSettings(settings);
322
323 // This config item is used in KoTextLayoutArea::handleBordersAndSpacing
324 // during layouting. The defined 'Above paragraph' and 'Below paragraph'
325 // paragraph spacing (which is written in the ODF as fo:margin-top for the
326 // KoParagraphStyle) are not applied to the first and the last paragraph if
327 // this value is true.
328 settings->startElement("config:config-item");
329 settings->addAttribute("config:name", "AddParaTableSpacingAtStart");
330 settings->addAttribute("config:type", "boolean");
331 settings->addTextSpan("true");
332 settings->endElement();
333
334 // OOo requires this config item to display files produced by this filter
335 // correctly. If true, then the fo:text-indent attribute will be ignored.
336 settings->startElement("config:config-item");
337 settings->addAttribute("config:name", "IgnoreFirstLineIndentInNumbering");
338 settings->addAttribute("config:type", "boolean");
339 settings->addTextSpan("false");
340 settings->endElement();
341 }
342
343 #include "DocxImport.moc"
344