1 /*
2  * This file is part of Office 2007 Filters for Calligra
3  * Copyright (C) 2002 Laurent Montel <lmontel@mandrakesoft.com>
4  * Copyright (C) 2003 David Faure <faure@kde.org>
5  * Copyright (C) 2002, 2003, 2004 Nicolas GOUTTE <goutte@kde.org>
6  * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
7  *
8  * Contact: Suresh Chande suresh.chande@nokia.com
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * version 2.1 as published by the Free Software Foundation.
13  *
14  * This library is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22  * 02110-1301 USA
23  *
24  */
25 
26 #include "DocxImport.h"
27 
28 #include <MsooXmlUtils.h>
29 #include <MsooXmlSchemas.h>
30 #include <MsooXmlContentTypes.h>
31 #include <MsooXmlRelationships.h>
32 #include "DocxXmlDocumentReader.h"
33 #include "DocxXmlStylesReader.h"
34 #include "DocxXmlNumberingReader.h"
35 #include "DocxXmlFootnoteReader.h"
36 #include "DocxXmlCommentsReader.h"
37 #include "DocxXmlEndnoteReader.h"
38 #include "DocxXmlFontTableReader.h"
39 #include "DocxXmlSettingsReader.h"
40 #include "DocxDebug.h"
41 
42 #include <QColor>
43 #include <QFile>
44 #include <QFont>
45 #include <QPen>
46 #include <QRegExp>
47 #include <QImage>
48 
49 #include <kpluginfactory.h>
50 
51 #include <KoEmbeddedDocumentSaver.h>
52 #include <KoDocumentInfo.h>
53 #include <KoDocument.h>
54 #include <KoFilterChain.h>
55 #include <KoPageLayout.h>
56 #include <KoXmlWriter.h>
57 
// Plugin entry point: declares the factory class DocxImportFactory, which
// registers DocxImport as the plugin it provides.  The JSON file named here is
// handed to the macro as the plugin's metadata (presumably embedded at build
// time -- confirm against the KPluginFactory documentation).
K_PLUGIN_FACTORY_WITH_JSON(DocxImportFactory, "calligra_filter_docx2odt.json",
                           registerPlugin<DocxImport>();)
60 
//! Kind of WordprocessingML package being imported.  The value is chosen in
//! DocxImport::acceptsSourceMimeType() from the source mime type.
enum DocxDocumentType {
    DocxDocument = 0,   //!< regular document mime types (with or without macros)
    DocxTemplate = 1    //!< template mime types (with or without macros)
};
65 
66 class DocxImport::Private
67 {
68 public:
Private()69     Private() : type(DocxDocument), macrosEnabled(false) {
70     }
71 
mainDocumentContentType() const72     const char* mainDocumentContentType() const
73     {
74         if (type == DocxTemplate) {
75             return MSOOXML::ContentTypes::wordTemplate;
76         }
77         return MSOOXML::ContentTypes::wordDocument;
78     }
79 
80     DocxDocumentType type;
81     bool macrosEnabled;
82     QMap<QString, QVariant> documentSettings;
83     QMap<QString, QString> colorMap;
84 };
85 
DocxImport(QObject * parent,const QVariantList &)86 DocxImport::DocxImport(QObject* parent, const QVariantList &)
87         : MSOOXML::MsooXmlImport(QLatin1String("text"), parent), d(new Private)
88 {
89 }
90 
~DocxImport()91 DocxImport::~DocxImport()
92 {
93     delete d;
94 }
95 
documentSettings() const96 QMap<QString, QVariant> DocxImport::documentSettings() const
97 {
98     return d->documentSettings;
99 }
100 
documentSetting(const QString & name) const101 QVariant DocxImport::documentSetting(const QString& name) const
102 {
103     return d->documentSettings.value(name);
104 }
105 
acceptsSourceMimeType(const QByteArray & mime) const106 bool DocxImport::acceptsSourceMimeType(const QByteArray& mime) const
107 {
108     debugDocx << "Entering DOCX Import filter: from " << mime;
109     if (mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document") {
110         d->type = DocxDocument;
111         d->macrosEnabled = false;
112     }
113     else if (mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.template") {
114         d->type = DocxTemplate;
115         d->macrosEnabled = false;
116     }
117     else if (mime == "application/vnd.ms-word.document.macroEnabled.12") {
118         d->type = DocxDocument;
119         d->macrosEnabled = true;
120     }
121     else if (mime == "application/vnd.ms-word.template.macroEnabled.12") {
122         d->type = DocxTemplate;
123         d->macrosEnabled = true;
124     }
125     else
126         return false;
127     return true;
128 }
129 
acceptsDestinationMimeType(const QByteArray & mime) const130 bool DocxImport::acceptsDestinationMimeType(const QByteArray& mime) const
131 {
132     debugDocx << "Entering DOCX Import filter: to " << mime;
133     return mime == "application/vnd.oasis.opendocument.text";
134 }
135 
parseParts(KoOdfWriters * writers,MSOOXML::MsooXmlRelationships * relationships,QString & errorMessage)136 KoFilter::ConversionStatus DocxImport::parseParts(KoOdfWriters *writers, MSOOXML::MsooXmlRelationships *relationships,
137         QString& errorMessage)
138 {
139     writers->body->addAttribute("text:use-soft-page-breaks", "true");
140 
141     // 0. parse settings.xml
142     {
143         DocxXmlSettingsReaderContext context(d->documentSettings);
144         DocxXmlSettingsReader settingsReader(writers);
145         d->colorMap = context.colorMap;
146 
147         RETURN_IF_ERROR( loadAndParseDocumentIfExists(
148             MSOOXML::ContentTypes::wordSettings, &settingsReader, writers, errorMessage, &context) )
149     }
150 
151     reportProgress(5);
152 
153     // 1. parse font table
154     {
155         DocxXmlFontTableReaderContext context(*writers->mainStyles);
156         DocxXmlFontTableReader fontTableReader(writers);
157         RETURN_IF_ERROR( loadAndParseDocumentIfExists(
158             MSOOXML::ContentTypes::wordFontTable, &fontTableReader, writers, errorMessage, &context) )
159     }
160 
161     QList<QByteArray> partNames = this->partNames(d->mainDocumentContentType());
162     if (partNames.count() != 1) {
163         errorMessage = i18n("Unable to find part for type %1", d->mainDocumentContentType());
164         return KoFilter::WrongFormat;
165     }
166     const QString documentPathAndFile(partNames.first());
167     QString documentPath, documentFile;
168     MSOOXML::Utils::splitPathAndFile(documentPathAndFile, &documentPath, &documentFile);
169 
170     // 2. parse theme for the document if a theme exists
171     MSOOXML::DrawingMLTheme themes;
172     const QString docThemePathAndFile(relationships->targetForType(
173         documentPath, documentFile,
174         QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/theme"));
175     if (!docThemePathAndFile.isEmpty()) {
176         debugDocx << QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/theme";
177 
178         // prepare the themes-reader
179         QString docThemePath, docThemeFile;
180         MSOOXML::Utils::splitPathAndFile(docThemePathAndFile, &docThemePath, &docThemeFile);
181 
182         MSOOXML::MsooXmlThemesReader themesReader(writers);
183         MSOOXML::MsooXmlThemesReaderContext themecontext(themes, relationships, (MSOOXML::MsooXmlImport*)this,
184             docThemePath, docThemeFile);
185 
186         KoFilter::ConversionStatus status
187             = loadAndParseDocument(&themesReader, docThemePathAndFile, errorMessage, &themecontext);
188 
189         debugDocx << "Reading ThemePathAndFile:" << docThemePathAndFile << "status=" << status;
190     }
191 
192     reportProgress(15);
193 
194     // Main document context, to which we collect footnotes, endnotes,
195     // comments, numbering, tablestyles
196     DocxXmlDocumentReaderContext mainContext(*this, documentPath, documentFile, *relationships, &themes);
197 
198     // 3. parse styles
199     {
200         // get styles path from document's relationships, not from content
201         // types; typically returns /word/styles.xml
202         //
203         // ECMA-376, 11.3.12 Style Definitions Part, p. 65
204         //
205         // An instance of this part type contains the definition for a set of
206         // styles used by this document.  A package shall contain at most two
207         // Style Definitions parts.  One instance of that part shall be the
208         // target of an implicit relationship from the Main Document (§11.3.10)
209         // part, and the other shall be the target of an implicit relationship
210         // in from the Glossary Document (§11.3.8) part.
211 
212         const QString stylesPathAndFile(relationships->targetForType(documentPath, documentFile,
213             QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/styles"));
214         DocxXmlStylesReader stylesReader(writers);
215         if (!stylesPathAndFile.isEmpty()) {
216             QString stylesPath, stylesFile;
217             MSOOXML::Utils::splitPathAndFile(stylesPathAndFile, &stylesPath, &stylesFile);
218             DocxXmlDocumentReaderContext context(*this, stylesPath, stylesFile, *relationships, &themes);
219 
220             RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
221                 stylesPathAndFile, &stylesReader, writers, errorMessage, &context) )
222 
223             mainContext.m_tableStyles = context.m_tableStyles;
224             mainContext.m_namedDefaultStyles = context.m_namedDefaultStyles;
225             mainContext.m_defaultFontSizePt = context.m_defaultFontSizePt;
226         }
227     }
228 
229     reportProgress(25);
230 
231     // 4. parse numbering
232     const QString numberingPathAndFile(relationships->targetForType(documentPath, documentFile,
233         QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/numbering"));
234     DocxXmlNumberingReader numberingReader(writers);
235     QString numberingPath, numberingFile;
236     MSOOXML::Utils::splitPathAndFile(numberingPathAndFile, &numberingPath, &numberingFile);
237     DocxXmlDocumentReaderContext numberingContext(*this, numberingPath, numberingFile, *relationships, &themes);
238 
239     if (!numberingPathAndFile.isEmpty()) {
240         RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
241             numberingPathAndFile, &numberingReader, writers, errorMessage, &numberingContext) )
242     }
243     mainContext.m_bulletStyles = numberingContext.m_bulletStyles;
244     mainContext.m_abstractNumIDs = numberingContext.m_abstractNumIDs;
245 
246     reportProgress(30);
247 
248     {
249 	// 5. parse footnotes
250         const QString footnotePathAndFile(relationships->targetForType(documentPath, documentFile,
251             QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/footnotes"));
252         //! @todo use m_contentTypes.values() when multiple paths are expected, e.g. for ContentTypes::wordHeader
253         DocxXmlFootnoteReader footnoteReader(writers);
254         if (!footnotePathAndFile.isEmpty()) {
255             QString footnotePath, footnoteFile;
256             MSOOXML::Utils::splitPathAndFile(footnotePathAndFile, &footnotePath, &footnoteFile);
257             DocxXmlDocumentReaderContext context(*this, footnotePath, footnoteFile, *relationships, &themes);
258             context.m_tableStyles = mainContext.m_tableStyles;
259             context.m_bulletStyles = mainContext.m_bulletStyles;
260             context.m_namedDefaultStyles = mainContext.m_namedDefaultStyles;
261             context.m_abstractNumIDs = mainContext.m_abstractNumIDs;
262 
263             RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
264                 footnotePathAndFile, &footnoteReader, writers, errorMessage, &context) )
265             mainContext.m_footnotes = context.m_footnotes;
266         }
267         reportProgress(35);
268 
269         // 6. parse comments
270         const QString commentPathAndFile(relationships->targetForType(documentPath, documentFile,
271            QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/comments"));
272         DocxXmlCommentReader commentReader(writers);
273         if (!commentPathAndFile.isEmpty()) {
274             QString commentPath, commentFile;
275             MSOOXML::Utils::splitPathAndFile(commentPathAndFile, &commentPath, &commentFile);
276             DocxXmlDocumentReaderContext context(*this, commentPath, commentFile, *relationships, &themes);
277             context.m_tableStyles = mainContext.m_tableStyles;
278             context.m_bulletStyles = mainContext.m_bulletStyles;
279             //TODO: m_abstractNumIDs and m_namedDefaultStyles might be needed
280 
281             RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
282                 commentPathAndFile, &commentReader, writers, errorMessage, &context) )
283             mainContext.m_comments = context.m_comments;
284         }
285 
286         reportProgress(40);
287 
288         // 7. parse endnotes
289         const QString endnotePathAndFile(relationships->targetForType(documentPath, documentFile,
290             QLatin1String(MSOOXML::Schemas::officeDocument::relationships) + "/endnotes"));
291         DocxXmlEndnoteReader endnoteReader(writers);
292         if (!endnotePathAndFile.isEmpty()) {
293             QString endnotePath, endnoteFile;
294             MSOOXML::Utils::splitPathAndFile(endnotePathAndFile, &endnotePath, &endnoteFile);
295             DocxXmlDocumentReaderContext context(*this, endnotePath, endnoteFile, *relationships, &themes);
296             context.m_tableStyles = mainContext.m_tableStyles;
297             context.m_bulletStyles = mainContext.m_bulletStyles;
298             context.m_namedDefaultStyles = mainContext.m_namedDefaultStyles;
299             context.m_abstractNumIDs = mainContext.m_abstractNumIDs;
300 
301             RETURN_IF_ERROR( loadAndParseDocumentFromFileIfExists(
302                 endnotePathAndFile, &endnoteReader, writers, errorMessage, &context) )
303             mainContext.m_endnotes = context.m_endnotes;
304         }
305         reportProgress(45);
306 
307         // 8. parse document
308         // Some of the templates MIGHT be defined in numberingreader.
309         DocxXmlDocumentReader documentReader(writers);
310         documentReader.m_definedShapeTypes = numberingReader.m_definedShapeTypes;
311         RETURN_IF_ERROR( loadAndParseDocument(
312             d->mainDocumentContentType(), &documentReader, writers, errorMessage, &mainContext) )
313     }
314     reportProgress(100);
315 
316     return KoFilter::OK;
317 }
318 
writeConfigurationSettings(KoXmlWriter * settings) const319 void DocxImport::writeConfigurationSettings(KoXmlWriter* settings) const
320 {
321     MsooXmlImport::writeConfigurationSettings(settings);
322 
323     // This config item is used in KoTextLayoutArea::handleBordersAndSpacing
324     // during layouting.  The defined 'Above paragraph' and 'Below paragraph'
325     // paragraph spacing (which is written in the ODF as fo:margin-top for the
326     // KoParagraphStyle) are not applied to the first and the last paragraph if
327     // this value is true.
328     settings->startElement("config:config-item");
329     settings->addAttribute("config:name", "AddParaTableSpacingAtStart");
330     settings->addAttribute("config:type", "boolean");
331     settings->addTextSpan("true");
332     settings->endElement();
333 
334     // OOo requires this config item to display files produced by this filter
335     // correctly.  If true, then the fo:text-indent attribute will be ignored.
336     settings->startElement("config:config-item");
337     settings->addAttribute("config:name", "IgnoreFirstLineIndentInNumbering");
338     settings->addAttribute("config:type", "boolean");
339     settings->addTextSpan("false");
340     settings->endElement();
341 }
342 
343 #include "DocxImport.moc"
344