1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */ 2 3 /* libmwaw 4 * Version: MPL 2.0 / LGPLv2+ 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 2.0 (the "License"); you may not use this file except in compliance with 8 * the License or as specified alternatively below. You may obtain a copy of 9 * the License at http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * Major Contributor(s): 17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com) 18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net) 19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch) 20 * Copyright (C) 2006, 2007 Andrew Ziem 21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr) 22 * 23 * 24 * All Rights Reserved. 25 * 26 * For minor contributions see the git repository. 27 * 28 * Alternatively, the contents of this file may be used under the terms of 29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"), 30 * in which case the provisions of the LGPLv2+ are applicable 31 * instead of those above. 32 */ 33 34 /* 35 * parser for Microsoft Word ( version 3.0-5.1 ) 36 */ 37 #ifndef MS_WRD_MWAW_PARSER 38 # define MS_WRD_MWAW_PARSER 39 40 #include <list> 41 #include <map> 42 #include <string> 43 #include <vector> 44 45 #include "MWAWDebug.hxx" 46 #include "MWAWEntry.hxx" 47 #include "MWAWInputStream.hxx" 48 #include "MWAWPosition.hxx" 49 50 #include "MWAWParser.hxx" 51 52 namespace MsWrdParserInternal 53 { 54 struct Object; 55 struct State; 56 class SubDocument; 57 } 58 59 class MsWrdText; 60 class MsWrdTextStyles; 61 62 //! the entry of MsWrdParser 63 struct MsWrdEntry final : public MWAWEntry { 64 //! constructor MsWrdEntryMsWrdEntry65 MsWrdEntry() 66 : MWAWEntry() 67 , m_pictType(-1) 68 { 69 } 70 MsWrdEntry(MsWrdEntry const &)=default; 71 //! destructor 72 ~MsWrdEntry() final; 73 /** \brief returns the text id 74 * 75 * This field is used to differentiate main text, header, ...) 76 */ pictTypeMsWrdEntry77 int pictType() const 78 { 79 return m_pictType; 80 } 81 //! sets the picture id setPictTypeMsWrdEntry82 void setPictType(int newId) 83 { 84 m_pictType = newId; 85 } 86 //! operator<< 87 friend std::ostream &operator<<(std::ostream &o, MsWrdEntry const &entry); 88 //! the picture identificator 89 int m_pictType; 90 }; 91 92 /** \brief the main class to read a Microsoft Word file 93 * 94 * 95 * 96 */ 97 class MsWrdParser final : public MWAWTextParser 98 { 99 friend class MsWrdText; 100 friend class MsWrdTextStyles; 101 friend class MsWrdParserInternal::SubDocument; 102 103 public: 104 //! constructor 105 MsWrdParser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header); 106 //! destructor 107 ~MsWrdParser() final; 108 109 //! checks if the document header is correct (or not) 110 bool checkHeader(MWAWHeader *header, bool strict=false) final; 111 112 //! the main parse function 113 void parse(librevenge::RVNGTextInterface *documentInterface) final; 114 115 protected: 116 //! inits all internal variables 117 void init(); 118 119 //! creates the listener which will be associated to the document 120 void createDocument(librevenge::RVNGTextInterface *documentInterface); 121 122 //! finds the different zones 123 bool createZones(); 124 125 //! finish reading the header (v3) 126 bool readHeaderEndV3(); 127 128 //! read the list of zones 129 bool readZoneList(); 130 131 //! read the print info zone 132 bool readPrintInfo(MsWrdEntry &entry); 133 134 //! read the printer name 135 bool readPrinter(MsWrdEntry &entry); 136 137 //! read the document sumary 138 bool readDocSum(MsWrdEntry &entry); 139 140 //! read a zone which consists in a list of string 141 bool readStringsZone(MsWrdEntry &entry, std::vector<std::string> &list); 142 143 //! read the objects 144 bool readObjects(); 145 146 //! read the object list 147 bool readObjectList(MsWrdEntry &entry); 148 149 //! read the object flags 150 bool readObjectFlags(MsWrdEntry &entry); 151 152 //! read an object 153 bool readObject(MsWrdParserInternal::Object &obj); 154 155 //! read the page dimensions + ? 156 bool readDocumentInfo(MsWrdEntry &entry); 157 158 //! read the zone 17( some bdbox + text position ?) 159 bool readZone17(MsWrdEntry &entry); 160 161 //! check if a position corresponds or not to a picture entry 162 bool checkPicturePos(long pos, int type); 163 164 //! read a picture data 165 bool readPicture(MsWrdEntry &entry); 166 //! send a picture 167 void sendPicture(long fPos, int cPos, MWAWPosition::AnchorTo anchor=MWAWPosition::Char); 168 169 //! returns the color corresponding to an id 170 bool getColor(int id, MWAWColor &col) const; 171 172 //! adds a new page 173 void newPage(int number); 174 175 /* 176 * interface with subdocument 177 */ 178 //! try to send a footnote id 179 void sendFootnote(int id); 180 181 //! try to send a bookmark field id 182 void sendFieldComment(int id); 183 184 //! try to send a footnote, a field to the textParser 185 void send(int id, libmwaw::SubDocumentType type); 186 //! try to send a text to the textParser 187 void send(MWAWEntry const &entry); 188 //! try to send a simple text zone(ie. a comment) 189 void sendSimpleTextZone(MWAWListenerPtr &listener, MWAWEntry const &entry); 190 191 // 192 // low level 193 // 194 195 //! read a file entry 196 MsWrdEntry readEntry(std::string type, int id=-1); 197 198 protected: 199 // 200 // data 201 // 202 //! the state 203 std::shared_ptr<MsWrdParserInternal::State> m_state; 204 205 //! the list of entries 206 std::multimap<std::string, MsWrdEntry> m_entryMap; 207 208 //! the text parser 209 std::shared_ptr<MsWrdText> m_textParser; 210 }; 211 #endif 212 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: 213