1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */ 2 3 /* libmwaw 4 * Version: MPL 2.0 / LGPLv2+ 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 2.0 (the "License"); you may not use this file except in compliance with 8 * the License or as specified alternatively below. You may obtain a copy of 9 * the License at http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * Major Contributor(s): 17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com) 18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net) 19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch) 20 * Copyright (C) 2006, 2007 Andrew Ziem 21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr) 22 * 23 * 24 * All Rights Reserved. 25 * 26 * For minor contributions see the git repository. 27 * 28 * Alternatively, the contents of this file may be used under the terms of 29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"), 30 * in which case the provisions of the LGPLv2+ are applicable 31 * instead of those above. 32 */ 33 34 /* 35 * Parser to Microsoft Word text document 36 * 37 */ 38 #ifndef MS_WRD_MWAW_TEXT 39 # define MS_WRD_MWAW_TEXT 40 41 #include <map> 42 #include <string> 43 #include <vector> 44 45 #include "libmwaw_internal.hxx" 46 47 #include "MWAWEntry.hxx" 48 49 #include "MWAWDebug.hxx" 50 51 #include "MsWrdTextStyles.hxx" 52 53 namespace MsWrdTextInternal 54 { 55 struct State; 56 struct Table; 57 } 58 59 struct MsWrdEntry; 60 class MsWrdParser; 61 class MsWrdTextStyles; 62 63 /** \brief the main class to read the text part of Microsoft Word file */ 64 class MsWrdText 65 { 66 friend class MsWrdParser; 67 friend class MsWrdTextStyles; 68 public: 69 //! Internal: the plc 70 struct PLC { 71 enum Type { TextPosition, HeaderFooter, Page, Section, ParagraphInfo, Paragraph, Font, Footnote, FootnoteDef, Field, Object }; PLCMsWrdText::PLC72 PLC(Type type, int id=0) 73 : m_type(type) 74 , m_id(id) 75 , m_extra("") 76 { 77 } 78 //! operator<< 79 friend std::ostream &operator<<(std::ostream &o, PLC const &plc); 80 //! a comparaison structure 81 struct ltstr { operator ()MsWrdText::PLC::ltstr82 bool operator()(PLC const &s1, PLC const &s2) const 83 { 84 if (s1.m_type != s2.m_type) 85 return int(s1.m_type) < int(s2.m_type); 86 if (s1.m_id != s2.m_id) 87 return s1.m_id < s2.m_id; 88 return false; 89 } 90 }; 91 //! the plc type 92 Type m_type; 93 //! the identificator 94 int m_id; 95 //! some extra data 96 std::string m_extra; 97 }; 98 public: 99 //! constructor 100 explicit MsWrdText(MsWrdParser &parser); 101 //! destructor 102 virtual ~MsWrdText(); 103 104 /** returns the file version */ 105 int version() const; 106 107 /** returns the number of pages */ 108 int numPages() const; 109 110 /** returns the header entry */ 111 MWAWEntry getHeader() const; 112 113 /** returns the footer entry */ 114 MWAWEntry getFooter() const; 115 protected: 116 //! returns the parser state getParserState()117 std::shared_ptr<MWAWParserState> &getParserState() 118 { 119 return m_parserState; 120 } 121 122 //! send a main zone 123 bool sendMainText(); 124 125 //! send a text zone 126 bool sendText(MWAWEntry const &textEntry, bool mainZone, bool tableCell=false); 127 //! try to open a section 128 bool sendSection(int sectionId); 129 //! reads the three different zone size 130 bool readHeaderTextLength(); 131 132 //! finds the different zones 133 bool createZones(long bot); 134 135 //! read the text structure(some paragraph style+some text position?) 136 bool readTextStruct(MsWrdEntry &entry); 137 138 //! read the page limit ? 139 bool readPageBreak(MsWrdEntry &entry); 140 141 //! read the paragraph height info 142 bool readParagraphInfo(MsWrdEntry &entry); 143 144 //! read the field data 145 bool readFields(MsWrdEntry &entry, std::vector<long> const &fieldPos); 146 147 //! send a field note to a listener 148 bool sendFieldComment(int id); 149 150 //! read the footnote pos in text + val 151 bool readFootnotesPos(MsWrdEntry &entry, std::vector<long> const ¬eDef); 152 153 //! read the footnote data 154 bool readFootnotesData(MsWrdEntry &entry); 155 156 //! send a note to a listener 157 bool sendFootnote(int id); 158 159 //! read the font names 160 bool readFontNames(MsWrdEntry &entry); 161 162 //! sends the data which have not yet been sent to the listener 163 void flushExtra(); 164 165 //! try to send a table. 166 bool sendTable(MsWrdTextInternal::Table const &table); 167 168 // interface with MsWrdTextStyles 169 170 //! returns the main text length 171 long getMainTextLength() const; 172 //! returns the text correspondance zone ( textpos, plc ) 173 std::multimap<long, MsWrdText::PLC> &getTextPLCMap(); 174 //! returns the file correspondance zone ( filepos, plc ) 175 std::multimap<long, MsWrdText::PLC> &getFilePLCMap(); 176 177 // 178 // low level 179 // 180 181 //! prepare the data to be send 182 void prepareData(); 183 184 //! cut the text in line/cell pos 185 void prepareLines(); 186 //! convert the file position in character position and compute the paragraph limit 187 void convertFilePLCPos(); 188 //! retrieve the paragraph properties 189 void prepareParagraphProperties(); 190 //! retrieve the font properties 191 void prepareFontProperties(); 192 193 //! find the table end position knowing the end cell/pos delimiter 194 void prepareTableLimits(); 195 //! try to find a table which begin at position cPos, if so, update its data... 196 bool updateTableBeginnningAt(long cPos, long &nextCPos); 197 198 //! read a zone which consists in a list of int 199 bool readLongZone(MsWrdEntry &entry, int sz, std::vector<long> &list); 200 201 private: 202 MsWrdText(MsWrdText const &orig) = delete; 203 MsWrdText &operator=(MsWrdText const &orig) = delete; 204 205 protected: 206 // 207 // data 208 // 209 //! the parser state 210 MWAWParserStatePtr m_parserState; 211 212 //! the state 213 std::shared_ptr<MsWrdTextInternal::State> m_state; 214 215 //! the style manager 216 std::shared_ptr<MsWrdTextStyles> m_stylesManager; 217 218 //! the main parser; 219 MsWrdParser *m_mainParser; 220 }; 221 #endif 222 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: 223