1 /* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
2 
3 /* libmwaw
4 * Version: MPL 2.0 / LGPLv2+
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 2.0 (the "License"); you may not use this file except in compliance with
8 * the License or as specified alternatively below. You may obtain a copy of
9 * the License at http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Major Contributor(s):
17 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
18 * Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
19 * Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
20 * Copyright (C) 2006, 2007 Andrew Ziem
21 * Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
22 *
23 *
24 * All Rights Reserved.
25 *
26 * For minor contributions see the git repository.
27 *
28 * Alternatively, the contents of this file may be used under the terms of
29 * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
30 * in which case the provisions of the LGPLv2+ are applicable
31 * instead of those above.
32 */
33 
/*
 * Parser for the text part of a Microsoft Word document
 *
 */
38 #ifndef MS_WRD_MWAW_TEXT
39 #  define MS_WRD_MWAW_TEXT
40 
41 #include <map>
42 #include <string>
43 #include <vector>
44 
45 #include "libmwaw_internal.hxx"
46 
47 #include "MWAWEntry.hxx"
48 
49 #include "MWAWDebug.hxx"
50 
51 #include "MsWrdTextStyles.hxx"
52 
//! forward declarations of the internal structures used by MsWrdText
namespace MsWrdTextInternal
{
//! a table (see MsWrdText::sendTable)
struct Table;
//! the internal state (see MsWrdText::m_state)
struct State;
}
58 
// forward declarations of the other Microsoft Word parser types used below
class MsWrdParser;
class MsWrdTextStyles;
struct MsWrdEntry;
62 
63 /** \brief the main class to read the text part of Microsoft Word file */
64 class MsWrdText
65 {
66   friend class MsWrdParser;
67   friend class MsWrdTextStyles;
68 public:
69   //! Internal: the plc
70   struct PLC {
71     enum Type { TextPosition, HeaderFooter, Page, Section, ParagraphInfo, Paragraph, Font, Footnote, FootnoteDef, Field, Object };
PLCMsWrdText::PLC72     PLC(Type type, int id=0)
73       : m_type(type)
74       , m_id(id)
75       , m_extra("")
76     {
77     }
78     //! operator<<
79     friend std::ostream &operator<<(std::ostream &o, PLC const &plc);
80     //! a comparaison structure
81     struct ltstr {
operator ()MsWrdText::PLC::ltstr82       bool operator()(PLC const &s1, PLC const &s2) const
83       {
84         if (s1.m_type != s2.m_type)
85           return int(s1.m_type) < int(s2.m_type);
86         if (s1.m_id != s2.m_id)
87           return s1.m_id < s2.m_id;
88         return false;
89       }
90     };
91     //! the plc type
92     Type m_type;
93     //! the identificator
94     int m_id;
95     //! some extra data
96     std::string m_extra;
97   };
98 public:
99   //! constructor
100   explicit MsWrdText(MsWrdParser &parser);
101   //! destructor
102   virtual ~MsWrdText();
103 
104   /** returns the file version */
105   int version() const;
106 
107   /** returns the number of pages */
108   int numPages() const;
109 
110   /** returns the header entry */
111   MWAWEntry getHeader() const;
112 
113   /** returns the footer entry */
114   MWAWEntry getFooter() const;
115 protected:
116   //! returns the parser state
getParserState()117   std::shared_ptr<MWAWParserState> &getParserState()
118   {
119     return m_parserState;
120   }
121 
122   //! send a main zone
123   bool sendMainText();
124 
125   //! send a text zone
126   bool sendText(MWAWEntry const &textEntry, bool mainZone, bool tableCell=false);
127   //! try to open a section
128   bool sendSection(int sectionId);
129   //! reads the three different zone size
130   bool readHeaderTextLength();
131 
132   //! finds the different zones
133   bool createZones(long bot);
134 
135   //! read the text structure(some paragraph style+some text position?)
136   bool readTextStruct(MsWrdEntry &entry);
137 
138   //! read the page limit ?
139   bool readPageBreak(MsWrdEntry &entry);
140 
141   //! read the paragraph height info
142   bool readParagraphInfo(MsWrdEntry &entry);
143 
144   //! read the field data
145   bool readFields(MsWrdEntry &entry, std::vector<long> const &fieldPos);
146 
147   //! send a field note to a listener
148   bool sendFieldComment(int id);
149 
150   //! read the footnote pos in text + val
151   bool readFootnotesPos(MsWrdEntry &entry, std::vector<long> const &noteDef);
152 
153   //! read the footnote data
154   bool readFootnotesData(MsWrdEntry &entry);
155 
156   //! send a note to a listener
157   bool sendFootnote(int id);
158 
159   //! read the font names
160   bool readFontNames(MsWrdEntry &entry);
161 
162   //! sends the data which have not yet been sent to the listener
163   void flushExtra();
164 
165   //! try to send a table.
166   bool sendTable(MsWrdTextInternal::Table const &table);
167 
168   // interface with MsWrdTextStyles
169 
170   //! returns the main text length
171   long getMainTextLength() const;
172   //! returns the text correspondance zone ( textpos, plc )
173   std::multimap<long, MsWrdText::PLC> &getTextPLCMap();
174   //! returns the file correspondance zone ( filepos, plc )
175   std::multimap<long, MsWrdText::PLC> &getFilePLCMap();
176 
177   //
178   // low level
179   //
180 
181   //! prepare the data to be send
182   void prepareData();
183 
184   //! cut the text in line/cell pos
185   void prepareLines();
186   //! convert the file position in character position and compute the paragraph limit
187   void convertFilePLCPos();
188   //! retrieve the paragraph properties
189   void prepareParagraphProperties();
190   //! retrieve the font properties
191   void prepareFontProperties();
192 
193   //! find the table end position knowing the end cell/pos delimiter
194   void prepareTableLimits();
195   //! try to find a table which begin at position cPos, if so, update its data...
196   bool updateTableBeginnningAt(long cPos, long &nextCPos);
197 
198   //! read a zone which consists in a list of int
199   bool readLongZone(MsWrdEntry &entry, int sz, std::vector<long> &list);
200 
201 private:
202   MsWrdText(MsWrdText const &orig) = delete;
203   MsWrdText &operator=(MsWrdText const &orig) = delete;
204 
205 protected:
206   //
207   // data
208   //
209   //! the parser state
210   MWAWParserStatePtr m_parserState;
211 
212   //! the state
213   std::shared_ptr<MsWrdTextInternal::State> m_state;
214 
215   //! the style manager
216   std::shared_ptr<MsWrdTextStyles> m_stylesManager;
217 
218   //! the main parser;
219   MsWrdParser *m_mainParser;
220 };
221 #endif
222 // vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab:
223