1 // -*- C++ -*-
2 /**
3  * \file Paragraph.h
4  * This file is part of LyX, the document processor.
5  * Licence details can be found in the file COPYING.
6  *
7  * \author Asger Alstrup
8  * \author Lars Gullik Bjønnes
9  * \author John Levon
10  * \author André Pönitz
11  * \author Jürgen Vigna
12  *
13  * Full author contact details are available in file CREDITS.
14  */
15 
16 #ifndef PARAGRAPH_H
17 #define PARAGRAPH_H
18 
19 #include "FontEnums.h"
20 #include "LayoutEnums.h"
21 #include "SpellChecker.h"
22 
23 #include "support/strfwd.h"
24 #include "support/types.h"
25 
26 #include <set>
27 
28 namespace lyx {
29 
30 class AuthorList;
31 class Buffer;
32 class BufferParams;
33 class Change;
34 class Counters;
35 class Cursor;
36 class CursorSlice;
37 class DocIterator;
38 class docstring_list;
39 class DocumentClass;
40 class Inset;
41 class InsetBibitem;
42 class LaTeXFeatures;
43 class InsetList;
44 class Language;
45 class Layout;
46 class Font;
47 class MetricsInfo;
48 class OutputParams;
49 class PainterInfo;
50 class ParagraphParameters;
51 class TocBackend;
52 class WordLangTuple;
53 class XHTMLStream;
54 class otexstream;
55 
56 class FontSpan {
57 public:
58 	/// Invalid font span containing no character
FontSpan()59 	FontSpan() : first(0), last(-1) {}
60 	/// Span including first and last
FontSpan(pos_type f,pos_type l)61 	FontSpan(pos_type f, pos_type l) : first(f), last(l) {}
62 
63 public:
64 	/// Range including first and last.
65 	pos_type first, last;
66 
67 	inline bool operator<(FontSpan const & s) const
68 	{
69 		return first < s.first;
70 	}
71 
72 	inline bool operator==(FontSpan const & s) const
73 	{
74 		return first == s.first && last == s.last;
75 	}
76 
contains(pos_type p)77 	inline bool contains(pos_type p) const
78 	{
79 		return first <= p && p <= last;
80 	}
81 
size()82 	inline size_t size() const
83 	{
84 		return empty() ? 0 : last - first;
85 	}
86 
87 
intersect(FontSpan const & f)88 	inline FontSpan intersect(FontSpan const & f) const
89 	{
90 		FontSpan result = FontSpan();
91 		if (contains(f.first))
92 			result.first = f.first;
93 		else if (f.contains(first))
94 			result.first = first;
95 		else
96 			return result;
97 		if (contains(f.last))
98 			result.last = f.last;
99 		else if (f.contains(last))
100 			result.last = last;
101 		return result;
102 	}
103 
empty()104 	inline bool empty() const
105 	{
106 		return first > last;
107 	}
108 };
109 
/// Selects the case conversion requested from Paragraph::changeCase().
enum TextCase {
	text_lowercase = 0,      ///< lowercase
	text_capitalization = 1, ///< capitalization
	text_uppercase = 2       ///< uppercase
};
119 
120 
/// Option flags for Paragraph::asString(). Every flag occupies its own
/// bit, so several of them can be combined with bitwise OR.
enum AsStringParameter
{
	/// No option, only printable characters.
	AS_STR_NONE = 0,
	/// Prefix with paragraph label.
	AS_STR_LABEL = 1 << 0,
	/// Go into insets.
	AS_STR_INSETS = 1 << 1,
	/// Get also newline characters.
	AS_STR_NEWLINES = 1 << 2,
	/// Skip deleted text in change tracking.
	AS_STR_SKIPDELETE = 1 << 3,
	/// Don't export formatting when descending into insets.
	AS_STR_PLAINTEXT = 1 << 4
};
131 
132 
133 /// A Paragraph holds all text, attributes and insets in a text paragraph
134 class Paragraph
135 {
136 public:
137 	///
138 	Paragraph();
139 	/// Copy constructor.
140 	Paragraph(Paragraph const &);
141 	/// Partial copy constructor.
142 	/// Copy the Paragraph contents from \p beg to \p end (without end).
143 	Paragraph(Paragraph const & par, pos_type beg, pos_type end);
144 	///
145 	Paragraph & operator=(Paragraph const &);
146 	///
147 	~Paragraph();
148 	///
149 	int id() const;
150 	///
151 	void setId(int id);
152 
153 	///
154 	void addChangesToToc(DocIterator const & cdit, Buffer const & buf,
155 	                     bool output_active, TocBackend & backend) const;
156 	/// set the buffer flag if there are changes in the paragraph
157 	void addChangesToBuffer(Buffer const & buf) const;
158 	///
159 	bool isChangeUpdateRequired() const;
160 	///
161 	Language const * getParLanguage(BufferParams const &) const;
162 	///
163 	bool isRTL(BufferParams const &) const;
164 	///
165 	void changeLanguage(BufferParams const & bparams,
166 			    Language const * from, Language const * to);
167 	///
168 	bool isMultiLingual(BufferParams const &) const;
169 	///
170 	void getLanguages(std::set<Language const *> &) const;
171 
172 	/// Convert the paragraph to a string.
173 	/// \param AsStringParameter options. This can contain any combination of
174 	/// asStringParameter values. Valid examples:
175 	///		asString(AS_STR_LABEL)
176 	///		asString(AS_STR_LABEL | AS_STR_INSETS)
177 	///		asString(AS_STR_INSETS)
178 	docstring asString(int options = AS_STR_NONE) const;
179 
180 	/// Convert the paragraph to a string.
181 	/// \note If options includes AS_STR_PLAINTEXT, then runparams must be != 0
182 	docstring asString(pos_type beg, pos_type end,
183 			   int options = AS_STR_NONE,
184 			   const OutputParams *runparams = 0) const;
185 	///
186 	void forOutliner(docstring &, size_t maxlen, bool shorten = true,
187 	                 bool label = true) const;
188 
189 	///
190 	void write(std::ostream &, BufferParams const &,
191 		depth_type & depth) const;
192 	///
193 	void validate(LaTeXFeatures &) const;
194 
195 	/// \param force means: output even if layout.inpreamble is true.
196 	void latex(BufferParams const &, Font const & outerfont, otexstream &,
197 		OutputParams const &, int start_pos = 0, int end_pos = -1,
198 		bool force = false) const;
199 
200 	/// Can we drop the standard paragraph wrapper?
201 	bool emptyTag() const;
202 
203 	/// Get the id of the paragraph, usefull for docbook
204 	std::string getID(Buffer const & buf, OutputParams const & runparams) const;
205 
206 	/// Output the first word of a paragraph, return the position where it left.
207 	pos_type firstWordDocBook(odocstream & os, OutputParams const & runparams) const;
208 
209 	/// Output the first word of a paragraph, return the position where it left.
210 	pos_type firstWordLyXHTML(XHTMLStream & xs, OutputParams const & runparams) const;
211 
212 	/// Writes to stream the docbook representation
213 	void simpleDocBookOnePar(Buffer const & buf,
214 				 odocstream &,
215 				 OutputParams const & runparams,
216 				 Font const & outerfont,
217 				 pos_type initial = 0) const;
218 	/// \return any material that has had to be deferred until after the
219 	/// paragraph has closed.
220 	docstring simpleLyXHTMLOnePar(Buffer const & buf,
221 				 XHTMLStream & xs,
222 				 OutputParams const & runparams,
223 				 Font const & outerfont,
224 				 bool start_paragraph = true,
225 				 bool close_paragraph = true,
226 				 pos_type initial = 0) const;
227 
228 	///
229 	bool hasSameLayout(Paragraph const & par) const;
230 
231 	///
232 	void makeSameLayout(Paragraph const & par);
233 
234 	///
235 	void setInsetOwner(Inset const * inset);
236 	///
237 	Inset const & inInset() const;
238 	///
239 	bool allowParagraphCustomization() const;
240 	///
241 	bool usePlainLayout() const;
242 	///
243 	bool isPassThru() const;
244 	///
245 	pos_type size() const;
246 	///
247 	bool empty() const;
248 
249 	///
250 	Layout const & layout() const;
251 	/// Do not pass a temporary to this!
252 	void setLayout(Layout const & layout);
253 	///
254 	void setPlainOrDefaultLayout(DocumentClass const & tc);
255 	///
256 	void setDefaultLayout(DocumentClass const & tc);
257 	///
258 	void setPlainLayout(DocumentClass const & tc);
259 
260 	/// This is the item depth, only used by enumerate and itemize
261 	signed char itemdepth;
262 
263 	/// look up change at given pos
264 	Change const & lookupChange(pos_type pos) const;
265 
266 	/// is there a change within the given range (does not
267 	/// check contained paragraphs)
268 	bool isChanged(pos_type start, pos_type end) const;
269 	/// is there an unchanged char at the given pos ?
270 	bool isChanged(pos_type pos) const;
271 
272 	/// is there an insertion at the given pos ?
273 	bool isInserted(pos_type pos) const;
274 	/// is there a deletion at the given pos ?
275 	bool isDeleted(pos_type pos) const;
276 	/// is the whole paragraph deleted ?
277 	bool isDeleted(pos_type start, pos_type end) const;
278 
279 	/// will the paragraph be physically merged with the next
280 	/// one if the imaginary end-of-par character is logically deleted?
281 	bool isMergedOnEndOfParDeletion(bool trackChanges) const;
282 
283 	/// set change for the entire par
284 	void setChange(Change const & change);
285 
286 	/// set change at given pos
287 	void setChange(pos_type pos, Change const & change);
288 
289 	/// accept changes within the given range
290 	void acceptChanges(pos_type start, pos_type end);
291 
292 	/// reject changes within the given range
293 	void rejectChanges(pos_type start, pos_type end);
294 
295 	/// Paragraphs can contain "manual labels", for example, Description
296 	/// environment. The text for this user-editable label is stored in
297 	/// the paragraph alongside the text of the rest of the paragraph
298 	/// (the body). This function returns the starting position of the
299 	/// body of the text in the paragraph.
300 	pos_type beginOfBody() const;
301 	/// recompute this value
302 	void setBeginOfBody();
303 
304 	///
305 	docstring expandLabel(Layout const &, BufferParams const &) const;
306 	///
307 	docstring expandDocBookLabel(Layout const &, BufferParams const &) const;
308 	///
309 	docstring const & labelString() const;
310 	/// the next two functions are for the manual labels
311 	docstring const getLabelWidthString() const;
312 	/// Set label width string.
313 	void setLabelWidthString(docstring const & s);
314 	/// Actual paragraph alignment used
315 	LyXAlignment getAlign(BufferParams const &) const;
316 	/// Default paragraph alignment as determined by layout
317 	LyXAlignment getDefaultAlign(BufferParams const &) const;
318 	/// The nesting depth of a paragraph
319 	depth_type getDepth() const;
320 	/// The maximal possible depth of a paragraph after this one
321 	depth_type getMaxDepthAfter() const;
322 	///
323 	void applyLayout(Layout const & new_layout);
324 
325 	/// (logically) erase the char at pos; return true if it was actually erased
326 	bool eraseChar(pos_type pos, bool trackChanges);
327 	/// (logically) erase the given range; return the number of chars actually erased
328 	int eraseChars(pos_type start, pos_type end, bool trackChanges);
329 
330 	///
331 	void resetFonts(Font const & font);
332 
333 	/** Get uninstantiated font setting. Returns the difference
334 	    between the characters font and the layoutfont.
335 	    This is what is stored in the fonttable
336 	*/
337 	Font const &
338 	getFontSettings(BufferParams const &, pos_type pos) const;
339 	///
340 	Font const & getFirstFontSettings(BufferParams const &) const;
341 
342 	/** Get fully instantiated font. If pos == -1, use the layout
343 	    font attached to this paragraph.
344 	    If pos == -2, use the label font of the layout attached here.
345 	    In all cases, the font is instantiated, i.e. does not have any
346 	    attributes with values FONT_INHERIT, FONT_IGNORE or
347 	    FONT_TOGGLE.
348 	*/
349 	Font const getFont(BufferParams const &, pos_type pos,
350 			      Font const & outerfont) const;
351 	Font const getLayoutFont(BufferParams const &,
352 				    Font const & outerfont) const;
353 	Font const getLabelFont(BufferParams const &,
354 				   Font const & outerfont) const;
355 	/**
356 	 * The font returned by the above functions is the same in a
357 	 * span of characters. This method will return the first and
358 	 * the last positions in the paragraph for which that font is
359 	 * the same. This can be used to avoid unnecessary calls to getFont.
360 	 */
361 	FontSpan fontSpan(pos_type pos) const;
362 	///
363 	char_type getChar(pos_type pos) const;
364 	/// Get the char, but mirror all bracket characters if it is right-to-left
365 	char_type getUChar(BufferParams const &, OutputParams const &,
366 			   pos_type pos) const;
367 	/// pos <= size() (there is a dummy font change at the end of each par)
368 	void setFont(pos_type pos, Font const & font);
369 	///
370 	void insert(pos_type pos, docstring const & str,
371 		    Font const & font, Change const & change);
372 
373 	///
374 	void appendString(docstring const & s, Font const & font,
375 		Change const & change);
376 	///
377 	void appendChar(char_type c, Font const & font, Change const & change);
378 	///
379 	void insertChar(pos_type pos, char_type c, bool trackChanges);
380 	///
381 	void insertChar(pos_type pos, char_type c,
382 			Font const &, bool trackChanges);
383 	///
384 	void insertChar(pos_type pos, char_type c,
385 			Font const &, Change const & change);
386 	/// Insert \p inset at position \p pos with \p change traking status and
387 	/// \p font.
388 	/// \return true if successful.
389 	bool insertInset(pos_type pos, Inset * inset,
390 			 Font const & font, Change const & change);
391 	///
392 	Inset * getInset(pos_type pos);
393 	///
394 	Inset const * getInset(pos_type pos) const;
395 
396 	/// Release inset at given position.
397 	/// \warning does not honour change tracking!
398 	/// Therefore, it should only be used for breaking and merging
399 	/// paragraphs
400 	Inset * releaseInset(pos_type pos);
401 
402 	///
403 	InsetList const & insetList() const;
404 	///
405 	void setBuffer(Buffer &);
406 	///
407 	void resetBuffer();
408 
409 	///
410 	bool isHfill(pos_type pos) const;
411 
412 	/// hinted by profiler
413 	bool isInset(pos_type pos) const;
414 	///
415 	bool isNewline(pos_type pos) const;
416 	///
417 	bool isEnvSeparator(pos_type pos) const;
418 	/// return true if the char is a word separator
419 	bool isSeparator(pos_type pos) const;
420 	///
421 	bool isLineSeparator(pos_type pos) const;
422 	/// True if the character/inset at this point is a word separator.
423 	/// Note that digits in particular are not considered as word separator.
424 	bool isWordSeparator(pos_type pos) const;
425 	/// True if the element at this point is a character that is not a letter.
426 	bool isChar(pos_type pos) const;
427 	/// True if the element at this point is a space
428 	bool isSpace(pos_type pos) const;
429 	/// True if the element at this point is a hard hyphen or a apostrophe
430 	/// If it is enclosed by spaces return false
431 	bool isHardHyphenOrApostrophe(pos_type pos) const;
432 
433 	/// returns true if at least one line break or line separator has been deleted
434 	/// at the beginning of the paragraph (either physically or logically)
435 	bool stripLeadingSpaces(bool trackChanges);
436 
437 	/// return true if we allow multiple spaces
438 	bool isFreeSpacing() const;
439 
440 	/// return true if we allow this par to stay empty
441 	bool allowEmpty() const;
442 	///
443 	ParagraphParameters & params();
444 	///
445 	ParagraphParameters const & params() const;
446 
447 	/// Check whether a call to fixBiblio is needed.
448 	bool brokenBiblio() const;
449 	/// Check if we are in a Biblio environment and insert or
450 	/// delete InsetBibitems as necessary.
451 	/// \retval int 1, if we had to add an inset, in which case
452 	/// the cursor will need to move cursor forward; -pos, if we deleted
453 	/// an inset, in which case pos is the position from which the inset
454 	/// was deleted, and the cursor will need to be moved back one if it
455 	/// was previously past that position. Return 0 otherwise.
456 	int fixBiblio(Buffer const & buffer);
457 
458 	/// For each author, set 'used' to true if there is a change
459 	/// by this author in the paragraph.
460 	void checkAuthors(AuthorList const & authorList);
461 
462 	///
463 	void changeCase(BufferParams const & bparams, pos_type pos,
464 		pos_type & right, TextCase action);
465 
466 	/// find \param str string inside Paragraph.
467 	/// \return non-zero if the specified string is at the specified
468 	///	position; returned value is the actual match length in positions
469 	/// \param del specifies whether deleted strings in ct mode will be considered
470 	int find(
471 		docstring const & str, ///< string to search
472 		bool cs, ///<
473 		bool mw, ///<
474 		pos_type pos, ///< start from here.
475 		bool del = true) const;
476 
477 	void locateWord(pos_type & from, pos_type & to,
478 		word_location const loc) const;
479 	///
480 	void updateWords();
481 
482 	/// Spellcheck word at position \p from and fill in found misspelled word
483 	/// and \p suggestions if \p do_suggestion is true.
484 	/// \return result from spell checker, SpellChecker::UNKNOWN_WORD when misspelled.
485 	SpellChecker::Result spellCheck(pos_type & from, pos_type & to, WordLangTuple & wl,
486 		docstring_list & suggestions, bool do_suggestion =  true,
487 		bool check_learned = false) const;
488 
489 	/// Spell checker status at position \p pos.
490 	/// If \p check_boundary is true the status of position immediately
491 	/// before \p pos is tested too if it is at word boundary.
492 	/// \return true if one of the tested positions is misspelled.
493 	bool isMisspelled(pos_type pos, bool check_boundary = false) const;
494 
495 	/// \return the spell range (misspelled area) around position.
496 	/// Range is empty if word at position is correctly spelled.
497 	FontSpan const & getSpellRange(pos_type pos) const;
498 
499 	/// spell check of whole paragraph
500 	/// remember results until call of requestSpellCheck()
501 	void spellCheck() const;
502 
503 	/// query state of spell checker results
504 	bool needsSpellCheck() const;
505 	/// mark position of text manipulation to inform the spell checker
506 	/// default value -1 marks the whole paragraph to be checked (again)
507 	void requestSpellCheck(pos_type pos = -1);
508 
509 	/// an automatically generated identifying label for this paragraph.
510 	/// presently used only in the XHTML output routines.
511 	std::string magicLabel() const;
512 
513 	/// anonymizes the paragraph contents (but not the paragraphs
514 	/// contained inside it. Does not handle undo.
515 	void anonymize();
516 
517 private:
518 	/// Expand the counters for the labelstring of \c layout
519 	docstring expandParagraphLabel(Layout const &, BufferParams const &,
520 		bool process_appendix) const;
521 	///
522 	void deregisterWords();
523 	///
524 	void collectWords();
525 	///
526 	void registerWords();
527 
528 	/// Pimpl away stuff
529 	class Private;
530 	///
531 	friend class Paragraph::Private;
532 	///
533 	Private * d;
534 };
535 
536 } // namespace lyx
537 
538 #endif // PARAGRAPH_H
539