1 /* 2 SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com> 3 4 SPDX-License-Identifier: GPL-2.0-or-later 5 */ 6 7 #ifndef _OKULAR_TEXTPAGE_H_ 8 #define _OKULAR_TEXTPAGE_H_ 9 10 #include <QList> 11 #include <QString> 12 13 #include "global.h" 14 #include "okularcore_export.h" 15 16 class QTransform; 17 18 namespace Okular 19 { 20 class NormalizedPoint; 21 class NormalizedRect; 22 class Page; 23 class PagePrivate; 24 class TextPagePrivate; 25 class TextSelection; 26 class RegularAreaRect; 27 28 /*! @class TextEntity 29 * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box. 30 * 31 * To enable searching and text selection, a generator can give information about the textual 32 * content of a Page using a TextPage. 33 * A TextPage is created using TextEntity objects. 34 * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page. 35 * 36 * Ideally, every single glyph is represented by its own TextEntity. 37 * If the textual representation of a graphical glyph contains more than one character, 38 * the TextEntity must contain the whole string which represents the glyph. 39 * 40 * When the Generator has created the TextPage, and it is added to a Page, 41 * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection. 42 * This way, the Generator does not need to care about the logical order of lines or paragraphs. 43 * 44 * @par Text Selection/Highlighting 45 * A TextEntity is the smallest piece of text, which the user can select, or which can be highlighted. 46 * That is, if the TextEntity represents a word, only the whole word can be selected. 47 * It would not be possible to select a single glyph of the word, because its bounding box is not known. 48 * 49 * @see TextPage, Generator 50 */ 51 class OKULARCORE_EXPORT TextEntity 52 { 53 public: 54 typedef QList<TextEntity *> List; 55 56 /** 57 * Creates a new text entity with the given @p text and the 58 * given @p area. 59 */ 60 TextEntity(const QString &text, NormalizedRect *area); 61 62 /** 63 * Destroys the text entity. 64 */ 65 ~TextEntity(); 66 67 /** 68 * Returns the text of the text entity. 69 */ 70 QString text() const; 71 72 /** 73 * Returns the bounding area of the text entity. 74 */ 75 NormalizedRect *area() const; 76 77 /** 78 * Returns the transformed area of the text entity. 79 */ 80 NormalizedRect transformedArea(const QTransform &matrix) const; 81 82 private: 83 QString m_text; 84 NormalizedRect *m_area; 85 86 class Private; 87 const Private *d; 88 89 Q_DISABLE_COPY(TextEntity) 90 }; 91 92 /** 93 * @short Represents the textual information of a Page. Makes search and text selection possible. 94 * 95 * A Generator with text support should add a TextPage to every Page. 96 * For every piece of text, a TextEntity is added, holding the string representation and the bounding box. 97 * 98 * Ideally, every TextEntity describes only one glyph. 99 * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers). 100 * 101 * When the TextPage is added to the Page, the TextEntitys are restructured to optimize text selection. 102 * 103 * @see TextEntity 104 */ 105 class OKULARCORE_EXPORT TextPage 106 { 107 /// @cond PRIVATE 108 friend class Page; 109 friend class PagePrivate; 110 /// @endcond 111 112 public: 113 /** 114 * Defines the behaviour of adding characters to text() result 115 * @since 0.10 (KDE 4.4) 116 */ 117 enum TextAreaInclusionBehaviour { 118 AnyPixelTextAreaInclusionBehaviour, ///< A character is included into text() result if any pixel of his bounding box is in the given area 119 CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of his bounding box is in the given area 120 }; 121 122 /** 123 * Creates a new text page. 124 */ 125 TextPage(); 126 127 /** 128 * Creates a new text page with the given @p words. 129 */ 130 explicit TextPage(const TextEntity::List &words); 131 132 /** 133 * Destroys the text page. 134 */ 135 ~TextPage(); 136 137 /** 138 * Appends the given @p text with the given @p area as new 139 * @ref TextEntity to the page. 140 */ 141 void append(const QString &text, NormalizedRect *area); 142 143 /** 144 * Returns the bounding rect of the text which matches the following criteria 145 * or 0 if the search is not successful. 146 * 147 * @param searchID An unique id for this search. 148 * @param query The search text. 149 * @param direction The direction of the search (@ref SearchDirection) 150 * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise 151 * the search is case insensitive. 152 * @param area If null the search starts at the beginning of the page, otherwise 153 * right/below the coordinates of the given rect. 154 */ 155 RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area); 156 157 /** 158 * Text extraction function. Looks for text in the given @p area. 159 * 160 * @return 161 * - If @p area points to a valid null area, a null string. 162 * - If @p area is nullptr, the whole page text as a single string. 163 * - Otherwise, the text which is included by @p area, as a single string. 164 * Uses AnyPixelTextAreaInclusionBehaviour 165 */ 166 QString text(const RegularAreaRect *area = nullptr) const; 167 168 /** 169 * Text extraction function. Looks for text in the given @p area. 170 * 171 * @return 172 * - If @p area points to a valid null area, a null string. 173 * - If @p area is nullptr, the whole page text as a single string. 174 * - Otherwise, the text which is included by @p area, as a single string. 175 * @since 0.10 (KDE 4.4) 176 */ 177 QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 178 179 /** 180 * Text entity extraction function. Similar to text() but returns 181 * the words including their bounding rectangles. Note that 182 * ownership of the contents of the returned list belongs to the 183 * caller. 184 * @since 0.14 (KDE 4.8) 185 */ 186 TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const; 187 188 /** 189 * Returns the area and text of the word at the given point 190 * Note that ownership of the returned area belongs to the caller. 191 * @since 0.15 (KDE 4.9) 192 */ 193 RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const; 194 195 /** 196 * Returns the rectangular area of the given @p selection. 197 */ 198 RegularAreaRect *textArea(TextSelection *selection) const; 199 200 private: 201 TextPagePrivate *const d; 202 203 Q_DISABLE_COPY(TextPage) 204 }; 205 206 } 207 208 #endif 209