1 /*
2     SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com>
3 
4     SPDX-License-Identifier: GPL-2.0-or-later
5 */
6 
7 #ifndef _OKULAR_TEXTPAGE_H_
8 #define _OKULAR_TEXTPAGE_H_
9 
10 #include <QList>
11 #include <QString>
12 
13 #include "global.h"
14 #include "okularcore_export.h"
15 
16 class QTransform;
17 
18 namespace Okular
19 {
20 class NormalizedPoint;
21 class NormalizedRect;
22 class Page;
23 class PagePrivate;
24 class TextPagePrivate;
25 class TextSelection;
26 class RegularAreaRect;
27 
28 /*! @class TextEntity
29  * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
30  *
31  * To enable searching and text selection, a generator can give information about the textual
32  * content of a Page using a TextPage.
33  * A TextPage is created using TextEntity objects.
34  * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page.
35  *
36  * Ideally, every single glyph is represented by its own TextEntity.
37  * If the textual representation of a graphical glyph contains more than one character,
38  * the TextEntity must contain the whole string which represents the glyph.
39  *
40  * When the Generator has created the TextPage, and it is added to a Page,
41  * the text entities are reordered into words, lines, and paragraphs, to optimize search and text selection.
42  * This way, the Generator does not need to care about the logical order of lines or paragraphs.
43  *
44  * @par Text Selection/Highlighting
45  * A TextEntity is the smallest piece of text that the user can select or that can be highlighted.
46  * That is, if the TextEntity represents a word, only the whole word can be selected.
47  * It would not be possible to select a single glyph of the word, because its bounding box is not known.
48  *
49  * @see TextPage, Generator
50  */
51 class OKULARCORE_EXPORT TextEntity
52 {
53 public:
54     typedef QList<TextEntity *> List; ///< List of pointers to TextEntity objects.
55 
56     /**
57      * Creates a new text entity with the given @p text and the
58      * given @p area. NOTE(review): ownership of @p area is not stated in this header - confirm against the implementation.
59      */
60     TextEntity(const QString &text, NormalizedRect *area);
61 
62     /**
63      * Destroys the text entity.
64      */
65     ~TextEntity();
66 
67     /**
68      * Returns the text of the text entity.
69      */
70     QString text() const;
71 
72     /**
73      * Returns the bounding area of the text entity. NOTE(review): presumably still owned by the entity - confirm before deleting it.
74      */
75     NormalizedRect *area() const;
76 
77     /**
78      * Returns the area of the text entity transformed with the given @p matrix, as a new rectangle returned by value.
79      */
80     NormalizedRect transformedArea(const QTransform &matrix) const;
81 
82 private:
83     QString m_text;         // presumably the value returned by text() - confirm in the implementation
84     NormalizedRect *m_area; // presumably the value returned by area(); ownership is not visible here - confirm
85 
86     class Private;    // Forward declaration only; no definition is visible in this header.
87     const Private *d; // Pointer to Private; how it is used is not visible in this header.
88 
89     Q_DISABLE_COPY(TextEntity) // Qt macro: disables the copy constructor and the copy assignment operator.
90 };
91 
92 /**
93  * @short Represents the textual information of a Page. Makes search and text selection possible.
94  *
95  * A Generator with text support should add a TextPage to every Page.
96  * For every piece of text, a TextEntity is added, holding the string representation and the bounding box.
97  *
98  * Ideally, every TextEntity describes only one glyph.
99  * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers).
100  *
101  * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection.
102  *
103  * @see TextEntity
104  */
105 class OKULARCORE_EXPORT TextPage
106 {
107     /// @cond PRIVATE
108     friend class Page;
109     friend class PagePrivate;
110     /// @endcond
111 
112 public:
113     /**
114      * Defines which characters are included in the result of text() and words() for a given area.
115      * @since 0.10 (KDE 4.4)
116      */
117     enum TextAreaInclusionBehaviour {
118         AnyPixelTextAreaInclusionBehaviour,    ///< A character is included in the text() result if any pixel of its bounding box is in the given area
119         CentralPixelTextAreaInclusionBehaviour ///< A character is included in the text() result if the central pixel of its bounding box is in the given area
120     };
121 
122     /**
123      * Creates a new text page.
124      */
125     TextPage();
126 
127     /**
128      * Creates a new text page with the given @p words.
129      */
130     explicit TextPage(const TextEntity::List &words);
131 
132     /**
133      * Destroys the text page.
134      */
135     ~TextPage();
136 
137     /**
138      * Appends the given @p text with the given @p area as a new
139      * @ref TextEntity to the page. NOTE(review): ownership of @p area is not stated in this header - confirm against the implementation.
140      */
141     void append(const QString &text, NormalizedRect *area);
142 
143     /**
144      * Returns the bounding rect of the text which matches the following criteria
145      * or nullptr if the search is not successful.
146      *
147      * @param searchID A unique ID for this search.
148      * @param query The search text.
149      * @param direction The direction of the search (@ref SearchDirection)
150      * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise
151      *                        the search is case insensitive.
152      * @param area If null the search starts at the beginning of the page, otherwise
153      *                 right/below the coordinates of the given rect.
154      */
155     RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area);
156 
157     /**
158      * Text extraction function. Looks for text in the given @p area.
159      *
160      * @return
161      * - If @p area points to a valid null area, a null string.
162      * - If @p area is nullptr, the whole page text as a single string.
163      * - Otherwise, the text which is included by @p area, as a single string.
164      * Characters are selected using AnyPixelTextAreaInclusionBehaviour.
165      */
166     QString text(const RegularAreaRect *area = nullptr) const;
167 
168     /**
169      * Text extraction function. Looks for text in the given @p area, including characters according to the behaviour @p b.
170      *
171      * @return
172      * - If @p area points to a valid null area, a null string.
173      * - If @p area is nullptr, the whole page text as a single string.
174      * - Otherwise, the text which is included by @p area, as a single string.
175      * @since 0.10 (KDE 4.4)
176      */
177     QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
178 
179     /**
180      * Text entity extraction function. Similar to text() but returns
181      * the words including their bounding rectangles. Note that
182      * ownership of the contents of the returned list belongs to the
183      * caller.
184      * @since 0.14 (KDE 4.8)
185      */
186     TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
187 
188     /**
189      * Returns the area and text of the word at the given point @p p; the text is presumably written to @p word when it is not nullptr (TODO confirm).
190      * Note that ownership of the returned area belongs to the caller.
191      * @since 0.15 (KDE 4.9)
192      */
193     RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const;
194 
195     /**
196      * Returns the rectangular area of the given @p selection. NOTE(review): ownership of the returned area is not stated here - confirm.
197      */
198     RegularAreaRect *textArea(TextSelection *selection) const;
199 
200 private:
201     TextPagePrivate *const d; // Pointer to TextPagePrivate, which is only forward-declared in this header.
202 
203     Q_DISABLE_COPY(TextPage) // Qt macro: disables the copy constructor and the copy assignment operator.
204 };
205 
206 }
207 
208 #endif
209