1 /************************************************************************
2 * *
3 * This file is part of Kooka, a scanning/OCR application using *
4 * Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>. *
5 * *
6 * Copyright (C) 2003-2016 Klaas Freitag <freitag@suse.de> *
7 * Jonathan Marten <jjm@keelhaul.me.uk> *
8 * *
9 * Kooka is free software; you can redistribute it and/or modify it *
10 * under the terms of the GNU Library General Public License as *
11 * published by the Free Software Foundation and appearing in the *
12 * file COPYING included in the packaging of this file; either *
13 * version 2 of the License, or (at your option) any later version. *
14 * *
15 * As a special exception, permission is given to link this program *
16 * with any version of the KADMOS OCR/ICR engine (a product of *
17 * reRecognition GmbH, Kreuzlingen), and distribute the resulting *
18 * executable without including the source code for KADMOS in the *
19 * source distribution. *
20 * *
21 * This program is distributed in the hope that it will be useful, *
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
24 * GNU General Public License for more details. *
25 * *
26 * You should have received a copy of the GNU General Public *
27 * License along with this program; see the file COPYING. If *
28 * not, see <http://www.gnu.org/licenses/>. *
29 * *
30 ************************************************************************/
31
32 #include "ocrresedit.h"
33
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STRERROR
38 #include <string.h>
39 #endif
40
41 #include <qcolor.h>
42 #include <qfile.h>
43 #include <qtextstream.h>
44 #include <qdebug.h>
45 #include <qfiledialog.h>
46
47 #include <klocalizedstring.h>
48 #include <kmessagebox.h>
49
50 #include "abstractocrengine.h"
51 #include "recentsaver.h"
52
53 // The OCR results are stored in our text document. Each OCR'ed word has
54 // properties stored in its QTextCharFormat recording the word rectangle
55 // (if the OCR engine provides this information) and possibly other details
56 // also. We can read out those properties again to highlight the relevant
57 // part of the result image when a cursor move or selection is made.
58 //
59 // Spell checking mostly uses KTextEdit's built in spell checking support
60 // (which uses Sonnet).
61 //
62 // Caution: if the spell checking dialogue is cancelled, the text format
63 // properties will be lost - the symptom of this is that the same place in
64 // the result image will be highlighted no matter where in the text the
65 // cursor or selection is. This is bug 229150, hopefully fixed in KDE SC 4.5.
66
OcrResEdit(QWidget * parent)67 OcrResEdit::OcrResEdit(QWidget *parent)
68 : KTextEdit(parent)
69 {
70 setObjectName("OcrResEdit");
71
72 setTabChangesFocus(true); // will never OCR these
73 slotSetReadOnly(true); // initially, anyway
74
75 connect(this, &OcrResEdit::cursorPositionChanged, this, &OcrResEdit::slotUpdateHighlight);
76
77 // TODO: monitor textChanged() signal, if document emptied (cleared)
78 // then tell OCR engine to stop tracking and double clicks
79 // then ImageCanvas can disable selection if tracking active (because it
80 // doesn't paint properly).
81 }
82
moveForward(QTextCursor & curs,bool once=true)83 static void moveForward(QTextCursor &curs, bool once = true)
84 {
85 if (once) {
86 curs.movePosition(QTextCursor::NextCharacter);
87 }
88 while (curs.atBlockStart()) {
89 curs.movePosition(QTextCursor::NextCharacter);
90 }
91 }
92
slotSelectWord(const QPoint & pos)93 void OcrResEdit::slotSelectWord(const QPoint &pos)
94 {
95 if (document()->isEmpty()) {
96 return; // nothing to search
97 }
98
99 //qDebug() << pos;
100
101 QTextCursor curs(document()); // start of document
102 QRect wordRect;
103
104 // First find the start of the word corresponding to the clicked point
105
106 moveForward(curs, false);
107 while (!curs.atEnd()) {
108 QTextCharFormat fmt = curs.charFormat();
109 QRect rect = fmt.property(OcrWordData::Rectangle).toRect();
110 ////qDebug() << "at" << curs.position() << "rect" << rect;
111 if (rect.isValid() && rect.contains(pos, true)) {
112 wordRect = rect;
113 break;
114 }
115 moveForward(curs);
116 }
117
118 //qDebug() << "found rect" << wordRect << "at" << curs.position();
119
120 if (!wordRect.isValid()) {
121 return; // no word found
122 }
123
124 // Then find the end of the word. That is an OCR result word, i.e. a
125 // span with the same character format, not a text word ended by whitespace.
126
127 QTextCursor wordStart = curs;
128 QTextCharFormat ref = wordStart.charFormat();
129
130 moveForward(curs);
131 while (!curs.atEnd()) {
132 QTextCharFormat fmt = curs.charFormat();
133 ////qDebug() << "at" << curs.position() << "rect" << fmt.property(OcrWordData::Rectangle).toRect();
134 if (fmt != ref) {
135 ////qDebug() << "mismatch at" << curs.position();
136 break;
137 }
138 moveForward(curs);
139 }
140
141 curs.movePosition(QTextCursor::PreviousCharacter);
142 //qDebug() << "word start" << wordStart.position() << "end" << curs.position();
143 int pos1 = wordStart.position();
144 int pos2 = curs.position();
145 if (pos1 == pos2) {
146 return; // no word found
147 }
148
149 QTextCursor wc(document());
150 wc.setPosition(wordStart.position() - 1, QTextCursor::MoveAnchor);
151 wc.setPosition(curs.position(), QTextCursor::KeepAnchor);
152 setTextCursor(wc);
153 ensureCursorVisible();
154 }
155
slotSaveText()156 void OcrResEdit::slotSaveText()
157 {
158 RecentSaver saver("saveOCR");
159 QString fileName = QFileDialog::getSaveFileName(this, i18n("Save OCR Result Text"),
160 saver.recentPath(), i18n("Text File (*.txt)"));
161 if (fileName.isEmpty()) return;
162 saver.save(fileName);
163
164 QFile file(fileName);
165 if (!file.open(QIODevice::WriteOnly)) {
166 QString msg = xi18nc("@info", "Unable to save the OCR results file<nl/><filename>%1</filename>", fileName);
167 #ifdef HAVE_STRERROR
168 msg += xi18nc("@info", "<nl/>%1", strerror(errno));
169 #endif
170 KMessageBox::error(this, msg, i18n("Error saving OCR results"));
171 return;
172 }
173
174 QTextStream stream(&file);
175 stream << toPlainText();
176 file.close();
177 }
178
slotUpdateHighlight()179 void OcrResEdit::slotUpdateHighlight()
180 {
181 if (isReadOnly()) {
182 return;
183 }
184 ////qDebug() << "pos" << textCursor().position() << "hassel" << textCursor().hasSelection()
185 // << "start" << textCursor().selectionStart() << "end" << textCursor().selectionEnd();
186
187 QTextCursor curs = textCursor(); // will not move cursor, see
188 // QTextEdit::textCursor() doc
189 if (curs.hasSelection()) {
190 ////qDebug() << "sel start" << curs.selectionStart() << "end" << curs.selectionEnd();
191
192 int send = curs.selectionEnd();
193 curs.setPosition(curs.selectionStart());
194 curs.movePosition(QTextCursor::NextCharacter);
195 QTextCharFormat ref = curs.charFormat();
196 ////qDebug() << "at" << curs.position() << "format rect" << ref.property(OcrWordData::Rectangle).toRect();
197 bool same = true;
198
199 while (curs.position() != send) {
200 curs.movePosition(QTextCursor::NextCharacter);
201 QTextCharFormat fmt = curs.charFormat();
202 ////qDebug() << "at" << curs.position() << "format rect" << fmt.property(OcrWordData::Rectangle).toRect();
203 if (fmt != ref) {
204 ////qDebug() << "mismatch at" << curs.position();
205 same = false;
206 break;
207 }
208 }
209
210 ////qDebug() << "range same format?" << same;
211 if (same) { // valid word selection
212 QRect r = ref.property(OcrWordData::Rectangle).toRect();
213 ////qDebug() << "rect" << r;
214 emit highlightWord(r);
215 return;
216 }
217 }
218
219 emit highlightWord(QRect()); // no valid word selection,
220 // clear highlight
221 QTextCharFormat fmt = textCursor().charFormat();
222 QRect r = fmt.property(OcrWordData::Rectangle).toRect();
223 if (r.isValid()) {
224 emit scrollToWord(r); // scroll to cursor position
225 }
226 }
227
228 // QTextEdit::setReadOnly() is no longer a slot in Qt4!
slotSetReadOnly(bool isRO)229 void OcrResEdit::slotSetReadOnly(bool isRO)
230 {
231 setReadOnly(isRO);
232 if (isRO) setCheckSpellingEnabled(false);
233 }
234