1 /************************************************************************
2  *									*
3  *  This file is part of Kooka, a scanning/OCR application using	*
4  *  Qt <http://www.qt.io> and KDE Frameworks <http://www.kde.org>.	*
5  *									*
6  *  Copyright (C) 2003-2016 Klaas Freitag <freitag@suse.de>		*
7  *                          Jonathan Marten <jjm@keelhaul.me.uk>	*
8  *									*
9  *  Kooka is free software; you can redistribute it and/or modify it	*
10  *  under the terms of the GNU Library General Public License as	*
11  *  published by the Free Software Foundation and appearing in the	*
12  *  file COPYING included in the packaging of this file;  either	*
13  *  version 2 of the License, or (at your option) any later version.	*
14  *									*
15  *  As a special exception, permission is given to link this program	*
16  *  with any version of the KADMOS OCR/ICR engine (a product of		*
17  *  reRecognition GmbH, Kreuzlingen), and distribute the resulting	*
18  *  executable without including the source code for KADMOS in the	*
19  *  source distribution.						*
20  *									*
21  *  This program is distributed in the hope that it will be useful,	*
22  *  but WITHOUT ANY WARRANTY; without even the implied warranty of	*
23  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the	*
24  *  GNU General Public License for more details.			*
25  *									*
26  *  You should have received a copy of the GNU General Public		*
27  *  License along with this program;  see the file COPYING.  If		*
28  *  not, see <http://www.gnu.org/licenses/>.				*
29  *									*
30  ************************************************************************/
31 
32 #include "ocrresedit.h"
33 
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STRERROR
38 #include <string.h>
39 #endif
40 
41 #include <qcolor.h>
42 #include <qfile.h>
43 #include <qtextstream.h>
44 #include <qdebug.h>
45 #include <qfiledialog.h>
46 
47 #include <klocalizedstring.h>
48 #include <kmessagebox.h>
49 
50 #include "abstractocrengine.h"
51 #include "recentsaver.h"
52 
53 //  The OCR results are stored in our text document.  Each OCR'ed word has
54 //  properties stored in its QTextCharFormat recording the word rectangle
55 //  (if the OCR engine provides this information) and possibly other details
56 //  also.  We can read out those properties again to highlight the relevant
57 //  part of the result image when a cursor move or selection is made.
58 //
59 //  Spell checking mostly uses KTextEdit's built in spell checking support
60 //  (which uses Sonnet).
61 //
62 //  Caution:  if the spell checking dialogue is cancelled, the text format
63 //  properties will be lost - the symptom of this is that the same place in
64 //  the result image will be highlighted no matter where in the text the
65 //  cursor or selection is.  This is bug 229150, hopefully fixed in KDE SC 4.5.
66 
OcrResEdit(QWidget * parent)67 OcrResEdit::OcrResEdit(QWidget *parent)
68     : KTextEdit(parent)
69 {
70     setObjectName("OcrResEdit");
71 
72     setTabChangesFocus(true);               // will never OCR these
73     slotSetReadOnly(true);              // initially, anyway
74 
75     connect(this, &OcrResEdit::cursorPositionChanged, this, &OcrResEdit::slotUpdateHighlight);
76 
77 // TODO: monitor textChanged() signal, if document emptied (cleared)
78 // then tell OCR engine to stop tracking and double clicks
79 // then ImageCanvas can disable selection if tracking active (because it
80 // doesn't paint properly).
81 }
82 
moveForward(QTextCursor & curs,bool once=true)83 static void moveForward(QTextCursor &curs, bool once = true)
84 {
85     if (once) {
86         curs.movePosition(QTextCursor::NextCharacter);
87     }
88     while (curs.atBlockStart()) {
89         curs.movePosition(QTextCursor::NextCharacter);
90     }
91 }
92 
slotSelectWord(const QPoint & pos)93 void OcrResEdit::slotSelectWord(const QPoint &pos)
94 {
95     if (document()->isEmpty()) {
96         return;    // nothing to search
97     }
98 
99     //qDebug() << pos;
100 
101     QTextCursor curs(document());           // start of document
102     QRect wordRect;
103 
104     // First find the start of the word corresponding to the clicked point
105 
106     moveForward(curs, false);
107     while (!curs.atEnd()) {
108         QTextCharFormat fmt = curs.charFormat();
109         QRect rect = fmt.property(OcrWordData::Rectangle).toRect();
110         ////qDebug() << "at" << curs.position() << "rect" << rect;
111         if (rect.isValid() && rect.contains(pos, true)) {
112             wordRect = rect;
113             break;
114         }
115         moveForward(curs);
116     }
117 
118     //qDebug() << "found rect" << wordRect << "at" << curs.position();
119 
120     if (!wordRect.isValid()) {
121         return;    // no word found
122     }
123 
124     // Then find the end of the word.  That is an OCR result word, i.e. a
125     // span with the same character format, not a text word ended by whitespace.
126 
127     QTextCursor wordStart = curs;
128     QTextCharFormat ref = wordStart.charFormat();
129 
130     moveForward(curs);
131     while (!curs.atEnd()) {
132         QTextCharFormat fmt = curs.charFormat();
133         ////qDebug() << "at" << curs.position() << "rect" << fmt.property(OcrWordData::Rectangle).toRect();
134         if (fmt != ref) {
135             ////qDebug() << "mismatch at" << curs.position();
136             break;
137         }
138         moveForward(curs);
139     }
140 
141     curs.movePosition(QTextCursor::PreviousCharacter);
142     //qDebug() << "word start" << wordStart.position() << "end" << curs.position();
143     int pos1 = wordStart.position();
144     int pos2 = curs.position();
145     if (pos1 == pos2) {
146         return;    // no word found
147     }
148 
149     QTextCursor wc(document());
150     wc.setPosition(wordStart.position() - 1, QTextCursor::MoveAnchor);
151     wc.setPosition(curs.position(), QTextCursor::KeepAnchor);
152     setTextCursor(wc);
153     ensureCursorVisible();
154 }
155 
slotSaveText()156 void OcrResEdit::slotSaveText()
157 {
158     RecentSaver saver("saveOCR");
159     QString fileName = QFileDialog::getSaveFileName(this, i18n("Save OCR Result Text"),
160                                                     saver.recentPath(), i18n("Text File (*.txt)"));
161     if (fileName.isEmpty()) return;
162     saver.save(fileName);
163 
164     QFile file(fileName);
165     if (!file.open(QIODevice::WriteOnly)) {
166         QString msg = xi18nc("@info", "Unable to save the OCR results file<nl/><filename>%1</filename>", fileName);
167 #ifdef HAVE_STRERROR
168         msg += xi18nc("@info", "<nl/>%1", strerror(errno));
169 #endif
170         KMessageBox::error(this, msg, i18n("Error saving OCR results"));
171         return;
172     }
173 
174     QTextStream stream(&file);
175     stream << toPlainText();
176     file.close();
177 }
178 
slotUpdateHighlight()179 void OcrResEdit::slotUpdateHighlight()
180 {
181     if (isReadOnly()) {
182         return;
183     }
184     ////qDebug() << "pos" << textCursor().position() << "hassel" << textCursor().hasSelection()
185     //         << "start" << textCursor().selectionStart() << "end" << textCursor().selectionEnd();
186 
187     QTextCursor curs = textCursor();			// will not move cursor, see
188 							// QTextEdit::textCursor() doc
189     if (curs.hasSelection()) {
190         ////qDebug() << "sel start" << curs.selectionStart() << "end" << curs.selectionEnd();
191 
192         int send = curs.selectionEnd();
193         curs.setPosition(curs.selectionStart());
194         curs.movePosition(QTextCursor::NextCharacter);
195         QTextCharFormat ref = curs.charFormat();
196         ////qDebug() << "at" << curs.position() << "format rect" << ref.property(OcrWordData::Rectangle).toRect();
197         bool same = true;
198 
199         while (curs.position() != send) {
200             curs.movePosition(QTextCursor::NextCharacter);
201             QTextCharFormat fmt = curs.charFormat();
202             ////qDebug() << "at" << curs.position() << "format rect" << fmt.property(OcrWordData::Rectangle).toRect();
203             if (fmt != ref) {
204                 ////qDebug() << "mismatch at" << curs.position();
205                 same = false;
206                 break;
207             }
208         }
209 
210         ////qDebug() << "range same format?" << same;
211         if (same) {                 // valid word selection
212             QRect r = ref.property(OcrWordData::Rectangle).toRect();
213             ////qDebug() << "rect" << r;
214             emit highlightWord(r);
215             return;
216         }
217     }
218 
219     emit highlightWord(QRect());            // no valid word selection,
220     // clear highlight
221     QTextCharFormat fmt = textCursor().charFormat();
222     QRect r = fmt.property(OcrWordData::Rectangle).toRect();
223     if (r.isValid()) {
224         emit scrollToWord(r);    // scroll to cursor position
225     }
226 }
227 
228 // QTextEdit::setReadOnly() is no longer a slot in Qt4!
slotSetReadOnly(bool isRO)229 void OcrResEdit::slotSetReadOnly(bool isRO)
230 {
231     setReadOnly(isRO);
232     if (isRO) setCheckSpellingEnabled(false);
233 }
234