1 /****************************************************************************
2 **
3 ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the tools applications of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41
42 #include "metatranslator.h"
43
44 #include <QVector>
45 #include <QMap>
46 #include <QStringList>
47 #include <stdio.h>
48 #include <ctype.h>
49
50 typedef QMap<QByteArray, MetaTranslatorMessage> TMM;
51 typedef QList<MetaTranslatorMessage> TML;
52
/*
  Returns true if 'c' is a punctuation or white-space character. Such
  characters are allowed by numberLength() to sit between the digits of
  a number, like the '.' in "3.0".

  'c' is narrowed to unsigned char before being handed to the <ctype.h>
  classifiers, so plain (possibly negative) char values are safe to pass.
*/
static bool isDigitFriendly( int c )
{
    const unsigned char ch = static_cast<unsigned char>( c );
    if ( ispunct(ch) )
        return true;
    return isspace( ch ) != 0;
}
57
numberLength(const char * s)58 static int numberLength( const char *s )
59 {
60 int i = 0;
61
62 if ( isdigit((uchar)s[0]) ) {
63 do {
64 i++;
65 } while (isdigit((uchar)s[i]) ||
66 (isDigitFriendly(s[i]) &&
67 (isdigit((uchar)s[i + 1]) ||
68 (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
69 }
70 return i;
71 }
72
73 /*
74 Returns a version of 'key' where all numbers have been replaced by zeroes. If
75 there were none, returns "".
76 */
zeroKey(const char * key)77 static QByteArray zeroKey( const char *key )
78 {
79 QByteArray zeroed;
80 zeroed.resize( int(strlen(key)) + 1 );
81 char *z = zeroed.data();
82 int i = 0, j = 0;
83 int len;
84 bool metSomething = false;
85
86 while ( key[i] != '\0' ) {
87 len = numberLength( key + i );
88 if ( len > 0 ) {
89 i += len;
90 z[j++] = '0';
91 metSomething = true;
92 } else {
93 z[j++] = key[i++];
94 }
95 }
96 z[j] = '\0';
97
98 if ( metSomething )
99 return zeroed;
100 else
101 return "";
102 }
103
/*
  Derives a translation for 'newSource' from 'oldTranslation', the
  existing translation of 'oldSource'.

  The two source texts are expected to be identical except for the
  numbers they contain (numbers as delimited by numberLength()). The
  numbers of 'oldSource' that are found in 'oldTranslation' are replaced
  by the numbers found at the corresponding places in 'newSource'.

  Doubtful results are flagged by appending hints of the form
  " {<number>?}" or " {<number> or <number>?}" to the returned text.
*/
static QString translationAttempt( const QString& oldTranslation,
                                   const char *oldSource,
                                   const char *newSource )
{
    const int oldSourceLen = int( qstrlen(oldSource) );
    const int newSourceLen = int( qstrlen(newSource) );
    QString attempt;
    QStringList oldNumbers;
    QStringList newNumbers;
    int i, j, k, ell, best, pass;

    /*
      This algorithm is hard to follow, so we'll consider an example
      all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
      and newSource is "TeX 3.1".

      First, we set up two tables: oldNumbers and newNumbers. In our
      example, oldNumbers[0] is "3.0" and newNumbers[0] is "3.1".

      Exactly the characters of each number are stored (m and n
      characters respectively), so that a number in the middle of the
      text does not drag the character that follows it into the table.
    */
    for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
        const int m = numberLength( oldSource + i );
        if ( m > 0 ) {
            // Never scan past the end of newSource, even if the two
            // texts do not line up as expected.
            QByteArray newNumber;
            int n = 0;
            if ( j < newSourceLen ) {
                n = numberLength( newSource + j );
                newNumber = QByteArray( newSource + j, n );
            }
            oldNumbers.append( QByteArray(oldSource + i, m) );
            newNumbers.append( newNumber );
            i += m;
            j += n;
        }
    }

    // Size the bookkeeping from the tables just built, so that every
    // index below is valid for oldNumbers and newNumbers.
    const int p = oldNumbers.count();
    QVector<bool> met( p, false );      // met[k]: number k was substituted
    QVector<int> matchedYet( p, 0 );    // characters of number k matched so far

    // Latin-1 image of the translation, one byte per character, used to
    // run numberLength() on it. Computed once rather than per candidate.
    const QByteArray latin1Translation = oldTranslation.toLatin1();

    /*
      We now go over the old translation, "XeT 3.0", one letter at a
      time, looking for numbers found in oldNumbers. Whenever such a
      number is met, it is replaced with its newNumbers equivalent. In
      our example, the "3.0" of "XeT 3.0" becomes "3.1".
    */
    for ( i = 0; i < int(oldTranslation.length()); i++ ) {
        attempt += oldTranslation.at( i );
        for ( k = 0; k < p; k++ ) {
            const QString &oldNumber = oldNumbers.at( k );
            if ( matchedYet[k] < int(oldNumber.length()) &&
                 oldTranslation.at(i) == oldNumber.at(matchedYet[k]) )
                matchedYet[k]++;
            else
                matchedYet[k] = 0;
        }

        /*
          Let's find out if the last character ended a match. We make
          two passes over the data. In the first pass, we try to
          match only numbers that weren't matched yet; if that fails,
          the second pass does the trick. This is useful in some
          suspicious cases, flagged below.

          A candidate only counts if the number that starts at the
          matched position in the translation has exactly the matched
          length, i.e. if we matched a whole number and not a piece of
          a longer one.
        */
        for ( pass = 0; pass < 2; pass++ ) {
            best = p; // an impossible value
            for ( k = 0; k < p; k++ ) {
                if ( (!met[k] || pass > 0) &&
                     matchedYet[k] == int(oldNumbers.at(k).length()) &&
                     numberLength(latin1Translation.constData() + (i + 1) -
                                  matchedYet[k]) == matchedYet[k] ) {
                    // the longer the better
                    if ( best == p || matchedYet[k] > matchedYet[best] )
                        best = k;
                }
            }
            if ( best != p ) {
                attempt.truncate( attempt.length() - matchedYet[best] );
                attempt += newNumbers.at( best );
                met[best] = true;
                for ( k = 0; k < p; k++ )
                    matchedYet[k] = 0;
                break;
            }
        }
    }

    /*
      We flag two kinds of suspicious cases. They are identified as
      such with comments such as "{2000?}" at the end.

      First kind: a number of the old source text was never found in
      the old translation. The new number that should have replaced it
      is appended. Example: old source text "TeX 3.0" translated as
      "XeT 2.0", with new source text "TeX 3.1", gives
      "XeT 2.0 {3.1?}".
    */
    for ( k = 0; k < p; k++ ) {
        if ( !met[k] )
            attempt += QString( " {" ) + newNumbers.at( k ) + QString( "?}" );
    }

    /*
      Example of the second kind: "1 of 1" translated as "1 af 1",
      with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
      because it's not clear which of "1 af 2" and "2 af 1" is right.
    */
    for ( k = 0; k < p; k++ ) {
        for ( ell = 0; ell < p; ell++ ) {
            if ( k != ell && oldNumbers.at(k) == oldNumbers.at(ell) &&
                 newNumbers.at(k) < newNumbers.at(ell) )
                attempt += QString( " {" ) + newNumbers.at( k ) + QString( " or " ) +
                           newNumbers.at( ell ) + QString( "?}" );
        }
    }
    return attempt;
}
215
216 /*
217 Augments a MetaTranslator with translations easily derived from
218 similar existing (probably obsolete) translations.
219
220 For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
221 has no translation, "XeT 3.1" is added to the translator and is
222 marked Unfinished.
223
224 Returns the number of additional messages that this heuristic translated.
225 */
applyNumberHeuristic(MetaTranslator * tor)226 int applyNumberHeuristic( MetaTranslator *tor )
227 {
228 TMM translated, untranslated;
229 TMM::Iterator t, u;
230 TML all = tor->messages();
231 TML::Iterator it;
232 int inserted = 0;
233
234 for ( it = all.begin(); it != all.end(); ++it ) {
235 bool hasTranslation = (*it).isTranslated();
236 if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
237 if ( !hasTranslation )
238 untranslated.insert(QByteArray((*it).context()) + "\n" + (*it).sourceText() + "\n"
239 + (*it).comment(), *it);
240 } else if ( hasTranslation && (*it).translations().count() == 1 ) {
241 translated.insert( zeroKey((*it).sourceText()), *it );
242 }
243 }
244
245 for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
246 t = translated.find( zeroKey((*u).sourceText()) );
247 if ( t != translated.end() && !t.key().isEmpty() &&
248 qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
249 MetaTranslatorMessage m( *u );
250 m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
251 (*u).sourceText()));
252 tor->insert( m );
253 inserted++;
254 }
255 }
256 return inserted;
257 }
258