1 /****************************************************************************
2 **
3 ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the tools applications of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia.  For licensing terms and
14 ** conditions see http://qt.digia.com/licensing.  For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file.  Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights.  These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file.  Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include "metatranslator.h"
43 
44 #include <QVector>
45 #include <QMap>
46 #include <QStringList>
47 #include <stdio.h>
48 #include <ctype.h>
49 
/* Messages looked up by a QByteArray key (see applyNumberHeuristic()). */
typedef QMap<QByteArray, MetaTranslatorMessage> TMM;
/* Plain list of messages, as obtained from MetaTranslator::messages(). */
typedef QList<MetaTranslatorMessage> TML;
52 
/*
  Tells whether 'c' is a character that is allowed to sit between the
  digits of a number: punctuation or white space, as classified by
  ispunct() and isspace(). 'c' is narrowed to unsigned char first so that
  negative char values are safe to pass to the <ctype.h> functions.
*/
static bool isDigitFriendly( int c )
{
    const unsigned char ch = static_cast<unsigned char>( c );

    if ( ispunct(ch) )
        return true;
    return isspace( ch ) != 0;
}
57 
numberLength(const char * s)58 static int numberLength( const char *s )
59 {
60     int i = 0;
61 
62     if ( isdigit((uchar)s[0]) ) {
63         do {
64             i++;
65         } while (isdigit((uchar)s[i]) ||
66                  (isDigitFriendly(s[i]) &&
67                   (isdigit((uchar)s[i + 1]) ||
68                    (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
69     }
70     return i;
71 }
72 
73 /*
74   Returns a version of 'key' where all numbers have been replaced by zeroes.  If
75   there were none, returns "".
76 */
zeroKey(const char * key)77 static QByteArray zeroKey( const char *key )
78 {
79     QByteArray zeroed;
80     zeroed.resize( int(strlen(key)) + 1 );
81     char *z = zeroed.data();
82     int i = 0, j = 0;
83     int len;
84     bool metSomething = false;
85 
86     while ( key[i] != '\0' ) {
87         len = numberLength( key + i );
88         if ( len > 0 ) {
89             i += len;
90             z[j++] = '0';
91             metSomething = true;
92         } else {
93             z[j++] = key[i++];
94         }
95     }
96     z[j] = '\0';
97 
98     if ( metSomething )
99         return zeroed;
100     else
101         return "";
102 }
103 
translationAttempt(const QString & oldTranslation,const char * oldSource,const char * newSource)104 static QString translationAttempt( const QString& oldTranslation,
105                                    const char *oldSource,
106                                    const char *newSource )
107 {
108     int p = zeroKey( oldSource ).count( '0' );
109     int oldSourceLen = qstrlen( oldSource );
110     QString attempt;
111     QStringList oldNumbers;
112     QStringList newNumbers;
113     QVector<bool> met( p );
114     QVector<int> matchedYet( p );
115     int i, j;
116     int k = 0, ell, best;
117     int m, n;
118     int pass;
119 
120     /*
121       This algorithm is hard to follow, so we'll consider an example
122       all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
123       and newSource is "XeT 3.1".
124 
125       First, we set up two tables: oldNumbers and newNumbers. In our
126       example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
127     */
128     for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
129         m = numberLength( oldSource + i );
130         n = numberLength( newSource + j );
131         if ( m > 0 ) {
132             oldNumbers.append( QByteArray(oldSource + i, m + 1) );
133             newNumbers.append( QByteArray(newSource + j, n + 1) );
134             i += m;
135             j += n;
136             met[k] = false;
137             matchedYet[k] = 0;
138             k++;
139         }
140     }
141 
142     /*
143       We now go over the old translation, "XeT 3.0", one letter at a
144       time, looking for numbers found in oldNumbers. Whenever such a
145       number is met, it is replaced with its newNumber equivalent. In
146       our example, the "3.0" of "XeT 3.0" becomes "3.1".
147     */
148     for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
149         attempt += oldTranslation[i];
150         for ( k = 0; k < p; k++ ) {
151             if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
152                 matchedYet[k]++;
153             else
154                 matchedYet[k] = 0;
155         }
156 
157         /*
158           Let's find out if the last character ended a match. We make
159           two passes over the data. In the first pass, we try to
160           match only numbers that weren't matched yet; if that fails,
161           the second pass does the trick. This is useful in some
162           suspicious cases, flagged below.
163         */
164         for ( pass = 0; pass < 2; pass++ ) {
165             best = p; // an impossible value
166             for ( k = 0; k < p; k++ ) {
167                 if ( (!met[k] || pass > 0) &&
168                      matchedYet[k] == (int) oldNumbers[k].length() &&
169                      numberLength(oldTranslation.toLatin1().constData() + (i + 1) -
170                                   matchedYet[k]) == matchedYet[k] ) {
171                     // the longer the better
172                     if ( best == p || matchedYet[k] > matchedYet[best] )
173                         best = k;
174                 }
175             }
176             if ( best != p ) {
177                 attempt.truncate( attempt.length() - matchedYet[best] );
178                 attempt += newNumbers[best];
179                 met[best] = true;
180                 for ( k = 0; k < p; k++ )
181                     matchedYet[k] = 0;
182                 break;
183             }
184         }
185     }
186 
187     /*
188       We flag two kinds of suspicious cases. They are identified as
189       such with comments such as "{2000?}" at the end.
190 
191       Example of the first kind: old source text "TeX 3.0" translated
192       as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
193       new text is.
194     */
195     for ( k = 0; k < p; k++ ) {
196         if ( !met[k] )
197             attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
198     }
199 
200     /*
201       Example of the second kind: "1 of 1" translated as "1 af 1",
202       with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
203       because it's not clear which of "1 af 2" and "2 af 1" is right.
204     */
205     for ( k = 0; k < p; k++ ) {
206         for ( ell = 0; ell < p; ell++ ) {
207             if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
208                     newNumbers[k] < newNumbers[ell] )
209                 attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
210                            newNumbers[ell] + QString( "?}" );
211         }
212     }
213     return attempt;
214 }
215 
216 /*
217   Augments a MetaTranslator with translations easily derived from
218   similar existing (probably obsolete) translations.
219 
220   For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
221   has no translation, "XeT 3.1" is added to the translator and is
222   marked Unfinished.
223 
224   Returns the number of additional messages that this heuristic translated.
225 */
applyNumberHeuristic(MetaTranslator * tor)226 int applyNumberHeuristic( MetaTranslator *tor )
227 {
228     TMM translated, untranslated;
229     TMM::Iterator t, u;
230     TML all = tor->messages();
231     TML::Iterator it;
232     int inserted = 0;
233 
234     for ( it = all.begin(); it != all.end(); ++it ) {
235         bool hasTranslation = (*it).isTranslated();
236         if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
237             if ( !hasTranslation )
238                 untranslated.insert(QByteArray((*it).context()) + "\n" + (*it).sourceText() + "\n"
239                                     + (*it).comment(), *it);
240         } else if ( hasTranslation && (*it).translations().count() == 1 ) {
241             translated.insert( zeroKey((*it).sourceText()), *it );
242         }
243     }
244 
245     for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
246         t = translated.find( zeroKey((*u).sourceText()) );
247         if ( t != translated.end() && !t.key().isEmpty() &&
248              qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
249             MetaTranslatorMessage m( *u );
250             m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
251                                                 (*u).sourceText()));
252             tor->insert( m );
253             inserted++;
254         }
255     }
256     return inserted;
257 }
258