1 /*
2 * This file is part of the PySide Tools project.
3 *
4 * Copyright (C) 1992-2006 Trolltech AS. All rights reserved.
5 * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
6 *
7 * Contact: PySide team <pyside@openbossa.org>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * version 2 as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21 * 02110-1301 USA
22 *
23 */
24
25
26 #include "metatranslator.h"
27 #include "simtexth.h"
28 #include <stdio.h>
29
30 // defined in numberh.cpp
31 extern int applyNumberHeuristic( MetaTranslator *tor );
32 // defined in sametexth.cpp
33 extern int applySameTextHeuristic( MetaTranslator *tor );
34
35 typedef QList<MetaTranslatorMessage> TML;
36
37 /*
38 Merges two MetaTranslator objects into the first one. The first one
39 is a set of source texts and translations for a previous version of
40 the internationalized program; the second one is a set of fresh
41 source texts newly extracted from the source code, without any
42 translation yet.
43 */
44
merge(const MetaTranslator * tor,const MetaTranslator * virginTor,MetaTranslator * outTor,bool verbose,bool noObsolete)45 void merge( const MetaTranslator *tor, const MetaTranslator *virginTor, MetaTranslator *outTor, bool verbose, bool noObsolete )
46 {
47 int known = 0;
48 int neww = 0;
49 int obsoleted = 0;
50 int UntranslatedObsoleted = 0;
51 int similarTextHeuristicCount = 0;
52 TML all = tor->messages();
53 TML::Iterator it;
54 outTor->setLanguageCode(tor->languageCode());
55
56 /*
57 The types of all the messages from the vernacular translator
58 are updated according to the virgin translator.
59 */
60 for ( it = all.begin(); it != all.end(); ++it ) {
61 MetaTranslatorMessage::Type newType = MetaTranslatorMessage::Finished;
62 MetaTranslatorMessage m = *it;
63
64 // skip context comment
65 if ( !QByteArray(m.sourceText()).isEmpty() ) {
66 MetaTranslatorMessage mv = virginTor->find(m.context(), m.sourceText(), m.comment());
67 if ( mv.isNull() ) {
68 mv = virginTor->find(m.context(), m.comment(), m.fileName(), m.lineNumber());
69 if ( mv.isNull() ) {
70 // did not find it in the virgin, mark it as obsolete
71 newType = MetaTranslatorMessage::Obsolete;
72 if ( m.type() != MetaTranslatorMessage::Obsolete )
73 obsoleted++;
74 } else {
75 // Do not just accept it if its on the same line number, but different source text.
76 // Also check if the texts are more or less similar before we consider them to represent the same message...
77 // ### The QString() cast is evil
78 if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
79 // It is just slightly modified, assume that it is the same string
80 m = MetaTranslatorMessage(m.context(), mv.sourceText(), m.comment(), m.fileName(), m.lineNumber(), m.translations());
81 m.setPlural(mv.isPlural());
82
83 // Mark it as unfinished. (Since the source text was changed it might require re-translating...)
84 newType = MetaTranslatorMessage::Unfinished;
85 ++similarTextHeuristicCount;
86 } else {
87 // The virgin and vernacular sourceTexts are so different that we could not find it.
88 newType = MetaTranslatorMessage::Obsolete;
89 if ( m.type() != MetaTranslatorMessage::Obsolete )
90 obsoleted++;
91 }
92 neww++;
93 }
94 } else {
95 switch ( m.type() ) {
96 case MetaTranslatorMessage::Finished:
97 default:
98 if (m.isPlural() == mv.isPlural()) {
99 newType = MetaTranslatorMessage::Finished;
100 } else {
101 newType = MetaTranslatorMessage::Unfinished;
102 }
103 known++;
104 break;
105 case MetaTranslatorMessage::Unfinished:
106 newType = MetaTranslatorMessage::Unfinished;
107 known++;
108 break;
109 case MetaTranslatorMessage::Obsolete:
110 newType = MetaTranslatorMessage::Unfinished;
111 neww++;
112 }
113
114 // Always get the filename and linenumber info from the virgin Translator, in case it has changed location.
115 // This should also enable us to read a file that does not have the <location> element.
116 m.setFileName(mv.fileName());
117 m.setLineNumber(mv.lineNumber());
118 m.setPlural(mv.isPlural()); // ### why not use operator=?
119 }
120
121 if (newType == MetaTranslatorMessage::Obsolete && !m.isTranslated()) {
122 ++UntranslatedObsoleted;
123 }
124
125 m.setType(newType);
126 outTor->insert(m);
127 }
128 }
129
130 /*
131 Messages found only in the virgin translator are added to the
132 vernacular translator. Among these are all the context comments.
133 */
134 all = virginTor->messages();
135
136 for ( it = all.begin(); it != all.end(); ++it ) {
137 MetaTranslatorMessage mv = *it;
138 bool found = tor->contains(mv.context(), mv.sourceText(), mv.comment());
139 if (!found) {
140 MetaTranslatorMessage m = tor->find(mv.context(), mv.comment(), mv.fileName(), mv.lineNumber());
141 if (!m.isNull()) {
142 if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
143 found = true;
144 }
145 } else {
146 found = false;
147 }
148 }
149 if ( !found ) {
150 outTor->insert( mv );
151 if ( !QByteArray(mv.sourceText()).isEmpty() )
152 neww++;
153 }
154 }
155
156 /*
157 The same-text heuristic handles cases where a message has an
158 obsolete counterpart with a different context or comment.
159 */
160 int sameTextHeuristicCount = applySameTextHeuristic( outTor );
161
162 /*
163 The number heuristic handles cases where a message has an
164 obsolete counterpart with mostly numbers differing in the
165 source text.
166 */
167 int sameNumberHeuristicCount = applyNumberHeuristic( outTor );
168
169 if ( verbose ) {
170 int totalFound = neww + known;
171 fprintf( stderr, " Found %d source text%s (%d new and %d already existing)\n",
172 totalFound, totalFound == 1 ? "" : "s", neww, known);
173
174 if (obsoleted) {
175 if (noObsolete) {
176 fprintf( stderr, " Removed %d obsolete entr%s\n",
177 obsoleted, obsoleted == 1 ? "y" : "ies" );
178 } else {
179 int total = obsoleted - UntranslatedObsoleted;
180 fprintf( stderr, " Kept %d obsolete translation%s\n",
181 total, total == 1 ? "" : "s" );
182
183 fprintf( stderr, " Removed %d obsolete untranslated entr%s\n",
184 UntranslatedObsoleted, UntranslatedObsoleted == 1 ? "y" : "ies" );
185
186 }
187 }
188
189 if (sameNumberHeuristicCount)
190 fprintf( stderr, " Number heuristic provided %d translation%s\n",
191 sameNumberHeuristicCount, sameNumberHeuristicCount == 1 ? "" : "s" );
192 if (sameTextHeuristicCount)
193 fprintf( stderr, " Same-text heuristic provided %d translation%s\n",
194 sameTextHeuristicCount, sameTextHeuristicCount == 1 ? "" : "s" );
195 if (similarTextHeuristicCount)
196 fprintf( stderr, " Similar-text heuristic provided %d translation%s\n",
197 similarTextHeuristicCount, similarTextHeuristicCount == 1 ? "" : "s" );
198 }
199 }
200