1 /*
2  * This file is part of the PySide Tools project.
3  *
4  * Copyright (C) 1992-2006 Trolltech AS. All rights reserved.
5  * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
6  *
7  * Contact: PySide team <pyside@openbossa.org>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * version 2 as published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
21  * 02110-1301 USA
22  *
23  */
24 
25 
26 #include "metatranslator.h"
27 #include "simtexth.h"
28 #include <stdio.h>
29 
30 // defined in numberh.cpp
31 extern int applyNumberHeuristic( MetaTranslator *tor );
32 // defined in sametexth.cpp
33 extern int applySameTextHeuristic( MetaTranslator *tor );
34 
35 typedef QList<MetaTranslatorMessage> TML;
36 
37 /*
38   Merges two MetaTranslator objects into the first one. The first one
39   is a set of source texts and translations for a previous version of
40   the internationalized program; the second one is a set of fresh
41   source texts newly extracted from the source code, without any
42   translation yet.
43 */
44 
merge(const MetaTranslator * tor,const MetaTranslator * virginTor,MetaTranslator * outTor,bool verbose,bool noObsolete)45 void merge( const MetaTranslator *tor, const MetaTranslator *virginTor, MetaTranslator *outTor, bool verbose, bool noObsolete )
46 {
47     int known = 0;
48     int neww = 0;
49     int obsoleted = 0;
50     int UntranslatedObsoleted = 0;
51     int similarTextHeuristicCount = 0;
52     TML all = tor->messages();
53     TML::Iterator it;
54     outTor->setLanguageCode(tor->languageCode());
55 
56     /*
57       The types of all the messages from the vernacular translator
58       are updated according to the virgin translator.
59     */
60     for ( it = all.begin(); it != all.end(); ++it ) {
61         MetaTranslatorMessage::Type newType = MetaTranslatorMessage::Finished;
62         MetaTranslatorMessage m = *it;
63 
64         // skip context comment
65         if ( !QByteArray(m.sourceText()).isEmpty() ) {
66             MetaTranslatorMessage mv = virginTor->find(m.context(), m.sourceText(), m.comment());
67             if ( mv.isNull() ) {
68                 mv = virginTor->find(m.context(), m.comment(), m.fileName(), m.lineNumber());
69                 if ( mv.isNull() ) {
70                     // did not find it in the virgin, mark it as obsolete
71                     newType = MetaTranslatorMessage::Obsolete;
72                     if ( m.type() != MetaTranslatorMessage::Obsolete )
73                         obsoleted++;
74                 } else {
75                     // Do not just accept it if its on the same line number, but different source text.
76                     // Also check if the texts are more or less similar before we consider them to represent the same message...
77                     // ### The QString() cast is evil
78                     if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
79                         // It is just slightly modified, assume that it is the same string
80                         m = MetaTranslatorMessage(m.context(), mv.sourceText(), m.comment(), m.fileName(), m.lineNumber(), m.translations());
81                         m.setPlural(mv.isPlural());
82 
83                         // Mark it as unfinished. (Since the source text was changed it might require re-translating...)
84                         newType = MetaTranslatorMessage::Unfinished;
85                         ++similarTextHeuristicCount;
86                     } else {
87                         // The virgin and vernacular sourceTexts are so different that we could not find it.
88                         newType = MetaTranslatorMessage::Obsolete;
89                         if ( m.type() != MetaTranslatorMessage::Obsolete )
90                             obsoleted++;
91                     }
92                     neww++;
93                 }
94             } else {
95                 switch ( m.type() ) {
96                 case MetaTranslatorMessage::Finished:
97                 default:
98                     if (m.isPlural() == mv.isPlural()) {
99                         newType = MetaTranslatorMessage::Finished;
100                     } else {
101                         newType = MetaTranslatorMessage::Unfinished;
102                     }
103                     known++;
104                     break;
105                 case MetaTranslatorMessage::Unfinished:
106                     newType = MetaTranslatorMessage::Unfinished;
107                     known++;
108                     break;
109                 case MetaTranslatorMessage::Obsolete:
110                     newType = MetaTranslatorMessage::Unfinished;
111                     neww++;
112                 }
113 
114                 // Always get the filename and linenumber info from the virgin Translator, in case it has changed location.
115                 // This should also enable us to read a file that does not have the <location> element.
116                 m.setFileName(mv.fileName());
117                 m.setLineNumber(mv.lineNumber());
118                 m.setPlural(mv.isPlural());             // ### why not use operator=?
119             }
120 
121             if (newType == MetaTranslatorMessage::Obsolete && !m.isTranslated()) {
122                 ++UntranslatedObsoleted;
123             }
124 
125             m.setType(newType);
126             outTor->insert(m);
127         }
128     }
129 
130     /*
131       Messages found only in the virgin translator are added to the
132       vernacular translator. Among these are all the context comments.
133     */
134     all = virginTor->messages();
135 
136     for ( it = all.begin(); it != all.end(); ++it ) {
137         MetaTranslatorMessage mv = *it;
138         bool found = tor->contains(mv.context(), mv.sourceText(), mv.comment());
139         if (!found) {
140             MetaTranslatorMessage m = tor->find(mv.context(), mv.comment(), mv.fileName(), mv.lineNumber());
141             if (!m.isNull()) {
142                 if (getSimilarityScore(QString(m.sourceText()), mv.sourceText()) >= textSimilarityThreshold) {
143                     found = true;
144                 }
145             } else {
146                 found = false;
147             }
148         }
149         if ( !found ) {
150             outTor->insert( mv );
151             if ( !QByteArray(mv.sourceText()).isEmpty() )
152                 neww++;
153         }
154     }
155 
156     /*
157       The same-text heuristic handles cases where a message has an
158       obsolete counterpart with a different context or comment.
159     */
160     int sameTextHeuristicCount = applySameTextHeuristic( outTor );
161 
162     /*
163       The number heuristic handles cases where a message has an
164       obsolete counterpart with mostly numbers differing in the
165       source text.
166     */
167     int sameNumberHeuristicCount = applyNumberHeuristic( outTor );
168 
169     if ( verbose ) {
170         int totalFound = neww + known;
171         fprintf( stderr, "    Found %d source text%s (%d new and %d already existing)\n",
172             totalFound, totalFound == 1 ? "" : "s", neww, known);
173 
174         if (obsoleted) {
175             if (noObsolete) {
176                 fprintf( stderr, "    Removed %d obsolete entr%s\n",
177                 obsoleted, obsoleted == 1 ? "y" : "ies" );
178             } else {
179                 int total = obsoleted - UntranslatedObsoleted;
180                 fprintf( stderr, "    Kept %d obsolete translation%s\n",
181                 total, total == 1 ? "" : "s" );
182 
183                 fprintf( stderr, "    Removed %d obsolete untranslated entr%s\n",
184                 UntranslatedObsoleted, UntranslatedObsoleted == 1 ? "y" : "ies" );
185 
186             }
187         }
188 
189         if (sameNumberHeuristicCount)
190             fprintf( stderr, "    Number heuristic provided %d translation%s\n",
191                      sameNumberHeuristicCount, sameNumberHeuristicCount == 1 ? "" : "s" );
192         if (sameTextHeuristicCount)
193             fprintf( stderr, "    Same-text heuristic provided %d translation%s\n",
194                      sameTextHeuristicCount, sameTextHeuristicCount == 1 ? "" : "s" );
195         if (similarTextHeuristicCount)
196             fprintf( stderr, "    Similar-text heuristic provided %d translation%s\n",
197                      similarTextHeuristicCount, similarTextHeuristicCount == 1 ? "" : "s" );
198     }
199 }
200