<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

include_once __DIR__ . '/../../../diff/Diff.php';
include_once __DIR__ . '/../../..//diff/difflib.php';
include_once __DIR__ . '/../../..//diff/Renderer.php';
include_once __DIR__ . '/../../..//diff/renderer_unified.php';
include_once __DIR__ . '/SentenceAlignments.php';
include_once __DIR__ . '/SentenceSegmentor.php';
include_once __DIR__ . '/MockMTWrapper.php';


/*
 * Class used to update the modifications done in one version of a page to the other version of the same page.
 *
 * Sentences are carried around as plain strings in arrays. A leading "-" marks a
 * sentence reported as deleted by the diff, a leading "+" marks one reported as
 * added, and the literal "*deleted*" is used as a placeholder for a removed entry.
 */

class Multilingual_Aligner_UpdateSentences1
{
    /**
     * Diffs the unchanged and changed sentence arrays and returns the resulting
     * sentence list, either translated through the alignments/translator or as-is.
     *
     * @param array  $unchangedSource_array sentences of the original version
     * @param array  $changedSource_array   sentences of the modified version
     * @param object $alignments            object providing getSentenceInOtherLanguage()
     * @param object $translator            object providing getTranslationInOtherLanguage()
     * @param string $source_lng            source language code
     * @param string $target_lng            target language code
     * @param mixed  $translation           1 in case of source modification, 0 in case of target modification
     *
     * @return array the end result: translated sentences when $translation == 1,
     *               otherwise the trimmed diff sentences without "*deleted*" entries
     */
    public function DifferencebetweenOriginalFileandModifiedFile($unchangedSource_array, $changedSource_array, $alignments, $translator, $source_lng, $target_lng, $translation)
    {
        $changed_diff_unchanged = $this->text_diff($unchangedSource_array, $changedSource_array);
        //both files are the same: fall back to the changed sentences themselves
        if (count($changed_diff_unchanged) == 0) {
            $changed_diff_unchanged = $changedSource_array;
        }
        $changed_diff_unchanged = $this->remove_wikisyntax($changed_diff_unchanged);
        $changed_diff_unchanged = $this->identify_shuffled_and_negative_sentences($changed_diff_unchanged);

        //files are in source language: convert the sentences to the target language
        if ($translation == 1) {
            return $this->changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng);
        }

        //files are already in target language: only drop the "*deleted*" placeholders
        $changed_final = [];
        foreach ($changed_diff_unchanged as $val) {
            if (strcmp($val, "*deleted*") != 0) {
                $changed_final[] = trim($val);
            }
        }
        return $changed_final;
    }


    /**
     * Resolves "-X" / "+X" pairs (the same sentence removed in one place and added
     * in another) and marks the remaining "-" sentences as "*deleted*".
     *
     * Positions are counted from 0 in iteration order, so the array is expected to
     * be a list. The loop iterates over a snapshot of the input while the searches
     * run against the array as modified so far.
     *
     * @param array $changed_diff_unchanged diff sentences, optionally prefixed with "+" or "-"
     *
     * @return array the same array with shuffled sentences unprefixed and deletions replaced
     */
    public function identify_shuffled_and_negative_sentences($changed_diff_unchanged)
    {
        $ii = 0;
        foreach ($changed_diff_unchanged as $value) {
            //sentence is preceded by '-'
            if (strcmp(substr($value, 0, 1), "-") == 0) {
                $temp = "+" . substr($value, 1);
                $match = $this->array_search_function($temp, $changed_diff_unchanged);
                //sentence is shuffled
                if ($match != -1) {
                    $changed_diff_unchanged[$ii] = "";//eliminating the -ve sentence
                    $changed_diff_unchanged[$match] = substr($value, 1);
                } else {
                    $changed_diff_unchanged[$ii] = "*deleted*";
                }
            }
            //sentence is preceded by '+'
            if (strcmp(substr($value, 0, 1), "+") == 0) {
                $temp = "-" . substr($value, 1);
                $match = $this->array_search_function($temp, $changed_diff_unchanged);
                //sentence is shuffled
                if ($match != -1) {
                    $changed_diff_unchanged[$match] = "*deleted*";//eliminating the -ve sentence
                    $changed_diff_unchanged[$ii] = substr($value, 1);
                }
            }
            $ii = $ii + 1;
        }
        return $changed_diff_unchanged;
    }

    /**
     * Strips the diff renderer's character-highlight markup and splits every entry
     * on '<br />' into separate sentences.
     *
     * When the entry starts with "-" or "+", the first piece (which still carries
     * that sign) is trimmed and every following piece is trimmed and given the same
     * sign. Pieces of unsigned entries are passed through untouched.
     *
     * @param array $sentences diff entries, possibly containing markup
     *
     * @return array flat list of sentences (empty list for empty input)
     */
    public function remove_wikisyntax($sentences)
    {
        //initialised so that empty input yields an empty list instead of an undefined variable
        $sentences_new = [];
        foreach ($sentences as $val) {
            $val = str_replace(['<span class="diffchar">', '</span>'], '', $val);
            $sign = "";
            if ($val != "" && ($val[0] == "-" || $val[0] == "+")) {
                $sign = $val[0];
            }
            $num = 0;
            foreach (explode('<br />', $val) as $nn) {
                if ($sign == "") {
                    $sentences_new[] = $nn;
                } elseif ($num == 0) {
                    //the first piece already starts with the sign
                    $sentences_new[] = trim($nn);
                } else {
                    $sentences_new[] = $sign . trim($nn);
                }
                $num++;
            }
        }
        return $sentences_new;
    }


    /**
     * Runs a unified diff over two sentence arrays and flattens the rendered result
     * into a list of sentences prefixed with "-" (deleted) or "+" (added); unchanged
     * sentences carry no prefix.
     *
     * Text_Diff and Text_Diff_Renderer_unified are not defined in this file
     * (presumably they come from the diff includes above - confirm).
     *
     * @param array $unchangedSentence_array sentences of the original version
     * @param array $changedSentence_array   sentences of the modified version
     *
     * @return array flat list of diff sentences
     */
    public function text_diff($unchangedSentence_array, $changedSentence_array)
    {
        $changed_diff_unchanged = [];
        $diff = new Text_Diff($unchangedSentence_array, $changedSentence_array);
        //the number of original sentences is handed to the renderer as its context size
        $context = count($unchangedSentence_array);
        $renderer = new Text_Diff_Renderer_unified($context);
        $arr = $renderer->render($diff);
        $kk = 0;
        $body = 0;
        $del = 0;
        $add = 0;
        foreach ($arr as $ee) {
            //the first rendered entry is skipped
            if ($kk != 0) {
                foreach ($ee as $key => $val) {
                    //remember which kind of entry this is until its "data" is reached
                    if ($val == "diffbody") {
                        $body = 1;
                    }
                    if ($val == "diffdeleted") {
                        $del = 1;
                    }
                    if ($val == "diffadded") {
                        $add = 1;
                    }
                    if ($key == "data") {
                        foreach ($val as $item) {
                            if ($body == 1) {
                                $changed_diff_unchanged[] = $item;
                            }
                            if ($del == 1) {
                                $changed_diff_unchanged[] = "-" . $item;
                            }
                            if ($add == 1) {
                                $changed_diff_unchanged[] = "+" . $item;
                            }
                        }
                        $body = 0;
                        $del = 0;
                        $add = 0;
                    }
                }
            }
            $kk = $kk + 1;
        }
        return $changed_diff_unchanged;
    }

    /**
     * Converts the diff sentences into the other language, using the alignments
     * where they know the sentence and the translator otherwise.
     *
     * "*deleted*" entries are dropped first. The remaining list is then consumed
     * from the front: when the alignments return something other than "NULL" the
     * aligned sentence is segmented and appended; otherwise the sentence is sent to
     * the translator and appended with a "+" prefix (the untranslated text is used
     * when the translator returns "NULL"). Entries that are "" or "+" are appended
     * unchanged.
     *
     * $target_lng is accepted but not used by this method.
     *
     * NOTE(review): in the aligned branch the loop only makes progress if at least
     * one entry is removed; whether that always holds depends on the alignments and
     * segmentor objects - confirm.
     *
     * @param array  $changed_diff_unchanged diff sentences
     * @param object $alignments             object providing getSentenceInOtherLanguage()
     * @param object $translator             object providing getTranslationInOtherLanguage()
     * @param string $source_lng             source language code
     * @param string $target_lng             target language code (unused)
     *
     * @return array list of sentences for the target side (empty list for empty input)
     */
    public function changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng)
    {
        //initialised so that empty input yields an empty list instead of an undefined variable
        $changedSource_translated = [];
        $segmentor = new Multilingual_Aligner_SentenceSegmentor();
        $num = 0;
        foreach ($changed_diff_unchanged as $value) {
            if ($value == "*deleted*") {
                unset($changed_diff_unchanged[$num]);
            }
            $num++;
        }
        $changed_diff_unchanged = array_values($changed_diff_unchanged);
        while (count($changed_diff_unchanged) > 0) {
            $value = $changed_diff_unchanged[0];
            $key_value = "";
            $target_lng_array = $alignments->getSentenceInOtherLanguage($value, $source_lng, $key_value, $changed_diff_unchanged, $this->array_search_function($value, $changed_diff_unchanged)); //as two or more target sentences are being considered as one string, here instead of string arrays should be returned
            $key_value = $target_lng_array[0];
            $target_lng_sentence = $target_lng_array[1];
            if (strcmp($target_lng_sentence, "NULL") != 0) {
                $source_sent = $segmentor->segment(trim($key_value));
                $index = $this->array_search_function($value, $changed_diff_unchanged);
                $jj = 0;
                //remove as many non-"+" entries as the aligned source text has sentences;
                //every "+" entry met on the way is kept and widens the window by one
                for ($ii = $index, $count_ss = count($source_sent); $ii < $count_ss + $index + $jj; $ii++) {
                    //positions past the end of the list are treated as empty entries
                    $current = isset($changed_diff_unchanged[$ii]) ? $changed_diff_unchanged[$ii] : "";
                    if ($current == "" || $current[0] != "+") {
                        unset($changed_diff_unchanged[$ii]);
                    } else {
                        $jj++;
                    }
                }
                $sentences = $segmentor->segment(trim($target_lng_sentence));
                foreach ($sentences as $item) {
                    $changedSource_translated[] = trim($item);
                }
            } else {
                //Machine Translation is required
                if ($value != "" && $value != "+") {
                    //the "+" marker is not part of the text sent to the translator
                    $temp = ($value[0] == "+") ? substr($value, 1) : $value;
                    $translation = $translator->getTranslationInOtherLanguage($temp, $source_lng);
                    if ($translation != "NULL") {
                        $changedSource_translated[] = "+" . trim($translation);
                    } else {
                        //no translation is available: keep the untranslated text
                        $changedSource_translated[] = "+" . $temp;
                    }
                } else {
                    $changedSource_translated[] = $value;
                }
                $index = $this->array_search_function($value, $changed_diff_unchanged);
                unset($changed_diff_unchanged[$index]);
            }
            $changed_diff_unchanged = array_values($changed_diff_unchanged);
        }
        return $changedSource_translated;
    }

    /**
     * Splits diff sentences into three lists: negative ("-" sentences without a
     * matching "+" twin), positive ("+" sentences without a matching "-" twin, or
     * whose text itself starts with "+") and normal (unprefixed sentences plus the
     * text of matched "-"/"+" pairs, stored once).
     *
     * @param array $newarray_diff_oldarray diff sentences
     *
     * @return array [negative_array, positive_array, normal_array]
     */
    public function separate_negative_positive_normal_sentences($newarray_diff_oldarray)
    {
        $negative_array = [];
        $positive_array = [];
        $normal_array = [];
        $ii = -1;
        foreach ($newarray_diff_oldarray as $value) {
            $ii++;
            $sign = substr($value, 0, 1);
            $text = substr($value, 1);
            //sentence starts with '-'
            if (strcmp($sign, "-") == 0) {
                $match = $this->array_search_function("+" . $text, $newarray_diff_oldarray);
                if ($match == -1) {
                    $negative_array[] = $value;
                } elseif ($this->char_at($text, 0) == '+') {
                    //if same sentence is being added in to both source and target files
                    $newarray_diff_oldarray[$ii] = "";
                } elseif ($this->array_search_function($text, $normal_array) == -1) {
                    $normal_array[] = $text;
                }
            //sentence starts with '+'
            } elseif (strcmp($sign, "+") == 0) {
                $match = $this->array_search_function("-" . $text, $newarray_diff_oldarray);
                if ($match == -1) {
                    $positive_array[] = $value;
                } elseif ($this->char_at($text, 0) == "+") {
                    //if same sentence is being added in to both source and target files
                    $positive_array[] = $value;
                } elseif ($this->array_search_function($text, $normal_array) == -1) {
                    $normal_array[] = $text;
                }
            //normal sentence
            } else {
                $normal_array[] = $value;
            }
        }
        return [$negative_array, $positive_array, $normal_array];
    }

    /**
     * For every negative sentence, finds the closest preceding sentence in
     * $target_diff_source whose text (without a leading "+"/"-") is in $normal_array,
     * and records the negative sentence under that text. Negative sentences with no
     * such predecessor are collected separately.
     *
     * $positive_array is accepted but not used by this method.
     *
     * @param array $negative_array     sentences to locate
     * @param array $positive_array     unused
     * @param array $normal_array       sentences that may serve as anchors
     * @param array $target_diff_source the full diff list in order
     *
     * @return array [add_beginning, sentence_location] where sentence_location maps
     *               an anchor sentence to the list of negative sentences following it
     */
    public function getlocation_addedsentenceintoSource_or_deletedsentencefromTarget($negative_array, $positive_array, $normal_array, $target_diff_source)
    {
        $add_beginning = [];
        $sentence_location = [];
        foreach ($negative_array as $item) {
            $index = $this->array_search_function($item, $target_diff_source);
            $get = 0; // to check if there is any normal sentence before this negative sentence
            for ($jj = $index - 1; $jj >= 0; $jj--) {
                $first = $this->char_at($target_diff_source[$jj], 0);
                if ($first == "+" || $first == "-") {
                    $temp = substr($target_diff_source[$jj], 1);
                } else {
                    $temp = $target_diff_source[$jj];
                }
                if ($this->array_search_function($temp, $normal_array) != -1) {
                    //found a normal sentence before: append to its list, creating the list if needed
                    $get = 1;
                    if (array_key_exists($temp, $sentence_location)) {
                        $sentence_location[$temp][] = $item;
                    } else {
                        $sentence_location[$temp] = [$item];
                    }
                    break;
                }
                //search in positive_array is doubtful
            }

            if ($get == 0) {
                $add_beginning[] = $item;
            }
        }
        return [$add_beginning, $sentence_location];
    }



    /**
     * Merges the translated source update with the updated target and returns the
     * final target sentence list, labelling entries with "Added_Source ",
     * "Deleted_Target " or "Deleted_Source " where applicable.
     *
     * @param array $Souce_Updated_Translated translated sentences of the updated source
     * @param array $Target_Updated           sentences of the updated target
     *
     * @return array final list of target sentences (empty list when there is nothing to emit)
     */
    public function FinalUpdatedFileinTagetLanguage($Souce_Updated_Translated, $Target_Updated)
    {
        //initialised so that empty input yields an empty list instead of an undefined variable
        $finalUpdatedTarget = [];
        $target_diff_source = $this->text_diff($Souce_Updated_Translated, $Target_Updated);
        $target_diff_source_new = $this->remove_wikisyntax($target_diff_source);
        //generation of three arrays
        $combo_array = $this->separate_negative_positive_normal_sentences($target_diff_source_new);
        $negative_array = $combo_array[0];
        $positive_array = $combo_array[1];
        $normal_array = $combo_array[2];
        //generation of three arrays is complete
        //Creating hash table to get the proper location for insertion
        $combo_arr = $this->getlocation_addedsentenceintoSource_or_deletedsentencefromTarget($negative_array, $positive_array, $normal_array, $target_diff_source_new);
        $add_beginning = $combo_arr[0];
        $sentence_location = $combo_arr[1];
        //sentence_location and add_beginning is complete
        //generation of final updated target file
        foreach ($add_beginning as $item) {
            if ($this->char_at($item, 1) == "+") {
                $finalUpdatedTarget[] = "Added_Source " . substr($item, 2);
            } else {
                $finalUpdatedTarget[] = "Deleted_Target " . $this->strip_numbered_prefix($item);
            }
        }

        foreach ($Target_Updated as $item) {
            //if present in positive_array
            if (($index = $this->array_search_function("+" . $item, $positive_array)) != -1) {
                if ($positive_array[$index] != "+" && $this->char_at($positive_array[$index], 1) == '+') { //'++' case
                    $temp = substr($item, 1);
                } else {
                    $temp = "Deleted_Source " . $this->strip_numbered_prefix($item);
                }
                $finalUpdatedTarget[] = $temp;
            } else {
                $item1 = $item;
                if ($item != "") {
                    if ($item[0] == "+") { //if same sentence is added at same positions in both source and target
                        $item1 = substr($item, 1);
                    } else {
                        $item1 = $this->strip_numbered_prefix($item);
                    }
                }
                $finalUpdatedTarget[] = $item1;
                //emit the sentences recorded as following this one
                if (array_key_exists($item, $sentence_location)) {
                    foreach ($sentence_location[$item] as $add) {
                        if ($add != "-" && $this->char_at($add, 1) == "+") {
                            $finalUpdatedTarget[] = "Added_Source " . substr($add, 2);
                        } else {
                            if ($add != "-") {
                                $add = substr($add, 0, 1) . $this->strip_numbered_prefix($add);
                            }
                            $finalUpdatedTarget[] = "Deleted_Target " . substr($add, 1);
                        }
                    }
                }
            }
        }
        return $finalUpdatedTarget;
    }

    /**
     * Returns the 0-based position (in iteration order) of the first element that
     * is string-equal to $temp, or -1 when there is none.
     *
     * @param string $temp  value to look for
     * @param array  $array values to search
     *
     * @return int position or -1
     */
    public function array_search_function($temp, $array)
    {
        $ii = 0;
        foreach ($array as $val) {
            if (strcmp($temp, $val) == 0) {
                return $ii;
            }
            $ii++;
        }
        return -1;
    }

    /**
     * Returns the byte offset of the first occurrence of $pat in $string.
     *
     * Returns 0 when both are empty and -1 when exactly one of them is empty or
     * when $pat does not occur in $string.
     *
     * @param string $string text to search in
     * @param string $pat    text to search for
     *
     * @return int offset or -1
     */
    public function strpos_function($string, $pat)
    {
        $string = (string) $string;
        $pat = (string) $pat;
        if ($string === '' && $pat === '') {
            return 0;
        }
        if ($string === '' || $pat === '') {
            return -1;
        }
        //the former hand-written scan reported offset 0 for every one-character
        //pattern, even when the character was absent
        $position = strpos($string, $pat);
        return ($position === false) ? -1 : $position;
    }

    /**
     * Returns the character of $string at $offset, or '' when the offset does not exist.
     */
    private function char_at($string, $offset)
    {
        return isset($string[$offset]) ? $string[$offset] : '';
    }

    /**
     * Skips the run of numeric characters starting at offset 2 of $sentence and
     * returns what follows the single character after that run.
     */
    private function strip_numbered_prefix($sentence)
    {
        $bb = 2;
        while (is_numeric($this->char_at($sentence, $bb))) {
            $bb++;
        }
        return substr($sentence, $bb + 1);
    }
}