1<?php
2// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
3//
4// All Rights Reserved. See copyright.txt for details and a complete list of authors.
5// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
6// $Id$
7
8include_once __DIR__ . '/../../../diff/Diff.php';
9include_once __DIR__ . '/../../..//diff/difflib.php';
10include_once __DIR__ . '/../../..//diff/Renderer.php';
11include_once __DIR__ . '/../../..//diff/renderer_unified.php';
12include_once __DIR__ . '/SentenceAlignments.php';
13include_once __DIR__ . '/SentenceSegmentor.php';
14include_once __DIR__ . '/MockMTWrapper.php';
15
16
/*
 * Class used to carry the modifications made in one version of a page over to the other version of the same page.
 */
20
class Multilingual_Aligner_UpdateSentences1
{

	/**
	 * Diffs the original and the modified sentence lists and returns the modified
	 * version, optionally converted to the target language.
	 *
	 * @param array  $unchangedSource_array sentences of the original version
	 * @param array  $changedSource_array   sentences of the modified version
	 * @param object $alignments            object providing getSentenceInOtherLanguage(); only used when $translation == 1
	 * @param object $translator            object providing getTranslationInOtherLanguage(); only used when $translation == 1
	 * @param string $source_lng            source language, forwarded to the collaborators
	 * @param string $target_lng            target language, forwarded to the translation step
	 * @param int    $translation           1 in case of source modification (the result is translated),
	 *                                      0 in case of target modification (the result is only cleaned up)
	 *
	 * @return array the resulting sentence list
	 */
	public function DifferencebetweenOriginalFileandModifiedFile($unchangedSource_array, $changedSource_array, $alignments, $translator, $source_lng, $target_lng, $translation)
	{
		$changed_diff_unchanged = $this->text_diff($unchangedSource_array, $changedSource_array);
		// both files are the same: fall back to the modified sentences themselves
		if (count($changed_diff_unchanged) == 0) {
			$changed_diff_unchanged = $changedSource_array;
		}
		$changed_diff_unchanged = $this->remove_wikisyntax($changed_diff_unchanged);
		$changed_diff_unchanged = $this->identify_shuffled_and_negative_sentences($changed_diff_unchanged);

		// files are in the source language: convert the sentences to the target language
		if ($translation == 1) {
			return $this->changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng);
		}

		// files are already in the target language: drop the deleted sentences, trim the rest
		$changed_final = [];
		foreach ($changed_diff_unchanged as $sentence) {
			if (strcmp($sentence, "*deleted*") != 0) {
				$changed_final[] = trim($sentence);
			}
		}
		return $changed_final;
	}


	/**
	 * Resolves '-'/'+' pairs that denote the same sentence (a moved sentence) and
	 * marks the remaining '-' sentences as deleted.
	 *
	 * For a '-' entry whose '+' twin exists, the '-' entry becomes "" and the twin
	 * loses its '+' prefix; without a twin the entry becomes "*deleted*".
	 * For a '+' entry whose '-' twin exists, the twin becomes "*deleted*" and the
	 * entry loses its '+' prefix; without a twin the entry is left untouched.
	 *
	 * @param array $changed_diff_unchanged list of diff sentences (expected to be a 0-based list)
	 *
	 * @return array the same list with the replacements applied
	 */
	public function identify_shuffled_and_negative_sentences($changed_diff_unchanged)
	{
		$ii = 0;
		// $value is the entry as it was when the loop started; the searches below
		// run against the array as modified so far.
		foreach ($changed_diff_unchanged as $value) {
			//sentence is preceded by '-'
			if (strcmp(substr($value, 0, 1), "-") == 0) {
				$temp = "+" . substr($value, 1);
				$match = $this->array_search_function($temp, $changed_diff_unchanged);
				//sentence is shuffled
				if ($match != -1) {
					$changed_diff_unchanged[$ii] = "";//eliminating the -ve sentence
					$changed_diff_unchanged[$match] = substr($value, 1);
				} else {
					$changed_diff_unchanged[$ii] = "*deleted*";
				}
			}
			//sentence is preceded by '+'
			if (strcmp(substr($value, 0, 1), "+") == 0) {
				$temp = "-" . substr($value, 1);
				$match = $this->array_search_function($temp, $changed_diff_unchanged);
				//sentence is shuffled
				if ($match != -1) {
					$changed_diff_unchanged[$match] = "*deleted*";//eliminating the -ve sentence
					$changed_diff_unchanged[$ii] = substr($value, 1);
				}
			}
			$ii = $ii + 1;
		}
		return $changed_diff_unchanged;
	}

	/**
	 * Strips the diff highlighting markup and splits every entry on '<br />'.
	 *
	 * The '<span class="diffchar">' and '</span>' tags are removed. When an entry
	 * starts with '-' or '+', its first piece is trimmed (it still carries the
	 * marker) and every following piece is trimmed and given the same marker.
	 * Pieces of unmarked entries are copied unchanged.
	 *
	 * @param array $sentences diff entries
	 *
	 * @return array flat list of pieces; an empty list for empty input
	 */
	public function remove_wikisyntax($sentences)
	{
		// Initialised so that empty input yields [] instead of an undefined variable.
		$sentences_new = [];
		foreach ($sentences as $val) {
			$val = str_replace(['<span class="diffchar">', '</span>'], '', $val);
			$marker = '';
			if ($val != "" && ($val[0] == "-" || $val[0] == "+")) {
				$marker = $val[0];
			}
			foreach (explode('<br />', $val) as $num => $piece) {
				if ($marker === '') {
					$sentences_new[] = $piece;
				} elseif ($num == 0) {
					$sentences_new[] = trim($piece);
				} else {
					$sentences_new[] = $marker . trim($piece);
				}
			}
		}
		return $sentences_new;
	}


	/**
	 * Renders a unified diff of two sentence lists and flattens it into one list
	 * where deleted entries are prefixed with '-' and added entries with '+'.
	 *
	 * NOTE(review): assumes Text_Diff_Renderer_unified::render() returns an array
	 * of arrays whose values include a type marker ('diffbody', 'diffdeleted' or
	 * 'diffadded') and whose 'data' key holds the affected sentences - confirm
	 * against the renderer. The first element of the rendered result is skipped.
	 *
	 * @param array $unchangedSentence_array sentences of the original version
	 * @param array $changedSentence_array   sentences of the modified version
	 *
	 * @return array flattened diff entries
	 */
	public function text_diff($unchangedSentence_array, $changedSentence_array)
	{
		$changed_diff_unchanged = [];
		$diff = new Text_Diff($unchangedSentence_array, $changedSentence_array);
		// The context size equals the number of original sentences.
		$context = count($unchangedSentence_array);
		$renderer = new Text_Diff_Renderer_unified($context);
		$arr = $renderer->render($diff);
		$kk = 0;
		// Type flags; they stay set until the next 'data' key consumes them.
		$body = 0;
		$del = 0;
		$add = 0;
		foreach ($arr as $ee) {
			if ($kk != 0) {
				// The loose comparisons below are kept on purpose: tightening them
				// could change which entries match.
				foreach ($ee as $key => $val) {
					if ($val == "diffbody") {
						$body = 1;
					}
					if ($val == "diffdeleted") {
						$del = 1;
					}
					if ($val == "diffadded") {
						$add = 1;
					}
					if ($key == "data") {
						foreach ($val as $item) {
							if ($body == 1) {
								$changed_diff_unchanged[] = $item;
							}
							if ($del == 1) {
								$changed_diff_unchanged[] = "-" . $item;
							}
							if ($add == 1) {
								$changed_diff_unchanged[] = "+" . $item;
							}
						}
						$body = 0;
						$del = 0;
						$add = 0;
					}
				}
			}
			$kk = $kk + 1;
		}
		return $changed_diff_unchanged;
	}

	/**
	 * Converts the diffed source sentences into target-language sentences.
	 *
	 * Entries equal to "*deleted*" are dropped first. The list is then consumed
	 * from the front: when $alignments knows a target sentence for the entry, the
	 * aligned source sentences are removed from the list and the segmented target
	 * text is emitted; otherwise the entry is sent to $translator and emitted with
	 * a '+' prefix (the untranslated text is used when the translator answers "NULL").
	 *
	 * @param array  $changed_diff_unchanged diff entries (0-based list)
	 * @param object $alignments             must provide getSentenceInOtherLanguage(); the result is read as
	 *                                       [0 => source text, 1 => target text or "NULL"]
	 * @param object $translator             must provide getTranslationInOtherLanguage(), answering "NULL" when unavailable
	 * @param string $source_lng             source language, forwarded to both collaborators
	 * @param string $target_lng             target language (not used by this method)
	 *
	 * @return array target-language sentences; an empty list for empty input
	 */
	public function changedSourceFileTranslatedIntoTargetLanguage($changed_diff_unchanged, $alignments, $translator, $source_lng, $target_lng)
	{
		$segmentor = new Multilingual_Aligner_SentenceSegmentor();
		// Initialised so that empty input yields [] instead of an undefined variable.
		$changedSource_translated = [];

		foreach ($changed_diff_unchanged as $key => $value) {
			if ($value == "*deleted*") {
				unset($changed_diff_unchanged[$key]);
			}
		}
		$changed_diff_unchanged = array_values($changed_diff_unchanged);

		while (count($changed_diff_unchanged) > 0) {
			$value = $changed_diff_unchanged[0];
			$key_value = "";
			$target_lng_array = $alignments->getSentenceInOtherLanguage($value, $source_lng, $key_value, $changed_diff_unchanged, $this->array_search_function($value, $changed_diff_unchanged));  //as two or more target sentences are being considered as one string, here instead of string arrays should be returned
			$key_value = $target_lng_array[0];
			$target_lng_sentence = $target_lng_array[1];
			if (strcmp($target_lng_sentence, "NULL") != 0) {
				// An aligned translation exists: remove the aligned source sentences.
				// '+' entries inside the range are kept and extend the range by one each.
				// NOTE(review): if the first entry starts with '+' it is never removed
				// here, so the loop only ends if a later lookup answers "NULL" - confirm
				// that alignments never match '+' entries.
				$source_sent = $segmentor->segment(trim($key_value));
				$index = $this->array_search_function($value, $changed_diff_unchanged);
				$jj = 0;
				for ($ii = $index, $count_ss = count($source_sent); $ii < $count_ss + $index + $jj; $ii++) {
					// The list is contiguous here, so a missing key means the end was reached.
					if (! array_key_exists($ii, $changed_diff_unchanged)) {
						break;
					}
					if ($changed_diff_unchanged[$ii] == "" || $changed_diff_unchanged[$ii][0] != "+") {
						unset($changed_diff_unchanged[$ii]);
					} else {
						$jj++;
					}
				}
				$sentences = $segmentor->segment(trim($target_lng_sentence));
				foreach ($sentences as $item) {
					$changedSource_translated[] = trim($item);
				}
			} else {
				//Machine Translation is required
				if ($value != "" && $value != "+") {
					// A leading '+' is not part of the text to translate.
					$text = ($value[0] == "+") ? substr($value, 1) : $value;
					$translation = $translator->getTranslationInOtherLanguage($text, $source_lng);
					if ($translation != "NULL") {
						$changedSource_translated[] = "+" . trim($translation);
					} else {
						// no translation available: keep the source text
						$changedSource_translated[] = "+" . "$text";
					}
				} else {
					$changedSource_translated[] = $value;
				}
				$index = $this->array_search_function($value, $changed_diff_unchanged);
				unset($changed_diff_unchanged[$index]);
			}
			$changed_diff_unchanged = array_values($changed_diff_unchanged);
		}
		return $changedSource_translated;
	}

	/**
	 * Splits diff entries into negative ('-'), positive ('+') and normal sentences.
	 *
	 * A '-' or '+' entry whose opposite-sign twin exists in the list is treated as
	 * a normal sentence (stored once, without its prefix), except when the text
	 * after the prefix itself starts with '+': such a '-' entry is blanked in the
	 * working copy and such a '+' entry is kept as positive.
	 *
	 * @param array $newarray_diff_oldarray diff entries (0-based list)
	 *
	 * @return array [negative entries, positive entries, normal entries]
	 */
	public function separate_negative_positive_normal_sentences($newarray_diff_oldarray)
	{
		$negative_array = [];
		$positive_array = [];
		$normal_array = [];
		$ii = -1;
		foreach ($newarray_diff_oldarray as $value) {
			$ii++;
			//sentence starts with '-'
			if (strcmp(substr($value, 0, 1), "-") == 0) {
				$temp = "+" . substr($value, 1);
				$match = $this->array_search_function($temp, $newarray_diff_oldarray);
				if ($match != -1) {
					//if same sentence is being added in to both source and target files
					if (isset($temp[1]) && $temp[1] == '+') {
						$newarray_diff_oldarray[$ii] = "";
					} else {
						if (($this->array_search_function(substr($value, 1), $normal_array)) == -1) {
							$normal_array[] = substr($value, 1);
						}
					}
				} else {
					$negative_array[] = $value;
				}
			//sentence starts with '+'
			} elseif (strcmp(substr($value, 0, 1), "+") == 0) {
				$temp = "-" . substr($value, 1);
				$match = $this->array_search_function($temp, $newarray_diff_oldarray);
				if ($match != -1) {
					//if same sentence is being added in to both source and target files
					if (isset($temp[1]) && $temp[1] == "+") {
						$positive_array[] = $value;
					} else {
						if ($this->array_search_function(substr($value, 1), $normal_array) == -1) {
							$normal_array[] = substr($value, 1);
						}
					}
				} else {
					$positive_array[] = $value;
				}
			//normal sentence
			} else {
				$normal_array[] = $value;
			}
		}
		$combo_array = [$negative_array, $positive_array, $normal_array];
		return $combo_array;
	}

	/**
	 * Finds, for every negative sentence, the nearest preceding normal sentence
	 * in the diff so the negative sentence can later be emitted right after it.
	 *
	 * @param array $negative_array     '-' entries to locate
	 * @param array $positive_array     '+' entries (not used by this method)
	 * @param array $normal_array       normal sentences
	 * @param array $target_diff_source the diff entries the other arrays were taken from
	 *
	 * @return array [entries with no preceding normal sentence,
	 *                map of normal sentence => list of negative entries following it]
	 */
	public function getlocation_addedsentenceintoSource_or_deletedsentencefromTarget($negative_array, $positive_array, $normal_array, $target_diff_source)
	{
		$add_beginning = [];
		$sentence_location = [];
		foreach ($negative_array as $item) {
			$index = $this->array_search_function($item, $target_diff_source);
			$get = 0; // to check if there is any normal sentence before this negative sentence
			// Walk backwards from the negative sentence to the closest normal one.
			for ($jj = $index - 1; $jj >= 0; $jj--) {
				$line = $target_diff_source[$jj];
				if ($line != "" && ($line[0] == "+" || $line[0] == "-")) {
					$temp = substr($line, 1);
				} else {
					$temp = $line;
				}
				if ($this->array_search_function($temp, $normal_array) == -1) {
					//search in positive_array is doubtful
					continue;
				}
				$get = 1; //found a normal sentence before
				$found = 0; //to check if already present in hash table
				foreach ($sentence_location as $key => $val) {
					if (strcmp($key, $temp) == 0) {
						$found = 1;
						$sentence_location[$key][] = $item;
					}
				}
				if ($found == 0) {
					$sentence_location[$temp] = [$item];
				}
				break;
			}

			if ($get == 0) {
				$add_beginning[] = $item;
			}
		}
		$combo_arr = [$add_beginning, $sentence_location];
		return $combo_arr;
	}



	/**
	 * Merges the translated, updated source with the updated target into one
	 * annotated target-language sentence list.
	 *
	 * Sentences are labelled with "Added_Source ", "Deleted_Target " or
	 * "Deleted_Source " depending on how they appear in the diff between the two
	 * inputs; all other sentences are emitted with their leading marker removed.
	 *
	 * @param array $Souce_Updated_Translated updated source sentences, already in the target language
	 * @param array $Target_Updated           updated target sentences
	 *
	 * @return array annotated sentence list; an empty list when nothing is produced
	 */
	public function FinalUpdatedFileinTagetLanguage($Souce_Updated_Translated, $Target_Updated)
	{
		$target_diff_source = $this->text_diff($Souce_Updated_Translated, $Target_Updated);
		$target_diff_source_new = $this->remove_wikisyntax($target_diff_source);
		//generation of three arrays
		$combo_array = $this->separate_negative_positive_normal_sentences($target_diff_source_new);
		$negative_array = $combo_array[0];
		$positive_array = $combo_array[1];
		$normal_array = $combo_array[2];
		//Creating hash table to get the proper location for insertion
		$combo_arr = $this->getlocation_addedsentenceintoSource_or_deletedsentencefromTarget($negative_array, $positive_array, $normal_array, $target_diff_source_new);
		$add_beginning = $combo_arr[0];
		$sentence_location = $combo_arr[1];

		//generation of final updated target file
		// Initialised so that empty input yields [] instead of an undefined variable.
		$finalUpdatedTarget = [];
		foreach ($add_beginning as $item) {
			if (isset($item[1]) && $item[1] == "+") {
				$finalUpdatedTarget[] = "Added_Source " . substr($item, 2);
			} else {
				$finalUpdatedTarget[] = "Deleted_Target " . substr($item, $this->offsetAfterNumericRun($item));
			}
		}

		foreach ($Target_Updated as $item) {
			//if present in positive_array
			$index = $this->array_search_function("+" . $item, $positive_array);
			if ($index != -1) {
				$positive = $positive_array[$index];
				if ($positive != "+" && isset($positive[1]) && $positive[1] == '+') { //'++' case
					$temp = substr($item, 1);
				} else {
					$item = substr($item, $this->offsetAfterNumericRun($item));
					$temp = "Deleted_Source " . $item;
				}
				$finalUpdatedTarget[] = $temp;
				continue;
			}

			$item1 = $item;
			if ($item != "") {
				if ($item[0] == "+") { //if same sentence is added at same positions in both source and target
					$item1 = substr($item, 1);
				} else {
					$item1 = substr($item, $this->offsetAfterNumericRun($item));
				}
			}
			$finalUpdatedTarget[] = $item1;

			// Emit the negative sentences recorded as following this sentence.
			foreach ($sentence_location as $key => $val) {
				if (strcmp($key, $item) != 0) {
					continue;
				}
				foreach ($val as $add) {
					if ($add != "-" && isset($add[1]) && $add[1] == "+") {
						$finalUpdatedTarget[] = "Added_Source " . substr($add, 2);
					} else {
						if ($add != "-") {
							$add = substr($add, 0, 1) . substr($add, $this->offsetAfterNumericRun($add));
						}
						$finalUpdatedTarget[] = "Deleted_Target " . substr($add, 1);
					}
				}
				break;
			}
		}
		return $finalUpdatedTarget;
	}

	/**
	 * Returns the offset just past the numeric run that starts at offset 2, plus
	 * one more character.
	 *
	 * NOTE(review): presumably this skips a marker, a numeric identifier and a
	 * separator at the start of the entry - confirm against the producers.
	 *
	 * @param string $text entry to inspect
	 *
	 * @return int offset to pass to substr()
	 */
	private function offsetAfterNumericRun($text)
	{
		$bb = 2;
		// isset() stops the scan at the end of the string without a warning.
		while (isset($text[$bb]) && is_numeric($text[$bb])) {
			$bb++;
		}
		return $bb + 1;
	}

	/**
	 * Returns the position of the first element that equals $temp under strcmp().
	 *
	 * The result is the iteration position, not the array key; the two only
	 * coincide for 0-based lists.
	 *
	 * @param string $temp  value to look for
	 * @param array  $array elements to search
	 *
	 * @return int position of the first match, or -1 when there is none
	 */
	public function array_search_function($temp, $array)
	{
		$position = 0;
		foreach ($array as $val) {
			if (strcmp($temp, $val) == 0) {
				return $position;
			}
			$position++;
		}
		return -1;
	}

	/**
	 * Returns the offset of the first occurrence of $pat in $string.
	 *
	 * Unlike strpos() this returns -1 instead of false, returns 0 when both
	 * arguments are empty and -1 when exactly one of them is empty.
	 *
	 * @param string $string text to search in
	 * @param string $pat    text to search for
	 *
	 * @return int offset of the first occurrence, or -1 when there is none
	 */
	public function strpos_function($string, $pat)
	{
		$string = (string) $string;
		$pat = (string) $pat;
		if ($string === '' && $pat === '') {
			return 0;
		}
		if ($string === '' || $pat === '') {
			return -1;
		}
		// The hand-written scan this replaces returned 0 for every one-character
		// pattern whose first comparison failed.
		$position = strpos($string, $pat);
		return $position === false ? -1 : $position;
	}
}
437