<?php
// (c) Copyright by authors of the Tiki Wiki CMS Groupware Project
//
// All Rights Reserved. See copyright.txt for details and a complete list of authors.
// Licensed under the GNU LESSER GENERAL PUBLIC LICENSE. See license.txt for details.
// $Id$

include_once "SentenceSegmentor.php";

/**
 * Class used to store aligned bilingual sentences for two different
 * linguistic versions of a same document. For example, English
 * sentences with their corresponding French sentences.
 *
 * The alignment table is keyed by the text in language $l1 and holds the
 * corresponding text in language $l2 as value.
 */
class Multilingual_Aligner_SentenceAlignments
{
    /** @var array map of $l1 text => $l2 text */
    protected $alignment_table = [];
    protected $l1 = "en";
    protected $l2 = "fr";

    /**
     * Record one aligned pair. Whichever side is written in $this->l1
     * becomes the table key; the other side becomes the value.
     * The pair is ignored when neither language equals $this->l1.
     *
     * @param string $lng1_sentence text in language $lng1
     * @param string $lng1          language code of $lng1_sentence
     * @param string $lng2_sentence text in language $lng2
     * @param string $lng2          language code of $lng2_sentence
     */
    public function addSentencePair($lng1_sentence, $lng1, $lng2_sentence, $lng2)
    {
        if ($lng1 == $this->l1) {
            $this->alignment_table[$lng1_sentence] = $lng2_sentence;
        } elseif ($lng2 == $this->l1) {
            $this->alignment_table[$lng2_sentence] = $lng1_sentence;
        }
    }

    /**
     * Look up the aligned text for a sentence of a document.
     *
     * Two passes are made over the alignment table:
     *  1. Single entry: the first segmented sentence of an entry equals
     *     $source_lng_sentence and the entry's remaining sentences equal the
     *     following entries of $sentence_array (entries starting with "+"
     *     are skipped).
     *  2. Run of entries: the document sentences and the table sentences are
     *     consumed prefix by prefix until both end at the same point on an
     *     entry boundary; the texts of all entries of the run are concatenated.
     *
     * @param string $source_lng_sentence sentence to look up
     * @param string $source_lng          language of the sentence; when it equals $this->l1 the
     *                                    table keys are searched, otherwise the table values
     * @param string $key_value           prefix prepended to the source-side text returned by pass 2
     * @param array  $sentence_array      sentences of the document being processed
     * @param int    $index               position of $source_lng_sentence in $sentence_array
     *
     * @return array [source text, target text] from pass 1,
     *               [$key_value . source texts, target texts, null] from pass 2,
     *               or ["", "NULL"] when nothing matches
     */
    public function getSentenceInOtherLanguage($source_lng_sentence, $source_lng, $key_value, $sentence_array, $index)
    {
        $segmentor = new Multilingual_Aligner_SentenceSegmentor();

        // Any language other than l1 is looked up on the value side. This keeps the
        // historical fallback for unknown codes without relying on an undefined variable.
        $source_is_l1 = ($source_lng == $this->l1);

        // Pass 1: a single table entry covers the sentence (and its followers).
        foreach ($this->alignment_table as $key => $val) {
            $source_text = $source_is_l1 ? $key : $val;
            $target_text = $source_is_l1 ? $val : $key;

            $sentences = $segmentor->segment(trim($source_text));
            if ($this->entryMatchesAt($sentences, $source_lng_sentence, $sentence_array, $index)) {
                return [$source_text, $target_text];
            }
        }

        // Pass 2: the sentence boundaries of document and table differ, so both
        // sides are consumed piecewise. "NULL" marks "nothing pending" on a side.
        $temp1 = "NULL";            // pending remainder of the current table sentence
        $temp2 = "NULL";            // pending remainder of the current document sentence
        $index1 = $index;           // position of the document sentence held in $temp2
        $run_sources = [];          // source-side texts of the entries in the current run
        $run_targets = [];          // target-side texts of the entries in the current run
        $count_array = count($sentence_array);

        foreach ($this->alignment_table as $key => $val) {
            $source_text = $source_is_l1 ? $key : $val;
            $target_text = $source_is_l1 ? $val : $key;

            // The entries of a run are remembered explicitly: foreach does not move
            // the array's internal pointer, so prev()/next() cannot recover them.
            $run_sources[] = $source_text;
            $run_targets[] = $target_text;

            $sentences = array_map('trim', $segmentor->segment(trim($source_text)));
            $count_sentences = count($sentences);
            $sent_ind = 0;
            $found = 0;

            while (true) {
                $found = 0;

                if ($temp1 == "NULL" && $sent_ind < $count_sentences) {
                    $temp1 = $sentences[$sent_ind];
                    $sent_ind++;
                }
                if ($temp2 == "NULL") {
                    $temp2 = $source_lng_sentence;
                }

                $temp1 = trim($temp1);
                $temp2 = trim($temp2);

                if ($this->strpos_function($temp1, $temp2) === 0) {
                    // The document piece is a prefix of the table piece.
                    $found = 1;

                    if (strlen($temp1) == strlen($temp2) && $sent_ind == $count_sentences) {
                        // Both sides end together on an entry boundary: the run is complete.
                        return [
                            $key_value . implode('', $run_sources),
                            implode('', $run_targets),
                            null,
                        ];
                    }

                    $temp1 = substr($temp1, strlen($temp2));
                    if ($temp1 == "") {
                        $temp1 = "NULL";
                    }

                    // Move on to the next document sentence that does not start with "+".
                    while (($index1 + 1) < $count_array) {
                        $index1++;
                        $candidate = $sentence_array[$index1];
                        if ($candidate == "" || $candidate[0] != "+") {
                            $temp2 = $candidate;
                            break;
                        }
                    }
                    continue;
                }

                if ($this->strpos_function($temp2, $temp1) === 0) {
                    // The table piece is a strict prefix of the document piece
                    // (equal pieces are always handled by the branch above).
                    $found = 1;
                    $temp2 = substr($temp2, strlen($temp1));

                    if ($sent_ind >= $count_sentences) {
                        // Entry exhausted: keep the run going with the next entry.
                        $temp1 = "NULL";
                        break;
                    }

                    $temp1 = $sentences[$sent_ind];
                    $sent_ind++;
                    continue;
                }

                // Neither piece is a prefix of the other: this run failed.
                break;
            }

            if ($found == 0) {
                // Start over with the next entry.
                $temp1 = "NULL";
                $temp2 = "NULL";
                $index1 = $index;
                $run_sources = [];
                $run_targets = [];
            }
        }

        return ["", "NULL"];
    }

    /**
     * Pass-1 check: does one table entry line up with the document at $index?
     *
     * @param array  $sentences           segmented sentences of the table entry
     * @param string $source_lng_sentence sentence expected at the start of the entry
     * @param array  $sentence_array      sentences of the document
     * @param int    $index               position of $source_lng_sentence in $sentence_array
     *
     * @return bool true when every sentence of the entry was matched
     */
    private function entryMatchesAt($sentences, $source_lng_sentence, $sentence_array, $index)
    {
        $first = isset($sentences[0]) ? $sentences[0] : '';
        if (strcmp(trim($first), trim($source_lng_sentence)) != 0) {
            return false;
        }

        $count_sentences = count($sentences);
        $count_array = count($sentence_array);

        // $j walks the entry's sentences, $l the document offset; $j only
        // advances on a match so that "+" entries can be skipped in between.
        for ($j = 1, $l = 1; $j < $count_sentences; $l++) {
            if (($l + $index) >= $count_array) {
                return false;
            }

            $candidate = $sentence_array[$index + $l];
            if (strcmp(trim($candidate), trim($sentences[$j])) == 0) {
                $j++;
                continue;
            }

            if ($candidate == "" || $candidate[0] != "+") {
                return false;
            }
            // Otherwise: an added ("+") sentence, skip it.
        }

        return true;
    }

    /**
     * Debug helper: echo every pair of the alignment table.
     */
    public function display_alignment_table()
    {
        echo "in func display<br/>";
        foreach ($this->alignment_table as $key => $val) {
            echo "<-->" . $key . "<--->" . $val . "<--><br/>";
        }
    }

    /**
     * Byte offset of the first occurrence of $pat in $string.
     *
     * @param string $string text to search in
     * @param string $pat    text to search for
     *
     * @return int offset of the first match; 0 when both arguments are empty;
     *             -1 when exactly one argument is empty or there is no match
     */
    public function strpos_function($string, $pat)
    {
        $string = (string) $string;
        $pat = (string) $pat;

        if ($string === '' && $pat === '') {
            return 0;
        }
        if ($string === '' || $pat === '') {
            return -1;
        }

        $offset = strpos($string, $pat);

        return $offset === false ? -1 : $offset;
    }
}