<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 * @package   Zend_Search
 */

namespace ZendSearch\Lucene\Storage\File;

use ZendSearch\Lucene;

/**
 * File implementation backed by a PHP string held in memory.
 *
 * Reads are served from the current position inside the string; every write
 * operation appends to the end of the string and moves the position to the
 * new end (random-access writes are not supported).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 */
class Memory extends AbstractFile
{
    /**
     * File data
     *
     * @var string
     */
    private $_data;

    /**
     * Current file position (byte offset into $_data)
     *
     * @var integer
     */
    private $_position = 0;


    /**
     * Object constructor
     *
     * @param string $data Initial content of the in-memory file
     */
    public function __construct($data)
    {
        $this->_data = $data;
    }

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length = 1)
    {
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;

        return $returnValue;
    }


    /**
     * Sets the file position indicator.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1 and leaves the
     * position untouched. The call fails for an unknown $whence value or
     * when the resulting position would be negative.
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence = SEEK_SET)
    {
        switch ($whence) {
            case SEEK_SET:
                $position = $offset;
                break;

            case SEEK_CUR:
                $position = $this->_position + $offset;
                break;

            case SEEK_END:
                $position = strlen($this->_data) + $offset;
                break;

            default:
                // Unknown origin
                return -1;
        }

        if ($position < 0) {
            // A negative position would make substr() read from the end of the data
            return -1;
        }

        $this->_position = $position;

        return 0;
    }

    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return $this->_position;
    }

    /**
     * Flush output.
     *
     * Nothing is buffered for a memory file, so this always succeeds.
     *
     * @return boolean
     */
    public function flush()
    {
        // Do nothing

        return true;
    }

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length = null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($length !== null) {
            $this->_data .= substr($data, 0, $length);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock Ignored by this implementation
     * @return boolean Always true
     */
    public function lock($lockType, $nonBlockinLock = false)
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Unlock file
     */
    public function unlock()
    {
        // Memory files can't be shared
        // do nothing
    }

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_data[$this->_position++]);
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return integer Number of bytes written (always 1)
     */
    public function writeByte($byte)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        $this->_data .= chr($byte);
        $this->_position = strlen($this->_data);

        return 1;
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        $returnValue = substr($this->_data, $this->_position, $num);
        $this->_position += $num;

        return $returnValue;
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num = null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($num !== null) {
            $this->_data .= substr($data, 0, $num);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads an integer from the current position in the file
     * and advances the file pointer.
     *
     * The four bytes are combined with the first byte as the most
     * significant one.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = substr($this->_data, $this->_position, 4);
        $this->_position += 4;

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file.
     *
     * The value is stored as four bytes, most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        $this->_data .= chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF);

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * @throws \ZendSearch\Lucene\Exception\RuntimeException on 32-bit platforms
     *         when the stored value cannot be represented
     * @return integer|float
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = substr($this->_data, $this->_position, 8);
            $this->_position += 8;

            return  ord($str[0]) << 56 |
                    ord($str[1]) << 48 |
                    ord($str[2]) << 40 |
                    ord($str[3]) << 32 |
                    ord($str[4]) << 24 |
                    ord($str[5]) << 16 |
                    ord($str[6]) << 8  |
                    ord($str[7]);
        } else {
            return $this->_readLong32Bit();
        }
    }

    /**
     * Writes long integer to the end of file
     *
     * @param integer $value
     * @throws \ZendSearch\Lucene\Exception\RuntimeException on 32-bit platforms
     *         when the value cannot be represented
     */
    public function writeLong($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_data .= chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF);
        } else {
            $this->_writeLong32Bit($value);
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     * @return integer|float
     */
    protected function _readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            } else {
                throw new Lucene\Exception\RuntimeException(
                    'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
                );
            }

        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * @param integer|float $value
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     */
    protected function _writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            throw new Lucene\Exception\RuntimeException(
                'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
            );
        }

        if ($value < 0) {
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte contributes its low seven bits, least significant group
     * first; a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_data[$this->_position++]);
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_data[$this->_position++]);
            $val |= ($nextByte & 0x7F) << $shift;
        }

        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_data .= chr( ($value & 0x7F)|0x80 );
            $value >>= 7;
        }
        $this->_data .= chr($value);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so the method
     * first reads one byte per character and then pulls in the extra bytes
     * announced by every multi-byte lead byte it encounters. The two-byte
     * sequence \xC0\x80 is converted back to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        $strVal = substr($this->_data, $this->_position, $strlen);
        $this->_position += $strlen;

        for ($count = 0; $count < $strlen; $count++) {
            $leadByte = ord($strVal[$count]);

            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }

            // Continuation bytes were not included in the character count
            $strVal .= substr($this->_data, $this->_position, $addBytes);
            $this->_position += $addBytes;
            $strlen += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($leadByte == 0xC0
                && isset($strVal[$count + 1])
                && ord($strVal[$count + 1]) == 0x80
            ) {
                // Collapse \xC0\x80 into a real NUL byte. The string gets one
                // byte shorter, so the byte following the NUL now sits at
                // $count + 1 and is picked up by the regular loop increment.
                $strVal = substr($strVal, 0, $count) . "\x00" . substr($strVal, $count + 2);
                $strlen--;
            } else {
                // Skip continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $strVal;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is prefixed with its character count (as VInt); NUL bytes
     * are stored as the two-byte sequence \xC0\x80.
     *
     * @param string $str
     * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $leadByte = ord($str[$count]);

            if (($leadByte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($leadByte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($leadByte & 0x10) {
                        $addBytes++;
                    }
                }

                // Continuation bytes do not count as characters
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            throw new Lucene\Exception\InvalidArgumentException('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // str_replace() returns the subject untouched when it holds no NUL bytes,
        // so no separate detection pass is needed.
        $this->_data .= str_replace("\x00", "\xC0\x80", $str);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is expected to be prefixed with its byte length stored as VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        $length = $this->readVInt();
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;

        return $returnValue;
    }
}