1<?php
2/**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license   http://framework.zend.com/license/new-bsd New BSD License
8 * @package   Zend_Search
9 */
10
11namespace ZendSearch\Lucene\Storage\File;
12
13use ZendSearch\Lucene;
14
15/**
16 * @category   Zend
17 * @package    Zend_Search_Lucene
18 * @subpackage Storage
19 */
class Memory extends AbstractFile
{
    /**
     * Complete contents of the in-memory "file"
     *
     * @var string
     */
    private $_data;

    /**
     * Current file position, in bytes from the beginning of $_data
     *
     * @var integer
     */
    private $_position = 0;


    /**
     * Object constructor
     *
     * @param string $data Initial contents of the in-memory file
     */
    public function __construct($data)
    {
        $this->_data = $data;
    }

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length = 1)
    {
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise (unknown $whence), returns -1
     * and leaves the position unchanged.
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence=SEEK_SET)
    {
        switch ($whence) {
            case SEEK_SET:
                $this->_position = $offset;
                break;

            case SEEK_CUR:
                $this->_position += $offset;
                break;

            case SEEK_END:
                $this->_position = strlen($this->_data) + $offset;
                break;

            default:
                // Unknown $whence value: nothing was moved, report failure
                return -1;
        }

        return 0;
    }

    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return $this->_position;
    }

    /**
     * Flush output.
     *
     * There is nothing to flush for an in-memory file, so this always
     * returns true.
     *
     * @return boolean
     */
    public function flush()
    {
        // Do nothing

        return true;
    }

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($length !== null) {
            $this->_data .= substr($data, 0, $length);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * Both arguments are ignored by this implementation; the method always
     * returns true.
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock
     * @return boolean
     */
    public function lock($lockType, $nonBlockinLock = false)
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Unlock file
     */
    public function unlock()
    {
        // Memory files can't be shared
        // do nothing
    }

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_data[$this->_position++]);
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return integer Always 1
     */
    public function writeByte($byte)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        $this->_data .= chr($byte);
        $this->_position = strlen($this->_data);

        return 1;
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        $returnValue = substr($this->_data, $this->_position, $num);
        $this->_position += $num;

        return $returnValue;
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($num !== null) {
            $this->_data .= substr($data, 0, $num);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads an integer (4 bytes, most significant byte first) from the
     * current position in the file and advances the file pointer.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = substr($this->_data, $this->_position, 4);
        $this->_position += 4;

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer (4 bytes, most significant byte first) to the
     * end of file.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        $this->_data .= chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF);

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer (8 bytes, most significant byte first) from
     * the current position in the file and advances the file pointer.
     *
     * @return integer|float
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = substr($this->_data, $this->_position, 8);
            $this->_position += 8;

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->_readLong32Bit();
        }
    }

    /**
     * Writes long integer (8 bytes, most significant byte first) to the
     * end of file
     *
     * @param integer $value
     */
    public function writeLong($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_data .= chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF);
        } else {
            $this->_writeLong32Bit($value);
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * The value is read as two consecutive 4-byte words (high, then low).
     *
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     * @return integer|float
     */
    protected function _readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                return $wordLow;
            } else {
                throw new Lucene\Exception\RuntimeException(
                    'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
                );
            }

        }

        if ($wordLow < 0) {
            // Value is larger than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is written as two consecutive 4-byte words (high, then low).
     *
     * @param integer|float $value
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     */
    protected function _writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            throw new Lucene\Exception\RuntimeException(
                'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
            );
        }

        if ($value < 0) {
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte contributes its low 7 bits (least significant group
     * first); a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_data[$this->_position++]);
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_data[$this->_position++]);
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * The value is emitted 7 bits at a time, least significant group
     * first; every byte except the last has its high bit set.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_data .= chr( ($value & 0x7F)|0x80 );
            $value >>= 7;
        }
        $this->_data .= chr($value);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The string is stored as a VInt character count followed by the
     * characters themselves; a NUL character is stored as the two-byte
     * sequence \xC0\x80 and is converted back to \x00 here.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // $strlen starts as the number of characters. Read that many bytes
        // first, then fetch the extra bytes of every multi-byte character
        // met while scanning, so that $strlen always tracks strlen($str_val).
        $str_val = substr($this->_data, $this->_position, $strlen);
        $this->_position += $strlen;

        for ($count = 0; $count < $strlen; $count++ ) {
            $leadByte = ord($str_val[$count]);
            if (($leadByte & 0xC0) != 0xC0) {
                // Single-byte character
                continue;
            }

            $addBytes = 1;
            if ($leadByte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($leadByte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= substr($this->_data, $this->_position, $addBytes);
            $this->_position += $addBytes;
            $strlen          += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($leadByte == 0xC0 && ord($str_val[$count+1]) == 0x80) {
                // Collapse \xC0\x80 into a real NUL byte. The string gets
                // one byte shorter, so $strlen shrinks too and $count must
                // not skip ahead: the next character now sits at $count+1.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count+2);
                $strlen--;
            } else {
                // Skip the continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is written as a VInt character count followed by the
     * string bytes, with every \x00 replaced by \xC0\x80.
     *
     * @param string $str
     * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $leadByte = ord($str[$count]);
            if (($leadByte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($leadByte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($leadByte & 0x10) {
                        $addBytes++;
                    }
                }

                // Continuation bytes are not characters of their own
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            throw new Lucene\Exception\InvalidArgumentException('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // A NUL character is stored as two bytes (\xC0\x80) but still counts
        // as one character, so $chars is unaffected. str_replace() returns
        // $str unchanged when it contains no NUL characters.
        $this->_data .= str_replace("\x00", "\xC0\x80", $str);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is stored as a VInt byte length followed by the bytes.
     *
     * @return string
     */
    public function readBinary()
    {
        $length = $this->readVInt();
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }
}
587