1<?php
2/**
3 * Zend Framework (http://framework.zend.com/)
4 *
5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license   http://framework.zend.com/license/new-bsd New BSD License
8 * @package   Zend_Search
9 */
10
11namespace ZendSearch\Lucene\Document;
12
13/**
14 * A field is a section of a Document.  Each field has two parts,
15 * a name and a value. Values may be free text or they may be atomic
16 * keywords, which are not further processed. Such keywords may
17 * be used to represent dates, urls, etc.  Fields are optionally
18 * stored in the index, so that they may be returned with hits
19 * on the document.
20 *
21 * @category   Zend
22 * @package    Zend_Search_Lucene
23 * @subpackage Document
24 */
class Field
{
    /**
     * Field name
     *
     * @var string
     */
    public $name;

    /**
     * Field value (text in the charset named by $encoding, or raw bytes
     * when $isBinary is set)
     *
     * @var string
     */
    public $value;

    /**
     * Field is to be stored in the index for return with search hits.
     *
     * @var boolean
     */
    public $isStored    = false;

    /**
     * Field is to be indexed, so that it may be searched on.
     *
     * @var boolean
     */
    public $isIndexed   = true;

    /**
     * Field should be tokenized as text prior to indexing.
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Field is stored as binary.
     *
     * @var boolean
     */
    public $isBinary    = false;

    /**
     * Field is stored as a term vector
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Field boost factor
     * It's not stored directly in the index, but affects the normalization factor
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Field value encoding (always an empty string for binary fields).
     *
     * @var string
     */
    public $encoding;

    /**
     * Object constructor
     *
     * For binary fields the supplied $encoding and $isTokenized arguments are
     * ignored: the encoding is reset to an empty string and tokenization is
     * switched off.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name      = $name;
        $this->value     = $value;
        $this->isStored  = $isStored;
        $this->isIndexed = $isIndexed;
        $this->isBinary  = $isBinary;

        // Raw bytes have no character set and cannot be split into tokens.
        $this->encoding    = $isBinary ? '' : $encoding;
        $this->isTokenized = $isBinary ? false : $isTokenized;

        $this->storeTermVector = false;
        $this->boost           = 1.0;
    }

    /**
     * Constructs a String-valued Field that is not tokenized, but is indexed
     * and stored.  Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function keyword($name, $value, $encoding = 'UTF-8')
    {
        // stored, indexed, not tokenized
        return new self($name, $value, $encoding, true, true, false);
    }

    /**
     * Constructs a String-valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function unIndexed($name, $value, $encoding = 'UTF-8')
    {
        // stored, not indexed, not tokenized
        return new self($name, $value, $encoding, true, false, false);
    }

    /**
     * Constructs a Binary String valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function binary($name, $value)
    {
        // stored, not indexed, not tokenized, binary
        return new self($name, $value, '', true, false, false, true);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * and is stored in the index, for return with hits.  Useful for short text
     * fields, like "title" or "subject". Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function text($name, $value, $encoding = 'UTF-8')
    {
        // stored, indexed, tokenized
        return new self($name, $value, $encoding, true, true, true);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * but that is not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function unStored($name, $value, $encoding = 'UTF-8')
    {
        // not stored, indexed, tokenized
        return new self($name, $value, $encoding, false, true, true);
    }

    /**
     * Get field value in UTF-8 encoding
     *
     * Binary fields and fields whose encoding is already "utf8"/"utf-8"
     * (compared case-insensitively) are returned unchanged. Every other value
     * is converted with iconv().
     *
     * @return string|false the value, or false if iconv() fails to convert it
     */
    public function getUtf8Value()
    {
        // Binary fields carry an empty encoding; handing that to iconv() makes
        // the result depend on the platform's default charset and may corrupt
        // or reject the raw bytes, so they are passed through untouched.
        if ($this->isBinary) {
            return $this->value;
        }

        if (strcasecmp($this->encoding, 'utf8') === 0
            || strcasecmp($this->encoding, 'utf-8') === 0
        ) {
            return $this->value;
        }

        // On AIX the field's own encoding is not passed to iconv(); ISO8859-1
        // is used as the source charset instead.
        // NOTE(review): presumably a workaround for AIX's iconv - confirm.
        $sourceCharset = (PHP_OS != 'AIX') ? $this->encoding : 'ISO8859-1';

        return iconv($sourceCharset, 'UTF-8', $this->value);
    }
}
213