<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 * @package   Zend_Search
 */

namespace ZendSearch\Lucene\Document;

/**
 * A field is a section of a Document. Each field has two parts,
 * a name and a value. Values may be free text or they may be atomic
 * keywords, which are not further processed. Such keywords may
 * be used to represent dates, urls, etc. Fields are optionally
 * stored in the index, so that they may be returned with hits
 * on the document.
 *
 * Instances are normally created through the static factories
 * (keyword(), unIndexed(), binary(), text(), unStored()), each of which
 * fixes a particular combination of the stored/indexed/tokenized flags.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 */
class Field
{
    /**
     * Field name
     *
     * @var string
     */
    public $name;

    /**
     * Field value (text in $encoding, or raw bytes when $isBinary is true)
     *
     * @var string
     */
    public $value;

    /**
     * Field is to be stored in the index for return with search hits.
     *
     * @var boolean
     */
    public $isStored = false;

    /**
     * Field is to be indexed, so that it may be searched on.
     *
     * @var boolean
     */
    public $isIndexed = true;

    /**
     * Field should be tokenized as text prior to indexing.
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Field is stored as binary.
     *
     * @var boolean
     */
    public $isBinary = false;

    /**
     * Field is stored as a term vector.
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Field boost factor.
     * It is not stored directly in the index, but affects the normalization factor.
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Field value encoding. Always the empty string for binary fields.
     *
     * @var string
     */
    public $encoding;

    /**
     * Object constructor
     *
     * For binary fields the supplied $encoding and $isTokenized are ignored:
     * the encoding is reset to '' and tokenization is switched off.
     *
     * @param string  $name
     * @param string  $value
     * @param string  $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name  = $name;
        $this->value = $value;

        if (!$isBinary) {
            $this->encoding    = $encoding;
            $this->isTokenized = $isTokenized;
        } else {
            // Raw bytes carry no character encoding and cannot be tokenized as text.
            $this->encoding    = '';
            $this->isTokenized = false;
        }

        $this->isStored  = $isStored;
        $this->isIndexed = $isIndexed;
        $this->isBinary  = $isBinary;

        $this->storeTermVector = false;
        $this->boost           = 1.0;
    }

    /**
     * Constructs a String-valued Field that is not tokenized, but is indexed
     * and stored. Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function keyword($name, $value, $encoding = 'UTF-8')
    {
        return new self($name, $value, $encoding, true, true, false);
    }

    /**
     * Constructs a String-valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function unIndexed($name, $value, $encoding = 'UTF-8')
    {
        return new self($name, $value, $encoding, true, false, false);
    }

    /**
     * Constructs a Binary String valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function binary($name, $value)
    {
        return new self($name, $value, '', true, false, false, true);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * and is stored in the index, for return with hits. Useful for short text
     * fields, like "title" or "subject". Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function text($name, $value, $encoding = 'UTF-8')
    {
        return new self($name, $value, $encoding, true, true, true);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * but that is not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return \ZendSearch\Lucene\Document\Field
     */
    public static function unStored($name, $value, $encoding = 'UTF-8')
    {
        return new self($name, $value, $encoding, false, true, true);
    }

    /**
     * Get field value in UTF-8 encoding
     *
     * Binary fields and fields already declared as UTF-8 ("utf8" / "utf-8",
     * case-insensitive) are returned unchanged; any other value is converted
     * with iconv().
     *
     * @return string|false the value, or false if iconv() fails to convert it
     */
    public function getUtf8Value()
    {
        // Binary fields have an empty encoding; passing '' to iconv() as the
        // source charset is platform/locale dependent and usually fails on
        // non-ASCII bytes, so raw bytes are handed back untouched.
        if ($this->isBinary) {
            return $this->value;
        }

        if (strcasecmp($this->encoding, 'utf8') === 0
            || strcasecmp($this->encoding, 'utf-8') === 0
        ) {
            return $this->value;
        }

        // On AIX the declared encoding is ignored and ISO8859-1 is assumed
        // (presumably a workaround for that platform's iconv; reason not shown here).
        $sourceEncoding = (PHP_OS !== 'AIX') ? $this->encoding : 'ISO8859-1';

        return iconv($sourceEncoding, 'UTF-8', $this->value);
    }
}