1<?php 2 3// 4// $Id$ 5// 6 7// 8// Copyright (c) 2001-2012, Andrew Aksyonoff 9// Copyright (c) 2008-2012, Sphinx Technologies Inc 10// All rights reserved 11// 12// This program is free software; you can redistribute it and/or modify 13// it under the terms of the GNU General Public License. You should have 14// received a copy of the GPL license along with this program; if you 15// did not, you can find it at http://www.gnu.org/ 16// 17 18///////////////////////////////////////////////////////////////////////////// 19// PHP version of Sphinx searchd client (PHP API) 20///////////////////////////////////////////////////////////////////////////// 21 22/// known searchd commands 23define ( "SEARCHD_COMMAND_SEARCH", 0 ); 24define ( "SEARCHD_COMMAND_EXCERPT", 1 ); 25define ( "SEARCHD_COMMAND_UPDATE", 2 ); 26define ( "SEARCHD_COMMAND_KEYWORDS",3 ); 27 28/// current client-side command implementation versions 29define ( "VER_COMMAND_SEARCH", 0x116 ); 30define ( "VER_COMMAND_EXCERPT", 0x100 ); 31define ( "VER_COMMAND_UPDATE", 0x102 ); 32define ( "VER_COMMAND_KEYWORDS", 0x100 ); 33 34/// known searchd status codes 35define ( "SEARCHD_OK", 0 ); 36define ( "SEARCHD_ERROR", 1 ); 37define ( "SEARCHD_RETRY", 2 ); 38define ( "SEARCHD_WARNING", 3 ); 39 40/// known match modes 41define ( "SPH_MATCH_ALL", 0 ); 42define ( "SPH_MATCH_ANY", 1 ); 43define ( "SPH_MATCH_PHRASE", 2 ); 44define ( "SPH_MATCH_BOOLEAN", 3 ); 45define ( "SPH_MATCH_EXTENDED", 4 ); 46define ( "SPH_MATCH_FULLSCAN", 5 ); 47define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED) 48 49/// known ranking modes (ext2 only) 50define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one 51define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality) 52define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1 53define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 54define ( "SPH_RANK_PROXIMITY", 4 ); 55 56/// known sort modes 57define ( "SPH_SORT_RELEVANCE", 0 ); 58define ( "SPH_SORT_ATTR_DESC", 1 ); 59define ( "SPH_SORT_ATTR_ASC", 2 ); 60define ( "SPH_SORT_TIME_SEGMENTS", 3 ); 61define ( "SPH_SORT_EXTENDED", 4 ); 62define ( "SPH_SORT_EXPR", 5 ); 63 64/// known filter types 65define ( "SPH_FILTER_VALUES", 0 ); 66define ( "SPH_FILTER_RANGE", 1 ); 67define ( "SPH_FILTER_FLOATRANGE", 2 ); 68 69/// known attribute types 70define ( "SPH_ATTR_INTEGER", 1 ); 71define ( "SPH_ATTR_TIMESTAMP", 2 ); 72define ( "SPH_ATTR_ORDINAL", 3 ); 73define ( "SPH_ATTR_BOOL", 4 ); 74define ( "SPH_ATTR_FLOAT", 5 ); 75define ( "SPH_ATTR_BIGINT", 6 ); 76define ( "SPH_ATTR_MULTI", 0x40000000 ); 77 78/// known grouping functions 79define ( "SPH_GROUPBY_DAY", 0 ); 80define ( "SPH_GROUPBY_WEEK", 1 ); 81define ( "SPH_GROUPBY_MONTH", 2 ); 82define ( "SPH_GROUPBY_YEAR", 3 ); 83define ( "SPH_GROUPBY_ATTR", 4 ); 84define ( "SPH_GROUPBY_ATTRPAIR", 5 ); 85 86 87/// portably pack numeric to 64 unsigned bits, network order 88function sphPack64 ( $v ) 89{ 90 assert ( is_numeric($v) ); 91 92 // x64 route 93 if ( PHP_INT_SIZE>=8 ) 94 { 95 $i = (int)$v; 96 return pack ( "NN", $i>>32, $i&((1<<32)-1) ); 97 } 98 99 // x32 route, bcmath 100 $x = "4294967296"; 101 if ( function_exists("bcmul") ) 102 { 103 $h = bcdiv ( $v, $x, 0 ); 104 $l = bcmod ( $v, $x ); 105 if ( $v<0 ) 106 { 107 $h = -1+(float)$h; 108 $l = $l+(float)$x; 109 } 110 return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit 111 } 112 113 // x32 route, 15 or less decimal digits 114 // we can use float, because its actually double and has 52 precision bits 115 if ( strlen($v)<=15 ) 116 { 117 $f = (float)$v; 118 $h = (int)($f/$x); 119 $l = $f-$x*(float)$h; 120 if ( $v<0 ) 121 { 122 $h = -1+(float)$h; 123 $l = $l+(float)$x; 124 } 125 return pack ( "NN", $h, $l ); 126 } 127 128 // x32 route, 16 or more decimal digits 129 // well, let me know if you *really* need this 130 die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" ); 131} 132 133/// portably unpack 64 signed bits, network order to numeric 134function sphUnpack64 ( $v ) 135{ 136 list($h,$l) = array_values ( unpack ( "N*N*", $v ) ); 137 138 // x64 route 139 if ( PHP_INT_SIZE>=8 ) 140 { 141 if ( $h<0 ) $h += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again 142 if ( $l<0 ) $l += (1<<32); 143 return ($h<<32) + $l; 144 } 145 146 // x32 route 147 $x = "4294967296"; 148 $y = 0; 149 $p = ""; 150 if ( $h<0 ) 151 { 152 $h = ~$h; 153 $l = ~$l; 154 $y = 1; 155 $p = "-"; 156 } 157 $h = sprintf ( "%u", $h ); 158 $l = sprintf ( "%u", $l ); 159 160 // bcmath 161 if ( function_exists("bcmul") ) 162 return $p . bcadd ( bcadd ( $l, bcmul ( $x, $h ) ), $y ); 163 164 // no bcmath, 15 or less decimal digits 165 // we can use float, because its actually double and has 52 precision bits 166 if ( $h<1048576 ) 167 { 168 $f = ((float)$h)*$x + (float)$l + (float)$y; 169 return $p . sprintf ( "%.0f", $f ); // builtin conversion is only about 39-40 bits precise! 170 } 171 172 // x32 route, 16 or more decimal digits 173 // well, let me know if you *really* need this 174 die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" ); 175} 176 177 178/// sphinx searchd client class 179class SphinxClient 180{ 181 var $_host; ///< searchd host (default is "localhost") 182 var $_port; ///< searchd port (default is 9312) 183 var $_offset; ///< how many records to seek from result-set start (default is 0) 184 var $_limit; ///< how many records to return from result-set starting at offset (default is 20) 185 var $_mode; ///< query matching mode (default is SPH_MATCH_ALL) 186 var $_weights; ///< per-field weights (default is 1 for all fields) 187 var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE) 188 var $_sortby; ///< attribute to sort by (defualt is "") 189 var $_min_id; ///< min ID to match (default is 0, which means no limit) 190 var $_max_id; ///< max ID to match (default is 0, which means no limit) 191 var $_filters; ///< search filters 192 var $_groupby; ///< group-by attribute name 193 var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with) 194 var $_groupsort; ///< group-by sorting clause (to sort groups in result set with) 195 var $_groupdistinct;///< group-by count-distinct attribute 196 var $_maxmatches; ///< max matches to retrieve 197 var $_cutoff; ///< cutoff to stop searching at (default is 0) 198 var $_retrycount; ///< distributed retries count 199 var $_retrydelay; ///< distributed retries delay 200 var $_anchor; ///< geographical anchor point 201 var $_indexweights; ///< per-index weights 202 var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25) 203 var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit) 204 var $_fieldweights; ///< per-field-name weights 205 var $_overrides; ///< per-query attribute values overrides 206 var $_select; ///< select-list (attributes or expressions, with optional aliases) 207 208 var $_error; ///< last error message 209 var $_warning; ///< last warning message 210 211 var $_reqs; ///< requests array for multi-query 212 var $_mbenc; ///< stored mbstring encoding 213 var $_arrayresult; ///< whether $result["matches"] should be a hash or an array 214 215 ///////////////////////////////////////////////////////////////////////////// 216 // common stuff 217 ///////////////////////////////////////////////////////////////////////////// 218 219 /// create a new client object and fill defaults 220 function SphinxClient () 221 { 222 // per-client-object settings 223 $this->_host = "localhost"; 224 $this->_port = 9312; 225 226 // per-query settings 227 $this->_offset = 0; 228 $this->_limit = 20; 229 $this->_mode = SPH_MATCH_ALL; 230 $this->_weights = array (); 231 $this->_sort = SPH_SORT_RELEVANCE; 232 $this->_sortby = ""; 233 $this->_min_id = 0; 234 $this->_max_id = 0; 235 $this->_filters = array (); 236 $this->_groupby = ""; 237 $this->_groupfunc = SPH_GROUPBY_DAY; 238 $this->_groupsort = "@group desc"; 239 $this->_groupdistinct= ""; 240 $this->_maxmatches = 1000; 241 $this->_cutoff = 0; 242 $this->_retrycount = 0; 243 $this->_retrydelay = 0; 244 $this->_anchor = array (); 245 $this->_indexweights= array (); 246 $this->_ranker = SPH_RANK_PROXIMITY_BM25; 247 $this->_maxquerytime= 0; 248 $this->_fieldweights= array(); 249 $this->_overrides = array(); 250 $this->_select = "*"; 251 252 $this->_error = ""; // per-reply fields (for single-query case) 253 $this->_warning = ""; 254 $this->_reqs = array (); // requests storage (for multi-query case) 255 $this->_mbenc = ""; 256 $this->_arrayresult = false; 257 } 258 259 /// get last error message (string) 260 function GetLastError () 261 { 262 return $this->_error; 263 } 264 265 /// get last warning message (string) 266 function GetLastWarning () 267 { 268 return $this->_warning; 269 } 270 271 /// set searchd host name (string) and port (integer) 272 function SetServer ( $host, $port ) 273 { 274 assert ( is_string($host) ); 275 assert ( is_int($port) ); 276 $this->_host = $host; 277 $this->_port = $port; 278 } 279 280 ///////////////////////////////////////////////////////////////////////////// 281 282 /// enter mbstring workaround mode 283 function _MBPush () 284 { 285 $this->_mbenc = ""; 286 if ( ini_get ( "mbstring.func_overload" ) & 2 ) 287 { 288 $this->_mbenc = mb_internal_encoding(); 289 mb_internal_encoding ( "latin1" ); 290 } 291 } 292 293 /// leave mbstring workaround mode 294 function _MBPop () 295 { 296 if ( $this->_mbenc ) 297 mb_internal_encoding ( $this->_mbenc ); 298 } 299 300 /// connect to searchd server 301 function _Connect () 302 { 303 return fopen('php://stdout', 'w'); 304 } 305 306 function _OldConnect() 307 { 308 if (!( $fp = @fsockopen ( $this->_host, $this->_port ) ) ) 309 { 310 $this->_error = "connection to {$this->_host}:{$this->_port} failed"; 311 return false; 312 } 313 314 // check version 315 list(,$v) = unpack ( "N*", fread ( $fp, 4 ) ); 316 $v = (int)$v; 317 if ( $v<1 ) 318 { 319 fclose ( $fp ); 320 $this->_error = "expected searchd protocol version 1+, got version '$v'"; 321 return false; 322 } 323 324 // all ok, send my version 325 fwrite ( $fp, pack ( "N", 1 ) ); 326 return $fp; 327 } 328 329 /// get and check response packet from searchd server 330 function _GetResponse ( $fp, $client_ver ) 331 { 332 return false; 333 } 334 335 function _OldGetResponse ( $fp, $client_ver ) 336 { 337 $response = ""; 338 $len = 0; 339 340 $header = fread ( $fp, 8 ); 341 if ( strlen($header)==8 ) 342 { 343 list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) ); 344 $left = $len; 345 while ( $left>0 && !feof($fp) ) 346 { 347 $chunk = fread ( $fp, $left ); 348 if ( $chunk ) 349 { 350 $response .= $chunk; 351 $left -= strlen($chunk); 352 } 353 } 354 } 355 fclose ( $fp ); 356 357 // check response 358 $read = strlen ( $response ); 359 if ( !$response || $read!=$len ) 360 { 361 $this->_error = $len 362 ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)" 363 : "received zero-sized searchd response"; 364 return false; 365 } 366 367 // check status 368 if ( $status==SEARCHD_WARNING ) 369 { 370 list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) ); 371 $this->_warning = substr ( $response, 4, $wlen ); 372 return substr ( $response, 4+$wlen ); 373 } 374 if ( $status==SEARCHD_ERROR ) 375 { 376 $this->_error = "searchd error: " . substr ( $response, 4 ); 377 return false; 378 } 379 if ( $status==SEARCHD_RETRY ) 380 { 381 $this->_error = "temporary searchd error: " . substr ( $response, 4 ); 382 return false; 383 } 384 if ( $status!=SEARCHD_OK ) 385 { 386 $this->_error = "unknown status code '$status'"; 387 return false; 388 } 389 390 // check version 391 if ( $ver<$client_ver ) 392 { 393 $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work", 394 $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ); 395 } 396 397 return $response; 398 } 399 400 ///////////////////////////////////////////////////////////////////////////// 401 // searching 402 ///////////////////////////////////////////////////////////////////////////// 403 404 /// set offset and count into result set, 405 /// and optionally set max-matches and cutoff limits 406 function SetLimits ( $offset, $limit, $max=0, $cutoff=0 ) 407 { 408 assert ( is_int($offset) ); 409 assert ( is_int($limit) ); 410 assert ( $offset>=0 ); 411 assert ( $limit>0 ); 412 assert ( $max>=0 ); 413 $this->_offset = $offset; 414 $this->_limit = $limit; 415 if ( $max>0 ) 416 $this->_maxmatches = $max; 417 if ( $cutoff>0 ) 418 $this->_cutoff = $cutoff; 419 } 420 421 /// set maximum query time, in milliseconds, per-index 422 /// integer, 0 means "do not limit" 423 function SetMaxQueryTime ( $max ) 424 { 425 assert ( is_int($max) ); 426 assert ( $max>=0 ); 427 $this->_maxquerytime = $max; 428 } 429 430 /// set matching mode 431 function SetMatchMode ( $mode ) 432 { 433 assert ( $mode==SPH_MATCH_ALL 434 || $mode==SPH_MATCH_ANY 435 || $mode==SPH_MATCH_PHRASE 436 || $mode==SPH_MATCH_BOOLEAN 437 || $mode==SPH_MATCH_EXTENDED 438 || $mode==SPH_MATCH_FULLSCAN 439 || $mode==SPH_MATCH_EXTENDED2 ); 440 $this->_mode = $mode; 441 } 442 443 /// set ranking mode 444 function SetRankingMode ( $ranker ) 445 { 446 assert ( $ranker==SPH_RANK_PROXIMITY_BM25 447 || $ranker==SPH_RANK_BM25 448 || $ranker==SPH_RANK_NONE 449 || $ranker==SPH_RANK_WORDCOUNT 450 || $ranker==SPH_RANK_PROXIMITY ); 451 $this->_ranker = $ranker; 452 } 453 454 /// set matches sorting mode 455 function SetSortMode ( $mode, $sortby="" ) 456 { 457 assert ( 458 $mode==SPH_SORT_RELEVANCE || 459 $mode==SPH_SORT_ATTR_DESC || 460 $mode==SPH_SORT_ATTR_ASC || 461 $mode==SPH_SORT_TIME_SEGMENTS || 462 $mode==SPH_SORT_EXTENDED || 463 $mode==SPH_SORT_EXPR ); 464 assert ( is_string($sortby) ); 465 assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 ); 466 467 $this->_sort = $mode; 468 $this->_sortby = $sortby; 469 } 470 471 /// bind per-field weights by order 472 /// DEPRECATED; use SetFieldWeights() instead 473 function SetWeights ( $weights ) 474 { 475 assert ( is_array($weights) ); 476 foreach ( $weights as $weight ) 477 assert ( is_int($weight) ); 478 479 $this->_weights = $weights; 480 } 481 482 /// bind per-field weights by name 483 function SetFieldWeights ( $weights ) 484 { 485 assert ( is_array($weights) ); 486 foreach ( $weights as $name=>$weight ) 487 { 488 assert ( is_string($name) ); 489 assert ( is_int($weight) ); 490 } 491 $this->_fieldweights = $weights; 492 } 493 494 /// bind per-index weights by name 495 function SetIndexWeights ( $weights ) 496 { 497 assert ( is_array($weights) ); 498 foreach ( $weights as $index=>$weight ) 499 { 500 assert ( is_string($index) ); 501 assert ( is_int($weight) ); 502 } 503 $this->_indexweights = $weights; 504 } 505 506 /// set IDs range to match 507 /// only match records if document ID is beetwen $min and $max (inclusive) 508 function SetIDRange ( $min, $max ) 509 { 510 assert ( is_numeric($min) ); 511 assert ( is_numeric($max) ); 512 assert ( $min<=$max ); 513 $this->_min_id = $min; 514 $this->_max_id = $max; 515 } 516 517 /// set values set filter 518 /// only match records where $attribute value is in given set 519 function SetFilter ( $attribute, $values, $exclude=false ) 520 { 521 assert ( is_string($attribute) ); 522 assert ( is_array($values) ); 523 assert ( count($values) ); 524 525 if ( is_array($values) && count($values) ) 526 { 527 foreach ( $values as $value ) 528 assert ( is_numeric($value) ); 529 530 $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values ); 531 } 532 } 533 534 /// set range filter 535 /// only match records if $attribute value is beetwen $min and $max (inclusive) 536 function SetFilterRange ( $attribute, $min, $max, $exclude=false ) 537 { 538 assert ( is_string($attribute) ); 539 assert ( is_numeric($min) ); 540 assert ( is_numeric($max) ); 541 assert ( $min<=$max ); 542 543 $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 544 } 545 546 /// set float range filter 547 /// only match records if $attribute value is beetwen $min and $max (inclusive) 548 function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false ) 549 { 550 assert ( is_string($attribute) ); 551 assert ( is_float($min) ); 552 assert ( is_float($max) ); 553 assert ( $min<=$max ); 554 555 $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max ); 556 } 557 558 /// setup anchor point for geosphere distance calculations 559 /// required to use @geodist in filters and sorting 560 /// latitude and longitude must be in radians 561 function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long ) 562 { 563 assert ( is_string($attrlat) ); 564 assert ( is_string($attrlong) ); 565 assert ( is_float($lat) ); 566 assert ( is_float($long) ); 567 568 $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long ); 569 } 570 571 /// set grouping attribute and function 572 function SetGroupBy ( $attribute, $func, $groupsort="@group desc" ) 573 { 574 assert ( is_string($attribute) ); 575 assert ( is_string($groupsort) ); 576 assert ( $func==SPH_GROUPBY_DAY 577 || $func==SPH_GROUPBY_WEEK 578 || $func==SPH_GROUPBY_MONTH 579 || $func==SPH_GROUPBY_YEAR 580 || $func==SPH_GROUPBY_ATTR 581 || $func==SPH_GROUPBY_ATTRPAIR ); 582 583 $this->_groupby = $attribute; 584 $this->_groupfunc = $func; 585 $this->_groupsort = $groupsort; 586 } 587 588 /// set count-distinct attribute for group-by queries 589 function SetGroupDistinct ( $attribute ) 590 { 591 assert ( is_string($attribute) ); 592 $this->_groupdistinct = $attribute; 593 } 594 595 /// set distributed retries count and delay 596 function SetRetries ( $count, $delay=0 ) 597 { 598 assert ( is_int($count) && $count>=0 ); 599 assert ( is_int($delay) && $delay>=0 ); 600 $this->_retrycount = $count; 601 $this->_retrydelay = $delay; 602 } 603 604 /// set result set format (hash or array; hash by default) 605 /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs 606 function SetArrayResult ( $arrayresult ) 607 { 608 assert ( is_bool($arrayresult) ); 609 $this->_arrayresult = $arrayresult; 610 } 611 612 /// set attribute values override 613 /// there can be only one override per attribute 614 /// $values must be a hash that maps document IDs to attribute values 615 function SetOverride ( $attrname, $attrtype, $values ) 616 { 617 assert ( is_string ( $attrname ) ); 618 assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) ); 619 assert ( is_array ( $values ) ); 620 621 $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values ); 622 } 623 624 /// set select-list (attributes or expressions), SQL-like syntax 625 function SetSelect ( $select ) 626 { 627 assert ( is_string ( $select ) ); 628 $this->_select = $select; 629 } 630 631 ////////////////////////////////////////////////////////////////////////////// 632 633 /// clear all filters (for multi-queries) 634 function ResetFilters () 635 { 636 $this->_filters = array(); 637 $this->_anchor = array(); 638 } 639 640 /// clear groupby settings (for multi-queries) 641 function ResetGroupBy () 642 { 643 $this->_groupby = ""; 644 $this->_groupfunc = SPH_GROUPBY_DAY; 645 $this->_groupsort = "@group desc"; 646 $this->_groupdistinct= ""; 647 } 648 649 /// clear all attribute value overrides (for multi-queries) 650 function ResetOverrides () 651 { 652 $this->_overrides = array (); 653 } 654 655 ////////////////////////////////////////////////////////////////////////////// 656 657 /// connect to searchd server, run given search query through given indexes, 658 /// and return the search results 659 function Query ( $query, $index="*", $comment="" ) 660 { 661 assert ( empty($this->_reqs) ); 662 663 $this->AddQuery ( $query, $index, $comment ); 664 $results = $this->RunQueries (); 665 666 if ( !is_array($results) ) 667 return false; // probably network error; error message should be already filled 668 669 $this->_error = $results[0]["error"]; 670 $this->_warning = $results[0]["warning"]; 671 if ( $results[0]["status"]==SEARCHD_ERROR ) 672 return false; 673 else 674 return $results[0]; 675 } 676 677 /// helper to pack floats in network byte order 678 function _PackFloat ( $f ) 679 { 680 $t1 = pack ( "f", $f ); // machine order 681 list(,$t2) = unpack ( "L*", $t1 ); // int in machine order 682 return pack ( "N", $t2 ); 683 } 684 685 /// add query to multi-query batch 686 /// returns index into results array from RunQueries() call 687 function AddQuery ( $query, $index="*", $comment="" ) 688 { 689 // mbstring workaround 690 $this->_MBPush (); 691 692 // build request 693 $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits 694 $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby; 695 $req .= pack ( "N", strlen($query) ) . $query; // query itself 696 $req .= pack ( "N", count($this->_weights) ); // weights 697 foreach ( $this->_weights as $weight ) 698 $req .= pack ( "N", (int)$weight ); 699 $req .= pack ( "N", strlen($index) ) . $index; // indexes 700 $req .= pack ( "N", 1 ); // id64 range marker 701 $req .= sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id ); // id64 range 702 703 // filters 704 $req .= pack ( "N", count($this->_filters) ); 705 foreach ( $this->_filters as $filter ) 706 { 707 $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"]; 708 $req .= pack ( "N", $filter["type"] ); 709 switch ( $filter["type"] ) 710 { 711 case SPH_FILTER_VALUES: 712 $req .= pack ( "N", count($filter["values"]) ); 713 foreach ( $filter["values"] as $value ) 714 $req .= sphPack64 ( $value ); 715 break; 716 717 case SPH_FILTER_RANGE: 718 $req .= sphPack64 ( $filter["min"] ) . sphPack64 ( $filter["max"] ); 719 break; 720 721 case SPH_FILTER_FLOATRANGE: 722 $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] ); 723 break; 724 725 default: 726 assert ( 0 && "internal error: unhandled filter type" ); 727 } 728 $req .= pack ( "N", $filter["exclude"] ); 729 } 730 731 // group-by clause, max-matches count, group-sort clause, cutoff count 732 $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby; 733 $req .= pack ( "N", $this->_maxmatches ); 734 $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort; 735 $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay ); 736 $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct; 737 738 // anchor point 739 if ( empty($this->_anchor) ) 740 { 741 $req .= pack ( "N", 0 ); 742 } else 743 { 744 $a =& $this->_anchor; 745 $req .= pack ( "N", 1 ); 746 $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"]; 747 $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"]; 748 $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] ); 749 } 750 751 // per-index weights 752 $req .= pack ( "N", count($this->_indexweights) ); 753 foreach ( $this->_indexweights as $idx=>$weight ) 754 $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight ); 755 756 // max query time 757 $req .= pack ( "N", $this->_maxquerytime ); 758 759 // per-field weights 760 $req .= pack ( "N", count($this->_fieldweights) ); 761 foreach ( $this->_fieldweights as $field=>$weight ) 762 $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight ); 763 764 // comment 765 $req .= pack ( "N", strlen($comment) ) . $comment; 766 767 // attribute overrides 768 $req .= pack ( "N", count($this->_overrides) ); 769 foreach ( $this->_overrides as $key => $entry ) 770 { 771 $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"]; 772 $req .= pack ( "NN", $entry["type"], count($entry["values"]) ); 773 foreach ( $entry["values"] as $id=>$val ) 774 { 775 assert ( is_numeric($id) ); 776 assert ( is_numeric($val) ); 777 778 $req .= sphPack64 ( $id ); 779 switch ( $entry["type"] ) 780 { 781 case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break; 782 case SPH_ATTR_BIGINT: $req .= sphPack64 ( $val ); break; 783 default: $req .= pack ( "N", $val ); break; 784 } 785 } 786 } 787 788 // select-list 789 $req .= pack ( "N", strlen($this->_select) ) . $this->_select; 790 791 // mbstring workaround 792 $this->_MBPop (); 793 794 // store request to requests array 795 $this->_reqs[] = $req; 796 return count($this->_reqs)-1; 797 } 798 799 /// connect to searchd, run queries batch, and return an array of result sets 800 function RunQueries () 801 { 802 if ( empty($this->_reqs) ) 803 { 804 $this->_error = "no queries defined, issue AddQuery() first"; 805 return false; 806 } 807 808 // mbstring workaround 809 $this->_MBPush (); 810 811 if (!( $fp = $this->_Connect() )) 812 { 813 $this->_MBPop (); 814 return false; 815 } 816 817 //////////////////////////// 818 // send query, get response 819 //////////////////////////// 820 821 $nreqs = count($this->_reqs); 822 $req = join ( "", $this->_reqs ); 823 $len = 4+strlen($req); 824 $req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header 825 826 fwrite ( $fp, $req, $len+8 ); 827 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) )) 828 { 829 $this->_MBPop (); 830 return false; 831 } 832 833 $this->_reqs = array (); 834 835 ////////////////// 836 // parse response 837 ////////////////// 838 839 $p = 0; // current position 840 $max = strlen($response); // max position for checks, to protect against broken responses 841 842 $results = array (); 843 for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ ) 844 { 845 $results[] = array(); 846 $result =& $results[$ires]; 847 848 $result["error"] = ""; 849 $result["warning"] = ""; 850 851 // extract status 852 list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 853 $result["status"] = $status; 854 if ( $status!=SEARCHD_OK ) 855 { 856 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 857 $message = substr ( $response, $p, $len ); $p += $len; 858 859 if ( $status==SEARCHD_WARNING ) 860 { 861 $result["warning"] = $message; 862 } else 863 { 864 $result["error"] = $message; 865 continue; 866 } 867 } 868 869 // read schema 870 $fields = array (); 871 $attrs = array (); 872 873 list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 874 while ( $nfields-->0 && $p<$max ) 875 { 876 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 877 $fields[] = substr ( $response, $p, $len ); $p += $len; 878 } 879 $result["fields"] = $fields; 880 881 list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 882 while ( $nattrs-->0 && $p<$max ) 883 { 884 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 885 $attr = substr ( $response, $p, $len ); $p += $len; 886 list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 887 $attrs[$attr] = $type; 888 } 889 $result["attrs"] = $attrs; 890 891 // read match count 892 list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 893 list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 894 895 // read matches 896 $idx = -1; 897 while ( $count-->0 && $p<$max ) 898 { 899 // index into result array 900 $idx++; 901 902 // parse document id and weight 903 if ( $id64 ) 904 { 905 $doc = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8; 906 list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 907 } else 908 { 909 list ( $doc, $weight ) = array_values ( unpack ( "N*N*", 910 substr ( $response, $p, 8 ) ) ); 911 $p += 8; 912 913 if ( PHP_INT_SIZE>=8 ) 914 { 915 // x64 route, workaround broken unpack() in 5.2.2+ 916 if ( $doc<0 ) $doc += (1<<32); 917 } else 918 { 919 // x32 route, workaround php signed/unsigned braindamage 920 $doc = sprintf ( "%u", $doc ); 921 } 922 } 923 $weight = sprintf ( "%u", $weight ); 924 925 // create match entry 926 if ( $this->_arrayresult ) 927 $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight ); 928 else 929 $result["matches"][$doc]["weight"] = $weight; 930 931 // parse and create attributes 932 $attrvals = array (); 933 foreach ( $attrs as $attr=>$type ) 934 { 935 // handle 64bit ints 936 if ( $type==SPH_ATTR_BIGINT ) 937 { 938 $attrvals[$attr] = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8; 939 continue; 940 } 941 942 // handle floats 943 if ( $type==SPH_ATTR_FLOAT ) 944 { 945 list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 946 list(,$fval) = unpack ( "f*", pack ( "L", $uval ) ); 947 $attrvals[$attr] = $fval; 948 continue; 949 } 950 951 // handle everything else as unsigned ints 952 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 953 if ( $type & SPH_ATTR_MULTI ) 954 { 955 $attrvals[$attr] = array (); 956 $nvalues = $val; 957 while ( $nvalues-->0 && $p<$max ) 958 { 959 list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 960 $attrvals[$attr][] = sprintf ( "%u", $val ); 961 } 962 } else 963 { 964 $attrvals[$attr] = sprintf ( "%u", $val ); 965 } 966 } 967 968 if ( $this->_arrayresult ) 969 $result["matches"][$idx]["attrs"] = $attrvals; 970 else 971 $result["matches"][$doc]["attrs"] = $attrvals; 972 } 973 974 list ( $total, $total_found, $msecs, $words ) = 975 array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) ); 976 $result["total"] = sprintf ( "%u", $total ); 977 $result["total_found"] = sprintf ( "%u", $total_found ); 978 $result["time"] = sprintf ( "%.3f", $msecs/1000 ); 979 $p += 16; 980 981 while ( $words-->0 && $p<$max ) 982 { 983 list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; 984 $word = substr ( $response, $p, $len ); $p += $len; 985 list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8; 986 $result["words"][$word] = array ( 987 "docs"=>sprintf ( "%u", $docs ), 988 "hits"=>sprintf ( "%u", $hits ) ); 989 } 990 } 991 992 $this->_MBPop (); 993 return $results; 994 } 995 996 ///////////////////////////////////////////////////////////////////////////// 997 // excerpts generation 998 ///////////////////////////////////////////////////////////////////////////// 999 1000 /// connect to searchd server, and generate exceprts (snippets) 1001 /// of given documents for given query. returns false on failure, 1002 /// an array of snippets on success 1003 function BuildExcerpts ( $docs, $index, $words, $opts=array() ) 1004 { 1005 assert ( is_array($docs) ); 1006 assert ( is_string($index) ); 1007 assert ( is_string($words) ); 1008 assert ( is_array($opts) ); 1009 1010 $this->_MBPush (); 1011 1012 if (!( $fp = $this->_Connect() )) 1013 { 1014 $this->_MBPop(); 1015 return false; 1016 } 1017 1018 ///////////////// 1019 // fixup options 1020 ///////////////// 1021 1022 if ( !isset($opts["before_match"]) ) $opts["before_match"] = "<b>"; 1023 if ( !isset($opts["after_match"]) ) $opts["after_match"] = "</b>"; 1024 if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... "; 1025 if ( !isset($opts["limit"]) ) $opts["limit"] = 256; 1026 if ( !isset($opts["around"]) ) $opts["around"] = 5; 1027 if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false; 1028 if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false; 1029 if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false; 1030 if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false; 1031 1032 ///////////////// 1033 // build request 1034 ///////////////// 1035 1036 // v.1.0 req 1037 $flags = 1; // remove spaces 1038 if ( $opts["exact_phrase"] ) $flags |= 2; 1039 if ( $opts["single_passage"] ) $flags |= 4; 1040 if ( $opts["use_boundaries"] ) $flags |= 8; 1041 if ( $opts["weight_order"] ) $flags |= 16; 1042 $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags 1043 $req .= pack ( "N", strlen($index) ) . $index; // req index 1044 $req .= pack ( "N", strlen($words) ) . $words; // req words 1045 1046 // options 1047 $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"]; 1048 $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"]; 1049 $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"]; 1050 $req .= pack ( "N", (int)$opts["limit"] ); 1051 $req .= pack ( "N", (int)$opts["around"] ); 1052 1053 // documents 1054 $req .= pack ( "N", count($docs) ); 1055 foreach ( $docs as $doc ) 1056 { 1057 assert ( is_string($doc) ); 1058 $req .= pack ( "N", strlen($doc) ) . $doc; 1059 } 1060 1061 //////////////////////////// 1062 // send query, get response 1063 //////////////////////////// 1064 1065 $len = strlen($req); 1066 $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header 1067 $wrote = fwrite ( $fp, $req, $len+8 ); 1068 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) )) 1069 { 1070 $this->_MBPop (); 1071 return false; 1072 } 1073 1074 ////////////////// 1075 // parse response 1076 ////////////////// 1077 1078 $pos = 0; 1079 $res = array (); 1080 $rlen = strlen($response); 1081 for ( $i=0; $i<count($docs); $i++ ) 1082 { 1083 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); 1084 $pos += 4; 1085 1086 if ( $pos+$len > $rlen ) 1087 { 1088 $this->_error = "incomplete reply"; 1089 $this->_MBPop (); 1090 return false; 1091 } 1092 $res[] = $len ? substr ( $response, $pos, $len ) : ""; 1093 $pos += $len; 1094 } 1095 1096 $this->_MBPop (); 1097 return $res; 1098 } 1099 1100 1101 ///////////////////////////////////////////////////////////////////////////// 1102 // keyword generation 1103 ///////////////////////////////////////////////////////////////////////////// 1104 1105 /// connect to searchd server, and generate keyword list for a given query 1106 /// returns false on failure, 1107 /// an array of words on success 1108 function BuildKeywords ( $query, $index, $hits ) 1109 { 1110 assert ( is_string($query) ); 1111 assert ( is_string($index) ); 1112 assert ( is_bool($hits) ); 1113 1114 $this->_MBPush (); 1115 1116 if (!( $fp = $this->_Connect() )) 1117 { 1118 $this->_MBPop(); 1119 return false; 1120 } 1121 1122 ///////////////// 1123 // build request 1124 ///////////////// 1125 1126 // v.1.0 req 1127 $req = pack ( "N", strlen($query) ) . $query; // req query 1128 $req .= pack ( "N", strlen($index) ) . $index; // req index 1129 $req .= pack ( "N", (int)$hits ); 1130 1131 //////////////////////////// 1132 // send query, get response 1133 //////////////////////////// 1134 1135 $len = strlen($req); 1136 $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header 1137 $wrote = fwrite ( $fp, $req, $len+8 ); 1138 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) )) 1139 { 1140 $this->_MBPop (); 1141 return false; 1142 } 1143 1144 ////////////////// 1145 // parse response 1146 ////////////////// 1147 1148 $pos = 0; 1149 $res = array (); 1150 $rlen = strlen($response); 1151 list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) ); 1152 $pos += 4; 1153 for ( $i=0; $i<$nwords; $i++ ) 1154 { 1155 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; 1156 $tokenized = $len ? substr ( $response, $pos, $len ) : ""; 1157 $pos += $len; 1158 1159 list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4; 1160 $normalized = $len ? substr ( $response, $pos, $len ) : ""; 1161 $pos += $len; 1162 1163 $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized ); 1164 1165 if ( $hits ) 1166 { 1167 list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) ); 1168 $pos += 8; 1169 $res [$i]["docs"] = $ndocs; 1170 $res [$i]["hits"] = $nhits; 1171 } 1172 1173 if ( $pos > $rlen ) 1174 { 1175 $this->_error = "incomplete reply"; 1176 $this->_MBPop (); 1177 return false; 1178 } 1179 } 1180 1181 $this->_MBPop (); 1182 return $res; 1183 } 1184 1185 function EscapeString ( $string ) 1186 { 1187 $from = array ( '(',')','|','-','!','@','~','"','&', '/' ); 1188 $to = array ( '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/' ); 1189 1190 return str_replace ( $from, $to, $string ); 1191 } 1192 1193 ///////////////////////////////////////////////////////////////////////////// 1194 // attribute updates 1195 ///////////////////////////////////////////////////////////////////////////// 1196 1197 /// batch update given attributes in given rows in given indexes 1198 /// returns amount of updated documents (0 or more) on success, or -1 on failure 1199 function UpdateAttributes ( $index, $attrs, $values, $mva=false ) 1200 { 1201 // verify everything 1202 assert ( is_string($index) ); 1203 assert ( is_bool($mva) ); 1204 1205 assert ( is_array($attrs) ); 1206 foreach ( $attrs as $attr ) 1207 assert ( is_string($attr) ); 1208 1209 assert ( is_array($values) ); 1210 foreach ( $values as $id=>$entry ) 1211 { 1212 assert ( is_numeric($id) ); 1213 assert ( is_array($entry) ); 1214 assert ( count($entry)==count($attrs) ); 1215 foreach ( $entry as $v ) 1216 { 1217 if ( $mva ) 1218 { 1219 assert ( is_array($v) ); 1220 foreach ( $v as $vv ) 1221 assert ( is_int($vv) ); 1222 } else 1223 assert ( is_int($v) ); 1224 } 1225 } 1226 1227 // build request 1228 $req = pack ( "N", strlen($index) ) . $index; 1229 1230 $req .= pack ( "N", count($attrs) ); 1231 foreach ( $attrs as $attr ) 1232 { 1233 $req .= pack ( "N", strlen($attr) ) . $attr; 1234 $req .= pack ( "N", $mva ? 1 : 0 ); 1235 } 1236 1237 $req .= pack ( "N", count($values) ); 1238 foreach ( $values as $id=>$entry ) 1239 { 1240 $req .= sphPack64 ( $id ); 1241 foreach ( $entry as $v ) 1242 { 1243 $req .= pack ( "N", $mva ? count($v) : $v ); 1244 if ( $mva ) 1245 foreach ( $v as $vv ) 1246 $req .= pack ( "N", $vv ); 1247 } 1248 } 1249 1250 // connect, send query, get response 1251 if (!( $fp = $this->_Connect() )) 1252 return -1; 1253 1254 $len = strlen($req); 1255 $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header 1256 fwrite ( $fp, $req, $len+8 ); 1257 1258 if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) )) 1259 return -1; 1260 1261 // parse response 1262 list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) ); 1263 return $updated; 1264 } 1265} 1266 1267// 1268// $Id$ 1269// 1270 1271?> 1272