1<?php
2
3//
4// $Id$
5//
6
7//
8// Copyright (c) 2001-2016, Andrew Aksyonoff
9// Copyright (c) 2008-2016, Sphinx Technologies Inc
10// All rights reserved
11//
12// This program is free software; you can redistribute it and/or modify
13// it under the terms of the GNU General Public License. You should have
14// received a copy of the GPL license along with this program; if you
15// did not, you can find it at http://www.gnu.org/
16//
17
18/////////////////////////////////////////////////////////////////////////////
19// PHP version of Sphinx searchd client (PHP API)
20/////////////////////////////////////////////////////////////////////////////
21
/// known searchd commands
/// (RunQueries() puts SEARCHD_COMMAND_SEARCH into the request header)
define ( "SEARCHD_COMMAND_SEARCH",	0 );
define ( "SEARCHD_COMMAND_EXCERPT",	1 );
define ( "SEARCHD_COMMAND_UPDATE",	2 );
define ( "SEARCHD_COMMAND_KEYWORDS",3 );

/// current client-side command implementation versions
/// (the version warning built in _OldGetResponse() prints them as
/// "v.<value>>8>.<value&0xff>", i.e. high byte is major, low byte is minor)
define ( "VER_COMMAND_SEARCH",		0x116 );
define ( "VER_COMMAND_EXCERPT",		0x100 );
define ( "VER_COMMAND_UPDATE",		0x102 );
define ( "VER_COMMAND_KEYWORDS",	0x100 );

/// known searchd status codes
define ( "SEARCHD_OK",				0 );
define ( "SEARCHD_ERROR",			1 );	///< reported by _OldGetResponse() as "searchd error: ..."
define ( "SEARCHD_RETRY",			2 );	///< reported by _OldGetResponse() as "temporary searchd error: ..."
define ( "SEARCHD_WARNING",			3 );	///< reply carries a length-prefixed warning string ahead of the payload

/// known match modes
define ( "SPH_MATCH_ALL",			0 );
define ( "SPH_MATCH_ANY",			1 );
define ( "SPH_MATCH_PHRASE",		2 );
define ( "SPH_MATCH_BOOLEAN",		3 );
define ( "SPH_MATCH_EXTENDED",		4 );
define ( "SPH_MATCH_FULLSCAN",		5 );
define ( "SPH_MATCH_EXTENDED2",		6 );	// extended engine V2 (TEMPORARY, WILL BE REMOVED); the constructor's default mode

/// known ranking modes (ext2 only)
define ( "SPH_RANK_PROXIMITY_BM25",	0 );	///< default mode, phrase proximity major factor and BM25 minor one
define ( "SPH_RANK_BM25",			1 );	///< statistical mode, BM25 ranking only (faster but worse quality)
define ( "SPH_RANK_NONE",			2 );	///< no ranking, all matches get a weight of 1
define ( "SPH_RANK_WORDCOUNT",		3 );	///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define ( "SPH_RANK_PROXIMITY",		4 );	///< NOTE(review): presumably phrase proximity only - confirm against searchd docs

/// known sort modes
define ( "SPH_SORT_RELEVANCE",		0 );	///< the constructor's default; the only mode SetSortMode() accepts with an empty sort-by clause
define ( "SPH_SORT_ATTR_DESC",		1 );
define ( "SPH_SORT_ATTR_ASC",		2 );
define ( "SPH_SORT_TIME_SEGMENTS", 	3 );
define ( "SPH_SORT_EXTENDED", 		4 );
define ( "SPH_SORT_EXPR", 			5 );

/// known filter types
define ( "SPH_FILTER_VALUES",		0 );	///< created by SetFilter()
define ( "SPH_FILTER_RANGE",		1 );	///< created by SetFilterRange()
define ( "SPH_FILTER_FLOATRANGE",	2 );	///< created by SetFilterFloatRange()

/// known attribute types
define ( "SPH_ATTR_INTEGER",		1 );
define ( "SPH_ATTR_TIMESTAMP",		2 );
define ( "SPH_ATTR_ORDINAL",		3 );
define ( "SPH_ATTR_BOOL",			4 );
define ( "SPH_ATTR_FLOAT",			5 );
define ( "SPH_ATTR_BIGINT",			6 );
define ( "SPH_ATTR_MULTI",			0x40000000 );	///< flag bit; RunQueries() tests it with "&" to detect multi-value attributes

/// known grouping functions
define ( "SPH_GROUPBY_DAY",			0 );	///< the constructor's default group-by function
define ( "SPH_GROUPBY_WEEK",		1 );
define ( "SPH_GROUPBY_MONTH",		2 );
define ( "SPH_GROUPBY_YEAR",		3 );
define ( "SPH_GROUPBY_ATTR",		4 );
define ( "SPH_GROUPBY_ATTRPAIR",	5 );
85
86
/// portably pack numeric to 64 unsigned bits, network order
///
/// $v may be an int, a float or a numeric string. The result is an 8-byte
/// binary string: high 32-bit word first, then the low word, both big-endian.
/// Negative values are packed as their 64-bit two's complement.
///
/// On 32-bit PHP without bcmath, values longer than 15 characters are not
/// supported and the script is terminated with die().
function sphPack64 ( $v )
{
	assert ( is_numeric($v) );

	// x64 route
	if ( PHP_INT_SIZE>=8 )
	{
		// plain decimal strings of 19 or 20 digits can exceed PHP_INT_MAX, where an
		// (int) cast no longer preserves the value; split those into two 32-bit
		// words using integer math only (no bcmath needed)
		$digits = is_string($v) ? strlen($v) : 0;
		if ( $digits>18 && $digits<=20 && strspn ( $v, "0123456789" )==$digits )
		{
			// v = hi*10^13 + lo, and 10^13 = 2328*2^32 + 1316134912
			$cut = $digits-13;
			$hi = (int)substr ( $v, 0, $cut );
			$lo = (int)substr ( $v, $cut );

			$m = $lo + $hi*1316134912; // at most ~1.4e16, comfortably inside a signed 64-bit int
			$l = $m % 4294967296;
			$h = $hi*2328 + (int)( ($m-$l)/4294967296 ); // ($m-$l) is an exact multiple of 2^32
			return pack ( "NN", $h, $l );
		}

		$i = (int)$v;
		return pack ( "NN", $i>>32, $i&((1<<32)-1) );
	}

	// x32 route, bcmath
	$x = "4294967296";
	if ( function_exists("bcmul") )
	{
		$h = bcdiv ( $v, $x, 0 );
		$l = bcmod ( $v, $x );

		// two's complement for negatives: borrow from the high word only when the
		// low word is non-zero (an exact multiple of 2^32 needs no borrow)
		if ( $v<0 && (float)$l!=0 )
		{
			$h = -1+(float)$h;
			$l = $l+(float)$x;
		}
		return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
	}

	// x32 route, 15 or less decimal digits
	// we can use float, because its actually double and has 52 precision bits
	if ( strlen($v)<=15 )
	{
		$f = (float)$v;
		$h = (int)($f/$x);
		$l = $f-$x*(float)$h;

		// same borrow rule as in the bcmath route
		if ( $v<0 && $l!=0 )
		{
			$h = -1+(float)$h;
			$l = $l+(float)$x;
		}
		return pack ( "NN", $h, $l );
	}

	// x32 route, 16 or more decimal digits
	// well, let me know if you *really* need this
	die ( "INTERNAL ERROR: packing more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );
}
132
/// portably unpack 64 signed bits, network order to numeric
///
/// $v is an 8-byte binary string (high word first). On 64-bit PHP the result
/// is a native int; on 32-bit PHP it is a decimal string, produced with bcmath
/// when available or with float math for values of up to 15 digits.
function sphUnpack64 ( $v )
{
	list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

	if ( PHP_INT_SIZE<8 )
	{
		// x32 route: work on the magnitude and remember the sign separately
		$two32 = "4294967296";
		$negative = ( $hi<0 );
		if ( $negative )
		{
			// two's complement negation is "invert all bits, then add one";
			// the "+1" is applied below as $carry
			$hi = ~$hi;
			$lo = ~$lo;
		}
		$carry = $negative ? 1 : 0;
		$sign = $negative ? "-" : "";

		$hi = sprintf ( "%u", $hi );
		$lo = sprintf ( "%u", $lo );

		// bcmath
		if ( function_exists("bcmul") )
			return $sign . bcadd ( bcadd ( $lo, bcmul ( $two32, $hi ) ), $carry );

		// no bcmath, 16 or more decimal digits
		// well, let me know if you *really* need this
		if ( $hi>=1048576 )
			die ( "INTERNAL ERROR: unpacking more than 15-digit numeric on 32-bit PHP is not implemented yet (contact support)" );

		// no bcmath, 15 or less decimal digits
		// float is actually a double here and has 52 precision bits
		$sum = ((float)$hi)*$two32 + (float)$lo + (float)$carry;
		return $sign . sprintf ( "%.0f", $sum ); // builtin conversion is only about 39-40 bits precise!
	}

	// x64 route
	// some PHP builds (5.2.2 to 5.2.5) return negative values from unpack("N"); normalize them
	$hi += ( $hi<0 ) ? (1<<32) : 0;
	$lo += ( $lo<0 ) ? (1<<32) : 0;
	return ($hi<<32) + $lo;
}
176
177
178/// sphinx searchd client class
179class SphinxClient
180{
	var $_host;			///< searchd host (default is "localhost")
	var $_port;			///< searchd port (default is 9312)
	var $_offset;		///< how many records to seek from result-set start (default is 0)
	var $_limit;		///< how many records to return from result-set starting at offset (default is 20)
	var $_mode;			///< query matching mode (default is SPH_MATCH_EXTENDED2)
	var $_weights;		///< per-field weights (default is 1 for all fields)
	var $_sort;			///< match sorting mode (default is SPH_SORT_RELEVANCE)
	var $_sortby;		///< attribute to sort by (default is "")
	var $_min_id;		///< min ID to match (default is 0, which means no limit)
	var $_max_id;		///< max ID to match (default is 0, which means no limit)
	var $_filters;		///< search filters (array of descriptors appended by the SetFilter*() methods)
	var $_groupby;		///< group-by attribute name
	var $_groupfunc;	///< group-by function (to pre-process group-by attribute value with)
	var $_groupsort;	///< group-by sorting clause (to sort groups in result set with)
	var $_groupdistinct;///< group-by count-distinct attribute
	var $_maxmatches;	///< max matches to retrieve (default is 1000)
	var $_cutoff;		///< cutoff to stop searching at (default is 0)
	var $_retrycount;	///< distributed retries count
	var $_retrydelay;	///< distributed retries delay
	var $_anchor;		///< geographical anchor point (empty array when not set)
	var $_indexweights;	///< per-index weights
	var $_ranker;		///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
	var $_maxquerytime;	///< max query time, milliseconds (default is 0, do not limit)
	var $_fieldweights;	///< per-field-name weights
	var $_overrides;	///< per-query attribute values overrides (keyed by attribute name)
	var $_select;		///< select-list (attributes or expressions, with optional aliases; default is "*")

	var $_error;		///< last error message
	var $_warning;		///< last warning message

	var $_reqs;			///< requests array for multi-query (filled by AddQuery(), sent by RunQueries())
	var $_mbenc;		///< stored mbstring encoding (saved by _MBPush(), restored by _MBPop())
	var $_arrayresult;	///< whether $result["matches"] should be a hash or an array (default is false, a hash keyed by document ID)
214
215	/////////////////////////////////////////////////////////////////////////////
216	// common stuff
217	/////////////////////////////////////////////////////////////////////////////
218
219	/// create a new client object and fill defaults
220	function SphinxClient ()
221	{
222		// per-client-object settings
223		$this->_host		= "localhost";
224		$this->_port		= 9312;
225
226		// per-query settings
227		$this->_offset		= 0;
228		$this->_limit		= 20;
229		$this->_mode		= SPH_MATCH_EXTENDED2;
230		$this->_weights		= array ();
231		$this->_sort		= SPH_SORT_RELEVANCE;
232		$this->_sortby		= "";
233		$this->_min_id		= 0;
234		$this->_max_id		= 0;
235		$this->_filters		= array ();
236		$this->_groupby		= "";
237		$this->_groupfunc	= SPH_GROUPBY_DAY;
238		$this->_groupsort	= "@group desc";
239		$this->_groupdistinct= "";
240		$this->_maxmatches	= 1000;
241		$this->_cutoff		= 0;
242		$this->_retrycount	= 0;
243		$this->_retrydelay	= 0;
244		$this->_anchor		= array ();
245		$this->_indexweights= array ();
246		$this->_ranker		= SPH_RANK_PROXIMITY_BM25;
247		$this->_maxquerytime= 0;
248		$this->_fieldweights= array();
249		$this->_overrides 	= array();
250		$this->_select		= "*";
251
252		$this->_error		= ""; // per-reply fields (for single-query case)
253		$this->_warning		= "";
254		$this->_reqs		= array ();	// requests storage (for multi-query case)
255		$this->_mbenc		= "";
256		$this->_arrayresult	= false;
257	}
258
259	/// get last error message (string)
260	function GetLastError ()
261	{
262		return $this->_error;
263	}
264
265	/// get last warning message (string)
266	function GetLastWarning ()
267	{
268		return $this->_warning;
269	}
270
271	/// set searchd host name (string) and port (integer)
272	function SetServer ( $host, $port )
273	{
274		assert ( is_string($host) );
275		assert ( is_int($port) );
276		$this->_host = $host;
277		$this->_port = $port;
278	}
279
280	/////////////////////////////////////////////////////////////////////////////
281
282	/// enter mbstring workaround mode
283	function _MBPush ()
284	{
285		$this->_mbenc = "";
286		if ( ini_get ( "mbstring.func_overload" ) & 2 )
287		{
288			$this->_mbenc = mb_internal_encoding();
289			mb_internal_encoding ( "latin1" );
290		}
291    }
292
293	/// leave mbstring workaround mode
294	function _MBPop ()
295	{
296		if ( $this->_mbenc )
297			mb_internal_encoding ( $this->_mbenc );
298	}
299
300	/// connect to searchd server
301	function _Connect ()
302	{
303		return fopen('php://stdout', 'w');
304	}
305
306	function _OldConnect()
307	{
308		if (!( $fp = @fsockopen ( $this->_host, $this->_port ) ) )
309		{
310			$this->_error = "connection to {$this->_host}:{$this->_port} failed";
311			return false;
312		}
313
314		// check version
315		list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
316		$v = (int)$v;
317		if ( $v<1 )
318		{
319			fclose ( $fp );
320			$this->_error = "expected searchd protocol version 1+, got version '$v'";
321			return false;
322		}
323
324		// all ok, send my version
325		fwrite ( $fp, pack ( "N", 1 ) );
326		return $fp;
327	}
328
329	/// get and check response packet from searchd server
330	function _GetResponse ( $fp, $client_ver )
331	{
332		return false;
333	}
334
335	function _OldGetResponse ( $fp, $client_ver )
336	{
337		$response = "";
338		$len = 0;
339
340		$header = fread ( $fp, 8 );
341		if ( strlen($header)==8 )
342		{
343			list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
344			$left = $len;
345			while ( $left>0 && !feof($fp) )
346			{
347				$chunk = fread ( $fp, $left );
348				if ( $chunk )
349				{
350					$response .= $chunk;
351					$left -= strlen($chunk);
352				}
353			}
354		}
355		fclose ( $fp );
356
357		// check response
358		$read = strlen ( $response );
359		if ( !$response || $read!=$len )
360		{
361			$this->_error = $len
362				? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
363				: "received zero-sized searchd response";
364			return false;
365		}
366
367		// check status
368		if ( $status==SEARCHD_WARNING )
369		{
370			list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
371			$this->_warning = substr ( $response, 4, $wlen );
372			return substr ( $response, 4+$wlen );
373		}
374		if ( $status==SEARCHD_ERROR )
375		{
376			$this->_error = "searchd error: " . substr ( $response, 4 );
377			return false;
378		}
379		if ( $status==SEARCHD_RETRY )
380		{
381			$this->_error = "temporary searchd error: " . substr ( $response, 4 );
382			return false;
383		}
384		if ( $status!=SEARCHD_OK )
385		{
386			$this->_error = "unknown status code '$status'";
387			return false;
388		}
389
390		// check version
391		if ( $ver<$client_ver )
392		{
393			$this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
394				$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
395		}
396
397		return $response;
398	}
399
400	/////////////////////////////////////////////////////////////////////////////
401	// searching
402	/////////////////////////////////////////////////////////////////////////////
403
404	/// set offset and count into result set,
405	/// and optionally set max-matches and cutoff limits
406	function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
407	{
408		assert ( is_int($offset) );
409		assert ( is_int($limit) );
410		assert ( $offset>=0 );
411		assert ( $limit>0 );
412		assert ( $max>=0 );
413		$this->_offset = $offset;
414		$this->_limit = $limit;
415		if ( $max>0 )
416			$this->_maxmatches = $max;
417		if ( $cutoff>0 )
418			$this->_cutoff = $cutoff;
419	}
420
421	/// set maximum query time, in milliseconds, per-index
422	/// integer, 0 means "do not limit"
423	function SetMaxQueryTime ( $max )
424	{
425		assert ( is_int($max) );
426		assert ( $max>=0 );
427		$this->_maxquerytime = $max;
428	}
429
430	/// set matching mode
431	function SetMatchMode ( $mode )
432	{
433		assert ( $mode==SPH_MATCH_ALL
434			|| $mode==SPH_MATCH_ANY
435			|| $mode==SPH_MATCH_PHRASE
436			|| $mode==SPH_MATCH_BOOLEAN
437			|| $mode==SPH_MATCH_EXTENDED
438			|| $mode==SPH_MATCH_FULLSCAN
439			|| $mode==SPH_MATCH_EXTENDED2 );
440		$this->_mode = $mode;
441	}
442
443	/// set ranking mode
444	function SetRankingMode ( $ranker )
445	{
446		assert ( $ranker==SPH_RANK_PROXIMITY_BM25
447			|| $ranker==SPH_RANK_BM25
448			|| $ranker==SPH_RANK_NONE
449			|| $ranker==SPH_RANK_WORDCOUNT
450			|| $ranker==SPH_RANK_PROXIMITY );
451		$this->_ranker = $ranker;
452	}
453
454	/// set matches sorting mode
455	function SetSortMode ( $mode, $sortby="" )
456	{
457		assert (
458			$mode==SPH_SORT_RELEVANCE ||
459			$mode==SPH_SORT_ATTR_DESC ||
460			$mode==SPH_SORT_ATTR_ASC ||
461			$mode==SPH_SORT_TIME_SEGMENTS ||
462			$mode==SPH_SORT_EXTENDED ||
463			$mode==SPH_SORT_EXPR );
464		assert ( is_string($sortby) );
465		assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );
466
467		$this->_sort = $mode;
468		$this->_sortby = $sortby;
469	}
470
471	/// bind per-field weights by order
472	/// DEPRECATED; use SetFieldWeights() instead
473	function SetWeights ( $weights )
474	{
475		assert ( is_array($weights) );
476		foreach ( $weights as $weight )
477			assert ( is_int($weight) );
478
479		$this->_weights = $weights;
480	}
481
482	/// bind per-field weights by name
483	function SetFieldWeights ( $weights )
484	{
485		assert ( is_array($weights) );
486		foreach ( $weights as $name=>$weight )
487		{
488			assert ( is_string($name) );
489			assert ( is_int($weight) );
490		}
491		$this->_fieldweights = $weights;
492	}
493
494	/// bind per-index weights by name
495	function SetIndexWeights ( $weights )
496	{
497		assert ( is_array($weights) );
498		foreach ( $weights as $index=>$weight )
499		{
500			assert ( is_string($index) );
501			assert ( is_int($weight) );
502		}
503		$this->_indexweights = $weights;
504	}
505
506	/// set IDs range to match
507	/// only match records if document ID is beetwen $min and $max (inclusive)
508	function SetIDRange ( $min, $max )
509	{
510		assert ( is_numeric($min) );
511		assert ( is_numeric($max) );
512		assert ( $min<=$max );
513		$this->_min_id = $min;
514		$this->_max_id = $max;
515	}
516
517	/// set values set filter
518	/// only match records where $attribute value is in given set
519	function SetFilter ( $attribute, $values, $exclude=false )
520	{
521		assert ( is_string($attribute) );
522		assert ( is_array($values) );
523		assert ( count($values) );
524
525		if ( is_array($values) && count($values) )
526		{
527			foreach ( $values as $value )
528				assert ( is_numeric($value) );
529
530			$this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
531		}
532	}
533
534	/// set range filter
535	/// only match records if $attribute value is beetwen $min and $max (inclusive)
536	function SetFilterRange ( $attribute, $min, $max, $exclude=false )
537	{
538		assert ( is_string($attribute) );
539		assert ( is_numeric($min) );
540		assert ( is_numeric($max) );
541		assert ( $min<=$max );
542
543		$this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
544	}
545
546	/// set float range filter
547	/// only match records if $attribute value is beetwen $min and $max (inclusive)
548	function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
549	{
550		assert ( is_string($attribute) );
551		assert ( is_float($min) );
552		assert ( is_float($max) );
553		assert ( $min<=$max );
554
555		$this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
556	}
557
558	/// setup anchor point for geosphere distance calculations
559	/// required to use @geodist in filters and sorting
560	/// latitude and longitude must be in radians
561	function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
562	{
563		assert ( is_string($attrlat) );
564		assert ( is_string($attrlong) );
565		assert ( is_float($lat) );
566		assert ( is_float($long) );
567
568		$this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
569	}
570
571	/// set grouping attribute and function
572	function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
573	{
574		assert ( is_string($attribute) );
575		assert ( is_string($groupsort) );
576		assert ( $func==SPH_GROUPBY_DAY
577			|| $func==SPH_GROUPBY_WEEK
578			|| $func==SPH_GROUPBY_MONTH
579			|| $func==SPH_GROUPBY_YEAR
580			|| $func==SPH_GROUPBY_ATTR
581			|| $func==SPH_GROUPBY_ATTRPAIR );
582
583		$this->_groupby = $attribute;
584		$this->_groupfunc = $func;
585		$this->_groupsort = $groupsort;
586	}
587
588	/// set count-distinct attribute for group-by queries
589	function SetGroupDistinct ( $attribute )
590	{
591		assert ( is_string($attribute) );
592		$this->_groupdistinct = $attribute;
593	}
594
595	/// set distributed retries count and delay
596	function SetRetries ( $count, $delay=0 )
597	{
598		assert ( is_int($count) && $count>=0 );
599		assert ( is_int($delay) && $delay>=0 );
600		$this->_retrycount = $count;
601		$this->_retrydelay = $delay;
602	}
603
604	/// set result set format (hash or array; hash by default)
605	/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
606	function SetArrayResult ( $arrayresult )
607	{
608		assert ( is_bool($arrayresult) );
609		$this->_arrayresult = $arrayresult;
610	}
611
612	/// set attribute values override
613	/// there can be only one override per attribute
614	/// $values must be a hash that maps document IDs to attribute values
615	function SetOverride ( $attrname, $attrtype, $values )
616	{
617		assert ( is_string ( $attrname ) );
618		assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) );
619		assert ( is_array ( $values ) );
620
621		$this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values );
622	}
623
624	/// set select-list (attributes or expressions), SQL-like syntax
625	function SetSelect ( $select )
626	{
627		assert ( is_string ( $select ) );
628		$this->_select = $select;
629	}
630
631	//////////////////////////////////////////////////////////////////////////////
632
633	/// clear all filters (for multi-queries)
634	function ResetFilters ()
635	{
636		$this->_filters = array();
637		$this->_anchor = array();
638	}
639
640	/// clear groupby settings (for multi-queries)
641	function ResetGroupBy ()
642	{
643		$this->_groupby		= "";
644		$this->_groupfunc	= SPH_GROUPBY_DAY;
645		$this->_groupsort	= "@group desc";
646		$this->_groupdistinct= "";
647	}
648
649	/// clear all attribute value overrides (for multi-queries)
650	function ResetOverrides ()
651    {
652    	$this->_overrides = array ();
653    }
654
655	//////////////////////////////////////////////////////////////////////////////
656
657	/// connect to searchd server, run given search query through given indexes,
658	/// and return the search results
659	function Query ( $query, $index="*", $comment="" )
660	{
661		assert ( empty($this->_reqs) );
662
663		$this->AddQuery ( $query, $index, $comment );
664		$results = $this->RunQueries ();
665
666		if ( !is_array($results) )
667			return false; // probably network error; error message should be already filled
668
669		$this->_error = $results[0]["error"];
670		$this->_warning = $results[0]["warning"];
671		if ( $results[0]["status"]==SEARCHD_ERROR )
672			return false;
673		else
674			return $results[0];
675	}
676
677	/// helper to pack floats in network byte order
678	function _PackFloat ( $f )
679	{
680		$t1 = pack ( "f", $f ); // machine order
681		list(,$t2) = unpack ( "L*", $t1 ); // int in machine order
682		return pack ( "N", $t2 );
683	}
684
	/// add query to multi-query batch
	/// serializes $query together with ALL current per-query settings (limits, modes,
	/// filters, group-by, anchor, weights, overrides, select-list) into one binary
	/// request and appends it to $_reqs; settings changed afterwards do not affect it
	/// every string is written as a 32-bit big-endian length followed by the raw bytes
	/// returns index into results array from RunQueries() call
	function AddQuery ( $query, $index="*", $comment="" )
	{
		// mbstring workaround
		$this->_MBPush ();

		// build request
		// NOTE: the order of the fields below is the wire format; do not reorder
		$req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits
		$req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
		$req .= pack ( "N", strlen($query) ) . $query; // query itself
		$req .= pack ( "N", count($this->_weights) ); // weights
		foreach ( $this->_weights as $weight )
			$req .= pack ( "N", (int)$weight );
		$req .= pack ( "N", strlen($index) ) . $index; // indexes
		$req .= pack ( "N", 1 ); // id64 range marker
		$req .= sphPack64 ( $this->_min_id ) . sphPack64 ( $this->_max_id ); // id64 range

		// filters
		// each one: attribute name, filter type, type-specific payload, exclude flag
		$req .= pack ( "N", count($this->_filters) );
		foreach ( $this->_filters as $filter )
		{
			$req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
			$req .= pack ( "N", $filter["type"] );
			switch ( $filter["type"] )
			{
				case SPH_FILTER_VALUES:
					// value count, then every value as 64 bits
					$req .= pack ( "N", count($filter["values"]) );
					foreach ( $filter["values"] as $value )
						$req .= sphPack64 ( $value );
					break;

				case SPH_FILTER_RANGE:
					// min and max as 64 bits each
					$req .= sphPack64 ( $filter["min"] ) . sphPack64 ( $filter["max"] );
					break;

				case SPH_FILTER_FLOATRANGE:
					// min and max as 32-bit floats
					$req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
					break;

				default:
					assert ( 0 && "internal error: unhandled filter type" );
			}
			$req .= pack ( "N", $filter["exclude"] );
		}

		// group-by clause, max-matches count, group-sort clause, cutoff count
		$req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
		$req .= pack ( "N", $this->_maxmatches );
		$req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
		$req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
		$req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

		// anchor point
		// a 0/1 presence flag, followed (when set) by both attribute names and both coordinates
		if ( empty($this->_anchor) )
		{
			$req .= pack ( "N", 0 );
		} else
		{
			$a =& $this->_anchor;
			$req .= pack ( "N", 1 );
			$req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
			$req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
			$req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
		}

		// per-index weights
		$req .= pack ( "N", count($this->_indexweights) );
		foreach ( $this->_indexweights as $idx=>$weight )
			$req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );

		// max query time
		$req .= pack ( "N", $this->_maxquerytime );

		// per-field weights
		$req .= pack ( "N", count($this->_fieldweights) );
		foreach ( $this->_fieldweights as $field=>$weight )
			$req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );

		// comment
		$req .= pack ( "N", strlen($comment) ) . $comment;

		// attribute overrides
		// each one: attribute name, attribute type, value count, then (document id, value) pairs
		$req .= pack ( "N", count($this->_overrides) );
		foreach ( $this->_overrides as $key => $entry )
		{
			$req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"];
			$req .= pack ( "NN", $entry["type"], count($entry["values"]) );
			foreach ( $entry["values"] as $id=>$val )
			{
				assert ( is_numeric($id) );
				assert ( is_numeric($val) );

				// document id is always 64 bits; the value width depends on the attribute type
				$req .= sphPack64 ( $id );
				switch ( $entry["type"] )
				{
					case SPH_ATTR_FLOAT:	$req .= $this->_PackFloat ( $val ); break;
					case SPH_ATTR_BIGINT:	$req .= sphPack64 ( $val ); break;
					default:				$req .= pack ( "N", $val ); break;
				}
			}
		}

		// select-list
		$req .= pack ( "N", strlen($this->_select) ) . $this->_select;

		// mbstring workaround
		$this->_MBPop ();

		// store request to requests array
		$this->_reqs[] = $req;
		return count($this->_reqs)-1;
	}
798
	/// connect to searchd, run queries batch, and return an array of result sets
	/// sends every request queued by AddQuery() in one packet and parses the reply
	/// returns false (with $_error set by this method or by the connect/response
	/// helpers) when there is nothing to send, the connection fails, or no valid
	/// response is received; the queue in $_reqs is only cleared after a response
	/// has been received
	/// each result set is an array with "error", "warning" and "status", and for
	/// non-failed queries also "fields", "attrs", "matches" (only when there is at
	/// least one match), "total", "total_found", "time" and "words" (only when
	/// there is at least one word)
	/// NOTE(review): in this file _Connect() opens php://stdout and _GetResponse()
	/// always returns false, so as written this method writes the request out and
	/// then returns false - confirm that this is intended
	function RunQueries ()
	{
		if ( empty($this->_reqs) )
		{
			$this->_error = "no queries defined, issue AddQuery() first";
			return false;
		}

		// mbstring workaround
		$this->_MBPush ();

		if (!( $fp = $this->_Connect() ))
		{
			$this->_MBPop ();
			return false;
		}

		////////////////////////////
		// send query, get response
		////////////////////////////

		$nreqs = count($this->_reqs);
		$req = join ( "", $this->_reqs );
		$len = 4+strlen($req); // body length: the 4-byte query count plus all serialized queries
		$req = pack ( "nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs ) . $req; // add header

		fwrite ( $fp, $req, $len+8 ); // +8 for the command, version and length header fields
		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ))
		{
			$this->_MBPop ();
			return false;
		}

		$this->_reqs = array ();

		//////////////////
		// parse response
		//////////////////

		$p = 0; // current position
		$max = strlen($response); // max position for checks, to protect against broken responses

		$results = array ();
		for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
		{
			// append an empty result set and fill it in place through a reference
			$results[] = array();
			$result =& $results[$ires];

			$result["error"] = "";
			$result["warning"] = "";

			// extract status
			list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			$result["status"] = $status;
			if ( $status!=SEARCHD_OK )
			{
				// any non-OK status is followed by a length-prefixed message
				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$message = substr ( $response, $p, $len ); $p += $len;

				if ( $status==SEARCHD_WARNING )
				{
					// a warning still carries a full result set; keep parsing
					$result["warning"] = $message;
				} else
				{
					// a failed query has no result set; move on to the next one
					$result["error"] = $message;
					continue;
				}
			}

			// read schema
			$fields = array ();
			$attrs = array ();

			// field names: a count, then length-prefixed strings
			list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			while ( $nfields-->0 && $p<$max )
			{
				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$fields[] = substr ( $response, $p, $len ); $p += $len;
			}
			$result["fields"] = $fields;

			// attributes: a count, then (length-prefixed name, type) pairs
			list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			while ( $nattrs-->0 && $p<$max  )
			{
				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$attr = substr ( $response, $p, $len ); $p += $len;
				list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$attrs[$attr] = $type;
			}
			$result["attrs"] = $attrs;

			// read match count
			list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
			list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4; // non-zero when document ids are 64 bits wide

			// read matches
			$idx = -1;
			while ( $count-->0 && $p<$max )
			{
				// index into result array
				$idx++;

				// parse document id and weight
				if ( $id64 )
				{
					$doc = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8;
					list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				} else
				{
					list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
						substr ( $response, $p, 8 ) ) );
					$p += 8;

					if ( PHP_INT_SIZE>=8 )
					{
						// x64 route, workaround broken unpack() in 5.2.2+
						if ( $doc<0 ) $doc += (1<<32);
					} else
					{
						// x32 route, ints are signed there; render the id as an unsigned decimal string
						$doc = sprintf ( "%u", $doc );
					}
				}
				$weight = sprintf ( "%u", $weight );

				// create match entry
				// array mode keeps duplicate ids apart; hash mode keys matches by document id
				if ( $this->_arrayresult )
					$result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
				else
					$result["matches"][$doc]["weight"] = $weight;

				// parse and create attributes
				// values arrive in schema order, so walk $attrs exactly as it was read above
				$attrvals = array ();
				foreach ( $attrs as $attr=>$type )
				{
					// handle 64bit ints
					if ( $type==SPH_ATTR_BIGINT )
					{
						$attrvals[$attr] = sphUnpack64 ( substr ( $response, $p, 8 ) ); $p += 8;
						continue;
					}

					// handle floats
					if ( $type==SPH_ATTR_FLOAT )
					{
						// read the 32 bits big-endian, then reinterpret them as a machine-order float
						list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
						list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
						$attrvals[$attr] = $fval;
						continue;
					}

					// handle everything else as unsigned ints
					list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
					if ( $type & SPH_ATTR_MULTI )
					{
						// multi-value attribute: $val is the number of 32-bit values that follow
						$attrvals[$attr] = array ();
						$nvalues = $val;
						while ( $nvalues-->0 && $p<$max )
						{
							list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
							$attrvals[$attr][] = sprintf ( "%u", $val );
						}
					} else
					{
						$attrvals[$attr] = sprintf ( "%u", $val );
					}
				}

				if ( $this->_arrayresult )
					$result["matches"][$idx]["attrs"] = $attrvals;
				else
					$result["matches"][$doc]["attrs"] = $attrvals;
			}

			// result-set totals: total, total found, elapsed milliseconds, word count
			list ( $total, $total_found, $msecs, $words ) =
				array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
			$result["total"] = sprintf ( "%u", $total );
			$result["total_found"] = sprintf ( "%u", $total_found );
			$result["time"] = sprintf ( "%.3f", $msecs/1000 ); // reported in seconds
			$p += 16;

			// per-word statistics: length-prefixed word, then docs and hits counts
			while ( $words-->0 && $p<$max )
			{
				list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
				$word = substr ( $response, $p, $len ); $p += $len;
				list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
				$result["words"][$word] = array (
					"docs"=>sprintf ( "%u", $docs ),
					"hits"=>sprintf ( "%u", $hits ) );
			}
		}

		$this->_MBPop ();
		return $results;
	}
995
996	/////////////////////////////////////////////////////////////////////////////
997	// excerpts generation
998	/////////////////////////////////////////////////////////////////////////////
999
	/// connect to searchd server, and generate excerpts (snippets)
1001	/// of given documents for given query. returns false on failure,
1002	/// an array of snippets on success
1003	function BuildExcerpts ( $docs, $index, $words, $opts=array() )
1004	{
1005		assert ( is_array($docs) );
1006		assert ( is_string($index) );
1007		assert ( is_string($words) );
1008		assert ( is_array($opts) );
1009
1010		$this->_MBPush ();
1011
1012		if (!( $fp = $this->_Connect() ))
1013		{
1014			$this->_MBPop();
1015			return false;
1016		}
1017
1018		/////////////////
1019		// fixup options
1020		/////////////////
1021
1022		if ( !isset($opts["before_match"]) )		$opts["before_match"] = "<b>";
1023		if ( !isset($opts["after_match"]) )			$opts["after_match"] = "</b>";
1024		if ( !isset($opts["chunk_separator"]) )		$opts["chunk_separator"] = " ... ";
1025		if ( !isset($opts["limit"]) )				$opts["limit"] = 256;
1026		if ( !isset($opts["around"]) )				$opts["around"] = 5;
1027		if ( !isset($opts["exact_phrase"]) )		$opts["exact_phrase"] = false;
1028		if ( !isset($opts["single_passage"]) )		$opts["single_passage"] = false;
1029		if ( !isset($opts["use_boundaries"]) )		$opts["use_boundaries"] = false;
1030		if ( !isset($opts["weight_order"]) )		$opts["weight_order"] = false;
1031
1032		/////////////////
1033		// build request
1034		/////////////////
1035
1036		// v.1.0 req
1037		$flags = 1; // remove spaces
1038		if ( $opts["exact_phrase"] )	$flags |= 2;
1039		if ( $opts["single_passage"] )	$flags |= 4;
1040		if ( $opts["use_boundaries"] )	$flags |= 8;
1041		if ( $opts["weight_order"] )	$flags |= 16;
1042		$req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
1043		$req .= pack ( "N", strlen($index) ) . $index; // req index
1044		$req .= pack ( "N", strlen($words) ) . $words; // req words
1045
1046		// options
1047		$req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
1048		$req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
1049		$req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
1050		$req .= pack ( "N", (int)$opts["limit"] );
1051		$req .= pack ( "N", (int)$opts["around"] );
1052
1053		// documents
1054		$req .= pack ( "N", count($docs) );
1055		foreach ( $docs as $doc )
1056		{
1057			assert ( is_string($doc) );
1058			$req .= pack ( "N", strlen($doc) ) . $doc;
1059		}
1060
1061		////////////////////////////
1062		// send query, get response
1063		////////////////////////////
1064
1065		$len = strlen($req);
1066		$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
1067		$wrote = fwrite ( $fp, $req, $len+8 );
1068		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ))
1069		{
1070			$this->_MBPop ();
1071			return false;
1072		}
1073
1074		//////////////////
1075		// parse response
1076		//////////////////
1077
1078		$pos = 0;
1079		$res = array ();
1080		$rlen = strlen($response);
1081		for ( $i=0; $i<count($docs); $i++ )
1082		{
1083			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
1084			$pos += 4;
1085
1086			if ( $pos+$len > $rlen )
1087			{
1088				$this->_error = "incomplete reply";
1089				$this->_MBPop ();
1090				return false;
1091			}
1092			$res[] = $len ? substr ( $response, $pos, $len ) : "";
1093			$pos += $len;
1094		}
1095
1096		$this->_MBPop ();
1097		return $res;
1098	}
1099
1100
1101	/////////////////////////////////////////////////////////////////////////////
1102	// keyword generation
1103	/////////////////////////////////////////////////////////////////////////////
1104
1105	/// connect to searchd server, and generate keyword list for a given query
1106	/// returns false on failure,
1107	/// an array of words on success
1108	function BuildKeywords ( $query, $index, $hits )
1109	{
1110		assert ( is_string($query) );
1111		assert ( is_string($index) );
1112		assert ( is_bool($hits) );
1113
1114		$this->_MBPush ();
1115
1116		if (!( $fp = $this->_Connect() ))
1117		{
1118			$this->_MBPop();
1119			return false;
1120		}
1121
1122		/////////////////
1123		// build request
1124		/////////////////
1125
1126		// v.1.0 req
1127		$req  = pack ( "N", strlen($query) ) . $query; // req query
1128		$req .= pack ( "N", strlen($index) ) . $index; // req index
1129		$req .= pack ( "N", (int)$hits );
1130
1131		////////////////////////////
1132		// send query, get response
1133		////////////////////////////
1134
1135		$len = strlen($req);
1136		$req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
1137		$wrote = fwrite ( $fp, $req, $len+8 );
1138		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ))
1139		{
1140			$this->_MBPop ();
1141			return false;
1142		}
1143
1144		//////////////////
1145		// parse response
1146		//////////////////
1147
1148		$pos = 0;
1149		$res = array ();
1150		$rlen = strlen($response);
1151		list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
1152		$pos += 4;
1153		for ( $i=0; $i<$nwords; $i++ )
1154		{
1155			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );	$pos += 4;
1156			$tokenized = $len ? substr ( $response, $pos, $len ) : "";
1157			$pos += $len;
1158
1159			list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );	$pos += 4;
1160			$normalized = $len ? substr ( $response, $pos, $len ) : "";
1161			$pos += $len;
1162
1163			$res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );
1164
1165			if ( $hits )
1166			{
1167				list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
1168				$pos += 8;
1169				$res [$i]["docs"] = $ndocs;
1170				$res [$i]["hits"] = $nhits;
1171			}
1172
1173			if ( $pos > $rlen )
1174			{
1175				$this->_error = "incomplete reply";
1176				$this->_MBPop ();
1177				return false;
1178			}
1179		}
1180
1181		$this->_MBPop ();
1182		return $res;
1183	}
1184
1185	function EscapeString ( $string )
1186	{
1187		$from = array ( '(',')','|','-','!','@','~','"','&', '/' );
1188		$to   = array ( '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/' );
1189
1190		return str_replace ( $from, $to, $string );
1191	}
1192
1193	/////////////////////////////////////////////////////////////////////////////
1194	// attribute updates
1195	/////////////////////////////////////////////////////////////////////////////
1196
1197	/// batch update given attributes in given rows in given indexes
1198	/// returns amount of updated documents (0 or more) on success, or -1 on failure
1199	function UpdateAttributes ( $index, $attrs, $values, $mva=false )
1200	{
1201		// verify everything
1202		assert ( is_string($index) );
1203		assert ( is_bool($mva) );
1204
1205		assert ( is_array($attrs) );
1206		foreach ( $attrs as $attr )
1207			assert ( is_string($attr) );
1208
1209		assert ( is_array($values) );
1210		foreach ( $values as $id=>$entry )
1211		{
1212			assert ( is_numeric($id) );
1213			assert ( is_array($entry) );
1214			assert ( count($entry)==count($attrs) );
1215			foreach ( $entry as $v )
1216			{
1217				if ( $mva )
1218				{
1219					assert ( is_array($v) );
1220					foreach ( $v as $vv )
1221						assert ( is_int($vv) );
1222				} else
1223					assert ( is_int($v) );
1224			}
1225		}
1226
1227		// build request
1228		$req = pack ( "N", strlen($index) ) . $index;
1229
1230		$req .= pack ( "N", count($attrs) );
1231		foreach ( $attrs as $attr )
1232		{
1233			$req .= pack ( "N", strlen($attr) ) . $attr;
1234			$req .= pack ( "N", $mva ? 1 : 0 );
1235		}
1236
1237		$req .= pack ( "N", count($values) );
1238		foreach ( $values as $id=>$entry )
1239		{
1240			$req .= sphPack64 ( $id );
1241			foreach ( $entry as $v )
1242			{
1243				$req .= pack ( "N", $mva ? count($v) : $v );
1244				if ( $mva )
1245					foreach ( $v as $vv )
1246						$req .= pack ( "N", $vv );
1247			}
1248		}
1249
1250		// connect, send query, get response
1251		if (!( $fp = $this->_Connect() ))
1252			return -1;
1253
1254		$len = strlen($req);
1255		$req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
1256		fwrite ( $fp, $req, $len+8 );
1257
1258		if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
1259			return -1;
1260
1261		// parse response
1262		list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
1263		return $updated;
1264	}
1265}
1266
1267//
1268// $Id$
1269//
1270
1271?>
1272