1# = client.rb - Sphinx Client API
2#
3# Author::    Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
# License::   Distributed under the same terms as Ruby
6# Version::   0.9.9-r1299
7# Website::   http://kpumuk.info/projects/ror-plugins/sphinx
8#
9# This library is distributed under the terms of the Ruby license.
10# You can freely distribute/modify this library.
11
12# ==Sphinx Client API
13#
14# The Sphinx Client API is used to communicate with <tt>searchd</tt>
15# daemon and get search results from Sphinx.
16#
17# ===Usage
18#
19#   sphinx = Sphinx::Client.new
20#   result = sphinx.Query('test')
21#   ids = result['matches'].map { |match| match['id'] }.join(',')
22#   posts = Post.find :all, :conditions => "id IN (#{ids})"
23#
24#   docs = posts.map(&:body)
25#   excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
27# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
28#							WARNING
29# We strongly recommend you to use SphinxQL instead of the API
30# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
31
32
33require 'socket'
34
35module Sphinx
36  # :stopdoc:
37
  # Base class for all Sphinx client errors.
  class SphinxError < StandardError; end
  # Invalid argument passed to an API method.
  class SphinxArgumentError < SphinxError; end
  # Failure to connect to searchd.
  class SphinxConnectError < SphinxError; end
  # Malformed or incomplete searchd response.
  class SphinxResponseError < SphinxError; end
  # Internal error reported by searchd (or detected inside the client).
  class SphinxInternalError < SphinxError; end
  # Temporary searchd failure; the client may retry later.
  class SphinxTemporaryError < SphinxError; end
  # Unrecognized searchd error status.
  class SphinxUnknownError < SphinxError; end
45
46  # :startdoc:
47
48  class Client
49
50    # :stopdoc:
51
    # Known searchd commands

    # search command
    SEARCHD_COMMAND_SEARCH   = 0
    # excerpt command
    SEARCHD_COMMAND_EXCERPT  = 1
    # update command
    SEARCHD_COMMAND_UPDATE   = 2
    # keywords command
    SEARCHD_COMMAND_KEYWORDS = 3

    # Current client-side command implementation versions

    # search command version
    VER_COMMAND_SEARCH   = 0x119
    # excerpt command version
    VER_COMMAND_EXCERPT  = 0x102
    # update command version
    VER_COMMAND_UPDATE   = 0x103
    # keywords command version
    VER_COMMAND_KEYWORDS = 0x100

    # Known searchd status codes

    # general success, command-specific reply follows
    SEARCHD_OK      = 0
    # general failure, command-specific reply may follow
    SEARCHD_ERROR   = 1
    # temporary failure, client should retry later
    SEARCHD_RETRY   = 2
    # general success, warning message and command-specific reply follow
    SEARCHD_WARNING = 3
84
85    # :startdoc:
86
    # Known match modes

    # match all query words
    SPH_MATCH_ALL       = 0
    # match any query word
    SPH_MATCH_ANY       = 1
    # match this exact phrase
    SPH_MATCH_PHRASE    = 2
    # match this boolean query
    SPH_MATCH_BOOLEAN   = 3
    # match this extended query
    SPH_MATCH_EXTENDED  = 4
    # match all document IDs w/o fulltext query, apply filters
    SPH_MATCH_FULLSCAN  = 5
    # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
    SPH_MATCH_EXTENDED2 = 6

    # Known ranking modes (ext2 only)

    # default mode, phrase proximity major factor and BM25 minor one
    SPH_RANK_PROXIMITY_BM25 = 0
    # statistical mode, BM25 ranking only (faster but worse quality)
    SPH_RANK_BM25           = 1
    # no ranking, all matches get a weight of 1
    SPH_RANK_NONE           = 2
    # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
    SPH_RANK_WORDCOUNT      = 3
    # phrase proximity ranking only
    SPH_RANK_PROXIMITY      = 4
    # legacy ranker emulating SPH_MATCH_ANY-style weighting
    SPH_RANK_MATCHANY       = 5
    # rank is a bit mask of the fields that matched
    SPH_RANK_FIELDMASK      = 6
    # SPH04 ranker (proximity with exact-match boosts)
    SPH_RANK_SPH04          = 7
    # rank by an arbitrary expression (supplied via SetRankingMode's rankexpr)
    SPH_RANK_EXPR           = 8
120
    # Known sort modes

    # sort by document relevance desc, then by date
    SPH_SORT_RELEVANCE     = 0
    # sort by attribute value desc, then by relevance desc
    SPH_SORT_ATTR_DESC     = 1
    # sort by attribute value asc, then by relevance desc
    SPH_SORT_ATTR_ASC      = 2
    # sort by time segments (hour/day/week/etc) desc, then by relevance desc
    SPH_SORT_TIME_SEGMENTS = 3
    # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
    SPH_SORT_EXTENDED      = 4
    # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
    SPH_SORT_EXPR          = 5

    # Known filter types

    # filter by integer values set
    SPH_FILTER_VALUES      = 0
    # filter by integer range
    SPH_FILTER_RANGE       = 1
    # filter by float range
    SPH_FILTER_FLOATRANGE  = 2
144
145    # Known attribute types
146
147    # this attr is just an integer
148    SPH_ATTR_INTEGER   = 1
149    # this attr is a timestamp
150    SPH_ATTR_TIMESTAMP = 2
151    # this attr is an ordinal string number (integer at search time,
152    # specially handled at indexing time)
153    SPH_ATTR_ORDINAL   = 3
154    # this attr is a boolean bit field
155    SPH_ATTR_BOOL      = 4
156    # this attr is a float
157    SPH_ATTR_FLOAT     = 5
158    # signed 64-bit integer
159    SPH_ATTR_BIGINT    = 6
160	# string
161	SPH_ATTR_STRING		= 7
162    # this attr has multiple values (0 or more)
163	SPH_ATTR_MULTI			= 0x40000001
164	SPH_ATTR_MULTI64		= 0x40000002
165
    # Known grouping functions

    # group by day
    SPH_GROUPBY_DAY      = 0
    # group by week
    SPH_GROUPBY_WEEK     = 1
    # group by month
    SPH_GROUPBY_MONTH    = 2
    # group by year
    SPH_GROUPBY_YEAR     = 3
    # group by attribute value
    SPH_GROUPBY_ATTR     = 4
    # group by a pair of two sequential attributes
    SPH_GROUPBY_ATTRPAIR = 5
180
181    # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
182    def initialize
183      # per-client-object settings
184      @host          = 'localhost'             # searchd host (default is "localhost")
185      @port          = 9312                    # searchd port (default is 9312)
186
187      # per-query settings
188      @offset        = 0                       # how many records to seek from result-set start (default is 0)
189      @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
190      @mode          = SPH_MATCH_EXTENDED2     # query matching mode (default is SPH_MATCH_EXTENDED2)
191      @weights       = []                      # per-field weights (default is 1 for all fields)
192      @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
193      @sortby        = ''                      # attribute to sort by (defualt is "")
194      @min_id        = 0                       # min ID to match (default is 0, which means no limit)
195      @max_id        = 0                       # max ID to match (default is 0, which means no limit)
196      @filters       = []                      # search filters
197      @groupby       = ''                      # group-by attribute name
198      @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
199      @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
200      @groupdistinct = ''                      # group-by count-distinct attribute
201      @maxmatches    = 1000                    # max matches to retrieve
202      @cutoff        = 0                       # cutoff to stop searching at (default is 0)
203      @retrycount    = 0                       # distributed retries count
204      @retrydelay    = 0                       # distributed retries delay
205      @anchor        = []                      # geographical anchor point
206      @indexweights  = []                      # per-index weights
207      @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
208      @rankexpr	     = ''                      # ranker expression for SPH_RANK_EXPR
209      @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit)
210      @fieldweights  = {}                      # per-field-name weights
211      @overrides     = []                      # per-query attribute values overrides
212      @select        = '*'                     # select-list (attributes or expressions, with optional aliases)
213
214      # per-reply fields (for single-query case)
215      @error         = ''                      # last error message
216      @warning       = ''                      # last warning message
217
218      @reqs          = []                      # requests storage (for multi-query case)
219      @mbenc         = ''                      # stored mbstring encoding
220    end
221
222    # Get last error message.
223    def GetLastError
224      @error
225    end
226
227    # Get last warning message.
228    def GetLastWarning
229      @warning
230    end
231
232    # Set searchd host name (string) and port (integer).
233    def SetServer(host, port)
234      assert { host.instance_of? String }
235      assert { port.instance_of? Fixnum }
236
237      @host = host
238      @port = port
239    end
240
241    # Set offset and count into result set,
242    # and optionally set max-matches and cutoff limits.
243    def SetLimits(offset, limit, max = 0, cutoff = 0)
244      assert { offset.instance_of? Fixnum }
245      assert { limit.instance_of? Fixnum }
246      assert { max.instance_of? Fixnum }
247      assert { offset >= 0 }
248      assert { limit > 0 }
249      assert { max >= 0 }
250
251      @offset = offset
252      @limit = limit
253      @maxmatches = max if max > 0
254      @cutoff = cutoff if cutoff > 0
255    end
256
257    # Set maximum query time, in milliseconds, per-index,
258    # integer, 0 means "do not limit"
259    def SetMaxQueryTime(max)
260      assert { max.instance_of? Fixnum }
261      assert { max >= 0 }
262      @maxquerytime = max
263    end
264
265    # Set matching mode. DEPRECATED
266    def SetMatchMode(mode)
267      $stderr.puts "DEPRECATED: Do not call this method or, even better, use SphinxQL instead of an API\n"
268      assert { mode == SPH_MATCH_ALL \
269            || mode == SPH_MATCH_ANY \
270            || mode == SPH_MATCH_PHRASE \
271            || mode == SPH_MATCH_BOOLEAN \
272            || mode == SPH_MATCH_EXTENDED \
273            || mode == SPH_MATCH_FULLSCAN \
274            || mode == SPH_MATCH_EXTENDED2 }
275
276      @mode = mode
277    end
278
279    # Set ranking mode.
280    def SetRankingMode(ranker, rankexpr = '')
281      assert { ranker == SPH_RANK_PROXIMITY_BM25 \
282            || ranker == SPH_RANK_BM25 \
283            || ranker == SPH_RANK_NONE \
284            || ranker == SPH_RANK_WORDCOUNT \
285            || ranker == SPH_RANK_PROXIMITY \
286            || ranker == SPH_RANK_MATCHANY \
287            || ranker == SPH_RANK_FIELDMASK \
288            || ranker == SPH_RANK_SPH04 \
289            || ranker == SPH_RANK_EXPR }
290
291      @ranker = ranker
292      @rankexpr = rankexpr
293    end
294
295    # Set matches sorting mode.
296    def SetSortMode(mode, sortby = '')
297      assert { mode == SPH_SORT_RELEVANCE \
298            || mode == SPH_SORT_ATTR_DESC \
299            || mode == SPH_SORT_ATTR_ASC \
300            || mode == SPH_SORT_TIME_SEGMENTS \
301            || mode == SPH_SORT_EXTENDED \
302            || mode == SPH_SORT_EXPR }
303      assert { sortby.instance_of? String }
304      assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
305
306      @sort = mode
307      @sortby = sortby
308    end
309
310    # Bind per-field weights by order.
311    #
312    # DEPRECATED; use SetFieldWeights() instead.
313    def SetWeights(weights)
314      assert { weights.instance_of? Array }
315      weights.each do |weight|
316        assert { weight.instance_of? Fixnum }
317      end
318
319      @weights = weights
320    end
321
322    # Bind per-field weights by name.
323    #
324    # Takes string (field name) to integer name (field weight) hash as an argument.
325    # * Takes precedence over SetWeights().
326    # * Unknown names will be silently ignored.
327    # * Unbound fields will be silently given a weight of 1.
328    def SetFieldWeights(weights)
329      assert { weights.instance_of? Hash }
330      weights.each do |name, weight|
331        assert { name.instance_of? String }
332        assert { weight.instance_of? Fixnum }
333      end
334
335      @fieldweights = weights
336    end
337
338    # Bind per-index weights by name.
339    def SetIndexWeights(weights)
340      assert { weights.instance_of? Hash }
341      weights.each do |index, weight|
342        assert { index.instance_of? String }
343        assert { weight.instance_of? Fixnum }
344      end
345
346      @indexweights = weights
347    end
348
349    # Set IDs range to match.
350    #
351    # Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
352    def SetIDRange(min, max)
353      assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
354      assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
355      assert { min <= max }
356
357      @min_id = min
358      @max_id = max
359    end
360
361    # Set values filter.
362    #
363    # Only match those records where <tt>attribute</tt> column values
364    # are in specified set.
365    def SetFilter(attribute, values, exclude = false)
366      assert { attribute.instance_of? String }
367      assert { values.instance_of? Array }
368      assert { !values.empty? }
369
370      if values.instance_of?(Array) && values.size > 0
371        values.each do |value|
372          assert { value.instance_of? Fixnum }
373        end
374
375        @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
376      end
377    end
378
379    # Set range filter.
380    #
381    # Only match those records where <tt>attribute</tt> column value
382    # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
383    def SetFilterRange(attribute, min, max, exclude = false)
384      assert { attribute.instance_of? String }
385      assert { min.instance_of? Fixnum or min.instance_of? Bignum }
386      assert { max.instance_of? Fixnum or max.instance_of? Bignum }
387      assert { min <= max }
388
389      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
390    end
391
392    # Set float range filter.
393    #
394    # Only match those records where <tt>attribute</tt> column value
395    # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
396    def SetFilterFloatRange(attribute, min, max, exclude = false)
397      assert { attribute.instance_of? String }
398      assert { min.instance_of? Float }
399      assert { max.instance_of? Float }
400      assert { min <= max }
401
402      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
403    end
404
405    # Setup anchor point for geosphere distance calculations.
406    #
407    # Required to use <tt>@geodist</tt> in filters and sorting
408    # distance will be computed to this point. Latitude and longitude
409    # must be in radians.
410    #
411    # * <tt>attrlat</tt> -- is the name of latitude attribute
412    # * <tt>attrlong</tt> -- is the name of longitude attribute
413    # * <tt>lat</tt> -- is anchor point latitude, in radians
414    # * <tt>long</tt> -- is anchor point longitude, in radians
415    def SetGeoAnchor(attrlat, attrlong, lat, long)
416      assert { attrlat.instance_of? String }
417      assert { attrlong.instance_of? String }
418      assert { lat.instance_of? Float }
419      assert { long.instance_of? Float }
420
421      @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
422    end
423
424    # Set grouping attribute and function.
425    #
426    # In grouping mode, all matches are assigned to different groups
427    # based on grouping function value.
428    #
429    # Each group keeps track of the total match count, and the best match
430    # (in this group) according to current sorting function.
431    #
432    # The final result set contains one best match per group, with
433    # grouping function value and matches count attached.
434    #
435    # Groups in result set could be sorted by any sorting clause,
436    # including both document attributes and the following special
437    # internal Sphinx attributes:
438    #
439    # * @id - match document ID;
440    # * @weight, @rank, @relevance -  match weight;
441    # * @group - groupby function value;
442    # * @count - amount of matches in group.
443    #
444    # the default mode is to sort by groupby value in descending order,
445    # ie. by '@group desc'.
446    #
447    # 'total_found' would contain total amount of matching groups over
448    # the whole index.
449    #
450    # WARNING: grouping is done in fixed memory and thus its results
451    # are only approximate; so there might be more groups reported
452    # in total_found than actually present. @count might also
453    # be underestimated.
454    #
455    # For example, if sorting by relevance and grouping by "published"
456    # attribute with SPH_GROUPBY_DAY function, then the result set will
457    # contain one most relevant match per each day when there were any
458    # matches published, with day number and per-day match count attached,
459    # and sorted by day number in descending order (ie. recent days first).
460    def SetGroupBy(attribute, func, groupsort = '@group desc')
461      assert { attribute.instance_of? String }
462      assert { groupsort.instance_of? String }
463      assert { func == SPH_GROUPBY_DAY \
464            || func == SPH_GROUPBY_WEEK \
465            || func == SPH_GROUPBY_MONTH \
466            || func == SPH_GROUPBY_YEAR \
467            || func == SPH_GROUPBY_ATTR \
468            || func == SPH_GROUPBY_ATTRPAIR }
469
470      @groupby = attribute
471      @groupfunc = func
472      @groupsort = groupsort
473    end
474
475    # Set count-distinct attribute for group-by queries.
476    def SetGroupDistinct(attribute)
477      assert { attribute.instance_of? String }
478      @groupdistinct = attribute
479    end
480
481    # Set distributed retries count and delay.
482    def SetRetries(count, delay = 0)
483      assert { count.instance_of? Fixnum }
484      assert { delay.instance_of? Fixnum }
485
486      @retrycount = count
487      @retrydelay = delay
488    end
489
490    # DEPRECATED: Set attribute values override
491    #
492	  # There can be only one override per attribute.
493	  # +values+ must be a hash that maps document IDs to attribute values.
494	  def SetOverride(attrname, attrtype, values)
495      $stderr.puts "DEPRECATED: Do not call this method. Use SphinxQL REMAP() function instead.\n"
496      assert { attrname.instance_of? String }
497      assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
498      assert { values.instance_of? Hash }
499
500      @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
501	  end
502
503    # Set select-list (attributes or expressions), SQL-like syntax.
504    def SetSelect(select)
505		  assert { select.instance_of? String }
506		  @select = select
507		end
508
509    # Clear all filters (for multi-queries).
510    def ResetFilters
511      @filters = []
512      @anchor = []
513    end
514
515    # Clear groupby settings (for multi-queries).
516    def ResetGroupBy
517      @groupby       = ''
518      @groupfunc     = SPH_GROUPBY_DAY
519      @groupsort     = '@group desc'
520      @groupdistinct = ''
521    end
522
523    # Clear all attribute value overrides (for multi-queries).
524    def ResetOverrides
525      @overrides = []
526    end
527
528    # Connect to searchd server and run given search query.
529    #
530    # <tt>query</tt> is query string
531
532    # <tt>index</tt> is index name (or names) to query. default value is "*" which means
533    # to query all indexes. Accepted characters for index names are letters, numbers,
534    # dash, and underscore; everything else is considered a separator. Therefore,
535    # all the following calls are valid and will search two indexes:
536    #
537    #   sphinx.Query('test query', 'main delta')
538    #   sphinx.Query('test query', 'main;delta')
539    #   sphinx.Query('test query', 'main, delta')
540    #
541    # Index order matters. If identical IDs are found in two or more indexes,
542    # weight and attribute values from the very last matching index will be used
543    # for sorting and returning to client. Therefore, in the example above,
544    # matches from "delta" index will always "win" over matches from "main".
545    #
546    # Returns false on failure.
547    # Returns hash which has the following keys on success:
548    #
549    # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
550    # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
551    # * <tt>'total_found'</tt> -- total amount of matching documents in index
552    # * <tt>'time'</tt> -- search time
553    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
554    def Query(query, index = '*', comment = '')
555      assert { @reqs.empty? }
556      @reqs = []
557
558      self.AddQuery(query, index, comment)
559      results = self.RunQueries
560
561      # probably network error; error message should be already filled
562      return false unless results.instance_of?(Array)
563
564      @error = results[0]['error']
565      @warning = results[0]['warning']
566
567      return false if results[0]['status'] == SEARCHD_ERROR
568      return results[0]
569    end
570
571    # Add query to batch.
572    #
573    # Batch queries enable searchd to perform internal optimizations,
574    # if possible; and reduce network connection overheads in all cases.
575    #
576    # For instance, running exactly the same query with different
577    # groupby settings will enable searched to perform expensive
578    # full-text search and ranking operation only once, but compute
579    # multiple groupby results from its output.
580    #
581    # Parameters are exactly the same as in <tt>Query</tt> call.
582    # Returns index to results array returned by <tt>RunQueries</tt> call.
583    def AddQuery(query, index = '*', comment = '')
584      # build request
585
586      # mode and limits
587      request = Request.new
588      request.put_int @offset, @limit, @mode, @ranker
589      # process the 'expr' ranker
590      if @ranker == SPH_RANK_EXPR
591        request.put_string @rankexpr
592      end
593
594      request.put_int @sort
595
596      request.put_string @sortby
597      # query itself
598      request.put_string query
599      # weights
600      request.put_int_array @weights
601      # indexes
602      request.put_string index
603      # id64 range marker
604      request.put_int 1
605      # id64 range
606      request.put_int64 @min_id.to_i, @max_id.to_i
607
608      # filters
609      request.put_int @filters.length
610      @filters.each do |filter|
611        request.put_string filter['attr']
612        request.put_int filter['type']
613
614        case filter['type']
615          when SPH_FILTER_VALUES
616            request.put_int64_array filter['values']
617          when SPH_FILTER_RANGE
618            request.put_int64 filter['min'], filter['max']
619          when SPH_FILTER_FLOATRANGE
620            request.put_float filter['min'], filter['max']
621          else
622            raise SphinxInternalError, 'Internal error: unhandled filter type'
623        end
624        request.put_int filter['exclude'] ? 1 : 0
625      end
626
627      # group-by clause, max-matches count, group-sort clause, cutoff count
628      request.put_int @groupfunc
629      request.put_string @groupby
630      request.put_int @maxmatches
631      request.put_string @groupsort
632      request.put_int @cutoff, @retrycount, @retrydelay
633      request.put_string @groupdistinct
634
635      # anchor point
636      if @anchor.empty?
637        request.put_int 0
638      else
639        request.put_int 1
640        request.put_string @anchor['attrlat'], @anchor['attrlong']
641        request.put_float @anchor['lat'], @anchor['long']
642      end
643
644      # per-index weights
645      request.put_int @indexweights.length
646      @indexweights.each do |idx, weight|
647        request.put_string idx
648        request.put_int weight
649      end
650
651      # max query time
652      request.put_int @maxquerytime
653
654      # per-field weights
655      request.put_int @fieldweights.length
656      @fieldweights.each do |field, weight|
657        request.put_string field
658        request.put_int weight
659      end
660
661      # comment
662      request.put_string comment
663
664      # attribute overrides
665      request.put_int @overrides.length
666      for entry in @overrides do
667        request.put_string entry['attr']
668        request.put_int entry['type'], entry['values'].size
669        entry['values'].each do |id, val|
670          assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
671          assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
672
673          request.put_int64 id
674          case entry['type']
675            when SPH_ATTR_FLOAT
676              request.put_float val
677            when SPH_ATTR_BIGINT
678              request.put_int64 val
679            else
680              request.put_int val
681          end
682        end
683      end
684
685      # select-list
686      request.put_string @select
687
688      # store request to requests array
689      @reqs << request.to_s;
690      return @reqs.length - 1
691    end
692
693    # Run queries batch.
694    #
695    # Returns an array of result sets on success.
696    # Returns false on network IO failure.
697    #
698    # Each result set in returned array is a hash which containts
699    # the same keys as the hash returned by <tt>Query</tt>, plus:
700    #
701    # * <tt>'error'</tt> -- search error for this query
702    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
703    def RunQueries
704      if @reqs.empty?
705        @error = 'No queries defined, issue AddQuery() first'
706        return false
707      end
708
709      req = @reqs.join('')
710      nreqs = @reqs.length
711      @reqs = []
712      response = PerformRequest(:search, req, nreqs)
713
714      # parse response
715      begin
716        results = []
717        ires = 0
718        while ires < nreqs
719          ires += 1
720          result = {}
721
722          result['error'] = ''
723          result['warning'] = ''
724
725          # extract status
726          status = result['status'] = response.get_int
727          if status != SEARCHD_OK
728            message = response.get_string
729            if status == SEARCHD_WARNING
730              result['warning'] = message
731            else
732              result['error'] = message
733              results << result
734              next
735            end
736          end
737
738          # read schema
739          fields = []
740          attrs = {}
741          attrs_names_in_order = []
742
743          nfields = response.get_int
744          while nfields > 0
745            nfields -= 1
746            fields << response.get_string
747          end
748          result['fields'] = fields
749
750          nattrs = response.get_int
751          while nattrs > 0
752            nattrs -= 1
753            attr = response.get_string
754            type = response.get_int
755            attrs[attr] = type
756            attrs_names_in_order << attr
757          end
758          result['attrs'] = attrs
759
760          # read match count
761          count = response.get_int
762          id64 = response.get_int
763
764          # read matches
765          result['matches'] = []
766          while count > 0
767            count -= 1
768
769            if id64 != 0
770              doc = response.get_int64
771              weight = response.get_int
772            else
773              doc, weight = response.get_ints(2)
774            end
775
776            r = {} # This is a single result put in the result['matches'] array
777            r['id'] = doc
778            r['weight'] = weight
779            attrs_names_in_order.each do |a|
780              r['attrs'] ||= {}
781
782              case attrs[a]
783                when SPH_ATTR_BIGINT
784                  # handle 64-bit ints
785                  r['attrs'][a] = response.get_int64
786                when SPH_ATTR_FLOAT
787                  # handle floats
788                  r['attrs'][a] = response.get_float
789				when SPH_ATTR_STRING
790				  # handle string
791				  r['attrs'][a] = response.get_string
792                else
793                  # handle everything else as unsigned ints
794                  val = response.get_int
795                  if attrs[a]==SPH_ATTR_MULTI
796                    r['attrs'][a] = []
797                    1.upto(val) do
798                      r['attrs'][a] << response.get_int
799                    end
800                  elsif attrs[a]==SPH_ATTR_MULTI64
801                    r['attrs'][a] = []
802					val = val/2
803                    1.upto(val) do
804                      r['attrs'][a] << response.get_int64
805                    end
806                  else
807                    r['attrs'][a] = val
808                  end
809              end
810            end
811            result['matches'] << r
812          end
813          result['total'], result['total_found'], msecs, words = response.get_ints(4)
814          result['time'] = '%.3f' % (msecs / 1000.0)
815
816          result['words'] = {}
817          while words > 0
818            words -= 1
819            word = response.get_string
820            docs, hits = response.get_ints(2)
821            result['words'][word] = { 'docs' => docs, 'hits' => hits }
822          end
823
824          results << result
825        end
826      #rescue EOFError
827      #  @error = 'incomplete reply'
828      #  raise SphinxResponseError, @error
829      end
830
831      return results
832    end
833
    # Connect to searchd server and generate excerpts from given documents.
835    #
836    # * <tt>docs</tt> -- an array of strings which represent the documents' contents
    # * <tt>index</tt> -- a string specifying the index whose settings will be used
838    # for stemming, lexing and case folding
839    # * <tt>words</tt> -- a string which contains the words to highlight
840    # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
841    #
842    # You can use following parameters:
843    # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
    # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
845    # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
846    # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
847    # * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
848    # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
849    # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
850    # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
851    # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
852    #
853    # Returns false on failure.
854    # Returns an array of string excerpts on success.
855    def BuildExcerpts(docs, index, words, opts = {})
856      assert { docs.instance_of? Array }
857      assert { index.instance_of? String }
858      assert { words.instance_of? String }
859      assert { opts.instance_of? Hash }
860
861      # fixup options
862      opts['before_match'] ||= '<b>';
863      opts['after_match'] ||= '</b>';
864      opts['chunk_separator'] ||= ' ... ';
865	  opts['html_strip_mode'] ||= 'index';
866      opts['limit'] ||= 256;
867	  opts['limit_passages'] ||= 0;
868	  opts['limit_words'] ||= 0;
869      opts['around'] ||= 5;
870	  opts['start_passage_id'] ||= 1;
871      opts['exact_phrase'] ||= false
872      opts['single_passage'] ||= false
873      opts['use_boundaries'] ||= false
874      opts['weight_order'] ||= false
875	  opts['load_files'] ||= false
876	  opts['allow_empty'] ||= false
877
878      # build request
879
880      # v.1.0 req
881      flags = 1
882      flags |= 2  if opts['exact_phrase']
883      flags |= 4  if opts['single_passage']
884      flags |= 8  if opts['use_boundaries']
885      flags |= 16 if opts['weight_order']
886	  flags |= 32 if opts['query_mode']
887	  flags |= 64 if opts['force_all_words']
888	  flags |= 128 if opts['load_files']
889	  flags |= 256 if opts['allow_empty']
890
891      request = Request.new
892      request.put_int 0, flags # mode=0, flags=1 (remove spaces)
893      # req index
894      request.put_string index
895      # req words
896      request.put_string words
897
898      # options
899      request.put_string opts['before_match']
900      request.put_string opts['after_match']
901      request.put_string opts['chunk_separator']
902      request.put_int opts['limit'].to_i, opts['around'].to_i
903
904	  # options v1.2
905	  request.put_int opts['limit_passages'].to_i
906	  request.put_int opts['limit_words'].to_i
907	  request.put_int opts['start_passage_id'].to_i
908	  request.put_string opts['html_strip_mode']
909
910      # documents
911      request.put_int docs.size
912      docs.each do |doc|
913        assert { doc.instance_of? String }
914
915        request.put_string doc
916      end
917
918      response = PerformRequest(:excerpt, request)
919
920      # parse response
921      begin
922        res = []
923        docs.each do |doc|
924          res << response.get_string
925        end
926      rescue EOFError
927        @error = 'incomplete reply'
928        raise SphinxResponseError, @error
929      end
930      return res
931    end
932
933    # Connect to searchd server, and generate keyword list for a given query.
934    #
935    # Returns an array of words on success.
936    def BuildKeywords(query, index, hits)
937      assert { query.instance_of? String }
938      assert { index.instance_of? String }
939      assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
940
941      # build request
942      request = Request.new
943      # v.1.0 req
944      request.put_string query # req query
945      request.put_string index # req index
946      request.put_int hits ? 1 : 0
947
948      response = PerformRequest(:keywords, request)
949
950      # parse response
951      begin
952        res = []
953        nwords = response.get_int
954        0.upto(nwords - 1) do |i|
955          tokenized = response.get_string
956          normalized = response.get_string
957
958          entry = { 'tokenized' => tokenized, 'normalized' => normalized }
959          entry['docs'], entry['hits'] = response.get_ints(2) if hits
960
961          res << entry
962        end
963      rescue EOFError
964        @error = 'incomplete reply'
965        raise SphinxResponseError, @error
966      end
967
968      return res
969    end
970
971    # Batch update given attributes in given rows in given indexes.
972    #
973    # * +index+ is a name of the index to be updated
974    # * +attrs+ is an array of attribute name strings.
    # * +values+ is a hash where key is document id, and value is an array of
    #   new attribute values
    # * +mva+ identifies whether to update MVA (multi-valued attributes)
    # * +ignoreexistent+ identifies whether to silently ignore updating of non-existent columns
979    #
980    # Returns number of actually updated documents (0 or more) on success.
981    # Returns -1 on failure.
982    #
983    # Usage example:
984    #    sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
985    def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false )
986      # verify everything
987      assert { index.instance_of? String }
988      assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
989      assert { ignoreexistent.instance_of?(TrueClass) || ignoreexistent.instance_of?(FalseClass) }
990
991      assert { attrs.instance_of? Array }
992      attrs.each do |attr|
993        assert { attr.instance_of? String }
994      end
995
996      assert { values.instance_of? Hash }
997      values.each do |id, entry|
998        assert { id.instance_of? Fixnum }
999        assert { entry.instance_of? Array }
1000        assert { entry.length == attrs.length }
1001        entry.each do |v|
1002          if mva
1003            assert { v.instance_of? Array }
1004            v.each { |vv| assert { vv.instance_of? Fixnum } }
1005          else
1006            assert { v.instance_of? Fixnum }
1007          end
1008        end
1009      end
1010
1011      # build request
1012      request = Request.new
1013      request.put_string index
1014
1015      request.put_int attrs.length
1016      request.put_int ignoreexistent ? 1 : 0
1017      for attr in attrs
1018        request.put_string attr
1019        request.put_int mva ? 1 : 0
1020      end
1021
1022      request.put_int values.length
1023      values.each do |id, entry|
1024        request.put_int64 id
1025        if mva
1026          entry.each { |v| request.put_int_array v }
1027        else
1028          request.put_int(*entry)
1029        end
1030      end
1031
1032      response = PerformRequest(:update, request)
1033
1034      # parse response
1035      begin
1036        return response.get_int
1037      rescue EOFError
1038        @error = 'incomplete reply'
1039        raise SphinxResponseError, @error
1040      end
1041    end
1042
1043    protected
1044
1045      # Connect to searchd server.
1046      def Connect
1047        begin
1048          if @host[0,1]=='/'
1049            sock = UNIXSocket.new(@host)
1050          else
1051            sock = TCPSocket.new(@host, @port)
1052          end
1053        rescue => err
1054          @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1055          raise SphinxConnectError, @error
1056        end
1057
1058        v = sock.recv(4).unpack('N*').first
1059        if v < 1
1060          sock.close
1061          @error = "expected searchd protocol version 1+, got version '#{v}'"
1062          raise SphinxConnectError, @error
1063        end
1064
1065        sock.send([1].pack('N'), 0)
1066        sock
1067      end
1068
1069      # Get and check response packet from searchd server.
1070      def GetResponse(sock, client_version)
1071        response = ''
1072        len = 0
1073
1074        header = sock.recv(8)
1075        if header.length == 8
1076          status, ver, len = header.unpack('n2N')
1077          left = len.to_i
1078          while left > 0 do
1079            begin
1080              chunk = sock.recv(left)
1081              if chunk
1082                response << chunk
1083                left -= chunk.length
1084              end
1085            rescue EOFError
1086              break
1087            end
1088          end
1089        end
1090        sock.close
1091
1092        # check response
1093        read = response.length
1094        if response.empty? or read != len.to_i
1095          @error = response.empty? \
1096            ? 'received zero-sized searchd response' \
1097            : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1098          raise SphinxResponseError, @error
1099        end
1100
1101        # check status
1102        if (status == SEARCHD_WARNING)
1103          wlen = response[0, 4].unpack('N*').first
1104          @warning = response[4, wlen]
1105          return response[4 + wlen, response.length - 4 - wlen]
1106        end
1107
1108        if status == SEARCHD_ERROR
1109          @error = 'searchd error: ' + response[4, response.length - 4]
1110          raise SphinxInternalError, @error
1111        end
1112
1113        if status == SEARCHD_RETRY
1114          @error = 'temporary searchd error: ' + response[4, response.length - 4]
1115          raise SphinxTemporaryError, @error
1116        end
1117
1118        unless status == SEARCHD_OK
1119          @error = "unknown status code: '#{status}'"
1120          raise SphinxUnknownError, @error
1121        end
1122
1123        # check version
1124        if ver < client_version
1125          @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1126            "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1127        end
1128
1129        return response
1130      end
1131
1132      # Connect, send query, get response.
1133      def PerformRequest(command, request, additional = nil)
1134        cmd = command.to_s.upcase
1135        command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1136        command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1137
1138        sock = self.Connect
1139        len = request.to_s.length + (additional != nil ? 8 : 0)
1140        header = [command_id, command_ver, len].pack('nnN')
1141        header << [0, additional].pack('NN') if additional != nil
1142        sock.send(header + request.to_s, 0)
1143        response = self.GetResponse(sock, command_ver)
1144        return Response.new(response)
1145      end
1146
1147      # :stopdoc:
1148      def assert
1149        raise 'Assertion failed!' unless yield if $DEBUG
1150      end
1151      # :startdoc:
1152  end
1153end
1154