1# = client.rb - Sphinx Client API
2#
3# Author::    Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5# License::   Distributes under the same terms as Ruby
6# Version::   0.9.9-r1299
7# Website::   http://kpumuk.info/projects/ror-plugins/sphinx
8#
9# This library is distributed under the terms of the Ruby license.
10# You can freely distribute/modify this library.
11
12# ==Sphinx Client API
13#
14# The Sphinx Client API is used to communicate with <tt>searchd</tt>
15# daemon and get search results from Sphinx.
16#
17# ===Usage
18#
19#   sphinx = Sphinx::Client.new
20#   result = sphinx.Query('test')
21#   ids = result['matches'].map { |match| match['id'] }.join(',')
22#   posts = Post.find :all, :conditions => "id IN (#{ids})"
23#
24#   docs = posts.map(&:body)
25#   excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
27require 'socket'
28
29module Sphinx
30  # :stopdoc:
31
32  class SphinxError < StandardError; end
33  class SphinxArgumentError < SphinxError; end
34  class SphinxConnectError < SphinxError; end
35  class SphinxResponseError < SphinxError; end
36  class SphinxInternalError < SphinxError; end
37  class SphinxTemporaryError < SphinxError; end
38  class SphinxUnknownError < SphinxError; end
39
40  # :startdoc:
41
42  class Client
43
44    # :stopdoc:
45
    # Known searchd commands
    # NOTE(review): the code that sends these to searchd lies outside this
    # section (RunQueries delegates to PerformRequest) -- confirm usage there.

    # search command
    SEARCHD_COMMAND_SEARCH   = 0
    # excerpt command
    SEARCHD_COMMAND_EXCERPT  = 1
    # update command
    SEARCHD_COMMAND_UPDATE   = 2
    # keywords command
    SEARCHD_COMMAND_KEYWORDS = 3

    # Current client-side command implementation versions
    # NOTE(review): presumably major version in the high byte and minor in the
    # low byte (0x119 => 1.19) -- confirm against the searchd protocol.

    # search command version
    VER_COMMAND_SEARCH   = 0x119
    # excerpt command version
    VER_COMMAND_EXCERPT  = 0x102
    # update command version
    VER_COMMAND_UPDATE   = 0x102
    # keywords command version
    VER_COMMAND_KEYWORDS = 0x100

    # Known searchd status codes
    # (RunQueries reads one of these as the first integer of every
    # per-query reply)

    # general success, command-specific reply follows
    SEARCHD_OK      = 0
    # general failure, command-specific reply may follow
    # (Query returns false when the first result carries this status)
    SEARCHD_ERROR   = 1
    # temporary failure, client should retry later
    SEARCHD_RETRY   = 2
    # general success, warning message and command-specific reply follow
    # (RunQueries stores the message under 'warning' and keeps parsing)
    SEARCHD_WARNING = 3
78
79    # :startdoc:
80
    # Known match modes (the values accepted by SetMatchMode)

    # match all query words
    SPH_MATCH_ALL       = 0
    # match any query word
    SPH_MATCH_ANY       = 1
    # match this exact phrase
    SPH_MATCH_PHRASE    = 2
    # match this boolean query
    SPH_MATCH_BOOLEAN   = 3
    # match this extended query
    SPH_MATCH_EXTENDED  = 4
    # match all document IDs w/o fulltext query, apply filters
    SPH_MATCH_FULLSCAN  = 5
    # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
    SPH_MATCH_EXTENDED2 = 6

    # Known ranking modes (ext2 only; the values accepted by SetRankingMode)

    # default mode, phrase proximity major factor and BM25 minor one
    SPH_RANK_PROXIMITY_BM25 = 0
    # statistical mode, BM25 ranking only (faster but worse quality)
    SPH_RANK_BM25           = 1
    # no ranking, all matches get a weight of 1
    SPH_RANK_NONE           = 2
    # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
    SPH_RANK_WORDCOUNT      = 3
    # phrase proximity
    SPH_RANK_PROXIMITY      = 4
    # NOTE(review): the next three rankers are undocumented here; their exact
    # semantics are defined by searchd -- see the Sphinx manual.
    SPH_RANK_MATCHANY       = 5
    SPH_RANK_FIELDMASK      = 6
    SPH_RANK_SPH04          = 7
    # expression ranker: AddQuery sends the ranker expression string
    # (second argument of SetRankingMode) right after the ranker id
    SPH_RANK_EXPR           = 8
114
    # Known sort modes (the values accepted by SetSortMode; every mode except
    # SPH_SORT_RELEVANCE requires a non-empty sortby clause)

    # sort by document relevance desc, then by date
    SPH_SORT_RELEVANCE     = 0
    # sort by the sortby attribute desc, then by relevance desc
    SPH_SORT_ATTR_DESC     = 1
    # sort by the sortby attribute asc, then by relevance desc
    SPH_SORT_ATTR_ASC      = 2
    # sort by time segments (hour/day/week/etc) desc, then by relevance desc
    SPH_SORT_TIME_SEGMENTS = 3
    # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
    SPH_SORT_EXTENDED      = 4
    # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
    SPH_SORT_EXPR          = 5

    # Known filter types (stored under the 'type' key of each filter hash)

    # filter by integer values set (created by SetFilter)
    SPH_FILTER_VALUES      = 0
    # filter by integer range (created by SetFilterRange)
    SPH_FILTER_RANGE       = 1
    # filter by float range (created by SetFilterFloatRange)
    SPH_FILTER_FLOATRANGE  = 2
138
    # Known attribute types (RunQueries uses the type reported in the result
    # schema to decide how each attribute value is read from the reply)

    # this attr is just an integer
    SPH_ATTR_INTEGER   = 1
    # this attr is a timestamp
    SPH_ATTR_TIMESTAMP = 2
    # this attr is an ordinal string number (integer at search time,
    # specially handled at indexing time)
    SPH_ATTR_ORDINAL   = 3
    # this attr is a boolean bit field
    SPH_ATTR_BOOL      = 4
    # this attr is a float
    SPH_ATTR_FLOAT     = 5
    # signed 64-bit integer
    SPH_ATTR_BIGINT    = 6
	# this attr is a string (read with get_string by RunQueries)
	SPH_ATTR_STRING		= 7
    # this attr has multiple values (0 or more); RunQueries reads a count
    # followed by that many integers
	SPH_ATTR_MULTI			= 0x40000001
	# multi-value attr with 64-bit values; RunQueries halves the leading
	# count and reads that many 64-bit integers
	SPH_ATTR_MULTI64		= 0x40000002
159
    # Known grouping functions (the values accepted as +func+ by SetGroupBy;
    # SPH_GROUPBY_DAY is the default set by initialize and ResetGroupBy)

    # group by day
    SPH_GROUPBY_DAY      = 0
    # group by week
    SPH_GROUPBY_WEEK     = 1
    # group by month
    SPH_GROUPBY_MONTH    = 2
    # group by year
    SPH_GROUPBY_YEAR     = 3
    # group by attribute value
    SPH_GROUPBY_ATTR     = 4
    # group by sequential attrs pair
    SPH_GROUPBY_ATTRPAIR = 5
174
175    # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
176    def initialize
177      # per-client-object settings
178      @host          = 'localhost'             # searchd host (default is "localhost")
179      @port          = 9312                    # searchd port (default is 9312)
180
181      # per-query settings
182      @offset        = 0                       # how many records to seek from result-set start (default is 0)
183      @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
184      @mode          = SPH_MATCH_ALL           # query matching mode (default is SPH_MATCH_ALL)
185      @weights       = []                      # per-field weights (default is 1 for all fields)
186      @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
187      @sortby        = ''                      # attribute to sort by (defualt is "")
188      @min_id        = 0                       # min ID to match (default is 0, which means no limit)
189      @max_id        = 0                       # max ID to match (default is 0, which means no limit)
190      @filters       = []                      # search filters
191      @groupby       = ''                      # group-by attribute name
192      @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
193      @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
194      @groupdistinct = ''                      # group-by count-distinct attribute
195      @maxmatches    = 1000                    # max matches to retrieve
196      @cutoff        = 0                       # cutoff to stop searching at (default is 0)
197      @retrycount    = 0                       # distributed retries count
198      @retrydelay    = 0                       # distributed retries delay
199      @anchor        = []                      # geographical anchor point
200      @indexweights  = []                      # per-index weights
201      @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
202      @rankexpr	     = ''                      # ranker expression for SPH_RANK_EXPR
203      @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit)
204      @fieldweights  = {}                      # per-field-name weights
205      @overrides     = []                      # per-query attribute values overrides
206      @select        = '*'                     # select-list (attributes or expressions, with optional aliases)
207
208      # per-reply fields (for single-query case)
209      @error         = ''                      # last error message
210      @warning       = ''                      # last warning message
211
212      @reqs          = []                      # requests storage (for multi-query case)
213      @mbenc         = ''                      # stored mbstring encoding
214    end
215
216    # Get last error message.
217    def GetLastError
218      @error
219    end
220
221    # Get last warning message.
222    def GetLastWarning
223      @warning
224    end
225
226    # Set searchd host name (string) and port (integer).
227    def SetServer(host, port)
228      assert { host.instance_of? String }
229      assert { port.instance_of? Fixnum }
230
231      @host = host
232      @port = port
233    end
234
235    # Set offset and count into result set,
236    # and optionally set max-matches and cutoff limits.
237    def SetLimits(offset, limit, max = 0, cutoff = 0)
238      assert { offset.instance_of? Fixnum }
239      assert { limit.instance_of? Fixnum }
240      assert { max.instance_of? Fixnum }
241      assert { offset >= 0 }
242      assert { limit > 0 }
243      assert { max >= 0 }
244
245      @offset = offset
246      @limit = limit
247      @maxmatches = max if max > 0
248      @cutoff = cutoff if cutoff > 0
249    end
250
251    # Set maximum query time, in milliseconds, per-index,
252    # integer, 0 means "do not limit"
253    def SetMaxQueryTime(max)
254      assert { max.instance_of? Fixnum }
255      assert { max >= 0 }
256      @maxquerytime = max
257    end
258
259    # Set matching mode.
260    def SetMatchMode(mode)
261      assert { mode == SPH_MATCH_ALL \
262            || mode == SPH_MATCH_ANY \
263            || mode == SPH_MATCH_PHRASE \
264            || mode == SPH_MATCH_BOOLEAN \
265            || mode == SPH_MATCH_EXTENDED \
266            || mode == SPH_MATCH_FULLSCAN \
267            || mode == SPH_MATCH_EXTENDED2 }
268
269      @mode = mode
270    end
271
272    # Set ranking mode.
273    def SetRankingMode(ranker, rankexpr = '')
274      assert { ranker == SPH_RANK_PROXIMITY_BM25 \
275            || ranker == SPH_RANK_BM25 \
276            || ranker == SPH_RANK_NONE \
277            || ranker == SPH_RANK_WORDCOUNT \
278            || ranker == SPH_RANK_PROXIMITY \
279            || ranker == SPH_RANK_MATCHANY \
280            || ranker == SPH_RANK_FIELDMASK \
281            || ranker == SPH_RANK_SPH04 \
282            || ranker == SPH_RANK_EXPR }
283
284      @ranker = ranker
285      @rankexpr = rankexpr
286    end
287
288    # Set matches sorting mode.
289    def SetSortMode(mode, sortby = '')
290      assert { mode == SPH_SORT_RELEVANCE \
291            || mode == SPH_SORT_ATTR_DESC \
292            || mode == SPH_SORT_ATTR_ASC \
293            || mode == SPH_SORT_TIME_SEGMENTS \
294            || mode == SPH_SORT_EXTENDED \
295            || mode == SPH_SORT_EXPR }
296      assert { sortby.instance_of? String }
297      assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
298
299      @sort = mode
300      @sortby = sortby
301    end
302
303    # Bind per-field weights by order.
304    #
305    # DEPRECATED; use SetFieldWeights() instead.
306    def SetWeights(weights)
307      assert { weights.instance_of? Array }
308      weights.each do |weight|
309        assert { weight.instance_of? Fixnum }
310      end
311
312      @weights = weights
313    end
314
315    # Bind per-field weights by name.
316    #
    # Takes a hash that maps field name (string) to field weight (integer) as an argument.
318    # * Takes precedence over SetWeights().
319    # * Unknown names will be silently ignored.
320    # * Unbound fields will be silently given a weight of 1.
321    def SetFieldWeights(weights)
322      assert { weights.instance_of? Hash }
323      weights.each do |name, weight|
324        assert { name.instance_of? String }
325        assert { weight.instance_of? Fixnum }
326      end
327
328      @fieldweights = weights
329    end
330
331    # Bind per-index weights by name.
332    def SetIndexWeights(weights)
333      assert { weights.instance_of? Hash }
334      weights.each do |index, weight|
335        assert { index.instance_of? String }
336        assert { weight.instance_of? Fixnum }
337      end
338
339      @indexweights = weights
340    end
341
342    # Set IDs range to match.
343    #
    # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
345    def SetIDRange(min, max)
346      assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
347      assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
348      assert { min <= max }
349
350      @min_id = min
351      @max_id = max
352    end
353
354    # Set values filter.
355    #
356    # Only match those records where <tt>attribute</tt> column values
357    # are in specified set.
358    def SetFilter(attribute, values, exclude = false)
359      assert { attribute.instance_of? String }
360      assert { values.instance_of? Array }
361      assert { !values.empty? }
362
363      if values.instance_of?(Array) && values.size > 0
364        values.each do |value|
365          assert { value.instance_of? Fixnum }
366        end
367
368        @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
369      end
370    end
371
372    # Set range filter.
373    #
374    # Only match those records where <tt>attribute</tt> column value
    # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
376    def SetFilterRange(attribute, min, max, exclude = false)
377      assert { attribute.instance_of? String }
378      assert { min.instance_of? Fixnum or min.instance_of? Bignum }
379      assert { max.instance_of? Fixnum or max.instance_of? Bignum }
380      assert { min <= max }
381
382      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
383    end
384
385    # Set float range filter.
386    #
387    # Only match those records where <tt>attribute</tt> column value
    # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
389    def SetFilterFloatRange(attribute, min, max, exclude = false)
390      assert { attribute.instance_of? String }
391      assert { min.instance_of? Float }
392      assert { max.instance_of? Float }
393      assert { min <= max }
394
395      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
396    end
397
398    # Setup anchor point for geosphere distance calculations.
399    #
400    # Required to use <tt>@geodist</tt> in filters and sorting
401    # distance will be computed to this point. Latitude and longitude
402    # must be in radians.
403    #
404    # * <tt>attrlat</tt> -- is the name of latitude attribute
405    # * <tt>attrlong</tt> -- is the name of longitude attribute
406    # * <tt>lat</tt> -- is anchor point latitude, in radians
407    # * <tt>long</tt> -- is anchor point longitude, in radians
408    def SetGeoAnchor(attrlat, attrlong, lat, long)
409      assert { attrlat.instance_of? String }
410      assert { attrlong.instance_of? String }
411      assert { lat.instance_of? Float }
412      assert { long.instance_of? Float }
413
414      @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
415    end
416
417    # Set grouping attribute and function.
418    #
419    # In grouping mode, all matches are assigned to different groups
420    # based on grouping function value.
421    #
422    # Each group keeps track of the total match count, and the best match
423    # (in this group) according to current sorting function.
424    #
425    # The final result set contains one best match per group, with
426    # grouping function value and matches count attached.
427    #
428    # Groups in result set could be sorted by any sorting clause,
429    # including both document attributes and the following special
430    # internal Sphinx attributes:
431    #
432    # * @id - match document ID;
433    # * @weight, @rank, @relevance -  match weight;
434    # * @group - groupby function value;
435    # * @count - amount of matches in group.
436    #
437    # the default mode is to sort by groupby value in descending order,
438    # ie. by '@group desc'.
439    #
440    # 'total_found' would contain total amount of matching groups over
441    # the whole index.
442    #
443    # WARNING: grouping is done in fixed memory and thus its results
444    # are only approximate; so there might be more groups reported
445    # in total_found than actually present. @count might also
446    # be underestimated.
447    #
448    # For example, if sorting by relevance and grouping by "published"
449    # attribute with SPH_GROUPBY_DAY function, then the result set will
450    # contain one most relevant match per each day when there were any
451    # matches published, with day number and per-day match count attached,
452    # and sorted by day number in descending order (ie. recent days first).
453    def SetGroupBy(attribute, func, groupsort = '@group desc')
454      assert { attribute.instance_of? String }
455      assert { groupsort.instance_of? String }
456      assert { func == SPH_GROUPBY_DAY \
457            || func == SPH_GROUPBY_WEEK \
458            || func == SPH_GROUPBY_MONTH \
459            || func == SPH_GROUPBY_YEAR \
460            || func == SPH_GROUPBY_ATTR \
461            || func == SPH_GROUPBY_ATTRPAIR }
462
463      @groupby = attribute
464      @groupfunc = func
465      @groupsort = groupsort
466    end
467
468    # Set count-distinct attribute for group-by queries.
469    def SetGroupDistinct(attribute)
470      assert { attribute.instance_of? String }
471      @groupdistinct = attribute
472    end
473
474    # Set distributed retries count and delay.
475    def SetRetries(count, delay = 0)
476      assert { count.instance_of? Fixnum }
477      assert { delay.instance_of? Fixnum }
478
479      @retrycount = count
480      @retrydelay = delay
481    end
482
483    # Set attribute values override
484    #
485	  # There can be only one override per attribute.
486	  # +values+ must be a hash that maps document IDs to attribute values.
487	  def SetOverride(attrname, attrtype, values)
488      assert { attrname.instance_of? String }
489      assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
490      assert { values.instance_of? Hash }
491
492      @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
493	  end
494
495    # Set select-list (attributes or expressions), SQL-like syntax.
496    def SetSelect(select)
497		  assert { select.instance_of? String }
498		  @select = select
499		end
500
501    # Clear all filters (for multi-queries).
502    def ResetFilters
503      @filters = []
504      @anchor = []
505    end
506
507    # Clear groupby settings (for multi-queries).
508    def ResetGroupBy
509      @groupby       = ''
510      @groupfunc     = SPH_GROUPBY_DAY
511      @groupsort     = '@group desc'
512      @groupdistinct = ''
513    end
514
515    # Clear all attribute value overrides (for multi-queries).
516    def ResetOverrides
517      @overrides = []
518    end
519
520    # Connect to searchd server and run given search query.
521    #
    # <tt>query</tt> is query string
    #
    # <tt>index</tt> is index name (or names) to query. default value is "*" which means
525    # to query all indexes. Accepted characters for index names are letters, numbers,
526    # dash, and underscore; everything else is considered a separator. Therefore,
527    # all the following calls are valid and will search two indexes:
528    #
529    #   sphinx.Query('test query', 'main delta')
530    #   sphinx.Query('test query', 'main;delta')
531    #   sphinx.Query('test query', 'main, delta')
532    #
533    # Index order matters. If identical IDs are found in two or more indexes,
534    # weight and attribute values from the very last matching index will be used
535    # for sorting and returning to client. Therefore, in the example above,
536    # matches from "delta" index will always "win" over matches from "main".
537    #
538    # Returns false on failure.
539    # Returns hash which has the following keys on success:
540    #
541    # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
542    # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
543    # * <tt>'total_found'</tt> -- total amount of matching documents in index
544    # * <tt>'time'</tt> -- search time
545    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
546    def Query(query, index = '*', comment = '')
547      assert { @reqs.empty? }
548      @reqs = []
549
550      self.AddQuery(query, index, comment)
551      results = self.RunQueries
552
553      # probably network error; error message should be already filled
554      return false unless results.instance_of?(Array)
555
556      @error = results[0]['error']
557      @warning = results[0]['warning']
558
559      return false if results[0]['status'] == SEARCHD_ERROR
560      return results[0]
561    end
562
563    # Add query to batch.
564    #
565    # Batch queries enable searchd to perform internal optimizations,
566    # if possible; and reduce network connection overheads in all cases.
567    #
    # For instance, running exactly the same query with different
    # groupby settings will enable searchd to perform the expensive
    # full-text search and ranking operation only once, but compute
    # multiple groupby results from its output.
572    #
573    # Parameters are exactly the same as in <tt>Query</tt> call.
574    # Returns index to results array returned by <tt>RunQueries</tt> call.
575    def AddQuery(query, index = '*', comment = '')
576      # build request
577
578      # mode and limits
579      request = Request.new
580      request.put_int @offset, @limit, @mode, @ranker
581      # process the 'expr' ranker
582      if @ranker == SPH_RANK_EXPR
583        request.put_string @rankexpr
584      end
585
586      request.put_int @sort
587
588      request.put_string @sortby
589      # query itself
590      request.put_string query
591      # weights
592      request.put_int_array @weights
593      # indexes
594      request.put_string index
595      # id64 range marker
596      request.put_int 1
597      # id64 range
598      request.put_int64 @min_id.to_i, @max_id.to_i
599
600      # filters
601      request.put_int @filters.length
602      @filters.each do |filter|
603        request.put_string filter['attr']
604        request.put_int filter['type']
605
606        case filter['type']
607          when SPH_FILTER_VALUES
608            request.put_int64_array filter['values']
609          when SPH_FILTER_RANGE
610            request.put_int64 filter['min'], filter['max']
611          when SPH_FILTER_FLOATRANGE
612            request.put_float filter['min'], filter['max']
613          else
614            raise SphinxInternalError, 'Internal error: unhandled filter type'
615        end
616        request.put_int filter['exclude'] ? 1 : 0
617      end
618
619      # group-by clause, max-matches count, group-sort clause, cutoff count
620      request.put_int @groupfunc
621      request.put_string @groupby
622      request.put_int @maxmatches
623      request.put_string @groupsort
624      request.put_int @cutoff, @retrycount, @retrydelay
625      request.put_string @groupdistinct
626
627      # anchor point
628      if @anchor.empty?
629        request.put_int 0
630      else
631        request.put_int 1
632        request.put_string @anchor['attrlat'], @anchor['attrlong']
633        request.put_float @anchor['lat'], @anchor['long']
634      end
635
636      # per-index weights
637      request.put_int @indexweights.length
638      @indexweights.each do |idx, weight|
639        request.put_string idx
640        request.put_int weight
641      end
642
643      # max query time
644      request.put_int @maxquerytime
645
646      # per-field weights
647      request.put_int @fieldweights.length
648      @fieldweights.each do |field, weight|
649        request.put_string field
650        request.put_int weight
651      end
652
653      # comment
654      request.put_string comment
655
656      # attribute overrides
657      request.put_int @overrides.length
658      for entry in @overrides do
659        request.put_string entry['attr']
660        request.put_int entry['type'], entry['values'].size
661        entry['values'].each do |id, val|
662          assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
663          assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
664
665          request.put_int64 id
666          case entry['type']
667            when SPH_ATTR_FLOAT
668              request.put_float val
669            when SPH_ATTR_BIGINT
670              request.put_int64 val
671            else
672              request.put_int val
673          end
674        end
675      end
676
677      # select-list
678      request.put_string @select
679
680      # store request to requests array
681      @reqs << request.to_s;
682      return @reqs.length - 1
683    end
684
685    # Run queries batch.
686    #
687    # Returns an array of result sets on success.
688    # Returns false on network IO failure.
689    #
    # Each result set in the returned array is a hash which contains
    # the same keys as the hash returned by <tt>Query</tt>, plus:
692    #
693    # * <tt>'error'</tt> -- search error for this query
694    # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
695    def RunQueries
696      if @reqs.empty?
697        @error = 'No queries defined, issue AddQuery() first'
698        return false
699      end
700
701      req = @reqs.join('')
702      nreqs = @reqs.length
703      @reqs = []
704      response = PerformRequest(:search, req, nreqs)
705
706      # parse response
707      begin
708        results = []
709        ires = 0
710        while ires < nreqs
711          ires += 1
712          result = {}
713
714          result['error'] = ''
715          result['warning'] = ''
716
717          # extract status
718          status = result['status'] = response.get_int
719          if status != SEARCHD_OK
720            message = response.get_string
721            if status == SEARCHD_WARNING
722              result['warning'] = message
723            else
724              result['error'] = message
725              results << result
726              next
727            end
728          end
729
730          # read schema
731          fields = []
732          attrs = {}
733          attrs_names_in_order = []
734
735          nfields = response.get_int
736          while nfields > 0
737            nfields -= 1
738            fields << response.get_string
739          end
740          result['fields'] = fields
741
742          nattrs = response.get_int
743          while nattrs > 0
744            nattrs -= 1
745            attr = response.get_string
746            type = response.get_int
747            attrs[attr] = type
748            attrs_names_in_order << attr
749          end
750          result['attrs'] = attrs
751
752          # read match count
753          count = response.get_int
754          id64 = response.get_int
755
756          # read matches
757          result['matches'] = []
758          while count > 0
759            count -= 1
760
761            if id64 != 0
762              doc = response.get_int64
763              weight = response.get_int
764            else
765              doc, weight = response.get_ints(2)
766            end
767
768            r = {} # This is a single result put in the result['matches'] array
769            r['id'] = doc
770            r['weight'] = weight
771            attrs_names_in_order.each do |a|
772              r['attrs'] ||= {}
773
774              case attrs[a]
775                when SPH_ATTR_BIGINT
776                  # handle 64-bit ints
777                  r['attrs'][a] = response.get_int64
778                when SPH_ATTR_FLOAT
779                  # handle floats
780                  r['attrs'][a] = response.get_float
781				when SPH_ATTR_STRING
782				  # handle string
783				  r['attrs'][a] = response.get_string
784                else
785                  # handle everything else as unsigned ints
786                  val = response.get_int
787                  if attrs[a]==SPH_ATTR_MULTI
788                    r['attrs'][a] = []
789                    1.upto(val) do
790                      r['attrs'][a] << response.get_int
791                    end
792                  elsif attrs[a]==SPH_ATTR_MULTI64
793                    r['attrs'][a] = []
794					val = val/2
795                    1.upto(val) do
796                      r['attrs'][a] << response.get_int64
797                    end
798                  else
799                    r['attrs'][a] = val
800                  end
801              end
802            end
803            result['matches'] << r
804          end
805          result['total'], result['total_found'], msecs, words = response.get_ints(4)
806          result['time'] = '%.3f' % (msecs / 1000.0)
807
808          result['words'] = {}
809          while words > 0
810            words -= 1
811            word = response.get_string
812            docs, hits = response.get_ints(2)
813            result['words'][word] = { 'docs' => docs, 'hits' => hits }
814          end
815
816          results << result
817        end
818      #rescue EOFError
819      #  @error = 'incomplete reply'
820      #  raise SphinxResponseError, @error
821      end
822
823      return results
824    end
825
    # Connect to searchd server and generate excerpts from given documents.
    #
    # * <tt>docs</tt> -- an array of strings which represent the documents' contents
    # * <tt>index</tt> -- a string specifying the index whose settings will be used
    # for stemming, lexing and case folding
831    # * <tt>words</tt> -- a string which contains the words to highlight
832    # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
833    #
834    # You can use following parameters:
835    # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
    # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
837    # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
838    # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
    # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
840    # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
841    # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
842    # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
843    # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
844    #
845    # Returns false on failure.
846    # Returns an array of string excerpts on success.
847    def BuildExcerpts(docs, index, words, opts = {})
848      assert { docs.instance_of? Array }
849      assert { index.instance_of? String }
850      assert { words.instance_of? String }
851      assert { opts.instance_of? Hash }
852
853      # fixup options
854      opts['before_match'] ||= '<b>';
855      opts['after_match'] ||= '</b>';
856      opts['chunk_separator'] ||= ' ... ';
857	  opts['html_strip_mode'] ||= 'index';
858      opts['limit'] ||= 256;
859	  opts['limit_passages'] ||= 0;
860	  opts['limit_words'] ||= 0;
861      opts['around'] ||= 5;
862	  opts['start_passage_id'] ||= 1;
863      opts['exact_phrase'] ||= false
864      opts['single_passage'] ||= false
865      opts['use_boundaries'] ||= false
866      opts['weight_order'] ||= false
867	  opts['load_files'] ||= false
868	  opts['allow_empty'] ||= false
869
870      # build request
871
872      # v.1.0 req
873      flags = 1
874      flags |= 2  if opts['exact_phrase']
875      flags |= 4  if opts['single_passage']
876      flags |= 8  if opts['use_boundaries']
877      flags |= 16 if opts['weight_order']
878	  flags |= 32 if opts['query_mode']
879	  flags |= 64 if opts['force_all_words']
880	  flags |= 128 if opts['load_files']
881	  flags |= 256 if opts['allow_empty']
882
883      request = Request.new
884      request.put_int 0, flags # mode=0, flags=1 (remove spaces)
885      # req index
886      request.put_string index
887      # req words
888      request.put_string words
889
890      # options
891      request.put_string opts['before_match']
892      request.put_string opts['after_match']
893      request.put_string opts['chunk_separator']
894      request.put_int opts['limit'].to_i, opts['around'].to_i
895
896	  # options v1.2
897	  request.put_int opts['limit_passages'].to_i
898	  request.put_int opts['limit_words'].to_i
899	  request.put_int opts['start_passage_id'].to_i
900	  request.put_string opts['html_strip_mode']
901
902      # documents
903      request.put_int docs.size
904      docs.each do |doc|
905        assert { doc.instance_of? String }
906
907        request.put_string doc
908      end
909
910      response = PerformRequest(:excerpt, request)
911
912      # parse response
913      begin
914        res = []
915        docs.each do |doc|
916          res << response.get_string
917        end
918      rescue EOFError
919        @error = 'incomplete reply'
920        raise SphinxResponseError, @error
921      end
922      return res
923    end
924
925    # Connect to searchd server, and generate keyword list for a given query.
926    #
927    # Returns an array of words on success.
928    def BuildKeywords(query, index, hits)
929      assert { query.instance_of? String }
930      assert { index.instance_of? String }
931      assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
932
933      # build request
934      request = Request.new
935      # v.1.0 req
936      request.put_string query # req query
937      request.put_string index # req index
938      request.put_int hits ? 1 : 0
939
940      response = PerformRequest(:keywords, request)
941
942      # parse response
943      begin
944        res = []
945        nwords = response.get_int
946        0.upto(nwords - 1) do |i|
947          tokenized = response.get_string
948          normalized = response.get_string
949
950          entry = { 'tokenized' => tokenized, 'normalized' => normalized }
951          entry['docs'], entry['hits'] = response.get_ints(2) if hits
952
953          res << entry
954        end
955      rescue EOFError
956        @error = 'incomplete reply'
957        raise SphinxResponseError, @error
958      end
959
960      return res
961    end
962
963    # Batch update given attributes in given rows in given indexes.
964    #
965    # * +index+ is a name of the index to be updated
966    # * +attrs+ is an array of attribute name strings.
    # * +values+ is a hash where key is document id, and value is an array of
    # new attribute values
    # * +mva+ identifies whether the updated attributes are MVA (each value is
    # then an array of integers)
970    #
971    # Returns number of actually updated documents (0 or more) on success.
    # Raises a SphinxError subclass on failure (e.g. SphinxResponseError on an incomplete reply).
973    #
974    # Usage example:
975    #    sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
976    def UpdateAttributes(index, attrs, values, mva = false)
977      # verify everything
978      assert { index.instance_of? String }
979      assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
980
981      assert { attrs.instance_of? Array }
982      attrs.each do |attr|
983        assert { attr.instance_of? String }
984      end
985
986      assert { values.instance_of? Hash }
987      values.each do |id, entry|
988        assert { id.instance_of? Fixnum }
989        assert { entry.instance_of? Array }
990        assert { entry.length == attrs.length }
991        entry.each do |v|
992          if mva
993            assert { v.instance_of? Array }
994            v.each { |vv| assert { vv.instance_of? Fixnum } }
995          else
996            assert { v.instance_of? Fixnum }
997          end
998        end
999      end
1000
1001      # build request
1002      request = Request.new
1003      request.put_string index
1004
1005      request.put_int attrs.length
1006      for attr in attrs
1007        request.put_string attr
1008        request.put_int mva ? 1 : 0
1009      end
1010
1011      request.put_int values.length
1012      values.each do |id, entry|
1013        request.put_int64 id
1014        if mva
1015          entry.each { |v| request.put_int_array v }
1016        else
1017          request.put_int(*entry)
1018        end
1019      end
1020
1021      response = PerformRequest(:update, request)
1022
1023      # parse response
1024      begin
1025        return response.get_int
1026      rescue EOFError
1027        @error = 'incomplete reply'
1028        raise SphinxResponseError, @error
1029      end
1030    end
1031
1032    protected
1033
1034      # Connect to searchd server.
1035      def Connect
1036        begin
1037          if @host[0,1]=='/'
1038            sock = UNIXSocket.new(@host)
1039          else
1040            sock = TCPSocket.new(@host, @port)
1041          end
1042        rescue => err
1043          @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1044          raise SphinxConnectError, @error
1045        end
1046
1047        v = sock.recv(4).unpack('N*').first
1048        if v < 1
1049          sock.close
1050          @error = "expected searchd protocol version 1+, got version '#{v}'"
1051          raise SphinxConnectError, @error
1052        end
1053
1054        sock.send([1].pack('N'), 0)
1055        sock
1056      end
1057
1058      # Get and check response packet from searchd server.
1059      def GetResponse(sock, client_version)
1060        response = ''
1061        len = 0
1062
1063        header = sock.recv(8)
1064        if header.length == 8
1065          status, ver, len = header.unpack('n2N')
1066          left = len.to_i
1067          while left > 0 do
1068            begin
1069              chunk = sock.recv(left)
1070              if chunk
1071                response << chunk
1072                left -= chunk.length
1073              end
1074            rescue EOFError
1075              break
1076            end
1077          end
1078        end
1079        sock.close
1080
1081        # check response
1082        read = response.length
1083        if response.empty? or read != len.to_i
1084          @error = response.empty? \
1085            ? 'received zero-sized searchd response' \
1086            : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1087          raise SphinxResponseError, @error
1088        end
1089
1090        # check status
1091        if (status == SEARCHD_WARNING)
1092          wlen = response[0, 4].unpack('N*').first
1093          @warning = response[4, wlen]
1094          return response[4 + wlen, response.length - 4 - wlen]
1095        end
1096
1097        if status == SEARCHD_ERROR
1098          @error = 'searchd error: ' + response[4, response.length - 4]
1099          raise SphinxInternalError, @error
1100        end
1101
1102        if status == SEARCHD_RETRY
1103          @error = 'temporary searchd error: ' + response[4, response.length - 4]
1104          raise SphinxTemporaryError, @error
1105        end
1106
1107        unless status == SEARCHD_OK
1108          @error = "unknown status code: '#{status}'"
1109          raise SphinxUnknownError, @error
1110        end
1111
1112        # check version
1113        if ver < client_version
1114          @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1115            "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1116        end
1117
1118        return response
1119      end
1120
1121      # Connect, send query, get response.
1122      def PerformRequest(command, request, additional = nil)
1123        cmd = command.to_s.upcase
1124        command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1125        command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1126
1127        sock = self.Connect
1128        len = request.to_s.length + (additional != nil ? 8 : 0)
1129        header = [command_id, command_ver, len].pack('nnN')
1130        header << [0, additional].pack('NN') if additional != nil
1131        sock.send(header + request.to_s, 0)
1132        response = self.GetResponse(sock, command_ver)
1133        return Response.new(response)
1134      end
1135
1136      # :stopdoc:
1137      def assert
1138        raise 'Assertion failed!' unless yield if $DEBUG
1139      end
1140      # :startdoc:
1141  end
1142end
1143