1# = client.rb - Sphinx Client API 2# 3# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>. 4# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk 5# License:: Distributes under the same terms as Ruby 6# Version:: 0.9.9-r1299 7# Website:: http://kpumuk.info/projects/ror-plugins/sphinx 8# 9# This library is distributed under the terms of the Ruby license. 10# You can freely distribute/modify this library. 11 12# ==Sphinx Client API 13# 14# The Sphinx Client API is used to communicate with <tt>searchd</tt> 15# daemon and get search results from Sphinx. 16# 17# ===Usage 18# 19# sphinx = Sphinx::Client.new 20# result = sphinx.Query('test') 21# ids = result['matches'].map { |match| match['id'] }.join(',') 22# posts = Post.find :all, :conditions => "id IN (#{ids})" 23# 24# docs = posts.map(&:body) 25# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test') 26 27require 'socket' 28 29module Sphinx 30 # :stopdoc: 31 32 class SphinxError < StandardError; end 33 class SphinxArgumentError < SphinxError; end 34 class SphinxConnectError < SphinxError; end 35 class SphinxResponseError < SphinxError; end 36 class SphinxInternalError < SphinxError; end 37 class SphinxTemporaryError < SphinxError; end 38 class SphinxUnknownError < SphinxError; end 39 40 # :startdoc: 41 42 class Client 43 44 # :stopdoc: 45 46 # Known searchd commands 47 48 # search command 49 SEARCHD_COMMAND_SEARCH = 0 50 # excerpt command 51 SEARCHD_COMMAND_EXCERPT = 1 52 # update command 53 SEARCHD_COMMAND_UPDATE = 2 54 # keywords command 55 SEARCHD_COMMAND_KEYWORDS = 3 56 57 # Current client-side command implementation versions 58 59 # search command version 60 VER_COMMAND_SEARCH = 0x119 61 # excerpt command version 62 VER_COMMAND_EXCERPT = 0x102 63 # update command version 64 VER_COMMAND_UPDATE = 0x102 65 # keywords command version 66 VER_COMMAND_KEYWORDS = 0x100 67 68 # Known searchd status codes 69 70 # general success, command-specific reply follows 71 SEARCHD_OK = 0 72 # general failure, command-specific reply may follow 73 SEARCHD_ERROR = 1 74 # temporaty failure, client should retry later 75 SEARCHD_RETRY = 2 76 # general success, warning message and command-specific reply follow 77 SEARCHD_WARNING = 3 78 79 # :startdoc: 80 81 # Known match modes 82 83 # match all query words 84 SPH_MATCH_ALL = 0 85 # match any query word 86 SPH_MATCH_ANY = 1 87 # match this exact phrase 88 SPH_MATCH_PHRASE = 2 89 # match this boolean query 90 SPH_MATCH_BOOLEAN = 3 91 # match this extended query 92 SPH_MATCH_EXTENDED = 4 93 # match all document IDs w/o fulltext query, apply filters 94 SPH_MATCH_FULLSCAN = 5 95 # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE) 96 SPH_MATCH_EXTENDED2 = 6 97 98 # Known ranking modes (ext2 only) 99 100 # default mode, phrase proximity major factor and BM25 minor one 101 SPH_RANK_PROXIMITY_BM25 = 0 102 # statistical mode, BM25 ranking only (faster but worse quality) 103 SPH_RANK_BM25 = 1 104 # no ranking, all matches get a weight of 1 105 SPH_RANK_NONE = 2 106 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 107 SPH_RANK_WORDCOUNT = 3 108 # phrase proximity 109 SPH_RANK_PROXIMITY = 4 110 SPH_RANK_MATCHANY = 5 111 SPH_RANK_FIELDMASK = 6 112 SPH_RANK_SPH04 = 7 113 SPH_RANK_EXPR = 8 114 115 # Known sort modes 116 117 # sort by document relevance desc, then by date 118 SPH_SORT_RELEVANCE = 0 119 # sort by document date desc, then by relevance desc 120 SPH_SORT_ATTR_DESC = 1 121 # sort by document date asc, then by relevance desc 122 SPH_SORT_ATTR_ASC = 2 123 # sort by time segments (hour/day/week/etc) desc, then by relevance desc 124 SPH_SORT_TIME_SEGMENTS = 3 125 # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC") 126 SPH_SORT_EXTENDED = 4 127 # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)") 128 SPH_SORT_EXPR = 5 129 130 # Known filter types 131 132 # filter by integer values set 133 SPH_FILTER_VALUES = 0 134 # filter by integer range 135 SPH_FILTER_RANGE = 1 136 # filter by float range 137 SPH_FILTER_FLOATRANGE = 2 138 139 # Known attribute types 140 141 # this attr is just an integer 142 SPH_ATTR_INTEGER = 1 143 # this attr is a timestamp 144 SPH_ATTR_TIMESTAMP = 2 145 # this attr is an ordinal string number (integer at search time, 146 # specially handled at indexing time) 147 SPH_ATTR_ORDINAL = 3 148 # this attr is a boolean bit field 149 SPH_ATTR_BOOL = 4 150 # this attr is a float 151 SPH_ATTR_FLOAT = 5 152 # signed 64-bit integer 153 SPH_ATTR_BIGINT = 6 154 # string 155 SPH_ATTR_STRING = 7 156 # this attr has multiple values (0 or more) 157 SPH_ATTR_MULTI = 0x40000001 158 SPH_ATTR_MULTI64 = 0x40000002 159 160 # Known grouping functions 161 162 # group by day 163 SPH_GROUPBY_DAY = 0 164 # group by week 165 SPH_GROUPBY_WEEK = 1 166 # group by month 167 SPH_GROUPBY_MONTH = 2 168 # group by year 169 SPH_GROUPBY_YEAR = 3 170 # group by attribute value 171 SPH_GROUPBY_ATTR = 4 172 # group by sequential attrs pair 173 SPH_GROUPBY_ATTRPAIR = 5 174 175 # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values. 176 def initialize 177 # per-client-object settings 178 @host = 'localhost' # searchd host (default is "localhost") 179 @port = 9312 # searchd port (default is 9312) 180 181 # per-query settings 182 @offset = 0 # how many records to seek from result-set start (default is 0) 183 @limit = 20 # how many records to return from result-set starting at offset (default is 20) 184 @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL) 185 @weights = [] # per-field weights (default is 1 for all fields) 186 @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 187 @sortby = '' # attribute to sort by (defualt is "") 188 @min_id = 0 # min ID to match (default is 0, which means no limit) 189 @max_id = 0 # max ID to match (default is 0, which means no limit) 190 @filters = [] # search filters 191 @groupby = '' # group-by attribute name 192 @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with 193 @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) 194 @groupdistinct = '' # group-by count-distinct attribute 195 @maxmatches = 1000 # max matches to retrieve 196 @cutoff = 0 # cutoff to stop searching at (default is 0) 197 @retrycount = 0 # distributed retries count 198 @retrydelay = 0 # distributed retries delay 199 @anchor = [] # geographical anchor point 200 @indexweights = [] # per-index weights 201 @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25) 202 @rankexpr = '' # ranker expression for SPH_RANK_EXPR 203 @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) 204 @fieldweights = {} # per-field-name weights 205 @overrides = [] # per-query attribute values overrides 206 @select = '*' # select-list (attributes or expressions, with optional aliases) 207 208 # per-reply fields (for single-query case) 209 @error = '' # last error message 210 @warning = '' # last warning message 211 212 @reqs = [] # requests storage (for multi-query case) 213 @mbenc = '' # stored mbstring encoding 214 end 215 216 # Get last error message. 217 def GetLastError 218 @error 219 end 220 221 # Get last warning message. 222 def GetLastWarning 223 @warning 224 end 225 226 # Set searchd host name (string) and port (integer). 227 def SetServer(host, port) 228 assert { host.instance_of? String } 229 assert { port.instance_of? Fixnum } 230 231 @host = host 232 @port = port 233 end 234 235 # Set offset and count into result set, 236 # and optionally set max-matches and cutoff limits. 237 def SetLimits(offset, limit, max = 0, cutoff = 0) 238 assert { offset.instance_of? Fixnum } 239 assert { limit.instance_of? Fixnum } 240 assert { max.instance_of? Fixnum } 241 assert { offset >= 0 } 242 assert { limit > 0 } 243 assert { max >= 0 } 244 245 @offset = offset 246 @limit = limit 247 @maxmatches = max if max > 0 248 @cutoff = cutoff if cutoff > 0 249 end 250 251 # Set maximum query time, in milliseconds, per-index, 252 # integer, 0 means "do not limit" 253 def SetMaxQueryTime(max) 254 assert { max.instance_of? Fixnum } 255 assert { max >= 0 } 256 @maxquerytime = max 257 end 258 259 # Set matching mode. 260 def SetMatchMode(mode) 261 assert { mode == SPH_MATCH_ALL \ 262 || mode == SPH_MATCH_ANY \ 263 || mode == SPH_MATCH_PHRASE \ 264 || mode == SPH_MATCH_BOOLEAN \ 265 || mode == SPH_MATCH_EXTENDED \ 266 || mode == SPH_MATCH_FULLSCAN \ 267 || mode == SPH_MATCH_EXTENDED2 } 268 269 @mode = mode 270 end 271 272 # Set ranking mode. 273 def SetRankingMode(ranker, rankexpr = '') 274 assert { ranker == SPH_RANK_PROXIMITY_BM25 \ 275 || ranker == SPH_RANK_BM25 \ 276 || ranker == SPH_RANK_NONE \ 277 || ranker == SPH_RANK_WORDCOUNT \ 278 || ranker == SPH_RANK_PROXIMITY \ 279 || ranker == SPH_RANK_MATCHANY \ 280 || ranker == SPH_RANK_FIELDMASK \ 281 || ranker == SPH_RANK_SPH04 \ 282 || ranker == SPH_RANK_EXPR } 283 284 @ranker = ranker 285 @rankexpr = rankexpr 286 end 287 288 # Set matches sorting mode. 289 def SetSortMode(mode, sortby = '') 290 assert { mode == SPH_SORT_RELEVANCE \ 291 || mode == SPH_SORT_ATTR_DESC \ 292 || mode == SPH_SORT_ATTR_ASC \ 293 || mode == SPH_SORT_TIME_SEGMENTS \ 294 || mode == SPH_SORT_EXTENDED \ 295 || mode == SPH_SORT_EXPR } 296 assert { sortby.instance_of? String } 297 assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? } 298 299 @sort = mode 300 @sortby = sortby 301 end 302 303 # Bind per-field weights by order. 304 # 305 # DEPRECATED; use SetFieldWeights() instead. 306 def SetWeights(weights) 307 assert { weights.instance_of? Array } 308 weights.each do |weight| 309 assert { weight.instance_of? Fixnum } 310 end 311 312 @weights = weights 313 end 314 315 # Bind per-field weights by name. 316 # 317 # Takes string (field name) to integer name (field weight) hash as an argument. 318 # * Takes precedence over SetWeights(). 319 # * Unknown names will be silently ignored. 320 # * Unbound fields will be silently given a weight of 1. 321 def SetFieldWeights(weights) 322 assert { weights.instance_of? Hash } 323 weights.each do |name, weight| 324 assert { name.instance_of? String } 325 assert { weight.instance_of? Fixnum } 326 end 327 328 @fieldweights = weights 329 end 330 331 # Bind per-index weights by name. 332 def SetIndexWeights(weights) 333 assert { weights.instance_of? Hash } 334 weights.each do |index, weight| 335 assert { index.instance_of? String } 336 assert { weight.instance_of? Fixnum } 337 end 338 339 @indexweights = weights 340 end 341 342 # Set IDs range to match. 343 # 344 # Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive). 345 def SetIDRange(min, max) 346 assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) } 347 assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) } 348 assert { min <= max } 349 350 @min_id = min 351 @max_id = max 352 end 353 354 # Set values filter. 355 # 356 # Only match those records where <tt>attribute</tt> column values 357 # are in specified set. 358 def SetFilter(attribute, values, exclude = false) 359 assert { attribute.instance_of? String } 360 assert { values.instance_of? Array } 361 assert { !values.empty? } 362 363 if values.instance_of?(Array) && values.size > 0 364 values.each do |value| 365 assert { value.instance_of? Fixnum } 366 end 367 368 @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values } 369 end 370 end 371 372 # Set range filter. 373 # 374 # Only match those records where <tt>attribute</tt> column value 375 # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>). 376 def SetFilterRange(attribute, min, max, exclude = false) 377 assert { attribute.instance_of? String } 378 assert { min.instance_of? Fixnum or min.instance_of? Bignum } 379 assert { max.instance_of? Fixnum or max.instance_of? Bignum } 380 assert { min <= max } 381 382 @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } 383 end 384 385 # Set float range filter. 386 # 387 # Only match those records where <tt>attribute</tt> column value 388 # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>). 389 def SetFilterFloatRange(attribute, min, max, exclude = false) 390 assert { attribute.instance_of? String } 391 assert { min.instance_of? Float } 392 assert { max.instance_of? Float } 393 assert { min <= max } 394 395 @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } 396 end 397 398 # Setup anchor point for geosphere distance calculations. 399 # 400 # Required to use <tt>@geodist</tt> in filters and sorting 401 # distance will be computed to this point. Latitude and longitude 402 # must be in radians. 403 # 404 # * <tt>attrlat</tt> -- is the name of latitude attribute 405 # * <tt>attrlong</tt> -- is the name of longitude attribute 406 # * <tt>lat</tt> -- is anchor point latitude, in radians 407 # * <tt>long</tt> -- is anchor point longitude, in radians 408 def SetGeoAnchor(attrlat, attrlong, lat, long) 409 assert { attrlat.instance_of? String } 410 assert { attrlong.instance_of? String } 411 assert { lat.instance_of? Float } 412 assert { long.instance_of? Float } 413 414 @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long } 415 end 416 417 # Set grouping attribute and function. 418 # 419 # In grouping mode, all matches are assigned to different groups 420 # based on grouping function value. 421 # 422 # Each group keeps track of the total match count, and the best match 423 # (in this group) according to current sorting function. 424 # 425 # The final result set contains one best match per group, with 426 # grouping function value and matches count attached. 427 # 428 # Groups in result set could be sorted by any sorting clause, 429 # including both document attributes and the following special 430 # internal Sphinx attributes: 431 # 432 # * @id - match document ID; 433 # * @weight, @rank, @relevance - match weight; 434 # * @group - groupby function value; 435 # * @count - amount of matches in group. 436 # 437 # the default mode is to sort by groupby value in descending order, 438 # ie. by '@group desc'. 439 # 440 # 'total_found' would contain total amount of matching groups over 441 # the whole index. 442 # 443 # WARNING: grouping is done in fixed memory and thus its results 444 # are only approximate; so there might be more groups reported 445 # in total_found than actually present. @count might also 446 # be underestimated. 447 # 448 # For example, if sorting by relevance and grouping by "published" 449 # attribute with SPH_GROUPBY_DAY function, then the result set will 450 # contain one most relevant match per each day when there were any 451 # matches published, with day number and per-day match count attached, 452 # and sorted by day number in descending order (ie. recent days first). 453 def SetGroupBy(attribute, func, groupsort = '@group desc') 454 assert { attribute.instance_of? String } 455 assert { groupsort.instance_of? String } 456 assert { func == SPH_GROUPBY_DAY \ 457 || func == SPH_GROUPBY_WEEK \ 458 || func == SPH_GROUPBY_MONTH \ 459 || func == SPH_GROUPBY_YEAR \ 460 || func == SPH_GROUPBY_ATTR \ 461 || func == SPH_GROUPBY_ATTRPAIR } 462 463 @groupby = attribute 464 @groupfunc = func 465 @groupsort = groupsort 466 end 467 468 # Set count-distinct attribute for group-by queries. 469 def SetGroupDistinct(attribute) 470 assert { attribute.instance_of? String } 471 @groupdistinct = attribute 472 end 473 474 # Set distributed retries count and delay. 475 def SetRetries(count, delay = 0) 476 assert { count.instance_of? Fixnum } 477 assert { delay.instance_of? Fixnum } 478 479 @retrycount = count 480 @retrydelay = delay 481 end 482 483 # Set attribute values override 484 # 485 # There can be only one override per attribute. 486 # +values+ must be a hash that maps document IDs to attribute values. 487 def SetOverride(attrname, attrtype, values) 488 assert { attrname.instance_of? String } 489 assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) } 490 assert { values.instance_of? Hash } 491 492 @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values } 493 end 494 495 # Set select-list (attributes or expressions), SQL-like syntax. 496 def SetSelect(select) 497 assert { select.instance_of? String } 498 @select = select 499 end 500 501 # Clear all filters (for multi-queries). 502 def ResetFilters 503 @filters = [] 504 @anchor = [] 505 end 506 507 # Clear groupby settings (for multi-queries). 508 def ResetGroupBy 509 @groupby = '' 510 @groupfunc = SPH_GROUPBY_DAY 511 @groupsort = '@group desc' 512 @groupdistinct = '' 513 end 514 515 # Clear all attribute value overrides (for multi-queries). 516 def ResetOverrides 517 @overrides = [] 518 end 519 520 # Connect to searchd server and run given search query. 521 # 522 # <tt>query</tt> is query string 523 524 # <tt>index</tt> is index name (or names) to query. default value is "*" which means 525 # to query all indexes. Accepted characters for index names are letters, numbers, 526 # dash, and underscore; everything else is considered a separator. Therefore, 527 # all the following calls are valid and will search two indexes: 528 # 529 # sphinx.Query('test query', 'main delta') 530 # sphinx.Query('test query', 'main;delta') 531 # sphinx.Query('test query', 'main, delta') 532 # 533 # Index order matters. If identical IDs are found in two or more indexes, 534 # weight and attribute values from the very last matching index will be used 535 # for sorting and returning to client. Therefore, in the example above, 536 # matches from "delta" index will always "win" over matches from "main". 537 # 538 # Returns false on failure. 539 # Returns hash which has the following keys on success: 540 # 541 # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id. 542 # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h) 543 # * <tt>'total_found'</tt> -- total amount of matching documents in index 544 # * <tt>'time'</tt> -- search time 545 # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash 546 def Query(query, index = '*', comment = '') 547 assert { @reqs.empty? } 548 @reqs = [] 549 550 self.AddQuery(query, index, comment) 551 results = self.RunQueries 552 553 # probably network error; error message should be already filled 554 return false unless results.instance_of?(Array) 555 556 @error = results[0]['error'] 557 @warning = results[0]['warning'] 558 559 return false if results[0]['status'] == SEARCHD_ERROR 560 return results[0] 561 end 562 563 # Add query to batch. 564 # 565 # Batch queries enable searchd to perform internal optimizations, 566 # if possible; and reduce network connection overheads in all cases. 567 # 568 # For instance, running exactly the same query with different 569 # groupby settings will enable searched to perform expensive 570 # full-text search and ranking operation only once, but compute 571 # multiple groupby results from its output. 572 # 573 # Parameters are exactly the same as in <tt>Query</tt> call. 574 # Returns index to results array returned by <tt>RunQueries</tt> call. 575 def AddQuery(query, index = '*', comment = '') 576 # build request 577 578 # mode and limits 579 request = Request.new 580 request.put_int @offset, @limit, @mode, @ranker 581 # process the 'expr' ranker 582 if @ranker == SPH_RANK_EXPR 583 request.put_string @rankexpr 584 end 585 586 request.put_int @sort 587 588 request.put_string @sortby 589 # query itself 590 request.put_string query 591 # weights 592 request.put_int_array @weights 593 # indexes 594 request.put_string index 595 # id64 range marker 596 request.put_int 1 597 # id64 range 598 request.put_int64 @min_id.to_i, @max_id.to_i 599 600 # filters 601 request.put_int @filters.length 602 @filters.each do |filter| 603 request.put_string filter['attr'] 604 request.put_int filter['type'] 605 606 case filter['type'] 607 when SPH_FILTER_VALUES 608 request.put_int64_array filter['values'] 609 when SPH_FILTER_RANGE 610 request.put_int64 filter['min'], filter['max'] 611 when SPH_FILTER_FLOATRANGE 612 request.put_float filter['min'], filter['max'] 613 else 614 raise SphinxInternalError, 'Internal error: unhandled filter type' 615 end 616 request.put_int filter['exclude'] ? 1 : 0 617 end 618 619 # group-by clause, max-matches count, group-sort clause, cutoff count 620 request.put_int @groupfunc 621 request.put_string @groupby 622 request.put_int @maxmatches 623 request.put_string @groupsort 624 request.put_int @cutoff, @retrycount, @retrydelay 625 request.put_string @groupdistinct 626 627 # anchor point 628 if @anchor.empty? 629 request.put_int 0 630 else 631 request.put_int 1 632 request.put_string @anchor['attrlat'], @anchor['attrlong'] 633 request.put_float @anchor['lat'], @anchor['long'] 634 end 635 636 # per-index weights 637 request.put_int @indexweights.length 638 @indexweights.each do |idx, weight| 639 request.put_string idx 640 request.put_int weight 641 end 642 643 # max query time 644 request.put_int @maxquerytime 645 646 # per-field weights 647 request.put_int @fieldweights.length 648 @fieldweights.each do |field, weight| 649 request.put_string field 650 request.put_int weight 651 end 652 653 # comment 654 request.put_string comment 655 656 # attribute overrides 657 request.put_int @overrides.length 658 for entry in @overrides do 659 request.put_string entry['attr'] 660 request.put_int entry['type'], entry['values'].size 661 entry['values'].each do |id, val| 662 assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) } 663 assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) } 664 665 request.put_int64 id 666 case entry['type'] 667 when SPH_ATTR_FLOAT 668 request.put_float val 669 when SPH_ATTR_BIGINT 670 request.put_int64 val 671 else 672 request.put_int val 673 end 674 end 675 end 676 677 # select-list 678 request.put_string @select 679 680 # store request to requests array 681 @reqs << request.to_s; 682 return @reqs.length - 1 683 end 684 685 # Run queries batch. 686 # 687 # Returns an array of result sets on success. 688 # Returns false on network IO failure. 689 # 690 # Each result set in returned array is a hash which containts 691 # the same keys as the hash returned by <tt>Query</tt>, plus: 692 # 693 # * <tt>'error'</tt> -- search error for this query 694 # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash 695 def RunQueries 696 if @reqs.empty? 697 @error = 'No queries defined, issue AddQuery() first' 698 return false 699 end 700 701 req = @reqs.join('') 702 nreqs = @reqs.length 703 @reqs = [] 704 response = PerformRequest(:search, req, nreqs) 705 706 # parse response 707 begin 708 results = [] 709 ires = 0 710 while ires < nreqs 711 ires += 1 712 result = {} 713 714 result['error'] = '' 715 result['warning'] = '' 716 717 # extract status 718 status = result['status'] = response.get_int 719 if status != SEARCHD_OK 720 message = response.get_string 721 if status == SEARCHD_WARNING 722 result['warning'] = message 723 else 724 result['error'] = message 725 results << result 726 next 727 end 728 end 729 730 # read schema 731 fields = [] 732 attrs = {} 733 attrs_names_in_order = [] 734 735 nfields = response.get_int 736 while nfields > 0 737 nfields -= 1 738 fields << response.get_string 739 end 740 result['fields'] = fields 741 742 nattrs = response.get_int 743 while nattrs > 0 744 nattrs -= 1 745 attr = response.get_string 746 type = response.get_int 747 attrs[attr] = type 748 attrs_names_in_order << attr 749 end 750 result['attrs'] = attrs 751 752 # read match count 753 count = response.get_int 754 id64 = response.get_int 755 756 # read matches 757 result['matches'] = [] 758 while count > 0 759 count -= 1 760 761 if id64 != 0 762 doc = response.get_int64 763 weight = response.get_int 764 else 765 doc, weight = response.get_ints(2) 766 end 767 768 r = {} # This is a single result put in the result['matches'] array 769 r['id'] = doc 770 r['weight'] = weight 771 attrs_names_in_order.each do |a| 772 r['attrs'] ||= {} 773 774 case attrs[a] 775 when SPH_ATTR_BIGINT 776 # handle 64-bit ints 777 r['attrs'][a] = response.get_int64 778 when SPH_ATTR_FLOAT 779 # handle floats 780 r['attrs'][a] = response.get_float 781 when SPH_ATTR_STRING 782 # handle string 783 r['attrs'][a] = response.get_string 784 else 785 # handle everything else as unsigned ints 786 val = response.get_int 787 if attrs[a]==SPH_ATTR_MULTI 788 r['attrs'][a] = [] 789 1.upto(val) do 790 r['attrs'][a] << response.get_int 791 end 792 elsif attrs[a]==SPH_ATTR_MULTI64 793 r['attrs'][a] = [] 794 val = val/2 795 1.upto(val) do 796 r['attrs'][a] << response.get_int64 797 end 798 else 799 r['attrs'][a] = val 800 end 801 end 802 end 803 result['matches'] << r 804 end 805 result['total'], result['total_found'], msecs, words = response.get_ints(4) 806 result['time'] = '%.3f' % (msecs / 1000.0) 807 808 result['words'] = {} 809 while words > 0 810 words -= 1 811 word = response.get_string 812 docs, hits = response.get_ints(2) 813 result['words'][word] = { 'docs' => docs, 'hits' => hits } 814 end 815 816 results << result 817 end 818 #rescue EOFError 819 # @error = 'incomplete reply' 820 # raise SphinxResponseError, @error 821 end 822 823 return results 824 end 825 826 # Connect to searchd server and generate exceprts from given documents. 827 # 828 # * <tt>docs</tt> -- an array of strings which represent the documents' contents 829 # * <tt>index</tt> -- a string specifiying the index which settings will be used 830 # for stemming, lexing and case folding 831 # * <tt>words</tt> -- a string which contains the words to highlight 832 # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters. 833 # 834 # You can use following parameters: 835 # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>" 836 # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "<b>" 837 # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... " 838 # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256 839 # * <tt>'around'</tt> -- how much words to highlight around each match, default is 5 840 # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt> 841 # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false 842 # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer 843 # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order 844 # 845 # Returns false on failure. 846 # Returns an array of string excerpts on success. 847 def BuildExcerpts(docs, index, words, opts = {}) 848 assert { docs.instance_of? Array } 849 assert { index.instance_of? String } 850 assert { words.instance_of? String } 851 assert { opts.instance_of? Hash } 852 853 # fixup options 854 opts['before_match'] ||= '<b>'; 855 opts['after_match'] ||= '</b>'; 856 opts['chunk_separator'] ||= ' ... '; 857 opts['html_strip_mode'] ||= 'index'; 858 opts['limit'] ||= 256; 859 opts['limit_passages'] ||= 0; 860 opts['limit_words'] ||= 0; 861 opts['around'] ||= 5; 862 opts['start_passage_id'] ||= 1; 863 opts['exact_phrase'] ||= false 864 opts['single_passage'] ||= false 865 opts['use_boundaries'] ||= false 866 opts['weight_order'] ||= false 867 opts['load_files'] ||= false 868 opts['allow_empty'] ||= false 869 870 # build request 871 872 # v.1.0 req 873 flags = 1 874 flags |= 2 if opts['exact_phrase'] 875 flags |= 4 if opts['single_passage'] 876 flags |= 8 if opts['use_boundaries'] 877 flags |= 16 if opts['weight_order'] 878 flags |= 32 if opts['query_mode'] 879 flags |= 64 if opts['force_all_words'] 880 flags |= 128 if opts['load_files'] 881 flags |= 256 if opts['allow_empty'] 882 883 request = Request.new 884 request.put_int 0, flags # mode=0, flags=1 (remove spaces) 885 # req index 886 request.put_string index 887 # req words 888 request.put_string words 889 890 # options 891 request.put_string opts['before_match'] 892 request.put_string opts['after_match'] 893 request.put_string opts['chunk_separator'] 894 request.put_int opts['limit'].to_i, opts['around'].to_i 895 896 # options v1.2 897 request.put_int opts['limit_passages'].to_i 898 request.put_int opts['limit_words'].to_i 899 request.put_int opts['start_passage_id'].to_i 900 request.put_string opts['html_strip_mode'] 901 902 # documents 903 request.put_int docs.size 904 docs.each do |doc| 905 assert { doc.instance_of? String } 906 907 request.put_string doc 908 end 909 910 response = PerformRequest(:excerpt, request) 911 912 # parse response 913 begin 914 res = [] 915 docs.each do |doc| 916 res << response.get_string 917 end 918 rescue EOFError 919 @error = 'incomplete reply' 920 raise SphinxResponseError, @error 921 end 922 return res 923 end 924 925 # Connect to searchd server, and generate keyword list for a given query. 926 # 927 # Returns an array of words on success. 928 def BuildKeywords(query, index, hits) 929 assert { query.instance_of? String } 930 assert { index.instance_of? String } 931 assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) } 932 933 # build request 934 request = Request.new 935 # v.1.0 req 936 request.put_string query # req query 937 request.put_string index # req index 938 request.put_int hits ? 1 : 0 939 940 response = PerformRequest(:keywords, request) 941 942 # parse response 943 begin 944 res = [] 945 nwords = response.get_int 946 0.upto(nwords - 1) do |i| 947 tokenized = response.get_string 948 normalized = response.get_string 949 950 entry = { 'tokenized' => tokenized, 'normalized' => normalized } 951 entry['docs'], entry['hits'] = response.get_ints(2) if hits 952 953 res << entry 954 end 955 rescue EOFError 956 @error = 'incomplete reply' 957 raise SphinxResponseError, @error 958 end 959 960 return res 961 end 962 963 # Batch update given attributes in given rows in given indexes. 964 # 965 # * +index+ is a name of the index to be updated 966 # * +attrs+ is an array of attribute name strings. 967 # * +values+ is a hash where key is document id, and value is an array of 968 # * +mva+ identifies whether update MVA 969 # new attribute values 970 # 971 # Returns number of actually updated documents (0 or more) on success. 972 # Returns -1 on failure. 973 # 974 # Usage example: 975 # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] }) 976 def UpdateAttributes(index, attrs, values, mva = false) 977 # verify everything 978 assert { index.instance_of? String } 979 assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) } 980 981 assert { attrs.instance_of? Array } 982 attrs.each do |attr| 983 assert { attr.instance_of? String } 984 end 985 986 assert { values.instance_of? Hash } 987 values.each do |id, entry| 988 assert { id.instance_of? Fixnum } 989 assert { entry.instance_of? Array } 990 assert { entry.length == attrs.length } 991 entry.each do |v| 992 if mva 993 assert { v.instance_of? Array } 994 v.each { |vv| assert { vv.instance_of? Fixnum } } 995 else 996 assert { v.instance_of? Fixnum } 997 end 998 end 999 end 1000 1001 # build request 1002 request = Request.new 1003 request.put_string index 1004 1005 request.put_int attrs.length 1006 for attr in attrs 1007 request.put_string attr 1008 request.put_int mva ? 1 : 0 1009 end 1010 1011 request.put_int values.length 1012 values.each do |id, entry| 1013 request.put_int64 id 1014 if mva 1015 entry.each { |v| request.put_int_array v } 1016 else 1017 request.put_int(*entry) 1018 end 1019 end 1020 1021 response = PerformRequest(:update, request) 1022 1023 # parse response 1024 begin 1025 return response.get_int 1026 rescue EOFError 1027 @error = 'incomplete reply' 1028 raise SphinxResponseError, @error 1029 end 1030 end 1031 1032 protected 1033 1034 # Connect to searchd server. 1035 def Connect 1036 begin 1037 if @host[0,1]=='/' 1038 sock = UNIXSocket.new(@host) 1039 else 1040 sock = TCPSocket.new(@host, @port) 1041 end 1042 rescue => err 1043 @error = "connection to #{@host}:#{@port} failed (error=#{err})" 1044 raise SphinxConnectError, @error 1045 end 1046 1047 v = sock.recv(4).unpack('N*').first 1048 if v < 1 1049 sock.close 1050 @error = "expected searchd protocol version 1+, got version '#{v}'" 1051 raise SphinxConnectError, @error 1052 end 1053 1054 sock.send([1].pack('N'), 0) 1055 sock 1056 end 1057 1058 # Get and check response packet from searchd server. 1059 def GetResponse(sock, client_version) 1060 response = '' 1061 len = 0 1062 1063 header = sock.recv(8) 1064 if header.length == 8 1065 status, ver, len = header.unpack('n2N') 1066 left = len.to_i 1067 while left > 0 do 1068 begin 1069 chunk = sock.recv(left) 1070 if chunk 1071 response << chunk 1072 left -= chunk.length 1073 end 1074 rescue EOFError 1075 break 1076 end 1077 end 1078 end 1079 sock.close 1080 1081 # check response 1082 read = response.length 1083 if response.empty? or read != len.to_i 1084 @error = response.empty? \ 1085 ? 'received zero-sized searchd response' \ 1086 : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" 1087 raise SphinxResponseError, @error 1088 end 1089 1090 # check status 1091 if (status == SEARCHD_WARNING) 1092 wlen = response[0, 4].unpack('N*').first 1093 @warning = response[4, wlen] 1094 return response[4 + wlen, response.length - 4 - wlen] 1095 end 1096 1097 if status == SEARCHD_ERROR 1098 @error = 'searchd error: ' + response[4, response.length - 4] 1099 raise SphinxInternalError, @error 1100 end 1101 1102 if status == SEARCHD_RETRY 1103 @error = 'temporary searchd error: ' + response[4, response.length - 4] 1104 raise SphinxTemporaryError, @error 1105 end 1106 1107 unless status == SEARCHD_OK 1108 @error = "unknown status code: '#{status}'" 1109 raise SphinxUnknownError, @error 1110 end 1111 1112 # check version 1113 if ver < client_version 1114 @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " + 1115 "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work" 1116 end 1117 1118 return response 1119 end 1120 1121 # Connect, send query, get response. 1122 def PerformRequest(command, request, additional = nil) 1123 cmd = command.to_s.upcase 1124 command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd) 1125 command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd) 1126 1127 sock = self.Connect 1128 len = request.to_s.length + (additional != nil ? 8 : 0) 1129 header = [command_id, command_ver, len].pack('nnN') 1130 header << [0, additional].pack('NN') if additional != nil 1131 sock.send(header + request.to_s, 0) 1132 response = self.GetResponse(sock, command_ver) 1133 return Response.new(response) 1134 end 1135 1136 # :stopdoc: 1137 def assert 1138 raise 'Assertion failed!' unless yield if $DEBUG 1139 end 1140 # :startdoc: 1141 end 1142end 1143