# = client.rb - Sphinx Client API
#
# Author::    Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
# License::   Distributed under the same terms as Ruby
# Version::   0.9.9-r1299
# Website::   http://kpumuk.info/projects/ror-plugins/sphinx
#
# This library is distributed under the terms of the Ruby license.
# You can freely distribute/modify this library.

# ==Sphinx Client API
#
# The Sphinx Client API is used to communicate with the <tt>searchd</tt>
# daemon and get search results from Sphinx.
#
# ===Usage
#
#   sphinx = Sphinx::Client.new
#   result = sphinx.Query('test')
#   ids = result['matches'].map { |match| match['id'] }.join(',')
#   posts = Post.find :all, :conditions => "id IN (#{ids})"
#
#   docs = posts.map(&:body)
#   excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# WARNING
# We strongly recommend that you use SphinxQL instead of the API
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
31 32 33require 'socket' 34 35module Sphinx 36 # :stopdoc: 37 38 class SphinxError < StandardError; end 39 class SphinxArgumentError < SphinxError; end 40 class SphinxConnectError < SphinxError; end 41 class SphinxResponseError < SphinxError; end 42 class SphinxInternalError < SphinxError; end 43 class SphinxTemporaryError < SphinxError; end 44 class SphinxUnknownError < SphinxError; end 45 46 # :startdoc: 47 48 class Client 49 50 # :stopdoc: 51 52 # Known searchd commands 53 54 # search command 55 SEARCHD_COMMAND_SEARCH = 0 56 # excerpt command 57 SEARCHD_COMMAND_EXCERPT = 1 58 # update command 59 SEARCHD_COMMAND_UPDATE = 2 60 # keywords command 61 SEARCHD_COMMAND_KEYWORDS = 3 62 63 # Current client-side command implementation versions 64 65 # search command version 66 VER_COMMAND_SEARCH = 0x119 67 # excerpt command version 68 VER_COMMAND_EXCERPT = 0x102 69 # update command version 70 VER_COMMAND_UPDATE = 0x103 71 # keywords command version 72 VER_COMMAND_KEYWORDS = 0x100 73 74 # Known searchd status codes 75 76 # general success, command-specific reply follows 77 SEARCHD_OK = 0 78 # general failure, command-specific reply may follow 79 SEARCHD_ERROR = 1 80 # temporaty failure, client should retry later 81 SEARCHD_RETRY = 2 82 # general success, warning message and command-specific reply follow 83 SEARCHD_WARNING = 3 84 85 # :startdoc: 86 87 # Known match modes 88 89 # match all query words 90 SPH_MATCH_ALL = 0 91 # match any query word 92 SPH_MATCH_ANY = 1 93 # match this exact phrase 94 SPH_MATCH_PHRASE = 2 95 # match this boolean query 96 SPH_MATCH_BOOLEAN = 3 97 # match this extended query 98 SPH_MATCH_EXTENDED = 4 99 # match all document IDs w/o fulltext query, apply filters 100 SPH_MATCH_FULLSCAN = 5 101 # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE) 102 SPH_MATCH_EXTENDED2 = 6 103 104 # Known ranking modes (ext2 only) 105 106 # default mode, phrase proximity major factor and BM25 minor one 107 SPH_RANK_PROXIMITY_BM25 = 0 108 # 
statistical mode, BM25 ranking only (faster but worse quality) 109 SPH_RANK_BM25 = 1 110 # no ranking, all matches get a weight of 1 111 SPH_RANK_NONE = 2 112 # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts 113 SPH_RANK_WORDCOUNT = 3 114 # phrase proximity 115 SPH_RANK_PROXIMITY = 4 116 SPH_RANK_MATCHANY = 5 117 SPH_RANK_FIELDMASK = 6 118 SPH_RANK_SPH04 = 7 119 SPH_RANK_EXPR = 8 120 121 # Known sort modes 122 123 # sort by document relevance desc, then by date 124 SPH_SORT_RELEVANCE = 0 125 # sort by document date desc, then by relevance desc 126 SPH_SORT_ATTR_DESC = 1 127 # sort by document date asc, then by relevance desc 128 SPH_SORT_ATTR_ASC = 2 129 # sort by time segments (hour/day/week/etc) desc, then by relevance desc 130 SPH_SORT_TIME_SEGMENTS = 3 131 # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC") 132 SPH_SORT_EXTENDED = 4 133 # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)") 134 SPH_SORT_EXPR = 5 135 136 # Known filter types 137 138 # filter by integer values set 139 SPH_FILTER_VALUES = 0 140 # filter by integer range 141 SPH_FILTER_RANGE = 1 142 # filter by float range 143 SPH_FILTER_FLOATRANGE = 2 144 145 # Known attribute types 146 147 # this attr is just an integer 148 SPH_ATTR_INTEGER = 1 149 # this attr is a timestamp 150 SPH_ATTR_TIMESTAMP = 2 151 # this attr is an ordinal string number (integer at search time, 152 # specially handled at indexing time) 153 SPH_ATTR_ORDINAL = 3 154 # this attr is a boolean bit field 155 SPH_ATTR_BOOL = 4 156 # this attr is a float 157 SPH_ATTR_FLOAT = 5 158 # signed 64-bit integer 159 SPH_ATTR_BIGINT = 6 160 # string 161 SPH_ATTR_STRING = 7 162 # this attr has multiple values (0 or more) 163 SPH_ATTR_MULTI = 0x40000001 164 SPH_ATTR_MULTI64 = 0x40000002 165 166 # Known grouping functions 167 168 # group by day 169 SPH_GROUPBY_DAY = 0 170 # group by week 171 SPH_GROUPBY_WEEK = 1 172 # group by 
month 173 SPH_GROUPBY_MONTH = 2 174 # group by year 175 SPH_GROUPBY_YEAR = 3 176 # group by attribute value 177 SPH_GROUPBY_ATTR = 4 178 # group by sequential attrs pair 179 SPH_GROUPBY_ATTRPAIR = 5 180 181 # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values. 182 def initialize 183 # per-client-object settings 184 @host = 'localhost' # searchd host (default is "localhost") 185 @port = 9312 # searchd port (default is 9312) 186 187 # per-query settings 188 @offset = 0 # how many records to seek from result-set start (default is 0) 189 @limit = 20 # how many records to return from result-set starting at offset (default is 20) 190 @mode = SPH_MATCH_EXTENDED2 # query matching mode (default is SPH_MATCH_EXTENDED2) 191 @weights = [] # per-field weights (default is 1 for all fields) 192 @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE) 193 @sortby = '' # attribute to sort by (defualt is "") 194 @min_id = 0 # min ID to match (default is 0, which means no limit) 195 @max_id = 0 # max ID to match (default is 0, which means no limit) 196 @filters = [] # search filters 197 @groupby = '' # group-by attribute name 198 @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with 199 @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with) 200 @groupdistinct = '' # group-by count-distinct attribute 201 @maxmatches = 1000 # max matches to retrieve 202 @cutoff = 0 # cutoff to stop searching at (default is 0) 203 @retrycount = 0 # distributed retries count 204 @retrydelay = 0 # distributed retries delay 205 @anchor = [] # geographical anchor point 206 @indexweights = [] # per-index weights 207 @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25) 208 @rankexpr = '' # ranker expression for SPH_RANK_EXPR 209 @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) 210 @fieldweights = {} # per-field-name weights 
211 @overrides = [] # per-query attribute values overrides 212 @select = '*' # select-list (attributes or expressions, with optional aliases) 213 214 # per-reply fields (for single-query case) 215 @error = '' # last error message 216 @warning = '' # last warning message 217 218 @reqs = [] # requests storage (for multi-query case) 219 @mbenc = '' # stored mbstring encoding 220 end 221 222 # Get last error message. 223 def GetLastError 224 @error 225 end 226 227 # Get last warning message. 228 def GetLastWarning 229 @warning 230 end 231 232 # Set searchd host name (string) and port (integer). 233 def SetServer(host, port) 234 assert { host.instance_of? String } 235 assert { port.instance_of? Fixnum } 236 237 @host = host 238 @port = port 239 end 240 241 # Set offset and count into result set, 242 # and optionally set max-matches and cutoff limits. 243 def SetLimits(offset, limit, max = 0, cutoff = 0) 244 assert { offset.instance_of? Fixnum } 245 assert { limit.instance_of? Fixnum } 246 assert { max.instance_of? Fixnum } 247 assert { offset >= 0 } 248 assert { limit > 0 } 249 assert { max >= 0 } 250 251 @offset = offset 252 @limit = limit 253 @maxmatches = max if max > 0 254 @cutoff = cutoff if cutoff > 0 255 end 256 257 # Set maximum query time, in milliseconds, per-index, 258 # integer, 0 means "do not limit" 259 def SetMaxQueryTime(max) 260 assert { max.instance_of? Fixnum } 261 assert { max >= 0 } 262 @maxquerytime = max 263 end 264 265 # Set matching mode. DEPRECATED 266 def SetMatchMode(mode) 267 $stderr.puts "DEPRECATED: Do not call this method or, even better, use SphinxQL instead of an API\n" 268 assert { mode == SPH_MATCH_ALL \ 269 || mode == SPH_MATCH_ANY \ 270 || mode == SPH_MATCH_PHRASE \ 271 || mode == SPH_MATCH_BOOLEAN \ 272 || mode == SPH_MATCH_EXTENDED \ 273 || mode == SPH_MATCH_FULLSCAN \ 274 || mode == SPH_MATCH_EXTENDED2 } 275 276 @mode = mode 277 end 278 279 # Set ranking mode. 
280 def SetRankingMode(ranker, rankexpr = '') 281 assert { ranker == SPH_RANK_PROXIMITY_BM25 \ 282 || ranker == SPH_RANK_BM25 \ 283 || ranker == SPH_RANK_NONE \ 284 || ranker == SPH_RANK_WORDCOUNT \ 285 || ranker == SPH_RANK_PROXIMITY \ 286 || ranker == SPH_RANK_MATCHANY \ 287 || ranker == SPH_RANK_FIELDMASK \ 288 || ranker == SPH_RANK_SPH04 \ 289 || ranker == SPH_RANK_EXPR } 290 291 @ranker = ranker 292 @rankexpr = rankexpr 293 end 294 295 # Set matches sorting mode. 296 def SetSortMode(mode, sortby = '') 297 assert { mode == SPH_SORT_RELEVANCE \ 298 || mode == SPH_SORT_ATTR_DESC \ 299 || mode == SPH_SORT_ATTR_ASC \ 300 || mode == SPH_SORT_TIME_SEGMENTS \ 301 || mode == SPH_SORT_EXTENDED \ 302 || mode == SPH_SORT_EXPR } 303 assert { sortby.instance_of? String } 304 assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? } 305 306 @sort = mode 307 @sortby = sortby 308 end 309 310 # Bind per-field weights by order. 311 # 312 # DEPRECATED; use SetFieldWeights() instead. 313 def SetWeights(weights) 314 assert { weights.instance_of? Array } 315 weights.each do |weight| 316 assert { weight.instance_of? Fixnum } 317 end 318 319 @weights = weights 320 end 321 322 # Bind per-field weights by name. 323 # 324 # Takes string (field name) to integer name (field weight) hash as an argument. 325 # * Takes precedence over SetWeights(). 326 # * Unknown names will be silently ignored. 327 # * Unbound fields will be silently given a weight of 1. 328 def SetFieldWeights(weights) 329 assert { weights.instance_of? Hash } 330 weights.each do |name, weight| 331 assert { name.instance_of? String } 332 assert { weight.instance_of? Fixnum } 333 end 334 335 @fieldweights = weights 336 end 337 338 # Bind per-index weights by name. 339 def SetIndexWeights(weights) 340 assert { weights.instance_of? Hash } 341 weights.each do |index, weight| 342 assert { index.instance_of? String } 343 assert { weight.instance_of? 
Fixnum } 344 end 345 346 @indexweights = weights 347 end 348 349 # Set IDs range to match. 350 # 351 # Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive). 352 def SetIDRange(min, max) 353 assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) } 354 assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) } 355 assert { min <= max } 356 357 @min_id = min 358 @max_id = max 359 end 360 361 # Set values filter. 362 # 363 # Only match those records where <tt>attribute</tt> column values 364 # are in specified set. 365 def SetFilter(attribute, values, exclude = false) 366 assert { attribute.instance_of? String } 367 assert { values.instance_of? Array } 368 assert { !values.empty? } 369 370 if values.instance_of?(Array) && values.size > 0 371 values.each do |value| 372 assert { value.instance_of? Fixnum } 373 end 374 375 @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values } 376 end 377 end 378 379 # Set range filter. 380 # 381 # Only match those records where <tt>attribute</tt> column value 382 # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>). 383 def SetFilterRange(attribute, min, max, exclude = false) 384 assert { attribute.instance_of? String } 385 assert { min.instance_of? Fixnum or min.instance_of? Bignum } 386 assert { max.instance_of? Fixnum or max.instance_of? Bignum } 387 assert { min <= max } 388 389 @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } 390 end 391 392 # Set float range filter. 393 # 394 # Only match those records where <tt>attribute</tt> column value 395 # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>). 396 def SetFilterFloatRange(attribute, min, max, exclude = false) 397 assert { attribute.instance_of? String } 398 assert { min.instance_of? Float } 399 assert { max.instance_of? 
Float } 400 assert { min <= max } 401 402 @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } 403 end 404 405 # Setup anchor point for geosphere distance calculations. 406 # 407 # Required to use <tt>@geodist</tt> in filters and sorting 408 # distance will be computed to this point. Latitude and longitude 409 # must be in radians. 410 # 411 # * <tt>attrlat</tt> -- is the name of latitude attribute 412 # * <tt>attrlong</tt> -- is the name of longitude attribute 413 # * <tt>lat</tt> -- is anchor point latitude, in radians 414 # * <tt>long</tt> -- is anchor point longitude, in radians 415 def SetGeoAnchor(attrlat, attrlong, lat, long) 416 assert { attrlat.instance_of? String } 417 assert { attrlong.instance_of? String } 418 assert { lat.instance_of? Float } 419 assert { long.instance_of? Float } 420 421 @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long } 422 end 423 424 # Set grouping attribute and function. 425 # 426 # In grouping mode, all matches are assigned to different groups 427 # based on grouping function value. 428 # 429 # Each group keeps track of the total match count, and the best match 430 # (in this group) according to current sorting function. 431 # 432 # The final result set contains one best match per group, with 433 # grouping function value and matches count attached. 434 # 435 # Groups in result set could be sorted by any sorting clause, 436 # including both document attributes and the following special 437 # internal Sphinx attributes: 438 # 439 # * @id - match document ID; 440 # * @weight, @rank, @relevance - match weight; 441 # * @group - groupby function value; 442 # * @count - amount of matches in group. 443 # 444 # the default mode is to sort by groupby value in descending order, 445 # ie. by '@group desc'. 446 # 447 # 'total_found' would contain total amount of matching groups over 448 # the whole index. 
449 # 450 # WARNING: grouping is done in fixed memory and thus its results 451 # are only approximate; so there might be more groups reported 452 # in total_found than actually present. @count might also 453 # be underestimated. 454 # 455 # For example, if sorting by relevance and grouping by "published" 456 # attribute with SPH_GROUPBY_DAY function, then the result set will 457 # contain one most relevant match per each day when there were any 458 # matches published, with day number and per-day match count attached, 459 # and sorted by day number in descending order (ie. recent days first). 460 def SetGroupBy(attribute, func, groupsort = '@group desc') 461 assert { attribute.instance_of? String } 462 assert { groupsort.instance_of? String } 463 assert { func == SPH_GROUPBY_DAY \ 464 || func == SPH_GROUPBY_WEEK \ 465 || func == SPH_GROUPBY_MONTH \ 466 || func == SPH_GROUPBY_YEAR \ 467 || func == SPH_GROUPBY_ATTR \ 468 || func == SPH_GROUPBY_ATTRPAIR } 469 470 @groupby = attribute 471 @groupfunc = func 472 @groupsort = groupsort 473 end 474 475 # Set count-distinct attribute for group-by queries. 476 def SetGroupDistinct(attribute) 477 assert { attribute.instance_of? String } 478 @groupdistinct = attribute 479 end 480 481 # Set distributed retries count and delay. 482 def SetRetries(count, delay = 0) 483 assert { count.instance_of? Fixnum } 484 assert { delay.instance_of? Fixnum } 485 486 @retrycount = count 487 @retrydelay = delay 488 end 489 490 # DEPRECATED: Set attribute values override 491 # 492 # There can be only one override per attribute. 493 # +values+ must be a hash that maps document IDs to attribute values. 494 def SetOverride(attrname, attrtype, values) 495 $stderr.puts "DEPRECATED: Do not call this method. Use SphinxQL REMAP() function instead.\n" 496 assert { attrname.instance_of? String } 497 assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) } 498 assert { values.instance_of? 
Hash } 499 500 @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values } 501 end 502 503 # Set select-list (attributes or expressions), SQL-like syntax. 504 def SetSelect(select) 505 assert { select.instance_of? String } 506 @select = select 507 end 508 509 # Clear all filters (for multi-queries). 510 def ResetFilters 511 @filters = [] 512 @anchor = [] 513 end 514 515 # Clear groupby settings (for multi-queries). 516 def ResetGroupBy 517 @groupby = '' 518 @groupfunc = SPH_GROUPBY_DAY 519 @groupsort = '@group desc' 520 @groupdistinct = '' 521 end 522 523 # Clear all attribute value overrides (for multi-queries). 524 def ResetOverrides 525 @overrides = [] 526 end 527 528 # Connect to searchd server and run given search query. 529 # 530 # <tt>query</tt> is query string 531 532 # <tt>index</tt> is index name (or names) to query. default value is "*" which means 533 # to query all indexes. Accepted characters for index names are letters, numbers, 534 # dash, and underscore; everything else is considered a separator. Therefore, 535 # all the following calls are valid and will search two indexes: 536 # 537 # sphinx.Query('test query', 'main delta') 538 # sphinx.Query('test query', 'main;delta') 539 # sphinx.Query('test query', 'main, delta') 540 # 541 # Index order matters. If identical IDs are found in two or more indexes, 542 # weight and attribute values from the very last matching index will be used 543 # for sorting and returning to client. Therefore, in the example above, 544 # matches from "delta" index will always "win" over matches from "main". 545 # 546 # Returns false on failure. 547 # Returns hash which has the following keys on success: 548 # 549 # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id. 
550 # * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h) 551 # * <tt>'total_found'</tt> -- total amount of matching documents in index 552 # * <tt>'time'</tt> -- search time 553 # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash 554 def Query(query, index = '*', comment = '') 555 assert { @reqs.empty? } 556 @reqs = [] 557 558 self.AddQuery(query, index, comment) 559 results = self.RunQueries 560 561 # probably network error; error message should be already filled 562 return false unless results.instance_of?(Array) 563 564 @error = results[0]['error'] 565 @warning = results[0]['warning'] 566 567 return false if results[0]['status'] == SEARCHD_ERROR 568 return results[0] 569 end 570 571 # Add query to batch. 572 # 573 # Batch queries enable searchd to perform internal optimizations, 574 # if possible; and reduce network connection overheads in all cases. 575 # 576 # For instance, running exactly the same query with different 577 # groupby settings will enable searched to perform expensive 578 # full-text search and ranking operation only once, but compute 579 # multiple groupby results from its output. 580 # 581 # Parameters are exactly the same as in <tt>Query</tt> call. 582 # Returns index to results array returned by <tt>RunQueries</tt> call. 
583 def AddQuery(query, index = '*', comment = '') 584 # build request 585 586 # mode and limits 587 request = Request.new 588 request.put_int @offset, @limit, @mode, @ranker 589 # process the 'expr' ranker 590 if @ranker == SPH_RANK_EXPR 591 request.put_string @rankexpr 592 end 593 594 request.put_int @sort 595 596 request.put_string @sortby 597 # query itself 598 request.put_string query 599 # weights 600 request.put_int_array @weights 601 # indexes 602 request.put_string index 603 # id64 range marker 604 request.put_int 1 605 # id64 range 606 request.put_int64 @min_id.to_i, @max_id.to_i 607 608 # filters 609 request.put_int @filters.length 610 @filters.each do |filter| 611 request.put_string filter['attr'] 612 request.put_int filter['type'] 613 614 case filter['type'] 615 when SPH_FILTER_VALUES 616 request.put_int64_array filter['values'] 617 when SPH_FILTER_RANGE 618 request.put_int64 filter['min'], filter['max'] 619 when SPH_FILTER_FLOATRANGE 620 request.put_float filter['min'], filter['max'] 621 else 622 raise SphinxInternalError, 'Internal error: unhandled filter type' 623 end 624 request.put_int filter['exclude'] ? 1 : 0 625 end 626 627 # group-by clause, max-matches count, group-sort clause, cutoff count 628 request.put_int @groupfunc 629 request.put_string @groupby 630 request.put_int @maxmatches 631 request.put_string @groupsort 632 request.put_int @cutoff, @retrycount, @retrydelay 633 request.put_string @groupdistinct 634 635 # anchor point 636 if @anchor.empty? 
637 request.put_int 0 638 else 639 request.put_int 1 640 request.put_string @anchor['attrlat'], @anchor['attrlong'] 641 request.put_float @anchor['lat'], @anchor['long'] 642 end 643 644 # per-index weights 645 request.put_int @indexweights.length 646 @indexweights.each do |idx, weight| 647 request.put_string idx 648 request.put_int weight 649 end 650 651 # max query time 652 request.put_int @maxquerytime 653 654 # per-field weights 655 request.put_int @fieldweights.length 656 @fieldweights.each do |field, weight| 657 request.put_string field 658 request.put_int weight 659 end 660 661 # comment 662 request.put_string comment 663 664 # attribute overrides 665 request.put_int @overrides.length 666 for entry in @overrides do 667 request.put_string entry['attr'] 668 request.put_int entry['type'], entry['values'].size 669 entry['values'].each do |id, val| 670 assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) } 671 assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) } 672 673 request.put_int64 id 674 case entry['type'] 675 when SPH_ATTR_FLOAT 676 request.put_float val 677 when SPH_ATTR_BIGINT 678 request.put_int64 val 679 else 680 request.put_int val 681 end 682 end 683 end 684 685 # select-list 686 request.put_string @select 687 688 # store request to requests array 689 @reqs << request.to_s; 690 return @reqs.length - 1 691 end 692 693 # Run queries batch. 694 # 695 # Returns an array of result sets on success. 696 # Returns false on network IO failure. 697 # 698 # Each result set in returned array is a hash which containts 699 # the same keys as the hash returned by <tt>Query</tt>, plus: 700 # 701 # * <tt>'error'</tt> -- search error for this query 702 # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash 703 def RunQueries 704 if @reqs.empty? 
705 @error = 'No queries defined, issue AddQuery() first' 706 return false 707 end 708 709 req = @reqs.join('') 710 nreqs = @reqs.length 711 @reqs = [] 712 response = PerformRequest(:search, req, nreqs) 713 714 # parse response 715 begin 716 results = [] 717 ires = 0 718 while ires < nreqs 719 ires += 1 720 result = {} 721 722 result['error'] = '' 723 result['warning'] = '' 724 725 # extract status 726 status = result['status'] = response.get_int 727 if status != SEARCHD_OK 728 message = response.get_string 729 if status == SEARCHD_WARNING 730 result['warning'] = message 731 else 732 result['error'] = message 733 results << result 734 next 735 end 736 end 737 738 # read schema 739 fields = [] 740 attrs = {} 741 attrs_names_in_order = [] 742 743 nfields = response.get_int 744 while nfields > 0 745 nfields -= 1 746 fields << response.get_string 747 end 748 result['fields'] = fields 749 750 nattrs = response.get_int 751 while nattrs > 0 752 nattrs -= 1 753 attr = response.get_string 754 type = response.get_int 755 attrs[attr] = type 756 attrs_names_in_order << attr 757 end 758 result['attrs'] = attrs 759 760 # read match count 761 count = response.get_int 762 id64 = response.get_int 763 764 # read matches 765 result['matches'] = [] 766 while count > 0 767 count -= 1 768 769 if id64 != 0 770 doc = response.get_int64 771 weight = response.get_int 772 else 773 doc, weight = response.get_ints(2) 774 end 775 776 r = {} # This is a single result put in the result['matches'] array 777 r['id'] = doc 778 r['weight'] = weight 779 attrs_names_in_order.each do |a| 780 r['attrs'] ||= {} 781 782 case attrs[a] 783 when SPH_ATTR_BIGINT 784 # handle 64-bit ints 785 r['attrs'][a] = response.get_int64 786 when SPH_ATTR_FLOAT 787 # handle floats 788 r['attrs'][a] = response.get_float 789 when SPH_ATTR_STRING 790 # handle string 791 r['attrs'][a] = response.get_string 792 else 793 # handle everything else as unsigned ints 794 val = response.get_int 795 if attrs[a]==SPH_ATTR_MULTI 796 
r['attrs'][a] = [] 797 1.upto(val) do 798 r['attrs'][a] << response.get_int 799 end 800 elsif attrs[a]==SPH_ATTR_MULTI64 801 r['attrs'][a] = [] 802 val = val/2 803 1.upto(val) do 804 r['attrs'][a] << response.get_int64 805 end 806 else 807 r['attrs'][a] = val 808 end 809 end 810 end 811 result['matches'] << r 812 end 813 result['total'], result['total_found'], msecs, words = response.get_ints(4) 814 result['time'] = '%.3f' % (msecs / 1000.0) 815 816 result['words'] = {} 817 while words > 0 818 words -= 1 819 word = response.get_string 820 docs, hits = response.get_ints(2) 821 result['words'][word] = { 'docs' => docs, 'hits' => hits } 822 end 823 824 results << result 825 end 826 #rescue EOFError 827 # @error = 'incomplete reply' 828 # raise SphinxResponseError, @error 829 end 830 831 return results 832 end 833 834 # Connect to searchd server and generate exceprts from given documents. 835 # 836 # * <tt>docs</tt> -- an array of strings which represent the documents' contents 837 # * <tt>index</tt> -- a string specifiying the index which settings will be used 838 # for stemming, lexing and case folding 839 # * <tt>words</tt> -- a string which contains the words to highlight 840 # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters. 841 # 842 # You can use following parameters: 843 # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>" 844 # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "<b>" 845 # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... 
" 846 # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256 847 # * <tt>'around'</tt> -- how much words to highlight around each match, default is 5 848 # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt> 849 # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false 850 # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer 851 # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order 852 # 853 # Returns false on failure. 854 # Returns an array of string excerpts on success. 855 def BuildExcerpts(docs, index, words, opts = {}) 856 assert { docs.instance_of? Array } 857 assert { index.instance_of? String } 858 assert { words.instance_of? String } 859 assert { opts.instance_of? Hash } 860 861 # fixup options 862 opts['before_match'] ||= '<b>'; 863 opts['after_match'] ||= '</b>'; 864 opts['chunk_separator'] ||= ' ... 
'; 865 opts['html_strip_mode'] ||= 'index'; 866 opts['limit'] ||= 256; 867 opts['limit_passages'] ||= 0; 868 opts['limit_words'] ||= 0; 869 opts['around'] ||= 5; 870 opts['start_passage_id'] ||= 1; 871 opts['exact_phrase'] ||= false 872 opts['single_passage'] ||= false 873 opts['use_boundaries'] ||= false 874 opts['weight_order'] ||= false 875 opts['load_files'] ||= false 876 opts['allow_empty'] ||= false 877 878 # build request 879 880 # v.1.0 req 881 flags = 1 882 flags |= 2 if opts['exact_phrase'] 883 flags |= 4 if opts['single_passage'] 884 flags |= 8 if opts['use_boundaries'] 885 flags |= 16 if opts['weight_order'] 886 flags |= 32 if opts['query_mode'] 887 flags |= 64 if opts['force_all_words'] 888 flags |= 128 if opts['load_files'] 889 flags |= 256 if opts['allow_empty'] 890 891 request = Request.new 892 request.put_int 0, flags # mode=0, flags=1 (remove spaces) 893 # req index 894 request.put_string index 895 # req words 896 request.put_string words 897 898 # options 899 request.put_string opts['before_match'] 900 request.put_string opts['after_match'] 901 request.put_string opts['chunk_separator'] 902 request.put_int opts['limit'].to_i, opts['around'].to_i 903 904 # options v1.2 905 request.put_int opts['limit_passages'].to_i 906 request.put_int opts['limit_words'].to_i 907 request.put_int opts['start_passage_id'].to_i 908 request.put_string opts['html_strip_mode'] 909 910 # documents 911 request.put_int docs.size 912 docs.each do |doc| 913 assert { doc.instance_of? String } 914 915 request.put_string doc 916 end 917 918 response = PerformRequest(:excerpt, request) 919 920 # parse response 921 begin 922 res = [] 923 docs.each do |doc| 924 res << response.get_string 925 end 926 rescue EOFError 927 @error = 'incomplete reply' 928 raise SphinxResponseError, @error 929 end 930 return res 931 end 932 933 # Connect to searchd server, and generate keyword list for a given query. 934 # 935 # Returns an array of words on success. 
936 def BuildKeywords(query, index, hits) 937 assert { query.instance_of? String } 938 assert { index.instance_of? String } 939 assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) } 940 941 # build request 942 request = Request.new 943 # v.1.0 req 944 request.put_string query # req query 945 request.put_string index # req index 946 request.put_int hits ? 1 : 0 947 948 response = PerformRequest(:keywords, request) 949 950 # parse response 951 begin 952 res = [] 953 nwords = response.get_int 954 0.upto(nwords - 1) do |i| 955 tokenized = response.get_string 956 normalized = response.get_string 957 958 entry = { 'tokenized' => tokenized, 'normalized' => normalized } 959 entry['docs'], entry['hits'] = response.get_ints(2) if hits 960 961 res << entry 962 end 963 rescue EOFError 964 @error = 'incomplete reply' 965 raise SphinxResponseError, @error 966 end 967 968 return res 969 end 970 971 # Batch update given attributes in given rows in given indexes. 972 # 973 # * +index+ is a name of the index to be updated 974 # * +attrs+ is an array of attribute name strings. 975 # * +values+ is a hash where key is document id, and value is an array of 976 # * +mva+ identifies whether update MVA 977 # new attribute values 978 # * +ignoreexistent+ identifies whether silently ignore updating of non-existent columns 979 # 980 # Returns number of actually updated documents (0 or more) on success. 981 # Returns -1 on failure. 982 # 983 # Usage example: 984 # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] }) 985 def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false ) 986 # verify everything 987 assert { index.instance_of? String } 988 assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) } 989 assert { ignoreexistent.instance_of?(TrueClass) || ignoreexistent.instance_of?(FalseClass) } 990 991 assert { attrs.instance_of? Array } 992 attrs.each do |attr| 993 assert { attr.instance_of? 
String } 994 end 995 996 assert { values.instance_of? Hash } 997 values.each do |id, entry| 998 assert { id.instance_of? Fixnum } 999 assert { entry.instance_of? Array } 1000 assert { entry.length == attrs.length } 1001 entry.each do |v| 1002 if mva 1003 assert { v.instance_of? Array } 1004 v.each { |vv| assert { vv.instance_of? Fixnum } } 1005 else 1006 assert { v.instance_of? Fixnum } 1007 end 1008 end 1009 end 1010 1011 # build request 1012 request = Request.new 1013 request.put_string index 1014 1015 request.put_int attrs.length 1016 request.put_int ignoreexistent ? 1 : 0 1017 for attr in attrs 1018 request.put_string attr 1019 request.put_int mva ? 1 : 0 1020 end 1021 1022 request.put_int values.length 1023 values.each do |id, entry| 1024 request.put_int64 id 1025 if mva 1026 entry.each { |v| request.put_int_array v } 1027 else 1028 request.put_int(*entry) 1029 end 1030 end 1031 1032 response = PerformRequest(:update, request) 1033 1034 # parse response 1035 begin 1036 return response.get_int 1037 rescue EOFError 1038 @error = 'incomplete reply' 1039 raise SphinxResponseError, @error 1040 end 1041 end 1042 1043 protected 1044 1045 # Connect to searchd server. 1046 def Connect 1047 begin 1048 if @host[0,1]=='/' 1049 sock = UNIXSocket.new(@host) 1050 else 1051 sock = TCPSocket.new(@host, @port) 1052 end 1053 rescue => err 1054 @error = "connection to #{@host}:#{@port} failed (error=#{err})" 1055 raise SphinxConnectError, @error 1056 end 1057 1058 v = sock.recv(4).unpack('N*').first 1059 if v < 1 1060 sock.close 1061 @error = "expected searchd protocol version 1+, got version '#{v}'" 1062 raise SphinxConnectError, @error 1063 end 1064 1065 sock.send([1].pack('N'), 0) 1066 sock 1067 end 1068 1069 # Get and check response packet from searchd server. 
# Reads one reply packet from +sock+, closes the socket and validates
# the reply.
#
# * +sock+ is a connected socket positioned at the reply header
# * +client_version+ is the client-side version of the command that was sent
#
# The 8-byte header carries status (16 bit), command version (16 bit) and
# body length (32 bit), all big-endian.
#
# Returns the reply body; for SEARCHD_WARNING the leading warning message
# is stripped and stored in @warning.
#
# Raises SphinxResponseError on an empty or truncated reply,
# SphinxInternalError on SEARCHD_ERROR, SphinxTemporaryError on
# SEARCHD_RETRY and SphinxUnknownError on any other unknown status.
def GetResponse(sock, client_version)
  response = ''
  status = nil
  ver = nil
  len = 0

  begin
    # recv may hand back nil (Ruby 3.3+) instead of '' on a closed connection
    header = sock.recv(8).to_s
    if header.bytesize == 8
      status, ver, len = header.unpack('n2N')
      left = len.to_i
      while left > 0
        begin
          chunk = sock.recv(left)
        rescue EOFError
          break
        end
        # An empty/nil chunk means the peer closed the connection; stop
        # instead of spinning forever waiting for bytes that never arrive.
        break if chunk.nil? || chunk.empty?
        response << chunk
        left -= chunk.bytesize
      end
    end
  ensure
    # always release the socket, even when reading raised
    sock.close
  end

  # check response
  read = response.bytesize
  if response.empty? or read != len.to_i
    @error = response.empty? \
      ? 'received zero-sized searchd response' \
      : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
    raise SphinxResponseError, @error
  end

  # check status
  if (status == SEARCHD_WARNING)
    # body starts with a length-prefixed warning message
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    # to_s guards against bodies shorter than the 4-byte length prefix
    @error = 'searchd error: ' + response[4, response.length - 4].to_s
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4].to_s
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  return response
end

# Connect, send query, get response.
1133 def PerformRequest(command, request, additional = nil) 1134 cmd = command.to_s.upcase 1135 command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd) 1136 command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd) 1137 1138 sock = self.Connect 1139 len = request.to_s.length + (additional != nil ? 8 : 0) 1140 header = [command_id, command_ver, len].pack('nnN') 1141 header << [0, additional].pack('NN') if additional != nil 1142 sock.send(header + request.to_s, 0) 1143 response = self.GetResponse(sock, command_ver) 1144 return Response.new(response) 1145 end 1146 1147 # :stopdoc: 1148 def assert 1149 raise 'Assertion failed!' unless yield if $DEBUG 1150 end 1151 # :startdoc: 1152 end 1153end 1154