# :title:Ruby Xapian bindings
# =Ruby Xapian bindings
#
# Original version by Paul Legato (plegato@nks.net), 4/20/06.
#
# Copyright (C) 2006 Networked Knowledge Systems, Inc.
# Copyright (C) 2008,2011,2019 Olly Betts
# Copyright (C) 2010 Richard Boulton
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA
#
# ==Underscore methods
# Note: Methods whose names start with an underscore character _ are internal
# methods from the C++ API. Their functionality is not accessible in a
# Ruby-friendly way, so this file provides wrapper code to make it easier to
# use them from a Ruby programming idiom.  Most are also dangerous insofar as
# misusing them can cause your program to segfault.  In particular, all of
# Xapian's *Iterator classes are wrapped into nice Ruby-friendly Arrays.
#
# It should never be necessary to use any method whose name starts with an
# underscore from user-level code. Make sure you are _VERY_ certain that you
# know exactly what you're doing if you do use one of these methods. Beware.
# You've been warned...
#


module Xapian
  ######## load the SWIG-generated library
  require '_xapian'


  # Iterate between a pair of dangerous iterators (i.e. those that can cause
  # segfaults if used improperly.)
  #
  # dangerousStart is advanced in place with next() until it equals()
  # dangerousEnd; each position is passed through wrapper before being
  # handed on.
  #
  # If block_given? then the wrapped results are fed to it one by one and
  # nil is returned, otherwise the wrapped results are returned as an Array.
  # Users should never need to use this method.
  #
  # wrapper is a lambda that returns some appropriate Ruby object to wrap the
  # results from the underlying Iterator
  def _safelyIterate(dangerousStart, dangerousEnd, wrapper) #:nodoc:
    item = dangerousStart
    # Only collect results when there is no block to receive them; a nil
    # retval doubles as the "yield instead" flag and as the return value.
    retval = block_given? ? nil : []
    until item.equals(dangerousEnd)
      wrapped = wrapper.call(item)
      if retval
        retval.push(wrapped)
      else
        yield wrapped
      end
      item.next()
    end
    retval
  end # _safelyIterate
  module_function :_safelyIterate

  #--
  ### safe Ruby wrapper for the dangerous C++ Xapian::TermIterator class
  class Xapian::Term
    attr_accessor :term, :wdf, :termfreq

    def initialize(term, wdf=nil, termfreq=nil)
      @term = term
      @wdf = wdf
      @termfreq = termfreq
    end

    # Two Terms are equal when term, wdf and termfreq all match.
    def ==(other)
      other.is_a?(Xapian::Term) && other.term == @term &&
        other.wdf == @wdf && other.termfreq == @termfreq
    end
  end # class Term

  ### Ruby wrapper for a Match, i.e. a Xapian::MSetIterator (Match Set) in C++.
  # it's no longer an iterator in the Ruby version, but we want to preserve its
  # non-iterative data.
  # (MSetIterator is not dangerous, but it is inconvenient to use from a Ruby
  # idiom, so we wrap it..)
  class Xapian::Match
    attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent

    def initialize(docid, document, rank, weight, collapse_count, percent)
      @docid = docid
      @document = document
      @rank = rank
      @weight = weight
      @collapse_count = collapse_count
      @percent = percent
    end # initialize

    # Compares docid, rank, weight, collapse_count and percent.
    # Note that the document attribute does not take part in the comparison.
    def ==(other)
      other.is_a?(Xapian::Match) && other.docid == @docid && other.rank == @rank &&
        other.weight == @weight && other.collapse_count == @collapse_count &&
        other.percent == @percent
    end
  end # class Xapian::Match

  # Ruby wrapper for an ExpandTerm, i.e. a Xapian::ESetIterator in C++
  # Not dangerous, but inconvenient to use from a Ruby programming idiom, so we
  # wrap it.
  class Xapian::ExpandTerm
    attr_accessor :name, :weight

    def initialize(name, weight)
      @name = name
      @weight = weight
    end # initialize

    # Two ExpandTerms are equal when name and weight both match.
    def ==(other)
      other.is_a?(Xapian::ExpandTerm) && other.name == @name && other.weight == @weight
    end

  end # Xapian::ExpandTerm

  # Ruby wrapper for Xapian::ValueIterator
  class Xapian::Value
    attr_accessor :value, :valueno, :docid

    def initialize(value, valueno, docid)
      @value = value
      @valueno = valueno
      @docid = docid
    end # initialize

    # Two Values are equal when value, valueno and docid all match.
    def ==(other)
      other.is_a?(Xapian::Value) && other.value == @value &&
        other.valueno == @valueno && other.docid == @docid
    end
  end # Xapian::Value

  # Refer to the
  # {Xapian::Document C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Document.html]
  # for methods not specific to Ruby.
  #--
  # Extend Xapian::Document with a nice wrapper for its nasty input_iterators
  class Xapian::Document
    # Returns an Array of Xapian::Term objects (term and wdf only) for this
    # document, or yields each one if a block is given.
    def terms(&block)
      Xapian._safelyIterate(self._dangerous_termlist_begin(),
                            self._dangerous_termlist_end(),
                            lambda { |item| Xapian::Term.new(item.term, item.wdf) },
                            &block)
    end # terms

    # Returns an Array of Xapian::Value objects for this document, or yields
    # each one if a block is given.  The docid of each Value is set to 0.
    def values(&block)
      Xapian._safelyIterate(self._dangerous_values_begin(),
                            self._dangerous_values_end(),
                            lambda { |item| Xapian::Value.new(item.value, item.valueno, 0) },
                            &block)
    end # values

  end # class Xapian::Document

  # Refer to the
  # {Xapian::Query C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Query.html]
  # for methods not specific to Ruby.
  #--
  # Extend Xapian::Query with a nice wrapper for its dangerous iterators
  class Xapian::Query
    # Returns an Array of Xapian::Term objects for this query, or yields each
    # one if a block is given.
    def terms(&block)
      # termfreq is not supported by TermIterators from Queries
      Xapian._safelyIterate(self._dangerous_terms_begin(),
                            self._dangerous_terms_end(),
                            lambda { |item| Xapian::Term.new(item.term, item.wdf) },
                            &block)
    end # terms

    # Like terms, but iterates between _dangerous_unique_terms_begin() and
    # _dangerous_unique_terms_end().
    def unique_terms(&block)
      # termfreq is not supported by TermIterators from Queries
      Xapian._safelyIterate(self._dangerous_unique_terms_begin(),
                            self._dangerous_unique_terms_end(),
                            lambda { |item| Xapian::Term.new(item.term, item.wdf) },
                            &block)
    end # unique_terms
  end # Xapian::Query

  # Refer to the
  # {Xapian::Enquire C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Enquire.html]
  # for methods not specific to Ruby.
  #--
  # Extend Xapian::Enquire with a nice wrapper for its dangerous iterators
  class Xapian::Enquire
    # Get matching terms for some document.
    # document can be either a Xapian::DocID or a Xapian::MSetIterator
    # Returns an Array of Xapian::Term objects, or yields each one if a block
    # is given.
    def matching_terms(document, &block)
      Xapian._safelyIterate(self._dangerous_matching_terms_begin(document),
                            self._dangerous_matching_terms_end(document),
                            lambda { |item| Xapian::Term.new(item.term, item.wdf) },
                            &block)
    end # matching_terms
  end # Xapian::Enquire

  # Refer to the
  # {Xapian::MSet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1MSet.html]
  # for methods not specific to Ruby.
  #--
  # MSetIterators are not dangerous, just inconvenient to use within a Ruby
  # programming idiom. So we wrap them.
  class Xapian::MSet
    # Returns an Array of Xapian::Match objects for this MSet, or yields each
    # one if a block is given.
    def matches(&block)
      Xapian._safelyIterate(self._begin(),
                            self._end(),
                            lambda { |item|
                              Xapian::Match.new(item.docid, item.document, item.rank,
                                                item.weight, item.collapse_count,
                                                item.percent)
                            },
                            &block)
    end # matches
  end # Xapian::MSet

  # Refer to the
  # {Xapian::ESet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ESet.html]
  # for methods not specific to Ruby.
  #--
  # ESetIterators are not dangerous, just inconvenient to use within a Ruby
  # programming idiom. So we wrap them.
  class Xapian::ESet
    # Returns an Array of Xapian::ExpandTerm objects for this ESet, or yields
    # each one if a block is given.
    def terms(&block)
      # note: in the ExpandTerm wrapper, we implicitly rename
      # ESetIterator#term() (defined in xapian-headers.i) to ExpandTerm#name()
      Xapian._safelyIterate(self._begin(),
                            self._end(),
                            lambda { |item| Xapian::ExpandTerm.new(item.term, item.weight) },
                            &block)
    end # terms
  end # Xapian::ESet


  #--
  # Wrapper for the C++ class Xapian::PostingIterator
  class Xapian::Posting
    attr_accessor :docid, :doclength, :wdf

    def initialize(docid, doclength, wdf)
      @docid = docid
      @doclength = doclength
      @wdf = wdf
    end

    # Two Postings are equal when docid, doclength and wdf all match.
    def ==(other)
      other.is_a?(Xapian::Posting) && other.docid == @docid &&
        other.doclength == @doclength && other.wdf == @wdf
    end
  end # Xapian::Posting

  # Refer to the
  # {Xapian::Database C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Database.html]
  # for methods not specific to Ruby.
  #--
  # Wrap some dangerous iterators.
  #
  # Each method below returns an Array when called without a block; when a
  # block is given the items are yielded one by one instead.
  class Xapian::Database
    # Returns an Array of all Xapian::Terms for this database.
    # pref is passed through to the underlying allterms iterators; the wdf of
    # each returned Term is set to 0.
    def allterms(pref = '', &block)
      Xapian._safelyIterate(self._dangerous_allterms_begin(pref),
                            self._dangerous_allterms_end(pref),
                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
                            &block)
    end # allterms

    # Returns an Array of all metadata keys for this database.
    # pref is passed through to the underlying metadata key iterators.
    def metadata_keys(pref = '', &block)
      Xapian._safelyIterate(self._dangerous_metadata_keys_begin(pref),
                            self._dangerous_metadata_keys_end(pref),
                            lambda { |item| item.term },
                            &block)
    end # metadata_keys

    # Returns an Array of Xapian::Postings for the given term.
    # term is a string.
    def postlist(term, &block)
      Xapian._safelyIterate(self._dangerous_postlist_begin(term),
                            self._dangerous_postlist_end(term),
                            lambda { |item| Xapian::Posting.new(item.docid, item.doclength, item.wdf) },
                            &block)
    end # postlist(term)

    # Returns an Array of Terms for the given docid.
    def termlist(docid, &block)
      Xapian._safelyIterate(self._dangerous_termlist_begin(docid),
                            self._dangerous_termlist_end(docid),
                            lambda { |item| Xapian::Term.new(item.term, item.wdf, item.termfreq) },
                            &block)
    end # termlist(docid)

    # Returns an Array of term positions for the given term (a String)
    # in the given docid.
    def positionlist(docid, term, &block)
      Xapian._safelyIterate(self._dangerous_positionlist_begin(docid, term),
                            self._dangerous_positionlist_end(docid, term),
                            lambda { |item| item.termpos },
                            &block)
    end # positionlist

    # Returns an Array of Xapian::Value objects for the given slot in the
    # database.
    def valuestream(slot, &block)
      Xapian._safelyIterate(self._dangerous_valuestream_begin(slot),
                            self._dangerous_valuestream_end(slot),
                            lambda { |item| Xapian::Value.new(item.value, slot, item.docid) },
                            &block)
    end # valuestream(slot)

    # Returns an Array of Xapian::Term objects for the spelling dictionary.
    # The wdf of each returned Term is set to 0.
    def spellings(&block)
      Xapian._safelyIterate(self._dangerous_spellings_begin(),
                            self._dangerous_spellings_end(),
                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
                            &block)
    end # spellings

    # Returns an Array of synonyms of the given term.
    def synonyms(term, &block)
      Xapian._safelyIterate(self._dangerous_synonyms_begin(term),
                            self._dangerous_synonyms_end(term),
                            lambda { |item| item.term },
                            &block)
    end # synonyms

    # Returns an Array of terms with synonyms.
    def synonym_keys(&block)
      Xapian._safelyIterate(self._dangerous_synonym_keys_begin(),
                            self._dangerous_synonym_keys_end(),
                            lambda { |item| item.term },
                            &block)
    end # synonym_keys
  end # Xapian::Database

  # Refer to the
  # {Xapian::ValueCountMatchSpy C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ValueCountMatchSpy.html]
  # for methods not specific to Ruby.
  #--
  # Wrap some dangerous iterators.
  class Xapian::ValueCountMatchSpy
    # Returns an Array of all the values seen, in alphabetical order
    # Each value is wrapped as a Xapian::Term with wdf set to 0; if a block
    # is given the Terms are yielded instead.
    def values(&block)
      Xapian._safelyIterate(self._dangerous_values_begin(),
                            self._dangerous_values_end(),
                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
                            &block)
    end # values

    # Returns an Array of the top values seen, by frequency
    # maxvalues is passed through to the underlying top_values iterators.
    def top_values(maxvalues, &block)
      Xapian._safelyIterate(self._dangerous_top_values_begin(maxvalues),
                            self._dangerous_top_values_end(maxvalues),
                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
                            &block)
    end # top_values
  end # Xapian::ValueCountMatchSpy

  # Refer to the
  # {Xapian::LatLongCoords C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1LatLongCoords.html]
  # for methods not specific to Ruby.
  #--
  # Wrap some dangerous iterators.
  class Xapian::LatLongCoords
    # Returns an Array of all the coordinates held by this object (the result
    # of get_coord() at each iterator position), or yields each one if a
    # block is given.
    def all(&block)
      Xapian._safelyIterate(self._begin(),
                            self._end(),
                            lambda { |item| item.get_coord() },
                            &block)
    end # all
  end # Xapian::LatLongCoords

  class Xapian::QueryParser
    # Returns an Array of all words in the query ignored as stopwords.
    def stoplist(&block)
      Xapian._safelyIterate(self._dangerous_stoplist_begin(),
                            self._dangerous_stoplist_end(),
                            lambda { |item| item.term },
                            &block)
    end # stoplist

    # Returns an Array of all words in the query which stem to a given term.
    def unstem(term, &block)
      Xapian._safelyIterate(self._dangerous_unstem_begin(term),
                            self._dangerous_unstem_end(term),
                            lambda { |item| item.term },
                            &block)
    end # unstem
  end # Xapian::QueryParser

  # Compatibility wrapping for Xapian::BAD_VALUENO (wrapped as a constant since
  # xapian-bindings 1.4.10).
  #
  # Keeps the method-call spelling Xapian::BAD_VALUENO() working by returning
  # the constant of the same name.
  def Xapian::BAD_VALUENO()
    return Xapian::BAD_VALUENO
  end

end # Xapian module

# Refer to the
# {Xapian::LatLongCoord C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1LatLongCoord.html].
class Xapian::LatLongCoord
end

# Refer to the
# {Xapian::MultiValueKeyMaker::KeySpec C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1MultiValueKeyMaker::KeySpec.html].
class Xapian::MultiValueKeyMaker::KeySpec
end