1# :title:Ruby Xapian bindings
2# =Ruby Xapian bindings
3#
4# Original version by Paul Legato (plegato@nks.net), 4/20/06.
5#
6# Copyright (C) 2006 Networked Knowledge Systems, Inc.
7# Copyright (C) 2008,2011,2019 Olly Betts
8# Copyright (C) 2010 Richard Boulton
9#
10# This program is free software; you can redistribute it and/or
11# modify it under the terms of the GNU General Public License as
12# published by the Free Software Foundation; either version 2 of the
13# License, or (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
23# USA
24#
25# ==Underscore methods
26# Note: Methods whose names start with an underscore character _ are internal
27# methods from the C++ API. Their functionality is not accessible in a
28# Ruby-friendly way, so this file provides wrapper code to make it easier to
29# use them from a Ruby programming idiom.  Most are also dangerous insofar as
30# misusing them can cause your program to segfault.  In particular, all of
31# Xapian's *Iterator classes are wrapped into nice Ruby-friendly Arrays.
32#
33# It should never be necessary to use any method whose name starts with an
34# underscore from user-level code. Make sure you are _VERY_ certain that you
35# know exactly what you're doing if you do use one of these methods. Beware.
36# You've been warned...
37#
38
39
40module Xapian
41  ######## load the SWIG-generated library
42  require '_xapian'
43
44
45  # iterate over two dangerous iterators (i.e. those that can cause segfaults
46  # if used improperly.)
47  # If block_given? then the results are fed to it one by one, otherwise the
48  # results are returned as an Array.
49  # Users should never need to use this method.
50  #
51  # wrapper is a lambda that returns some appropriate Ruby object to wrap the
52  # results from the underlying Iterator
53  def _safelyIterate(dangerousStart, dangerousEnd, wrapper) #:nodoc:
54    item = dangerousStart
55    if block_given?
56      while not item.equals(dangerousEnd) do
57        yield wrapper.call(item)
58        item.next()
59      end
60    else
61      retval = Array.new
62      while not item.equals(dangerousEnd) do
63        retval.push(wrapper.call(item))
64        item.next()
65      end
66      return retval
67    end
68  end # _safelyIterate
69  module_function :_safelyIterate
70
71  #--
72  ### safe Ruby wrapper for the dangerous C++ Xapian::TermIterator class
73  class Xapian::Term
74    attr_accessor :term, :wdf, :termfreq
75
76    def initialize(term, wdf=nil, termfreq=nil)
77      @term = term
78      @wdf = wdf
79      @termfreq = termfreq
80    end
81
82    def ==(other)
83      return other.is_a?(Xapian::Term) && other.term == @term && other.wdf == @wdf && other.termfreq == @termfreq
84    end
85  end # class Term
86
87  ### Ruby wrapper for a Match, i.e. a Xapian::MSetIterator (Match Set) in C++.
88  # it's no longer an iterator in the Ruby version, but we want to preserve its
89  # non-iterative data.
90  # (MSetIterator is not dangerous, but it is inconvenient to use from a Ruby
91  # idiom, so we wrap it..)
92  class Xapian::Match
93    attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent
94
95    def initialize(docid, document, rank, weight, collapse_count, percent)
96      @docid = docid
97      @document = document
98      @rank = rank
99      @weight = weight
100      @collapse_count = collapse_count
101      @percent = percent
102    end # initialize
103
104    def ==(other)
105      return other.is_a?(Xapian::Match) && other.docid == @docid && other.rank == @rank &&
106        other.weight == @weight && other.collapse_count == @collapse_count && other.percent == @percent
107    end
108  end # class Xapian::Match
109
110  # Ruby wrapper for an ExpandTerm, i.e. a Xapian::ESetIterator in C++
111  # Not dangerous, but inconvenient to use from a Ruby programming idiom, so we
112  # wrap it.
113  class Xapian::ExpandTerm
114    attr_accessor :name, :weight
115
116    def initialize(name, weight)
117      @name = name
118      @weight = weight
119    end # initialize
120
121    def ==(other)
122      return other.is_a?(Xapian::ExpandTerm) && other.name == @name && other.weight == @weight
123    end
124
125  end # Xapian::ExpandTerm
126
127  # Ruby wrapper for Xapian::ValueIterator
128  class Xapian::Value
129    attr_accessor :value, :valueno, :docid
130
131    def initialize(value, valueno, docid)
132      @value = value
133      @valueno = valueno
134      @docid = docid
135    end # initialize
136
137    def ==(other)
138      return other.is_a?(Xapian::Value) && other.value == @value && other.valueno == @valueno && other.docid == @docid
139    end
140  end # Xapian::Value
141
142  # Refer to the
143  # {Xapian::Document C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Document.html]
144  # for methods not specific to Ruby.
145  #--
146  # Extend Xapian::Document with a nice wrapper for its nasty input_iterators
147  class Xapian::Document
148    def terms(&block)
149      Xapian._safelyIterate(self._dangerous_termlist_begin(),
150                            self._dangerous_termlist_end(),
151                            lambda {
152                              |item| Xapian::Term.new(item.term, item.wdf)
153                            },
154                            &block)
155    end # terms
156
157    def values(&block)
158      Xapian._safelyIterate(self._dangerous_values_begin(),
159                            self._dangerous_values_end(),
160                            lambda {
161                              |item| Xapian::Value.new(item.value,
162                                                       item.valueno,
163                                                       0)
164                            },
165                            &block)
166    end # values
167
168  end # class Xapian::Document
169
170  # Refer to the
171  # {Xapian::Query C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Query.html]
172  # for methods not specific to Ruby.
173  #--
174  # Extend Xapian::Query with a nice wrapper for its dangerous iterators
175  class Xapian::Query
176    def terms(&block)
177      # termfreq is not supported by TermIterators from Queries
178      Xapian._safelyIterate(self._dangerous_terms_begin(),
179                            self._dangerous_terms_end(),
180                            lambda {
181                              |item| Xapian::Term.new(item.term, item.wdf)
182                            },
183                            &block)
184    end # terms
185
186    def unique_terms(&block)
187      # termfreq is not supported by TermIterators from Queries
188      Xapian._safelyIterate(self._dangerous_unique_terms_begin(),
189                            self._dangerous_unique_terms_end(),
190                            lambda {
191                              |item| Xapian::Term.new(item.term, item.wdf)
192                            },
193                            &block)
194    end # unique_terms
195  end # Xapian::Query
196
197  # Refer to the
198  # {Xapian::Enquire C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Enquire.html]
199  # for methods not specific to Ruby.
200  #--
201  # Extend Xapian::Enquire with a nice wrapper for its dangerous iterators
202  class Xapian::Enquire
203    # Get matching terms for some document.
204    # document can be either a Xapian::DocID or a Xapian::MSetIterator
205    def matching_terms(document, &block)
206      Xapian._safelyIterate(self._dangerous_matching_terms_begin(document),
207                            self._dangerous_matching_terms_end(document),
208                            lambda { |item| Xapian::Term.new(item.term, item.wdf) },
209                            &block)
210    end # matching_terms
211  end # Xapian::Enquire
212
213  # Refer to the
214  # {Xapian::MSet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1MSet.html]
215  # for methods not specific to Ruby.
216  #--
217  # MSetIterators are not dangerous, just inconvenient to use within a Ruby
218  # programming idiom. So we wrap them.
219  class Xapian::MSet
220    def matches(&block)
221      Xapian._safelyIterate(self._begin(),
222                            self._end(),
223                            lambda { |item| Xapian::Match.new(item.docid, item.document, item.rank, item.weight, item.collapse_count, item.percent) },
224                            &block)
225    end # matches
226  end # Xapian::MSet
227
228  # Refer to the
229  # {Xapian::ESet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ESet.html]
230  # for methods not specific to Ruby.
231  #--
232  # ESetIterators are not dangerous, just inconvenient to use within a Ruby
233  # programming idiom. So we wrap them.
234  class Xapian::ESet
235    def terms(&block)
236      # note: in the ExpandTerm wrapper, we implicitly rename
237      # ESetIterator#term() (defined in xapian-headers.i) to ExpandTerm#term()
238      Xapian._safelyIterate(self._begin(),
239                            self._end(),
240                            lambda { |item| Xapian::ExpandTerm.new(item.term, item.weight) },
241                            &block)
242    end # terms
243  end # Xapian::ESet
244
245
246  #--
247  # Wrapper for the C++ class Xapian::PostingIterator
248  class Xapian::Posting
249    attr_accessor :docid, :doclength, :wdf
250
251    def initialize(docid, doclength, wdf)
252      @docid = docid
253      @doclength = doclength
254      @wdf = wdf
255    end
256
257    def ==(other)
258      return other.is_a?(Xapian::Posting) && other.docid == @docid && other.doclength == @doclength &&
259        other.wdf == @wdf
260    end
261  end # Xapian::Posting
262
263  # Refer to the
264  # {Xapian::Database C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Database.html]
265  # for methods not specific to Ruby.
266  #--
267  # Wrap some dangerous iterators.
268  class Xapian::Database
269    # Returns an Array of all Xapian::Terms for this database.
270    def allterms(pref = '', &block)
271      Xapian._safelyIterate(self._dangerous_allterms_begin(pref),
272                            self._dangerous_allterms_end(pref),
273                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
274                            &block)
275    end # allterms
276
277    # Returns an Array of all metadata keys for this database.
278    def metadata_keys(pref = '', &block)
279      Xapian._safelyIterate(self._dangerous_metadata_keys_begin(pref),
280                            self._dangerous_metadata_keys_end(pref),
281                            lambda { |item| item.term },
282                            &block)
283    end # metadata_keys
284
285    # Returns an Array of Xapian::Postings for the given term.
286    # term is a string.
287    def postlist(term, &block)
288      Xapian._safelyIterate(self._dangerous_postlist_begin(term),
289                            self._dangerous_postlist_end(term),
290                            lambda { |item| Xapian::Posting.new(item.docid, item.doclength, item.wdf) },
291                            &block)
292    end # postlist(term)
293
294    # Returns an Array of Terms for the given docid.
295    def termlist(docid, &block)
296      Xapian._safelyIterate(self._dangerous_termlist_begin(docid),
297                            self._dangerous_termlist_end(docid),
298                            lambda { |item| Xapian::Term.new(item.term, item.wdf, item.termfreq) },
299                            &block)
300    end # termlist(docid)
301
302    # Returns an Array of term positions for the given term (a String)
303    # in the given docid.
304    def positionlist(docid, term, &block)
305      Xapian._safelyIterate(self._dangerous_positionlist_begin(docid, term),
306                            self._dangerous_positionlist_end(docid, term),
307                            lambda { |item| item.termpos },
308                            &block)
309    end # positionlist
310
311    # Returns an Array of Xapian::Value objects for the given slot in the
312    # database.
313    def valuestream(slot, &block)
314      Xapian._safelyIterate(self._dangerous_valuestream_begin(slot),
315                            self._dangerous_valuestream_end(slot),
316                            lambda { |item| Xapian::Value.new(item.value, slot, item.docid) },
317                            &block)
318    end # valuestream(slot)
319
320    # Returns an Array of Xapian::Term objects for the spelling dictionary.
321    def spellings(&block)
322      Xapian._safelyIterate(self._dangerous_spellings_begin(),
323                            self._dangerous_spellings_end(),
324                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
325                            &block)
326    end # spellings
327
328    # Returns an Array of synonyms of the given term.
329    def synonyms(term, &block)
330      Xapian._safelyIterate(self._dangerous_synonyms_begin(term),
331                            self._dangerous_synonyms_end(term),
332                            lambda { |item| item.term },
333                            &block)
334    end # synonyms
335
336    # Returns an Array of terms with synonyms.
337    def synonym_keys(&block)
338      Xapian._safelyIterate(self._dangerous_synonym_keys_begin(),
339                            self._dangerous_synonym_keys_end(),
340                            lambda { |item| item.term },
341                            &block)
342    end # synonym_keys
343  end # Xapian::Database
344
345  # Refer to the
346  # {Xapian::ValueCountMatchSpy C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ValueCountMatchSpy.html]
347  # for methods not specific to Ruby.
348  #--
349  # Wrap some dangerous iterators.
350  class Xapian::ValueCountMatchSpy
351    # Returns an Array of all the values seen, in alphabetical order
352    def values(&block)
353      Xapian._safelyIterate(self._dangerous_values_begin(),
354                            self._dangerous_values_end(),
355                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
356                            &block)
357    end # values
358
359    # Returns an Array of the top values seen, by frequency
360    def top_values(maxvalues, &block)
361      Xapian._safelyIterate(self._dangerous_top_values_begin(maxvalues),
362                            self._dangerous_top_values_end(maxvalues),
363                            lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
364                            &block)
365    end # top_values
366  end # Xapian::Database
367
368  # Refer to the
369  # {Xapian::LatLongCoords C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1LatLongCoords.html]
370  # for methods not specific to Ruby.
371  #--
372  # Wrap some dangerous iterators.
373  class Xapian::LatLongCoords
374    # Returns an Array of all the values seen, in alphabetical order
375    def all(&block)
376      Xapian._safelyIterate(self._begin(),
377                            self._end(),
378                            lambda { |item| item.get_coord() },
379                            &block)
380    end # allterms
381  end # Xapian::LatLongCoords
382
383  class Xapian::QueryParser
384    # Returns an Array of all words in the query ignored as stopwords.
385    def stoplist(&block)
386      Xapian._safelyIterate(self._dangerous_stoplist_begin(),
387                            self._dangerous_stoplist_end(),
388                            lambda { |item| item.term },
389                            &block)
390    end # stoplist
391
392    # Returns an Array of all words in the query which stem to a given term.
393    def unstem(term, &block)
394      Xapian._safelyIterate(self._dangerous_unstem_begin(term),
395                            self._dangerous_unstem_end(term),
396                            lambda { |item| item.term },
397                            &block)
398    end # unstem
399  end # Xapian::QueryParser
400
401  # Compatibility wrapping for Xapian::BAD_VALUENO (wrapped as a constant since
402  # xapian-bindings 1.4.10).
403  def Xapian::BAD_VALUENO()
404    return Xapian::BAD_VALUENO
405  end
406
407end # Xapian module
408  # Refer to the
409  # {Xapian::LatLongCoord C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1LatLongCoord.html].
  class Xapian::LatLongCoord
    # No Ruby-specific methods are added to this class here.
    # NOTE(review): presumably reopened only so that the RDoc comment above
    # has a class to attach to -- confirm.
  end
412  # Refer to the
413  # {Xapian::MultiValueKeyMaker::KeySpec C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1MultiValueKeyMaker::KeySpec.html].
  class Xapian::MultiValueKeyMaker::KeySpec
    # No Ruby-specific methods are added to this class here.
    # NOTE(review): presumably reopened only so that the RDoc comment above
    # has a class to attach to -- confirm.
  end
416