1 /*
2  * Copyright (C) 2017 Matthieu Gautier
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
17  *
18  */
19 
20 #include "xapian/myhtmlparse.h"
21 #include <zim/search_iterator.h>
22 #include <zim/search.h>
23 #include <zim/file.h>
24 #include "search_internal.h"
25 
26 namespace zim {
27 
28 
29 search_iterator::~search_iterator() = default;
30 search_iterator::search_iterator(search_iterator&& it) = default;
31 search_iterator& search_iterator::operator=(search_iterator&& it) = default;
32 
search_iterator()33 search_iterator::search_iterator() : search_iterator(nullptr)
34 {};
35 
search_iterator(InternalData * internal_data)36 search_iterator::search_iterator(InternalData* internal_data)
37   : internal(internal_data)
38 {}
39 
/* Copy constructor: the new iterator gets its own copy of the source's
   InternalData, or stays without internal data when the source has none. */
search_iterator::search_iterator(const search_iterator& it)
  : internal(it.internal ? new InternalData(*it.internal) : nullptr)
{
}
45 
/* Copy assignment.
   - source has no internal data: drop ours;
   - we already hold internal data: copy-assign the source's data into it;
   - otherwise: allocate a fresh copy of the source's data. */
search_iterator& search_iterator::operator=(const search_iterator& it)
{
    if (it.internal) {
        if (internal) {
            *internal = *it.internal;
        } else {
            internal.reset(new InternalData(*it.internal));
        }
    } else {
        internal.reset();
    }
    return *this;
}
53 
operator ==(const search_iterator & it) const54 bool search_iterator::operator==(const search_iterator& it) const {
55 #if defined(ENABLE_XAPIAN)
56     if ( ! internal && ! it.internal)
57         return true;
58     if ( ! internal || ! it.internal)
59         return false;
60     return (internal->search == it.internal->search
61          && internal->iterator == it.internal->iterator);
62 #else
63     // If there is no xapian, there is no search. There is only one iterator: end.
64     // So all iterators are equal.
65     return true;
66 #endif
67 }
68 
operator !=(const search_iterator & it) const69 bool search_iterator::operator!=(const search_iterator& it) const {
70     return ! (*this == it);
71 }
72 
operator ++()73 search_iterator& search_iterator::operator++() {
74 #if defined(ENABLE_XAPIAN)
75     if ( ! internal ) {
76         return *this;
77     }
78     ++(internal->iterator);
79     internal->document_fetched = false;
80     internal->article_fetched = false;
81 #endif
82     return *this;
83 }
84 
operator ++(int)85 search_iterator search_iterator::operator++(int) {
86     search_iterator it = *this;
87     operator++();
88     return it;
89 }
90 
operator --()91 search_iterator& search_iterator::operator--() {
92 #if defined(ENABLE_XAPIAN)
93     if ( ! internal ) {
94         return *this;
95     }
96     --(internal->iterator);
97     internal->document_fetched = false;
98     internal->article_fetched = false;
99 #endif
100     return *this;
101 }
102 
operator --(int)103 search_iterator search_iterator::operator--(int) {
104     search_iterator it = *this;
105     operator--();
106     return it;
107 }
108 
get_url() const109 std::string search_iterator::get_url() const {
110 #if defined(ENABLE_XAPIAN)
111     if ( ! internal ) {
112         return "";
113     }
114     return internal->get_document().get_data();
115 #else
116     return "";
117 #endif
118 }
119 
get_title() const120 std::string search_iterator::get_title() const {
121 #if defined(ENABLE_XAPIAN)
122     if ( ! internal ) {
123         return "";
124     }
125     if ( internal->search->valuesmap.empty() )
126     {
127         /* This is the old legacy version. Guess and try */
128         return internal->get_document().get_value(0);
129     }
130     else if ( internal->search->valuesmap.find("title") != internal->search->valuesmap.end() )
131     {
132         return internal->get_document().get_value(internal->search->valuesmap["title"]);
133     }
134 #endif
135     return "";
136 }
137 
get_score() const138 int search_iterator::get_score() const {
139 #if defined(ENABLE_XAPIAN)
140     if ( ! internal ) {
141         return 0;
142     }
143     return internal->iterator.get_percent();
144 #else
145     return 0;
146 #endif
147 }
148 
get_snippet() const149 std::string search_iterator::get_snippet() const {
150 #if defined(ENABLE_XAPIAN)
151     if ( ! internal ) {
152         return "";
153     }
154     if ( internal->search->valuesmap.empty() )
155     {
156         /* This is the old legacy version. Guess and try */
157         std::string stored_snippet = internal->get_document().get_value(1);
158         if ( ! stored_snippet.empty() )
159             return stored_snippet;
160         /* Let's continue here, and see if we can genenate one */
161     }
162     else if ( internal->search->valuesmap.find("snippet") != internal->search->valuesmap.end() )
163     {
164         return internal->get_document().get_value(internal->search->valuesmap["snippet"]);
165     }
166     /* No reader, no snippet */
167     Article& article = internal->get_article();
168     if ( ! article.good() )
169         return "";
170     /* Get the content of the article to generate a snippet.
171        We parse it and use the html dump to avoid remove html tags in the
172        content and be able to nicely cut the text at random place. */
173     zim::MyHtmlParser htmlParser;
174     std::string content = article.getData();
175     try {
176         htmlParser.parse_html(content, "UTF-8", true);
177     } catch (...) {}
178     return internal->search->internal->results.snippet(htmlParser.dump, 500);
179 #else
180     return "";
181 #endif
182 }
183 
get_size() const184 int search_iterator::get_size() const {
185 #if defined(ENABLE_XAPIAN)
186     if ( ! internal ) {
187         return -1;
188     }
189     if ( internal->search->valuesmap.empty() )
190     {
191         /* This is the old legacy version. Guess and try */
192         return internal->get_document().get_value(2).empty() == true ? -1 : atoi(internal->get_document().get_value(2).c_str());
193     }
194     else if ( internal->search->valuesmap.find("size") != internal->search->valuesmap.end() )
195     {
196         return atoi(internal->get_document().get_value(internal->search->valuesmap["size"]).c_str());
197     }
198 #endif
199     /* The size is never used. Do we really want to get the content and
200        calculate the size ? */
201     return -1;
202 }
203 
get_wordCount() const204 int search_iterator::get_wordCount() const      {
205 #if defined(ENABLE_XAPIAN)
206     if ( ! internal ) {
207         return -1;
208     }
209     if ( internal->search->valuesmap.empty() )
210     {
211         /* This is the old legacy version. Guess and try */
212         return internal->get_document().get_value(3).empty() == true ? -1 : atoi(internal->get_document().get_value(3).c_str());
213     }
214     else if ( internal->search->valuesmap.find("wordcount") != internal->search->valuesmap.end() )
215     {
216         return atoi(internal->get_document().get_value(internal->search->valuesmap["wordcount"]).c_str());
217     }
218 #endif
219     return -1;
220 }
221 
get_fileIndex() const222 int search_iterator::get_fileIndex() const {
223 #if defined(ENABLE_XAPIAN)
224     if ( internal ) {
225         return internal->get_databasenumber();
226     }
227 #endif
228     return 0;
229 }
230 
operator *() const231 search_iterator::reference search_iterator::operator*() const {
232     return internal->get_article();
233 }
234 
operator ->() const235 search_iterator::pointer search_iterator::operator->() const {
236     return &internal->get_article();
237 }
238 
239 } // namespace zim
240