1 /*
2 * Copyright (C) 2017 Matthieu Gautier
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
11 * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
12 * NON-INFRINGEMENT. See the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 */
19
20 #include "xapian/myhtmlparse.h"
21 #include <zim/search_iterator.h>
22 #include <zim/search.h>
23 #include <zim/file.h>
24 #include "search_internal.h"
25
26 namespace zim {
27
28
29 search_iterator::~search_iterator() = default;
30 search_iterator::search_iterator(search_iterator&& it) = default;
31 search_iterator& search_iterator::operator=(search_iterator&& it) = default;
32
search_iterator()33 search_iterator::search_iterator() : search_iterator(nullptr)
34 {};
35
search_iterator(InternalData * internal_data)36 search_iterator::search_iterator(InternalData* internal_data)
37 : internal(internal_data)
38 {}
39
// Copy constructor: when `it` holds internal data, a fresh copy of that data
// is allocated for the new iterator; otherwise the new iterator holds none.
search_iterator::search_iterator(const search_iterator& it)
  : internal(it.internal ? new InternalData(*it.internal) : nullptr)
{}
45
// Copy assignment: makes this iterator's internal data a copy of the one
// held by `it` (or drops it when `it` holds none).
search_iterator& search_iterator::operator=(const search_iterator& it)
{
  if (it.internal) {
    if (internal) {
      // Both sides already hold data: copy-assign in place.
      *internal = *it.internal;
    } else {
      // Nothing to assign into yet: allocate a copy.
      internal.reset(new InternalData(*it.internal));
    }
  } else {
    internal.reset();
  }

  return *this;
}
53
operator ==(const search_iterator & it) const54 bool search_iterator::operator==(const search_iterator& it) const {
55 #if defined(ENABLE_XAPIAN)
56 if ( ! internal && ! it.internal)
57 return true;
58 if ( ! internal || ! it.internal)
59 return false;
60 return (internal->search == it.internal->search
61 && internal->iterator == it.internal->iterator);
62 #else
63 // If there is no xapian, there is no search. There is only one iterator: end.
64 // So all iterators are equal.
65 return true;
66 #endif
67 }
68
operator !=(const search_iterator & it) const69 bool search_iterator::operator!=(const search_iterator& it) const {
70 return ! (*this == it);
71 }
72
operator ++()73 search_iterator& search_iterator::operator++() {
74 #if defined(ENABLE_XAPIAN)
75 if ( ! internal ) {
76 return *this;
77 }
78 ++(internal->iterator);
79 internal->document_fetched = false;
80 internal->article_fetched = false;
81 #endif
82 return *this;
83 }
84
operator ++(int)85 search_iterator search_iterator::operator++(int) {
86 search_iterator it = *this;
87 operator++();
88 return it;
89 }
90
operator --()91 search_iterator& search_iterator::operator--() {
92 #if defined(ENABLE_XAPIAN)
93 if ( ! internal ) {
94 return *this;
95 }
96 --(internal->iterator);
97 internal->document_fetched = false;
98 internal->article_fetched = false;
99 #endif
100 return *this;
101 }
102
operator --(int)103 search_iterator search_iterator::operator--(int) {
104 search_iterator it = *this;
105 operator--();
106 return it;
107 }
108
get_url() const109 std::string search_iterator::get_url() const {
110 #if defined(ENABLE_XAPIAN)
111 if ( ! internal ) {
112 return "";
113 }
114 return internal->get_document().get_data();
115 #else
116 return "";
117 #endif
118 }
119
get_title() const120 std::string search_iterator::get_title() const {
121 #if defined(ENABLE_XAPIAN)
122 if ( ! internal ) {
123 return "";
124 }
125 if ( internal->search->valuesmap.empty() )
126 {
127 /* This is the old legacy version. Guess and try */
128 return internal->get_document().get_value(0);
129 }
130 else if ( internal->search->valuesmap.find("title") != internal->search->valuesmap.end() )
131 {
132 return internal->get_document().get_value(internal->search->valuesmap["title"]);
133 }
134 #endif
135 return "";
136 }
137
get_score() const138 int search_iterator::get_score() const {
139 #if defined(ENABLE_XAPIAN)
140 if ( ! internal ) {
141 return 0;
142 }
143 return internal->iterator.get_percent();
144 #else
145 return 0;
146 #endif
147 }
148
get_snippet() const149 std::string search_iterator::get_snippet() const {
150 #if defined(ENABLE_XAPIAN)
151 if ( ! internal ) {
152 return "";
153 }
154 if ( internal->search->valuesmap.empty() )
155 {
156 /* This is the old legacy version. Guess and try */
157 std::string stored_snippet = internal->get_document().get_value(1);
158 if ( ! stored_snippet.empty() )
159 return stored_snippet;
160 /* Let's continue here, and see if we can genenate one */
161 }
162 else if ( internal->search->valuesmap.find("snippet") != internal->search->valuesmap.end() )
163 {
164 return internal->get_document().get_value(internal->search->valuesmap["snippet"]);
165 }
166 /* No reader, no snippet */
167 Article& article = internal->get_article();
168 if ( ! article.good() )
169 return "";
170 /* Get the content of the article to generate a snippet.
171 We parse it and use the html dump to avoid remove html tags in the
172 content and be able to nicely cut the text at random place. */
173 zim::MyHtmlParser htmlParser;
174 std::string content = article.getData();
175 try {
176 htmlParser.parse_html(content, "UTF-8", true);
177 } catch (...) {}
178 return internal->search->internal->results.snippet(htmlParser.dump, 500);
179 #else
180 return "";
181 #endif
182 }
183
get_size() const184 int search_iterator::get_size() const {
185 #if defined(ENABLE_XAPIAN)
186 if ( ! internal ) {
187 return -1;
188 }
189 if ( internal->search->valuesmap.empty() )
190 {
191 /* This is the old legacy version. Guess and try */
192 return internal->get_document().get_value(2).empty() == true ? -1 : atoi(internal->get_document().get_value(2).c_str());
193 }
194 else if ( internal->search->valuesmap.find("size") != internal->search->valuesmap.end() )
195 {
196 return atoi(internal->get_document().get_value(internal->search->valuesmap["size"]).c_str());
197 }
198 #endif
199 /* The size is never used. Do we really want to get the content and
200 calculate the size ? */
201 return -1;
202 }
203
get_wordCount() const204 int search_iterator::get_wordCount() const {
205 #if defined(ENABLE_XAPIAN)
206 if ( ! internal ) {
207 return -1;
208 }
209 if ( internal->search->valuesmap.empty() )
210 {
211 /* This is the old legacy version. Guess and try */
212 return internal->get_document().get_value(3).empty() == true ? -1 : atoi(internal->get_document().get_value(3).c_str());
213 }
214 else if ( internal->search->valuesmap.find("wordcount") != internal->search->valuesmap.end() )
215 {
216 return atoi(internal->get_document().get_value(internal->search->valuesmap["wordcount"]).c_str());
217 }
218 #endif
219 return -1;
220 }
221
get_fileIndex() const222 int search_iterator::get_fileIndex() const {
223 #if defined(ENABLE_XAPIAN)
224 if ( internal ) {
225 return internal->get_databasenumber();
226 }
227 #endif
228 return 0;
229 }
230
operator *() const231 search_iterator::reference search_iterator::operator*() const {
232 return internal->get_article();
233 }
234
operator ->() const235 search_iterator::pointer search_iterator::operator->() const {
236 return &internal->get_article();
237 }
238
239 } // namespace zim
240