1 /*
2 * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 3 of the License, or
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17 * MA 02110-1301, USA.
18 */
19
20 #include "reader.h"
21 #include <time.h>
22
23 #include <zim/search.h>
24
25 #include "tools/otherTools.h"
26
/* Return the lowercase hexadecimal digit encoding the upper four bits of v. */
inline char hi(char v)
{
  static const char kHexDigits[] = "0123456789abcdef";
  /* Work on the unsigned byte value so the shift never sees a sign bit. */
  const unsigned int upperNibble = (static_cast<unsigned char>(v) >> 4) & 0xfu;
  return kHexDigits[upperNibble];
}
32
/* Return the lowercase hexadecimal digit encoding the lower four bits of v. */
inline char lo(char v)
{
  static const char kHexDigits[] = "0123456789abcdef";
  const unsigned int lowerNibble = static_cast<unsigned char>(v) & 0xfu;
  return kHexDigits[lowerNibble];
}
38
hexUUID(std::string in)39 std::string hexUUID(std::string in)
40 {
41 std::ostringstream out;
42 for (unsigned n = 0; n < 4; ++n) {
43 out << hi(in[n]) << lo(in[n]);
44 }
45 out << '-';
46 for (unsigned n = 4; n < 6; ++n) {
47 out << hi(in[n]) << lo(in[n]);
48 }
49 out << '-';
50 for (unsigned n = 6; n < 8; ++n) {
51 out << hi(in[n]) << lo(in[n]);
52 }
53 out << '-';
54 for (unsigned n = 8; n < 10; ++n) {
55 out << hi(in[n]) << lo(in[n]);
56 }
57 out << '-';
58 for (unsigned n = 10; n < 16; ++n) {
59 out << hi(in[n]) << lo(in[n]);
60 }
61 std::string op = out.str();
62 return op;
63 }
64
65 namespace kiwix
66 {
67 /* Constructor */
Reader(const string zimFilePath)68 Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
69 {
70 string tmpZimFilePath = zimFilePath;
71
72 /* Remove potential trailing zimaa */
73 size_t found = tmpZimFilePath.rfind("zimaa");
74 if (found != string::npos && tmpZimFilePath.size() > 5
75 && found == tmpZimFilePath.size() - 5) {
76 tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
77 }
78
79 this->zimFileHandler = new zim::File(tmpZimFilePath);
80
81 if (this->zimFileHandler != NULL) {
82 this->firstArticleOffset
83 = this->zimFileHandler->getNamespaceBeginOffset('A');
84 this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
85 this->nsACount = this->zimFileHandler->getNamespaceCount('A');
86 this->nsICount = this->zimFileHandler->getNamespaceCount('I');
87 this->zimFilePath = zimFilePath;
88 }
89
90 /* initialize random seed: */
91 srand(time(NULL));
92 }
93
94 /* Destructor */
~Reader()95 Reader::~Reader()
96 {
97 if (this->zimFileHandler != NULL) {
98 delete this->zimFileHandler;
99 }
100 }
101
getZimFileHandler() const102 zim::File* Reader::getZimFileHandler() const
103 {
104 return this->zimFileHandler;
105 }
106
parseCounterMetadata() const107 MimeCounterType Reader::parseCounterMetadata() const
108 {
109 zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
110
111 if (article.good()) {
112 return parseMimetypeCounter(article.getData());
113 }
114
115 return MimeCounterType();
116 }
117
118 /* Get the count of articles which can be indexed/displayed */
getArticleCount() const119 unsigned int Reader::getArticleCount() const
120 {
121 std::map<const std::string, unsigned int> counterMap
122 = this->parseCounterMetadata();
123 unsigned int counter = 0;
124
125 if (counterMap.empty()) {
126 counter = this->nsACount;
127 } else {
128 for(auto &pair:counterMap) {
129 if (startsWith(pair.first, "text/html")) {
130 counter += pair.second;
131 }
132 }
133 }
134
135 return counter;
136 }
137
138 /* Get the count of medias content in the ZIM file */
getMediaCount() const139 unsigned int Reader::getMediaCount() const
140 {
141 std::map<const std::string, unsigned int> counterMap
142 = this->parseCounterMetadata();
143 unsigned int counter = 0;
144
145 if (counterMap.empty()) {
146 counter = this->nsICount;
147 } else {
148 auto it = counterMap.find("image/jpeg");
149 if (it != counterMap.end()) {
150 counter += it->second;
151 }
152
153 it = counterMap.find("image/gif");
154 if (it != counterMap.end()) {
155 counter += it->second;
156 }
157
158 it = counterMap.find("image/png");
159 if (it != counterMap.end()) {
160 counter += it->second;
161 }
162 }
163 return counter;
164 }
165
166 /* Get the total of all items of a ZIM file, redirects included */
getGlobalCount() const167 unsigned int Reader::getGlobalCount() const
168 {
169 return this->zimFileHandler->getCountArticles();
170 }
171
172 /* Return the UID of the ZIM file */
getId() const173 string Reader::getId() const
174 {
175 std::ostringstream s;
176 s << this->zimFileHandler->getFileheader().getUuid();
177 return s.str();
178 }
179
180 /* Return a page url from a title */
getPageUrlFromTitle(const string & title,string & url) const181 bool Reader::getPageUrlFromTitle(const string& title, string& url) const
182 {
183 try {
184 auto entry = getEntryFromTitle(title);
185 entry = entry.getFinalEntry();
186 url = entry.getPath();
187 return true;
188 } catch (NoEntry& e) {
189 return false;
190 }
191 }
192
193 /* Return an URL from a title */
getRandomPageUrl() const194 string Reader::getRandomPageUrl() const
195 {
196 return getRandomPage().getPath();
197 }
198
getRandomPage() const199 Entry Reader::getRandomPage() const
200 {
201 if (!this->zimFileHandler) {
202 throw NoEntry();
203 }
204
205 zim::Article article;
206 std::string mainPagePath = this->getMainPage().getPath();
207 int watchdog = 42;
208
209 do {
210 auto idx = this->firstArticleOffset
211 + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
212 * this->nsACount);
213 article = zimFileHandler->getArticle(idx);
214 if (!watchdog--) {
215 throw NoEntry();
216 }
217 } while (!article.good() && article.getLongUrl() == mainPagePath);
218
219 return article;
220 }
221
222 /* Return the welcome page URL */
getMainPageUrl() const223 string Reader::getMainPageUrl() const
224 {
225 return getMainPage().getPath();
226 }
227
getMainPage() const228 Entry Reader::getMainPage() const
229 {
230 if (!this->zimFileHandler) {
231 throw NoEntry();
232 }
233
234 zim::Article article;
235 if (this->zimFileHandler->getFileheader().hasMainPage())
236 {
237 article = zimFileHandler->getArticle(
238 this->zimFileHandler->getFileheader().getMainPage());
239 }
240
241 if (!article.good())
242 {
243 return getFirstPage();
244 }
245
246 return article;
247 }
248
getFavicon(string & content,string & mimeType) const249 bool Reader::getFavicon(string& content, string& mimeType) const
250 {
251 static const char* const paths[] = {"-/favicon", "-/favicon.png", "I/favicon.png", "I/favicon"};
252
253 for (auto &path: paths) {
254 try {
255 auto entry = getEntryFromPath(path);
256 entry = entry.getFinalEntry();
257 content = entry.getContent();
258 mimeType = entry.getMimetype();
259 return true;
260 } catch(NoEntry& e) {};
261 }
262
263 return false;
264 }
265
getZimFilePath() const266 string Reader::getZimFilePath() const
267 {
268 return this->zimFilePath;
269 }
270 /* Return a metatag value */
getMetadata(const string & name,string & value) const271 bool Reader::getMetadata(const string& name, string& value) const
272 {
273 try {
274 auto entry = getEntryFromPath("M/"+name);
275 value = entry.getContent();
276 return true;
277 } catch(NoEntry& e) {
278 return false;
279 }
280 }
281
282 #define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
283
getName() const284 string Reader::getName() const
285 {
286 METADATA("Name")
287 }
288
getTitle() const289 string Reader::getTitle() const
290 {
291 string value;
292 this->getMetadata("Title", value);
293 if (value.empty()) {
294 value = getLastPathElement(zimFileHandler->getFilename());
295 std::replace(value.begin(), value.end(), '_', ' ');
296 size_t pos = value.find(".zim");
297 value = value.substr(0, pos);
298 }
299 return value;
300 }
301
getCreator() const302 string Reader::getCreator() const
303 {
304 METADATA("Creator")
305 }
306
getPublisher() const307 string Reader::getPublisher() const
308 {
309 METADATA("Publisher")
310 }
311
getDate() const312 string Reader::getDate() const
313 {
314 METADATA("Date")
315 }
316
getDescription() const317 string Reader::getDescription() const
318 {
319 string value;
320 this->getMetadata("Description", value);
321
322 /* Mediawiki Collection tends to use the "Subtitle" name */
323 if (value.empty()) {
324 this->getMetadata("Subtitle", value);
325 }
326
327 return value;
328 }
329
getLongDescription() const330 string Reader::getLongDescription() const
331 {
332 METADATA("LongDescription")
333 }
334
getLanguage() const335 string Reader::getLanguage() const
336 {
337 METADATA("Language")
338 }
339
getLicense() const340 string Reader::getLicense() const
341 {
342 METADATA("License")
343 }
344
getTags(bool original) const345 string Reader::getTags(bool original) const
346 {
347 string tags_str;
348 getMetadata("Tags", tags_str);
349 if (original) {
350 return tags_str;
351 }
352 auto tags = convertTags(tags_str);
353 return join(tags, ";");
354 }
355
356
getTagStr(const std::string & tagName) const357 string Reader::getTagStr(const std::string& tagName) const
358 {
359 string tags_str;
360 getMetadata("Tags", tags_str);
361 return getTagValueFromTagList(convertTags(tags_str), tagName);
362 }
363
getTagBool(const std::string & tagName) const364 bool Reader::getTagBool(const std::string& tagName) const
365 {
366 return convertStrToBool(getTagStr(tagName));
367 }
368
getRelation() const369 string Reader::getRelation() const
370 {
371 METADATA("Relation")
372 }
373
getFlavour() const374 string Reader::getFlavour() const
375 {
376 METADATA("Flavour")
377 }
378
getSource() const379 string Reader::getSource() const
380 {
381 METADATA("Source")
382 }
383
getScraper() const384 string Reader::getScraper() const
385 {
386 METADATA("Scraper")
387 }
388 #undef METADATA
389
getOrigId() const390 string Reader::getOrigId() const
391 {
392 string value;
393 this->getMetadata("startfileuid", value);
394 if (value.empty()) {
395 return "";
396 }
397 std::string id = value;
398 std::string origID;
399 std::string temp = "";
400 unsigned int k = 0;
401 char tempArray[16] = "";
402 for (unsigned int i = 0; i < id.size(); i++) {
403 if (id[i] == '\n') {
404 tempArray[k] = atoi(temp.c_str());
405 temp = "";
406 k++;
407 } else {
408 temp += id[i];
409 }
410 }
411 origID = hexUUID(tempArray);
412 return origID;
413 }
414
415 /* Return the first page URL */
getFirstPageUrl() const416 string Reader::getFirstPageUrl() const
417 {
418 return getFirstPage().getPath();
419 }
420
getFirstPage() const421 Entry Reader::getFirstPage() const
422 {
423 if (!this->zimFileHandler) {
424 throw NoEntry();
425 }
426
427 auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
428 auto article = zimFileHandler->getArticle(firstPageOffset);
429
430 if (! article.good()) {
431 throw NoEntry();
432 }
433
434 return article;
435 }
436
/* Split a path of the form "[/]N/title" into its namespace character N and
 * its title.
 *
 * Returns true only when the path holds a one-character namespace, a '/'
 * and a non-empty title. Outputs are written progressively: *ns is set as
 * soon as a namespace character has been read (even if false is returned
 * afterwards), and `title` is only written on success. */
bool _parseUrl(const std::string& url, char* ns, std::string& title)
{
  const unsigned int length = url.size();
  unsigned int cursor = 0;

  /* Skip one optional leading '/' */
  if (cursor < length && url[cursor] == '/') {
    ++cursor;
  }

  /* A namespace character (anything but '/') is required */
  if (cursor >= length || url[cursor] == '/') {
    return false;
  }
  *ns = url[cursor];
  ++cursor;

  /* The namespace must be followed by a '/' separator */
  if (cursor >= length || url[cursor] != '/') {
    return false;
  }
  ++cursor;

  /* Whatever remains is the title, and it must not be empty */
  if (cursor >= length) {
    return false;
  }
  title = url.substr(cursor);

  return true;
}
466
parseUrl(const string & url,char * ns,string & title) const467 bool Reader::parseUrl(const string& url, char* ns, string& title) const
468 {
469 return _parseUrl(url, ns, title);
470 }
471
getEntryFromPath(const std::string & path) const472 Entry Reader::getEntryFromPath(const std::string& path) const
473 {
474 char ns = 0;
475 std::string short_url;
476
477 if (!this->zimFileHandler) {
478 throw NoEntry();
479 }
480 _parseUrl(path, &ns, short_url);
481
482 if (short_url.empty() && ns == 0) {
483 return getMainPage();
484 }
485
486 auto article = zimFileHandler->getArticle(ns, short_url);
487 if (!article.good()) {
488 throw NoEntry();
489 }
490
491 return article;
492 }
493
getEntryFromEncodedPath(const std::string & path) const494 Entry Reader::getEntryFromEncodedPath(const std::string& path) const
495 {
496 return getEntryFromPath(urlDecode(path, true));
497 }
498
getEntryFromTitle(const std::string & title) const499 Entry Reader::getEntryFromTitle(const std::string& title) const
500 {
501 if (!this->zimFileHandler) {
502 throw NoEntry();
503 }
504
505 auto article = this->zimFileHandler->getArticleByTitle('A', title);
506 if (!article.good()) {
507 throw NoEntry();
508 }
509
510 return article;
511 }
512
513 /* Return article by url */
getArticleObjectByDecodedUrl(const string & url,zim::Article & article) const514 bool Reader::getArticleObjectByDecodedUrl(const string& url,
515 zim::Article& article) const
516 {
517 if (this->zimFileHandler == NULL) {
518 return false;
519 }
520
521 /* Parse the url */
522 char ns = 0;
523 string urlStr;
524 _parseUrl(url, &ns, urlStr);
525
526 /* Main page */
527 if (urlStr.empty() && ns == 0) {
528 _parseUrl(this->getMainPage().getPath(), &ns, urlStr);
529 }
530
531 /* Extract the content from the zim file */
532 article = zimFileHandler->getArticle(ns, urlStr);
533 return article.good();
534 }
535
536 /* Return the mimeType without the content */
getMimeTypeByUrl(const string & url,string & mimeType) const537 bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
538 {
539 try {
540 auto entry = getEntryFromPath(url);
541 mimeType = entry.getMimetype();
542 return true;
543 } catch (NoEntry& e) {
544 mimeType = "";
545 return false;
546 }
547 }
548
get_content_by_decoded_url(const Reader & reader,const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl)549 bool get_content_by_decoded_url(const Reader& reader,
550 const string& url,
551 string& content,
552 string& title,
553 unsigned int& contentLength,
554 string& contentType,
555 string& baseUrl)
556 {
557 content = "";
558 contentType = "";
559 contentLength = 0;
560
561 try {
562 auto entry = reader.getEntryFromPath(url);
563 entry = entry.getFinalEntry();
564 baseUrl = entry.getPath();
565 contentType = entry.getMimetype();
566 content = entry.getContent();
567 contentLength = entry.getSize();
568 title = entry.getTitle();
569
570 /* Try to set a stub HTML header/footer if necesssary */
571 if (contentType.find("text/html") != string::npos
572 && content.find("<body") == std::string::npos
573 && content.find("<BODY") == std::string::npos) {
574 content = "<html><head><title>" + title +
575 "</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
576 "charset=utf-8\" /></head><body>" +
577 content + "</body></html>";
578 }
579 return true;
580 } catch (NoEntry& e) {
581 return false;
582 }
583 }
584
585
586 /* Get a content from a zim file */
getContentByUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const587 bool Reader::getContentByUrl(const string& url,
588 string& content,
589 string& title,
590 unsigned int& contentLength,
591 string& contentType) const
592 {
593 std::string stubRedirectUrl;
594 return get_content_by_decoded_url(*this,
595 kiwix::urlDecode(url),
596 content,
597 title,
598 contentLength,
599 contentType,
600 stubRedirectUrl);
601 }
602
getContentByEncodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl) const603 bool Reader::getContentByEncodedUrl(const string& url,
604 string& content,
605 string& title,
606 unsigned int& contentLength,
607 string& contentType,
608 string& baseUrl) const
609 {
610 return get_content_by_decoded_url(*this,
611 kiwix::urlDecode(url),
612 content,
613 title,
614 contentLength,
615 contentType,
616 baseUrl);
617 }
618
getContentByEncodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const619 bool Reader::getContentByEncodedUrl(const string& url,
620 string& content,
621 string& title,
622 unsigned int& contentLength,
623 string& contentType) const
624 {
625 std::string stubRedirectUrl;
626 return get_content_by_decoded_url(*this,
627 kiwix::urlDecode(url),
628 content,
629 title,
630 contentLength,
631 contentType,
632 stubRedirectUrl);
633 }
634
getContentByDecodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType) const635 bool Reader::getContentByDecodedUrl(const string& url,
636 string& content,
637 string& title,
638 unsigned int& contentLength,
639 string& contentType) const
640 {
641 std::string stubRedirectUrl;
642 return get_content_by_decoded_url(*this,
643 url,
644 content,
645 title,
646 contentLength,
647 contentType,
648 stubRedirectUrl);
649 }
650
getContentByDecodedUrl(const string & url,string & content,string & title,unsigned int & contentLength,string & contentType,string & baseUrl) const651 bool Reader::getContentByDecodedUrl(const string& url,
652 string& content,
653 string& title,
654 unsigned int& contentLength,
655 string& contentType,
656 string& baseUrl) const
657 {
658 return get_content_by_decoded_url(*this,
659 url,
660 content,
661 title,
662 contentLength,
663 contentType,
664 baseUrl);
665 }
666
667 /* Check if an article exists */
urlExists(const string & url) const668 bool Reader::urlExists(const string& url) const
669 {
670 return pathExists(url);
671 }
672
pathExists(const string & path) const673 bool Reader::pathExists(const string& path) const
674 {
675 if (!zimFileHandler)
676 {
677 return false;
678 }
679
680 char ns = 0;
681 string titleStr;
682 _parseUrl(path, &ns, titleStr);
683 zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
684 return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
685 }
686
687 /* Does the ZIM file has a fulltext index */
hasFulltextIndex() const688 bool Reader::hasFulltextIndex() const
689 {
690 if (!zimFileHandler || zimFileHandler->is_multiPart() )
691 {
692 return false;
693 }
694
695 return ( pathExists("Z//fulltextIndex/xapian")
696 || pathExists("X/fulltext/xapian"));
697 }
698
699 /* Search titles by prefix */
700
searchSuggestions(const string & prefix,unsigned int suggestionsCount,const bool reset)701 bool Reader::searchSuggestions(const string& prefix,
702 unsigned int suggestionsCount,
703 const bool reset)
704 {
705 /* Reset the suggestions otherwise check if the suggestions number is less
706 * than the suggestionsCount */
707 if (reset) {
708 this->suggestions.clear();
709 this->suggestionsOffset = this->suggestions.begin();
710 } else {
711 if (this->suggestions.size() > suggestionsCount) {
712 return false;
713 }
714 }
715
716 auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
717
718 /* Set the cursor to the begining */
719 this->suggestionsOffset = this->suggestions.begin();
720
721 return ret;
722 }
723
724
searchSuggestions(const string & prefix,unsigned int suggestionsCount,SuggestionsList_t & results)725 bool Reader::searchSuggestions(const string& prefix,
726 unsigned int suggestionsCount,
727 SuggestionsList_t& results)
728 {
729 bool retVal = false;
730
731 /* Return if no prefix */
732 if (prefix.size() == 0) {
733 return false;
734 }
735
736 for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
737 articleItr != zimFileHandler->end()
738 && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
739 && results.size() < suggestionsCount;
740 ++articleItr) {
741 /* Extract the interesting part of article title & url */
742 std::string normalizedArticleTitle
743 = kiwix::normalize(articleItr->getTitle());
744 std::string articleFinalUrl = "/A/" + articleItr->getUrl();
745 if (articleItr->isRedirect()) {
746 zim::Article article = *articleItr;
747 unsigned int loopCounter = 0;
748 while (article.isRedirect() && loopCounter++ < 42) {
749 article = article.getRedirectArticle();
750 }
751 articleFinalUrl = "/A/" + article.getUrl();
752 }
753
754 /* Go through all already found suggestions and skip if this
755 article is already in the suggestions list (with an other
756 title) */
757 bool insert = true;
758 std::vector<std::vector<std::string>>::iterator suggestionItr;
759 for (suggestionItr = results.begin();
760 suggestionItr != results.end();
761 suggestionItr++) {
762 int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
763 if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
764 insert = false;
765 break;
766 } else if (result < 0) {
767 break;
768 }
769 }
770
771 /* Insert if possible */
772 if (insert) {
773 std::vector<std::string> suggestion;
774 suggestion.push_back(articleItr->getTitle());
775 suggestion.push_back(articleFinalUrl);
776 suggestion.push_back(normalizedArticleTitle);
777 results.insert(suggestionItr, suggestion);
778 }
779
780 /* Suggestions where found */
781 retVal = true;
782 }
783
784 return retVal;
785 }
786
getTitleVariants(const std::string & title) const787 std::vector<std::string> Reader::getTitleVariants(
788 const std::string& title) const
789 {
790 std::vector<std::string> variants;
791 variants.push_back(title);
792 variants.push_back(kiwix::ucFirst(title));
793 variants.push_back(kiwix::lcFirst(title));
794 variants.push_back(kiwix::toTitle(title));
795 return variants;
796 }
797
798
searchSuggestionsSmart(const string & prefix,unsigned int suggestionsCount)799 bool Reader::searchSuggestionsSmart(const string& prefix,
800 unsigned int suggestionsCount)
801 {
802 this->suggestions.clear();
803 this->suggestionsOffset = this->suggestions.begin();
804
805 auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
806
807 this->suggestionsOffset = this->suggestions.begin();
808
809 return ret;
810 }
811
812 /* Try also a few variations of the prefix to have better results */
searchSuggestionsSmart(const string & prefix,unsigned int suggestionsCount,SuggestionsList_t & results)813 bool Reader::searchSuggestionsSmart(const string& prefix,
814 unsigned int suggestionsCount,
815 SuggestionsList_t& results)
816 {
817 std::vector<std::string> variants = this->getTitleVariants(prefix);
818 bool retVal = false;
819
820 /* Try to search in the title using fulltext search database */
821 const auto suggestionSearch
822 = this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
823
824 if (suggestionSearch->get_matches_estimated()) {
825 for (auto current = suggestionSearch->begin();
826 current != suggestionSearch->end();
827 current++) {
828 if (!current->good()) {
829 continue;
830 }
831 std::vector<std::string> suggestion;
832 suggestion.push_back(current->getTitle());
833 suggestion.push_back("/A/" + current->getUrl());
834 suggestion.push_back(kiwix::normalize(current->getTitle()));
835 results.push_back(suggestion);
836 }
837 retVal = true;
838 } else {
839 for (std::vector<std::string>::iterator variantsItr = variants.begin();
840 variantsItr != variants.end();
841 variantsItr++) {
842 retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
843 || retVal;
844 }
845 }
846
847 return retVal;
848 }
849
850 /* Get next suggestion */
getNextSuggestion(string & title)851 bool Reader::getNextSuggestion(string& title)
852 {
853 if (this->suggestionsOffset != this->suggestions.end()) {
854 /* title */
855 title = (*(this->suggestionsOffset))[0];
856
857 /* increment the cursor for the next call */
858 this->suggestionsOffset++;
859
860 return true;
861 }
862
863 return false;
864 }
865
getNextSuggestion(string & title,string & url)866 bool Reader::getNextSuggestion(string& title, string& url)
867 {
868 if (this->suggestionsOffset != this->suggestions.end()) {
869 /* title */
870 title = (*(this->suggestionsOffset))[0];
871 url = (*(this->suggestionsOffset))[1];
872
873 /* increment the cursor for the next call */
874 this->suggestionsOffset++;
875
876 return true;
877 }
878
879 return false;
880 }
881
882 /* Check if the file has as checksum */
canCheckIntegrity() const883 bool Reader::canCheckIntegrity() const
884 {
885 return this->zimFileHandler->getChecksum() != "";
886 }
887
888 /* Return true if corrupted, false otherwise */
isCorrupted() const889 bool Reader::isCorrupted() const
890 {
891 try {
892 if (this->zimFileHandler->verify() == true) {
893 return false;
894 }
895 } catch (exception& e) {
896 cerr << e.what() << endl;
897 return true;
898 }
899
900 return true;
901 }
902
903 /* Return the file size, works also for splitted files */
getFileSize() const904 unsigned int Reader::getFileSize() const
905 {
906 zim::File* file = this->getZimFileHandler();
907 zim::size_type size = 0;
908
909 if (file != NULL) {
910 size = file->getFilesize();
911 }
912
913 return (size / 1024);
914 }
915 }
916