1 /* Copyright (C) 2007-2019 J.F.Dockes
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the
14 * Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17 #include "autoconfig.h"
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <math.h>
22 #include <time.h>
23 #include <stdint.h>
24
25 #include <sstream>
26 #include <iostream>
27 #include <list>
28 using std::ostringstream;
29 using std::endl;
30 using std::list;
31
32 #include "cstr.h"
33 #include "reslistpager.h"
34 #include "log.h"
35 #include "rclconfig.h"
36 #include "smallut.h"
37 #include "rclutil.h"
38 #include "plaintorich.h"
39 #include "mimehandler.h"
40 #include "transcode.h"
41
42 // Default highlighter. No need for locking, this is query-only.
43 static const string cstr_hlfontcolor("<span style='color: blue;'>");
44 static const string cstr_hlendfont("</span>");
45 class PlainToRichHtReslist : public PlainToRich {
46 public:
startMatch(unsigned int)47 virtual string startMatch(unsigned int) {
48 return cstr_hlfontcolor;
49 }
endMatch()50 virtual string endMatch() {
51 return cstr_hlendfont;
52 }
53 };
54 static PlainToRichHtReslist g_hiliter;
55
ResListPager(int pagesize,bool alwaysSnippets)56 ResListPager::ResListPager(int pagesize, bool alwaysSnippets)
57 : m_pagesize(pagesize),
58 m_alwaysSnippets(alwaysSnippets),
59 m_newpagesize(pagesize),
60 m_resultsInCurrentPage(0),
61 m_winfirst(-1),
62 m_hasNext(true),
63 m_hiliter(&g_hiliter)
64 {
65 }
66
resultPageNext()67 void ResListPager::resultPageNext()
68 {
69 if (!m_docSource) {
70 LOGDEB("ResListPager::resultPageNext: null source\n");
71 return;
72 }
73
74 int resCnt = m_docSource->getResCnt();
75 LOGDEB("ResListPager::resultPageNext: rescnt " << resCnt <<
76 ", winfirst " << m_winfirst << "\n");
77
78 if (m_winfirst < 0) {
79 m_winfirst = 0;
80 } else {
81 m_winfirst += int(m_respage.size());
82 }
83 // Get the next page of results. Note that we look ahead by one to
84 // determine if there is actually a next page
85 vector<ResListEntry> npage;
86 int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize + 1, npage);
87
88 // If page was truncated, there is no next
89 m_hasNext = (pagelen == m_pagesize + 1);
90
91 // Get rid of the possible excess result
92 if (pagelen == m_pagesize + 1) {
93 npage.resize(m_pagesize);
94 pagelen--;
95 }
96
97 if (pagelen <= 0) {
98 // No results ? This can only happen on the first page or if the
99 // actual result list size is a multiple of the page pref (else
100 // there would have been no Next on the last page)
101 if (m_winfirst > 0) {
102 // Have already results. Let them show, just disable the
103 // Next button. We'd need to remove the Next link from the page
104 // too.
105 // Restore the m_winfirst value, let the current result vector alone
106 m_winfirst -= int(m_respage.size());
107 } else {
108 // No results at all (on first page)
109 m_winfirst = -1;
110 }
111 return;
112 }
113 m_resultsInCurrentPage = pagelen;
114 m_respage = npage;
115 }
// Return a version of `fld` suitable for insertion in the HTML output.
// A value starting with the cstr_fldhtm marker is returned with the marker
// stripped and no escaping; any other value goes through escapeHtml().
static string maybeEscapeHtml(const string& fld)
{
    const string::size_type markerLen = cstr_fldhtm.size();
    const bool hasHtmlMarker = fld.compare(0, markerLen, cstr_fldhtm) == 0;
    if (hasHtmlMarker) {
        return fld.substr(markerLen);
    }
    return escapeHtml(fld);
}
123
124
resultPageFor(int docnum)125 void ResListPager::resultPageFor(int docnum)
126 {
127 if (!m_docSource) {
128 LOGDEB("ResListPager::resultPageFor: null source\n");
129 return;
130 }
131
132 int resCnt = m_docSource->getResCnt();
133 LOGDEB("ResListPager::resultPageFor(" << docnum << "): rescnt " <<
134 resCnt << ", winfirst " << m_winfirst << "\n");
135 m_winfirst = (docnum / m_pagesize) * m_pagesize;
136
137 // Get the next page of results.
138 vector<ResListEntry> npage;
139 int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize, npage);
140
141 // If page was truncated, there is no next
142 m_hasNext = (pagelen == m_pagesize);
143
144 if (pagelen <= 0) {
145 m_winfirst = -1;
146 return;
147 }
148 m_respage = npage;
149 }
150
displayDoc(RclConfig * config,int i,Rcl::Doc & doc,const HighlightData & hdata,const string & sh)151 void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
152 const HighlightData& hdata, const string& sh)
153 {
154 ostringstream chunk;
155
156 // Determine icon to display if any
157 string iconurl = iconUrl(config, doc);
158
159 // Printable url: either utf-8 if transcoding succeeds, or url-encoded
160 string url;
161 printableUrl(config->getDefCharset(), doc.url, url);
162
163 // Same as url, but with file:// possibly stripped. output by %u instead
164 // of %U.
165 string urlOrLocal;
166 urlOrLocal = fileurltolocalpath(url);
167 if (urlOrLocal.empty())
168 urlOrLocal = url;
169
170 // Make title out of file name if none yet
171 string titleOrFilename;
172 string utf8fn;
173 doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
174 doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
175 if (utf8fn.empty()) {
176 utf8fn = path_getsimple(url);
177 }
178 if (titleOrFilename.empty()) {
179 titleOrFilename = utf8fn;
180 }
181
182 // Url for the parent directory. We strip the file:// part for local
183 // paths
184 string parenturl = url_parentfolder(url);
185 {
186 string localpath = fileurltolocalpath(parenturl);
187 if (!localpath.empty())
188 parenturl = localpath;
189 }
190
191 // Result number
192 char numbuf[20];
193 int docnumforlinks = m_winfirst + 1 + i;
194 sprintf(numbuf, "%d", docnumforlinks);
195
196 // Document date: either doc or file modification times
197 string datebuf;
198 if (!doc.dmtime.empty() || !doc.fmtime.empty()) {
199 time_t mtime = doc.dmtime.empty() ?
200 atoll(doc.fmtime.c_str()) : atoll(doc.dmtime.c_str());
201 struct tm *tm = localtime(&mtime);
202 datebuf = utf8datestring(dateFormat(), tm);
203 }
204
205 // Size information. We print both doc and file if they differ a lot
206 int64_t fsize = -1, dsize = -1;
207 if (!doc.dbytes.empty())
208 dsize = static_cast<int64_t>(atoll(doc.dbytes.c_str()));
209 if (!doc.fbytes.empty())
210 fsize = static_cast<int64_t>(atoll(doc.fbytes.c_str()));
211 string sizebuf;
212 if (dsize > 0) {
213 sizebuf = displayableBytes(dsize);
214 if (fsize > 10 * dsize && fsize - dsize > 1000)
215 sizebuf += string(" / ") + displayableBytes(fsize);
216 } else if (fsize >= 0) {
217 sizebuf = displayableBytes(fsize);
218 }
219
220 string richabst;
221 bool needabstract = parFormat().find("%A") != string::npos;
222 if (needabstract && m_docSource) {
223 vector<string> vabs;
224 m_docSource->getAbstract(doc, vabs);
225 m_hiliter->set_inputhtml(false);
226
227 for (vector<string>::const_iterator it = vabs.begin();
228 it != vabs.end(); it++) {
229 if (!it->empty()) {
230 // No need to call escapeHtml(), plaintorich handles it
231 list<string> lr;
232 // There may be data like page numbers before the snippet text.
233 // will be in brackets.
234 string::size_type bckt = it->find("]");
235 if (bckt == string::npos) {
236 m_hiliter->plaintorich(*it, lr, hdata);
237 } else {
238 m_hiliter->plaintorich(it->substr(bckt), lr, hdata);
239 lr.front() = it->substr(0, bckt) + lr.front();
240 }
241 richabst += lr.front();
242 richabst += absSep();
243 }
244 }
245 }
246
247 // Links; Uses utilities from mimehandler.h
248 ostringstream linksbuf;
249 if (canIntern(&doc, config)) {
250 linksbuf << "<a href=\""<< linkPrefix()<< "P" << docnumforlinks << "\">"
251 << trans("Preview") << "</a> ";
252 }
253 if (canOpen(&doc, config)) {
254 linksbuf << "<a href=\"" <<linkPrefix() + "E" <<docnumforlinks << "\">"
255 << trans("Open") << "</a>";
256 }
257 ostringstream snipsbuf;
258 if (m_alwaysSnippets || doc.haspages) {
259 snipsbuf << "<a href=\"" <<linkPrefix()<<"A" << docnumforlinks << "\">"
260 << trans("Snippets") << "</a> ";
261 linksbuf << " " << snipsbuf.str();
262 }
263
264 string collapscnt;
265 if (doc.getmeta(Rcl::Doc::keycc, &collapscnt) && !collapscnt.empty()) {
266 ostringstream collpsbuf;
267 int clc = atoi(collapscnt.c_str()) + 1;
268 collpsbuf << "<a href=\""<<linkPrefix()<<"D" << docnumforlinks << "\">"
269 << trans("Dups") << "(" << clc << ")" << "</a> ";
270 linksbuf << " " << collpsbuf.str();
271 }
272
273 // Build the result list paragraph:
274
275 // Subheader: this is used by history
276 if (!sh.empty())
277 chunk << "<p style='clear: both;'><b>" << sh << "</p>\n<p>";
278 else
279 chunk << "<p style='margin: 0px;padding: 0px;clear: both;'>";
280
281 char xdocidbuf[100];
282 sprintf(xdocidbuf, "%lu", doc.xdocid);
283
284 // Configurable stuff
285 map<string, string> subs;
286 subs["A"] = !richabst.empty() ? richabst : "";
287 subs["D"] = datebuf;
288 subs["E"] = snipsbuf.str();
289 subs["I"] = iconurl;
290 subs["i"] = doc.ipath;
291 subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
292 string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
293 subs["L"] = linksbuf.str();
294 subs["N"] = numbuf;
295 subs["M"] = doc.mimetype;
296 subs["P"] = parenturl;
297 subs["R"] = doc.meta[Rcl::Doc::keyrr];
298 subs["S"] = sizebuf;
299 subs["T"] = maybeEscapeHtml(titleOrFilename);
300 subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
301 subs["U"] = url;
302 subs["u"] = urlOrLocal;
303 subs["x"] = xdocidbuf;
304
305 // Let %(xx) access all metadata. HTML-neuter everything:
306 for (const auto& entry : doc.meta) {
307 if (!entry.first.empty())
308 subs[entry.first] = maybeEscapeHtml(entry.second);
309 }
310
311 string formatted;
312 pcSubst(parFormat(), formatted, subs);
313 chunk << formatted;
314
315 chunk << "</p>" << endl;
316 // This was to force qt 4.x to clear the margins (which it should do
317 // anyway because of the paragraph's style), but we finally took
318 // the table approach for 1.15 for now (in guiutils.cpp)
319 // chunk << "<br style='clear:both;height:0;line-height:0;'>" << endl;
320
321 LOGDEB2("Chunk: [" << chunk.rdbuf()->str() << "]\n");
322 append(chunk.rdbuf()->str(), i, doc);
323 }
324
getDoc(int num,Rcl::Doc & doc)325 bool ResListPager::getDoc(int num, Rcl::Doc& doc)
326 {
327 if (m_winfirst < 0 || m_respage.size() == 0)
328 return false;
329 if (num < m_winfirst || num >= m_winfirst + int(m_respage.size()))
330 return false;
331 doc = m_respage[num-m_winfirst].doc;
332 return true;
333 }
334
displayPage(RclConfig * config)335 void ResListPager::displayPage(RclConfig *config)
336 {
337 LOGDEB("ResListPager::displayPage. linkPrefix: " << linkPrefix() << "\n");
338 if (!m_docSource) {
339 LOGDEB("ResListPager::displayPage: null source\n");
340 return;
341 }
342 if (m_winfirst < 0 && !pageEmpty()) {
343 LOGDEB("ResListPager::displayPage: sequence error: winfirst < 0\n");
344 return;
345 }
346
347 ostringstream chunk;
348
349 // Display list header
350 // We could use a <title> but the textedit doesnt display
351 // it prominently
352 // Note: have to append text in chunks that make sense
353 // html-wise. If we break things up too much, the editor
354 // gets confused. Hence the use of the 'chunk' text
355 // accumulator
356 // Also note that there can be results beyond the estimated resCnt.
357 chunk << "<html><head>" << endl
358 << "<meta http-equiv=\"content-type\""
359 << " content=\"text/html; charset=utf-8\">" << endl
360 << headerContent()
361 << "</head><body " << bodyAttrs() << ">" << endl
362 << pageTop()
363 << "<p><span style=\"font-size:110%;\"><b>"
364 << m_docSource->title()
365 << "</b></span> ";
366
367 if (pageEmpty()) {
368 chunk << trans("<p><b>No results found</b><br>");
369 string reason = m_docSource->getReason();
370 if (!reason.empty()) {
371 chunk << "<blockquote>" << escapeHtml(reason) <<
372 "</blockquote></p>";
373 } else {
374 HighlightData hldata;
375 m_docSource->getTerms(hldata);
376 vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
377 if (!uterms.empty()) {
378 map<string, vector<string> > spellings;
379 suggest(uterms, spellings);
380 if (!spellings.empty()) {
381 if (o_index_stripchars) {
382 chunk <<
383 trans("<p><i>Alternate spellings (accents suppressed): </i>")
384 << "<br /><blockquote>";
385 } else {
386 chunk <<
387 trans("<p><i>Alternate spellings: </i>")
388 << "<br /><blockquote>";
389
390 }
391
392 for (const auto& entry: spellings) {
393 chunk << "<b>" << entry.first << "</b> : ";
394 for (const auto& spelling : entry.second) {
395 chunk << spelling << " ";
396 }
397 chunk << "<br />";
398 }
399 chunk << "</blockquote></p>";
400 }
401 }
402 }
403 } else {
404 unsigned int resCnt = m_docSource->getResCnt();
405 if (m_winfirst + m_respage.size() < resCnt) {
406 chunk << trans("Documents") << " <b>" << m_winfirst + 1
407 << "-" << m_winfirst + m_respage.size() << "</b> "
408 << trans("out of at least") << " "
409 << resCnt << " " << trans("for") << " " ;
410 } else {
411 chunk << trans("Documents") << " <b>"
412 << m_winfirst + 1 << "-" << m_winfirst + m_respage.size()
413 << "</b> " << trans("for") << " ";
414 }
415 }
416 chunk << detailsLink();
417 if (hasPrev() || hasNext()) {
418 chunk << " ";
419 if (hasPrev()) {
420 chunk << "<a href=\"" << linkPrefix() + prevUrl() + "\"><b>"
421 << trans("Previous")
422 << "</b></a> ";
423 }
424 if (hasNext()) {
425 chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
426 << trans("Next")
427 << "</b></a>";
428 }
429 }
430 chunk << "</p>" << endl;
431
432 append(chunk.rdbuf()->str());
433 chunk.rdbuf()->str("");
434 if (pageEmpty())
435 return;
436
437 HighlightData hdata;
438 m_docSource->getTerms(hdata);
439
440 // Emit data for result entry paragraph. Do it in chunks that make sense
441 // html-wise, else our client may get confused
442 for (int i = 0; i < (int)m_respage.size(); i++) {
443 Rcl::Doc& doc(m_respage[i].doc);
444 string& sh(m_respage[i].subHeader);
445 displayDoc(config, i, doc, hdata, sh);
446 }
447
448 // Footer
449 chunk << "<p align=\"center\">";
450 if (hasPrev() || hasNext()) {
451 if (hasPrev()) {
452 chunk << "<a href=\"" + linkPrefix() + prevUrl() + "\"><b>"
453 << trans("Previous")
454 << "</b></a> ";
455 }
456 if (hasNext()) {
457 chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
458 << trans("Next")
459 << "</b></a>";
460 }
461 }
462 chunk << "</p>" << endl;
463 chunk << "</body></html>" << endl;
464 append(chunk.rdbuf()->str());
465 flush();
466 }
467
displaySingleDoc(RclConfig * config,int idx,Rcl::Doc & doc,const HighlightData & hdata)468 void ResListPager::displaySingleDoc(RclConfig *config, int idx,
469 Rcl::Doc& doc,
470 const HighlightData& hdata)
471 {
472 ostringstream chunk;
473
474 // Header
475 // Note: have to append text in chunks that make sense
476 // html-wise. If we break things up too much, the editor
477 // gets confused.
478 string bdtag("<body ");
479 bdtag += bodyAttrs();
480 rtrimstring(bdtag, " ");
481 bdtag += ">";
482 chunk << "<html><head>\n"
483 << "<meta http-equiv=\"content-type\""
484 << " content=\"text/html; charset=utf-8\">\n"
485 << headerContent()
486 << "</head>\n" << bdtag << "\n";
487 append(chunk.rdbuf()->str());
488 // Document
489 displayDoc(config, idx, doc, hdata, string());
490 // Footer
491 append("</body></html>\n");
492 flush();
493 }
494
495
496 // Default implementations for things that should be implemented by
497 // specializations
nextUrl()498 string ResListPager::nextUrl()
499 {
500 return "n-1";
501 }
502
prevUrl()503 string ResListPager::prevUrl()
504 {
505 return "p-1";
506 }
507
iconUrl(RclConfig * config,Rcl::Doc & doc)508 string ResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc)
509 {
510 // If this is a top level doc, check for a thumbnail image
511 if (doc.ipath.empty()) {
512 vector<Rcl::Doc> docs;
513 docs.push_back(doc);
514 vector<string> paths;
515 Rcl::docsToPaths(docs, paths);
516 if (!paths.empty()) {
517 string path;
518 LOGDEB2("ResList::iconUrl: source path [" << paths[0] << "]\n");
519 if (thumbPathForUrl(cstr_fileu + paths[0], 128, path)) {
520 LOGDEB2("ResList::iconUrl: icon path [" << path << "]\n");
521 return cstr_fileu + path;
522 } else {
523 LOGDEB2("ResList::iconUrl: no icon: path [" << path << "]\n");
524 }
525 } else {
526 LOGDEB("ResList::iconUrl: docsToPaths failed\n");
527 }
528 }
529
530 // No thumbnail, look for the MIME type icon.
531 string apptag;
532 doc.getmeta(Rcl::Doc::keyapptg, &apptag);
533 return path_pathtofileurl(config->getMimeIconPath(doc.mimetype, apptag));
534 }
535
append(const string & data)536 bool ResListPager::append(const string& data)
537 {
538 fprintf(stderr, "%s", data.c_str());
539 return true;
540 }
541
// Default translation method: the input text is returned unchanged.
string ResListPager::trans(const string& in)
{
    return string(in);
}
546
detailsLink()547 string ResListPager::detailsLink()
548 {
549 string chunk = string("<a href=\"") + linkPrefix() + "H-1\">";
550 chunk += trans("(show query)") + "</a>";
551 return chunk;
552 }
553
parFormat()554 const string &ResListPager::parFormat()
555 {
556 static const string cstr_format("<img src=\"%I\" align=\"left\">"
557 "%R %S %L <b>%T</b><br>"
558 "%M %D <i>%U</i><br>"
559 "%A %K");
560 return cstr_format;
561 }
562
dateFormat()563 const string &ResListPager::dateFormat()
564 {
565 static const string cstr_format(" %Y-%m-%d %H:%M:%S %z");
566 return cstr_format;
567 }
568