1 /*
2 * SessionBookdownXRefs.cpp
3 *
4 * Copyright (C) 2021 by RStudio, PBC
5 *
6 * Unless you have received this program directly from RStudio pursuant
7 * to the terms of a commercial license agreement with RStudio, then
8 * this program is licensed to you under the terms of version 3 of the
9 * GNU Affero General Public License. This program is distributed WITHOUT
10 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13 *
14 */
15
16 #include "SessionBookdownXRefs.hpp"
17
18 #include <boost/lambda/bind.hpp>
19
20 #include <shared_core/FilePath.hpp>
21
22 #include <core/FileSerializer.hpp>
23 #include <core/Exec.hpp>
24
25 #include <core/system/Process.hpp>
26
27 #include <r/RExec.hpp>
28
29 #include <session/SessionModuleContext.hpp>
30 #include <session/IncrementalFileChangeHandler.hpp>
31
32
33 namespace rstudio {
34 namespace session {
35
36 using namespace rstudio::core;
37
38 namespace {
39
// Keys of the JSON objects produced by this module.

// keys set on the top-level index object
const char * const kBaseDir = "baseDir";
const char * const kRefs = "refs";

// keys set on each individual cross-reference object
const char * const kFile = "file";
const char * const kType = "type";
const char * const kId = "id";
const char * const kSuffix = "suffix";
const char * const kTitle = "title";
47
isBookdownRmd(const FileInfo & fileInfo)48 bool isBookdownRmd(const FileInfo& fileInfo)
49 {
50 FilePath filePath(fileInfo.absolutePath());
51 FilePath bookDir = projects::projectContext().buildTargetPath();
52 if (bookDir.exists())
53 return filePath.isWithin(bookDir) && (filePath.getExtensionLowerCase() == ".rmd");
54 else
55 return false;
56 }
57
bookdownSourceFiles()58 std::vector<std::string> bookdownSourceFiles()
59 {
60 std::vector<std::string> files;
61 std::string inputDir = string_utils::utf8ToSystem(projects::projectContext().buildTargetPath().getAbsolutePath());
62 Error error = r::exec::RFunction(".rs.bookdown.SourceFiles", inputDir).call(&files);
63 if (error)
64 LOG_ERROR(error);
65 return files;
66 }
67
68
bookRelativePath(const FilePath & rmdFile)69 std::string bookRelativePath(const FilePath& rmdFile)
70 {
71 return rmdFile.getRelativePath(projects::projectContext().buildTargetPath());
72 }
73
xrefIndexDirectory()74 FilePath xrefIndexDirectory()
75 {
76 FilePath xrefsPath = module_context::scopedScratchPath().completeChildPath("bookdown-crossref");
77 Error error = xrefsPath.ensureDirectory();
78 if (error)
79 LOG_ERROR(error);
80 return xrefsPath;
81 }
82
83
xrefIndexFilePath(const std::string & rmdRelativePath)84 FilePath xrefIndexFilePath(const std::string& rmdRelativePath)
85 {
86 FilePath indexFilePath = xrefIndexDirectory().completeChildPath(rmdRelativePath + ".xref");
87 Error error = indexFilePath.getParent().ensureDirectory();
88 if (error)
89 LOG_ERROR(error);
90 return indexFilePath;
91 }
92
// Overload taking the Rmd file itself: converts it to a book-relative path
// and delegates to the string overload.
FilePath xrefIndexFilePath(const FilePath& rmdFile)
{
   return xrefIndexFilePath(bookRelativePath(rmdFile));
}
98
99
// Cross-reference index for a single document: the name it is indexed under
// plus its raw index entries (one string per entry).
struct XRefFileIndex
{
   // empty index that is not associated with a file
   XRefFileIndex() = default;

   // empty index for the given file
   explicit XRefFileIndex(const std::string& file)
      : file(file)
   {
   }

   std::string file;                  // file the entries belong to
   std::vector<std::string> entries;  // raw index entries
};
107
// A single raw index entry paired with the file it came from.
struct XRefIndexEntry
{
   // entry with empty file and text
   XRefIndexEntry() = default;

   XRefIndexEntry(const std::string& file, const std::string& entry)
      : file(file),
        entry(entry)
   {
   }

   std::string file;   // file containing the entry
   std::string entry;  // raw entry text
};
118
119
indexForDoc(const std::string & file,const std::string & contents)120 XRefFileIndex indexForDoc(const std::string& file, const std::string& contents)
121 {
122 // move rmd code chunk preamble *into* chunk (so pandoc parses it as a code block)
123 std::vector<std::string> lines;
124 boost::algorithm::split(lines, contents, boost::algorithm::is_any_of("\r\n"));
125 std::vector<std::string> indexLines;
126 boost::regex beginChunkRe("^([\\t >]*)(```+\\s*)(\\{[a-zA-Z0-9_]+( *[ ,].*)?\\}\\s*)$");
127 for (auto line : lines) {
128 boost::smatch matches;
129 if (boost::regex_search(line, matches, beginChunkRe))
130 {
131 indexLines.push_back(matches[1] + matches[2]);
132 indexLines.push_back(matches[1] + matches[3]);
133 }
134 else
135 {
136 indexLines.push_back(line);
137 }
138 }
139 std::string indexContents = boost::algorithm::join(indexLines, "\n");
140
141 // build index
142 XRefFileIndex index(file);
143
144 // if we have no lines, bail early
145 if (indexContents.empty())
146 return index;
147
148 // otherwise, run pandoc w/ custom lua filter to capture index
149 std::vector<std::string> args;
150 args.push_back("--from");
151 args.push_back("markdown");
152 args.push_back("--to");
153 FilePath resPath = session::options().rResourcesPath();
154 FilePath xrefLuaPath = resPath.completePath("xref.lua");
155 std::string xrefLua = string_utils::utf8ToSystem(xrefLuaPath.getAbsolutePath());
156 args.push_back(xrefLua);
157 core::system::ProcessResult result;
158 Error error = module_context::runPandoc(args, indexContents, &result);
159 if (error)
160 {
161 LOG_ERROR(error);
162 }
163 else if (result.exitStatus != EXIT_SUCCESS)
164 {
165 LOG_ERROR(systemError(boost::system::errc::state_not_recoverable, result.stdErr, ERROR_LOCATION));
166 }
167 else
168 {
169 boost::algorithm::split(index.entries, result.stdOut, boost::algorithm::is_any_of("\n"));
170 }
171
172 // return the index
173 return index;
174 }
175
indexForDoc(const FilePath & filePath,const std::string & contents)176 XRefFileIndex indexForDoc(const FilePath& filePath, const std::string& contents)
177 {
178 std::string file = bookRelativePath(filePath);
179 return indexForDoc(file, contents);
180 }
181
182
183
indexForDoc(const FilePath & filePath)184 XRefFileIndex indexForDoc(const FilePath& filePath)
185 {
186 std::string contents;
187 Error error = core::readStringFromFile(filePath, &contents);
188 if (error)
189 LOG_ERROR(error);
190 return indexForDoc(filePath, contents);
191 }
192
193
writeEntryId(const std::string & id,json::Object * pEntryJson)194 bool writeEntryId(const std::string& id, json::Object* pEntryJson)
195 {
196 std::size_t colonPos = id.find_first_of(':');
197 if (colonPos != std::string::npos)
198 {
199 pEntryJson->operator[](kType) = id.substr(0, colonPos);
200 pEntryJson->operator[](kId) = id.substr(colonPos + 1);
201 pEntryJson->operator[](kSuffix) = "";
202 return true;
203 }
204 else
205 {
206 return false;
207 }
208 }
209
210
211 class XRefUnsavedIndex
212 {
213 public:
214
unsavedIndexes()215 const std::map<std::string, XRefFileIndex>& unsavedIndexes(){
216 return unsavedFiles_;
217 }
218
219
updateUnsaved(const FileInfo & fileInfo,const std::string & contents,bool dirty)220 void updateUnsaved(const FileInfo& fileInfo, const std::string& contents, bool dirty)
221 {
222 // always remove to start with
223 removeUnsaved(fileInfo);
224
225 // add it back if it's dirty
226 if (dirty)
227 {
228 FilePath filePath = toFilePath(fileInfo);
229 XRefFileIndex idx = indexForDoc(filePath, contents);
230 unsavedFiles_[bookRelativePath(filePath)] = idx;
231 }
232 }
233
removeUnsaved(const FileInfo & fileInfo)234 void removeUnsaved(const FileInfo& fileInfo)
235 {
236 FilePath filePath = toFilePath(fileInfo);
237 unsavedFiles_.erase(bookRelativePath(filePath));
238
239 }
240
removeAllUnsaved()241 void removeAllUnsaved()
242 {
243 unsavedFiles_.clear();
244 }
245
246 private:
247 std::map<std::string, XRefFileIndex> unsavedFiles_;
248 };
// file-local store of indexes for documents that have unsaved changes
XRefUnsavedIndex s_unsavedIndex;

// predicate applied to a raw index entry; entries for which it returns true
// are included in query results
typedef boost::function<bool(const std::string&)> IndexEntryFilter;
252
indexEntriesForProject(IndexEntryFilter filter)253 std::vector<XRefIndexEntry> indexEntriesForProject(IndexEntryFilter filter)
254 {
255 std::vector<XRefIndexEntry> indexEntries;
256
257 // find out what the docs in the book are
258 std::vector<std::string> sourceFiles = bookdownSourceFiles();
259
260 for (std::vector<std::string>::size_type i = 0; i < sourceFiles.size(); i++) {
261
262 // alias source files
263 const std::string& sourceFile = sourceFiles[i];
264
265 // prefer unsaved files
266 std::vector<std::string> entries;
267 auto unsaved = s_unsavedIndex.unsavedIndexes();
268 std::map<std::string, XRefFileIndex>::const_iterator it = unsaved.find(sourceFile);
269 if (it != unsaved.end())
270 {
271 entries = it->second.entries;
272 }
273 // then check the disk based index
274 else
275 {
276 FilePath filePath = xrefIndexFilePath(sourceFile);
277 if (filePath.exists())
278 {
279 Error error = readStringVectorFromFile(filePath, &entries);
280 if (error)
281 LOG_ERROR(error);
282 }
283 }
284
285 for (auto entry : entries)
286 {
287 if (filter(entry))
288 {
289 XRefIndexEntry indexEntry(sourceFile, entry);
290 indexEntries.push_back(indexEntry);
291 }
292 }
293 }
294
295 return indexEntries;
296 }
297
indexEntriesForFile(const XRefFileIndex & fileIndex,IndexEntryFilter filter)298 std::vector<XRefIndexEntry> indexEntriesForFile(const XRefFileIndex& fileIndex, IndexEntryFilter filter)
299 {
300 std::vector<XRefIndexEntry> indexEntries;
301 for (auto entry : fileIndex.entries)
302 {
303 if (filter(entry))
304 {
305 XRefIndexEntry indexEntry(fileIndex.file, entry);
306 indexEntries.push_back(indexEntry);
307 }
308 }
309
310 return indexEntries;
311 }
312
readMultiKeys()313 std::map<std::string,int> readMultiKeys()
314 {
315 std::map<std::string,int> multiKeys;
316 FilePath refKeys = projects::projectContext().buildTargetPath().completePath("_book/reference-keys.txt");
317 if (refKeys.exists())
318 {
319 // read the keys
320 std::vector<std::string> keys;
321 Error error = core::readStringVectorFromFile(refKeys, &keys);
322 if (error)
323 {
324 LOG_ERROR(error);
325 return multiKeys;
326 }
327
328 // look for keys with a -N suffix
329 boost::regex multiRe("^(?:[a-z]+:)?(.*?)(?:-(\\d+))$");
330 for (auto key : keys)
331 {
332 boost::smatch match;
333 if (boost::regex_search(key, match, multiRe))
334 multiKeys[match[1]] = boost::lexical_cast<int>(match[2]);
335 }
336 }
337
338 return multiKeys;
339 }
340
341
indexEntriesToXRefs(const std::vector<XRefIndexEntry> & entries,bool isBookdownProject)342 json::Array indexEntriesToXRefs(const std::vector<XRefIndexEntry>& entries, bool isBookdownProject)
343 {
344 // split out text refs (as a map) and normal entries
345 std::map<std::string,std::string> textRefs;
346 std::vector<XRefIndexEntry> normalEntries;
347 boost::regex textRefRe("^(\\(.*\\))\\s+(.*)$");
348 for (auto indexEntry : entries)
349 {
350 boost::smatch matches;
351 if (boost::regex_search(indexEntry.entry, matches, textRefRe))
352 {
353 textRefs[matches[1]] = matches[2];
354 }
355 else
356 {
357 normalEntries.push_back(indexEntry);
358 }
359 }
360
361 // read in referece-keys.txt so we can detect entires w/ suffixes
362 std::map<std::string,int> multiKeys;
363 if (isBookdownProject)
364 multiKeys = readMultiKeys();
365
366 // turn normal entries into xref json
367 json::Array xrefsJson;
368 for (auto indexEntry : normalEntries)
369 {
370 json::Object xrefJson;
371
372 xrefJson[kFile] = indexEntry.file;
373
374 auto entry = indexEntry.entry;
375 if (entry.size() > 0)
376 {
377 bool validEntryId = false;
378 std::size_t spacePos = entry.find_first_of(' ');
379 if (spacePos != std::string::npos)
380 {
381 // write the id
382 validEntryId = writeEntryId(entry.substr(0, spacePos), &xrefJson);
383
384 // get the title (substitute textref if we have one)
385 std::string title = entry.substr(spacePos + 1);
386
387 std::string textrefTitle = textRefs[title];
388 if (textrefTitle.length() > 0)
389 title = textrefTitle;
390
391 // write the title
392 xrefJson[kTitle] = title;
393 }
394 else
395 {
396 validEntryId = writeEntryId(entry, &xrefJson);
397 }
398
399 // add the entry (suffixed if necessary)
400 if (validEntryId)
401 {
402 // if this key has a suffix then add multiple items w/ suffixes
403 std::string id = xrefJson["id"].getString();
404 std::map<std::string,int>::const_iterator it = multiKeys.find(id);
405 if (it != multiKeys.end() && it->second > 1)
406 {
407 for (int i=1; i<=it->second; i++)
408 {
409 json::Object xrefJsonSuffixed = xrefJson;
410 xrefJsonSuffixed[kSuffix] = "-" + boost::lexical_cast<std::string>(i);
411 xrefsJson.push_back(xrefJsonSuffixed);
412 }
413 }
414 else
415 {
416 xrefsJson.push_back(xrefJson);
417 }
418 }
419 }
420 }
421
422 return xrefsJson;
423 }
424
425
426
fileChangeHandler(const core::system::FileChangeEvent & event)427 void fileChangeHandler(const core::system::FileChangeEvent& event)
428 {
429 // paths for the rmd file and it's corresponding index file
430 FilePath rmdFile = FilePath(event.fileInfo().absolutePath());
431 FilePath idxFile = xrefIndexFilePath(FilePath(event.fileInfo().absolutePath()));
432
433 if (event.type() == core::system::FileChangeEvent::FileAdded)
434 {
435 if (idxFile.exists() && idxFile.getLastWriteTime() > rmdFile.getLastWriteTime())
436 return;
437 }
438
439 // if this is an add or an update then re-index
440 if (event.type() == core::system::FileChangeEvent::FileAdded ||
441 event.type() == core::system::FileChangeEvent::FileModified)
442 {
443 if (rmdFile.exists())
444 {
445 XRefFileIndex idx = indexForDoc(rmdFile);
446 Error error = writeStringVectorToFile(idxFile, idx.entries);
447 if (error)
448 LOG_ERROR(error);
449 }
450 }
451 // if this is a delete then remove the index
452 else if (event.type() == core::system::FileChangeEvent::FileRemoved)
453 {
454 Error error = idxFile.removeIfExists();
455 if (error)
456 LOG_ERROR(error);
457 }
458 }
459
isBookdownContext()460 bool isBookdownContext()
461 {
462 return module_context::isBookdownProject() && module_context::isPackageInstalled("bookdown");
463 }
464
onSourceDocUpdated(boost::shared_ptr<source_database::SourceDocument> pDoc)465 void onSourceDocUpdated(boost::shared_ptr<source_database::SourceDocument> pDoc)
466 {
467 // ignore if the file doesn't have a path
468 if (pDoc->path().empty())
469 return;
470
471 // update unsaved if it's a bookdown rmd
472 FileInfo fileInfo(module_context::resolveAliasedPath(pDoc->path()));
473 if (isBookdownRmd(fileInfo))
474 s_unsavedIndex.updateUnsaved(fileInfo, pDoc->contents(), pDoc->dirty());
475
476 }
477
onSourceDocRemoved(const std::string &,const std::string & path)478 void onSourceDocRemoved(const std::string&, const std::string& path)
479 {
480 // ignore if the file has no path
481 if (path.empty())
482 return;
483
484 // remove from unsaved if it's a bookdown rmd
485 FileInfo fileInfo(module_context::resolveAliasedPath(path));
486 if (isBookdownRmd(fileInfo))
487 s_unsavedIndex.removeUnsaved(fileInfo);
488 }
489
onAllSourceDocsRemoved()490 void onAllSourceDocsRemoved()
491 {
492 s_unsavedIndex.removeAllUnsaved();
493 }
494
onDeferredInit(bool)495 void onDeferredInit(bool)
496 {
497 if (isBookdownContext())
498 {
499 // index docs
500 std::vector<boost::shared_ptr<source_database::SourceDocument> > pDocs;
501 Error error = source_database::list(&pDocs);
502 if (error)
503 LOG_ERROR(error);
504 std::for_each(pDocs.begin(), pDocs.end(), onSourceDocUpdated);
505
506 // hookup source doc events
507 source_database::events().onDocUpdated.connect(onSourceDocUpdated);
508 source_database::events().onDocRemoved.connect(onSourceDocRemoved);
509 source_database::events().onRemoveAll.connect(onAllSourceDocsRemoved);
510
511 // create an incremental file change handler (on the heap so that it
512 // survives the call to this function and is never deleted)
513 IncrementalFileChangeHandler* pFileChangeHandler =
514 new IncrementalFileChangeHandler(
515 isBookdownRmd,
516 fileChangeHandler,
517 boost::posix_time::seconds(1),
518 boost::posix_time::milliseconds(500),
519 true
520 );
521 pFileChangeHandler->subscribeToFileMonitor("Bookdown Cross References");
522 }
523
524 }
525
xrefIndexforProject(IndexEntryFilter filter)526 json::Object xrefIndexforProject(IndexEntryFilter filter)
527 {
528 json::Object indexJson;
529 indexJson[kBaseDir] = module_context::createAliasedPath(projects::projectContext().buildTargetPath());
530 std::vector<XRefIndexEntry> entries = indexEntriesForProject(filter);
531 indexJson[kRefs] = indexEntriesToXRefs(entries, true);
532 return indexJson;
533 }
534
xrefIndex(const std::string & file,IndexEntryFilter filter)535 json::Object xrefIndex(const std::string& file, IndexEntryFilter filter)
536 {
537 // resolve path
538 FilePath filePath = module_context::resolveAliasedPath(file);
539
540 // result to return
541 json::Object indexJson;
542
543 // if this is a bookdown context then send the whole project index
544 if (isBookdownContext() && filePath.isWithin(projects::projectContext().buildTargetPath()))
545 {
546 indexJson = xrefIndexforProject(filter);
547 }
548
549 // otherwise just send an index for this file (it will be in the source database)
550 else
551 {
552 indexJson[kBaseDir] = module_context::createAliasedPath(filePath.getParent());
553
554 std::string id;
555 source_database::getId(filePath, &id);
556 if (!id.empty())
557 {
558 boost::shared_ptr<source_database::SourceDocument> pDoc(
559 new source_database::SourceDocument());
560 Error error = source_database::get(id, pDoc);
561 if (error)
562 {
563 LOG_ERROR(error);
564 indexJson[kRefs] = json::Array();
565 }
566 else
567 {
568 XRefFileIndex idx = indexForDoc(filePath.getFilename(), pDoc->contents());
569 std::vector<XRefIndexEntry> entries = indexEntriesForFile(idx, filter);
570 indexJson["refs"] = indexEntriesToXRefs(entries, false);
571 }
572 }
573 else
574 {
575 indexJson[kRefs] = json::Array();
576 }
577 }
578
579 return indexJson;
580 }
581
582
xrefIndexForFile(const json::JsonRpcRequest & request,json::JsonRpcResponse * pResponse)583 Error xrefIndexForFile(const json::JsonRpcRequest& request,
584 json::JsonRpcResponse* pResponse)
585 {
586 // read params
587 std::string file;
588 Error error = json::readParams(request.params, &file);
589 if (error)
590 return error;
591
592 // filter that returns all entries
593 IndexEntryFilter includeAll = boost::lambda::constant(true);
594
595 // get index and return it
596 pResponse->setResult(xrefIndex(file, includeAll));
597
598 return Success();
599 }
600
xrefForId(const json::JsonRpcRequest & request,json::JsonRpcResponse * pResponse)601 Error xrefForId(const json::JsonRpcRequest& request,
602 json::JsonRpcResponse* pResponse)
603 {
604 // read params
605 std::string file, id;
606 Error error = json::readParams(request.params, &file, &id);
607 if (error)
608 return error;
609
610 // get index containing just the entry that matches this id
611 json::Object indexJson = xrefIndex(file, [id](const std::string& entry) {
612 std::string entryId = entry.substr(0, entry.find_first_of(' '));
613 if (id == entryId)
614 {
615 return true;
616 }
617 else
618 {
619 // headings also match on just the id part
620 entryId = boost::regex_replace(entryId, boost::regex("^h\\d\\:"), "");
621 if (id == entryId)
622 return true;
623
624 // we can also match after trimming off any provided suffix
625 std::string trimmedId = boost::regex_replace(id, boost::regex("-\\d$"), "");
626 if (trimmedId == entryId)
627 return true;
628 }
629
630 return false;
631 });
632
633 // if there is more than one item returned it could have been a suffix match,
634 // in that case winnow it down to the passed id
635 json::Array refsJson = indexJson["refs"].getArray();
636 if (refsJson.getSize() > 1)
637 {
638 for (auto refJsonValue : refsJson)
639 {
640 json::Object refJson = refJsonValue.getObject();
641 boost::format fmt("%1%:%2%%3%");
642 std::string refId = boost::str(fmt %
643 refJson[kType].getString() %
644 refJson[kId].getString() %
645 refJson[kSuffix].getString());
646 if (refId == id)
647 {
648 json::Array suffixRefsJson;
649 suffixRefsJson.push_back(refJson);
650 indexJson["refs"] = suffixRefsJson;
651 break;
652 }
653 }
654 }
655
656 // return it
657 pResponse->setResult(indexJson);
658
659 return Success();
660 }
661
662 } // anonymous namespace
663
664 namespace modules {
665 namespace rmarkdown {
666 namespace bookdown {
667 namespace xrefs {
668
initialize()669 Error initialize()
670 {
671 // deferred init (build xref file index)
672 module_context::events().onDeferredInit.connect(onDeferredInit);
673
674 // register rpc functions
675 ExecBlock initBlock;
676 initBlock.addFunctions()
677 (boost::bind(module_context::registerRpcMethod, "xref_index_for_file", xrefIndexForFile))
678 (boost::bind(module_context::registerRpcMethod, "xref_for_id", xrefForId))
679 ;
680 return initBlock.execute();
681
682
683 }
684
685 } // namespace xrefs
686 } // namespace bookdown
687 } // namespace rmarkdown
688 } // namespace modules
689
690 namespace module_context {
691
bookdownXRefIndex()692 core::json::Value bookdownXRefIndex()
693 {
694 if (isBookdownContext())
695 return xrefIndexforProject(boost::lambda::constant(true));
696 else
697 return json::Value();
698 }
699
700 } // namespace module_context
701
702 } // namespace session
703 } // namespace rstudio
704