1 /*
2  * SessionBookdownXRefs.cpp
3  *
4  * Copyright (C) 2021 by RStudio, PBC
5  *
6  * Unless you have received this program directly from RStudio pursuant
7  * to the terms of a commercial license agreement with RStudio, then
8  * this program is licensed to you under the terms of version 3 of the
9  * GNU Affero General Public License. This program is distributed WITHOUT
10  * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12  * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13  *
14  */
15 
16 #include "SessionBookdownXRefs.hpp"
17 
18 #include <boost/lambda/bind.hpp>
19 
20 #include <shared_core/FilePath.hpp>
21 
22 #include <core/FileSerializer.hpp>
23 #include <core/Exec.hpp>
24 
25 #include <core/system/Process.hpp>
26 
27 #include <r/RExec.hpp>
28 
29 #include <session/SessionModuleContext.hpp>
30 #include <session/IncrementalFileChangeHandler.hpp>
31 
32 
33 namespace rstudio {
34 namespace session {
35 
36 using namespace rstudio::core;
37 
38 namespace {
39 
// field names of the top-level xref index json object
constexpr const char* kBaseDir = "baseDir";
constexpr const char* kRefs = "refs";

// field names of an individual xref json entry
constexpr const char* kFile = "file";
constexpr const char* kType = "type";
constexpr const char* kId = "id";
constexpr const char* kSuffix = "suffix";
constexpr const char* kTitle = "title";
47 
isBookdownRmd(const FileInfo & fileInfo)48 bool isBookdownRmd(const FileInfo& fileInfo)
49 {
50    FilePath filePath(fileInfo.absolutePath());
51    FilePath bookDir = projects::projectContext().buildTargetPath();
52    if (bookDir.exists())
53       return filePath.isWithin(bookDir) && (filePath.getExtensionLowerCase() == ".rmd");
54    else
55       return false;
56 }
57 
bookdownSourceFiles()58 std::vector<std::string> bookdownSourceFiles()
59 {
60    std::vector<std::string> files;
61    std::string inputDir = string_utils::utf8ToSystem(projects::projectContext().buildTargetPath().getAbsolutePath());
62    Error error = r::exec::RFunction(".rs.bookdown.SourceFiles", inputDir).call(&files);
63    if (error)
64       LOG_ERROR(error);
65    return files;
66 }
67 
68 
bookRelativePath(const FilePath & rmdFile)69 std::string bookRelativePath(const FilePath& rmdFile)
70 {
71    return rmdFile.getRelativePath(projects::projectContext().buildTargetPath());
72 }
73 
xrefIndexDirectory()74 FilePath xrefIndexDirectory()
75 {
76    FilePath xrefsPath = module_context::scopedScratchPath().completeChildPath("bookdown-crossref");
77    Error error = xrefsPath.ensureDirectory();
78    if (error)
79       LOG_ERROR(error);
80    return xrefsPath;
81 }
82 
83 
xrefIndexFilePath(const std::string & rmdRelativePath)84 FilePath xrefIndexFilePath(const std::string& rmdRelativePath)
85 {
86    FilePath indexFilePath = xrefIndexDirectory().completeChildPath(rmdRelativePath + ".xref");
87    Error error = indexFilePath.getParent().ensureDirectory();
88    if (error)
89       LOG_ERROR(error);
90    return indexFilePath;
91 }
92 
// Convenience overload: resolve the index file for an Rmd file given by its
// full path (converted to a book-relative path first).
FilePath xrefIndexFilePath(const FilePath& rmdFile)
{
   return xrefIndexFilePath(bookRelativePath(rmdFile));
}
98 
99 
// Index of a single document: the document's name plus the raw index lines
// produced for it.
struct XRefFileIndex
{
   XRefFileIndex() = default;

   explicit XRefFileIndex(const std::string& sourceFile)
      : file(sourceFile)
   {
   }

   // name of the indexed document
   std::string file;

   // raw index lines for the document
   std::vector<std::string> entries;
};
107 
// A single raw index line paired with the document it came from.
struct XRefIndexEntry
{
   XRefIndexEntry() = default;

   XRefIndexEntry(const std::string& sourceFile, const std::string& rawEntry)
      : file(sourceFile), entry(rawEntry)
   {
   }

   // document the entry belongs to
   std::string file;

   // raw index line
   std::string entry;
};
118 
119 
indexForDoc(const std::string & file,const std::string & contents)120 XRefFileIndex indexForDoc(const std::string& file, const std::string& contents)
121 {
122    // move rmd code chunk preamble *into* chunk (so pandoc parses it as a code block)
123    std::vector<std::string> lines;
124    boost::algorithm::split(lines, contents, boost::algorithm::is_any_of("\r\n"));
125    std::vector<std::string> indexLines;
126    boost::regex beginChunkRe("^([\\t >]*)(```+\\s*)(\\{[a-zA-Z0-9_]+( *[ ,].*)?\\}\\s*)$");
127    for (auto line : lines) {
128       boost::smatch matches;
129       if (boost::regex_search(line, matches, beginChunkRe))
130       {
131          indexLines.push_back(matches[1] + matches[2]);
132          indexLines.push_back(matches[1] + matches[3]);
133       }
134       else
135       {
136          indexLines.push_back(line);
137       }
138    }
139    std::string indexContents = boost::algorithm::join(indexLines, "\n");
140 
141    // build index
142    XRefFileIndex index(file);
143 
144    // if we have no lines, bail early
145    if (indexContents.empty())
146       return index;
147 
148    // otherwise, run pandoc w/ custom lua filter to capture index
149    std::vector<std::string> args;
150    args.push_back("--from");
151    args.push_back("markdown");
152    args.push_back("--to");
153    FilePath resPath = session::options().rResourcesPath();
154    FilePath xrefLuaPath = resPath.completePath("xref.lua");
155    std::string xrefLua = string_utils::utf8ToSystem(xrefLuaPath.getAbsolutePath());
156    args.push_back(xrefLua);
157    core::system::ProcessResult result;
158    Error error = module_context::runPandoc(args, indexContents, &result);
159    if (error)
160    {
161       LOG_ERROR(error);
162    }
163    else if (result.exitStatus != EXIT_SUCCESS)
164    {
165       LOG_ERROR(systemError(boost::system::errc::state_not_recoverable, result.stdErr, ERROR_LOCATION));
166    }
167    else
168    {
169       boost::algorithm::split(index.entries, result.stdOut, boost::algorithm::is_any_of("\n"));
170    }
171 
172    // return the index
173    return index;
174 }
175 
indexForDoc(const FilePath & filePath,const std::string & contents)176 XRefFileIndex indexForDoc(const FilePath& filePath, const std::string& contents)
177 {
178    std::string file = bookRelativePath(filePath);
179    return indexForDoc(file, contents);
180 }
181 
182 
183 
indexForDoc(const FilePath & filePath)184 XRefFileIndex indexForDoc(const FilePath& filePath)
185 {
186    std::string contents;
187    Error error = core::readStringFromFile(filePath, &contents);
188    if (error)
189       LOG_ERROR(error);
190    return indexForDoc(filePath, contents);
191 }
192 
193 
writeEntryId(const std::string & id,json::Object * pEntryJson)194 bool writeEntryId(const std::string& id, json::Object* pEntryJson)
195 {
196    std::size_t colonPos = id.find_first_of(':');
197    if (colonPos != std::string::npos)
198    {
199       pEntryJson->operator[](kType) = id.substr(0, colonPos);
200       pEntryJson->operator[](kId) = id.substr(colonPos + 1);
201       pEntryJson->operator[](kSuffix) = "";
202       return true;
203    }
204    else
205    {
206       return false;
207    }
208 }
209 
210 
211 class XRefUnsavedIndex
212 {
213 public:
214 
unsavedIndexes()215    const std::map<std::string, XRefFileIndex>& unsavedIndexes(){
216       return unsavedFiles_;
217    }
218 
219 
updateUnsaved(const FileInfo & fileInfo,const std::string & contents,bool dirty)220    void updateUnsaved(const FileInfo& fileInfo, const std::string& contents, bool dirty)
221    {
222       // always remove to start with
223       removeUnsaved(fileInfo);
224 
225       // add it back if it's dirty
226       if (dirty)
227       {
228          FilePath filePath = toFilePath(fileInfo);
229          XRefFileIndex idx = indexForDoc(filePath, contents);
230          unsavedFiles_[bookRelativePath(filePath)] = idx;
231       }
232    }
233 
removeUnsaved(const FileInfo & fileInfo)234    void removeUnsaved(const FileInfo& fileInfo)
235    {
236       FilePath filePath = toFilePath(fileInfo);
237       unsavedFiles_.erase(bookRelativePath(filePath));
238 
239    }
240 
removeAllUnsaved()241    void removeAllUnsaved()
242    {
243       unsavedFiles_.clear();
244    }
245 
246 private:
247    std::map<std::string, XRefFileIndex> unsavedFiles_;
248 };
249 XRefUnsavedIndex s_unsavedIndex;
250 
251 typedef boost::function<bool(const std::string&)> IndexEntryFilter;
252 
indexEntriesForProject(IndexEntryFilter filter)253 std::vector<XRefIndexEntry> indexEntriesForProject(IndexEntryFilter filter)
254 {
255    std::vector<XRefIndexEntry> indexEntries;
256 
257    // find out what the docs in the book are
258    std::vector<std::string> sourceFiles = bookdownSourceFiles();
259 
260    for (std::vector<std::string>::size_type i = 0; i < sourceFiles.size(); i++) {
261 
262       // alias source files
263       const std::string& sourceFile = sourceFiles[i];
264 
265       // prefer unsaved files
266       std::vector<std::string> entries;
267       auto unsaved = s_unsavedIndex.unsavedIndexes();
268       std::map<std::string, XRefFileIndex>::const_iterator it = unsaved.find(sourceFile);
269       if (it != unsaved.end())
270       {
271          entries = it->second.entries;
272       }
273       // then check the disk based index
274       else
275       {
276          FilePath filePath = xrefIndexFilePath(sourceFile);
277          if (filePath.exists())
278          {
279             Error error = readStringVectorFromFile(filePath, &entries);
280             if (error)
281                LOG_ERROR(error);
282          }
283       }
284 
285       for (auto entry : entries)
286       {
287          if (filter(entry))
288          {
289             XRefIndexEntry indexEntry(sourceFile, entry);
290             indexEntries.push_back(indexEntry);
291          }
292       }
293    }
294 
295    return indexEntries;
296 }
297 
indexEntriesForFile(const XRefFileIndex & fileIndex,IndexEntryFilter filter)298 std::vector<XRefIndexEntry> indexEntriesForFile(const XRefFileIndex& fileIndex, IndexEntryFilter filter)
299 {
300    std::vector<XRefIndexEntry> indexEntries;
301    for (auto entry : fileIndex.entries)
302    {
303       if (filter(entry))
304       {
305          XRefIndexEntry indexEntry(fileIndex.file, entry);
306          indexEntries.push_back(indexEntry);
307       }
308    }
309 
310    return indexEntries;
311 }
312 
readMultiKeys()313 std::map<std::string,int> readMultiKeys()
314 {
315    std::map<std::string,int> multiKeys;
316    FilePath refKeys = projects::projectContext().buildTargetPath().completePath("_book/reference-keys.txt");
317    if (refKeys.exists())
318    {
319       // read the keys
320       std::vector<std::string> keys;
321       Error error = core::readStringVectorFromFile(refKeys, &keys);
322       if (error)
323       {
324          LOG_ERROR(error);
325          return multiKeys;
326       }
327 
328       // look for keys with a -N suffix
329       boost::regex multiRe("^(?:[a-z]+:)?(.*?)(?:-(\\d+))$");
330       for (auto key : keys)
331       {
332          boost::smatch match;
333          if (boost::regex_search(key, match, multiRe))
334             multiKeys[match[1]] = boost::lexical_cast<int>(match[2]);
335       }
336    }
337 
338    return multiKeys;
339 }
340 
341 
indexEntriesToXRefs(const std::vector<XRefIndexEntry> & entries,bool isBookdownProject)342 json::Array indexEntriesToXRefs(const std::vector<XRefIndexEntry>& entries, bool isBookdownProject)
343 {
344    // split out text refs (as a map) and normal entries
345    std::map<std::string,std::string> textRefs;
346    std::vector<XRefIndexEntry> normalEntries;
347    boost::regex textRefRe("^(\\(.*\\))\\s+(.*)$");
348    for (auto indexEntry : entries)
349    {
350       boost::smatch matches;
351       if (boost::regex_search(indexEntry.entry, matches, textRefRe))
352       {
353          textRefs[matches[1]] = matches[2];
354       }
355       else
356       {
357          normalEntries.push_back(indexEntry);
358       }
359    }
360 
361    // read in referece-keys.txt so we can detect entires w/ suffixes
362    std::map<std::string,int> multiKeys;
363    if (isBookdownProject)
364       multiKeys = readMultiKeys();
365 
366    // turn normal entries into xref json
367    json::Array xrefsJson;
368    for (auto indexEntry : normalEntries)
369    {
370       json::Object xrefJson;
371 
372       xrefJson[kFile] = indexEntry.file;
373 
374       auto entry = indexEntry.entry;
375       if (entry.size() > 0)
376       {
377          bool validEntryId = false;
378          std::size_t spacePos = entry.find_first_of(' ');
379          if (spacePos != std::string::npos)
380          {
381             // write the id
382             validEntryId = writeEntryId(entry.substr(0, spacePos), &xrefJson);
383 
384             // get the title (substitute textref if we have one)
385             std::string title = entry.substr(spacePos + 1);
386 
387             std::string textrefTitle = textRefs[title];
388             if (textrefTitle.length() > 0)
389                title = textrefTitle;
390 
391             // write the title
392             xrefJson[kTitle] = title;
393          }
394          else
395          {
396             validEntryId = writeEntryId(entry, &xrefJson);
397          }
398 
399          // add the entry (suffixed if necessary)
400          if (validEntryId)
401          {
402             // if this key has a suffix then add multiple items w/ suffixes
403             std::string id = xrefJson["id"].getString();
404             std::map<std::string,int>::const_iterator it = multiKeys.find(id);
405             if (it != multiKeys.end() && it->second > 1)
406             {
407                for (int i=1; i<=it->second; i++)
408                {
409                   json::Object xrefJsonSuffixed = xrefJson;
410                   xrefJsonSuffixed[kSuffix] = "-" + boost::lexical_cast<std::string>(i);
411                   xrefsJson.push_back(xrefJsonSuffixed);
412                }
413             }
414             else
415             {
416                xrefsJson.push_back(xrefJson);
417             }
418          }
419       }
420    }
421 
422    return xrefsJson;
423 }
424 
425 
426 
fileChangeHandler(const core::system::FileChangeEvent & event)427 void fileChangeHandler(const core::system::FileChangeEvent& event)
428 {
429    // paths for the rmd file and it's corresponding index file
430    FilePath rmdFile = FilePath(event.fileInfo().absolutePath());
431    FilePath idxFile = xrefIndexFilePath(FilePath(event.fileInfo().absolutePath()));
432 
433    if (event.type() == core::system::FileChangeEvent::FileAdded)
434    {
435       if (idxFile.exists() && idxFile.getLastWriteTime() > rmdFile.getLastWriteTime())
436          return;
437    }
438 
439    // if this is an add or an update then re-index
440    if (event.type() == core::system::FileChangeEvent::FileAdded ||
441        event.type() == core::system::FileChangeEvent::FileModified)
442    {
443       if (rmdFile.exists())
444       {
445          XRefFileIndex idx = indexForDoc(rmdFile);
446          Error error = writeStringVectorToFile(idxFile, idx.entries);
447          if (error)
448             LOG_ERROR(error);
449       }
450    }
451    // if this is a delete then remove the index
452    else if (event.type() == core::system::FileChangeEvent::FileRemoved)
453    {
454       Error error = idxFile.removeIfExists();
455       if (error)
456          LOG_ERROR(error);
457    }
458 }
459 
isBookdownContext()460 bool isBookdownContext()
461 {
462    return module_context::isBookdownProject() && module_context::isPackageInstalled("bookdown");
463 }
464 
onSourceDocUpdated(boost::shared_ptr<source_database::SourceDocument> pDoc)465 void onSourceDocUpdated(boost::shared_ptr<source_database::SourceDocument> pDoc)
466 {
467    // ignore if the file doesn't have a path
468    if (pDoc->path().empty())
469       return;
470 
471    // update unsaved if it's a bookdown rmd
472    FileInfo fileInfo(module_context::resolveAliasedPath(pDoc->path()));
473    if (isBookdownRmd(fileInfo))
474       s_unsavedIndex.updateUnsaved(fileInfo, pDoc->contents(), pDoc->dirty());
475 
476 }
477 
onSourceDocRemoved(const std::string &,const std::string & path)478 void onSourceDocRemoved(const std::string&, const std::string& path)
479 {
480    // ignore if the file has no path
481    if (path.empty())
482       return;
483 
484    // remove from unsaved if it's a bookdown rmd
485    FileInfo fileInfo(module_context::resolveAliasedPath(path));
486    if (isBookdownRmd(fileInfo))
487       s_unsavedIndex.removeUnsaved(fileInfo);
488 }
489 
onAllSourceDocsRemoved()490 void onAllSourceDocsRemoved()
491 {
492    s_unsavedIndex.removeAllUnsaved();
493 }
494 
onDeferredInit(bool)495 void onDeferredInit(bool)
496 {
497    if (isBookdownContext())
498    {
499       // index docs
500       std::vector<boost::shared_ptr<source_database::SourceDocument> > pDocs;
501       Error error = source_database::list(&pDocs);
502       if (error)
503          LOG_ERROR(error);
504       std::for_each(pDocs.begin(), pDocs.end(), onSourceDocUpdated);
505 
506       // hookup source doc events
507       source_database::events().onDocUpdated.connect(onSourceDocUpdated);
508       source_database::events().onDocRemoved.connect(onSourceDocRemoved);
509       source_database::events().onRemoveAll.connect(onAllSourceDocsRemoved);
510 
511       // create an incremental file change handler (on the heap so that it
512       // survives the call to this function and is never deleted)
513       IncrementalFileChangeHandler* pFileChangeHandler =
514          new IncrementalFileChangeHandler(
515             isBookdownRmd,
516             fileChangeHandler,
517             boost::posix_time::seconds(1),
518             boost::posix_time::milliseconds(500),
519             true
520          );
521       pFileChangeHandler->subscribeToFileMonitor("Bookdown Cross References");
522    }
523 
524 }
525 
xrefIndexforProject(IndexEntryFilter filter)526 json::Object xrefIndexforProject(IndexEntryFilter filter)
527 {
528    json::Object indexJson;
529    indexJson[kBaseDir] = module_context::createAliasedPath(projects::projectContext().buildTargetPath());
530    std::vector<XRefIndexEntry> entries = indexEntriesForProject(filter);
531    indexJson[kRefs] = indexEntriesToXRefs(entries, true);
532    return indexJson;
533 }
534 
xrefIndex(const std::string & file,IndexEntryFilter filter)535 json::Object xrefIndex(const std::string& file, IndexEntryFilter filter)
536 {
537    // resolve path
538    FilePath filePath = module_context::resolveAliasedPath(file);
539 
540    // result to return
541    json::Object indexJson;
542 
543    // if this is a bookdown context then send the whole project index
544    if (isBookdownContext() && filePath.isWithin(projects::projectContext().buildTargetPath()))
545    {
546       indexJson = xrefIndexforProject(filter);
547    }
548 
549    // otherwise just send an index for this file (it will be in the source database)
550    else
551    {
552       indexJson[kBaseDir] = module_context::createAliasedPath(filePath.getParent());
553 
554       std::string id;
555       source_database::getId(filePath, &id);
556       if (!id.empty())
557       {
558          boost::shared_ptr<source_database::SourceDocument> pDoc(
559                   new source_database::SourceDocument());
560          Error error = source_database::get(id, pDoc);
561          if (error)
562          {
563             LOG_ERROR(error);
564             indexJson[kRefs] = json::Array();
565          }
566          else
567          {
568             XRefFileIndex idx = indexForDoc(filePath.getFilename(), pDoc->contents());
569             std::vector<XRefIndexEntry> entries = indexEntriesForFile(idx, filter);
570             indexJson["refs"] = indexEntriesToXRefs(entries, false);
571          }
572       }
573       else
574       {
575          indexJson[kRefs] = json::Array();
576       }
577    }
578 
579    return indexJson;
580 }
581 
582 
xrefIndexForFile(const json::JsonRpcRequest & request,json::JsonRpcResponse * pResponse)583 Error xrefIndexForFile(const json::JsonRpcRequest& request,
584                        json::JsonRpcResponse* pResponse)
585 {
586    // read params
587    std::string file;
588    Error error = json::readParams(request.params, &file);
589    if (error)
590       return error;
591 
592    // filter that returns all entries
593    IndexEntryFilter includeAll = boost::lambda::constant(true);
594 
595    // get index and return it
596    pResponse->setResult(xrefIndex(file, includeAll));
597 
598    return Success();
599 }
600 
xrefForId(const json::JsonRpcRequest & request,json::JsonRpcResponse * pResponse)601 Error xrefForId(const json::JsonRpcRequest& request,
602                 json::JsonRpcResponse* pResponse)
603 {
604    // read params
605    std::string file, id;
606    Error error = json::readParams(request.params, &file, &id);
607    if (error)
608       return error;
609 
610    // get index containing just the entry that matches this id
611    json::Object indexJson = xrefIndex(file, [id](const std::string& entry) {
612       std::string entryId = entry.substr(0, entry.find_first_of(' '));
613       if (id == entryId)
614       {
615          return true;
616       }
617       else
618       {
619          // headings also match on just the id part
620          entryId = boost::regex_replace(entryId, boost::regex("^h\\d\\:"), "");
621          if (id == entryId)
622             return true;
623 
624          // we can also match after trimming off any provided suffix
625          std::string trimmedId = boost::regex_replace(id, boost::regex("-\\d$"), "");
626          if (trimmedId == entryId)
627             return true;
628       }
629 
630       return false;
631    });
632 
633    // if there is more than one item returned it could have been a suffix match,
634    // in that case winnow it down to the passed id
635    json::Array refsJson = indexJson["refs"].getArray();
636    if (refsJson.getSize() > 1)
637    {
638       for (auto refJsonValue : refsJson)
639       {
640          json::Object refJson = refJsonValue.getObject();
641          boost::format fmt("%1%:%2%%3%");
642          std::string refId = boost::str(fmt %
643                                         refJson[kType].getString() %
644                                         refJson[kId].getString() %
645                                         refJson[kSuffix].getString());
646          if (refId == id)
647          {
648             json::Array suffixRefsJson;
649             suffixRefsJson.push_back(refJson);
650             indexJson["refs"] = suffixRefsJson;
651             break;
652          }
653       }
654    }
655 
656    // return it
657    pResponse->setResult(indexJson);
658 
659    return Success();
660 }
661 
662 } // anonymous namespace
663 
664 namespace modules {
665 namespace rmarkdown {
666 namespace bookdown {
667 namespace xrefs {
668 
initialize()669 Error initialize()
670 {
671    // deferred init (build xref file index)
672    module_context::events().onDeferredInit.connect(onDeferredInit);
673 
674    // register rpc functions
675    ExecBlock initBlock;
676    initBlock.addFunctions()
677      (boost::bind(module_context::registerRpcMethod, "xref_index_for_file", xrefIndexForFile))
678      (boost::bind(module_context::registerRpcMethod, "xref_for_id", xrefForId))
679    ;
680    return initBlock.execute();
681 
682 
683 }
684 
685 } // namespace xrefs
686 } // namespace bookdown
687 } // namespace rmarkdown
688 } // namespace modules
689 
690 namespace module_context {
691 
bookdownXRefIndex()692 core::json::Value bookdownXRefIndex()
693 {
694    if (isBookdownContext())
695       return xrefIndexforProject(boost::lambda::constant(true));
696    else
697       return json::Value();
698 }
699 
700 } // namespace module_context
701 
702 } // namespace session
703 } // namespace rstudio
704