1 /*
2  * SessionSourceDatabase.cpp
3  *
4  * Copyright (C) 2021 by RStudio, PBC
5  *
6  * Unless you have received this program directly from RStudio pursuant
7  * to the terms of a commercial license agreement with RStudio, then
8  * this program is licensed to you under the terms of version 3 of the
9  * GNU Affero General Public License. This program is distributed WITHOUT
10  * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12  * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13  *
14  */
15 
16 #include <session/SessionSourceDatabase.hpp>
17 
18 #include <string>
19 #include <vector>
20 #include <algorithm>
21 
22 #include <boost/regex.hpp>
23 #include <boost/bind/bind.hpp>
24 #include <boost/date_time/posix_time/posix_time.hpp>
25 
26 #include <core/Log.hpp>
27 #include <core/Exec.hpp>
28 #include <shared_core/Error.hpp>
29 #include <shared_core/FilePath.hpp>
30 #include <shared_core/Hash.hpp>
31 #include <core/FileSerializer.hpp>
32 #include <core/FileUtils.hpp>
33 #include <core/RegexUtils.hpp>
34 #include <core/DateTime.hpp>
35 
36 #include <core/system/System.hpp>
37 
38 #include <core/http/Util.hpp>
39 
40 #include <r/RUtil.hpp>
41 #include <r/RSexp.hpp>
42 #include <r/RRoutines.hpp>
43 #include <r/session/RSession.hpp>
44 #include <r/RExec.hpp>
45 
46 #include <session/SessionModuleContext.hpp>
47 #include <session/projects/SessionProjects.hpp>
48 
49 #include <session/prefs/UserPrefs.hpp>
50 #include <session/prefs/Preferences.hpp>
51 
52 #include "SessionSourceDatabaseSupervisor.hpp"
53 
54 #define kContentsSuffix "-contents"
55 
56 // NOTE: if a file is deleted then its properties database entry is not
57 // deleted. this has two implications:
58 //
59 //   - storage is not reclaimed
60 //   - the properties can be "resurrected" and re-attached to another
61 //     file with the same path
62 //
63 // One way to overcome this might be to use filesystem metadata to store
64 // properties rather than a side-database
65 
66 using namespace rstudio::core;
67 using namespace boost::placeholders;
68 
69 namespace rstudio {
70 namespace session {
71 namespace source_database {
72 
73 namespace {
74 
// cached mapping of document id to document path (facilitates efficient path
// lookup)
std::map<std::string, std::string> s_idToPath;

// cached mapping of document last write times, keyed by the path string
// handed to cacheLastWriteTime(); getCachedLastWriteTime() reports 0 for
// paths that have never been cached
std::map<std::string, std::time_t> s_lastWriteTimes;
81 
// in-memory view of the durable per-path properties store: the directory
// holding the individual properties files, the INDEX file within it, and
// the index contents (url-encoded document path -> properties filename)
struct PropertiesDatabase
{
   // directory containing the properties files
   FilePath path;
   // "INDEX" file located inside 'path'
   FilePath indexFile;
   // escaped document path -> name of its properties file
   std::map<std::string,std::string> index;
};
88 
cacheLastWriteTime(const std::string & path,std::time_t lastWriteTime)89 void cacheLastWriteTime(const std::string& path, std::time_t lastWriteTime)
90 {
91    s_lastWriteTimes[path] = lastWriteTime;
92 }
93 
getCachedLastWriteTime(const std::string & path)94 std::time_t getCachedLastWriteTime(const std::string& path)
95 {
96    auto iter = s_lastWriteTimes.find(path);
97    if (iter != s_lastWriteTimes.end())
98       return iter->second;
99    else
100       return 0;
101 }
102 
getPropertiesDatabase(PropertiesDatabase * pDatabase)103 Error getPropertiesDatabase(PropertiesDatabase* pDatabase)
104 {
105    pDatabase->path = module_context::scopedScratchPath().completePath(kSessionSourceDatabasePrefix "/prop");
106    Error error = pDatabase->path.ensureDirectory();
107    if (error)
108       return error;
109 
110    pDatabase->indexFile = pDatabase->path.completePath("INDEX");
111 
112    if (pDatabase->indexFile.exists())
113       return readStringMapFromFile(pDatabase->indexFile, &(pDatabase->index));
114    else
115       return Success();
116 }
117 
putProperties(const std::string & path,const json::Object & properties)118 Error putProperties(const std::string& path, const json::Object& properties)
119 {
120    // url escape path (so we can use key=value persistence)
121    std::string escapedPath = http::util::urlEncode(path);
122 
123    // get properties database
124    PropertiesDatabase propertiesDB;
125    Error error = getPropertiesDatabase(&propertiesDB);
126    if (error)
127       return error;
128 
129    // use existing properties file if it exists, otherwise create new
130    bool updateIndex = false;
131    std::string propertiesFile = propertiesDB.index[escapedPath];
132    if (propertiesFile.empty())
133    {
134       FilePath propFile = file_utils::uniqueFilePath(propertiesDB.path);
135       propertiesFile = propFile.getFilename();
136       propertiesDB.index[escapedPath] = propertiesFile;
137       updateIndex = true;
138    }
139 
140    // write the file
141    FilePath propertiesFilePath = propertiesDB.path.completePath(propertiesFile);
142    error = writeStringToFile(propertiesFilePath, properties.writeFormatted());
143    if (error)
144       return error;
145 
146    // update the index if necessary
147    if (updateIndex)
148       return writeStringMapToFile(propertiesDB.indexFile, propertiesDB.index);
149    else
150       return Success();
151 }
152 
getProperties(const std::string & path,json::Object * pProperties)153 Error getProperties(const std::string& path, json::Object* pProperties)
154 {
155    // url escape path (so we can use key=value persistence)
156    std::string escapedPath = http::util::urlEncode(path);
157 
158    // get properties database
159    PropertiesDatabase propertiesDB;
160    Error error = getPropertiesDatabase(&propertiesDB);
161    if (error)
162       return error;
163 
164    // check for properties file
165    std::string propertiesFile = propertiesDB.index[escapedPath];
166    if (propertiesFile.empty())
167    {
168       // return empty object if there is none
169       *pProperties = json::Object();
170       return Success();
171    }
172 
173    // read the properties file
174    std::string contents;
175    FilePath propertiesFilePath = propertiesDB.path.completePath(propertiesFile);
176    error = readStringFromFile(propertiesFilePath, &contents,
177                               options().sourceLineEnding());
178    if (error)
179       return error;
180 
181    // parse the json
182    json::Value value;
183    if ( value.parse(contents) )
184       return systemError(boost::system::errc::bad_message, ERROR_LOCATION);
185 
186    // return it
187    if (json::isType<json::Object>(value))
188       *pProperties = value.getValue<json::Object>();
189    return Success();
190 }
191 
pathToProjectPath(const std::string & path)192 json::Value pathToProjectPath(const std::string& path)
193 {
194    // no project
195    projects::ProjectContext& projectContext = projects::projectContext();
196    if (!projectContext.hasProject())
197       return json::Value();
198 
199    // no path
200    if (path.empty())
201       return json::Value();
202 
203    // return relative path if we are within the project directory
204    FilePath filePath = module_context::resolveAliasedPath(path);
205    if (filePath.isWithin(projectContext.directory()))
206       return json::Value(filePath.getRelativePath(projectContext.directory()));
207    else
208       return json::Value();
209 }
210 
pathFromProjectPath(json::Value projPathJson)211 std::string pathFromProjectPath(json::Value projPathJson)
212 {
213    // no project
214    projects::ProjectContext& projectContext = projects::projectContext();
215    if (!projectContext.hasProject())
216       return std::string();
217 
218    // no proj path
219    std::string projPath = !projPathJson.isNull() ? projPathJson.getString() :
220                                                     std::string();
221    if (projPath.empty())
222       return std::string();
223 
224    // interpret path relative to project directory
225    FilePath filePath = projectContext.directory().completeChildPath(projPath);
226    if (filePath.exists())
227       return module_context::createAliasedPath(filePath);
228    else
229       return std::string();
230 }
231 
attemptContentsMigration(json::Object & propertiesJson,const FilePath & propertiesPath)232 Error attemptContentsMigration(json::Object& propertiesJson,
233                                const FilePath& propertiesPath)
234 {
235    // extract contents from properties (if it exists)
236    if (propertiesJson.find("contents") == propertiesJson.end())
237       return Success();
238 
239    json::Value contentsJson = propertiesJson["contents"];
240    if (!json::isType<std::string>(contentsJson))
241       return Success();
242 
243    // if the contents string is empty, bail (no need to migrate empty document;
244    // also signals that an earlier migration occurred)
245    std::string contents = contentsJson.getString();
246    if (contents.empty())
247       return Success();
248 
249    // if we already have a contents file, bail (migration already occurred)
250    FilePath contentsPath(propertiesPath.getAbsolutePath() + kContentsSuffix);
251    if (contentsPath.exists())
252       return Success();
253 
254    // write contents sidecar file
255    return writeStringToFile(contentsPath, contents);
256 }
257 
isIntendedAsReadOnly(const std::string & contents,std::vector<std::string> * pAlternatives)258 bool isIntendedAsReadOnly(const std::string& contents,
259                           std::vector<std::string>* pAlternatives)
260 {
261    boost::smatch match;
262 
263    // try stripping out an Roxygen header from a .Rd file
264    std::string header;
265    if (string_utils::extractCommentHeader(contents, "^%+\\s*", &header))
266    {
267       boost::smatch match;
268       boost::regex reRoxygen(
269                "Generated by roxygen2: do not edit by hand\n"
270                "Please edit documentation in ");
271       if (regex_utils::search(header, match, reRoxygen))
272       {
273          // found an roxygen header: parse the source files
274          std::string alternatives = header.substr(match.length());
275 
276          boost::sregex_token_iterator it(
277                   alternatives.begin(),
278                   alternatives.end(),
279                   boost::regex("\\s*,\\s*"),
280                   -1);
281          boost::sregex_token_iterator end;
282          for (; it != end; ++it)
283          {
284             pAlternatives->push_back(string_utils::trimWhitespace(*it));
285          }
286          return true;
287       }
288    }
289 
290    // otherwise, just look through the first few lines
291    std::istringstream iss(contents);
292    std::string line;
293    for (std::size_t i = 0; i < 5; i++)
294    {
295       if (!std::getline(iss, line))
296          break;
297 
298       if (line.find("do not edit by hand") != std::string::npos)
299          return true;
300    }
301 
302    return false;
303 }
304 
305 }  // anonymous namespace
306 
SourceDocument(const std::string & type)307 SourceDocument::SourceDocument(const std::string& type)
308 {
309    FilePath srcDBPath = source_database::path();
310    FilePath docPath = file_utils::uniqueFilePath(srcDBPath);
311    id_ = docPath.getFilename();
312    type_ = type;
313    setContents("");
314    dirty_ = false;
315    created_ = date_time::millisecondsSinceEpoch();
316    sourceOnSave_ = false;
317    relativeOrder_ = 0;
318    lastContentUpdate_ = static_cast<std::time_t>(date_time::millisecondsSinceEpoch());
319 }
320 
321 
getProperty(const std::string & name) const322 std::string SourceDocument::getProperty(const std::string& name) const
323 {
324    json::Object::Iterator it = properties_.find(name);
325    if (it != properties_.end())
326    {
327       json::Value valueJson = (*it).getValue();
328       if (json::isType<std::string>(valueJson))
329          return valueJson.getString();
330       else
331          return "";
332    }
333    else
334    {
335       return "";
336    }
337 }
338 
isUntitled() const339 bool SourceDocument::isUntitled() const
340 {
341    return path().empty() && !getProperty("tempName").empty();
342 }
343 
344 // set contents from string
setContents(const std::string & contents)345 void SourceDocument::setContents(const std::string& contents)
346 {
347    contents_ = contents;
348    hash_ = hash::crc32Hash(contents_);
349    lastContentUpdate_ = static_cast<std::time_t>(date_time::millisecondsSinceEpoch());
350 }
351 
352 // set contents from file
setPathAndContents(const std::string & path,bool allowSubstChars)353 Error SourceDocument::setPathAndContents(const std::string& path,
354                                          bool allowSubstChars)
355 {
356    // resolve aliased path
357    FilePath docPath = module_context::resolveAliasedPath(path);
358 
359    std::string contents;
360    Error error = module_context::readAndDecodeFile(docPath,
361                                                    encoding(),
362                                                    allowSubstChars,
363                                                    &contents);
364    if (error)
365       return error;
366 
367    // update path and contents
368    path_ = path;
369    setContents(contents);
370 
371    // cache the lastKnownWriteTime on both the document object itself and the source database
372    // if we are unable to update changes to the source database after successfully writing new contents
373    // to the actual file, we want to ensure the lastWriteTime is cached so we can fallback on it - otherwise
374    // we will likely prompt the user to reload the changed file, even though it wasn't changed
375    lastKnownWriteTime_ = docPath.getLastWriteTime();
376    cacheLastWriteTime(FilePath(path).getAbsolutePath(), lastKnownWriteTime_);
377 
378    // rewind the last content update to the file's write time
379    lastContentUpdate_ = lastKnownWriteTime_;
380 
381    return Success();
382 }
383 
contentsMatchDisk(bool * pMatches)384 Error SourceDocument::contentsMatchDisk(bool *pMatches)
385 {
386    *pMatches = false;
387    FilePath docPath = module_context::resolveAliasedPath(path());
388    if (docPath.exists() && docPath.getSize() <= (1024*1024))
389    {
390       std::string contents;
391       Error error = module_context::readAndDecodeFile(docPath,
392                                                       encoding(),
393                                                       true,
394                                                       &contents);
395       if (error)
396          return error;
397 
398       *pMatches = contents_.length() == contents.length() &&
399                   hash_ == hash::crc32Hash(contents);
400    }
401 
402    return Success();
403 }
404 
updateDirty()405 Error SourceDocument::updateDirty()
406 {
407    if (path().empty())
408    {
409       dirty_ = !contents_.empty();
410    }
411    else if (dirty_)
412    {
413       // This doesn't actually guarantee that dirty state is correct. All
414       // it does, at the most, is take a dirty document and mark it clean
415       // if the contents are the same as on disk. This is important because
416       // the client now has logic to detect when undo/redo causes a document
417       // to be reverted to its previous state (i.e. a dirty document can
418       // become clean through undo/redo), but that state doesn't get sent
419       // back to the server.
420 
421       // We don't make a clean document dirty here, even if the contents
422       // on disk are different, because we will do that on the client side
423       // and the UI logic is a little complicated.
424 
425       bool matches = false;
426       Error error = contentsMatchDisk(&matches);
427       if (error)
428          return error;
429       if (matches)
430          dirty_ = false;
431    }
432    return Success();
433 }
434 
editProperties(json::Object & properties)435 void SourceDocument::editProperties(json::Object& properties)
436 {
437    std::for_each(properties.begin(),
438                  properties.end(),
439                  boost::bind(&SourceDocument::editProperty, this, _1));
440 }
441 
checkForExternalEdit(std::time_t * pTime)442 void SourceDocument::checkForExternalEdit(std::time_t* pTime)
443 {
444    *pTime = 0;
445 
446    if (path_.empty())
447       return;
448 
449    if (lastKnownWriteTime_ == 0)
450       return;
451 
452    core::FilePath filePath = module_context::resolveAliasedPath(path_);
453    if (!filePath.exists())
454       return;
455 
456    std::time_t newTime = filePath.getLastWriteTime();
457    if (newTime != lastKnownWriteTime_)
458       *pTime = newTime;
459 }
460 
updateLastKnownWriteTime()461 void SourceDocument::updateLastKnownWriteTime()
462 {
463    lastKnownWriteTime_ = 0;
464    if (path_.empty())
465       return;
466 
467    core::FilePath filePath = module_context::resolveAliasedPath(path_);
468    if (!filePath.exists())
469       return;
470 
471    lastKnownWriteTime_ = filePath.getLastWriteTime();
472 }
473 
setLastKnownWriteTime(std::time_t time)474 void SourceDocument::setLastKnownWriteTime(std::time_t time)
475 {
476    lastKnownWriteTime_ = time;
477 }
478 
// populate this document from its persisted json representation. any
// std::exception raised while reading fields is converted into a
// protocol_error carrying the exception's message.
Error SourceDocument::readFromJson(json::Object* pDocJson)
{
   // NOTE: since this class is the one who presumably persisted the
   // json values in the first place we don't do "checked" access to
   // the json data elements. if the persistence format differs from
   // what we expect things will blow up. therefore if we change the
   // persistence format we need to make sure this code is robust
   // in the presence of the old format
   try
   {
      json::Object& docJson = *pDocJson;

      id_ = docJson["id"].getString();
      json::Value path = docJson["path"];
      path_ = !path.isNull() ? path.getString() : std::string();

      // if we have a project_path field then it supersedes the path field
      // (since it would correctly survive a moved project folder)
      std::string projPath = pathFromProjectPath(docJson["project_path"]);
      if (!projPath.empty())
         path_ = projPath;

      json::Value type = docJson["type"];
      type_ = !type.isNull() ? type.getString() : std::string();

      // these fields are read without a null check
      setContents(docJson["contents"].getString());
      dirty_ = docJson["dirty"].getBool();
      created_ = docJson["created"].getDouble();
      sourceOnSave_ = docJson["source_on_save"].getBool();

      // read safely (migration)
      json::Value properties = docJson["properties"];
      properties_ = !properties.isNull() ? properties.getValue<json::Object>() : json::Object();

      // it's possible that we could have failed to update the source database with the correct lastKnownWriteTime
      // but the actual file has been updated more recently - in that case, take the newer version that we have cached
      json::Value lastKnownWriteTime = docJson["lastKnownWriteTime"];
      if (!lastKnownWriteTime.isNull())
      {
         int64_t val = lastKnownWriteTime.getInt64();
         std::time_t cachedWriteTime = getCachedLastWriteTime(FilePath(path_).getAbsolutePath());
         lastKnownWriteTime_ = cachedWriteTime > val ? cachedWriteTime : val;
      }
      else
         lastKnownWriteTime_ = 0;

      // the remaining fields all fall back to an empty/zero value when null
      json::Value encoding = docJson["encoding"];
      encoding_ = !encoding.isNull() ? encoding.getString() : std::string();

      json::Value folds = docJson["folds"];
      folds_ = !folds.isNull() ? folds.getString() : std::string();

      json::Value order = docJson["relative_order"];
      relativeOrder_ = !order.isNull() ? order.getInt() : 0;

      // NOTE: this overrides the timestamp set by setContents() above
      json::Value lastContentUpdate = docJson["last_content_update"];
      lastContentUpdate_ = !lastContentUpdate.isNull() ?
                               lastContentUpdate.getInt64() : 0;

      json::Value collabServer = docJson["collab_server"];
      collabServer_ = !collabServer.isNull() ? collabServer.getString() :
                                                std::string();

      json::Value sourceWindow = docJson["source_window"];
      sourceWindow_ = !sourceWindow.isNull() ? sourceWindow.getString() :
                                                std::string();

      return Success();
   }
   catch(const std::exception& e)
   {
      return systemError(boost::system::errc::protocol_error,
                         e.what(),
                         ERROR_LOCATION);
   }
}
555 
writeToJson(json::Object * pDocJson,bool includeContents) const556 void SourceDocument::writeToJson(json::Object* pDocJson, bool includeContents) const
557 {
558    json::Object& jsonDoc = *pDocJson;
559    jsonDoc["id"] = id();
560    jsonDoc["path"] = !path().empty() ? json::Value(path_) : json::Value();
561    jsonDoc["project_path"] = pathToProjectPath(path_);
562    jsonDoc["type"] = !type().empty() ? json::Value(type_) : json::Value();
563    jsonDoc["hash"] = hash();
564    jsonDoc["contents"] = includeContents ? contents() : std::string();
565    jsonDoc["dirty"] = dirty();
566    jsonDoc["created"] = created();
567    jsonDoc["source_on_save"] = sourceOnSave();
568    jsonDoc["relative_order"] = relativeOrder();
569    jsonDoc["properties"] = properties();
570    jsonDoc["folds"] = folds();
571    jsonDoc["lastKnownWriteTime"] = json::Value(
572          static_cast<boost::int64_t>(lastKnownWriteTime_));
573    jsonDoc["encoding"] = encoding_;
574    jsonDoc["collab_server"] = collabServer();
575    jsonDoc["source_window"] = sourceWindow_;
576    jsonDoc["last_content_update"] = json::Value(
577          static_cast<boost::int64_t>(lastContentUpdate_));
578 
579    std::vector<std::string> alternatives;
580    jsonDoc["read_only"] = isIntendedAsReadOnly(contents(), &alternatives);
581    jsonDoc["read_only_alternatives"] = json::toJsonArray(alternatives);
582 }
583 
toRObject(r::sexp::Protect * pProtect,bool includeContents) const584 SEXP SourceDocument::toRObject(r::sexp::Protect* pProtect, bool includeContents) const
585 {
586    json::Object object;
587    writeToJson(&object, includeContents);
588    return r::sexp::create(object, pProtect);
589 }
590 
writeToFile(const FilePath & filePath,bool writeContents,bool retryRewrite) const591 Error SourceDocument::writeToFile(const FilePath& filePath, bool writeContents, bool retryRewrite) const
592 {
593    // NOTE: in a previous implementation, the document properties and
594    // document contents were encoded together in the same file -- we
595    // now use the original file as the properties file (for backwards
596    // compatibility), and write the contents to '<id>-contents'. this
597    // allows newer versions of RStudio to remain backwards-compatible
598    // with older formats for the source database
599 
600    int saveTimeout = retryRewrite ? session::prefs::userPrefs().saveRetryTimeout() : 0;
601 
602    // write contents to file
603    if (writeContents)
604    {
605       FilePath contentsPath(filePath.getAbsolutePath() + kContentsSuffix);
606       Error error = writeStringToFile(contentsPath,
607                                       contents_,
608                                       string_utils::LineEndingPassthrough,
609                                       true,
610                                       saveTimeout);
611       if (error)
612          return error;
613    }
614 
615    // get document properties as json
616    json::Object jsonProperties;
617    writeToJson(&jsonProperties, false);
618 
619    // write properties to file
620    Error error = writeStringToFile(filePath,
621                                    jsonProperties.writeFormatted(),
622                                    string_utils::LineEndingPassthrough,
623                                    true,
624                                    saveTimeout);
625    return error;
626 }
627 
editProperty(const json::Object::Member & property)628 void SourceDocument::editProperty(const json::Object::Member& property)
629 {
630    if (property.getValue().isNull())
631    {
632       properties_.erase(property.getName());
633    }
634    else
635    {
636       properties_.insert(property);
637    }
638 }
639 
sortByCreated(const boost::shared_ptr<SourceDocument> & pDoc1,const boost::shared_ptr<SourceDocument> & pDoc2)640 bool sortByCreated(const boost::shared_ptr<SourceDocument>& pDoc1,
641                    const boost::shared_ptr<SourceDocument>& pDoc2)
642 {
643    return pDoc1->created() < pDoc2->created();
644 }
645 
sortByRelativeOrder(const boost::shared_ptr<SourceDocument> & pDoc1,const boost::shared_ptr<SourceDocument> & pDoc2)646 bool sortByRelativeOrder(const boost::shared_ptr<SourceDocument>& pDoc1,
647                          const boost::shared_ptr<SourceDocument>& pDoc2)
648 {
649    // if both documents are unordered, sort by creation time
650    if (pDoc1->relativeOrder() == 0 && pDoc2->relativeOrder() == 0)
651    {
652       return sortByCreated(pDoc1, pDoc2);
653    }
654    // unordered documents go at the end
655    if (pDoc1->relativeOrder() == 0)
656    {
657       return false;
658    }
659    return pDoc1->relativeOrder() < pDoc2->relativeOrder();
660 }
661 
path()662 FilePath path()
663 {
664    return supervisor::sessionDirPath();
665 }
666 
get(const std::string & id,boost::shared_ptr<SourceDocument> pDoc)667 Error get(const std::string& id, boost::shared_ptr<SourceDocument> pDoc)
668 {
669    return get(id, true, pDoc);
670 }
671 
get(const std::string & id,bool includeContents,boost::shared_ptr<SourceDocument> pDoc)672 Error get(const std::string& id, bool includeContents, boost::shared_ptr<SourceDocument> pDoc)
673 {
674    FilePath propertiesPath = source_database::path().completePath(id);
675 
676    // attempt to read file contents from sidecar file if available
677    std::string contents;
678    if (includeContents)
679    {
680       FilePath contentsPath(propertiesPath.getAbsolutePath() + kContentsSuffix);
681       if (contentsPath.exists())
682       {
683          Error error = readStringFromFile(contentsPath,
684                                           &contents,
685                                           options().sourceLineEnding());
686          if (error)
687             LOG_ERROR(error);
688       }
689    }
690 
691    if (propertiesPath.exists())
692    {
693       // read the contents of the file
694       std::string properties;
695       Error error = readStringFromFile(propertiesPath,
696                                        &properties,
697                                        options().sourceLineEnding());
698       if (error)
699          return error;
700 
701       // parse the json
702       json::Value value;
703       if (value.parse(properties))
704       {
705          return systemError(boost::system::errc::invalid_argument,
706                             ERROR_LOCATION);
707       }
708 
709       // initialize doc from json
710       json::Object jsonDoc = value.getObject();
711 
712       // migration: if we have a 'contents' field, but no '-contents' side-car
713       // file, perform a one-time generation of that sidecar file from contents
714       error = attemptContentsMigration(jsonDoc, propertiesPath);
715       if (error)
716          LOG_ERROR(error);
717 
718       if (includeContents && !contents.empty())
719          jsonDoc["contents"] = contents;
720 
721       if (jsonDoc.find("contents") == jsonDoc.end())
722          jsonDoc["contents"] = std::string();
723 
724       return pDoc->readFromJson(&jsonDoc);
725    }
726    else
727    {
728       return systemError(boost::system::errc::no_such_file_or_directory,
729                          ERROR_LOCATION);
730    }
731 }
732 
getDurableProperties(const std::string & path,json::Object * pProperties)733 Error getDurableProperties(const std::string& path, json::Object* pProperties)
734 {
735    return getProperties(path, pProperties);
736 }
737 
isSourceDocument(const FilePath & filePath)738 bool isSourceDocument(const FilePath& filePath)
739 {
740    if (filePath.isDirectory())
741       return false;
742 
743    std::string filename = filePath.getFilename();
744    if (filename == ".DS_Store" ||
745        filename == "lock_file" ||
746        filename == "suspend_file" ||
747        filename == "restart_file" ||
748        boost::algorithm::ends_with(filename, kContentsSuffix))
749    {
750       return false;
751    }
752 
753    return true;
754 }
755 
logUnsafeSourceDocument(const FilePath & filePath,const std::string & reason)756 void logUnsafeSourceDocument(const FilePath& filePath,
757                              const std::string& reason)
758 {
759    std::string msg = "Excluded unsafe source document";
760    if (!filePath.isEmpty())
761       msg += " (" + filePath.getAbsolutePath() + ")";
762    msg += ": " + reason;
763    LOG_WARNING_MESSAGE(msg);
764 }
765 
// check whether 'contents' contains two consecutive null bytes anywhere
// (used as a heuristic for binary data). a lone null byte, or null bytes
// separated by other characters, do not count.
bool hasNullByteSequence(const std::string& contents)
{
   // construct with an explicit length so the embedded nulls are kept
   const std::string nullBytes(2, '\0');
   return contents.find(nullBytes) != std::string::npos;
}
773 
isSafeSourceDocument(const FilePath & docDbPath,boost::shared_ptr<SourceDocument> pDoc)774 bool isSafeSourceDocument(const FilePath& docDbPath,
775                           boost::shared_ptr<SourceDocument> pDoc)
776 {
777    // get a filepath and use it for filtering if we can
778    FilePath filePath;
779    if (!pDoc->path().empty())
780    {
781       filePath = FilePath(pDoc->path());
782       if (filePath.getExtensionLowerCase() == ".rdata")
783       {
784          logUnsafeSourceDocument(filePath, ".RData file");
785          return false;
786       }
787    }
788 
789    // get the size of the file in KB
790    uintmax_t docSizeKb = docDbPath.getSize() / 1024;
791    std::string kbStr = safe_convert::numberToString(docSizeKb);
792 
793    // if it's larger than 5MB then always drop it (that's the limit
794    // enforced by the editor)
795    if (docSizeKb > (5 * 1024))
796    {
797       logUnsafeSourceDocument(filePath, "File too large (" + kbStr + ")");
798       return false;
799    }
800 
801    // if it's larger then 2MB and not dirty then drop it as well
802    // (that's the file size considered "large" on the client)
803    else if (!pDoc->dirty() && (docSizeKb > (2 * 1024)))
804    {
805       logUnsafeSourceDocument(filePath, "File too large (" + kbStr + ")");
806       return false;
807    }
808 
809    // if it has a sequence of 2 null bytes then drop it
810    else if (hasNullByteSequence(pDoc->contents()))
811    {
812       logUnsafeSourceDocument(filePath,
813                               "File is binary (has null byte sequence)");
814       return false;
815    }
816 
817    else
818    {
819       return true;
820    }
821 }
822 
823 
list(std::vector<boost::shared_ptr<SourceDocument>> * pDocs)824 Error list(std::vector<boost::shared_ptr<SourceDocument> >* pDocs)
825 {
826    std::vector<FilePath> files;
827    Error error = source_database::path().getChildren(files);
828    if (error)
829       return error;
830 
831    for (FilePath& filePath : files)
832    {
833       if (isSourceDocument(filePath))
834       {
835          // get the source doc
836          boost::shared_ptr<SourceDocument> pDoc(new SourceDocument());
837          Error error = source_database::get(filePath.getFilename(), pDoc);
838          if (!error)
839          {
840             // safety filter
841             if (isSafeSourceDocument(filePath, pDoc))
842                pDocs->push_back(pDoc);
843          }
844          else
845             LOG_ERROR(error);
846       }
847    }
848 
849    return Success();
850 }
851 
list(std::vector<FilePath> * pPaths)852 Error list(std::vector<FilePath>* pPaths)
853 {
854    // list children
855    std::vector<FilePath> children;
856    Error error = source_database::path().getChildren(children);
857    if (error)
858       return error;
859 
860    // filter to actual source documents
861    core::algorithm::copy_if(
862             children.begin(),
863             children.end(),
864             std::back_inserter(*pPaths),
865             isSourceDocument);
866 
867    return Success();
868 }
869 
put(boost::shared_ptr<SourceDocument> pDoc,bool writeContents,bool retryRewrite)870 Error put(boost::shared_ptr<SourceDocument> pDoc, bool writeContents, bool retryRewrite)
871 {
872    // write to file
873    FilePath filePath = source_database::path().completePath(pDoc->id());
874    Error error = pDoc->writeToFile(filePath, writeContents, retryRewrite);
875    if (error)
876       return error;
877 
878    // write properties to durable storage (if there is a path)
879    if (!pDoc->path().empty())
880    {
881       error = putProperties(pDoc->path(), pDoc->properties());
882       if (error)
883          LOG_ERROR(error);
884    }
885 
886    return Success();
887 }
888 
remove(const std::string & id)889 Error remove(const std::string& id)
890 {
891    return source_database::path().completePath(id).removeIfExists();
892 }
893 
removeAll()894 Error removeAll()
895 {
896    std::vector<FilePath> files;
897    Error error = source_database::path().getChildren(files);
898    if (error)
899       return error;
900 
901    for (FilePath& filePath : files)
902    {
903       Error error = filePath.remove();
904       if (error)
905          return error;
906    }
907 
908    return Success();
909 }
910 
getPath(const std::string & id,std::string * pPath)911 Error getPath(const std::string& id, std::string* pPath)
912 {
913    std::map<std::string, std::string>::iterator it = s_idToPath.find(id);
914    if (it == s_idToPath.end())
915    {
916       return systemError(boost::system::errc::no_such_file_or_directory,
917                          ERROR_LOCATION);
918    }
919    *pPath = it->second;
920    return Success();
921 }
922 
getPath(const std::string & id,core::FilePath * pPath)923 Error getPath(const std::string& id, core::FilePath* pPath)
924 {
925    std::string path;
926    Error error = getPath(id, &path);
927    if (error)
928       return error;
929    *pPath = module_context::resolveAliasedPath(path);
930    return Success();
931 }
932 
getId(const std::string & path,std::string * pId)933 Error getId(const std::string& path, std::string* pId)
934 {
935    for (std::map<std::string, std::string>::iterator it = s_idToPath.begin();
936         it != s_idToPath.end();
937         it++)
938    {
939       if (it->second == path)
940       {
941          *pId = it->first;
942          return Success();
943       }
944    }
945    return systemError(boost::system::errc::no_such_file_or_directory,
946                       ERROR_LOCATION);
947 }
948 
getId(const FilePath & path,std::string * pId)949 Error getId(const FilePath& path, std::string* pId)
950 {
951    return getId(module_context::createAliasedPath(FileInfo(path)), pId);
952 }
953 
rename(const FilePath & from,const FilePath & to)954 Error rename(const FilePath& from, const FilePath& to)
955 {
956    // ensure the destination exists
957    if (!to.exists())
958       return Success();
959 
960    // ensure the file is in the source database
961    std::string id;
962    Error error = getId(from, &id);
963    if (error)
964    {
965       // rename of a file not in the sdb is a no-op
966       return Success();
967    }
968 
969    // find the file in the sdb and update it with the new path
970    boost::shared_ptr<SourceDocument> pDoc(new SourceDocument());
971    error = source_database::get(id, pDoc);
972    if (error)
973       return error;
974    error = pDoc->setPathAndContents(
975          module_context::createAliasedPath(FileInfo(to)));
976    if (error)
977       return error;
978    error = source_database::put(pDoc);
979    if (error)
980       return error;
981 
982    // success! fire event for other modules to pick up
983    events().onDocRenamed(
984          module_context::createAliasedPath(FileInfo(from)), pDoc);
985 
986    return error;
987 }
988 
detectExtendedType(const core::FilePath & filePath,std::string * pExtendedType)989 core::Error detectExtendedType(const core::FilePath& filePath, std::string* pExtendedType)
990 {
991    std::string id;
992    Error error = source_database::getId(filePath, &id);
993    if (error)
994       return error;
995 
996    boost::shared_ptr<SourceDocument> pDoc(new SourceDocument);
997    error = source_database::get(id, pDoc);
998    if (error)
999       return error;
1000 
1001    *pExtendedType = module_context::events().onDetectSourceExtendedType(pDoc);
1002    return Success();
1003 }
1004 
1005 namespace {
1006 
onQuit()1007 void onQuit()
1008 {
1009    Error error = supervisor::saveMostRecentDocuments();
1010    if (error)
1011       LOG_ERROR(error);
1012 
1013    error = supervisor::detachFromSourceDatabase();
1014    if (error)
1015       LOG_ERROR(error);
1016 }
1017 
onSuspend(const r::session::RSuspendOptions & options,core::Settings *)1018 void onSuspend(const r::session::RSuspendOptions& options, core::Settings*)
1019 {
1020    supervisor::suspendSourceDatabase(options.status);
1021 }
1022 
// Resume handler (registered together with onSuspend in initialize()): asks
// the supervisor to resume the source database. The settings argument is not
// used.
void onResume(const Settings&)
{
   supervisor::resumeSourceDatabase();
}
1027 
onDocUpdated(boost::shared_ptr<SourceDocument> pDoc)1028 void onDocUpdated(boost::shared_ptr<SourceDocument> pDoc)
1029 {
1030    s_idToPath[pDoc->id()] = pDoc->path();
1031 }
1032 
onDocRemoved(const std::string & id,const std::string & path)1033 void onDocRemoved(const std::string& id, const std::string& path)
1034 {
1035    std::map<std::string, std::string>::iterator it = s_idToPath.find(id);
1036    if (it != s_idToPath.end())
1037       s_idToPath.erase(it);
1038 }
1039 
onDocRenamed(const std::string &,boost::shared_ptr<SourceDocument> pDoc)1040 void onDocRenamed(const std::string &,
1041                   boost::shared_ptr<SourceDocument> pDoc)
1042 {
1043    s_idToPath[pDoc->id()] = pDoc->path();
1044 }
1045 
// Handler for the onRemoveAll event (connected in initialize()): discards
// every entry of the id -> path map.
void onRemoveAll()
{
   s_idToPath.clear();
}
1050 
rs_getDocumentProperties(SEXP pathSEXP,SEXP includeContentsSEXP)1051 SEXP rs_getDocumentProperties(SEXP pathSEXP, SEXP includeContentsSEXP)
1052 {
1053    if (!r::exec::isMainThread())
1054    {
1055       LOG_ERROR_MESSAGE("rs_getDocumentProperties called from non main thread");
1056       return R_NilValue;
1057    }
1058    Error error;
1059    FilePath path = module_context::resolveAliasedPath(r::sexp::safeAsString(pathSEXP));
1060    bool includeContents = r::sexp::asLogical(includeContentsSEXP);
1061 
1062    std::string id;
1063    error = source_database::getId(path, &id);
1064    if (error)
1065    {
1066       return R_NilValue;
1067    }
1068 
1069    boost::shared_ptr<SourceDocument> pDoc(new SourceDocument);
1070    error = source_database::get(id, pDoc);
1071    if (error)
1072    {
1073       LOG_ERROR(error);
1074       return R_NilValue;
1075    }
1076 
1077    r::sexp::Protect protect;
1078    SEXP object = pDoc->toRObject(&protect, includeContents);
1079    return object;
1080 }
1081 
rs_detectExtendedType(SEXP pathSEXP)1082 SEXP rs_detectExtendedType(SEXP pathSEXP)
1083 {
1084    FilePath path = module_context::resolveAliasedPath(r::sexp::safeAsString(pathSEXP));
1085 
1086    std::string extendedType;
1087    Error error = source_database::detectExtendedType(path, &extendedType);
1088    if (error)
1089       return R_NilValue;
1090 
1091    r::sexp::Protect protect;
1092    return r::sexp::create(extendedType, &protect);
1093 }
1094 
1095 
1096 } // anonymous namespace
1097 
events()1098 Events& events()
1099 {
1100    static Events instance;
1101    return instance;
1102 }
1103 
initialize()1104 Error initialize()
1105 {
1106    // provision a source database directory
1107    Error error = supervisor::attachToSourceDatabase();
1108    if (error)
1109       return error;
1110 
1111    RS_REGISTER_CALL_METHOD(rs_getDocumentProperties, 2);
1112    RS_REGISTER_CALL_METHOD(rs_detectExtendedType, 1);
1113 
1114    events().onDocUpdated.connect(onDocUpdated);
1115    events().onDocRemoved.connect(onDocRemoved);
1116    events().onDocRenamed.connect(onDocRenamed);
1117    events().onRemoveAll.connect(onRemoveAll);
1118 
1119    // signup for session end/suspend events
1120    module_context::events().onQuit.connect(onQuit);
1121    module_context::addSuspendHandler(
1122          module_context::SuspendHandler(onSuspend, onResume));
1123 
1124    return Success();
1125 }
1126 
1127 } // namespace source_database
1128 } // namespace session
1129 } // namespace rstudio
1130 
1131