1 /*
2  * ZoteroCollections.cpp
3  *
4  * Copyright (C) 2009-20 by RStudio, Inc.
5  *
6  * Unless you have received this program directly from RStudio pursuant
7  * to the terms of a commercial license agreement with RStudio, then
8  * this program is licensed to you under the terms of version 3 of the
9  * GNU Affero General Public License. This program is distributed WITHOUT
10  * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12  * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13  *
14  */
15 
16 #include "ZoteroCollections.hpp"
17 
18 #include <shared_core/Error.hpp>
19 #include <shared_core/json/Json.hpp>
20 
21 #include <shared_core/Hash.hpp>
22 
23 #include <core/FileSerializer.hpp>
24 
25 #include <session/prefs/UserState.hpp>
26 #include <session/prefs/UserPrefs.hpp>
27 #include <session/SessionModuleContext.hpp>
28 #include <session/projects/SessionProjects.hpp>
29 #include <session/SessionAsyncDownloadFile.hpp>
30 
31 #include "ZoteroCollectionsLocal.hpp"
32 #include "ZoteroCollectionsWeb.hpp"
33 #include "ZoteroUtil.hpp"
34 
35 using namespace rstudio::core;
36 
37 namespace rstudio {
38 namespace session {
39 namespace modules {
40 namespace zotero {
41 namespace collections {
42 
43 namespace {
44 
// name of the JSON index file stored inside each cache directory
constexpr const char* kIndexFile = "INDEX";

// index entry member that holds the name of a collection's cache file
constexpr const char* kFile = "file";
47 
collectionsCacheDir(const std::string & type,const std::string & context)48 FilePath collectionsCacheDir(const std::string& type, const std::string& context)
49 {
50    // cache dir name (depends on whether bbt is enabled as when that changes it should invalidate all cache entries)
51    std::string dirName = "libraries-cache";
52    if (session::prefs::userState().zoteroUseBetterBibtex())
53       dirName += "-bbt";
54 
55    // ~/.local/share/rstudio/zotero/libraries
56    FilePath cachePath = module_context::userScratchPath()
57       .completeChildPath("zotero")
58       .completeChildPath(dirName)
59       .completeChildPath(type)
60       .completeChildPath(context);
61    Error error = cachePath.ensureDirectory();
62    if (error)
63       LOG_ERROR(error);
64    return cachePath;
65 }
66 
// One entry of a cache directory's index: the bookkeeping kept for a single
// cached collection.
struct IndexedCollection
{
   // an entry with no backing cache file stands for "not in the cache"
   bool empty() const { return file.empty(); }

   // initialized so that a default-constructed entry never carries an
   // indeterminate version (0 is also what std::map::operator[] yields when
   // it value-initializes a missing entry)
   int version = 0;

   // name of the file (within the cache directory) holding the collection
   std::string file;

   // key of the collection and of its parent collection
   std::string key;
   std::string parentKey;
};
75 
collectionsCacheIndex(const FilePath & cacheDir)76 std::map<std::string,IndexedCollection> collectionsCacheIndex(const FilePath& cacheDir)
77 {
78    std::map<std::string,IndexedCollection> index;
79 
80    FilePath indexFile = cacheDir.completeChildPath(kIndexFile);
81    if (indexFile.exists())
82    {
83       std::string indexContents;
84       Error error = core::readStringFromFile(indexFile, &indexContents);
85       if (!error)
86       {
87          json::Object indexJson;
88          error = indexJson.parse(indexContents);
89          if (!error)
90          {
91             std::for_each(indexJson.begin(), indexJson.end(), [&index](json::Object::Member member) {
92 
93                json::Object entryJson = member.getValue().getObject();
94                IndexedCollection coll;
95                coll.version = entryJson[kVersion].getInt();
96                coll.file = entryJson[kFile].getString();
97                coll.key = entryJson[kKey].getString();
98                coll.parentKey = entryJson[kParentKey].getString();
99                index.insert(std::make_pair(member.getName(),coll));
100             });
101          }
102       }
103 
104       if (error)
105          LOG_ERROR(error);
106    }
107 
108    return index;
109 }
110 
updateCollectionsCacheIndex(const FilePath & cacheDir,const std::map<std::string,IndexedCollection> & index)111 void updateCollectionsCacheIndex(const FilePath& cacheDir, const std::map<std::string,IndexedCollection>& index)
112 {
113    // create json for index
114    json::Object indexJson;
115    for (auto item : index)
116    {
117       json::Object collJson;
118       collJson[kVersion] = item.second.version;
119       collJson[kFile] = item.second.file;
120       collJson[kKey] = item.second.key;
121       collJson[kParentKey] = item.second.parentKey;
122       indexJson[item.first] = collJson;
123    }
124 
125    // write index
126    FilePath indexFile = cacheDir.completeChildPath(kIndexFile);
127    Error error = core::writeStringToFile(indexFile, indexJson.writeFormatted());
128    if (error)
129       LOG_ERROR(error);
130 }
131 
readCollection(const FilePath & filePath,ZoteroCollection * pCollection)132 Error readCollection(const FilePath& filePath, ZoteroCollection* pCollection)
133 {
134    std::string cacheContents;
135    Error error = core::readStringFromFile(filePath, &cacheContents);
136    if (error)
137       return error;
138 
139    json::Object collectionJson;
140    error = collectionJson.parse(cacheContents);
141    if (error)
142       return error;
143 
144    pCollection->name = collectionJson[kName].getString();
145    pCollection->version = collectionJson[kVersion].getInt();
146    pCollection->key = collectionJson[kKey].getString();
147    pCollection->parentKey = collectionJson[kParentKey].getString();
148    pCollection->items = collectionJson[kItems].getArray();
149 
150    return Success();
151 }
152 
153 
cachedCollection(const std::string & type,const std::string & context,const std::string & name)154 ZoteroCollection cachedCollection(const std::string& type, const std::string& context, const std::string& name)
155 {
156    ZoteroCollection collection;
157    FilePath cacheDir = collectionsCacheDir(type, context);
158    auto index = collectionsCacheIndex(cacheDir);
159    auto coll = index[name];
160    if (!coll.empty())
161    {
162       FilePath cachePath = cacheDir.completeChildPath(coll.file);
163       Error error = readCollection(cachePath, &collection);
164       if (error)
165          LOG_ERROR(error);
166    }
167    return collection;
168 }
169 
cachedCollectionSpec(const std::string & type,const std::string & context,const std::string & name)170 ZoteroCollectionSpec cachedCollectionSpec(const std::string& type, const std::string& context, const std::string& name)
171 {
172    ZoteroCollectionSpec spec;
173    FilePath cacheDir = collectionsCacheDir(type, context);
174    auto index = collectionsCacheIndex(cacheDir);
175    auto coll = index[name];
176    if (!coll.empty())
177    {
178       spec.name = name;
179       spec.version = coll.version;
180    }
181    return spec;
182 }
183 
cachedCollectionsSpecs(const std::string & type,const std::string & context)184 ZoteroCollectionSpecs cachedCollectionsSpecs(const std::string& type, const std::string& context)
185 {
186    ZoteroCollectionSpecs specs;
187    FilePath cacheDir = collectionsCacheDir(type, context);
188    auto index = collectionsCacheIndex(cacheDir);
189    for (auto entry : index)
190    {
191       ZoteroCollectionSpec spec(entry.first, entry.second.key, entry.second.parentKey, entry.second.version);
192       specs.push_back(spec);
193    }
194    return specs;
195 }
196 
updateCachedCollection(const std::string & type,const std::string & context,const std::string & name,const ZoteroCollection & collection)197 void updateCachedCollection(const std::string& type, const std::string& context, const std::string& name, const ZoteroCollection& collection)
198 {
199    // update index
200    FilePath cacheDir = collectionsCacheDir(type, context);
201    auto index = collectionsCacheIndex(cacheDir);
202    auto coll = index[name];
203    if (coll.empty())
204       coll.file = core::system::generateShortenedUuid();
205    coll.version = collection.version;
206    index[name] = coll;
207    updateCollectionsCacheIndex(cacheDir, index);
208 
209    TRACE("Writing items", collection.items.getSize());
210 
211    // write the collection
212    json::Object collectionJson;
213    collectionJson[kName] = collection.name;
214    collectionJson[kVersion] = collection.version;
215    collectionJson[kKey] = collection.key;
216    collectionJson[kParentKey] = collection.parentKey;
217    collectionJson[kItems] = collection.items;
218    Error error = core::writeStringToFile(cacheDir.completeChildPath(coll.file), collectionJson.writeFormatted());
219    if (error)
220       LOG_ERROR(error);
221 }
222 
223 
224 // repsond with either a collection from the server cache or just name/version if the client
225 // already has the same version
responseFromServerCache(const std::string & type,const std::string & apiKey,const std::string & collection,const ZoteroCollectionSpecs & clientCacheSpecs)226 ZoteroCollection responseFromServerCache(const std::string& type,
227                                          const std::string& apiKey,
228                                          const std::string& collection,
229                                          const ZoteroCollectionSpecs& clientCacheSpecs)
230 {
231    ZoteroCollection cached = cachedCollection(type, apiKey, collection);
232    if (!cached.empty() )
233    {
234       // see if the client specs already indicate an up to date version
235       ZoteroCollectionSpecs::const_iterator clientIt = std::find_if(clientCacheSpecs.begin(), clientCacheSpecs.end(), [cached](ZoteroCollectionSpec spec) {
236          // If the version is 0 this is a local instance that isn't incrementing version numbers, do not cache
237          return spec.name == cached.name && spec.version == cached.version && spec.version != 0;
238       });
239       if (clientIt == clientCacheSpecs.end())
240       {
241          // client spec didn't match, return cached collection
242          TRACE("Returning server cache for " + collection, cached.items.getSize());
243          return cached;
244       }
245       else
246       {
247          // client had up to date version, just return the spec w/ no items
248          TRACE("Using client cache for " + collection);
249          return ZoteroCollection(*clientIt);
250       }
251    }
252    else
253    {
254       return ZoteroCollection();
255    }
256 
257 }
258 
259 struct Connection
260 {
emptyrstudio::session::modules::zotero::collections::__anon7fd725750111::Connection261    bool empty() const { return type.length() == 0; }
262    std::string type;
263    std::string context;
264    std::string cacheContext;
265    ZoteroCollectionSource source;
266 };
267 
zoteroConnection()268 Connection zoteroConnection()
269 {
270    // use local connection if available for 'auto'
271    std::string type = prefs::userState().zoteroConnectionType();
272    if ((type.empty() || type == kZoteroConnectionTypeAuto) && localZoteroAvailable())
273        type = kZoteroConnectionTypeLocal;
274 
275    // return empty connection if it's none or auto (as auto would have already been resolved)
276    if (type == kZoteroConnectionTypeAuto || type == kZoteroConnectionTypeNone)
277    {
278       return Connection();
279    }
280 
281    // initialize context
282    std::string context;
283    if (type == kZoteroConnectionTypeLocal)
284    {
285       FilePath localDataDir = zoteroDataDirectory();
286       if (!localDataDir.isEmpty())
287       {
288          if (localDataDir.exists())
289             context = localDataDir.getAbsolutePath();
290          else
291             LOG_ERROR(core::fileNotFoundError(localDataDir, ERROR_LOCATION));
292       }
293    }
294    else
295    {
296       context = prefs::userState().zoteroApiKey();
297    }
298 
299    // if we have a context then proceed to fill out the connection, otherwise
300    // just return an empty connection. we wouldn't have a context if we were
301    // configured for a local connection (the default) but there was no zotero
302    // data directory. we also woudln't have a context if we were configured
303    // for a web connection and there was no zotero API key
304    if (!context.empty())
305    {
306       Connection connection;
307       connection.type = type;
308       connection.context = context;
309       // use a hash of the context for the cacheContext (as it might not be a valid directory name)
310       connection.cacheContext = core::hash::crc32HexHash(context);
311       connection.source = type == kZoteroConnectionTypeLocal ? collections::localCollections() : collections::webCollections();
312       return connection;
313    }
314    else
315    {
316       return Connection();
317    }
318 }
319 
320 
321 } // end anonymous namespace
322 
// names of the JSON members used when collections and cache index entries
// are written to and read from disk
const char* const kName = "name";
const char* const kVersion = "version";
const char* const kKey = "key";
const char* const kParentKey = "parentKey";
const char* const kItems = "items";

// NOTE(review): presumably the version value meaning "no version known";
// it is not referenced in this file -- confirm against its users
const int kNoVersion = -1;
330 
// Find the spec whose key equals the parentKey of the given spec. Returns a
// default-constructed spec if the given spec has no parentKey or if no spec
// in the list has that key.
ZoteroCollectionSpec findParentSpec(const ZoteroCollectionSpec& spec, const ZoteroCollectionSpecs& specs)
{
   // a spec without a parentKey has nothing to search for
   if (spec.parentKey.empty())
      return ZoteroCollectionSpec();

   // the first spec with a matching key wins
   for (const auto& candidate : specs)
   {
      if (candidate.key == spec.parentKey)
         return candidate;
   }

   // not found
   return ZoteroCollectionSpec();
}
344 
345 
getCollectionSpecs(std::vector<std::string> collections,ZoteroCollectionSpecsHandler handler)346 void getCollectionSpecs(std::vector<std::string> collections, ZoteroCollectionSpecsHandler handler)
347 {
348    // get connection if we have one
349    Connection conn = zoteroConnection();
350    if (!conn.empty())
351    {
352       conn.source.getCollectionSpecs(conn.context, collections, handler);
353    }
354    else
355    {
356       handler(Success(), std::vector<ZoteroCollectionSpec>());
357    }
358 }
359 
getLibraryNames(ZoteroLibrariesHandler handler)360 void getLibraryNames(ZoteroLibrariesHandler handler)
361 {
362    // get connection if we have one
363    Connection conn = zoteroConnection();
364    if (!conn.empty())
365    {
366       conn.source.getLibraryNames(conn.context, handler);
367    }
368    else
369    {
370       handler(Success(), std::vector<std::string>());
371    }
372 }
373 
getCollections(std::vector<std::string> collections,ZoteroCollectionSpecs cacheSpecs,bool useCache,ZoteroCollectionsHandler handler)374 void getCollections(std::vector<std::string> collections,
375                     ZoteroCollectionSpecs cacheSpecs,
376                     bool useCache,
377                     ZoteroCollectionsHandler handler)
378 {
379    // clear out client cache specs if the cache is disabled
380    if (!useCache)
381       cacheSpecs.clear();
382 
383    // get connection if we have o ne
384    Connection conn = zoteroConnection();
385    if (!conn.empty())
386    {
387       // create a set of specs based on what we have in our server cache (as we always want to keep our cache up to date)
388       ZoteroCollectionSpecs serverCacheSpecs;
389       if (useCache)
390       {
391          // request for explicit list of collections, provide specs for matching collections from the server cache
392          if (!collections.empty())
393          {
394             std::transform(collections.begin(), collections.end(), std::back_inserter(serverCacheSpecs), [conn](std::string name) {
395                ZoteroCollectionSpec cacheSpec(name);
396                ZoteroCollectionSpec cached = cachedCollectionSpec(conn.type, conn.cacheContext, name);
397                if (!cached.empty())
398                   cacheSpec.version = cached.version;
399                return cacheSpec;
400             });
401          }
402 
403          // request for all collections, provide specs for all collections in the server cache
404          else
405          {
406             serverCacheSpecs = cachedCollectionsSpecs(conn.type, conn.cacheContext);
407          }
408       }
409 
410       // get collections
411       conn.source.getCollections(conn.context, collections, serverCacheSpecs,
412                                  [conn, collections, cacheSpecs, serverCacheSpecs, handler](Error error, ZoteroCollections webCollections, std::string warning) {
413 
414          // process response -- for any collection returned w/ a version higher than that in the
415          // cache, update the cache. for any collection available (from cache or web) with a version
416          // higher than that of the client request, return the updated items (else return no items)
417          if (!error)
418          {
419             ZoteroCollections responseCollections;
420             for (auto webCollection : webCollections)
421             {
422                // see if the server side cache needs updating
423                ZoteroCollectionSpecs::const_iterator it = std::find_if(serverCacheSpecs.begin(), serverCacheSpecs.end(), [webCollection](ZoteroCollectionSpec cacheSpec) {
424                   // If the version is 0 this is a local instance that isn't incrementing version numbers, do not cache
425                   return cacheSpec.name == webCollection.name && cacheSpec.version == webCollection.version && cacheSpec.version != 0;
426                });
427                // need to update the cache -- do so and then return the just cached copy to the client
428                if (it == serverCacheSpecs.end())
429                {
430                   TRACE("Updating server cache for " + webCollection.name);
431                   updateCachedCollection(conn.type, conn.cacheContext, webCollection.name, webCollection);
432                   TRACE("Returning server cache for " + webCollection.name);
433                   TRACE("Cache contents", webCollection.items.getSize());
434                   responseCollections.push_back(webCollection);
435                }
436 
437                // we have a cache for this collection, check to see if it is recent enough (and in that
438                // case don't return the items to the client)
439                else
440                {
441                   // see we can satisfy the request from our cache
442                   ZoteroCollection cached = responseFromServerCache(conn.type, conn.cacheContext, webCollection.name, cacheSpecs);
443                   if (!cached.empty())
444                   {
445                      responseCollections.push_back(cached);
446                   }
447                   else
448                   {
449                      // shouldn't be possible to get here (as the initial condition tested in the loop ensures
450                      // that we have a cached collection)
451                      TRACE("Unexpected failure to find cache for " + webCollection.name);
452                   }
453                }
454             }
455 
456             handler(Success(), responseCollections, warning);
457 
458          // for host errors try to serve from the cache
459          } else if (isHostError(core::errorDescription(error))) {
460 
461             ZoteroCollections responseCollections;
462             for (auto collection : collections)
463             {
464                ZoteroCollection cached = responseFromServerCache(conn.type, conn.cacheContext, collection, cacheSpecs);
465                if (!cached.empty())
466                   responseCollections.push_back(cached);
467             }
468             handler(Success(),responseCollections, warning);
469 
470          // report error
471          } else {
472             handler(error, std::vector<ZoteroCollection>(), warning);
473          }
474 
475 
476       });
477    }
478    else
479    {
480       handler(Success(), std::vector<ZoteroCollection>(), "");
481    }
482 
483 }
484 
485 } // end namespace collections
486 } // end namespace zotero
487 } // end namespace modules
488 } // end namespace session
489 } // end namespace rstudio
490