1 /**
2  * \file ConverterCache.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Baruch Even
7  * \author Angus Leeming
8  * \author Georg Baum
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12 
13 #include <config.h>
14 
15 #include "ConverterCache.h"
16 
17 #include "Format.h"
18 #include "Lexer.h"
19 #include "LyXRC.h"
20 #include "Mover.h"
21 
22 #include "support/convert.h"
23 #include "support/debug.h"
24 #include "support/filetools.h"
25 #include "support/lyxtime.h"
26 #include "support/Package.h"
27 
28 #include "support/lassert.h"
29 #include <boost/crc.hpp>
30 
31 #include <algorithm>
32 #include <fstream>
33 #include <iomanip>
34 #include <map>
35 #include <sstream>
36 
37 using namespace std;
38 using namespace lyx::support;
39 
40 namespace lyx {
41 
42 namespace {
43 
do_crc(string const & s)44 unsigned long do_crc(string const & s)
45 {
46 	boost::crc_32_type crc;
47 	crc = for_each(s.begin(), s.end(), crc);
48 	return crc.checksum();
49 }
50 
51 
// FIXME THREAD
// This should be OK because it is only assigned during init()
//
// Directory in which the cached files and the "index" file are stored.
// It is assigned by ConverterCache::init(), and only when the converter
// cache is enabled in lyxrc.
static FileName cache_dir;
55 
56 
57 class CacheItem {
58 public:
CacheItem()59 	CacheItem() : timestamp(0), checksum(0) {}
CacheItem(FileName const & orig_from,string const & to_format,time_t t,unsigned long c)60 	CacheItem(FileName const & orig_from, string const & to_format,
61 		  time_t t, unsigned long c)
62 		: timestamp(t), checksum(c)
63 	{
64 		ostringstream os;
65 		os << setw(10) << setfill('0') << do_crc(orig_from.absFileName())
66 		   << '-' << to_format;
67 		cache_name = FileName(addName(cache_dir.absFileName(), os.str()));
68 		LYXERR(Debug::FILES, "Add file cache item " << orig_from
69 				     << ' ' << to_format << ' ' << cache_name
70 				     << ' ' << long(timestamp) << ' ' << checksum << '.');
71 	}
~CacheItem()72 	~CacheItem()
73 	{}
74 	FileName cache_name;
75 	time_t timestamp;
76 	unsigned long checksum;
77 };
78 
79 } // namespace
80 
81 
/** The cache contains one item per orig file and target format, so use a
 *  nested map to find the cache item quickly by filename and format.
 *  FormatCacheType is the inner level of that nested map: it maps a
 *  target format name to the cache item for that format.
 */
typedef map<string, CacheItem> FormatCacheType;
/// Everything that is cached for one original file.
class FormatCache {
public:
	/// Format of the source file
	string from_format;
	/// Cache target format -> item to quickly find the item by format
	FormatCacheType cache;
};
/// Outer level of the nested map: original file -> its FormatCache.
typedef map<FileName, FormatCache> CacheType;
94 
95 
/// Private implementation of ConverterCache: holds the in-memory cache
/// map and reads/writes the index file stored in the cache directory.
class ConverterCache::Impl {
public:
	/// Fill \c cache from the "index" file in the cache directory.
	void readIndex();
	/// Write the contents of \c cache to the "index" file in the
	/// cache directory.
	void writeIndex();
	/// Return the item cached for \p from converted to \p format, or 0
	/// if there is no such item or the converter cache is disabled.
	CacheItem * find(FileName const & from, string const & format);
	/// Original file -> (target format -> cache item)
	CacheType cache;
};
106 
107 
readIndex()108 void ConverterCache::Impl::readIndex()
109 {
110 	time_t const now = current_time();
111 	FileName const index(addName(cache_dir.absFileName(), "index"));
112 	ifstream is(index.toFilesystemEncoding().c_str());
113 	Lexer lex;
114 	lex.setStream(is);
115 	while (lex.isOK()) {
116 		if (!lex.next(true))
117 			break;
118 		string const orig_from = lex.getString();
119 		if (!lex.next())
120 			break;
121 		string const to_format = lex.getString();
122 		if (!lex.next())
123 			break;
124 		time_t const timestamp =
125 			convert<unsigned long>(lex.getString());
126 		if (!lex.next())
127 			break;
128 		unsigned long const checksum =
129 			convert<unsigned long>(lex.getString());
130 		FileName const orig_from_name(orig_from);
131 		CacheItem item(orig_from_name, to_format, timestamp, checksum);
132 
133 		// Don't cache files that do not exist anymore
134 		if (!orig_from_name.exists()) {
135 			LYXERR(Debug::FILES, "Not caching file `"
136 				<< orig_from << "' (does not exist anymore).");
137 			item.cache_name.removeFile();
138 			continue;
139 		}
140 
141 		// Don't add items that are not in the cache anymore
142 		// This can happen if two instances of LyX are running
143 		// at the same time and update the index file independantly.
144 		if (!item.cache_name.exists()) {
145 			LYXERR(Debug::FILES, "Not caching file `" << orig_from
146 				<< "' (cached copy does not exist anymore).");
147 			continue;
148 		}
149 
150 		// Delete the cached file if it is too old
151 		if (difftime(now, item.cache_name.lastModified())
152 				> lyxrc.converter_cache_maxage) {
153 			LYXERR(Debug::FILES, "Not caching file `"
154 				<< orig_from << "' (too old).");
155 			item.cache_name.removeFile();
156 			continue;
157 		}
158 
159 		FormatCache & format_cache = cache[orig_from_name];
160 		if (format_cache.from_format.empty())
161 			format_cache.from_format =
162 				// FIXME perf: This very expensive function is called on all
163 				// cached files on opening. This slows LyX startup a lot. It
164 				// would be better if this information was retrieved in a
165 				// delayed fashion.
166 				theFormats().getFormatFromFile(orig_from_name);
167 		format_cache.cache[to_format] = item;
168 	}
169 	is.close();
170 }
171 
172 
writeIndex()173 void ConverterCache::Impl::writeIndex()
174 {
175 	FileName const index(addName(cache_dir.absFileName(), "index"));
176 	ofstream os(index.toFilesystemEncoding().c_str());
177 	os.close();
178 	if (!index.changePermission(0600))
179 		return;
180 	os.open(index.toFilesystemEncoding().c_str());
181 	CacheType::iterator it1 = cache.begin();
182 	CacheType::iterator const end1 = cache.end();
183 	for (; it1 != end1; ++it1) {
184 		FormatCacheType const & format_cache = it1->second.cache;
185 		FormatCacheType::const_iterator it2 = format_cache.begin();
186 		FormatCacheType::const_iterator const end2 = format_cache.end();
187 		for (; it2 != end2; ++it2)
188 			os << Lexer::quoteString(it1->first.absFileName())
189 			   << ' ' << it2->first << ' '
190 			   << it2->second.timestamp << ' '
191 			   << it2->second.checksum << '\n';
192 	}
193 	os.close();
194 }
195 
196 
find(FileName const & from,string const & format)197 CacheItem * ConverterCache::Impl::find(FileName const & from,
198 		string const & format)
199 {
200 	if (!lyxrc.use_converter_cache)
201 		return 0;
202 	CacheType::iterator const it1 = cache.find(from);
203 	if (it1 == cache.end())
204 		return 0;
205 	FormatCacheType & format_cache = it1->second.cache;
206 	FormatCacheType::iterator const it2 = format_cache.find(format);
207 	if (it2 == format_cache.end())
208 		return 0;
209 	return &(it2->second);
210 }
211 
212 
213 /////////////////////////////////////////////////////////////////////
214 //
215 // ConverterCache
216 //
217 /////////////////////////////////////////////////////////////////////
218 
/// Allocate the private implementation. The index file is not read here
/// but in init().
ConverterCache::ConverterCache()
	: pimpl_(new Impl)
{}
222 
223 
/// Free the private implementation allocated by the constructor.
ConverterCache::~ConverterCache()
{
	delete pimpl_;
}
228 
229 
/// Return the one ConverterCache instance. It is a function-local static,
/// so it is constructed the first time this function is called.
ConverterCache & ConverterCache::get()
{
	// Now return the cache
	static ConverterCache singleton;
	return singleton;
}
236 
237 
init()238 void ConverterCache::init()
239 {
240 	if (!lyxrc.use_converter_cache)
241 		return;
242 	// We do this here and not in the constructor because package() gets
243 	// initialized after all static variables.
244 	cache_dir = FileName(addName(package().user_support().absFileName(), "cache"));
245 	if (!cache_dir.exists())
246 		if (!cache_dir.createDirectory(0700)) {
247 			lyxerr << "Could not create cache directory `"
248 			       << cache_dir << "'." << endl;
249 			exit(EXIT_FAILURE);
250 		}
251 	get().pimpl_->readIndex();
252 }
253 
254 
writeIndex() const255 void ConverterCache::writeIndex() const
256 {
257 	if (!lyxrc.use_converter_cache
258 		  || cache_dir.empty())
259 		return;
260 	pimpl_->writeIndex();
261 }
262 
263 
add(FileName const & orig_from,string const & to_format,FileName const & converted_file) const264 void ConverterCache::add(FileName const & orig_from, string const & to_format,
265 		FileName const & converted_file) const
266 {
267 	if (!lyxrc.use_converter_cache || orig_from.empty() ||
268 	    converted_file.empty())
269 		return;
270 	LYXERR(Debug::FILES, ' ' << orig_from
271 			     << ' ' << to_format << ' ' << converted_file);
272 
273 	// FIXME: Should not hardcode this (see bug 3819 for details)
274 	if (to_format == "pstex") {
275 		FileName const converted_eps(changeExtension(converted_file.absFileName(), "eps"));
276 		add(orig_from, "eps", converted_eps);
277 	} else if (to_format == "pdftex") {
278 		FileName const converted_pdf(changeExtension(converted_file.absFileName(), "pdf"));
279 		add(orig_from, "pdf6", converted_pdf);
280 	}
281 
282 	// Is the file in the cache already?
283 	CacheItem * item = pimpl_->find(orig_from, to_format);
284 
285 	time_t const timestamp = orig_from.lastModified();
286 	Mover const & mover = getMover(to_format);
287 	if (item) {
288 		LYXERR(Debug::FILES, "ConverterCache::add(" << orig_from << "):\n"
289 					"The file is already in the cache.");
290 		// First test for timestamp
291 		if (timestamp == item->timestamp) {
292 			LYXERR(Debug::FILES, "Same timestamp.");
293 			return;
294 		}
295 		// Maybe the contents is still the same?
296 		item->timestamp = timestamp;
297 		unsigned long const checksum = orig_from.checksum();
298 		if (checksum == item->checksum) {
299 			LYXERR(Debug::FILES, "Same checksum.");
300 			return;
301 		}
302 		item->checksum = checksum;
303 		if (!mover.copy(converted_file, item->cache_name,
304 		              onlyFileName(item->cache_name.absFileName()))) {
305 			LYXERR(Debug::FILES, "Could not copy file " << orig_from << " to "
306 				<< item->cache_name);
307 		} else if (!item->cache_name.changePermission(0600)) {
308 			LYXERR(Debug::FILES, "Could not change file mode"
309 				<< item->cache_name);
310 		}
311 	} else {
312 		CacheItem new_item(orig_from, to_format, timestamp,
313 				orig_from.checksum());
314 		if (mover.copy(converted_file, new_item.cache_name,
315 		              onlyFileName(new_item.cache_name.absFileName()))) {
316 			if (!new_item.cache_name.changePermission(0600)) {
317 				LYXERR(Debug::FILES, "Could not change file mode"
318 					<< new_item.cache_name);
319 			}
320 			FormatCache & format_cache = pimpl_->cache[orig_from];
321 			if (format_cache.from_format.empty())
322 				format_cache.from_format =
323 					theFormats().getFormatFromFile(orig_from);
324 			format_cache.cache[to_format] = new_item;
325 		} else
326 			LYXERR(Debug::FILES, "ConverterCache::add(" << orig_from << "):\n"
327 						"Could not copy file.");
328 	}
329 }
330 
331 
remove(FileName const & orig_from,string const & to_format) const332 void ConverterCache::remove(FileName const & orig_from,
333 		string const & to_format) const
334 {
335 	if (!lyxrc.use_converter_cache || orig_from.empty())
336 		return;
337 	LYXERR(Debug::FILES, orig_from << ' ' << to_format);
338 
339 	CacheType::iterator const it1 = pimpl_->cache.find(orig_from);
340 	if (it1 == pimpl_->cache.end())
341 		return;
342 	FormatCacheType & format_cache = it1->second.cache;
343 	FormatCacheType::iterator const it2 = format_cache.find(to_format);
344 	if (it2 == format_cache.end())
345 		return;
346 
347 	format_cache.erase(it2);
348 	if (format_cache.empty())
349 		pimpl_->cache.erase(it1);
350 }
351 
352 
remove_all(string const & from_format,string const & to_format) const353 void ConverterCache::remove_all(string const & from_format,
354 		string const & to_format) const
355 {
356 	if (!lyxrc.use_converter_cache)
357 		return;
358 	CacheType::iterator it1 = pimpl_->cache.begin();
359 	while (it1 != pimpl_->cache.end()) {
360 		if (it1->second.from_format != from_format) {
361 			++it1;
362 			continue;
363 		}
364 		FormatCacheType & format_cache = it1->second.cache;
365 		FormatCacheType::iterator it2 = format_cache.begin();
366 		while (it2 != format_cache.end()) {
367 			if (it2->first == to_format) {
368 				LYXERR(Debug::FILES, "Removing file cache item "
369 					<< it1->first << ' ' << to_format);
370 				it2->second.cache_name.removeFile();
371 				format_cache.erase(it2);
372 				// Have to start over again since items in a
373 				// map are not ordered
374 				it2 = format_cache.begin();
375 			} else {
376 				++it2;
377 			}
378 		}
379 		if (format_cache.empty()) {
380 			pimpl_->cache.erase(it1);
381 			// Have to start over again since items in a map are
382 			// not ordered
383 			it1 = pimpl_->cache.begin();
384 		} else {
385 			++it1;
386 		}
387 	}
388 	pimpl_->writeIndex();
389 }
390 
391 
inCache(FileName const & orig_from,string const & to_format) const392 bool ConverterCache::inCache(FileName const & orig_from,
393 		string const & to_format) const
394 {
395 	if (!lyxrc.use_converter_cache || orig_from.empty())
396 		return false;
397 	LYXERR(Debug::FILES, orig_from << ' ' << to_format);
398 
399 	CacheItem * const item = pimpl_->find(orig_from, to_format);
400 	if (!item) {
401 		LYXERR(Debug::FILES, "not in cache.");
402 		return false;
403 	}
404 
405 	// Special handling of pstex and pdftex formats: These are only
406 	// considered to be in the cache if the corresponding graphics
407 	// fiels are there as well. Otherwise copy() of the graphics below
408 	// would fail.
409 	// FIXME: Should not hardcode this (see bug 3819 for details)
410 	if (to_format == "pstex") {
411 		if (!inCache(orig_from, "eps"))
412 			return false;
413 	} else if (to_format == "pdftex") {
414 		if (!inCache(orig_from, "pdf6"))
415 			return false;
416 	}
417 
418 	time_t const timestamp = orig_from.lastModified();
419 	if (item->timestamp == timestamp) {
420 		LYXERR(Debug::FILES, "identical timestamp.");
421 		return true;
422 	}
423 	if (item->checksum == orig_from.checksum()) {
424 		item->timestamp = timestamp;
425 		LYXERR(Debug::FILES, "identical checksum.");
426 		return true;
427 	}
428 	LYXERR(Debug::FILES, "in cache, but too old.");
429 	return false;
430 }
431 
432 
/** Return the name of the cached file for \p orig_from converted to
 *  \p to_format.
 *  The item is expected to exist. Note that Impl::find() also yields no
 *  item when the converter cache is disabled.
 *  NOTE(review): presumably LASSERT executes its second argument when the
 *  item is missing, so that a reference to a function-local static,
 *  default-constructed FileName is returned -- confirm against
 *  support/lassert.h.
 */
FileName const & ConverterCache::cacheName(FileName const & orig_from,
		string const & to_format) const
{
	LYXERR(Debug::FILES, orig_from << ' ' << to_format);

	CacheItem * const item = pimpl_->find(orig_from, to_format);
	LASSERT(item, { static const FileName fn; return fn; });
	return item->cache_name;
}
442 
443 
copy(FileName const & orig_from,string const & to_format,FileName const & dest) const444 bool ConverterCache::copy(FileName const & orig_from, string const & to_format,
445 		FileName const & dest) const
446 {
447 	if (!lyxrc.use_converter_cache || orig_from.empty() || dest.empty())
448 		return false;
449 	LYXERR(Debug::FILES, orig_from << ' ' << to_format << ' ' << dest);
450 
451 	// FIXME: Should not hardcode this (see bug 3819 for details)
452 	if (to_format == "pstex") {
453 		FileName const dest_eps(changeExtension(dest.absFileName(), "eps"));
454 		if (!copy(orig_from, "eps", dest_eps))
455 			return false;
456 	} else if (to_format == "pdftex") {
457 		FileName const dest_pdf(changeExtension(dest.absFileName(), "pdf"));
458 		if (!copy(orig_from, "pdf6", dest_pdf))
459 			return false;
460 	}
461 
462 	CacheItem * const item = pimpl_->find(orig_from, to_format);
463 	LASSERT(item, return false);
464 	Mover const & mover = getMover(to_format);
465 	return mover.copy(item->cache_name, dest,
466 	                  onlyFileName(dest.absFileName()));
467 }
468 
469 } // namespace lyx
470