1 // Copyright (C) 2010 Stephen Leake <stephen_leake@stephe-leake.org>
2 // Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
3 //
4 // This program is made available under the GNU GPL version 2.0 or
5 // greater. See the accompanying file COPYING for details.
6 //
7 // This program is distributed WITHOUT ANY WARRANTY; without even the
8 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9 // PURPOSE.
10 
11 #include "base.hh"
12 #include <algorithm>
13 #include <deque>
14 #include <fstream>
15 #include <iterator>
16 #include <list>
17 #include <numeric>
18 #include <set>
19 #include <sstream>
20 #include "vector.hh"
21 
22 #include <string.h>
23 #include <boost/bind.hpp>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/tuple/tuple.hpp>
26 #include <boost/tuple/tuple_comparison.hpp>
27 
28 #include <botan/botan.h>
29 #include <botan/rsa.h>
30 #include <botan/pem.h>
31 #include <botan/look_pk.h>
32 #include "lazy_rng.hh"
33 
34 #include <sqlite3.h>
35 
36 #include "lexical_cast.hh"
37 
38 #include "app_state.hh"
39 #include "cert.hh"
40 #include "project.hh"
41 #include "cleanup.hh"
42 #include "constants.hh"
43 #include "dates.hh"
44 #include "database.hh"
45 #include "hash_map.hh"
46 #include "keys.hh"
47 #include "platform-wrapped.hh"
48 #include "revision.hh"
49 #include "safe_map.hh"
50 #include "sanity.hh"
51 #include "migration.hh"
52 #include "simplestring_xform.hh"
53 #include "transforms.hh"
54 #include "ui.hh" // tickers
55 #include "vocab.hh"
56 #include "vocab_cast.hh"
57 #include "xdelta.hh"
58 #include "epoch.hh"
59 #include "graph.hh"
60 #include "roster.hh"
61 #include "roster_delta.hh"
62 #include "rev_height.hh"
63 #include "vocab_hash.hh"
64 #include "globish.hh"
65 #include "work.hh"
66 #include "lua_hooks.hh"
67 #include "outdated_indicator.hh"
68 #include "lru_writeback_cache.hh"
69 #include "char_classifiers.hh"
70 
71 // defined in schema.c, generated from schema.sql:
72 extern char const schema_constant[];
73 
74 // this file defines a public, typed interface to the database.
75 // the database class encapsulates all knowledge about sqlite,
76 // the schema, and all SQL statements used to access the schema.
77 //
78 // see file schema.sql for the text of the schema.
79 
80 using std::deque;
81 using std::istream;
82 using std::make_pair;
83 using std::map;
84 using std::multimap;
85 using std::ostream;
86 using std::pair;
87 using std::remove_if;
88 using std::set;
89 using std::sort;
90 using std::string;
91 using std::vector;
92 using std::accumulate;
93 
94 using boost::shared_ptr;
95 using boost::dynamic_pointer_cast;
96 using boost::lexical_cast;
97 using boost::get;
98 using boost::tuple;
99 using boost::lexical_cast;
100 
101 #if BOTAN_VERSION_CODE >= BOTAN_VERSION_CODE_FOR(1,9,5)
102 using Botan::PK_Encryptor_EME;
103 #else
104 using Botan::PK_Encryptor;
105 #endif
106 using Botan::PK_Verifier;
107 using Botan::SecureVector;
108 using Botan::X509_PublicKey;
109 using Botan::RSA_PublicKey;
110 
// Row/column count expectations.  These are passed as the want_rows and
// want_cols arguments of database_impl::fetch().
// NOTE(review): the -1 "any" values presumably mean "do not check the
// count" -- confirm against the implementation of fetch().
int const one_row = 1;
int const one_col = 1;
int const any_rows = -1;
int const any_cols = -1;
115 
116 namespace
117 {
118   struct query_param
119   {
120     enum arg_type { text, blob, int64 };
121     arg_type type;
122     string string_data;
123     u64 int_data;
124   };
125 
126   query_param
text(string const & txt)127   text(string const & txt)
128   {
129     MM(txt);
130     for (string::const_iterator i = txt.begin();
131          i != txt.end(); ++i)
132       {
133         I(*i >= 10 && *i < 127);
134       }
135     query_param q = {
136       query_param::text,
137       txt,
138       0,
139     };
140     return q;
141   }
142 
143   query_param
blob(string const & blb)144   blob(string const & blb)
145   {
146     query_param q = {
147       query_param::blob,
148       blb,
149       0,
150     };
151     return q;
152   }
153 
154   query_param
int64(u64 const & num)155   int64(u64 const & num)
156   {
157     query_param q = {
158       query_param::int64,
159       "",
160       num,
161     };
162     return q;
163   }
164 
165   struct query
166   {
query__anon599c84e20111::query167     explicit query(string const & cmd)
168       : sql_cmd(cmd)
169     {}
170 
query__anon599c84e20111::query171     query()
172     {}
173 
operator %__anon599c84e20111::query174     query & operator %(query_param const & qp)
175     {
176       args.push_back(qp);
177       return *this;
178     }
179 
180     vector<query_param> args;
181     string sql_cmd;
182   };
183 
184   typedef vector< vector<string> > results;
185 
186   struct statement
187   {
statement__anon599c84e20111::statement188     statement() : count(0), stmt(0, sqlite3_finalize) {}
189     int count;
190     cleanup_ptr<sqlite3_stmt*, int> stmt;
191   };
192 
193   struct roster_size_estimator
194   {
operator ()__anon599c84e20111::roster_size_estimator195     unsigned long operator() (cached_roster const & cr)
196     {
197       I(cr.first);
198       I(cr.second);
199       // do estimate using a totally made up multiplier, probably wildly off
200       return (cr.first->all_nodes().size()
201               * constants::db_estimated_roster_node_sz);
202     }
203   };
204 
205   struct datasz
206   {
operator ()__anon599c84e20111::datasz207     unsigned long operator()(data const & t) { return t().size(); }
208   };
209 
210   enum open_mode { normal_mode = 0,
211                    schema_bypass_mode,
212                    format_bypass_mode,
213                    cache_bypass_mode };
214 
215   typedef hashmap::hash_map<revision_id, set<revision_id> > parent_id_map;
216   typedef hashmap::hash_map<revision_id, rev_height> height_map;
217 
218   typedef hashmap::hash_map<key_id,
219                             pair<shared_ptr<Botan::PK_Verifier>,
220                                  shared_ptr<Botan::RSA_PublicKey> >
221                             > verifier_cache;
222 
223 } // anonymous namespace
224 
// The private implementation behind class database.  It holds the sqlite
// handle, the prepared-statement cache, the transaction state, the roster
// and version caches, and the delayed-write queue.  Everything except the
// constructor and destructor is private; class database reaches it through
// the friend declaration below.
class database_impl
{
  friend class database;

  // for scoped_ptr's sake
public:
  explicit database_impl(system_path const & f, db_type t,
                         system_path const & roster_cache_performance_log);
  ~database_impl();

private:

  //
  // --== Opening the database and schema checking ==--
  //
  system_path filename;
  db_type type;
  // the raw handle; NULL until the database has been opened.  Use sql()
  // rather than this member to get a handle that has been opened and
  // checked on demand.
  struct sqlite3 * __sql;

  void install_functions();
  struct sqlite3 * sql(enum open_mode mode = normal_mode);

  void check_filename();
  void check_db_exists();
  void check_db_nonexistent();
  void open();
  void close();
  void check_format();
  void check_caches();

  bool table_has_data(string const & name);

  //
  // --== Basic SQL interface and statement caching ==--
  //
  // keyed by a string; the destructor logs each key as the statement text
  map<string, statement> statement_cache;

  void fetch(results & res,
             int const want_cols, int const want_rows,
             query const & q);
  void execute(query const & q);

  bool table_has_entry(id const & key, string const & column,
                       string const & table);

  //
  // --== Generic database metadata gathering ==--
  //
  string count(string const & table);
  string space(string const & table,
                    string const & concatenated_columns,
                    u64 & total);
  unsigned int page_size();
  unsigned int cache_size();

  //
  // --== Transactions ==--
  //
  int transaction_level;
  bool transaction_exclusive;
  void begin_transaction(bool exclusive);
  void commit_transaction();
  void rollback_transaction();
  friend class conditional_transaction_guard;

  // writeback policy object given to roster_cache; it keeps a reference
  // back to the owning database_impl
  struct roster_writeback_manager
  {
    database_impl & imp;
    roster_writeback_manager(database_impl & imp) : imp(imp) {}
    void writeout(revision_id const &, cached_roster const &);
  };
  LRUWritebackCache<revision_id, cached_roster,
                    roster_size_estimator, roster_writeback_manager>
    roster_cache;

  bool have_delayed_file(file_id const & id);
  void load_delayed_file(file_id const & id, file_data & dat);
  void cancel_delayed_file(file_id const & id);
  void drop_or_cancel_file(file_id const & id);
  void schedule_delayed_file(file_id const & id, file_data const & dat);

  map<file_id, file_data> delayed_files;
  size_t delayed_writes_size;

  void flush_delayed_writes();
  void clear_delayed_writes();
  void write_delayed_file(file_id const & new_id,
                          file_data const & dat);

  void write_delayed_roster(revision_id const & new_id,
                            roster_t const & roster,
                            marking_map const & marking);

  //
  // --== Reading/writing delta-compressed objects ==--
  //

  // "do we have any entry for 'ident' that is a base version"
  bool roster_base_stored(revision_id const & ident);
  bool roster_base_available(revision_id const & ident);

  // "do we have any entry for 'ident' that is a delta"
  bool delta_exists(file_id const & ident,
                    file_id const & base,
                    string const & table);

  bool file_or_manifest_base_exists(id const & ident,
                                    std::string const & table);

  void get_file_or_manifest_base_unchecked(id const & new_id,
                                           data & dat,
                                           string const & table);
  void get_file_or_manifest_delta_unchecked(id const & ident,
                                            id const & base,
                                            delta & del,
                                            string const & table);
  void get_roster_base(revision_id const & ident,
                       roster_t & roster, marking_map & marking);
  void get_roster_delta(id const & ident,
                        id const & base,
                        roster_delta & del);

  friend struct file_and_manifest_reconstruction_graph;
  friend struct roster_reconstruction_graph;

  LRUWritebackCache<id, data, datasz> vcache;

  void get_version(id const & ident,
                   data & dat,
                   string const & data_table,
                   string const & delta_table);

  void drop(id const & base,
            string const & table);

  void put_file_delta(file_id const & ident,
                      file_id const & base,
                      file_delta const & del);

  void put_file_size(file_id const & ident,
                     file_data const & data);

  void put_roster_delta(revision_id const & ident,
                        revision_id const & base,
                        roster_delta const & del);

  //
  // --== The ancestry graph ==--
  //
  void get_ids(string const & table, set<id> & ids);

  //
  // --== Rosters ==--
  //
  struct extractor;
  struct file_content_extractor;
  struct markings_extractor;
  void extract_from_deltas(revision_id const & id, extractor & x);

  height_map height_cache;
  parent_id_map parent_cache;

  //
  // --== Keys ==--
  //
  void get_keys(string const & table, vector<key_name> & keys);

  // cache of verifiers for public keys
  verifier_cache verifiers;

  //
  // --== Certs ==--
  //
  // note: this section is ridiculous. please do something about it.
  bool cert_exists(cert const & t,
                   string const & table);
  void put_cert(cert const & t, string const & table);
  void results_to_certs(results const & res,
                        vector<cert> & certs);
  void results_to_certs(results const & res,
                        vector<pair<id, cert> > & certs);
  void oldstyle_results_to_certs(results const & res,
                                 vector<cert> & certs);

  void get_certs(vector<cert> & certs,
                 string const & table);

  void get_oldstyle_certs(id const & ident,
                          vector<cert> & certs,
                          string const & table);

  void get_certs(id const & ident,
                 vector<cert> & certs,
                 string const & table);

  void get_certs(cert_name const & name,
                 vector<cert> & certs,
                 string const & table);

  void get_oldstyle_certs(cert_name const & name,
                          vector<cert> & certs,
                          string const & table);

  void get_certs(id const & ident,
                 cert_name const & name,
                 vector<cert> & certs,
                 string const & table);

  void get_certs(id const & ident,
                 cert_name const & name,
                 cert_value const & val,
                 vector<cert> & certs,
                 string const & table);

  void get_certs(cert_name const & name,
                 cert_value const & val,
                 vector<pair<id, cert> > & certs,
                 string const & table);

  outdated_indicator_factory cert_stamper;

  void add_prefix_matching_constraint(string const & colname,
                                      string const & prefix,
                                      query & q);
};
450 
#ifdef SUPPORT_SQLITE_BEFORE_3003014
// SQLite versions up to and including 3.3.12 didn't have the hex() function
//
// Replacement for hex(): hands the hex encoding of its single argument
// back to sqlite.  F is the sqlite call context, NARGS/ARGS are the SQL
// arguments.  Errors are reported through sqlite3_result_error.
void
sqlite3_hex_fn(sqlite3_context *f, int nargs, sqlite3_value **args)
{
  if (nargs != 1)
    {
      sqlite3_result_error(f, "need exactly 1 arg to hex()", -1);
      return;
    }
  string encoded;

  // This operation may throw (un)recoverable_failure.  We must intercept that
  // and turn it into a call to sqlite3_result_error, or rollback will fail.
  try
    {
      // Fetch the argument as a length-counted blob.  Going through a
      // NUL-terminated C string (sqlite3_value_text) would cut binary
      // values short at the first zero byte, and would hand a null
      // pointer to the string constructor for an SQL NULL.
      char const * raw
        = static_cast<char const *>(sqlite3_value_blob(args[0]));
      int const len = sqlite3_value_bytes(args[0]);
      string const input = (raw != NULL && len > 0)
        ? string(raw, raw + len)
        : string();

      encoded = encode_hexenc(input, origin::database);
    }
  catch (recoverable_failure & e)
    {
      sqlite3_result_error(f, e.what(), -1);
      return;
    }
  catch (unrecoverable_failure & e)
    {
      sqlite3_result_error(f, e.what(), -1);
      return;
    }

  sqlite3_result_blob(f, encoded.data(), encoded.size(), SQLITE_TRANSIENT);
}
#endif
484 
database_impl(system_path const & f,db_type t,system_path const & roster_cache_performance_log)485 database_impl::database_impl(system_path const & f, db_type t,
486                              system_path const & roster_cache_performance_log) :
487   filename(f),
488   type(t),
489   __sql(NULL),
490   transaction_level(0),
491   roster_cache(constants::db_roster_cache_sz,
492                constants::db_roster_cache_min_count,
493                roster_writeback_manager(*this),
494                roster_cache_performance_log.as_external()),
495   delayed_writes_size(0),
496   vcache(constants::db_version_cache_sz, 1)
497 {}
498 
~database_impl()499 database_impl::~database_impl()
500 {
501   L(FL("statement cache statistics"));
502   L(FL("prepared %d statements") % statement_cache.size());
503 
504   for (map<string, statement>::const_iterator i = statement_cache.begin();
505        i != statement_cache.end(); ++i)
506     L(FL("%d executions of %s") % i->second.count % i->first);
507   // trigger destructors to finalize cached statements
508   statement_cache.clear();
509 
510   if (__sql)
511     close();
512 }
513 
// Definition of the cache shared by all database objects: it maps a
// database path to its database_impl.  It is filled by database::init()
// and emptied by database::reset_cache().
database_cache database::dbcache;
515 
// Construct a database handle that takes its options and lua hooks from
// APP; init() then attaches the implementation object.
database::database(app_state & app, database::dboptions d)
  : opts(app.opts), lua(app.lua), dbopts(d)
{
  init();
}
521 
// Construct a database handle from explicitly supplied options O and lua
// hooks L; init() then attaches the implementation object.
database::database(options const & o, lua_hooks & l, database::dboptions d)
  : opts(o), lua(l), dbopts(d)
{
  init();
}
527 
528 void
init()529 database::init()
530 {
531   database_path_helper helper(lua);
532   system_path dbpath;
533   helper.get_database_path(opts, dbpath, dbopts);
534 
535   // FIXME: for all :memory: databases an empty path is returned above, thus
536   // all requests for a :memory: database point to the same database
537   // implementation. This means we cannot use two different memory databases
538   // within the same monotone process
539   if (dbcache.find(dbpath) == dbcache.end())
540     {
541       L(FL("creating new database_impl instance for %s") % dbpath);
542       dbcache.insert(make_pair(dbpath, boost::shared_ptr<database_impl>(
543         new database_impl(dbpath, opts.dbname_type, opts.roster_cache_performance_log)
544       )));
545     }
546 
547   imp = dbcache[dbpath];
548 }
549 
// Nothing to do explicitly; the members clean up after themselves.
database::~database()
{}
552 
// Drop every entry from the shared path -> database_impl cache.
void
database::reset_cache()
{
  dbcache.clear();
}
558 
// Return (a copy of) the path this database was set up with.
system_path
database::get_filename()
{
  return imp->filename;
}
564 
565 bool
is_dbfile(any_path const & file)566 database::is_dbfile(any_path const & file)
567 {
568   if (imp->type == memory_db)
569     return false;
570   system_path fn(file); // canonicalize
571   bool same = (imp->filename == fn);
572   if (same)
573     L(FL("'%s' is the database file") % file);
574   return same;
575 }
576 
577 bool
database_specified()578 database::database_specified()
579 {
580   return imp->type == memory_db || !imp->filename.empty();
581 }
582 
583 void
create_if_not_exists()584 database::create_if_not_exists()
585 {
586   imp->check_filename();
587   if (!file_exists(imp->filename))
588     {
589       P(F("initializing new database '%s'") % imp->filename);
590       initialize();
591     }
592 }
593 
// Fail with a user-origin error if the rosters table already holds data.
void
database::check_is_not_rosterified()
{
  E(!imp->table_has_data("rosters"), origin::user,
    F("this database already contains rosters"));
}
600 
601 bool
table_has_data(string const & name)602 database_impl::table_has_data(string const & name)
603 {
604   results res;
605   fetch(res, one_col, any_rows, query("SELECT 1 FROM " + name + " LIMIT 1"));
606   return !res.empty();
607 }
608 
609 void
check_format()610 database_impl::check_format()
611 {
612   if (table_has_data("manifests"))
613     {
614       // The rosters and heights tables should be empty.
615       I(!table_has_data("rosters") && !table_has_data("heights"));
616 
617       // they need to either changesetify or rosterify.  which?
618       if (table_has_data("revisions"))
619         E(false, origin::no_fault,
620           F("database '%s' contains old-style revisions.\n"
621             "If you are a project leader or doing local testing:\n"
622             "  see the file UPGRADE for instructions on upgrading.\n"
623             "If you are not a project leader:\n"
624             "  wait for a leader to migrate project data, and then\n"
625             "  pull into a fresh database.\n"
626             "Sorry about the inconvenience.")
627           % filename);
628       else
629         E(false, origin::no_fault,
630           F("database '%s' contains manifests but no revisions.\n"
631             "This is a very old database; it needs to be upgraded.\n"
632             "Please see 'http://wiki.monotone.ca/upgradefromchangesets/'\n"
633             "for details")
634           % filename);
635     }
636 }
637 
638 void
check_caches()639 database_impl::check_caches()
640 {
641   bool caches_are_filled = true;
642   if (table_has_data("revisions"))
643     {
644       caches_are_filled = table_has_data("rosters") &&
645                           table_has_data("heights");
646     }
647   if (table_has_data("files"))
648     {
649       caches_are_filled = caches_are_filled && table_has_data("file_sizes");
650     }
651 
652   E(caches_are_filled, origin::no_fault,
653     F("database '%s' lacks some cached data.\n"
654       "Run '%s db regenerate_caches' to restore use of this database")
655     % filename % prog_name);
656 }
657 
658 static void
sqlite3_gunzip_fn(sqlite3_context * f,int nargs,sqlite3_value ** args)659 sqlite3_gunzip_fn(sqlite3_context *f, int nargs, sqlite3_value ** args)
660 {
661   if (nargs != 1)
662     {
663       sqlite3_result_error(f, "need exactly 1 arg to gunzip()", -1);
664       return;
665     }
666   data unpacked;
667   const char *val = (const char*) sqlite3_value_blob(args[0]);
668   int bytes = sqlite3_value_bytes(args[0]);
669   decode_gzip(gzip<data>(string(val,val+bytes), origin::database), unpacked);
670   sqlite3_result_blob(f, unpacked().c_str(), unpacked().size(), SQLITE_TRANSIENT);
671 }
672 
673 struct sqlite3 *
sql(enum open_mode mode)674 database_impl::sql(enum open_mode mode)
675 {
676   if (! __sql)
677     {
678       if (type == memory_db)
679         {
680           open();
681 
682           sqlite3_exec(__sql, schema_constant, NULL, NULL, NULL);
683           assert_sqlite3_ok(__sql);
684 
685           sqlite3_exec(__sql, (FL("PRAGMA user_version = %u;")
686                                % mtn_creator_code).str().c_str(), NULL, NULL, NULL);
687           assert_sqlite3_ok(__sql);
688         }
689       else
690         {
691           check_filename();
692           check_db_exists();
693           open();
694 
695           if (mode != schema_bypass_mode)
696             {
697               check_sql_schema(__sql, filename);
698 
699               if (mode != format_bypass_mode)
700                 {
701                   check_format();
702 
703                   if (mode != cache_bypass_mode)
704                     check_caches();
705                 }
706             }
707         }
708       install_functions();
709     }
710 
711   return __sql;
712 }
713 
714 void
initialize()715 database::initialize()
716 {
717   imp->check_filename();
718   imp->check_db_nonexistent();
719   imp->open();
720 
721   sqlite3 *sql = imp->__sql;
722 
723   sqlite3_exec(sql, schema_constant, NULL, NULL, NULL);
724   assert_sqlite3_ok(sql);
725 
726   sqlite3_exec(sql, (FL("PRAGMA user_version = %u;")
727                      % mtn_creator_code).str().c_str(), NULL, NULL, NULL);
728   assert_sqlite3_ok(sql);
729 
730   // make sure what we wanted is what we got
731   check_sql_schema(sql, imp->filename);
732 
733   imp->close();
734 }
735 
// Context passed through sqlite3_exec to the dump_*_cb callbacks: the
// database being dumped and the stream the dump is written to.  Both
// pointers start out null.
struct
dump_request
{
  dump_request()
    : sql(0),
      out(0)
  {}

  struct sqlite3 *sql;
  ostream *out;
};
743 
744 static void
dump_row(ostream & out,sqlite3_stmt * stmt,string const & table_name)745 dump_row(ostream &out, sqlite3_stmt *stmt, string const& table_name)
746 {
747   out << FL("INSERT INTO %s VALUES(") % table_name;
748   unsigned n = sqlite3_data_count(stmt);
749   for (unsigned i = 0; i < n; ++i)
750     {
751       if (i != 0)
752         out << ',';
753 
754       if (sqlite3_column_type(stmt, i) == SQLITE_BLOB)
755         {
756           out << "X'";
757           const char *val = (const char*) sqlite3_column_blob(stmt, i);
758           int bytes = sqlite3_column_bytes(stmt, i);
759           out << encode_hexenc(string(val,val+bytes), origin::internal);
760           out << '\'';
761         }
762       else
763         {
764           const unsigned char *val = sqlite3_column_text(stmt, i);
765           if (val == NULL)
766             out << "NULL";
767           else
768             {
769               out << '\'';
770               for (const unsigned char *cp = val; *cp; ++cp)
771                 {
772                   if (*cp == '\'')
773                     out << "''";
774                   else
775                     out << *cp;
776                 }
777               out << '\'';
778             }
779         }
780     }
781   out << ");\n";
782 }
783 
784 static int
dump_table_cb(void * data,int n,char ** vals,char ** cols)785 dump_table_cb(void *data, int n, char **vals, char **cols)
786 {
787   dump_request *dump = reinterpret_cast<dump_request *>(data);
788   I(dump != NULL);
789   I(dump->sql != NULL);
790   I(vals != NULL);
791   I(vals[0] != NULL);
792   I(vals[1] != NULL);
793   I(vals[2] != NULL);
794   I(n == 3);
795   I(string(vals[1]) == "table");
796   *(dump->out) << vals[2] << ";\n";
797   string table_name(vals[0]);
798   string query = "SELECT * FROM " + table_name;
799   sqlite3_stmt *stmt = 0;
800   sqlite3_prepare_v2(dump->sql, query.c_str(), -1, &stmt, NULL);
801   assert_sqlite3_ok(dump->sql);
802 
803   int stepresult = SQLITE_DONE;
804   do
805     {
806       stepresult = sqlite3_step(stmt);
807       I(stepresult == SQLITE_DONE || stepresult == SQLITE_ROW);
808       if (stepresult == SQLITE_ROW)
809         dump_row(*(dump->out), stmt, table_name);
810     }
811   while (stepresult == SQLITE_ROW);
812 
813   sqlite3_finalize(stmt);
814   assert_sqlite3_ok(dump->sql);
815   return 0;
816 }
817 
818 static int
dump_index_cb(void * data,int n,char ** vals,char ** cols)819 dump_index_cb(void *data, int n, char **vals, char **cols)
820 {
821   dump_request *dump = reinterpret_cast<dump_request *>(data);
822   I(dump != NULL);
823   I(dump->sql != NULL);
824   I(vals != NULL);
825   I(vals[0] != NULL);
826   I(vals[1] != NULL);
827   I(vals[2] != NULL);
828   I(n == 3);
829   I(string(vals[1]) == "index");
830   *(dump->out) << vals[2] << ";\n";
831   return 0;
832 }
833 
834 static int
dump_user_version_cb(void * data,int n,char ** vals,char ** cols)835 dump_user_version_cb(void *data, int n, char **vals, char **cols)
836 {
837   dump_request *dump = reinterpret_cast<dump_request *>(data);
838   I(dump != NULL);
839   I(dump->sql != NULL);
840   I(vals != NULL);
841   I(vals[0] != NULL);
842   I(n == 1);
843   *(dump->out) << "PRAGMA user_version = " << vals[0] << ";\n";
844   return 0;
845 }
846 
847 void
dump(ostream & out)848 database::dump(ostream & out)
849 {
850   ensure_open_for_maintenance();
851 
852   {
853     transaction_guard guard(*this);
854     dump_request req;
855     req.out = &out;
856     req.sql = imp->sql();
857     out << "BEGIN EXCLUSIVE;\n";
858     int res;
859     res = sqlite3_exec(req.sql,
860                           "SELECT name, type, sql FROM sqlite_master "
861                           "WHERE type='table' AND sql NOT NULL "
862                           "AND name not like 'sqlite_stat%' "
863                           "ORDER BY name",
864                           dump_table_cb, &req, NULL);
865     assert_sqlite3_ok(req.sql);
866     res = sqlite3_exec(req.sql,
867                           "SELECT name, type, sql FROM sqlite_master "
868                           "WHERE type='index' AND sql NOT NULL "
869                           "ORDER BY name",
870                           dump_index_cb, &req, NULL);
871     assert_sqlite3_ok(req.sql);
872     res = sqlite3_exec(req.sql,
873                        "PRAGMA user_version;",
874                        dump_user_version_cb, &req, NULL);
875     assert_sqlite3_ok(req.sql);
876     out << "COMMIT;\n";
877     guard.commit();
878   }
879 }
880 
881 void
load(istream & in)882 database::load(istream & in)
883 {
884   string line;
885   string sql_stmt;
886 
887   imp->check_filename();
888   imp->check_db_nonexistent();
889   imp->open();
890 
891   sqlite3 * sql = imp->__sql;
892 
893   // the page size can only be set before any other commands have been executed
894   sqlite3_exec(sql, "PRAGMA page_size=8192", NULL, NULL, NULL);
895   assert_sqlite3_ok(sql);
896 
897   while(in)
898     {
899       getline(in, line, ';');
900       sql_stmt += line + ';';
901 
902       if (sqlite3_complete(sql_stmt.c_str()))
903         {
904           sqlite3_exec(sql, sql_stmt.c_str(), NULL, NULL, NULL);
905           assert_sqlite3_ok(sql);
906           sql_stmt.clear();
907         }
908     }
909 
910   assert_sqlite3_ok(sql);
911 }
912 
913 
914 void
debug(string const & sql,ostream & out)915 database::debug(string const & sql, ostream & out)
916 {
917   ensure_open_for_maintenance();
918 
919   results res;
920   imp->fetch(res, any_cols, any_rows, query(sql));
921   out << '\'' << sql << "' -> " << res.size() << " rows\n\n";
922   for (size_t i = 0; i < res.size(); ++i)
923     {
924       for (size_t j = 0; j < res[i].size(); ++j)
925         {
926           if (j != 0)
927             out << " | ";
928           out << res[i][j];
929         }
930       out << '\n';
931     }
932 }
933 
// Subroutine of info().  This compares strings that might either be numbers
// or error messages surrounded by square brackets.  We want the longest
// number, even if there's an error message that's longer than that.
//
// Returns true if A orders before B.  Every error message (a string
// starting with '[') orders before every number; two strings of the same
// kind are ordered by length.  This is a strict weak ordering, as the
// standard algorithms require of a comparator: in particular no string
// orders before itself, and two error messages never both order before
// each other.
static bool longest_number(std::string const & a, std::string const & b)
{
  bool const a_is_error = !a.empty() && a[0] == '[';
  bool const b_is_error = !b.empty() && b[0] == '[';

  if (a_is_error != b_is_error)
    return a_is_error;  // the error message is the "shorter" one

  return a.length() < b.length();
}
946 
947 // Subroutine of info() and some things it calls.
948 // Given an informative_failure which is believed to represent an SQLite
949 // error, either return a string version of the error message (if it was an
950 // SQLite error) or rethrow the execption (if it wasn't).
951 static string
format_sqlite_error_for_info(recoverable_failure const & e)952 format_sqlite_error_for_info(recoverable_failure const & e)
953 {
954   string err(e.what());
955   string prefix = _("error: ");
956   prefix.append(_("sqlite error: "));
957   if (err.find(prefix) != 0)
958     throw;
959 
960   err.replace(0, prefix.length(), "[");
961   string::size_type nl = err.find('\n');
962   if (nl != string::npos)
963     err.erase(nl);
964 
965   err.append("]");
966   return err;
967 }
968 
969 // Subroutine of info().  Pretty-print the database's "creator code", which
970 // is a 32-bit unsigned number that we interpret as a four-character ASCII
971 // string, provided that all four characters are graphic.  (On disk, it's
972 // stored in the "user version" field of the database.)
973 static string
format_creator_code(u32 code)974 format_creator_code(u32 code)
975 {
976   char buf[5];
977   string result;
978 
979   if (code == 0)
980     return _("not set");
981 
982   buf[4] = '\0';
983   buf[3] = ((code & 0x000000ff) >>  0);
984   buf[2] = ((code & 0x0000ff00) >>  8);
985   buf[1] = ((code & 0x00ff0000) >> 16);
986   buf[0] = ((code & 0xff000000) >> 24);
987 
988   if (isgraph(buf[0]) && isgraph(buf[1]) && isgraph(buf[2]) && isgraph(buf[3]))
989     result = (FL("%s (0x%08x)") % buf % code).str();
990   else
991     result = (FL("0x%08x") % code).str();
992   if (code != mtn_creator_code)
993     result += _(" (not a monotone database)");
994   return result;
995 }
996 
997 
998 void
info(ostream & out,bool analyze)999 database::info(ostream & out, bool analyze)
1000 {
1001   // don't check the schema
1002   ensure_open_for_maintenance();
1003 
1004   // do a dummy query to confirm that the database file is an sqlite3
1005   // database.  (this doesn't happen on open() because sqlite postpones the
1006   // actual file open until the first access.  we can't piggyback it on the
1007   // query of the user version because there's a bug in sqlite 3.3.10:
1008   // the routine that reads meta-values from the database header does not
1009   // check the file format.  reported as sqlite bug #2182.)
1010   sqlite3_exec(imp->__sql, "SELECT 1 FROM sqlite_master LIMIT 0", 0, 0, 0);
1011   assert_sqlite3_ok(imp->__sql);
1012 
1013   u32 ccode;
1014   {
1015     results res;
1016     imp->fetch(res, one_col, one_row, query("PRAGMA user_version"));
1017     I(res.size() == 1);
1018     ccode = lexical_cast<u32>(res[0][0]);
1019   }
1020 
1021   vector<string> counts;
1022   counts.push_back(imp->count("rosters"));
1023   counts.push_back(imp->count("roster_deltas"));
1024   counts.push_back(imp->count("files"));
1025   counts.push_back(imp->count("file_deltas"));
1026   counts.push_back(imp->count("file_sizes"));
1027   counts.push_back(imp->count("revisions"));
1028   counts.push_back(imp->count("revision_ancestry"));
1029   counts.push_back(imp->count("revision_certs"));
1030 
1031   {
1032     results res;
1033     try
1034       {
1035         imp->fetch(res, one_col, any_rows,
1036               query("SELECT node FROM next_roster_node_number"));
1037         if (res.empty())
1038           counts.push_back("0");
1039         else
1040           {
1041             I(res.size() == 1);
1042             u64 n = lexical_cast<u64>(res[0][0]) - 1;
1043             counts.push_back((F("%u") % n).str());
1044           }
1045       }
1046     catch (recoverable_failure const & e)
1047       {
1048         counts.push_back(format_sqlite_error_for_info(e));
1049       }
1050   }
1051 
1052   vector<string> bytes;
1053   {
1054     u64 total = 0;
1055     bytes.push_back(imp->space("rosters",
1056                           "length(id) + length(checksum) + length(data)",
1057                           total));
1058     bytes.push_back(imp->space("roster_deltas",
1059                           "length(id) + length(checksum)"
1060                           "+ length(base) + length(delta)", total));
1061     bytes.push_back(imp->space("files", "length(id) + length(data)", total));
1062     bytes.push_back(imp->space("file_deltas",
1063                           "length(id) + length(base) + length(delta)", total));
1064     bytes.push_back(imp->space("file_sizes",
1065                           "length(id) + length(size)", total));
1066     bytes.push_back(imp->space("revisions", "length(id) + length(data)", total));
1067     bytes.push_back(imp->space("revision_ancestry",
1068                           "length(parent) + length(child)", total));
1069     bytes.push_back(imp->space("revision_certs",
1070                           "length(hash) + length(revision_id) + length(name)"
1071                           "+ length(value) + length(keypair_id)"
1072                           "+ length(signature)", total));
1073     bytes.push_back(imp->space("heights", "length(revision) + length(height)",
1074                           total));
1075     bytes.push_back((F("%u") % total).str());
1076   }
1077 
1078   // pad each vector's strings on the left with spaces to make them all the
1079   // same length
1080   {
1081     string::size_type width
1082       = max_element(counts.begin(), counts.end(), longest_number)->length();
1083     for(vector<string>::iterator i = counts.begin(); i != counts.end(); i++)
1084       if (width > i->length() && (*i)[0] != '[')
1085         i->insert(0U, width - i->length(), ' ');
1086 
1087     width = max_element(bytes.begin(), bytes.end(), longest_number)->length();
1088     for(vector<string>::iterator i = bytes.begin(); i != bytes.end(); i++)
1089       if (width > i->length() && (*i)[0] != '[')
1090         i->insert(0U, width - i->length(), ' ');
1091   }
1092 
1093   i18n_format form =
1094     F("creator code      : %s\n"
1095       "schema version    : %s\n"
1096       "counts:\n"
1097       "  full rosters    : %s\n"
1098       "  roster deltas   : %s\n"
1099       "  full files      : %s\n"
1100       "  file deltas     : %s\n"
1101       "  file sizes      : %s\n"
1102       "  revisions       : %s\n"
1103       "  ancestry edges  : %s\n"
1104       "  certs           : %s\n"
1105       "  logical files   : %s\n"
1106       "bytes:\n"
1107       "  full rosters    : %s\n"
1108       "  roster deltas   : %s\n"
1109       "  full files      : %s\n"
1110       "  file deltas     : %s\n"
1111       "  file sizes      : %s\n"
1112       "  revisions       : %s\n"
1113       "  cached ancestry : %s\n"
1114       "  certs           : %s\n"
1115       "  heights         : %s\n"
1116       "  total           : %s\n"
1117       "database:\n"
1118       "  page size       : %s\n"
1119       "  cache size      : %s"
1120       );
1121 
1122   form = form % format_creator_code(ccode);
1123   form = form % describe_sql_schema(imp->__sql);
1124 
1125   for (vector<string>::iterator i = counts.begin(); i != counts.end(); i++)
1126     form = form % *i;
1127 
1128   for (vector<string>::iterator i = bytes.begin(); i != bytes.end(); i++)
1129     form = form % *i;
1130 
1131   form = form % imp->page_size();
1132   form = form % imp->cache_size();
1133 
1134   out << form.str() << '\n'; // final newline is kept out of the translation
1135 
1136   // the following analyzation is only done for --verbose info
1137   if (!analyze)
1138     return;
1139 
1140 
1141   typedef map<revision_id, date_t> rev_date;
1142   rev_date rd;
1143   vector<cert> certs;
1144 
1145   L(FL("fetching revision dates"));
1146   imp->get_certs(date_cert_name, certs, "revision_certs");
1147 
1148   L(FL("analyzing revision dates"));
1149   rev_date::iterator d;
1150   for (vector<cert>::iterator i = certs.begin(); i != certs.end(); ++i)
1151     {
1152       date_t cert_date;
1153       try
1154         {
1155           cert_date = date_t(i->value());
1156         }
1157       catch (recoverable_failure & e)
1158         {
1159           // simply skip dates we cannot parse
1160           W(F("invalid date '%s' for revision %s; skipped")
1161             % i->value() % i->ident);
1162         }
1163 
1164       if (cert_date.valid())
1165         {
1166           if ((d = rd.find(i->ident)) == rd.end())
1167             rd.insert(make_pair(i->ident, cert_date));
1168           else
1169             {
1170               if (d->second > cert_date)
1171                 d->second = cert_date;
1172             }
1173         }
1174     }
1175 
1176   L(FL("fetching ancestry map"));
1177   typedef multimap<revision_id, revision_id>::const_iterator gi;
1178   rev_ancestry_map graph;
1179   get_forward_ancestry(graph);
1180 
1181   L(FL("checking timestamps differences of related revisions"));
1182   int correct = 0,
1183       equal = 0,
1184       incorrect = 0,
1185       root_anc = 0,
1186       missing = 0;
1187 
1188   vector<s64> diffs;
1189 
1190   for (gi i = graph.begin(); i != graph.end(); ++i)
1191     {
1192       revision_id anc_rid = i->first,
1193                   desc_rid = i->second;
1194 
1195       if (null_id(anc_rid))
1196         {
1197           root_anc++;
1198           continue;
1199         }
1200       I(!null_id(desc_rid));
1201 
1202       date_t anc_date,
1203              desc_date;
1204 
1205       map<revision_id, date_t>::iterator j;
1206       if ((j = rd.find(anc_rid)) != rd.end())
1207         anc_date = j->second;
1208 
1209       if ((j = rd.find(desc_rid)) != rd.end())
1210         desc_date = j->second;
1211 
1212       if (anc_date.valid() && desc_date.valid())
1213         {
1214           // we only need seconds precision here
1215           s64 diff = (desc_date - anc_date) / 1000;
1216           diffs.push_back(diff);
1217 
1218           if (anc_date < desc_date)
1219             correct++;
1220           else if (anc_date == desc_date)
1221             equal++;
1222           else
1223             {
1224               L(FL("   rev %s -> rev %s") % anc_rid % desc_rid);
1225               L(FL("   but date %s ! -> %s")
1226                 % anc_date.as_iso_8601_extended()
1227                 % desc_date.as_iso_8601_extended());
1228               L(FL("   (difference: %d seconds)")
1229                 % (anc_date - desc_date));
1230               incorrect++;
1231             }
1232         }
1233       else
1234         missing++;
1235     }
1236 
1237   // no information to provide in this case
1238   if (diffs.size() == 0)
1239     return;
1240 
1241   form =
1242     F("timestamp correctness between revisions:\n"
1243       "  correct dates   : %s edges\n"
1244       "  equal dates     : %s edges\n"
1245       "  incorrect dates : %s edges\n"
1246       "  based on root   : %s edges\n"
1247       "  missing dates   : %s edges\n"
1248       "\n"
1249       "timestamp differences between revisions:\n"
1250       "  mean            : %d sec\n"
1251       "  min             : %d sec\n"
1252       "  max             : %d sec\n"
1253       "\n"
1254       "  1st percentile  : %s sec\n"
1255       "  5th percentile  : %s sec\n"
1256       "  10th percentile : %s sec\n"
1257       "  25th percentile : %s sec\n"
1258       "  50th percentile : %s sec\n"
1259       "  75th percentile : %s sec\n"
1260       "  90th percentile : %s sec\n"
1261       "  95th percentile : %s sec\n"
1262       "  99th percentile : %s sec\n"
1263       );
1264 
1265   form = form % correct % equal % incorrect % root_anc % missing;
1266 
1267   // sort, so that we can get percentile values
1268   sort(diffs.begin(), diffs.end());
1269 
1270   // calculate mean time difference, output that, min and max
1271   s64 mean = accumulate(diffs.begin(), diffs.end(), 0);
1272   mean /= diffs.size();
1273   s64 median = *(diffs.begin() + diffs.size()/2);
1274   form = form % mean % *diffs.begin() % *diffs.rbegin()
1275     % *(diffs.begin() + int(diffs.size() * 0.01))
1276     % *(diffs.begin() + int(diffs.size() * 0.05))
1277     % *(diffs.begin() + int(diffs.size() * 0.10))
1278     % *(diffs.begin() + int(diffs.size() * 0.25))
1279     % *(diffs.begin() + int(diffs.size() * 0.50))
1280     % *(diffs.begin() + int(diffs.size() * 0.75))
1281     % *(diffs.begin() + int(diffs.size() * 0.90))
1282     % *(diffs.begin() + int(diffs.size() * 0.95))
1283     % *(diffs.begin() + int(diffs.size() * 0.99));
1284 
1285   // output the string, with some newlines out of translation
1286   out << '\n' << '\n' << form.str() << '\n';
1287 }
1288 
1289 void
version(ostream & out)1290 database::version(ostream & out)
1291 {
1292   ensure_open_for_maintenance();
1293   out << (F("database schema version: %s")
1294           % describe_sql_schema(imp->__sql)).str()
1295       << '\n';
1296 }
1297 
1298 void
migrate(key_store & keys,migration_status & mstat)1299 database::migrate(key_store & keys, migration_status & mstat)
1300 {
1301   ensure_open_for_maintenance();
1302   mstat = migrate_sql_schema(imp->__sql, keys, get_filename());
1303 }
1304 
1305 void
test_migration_step(key_store & keys,string const & schema)1306 database::test_migration_step(key_store & keys, string const & schema)
1307 {
1308   ensure_open_for_maintenance();
1309   ::test_migration_step(imp->__sql, keys, get_filename(), schema);
1310 }
1311 
1312 void
fix_bad_certs(bool drop_not_fixable)1313 database::fix_bad_certs(bool drop_not_fixable)
1314 {
1315   vector<key_id> all_keys;
1316   get_key_ids(all_keys);
1317 
1318   P(F("loading certs"));
1319   vector<pair<id, cert> > all_certs;
1320   {
1321     results res;
1322     query q("SELECT revision_id, name, value, keypair_id, signature, hash FROM revision_certs");
1323     imp->fetch(res, 6, any_rows, q);
1324     imp->results_to_certs(res, all_certs);
1325   }
1326 
1327   P(F("checking"));
1328 
1329   ticker tick_checked(_("checked"), "c", 25);
1330   ticker tick_bad(_("bad"), "b", 1);
1331   ticker tick_fixed(_("fixed"), "f", 1);
1332   shared_ptr<ticker> tick_dropped;
1333   if (drop_not_fixable)
1334     tick_dropped.reset(new ticker(_("dropped"), "d", 1));
1335   tick_checked.set_total(all_certs.size());
1336 
1337   int num_bad(0), num_fixed(0), num_dropped(0);
1338 
1339   for (vector<pair<id, cert> >::const_iterator cert_iter = all_certs.begin();
1340        cert_iter != all_certs.end(); ++cert_iter)
1341     {
1342       cert const & c(cert_iter->second);
1343       id const & certid(cert_iter->first);
1344       cert_status status = check_cert(c);
1345       ++tick_checked;
1346       if (status == cert_bad)
1347         {
1348           ++tick_bad;
1349           ++num_bad;
1350           bool fixed = false;
1351           string signable;
1352           c.signable_text(signable);
1353           for (vector<key_id>::const_iterator key_iter = all_keys.begin();
1354                key_iter != all_keys.end(); ++key_iter)
1355             {
1356               key_id const & keyid(*key_iter);
1357               if (check_signature(keyid, signable, c.sig) == cert_ok)
1358                 {
1359                   key_name candidate_name;
1360                   rsa_pub_key junk;
1361                   get_pubkey(keyid, candidate_name, junk);
1362                   id chk_id;
1363                   c.hash_code(candidate_name, chk_id);
1364                   if (chk_id == certid)
1365                     {
1366                       imp->execute(query("UPDATE revision_certs SET keypair_id = ? WHERE hash = ?")
1367                                    % blob(keyid.inner()()) % blob(certid()));
1368                       ++tick_fixed;
1369                       ++num_fixed;
1370                       fixed = true;
1371                       break;
1372                     }
1373                 }
1374             }
1375           if (!fixed)
1376             {
1377               if (drop_not_fixable)
1378                 {
1379                   imp->execute(query("DELETE FROM revision_certs WHERE hash = ?")
1380                                % blob(certid()));
1381                   ++(*tick_dropped);
1382                   ++num_dropped;
1383                 }
1384             }
1385         }
1386     }
1387   if (drop_not_fixable)
1388     {
1389       P(F("checked %d certs, found %d bad, fixed %d, dropped %d")
1390         % all_certs.size() % num_bad % num_fixed % num_dropped);
1391     }
1392   else
1393     {
1394       P(F("checked %d certs, found %d bad, fixed %d")
1395         % all_certs.size() % num_bad % num_fixed);
1396     }
1397 }
1398 
1399 void
ensure_open()1400 database::ensure_open()
1401 {
1402   imp->sql();
1403 }
1404 
1405 void
ensure_open_for_format_changes()1406 database::ensure_open_for_format_changes()
1407 {
1408   imp->sql(format_bypass_mode);
1409 }
1410 
1411 void
ensure_open_for_cache_reset()1412 database::ensure_open_for_cache_reset()
1413 {
1414   imp->sql(cache_bypass_mode);
1415 }
1416 
1417 void
ensure_open_for_maintenance()1418 database::ensure_open_for_maintenance()
1419 {
1420   imp->sql(schema_bypass_mode);
1421 }
1422 
1423 void
execute(query const & query)1424 database_impl::execute(query const & query)
1425 {
1426   results res;
1427   fetch(res, 0, 0, query);
1428 }
1429 
1430 void
fetch(results & res,int const want_cols,int const want_rows,query const & query)1431 database_impl::fetch(results & res,
1432                       int const want_cols,
1433                       int const want_rows,
1434                       query const & query)
1435 {
1436   int nrow;
1437   int ncol;
1438   int rescode;
1439 
1440   res.clear();
1441   res.resize(0);
1442 
1443   map<string, statement>::iterator i = statement_cache.find(query.sql_cmd);
1444   if (i == statement_cache.end())
1445     {
1446       statement_cache.insert(make_pair(query.sql_cmd, statement()));
1447       i = statement_cache.find(query.sql_cmd);
1448       I(i != statement_cache.end());
1449 
1450       const char * tail;
1451       sqlite3_prepare_v2(sql(), query.sql_cmd.c_str(), -1, i->second.stmt.paddr(), &tail);
1452       assert_sqlite3_ok(sql());
1453       L(FL("prepared statement %s") % query.sql_cmd);
1454 
1455       // no support for multiple statements here
1456       E(*tail == 0, origin::internal,
1457         F("multiple statements in query: %s") % query.sql_cmd);
1458     }
1459 
1460   ncol = sqlite3_column_count(i->second.stmt());
1461 
1462   E(want_cols == any_cols || want_cols == ncol, origin::database,
1463     F("wanted %d columns got %d in query: %s") % want_cols % ncol % query.sql_cmd);
1464 
1465   // bind parameters for this execution
1466 
1467   int params = sqlite3_bind_parameter_count(i->second.stmt());
1468 
1469   // Ensure that exactly the right number of parameters were given
1470   I(params == int(query.args.size()));
1471 
1472   L(FL("binding %d parameters for %s") % params % query.sql_cmd);
1473 
1474   for (int param = 1; param <= params; param++)
1475     {
1476       // profiling finds this logging to be quite expensive
1477       if (global_sanity.debug_p())
1478         {
1479           string prefix;
1480           string log;
1481 
1482           if (query.args[param-1].type == query_param::blob)
1483             {
1484               prefix = "x";
1485               log = encode_hexenc(query.args[param-1].string_data, origin::internal);
1486             }
1487           else if (query.args[param-1].type == query_param::int64)
1488             {
1489               log = lexical_cast<string>(query.args[param-1].int_data);
1490             }
1491           else
1492             {
1493               log = query.args[param-1].string_data;
1494             }
1495 
1496           if (log.size() > constants::db_log_line_sz)
1497             log = log.substr(0, constants::db_log_line_sz - 2) + "..";
1498 
1499           L(FL("binding %d with value '%s'") % param % log);
1500         }
1501 
1502       switch (idx(query.args, param - 1).type)
1503         {
1504         case query_param::text:
1505           sqlite3_bind_text(i->second.stmt(), param,
1506                             idx(query.args, param - 1).string_data.c_str(), -1,
1507                             SQLITE_STATIC);
1508           break;
1509         case query_param::blob:
1510           {
1511             string const & data = idx(query.args, param - 1).string_data;
1512             sqlite3_bind_blob(i->second.stmt(), param,
1513                               data.data(), data.size(),
1514                               SQLITE_STATIC);
1515           }
1516           break;
1517         case query_param::int64:
1518           {
1519             u64 data = idx(query.args, param - 1).int_data;
1520             sqlite3_bind_int64(i->second.stmt(), param, data);
1521           }
1522           break;
1523         default:
1524           I(false);
1525         }
1526 
1527       assert_sqlite3_ok(sql());
1528     }
1529 
1530   // execute and process results
1531 
1532   nrow = 0;
1533   for (rescode = sqlite3_step(i->second.stmt()); rescode == SQLITE_ROW;
1534        rescode = sqlite3_step(i->second.stmt()))
1535     {
1536       vector<string> row;
1537       for (int col = 0; col < ncol; col++)
1538         {
1539           // We never store NULLs, so we should never see one.
1540           int const datatype = sqlite3_column_type(i->second.stmt(), col);
1541           E(datatype != SQLITE_NULL, origin::database,
1542             F("null result in query: %s") % query.sql_cmd);
1543           const char * value = (const char*)sqlite3_column_blob(i->second.stmt(), col);
1544           int bytes = sqlite3_column_bytes(i->second.stmt(), col);
1545           if (value) {
1546             row.push_back(string(value, value + bytes));
1547           } else {
1548             // sqlite3_column_blob() returns null for zero-length
1549             I(bytes == 0);
1550             row.push_back(string());
1551           }
1552         }
1553       res.push_back(row);
1554     }
1555 
1556   if (rescode != SQLITE_DONE)
1557     assert_sqlite3_ok(sql());
1558 
1559   sqlite3_reset(i->second.stmt());
1560   assert_sqlite3_ok(sql());
1561 
1562   nrow = res.size();
1563 
1564   i->second.count++;
1565 
1566   E(want_rows == any_rows || want_rows == nrow,
1567     origin::database,
1568     F("wanted %d rows got %d in query: %s") % want_rows % nrow % query.sql_cmd);
1569 }
1570 
1571 bool
table_has_entry(id const & key,std::string const & column,std::string const & table)1572 database_impl::table_has_entry(id const & key,
1573                                std::string const & column,
1574                                std::string const & table)
1575 {
1576   results res;
1577   query q("SELECT 1 FROM " + table + " WHERE " + column + " = ? LIMIT 1");
1578   fetch(res, one_col, any_rows, q % blob(key()));
1579   return !res.empty();
1580 }
1581 
1582 // general application-level logic
1583 
1584 void
begin_transaction(bool exclusive)1585 database_impl::begin_transaction(bool exclusive)
1586 {
1587   if (transaction_level == 0)
1588     {
1589       I(delayed_files.empty());
1590       I(roster_cache.all_clean());
1591       if (exclusive)
1592         execute(query("BEGIN EXCLUSIVE"));
1593       else
1594         execute(query("BEGIN DEFERRED"));
1595       transaction_exclusive = exclusive;
1596     }
1597   else
1598     {
1599       // You can't start an exclusive transaction within a non-exclusive
1600       // transaction
1601       I(!exclusive || transaction_exclusive);
1602     }
1603   transaction_level++;
1604 }
1605 
1606 
1607 static size_t
size_delayed_file(file_id const & id,file_data const & dat)1608 size_delayed_file(file_id const & id, file_data const & dat)
1609 {
1610   return id.inner()().size() + dat.inner()().size();
1611 }
1612 
1613 bool
have_delayed_file(file_id const & id)1614 database_impl::have_delayed_file(file_id const & id)
1615 {
1616   return delayed_files.find(id) != delayed_files.end();
1617 }
1618 
1619 void
load_delayed_file(file_id const & id,file_data & dat)1620 database_impl::load_delayed_file(file_id const & id, file_data & dat)
1621 {
1622   dat = safe_get(delayed_files, id);
1623 }
1624 
1625 // precondition: have_delayed_file(an_id) == true
1626 void
cancel_delayed_file(file_id const & an_id)1627 database_impl::cancel_delayed_file(file_id const & an_id)
1628 {
1629   file_data const & dat = safe_get(delayed_files, an_id);
1630   size_t cancel_size = size_delayed_file(an_id, dat);
1631   I(cancel_size <= delayed_writes_size);
1632   delayed_writes_size -= cancel_size;
1633 
1634   safe_erase(delayed_files, an_id);
1635 }
1636 
1637 void
drop_or_cancel_file(file_id const & id)1638 database_impl::drop_or_cancel_file(file_id const & id)
1639 {
1640   if (have_delayed_file(id))
1641     cancel_delayed_file(id);
1642   else
1643     drop(id.inner(), "files");
1644 }
1645 
1646 void
schedule_delayed_file(file_id const & an_id,file_data const & dat)1647 database_impl::schedule_delayed_file(file_id const & an_id,
1648                                      file_data const & dat)
1649 {
1650   if (!have_delayed_file(an_id))
1651     {
1652       safe_insert(delayed_files, make_pair(an_id, dat));
1653       delayed_writes_size += size_delayed_file(an_id, dat);
1654     }
1655   if (delayed_writes_size > constants::db_max_delayed_file_bytes)
1656     flush_delayed_writes();
1657 }
1658 
1659 void
flush_delayed_writes()1660 database_impl::flush_delayed_writes()
1661 {
1662   for (map<file_id, file_data>::const_iterator i = delayed_files.begin();
1663        i != delayed_files.end(); ++i)
1664     write_delayed_file(i->first, i->second);
1665   clear_delayed_writes();
1666 }
1667 
1668 void
clear_delayed_writes()1669 database_impl::clear_delayed_writes()
1670 {
1671   delayed_files.clear();
1672   delayed_writes_size = 0;
1673 }
1674 
1675 void
writeout(revision_id const & id,cached_roster const & cr)1676 database_impl::roster_writeback_manager::writeout(revision_id const & id,
1677                                                   cached_roster const & cr)
1678 {
1679   I(cr.first);
1680   I(cr.second);
1681   imp.write_delayed_roster(id, *(cr.first), *(cr.second));
1682 }
1683 
1684 void
commit_transaction()1685 database_impl::commit_transaction()
1686 {
1687   if (transaction_level == 1)
1688     {
1689       flush_delayed_writes();
1690       roster_cache.clean_all();
1691       execute(query("COMMIT"));
1692     }
1693   transaction_level--;
1694 }
1695 
1696 void
rollback_transaction()1697 database_impl::rollback_transaction()
1698 {
1699   if (transaction_level == 1)
1700     {
1701       clear_delayed_writes();
1702       roster_cache.clear_and_drop_writes();
1703       execute(query("ROLLBACK"));
1704     }
1705   transaction_level--;
1706 }
1707 
1708 
1709 bool
file_or_manifest_base_exists(id const & ident,string const & table)1710 database_impl::file_or_manifest_base_exists(id const & ident,
1711                                             string const & table)
1712 {
1713   // just check for a delayed file, since there are no delayed manifests
1714   if (have_delayed_file(file_id(ident)))
1715     return true;
1716   return table_has_entry(ident, "id", table);
1717 }
1718 
1719 bool
file_or_manifest_base_exists(file_id const & ident,string const & table)1720 database::file_or_manifest_base_exists(file_id const & ident,
1721                                        string const & table)
1722 {
1723   return imp->file_or_manifest_base_exists(ident.inner(), table);
1724 }
1725 
1726 // returns true if we are currently storing (or planning to store) a
1727 // full-text for 'ident'
1728 bool
roster_base_stored(revision_id const & ident)1729 database_impl::roster_base_stored(revision_id const & ident)
1730 {
1731   if (roster_cache.exists(ident) && roster_cache.is_dirty(ident))
1732     return true;
1733   return table_has_entry(ident.inner(), "id", "rosters");
1734 }
1735 
1736 // returns true if we currently have a full-text for 'ident' available
1737 // (possibly cached).  Warning: the results of this method are invalidated
1738 // by calling roster_cache.insert_{clean,dirty}, because they can trigger
1739 // cache cleaning.
1740 bool
roster_base_available(revision_id const & ident)1741 database_impl::roster_base_available(revision_id const & ident)
1742 {
1743   if (roster_cache.exists(ident))
1744     return true;
1745   return table_has_entry(ident.inner(), "id", "rosters");
1746 }
1747 
1748 bool
delta_exists(id const & ident,string const & table)1749 database::delta_exists(id const & ident,
1750                        string const & table)
1751 {
1752   return imp->table_has_entry(ident, "id", table);
1753 }
1754 
1755 bool
delta_exists(file_id const & ident,file_id const & base,string const & table)1756 database_impl::delta_exists(file_id const & ident,
1757                             file_id const & base,
1758                             string const & table)
1759 {
1760   results res;
1761   query q("SELECT 1 FROM " + table + " WHERE id = ? and base = ? LIMIT 1");
1762   fetch(res, one_col, any_rows,
1763         q % blob(ident.inner()()) % blob(base.inner()()));
1764   return !res.empty();
1765 }
1766 
1767 string
count(string const & table)1768 database_impl::count(string const & table)
1769 {
1770   try
1771     {
1772       results res;
1773       query q("SELECT COUNT(*) FROM " + table);
1774       fetch(res, one_col, one_row, q);
1775       return (F("%u") % lexical_cast<u64>(res[0][0])).str();
1776     }
1777   catch (recoverable_failure const & e)
1778     {
1779       return format_sqlite_error_for_info(e);
1780     }
1781 
1782 }
1783 
1784 string
space(string const & table,string const & rowspace,u64 & total)1785 database_impl::space(string const & table, string const & rowspace, u64 & total)
1786 {
1787   try
1788     {
1789       results res;
1790       // SUM({empty set}) is NULL; TOTAL({empty set}) is 0.0
1791       query q("SELECT TOTAL(" + rowspace + ") FROM " + table);
1792       fetch(res, one_col, one_row, q);
1793       u64 bytes = static_cast<u64>(lexical_cast<double>(res[0][0]));
1794       total += bytes;
1795       return (F("%u") % bytes).str();
1796     }
1797   catch (recoverable_failure & e)
1798     {
1799       return format_sqlite_error_for_info(e);
1800     }
1801 }
1802 
1803 unsigned int
page_size()1804 database_impl::page_size()
1805 {
1806   results res;
1807   query q("PRAGMA page_size");
1808   fetch(res, one_col, one_row, q);
1809   return lexical_cast<unsigned int>(res[0][0]);
1810 }
1811 
1812 unsigned int
cache_size()1813 database_impl::cache_size()
1814 {
1815   // This returns the persistent (default) cache size.  It's possible to
1816   // override this setting transiently at runtime by setting PRAGMA
1817   // cache_size.
1818   results res;
1819   query q("PRAGMA default_cache_size");
1820   fetch(res, one_col, one_row, q);
1821   return lexical_cast<unsigned int>(res[0][0]);
1822 }
1823 
1824 void
get_ids(string const & table,set<id> & ids)1825 database_impl::get_ids(string const & table, set<id> & ids)
1826 {
1827   results res;
1828   query q("SELECT id FROM " + table);
1829   fetch(res, one_col, any_rows, q);
1830 
1831   for (size_t i = 0; i < res.size(); ++i)
1832     {
1833       ids.insert(id(res[i][0], origin::database));
1834     }
1835 }
1836 
1837 // for files and legacy manifest support
1838 void
get_file_or_manifest_base_unchecked(id const & ident,data & dat,string const & table)1839 database_impl::get_file_or_manifest_base_unchecked(id const & ident,
1840                                                    data & dat,
1841                                                    string const & table)
1842 {
1843   if (have_delayed_file(file_id(ident)))
1844     {
1845       file_data tmp;
1846       load_delayed_file(file_id(ident), tmp);
1847       dat = tmp.inner();
1848       return;
1849     }
1850 
1851   results res;
1852   query q("SELECT data FROM " + table + " WHERE id = ?");
1853   fetch(res, one_col, one_row, q % blob(ident()));
1854 
1855   gzip<data> rdata(res[0][0], origin::database);
1856   data rdata_unpacked;
1857   decode_gzip(rdata,rdata_unpacked);
1858 
1859   dat = rdata_unpacked;
1860 }
1861 
1862 // for files and legacy manifest support
1863 void
get_file_or_manifest_delta_unchecked(id const & ident,id const & base,delta & del,string const & table)1864 database_impl::get_file_or_manifest_delta_unchecked(id const & ident,
1865                                                     id const & base,
1866                                                     delta & del,
1867                                                     string const & table)
1868 {
1869   I(ident() != "");
1870   I(base() != "");
1871   results res;
1872   query q("SELECT delta FROM " + table + " WHERE id = ? AND base = ?");
1873   fetch(res, one_col, one_row,
1874         q % blob(ident()) % blob(base()));
1875 
1876   gzip<delta> del_packed(res[0][0], origin::database);
1877   decode_gzip(del_packed, del);
1878 }
1879 
1880 void
get_roster_base(revision_id const & ident,roster_t & roster,marking_map & marking)1881 database_impl::get_roster_base(revision_id const & ident,
1882                                roster_t & roster, marking_map & marking)
1883 {
1884   if (roster_cache.exists(ident))
1885     {
1886       cached_roster cr;
1887       roster_cache.fetch(ident, cr);
1888       I(cr.first);
1889       roster = *(cr.first);
1890       I(cr.second);
1891       marking = *(cr.second);
1892       return;
1893     }
1894   results res;
1895   query q("SELECT checksum, data FROM rosters WHERE id = ?");
1896   fetch(res, 2, one_row, q % blob(ident.inner()()));
1897 
1898   id checksum(res[0][0], origin::database);
1899   id calculated;
1900   calculate_ident(data(res[0][1], origin::database), calculated);
1901   E(calculated == checksum, origin::database,
1902     F("roster does not match hash"));
1903 
1904   gzip<data> dat_packed(res[0][1], origin::database);
1905   data dat;
1906   decode_gzip(dat_packed, dat);
1907   read_roster_and_marking(roster_data(dat), roster, marking);
1908 }
1909 
1910 void
get_roster_delta(id const & ident,id const & base,roster<delta> & del)1911 database_impl::get_roster_delta(id const & ident,
1912                                 id const & base,
1913                                 roster<delta> & del)
1914 {
1915   results res;
1916   query q("SELECT checksum, delta FROM roster_deltas WHERE id = ? AND base = ?");
1917   fetch(res, 2, one_row, q % blob(ident()) % blob(base()));
1918 
1919   id checksum(res[0][0], origin::database);
1920   id calculated;
1921   calculate_ident(data(res[0][1], origin::database), calculated);
1922   E(calculated == checksum, origin::database,
1923     F("roster_delta does not match hash"));
1924 
1925   gzip<delta> del_packed(res[0][1], origin::database);
1926   delta tmp;
1927   decode_gzip(del_packed, tmp);
1928   del = roster<delta>(tmp);
1929 }
1930 
1931 void
write_delayed_file(file_id const & ident,file_data const & dat)1932 database_impl::write_delayed_file(file_id const & ident,
1933                                    file_data const & dat)
1934 {
1935   gzip<data> dat_packed;
1936   encode_gzip(dat.inner(), dat_packed);
1937 
1938   // ident is a hash, which we should check
1939   I(!null_id(ident));
1940   file_id tid;
1941   calculate_ident(dat, tid);
1942   MM(ident);
1943   MM(tid);
1944   I(tid == ident);
1945   // and then write things to the db
1946   query q("INSERT INTO files (id, data) VALUES (?, ?)");
1947   execute(q % blob(ident.inner()()) % blob(dat_packed()));
1948 }
1949 
1950 void
write_delayed_roster(revision_id const & ident,roster_t const & roster,marking_map const & marking)1951 database_impl::write_delayed_roster(revision_id const & ident,
1952                                      roster_t const & roster,
1953                                      marking_map const & marking)
1954 {
1955   roster_data dat;
1956   write_roster_and_marking(roster, marking, dat);
1957   gzip<data> dat_packed;
1958   encode_gzip(dat.inner(), dat_packed);
1959 
1960   // ident is a number, and we should calculate a checksum on what
1961   // we write
1962   id checksum;
1963   calculate_ident(typecast_vocab<data>(dat_packed), checksum);
1964 
1965   // and then write it
1966   execute(query("INSERT INTO rosters (id, checksum, data) VALUES (?, ?, ?)")
1967           % blob(ident.inner()())
1968           % blob(checksum())
1969           % blob(dat_packed()));
1970 }
1971 
1972 
1973 void
put_file_delta(file_id const & ident,file_id const & base,file_delta const & del)1974 database::put_file_delta(file_id const & ident,
1975                          file_id const & base,
1976                          file_delta const & del)
1977 {
1978   I(!null_id(ident));
1979   I(!null_id(base));
1980 
1981   gzip<delta> del_packed;
1982   encode_gzip(del.inner(), del_packed);
1983 
1984   imp->execute(query("INSERT INTO file_deltas (id, base, delta) VALUES (?, ?, ?)")
1985                % blob(ident.inner()())
1986                % blob(base.inner()())
1987                % blob(del_packed()));
1988 }
1989 
1990 void
put_file_size(file_id const & ident,file_data const & data)1991 database_impl::put_file_size(file_id const & ident,
1992                              file_data const & data)
1993 {
1994   I(!null_id(ident));
1995   file_size size = data.inner()().size();
1996   // really identical files should be rather rare, so the cost of checking
1997   // whether an entry exists _everytime_ by hand should be higher than just
1998   // replacing any (possibly existing) entry. and since each identical file
1999   // should also have an identical size, we're done here
2000   query q("INSERT OR REPLACE INTO file_sizes(id, size) VALUES (?, ?)");
2001   execute(q % blob(ident.inner()()) % int64(size));
2002 }
2003 
2004 void
put_roster_delta(revision_id const & ident,revision_id const & base,roster_delta const & del)2005 database_impl::put_roster_delta(revision_id const & ident,
2006                                  revision_id const & base,
2007                                  roster_delta const & del)
2008 {
2009   gzip<delta> del_packed;
2010   encode_gzip(del.inner(), del_packed);
2011 
2012   id checksum;
2013   calculate_ident(typecast_vocab<data>(del_packed), checksum);
2014 
2015   query q("INSERT INTO roster_deltas (id, base, checksum, delta) VALUES (?, ?, ?, ?)");
2016   execute(q
2017           % blob(ident.inner()())
2018           % blob(base.inner()())
2019           % blob(checksum())
2020           % blob(del_packed()));
2021 }
2022 
2023 struct file_and_manifest_reconstruction_graph : public reconstruction_graph
2024 {
2025   database_impl & imp;
2026   string const & data_table;
2027   string const & delta_table;
2028 
file_and_manifest_reconstruction_graphfile_and_manifest_reconstruction_graph2029   file_and_manifest_reconstruction_graph(database_impl & imp,
2030                                          string const & data_table,
2031                                          string const & delta_table)
2032     : imp(imp), data_table(data_table), delta_table(delta_table)
2033   {}
is_basefile_and_manifest_reconstruction_graph2034   virtual bool is_base(id const & node) const
2035   {
2036     return imp.vcache.exists(node)
2037       || imp.file_or_manifest_base_exists(node, data_table);
2038   }
get_nextfile_and_manifest_reconstruction_graph2039   virtual void get_next(id const & from, set<id> & next) const
2040   {
2041     next.clear();
2042     results res;
2043     query q("SELECT base FROM " + delta_table + " WHERE id = ?");
2044     imp.fetch(res, one_col, any_rows, q % blob(from()));
2045     for (results::const_iterator i = res.begin(); i != res.end(); ++i)
2046       next.insert(id((*i)[0], origin::database));
2047   }
2048 };
2049 
// used for files and legacy manifest migration
//
// Reconstruct the full text of version IDENT into DAT.
//
// A reconstruction path is computed over DATA_TABLE / DELTA_TABLE; the
// last element of that path is the starting point (taken from vcache if
// present there, otherwise read from DATA_TABLE), and the remaining
// elements are walked in reverse, applying one stored delta per step.
// Intermediate versions not yet in vcache are inserted along the way, and
// so is the final result.  The hash of the reconstructed text must equal
// IDENT, otherwise an error with origin::database is raised.
void
database_impl::get_version(id const & ident,
                           data & dat,
                           string const & data_table,
                           string const & delta_table)
{
  I(ident() != "");

  reconstruction_path selected_path;
  {
    file_and_manifest_reconstruction_graph graph(*this, data_table, delta_table);
    get_reconstruction_path(ident, graph, selected_path);
  }

  I(!selected_path.empty());

  // the back of the path is where reconstruction starts
  id curr = selected_path.back();
  selected_path.pop_back();
  data begin;

  // NOTE(review): the fetch happens inside I(); this relies on I() always
  // evaluating its argument -- confirm against the macro definition.
  if (vcache.exists(curr))
    I(vcache.fetch(curr, begin));
  else
    get_file_or_manifest_base_unchecked(curr, begin, data_table);

  shared_ptr<delta_applicator> appl = new_piecewise_applicator();
  appl->begin(begin());

  // walk the rest of the path back to front; each step turns the text of
  // 'curr' into the text of 'nxt'
  for (reconstruction_path::reverse_iterator i = selected_path.rbegin();
       i != selected_path.rend(); ++i)
    {
      id const nxt = id(*i);

      // remember the intermediate version we are about to leave behind,
      // unless the cache already has it
      if (!vcache.exists(curr))
        {
          string tmp;
          appl->finish(tmp);
          vcache.insert_clean(curr, data(tmp, origin::database));
        }

      if (global_sanity.debug_p())
        L(FL("following delta %s -> %s") % curr % nxt);
      delta del;
      get_file_or_manifest_delta_unchecked(nxt, curr, del, delta_table);
      apply_delta(appl, del());

      appl->next();
      curr = nxt;
    }

  string tmp;
  appl->finish(tmp);
  dat = data(tmp, origin::database);

  // the individual deltas were fetched "unchecked"; the end result is
  // verified here against the requested id
  id final;
  calculate_ident(dat, final);
  E(final == ident, origin::database,
    F("delta-reconstructed '%s' item does not match hash")
    % data_table);

  if (!vcache.exists(ident))
    vcache.insert_clean(ident, dat);
}
2114 
2115 struct roster_reconstruction_graph : public reconstruction_graph
2116 {
2117   database_impl & imp;
roster_reconstruction_graphroster_reconstruction_graph2118   roster_reconstruction_graph(database_impl & imp) : imp(imp) {}
is_baseroster_reconstruction_graph2119   virtual bool is_base(id const & node) const
2120   {
2121     return imp.roster_base_available(revision_id(node));
2122   }
get_nextroster_reconstruction_graph2123   virtual void get_next(id const & from, set<id> & next) const
2124   {
2125     next.clear();
2126     results res;
2127     query q("SELECT base FROM roster_deltas WHERE id = ?");
2128     imp.fetch(res, one_col, any_rows, q % blob(from()));
2129     for (results::const_iterator i = res.begin(); i != res.end(); ++i)
2130       next.insert(id((*i)[0], origin::database));
2131   }
2132 };
2133 
2134 struct database_impl::extractor
2135 {
2136   virtual bool look_at_delta(roster_delta const & del) = 0;
2137   virtual void look_at_roster(roster_t const & roster, marking_map const & mm) = 0;
~extractordatabase_impl::extractor2138   virtual ~extractor() {};
2139 };
2140 
2141 struct database_impl::markings_extractor : public database_impl::extractor
2142 {
2143 private:
2144   node_id const & nid;
2145   const_marking_t & markings;
2146 
2147 public:
markings_extractordatabase_impl::markings_extractor2148   markings_extractor(node_id const & _nid, const_marking_t & _markings) :
2149     nid(_nid), markings(_markings) {} ;
2150 
look_at_deltadatabase_impl::markings_extractor2151   bool look_at_delta(roster_delta const & del)
2152   {
2153     return try_get_markings_from_roster_delta(del, nid, markings);
2154   }
2155 
look_at_rosterdatabase_impl::markings_extractor2156   void look_at_roster(roster_t const & roster, marking_map const & mm)
2157   {
2158     markings = mm.get_marking(nid);
2159   }
2160 };
2161 
2162 struct database_impl::file_content_extractor : database_impl::extractor
2163 {
2164 private:
2165   node_id const & nid;
2166   file_id & content;
2167 
2168 public:
file_content_extractordatabase_impl::file_content_extractor2169   file_content_extractor(node_id const & _nid, file_id & _content) :
2170     nid(_nid), content(_content) {} ;
2171 
look_at_deltadatabase_impl::file_content_extractor2172   bool look_at_delta(roster_delta const & del)
2173   {
2174     return try_get_content_from_roster_delta(del, nid, content);
2175   }
2176 
look_at_rosterdatabase_impl::file_content_extractor2177   void look_at_roster(roster_t const & roster, marking_map const & mm)
2178   {
2179     if (roster.has_node(nid))
2180       content = downcast_to_file_t(roster.get_node(nid))->content;
2181     else
2182       content = file_id();
2183   }
2184 };
2185 
2186 void
extract_from_deltas(revision_id const & ident,extractor & x)2187 database_impl::extract_from_deltas(revision_id const & ident, extractor & x)
2188 {
2189   reconstruction_path selected_path;
2190   {
2191     roster_reconstruction_graph graph(*this);
2192     {
2193       // we look at the nearest delta(s) first, without constructing the
2194       // whole path, as that would be a rather expensive operation.
2195       //
2196       // the reason why this strategy is worth the effort is, that in most
2197       // cases we are looking at the parent of a (content-)marked node, thus
2198       // the information we are for is right there in the delta leading to
2199       // this node.
2200       //
2201       // recording the deltas visited here in a set as to avoid inspecting
2202       // them later seems to be of little value, as it imposes a cost here,
2203       // but can seldom be exploited.
2204       set<id> deltas;
2205       graph.get_next(ident.inner(), deltas);
2206       for (set<id>::const_iterator i = deltas.begin();
2207            i != deltas.end(); ++i)
2208         {
2209           roster_delta del;
2210           get_roster_delta(ident.inner(), *i, del);
2211           bool found = x.look_at_delta(del);
2212           if (found)
2213             return;
2214         }
2215     }
2216     get_reconstruction_path(ident.inner(), graph, selected_path);
2217   }
2218 
2219   int path_length(selected_path.size());
2220   int i(0);
2221   id target_rev;
2222 
2223   for (reconstruction_path::const_iterator p = selected_path.begin();
2224        p != selected_path.end(); ++p)
2225     {
2226       if (i > 0)
2227         {
2228           roster_delta del;
2229           get_roster_delta(target_rev, id(*p), del);
2230           bool found = x.look_at_delta(del);
2231           if (found)
2232             return;
2233         }
2234       if (i == path_length-1)
2235         {
2236           // last iteration, we have reached a roster base
2237           roster_t roster;
2238           marking_map mm;
2239           get_roster_base(revision_id(*p), roster, mm);
2240           x.look_at_roster(roster, mm);
2241           return;
2242         }
2243       target_rev = id(*p);
2244       ++i;
2245     }
2246 }
2247 
2248 void
get_markings(revision_id const & id,node_id const & nid,const_marking_t & markings)2249 database::get_markings(revision_id const & id,
2250                        node_id const & nid,
2251                        const_marking_t & markings)
2252 {
2253   database_impl::markings_extractor x(nid, markings);
2254   imp->extract_from_deltas(id, x);
2255 }
2256 
2257 void
get_file_content(revision_id const & id,node_id const & nid,file_id & content)2258 database::get_file_content(revision_id const & id,
2259                            node_id const & nid,
2260                            file_id & content)
2261 {
2262   // the imaginary root revision doesn't have any file.
2263   if (null_id(id))
2264     {
2265       content = file_id();
2266       return;
2267     }
2268   database_impl::file_content_extractor x(nid, content);
2269   imp->extract_from_deltas(id, x);
2270 }
2271 
2272 void
get_roster_version(revision_id const & ros_id,cached_roster & cr)2273 database::get_roster_version(revision_id const & ros_id,
2274                              cached_roster & cr)
2275 {
2276   // if we already have it, exit early
2277   if (imp->roster_cache.exists(ros_id))
2278     {
2279       imp->roster_cache.fetch(ros_id, cr);
2280       return;
2281     }
2282 
2283   reconstruction_path selected_path;
2284   {
2285     roster_reconstruction_graph graph(*imp);
2286     get_reconstruction_path(ros_id.inner(), graph, selected_path);
2287   }
2288 
2289   id curr(selected_path.back());
2290   selected_path.pop_back();
2291   // we know that this isn't already in the cache (because of the early exit
2292   // above), so we should create new objects and spend time filling them in.
2293   shared_ptr<roster_t> roster(new roster_t);
2294   shared_ptr<marking_map> marking(new marking_map);
2295   imp->get_roster_base(revision_id(curr), *roster, *marking);
2296 
2297   for (reconstruction_path::reverse_iterator i = selected_path.rbegin();
2298        i != selected_path.rend(); ++i)
2299     {
2300       id const nxt(*i);
2301       if (global_sanity.debug_p())
2302         L(FL("following delta %s -> %s") % curr % nxt);
2303       roster_delta del;
2304       imp->get_roster_delta(nxt, curr, del);
2305       apply_roster_delta(del, *roster, *marking);
2306       curr = nxt;
2307     }
2308 
2309   // Double-check that the thing we got out looks okay.  We know that when
2310   // the roster was written to the database, it passed both of these tests,
2311   // and we also know that the data on disk has passed our checks for data
2312   // corruption -- so in theory, we know that what we got out is exactly
2313   // what we put in, and these checks are redundant.  (They cannot catch all
2314   // possible errors in any case, e.g., they don't test that the marking is
2315   // correct.)  What they can do, though, is serve as a sanity check on the
2316   // delta reconstruction code; if there is a bug where we put something
2317   // into the database and then later get something different back out, then
2318   // this is the only thing that can catch it.
2319   roster->check_sane_against(*marking);
2320   manifest_id expected_mid, actual_mid;
2321   get_revision_manifest(ros_id, expected_mid);
2322   calculate_ident(*roster, actual_mid);
2323   I(expected_mid == actual_mid);
2324 
2325   // const'ify the objects, to save them and pass them out
2326   cr.first = roster;
2327   cr.second = marking;
2328   imp->roster_cache.insert_clean(ros_id, cr);
2329 }
2330 
2331 
2332 void
drop(id const & ident,string const & table)2333 database_impl::drop(id const & ident,
2334                     string const & table)
2335 {
2336   string drop = "DELETE FROM " + table + " WHERE id = ?";
2337   execute(query(drop) % blob(ident()));
2338 }
2339 
2340 // ------------------------------------------------------------
2341 // --                                                        --
2342 // --              public interface follows                  --
2343 // --                                                        --
2344 // ------------------------------------------------------------
2345 
2346 bool
file_version_exists(file_id const & id)2347 database::file_version_exists(file_id const & id)
2348 {
2349   return delta_exists(id.inner(), "file_deltas")
2350     || imp->file_or_manifest_base_exists(id.inner(), "files");
2351 }
2352 
2353 bool
file_size_exists(file_id const & ident)2354 database::file_size_exists(file_id const & ident)
2355 {
2356   return imp->table_has_entry(ident.inner(), "id", "file_sizes");
2357 }
2358 
2359 bool
roster_version_exists(revision_id const & id)2360 database::roster_version_exists(revision_id const & id)
2361 {
2362   return delta_exists(id.inner(), "roster_deltas")
2363     || imp->roster_base_available(id);
2364 }
2365 
2366 bool
revision_exists(revision_id const & id)2367 database::revision_exists(revision_id const & id)
2368 {
2369   results res;
2370   query q("SELECT id FROM revisions WHERE id = ?");
2371   imp->fetch(res, one_col, any_rows, q % blob(id.inner()()));
2372   I(res.size() <= 1);
2373   return res.size() == 1;
2374 }
2375 
2376 void
get_file_ids(set<file_id> & ids)2377 database::get_file_ids(set<file_id> & ids)
2378 {
2379   ids.clear();
2380   set<id> tmp;
2381   imp->get_ids("files", tmp);
2382   imp->get_ids("file_deltas", tmp);
2383   add_decoration_to_container(tmp, ids);
2384 }
2385 
2386 void
get_revision_ids(set<revision_id> & ids)2387 database::get_revision_ids(set<revision_id> & ids)
2388 {
2389   ids.clear();
2390   set<id> tmp;
2391   imp->get_ids("revisions", tmp);
2392   add_decoration_to_container(tmp, ids);
2393 }
2394 
2395 void
get_roster_ids(set<revision_id> & ids)2396 database::get_roster_ids(set<revision_id> & ids)
2397 {
2398   ids.clear();
2399   set<id> tmp;
2400   imp->get_ids("rosters", tmp);
2401   add_decoration_to_container(tmp, ids);
2402   imp->get_ids("roster_deltas", tmp);
2403   add_decoration_to_container(tmp, ids);
2404 }
2405 
2406 void
get_file_version(file_id const & id,file_data & dat)2407 database::get_file_version(file_id const & id,
2408                            file_data & dat)
2409 {
2410   data tmp;
2411   imp->get_version(id.inner(), tmp, "files", "file_deltas");
2412   dat = file_data(tmp);
2413 }
2414 
2415 void
get_file_size(file_id const & ident,file_size & size)2416 database::get_file_size(file_id const & ident,
2417                         file_size & size)
2418 {
2419   results res;
2420   query q("SELECT size FROM file_sizes WHERE id = ?");
2421   imp->fetch(res, one_col, one_row, q % blob(ident.inner()()));
2422   I(!res.empty());
2423   size = lexical_cast<u64>(res[0][0]);
2424 }
2425 
2426 void
get_file_sizes(roster_t const & roster,map<file_id,file_size> & sizes)2427 database::get_file_sizes(roster_t const & roster,
2428                          map<file_id, file_size> & sizes)
2429 {
2430   sizes.clear();
2431 
2432   vector<file_id> all_file_ids;
2433   node_map const & nodes = roster.all_nodes();
2434   for (node_map::const_iterator i = nodes.begin(); i != nodes.end(); ++i)
2435     {
2436       node_id nid = i->first;
2437       if (!is_file_t(i->second))
2438         continue;
2439 
2440       file_t file = downcast_to_file_t(i->second);
2441       // filtering out already existing file ids make the whole
2442       // process slower than "accidentially" querying a double
2443       // file id later twice or thrice
2444       all_file_ids.push_back(file->content);
2445     }
2446 
2447   // The overall runtime does not improve significantly after ~15, so
2448   // 20 is a good guess. Note that large numbers over 1000 might even
2449   // lead to sqlite errors like "too many SQL max_variables"
2450   size_t max_variables = 20;
2451   for (size_t i = 0; i < all_file_ids.size(); )
2452     {
2453       results res;
2454 
2455       size_t variables = all_file_ids.size() - i > max_variables
2456         ? max_variables
2457         : all_file_ids.size() - i;
2458       I(variables > 0);
2459 
2460       query q;
2461       string placeholders = "";
2462       for (size_t j=i; j< i + variables; ++j)
2463         {
2464           placeholders += "?,";
2465           q.args.push_back(blob(all_file_ids[j].inner()()));
2466         }
2467 
2468       q.sql_cmd = "SELECT id, size FROM file_sizes "
2469                   "WHERE id IN(" + placeholders +"null)";
2470 
2471       imp->fetch(res, 2, any_rows, q);
2472       I(!res.empty());
2473 
2474       for (size_t k=0; k<res.size(); ++k)
2475         {
2476           file_id ident(res[k][0], origin::database);
2477           u64 size = lexical_cast<u64>(res[k][1]);
2478           sizes.insert(make_pair(ident, size));
2479         }
2480 
2481       i+= variables;
2482     }
2483 }
2484 
2485 void
get_manifest_version(manifest_id const & id,manifest_data & dat)2486 database::get_manifest_version(manifest_id const & id,
2487                                manifest_data & dat)
2488 {
2489   data tmp;
2490   imp->get_version(id.inner(), tmp, "manifests", "manifest_deltas");
2491   dat = manifest_data(tmp);
2492 }
2493 
2494 void
put_file(file_id const & id,file_data const & dat)2495 database::put_file(file_id const & id,
2496                    file_data const & dat)
2497 {
2498   if (file_version_exists(id))
2499     {
2500       L(FL("file version '%s' already exists in db") % id);
2501       return;
2502     }
2503   imp->schedule_delayed_file(id, dat);
2504   imp->put_file_size(id, dat);
2505 }
2506 
// Record that applying DEL to the file OLD_ID yields the file NEW_ID.
//
// OLD_ID and NEW_ID must differ.  If OLD_ID is not present in the
// database, a warning is emitted and the delta is dropped.  Otherwise the
// database variable "delta-direction" in domain "database" ("reverse",
// "forward" or "both"; default "reverse") decides which deltas get stored.
// All writes happen inside one transaction guard.
void
database::put_file_version(file_id const & old_id,
                           file_id const & new_id,
                           file_delta const & del)
{
  I(!(old_id == new_id));

  if (!file_version_exists(old_id))
    {
      W(F("file preimage '%s' missing in db") % old_id);
      W(F("dropping delta '%s' -> '%s'") % old_id % new_id);
      return;
    }

  // "reverse" unless the database variable says otherwise
  var_value delta_direction("reverse");
  var_key key(var_domain("database"), var_name("delta-direction"));
  if (var_exists(key))
    {
      get_var(key, delta_direction);
    }
  bool make_reverse_deltas(delta_direction() == "reverse" ||
                           delta_direction() == "both");
  bool make_forward_deltas(delta_direction() == "forward" ||
                           delta_direction() == "both");
  if (!make_reverse_deltas && !make_forward_deltas)
    {
      W(F("unknown delta direction '%s'; assuming 'reverse'. Valid "
          "values are 'reverse', 'forward', 'both'.") % delta_direction);
      make_reverse_deltas = true;
    }

  // build the new full text by applying the incoming delta to the old one
  file_data old_data, new_data;
  get_file_version(old_id, old_data);
  {
    data tmp;
    patch(old_data.inner(), del.inner(), tmp);
    new_data = file_data(tmp);
  }

  // the reverse delta (new -> old) is always computed and verified, even
  // if only forward deltas end up being stored
  file_delta reverse_delta;
  {
    string tmp;
    invert_xdelta(old_data.inner()(), del.inner()(), tmp);
    reverse_delta = file_delta(tmp, origin::database);
    data old_tmp;
    patch(new_data.inner(), reverse_delta.inner(), old_tmp);
    // We already have the real old data, so compare the
    // reconstruction to that directly instead of hashing
    // the reconstruction and comparing hashes.
    I(old_tmp == old_data.inner());
  }

  transaction_guard guard(*this);
  if (make_reverse_deltas)
    {
      // the new version is kept as a full text, and the old version is
      // stored as a delta with the new version as its base
      if (!file_or_manifest_base_exists(new_id, "files"))
        {
          imp->schedule_delayed_file(new_id, new_data);
          imp->put_file_size(new_id, new_data);
        }
      if (!imp->delta_exists(old_id, new_id, "file_deltas"))
        {
          put_file_delta(old_id, new_id, reverse_delta);
        }
    }
  if (make_forward_deltas)
    {
      // the new version is stored as a delta with the old version as its
      // base; its size is still recorded if no full text exists for it
      if (!file_or_manifest_base_exists(new_id, "files"))
        {
          imp->put_file_size(new_id, new_data);
        }
      if (!imp->delta_exists(new_id, old_id, "file_deltas"))
        {
          put_file_delta(new_id, old_id, del);
        }
    }
  else
    {
      // no forward deltas wanted: remove every "file_deltas" row whose id
      // is new_id
      imp->drop(new_id.inner(), "file_deltas");
    }
  if (file_or_manifest_base_exists(old_id, "files"))
    {
      // descendent of a head version replaces the head, therefore old head
      // must be disposed of
      if (delta_exists(old_id.inner(), "file_deltas"))
        imp->drop_or_cancel_file(old_id);
    }
  guard.commit();
}
2596 
2597 void
get_arbitrary_file_delta(file_id const & src_id,file_id const & dst_id,file_delta & del)2598 database::get_arbitrary_file_delta(file_id const & src_id,
2599                                    file_id const & dst_id,
2600                                    file_delta & del)
2601 {
2602   delta dtmp;
2603   // Deltas stored in the database go from base -> id.
2604   results res;
2605   query q1("SELECT delta FROM file_deltas "
2606            "WHERE base = ? AND id = ?");
2607   imp->fetch(res, one_col, any_rows,
2608              q1 % blob(src_id.inner()()) % blob(dst_id.inner()()));
2609 
2610   if (!res.empty())
2611     {
2612       // Exact hit: a plain delta from src -> dst.
2613       gzip<delta> del_packed(res[0][0], origin::database);
2614       decode_gzip(del_packed, dtmp);
2615       del = file_delta(dtmp);
2616       return;
2617     }
2618 
2619   query q2("SELECT delta FROM file_deltas "
2620            "WHERE base = ? AND id = ?");
2621   imp->fetch(res, one_col, any_rows,
2622              q2 % blob(dst_id.inner()()) % blob(src_id.inner()()));
2623 
2624   if (!res.empty())
2625     {
2626       // We have a delta from dst -> src; we need to
2627       // invert this to a delta from src -> dst.
2628       gzip<delta> del_packed(res[0][0], origin::database);
2629       decode_gzip(del_packed, dtmp);
2630       string fwd_delta;
2631       file_data dst;
2632       get_file_version(dst_id, dst);
2633       invert_xdelta(dst.inner()(), dtmp(), fwd_delta);
2634       del = file_delta(fwd_delta, origin::database);
2635       return;
2636     }
2637 
2638   // No deltas of use; just load both versions and diff.
2639   file_data fd1, fd2;
2640   get_file_version(src_id, fd1);
2641   get_file_version(dst_id, fd2);
2642   diff(fd1.inner(), fd2.inner(), dtmp);
2643   del = file_delta(dtmp);
2644 }
2645 
2646 
2647 void
get_forward_ancestry(rev_ancestry_map & graph)2648 database::get_forward_ancestry(rev_ancestry_map & graph)
2649 {
2650   // share some storage
2651   id::symtab id_syms;
2652 
2653   results res;
2654   graph.clear();
2655   imp->fetch(res, 2, any_rows,
2656              query("SELECT parent,child FROM revision_ancestry"));
2657   for (size_t i = 0; i < res.size(); ++i)
2658     graph.insert(make_pair(revision_id(res[i][0], origin::database),
2659                            revision_id(res[i][1], origin::database)));
2660 }
2661 
2662 void
get_reverse_ancestry(rev_ancestry_map & graph)2663 database::get_reverse_ancestry(rev_ancestry_map & graph)
2664 {
2665   // share some storage
2666   id::symtab id_syms;
2667 
2668   results res;
2669   graph.clear();
2670   imp->fetch(res, 2, any_rows,
2671              query("SELECT child,parent FROM revision_ancestry"));
2672   for (size_t i = 0; i < res.size(); ++i)
2673     graph.insert(make_pair(revision_id(res[i][0], origin::database),
2674                            revision_id(res[i][1], origin::database)));
2675 }
2676 
2677 void
get_revision_parents(revision_id const & id,set<revision_id> & parents)2678 database::get_revision_parents(revision_id const & id,
2679                                set<revision_id> & parents)
2680 {
2681   I(!null_id(id));
2682   parent_id_map::iterator i = imp->parent_cache.find(id);
2683   if (i == imp->parent_cache.end())
2684     {
2685       results res;
2686       parents.clear();
2687       imp->fetch(res, one_col, any_rows,
2688                  query("SELECT parent FROM revision_ancestry WHERE child = ?")
2689                  % blob(id.inner()()));
2690       for (size_t i = 0; i < res.size(); ++i)
2691         parents.insert(revision_id(res[i][0], origin::database));
2692 
2693       imp->parent_cache.insert(make_pair(id, parents));
2694     }
2695   else
2696     {
2697       parents = i->second;
2698     }
2699 }
2700 
2701 void
get_revision_children(revision_id const & id,set<revision_id> & children)2702 database::get_revision_children(revision_id const & id,
2703                                 set<revision_id> & children)
2704 {
2705   results res;
2706   children.clear();
2707   imp->fetch(res, one_col, any_rows,
2708              query("SELECT child FROM revision_ancestry WHERE parent = ?")
2709         % blob(id.inner()()));
2710   for (size_t i = 0; i < res.size(); ++i)
2711     children.insert(revision_id(res[i][0], origin::database));
2712 }
2713 
2714 void
get_leaves(set<revision_id> & leaves)2715 database::get_leaves(set<revision_id> & leaves)
2716 {
2717   results res;
2718   leaves.clear();
2719   imp->fetch(res, one_col, any_rows,
2720              query("SELECT revisions.id FROM revisions "
2721                    "LEFT JOIN revision_ancestry "
2722                    "ON revisions.id = revision_ancestry.parent "
2723                    "WHERE revision_ancestry.child IS null"));
2724   for (size_t i = 0; i < res.size(); ++i)
2725     leaves.insert(revision_id(res[i][0], origin::database));
2726 }
2727 
2728 
2729 void
get_revision_manifest(revision_id const & rid,manifest_id & mid)2730 database::get_revision_manifest(revision_id const & rid,
2731                                manifest_id & mid)
2732 {
2733   revision_t rev;
2734   get_revision(rid, rev);
2735   mid = rev.new_manifest;
2736 }
2737 
2738 void
get_common_ancestors(std::set<revision_id> const & revs,std::set<revision_id> & common_ancestors)2739 database::get_common_ancestors(std::set<revision_id> const & revs,
2740                                std::set<revision_id> & common_ancestors)
2741 {
2742   set<revision_id> ancestors, all_common_ancestors;
2743   vector<revision_id> frontier;
2744   for (set<revision_id>::const_iterator i = revs.begin();
2745        i != revs.end(); ++i)
2746     {
2747       I(revision_exists(*i));
2748       ancestors.clear();
2749       ancestors.insert(*i);
2750       frontier.push_back(*i);
2751       while (!frontier.empty())
2752         {
2753           revision_id rid = frontier.back();
2754           frontier.pop_back();
2755           if(!null_id(rid))
2756             {
2757               set<revision_id> parents;
2758               get_revision_parents(rid, parents);
2759               for (set<revision_id>::const_iterator i = parents.begin();
2760                    i != parents.end(); ++i)
2761                 {
2762                   if (ancestors.find(*i) == ancestors.end())
2763                     {
2764                       frontier.push_back(*i);
2765                       ancestors.insert(*i);
2766                     }
2767                 }
2768             }
2769         }
2770       if (all_common_ancestors.empty())
2771         all_common_ancestors = ancestors;
2772       else
2773         {
2774           set<revision_id> common;
2775           set_intersection(ancestors.begin(), ancestors.end(),
2776                          all_common_ancestors.begin(), all_common_ancestors.end(),
2777                          inserter(common, common.begin()));
2778           all_common_ancestors = common;
2779         }
2780     }
2781 
2782   for (set<revision_id>::const_iterator i = all_common_ancestors.begin();
2783        i != all_common_ancestors.end(); ++i)
2784     {
2785       // null id's here come from the empty parents of root revisions.
2786       // these should not be considered as common ancestors and are skipped.
2787       if (null_id(*i)) continue;
2788       common_ancestors.insert(*i);
2789     }
2790 }
2791 
2792 bool
is_a_ancestor_of_b(revision_id const & ancestor,revision_id const & child)2793 database::is_a_ancestor_of_b(revision_id const & ancestor,
2794                              revision_id const & child)
2795 {
2796   if (ancestor == child)
2797     return false;
2798 
2799   rev_height anc_height;
2800   rev_height child_height;
2801   get_rev_height(ancestor, anc_height);
2802   get_rev_height(child, child_height);
2803 
2804   if (anc_height > child_height)
2805     return false;
2806 
2807 
2808   vector<revision_id> todo;
2809   todo.push_back(ancestor);
2810   set<revision_id> seen;
2811   while (!todo.empty())
2812     {
2813       revision_id anc = todo.back();
2814       todo.pop_back();
2815       set<revision_id> anc_children;
2816       get_revision_children(anc, anc_children);
2817       for (set<revision_id>::const_iterator i = anc_children.begin();
2818            i != anc_children.end(); ++i)
2819         {
2820           if (*i == child)
2821             return true;
2822           else if (seen.find(*i) != seen.end())
2823             continue;
2824           else
2825             {
2826               get_rev_height(*i, anc_height);
2827               if (child_height > anc_height)
2828                 {
2829                   seen.insert(*i);
2830                   todo.push_back(*i);
2831                 }
2832             }
2833         }
2834     }
2835   return false;
2836 }
2837 
2838 void
get_revision(revision_id const & id,revision_t & rev)2839 database::get_revision(revision_id const & id,
2840                        revision_t & rev)
2841 {
2842   revision_data d;
2843   get_revision(id, d);
2844   read_revision(d, rev);
2845 }
2846 
2847 void
get_revision(revision_id const & id,revision_data & dat)2848 database::get_revision(revision_id const & id,
2849                        revision_data & dat)
2850 {
2851   I(!null_id(id));
2852   results res;
2853   imp->fetch(res, one_col, one_row,
2854              query("SELECT data FROM revisions WHERE id = ?")
2855              % blob(id.inner()()));
2856   I(res.size() == 1);
2857   gzip<data> gzdata(res[0][0], origin::database);
2858   data rdat;
2859   decode_gzip(gzdata,rdat);
2860 
2861   // verify that we got a revision with the right id
2862   {
2863     revision_id tmp;
2864     calculate_ident(revision_data(rdat), tmp);
2865     E(id == tmp, origin::database,
2866       F("revision does not match hash"));
2867   }
2868 
2869   dat = revision_data(rdat);
2870 }
2871 
2872 void
get_rev_height(revision_id const & id,rev_height & height)2873 database::get_rev_height(revision_id const & id,
2874                          rev_height & height)
2875 {
2876   if (null_id(id))
2877     {
2878       height = rev_height::root_height();
2879       return;
2880     }
2881 
2882   height_map::const_iterator i = imp->height_cache.find(id);
2883   if (i == imp->height_cache.end())
2884     {
2885       results res;
2886       imp->fetch(res, one_col, one_row,
2887                  query("SELECT height FROM heights WHERE revision = ?")
2888                  % blob(id.inner()()));
2889 
2890       I(res.size() == 1);
2891 
2892       height = rev_height(res[0][0]);
2893       imp->height_cache.insert(make_pair(id, height));
2894     }
2895   else
2896     {
2897       height = i->second;
2898     }
2899 
2900   I(height.valid());
2901 }
2902 
2903 void
put_rev_height(revision_id const & id,rev_height const & height)2904 database::put_rev_height(revision_id const & id,
2905                          rev_height const & height)
2906 {
2907   I(!null_id(id));
2908   I(revision_exists(id));
2909   I(height.valid());
2910 
2911   imp->height_cache.erase(id);
2912 
2913   imp->execute(query("INSERT INTO heights VALUES(?, ?)")
2914                % blob(id.inner()())
2915                % blob(height()));
2916 }
2917 
2918 bool
has_rev_height(rev_height const & height)2919 database::has_rev_height(rev_height const & height)
2920 {
2921   results res;
2922   imp->fetch(res, one_col, any_rows,
2923              query("SELECT height FROM heights WHERE height = ?")
2924              % blob(height()));
2925   I((res.size() == 1) || (res.empty()));
2926   return res.size() == 1;
2927 }
2928 
2929 void
deltify_revision(revision_id const & rid)2930 database::deltify_revision(revision_id const & rid)
2931 {
2932   transaction_guard guard(*this);
2933   revision_t rev;
2934   MM(rev);
2935   MM(rid);
2936   get_revision(rid, rev);
2937   // Make sure that all parent revs have their files replaced with deltas
2938   // from this rev's files.
2939   {
2940     for (edge_map::const_iterator i = rev.edges.begin();
2941          i != rev.edges.end(); ++i)
2942       {
2943         for (map<file_path, pair<file_id, file_id> >::const_iterator
2944                j = edge_changes(i).deltas_applied.begin();
2945              j != edge_changes(i).deltas_applied.end(); ++j)
2946           {
2947             file_id old_id(delta_entry_src(j));
2948             file_id new_id(delta_entry_dst(j));
2949             // if not yet deltified
2950             if (file_or_manifest_base_exists(old_id, "files") &&
2951                 file_version_exists(new_id))
2952               {
2953                 file_data old_data;
2954                 file_data new_data;
2955                 get_file_version(old_id, old_data);
2956                 get_file_version(new_id, new_data);
2957                 delta delt;
2958                 diff(old_data.inner(), new_data.inner(), delt);
2959                 file_delta del(delt);
2960                 imp->drop_or_cancel_file(new_id);
2961                 imp->drop(new_id.inner(), "file_deltas");
2962                 put_file_version(old_id, new_id, del);
2963               }
2964           }
2965       }
2966   }
2967   guard.commit();
2968 }
2969 
2970 
2971 bool
put_revision(revision_id const & new_id,revision_t const & rev)2972 database::put_revision(revision_id const & new_id,
2973                        revision_t const & rev)
2974 {
2975   MM(new_id);
2976   MM(rev);
2977 
2978   I(!null_id(new_id));
2979 
2980   if (revision_exists(new_id))
2981     {
2982       if (global_sanity.debug_p())
2983         L(FL("revision '%s' already exists in db") % new_id);
2984       return false;
2985     }
2986 
2987   I(rev.made_for == made_for_database);
2988   rev.check_sane();
2989 
2990   // Phase 1: confirm the revision makes sense, and the required files
2991   // actually exist
2992   for (edge_map::const_iterator i = rev.edges.begin();
2993        i != rev.edges.end(); ++i)
2994     {
2995       if (!edge_old_revision(i).inner()().empty()
2996           && !revision_exists(edge_old_revision(i)))
2997         {
2998           W(F("missing prerequisite revision %s")
2999             % edge_old_revision(i));
3000           W(F("dropping revision %s") % new_id);
3001           return false;
3002         }
3003 
3004       for (map<file_path, file_id>::const_iterator a
3005              = edge_changes(i).files_added.begin();
3006            a != edge_changes(i).files_added.end(); ++a)
3007         {
3008           if (! file_version_exists(a->second))
3009             {
3010               W(F("missing prerequisite file %s") % a->second);
3011               W(F("dropping revision %s") % new_id);
3012               return false;
3013             }
3014         }
3015 
3016       for (map<file_path, pair<file_id, file_id> >::const_iterator d
3017              = edge_changes(i).deltas_applied.begin();
3018            d != edge_changes(i).deltas_applied.end(); ++d)
3019         {
3020           I(!delta_entry_src(d).inner()().empty());
3021           I(!delta_entry_dst(d).inner()().empty());
3022 
3023           if (! file_version_exists(delta_entry_src(d)))
3024             {
3025               W(F("missing prerequisite file pre-delta %s")
3026                 % delta_entry_src(d));
3027               W(F("dropping revision %s") % new_id);
3028               return false;
3029             }
3030 
3031           if (! file_version_exists(delta_entry_dst(d)))
3032             {
3033               W(F("missing prerequisite file post-delta %s")
3034                 % delta_entry_dst(d));
3035               W(F("dropping revision %s") % new_id);
3036               return false;
3037             }
3038         }
3039     }
3040 
3041   transaction_guard guard(*this);
3042 
3043   // Phase 2: Write the revision data (inside a transaction)
3044 
3045   revision_data d;
3046   write_revision(rev, d);
3047   gzip<data> d_packed;
3048   encode_gzip(d.inner(), d_packed);
3049   imp->execute(query("INSERT INTO revisions VALUES(?, ?)")
3050                % blob(new_id.inner()())
3051                % blob(d_packed()));
3052 
3053   for (edge_map::const_iterator e = rev.edges.begin();
3054        e != rev.edges.end(); ++e)
3055     {
3056       imp->execute(query("INSERT INTO revision_ancestry VALUES(?, ?)")
3057                    % blob(edge_old_revision(e).inner()())
3058                    % blob(new_id.inner()()));
3059     }
3060   // We don't have to clear out the child's entry in the parent_cache,
3061   // because the child did not exist before this function was called, so
3062   // it can't be in the parent_cache already.
3063 
3064   // Phase 3: Construct and write the roster (which also checks the manifest
3065   // id as it goes), but only if the roster does not already exist in the db
3066   // (i.e. because it was left over by a kill_rev_locally)
3067   // FIXME: there is no knowledge yet on speed implications for commands which
3068   // put a lot of revisions in a row (i.e. tailor or cvs_import)!
3069 
3070   if (!roster_version_exists(new_id))
3071     {
3072       put_roster_for_revision(new_id, rev);
3073     }
3074   else
3075     {
3076       L(FL("roster for revision '%s' already exists in db") % new_id);
3077     }
3078 
3079   // Phase 4: rewrite any files that need deltas added
3080 
3081   deltify_revision(new_id);
3082 
3083   // Phase 5: determine the revision height
3084 
3085   put_height_for_revision(new_id, rev);
3086 
3087   // Finally, commit.
3088 
3089   guard.commit();
3090   return true;
3091 }
3092 
3093 void
put_height_for_revision(revision_id const & new_id,revision_t const & rev)3094 database::put_height_for_revision(revision_id const & new_id,
3095                                   revision_t const & rev)
3096 {
3097   I(!null_id(new_id));
3098 
3099   rev_height highest_parent;
3100   // we always branch off the highest parent ...
3101   for (edge_map::const_iterator e = rev.edges.begin();
3102        e != rev.edges.end(); ++e)
3103     {
3104       rev_height parent; MM(parent);
3105       get_rev_height(edge_old_revision(e), parent);
3106       if (parent > highest_parent)
3107       {
3108         highest_parent = parent;
3109       }
3110     }
3111 
3112   // ... then find the first unused child
3113   u32 childnr(0);
3114   rev_height candidate; MM(candidate);
3115   while(true)
3116     {
3117       candidate = highest_parent.child_height(childnr);
3118       if (!has_rev_height(candidate))
3119         {
3120           break;
3121         }
3122       I(childnr < std::numeric_limits<u32>::max());
3123       ++childnr;
3124     }
3125   put_rev_height(new_id, candidate);
3126 }
3127 
3128 void
put_file_sizes_for_revision(revision_t const & rev)3129 database::put_file_sizes_for_revision(revision_t const & rev)
3130 {
3131   for (edge_map::const_iterator i = rev.edges.begin(); i != rev.edges.end(); ++i)
3132     {
3133       cset const & cs = edge_changes(*i);
3134 
3135       for (map<file_path, file_id>::const_iterator i = cs.files_added.begin();
3136            i != cs.files_added.end(); ++i)
3137         {
3138           file_data dat;
3139           get_file_version(i->second, dat);
3140           imp->put_file_size(i->second, dat);
3141         }
3142 
3143       for (map<file_path, pair<file_id, file_id> >::const_iterator
3144            i = cs.deltas_applied.begin(); i != cs.deltas_applied.end(); ++i)
3145         {
3146           file_data dat;
3147           get_file_version(i->second.second, dat);
3148           imp->put_file_size(i->second.second, dat);
3149         }
3150     }
3151 }
3152 
3153 void
put_roster_for_revision(revision_id const & new_id,revision_t const & rev)3154 database::put_roster_for_revision(revision_id const & new_id,
3155                                   revision_t const & rev)
3156 {
3157   // Construct, the roster, sanity-check the manifest id, and then write it
3158   // to the db
3159   shared_ptr<roster_t> ros_writeable(new roster_t); MM(*ros_writeable);
3160   shared_ptr<marking_map> mm_writeable(new marking_map); MM(*mm_writeable);
3161   manifest_id roster_manifest_id;
3162   MM(roster_manifest_id);
3163   make_roster_for_revision(*this, rev, new_id, *ros_writeable, *mm_writeable);
3164   calculate_ident(*ros_writeable, roster_manifest_id, false);
3165   E(rev.new_manifest == roster_manifest_id, rev.made_from,
3166     F("revision contains incorrect manifest_id"));
3167   // const'ify the objects, suitable for caching etc.
3168   roster_t_cp ros = ros_writeable;
3169   marking_map_cp mm = mm_writeable;
3170   put_roster(new_id, rev, ros, mm);
3171 }
3172 
3173 bool
put_revision(revision_id const & new_id,revision_data const & dat)3174 database::put_revision(revision_id const & new_id,
3175                        revision_data const & dat)
3176 {
3177   revision_t rev;
3178   read_revision(dat, rev);
3179   return put_revision(new_id, rev);
3180 }
3181 
3182 
3183 void
delete_existing_revs_and_certs()3184 database::delete_existing_revs_and_certs()
3185 {
3186   imp->execute(query("DELETE FROM revisions"));
3187   imp->execute(query("DELETE FROM revision_ancestry"));
3188   imp->execute(query("DELETE FROM revision_certs"));
3189   imp->execute(query("DELETE FROM branch_leaves"));
3190 }
3191 
3192 void
delete_existing_manifests()3193 database::delete_existing_manifests()
3194 {
3195   imp->execute(query("DELETE FROM manifests"));
3196   imp->execute(query("DELETE FROM manifest_deltas"));
3197 }
3198 
3199 void
delete_existing_rosters()3200 database::delete_existing_rosters()
3201 {
3202   imp->execute(query("DELETE FROM rosters"));
3203   imp->execute(query("DELETE FROM roster_deltas"));
3204   imp->execute(query("DELETE FROM next_roster_node_number"));
3205 }
3206 
3207 void
delete_existing_heights()3208 database::delete_existing_heights()
3209 {
3210   imp->execute(query("DELETE FROM heights"));
3211 }
3212 
3213 void
delete_existing_branch_leaves()3214 database::delete_existing_branch_leaves()
3215 {
3216   imp->execute(query("DELETE FROM branch_leaves"));
3217 }
3218 
3219 void
delete_existing_file_sizes()3220 database::delete_existing_file_sizes()
3221 {
3222   imp->execute(query("DELETE FROM file_sizes"));
3223 }
3224 
3225 /// Deletes one revision from the local database.
3226 /// @see kill_rev_locally
3227 void
delete_existing_rev_and_certs(revision_id const & rid)3228 database::delete_existing_rev_and_certs(revision_id const & rid)
3229 {
3230   transaction_guard guard (*this);
3231 
3232   // Check that the revision exists and doesn't have any children.
3233   I(revision_exists(rid));
3234   set<revision_id> children;
3235   get_revision_children(rid, children);
3236   I(children.empty());
3237 
3238 
3239   L(FL("Killing revision %s locally") % rid);
3240 
3241   // Kill the certs, ancestry, and revision.
3242   imp->execute(query("DELETE from revision_certs WHERE revision_id = ?")
3243                % blob(rid.inner()()));
3244   {
3245     results res;
3246     imp->fetch(res, one_col, any_rows,
3247                query("SELECT branch FROM branch_leaves where revision_id = ?")
3248                % blob(rid.inner()()));
3249     for (results::const_iterator i = res.begin(); i != res.end(); ++i)
3250       {
3251         recalc_branch_leaves(cert_value((*i)[0], origin::database));
3252       }
3253   }
3254   imp->cert_stamper.note_change();
3255 
3256   imp->execute(query("DELETE from revision_ancestry WHERE child = ?")
3257                % blob(rid.inner()()));
3258 
3259   imp->execute(query("DELETE from heights WHERE revision = ?")
3260                % blob(rid.inner()()));
3261 
3262   imp->execute(query("DELETE from revisions WHERE id = ?")
3263                % blob(rid.inner()()));
3264 
3265   guard.commit();
3266 }
3267 
3268 void
compute_branch_leaves(cert_value const & branch_name,set<revision_id> & revs)3269 database::compute_branch_leaves(cert_value const & branch_name, set<revision_id> & revs)
3270 {
3271   imp->execute(query("DELETE FROM branch_leaves WHERE branch = ?") % blob(branch_name()));
3272   get_revisions_with_cert(cert_name("branch"), branch_name, revs);
3273   erase_ancestors(*this, revs);
3274 }
3275 
3276 void
recalc_branch_leaves(cert_value const & branch_name)3277 database::recalc_branch_leaves(cert_value const & branch_name)
3278 {
3279   imp->execute(query("DELETE FROM branch_leaves WHERE branch = ?") % blob(branch_name()));
3280   set<revision_id> revs;
3281   compute_branch_leaves(branch_name, revs);
3282   for (set<revision_id>::const_iterator i = revs.begin(); i != revs.end(); ++i)
3283     {
3284       imp->execute(query("INSERT INTO branch_leaves (branch, revision_id) "
3285                          "VALUES (?, ?)") % blob(branch_name()) % blob((*i).inner()()));
3286     }
3287 }
3288 
delete_certs_locally(revision_id const & rev,cert_name const & name)3289 void database::delete_certs_locally(revision_id const & rev,
3290                                     cert_name const & name)
3291 {
3292   imp->execute(query("DELETE FROM revision_certs WHERE revision_id = ? AND name = ?")
3293                % blob(rev.inner()()) % text(name()));
3294   imp->cert_stamper.note_change();
3295 }
delete_certs_locally(revision_id const & rev,cert_name const & name,cert_value const & value)3296 void database::delete_certs_locally(revision_id const & rev,
3297                                     cert_name const & name,
3298                                     cert_value const & value)
3299 {
3300   imp->execute(query("DELETE FROM revision_certs WHERE revision_id = ? AND name = ? AND value = ?")
3301                % blob(rev.inner()()) % text(name()) % blob(value()));
3302   imp->cert_stamper.note_change();
3303 }
3304 
3305 // crypto key management
3306 
3307 void
get_key_ids(vector<key_id> & pubkeys)3308 database::get_key_ids(vector<key_id> & pubkeys)
3309 {
3310   pubkeys.clear();
3311   results res;
3312 
3313   imp->fetch(res, one_col, any_rows, query("SELECT id FROM public_keys"));
3314 
3315   for (size_t i = 0; i < res.size(); ++i)
3316     pubkeys.push_back(key_id(res[i][0], origin::database));
3317 }
3318 
3319 void
get_keys(string const & table,vector<key_name> & keys)3320 database_impl::get_keys(string const & table, vector<key_name> & keys)
3321 {
3322   keys.clear();
3323   results res;
3324   fetch(res, one_col, any_rows, query("SELECT id FROM " + table));
3325   for (size_t i = 0; i < res.size(); ++i)
3326     keys.push_back(key_name(res[i][0], origin::database));
3327 }
3328 
3329 void
get_public_keys(vector<key_name> & keys)3330 database::get_public_keys(vector<key_name> & keys)
3331 {
3332   imp->get_keys("public_keys", keys);
3333 }
3334 
3335 bool
public_key_exists(key_id const & hash)3336 database::public_key_exists(key_id const & hash)
3337 {
3338   MM(hash);
3339   results res;
3340   imp->fetch(res, one_col, any_rows,
3341              query("SELECT id FROM public_keys WHERE id = ?")
3342              % blob(hash.inner()()));
3343   I((res.size() == 1) || (res.empty()));
3344   if (res.size() == 1)
3345     return true;
3346   return false;
3347 }
3348 
3349 bool
public_key_exists(key_name const & id)3350 database::public_key_exists(key_name const & id)
3351 {
3352   MM(id);
3353   results res;
3354   imp->fetch(res, one_col, any_rows,
3355              query("SELECT id FROM public_keys WHERE name = ?")
3356              % text(id()));
3357   I((res.size() == 1) || (res.empty()));
3358   if (res.size() == 1)
3359     return true;
3360   return false;
3361 }
3362 
3363 void
get_pubkey(key_id const & hash,key_name & id,rsa_pub_key & pub)3364 database::get_pubkey(key_id const & hash,
3365                      key_name & id,
3366                      rsa_pub_key & pub)
3367 {
3368   MM(hash);
3369   results res;
3370   imp->fetch(res, 2, one_row,
3371              query("SELECT name, keydata FROM public_keys WHERE id = ?")
3372              % blob(hash.inner()()));
3373   id = key_name(res[0][0], origin::database);
3374   pub = rsa_pub_key(res[0][1], origin::database);
3375 }
3376 
3377 void
get_key(key_id const & pub_id,rsa_pub_key & pub)3378 database::get_key(key_id const & pub_id,
3379                   rsa_pub_key & pub)
3380 {
3381   MM(pub_id);
3382   results res;
3383   imp->fetch(res, one_col, one_row,
3384              query("SELECT keydata FROM public_keys WHERE id = ?")
3385              % blob(pub_id.inner()()));
3386   pub = rsa_pub_key(res[0][0], origin::database);
3387 }
3388 
3389 bool
put_key(key_name const & pub_id,rsa_pub_key const & pub)3390 database::put_key(key_name const & pub_id,
3391                   rsa_pub_key const & pub)
3392 {
3393   MM(pub_id);
3394   MM(pub);
3395   key_id thash;
3396   key_hash_code(pub_id, pub, thash);
3397 
3398   if (public_key_exists(thash))
3399     {
3400       L(FL("skipping existing public key %s") % pub_id);
3401       return false;
3402     }
3403 
3404   L(FL("putting public key %s") % pub_id);
3405 
3406   imp->execute(query("INSERT INTO public_keys(id, name, keydata) VALUES(?, ?, ?)")
3407                % blob(thash.inner()())
3408                % text(pub_id())
3409                % blob(pub()));
3410 
3411   return true;
3412 }
3413 
3414 void
delete_public_key(key_id const & pub_id)3415 database::delete_public_key(key_id const & pub_id)
3416 {
3417   MM(pub_id);
3418   imp->execute(query("DELETE FROM public_keys WHERE id = ?")
3419                % blob(pub_id.inner()()));
3420 }
3421 
3422 void
encrypt_rsa(key_id const & pub_id,string const & plaintext,rsa_oaep_sha_data & ciphertext)3423 database::encrypt_rsa(key_id const & pub_id,
3424                       string const & plaintext,
3425                       rsa_oaep_sha_data & ciphertext)
3426 {
3427   MM(pub_id);
3428   rsa_pub_key pub;
3429   get_key(pub_id, pub);
3430 
3431   SecureVector<Botan::byte> pub_block
3432     (reinterpret_cast<Botan::byte const *>(pub().data()), pub().size());
3433 
3434   shared_ptr<X509_PublicKey> x509_key(Botan::X509::load_key(pub_block));
3435   shared_ptr<RSA_PublicKey> pub_key
3436     = dynamic_pointer_cast<RSA_PublicKey>(x509_key);
3437   if (!pub_key)
3438     throw recoverable_failure(origin::system,
3439                               "Failed to get RSA encrypting key");
3440 
3441   SecureVector<Botan::byte> ct;
3442 
3443 #if BOTAN_VERSION_CODE >= BOTAN_VERSION_CODE_FOR(1,9,5)
3444   PK_Encryptor_EME encryptor(*pub_key, "EME1(SHA-1)");
3445   ct = encryptor.encrypt(
3446           reinterpret_cast<Botan::byte const *>(plaintext.data()),
3447           plaintext.size(), lazy_rng::get());
3448 #elif BOTAN_VERSION_CODE >= BOTAN_VERSION_CODE_FOR(1,7,7)
3449   shared_ptr<PK_Encryptor>
3450     encryptor(get_pk_encryptor(*pub_key, "EME1(SHA-1)"));
3451 
3452   ct = encryptor->encrypt(
3453           reinterpret_cast<Botan::byte const *>(plaintext.data()),
3454           plaintext.size(), lazy_rng::get());
3455 #else
3456   shared_ptr<PK_Encryptor>
3457     encryptor(Botan::get_pk_encryptor(*pub_key, "EME1(SHA-1)"));
3458 
3459   ct = encryptor->encrypt(
3460           reinterpret_cast<Botan::byte const *>(plaintext.data()),
3461           plaintext.size());
3462 #endif
3463 
3464   ciphertext = rsa_oaep_sha_data(
3465     string(reinterpret_cast<char const *>(ct.begin()), ct.size()),
3466     origin::database);
3467 }
3468 
3469 cert_status
check_signature(key_id const & id,string const & alleged_text,rsa_sha1_signature const & signature)3470 database::check_signature(key_id const & id,
3471                           string const & alleged_text,
3472                           rsa_sha1_signature const & signature)
3473 {
3474   MM(id);
3475   MM(alleged_text);
3476   shared_ptr<PK_Verifier> verifier;
3477 
3478   verifier_cache::const_iterator i = imp->verifiers.find(id);
3479   if (i != imp->verifiers.end())
3480     verifier = i->second.first;
3481 
3482   else
3483     {
3484       rsa_pub_key pub;
3485 
3486       if (!public_key_exists(id))
3487         return cert_unknown;
3488 
3489       get_key(id, pub);
3490       SecureVector<Botan::byte> pub_block
3491         (reinterpret_cast<Botan::byte const *>(pub().data()), pub().size());
3492 
3493       L(FL("building verifier for %d-byte pub key") % pub_block.size());
3494       shared_ptr<X509_PublicKey> x509_key(Botan::X509::load_key(pub_block));
3495       shared_ptr<RSA_PublicKey> pub_key
3496         = boost::dynamic_pointer_cast<RSA_PublicKey>(x509_key);
3497 
3498       E(pub_key, id.inner().made_from,
3499         F("failed to get RSA verifying key for %s") % id);
3500 
3501 #if BOTAN_VERSION_CODE >= BOTAN_VERSION_CODE_FOR(1,10,0)
3502       verifier.reset(new Botan::PK_Verifier(*pub_key, "EMSA3(SHA1)"));
3503 #else
3504       verifier.reset(Botan::get_pk_verifier(*pub_key, "EMSA3(SHA-1)"));
3505 #endif
3506 
3507       /* XXX This is ugly. We need to keep the key around
3508        * as long as the verifier is around, but the shared_ptr will go
3509        * away after we leave this scope. Hence we store a pair of
3510        * <verifier,key> so they both exist. */
3511       imp->verifiers.insert(make_pair(id, make_pair(verifier, pub_key)));
3512     }
3513 
3514   // check the text+sig against the key
3515   L(FL("checking %d-byte signature") % signature().size());
3516 
3517   if (verifier->verify_message(
3518         reinterpret_cast<Botan::byte const*>(alleged_text.data()),
3519         alleged_text.size(),
3520         reinterpret_cast<Botan::byte const*>(signature().data()),
3521         signature().size()))
3522     return cert_ok;
3523   else
3524     return cert_bad;
3525 }
3526 
3527 cert_status
check_cert(cert const & t)3528 database::check_cert(cert const & t)
3529 {
3530   string signed_text;
3531   t.signable_text(signed_text);
3532   return check_signature(t.key, signed_text, t.sig);
3533 }
3534 
3535 // cert management
3536 
3537 bool
cert_exists(cert const & t,string const & table)3538 database_impl::cert_exists(cert const & t,
3539                            string const & table)
3540 {
3541   results res;
3542   query q = query("SELECT revision_id FROM " + table + " WHERE revision_id = ? "
3543                   "AND name = ? "
3544                   "AND value = ? "
3545                   "AND keypair_id = ? "
3546                   "AND signature = ?")
3547     % blob(t.ident.inner()())
3548     % text(t.name())
3549     % blob(t.value())
3550     % blob(t.key.inner()())
3551     % blob(t.sig());
3552 
3553   fetch(res, 1, any_rows, q);
3554 
3555   I(res.empty() || res.size() == 1);
3556   return res.size() == 1;
3557 }
3558 
3559 void
put_cert(cert const & t,string const & table)3560 database_impl::put_cert(cert const & t,
3561                         string const & table)
3562 {
3563   results res;
3564   fetch(res, 1, one_row,
3565         query("SELECT name FROM public_keys WHERE id = ?")
3566         % blob(t.key.inner()()));
3567   key_name keyname(res[0][0], origin::database);
3568 
3569   id thash;
3570   t.hash_code(keyname, thash);
3571   rsa_sha1_signature sig;
3572 
3573   string insert = "INSERT INTO " + table + " VALUES(?, ?, ?, ?, ?, ?)";
3574 
3575   execute(query(insert)
3576           % blob(thash())
3577           % blob(t.ident.inner()())
3578           % text(t.name())
3579           % blob(t.value())
3580           % blob(t.key.inner()())
3581           % blob(t.sig()));
3582 }
3583 
3584 void
results_to_certs(results const & res,vector<cert> & certs)3585 database_impl::results_to_certs(results const & res,
3586                                 vector<cert> & certs)
3587 {
3588   certs.clear();
3589   for (size_t i = 0; i < res.size(); ++i)
3590     {
3591       cert t;
3592       t = cert(revision_id(res[i][0], origin::database),
3593                cert_name(res[i][1], origin::database),
3594                cert_value(res[i][2], origin::database),
3595                key_id(res[i][3], origin::database),
3596                rsa_sha1_signature(res[i][4], origin::database));
3597       certs.push_back(t);
3598     }
3599 }
3600 
3601 void
results_to_certs(results const & res,vector<pair<id,cert>> & certs)3602 database_impl::results_to_certs(results const & res,
3603                                 vector<pair<id, cert> > & certs)
3604 {
3605   certs.clear();
3606   for (size_t i = 0; i < res.size(); ++i)
3607     {
3608       cert t;
3609       t = cert(revision_id(res[i][0], origin::database),
3610                cert_name(res[i][1], origin::database),
3611                cert_value(res[i][2], origin::database),
3612                key_id(res[i][3], origin::database),
3613                rsa_sha1_signature(res[i][4], origin::database));
3614       certs.push_back(make_pair(id(res[i][5], origin::database),
3615                                 t));
3616     }
3617 }
3618 
3619 void
oldstyle_results_to_certs(results const & res,vector<cert> & certs)3620 database_impl::oldstyle_results_to_certs(results const & res,
3621                                          vector<cert> & certs)
3622 {
3623   certs.clear();
3624   for (size_t i = 0; i < res.size(); ++i)
3625     {
3626       revision_id rev_id(res[i][0], origin::database);
3627       cert_name name(res[i][1], origin::database);
3628       cert_value value(res[i][2], origin::database);
3629 
3630       key_name k_name(res[i][3], origin::database);
3631       key_id k_id;
3632       {
3633         results key_res;
3634         query lookup_key("SELECT id FROM public_keys WHERE name = ?");
3635         fetch(key_res, 1, any_rows, lookup_key % text(k_name()));
3636         if (key_res.size() == 0)
3637           break; // no key, cert is bogus
3638         else if (key_res.size() == 1)
3639           k_id = key_id(key_res[0][0], origin::database);
3640         else
3641           E(false, origin::database,
3642             F("your database contains multiple keys named '%s'") % k_name);
3643       }
3644 
3645       rsa_sha1_signature sig(res[i][4], origin::database);
3646       certs.push_back(cert(rev_id, name, value, k_id, sig));
3647     }
3648 }
3649 
3650 void
install_functions()3651 database_impl::install_functions()
3652 {
3653 #ifdef SUPPORT_SQLITE_BEFORE_3003014
3654   if (sqlite3_libversion_number() < 3003013)
3655     I(sqlite3_create_function(sql(), "hex", -1,
3656                               SQLITE_UTF8, NULL,
3657                               &sqlite3_hex_fn,
3658                               NULL, NULL) == 0);
3659 #endif
3660 
3661   // register any functions we're going to use
3662   I(sqlite3_create_function(sql(), "gunzip", -1,
3663                            SQLITE_UTF8, NULL,
3664                            &sqlite3_gunzip_fn,
3665                            NULL, NULL) == 0);
3666 }
3667 
3668 void
get_certs(vector<cert> & certs,string const & table)3669 database_impl::get_certs(vector<cert> & certs,
3670                          string const & table)
3671 {
3672   results res;
3673   query q("SELECT revision_id, name, value, keypair_id, signature FROM " + table);
3674   fetch(res, 5, any_rows, q);
3675   results_to_certs(res, certs);
3676 }
3677 
3678 
3679 void
get_oldstyle_certs(id const & ident,vector<cert> & certs,string const & table)3680 database_impl::get_oldstyle_certs(id const & ident,
3681                                   vector<cert> & certs,
3682                                   string const & table)
3683 {
3684   MM(ident);
3685   results res;
3686   query q("SELECT id, name, value, keypair, signature FROM " + table +
3687           " WHERE id = ?");
3688 
3689   fetch(res, 5, any_rows, q % blob(ident()));
3690   oldstyle_results_to_certs(res, certs);
3691 }
3692 
3693 void
get_certs(id const & ident,vector<cert> & certs,string const & table)3694 database_impl::get_certs(id const & ident,
3695                          vector<cert> & certs,
3696                          string const & table)
3697 {
3698   MM(ident);
3699   results res;
3700   query q("SELECT revision_id, name, value, keypair_id, signature FROM " + table +
3701           " WHERE revision_id = ?");
3702 
3703   fetch(res, 5, any_rows, q % blob(ident()));
3704   results_to_certs(res, certs);
3705 }
3706 
3707 void
get_certs(cert_name const & name,vector<cert> & certs,string const & table)3708 database_impl::get_certs(cert_name const & name,
3709                          vector<cert> & certs,
3710                          string const & table)
3711 {
3712   MM(name);
3713   results res;
3714   query q("SELECT revision_id, name, value, keypair_id, signature FROM " + table +
3715           " WHERE name = ?");
3716   fetch(res, 5, any_rows, q % text(name()));
3717   results_to_certs(res, certs);
3718 }
3719 
3720 void
get_oldstyle_certs(cert_name const & name,vector<cert> & certs,string const & table)3721 database_impl::get_oldstyle_certs(cert_name const & name,
3722                                   vector<cert> & certs,
3723                                   string const & table)
3724 {
3725   results res;
3726   query q("SELECT id, name, value, keypair, signature FROM " + table +
3727           " WHERE name = ?");
3728   fetch(res, 5, any_rows, q % text(name()));
3729   oldstyle_results_to_certs(res, certs);
3730 }
3731 
3732 void
get_certs(id const & ident,cert_name const & name,vector<cert> & certs,string const & table)3733 database_impl::get_certs(id const & ident,
3734                          cert_name const & name,
3735                          vector<cert> & certs,
3736                          string const & table)
3737 {
3738   results res;
3739   query q("SELECT revision_id, name, value, keypair_id, signature FROM " + table +
3740           " WHERE revision_id = ? AND name = ?");
3741 
3742   fetch(res, 5, any_rows,
3743         q % blob(ident())
3744           % text(name()));
3745   results_to_certs(res, certs);
3746 }
3747 
3748 void
get_certs(cert_name const & name,cert_value const & val,vector<pair<id,cert>> & certs,string const & table)3749 database_impl::get_certs(cert_name const & name,
3750                          cert_value const & val,
3751                          vector<pair<id, cert> > & certs,
3752                          string const & table)
3753 {
3754   results res;
3755   query q("SELECT revision_id, name, value, keypair_id, signature, hash FROM " + table +
3756           " WHERE name = ? AND value = ?");
3757 
3758   fetch(res, 6, any_rows,
3759         q % text(name())
3760           % blob(val()));
3761   results_to_certs(res, certs);
3762 }
3763 
3764 
3765 void
get_certs(id const & ident,cert_name const & name,cert_value const & value,vector<cert> & certs,string const & table)3766 database_impl::get_certs(id const & ident,
3767                          cert_name const & name,
3768                          cert_value const & value,
3769                          vector<cert> & certs,
3770                          string const & table)
3771 {
3772   results res;
3773   query q("SELECT revision_id, name, value, keypair_id, signature FROM " + table +
3774           " WHERE revision_id = ? AND name = ? AND value = ?");
3775 
3776   fetch(res, 5, any_rows,
3777         q % blob(ident())
3778           % text(name())
3779           % blob(value()));
3780   results_to_certs(res, certs);
3781 }
3782 
3783 
3784 
3785 bool
revision_cert_exists(cert const & cert)3786 database::revision_cert_exists(cert const & cert)
3787 {
3788   return imp->cert_exists(cert, "revision_certs");
3789 }
3790 
3791 bool
put_revision_cert(cert const & cert)3792 database::put_revision_cert(cert const & cert)
3793 {
3794   if (revision_cert_exists(cert))
3795     {
3796       L(FL("revision cert on '%s' already exists in db")
3797         % cert.ident);
3798       return false;
3799     }
3800 
3801   if (!revision_exists(revision_id(cert.ident)))
3802     {
3803       W(F("cert revision %s does not exist in db")
3804         % cert.ident);
3805       W(F("dropping cert"));
3806       return false;
3807     }
3808 
3809   if (cert.name() == "branch")
3810     {
3811       string branch_name = cert.value();
3812       if (branch_name.find_first_of("?,;*%%+{}[]!^") != string::npos ||
3813           branch_name.find_first_of('-') == 0)
3814         {
3815           W(F("the branch name\n"
3816               "  '%s'\n"
3817               "contains meta characters (one or more of '?,;*%%+{}[]!^') or\n"
3818               "starts with a dash, which might cause malfunctions when used\n"
3819               "in a netsync branch pattern.\n\n"
3820               "If you want to undo this operation, please use the\n"
3821               "'%s local kill_certs' command to delete the particular branch\n"
3822               "cert and re-add a valid one.")
3823             % cert.value() % prog_name);
3824         }
3825     }
3826 
3827   imp->put_cert(cert, "revision_certs");
3828 
3829   if (cert.name() == "branch")
3830     {
3831       record_as_branch_leaf(cert.value, cert.ident);
3832     }
3833 
3834   imp->cert_stamper.note_change();
3835   return true;
3836 }
3837 
3838 void
record_as_branch_leaf(cert_value const & branch,revision_id const & rev)3839 database::record_as_branch_leaf(cert_value const & branch, revision_id const & rev)
3840 {
3841   set<revision_id> parents;
3842   get_revision_parents(rev, parents);
3843   set<revision_id> current_leaves;
3844   get_branch_leaves(branch, current_leaves);
3845 
3846   set<revision_id>::const_iterator self = current_leaves.find(rev);
3847   if (self != current_leaves.end())
3848     return; // already recorded (must be adding a second branch cert)
3849 
3850   bool all_parents_were_leaves = true;
3851   bool some_ancestor_was_leaf = false;
3852   for (set<revision_id>::const_iterator p = parents.begin();
3853        p != parents.end(); ++p)
3854     {
3855       set<revision_id>::iterator l = current_leaves.find(*p);
3856       if (l == current_leaves.end())
3857         all_parents_were_leaves = false;
3858       else
3859         {
3860           some_ancestor_was_leaf = true;
3861           imp->execute(query("DELETE FROM branch_leaves "
3862                              "WHERE branch = ? AND revision_id = ?")
3863                        % blob(branch()) % blob(l->inner()()));
3864           current_leaves.erase(l);
3865         }
3866     }
3867 
3868   // This check is needed for this case:
3869   //
3870   //  r1 (branch1)
3871   //  |
3872   //  r2 (branch2)
3873   //  |
3874   //  r3 (branch1)
3875 
3876   if (!all_parents_were_leaves)
3877     {
3878       for (set<revision_id>::const_iterator r = current_leaves.begin();
3879            r != current_leaves.end(); ++r)
3880         {
3881           if (is_a_ancestor_of_b(*r, rev))
3882             {
3883               some_ancestor_was_leaf = true;
3884               imp->execute(query("DELETE FROM branch_leaves "
3885                                  "WHERE branch = ? AND revision_id = ?")
3886                            % blob(branch()) % blob(r->inner()()));
3887             }
3888         }
3889     }
3890 
3891   // are we really a leaf (ie, not an ancestor of an existing leaf)?
3892   //
3893   // see tests/branch_leaves_sync_bug for a scenario that requires this.
3894   if (!some_ancestor_was_leaf)
3895     {
3896       bool really_a_leaf = true;
3897       for (set<revision_id>::const_iterator r = current_leaves.begin();
3898            r != current_leaves.end(); ++r)
3899         {
3900           if (is_a_ancestor_of_b(rev, *r))
3901             {
3902               really_a_leaf = false;
3903               break;
3904             }
3905         }
3906       if (!really_a_leaf)
3907         return;
3908     }
3909 
3910   imp->execute(query("INSERT INTO branch_leaves(branch, revision_id) "
3911                      "VALUES (?, ?)")
3912                % blob(branch()) % blob(rev.inner()()));
3913 }
3914 
3915 outdated_indicator
get_revision_cert_nobranch_index(vector<pair<revision_id,pair<revision_id,key_id>>> & idx)3916 database::get_revision_cert_nobranch_index(vector< pair<revision_id,
3917                                            pair<revision_id, key_id> > > & idx)
3918 {
3919   // share some storage
3920   id::symtab id_syms;
3921 
3922   results res;
3923   imp->fetch(res, 3, any_rows,
3924              query("SELECT hash, revision_id, keypair_id "
3925                    "FROM revision_certs WHERE name != 'branch'"));
3926 
3927   idx.clear();
3928   idx.reserve(res.size());
3929   for (results::const_iterator i = res.begin(); i != res.end(); ++i)
3930     {
3931       idx.push_back(make_pair(revision_id((*i)[0], origin::database),
3932                               make_pair(revision_id((*i)[1], origin::database),
3933                                         key_id((*i)[2], origin::database))));
3934     }
3935   return imp->cert_stamper.get_indicator();
3936 }
3937 
3938 outdated_indicator
get_revision_certs(vector<cert> & certs)3939 database::get_revision_certs(vector<cert> & certs)
3940 {
3941   imp->get_certs(certs, "revision_certs");
3942   return imp->cert_stamper.get_indicator();
3943 }
3944 
3945 outdated_indicator
get_revision_certs(cert_name const & name,vector<cert> & certs)3946 database::get_revision_certs(cert_name const & name,
3947                             vector<cert> & certs)
3948 {
3949   imp->get_certs(name, certs, "revision_certs");
3950   return imp->cert_stamper.get_indicator();
3951 }
3952 
3953 outdated_indicator
get_revision_certs(revision_id const & id,cert_name const & name,vector<cert> & certs)3954 database::get_revision_certs(revision_id const & id,
3955                              cert_name const & name,
3956                              vector<cert> & certs)
3957 {
3958   imp->get_certs(id.inner(), name, certs, "revision_certs");
3959   return imp->cert_stamper.get_indicator();
3960 }
3961 
3962 outdated_indicator
get_revision_certs(revision_id const & id,cert_name const & name,cert_value const & val,vector<cert> & certs)3963 database::get_revision_certs(revision_id const & id,
3964                              cert_name const & name,
3965                              cert_value const & val,
3966                              vector<cert> & certs)
3967 {
3968   imp->get_certs(id.inner(), name, val, certs, "revision_certs");
3969   return imp->cert_stamper.get_indicator();
3970 }
3971 
3972 outdated_indicator
get_revisions_with_cert(cert_name const & name,cert_value const & val,set<revision_id> & revisions)3973 database::get_revisions_with_cert(cert_name const & name,
3974                                   cert_value const & val,
3975                                   set<revision_id> & revisions)
3976 {
3977   revisions.clear();
3978   results res;
3979   query q("SELECT revision_id FROM revision_certs WHERE name = ? AND value = ?");
3980   imp->fetch(res, one_col, any_rows, q % text(name()) % blob(val()));
3981   for (results::const_iterator i = res.begin(); i != res.end(); ++i)
3982     revisions.insert(revision_id((*i)[0], origin::database));
3983   return imp->cert_stamper.get_indicator();
3984 }
3985 
3986 outdated_indicator
get_branch_leaves(cert_value const & value,set<revision_id> & revisions)3987 database::get_branch_leaves(cert_value const & value,
3988                             set<revision_id> & revisions)
3989 {
3990   revisions.clear();
3991   results res;
3992   query q("SELECT revision_id FROM branch_leaves WHERE branch = ?");
3993   imp->fetch(res, one_col, any_rows, q % blob(value()));
3994   for (results::const_iterator i = res.begin(); i != res.end(); ++i)
3995     revisions.insert(revision_id((*i)[0], origin::database));
3996   return imp->cert_stamper.get_indicator();
3997 }
3998 
3999 outdated_indicator
get_revision_certs(cert_name const & name,cert_value const & val,vector<pair<id,cert>> & certs)4000 database::get_revision_certs(cert_name const & name,
4001                              cert_value const & val,
4002                              vector<pair<id, cert> > & certs)
4003 {
4004   imp->get_certs(name, val, certs, "revision_certs");
4005   return imp->cert_stamper.get_indicator();
4006 }
4007 
4008 outdated_indicator
get_revision_certs(revision_id const & id,vector<cert> & certs)4009 database::get_revision_certs(revision_id const & id,
4010                              vector<cert> & certs)
4011 {
4012   imp->get_certs(id.inner(), certs, "revision_certs");
4013   return imp->cert_stamper.get_indicator();
4014 }
4015 
4016 outdated_indicator
get_revision_certs(revision_id const & ident,vector<id> & ids)4017 database::get_revision_certs(revision_id const & ident,
4018                              vector<id> & ids)
4019 {
4020   results res;
4021   imp->fetch(res, one_col, any_rows,
4022              query("SELECT hash "
4023                    "FROM revision_certs "
4024                    "WHERE revision_id = ?")
4025              % blob(ident.inner()()));
4026   ids.clear();
4027   for (size_t i = 0; i < res.size(); ++i)
4028     ids.push_back(id(res[i][0], origin::database));
4029   return imp->cert_stamper.get_indicator();
4030 }
4031 
4032 void
get_revision_cert(id const & hash,cert & c)4033 database::get_revision_cert(id const & hash,
4034                             cert & c)
4035 {
4036   results res;
4037   vector<cert> certs;
4038   imp->fetch(res, 5, one_row,
4039              query("SELECT revision_id, name, value, keypair_id, signature "
4040                    "FROM revision_certs "
4041                    "WHERE hash = ?")
4042              % blob(hash()));
4043   imp->results_to_certs(res, certs);
4044   I(certs.size() == 1);
4045   c = certs[0];
4046 }
4047 
4048 bool
revision_cert_exists(revision_id const & hash)4049 database::revision_cert_exists(revision_id const & hash)
4050 {
4051   results res;
4052   vector<cert> certs;
4053   imp->fetch(res, one_col, any_rows,
4054              query("SELECT revision_id "
4055                    "FROM revision_certs "
4056                    "WHERE hash = ?")
4057              % blob(hash.inner()()));
4058   I(res.empty() || res.size() == 1);
4059   return (res.size() == 1);
4060 }
4061 
4062 // FIXME: the bogus-cert family of functions is ridiculous
4063 // and needs to be replaced, or at least factored.
4064 namespace {
4065   struct trust_value
4066   {
4067     set<key_id> good_sigs;
4068     set<key_id> bad_sigs;
4069     set<key_id> unknown_sigs;
4070   };
4071 
4072   // returns *one* of each trusted cert key/value
4073   // if two keys signed the same thing, we get two certs as input and
4074   // just pick one (assuming neither is invalid) to use in the output
4075   void
erase_bogus_certs_internal(vector<cert> & certs,database & db,database::cert_trust_checker const & checker)4076   erase_bogus_certs_internal(vector<cert> & certs,
4077                              database & db,
4078                              database::cert_trust_checker const & checker)
4079   {
4080     // sorry, this is a crazy data structure
4081     typedef tuple<id, cert_name, cert_value> trust_key;
4082     typedef map< trust_key, trust_value > trust_map;
4083     trust_map trust;
4084 
4085     for (vector<cert>::iterator i = certs.begin(); i != certs.end(); ++i)
4086       {
4087         trust_key key = trust_key(i->ident.inner(),
4088                                   i->name,
4089                                   i->value);
4090         trust_value & value = trust[key];
4091         switch (db.check_cert(*i))
4092           {
4093           case cert_ok:
4094             value.good_sigs.insert(i->key);
4095             break;
4096           case cert_bad:
4097             value.bad_sigs.insert(i->key);
4098             break;
4099           case cert_unknown:
4100             value.unknown_sigs.insert(i->key);
4101             break;
4102           }
4103       }
4104 
4105     certs.clear();
4106 
4107     for (trust_map::const_iterator i = trust.begin();
4108          i != trust.end(); ++i)
4109       {
4110         cert out(typecast_vocab<revision_id>(get<0>(i->first)),
4111                  get<1>(i->first), get<2>(i->first), key_id());
4112         if (!i->second.good_sigs.empty() &&
4113             checker(i->second.good_sigs,
4114                     get<0>(i->first),
4115                     get<1>(i->first),
4116                     get<2>(i->first)))
4117           {
4118             L(FL("trust function liked %d signers of %s cert on revision %s")
4119               % i->second.good_sigs.size()
4120               % get<1>(i->first)
4121               % get<0>(i->first));
4122             out.key = *i->second.good_sigs.begin();
4123             certs.push_back(out);
4124           }
4125         else
4126           {
4127             string txt;
4128             out.signable_text(txt);
4129             for (set<key_id>::const_iterator b = i->second.bad_sigs.begin();
4130                  b != i->second.bad_sigs.end(); ++b)
4131               {
4132                 W(F("ignoring bad signature by '%s' on '%s'") % *b % txt);
4133               }
4134             for (set<key_id>::const_iterator u = i->second.unknown_sigs.begin();
4135                  u != i->second.unknown_sigs.end(); ++u)
4136               {
4137                 W(F("ignoring unknown signature by '%s' on '%s'") % *u % txt);
4138               }
4139             W(F("trust function disliked %d signers of '%s' cert on revision %s")
4140               % i->second.good_sigs.size()
4141               % get<1>(i->first)
4142               % get<0>(i->first));
4143           }
4144       }
4145   }
4146   // the lua hook wants key_identity_info, but all that's been
4147   // pulled from the certs is key_id. So this is needed to translate.
4148   // use pointers for project and lua so bind() doesn't make copies
check_revision_cert_trust(project_t const * const project,lua_hooks * const lua,set<key_id> const & signers,id const & hash,cert_name const & name,cert_value const & value)4149   bool check_revision_cert_trust(project_t const * const project,
4150                                  lua_hooks * const lua,
4151                                  set<key_id> const & signers,
4152                                  id const & hash,
4153                                  cert_name const & name,
4154                                  cert_value const & value)
4155   {
4156     set<key_identity_info> signer_identities;
4157     for (set<key_id>::const_iterator i = signers.begin();
4158          i != signers.end(); ++i)
4159       {
4160         key_identity_info identity;
4161         identity.id = *i;
4162         project->complete_key_identity_from_id(*lua, identity);
4163         signer_identities.insert(identity);
4164       }
4165 
4166     return lua->hook_get_revision_cert_trust(signer_identities,
4167                                              hash, name, value);
4168   }
4169   // and the lua hook for manifest trust checking wants a key_name
check_manifest_cert_trust(database * const db,lua_hooks * const lua,set<key_id> const & signers,id const & hash,cert_name const & name,cert_value const & value)4170   bool check_manifest_cert_trust(database * const db,
4171                                  lua_hooks * const lua,
4172                                  set<key_id> const & signers,
4173                                  id const & hash,
4174                                  cert_name const & name,
4175                                  cert_value const & value)
4176   {
4177     set<key_name> signer_names;
4178     for (set<key_id>::const_iterator i = signers.begin();
4179          i != signers.end(); ++i)
4180       {
4181         key_name name;
4182         rsa_pub_key pub;
4183         db->get_pubkey(*i, name, pub);
4184         signer_names.insert(name);
4185       }
4186 
4187     return lua->hook_get_manifest_cert_trust(signer_names,
4188                                              hash, name, value);
4189   }
4190 } // anonymous namespace
4191 
4192 void
erase_bogus_certs(project_t const & project,vector<cert> & certs)4193 database::erase_bogus_certs(project_t const & project, vector<cert> & certs)
4194 {
4195   erase_bogus_certs_internal(certs, *this,
4196                              boost::bind(&check_revision_cert_trust,
4197                                          &project, &this->lua, _1, _2, _3, _4));
4198 }
4199 void
erase_bogus_certs(vector<cert> & certs,database::cert_trust_checker const & checker)4200 database::erase_bogus_certs(vector<cert> & certs,
4201                             database::cert_trust_checker const & checker)
4202 {
4203   erase_bogus_certs_internal(certs, *this, checker);
4204 }
4205 
4206 // These are only used by migration from old manifest-style ancestry, so we
4207 // don't much worry that they are not perfectly typesafe.  Also, we know
4208 // that the callers want bogus certs erased.
4209 
4210 void
get_manifest_certs(manifest_id const & id,std::vector<cert> & certs)4211 database::get_manifest_certs(manifest_id const & id, std::vector<cert> & certs)
4212 {
4213   imp->get_oldstyle_certs(id.inner(), certs, "manifest_certs");
4214   erase_bogus_certs_internal(certs, *this,
4215                              boost::bind(&check_manifest_cert_trust,
4216                                          this, &this->lua, _1, _2, _3, _4));
4217 }
4218 
4219 void
get_manifest_certs(cert_name const & name,std::vector<cert> & certs)4220 database::get_manifest_certs(cert_name const & name, std::vector<cert> & certs)
4221 {
4222   imp->get_oldstyle_certs(name, certs, "manifest_certs");
4223   erase_bogus_certs_internal(certs, *this,
4224                              boost::bind(&check_manifest_cert_trust,
4225                                          this, &this->lua, _1, _2, _3, _4));
4226 }
4227 
4228 // completions
4229 void
add_prefix_matching_constraint(string const & colname,string const & prefix,query & q)4230 database_impl::add_prefix_matching_constraint(string const & colname,
4231                                               string const & prefix,
4232                                               query & q)
4233 {
4234   L(FL("add_prefix_matching_constraint for '%s'") % prefix);
4235 
4236   if (prefix.empty())
4237     q.sql_cmd += "1";  // always true
4238   else if (prefix.size() > constants::idlen)
4239     q.sql_cmd += "0"; // always false
4240   else
4241     {
4242       for (string::const_iterator i = prefix.begin(); i != prefix.end(); ++i)
4243        {
4244          E(is_xdigit(*i), origin::user,
4245            F("bad character '%c' in id name '%s'") % *i % prefix);
4246        }
4247 
4248       string lower_hex = prefix;
4249       if (lower_hex.size() < constants::idlen)
4250         lower_hex.append(constants::idlen - lower_hex.size(), '0');
4251       string lower_bound = decode_hexenc(lower_hex, origin::internal);
4252 
4253       string upper_hex = prefix;
4254       if (upper_hex.size() < constants::idlen)
4255         upper_hex.append(constants::idlen - upper_hex.size(), 'f');
4256       string upper_bound = decode_hexenc(upper_hex, origin::internal);
4257 
4258       if (global_sanity.debug_p())
4259         L(FL("prefix_matcher: lower bound ('%s') and upper bound ('%s')")
4260           % encode_hexenc(lower_bound, origin::internal)
4261           % encode_hexenc(upper_bound, origin::internal));
4262 
4263       q.sql_cmd += colname + " BETWEEN ? AND ?";
4264       q.args.push_back(blob(lower_bound));
4265       q.args.push_back(blob(upper_bound));
4266     }
4267 }
4268 
4269 void
complete(string const & partial,set<revision_id> & completions)4270 database::complete(string const & partial,
4271                    set<revision_id> & completions)
4272 {
4273   results res;
4274   completions.clear();
4275   query q("SELECT id FROM revisions WHERE ");
4276 
4277   imp->add_prefix_matching_constraint("id", partial, q);
4278   imp->fetch(res, 1, any_rows, q);
4279 
4280   for (size_t i = 0; i < res.size(); ++i)
4281     completions.insert(revision_id(res[i][0], origin::database));
4282 }
4283 
4284 
4285 void
complete(string const & partial,set<file_id> & completions)4286 database::complete(string const & partial,
4287                    set<file_id> & completions)
4288 {
4289   results res;
4290   completions.clear();
4291 
4292   query q("SELECT id FROM files WHERE ");
4293   imp->add_prefix_matching_constraint("id", partial, q);
4294   imp->fetch(res, 1, any_rows, q);
4295 
4296   for (size_t i = 0; i < res.size(); ++i)
4297     completions.insert(file_id(res[i][0], origin::database));
4298 
4299   res.clear();
4300 
4301   q = query("SELECT id FROM file_deltas WHERE ");
4302   imp->add_prefix_matching_constraint("id", partial, q);
4303   imp->fetch(res, 1, any_rows, q);
4304 
4305   for (size_t i = 0; i < res.size(); ++i)
4306     completions.insert(file_id(res[i][0], origin::database));
4307 }
4308 
4309 void
complete(string const & partial,set<pair<key_id,utf8>> & completions)4310 database::complete(string const & partial,
4311                    set< pair<key_id, utf8 > > & completions)
4312 {
4313   results res;
4314   completions.clear();
4315   query q("SELECT id, name FROM public_keys WHERE ");
4316 
4317   imp->add_prefix_matching_constraint("id", partial, q);
4318   imp->fetch(res, 2, any_rows, q);
4319 
4320   for (size_t i = 0; i < res.size(); ++i)
4321     completions.insert(make_pair(key_id(res[i][0], origin::database),
4322                                  utf8(res[i][1], origin::database)));
4323 }
4324 
4325 // revision selectors
4326 
4327 void
select_parent(string const & partial,set<revision_id> & completions)4328 database::select_parent(string const & partial,
4329                         set<revision_id> & completions)
4330 {
4331   results res;
4332   completions.clear();
4333 
4334   query q("SELECT DISTINCT parent FROM revision_ancestry WHERE ");
4335   imp->add_prefix_matching_constraint("child", partial, q);
4336   imp->fetch(res, 1, any_rows, q);
4337 
4338   for (size_t i = 0; i < res.size(); ++i)
4339     completions.insert(revision_id(res[i][0], origin::database));
4340 }
4341 
4342 void
select_cert(string const & certname,set<revision_id> & completions)4343 database::select_cert(string const & certname,
4344                       set<revision_id> & completions)
4345 {
4346   results res;
4347   completions.clear();
4348 
4349   imp->fetch(res, 1, any_rows,
4350              query("SELECT DISTINCT revision_id FROM revision_certs WHERE name = ?")
4351              % text(certname));
4352 
4353   for (size_t i = 0; i < res.size(); ++i)
4354     completions.insert(revision_id(res[i][0], origin::database));
4355 }
4356 
4357 void
select_cert(string const & certname,string const & certvalue,set<revision_id> & completions)4358 database::select_cert(string const & certname, string const & certvalue,
4359                       set<revision_id> & completions)
4360 {
4361   results res;
4362   completions.clear();
4363 
4364   imp->fetch(res, 1, any_rows,
4365              query("SELECT DISTINCT revision_id FROM revision_certs"
4366                    " WHERE name = ? AND CAST(value AS TEXT) GLOB ?")
4367              % text(certname) % text(certvalue));
4368 
4369   for (size_t i = 0; i < res.size(); ++i)
4370     completions.insert(revision_id(res[i][0], origin::database));
4371 }
4372 
4373 void
select_author_tag_or_branch(string const & partial,set<revision_id> & completions)4374 database::select_author_tag_or_branch(string const & partial,
4375                                       set<revision_id> & completions)
4376 {
4377   results res;
4378   completions.clear();
4379 
4380   string pattern = partial + "*";
4381 
4382   imp->fetch(res, 1, any_rows,
4383              query("SELECT DISTINCT revision_id FROM revision_certs"
4384                    " WHERE (name=? OR name=? OR name=?)"
4385                    " AND CAST(value AS TEXT) GLOB ?")
4386              % text(author_cert_name()) % text(tag_cert_name())
4387              % text(branch_cert_name()) % text(pattern));
4388 
4389   for (size_t i = 0; i < res.size(); ++i)
4390     completions.insert(revision_id(res[i][0], origin::database));
4391 }
4392 
4393 void
select_date(string const & date,string const & comparison,set<revision_id> & completions)4394 database::select_date(string const & date, string const & comparison,
4395                       set<revision_id> & completions)
4396 {
4397   results res;
4398   completions.clear();
4399 
4400   query q;
4401   q.sql_cmd = ("SELECT DISTINCT revision_id FROM revision_certs "
4402                "WHERE name = ? AND CAST(value AS TEXT) ");
4403   q.sql_cmd += comparison;
4404   q.sql_cmd += " ?";
4405 
4406   imp->fetch(res, 1, any_rows,
4407              q % text(date_cert_name()) % text(date));
4408   for (size_t i = 0; i < res.size(); ++i)
4409     completions.insert(revision_id(res[i][0], origin::database));
4410 }
4411 
4412 void
select_key(key_id const & id,set<revision_id> & completions)4413 database::select_key(key_id const & id, set<revision_id> & completions)
4414 {
4415   results res;
4416   completions.clear();
4417 
4418   imp->fetch(res, 1, any_rows,
4419              query("SELECT DISTINCT revision_id FROM revision_certs"
4420                    " WHERE keypair_id = ?")
4421              % blob(id.inner()()));
4422 
4423   for (size_t i = 0; i < res.size(); ++i)
4424     completions.insert(revision_id(res[i][0], origin::database));
4425 }
4426 
4427 // epochs
4428 
4429 void
get_epochs(map<branch_name,epoch_data> & epochs)4430 database::get_epochs(map<branch_name, epoch_data> & epochs)
4431 {
4432   epochs.clear();
4433   results res;
4434   imp->fetch(res, 2, any_rows, query("SELECT branch, epoch FROM branch_epochs"));
4435   for (results::const_iterator i = res.begin(); i != res.end(); ++i)
4436     {
4437       branch_name decoded(idx(*i, 0), origin::database);
4438       I(epochs.find(decoded) == epochs.end());
4439       epochs.insert(make_pair(decoded,
4440                               epoch_data(idx(*i, 1),
4441                                          origin::database)));
4442     }
4443 }
4444 
4445 void
get_epoch(epoch_id const & eid,branch_name & branch,epoch_data & epo)4446 database::get_epoch(epoch_id const & eid,
4447                     branch_name & branch, epoch_data & epo)
4448 {
4449   I(epoch_exists(eid));
4450   results res;
4451   imp->fetch(res, 2, any_rows,
4452              query("SELECT branch, epoch FROM branch_epochs"
4453                    " WHERE hash = ?")
4454              % blob(eid.inner()()));
4455   I(res.size() == 1);
4456   branch = branch_name(idx(idx(res, 0), 0), origin::database);
4457   epo = epoch_data(idx(idx(res, 0), 1), origin::database);
4458 }
4459 
4460 bool
epoch_exists(epoch_id const & eid)4461 database::epoch_exists(epoch_id const & eid)
4462 {
4463   results res;
4464   imp->fetch(res, one_col, any_rows,
4465              query("SELECT hash FROM branch_epochs WHERE hash = ?")
4466              % blob(eid.inner()()));
4467   I(res.size() == 1 || res.empty());
4468   return res.size() == 1;
4469 }
4470 
4471 void
set_epoch(branch_name const & branch,epoch_data const & epo)4472 database::set_epoch(branch_name const & branch, epoch_data const & epo)
4473 {
4474   epoch_id eid;
4475   epoch_hash_code(branch, epo, eid);
4476   I(epo.inner()().size() == constants::epochlen_bytes);
4477   imp->execute(query("INSERT OR REPLACE INTO branch_epochs VALUES(?, ?, ?)")
4478                % blob(eid.inner()())
4479                % blob(branch())
4480                % blob(epo.inner()()));
4481 }
4482 
4483 void
clear_epoch(branch_name const & branch)4484 database::clear_epoch(branch_name const & branch)
4485 {
4486   imp->execute(query("DELETE FROM branch_epochs WHERE branch = ?")
4487                % blob(branch()));
4488 }
4489 
4490 bool
check_integrity()4491 database::check_integrity()
4492 {
4493   results res;
4494   imp->fetch(res, one_col, any_rows, query("PRAGMA integrity_check"));
4495   I(res.size() == 1);
4496   I(res[0].size() == 1);
4497 
4498   return res[0][0] == "ok";
4499 }
4500 
4501 // vars
4502 
4503 void
get_vars(map<var_key,var_value> & vars)4504 database::get_vars(map<var_key, var_value> & vars)
4505 {
4506   vars.clear();
4507   results res;
4508   imp->fetch(res, 3, any_rows, query("SELECT domain, name, value FROM db_vars"));
4509   for (results::const_iterator i = res.begin(); i != res.end(); ++i)
4510     {
4511       var_domain domain(idx(*i, 0), origin::database);
4512       var_name name(idx(*i, 1), origin::database);
4513       var_value value(idx(*i, 2), origin::database);
4514       I(vars.find(make_pair(domain, name)) == vars.end());
4515       vars.insert(make_pair(make_pair(domain, name), value));
4516     }
4517 }
4518 
4519 void
get_var(var_key const & key,var_value & value)4520 database::get_var(var_key const & key, var_value & value)
4521 {
4522   results res;
4523   imp->fetch(res, one_col, any_rows,
4524              query("SELECT value FROM db_vars "
4525                    "WHERE domain = ? AND name = ?")
4526                    % text(key.first())
4527                    % blob(key.second()));
4528   I(res.size() == 1);
4529   var_value dbvalue(res[0][0], origin::database);
4530   value = dbvalue;
4531 }
4532 
4533 bool
var_exists(var_key const & key)4534 database::var_exists(var_key const & key)
4535 {
4536   results res;
4537   imp->fetch(res, one_col, any_rows,
4538              query("SELECT 1 "
4539                    "WHERE EXISTS("
4540                    "  SELECT 1 FROM db_vars "
4541                    "  WHERE domain = ? AND name = ?)")
4542                    % text(key.first())
4543                    % blob(key.second()));
4544   return ! res.empty();
4545 }
4546 
4547 void
set_var(var_key const & key,var_value const & value)4548 database::set_var(var_key const & key, var_value const & value)
4549 {
4550   imp->execute(query("INSERT OR REPLACE INTO db_vars VALUES(?, ?, ?)")
4551                % text(key.first())
4552                % blob(key.second())
4553                % blob(value()));
4554 }
4555 
4556 void
clear_var(var_key const & key)4557 database::clear_var(var_key const & key)
4558 {
4559   imp->execute(query("DELETE FROM db_vars WHERE domain = ? AND name = ?")
4560                % text(key.first())
4561                % blob(key.second()));
4562 }
4563 
// The db var (domain "database", name "known-workspaces") that holds the
// list of registered workspace paths.  Only meant for the workspace
// registration functions that follow.
#define KNOWN_WORKSPACES_KEY                                       \
  var_key(make_pair(var_domain("database", origin::internal),     \
                    var_name("known-workspaces", origin::internal)))
4569 
4570 void
register_workspace(system_path const & workspace)4571 database::register_workspace(system_path const & workspace)
4572 {
4573   var_value val;
4574   if (var_exists(KNOWN_WORKSPACES_KEY))
4575     get_var(KNOWN_WORKSPACES_KEY, val);
4576 
4577   vector<string> workspaces;
4578   split_into_lines(val(), workspaces);
4579 
4580   vector<string>::iterator pos =
4581     find(workspaces.begin(),
4582          workspaces.end(),
4583          workspace.as_internal());
4584   if (pos == workspaces.end())
4585     workspaces.push_back(workspace.as_internal());
4586 
4587   string ws;
4588   join_lines(workspaces, ws);
4589 
4590   set_var(KNOWN_WORKSPACES_KEY, var_value(ws, origin::internal));
4591 }
4592 
4593 void
unregister_workspace(system_path const & workspace)4594 database::unregister_workspace(system_path const & workspace)
4595 {
4596   if (var_exists(KNOWN_WORKSPACES_KEY))
4597     {
4598       var_value val;
4599       get_var(KNOWN_WORKSPACES_KEY, val);
4600 
4601       vector<string> workspaces;
4602       split_into_lines(val(), workspaces);
4603 
4604       vector<string>::iterator pos =
4605         find(workspaces.begin(),
4606              workspaces.end(),
4607              workspace.as_internal());
4608       if (pos != workspaces.end())
4609         workspaces.erase(pos);
4610 
4611       string ws;
4612       join_lines(workspaces, ws);
4613 
4614       set_var(KNOWN_WORKSPACES_KEY, var_value(ws, origin::internal));
4615     }
4616 }
4617 
4618 void
get_registered_workspaces(vector<system_path> & workspaces)4619 database::get_registered_workspaces(vector<system_path> & workspaces)
4620 {
4621   if (var_exists(KNOWN_WORKSPACES_KEY))
4622     {
4623       var_value val;
4624       get_var(KNOWN_WORKSPACES_KEY, val);
4625 
4626       vector<string> paths;
4627       split_into_lines(val(), paths);
4628 
4629       for (vector<string>::const_iterator i = paths.begin();
4630            i != paths.end(); ++i)
4631         {
4632           system_path workspace_path(*i, origin::database);
4633           workspaces.push_back(workspace_path);
4634         }
4635     }
4636 }
4637 
4638 void
set_registered_workspaces(vector<system_path> const & workspaces)4639 database::set_registered_workspaces(vector<system_path> const & workspaces)
4640 {
4641   vector<string> paths;
4642   for (vector<system_path>::const_iterator i = workspaces.begin();
4643        i != workspaces.end(); ++i)
4644     {
4645       paths.push_back((*i).as_internal());
4646     }
4647 
4648   string ws;
4649   join_lines(paths, ws);
4650   set_var(KNOWN_WORKSPACES_KEY, var_value(ws, origin::internal));
4651 }
4652 
4653 #undef KNOWN_WORKSPACES_KEY
4654 
4655 // branches
4656 
4657 outdated_indicator
get_branches(vector<string> & names)4658 database::get_branches(vector<string> & names)
4659 {
4660     results res;
4661     query q("SELECT DISTINCT branch FROM branch_leaves");
4662     string cert_name = "branch";
4663     imp->fetch(res, one_col, any_rows, q);
4664     for (size_t i = 0; i < res.size(); ++i)
4665       {
4666         names.push_back(res[i][0]);
4667       }
4668     return imp->cert_stamper.get_indicator();
4669 }
4670 
4671 outdated_indicator
get_branches(globish const & glob,vector<string> & names)4672 database::get_branches(globish const & glob,
4673                        vector<string> & names)
4674 {
4675     results res;
4676     query q("SELECT DISTINCT value FROM revision_certs WHERE name = ?");
4677     string cert_name = "branch";
4678     imp->fetch(res, one_col, any_rows, q % text(cert_name));
4679     for (size_t i = 0; i < res.size(); ++i)
4680       {
4681         if (glob.matches(res[i][0]))
4682           names.push_back(res[i][0]);
4683       }
4684     return imp->cert_stamper.get_indicator();
4685 }
4686 
4687 void
get_roster(revision_id const & rev_id,roster_t & roster)4688 database::get_roster(revision_id const & rev_id,
4689                      roster_t & roster)
4690 {
4691   marking_map mm;
4692   get_roster(rev_id, roster, mm);
4693 }
4694 
4695 void
get_roster(revision_id const & rev_id,roster_t & roster,marking_map & marking)4696 database::get_roster(revision_id const & rev_id,
4697                      roster_t & roster,
4698                      marking_map & marking)
4699 {
4700   if (rev_id.inner()().empty())
4701     {
4702       roster = roster_t();
4703       marking = marking_map();
4704       return;
4705     }
4706 
4707   cached_roster cr;
4708   get_roster(rev_id, cr);
4709   roster = *cr.first;
4710   marking = *cr.second;
4711 }
4712 
4713 void
get_roster(revision_id const & rev_id,cached_roster & cr)4714 database::get_roster(revision_id const & rev_id, cached_roster & cr)
4715 {
4716   get_roster_version(rev_id, cr);
4717   I(cr.first);
4718   I(cr.second);
4719 }
4720 
4721 void
put_roster(revision_id const & rev_id,revision_t const & rev,roster_t_cp const & roster,marking_map_cp const & marking)4722 database::put_roster(revision_id const & rev_id,
4723                      revision_t const & rev,
4724                      roster_t_cp const & roster,
4725                      marking_map_cp const & marking)
4726 {
4727   I(roster);
4728   I(marking);
4729   MM(rev_id);
4730 
4731   transaction_guard guard(*this);
4732 
4733   // Our task is to add this roster, and deltify all the incoming edges (if
4734   // they aren't already).
4735 
4736   imp->roster_cache.insert_dirty(rev_id, make_pair(roster, marking));
4737 
4738   // Now do what deltify would do if we bothered
4739   size_t num_edges = rev.edges.size();
4740   for (edge_map::const_iterator i = rev.edges.begin();
4741        i != rev.edges.end(); ++i)
4742     {
4743       revision_id old_rev = edge_old_revision(*i);
4744       if (null_id(old_rev))
4745         continue;
4746       if (imp->roster_base_stored(old_rev))
4747         {
4748           cached_roster cr;
4749           get_roster_version(old_rev, cr);
4750           roster_delta reverse_delta;
4751           cset const & changes = edge_changes(i);
4752           delta_rosters(*roster, *marking,
4753                         *(cr.first), *(cr.second),
4754                         reverse_delta,
4755                         num_edges > 1 ? 0 : &changes);
4756           if (imp->roster_cache.exists(old_rev))
4757             imp->roster_cache.mark_clean(old_rev);
4758           imp->drop(old_rev.inner(), "rosters");
4759           imp->put_roster_delta(old_rev, rev_id, reverse_delta);
4760         }
4761     }
4762   guard.commit();
4763 }
4764 
4765 // for get_uncommon_ancestors
4766 struct rev_height_graph : rev_graph
4767 {
rev_height_graphrev_height_graph4768   rev_height_graph(database & db) : db(db) {}
get_parentsrev_height_graph4769   virtual void get_parents(revision_id const & rev, set<revision_id> & parents) const
4770   {
4771     db.get_revision_parents(rev, parents);
4772   }
get_childrenrev_height_graph4773   virtual void get_children(revision_id const & rev, set<revision_id> & parents) const
4774   {
4775     // not required
4776     I(false);
4777   }
get_heightrev_height_graph4778   virtual void get_height(revision_id const & rev, rev_height & h) const
4779   {
4780     db.get_rev_height(rev, h);
4781   }
4782 
4783   database & db;
4784 };
4785 
4786 void
get_uncommon_ancestors(revision_id const & a,revision_id const & b,set<revision_id> & a_uncommon_ancs,set<revision_id> & b_uncommon_ancs)4787 database::get_uncommon_ancestors(revision_id const & a,
4788                                  revision_id const & b,
4789                                  set<revision_id> & a_uncommon_ancs,
4790                                  set<revision_id> & b_uncommon_ancs)
4791 {
4792 
4793   rev_height_graph graph(*this);
4794   ::get_uncommon_ancestors(a, b, graph, a_uncommon_ancs, b_uncommon_ancs);
4795 }
4796 
4797 node_id
next_node_id()4798 database::next_node_id()
4799 {
4800   transaction_guard guard(*this);
4801   results res;
4802 
4803   // We implement this as a fixed db var.
4804   imp->fetch(res, one_col, any_rows,
4805              query("SELECT node FROM next_roster_node_number"));
4806 
4807   u64 n = 1;
4808   if (res.empty())
4809     {
4810       imp->execute(query("INSERT INTO next_roster_node_number VALUES(1)"));
4811     }
4812   else
4813     {
4814       I(res.size() == 1);
4815       n = lexical_cast<u64>(res[0][0]);
4816       ++n;
4817       imp->execute(query("UPDATE next_roster_node_number SET node = ?")
4818                    % text(lexical_cast<string>(n)));
4819     }
4820   guard.commit();
4821   return static_cast<node_id>(n);
4822 }
4823 
4824 void
check_filename()4825 database_impl::check_filename()
4826 {
4827   E(!filename.empty(), origin::user, F("no database specified"));
4828 }
4829 
4830 
4831 void
check_db_exists()4832 database_impl::check_db_exists()
4833 {
4834   switch (get_path_status(filename))
4835     {
4836     case path::file:
4837       return;
4838 
4839     case path::nonexistent:
4840       E(false, origin::user, F("database '%s' does not exist") % filename);
4841 
4842     case path::directory:
4843       if (directory_is_workspace(filename))
4844         {
4845           options opts;
4846           workspace::get_options(filename, opts);
4847           E(opts.dbname.as_internal().empty(), origin::user,
4848             F("'%s' is a workspace, not a database\n"
4849               "(did you mean '%s'?)") % filename % opts.dbname);
4850         }
4851       E(false, origin::user,
4852         F("'%s' is a directory, not a database") % filename);
4853     }
4854 }
4855 
4856 void
check_db_nonexistent()4857 database_impl::check_db_nonexistent()
4858 {
4859   require_path_is_nonexistent(filename,
4860                               F("database '%s' already exists")
4861                               % filename);
4862 
4863   system_path journal(filename.as_internal() + "-journal", origin::internal);
4864   require_path_is_nonexistent(journal,
4865                               F("existing (possibly stale) journal file '%s' "
4866                                 "has same stem as new database '%s'.\n"
4867                                 "Cancelling database creation")
4868                               % journal % filename);
4869 
4870 }
4871 
4872 void
open()4873 database_impl::open()
4874 {
4875   I(!__sql);
4876 
4877   string to_open;
4878   if (type == memory_db)
4879     to_open = memory_db_identifier;
4880   else
4881     {
4882       system_path base_dir = filename.dirname();
4883       if (!directory_exists(base_dir))
4884         mkdir_p(base_dir);
4885       to_open = filename.as_external();
4886     }
4887 
4888   if (sqlite3_open(to_open.c_str(), &__sql) == SQLITE_NOMEM)
4889     throw std::bad_alloc();
4890 
4891   I(__sql);
4892   assert_sqlite3_ok(__sql);
4893 }
4894 
4895 void
close()4896 database_impl::close()
4897 {
4898   I(__sql);
4899 
4900   sqlite3_close(__sql);
4901   __sql = 0;
4902 
4903   I(!__sql);
4904 }
4905 
4906 // transaction guards
4907 
~conditional_transaction_guard()4908 conditional_transaction_guard::~conditional_transaction_guard()
4909 {
4910   if (!acquired)
4911     return;
4912   if (committed)
4913     db.imp->commit_transaction();
4914   else
4915     db.imp->rollback_transaction();
4916 }
4917 
4918 void
acquire()4919 conditional_transaction_guard::acquire()
4920 {
4921   I(!acquired);
4922   acquired = true;
4923   db.imp->begin_transaction(exclusive);
4924 }
4925 
4926 void
do_checkpoint()4927 conditional_transaction_guard::do_checkpoint()
4928 {
4929   I(acquired);
4930   db.imp->commit_transaction();
4931   db.imp->begin_transaction(exclusive);
4932   checkpointed_calls = 0;
4933   checkpointed_bytes = 0;
4934 }
4935 
4936 void
maybe_checkpoint(size_t nbytes)4937 conditional_transaction_guard::maybe_checkpoint(size_t nbytes)
4938 {
4939   I(acquired);
4940   checkpointed_calls += 1;
4941   checkpointed_bytes += nbytes;
4942   if (checkpointed_calls >= checkpoint_batch_size
4943       || checkpointed_bytes >= checkpoint_batch_bytes)
4944     do_checkpoint();
4945 }
4946 
4947 void
commit()4948 conditional_transaction_guard::commit()
4949 {
4950   I(acquired);
4951   committed = true;
4952 }
4953 
4954 void
get_database_path(options const & opts,system_path & path,database::dboptions dbopts)4955 database_path_helper::get_database_path(options const & opts,
4956                                         system_path & path,
4957                                         database::dboptions dbopts)
4958 {
4959   if (!opts.dbname_given ||
4960       (opts.dbname.as_internal().empty() &&
4961        opts.dbname_alias.empty() &&
4962        opts.dbname_type != memory_db))
4963     {
4964       if (dbopts == database::maybe_unspecified)
4965         {
4966           L(FL("no database option given or options empty"));
4967           return;
4968         }
4969       E(false, origin::user, F("no database specified"));
4970     }
4971 
4972   if (opts.dbname_type == unmanaged_db)
4973     {
4974       path = opts.dbname;
4975       return;
4976     }
4977 
4978   if (opts.dbname_type == memory_db)
4979     {
4980       return;
4981     }
4982 
4983   I(opts.dbname_type == managed_db);
4984 
4985   path_component basename;
4986   validate_and_clean_alias(opts.dbname_alias, basename);
4987 
4988   vector<system_path> candidates;
4989   vector<system_path> search_paths;
4990 
4991   E(lua.hook_get_default_database_locations(search_paths) && search_paths.size() > 0,
4992     origin::user, F("no default database location configured"));
4993 
4994   for (vector<system_path>::const_iterator i = search_paths.begin();
4995      i != search_paths.end(); ++i)
4996     {
4997       if (file_exists((*i) / basename))
4998         {
4999           candidates.push_back((*i) / basename);
5000           continue;
5001         }
5002     }
5003 
5004   MM(candidates);
5005 
5006   // if we did not found the database anywhere, use the first
5007   // available default path to possible save it there
5008   if (candidates.size() == 0)
5009     {
5010       path = (*search_paths.begin()) / basename;
5011       L(FL("no path expansions found for '%s', using '%s'")
5012           % opts.dbname_alias % path);
5013       return;
5014     }
5015 
5016   if (candidates.size() == 1)
5017     {
5018       path = (*candidates.begin());
5019       L(FL("one path expansion found for '%s': '%s'")
5020           % opts.dbname_alias % path);
5021       return;
5022     }
5023 
5024   if (candidates.size() > 1)
5025     {
5026       string err =
5027         (F("the database alias '%s' has multiple ambiguous expansions:")
5028          % opts.dbname_alias).str();
5029 
5030       for (vector<system_path>::const_iterator i = candidates.begin();
5031            i != candidates.end(); ++i)
5032         err += ("\n  " + (*i).as_internal());
5033 
5034       E(false, origin::user, i18n_format(err));
5035     }
5036 }
5037 
5038 void
maybe_set_default_alias(options & opts)5039 database_path_helper::maybe_set_default_alias(options & opts)
5040 {
5041   if (opts.dbname_given && (
5042        !opts.dbname.as_internal().empty() ||
5043        !opts.dbname_alias.empty()))
5044     {
5045       return;
5046     }
5047 
5048   string alias;
5049   E(lua.hook_get_default_database_alias(alias) && !alias.empty(),
5050     origin::user, F("could not query default database alias"));
5051 
5052   P(F("using default database '%s'") % alias);
5053   opts.dbname_given = true;
5054   opts.dbname_alias = alias;
5055   opts.dbname_type = managed_db;
5056 }
5057 
5058 void
validate_and_clean_alias(string const & alias,path_component & pc)5059 database_path_helper::validate_and_clean_alias(string const & alias, path_component & pc)
5060 {
5061   E(alias.find(':') == 0, origin::system,
5062     F("invalid database alias '%s': does not start with a colon") % alias);
5063 
5064   string pure_alias = alias.substr(1);
5065   E(pure_alias.size() > 0, origin::system,
5066     F("invalid database alias '%s': must not be empty") % alias);
5067 
5068   globish matcher;
5069   E(lua.hook_get_default_database_glob(matcher),
5070     origin::user, F("could not query default database glob"));
5071 
5072   if (!matcher.matches(pure_alias))
5073     pure_alias += ".mtn";
5074 
5075   try
5076     {
5077       pc = path_component(pure_alias, origin::system);
5078     }
5079   catch (...)
5080     {
5081       E(false, origin::system,
5082         F("invalid database alias '%s': does contain invalid characters") % alias);
5083     }
5084 }
5085 
5086 // Local Variables:
5087 // mode: C++
5088 // fill-column: 76
5089 // c-file-style: "gnu"
5090 // indent-tabs-mode: nil
5091 // End:
5092 // vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
5093