1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 use crate::error::*;
6 use rusqlite::{named_params, Connection, OpenFlags, Transaction, NO_PARAMS};
7 use serde_json::{Map, Value};
8 use sql_support::ConnExt;
9 use std::collections::HashSet;
10 use std::path::Path;
11 
12 // Simple migration from the "old" kinto-with-sqlite-backing implementation
13 // to ours.
14 // Could almost be trivially done in JS using the regular public API if not
15 // for:
16 // * We don't want to enforce the same quotas when migrating.
17 // * We'd rather do the entire migration in a single transaction for perf
18 //   reasons.
19 
20 // The sqlite database we migrate from has a very simple structure:
21 // * table collection_data with columns collection_name, record_id and record
22 // * `collection_name` is a string of form "default/{extension_id}"
23 // * `record_id` is `key-{key}`
24 // * `record` is a string with json, of form: {
25 //     id: {the record id repeated},
26 //     key: {the key},
27 //     data: {the actual data},
28 //     _status: {sync status},
29 //     last_modified: {timestamp},
30 //  }
31 // So the info we need is stored somewhat redundantly.
32 // Further:
33 // * There's a special collection_name "default/storage-sync-crypto" that
34 //   we don't want to migrate. Its record_id is 'keys' and its json has no
35 //   `data`
36 
37 // Note we don't enforce a quota - we migrate everything - even if this means
38 // it's too big for the server to store. This is a policy decision - it's better
39 // to not lose data than to try and work out what data can be disposed of, as
40 // the addon has the ability to determine this.
41 
42 // Our error strategy is "ignore read errors, propagate write errors" under the
43 // assumption that the former tends to mean a damaged DB or file-system and is
44 // unlikely to work if we try later (eg, replacing the disk isn't likely to
45 // uncorrupt the DB), where the latter is likely to be disk-space or file-system
46 // error, but retry might work (eg, replacing the disk then trying again might
47 // make the writes work)
48 
/// One raw row as read from the legacy `collection_data` table, before any
/// validation. Both columns are kept as the strings the DB returned; see
/// `LegacyRow::parse` for how they are interpreted.
struct LegacyRow {
    /// Value of the `collection_name` column.
    col_name: String,
    /// Value of the `record` column (expected to hold JSON text).
    record: String,
}
54 
55 impl LegacyRow {
56     // Parse the 2 columns from the DB into the data we need to insert into
57     // our target database.
parse(&self) -> Option<Parsed<'_>>58     fn parse(&self) -> Option<Parsed<'_>> {
59         if self.col_name.len() < 8 {
60             log::trace!("collection_name of '{}' is too short", self.col_name);
61             return None;
62         }
63         if &self.col_name[..8] != "default/" {
64             log::trace!("collection_name of '{}' isn't ours", self.col_name);
65             return None;
66         }
67         let ext_id = &self.col_name[8..];
68         let mut record_map = match serde_json::from_str(&self.record) {
69             Ok(Value::Object(m)) => m,
70             Ok(o) => {
71                 log::info!("skipping non-json-object 'record' column");
72                 log::trace!("record value is json, but not an object: {}", o);
73                 return None;
74             }
75             Err(e) => {
76                 log::info!("skipping non-json 'record' column");
77                 log::trace!("record value isn't json: {}", e);
78                 return None;
79             }
80         };
81 
82         let key = match record_map.remove("key") {
83             Some(Value::String(s)) if !s.is_empty() => s,
84             Some(o) => {
85                 log::trace!("key is json but not a string: {}", o);
86                 return None;
87             }
88             _ => {
89                 log::trace!("key doesn't exist in the map");
90                 return None;
91             }
92         };
93         let data = match record_map.remove("data") {
94             Some(d) => d,
95             _ => {
96                 log::trace!("data doesn't exist in the map");
97                 return None;
98             }
99         };
100         Some(Parsed { ext_id, key, data })
101     }
102 }
103 
/// The info we parse from the raw DB strings of a single `LegacyRow`.
struct Parsed<'a> {
    /// The extension id: whatever follows the `default/` prefix of the row's
    /// `collection_name`. Borrowed from the row, hence the lifetime.
    ext_id: &'a str,
    /// The record's `key` entry; always a non-empty string.
    key: String,
    /// The record's `data` entry; may be any JSON value.
    data: serde_json::Value,
}
110 
migrate(tx: &Transaction<'_>, filename: &Path) -> Result<MigrationInfo>111 pub fn migrate(tx: &Transaction<'_>, filename: &Path) -> Result<MigrationInfo> {
112     // We do the grouping manually, collecting string values as we go.
113     let mut last_ext_id = "".to_string();
114     let mut curr_values: Vec<(String, serde_json::Value)> = Vec::new();
115     let (rows, mut mi) = read_rows(filename);
116     for row in rows {
117         log::trace!("processing '{}' - '{}'", row.col_name, row.record);
118         let parsed = match row.parse() {
119             Some(p) => p,
120             None => continue,
121         };
122         // Do our "grouping"
123         if parsed.ext_id != last_ext_id {
124             if last_ext_id != "" && !curr_values.is_empty() {
125                 // a different extension id - write what we have to the DB.
126                 let entries = do_insert(tx, &last_ext_id, curr_values)?;
127                 mi.extensions_successful += 1;
128                 mi.entries_successful += entries;
129             }
130             last_ext_id = parsed.ext_id.to_string();
131             curr_values = Vec::new();
132         }
133         // no 'else' here - must also enter this block on ext_id change.
134         if parsed.ext_id == last_ext_id {
135             curr_values.push((parsed.key.to_string(), parsed.data));
136             log::trace!(
137                 "extension {} now has {} keys",
138                 parsed.ext_id,
139                 curr_values.len()
140             );
141         }
142     }
143     // and the last one
144     if last_ext_id != "" && !curr_values.is_empty() {
145         // a different extension id - write what we have to the DB.
146         let entries = do_insert(tx, &last_ext_id, curr_values)?;
147         mi.extensions_successful += 1;
148         mi.entries_successful += entries;
149     }
150     log::info!("migrated {} extensions: {:?}", mi.extensions_successful, mi);
151     Ok(mi)
152 }
153 
read_rows(filename: &Path) -> (Vec<LegacyRow>, MigrationInfo)154 fn read_rows(filename: &Path) -> (Vec<LegacyRow>, MigrationInfo) {
155     let flags = OpenFlags::SQLITE_OPEN_NO_MUTEX | OpenFlags::SQLITE_OPEN_READ_ONLY;
156     let src_conn = match Connection::open_with_flags(&filename, flags) {
157         Ok(conn) => conn,
158         Err(e) => {
159             log::warn!("Failed to open the source DB: {}", e);
160             return (Vec::new(), MigrationInfo::open_failure());
161         }
162     };
163     // Failure to prepare the statement probably just means the source DB is
164     // damaged.
165     let mut stmt = match src_conn.prepare(
166         "SELECT collection_name, record FROM collection_data
167          WHERE collection_name != 'default/storage-sync-crypto'
168          ORDER BY collection_name",
169     ) {
170         Ok(stmt) => stmt,
171         Err(e) => {
172             log::warn!("Failed to prepare the statement: {}", e);
173             return (Vec::new(), MigrationInfo::open_failure());
174         }
175     };
176     let rows = match stmt.query_and_then(NO_PARAMS, |row| -> Result<LegacyRow> {
177         Ok(LegacyRow {
178             col_name: row.get(0)?,
179             record: row.get(1)?,
180         })
181     }) {
182         Ok(r) => r,
183         Err(e) => {
184             log::warn!("Failed to read any rows from the source DB: {}", e);
185             return (Vec::new(), MigrationInfo::open_failure());
186         }
187     };
188     let all_rows: Vec<Result<LegacyRow>> = rows.collect();
189     let entries = all_rows.len();
190     let successful_rows: Vec<LegacyRow> = all_rows.into_iter().filter_map(Result::ok).collect();
191     let distinct_extensions: HashSet<_> = successful_rows.iter().map(|c| &c.col_name).collect();
192 
193     let mi = MigrationInfo {
194         entries,
195         extensions: distinct_extensions.len(),
196         // Populated later.
197         extensions_successful: 0,
198         entries_successful: 0,
199         open_failure: false,
200     };
201 
202     (successful_rows, mi)
203 }
204 
205 /// Insert the extension and values. If there are multiple values with the same
206 /// key (which shouldn't be possible but who knows, database corruption causes
207 /// strange things), chooses an arbitrary one. Returns the number of entries
208 /// inserted, which could be different from `vals.len()` if multiple entries in
209 /// `vals` have the same key.
do_insert(tx: &Transaction<'_>, ext_id: &str, vals: Vec<(String, Value)>) -> Result<usize>210 fn do_insert(tx: &Transaction<'_>, ext_id: &str, vals: Vec<(String, Value)>) -> Result<usize> {
211     let mut map = Map::with_capacity(vals.len());
212     for (key, val) in vals {
213         map.insert(key, val);
214     }
215     let num_entries = map.len();
216     tx.execute_named_cached(
217         "INSERT OR REPLACE INTO storage_sync_data(ext_id, data, sync_change_counter)
218          VALUES (:ext_id, :data, 1)",
219         rusqlite::named_params! {
220             ":ext_id": &ext_id,
221             ":data": &Value::Object(map),
222         },
223     )?;
224     Ok(num_entries)
225 }
226 
/// Summary of a migration attempt: what was found in the legacy database and
/// how much of it was written to the new one. Serializable so it can be
/// stored in (and later taken back out of) the `meta` table.
#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MigrationInfo {
    /// The number of entries (rows in the original table) we attempted to
    /// migrate. Zero if there was some error in computing this number.
    ///
    /// This counts every row the source query yielded, including rows that
    /// could not be read or parsed, but not rows of the
    /// `default/storage-sync-crypto` collection (the query excludes those).
    ///
    /// Note that for the original table, a single row stores a single
    /// preference for one extension. That is, if you view the set of
    /// preferences for a given extension as a HashMap (as we do), it would be a
    /// single entry/key-value-pair in the map.
    pub entries: usize,
    /// The number of records we successfully migrated (equal to `entries` for
    /// entirely successful migrations).
    pub entries_successful: usize,
    /// The number of extensions in the original table, computed as the
    /// number of distinct `collection_name` values among the rows we managed
    /// to read.
    pub extensions: usize,
    /// The number of extensions we successfully migrated
    pub extensions_successful: usize,
    /// True iff we failed to read anything from the source DB: it couldn't
    /// be opened, or the query against it couldn't be prepared or started.
    pub open_failure: bool,
}
248 
249 impl MigrationInfo {
250     /// Returns a MigrationInfo indicating that we failed to read any rows due
251     /// to some error case (e.g. the database open failed, or some other very
252     /// early read error).
open_failure() -> Self253     fn open_failure() -> Self {
254         Self {
255             open_failure: true,
256             ..Self::default()
257         }
258     }
259 
260     const META_KEY: &'static str = "migration_info";
261 
262     /// Store `self` in the provided database under `Self::META_KEY`.
store(&self, conn: &Connection) -> Result<()>263     pub(crate) fn store(&self, conn: &Connection) -> Result<()> {
264         let json = serde_json::to_string(self)?;
265         conn.execute_named(
266             "INSERT OR REPLACE INTO meta(key, value) VALUES (:k, :v)",
267             named_params! {
268                 ":k": Self::META_KEY,
269                 ":v": &json
270             },
271         )?;
272         Ok(())
273     }
274 
275     /// Get the MigrationInfo stored under `Self::META_KEY` (if any) out of the
276     /// DB, and delete it.
take(tx: &Transaction<'_>) -> Result<Option<Self>>277     pub(crate) fn take(tx: &Transaction<'_>) -> Result<Option<Self>> {
278         let s = tx.try_query_one::<String>(
279             "SELECT value FROM meta WHERE key = :k",
280             named_params! {
281                 ":k": Self::META_KEY,
282             },
283             false,
284         )?;
285         tx.execute_named(
286             "DELETE FROM meta WHERE key = :k",
287             named_params! {
288                 ":k": Self::META_KEY,
289             },
290         )?;
291         if let Some(s) = s {
292             match serde_json::from_str(&s) {
293                 Ok(v) => Ok(Some(v)),
294                 Err(e) => {
295                     // Force test failure, but just log an error otherwise so that
296                     // we commit the transaction that wil.
297                     debug_assert!(false, "Failed to read migration JSON: {:?}", e);
298                     log::error!("Failed to read migration JSON: {}", e);
299                     Ok(None)
300                 }
301             }
302         } else {
303             Ok(None)
304         }
305     }
306 }
// Tests for the legacy-DB migration: each one builds a throw-away source
// database on disk, migrates it, and checks the reported `MigrationInfo`
// and/or the migrated data.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api;
    use crate::db::{test::new_mem_db, StorageDb};
    use serde_json::json;
    use tempfile::tempdir;

    // Create a legacy-style source database at `path` containing an empty
    // `collection_data` table, then let `f` populate it. Table creation and
    // `f` run in one transaction, which is committed before the connection
    // is closed.
    fn init_source_db(path: impl AsRef<Path>, f: impl FnOnce(&Connection)) {
        let flags = OpenFlags::SQLITE_OPEN_NO_MUTEX
            | OpenFlags::SQLITE_OPEN_CREATE
            | OpenFlags::SQLITE_OPEN_READ_WRITE;
        let mut conn = Connection::open_with_flags(path, flags).expect("open should work");
        let tx = conn.transaction().expect("should be able to get a tx");
        tx.execute_batch(
            "CREATE TABLE collection_data (
                collection_name TEXT,
                record_id TEXT,
                record TEXT
            );",
        )
        .expect("create should work");
        f(&tx);
        tx.commit().expect("should commit");
        conn.close().expect("close should work");
    }

    // Create a test database, populate it via the callback, migrate it, and
    // return a connection to the new, migrated DB for further checking.
    // Asserts that the `MigrationInfo` returned by `migrate` equals
    // `expect_mi`.
    fn do_migrate<F>(expect_mi: MigrationInfo, f: F) -> StorageDb
    where
        F: FnOnce(&Connection),
    {
        let tmpdir = tempdir().unwrap();
        let path = tmpdir.path().join("source.db");
        init_source_db(&path, f);

        // now migrate
        let mut db = new_mem_db();
        let tx = db.transaction().expect("tx should work");

        let mi = migrate(&tx, &tmpdir.path().join("source.db")).expect("migrate should work");
        tx.commit().expect("should work");
        assert_eq!(mi, expect_mi);
        db
    }

    // Assert that fetching everything stored for `ext_id` (a `null` key
    // selector is passed to `api::get`) yields exactly `expect`.
    fn assert_has(c: &Connection, ext_id: &str, expect: Value) {
        assert_eq!(
            api::get(c, ext_id, json!(null)).expect("should get"),
            expect
        );
    }

    // Well-formed data for two extensions, plus one row of the special
    // `default/storage-sync-crypto` collection which must not be migrated.
    const HAPPY_PATH_SQL: &str = r#"
        INSERT INTO collection_data(collection_name, record)
        VALUES
            ('default/{e7fefcf3-b39c-4f17-5215-ebfe120a7031}', '{"id":"key-userWelcomed","key":"userWelcomed","data":1570659224457,"_status":"synced","last_modified":1579755940527}'),
            ('default/{e7fefcf3-b39c-4f17-5215-ebfe120a7031}', '{"id":"key-isWho","key":"isWho","data":"4ec8109f","_status":"synced","last_modified":1579755940497}'),
            ('default/storage-sync-crypto', '{"id":"keys","keys":{"default":["rQ=","lR="],"collections":{"extension@redux.devtools":["Bd=","ju="]}}}'),
            ('default/https-everywhere@eff.org', '{"id":"key-userRules","key":"userRules","data":[],"_status":"synced","last_modified":1570079920045}'),
            ('default/https-everywhere@eff.org', '{"id":"key-ruleActiveStates","key":"ruleActiveStates","data":{},"_status":"synced","last_modified":1570079919993}'),
            ('default/https-everywhere@eff.org', '{"id":"key-migration_5F_version","key":"migration_version","data":2,"_status":"synced","last_modified":1570079919966}')
    "#;
    // What migrating `HAPPY_PATH_SQL` should report: 5 entries across 2
    // extensions - the crypto row is excluded by the source query and so is
    // not counted at all.
    const HAPPY_PATH_MIGRATION_INFO: MigrationInfo = MigrationInfo {
        entries: 5,
        entries_successful: 5,
        extensions: 2,
        extensions_successful: 2,
        open_failure: false,
    };

    // The timestamps below are copied verbatim from real data, so don't
    // insist on digit separators.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn test_happy_paths() {
        // some real data.
        let conn = do_migrate(HAPPY_PATH_MIGRATION_INFO, |c| {
            c.execute_batch(HAPPY_PATH_SQL).expect("should populate")
        });

        assert_has(
            &conn,
            "{e7fefcf3-b39c-4f17-5215-ebfe120a7031}",
            json!({"userWelcomed": 1570659224457u64, "isWho": "4ec8109f"}),
        );
        assert_has(
            &conn,
            "https-everywhere@eff.org",
            json!({"userRules": [], "ruleActiveStates": {}, "migration_version": 2}),
        );
    }

    // Every row here is unusable in some way, so nothing should be written.
    // All 10 rows still count as `entries`, and `extensions` is the number
    // of distinct `collection_name` values (6). The 'default/' row does
    // parse, but with an empty extension id, which `migrate` never writes.
    #[test]
    fn test_sad_paths() {
        do_migrate(
            MigrationInfo {
                entries: 10,
                entries_successful: 0,
                extensions: 6,
                extensions_successful: 0,
                open_failure: false,
            },
            |c| {
                c.execute_batch(
                    r#"INSERT INTO collection_data(collection_name, record)
                    VALUES
                    ('default/test', '{"key":2,"data":1}'), -- key not a string
                    ('default/test', '{"key":"","data":1}'), -- key empty string
                    ('default/test', '{"xey":"k","data":1}'), -- key missing
                    ('default/test', '{"key":"k","xata":1}'), -- data missing
                    ('default/test', '{"key":"k","data":1'), -- invalid json
                    ('xx/test', '{"key":"k","data":1}'), -- bad key format
                    ('default', '{"key":"k","data":1}'), -- bad key format 2
                    ('default/', '{"key":"k","data":1}'), -- bad key format 3
                    ('defaultx/test', '{"key":"k","data":1}'), -- bad key format 4
                    ('', '') -- empty strings
                    "#,
                )
                .expect("should populate");
            },
        );
    }

    // Migrating through the store should persist the `MigrationInfo`, and
    // taking it should return it exactly once.
    #[test]
    fn test_migration_info_storage() {
        let tmpdir = tempdir().unwrap();
        let path = tmpdir.path().join("source.db");
        init_source_db(&path, |c| {
            c.execute_batch(HAPPY_PATH_SQL).expect("should populate")
        });

        // now migrate
        let db = crate::store::test::new_mem_store();
        db.migrate(&path).expect("migration should work");
        let mi = db
            .take_migration_info()
            .expect("take failed with info present");
        assert_eq!(mi, Some(HAPPY_PATH_MIGRATION_INFO));
        // The first take deleted the stored info, so a second finds nothing.
        let mi2 = db
            .take_migration_info()
            .expect("take failed with info missing");
        assert_eq!(mi2, None);
    }
}
451