xref: /illumos-gate/usr/src/cmd/svc/configd/backend.c (revision 7257d1b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
31  * be able to statvfs(2) possibly large systems.  This define gives us
32  * access to the transitional interfaces.  See lfcompile64(5) for how
33  * _LARGEFILE64_SOURCE works.
34  */
35 #define	_LARGEFILE64_SOURCE
36 
37 #include <assert.h>
38 #include <door.h>
39 #include <dirent.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <limits.h>
43 #include <pthread.h>
44 #include <stdarg.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sys/stat.h>
49 #include <sys/statvfs.h>
50 #include <unistd.h>
51 #include <zone.h>
52 #include <libscf_priv.h>
53 
54 #include "configd.h"
55 #include "repcache_protocol.h"
56 
57 #include <sqlite.h>
58 #include <sqlite-misc.h>
59 
60 /*
61  * This file has two purposes:
62  *
63  * 1. It contains the database schema, and the code for setting up our backend
64  *    databases, including installing said schema.
65  *
66  * 2. It provides a simplified interface to the SQL database library, and
67  *    synchronizes MT access to the database.
68  */
69 
/*
 * Accumulated cost of one kind of backend activity.  Each use of
 * UPDATE_TOTALS_WR() adds one sample to the selected backend_spent_t.
 */
70 typedef struct backend_spent {
71 	uint64_t bs_count;	/* number of samples recorded */
72 	hrtime_t bs_time;	/* sum of gethrtime() deltas */
73 	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
74 } backend_spent_t;
75 
/*
 * The two activities that are timed for a backend.  These member names are
 * what callers pass as the "field" argument of UPDATE_TOTALS_WR().
 */
76 typedef struct backend_totals {
77 	backend_spent_t	bt_lock;	/* waiting for lock */
78 	backend_spent_t	bt_exec;	/* time spent executing SQL */
79 } backend_totals_t;
80 
/*
 * Per-database state.  be_lock serializes use of the database handle;
 * backend_lock() stores the holder's thread id in be_thread after taking
 * the lock, and backend_unlock() resets it to 0 before releasing it.
 * be_totals is indexed by !!(writing) in UPDATE_TOTALS_WR(): slot 0 is
 * charged for reads and slot 1 for writes.
 */
81 typedef struct sqlite_backend {
82 	pthread_mutex_t	be_lock;
83 	pthread_t	be_thread;	/* thread holding lock */
84 	struct sqlite	*be_db;
85 	const char	*be_path;	/* path to db */
86 	int		be_readonly;	/* readonly at start, and still is */
87 	int		be_writing;	/* held for writing */
88 	backend_type_t	be_type;	/* type of db */
89 	hrtime_t	be_lastcheck;	/* time of last read-only check */
90 	backend_totals_t be_totals[2];	/* one for reading, one for writing */
91 } sqlite_backend_t;
92 
/*
 * Handle for a transaction against one backend.
 * NOTE(review): the code that creates and consumes this structure is not
 * in this part of the file; the member notes below are inferred from the
 * names and should be confirmed against it.
 */
93 struct backend_tx {
94 	sqlite_backend_t	*bt_be;		/* backend in use */
95 	int			bt_readonly;	/* presumably: read-only tx */
96 	int			bt_type;	/* presumably: backend type */
97 	int			bt_full;	/* SQLITE_FULL during tx */
98 };
99 
/*
 * Charge one interval to the "field" statistics (bt_lock or bt_exec) of
 * backend "sb".  "writing" selects the slot: be_totals[0] when it is zero,
 * be_totals[1] otherwise.  "ts" and "vts" are the gethrtime() and
 * gethrvtime() readings taken when the interval began.  Both are
 * parenthesized in the expansion so that a compound expression such as
 * "base + delta" is subtracted as a whole.
 *
 * The expansion stays a plain compound statement, so existing call sites
 * of the form "UPDATE_TOTALS_WR(...);" are unaffected.
 */
#define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
	__bsp->bs_count++;						\
	__bsp->bs_time += (gethrtime() - (ts));				\
	__bsp->bs_vtime += (gethrvtime() - (vts));			\
}

/*
 * As UPDATE_TOTALS_WR(), with the slot chosen by the backend's current
 * be_writing flag.
 */
#define	UPDATE_TOTALS(sb, field, ts, vts) \
	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
109 
/*
 * A buffer paired with a size.
 * NOTE(review): the routines that fill and consume this are outside this
 * part of the file; presumably bq_buf holds SQL text under construction
 * and bq_size is its allocated size -- confirm.
 */
110 struct backend_query {
111 	char	*bq_buf;
112 	size_t	bq_size;
113 };
114 
/*
 * One table of the schema.  The tbls_* arrays are built from these and end
 * with an entry whose members are both NULL.
 */
115 struct backend_tbl_info {
116 	const char *bti_name;	/* table name */
117 	const char *bti_cols;	/* comma-separated column definitions */
118 };
119 
/*
 * One index of the schema.  The idxs_* arrays are built from these and end
 * with an entry whose members are all NULL.
 */
120 struct backend_idx_info {
121 	const char *bxi_tbl;	/* table the index belongs to */
122 	const char *bxi_idx;	/* short index name */
123 	const char *bxi_cols;	/* comma-separated indexed columns */
124 };
125 
/*
 * Panic state.  backend_panic_thread stays 0 until a thread enters
 * backend_panic(), which stores its own thread id there while holding
 * backend_panic_lock.  Any thread that calls backend_panic() afterwards
 * waits forever on backend_panic_cv.
 */
126 static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
127 static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
128 pthread_t backend_panic_thread = 0;
129 
/* Debugging switches; all are off by default. */
130 int backend_do_trace = 0;		/* invoke tracing callback */
131 int backend_print_trace = 0;		/* tracing callback prints SQL */
132 int backend_panic_abort = 0;		/* abort when panicking */
133 
134 /* interval between read-only checks while starting up */
/* (compared against hrtime_t differences in backend_check_readonly()) */
135 #define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)
136 
137 /*
138  * Any change to the below schema should bump the version number
139  */
140 #define	BACKEND_SCHEMA_VERSION		5
141 
/*
 * Tables that exist only in the BACKEND_TYPE_NORMAL database.  Each entry
 * is { table name, column definitions }; the list ends with { NULL, NULL }.
 */
142 static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
143 	/*
144 	 * service_tbl holds all services.  svc_id is the identifier of the
145 	 * service.
146 	 */
147 	{
148 		"service_tbl",
149 		"svc_id          INTEGER PRIMARY KEY,"
150 		"svc_name        CHAR(256) NOT NULL"
151 	},
152 
153 	/*
154 	 * instance_tbl holds all of the instances.  The parent service id
155 	 * is instance_svc.
156 	 */
157 	{
158 		"instance_tbl",
159 		"instance_id     INTEGER PRIMARY KEY,"
160 		"instance_name   CHAR(256) NOT NULL,"
161 		"instance_svc    INTEGER NOT NULL"
162 	},
163 
164 	/*
165 	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
166 	 */
167 	{
168 		"snapshot_lnk_tbl",
169 		"lnk_id          INTEGER PRIMARY KEY,"
170 		"lnk_inst_id     INTEGER NOT NULL,"
171 		"lnk_snap_name   CHAR(256) NOT NULL,"
172 		"lnk_snap_id     INTEGER NOT NULL"
173 	},
174 
175 	/*
176 	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
177 	 * snaplevels.
178 	 */
179 	{
180 		"snaplevel_tbl",
181 		"snap_id                 INTEGER NOT NULL,"
182 		"snap_level_num          INTEGER NOT NULL,"
183 		"snap_level_id           INTEGER NOT NULL,"
184 		"snap_level_service_id   INTEGER NOT NULL,"
185 		"snap_level_service      CHAR(256) NOT NULL,"
186 		"snap_level_instance_id  INTEGER NULL,"
187 		"snap_level_instance     CHAR(256) NULL"
188 	},
189 
190 	/*
191 	 * snaplevel_lnk_tbl links snaplevels to property groups.
192 	 * snaplvl_pg_* is identical to the original property group,
193 	 * and snaplvl_gen_id overrides the generation number.
194 	 * The service/instance ids are as in the snaplevel.
195 	 */
196 	{
197 		"snaplevel_lnk_tbl",
198 		"snaplvl_level_id INTEGER NOT NULL,"
199 		"snaplvl_pg_id    INTEGER NOT NULL,"
200 		"snaplvl_pg_name  CHAR(256) NOT NULL,"
201 		"snaplvl_pg_type  CHAR(256) NOT NULL,"
202 		"snaplvl_pg_flags INTEGER NOT NULL,"
203 		"snaplvl_gen_id   INTEGER NOT NULL"
204 	},
205 
206 	{ NULL, NULL }
207 };
208 
/*
 * Indexes on the BACKEND_TYPE_NORMAL-only tables.  Each entry is
 * { table, index name, indexed columns }; the list ends with an all-NULL
 * entry.
 */
209 static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
210 	{ "service_tbl",	"name",	"svc_name" },
211 	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
212 	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
213 	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
214 	{ "snaplevel_tbl",	"id",	"snap_id" },
215 	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
216 	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
217 	{ NULL, NULL, NULL }
218 };
219 
/*
 * The non-persistent backend has no tables or indexes of its own: both
 * lists below contain only the terminating sentinel.
 */
220 static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
221 	{ NULL, NULL }
222 };
223 
224 static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
225 	{ NULL, NULL, NULL }
226 };
227 
/*
 * Tables present in every backend type.  Each entry is
 * { table name, column definitions }; the list ends with { NULL, NULL }.
 */
228 static struct backend_tbl_info tbls_common[] = { /* all backend types */
229 	/*
230 	 * pg_tbl defines property groups.  They are associated with a single
231 	 * service or instance.  The pg_gen_id links them with the latest
232 	 * "edited" version of its properties.
233 	 */
234 	{
235 		"pg_tbl",
236 		"pg_id           INTEGER PRIMARY KEY,"
237 		"pg_parent_id    INTEGER NOT NULL,"
238 		"pg_name         CHAR(256) NOT NULL,"
239 		"pg_type         CHAR(256) NOT NULL,"
240 		"pg_flags        INTEGER NOT NULL,"
241 		"pg_gen_id       INTEGER NOT NULL"
242 	},
243 
244 	/*
245 	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
246 	 * (prop_name, prop_type, val_id) trios.
247 	 */
248 	{
249 		"prop_lnk_tbl",
250 		"lnk_prop_id     INTEGER PRIMARY KEY,"
251 		"lnk_pg_id       INTEGER NOT NULL,"
252 		"lnk_gen_id      INTEGER NOT NULL,"
253 		"lnk_prop_name   CHAR(256) NOT NULL,"
254 		"lnk_prop_type   CHAR(2) NOT NULL,"
255 		"lnk_val_id      INTEGER"
256 	},
257 
258 	/*
259 	 * value_tbl maps a value_id to a set of values.  For any given
260 	 * value_id, value_type is constant.
261 	 */
262 	{
263 		"value_tbl",
264 		"value_id        INTEGER NOT NULL,"
265 		"value_type      CHAR(1) NOT NULL,"
266 		"value_value     VARCHAR NOT NULL"
267 	},
268 
269 	/*
270 	 * id_tbl has one row per id space
271 	 */
272 	{
273 		"id_tbl",
274 		"id_name         STRING NOT NULL,"
275 		"id_next         INTEGER NOT NULL"
276 	},
277 
278 	/*
279 	 * schema_version has a single row, which contains
280 	 * BACKEND_SCHEMA_VERSION at the time of creation.
281 	 */
282 	{
283 		"schema_version",
284 		"schema_version  INTEGER"
285 	},
286 	{ NULL, NULL }
287 };
288 
/*
 * Indexes on the tables shared by every backend type.  Each entry is
 * { table, index name, indexed columns }; the list ends with an all-NULL
 * entry.
 */
289 static struct backend_idx_info idxs_common[] = { /* all backend types */
290 	{ "pg_tbl",		"parent", "pg_parent_id" },
291 	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
292 	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
293 	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
294 	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
295 	{ "value_tbl",		"id",	"value_id" },
296 	{ "id_tbl",		"id",	"id_name" },
297 	{ NULL, NULL, NULL }
298 };
299 
/*
 * Argument block for run_single_int_callback().  The callback stores the
 * parsed value through rs_out and sets rs_result to REP_PROTOCOL_SUCCESS;
 * rs_result must hold some other value when the callback is entered.
 */
300 struct run_single_int_info {
301 	uint32_t	*rs_out;	/* where the parsed integer goes */
302 	int		rs_result;	/* set to _SUCCESS once a value is seen */
303 };
304 
305 /*ARGSUSED*/
306 static int
307 run_single_int_callback(void *arg, int columns, char **vals, char **names)
308 {
309 	struct run_single_int_info *info = arg;
310 	uint32_t val;
311 
312 	char *endptr = vals[0];
313 
314 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
315 	assert(columns == 1);
316 
317 	if (vals[0] == NULL)
318 		return (BACKEND_CALLBACK_CONTINUE);
319 
320 	errno = 0;
321 	val = strtoul(vals[0], &endptr, 10);
322 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
323 		backend_panic("malformed integer \"%20s\"", vals[0]);
324 
325 	*info->rs_out = val;
326 	info->rs_result = REP_PROTOCOL_SUCCESS;
327 	return (BACKEND_CALLBACK_CONTINUE);
328 }
329 
330 /*ARGSUSED*/
331 int
332 backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
333 {
334 	return (BACKEND_CALLBACK_ABORT);
335 }
336 
337 /*
338  * check to see if we can successfully start a transaction;  if not, the
339  * filesystem is mounted read-only.
340  */
341 static int
342 backend_is_readonly(struct sqlite *db, const char *path)
343 {
344 	int r;
345 	statvfs64_t stat;
346 
347 	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
348 		return (SQLITE_READONLY);
349 
350 	r = sqlite_exec(db,
351 	    "BEGIN TRANSACTION; "
352 	    "UPDATE schema_version SET schema_version = schema_version; ",
353 	    NULL, NULL, NULL);
354 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
355 	return (r);
356 }
357 
358 static void
359 backend_trace_sql(void *arg, const char *sql)
360 {
361 	sqlite_backend_t *be = arg;
362 
363 	if (backend_print_trace) {
364 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
365 	}
366 }
367 
/*
 * be_info[] is the storage for each backend, indexed by backend type.
 * bes[t] is NULL until backend_create_finish() publishes &be_info[t]
 * there; backend_lock() treats a NULL entry as "backend does not exist".
 */
368 static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
369 static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];
370 
371 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
372 /*
373  * backend_panic() -- some kind of database problem or corruption has been hit.
374  * We attempt to quiesce the other database users -- all of the backend sql
375  * entry points will call backend_panic(NULL) if a panic is in progress, as
376  * will any attempt to start a transaction.
377  *
378  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
379  * either drop the lock or call backend_panic().  If they don't respond in
380  * time, we'll just exit anyway.
381  */
382 void
383 backend_panic(const char *format, ...)
384 {
385 	int i;
386 	va_list args;
387 	int failed = 0;
388 
389 	(void) pthread_mutex_lock(&backend_panic_lock);
390 	if (backend_panic_thread != 0) {
391 		(void) pthread_mutex_unlock(&backend_panic_lock);
392 		/*
393 		 * first, drop any backend locks we're holding, then
394 		 * sleep forever on the panic_cv.
395 		 */
396 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
397 			if (bes[i] != NULL &&
398 			    bes[i]->be_thread == pthread_self())
399 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
400 		}
401 		(void) pthread_mutex_lock(&backend_panic_lock);
402 		for (;;)
403 			(void) pthread_cond_wait(&backend_panic_cv,
404 			    &backend_panic_lock);
405 	}
406 	backend_panic_thread = pthread_self();
407 	(void) pthread_mutex_unlock(&backend_panic_lock);
408 
409 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
410 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
411 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
412 	}
413 
414 	va_start(args, format);
415 	configd_vcritical(format, args);
416 	va_end(args);
417 
418 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
419 		timespec_t rel;
420 
421 		rel.tv_sec = 0;
422 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
423 
424 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
425 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
426 			    &rel) != 0)
427 				failed++;
428 		}
429 	}
430 	if (failed) {
431 		configd_critical("unable to quiesce database\n");
432 	}
433 
434 	if (backend_panic_abort)
435 		abort();
436 
437 	exit(CONFIGD_EXIT_DATABASE_BAD);
438 }
439 
440 /*
441  * Returns
442  *   _SUCCESS
443  *   _DONE - callback aborted query
444  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
445  */
446 static int
447 backend_error(sqlite_backend_t *be, int error, char *errmsg)
448 {
449 	if (error == SQLITE_OK)
450 		return (REP_PROTOCOL_SUCCESS);
451 
452 	switch (error) {
453 	case SQLITE_ABORT:
454 		free(errmsg);
455 		return (REP_PROTOCOL_DONE);
456 
457 	case SQLITE_NOMEM:
458 	case SQLITE_FULL:
459 	case SQLITE_TOOBIG:
460 		free(errmsg);
461 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
462 
463 	default:
464 		backend_panic("%s: db error: %s", be->be_path, errmsg);
465 		/*NOTREACHED*/
466 	}
467 }
468 
/*
 * Free a list built by backend_backup_get_prev(): the first out_sz strings
 * and then the array itself.  A non-positive out_sz frees only the array,
 * which may be NULL.
 */
static void
backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
{
	ssize_t i;

	for (i = 0; i < out_sz; i++)
		free((char *)out_arg[i]);

	free(out_arg);
}
478 
/*
 * Returns nonzero iff "sfx" is exactly a timestamp of the form
 * YYYYMMDD_HHMMSS: digits everywhere except for an '_' after the date,
 * with nothing following.
 */
static int
backend_backup_is_stamp(const char *sfx)
{
	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t stamp_len = 4 + 5*2 + 1;
	const size_t bar_at = 4 + 2*2;
	size_t i;

	for (i = 0; i < stamp_len; i++) {
		char c = sfx[i];

		if (i == bar_at) {
			if (c != '_')
				return (0);
		} else if (c < '0' || c > '9') {
			return (0);
		}
	}
	return (sfx[stamp_len] == '\0');
}

/*
 * Builds an inverse-time-sorted (newest first) array of the existing backup
 * files for one backup name.  The path is a single buffer, of which only
 * the first pathlen bytes are considered, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * The directory searched is everything up to the last '/', or "." if there
 * is none.  An entry matches when it is the basename followed by '-' and a
 * YYYYMMDD_HHMMSS timestamp.  The buffer is modified temporarily while we
 * work and is restored before returning.
 *
 * On success *out_arg is the array (NULL when there are no matches); each
 * element is an allocated copy of a directory entry name, to be released
 * with backend_backup_cleanup().
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error (with *out_arg set to NULL).
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char **list = NULL;
	size_t nlist = 0;
	char *pathend = pathname + pathlen;
	char saved_end, saved_start;
	char *base, *dirpath;
	size_t baselen;
	struct dirent *ent;
	DIR *dir;

	/* Cut the buffer at pathlen for the duration of the scan. */
	saved_end = *pathend;
	*pathend = '\0';

	base = strrchr(pathname, '/');
	if (base == NULL) {
		base = pathname;
		dirpath = ".";
	} else {
		assert(pathend > pathname && base < pathend);
		base++;
		dirpath = pathname;
	}

	baselen = strlen(base);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	saved_start = base[0];
	base[0] = '\0';
	dir = opendir(dirpath);
	base[0] = saved_start;

	if (dir == NULL)
		goto fail;

	while ((ent = readdir(dir)) != NULL) {
		char *entry, *stamp;
		size_t pos;

		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, base, baselen) != 0 ||
		    ent->d_name[baselen] != '-' ||
		    !backend_backup_is_stamp(ent->d_name + baselen + 1))
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		entry = strdup(ent->d_name);
		if (entry == NULL)
			goto fail_closedir;
		stamp = strrchr(entry, '-');

		for (pos = 0; pos < nlist; pos++) {
			char *held = list[pos];
			char *held_stamp = strrchr(held, '-');
			int order = strcmp(stamp, held_stamp);

			if (order == 0)
				order = strcmp(entry, held);

			if (order == 0) {
				/* already listed; drop the duplicate */
				free(entry);
				entry = NULL;
				break;
			}
			if (order > 0) {
				/*
				 * Ours sorts first: take this slot and
				 * carry the displaced entry onward.
				 */
				list[pos] = entry;
				entry = held;
				stamp = held_stamp;
			}
		}

		if (pos == nlist) {
			char **grown = realloc(list,
			    (nlist + 1) * sizeof (*list));

			if (grown == NULL) {
				free(entry);
				goto fail_closedir;
			}

			list = grown;
			list[nlist++] = entry;
		} else {
			assert(entry == NULL);
		}
	}
	(void) closedir(dir);

	base[baselen] = saved_end;

	*out_arg = (const char **)list;
	return (nlist);

fail_closedir:
	(void) closedir(dir);
fail:
	base[0] = saved_start;
	*pathend = saved_end;

	backend_backup_cleanup((const char **)list, nlist);

	*out_arg = NULL;
	return (-1);
}
629 
630 /*
631  * Copies the repository path into out, a buffer of out_len bytes,
632  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
633  * appends "-name" to it.  If name is non-NULL, it can fail with:
634  *
635  *	_TRUNCATED	will not fit in buffer.
636  *	_BAD_REQUEST	name is not a valid identifier
637  */
638 static rep_protocol_responseid_t
639 backend_backup_base(sqlite_backend_t *be, const char *name,
640     char *out, size_t out_len)
641 {
642 	char *p, *q;
643 	size_t len;
644 
645 	/*
646 	 * for paths of the form /path/to/foo.db, we truncate at the final
647 	 * '.'.
648 	 */
649 	(void) strlcpy(out, be->be_path, out_len);
650 
651 	p = strrchr(out, '/');
652 	q = strrchr(out, '.');
653 
654 	if (p != NULL && q != NULL && q > p)
655 		*q = 0;
656 
657 	if (name != NULL) {
658 		len = strlen(out);
659 		assert(len < out_len);
660 
661 		out += len;
662 		out_len -= len;
663 
664 		len = strlen(name);
665 
666 		/*
667 		 * verify that the name tag is entirely alphabetic,
668 		 * non-empty, and not too long.
669 		 */
670 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
671 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
672 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
673 
674 		if (snprintf(out, out_len, "-%s", name) >= out_len)
675 			return (REP_PROTOCOL_FAIL_TRUNCATED);
676 	}
677 
678 	return (REP_PROTOCOL_SUCCESS);
679 }
680 
/*
 * Read up to len bytes from fd into buf, retrying after EINTR and after
 * short reads.  Returns the number of bytes read, which is less than len
 * only at end-of-file, or -1 on a read error.
 */
static ssize_t
backend_read_fully(int fd, char *buf, size_t len)
{
	size_t total = 0;

	while (total < len) {
		ssize_t n = read(fd, buf + total, len - total);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		if (n == 0)
			break;
		total += (size_t)n;
	}
	return ((ssize_t)total);
}

/*
 * See if a backup is needed.  We do a backup unless both files are
 * byte-for-byte identical.
 *
 * Returns 0 only when both files could be opened, are distinct files of
 * equal size, and compare equal.  Returns 1 in every other case, including
 * any open, fstat or read failure.
 *
 * The files are compared with read(2) directly on the two descriptors, so
 * each descriptor is closed exactly once, here.  (Wrapping them in stdio
 * streams and then closing both the stream and the descriptor would close
 * the same descriptor number twice, which in a threaded process can hit a
 * descriptor another thread has just opened.)
 */
static int
backend_check_backup_needed(const char *rep_name, const char *backup_name)
{
	enum { CMP_CHUNK = 1024 };
	char rep_buf[CMP_CHUNK];
	char backup_buf[CMP_CHUNK];
	int repfd = open(rep_name, O_RDONLY);
	int fd = open(backup_name, O_RDONLY);
	struct stat s_rep, s_backup;
	int needed = 1;

	if (repfd < 0 || fd < 0)
		goto out;

	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
		goto out;

	/*
	 * if they are the same file, we need to do a backup to break the
	 * hard link or symlink involved.
	 */
	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
		goto out;

	if (s_rep.st_size != s_backup.st_size)
		goto out;

	for (;;) {
		ssize_t n_rep = backend_read_fully(repfd, rep_buf,
		    sizeof (rep_buf));
		ssize_t n_backup = backend_read_fully(fd, backup_buf,
		    sizeof (backup_buf));

		if (n_rep < 0 || n_backup < 0 || n_rep != n_backup)
			break;

		if (n_rep == 0) {
			/* both hit end-of-file together: identical */
			needed = 0;
			break;
		}

		if (memcmp(rep_buf, backup_buf, (size_t)n_rep) != 0)
			break;
	}

out:
	if (repfd >= 0)
		(void) close(repfd);
	if (fd >= 0)
		(void) close(fd);
	return (needed);
}
742 
743 /*
744  * This interface is called to perform the actual copy
745  *
746  * Return:
747  *	_FAIL_UNKNOWN		read/write fails
748  *	_FAIL_NO_RESOURCES	out of memory
749  *	_SUCCESS		copy succeeds
750  */
751 static rep_protocol_responseid_t
752 backend_do_copy(const char *src, int srcfd, const char *dst,
753     int dstfd, size_t *sz)
754 {
755 	char *buf;
756 	off_t nrd, nwr, n, r_off = 0, w_off = 0;
757 
758 	if ((buf = malloc(8192)) == NULL)
759 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
760 
761 	while ((nrd = read(srcfd, buf, 8192)) != 0) {
762 		if (nrd < 0) {
763 			if (errno == EINTR)
764 				continue;
765 
766 			configd_critical(
767 			    "Backend copy failed: fails to read from %s "
768 			    "at offset %d: %s\n", src, r_off, strerror(errno));
769 			free(buf);
770 			return (REP_PROTOCOL_FAIL_UNKNOWN);
771 		}
772 
773 		r_off += nrd;
774 
775 		nwr = 0;
776 		do {
777 			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
778 				if (errno == EINTR)
779 					continue;
780 
781 				configd_critical(
782 				    "Backend copy failed: fails to write to %s "
783 				    "at offset %d: %s\n", dst, w_off,
784 				    strerror(errno));
785 				free(buf);
786 				return (REP_PROTOCOL_FAIL_UNKNOWN);
787 			}
788 
789 			nwr += n;
790 			w_off += n;
791 
792 		} while (nwr < nrd);
793 	}
794 
795 	if (sz)
796 		*sz = w_off;
797 
798 	free(buf);
799 	return (REP_PROTOCOL_SUCCESS);
800 }
801 
802 /*
803  * Can return:
804  *	_BAD_REQUEST		name is not valid
805  *	_TRUNCATED		name is too long for current repository path
806  *	_UNKNOWN		failed for unknown reason (details written to
807  *				console)
808  *	_BACKEND_READONLY	backend is not writable
809  *	_NO_RESOURCES		out of memory
810  *	_SUCCESS		Backup completed successfully.
811  */
812 static rep_protocol_responseid_t
813 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
814 {
815 	const char **old_list;
816 	ssize_t old_sz;
817 	ssize_t old_max = max_repository_backups;
818 	ssize_t cur;
819 	char *finalname;
820 	char *finalpath;
821 	char *tmppath;
822 	int infd, outfd;
823 	size_t len;
824 	time_t now;
825 	struct tm now_tm;
826 	rep_protocol_responseid_t result;
827 
828 	if ((finalpath = malloc(PATH_MAX)) == NULL)
829 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
830 
831 	if ((tmppath = malloc(PATH_MAX)) == NULL) {
832 		free(finalpath);
833 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
834 	}
835 
836 	if (be->be_readonly) {
837 		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
838 		goto out;
839 	}
840 
841 	result = backend_backup_base(be, name, finalpath, PATH_MAX);
842 	if (result != REP_PROTOCOL_SUCCESS)
843 		goto out;
844 
845 	if (!backend_check_backup_needed(be->be_path, finalpath)) {
846 		result = REP_PROTOCOL_SUCCESS;
847 		goto out;
848 	}
849 
850 	/*
851 	 * remember the original length, and the basename location
852 	 */
853 	len = strlen(finalpath);
854 	finalname = strrchr(finalpath, '/');
855 	if (finalname != NULL)
856 		finalname++;
857 	else
858 		finalname = finalpath;
859 
860 	(void) strlcpy(tmppath, finalpath, PATH_MAX);
861 	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
862 		result = REP_PROTOCOL_FAIL_TRUNCATED;
863 		goto out;
864 	}
865 
866 	now = time(NULL);
867 	if (localtime_r(&now, &now_tm) == NULL) {
868 		configd_critical(
869 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
870 		    be->be_path, strerror(errno));
871 		result = REP_PROTOCOL_FAIL_UNKNOWN;
872 		goto out;
873 	}
874 
875 	if (strftime(finalpath + len, PATH_MAX - len,
876 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
877 		result = REP_PROTOCOL_FAIL_TRUNCATED;
878 		goto out;
879 	}
880 
881 	infd = open(be->be_path, O_RDONLY);
882 	if (infd < 0) {
883 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
884 		    be->be_path, strerror(errno));
885 		result = REP_PROTOCOL_FAIL_UNKNOWN;
886 		goto out;
887 	}
888 
889 	outfd = mkstemp(tmppath);
890 	if (outfd < 0) {
891 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
892 		    name, tmppath, strerror(errno));
893 		(void) close(infd);
894 		result = REP_PROTOCOL_FAIL_UNKNOWN;
895 		goto out;
896 	}
897 
898 	if ((result = backend_do_copy((const char *)be->be_path, infd,
899 	    (const char *)tmppath, outfd, NULL)) != REP_PROTOCOL_SUCCESS)
900 		goto fail;
901 
902 	/*
903 	 * grab the old list before doing our re-name.
904 	 */
905 	if (old_max > 0)
906 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
907 
908 	if (rename(tmppath, finalpath) < 0) {
909 		configd_critical(
910 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
911 		    name, tmppath, finalpath, strerror(errno));
912 		result = REP_PROTOCOL_FAIL_UNKNOWN;
913 		goto fail;
914 	}
915 
916 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
917 
918 	(void) unlink(tmppath);
919 	if (symlink(finalname, tmppath) < 0) {
920 		configd_critical(
921 		    "\"%s\" backup completed, but updating "
922 		    "\"%s\" symlink to \"%s\" failed: %s\n",
923 		    name, tmppath, finalname, strerror(errno));
924 	}
925 
926 	if (old_max > 0 && old_sz > 0) {
927 		/* unlink all but the first (old_max - 1) files */
928 		for (cur = old_max - 1; cur < old_sz; cur++) {
929 			(void) strlcpy(finalname, old_list[cur],
930 			    PATH_MAX - (finalname - finalpath));
931 			if (unlink(finalpath) < 0)
932 				configd_critical(
933 				    "\"%s\" backup completed, but removing old "
934 				    "file \"%s\" failed: %s\n",
935 				    name, finalpath, strerror(errno));
936 		}
937 
938 		backend_backup_cleanup(old_list, old_sz);
939 	}
940 
941 	result = REP_PROTOCOL_SUCCESS;
942 
943 fail:
944 	(void) close(infd);
945 	(void) close(outfd);
946 	if (result != REP_PROTOCOL_SUCCESS)
947 		(void) unlink(tmppath);
948 
949 out:
950 	free(finalpath);
951 	free(tmppath);
952 
953 	return (result);
954 }
955 
956 static int
957 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
958 {
959 	char *errp;
960 	struct sqlite *new;
961 	int r;
962 
963 	assert(be->be_readonly);
964 	assert(be == bes[BACKEND_TYPE_NORMAL]);
965 
966 	/*
967 	 * If we don't *need* to be writable, only check every once in a
968 	 * while.
969 	 */
970 	if (!writing) {
971 		if ((uint64_t)(t - be->be_lastcheck) <
972 		    BACKEND_READONLY_CHECK_INTERVAL)
973 			return (REP_PROTOCOL_SUCCESS);
974 		be->be_lastcheck = t;
975 	}
976 
977 	new = sqlite_open(be->be_path, 0600, &errp);
978 	if (new == NULL) {
979 		backend_panic("reopening %s: %s\n", be->be_path, errp);
980 		/*NOTREACHED*/
981 	}
982 	r = backend_is_readonly(new, be->be_path);
983 
984 	if (r != SQLITE_OK) {
985 		sqlite_close(new);
986 		if (writing)
987 			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
988 		return (REP_PROTOCOL_SUCCESS);
989 	}
990 
991 	/*
992 	 * We can write!  Swap the db handles, mark ourself writable,
993 	 * and make a backup.
994 	 */
995 	sqlite_close(be->be_db);
996 	be->be_db = new;
997 	be->be_readonly = 0;
998 
999 	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
1000 	    REP_PROTOCOL_SUCCESS) {
1001 		configd_critical(
1002 		    "unable to create \"%s\" backup of \"%s\"\n",
1003 		    REPOSITORY_BOOT_BACKUP, be->be_path);
1004 	}
1005 
1006 	return (REP_PROTOCOL_SUCCESS);
1007 }
1008 
1009 /*
1010  * If t is not BACKEND_TYPE_NORMAL, can fail with
1011  *   _BACKEND_ACCESS - backend does not exist
1012  *
1013  * If writing is nonzero, can also fail with
1014  *   _BACKEND_READONLY - backend is read-only
1015  */
1016 static int
1017 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
1018 {
1019 	sqlite_backend_t *be = NULL;
1020 	hrtime_t ts, vts;
1021 
1022 	*bep = NULL;
1023 
1024 	assert(t == BACKEND_TYPE_NORMAL ||
1025 	    t == BACKEND_TYPE_NONPERSIST);
1026 
1027 	be = bes[t];
1028 	if (t == BACKEND_TYPE_NORMAL)
1029 		assert(be != NULL);		/* should always be there */
1030 
1031 	if (be == NULL)
1032 		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
1033 
1034 	if (backend_panic_thread != 0)
1035 		backend_panic(NULL);		/* don't proceed */
1036 
1037 	ts = gethrtime();
1038 	vts = gethrvtime();
1039 	(void) pthread_mutex_lock(&be->be_lock);
1040 	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
1041 
1042 	if (backend_panic_thread != 0) {
1043 		(void) pthread_mutex_unlock(&be->be_lock);
1044 		backend_panic(NULL);		/* don't proceed */
1045 	}
1046 	be->be_thread = pthread_self();
1047 
1048 	if (be->be_readonly) {
1049 		int r;
1050 		assert(t == BACKEND_TYPE_NORMAL);
1051 
1052 		r = backend_check_readonly(be, writing, ts);
1053 		if (r != REP_PROTOCOL_SUCCESS) {
1054 			be->be_thread = 0;
1055 			(void) pthread_mutex_unlock(&be->be_lock);
1056 			return (r);
1057 		}
1058 	}
1059 
1060 	if (backend_do_trace)
1061 		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
1062 	else
1063 		(void) sqlite_trace(be->be_db, NULL, NULL);
1064 
1065 	be->be_writing = writing;
1066 	*bep = be;
1067 	return (REP_PROTOCOL_SUCCESS);
1068 }
1069 
1070 static void
1071 backend_unlock(sqlite_backend_t *be)
1072 {
1073 	be->be_writing = 0;
1074 	be->be_thread = 0;
1075 	(void) pthread_mutex_unlock(&be->be_lock);
1076 }
1077 
1078 static void
1079 backend_destroy(sqlite_backend_t *be)
1080 {
1081 	if (be->be_db != NULL) {
1082 		sqlite_close(be->be_db);
1083 		be->be_db = NULL;
1084 	}
1085 	be->be_thread = 0;
1086 	(void) pthread_mutex_unlock(&be->be_lock);
1087 	(void) pthread_mutex_destroy(&be->be_lock);
1088 }
1089 
1090 static void
1091 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
1092 {
1093 	assert(MUTEX_HELD(&be->be_lock));
1094 	assert(be == &be_info[backend_id]);
1095 
1096 	bes[backend_id] = be;
1097 	(void) pthread_mutex_unlock(&be->be_lock);
1098 }
1099 
/*
 * Write the whole NUL-terminated string mess to fd, continuing after
 * partial writes and retrying writes interrupted by a signal (EINTR), as
 * backend_do_copy() does.
 *
 * Returns 0 once everything has been written (an empty string writes
 * nothing), or -1 on any other write error.
 */
static int
backend_fd_write(int fd, const char *mess)
{
	size_t left = strlen(mess);

	while (left > 0) {
		ssize_t n = write(fd, mess, left);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			return (-1);
		}
		mess += n;
		left -= (size_t)n;
	}
	return (0);
}
1114 
1115 /*
1116  * Can return:
1117  *	_BAD_REQUEST		name is not valid
1118  *	_TRUNCATED		name is too long for current repository path
1119  *	_UNKNOWN		failed for unknown reason (details written to
1120  *				console)
1121  *	_BACKEND_READONLY	backend is not writable
1122  *	_NO_RESOURCES		out of memory
1123  *	_SUCCESS		Backup completed successfully.
1124  */
1125 rep_protocol_responseid_t
1126 backend_create_backup(const char *name)
1127 {
1128 	rep_protocol_responseid_t result;
1129 	sqlite_backend_t *be;
1130 
1131 	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
1132 	assert(result == REP_PROTOCOL_SUCCESS);
1133 
1134 	result = backend_create_backup_locked(be, name);
1135 	backend_unlock(be);
1136 
1137 	return (result);
1138 }
1139 
1140 /*
1141  * Copy the repository.  If the sw_back flag is not set, we are
1142  * copying the repository from the default location under /etc/svc to
1143  * the tmpfs /etc/svc/volatile location.  If the flag is set, we are
1144  * copying back to the /etc/svc location from the volatile location
1145  * after manifest-import is completed.
1146  *
1147  * Can return:
1148  *
1149  *	REP_PROTOCOL_SUCCESS		successful copy and rename
1150  *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
1151  *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
1152  */
1153 static rep_protocol_responseid_t
1154 backend_switch_copy(const char *src, const char *dst, int sw_back)
1155 {
1156 	int srcfd, dstfd;
1157 	char *tmppath = malloc(PATH_MAX);
1158 	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
1159 	struct stat s_buf;
1160 	size_t cpsz, sz;
1161 
1162 	if (tmppath == NULL) {
1163 		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
1164 		goto out;
1165 	}
1166 
1167 	/*
1168 	 * Create and open the related db files
1169 	 */
1170 	(void) strlcpy(tmppath, dst, PATH_MAX);
1171 	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
1172 	assert(sz < PATH_MAX);
1173 	if (sz >= PATH_MAX) {
1174 		configd_critical(
1175 		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
1176 		abort();
1177 	}
1178 
1179 	if ((dstfd = mkstemp(tmppath)) < 0) {
1180 		configd_critical("Backend copy failed: mkstemp %s: %s\n",
1181 		    tmppath, strerror(errno));
1182 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1183 		goto out;
1184 	}
1185 
1186 	if ((srcfd = open(src, O_RDONLY)) < 0) {
1187 		configd_critical("Backend copy failed: opening %s: %s\n",
1188 		    src, strerror(errno));
1189 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1190 		goto errexit;
1191 	}
1192 
1193 	/*
1194 	 * fstat the backend before copy for sanity check.
1195 	 */
1196 	if (fstat(srcfd, &s_buf) < 0) {
1197 		configd_critical("Backend copy failed: fstat %s: %s\n",
1198 		    src, strerror(errno));
1199 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1200 		goto errexit;
1201 	}
1202 
1203 	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
1204 	    REP_PROTOCOL_SUCCESS)
1205 		goto errexit;
1206 
1207 	if (cpsz != s_buf.st_size) {
1208 		configd_critical("Backend copy failed: incomplete copy\n");
1209 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1210 		goto errexit;
1211 	}
1212 
1213 	/*
1214 	 * Rename tmppath to dst
1215 	 */
1216 	if (rename(tmppath, dst) < 0) {
1217 		configd_critical(
1218 		    "Backend copy failed: rename %s to %s: %s\n",
1219 		    tmppath, dst, strerror(errno));
1220 		res = REP_PROTOCOL_FAIL_UNKNOWN;
1221 	}
1222 
1223 errexit:
1224 	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
1225 		configd_critical(
1226 		    "Backend copy failed: remove %s: %s\n",
1227 		    tmppath, strerror(errno));
1228 
1229 	(void) close(srcfd);
1230 	(void) close(dstfd);
1231 
1232 out:
1233 	free(tmppath);
1234 	if (sw_back) {
1235 		if (unlink(src) < 0)
1236 			configd_critical(
1237 			    "Backend copy failed: remove %s: %s\n",
1238 			    src, strerror(errno));
1239 	}
1240 
1241 	return (res);
1242 }
1243 
1244 /*
1245  * Perform sanity check on the repository.
1246  * Return 0 if check succeeds or -1 if fails.
1247  */
1248 static int
1249 backend_switch_check(struct sqlite *be_db, char **errp)
1250 {
1251 	struct run_single_int_info info;
1252 	uint32_t val = -1UL;
1253 	int r;
1254 
1255 	info.rs_out = &val;
1256 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1257 
1258 	r = sqlite_exec(be_db,
1259 	    "SELECT schema_version FROM schema_version;",
1260 	    run_single_int_callback, &info, errp);
1261 
1262 	if (r == SQLITE_OK &&
1263 	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
1264 	    val == BACKEND_SCHEMA_VERSION)
1265 		return (0);
1266 	else
1267 		return (-1);
1268 }
1269 
1270 /*
1271  * Backend switch entry point.  It is called to perform the backend copy and
1272  * switch from src to dst.  First, it blocks all other clients from accessing
1273  * the repository by calling backend_lock to lock the repository.  Upon
1274  * successful lock, copying and switching of the repository are performed.
1275  *
1276  * Can return:
1277  *	REP_PROTOCOL_SUCCESS			successful switch
1278  *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
1279  *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
1280  *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
1281  *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
1282  */
1283 rep_protocol_responseid_t
1284 backend_switch(int sw_back)
1285 {
1286 	rep_protocol_responseid_t result;
1287 	sqlite_backend_t *be;
1288 	struct sqlite *new;
1289 	char *errp;
1290 	const char *dst;
1291 
1292 	result = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
1293 	if (result != REP_PROTOCOL_SUCCESS)
1294 		return (result);
1295 
1296 	if (sw_back) {
1297 		dst = REPOSITORY_DB;
1298 	} else {
1299 		dst = FAST_REPOSITORY_DB;
1300 	}
1301 
1302 	/*
1303 	 * Do the actual copy and rename
1304 	 */
1305 	result = backend_switch_copy(be->be_path, dst, sw_back);
1306 	if (result != REP_PROTOCOL_SUCCESS) {
1307 		goto errout;
1308 	}
1309 
1310 	/*
1311 	 * Do the backend sanity check and switch
1312 	 */
1313 	new = sqlite_open(dst, 0600, &errp);
1314 	if (new != NULL) {
1315 		/*
1316 		 * Sanity check
1317 		 */
1318 		if (backend_switch_check(new, &errp) == 0) {
1319 			free((char *)be->be_path);
1320 			be->be_path = strdup(dst);
1321 			if (be->be_path == NULL) {
1322 				configd_critical(
1323 				    "Backend switch failed: strdup %s: %s\n",
1324 				    dst, strerror(errno));
1325 				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
1326 				sqlite_close(new);
1327 			} else {
1328 				sqlite_close(be->be_db);
1329 				be->be_db = new;
1330 			}
1331 		} else {
1332 			configd_critical(
1333 			    "Backend switch failed: integrity check %s: %s\n",
1334 			    dst, errp);
1335 			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1336 		}
1337 	} else {
1338 		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
1339 		    dst, errp);
1340 		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
1341 	}
1342 
1343 errout:
1344 	backend_unlock(be);
1345 	return (result);
1346 }
1347 
1348 /*
1349  * This routine is called to attempt the recovery of
1350  * the most recent valid repository if possible when configd
1351  * is restarted for some reasons or when system crashes
1352  * during the switch operation.  The repository databases
1353  * referenced here are indicators of successful switch
1354  * operations.
1355  */
1356 static void
1357 backend_switch_recovery(void)
1358 {
1359 	const char *fast_db = FAST_REPOSITORY_DB;
1360 	char *errp;
1361 	struct stat s_buf;
1362 	struct sqlite *be_db;
1363 
1364 
1365 	/*
1366 	 * A good transient db containing most recent data can
1367 	 * exist if system or svc.configd crashes during the
1368 	 * switch operation.  If that is the case, check its
1369 	 * integrity and use it.
1370 	 */
1371 	if (stat(fast_db, &s_buf) < 0) {
1372 		return;
1373 	}
1374 
1375 	/*
1376 	 * Do sanity check on the db
1377 	 */
1378 	be_db = sqlite_open(fast_db, 0600, &errp);
1379 
1380 	if (be_db != NULL) {
1381 		if (backend_switch_check(be_db, &errp) == 0)
1382 			(void) backend_switch_copy(fast_db, REPOSITORY_DB, 1);
1383 	}
1384 
1385 	(void) unlink(fast_db);
1386 }
1387 
1388 /*ARGSUSED*/
1389 static int
1390 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
1391 {
1392 	char **out = private;
1393 	char *old = *out;
1394 	char *new;
1395 	const char *info;
1396 	size_t len;
1397 	int x;
1398 
1399 	for (x = 0; x < narg; x++) {
1400 		if ((info = vals[x]) != NULL &&
1401 		    strcmp(info, "ok") != 0) {
1402 			len = (old == NULL)? 0 : strlen(old);
1403 			len += strlen(info) + 2;	/* '\n' + '\0' */
1404 
1405 			new = realloc(old, len);
1406 			if (new == NULL)
1407 				return (BACKEND_CALLBACK_ABORT);
1408 			if (old == NULL)
1409 				new[0] = 0;
1410 			old = *out = new;
1411 			(void) strlcat(new, info, len);
1412 			(void) strlcat(new, "\n", len);
1413 		}
1414 	}
1415 	return (BACKEND_CALLBACK_CONTINUE);
1416 }
1417 
/*
 * Set up the be_info[] entry for backend_id on top of the sqlite database
 * in db_file: open it, verify the schema version, read the file through
 * once, run "PRAGMA integrity_check;" and finally probe writability.
 *
 * be->be_lock is initialized and taken at the start.  On the _SUCCESS,
 * _READONLY and _NEED_INIT returns *bep is set to the backend and the lock
 * is still held (no unlock happens on those paths).  On the _FAIL and
 * _LOCKED returns *bep is set to NULL and backend_destroy() has been called
 * on the entry.
 *
 * Returns one of:
 *	BACKEND_CREATE_LOCKED		sqlite reported SQLITE_BUSY/_LOCKED
 *	BACKEND_CREATE_FAIL		open, schema or integrity failure
 *	BACKEND_CREATE_SUCCESS		backend is usable and writable
 *	BACKEND_CREATE_READONLY		backend is usable but not writable
 *	BACKEND_CREATE_NEED_INIT	neither schema_version nor id_tbl
 *					exists; the schema must be created
 */
#define	BACKEND_CREATE_LOCKED		-2
#define	BACKEND_CREATE_FAIL		-1
#define	BACKEND_CREATE_SUCCESS		0
#define	BACKEND_CREATE_READONLY		1
#define	BACKEND_CREATE_NEED_INIT	2
static int
backend_create(backend_type_t backend_id, const char *db_file,
    sqlite_backend_t **bep)
{
	char *errp;
	char *integrity_results = NULL;
	sqlite_backend_t *be;
	int r;
	uint32_t val = -1UL;
	struct run_single_int_info info;
	int fd;

	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);

	/* Each backend type has its own be_info[] entry; it must be unused. */
	be = &be_info[backend_id];
	assert(be->be_db == NULL);

	(void) pthread_mutex_init(&be->be_lock, NULL);
	(void) pthread_mutex_lock(&be->be_lock);

	be->be_type = backend_id;
	be->be_path = strdup(db_file);
	if (be->be_path == NULL) {
		perror("malloc");
		goto fail;
	}

	be->be_db = sqlite_open(be->be_path, 0600, &errp);

	if (be->be_db == NULL) {
		/* Out of memory is a plain failure, not a damaged database. */
		if (strstr(errp, "out of memory") != NULL) {
			configd_critical("%s: %s\n", db_file, errp);
			free(errp);

			goto fail;
		}

		/* report it as an integrity failure */
		integrity_results = errp;
		errp = NULL;
		goto integrity_fail;
	}

	/*
	 * check if we are inited and of the correct schema version
	 *
	 * Eventually, we'll support schema upgrade here.
	 */
	info.rs_out = &val;
	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;

	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
	    run_single_int_callback, &info, &errp);
	if (r == SQLITE_ERROR &&
	    strcmp("no such table: schema_version", errp) == 0) {
		free(errp);
		/*
		 * Could be an empty repository, could be pre-schema_version
		 * schema.  Check for id_tbl, which has always been there.
		 */
		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
		    NULL, NULL, &errp);
		if (r == SQLITE_ERROR &&
		    strcmp("no such table: id_tbl", errp) == 0) {
			/* Empty database: the caller must create the schema. */
			free(errp);
			*bep = be;
			return (BACKEND_CREATE_NEED_INIT);
		}

		/* id_tbl exists without schema_version: an older schema. */
		configd_critical("%s: schema version mismatch\n", db_file);
		goto fail;
	}
	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	if (r == SQLITE_OK) {
		/* No row at all, or a version other than ours, is a mismatch. */
		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
		    val != BACKEND_SCHEMA_VERSION) {
			configd_critical("%s: schema version mismatch\n",
			    db_file);
			goto fail;
		}
	}

	/*
	 * pull in the whole database sequentially.
	 *
	 * The data read is discarded and failures here are ignored.
	 */
	if ((fd = open(db_file, O_RDONLY)) >= 0) {
		size_t sz = 64 * 1024;
		char *buffer = malloc(sz);
		if (buffer != NULL) {
			while (read(fd, buffer, sz) > 0)
				;
			free(buffer);
		}
		(void) close(fd);
	}

	/*
	 * run an integrity check
	 *
	 * backend_integrity_callback() gathers every result other than
	 * "ok" into integrity_results.
	 */
	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
	    backend_integrity_callback, &integrity_results, &errp);

	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	/*
	 * NOTE(review): presumably SQLITE_ABORT is what sqlite_exec() returns
	 * when the callback gives up (it returns BACKEND_CALLBACK_ABORT when
	 * realloc fails) -- confirm against the sqlite documentation.
	 */
	if (r == SQLITE_ABORT) {
		free(errp);
		errp = NULL;
		integrity_results = "out of memory running integrity check\n";
	} else if (r != SQLITE_OK && integrity_results == NULL) {
		/* No callback output; use sqlite's own error text instead. */
		integrity_results = errp;
		errp = NULL;
	}

integrity_fail:
	if (integrity_results != NULL) {
		/*
		 * Try to append the details to a file.  fname is reset to
		 * NULL if the file could not be opened or fully written, and
		 * the messages below adapt accordingly.
		 */
		const char *fname = "/etc/svc/volatile/db_errors";
		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
			fname = NULL;
		} else {
			if (backend_fd_write(fd, "\n\n") < 0 ||
			    backend_fd_write(fd, db_file) < 0 ||
			    backend_fd_write(fd,
			    ": PRAGMA integrity_check; failed.  Results:\n") <
			    0 || backend_fd_write(fd, integrity_results) < 0 ||
			    backend_fd_write(fd, "\n\n") < 0) {
				fname = NULL;
			}
			(void) close(fd);
		}

		/*
		 * A short notice is used for anything other than the main
		 * persistent repository; for the main repository a long
		 * explanation with recovery instructions goes to stderr.
		 */
		if (!is_main_repository ||
		    backend_id == BACKEND_TYPE_NONPERSIST) {
			if (fname != NULL)
				configd_critical(
				    "%s: integrity check failed. Details in "
				    "%s\n", db_file, fname);
			else
				configd_critical(
				    "%s: integrity check failed.\n",
				    db_file);
		} else {
			(void) fprintf(stderr,
"\n"
"svc.configd: smf(5) database integrity check of:\n"
"\n"
"    %s\n"
"\n"
"  failed. The database might be damaged or a media error might have\n"
"  prevented it from being verified.  Additional information useful to\n"
"  your service provider%s%s\n"
"\n"
"  The system will not be able to boot until you have restored a working\n"
"  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
"  purposes.  The command:\n"
"\n"
"    /lib/svc/bin/restore_repository\n"
"\n"
"  can be run to restore a backup version of your repository.  See\n"
"  http://sun.com/msg/SMF-8000-MY for more information.\n"
"\n",
			    db_file,
			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
			    (fname == NULL)? integrity_results : fname);
		}
		free(errp);
		goto fail;
	}

	/*
	 * check if we are writable
	 *
	 * NOTE(review): errp is not passed to backend_is_readonly(), so the
	 * free(errp) calls below act on whatever the earlier sqlite calls
	 * left in it -- confirm that it is NULL on this path.
	 */
	r = backend_is_readonly(be->be_db, be->be_path);

	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	/* SQLITE_FULL is not treated as read-only. */
	if (r != SQLITE_OK && r != SQLITE_FULL) {
		free(errp);
		be->be_readonly = 1;
		*bep = be;
		return (BACKEND_CREATE_READONLY);
	}
	*bep = be;
	return (BACKEND_CREATE_SUCCESS);

fail:
	*bep = NULL;
	backend_destroy(be);
	return (BACKEND_CREATE_FAIL);
}
1625 
/*
 * Round arg up to the nearest power of two; a value that is already a
 * power of two is returned unchanged.
 *
 * In two's-complement arithmetic the negation of arg (~arg + 1) shares
 * exactly one set bit with arg: its lowest one.  Adding that bit to arg
 * carries it upward, and repeating the step until only a single bit is
 * left produces the result.
 */
static size_t
round_up_to_p2(size_t arg)
{
	size_t lowbit;

	/*
	 * Zero would give a zero result, and a value with the top bit set
	 * has no larger power of two to round up to; neither is allowed.
	 */
	assert(arg > 0 && ((ssize_t)arg > 0));

	for (;;) {
		/* Exactly one bit set: we're done. */
		if ((arg & (arg - 1)) == 0)
			return (arg);

		lowbit = arg & (~arg + 1);
		arg += lowbit;
	}
}
1643 
1644 /*
1645  * Returns
1646  *   _NO_RESOURCES - out of memory
1647  *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
1648  *   _DONE - callback aborted query
1649  *   _SUCCESS
1650  */
1651 int
1652 backend_run(backend_type_t t, backend_query_t *q,
1653     backend_run_callback_f *cb, void *data)
1654 {
1655 	char *errmsg = NULL;
1656 	int ret;
1657 	sqlite_backend_t *be;
1658 	hrtime_t ts, vts;
1659 
1660 	if (q == NULL || q->bq_buf == NULL)
1661 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1662 
1663 	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
1664 		return (ret);
1665 
1666 	ts = gethrtime();
1667 	vts = gethrvtime();
1668 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1669 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1670 	ret = backend_error(be, ret, errmsg);
1671 	backend_unlock(be);
1672 
1673 	return (ret);
1674 }
1675 
1676 /*
1677  * Starts a "read-only" transaction -- i.e., locks out writers as long
1678  * as it is active.
1679  *
1680  * Fails with
1681  *   _NO_RESOURCES - out of memory
1682  *
1683  * If t is not _NORMAL, can also fail with
1684  *   _BACKEND_ACCESS - backend does not exist
1685  *
1686  * If writable is true, can also fail with
1687  *   _BACKEND_READONLY
1688  */
1689 static int
1690 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
1691 {
1692 	backend_tx_t *ret;
1693 	sqlite_backend_t *be;
1694 	int r;
1695 
1696 	*txp = NULL;
1697 
1698 	ret = uu_zalloc(sizeof (*ret));
1699 	if (ret == NULL)
1700 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1701 
1702 	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
1703 		uu_free(ret);
1704 		return (r);
1705 	}
1706 
1707 	ret->bt_be = be;
1708 	ret->bt_readonly = !writable;
1709 	ret->bt_type = t;
1710 	ret->bt_full = 0;
1711 
1712 	*txp = ret;
1713 	return (REP_PROTOCOL_SUCCESS);
1714 }
1715 
1716 int
1717 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
1718 {
1719 	return (backend_tx_begin_common(t, txp, 0));
1720 }
1721 
1722 static void
1723 backend_tx_end(backend_tx_t *tx)
1724 {
1725 	sqlite_backend_t *be;
1726 
1727 	be = tx->bt_be;
1728 
1729 	if (tx->bt_full) {
1730 		struct sqlite *new;
1731 
1732 		/*
1733 		 * sqlite tends to be sticky with SQLITE_FULL, so we try
1734 		 * to get a fresh database handle if we got a FULL warning
1735 		 * along the way.  If that fails, no harm done.
1736 		 */
1737 		new = sqlite_open(be->be_path, 0600, NULL);
1738 		if (new != NULL) {
1739 			sqlite_close(be->be_db);
1740 			be->be_db = new;
1741 		}
1742 	}
1743 	backend_unlock(be);
1744 	tx->bt_be = NULL;
1745 	uu_free(tx);
1746 }
1747 
1748 void
1749 backend_tx_end_ro(backend_tx_t *tx)
1750 {
1751 	assert(tx->bt_readonly);
1752 	backend_tx_end(tx);
1753 }
1754 
1755 /*
1756  * Fails with
1757  *   _NO_RESOURCES - out of memory
1758  *   _BACKEND_ACCESS
1759  *   _BACKEND_READONLY
1760  */
1761 int
1762 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
1763 {
1764 	int r;
1765 	char *errmsg;
1766 	hrtime_t ts, vts;
1767 
1768 	r = backend_tx_begin_common(t, txp, 1);
1769 	if (r != REP_PROTOCOL_SUCCESS)
1770 		return (r);
1771 
1772 	ts = gethrtime();
1773 	vts = gethrvtime();
1774 	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
1775 	    &errmsg);
1776 	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
1777 	if (r == SQLITE_FULL)
1778 		(*txp)->bt_full = 1;
1779 	r = backend_error((*txp)->bt_be, r, errmsg);
1780 
1781 	if (r != REP_PROTOCOL_SUCCESS) {
1782 		assert(r != REP_PROTOCOL_DONE);
1783 		(void) sqlite_exec((*txp)->bt_be->be_db,
1784 		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
1785 		backend_tx_end(*txp);
1786 		*txp = NULL;
1787 		return (r);
1788 	}
1789 
1790 	(*txp)->bt_readonly = 0;
1791 
1792 	return (REP_PROTOCOL_SUCCESS);
1793 }
1794 
1795 void
1796 backend_tx_rollback(backend_tx_t *tx)
1797 {
1798 	int r;
1799 	char *errmsg;
1800 	sqlite_backend_t *be;
1801 	hrtime_t ts, vts;
1802 
1803 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1804 	be = tx->bt_be;
1805 
1806 	ts = gethrtime();
1807 	vts = gethrvtime();
1808 	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1809 	    &errmsg);
1810 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1811 	if (r == SQLITE_FULL)
1812 		tx->bt_full = 1;
1813 	(void) backend_error(be, r, errmsg);
1814 
1815 	backend_tx_end(tx);
1816 }
1817 
1818 /*
1819  * Fails with
1820  *   _NO_RESOURCES - out of memory
1821  */
1822 int
1823 backend_tx_commit(backend_tx_t *tx)
1824 {
1825 	int r, r2;
1826 	char *errmsg;
1827 	sqlite_backend_t *be;
1828 	hrtime_t ts, vts;
1829 
1830 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1831 	be = tx->bt_be;
1832 	ts = gethrtime();
1833 	vts = gethrvtime();
1834 	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
1835 	    &errmsg);
1836 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1837 	if (r == SQLITE_FULL)
1838 		tx->bt_full = 1;
1839 
1840 	r = backend_error(be, r, errmsg);
1841 	assert(r != REP_PROTOCOL_DONE);
1842 
1843 	if (r != REP_PROTOCOL_SUCCESS) {
1844 		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
1845 		    &errmsg);
1846 		r2 = backend_error(be, r2, errmsg);
1847 		if (r2 != REP_PROTOCOL_SUCCESS)
1848 			backend_panic("cannot rollback failed commit");
1849 
1850 		backend_tx_end(tx);
1851 		return (r);
1852 	}
1853 	backend_tx_end(tx);
1854 	return (REP_PROTOCOL_SUCCESS);
1855 }
1856 
1857 static const char *
1858 id_space_to_name(enum id_space id)
1859 {
1860 	switch (id) {
1861 	case BACKEND_ID_SERVICE_INSTANCE:
1862 		return ("SI");
1863 	case BACKEND_ID_PROPERTYGRP:
1864 		return ("PG");
1865 	case BACKEND_ID_GENERATION:
1866 		return ("GEN");
1867 	case BACKEND_ID_PROPERTY:
1868 		return ("PROP");
1869 	case BACKEND_ID_VALUE:
1870 		return ("VAL");
1871 	case BACKEND_ID_SNAPNAME:
1872 		return ("SNAME");
1873 	case BACKEND_ID_SNAPSHOT:
1874 		return ("SHOT");
1875 	case BACKEND_ID_SNAPLEVEL:
1876 		return ("SLVL");
1877 	default:
1878 		abort();
1879 		/*NOTREACHED*/
1880 	}
1881 }
1882 
1883 /*
1884  * Returns a new id or 0 if the id argument is invalid or the query fails.
1885  */
1886 uint32_t
1887 backend_new_id(backend_tx_t *tx, enum id_space id)
1888 {
1889 	struct run_single_int_info info;
1890 	uint32_t new_id = 0;
1891 	const char *name = id_space_to_name(id);
1892 	char *errmsg;
1893 	int ret;
1894 	sqlite_backend_t *be;
1895 	hrtime_t ts, vts;
1896 
1897 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1898 	be = tx->bt_be;
1899 
1900 	info.rs_out = &new_id;
1901 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1902 
1903 	ts = gethrtime();
1904 	vts = gethrvtime();
1905 	ret = sqlite_exec_printf(be->be_db,
1906 	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
1907 	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
1908 	    run_single_int_callback, &info, &errmsg, name, name);
1909 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1910 	if (ret == SQLITE_FULL)
1911 		tx->bt_full = 1;
1912 
1913 	ret = backend_error(be, ret, errmsg);
1914 
1915 	if (ret != REP_PROTOCOL_SUCCESS) {
1916 		return (0);
1917 	}
1918 
1919 	return (new_id);
1920 }
1921 
1922 /*
1923  * Returns
1924  *   _NO_RESOURCES - out of memory
1925  *   _DONE - callback aborted query
1926  *   _SUCCESS
1927  */
1928 int
1929 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
1930     backend_run_callback_f *cb, void *data)
1931 {
1932 	char *errmsg = NULL;
1933 	int ret;
1934 	sqlite_backend_t *be;
1935 	hrtime_t ts, vts;
1936 
1937 	assert(tx != NULL && tx->bt_be != NULL);
1938 	be = tx->bt_be;
1939 
1940 	if (q == NULL || q->bq_buf == NULL)
1941 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1942 
1943 	ts = gethrtime();
1944 	vts = gethrvtime();
1945 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
1946 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1947 	if (ret == SQLITE_FULL)
1948 		tx->bt_full = 1;
1949 	ret = backend_error(be, ret, errmsg);
1950 
1951 	return (ret);
1952 }
1953 
1954 /*
1955  * Returns
1956  *   _NO_RESOURCES - out of memory
1957  *   _NOT_FOUND - the query returned no results
1958  *   _SUCCESS - the query returned a single integer
1959  */
1960 int
1961 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
1962 {
1963 	struct run_single_int_info info;
1964 	int ret;
1965 
1966 	info.rs_out = buf;
1967 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
1968 
1969 	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
1970 	assert(ret != REP_PROTOCOL_DONE);
1971 
1972 	if (ret != REP_PROTOCOL_SUCCESS)
1973 		return (ret);
1974 
1975 	return (info.rs_result);
1976 }
1977 
1978 /*
1979  * Fails with
1980  *   _NO_RESOURCES - out of memory
1981  */
1982 int
1983 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
1984 {
1985 	va_list a;
1986 	char *errmsg;
1987 	int ret;
1988 	sqlite_backend_t *be;
1989 	hrtime_t ts, vts;
1990 
1991 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
1992 	be = tx->bt_be;
1993 
1994 	va_start(a, format);
1995 	ts = gethrtime();
1996 	vts = gethrvtime();
1997 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
1998 	UPDATE_TOTALS(be, bt_exec, ts, vts);
1999 	if (ret == SQLITE_FULL)
2000 		tx->bt_full = 1;
2001 	va_end(a);
2002 	ret = backend_error(be, ret, errmsg);
2003 	assert(ret != REP_PROTOCOL_DONE);
2004 
2005 	return (ret);
2006 }
2007 
2008 /*
2009  * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
2010  */
2011 int
2012 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
2013 {
2014 	va_list a;
2015 	char *errmsg;
2016 	int ret;
2017 	sqlite_backend_t *be;
2018 	hrtime_t ts, vts;
2019 
2020 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
2021 	be = tx->bt_be;
2022 
2023 	va_start(a, format);
2024 	ts = gethrtime();
2025 	vts = gethrvtime();
2026 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
2027 	UPDATE_TOTALS(be, bt_exec, ts, vts);
2028 	if (ret == SQLITE_FULL)
2029 		tx->bt_full = 1;
2030 	va_end(a);
2031 
2032 	ret = backend_error(be, ret, errmsg);
2033 
2034 	return (ret);
2035 }
2036 
2037 #define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
2038 	(backend_add_schema((be), (file), \
2039 	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
2040 	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
2041 
2042 static int
2043 backend_add_schema(sqlite_backend_t *be, const char *file,
2044     struct backend_tbl_info *tbls, int tbl_count,
2045     struct backend_idx_info *idxs, int idx_count)
2046 {
2047 	int i;
2048 	char *errmsg;
2049 	int ret;
2050 
2051 	/*
2052 	 * Create the tables.
2053 	 */
2054 	for (i = 0; i < tbl_count; i++) {
2055 		if (tbls[i].bti_name == NULL) {
2056 			assert(i + 1 == tbl_count);
2057 			break;
2058 		}
2059 		ret = sqlite_exec_printf(be->be_db,
2060 		    "CREATE TABLE %s (%s);\n",
2061 		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
2062 
2063 		if (ret != SQLITE_OK) {
2064 			configd_critical(
2065 			    "%s: %s table creation fails: %s\n", file,
2066 			    tbls[i].bti_name, errmsg);
2067 			free(errmsg);
2068 			return (-1);
2069 		}
2070 	}
2071 
2072 	/*
2073 	 * Make indices on key tables and columns.
2074 	 */
2075 	for (i = 0; i < idx_count; i++) {
2076 		if (idxs[i].bxi_tbl == NULL) {
2077 			assert(i + 1 == idx_count);
2078 			break;
2079 		}
2080 
2081 		ret = sqlite_exec_printf(be->be_db,
2082 		    "CREATE INDEX %s_%s ON %s (%s);\n",
2083 		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
2084 		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
2085 
2086 		if (ret != SQLITE_OK) {
2087 			configd_critical(
2088 			    "%s: %s_%s index creation fails: %s\n", file,
2089 			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
2090 			free(errmsg);
2091 			return (-1);
2092 		}
2093 	}
2094 	return (0);
2095 }
2096 
2097 static int
2098 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
2099 {
2100 	int i;
2101 	char *errmsg;
2102 	int ret;
2103 
2104 	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
2105 
2106 	if (t == BACKEND_TYPE_NORMAL) {
2107 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
2108 	} else if (t == BACKEND_TYPE_NONPERSIST) {
2109 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
2110 	} else {
2111 		abort();		/* can't happen */
2112 	}
2113 
2114 	if (ret < 0) {
2115 		return (ret);
2116 	}
2117 
2118 	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
2119 	if (ret < 0) {
2120 		return (ret);
2121 	}
2122 
2123 	/*
2124 	 * Add the schema version to the table
2125 	 */
2126 	ret = sqlite_exec_printf(be->be_db,
2127 	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
2128 	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
2129 	if (ret != SQLITE_OK) {
2130 		configd_critical(
2131 		    "setting schema version fails: %s\n", errmsg);
2132 		free(errmsg);
2133 	}
2134 
2135 	/*
2136 	 * Populate id_tbl with initial IDs.
2137 	 */
2138 	for (i = 0; i < BACKEND_ID_INVALID; i++) {
2139 		const char *name = id_space_to_name(i);
2140 
2141 		ret = sqlite_exec_printf(be->be_db,
2142 		    "INSERT INTO id_tbl (id_name, id_next) "
2143 		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
2144 		if (ret != SQLITE_OK) {
2145 			configd_critical(
2146 			    "id insertion for %s fails: %s\n", name, errmsg);
2147 			free(errmsg);
2148 			return (-1);
2149 		}
2150 	}
2151 	/*
2152 	 * Set the persistance of the database.  The normal database is marked
2153 	 * "synchronous", so that all writes are synchronized to stable storage
2154 	 * before proceeding.
2155 	 */
2156 	ret = sqlite_exec_printf(be->be_db,
2157 	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
2158 	    NULL, NULL, &errmsg,
2159 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
2160 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
2161 	if (ret != SQLITE_OK) {
2162 		configd_critical("pragma setting fails: %s\n", errmsg);
2163 		free(errmsg);
2164 		return (-1);
2165 	}
2166 
2167 	return (0);
2168 }
2169 
2170 int
2171 backend_init(const char *db_file, const char *npdb_file, int have_np)
2172 {
2173 	sqlite_backend_t *be;
2174 	int r;
2175 	int writable_persist = 1;
2176 
2177 	/* set up our temporary directory */
2178 	sqlite_temp_directory = "/etc/svc/volatile";
2179 
2180 	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
2181 		configd_critical("Mismatched link!  (%s should be %s)\n",
2182 		    sqlite_version, SQLITE_VERSION);
2183 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2184 	}
2185 
2186 	/*
2187 	 * If the system crashed during a backend switch, there might
2188 	 * be a leftover transient database which contains useful
2189 	 * information which can be used for recovery.
2190 	 */
2191 	backend_switch_recovery();
2192 
2193 	if (db_file == NULL)
2194 		db_file = REPOSITORY_DB;
2195 	if (strcmp(db_file, REPOSITORY_DB) != 0) {
2196 		is_main_repository = 0;
2197 	}
2198 
2199 	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
2200 	switch (r) {
2201 	case BACKEND_CREATE_FAIL:
2202 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2203 	case BACKEND_CREATE_LOCKED:
2204 		return (CONFIGD_EXIT_DATABASE_LOCKED);
2205 	case BACKEND_CREATE_SUCCESS:
2206 		break;		/* success */
2207 	case BACKEND_CREATE_READONLY:
2208 		writable_persist = 0;
2209 		break;
2210 	case BACKEND_CREATE_NEED_INIT:
2211 		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
2212 			backend_destroy(be);
2213 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2214 		}
2215 		break;
2216 	default:
2217 		abort();
2218 		/*NOTREACHED*/
2219 	}
2220 	backend_create_finish(BACKEND_TYPE_NORMAL, be);
2221 
2222 	if (have_np) {
2223 		if (npdb_file == NULL)
2224 			npdb_file = NONPERSIST_DB;
2225 
2226 		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
2227 		switch (r) {
2228 		case BACKEND_CREATE_SUCCESS:
2229 			break;		/* success */
2230 		case BACKEND_CREATE_FAIL:
2231 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2232 		case BACKEND_CREATE_LOCKED:
2233 			return (CONFIGD_EXIT_DATABASE_LOCKED);
2234 		case BACKEND_CREATE_READONLY:
2235 			configd_critical("%s: unable to write\n", npdb_file);
2236 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2237 		case BACKEND_CREATE_NEED_INIT:
2238 			if (backend_init_schema(be, db_file,
2239 			    BACKEND_TYPE_NONPERSIST)) {
2240 				backend_destroy(be);
2241 				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
2242 			}
2243 			break;
2244 		default:
2245 			abort();
2246 			/*NOTREACHED*/
2247 		}
2248 		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
2249 
2250 		/*
2251 		 * If we started up with a writable filesystem, but the
2252 		 * non-persistent database needed initialization, we
2253 		 * are booting a non-global zone, so do a backup.
2254 		 */
2255 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
2256 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
2257 		    REP_PROTOCOL_SUCCESS) {
2258 			if (backend_create_backup_locked(be,
2259 			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
2260 				configd_critical(
2261 				    "unable to create \"%s\" backup of "
2262 				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
2263 				    be->be_path);
2264 			}
2265 			backend_unlock(be);
2266 		}
2267 	}
2268 	return (CONFIGD_EXIT_OKAY);
2269 }
2270 
2271 /*
2272  * quiesce all database activity prior to exiting
2273  */
2274 void
2275 backend_fini(void)
2276 {
2277 	sqlite_backend_t *be_normal, *be_np;
2278 
2279 	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
2280 	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
2281 }
2282 
2283 #define	QUERY_BASE	128
2284 backend_query_t *
2285 backend_query_alloc(void)
2286 {
2287 	backend_query_t *q;
2288 	q = calloc(1, sizeof (backend_query_t));
2289 	if (q != NULL) {
2290 		q->bq_size = QUERY_BASE;
2291 		q->bq_buf = calloc(1, q->bq_size);
2292 		if (q->bq_buf == NULL) {
2293 			q->bq_size = 0;
2294 		}
2295 
2296 	}
2297 	return (q);
2298 }
2299 
2300 void
2301 backend_query_append(backend_query_t *q, const char *value)
2302 {
2303 	char *alloc;
2304 	int count;
2305 	size_t size, old_len;
2306 
2307 	if (q == NULL) {
2308 		/* We'll discover the error when we try to run the query. */
2309 		return;
2310 	}
2311 
2312 	while (q->bq_buf != NULL) {
2313 		old_len = strlen(q->bq_buf);
2314 		size = q->bq_size;
2315 		count = strlcat(q->bq_buf, value, size);
2316 
2317 		if (count < size)
2318 			break;				/* success */
2319 
2320 		q->bq_buf[old_len] = 0;
2321 		size = round_up_to_p2(count + 1);
2322 
2323 		assert(size > q->bq_size);
2324 		alloc = realloc(q->bq_buf, size);
2325 		if (alloc == NULL) {
2326 			free(q->bq_buf);
2327 			q->bq_buf = NULL;
2328 			break;				/* can't grow */
2329 		}
2330 
2331 		q->bq_buf = alloc;
2332 		q->bq_size = size;
2333 	}
2334 }
2335 
2336 void
2337 backend_query_add(backend_query_t *q, const char *format, ...)
2338 {
2339 	va_list args;
2340 	char *new;
2341 
2342 	if (q == NULL || q->bq_buf == NULL)
2343 		return;
2344 
2345 	va_start(args, format);
2346 	new = sqlite_vmprintf(format, args);
2347 	va_end(args);
2348 
2349 	if (new == NULL) {
2350 		free(q->bq_buf);
2351 		q->bq_buf = NULL;
2352 		return;
2353 	}
2354 
2355 	backend_query_append(q, new);
2356 
2357 	free(new);
2358 }
2359 
2360 void
2361 backend_query_free(backend_query_t *q)
2362 {
2363 	if (q != NULL) {
2364 		if (q->bq_buf != NULL) {
2365 			free(q->bq_buf);
2366 		}
2367 		free(q);
2368 	}
2369 }
2370