1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 2010, 2013 Oracle and/or its affiliates.  All rights reserved.
5  */
6 
7 /*
8 ** This file contains code used to implement the VACUUM command.
9 */
10 #include "sqliteInt.h"
11 #include "btreeInt.h"
12 #include "vdbeInt.h"
13 
14 #if !defined(SQLITE_OMIT_VACUUM)
15 /*
16 ** The non-standard VACUUM command is used to clean up the database,
17 ** collapse free space, etc.  It is modelled after the VACUUM command
18 ** in PostgreSQL.
19 */
sqlite3Vacuum(Parse * pParse)20 void sqlite3Vacuum(Parse *pParse) {
21 	Vdbe *v = sqlite3GetVdbe(pParse);
22 
23 	if (v)
24 		sqlite3VdbeAddOp2(v, OP_Vacuum, 0, 0);
25 }
26 
/*
** Run a complete vacuum pass over the Btree p.
**
** A transaction is started, btreeIncrVacuum() is called repeatedly until it
** stops returning SQLITE_OK, and the transaction is then committed (when the
** pass finished with SQLITE_DONE) or rolled back (on any other result).
**
** Returns SQLITE_OK immediately, without doing any work, if a vacuum is
** already in progress on p.  Otherwise returns the result of the commit on
** success, or the error code that stopped the pass.  On failure a message
** is stored through pzErrMsg.
*/
int btreeVacuum(Btree *p, char **pzErrMsg) {
	sqlite3 *db = p->db;
	u_int32_t truncatedPages = 0;
	int rc;

	/* Nothing to do if a vacuum is already in progress. */
	if (p->inVacuum)
		return SQLITE_OK;

	/*
	 * The updates in this transaction happen at the Berkeley DB Core
	 * level (i.e., via DB->compact), but the transaction is started
	 * read-only at the SQL level to avoid the overhead of
	 * checkpoint-on-commit.
	 */
	rc = btreeBeginTransInternal(p, 0);
	if (rc != SQLITE_OK) {
		sqlite3SetString(pzErrMsg, db,
		    "failed to begin a vacuum transaction");
		return rc;
	}

	p->inVacuum = 1;

	/* Visit every table: one table per btreeIncrVacuum() call. */
	while ((rc = btreeIncrVacuum(p, &truncatedPages)) == SQLITE_OK)
		continue;
	p->needVacuum = 0;

	if (rc == SQLITE_DONE) {
		/* The whole pass completed; make it durable. */
		rc = sqlite3BtreeCommit(p);
		if (rc != SQLITE_OK)
			sqlite3SetString(pzErrMsg, db,
			    "failed to commit the vacuum transaction");
	} else {
		/* The pass was interrupted by an error; undo it. */
		sqlite3SetString(pzErrMsg, db,
		    "error during vacuum, rolled back");
		(void)sqlite3BtreeRollback(p);
	}

	p->inVacuum = 0;

	return rc;
}
71 
72 /*
73 ** Free internal link list of vacuum info for Btree object
74 **/
btreeFreeVacuumInfo(Btree * p)75 void btreeFreeVacuumInfo(Btree *p)
76 {
77 	struct VacuumInfo *pInfo, *pInfoNext;
78 
79 	/* Free DBT for vacuum start */
80 	for (pInfo = p->vacuumInfo; pInfo != NULL; pInfo = pInfoNext) {
81 		pInfoNext = pInfo->next;
82 		if (pInfo->start.data)
83 			sqlite3_free(pInfo->start.data);
84 		sqlite3_free(pInfo);
85 	}
86 	p->vacuumInfo = NULL;
87 	p->needVacuum = 0;
88 	return;
89 }
90 
91 /*
92 ** A write transaction must be opened before calling this function.
93 ** It performs a single unit of work towards an incremental vacuum.
94 ** Specifically, in the Berkeley DB storage manager, it attempts to compact
95 ** one table.
96 **
97 ** If the incremental vacuum is finished after this function has run,
98 ** SQLITE_DONE is returned. If it is not finished, but no error occurred,
99 ** SQLITE_OK is returned. Otherwise an SQLite error code.
100 **
101 ** The caller can get and accumulate the number of truncated pages truncated
102 ** with input parameter truncatedPages. Also, btreeIncrVacuum would skip
103 ** the vacuum if enough pages has been truncated for optimization.
104 */
btreeIncrVacuum(Btree * p,u_int32_t * truncatedPages)105 int btreeIncrVacuum(Btree *p, u_int32_t *truncatedPages)
106 {
107 	BtShared *pBt;
108 	CACHED_DB *cached_db;
109 	DB *dbp;
110 	DBT key, data;
111 	char *fileName, *tableName, tableNameBuf[DBNAME_SIZE];
112 	void *app;
113 	int iTable, rc, ret, t_ret;
114 	u_int32_t was_create;
115 	DB_COMPACT compact_data;
116 	DBT *pStart, end;	/* start/end of db_compact() */
117 	struct VacuumInfo *pInfo;
118 	int vacuumMode;
119 
120 	assert(p->pBt->dbStorage == DB_STORE_NAMED);
121 
122 	if (!p->connected && (rc = btreeOpenEnvironment(p, 1)) != SQLITE_OK)
123 		return rc;
124 
125 	pBt = p->pBt;
126 	rc = SQLITE_OK;
127 	cached_db = NULL;
128 	dbp = NULL;
129 	memset(&end, 0, sizeof(end));
130 #ifndef BDBSQL_OMIT_LEAKCHECK
131 	/* Let BDB use the user-specified malloc function (btreeMalloc) */
132 	end.flags |= DB_DBT_MALLOC;
133 #endif
134 
135 	/*
136 	 * Turn off DB_CREATE: we don't want to create any tables that don't
137 	 * already exist.
138 	 */
139 	was_create = (pBt->db_oflags & DB_CREATE);
140 	pBt->db_oflags &= ~DB_CREATE;
141 
142 	memset(&key, 0, sizeof(key));
143 	key.data = tableNameBuf;
144 	key.ulen = sizeof(tableNameBuf);
145 	key.flags = DB_DBT_USERMEM;
146 	memset(&data, 0, sizeof(data));
147 	data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
148 
149 	UPDATE_DURING_BACKUP(p);
150 
151 	if (p->compact_cursor == NULL) {
152 		if ((ret = pTablesDb->cursor(pTablesDb, pReadTxn,
153 		    &p->compact_cursor, 0)) != 0)
154 			goto err;
155 	}
156 	if ((ret = p->compact_cursor->get(p->compact_cursor,
157 	    &key, &data, DB_NEXT)) == DB_NOTFOUND) {
158 		(void)p->compact_cursor->close(p->compact_cursor);
159 		p->compact_cursor = NULL;
160 		pBt->db_oflags |= was_create;
161 		return SQLITE_DONE;
162 	} else if (ret != 0)
163 		goto err;
164 
165 	tableNameBuf[key.size] = '\0';
166 	if (strncmp(tableNameBuf, "table", 5) != 0) {
167 		iTable = 0;
168 #ifdef BDBSQL_FILE_PER_TABLE
169 		/* Cannot compact the metadata file */
170 		goto err;
171 #endif
172 
173 		/* Open a DB handle on that table. */
174 		if ((ret = db_create(&dbp, pDbEnv, 0)) != 0)
175 			goto err;
176 		if (pBt->encrypted &&
177 		    (ret = dbp->set_flags(dbp, DB_ENCRYPT)) != 0)
178 			goto err;
179 
180 		tableName = tableNameBuf;
181 		FIX_TABLENAME(pBt, fileName, tableName);
182 
183 		/*
184 		 * We know we're not creating this table, open it using the
185 		 * family transaction because that keeps the dbreg records out
186 		 * of the vacuum transaction, reducing pressure on the log
187 		 * region (since we copy the filename of every open DB handle
188 		 * into the log region).
189 		 */
190 		if ((ret = dbp->open(dbp, pFamilyTxn, fileName, tableName,
191 		    DB_BTREE, GET_AUTO_COMMIT(pBt, pFamilyTxn), 0)) != 0)
192 			goto err;
193 	} else {
194 		if ((ret = btreeTableNameToId(tableNameBuf,
195 		    key.size, &iTable)) != 0)
196 			goto err;
197 
198 		/* Try to retrieve the matching handle from the cache. */
199 		rc = btreeFindOrCreateDataTable(p, &iTable, &cached_db, 0);
200 		if (rc != SQLITE_OK)
201 			goto err;
202 		assert(cached_db != NULL && cached_db->dbp != NULL);
203 
204 		dbp = cached_db->dbp;
205 		if ((iTable & 1) == 0) {
206 			/*
207 			 * Attach the DB handle to a SQLite index, required for
208 			 * the key comparator to work correctly.  If we can't
209 			 * find an Index struct, just skip this database.  It
210 			 * may not be open yet (c.f. whereA-1.7).
211 			 */
212 #ifdef BDBSQL_SINGLE_THREAD
213 			rc = btreeGetKeyInfo(p, iTable,
214 			    (KeyInfo **)&(dbp->app_private));
215 #else
216 			rc = btreeGetKeyInfo(p, iTable,
217 			    &((TableInfo *)dbp->app_private)->pKeyInfo);
218 #endif
219 			if (rc != SQLITE_OK)
220 				goto err;
221 		}
222 	}
223 
224 	/*
225 	 * In following db_compact, we use the family transaction because
226 	 * DB->compact will then auto-commit, and it has built-in smarts
227 	 * about retrying on deadlock.
228 	 */
229 	/* Setup compact_data as configured */
230 	memset(&compact_data, 0, sizeof(compact_data));
231 	compact_data.compact_fillpercent = p->fillPercent;
232 
233 	vacuumMode = sqlite3BtreeGetAutoVacuum(p);
234 	if (vacuumMode == BTREE_AUTOVACUUM_NONE) {
235 		ret = dbp->compact(dbp, pFamilyTxn,
236 		    NULL, NULL, &compact_data, DB_FREE_SPACE, NULL);
237 	/* Skip current table if we have truncated enough pages */
238 	} else if (truncatedPages == NULL ||
239 	    (truncatedPages != NULL && *truncatedPages < p->vacuumPages)) {
240 		/* Find DBT for db_compact start */
241 		for (pInfo = p->vacuumInfo, pStart = NULL;
242 		     pInfo != NULL; pInfo = pInfo->next) {
243 			if (pInfo->iTable == iTable)
244 				break;
245 		}
246 
247 		/* Create new VacuumInfo for current iTable as needed */
248 		if (pInfo == NULL) {
249 			/* Create info for current iTable */
250 			if ((pInfo = (struct VacuumInfo *)sqlite3_malloc(
251 			    sizeof(struct VacuumInfo))) == NULL) {
252 				rc = SQLITE_NOMEM;
253 				goto err;
254 			}
255 			memset(pInfo, 0, sizeof(struct VacuumInfo));
256 			pInfo->iTable = iTable;
257 			pInfo->next = p->vacuumInfo;
258 			p->vacuumInfo = pInfo;
259 		}
260 		pStart = &(pInfo->start);
261 
262 		/* Do page compact for IncrVacuum */
263 		if (vacuumMode == BTREE_AUTOVACUUM_INCR) {
264 			/* Do compact with given arguments */
265 			compact_data.compact_pages = p->vacuumPages;
266 			if ((ret = dbp->compact(dbp, pFamilyTxn,
267 				(pStart->data == NULL) ? NULL : pStart,
268 				NULL, &compact_data, 0, &end)) != 0)
269 				goto err;
270 
271 			/* Save current vacuum position */
272 			if (pStart->data != NULL)
273 				sqlite3_free(pStart->data);
274 			memcpy(pStart, &end, sizeof(DBT));
275 			memset(&end, 0, sizeof(end));
276 
277 			/* Rewind to start if we reach the end of subdb */
278 			if (compact_data.compact_pages_free < p->vacuumPages ||
279 			    p->vacuumPages == 0) {
280 				if (pStart->data != NULL)
281 					sqlite3_free(pStart->data);
282 				memset(pStart, 0, sizeof(DBT));
283 			}
284 		}
285 		/* Because of the one-pass nature of the compaction algorithm,
286 		 * any unemptied page near the end of the file inhibits
287 		 * returning pages to the file system.
288 		 * A repeated call to the DB->compact() method with a low
289 		 * compact_fillpercent may be used to return pages in this case.
290 		 */
291 		memset(&compact_data, 0, sizeof(compact_data));
292 		compact_data.compact_fillpercent = 1;
293 		if ((ret = dbp->compact(dbp, pFamilyTxn, NULL, NULL,
294 			    &compact_data, DB_FREE_SPACE, NULL)) != 0)
295 			goto err;
296 		if (truncatedPages != NULL && *truncatedPages > 0)
297 			*truncatedPages += compact_data.compact_pages_truncated;
298 	}
299 
300 err:	/* Free cursor and DBT if run into error */
301 	if (ret != 0) {
302 		if (p->compact_cursor != NULL) {
303 			(void)p->compact_cursor->close(p->compact_cursor);
304 			p->compact_cursor = NULL;
305 		}
306 		if (end.data != NULL)
307 			sqlite3_free(end.data);
308 		btreeFreeVacuumInfo(p);
309 	}
310 
311 	if (cached_db != NULL) {
312 #ifdef BDBSQL_SINGLE_THREAD
313 		if ((app = dbp->app_private) != NULL)
314 			sqlite3DbFree(p->db, app);
315 #else
316 		if (dbp->app_private != NULL &&
317 		    (app = ((TableInfo *)dbp->app_private)->pKeyInfo) != NULL) {
318 			sqlite3DbFree(p->db, app);
319 			((TableInfo *)dbp->app_private)->pKeyInfo = NULL;
320 		}
321 #endif
322 	} else if (dbp != NULL) {
323 		app = dbp->app_private;
324 		if ((t_ret = dbp->close(dbp, DB_NOSYNC)) != 0 && ret == 0)
325 			ret = t_ret;
326 		if (app != NULL)
327 			sqlite3DbFree(p->db, app);
328 	}
329 
330 	pBt->db_oflags |= was_create;
331 
332 	return MAP_ERR(rc, ret, p);
333 }
334 
335 /*
336 ** This routine implements the OP_Vacuum opcode of the VDBE.
337 */
sqlite3RunVacuum(char ** pzErrMsg,sqlite3 * db)338 int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db) {
339 	int rc;
340 	Btree *p;
341 
342 	p = db->aDb[0].pBt;
343 	rc = SQLITE_OK;
344 
345 	if (p->pBt->dbStorage != DB_STORE_NAMED)
346 		return SQLITE_OK;
347 
348 	if ((rc = sqlite3Init(db, pzErrMsg)) != SQLITE_OK)
349 		return rc;
350 
351 	if (!db->autoCommit) {
352 		sqlite3SetString(pzErrMsg, db,
353 		    "cannot VACUUM from within a transaction");
354 		return SQLITE_ERROR;
355 	}
356 
357 	assert(sqlite3_mutex_held(db->mutex));
358 	rc = btreeVacuum(p, pzErrMsg);
359 
360 	return rc;
361 }
362 #endif  /* SQLITE_OMIT_VACUUM */
363