1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 #include "wt_internal.h"
10 
11 static void __free_page_modify(WT_SESSION_IMPL *, WT_PAGE *);
12 static void __free_page_col_var(WT_SESSION_IMPL *, WT_PAGE *);
13 static void __free_page_int(WT_SESSION_IMPL *, WT_PAGE *);
14 static void __free_page_row_leaf(WT_SESSION_IMPL *, WT_PAGE *);
15 static void __free_skip_array(
16 		WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool);
17 static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool);
18 static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool);
19 
20 /*
21  * __wt_ref_out --
22  *	Discard an in-memory page, freeing all memory associated with it.
23  */
24 void
__wt_ref_out(WT_SESSION_IMPL * session,WT_REF * ref)25 __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
26 {
27 	/*
28 	 * A version of the page-out function that allows us to make additional
29 	 * diagnostic checks.
30 	 *
31 	 * The WT_REF cannot be the eviction thread's location.
32 	 */
33 	WT_ASSERT(session, S2BT(session)->evict_ref != ref);
34 
35 	/*
36 	 * Make sure no other thread has a hazard pointer on the page we are
37 	 * about to discard.  This is complicated by the fact that readers
38 	 * publish their hazard pointer before re-checking the page state, so
39 	 * our check can race with readers without indicating a real problem.
40 	 * If we find a hazard pointer, wait for it to be cleared.
41 	 */
42 	WT_ASSERT(session, __wt_hazard_check_assert(session, ref, true));
43 
44 	__wt_page_out(session, &ref->page);
45 }
46 
47 /*
48  * __wt_page_out --
49  *	Discard an in-memory page, freeing all memory associated with it.
50  */
51 void
__wt_page_out(WT_SESSION_IMPL * session,WT_PAGE ** pagep)52 __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
53 {
54 	WT_PAGE *page;
55 	WT_PAGE_HEADER *dsk;
56 	WT_PAGE_MODIFY *mod;
57 
58 	/*
59 	 * Kill our caller's reference, do our best to catch races.
60 	 */
61 	page = *pagep;
62 	*pagep = NULL;
63 
64 	/*
65 	 * Unless we have a dead handle or we're closing the database, we
66 	 * should never discard a dirty page.  We do ordinary eviction from
67 	 * dead trees until sweep gets to them, so we may not in the
68 	 * WT_SYNC_DISCARD loop.
69 	 */
70 	if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
71 	    F_ISSET(S2C(session), WT_CONN_CLOSING))
72 		__wt_page_modify_clear(session, page);
73 
74 	/* Assert we never discard a dirty page or a page queue for eviction. */
75 	WT_ASSERT(session, !__wt_page_is_modified(page));
76 	WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
77 
78 	/*
79 	 * If a root page split, there may be one or more pages linked from the
80 	 * page; walk the list, discarding pages.
81 	 */
82 	switch (page->type) {
83 	case WT_PAGE_COL_INT:
84 	case WT_PAGE_ROW_INT:
85 		mod = page->modify;
86 		if (mod != NULL && mod->mod_root_split != NULL)
87 			__wt_page_out(session, &mod->mod_root_split);
88 		break;
89 	}
90 
91 	/* Update the cache's information. */
92 	__wt_cache_page_evict(session, page);
93 
94 	dsk = (WT_PAGE_HEADER *)page->dsk;
95 	if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
96 		__wt_cache_page_image_decr(session, dsk->mem_size);
97 
98 	/* Discard any mapped image. */
99 	if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
100 		(void)S2BT(session)->bm->map_discard(
101 		    S2BT(session)->bm, session, dsk, (size_t)dsk->mem_size);
102 
103 	/*
104 	 * If discarding the page as part of process exit, the application may
105 	 * configure to leak the memory rather than do the work.
106 	 */
107 	if (F_ISSET(S2C(session), WT_CONN_LEAK_MEMORY))
108 		return;
109 
110 	/* Free the page modification information. */
111 	if (page->modify != NULL)
112 		__free_page_modify(session, page);
113 
114 	switch (page->type) {
115 	case WT_PAGE_COL_FIX:
116 		break;
117 	case WT_PAGE_COL_INT:
118 	case WT_PAGE_ROW_INT:
119 		__free_page_int(session, page);
120 		break;
121 	case WT_PAGE_COL_VAR:
122 		__free_page_col_var(session, page);
123 		break;
124 	case WT_PAGE_ROW_LEAF:
125 		__free_page_row_leaf(session, page);
126 		break;
127 	}
128 
129 	/* Discard any allocated disk image. */
130 	if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
131 		__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
132 
133 	__wt_overwrite_and_free(session, page);
134 }
135 
136 /*
137  * __free_page_modify --
138  *	Discard the page's associated modification structures.
139  */
140 static void
__free_page_modify(WT_SESSION_IMPL * session,WT_PAGE * page)141 __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
142 {
143 	WT_INSERT_HEAD *append;
144 	WT_MULTI *multi;
145 	WT_PAGE_MODIFY *mod;
146 	uint32_t i;
147 	bool update_ignore;
148 
149 	mod = page->modify;
150 
151 	/* In some failed-split cases, we can't discard updates. */
152 	update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);
153 
154 	switch (mod->rec_result) {
155 	case WT_PM_REC_MULTIBLOCK:
156 		/* Free list of replacement blocks. */
157 		for (multi = mod->mod_multi,
158 		    i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
159 			switch (page->type) {
160 			case WT_PAGE_ROW_INT:
161 			case WT_PAGE_ROW_LEAF:
162 				__wt_free(session, multi->key.ikey);
163 				break;
164 			}
165 			__wt_free(session, multi->supd);
166 			__wt_free(session, multi->disk_image);
167 			__wt_free(session, multi->addr.addr);
168 		}
169 		__wt_free(session, mod->mod_multi);
170 		break;
171 	case WT_PM_REC_REPLACE:
172 		/*
173 		 * Discard any replacement address: this memory is usually moved
174 		 * into the parent's WT_REF, but at the root that can't happen.
175 		 */
176 		__wt_free(session, mod->mod_replace.addr);
177 		break;
178 	}
179 
180 	switch (page->type) {
181 	case WT_PAGE_COL_FIX:
182 	case WT_PAGE_COL_VAR:
183 		/* Free the append array. */
184 		if ((append = WT_COL_APPEND(page)) != NULL) {
185 			__free_skip_list(
186 			    session, WT_SKIP_FIRST(append), update_ignore);
187 			__wt_free(session, append);
188 			__wt_free(session, mod->mod_col_append);
189 		}
190 
191 		/* Free the insert/update array. */
192 		if (mod->mod_col_update != NULL)
193 			__free_skip_array(session, mod->mod_col_update,
194 			    page->type ==
195 			    WT_PAGE_COL_FIX ? 1 : page->entries, update_ignore);
196 		break;
197 	case WT_PAGE_ROW_LEAF:
198 		/*
199 		 * Free the insert array.
200 		 *
201 		 * Row-store tables have one additional slot in the insert array
202 		 * (the insert array has an extra slot to hold keys that sort
203 		 * before keys found on the original page).
204 		 */
205 		if (mod->mod_row_insert != NULL)
206 			__free_skip_array(session, mod->mod_row_insert,
207 			    page->entries + 1, update_ignore);
208 
209 		/* Free the update array. */
210 		if (mod->mod_row_update != NULL)
211 			__free_update(session, mod->mod_row_update,
212 			    page->entries, update_ignore);
213 		break;
214 	}
215 
216 	/* Free the overflow on-page, reuse and transaction-cache skiplists. */
217 	__wt_ovfl_reuse_free(session, page);
218 	__wt_ovfl_discard_free(session, page);
219 	__wt_ovfl_discard_remove(session, page);
220 
221 	__wt_free(session, page->modify->ovfl_track);
222 	__wt_spin_destroy(session, &page->modify->page_lock);
223 
224 	__wt_free(session, page->modify);
225 }
226 
227 /*
228  * __free_page_int --
229  *	Discard a WT_PAGE_COL_INT or WT_PAGE_ROW_INT page.
230  */
231 static void
__free_page_int(WT_SESSION_IMPL * session,WT_PAGE * page)232 __free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page)
233 {
234 	__wt_free_ref_index(session, page, WT_INTL_INDEX_GET_SAFE(page), false);
235 }
236 
237 /*
238  * __wt_free_ref --
239  *	Discard the contents of a WT_REF structure (optionally including the
240  * pages it references).
241  */
242 void
__wt_free_ref(WT_SESSION_IMPL * session,WT_REF * ref,int page_type,bool free_pages)243 __wt_free_ref(
244     WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages)
245 {
246 	WT_IKEY *ikey;
247 
248 	if (ref == NULL)
249 		return;
250 
251 	/* Assert there are no hazard pointers. */
252 	WT_ASSERT(session, __wt_hazard_check_assert(session, ref, false));
253 
254 	/*
255 	 * Optionally free the referenced pages.  (The path to free referenced
256 	 * page is used for error cleanup, no instantiated and then discarded
257 	 * page should have WT_REF entries with real pages.  The page may have
258 	 * been marked dirty as well; page discard checks for that, so we mark
259 	 * it clean explicitly.)
260 	 */
261 	if (free_pages && ref->page != NULL) {
262 		__wt_page_modify_clear(session, ref->page);
263 		__wt_page_out(session, &ref->page);
264 	}
265 
266 	/*
267 	 * Optionally free row-store WT_REF key allocation. Historic versions of
268 	 * this code looked in a passed-in page argument, but that is dangerous,
269 	 * some of our error-path callers create WT_REF structures without ever
270 	 * setting WT_REF.home or having a parent page to which the WT_REF will
271 	 * be linked. Those WT_REF structures invariably have instantiated keys,
272 	 * (they obviously cannot be on-page keys), and we must free the memory.
273 	 */
274 	switch (page_type) {
275 	case WT_PAGE_ROW_INT:
276 	case WT_PAGE_ROW_LEAF:
277 		if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
278 			__wt_free(session, ikey);
279 		break;
280 	}
281 
282 	/* Free any address allocation. */
283 	__wt_ref_addr_free(session, ref);
284 
285 	/* Free any lookaside or page-deleted information. */
286 	__wt_free(session, ref->page_las);
287 	if (ref->page_del != NULL) {
288 		__wt_free(session, ref->page_del->update_list);
289 		__wt_free(session, ref->page_del);
290 	}
291 
292 	__wt_overwrite_and_free(session, ref);
293 }
294 
295 /*
296  * __wt_free_ref_index --
297  *	Discard a page index and its references.
298  */
299 void
__wt_free_ref_index(WT_SESSION_IMPL * session,WT_PAGE * page,WT_PAGE_INDEX * pindex,bool free_pages)300 __wt_free_ref_index(WT_SESSION_IMPL *session,
301     WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages)
302 {
303 	uint32_t i;
304 
305 	if (pindex == NULL)
306 		return;
307 
308 	for (i = 0; i < pindex->entries; ++i)
309 		__wt_free_ref(
310 		    session, pindex->index[i], page->type, free_pages);
311 	__wt_free(session, pindex);
312 }
313 
314 /*
315  * __free_page_col_var --
316  *	Discard a WT_PAGE_COL_VAR page.
317  */
318 static void
__free_page_col_var(WT_SESSION_IMPL * session,WT_PAGE * page)319 __free_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page)
320 {
321 	/* Free the RLE lookup array. */
322 	__wt_free(session, page->u.col_var.repeats);
323 }
324 
325 /*
326  * __free_page_row_leaf --
327  *	Discard a WT_PAGE_ROW_LEAF page.
328  */
329 static void
__free_page_row_leaf(WT_SESSION_IMPL * session,WT_PAGE * page)330 __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
331 {
332 	WT_IKEY *ikey;
333 	WT_ROW *rip;
334 	uint32_t i;
335 	void *copy;
336 
337 	/*
338 	 * Free the in-memory index array.
339 	 *
340 	 * For each entry, see if the key was an allocation (that is, if it
341 	 * points somewhere other than the original page), and if so, free
342 	 * the memory.
343 	 */
344 	WT_ROW_FOREACH(page, rip, i) {
345 		copy = WT_ROW_KEY_COPY(rip);
346 		(void)__wt_row_leaf_key_info(
347 		    page, copy, &ikey, NULL, NULL, NULL);
348 		__wt_free(session, ikey);
349 	}
350 }
351 
352 /*
353  * __free_skip_array --
354  *	Discard an array of skip list headers.
355  */
356 static void
__free_skip_array(WT_SESSION_IMPL * session,WT_INSERT_HEAD ** head_arg,uint32_t entries,bool update_ignore)357 __free_skip_array(WT_SESSION_IMPL *session,
358     WT_INSERT_HEAD **head_arg, uint32_t entries, bool update_ignore)
359 {
360 	WT_INSERT_HEAD **head;
361 
362 	/*
363 	 * For each non-NULL slot in the page's array of inserts, free the
364 	 * linked list anchored in that slot.
365 	 */
366 	for (head = head_arg; entries > 0; --entries, ++head)
367 		if (*head != NULL) {
368 			__free_skip_list(
369 			    session, WT_SKIP_FIRST(*head), update_ignore);
370 			__wt_free(session, *head);
371 		}
372 
373 	/* Free the header array. */
374 	__wt_free(session, head_arg);
375 }
376 
377 /*
378  * __free_skip_list --
379  *	Walk a WT_INSERT forward-linked list and free the per-thread combination
380  * of a WT_INSERT structure and its associated chain of WT_UPDATE structures.
381  */
382 static void
__free_skip_list(WT_SESSION_IMPL * session,WT_INSERT * ins,bool update_ignore)383 __free_skip_list(WT_SESSION_IMPL *session, WT_INSERT *ins, bool update_ignore)
384 {
385 	WT_INSERT *next;
386 
387 	for (; ins != NULL; ins = next) {
388 		if (!update_ignore)
389 			__wt_free_update_list(session, ins->upd);
390 		next = WT_SKIP_NEXT(ins);
391 		__wt_free(session, ins);
392 	}
393 }
394 
395 /*
396  * __free_update --
397  *	Discard the update array.
398  */
399 static void
__free_update(WT_SESSION_IMPL * session,WT_UPDATE ** update_head,uint32_t entries,bool update_ignore)400 __free_update(WT_SESSION_IMPL *session,
401     WT_UPDATE **update_head, uint32_t entries, bool update_ignore)
402 {
403 	WT_UPDATE **updp;
404 
405 	/*
406 	 * For each non-NULL slot in the page's array of updates, free the
407 	 * linked list anchored in that slot.
408 	 */
409 	if (!update_ignore)
410 		for (updp = update_head; entries > 0; --entries, ++updp)
411 			if (*updp != NULL)
412 				__wt_free_update_list(session, *updp);
413 
414 	/* Free the update array. */
415 	__wt_free(session, update_head);
416 }
417 
418 /*
419  * __wt_free_update_list --
420  *	Walk a WT_UPDATE forward-linked list and free the per-thread combination
421  *	of a WT_UPDATE structure and its associated data.
422  */
423 void
__wt_free_update_list(WT_SESSION_IMPL * session,WT_UPDATE * upd)424 __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
425 {
426 	WT_UPDATE *next;
427 
428 	for (; upd != NULL; upd = next) {
429 		next = upd->next;
430 		__wt_free(session, upd);
431 	}
432 }
433