1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6
7
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22 ----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39 /***********
40 * The purpose of this file is to implement the high-level logic for
41 * taking a checkpoint.
42 *
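 * There are three locks used for taking a checkpoint. The two main ones are
 * described below; the third, low_priority_multi_operation_lock, is write-locked
 * by the checkpoint function immediately before multi_operation_lock.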
44 *
45 * NOTE: The reader-writer locks may be held by either multiple clients
46 * or the checkpoint function. (The checkpoint function has the role
47 * of the writer, the clients have the reader roles.)
48 *
49 * - multi_operation_lock
50 * This is a new reader-writer lock.
 *    This lock is held by the checkpoint function only for as long as is required
 *    to set all the "pending" bits and to create the checkpoint-in-progress versions
53 * of the header and translation table (btt).
54 * The following operations must take the multi_operation_lock:
55 * - any set of operations that must be atomic with respect to begin checkpoint
56 *
57 * - checkpoint_safe_lock
58 * This is a new reader-writer lock.
59 * This lock is held for the entire duration of the checkpoint.
60 * It is used to prevent more than one checkpoint from happening at a time
61 * (the checkpoint function is non-re-entrant), and to prevent certain operations
62 * that should not happen during a checkpoint.
63 * The following operations must take the checkpoint_safe lock:
64 * - delete a dictionary
65 * - rename a dictionary
66 * The application can use this lock to disable checkpointing during other sensitive
67 * operations, such as making a backup copy of the database.
68 *
69 * Once the "pending" bits are set and the snapshots are taken of the header and btt,
70 * most normal database operations are permitted to resume.
71 *
72 *
73 *
74 *****/
75
76 #include <time.h>
77
78 #include "portability/toku_portability.h"
79 #include "portability/toku_atomic.h"
80
81 #include "ft/cachetable/cachetable.h"
82 #include "ft/cachetable/checkpoint.h"
83 #include "ft/ft.h"
84 #include "ft/logger/log-internal.h"
85 #include "ft/logger/recover.h"
86 #include "util/frwlock.h"
87 #include "util/status.h"
88
// Instrumentation keys for the synchronization objects created in this file.
// Each is dereferenced when the matching mutex/rwlock is initialized, so they
// must point at valid keys before toku_checkpoint_init() runs. They are not
// assigned anywhere in the visible code.
toku_instr_key *checkpoint_safe_mutex_key;
toku_instr_key *checkpoint_safe_rwlock_key;  // only used when TOKU_MYSQL_WITH_PFS is defined
toku_instr_key *multi_operation_lock_key;
toku_instr_key *low_priority_multi_operation_lock_key;

// NOTE(review): these three keys are not referenced in the visible part of this
// file -- presumably consumed by the rwlock implementation; confirm.
toku_instr_key *rwlock_cond_key;
toku_instr_key *rwlock_wait_read_key;
toku_instr_key *rwlock_wait_write_key;
97
toku_checkpoint_get_status(CACHETABLE ct,CHECKPOINT_STATUS statp)98 void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
99 cp_status.init();
100 CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
101 *statp = cp_status;
102 }
103
// LSN of the last completed checkpoint; copied from the logger near the end of
// toku_checkpoint() when a logger is supplied.
static LSN last_completed_checkpoint_lsn;

// checkpoint_safe_lock is initialized on top of checkpoint_safe_mutex, and
// every operation on it in this file is made with that mutex held.
static toku_mutex_t checkpoint_safe_mutex;
static toku::frwlock checkpoint_safe_lock;
// The checkpoint write-locks low_priority_multi_operation_lock first, then
// multi_operation_lock; clients take read locks on them.
static toku_pthread_rwlock_t multi_operation_lock;
static toku_pthread_rwlock_t low_priority_multi_operation_lock;

static bool initialized = false;     // sanity check
// The two flags below are read by client lock functions without holding any
// lock, purely to decide whether to bump a "client waited" counter.
static volatile bool locked_mo = false;       // true when the multi_operation write lock is held (by checkpoint)
static volatile bool locked_cs = false;       // true when the checkpoint_safe write lock is held (by checkpoint)
// Thresholds are in microseconds; they are compared against differences of
// toku_current_time_microsec() readings in toku_checkpoint().
static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
116
117 // Note following static functions are called from checkpoint internal logic only,
118 // and use the "writer" calls for locking and unlocking.
119
120 static void
multi_operation_lock_init(void)121 multi_operation_lock_init(void) {
122 pthread_rwlockattr_t attr;
123 pthread_rwlockattr_init(&attr);
124 #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
125 pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
126 #else
127 // TODO: need to figure out how to make writer-preferential rwlocks
128 // happen on osx
129 #endif
130 toku_pthread_rwlock_init(
131 *multi_operation_lock_key, &multi_operation_lock, &attr);
132 toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key,
133 &low_priority_multi_operation_lock,
134 &attr);
135 pthread_rwlockattr_destroy(&attr);
136 locked_mo = false;
137 }
138
139 static void
multi_operation_lock_destroy(void)140 multi_operation_lock_destroy(void) {
141 toku_pthread_rwlock_destroy(&multi_operation_lock);
142 toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
143 }
144
145 static void
multi_operation_checkpoint_lock(void)146 multi_operation_checkpoint_lock(void) {
147 toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
148 toku_pthread_rwlock_wrlock(&multi_operation_lock);
149 locked_mo = true;
150 }
151
152 static void
multi_operation_checkpoint_unlock(void)153 multi_operation_checkpoint_unlock(void) {
154 locked_mo = false;
155 toku_pthread_rwlock_wrunlock(&multi_operation_lock);
156 toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
157 }
158
checkpoint_safe_lock_init(void)159 static void checkpoint_safe_lock_init(void) {
160 toku_mutex_init(
161 *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr);
162 checkpoint_safe_lock.init(&checkpoint_safe_mutex
163 #ifdef TOKU_MYSQL_WITH_PFS
164 ,
165 *checkpoint_safe_rwlock_key
166 #endif
167 );
168 locked_cs = false;
169 }
170
171 static void
checkpoint_safe_lock_destroy(void)172 checkpoint_safe_lock_destroy(void) {
173 checkpoint_safe_lock.deinit();
174 toku_mutex_destroy(&checkpoint_safe_mutex);
175 }
176
177 static void
checkpoint_safe_checkpoint_lock(void)178 checkpoint_safe_checkpoint_lock(void) {
179 toku_mutex_lock(&checkpoint_safe_mutex);
180 checkpoint_safe_lock.write_lock(false);
181 toku_mutex_unlock(&checkpoint_safe_mutex);
182 locked_cs = true;
183 }
184
185 static void
checkpoint_safe_checkpoint_unlock(void)186 checkpoint_safe_checkpoint_unlock(void) {
187 locked_cs = false;
188 toku_mutex_lock(&checkpoint_safe_mutex);
189 checkpoint_safe_lock.write_unlock();
190 toku_mutex_unlock(&checkpoint_safe_mutex);
191 }
192
193 // toku_xxx_client_(un)lock() functions are only called from client code,
194 // never from checkpoint code, and use the "reader" interface to the lock functions.
195
196 void
toku_multi_operation_client_lock(void)197 toku_multi_operation_client_lock(void) {
198 if (locked_mo)
199 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
200 toku_pthread_rwlock_rdlock(&multi_operation_lock);
201 }
202
203 void
toku_multi_operation_client_unlock(void)204 toku_multi_operation_client_unlock(void) {
205 toku_pthread_rwlock_rdunlock(&multi_operation_lock);
206 }
207
toku_low_priority_multi_operation_client_lock(void)208 void toku_low_priority_multi_operation_client_lock(void) {
209 toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
210 }
211
toku_low_priority_multi_operation_client_unlock(void)212 void toku_low_priority_multi_operation_client_unlock(void) {
213 toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
214 }
215
216 void
toku_checkpoint_safe_client_lock(void)217 toku_checkpoint_safe_client_lock(void) {
218 if (locked_cs)
219 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
220 toku_mutex_lock(&checkpoint_safe_mutex);
221 checkpoint_safe_lock.read_lock();
222 toku_mutex_unlock(&checkpoint_safe_mutex);
223 toku_multi_operation_client_lock();
224 }
225
226 void
toku_checkpoint_safe_client_unlock(void)227 toku_checkpoint_safe_client_unlock(void) {
228 toku_mutex_lock(&checkpoint_safe_mutex);
229 checkpoint_safe_lock.read_unlock();
230 toku_mutex_unlock(&checkpoint_safe_mutex);
231 toku_multi_operation_client_unlock();
232 }
233
234 // Initialize the checkpoint mechanism, must be called before any client operations.
235 void
toku_checkpoint_init(void)236 toku_checkpoint_init(void) {
237 multi_operation_lock_init();
238 checkpoint_safe_lock_init();
239 initialized = true;
240 }
241
242 void
toku_checkpoint_destroy(void)243 toku_checkpoint_destroy(void) {
244 multi_operation_lock_destroy();
245 checkpoint_safe_lock_destroy();
246 initialized = false;
247 }
248
// Record how far the current checkpoint has progressed. Expects a local
// variable named `footprint_offset` to be in scope at the point of use.
// The argument is parenthesized so that an expression argument (e.g. `1 << 2`)
// is added to the offset as a whole rather than re-associated with the `+`.
#define SET_CHECKPOINT_FOOTPRINT(x) CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x)
250
251
252 // Take a checkpoint of all currently open dictionaries
253 int
toku_checkpoint(CHECKPOINTER cp,TOKULOGGER logger,void (* callback_f)(void *),void * extra,void (* callback2_f)(void *),void * extra2,checkpoint_caller_t caller_id)254 toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
255 void (*callback_f)(void*), void * extra,
256 void (*callback2_f)(void*), void * extra2,
257 checkpoint_caller_t caller_id) {
258 int footprint_offset = (int) caller_id * 1000;
259
260 assert(initialized);
261
262 (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
263 checkpoint_safe_checkpoint_lock();
264 (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
265
266 if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
267 CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
268
269 SET_CHECKPOINT_FOOTPRINT(10);
270 multi_operation_checkpoint_lock();
271 SET_CHECKPOINT_FOOTPRINT(20);
272 toku_ft_open_close_lock();
273
274 SET_CHECKPOINT_FOOTPRINT(30);
275 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
276 uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
277 toku_cachetable_begin_checkpoint(cp, logger);
278 uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
279
280 toku_ft_open_close_unlock();
281 multi_operation_checkpoint_unlock();
282
283 SET_CHECKPOINT_FOOTPRINT(40);
284 if (callback_f) {
285 callback_f(extra); // callback is called with checkpoint_safe_lock still held
286 }
287
288 uint64_t t_checkpoint_end_start = toku_current_time_microsec();
289 toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
290 uint64_t t_checkpoint_end_end = toku_current_time_microsec();
291
292 SET_CHECKPOINT_FOOTPRINT(50);
293 if (logger) {
294 last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
295 toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
296 CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
297 }
298
299 SET_CHECKPOINT_FOOTPRINT(60);
300 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
301 CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
302 CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
303 uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
304 CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
305 if (duration >= toku_checkpoint_begin_long_threshold) {
306 CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
307 CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
308 }
309 duration = t_checkpoint_end_end - t_checkpoint_end_start;
310 CP_STATUS_VAL(CP_END_TIME) += duration;
311 if (duration >= toku_checkpoint_end_long_threshold) {
312 CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
313 CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
314 }
315 CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
316 CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
317 CP_STATUS_VAL(CP_FOOTPRINT) = 0;
318
319 checkpoint_safe_checkpoint_unlock();
320 return 0;
321 }
322
323 #include <toku_race_tools.h>
324 void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
325 void
toku_checkpoint_helgrind_ignore(void)326 toku_checkpoint_helgrind_ignore(void) {
327 TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
328 TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
329 TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
330 }
331
332 #undef SET_CHECKPOINT_FOOTPRINT
333