1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 /***********
40  * The purpose of this file is to implement the high-level logic for
41  * taking a checkpoint.
42  *
43  * There are three locks used for taking a checkpoint.  They are listed below.
44  *
45  * NOTE: The reader-writer locks may be held by either multiple clients
46  *       or the checkpoint function.  (The checkpoint function has the role
47  *       of the writer, the clients have the reader roles.)
48  *
49  *  - multi_operation_lock
50  *    This is a new reader-writer lock.
 *    This lock is held by the checkpoint function only for as long as is required
 *    to set all the "pending" bits and to create the checkpoint-in-progress versions
53  *    of the header and translation table (btt).
54  *    The following operations must take the multi_operation_lock:
55  *     - any set of operations that must be atomic with respect to begin checkpoint
56  *
57  *  - checkpoint_safe_lock
58  *    This is a new reader-writer lock.
59  *    This lock is held for the entire duration of the checkpoint.
60  *    It is used to prevent more than one checkpoint from happening at a time
61  *    (the checkpoint function is non-re-entrant), and to prevent certain operations
62  *    that should not happen during a checkpoint.
63  *    The following operations must take the checkpoint_safe lock:
64  *       - delete a dictionary
65  *       - rename a dictionary
66  *    The application can use this lock to disable checkpointing during other sensitive
67  *    operations, such as making a backup copy of the database.
68  *
69  * Once the "pending" bits are set and the snapshots are taken of the header and btt,
70  * most normal database operations are permitted to resume.
71  *
72  *
73  *
74  *****/
75 
76 #include <time.h>
77 
78 #include "portability/toku_portability.h"
79 #include "portability/toku_atomic.h"
80 
81 #include "ft/cachetable/cachetable.h"
82 #include "ft/cachetable/checkpoint.h"
83 #include "ft/ft.h"
84 #include "ft/logger/log-internal.h"
85 #include "ft/logger/recover.h"
86 #include "util/frwlock.h"
87 #include "util/status.h"
88 
89 toku_instr_key *checkpoint_safe_mutex_key;
90 toku_instr_key *checkpoint_safe_rwlock_key;
91 toku_instr_key *multi_operation_lock_key;
92 toku_instr_key *low_priority_multi_operation_lock_key;
93 
94 toku_instr_key *rwlock_cond_key;
95 toku_instr_key *rwlock_wait_read_key;
96 toku_instr_key *rwlock_wait_write_key;
97 
toku_checkpoint_get_status(CACHETABLE ct,CHECKPOINT_STATUS statp)98 void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
99     cp_status.init();
100     CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
101     *statp = cp_status;
102 }
103 
104 static LSN last_completed_checkpoint_lsn;
105 
106 static toku_mutex_t checkpoint_safe_mutex;
107 static toku::frwlock checkpoint_safe_lock;
108 static toku_pthread_rwlock_t multi_operation_lock;
109 static toku_pthread_rwlock_t low_priority_multi_operation_lock;
110 
111 static bool initialized = false;     // sanity check
112 static volatile bool locked_mo = false;       // true when the multi_operation write lock is held (by checkpoint)
113 static volatile bool locked_cs = false;       // true when the checkpoint_safe write lock is held (by checkpoint)
114 static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
115 static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
116 
117 // Note following static functions are called from checkpoint internal logic only,
118 // and use the "writer" calls for locking and unlocking.
119 
120 static void
multi_operation_lock_init(void)121 multi_operation_lock_init(void) {
122     pthread_rwlockattr_t attr;
123     pthread_rwlockattr_init(&attr);
124 #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
125     pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
126 #else
127 // TODO: need to figure out how to make writer-preferential rwlocks
128 // happen on osx
129 #endif
130     toku_pthread_rwlock_init(
131         *multi_operation_lock_key, &multi_operation_lock, &attr);
132     toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key,
133                              &low_priority_multi_operation_lock,
134                              &attr);
135     pthread_rwlockattr_destroy(&attr);
136     locked_mo = false;
137 }
138 
139 static void
multi_operation_lock_destroy(void)140 multi_operation_lock_destroy(void) {
141     toku_pthread_rwlock_destroy(&multi_operation_lock);
142     toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
143 }
144 
145 static void
multi_operation_checkpoint_lock(void)146 multi_operation_checkpoint_lock(void) {
147     toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
148     toku_pthread_rwlock_wrlock(&multi_operation_lock);
149     locked_mo = true;
150 }
151 
152 static void
multi_operation_checkpoint_unlock(void)153 multi_operation_checkpoint_unlock(void) {
154     locked_mo = false;
155     toku_pthread_rwlock_wrunlock(&multi_operation_lock);
156     toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
157 }
158 
checkpoint_safe_lock_init(void)159 static void checkpoint_safe_lock_init(void) {
160     toku_mutex_init(
161         *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr);
162     checkpoint_safe_lock.init(&checkpoint_safe_mutex
163 #ifdef TOKU_MYSQL_WITH_PFS
164                               ,
165                               *checkpoint_safe_rwlock_key
166 #endif
167                               );
168     locked_cs = false;
169 }
170 
171 static void
checkpoint_safe_lock_destroy(void)172 checkpoint_safe_lock_destroy(void) {
173     checkpoint_safe_lock.deinit();
174     toku_mutex_destroy(&checkpoint_safe_mutex);
175 }
176 
177 static void
checkpoint_safe_checkpoint_lock(void)178 checkpoint_safe_checkpoint_lock(void) {
179     toku_mutex_lock(&checkpoint_safe_mutex);
180     checkpoint_safe_lock.write_lock(false);
181     toku_mutex_unlock(&checkpoint_safe_mutex);
182     locked_cs = true;
183 }
184 
185 static void
checkpoint_safe_checkpoint_unlock(void)186 checkpoint_safe_checkpoint_unlock(void) {
187     locked_cs = false;
188     toku_mutex_lock(&checkpoint_safe_mutex);
189     checkpoint_safe_lock.write_unlock();
190     toku_mutex_unlock(&checkpoint_safe_mutex);
191 }
192 
193 // toku_xxx_client_(un)lock() functions are only called from client code,
194 // never from checkpoint code, and use the "reader" interface to the lock functions.
195 
196 void
toku_multi_operation_client_lock(void)197 toku_multi_operation_client_lock(void) {
198     if (locked_mo)
199         (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
200     toku_pthread_rwlock_rdlock(&multi_operation_lock);
201 }
202 
203 void
toku_multi_operation_client_unlock(void)204 toku_multi_operation_client_unlock(void) {
205     toku_pthread_rwlock_rdunlock(&multi_operation_lock);
206 }
207 
toku_low_priority_multi_operation_client_lock(void)208 void toku_low_priority_multi_operation_client_lock(void) {
209     toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
210 }
211 
toku_low_priority_multi_operation_client_unlock(void)212 void toku_low_priority_multi_operation_client_unlock(void) {
213     toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
214 }
215 
216 void
toku_checkpoint_safe_client_lock(void)217 toku_checkpoint_safe_client_lock(void) {
218     if (locked_cs)
219         (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
220     toku_mutex_lock(&checkpoint_safe_mutex);
221     checkpoint_safe_lock.read_lock();
222     toku_mutex_unlock(&checkpoint_safe_mutex);
223     toku_multi_operation_client_lock();
224 }
225 
226 void
toku_checkpoint_safe_client_unlock(void)227 toku_checkpoint_safe_client_unlock(void) {
228     toku_mutex_lock(&checkpoint_safe_mutex);
229     checkpoint_safe_lock.read_unlock();
230     toku_mutex_unlock(&checkpoint_safe_mutex);
231     toku_multi_operation_client_unlock();
232 }
233 
234 // Initialize the checkpoint mechanism, must be called before any client operations.
235 void
toku_checkpoint_init(void)236 toku_checkpoint_init(void) {
237     multi_operation_lock_init();
238     checkpoint_safe_lock_init();
239     initialized = true;
240 }
241 
242 void
toku_checkpoint_destroy(void)243 toku_checkpoint_destroy(void) {
244     multi_operation_lock_destroy();
245     checkpoint_safe_lock_destroy();
246     initialized = false;
247 }
248 
// Record progress through toku_checkpoint() in the CP_FOOTPRINT status entry.
// Expands to an assignment that uses the caller's local `footprint_offset`,
// so it may only be used where a variable of that name is in scope.
// The argument and the whole expansion are parenthesized so that any
// expression can be passed safely (e.g. one containing ?: or a comma-free
// lower-precedence operator).
#define SET_CHECKPOINT_FOOTPRINT(x) (CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x))
250 
251 
252 // Take a checkpoint of all currently open dictionaries
253 int
toku_checkpoint(CHECKPOINTER cp,TOKULOGGER logger,void (* callback_f)(void *),void * extra,void (* callback2_f)(void *),void * extra2,checkpoint_caller_t caller_id)254 toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
255                 void (*callback_f)(void*),  void * extra,
256                 void (*callback2_f)(void*), void * extra2,
257                 checkpoint_caller_t caller_id) {
258     int footprint_offset = (int) caller_id * 1000;
259 
260     assert(initialized);
261 
262     (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
263     checkpoint_safe_checkpoint_lock();
264     (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
265 
266     if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
267         CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW);  // threadsafe, within checkpoint_safe lock
268 
269     SET_CHECKPOINT_FOOTPRINT(10);
270     multi_operation_checkpoint_lock();
271     SET_CHECKPOINT_FOOTPRINT(20);
272     toku_ft_open_close_lock();
273 
274     SET_CHECKPOINT_FOOTPRINT(30);
275     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
276     uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
277     toku_cachetable_begin_checkpoint(cp, logger);
278     uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
279 
280     toku_ft_open_close_unlock();
281     multi_operation_checkpoint_unlock();
282 
283     SET_CHECKPOINT_FOOTPRINT(40);
284     if (callback_f) {
285         callback_f(extra);      // callback is called with checkpoint_safe_lock still held
286     }
287 
288     uint64_t t_checkpoint_end_start = toku_current_time_microsec();
289     toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
290     uint64_t t_checkpoint_end_end = toku_current_time_microsec();
291 
292     SET_CHECKPOINT_FOOTPRINT(50);
293     if (logger) {
294         last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
295         toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
296         CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
297     }
298 
299     SET_CHECKPOINT_FOOTPRINT(60);
300     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
301     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
302     CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
303     uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
304     CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
305     if (duration >= toku_checkpoint_begin_long_threshold) {
306         CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
307         CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
308     }
309     duration = t_checkpoint_end_end - t_checkpoint_end_start;
310     CP_STATUS_VAL(CP_END_TIME) += duration;
311     if (duration >= toku_checkpoint_end_long_threshold) {
312         CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
313         CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
314     }
315     CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
316     CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
317     CP_STATUS_VAL(CP_FOOTPRINT) = 0;
318 
319     checkpoint_safe_checkpoint_unlock();
320     return 0;
321 }
322 
323 #include <toku_race_tools.h>
324 void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
325 void
toku_checkpoint_helgrind_ignore(void)326 toku_checkpoint_helgrind_ignore(void) {
327     TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
328     TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
329     TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
330 }
331 
332 #undef SET_CHECKPOINT_FOOTPRINT
333