1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 /***********
40  * The purpose of this file is to implement the high-level logic for
41  * taking a checkpoint.
42  *
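 * There are three locks used for taking a checkpoint.  Two are described below;
 * the third, low_priority_multi_operation_lock, is write-locked by the checkpoint
 * function immediately before it write-locks multi_operation_lock.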
44  *
45  * NOTE: The reader-writer locks may be held by either multiple clients
46  *       or the checkpoint function.  (The checkpoint function has the role
47  *       of the writer, the clients have the reader roles.)
48  *
49  *  - multi_operation_lock
50  *    This is a new reader-writer lock.
 *    This lock is held by the checkpoint function only for as long as is required
 *    to set all the "pending" bits and to create the checkpoint-in-progress versions
53  *    of the header and translation table (btt).
54  *    The following operations must take the multi_operation_lock:
55  *     - any set of operations that must be atomic with respect to begin checkpoint
56  *
57  *  - checkpoint_safe_lock
58  *    This is a new reader-writer lock.
59  *    This lock is held for the entire duration of the checkpoint.
60  *    It is used to prevent more than one checkpoint from happening at a time
61  *    (the checkpoint function is non-re-entrant), and to prevent certain operations
62  *    that should not happen during a checkpoint.
63  *    The following operations must take the checkpoint_safe lock:
64  *       - delete a dictionary
65  *       - rename a dictionary
66  *    The application can use this lock to disable checkpointing during other sensitive
67  *    operations, such as making a backup copy of the database.
68  *
69  * Once the "pending" bits are set and the snapshots are taken of the header and btt,
70  * most normal database operations are permitted to resume.
71  *
72  *
73  *
74  *****/
75 
76 #include <my_global.h>
77 #include <time.h>
78 
79 #include "portability/toku_portability.h"
80 #include "portability/toku_atomic.h"
81 
82 #include "ft/cachetable/cachetable.h"
83 #include "ft/cachetable/checkpoint.h"
84 #include "ft/ft.h"
85 #include "ft/logger/log-internal.h"
86 #include "ft/logger/recover.h"
87 #include "util/frwlock.h"
88 #include "util/status.h"
89 
90 toku_instr_key *checkpoint_safe_mutex_key;
91 toku_instr_key *checkpoint_safe_rwlock_key;
92 toku_instr_key *multi_operation_lock_key;
93 toku_instr_key *low_priority_multi_operation_lock_key;
94 
95 toku_instr_key *rwlock_cond_key;
96 toku_instr_key *rwlock_wait_read_key;
97 toku_instr_key *rwlock_wait_write_key;
98 
toku_checkpoint_get_status(CACHETABLE ct,CHECKPOINT_STATUS statp)99 void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
100     cp_status.init();
101     CP_STATUS_VAL(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
102     *statp = cp_status;
103 }
104 
105 static LSN last_completed_checkpoint_lsn;
106 
107 static toku_mutex_t checkpoint_safe_mutex;
108 static toku::frwlock checkpoint_safe_lock;
109 static toku_pthread_rwlock_t multi_operation_lock;
110 static toku_pthread_rwlock_t low_priority_multi_operation_lock;
111 
112 static bool initialized = false;     // sanity check
113 static volatile bool locked_mo = false;       // true when the multi_operation write lock is held (by checkpoint)
114 static volatile bool locked_cs = false;       // true when the checkpoint_safe write lock is held (by checkpoint)
115 static volatile uint64_t toku_checkpoint_begin_long_threshold = 1000000; // 1 second
116 static volatile uint64_t toku_checkpoint_end_long_threshold = 1000000 * 60; // 1 minute
117 
// Note: the following static functions are called from checkpoint-internal logic only,
// and use the "writer" calls for locking and unlocking.
120 
121 static void
multi_operation_lock_init(void)122 multi_operation_lock_init(void) {
123     pthread_rwlockattr_t attr;
124     pthread_rwlockattr_init(&attr);
125 #if defined(HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP)
126     pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
127 #else
128 // TODO: need to figure out how to make writer-preferential rwlocks
129 // happen on osx
130 #endif
131     toku_pthread_rwlock_init(
132         *multi_operation_lock_key, &multi_operation_lock, &attr);
133     toku_pthread_rwlock_init(*low_priority_multi_operation_lock_key,
134                              &low_priority_multi_operation_lock,
135                              &attr);
136     pthread_rwlockattr_destroy(&attr);
137     locked_mo = false;
138 }
139 
140 static void
multi_operation_lock_destroy(void)141 multi_operation_lock_destroy(void) {
142     toku_pthread_rwlock_destroy(&multi_operation_lock);
143     toku_pthread_rwlock_destroy(&low_priority_multi_operation_lock);
144 }
145 
146 static void
multi_operation_checkpoint_lock(void)147 multi_operation_checkpoint_lock(void) {
148     toku_pthread_rwlock_wrlock(&low_priority_multi_operation_lock);
149     toku_pthread_rwlock_wrlock(&multi_operation_lock);
150     locked_mo = true;
151 }
152 
153 static void
multi_operation_checkpoint_unlock(void)154 multi_operation_checkpoint_unlock(void) {
155     locked_mo = false;
156     toku_pthread_rwlock_wrunlock(&multi_operation_lock);
157     toku_pthread_rwlock_wrunlock(&low_priority_multi_operation_lock);
158 }
159 
checkpoint_safe_lock_init(void)160 static void checkpoint_safe_lock_init(void) {
161     toku_mutex_init(
162         *checkpoint_safe_mutex_key, &checkpoint_safe_mutex, nullptr);
163     checkpoint_safe_lock.init(&checkpoint_safe_mutex
164 #ifdef TOKU_MYSQL_WITH_PFS
165                               ,
166                               *checkpoint_safe_rwlock_key
167 #endif
168                               );
169     locked_cs = false;
170 }
171 
172 static void
checkpoint_safe_lock_destroy(void)173 checkpoint_safe_lock_destroy(void) {
174     checkpoint_safe_lock.deinit();
175     toku_mutex_destroy(&checkpoint_safe_mutex);
176 }
177 
178 static void
checkpoint_safe_checkpoint_lock(void)179 checkpoint_safe_checkpoint_lock(void) {
180     toku_mutex_lock(&checkpoint_safe_mutex);
181     checkpoint_safe_lock.write_lock(false);
182     toku_mutex_unlock(&checkpoint_safe_mutex);
183     locked_cs = true;
184 }
185 
186 static void
checkpoint_safe_checkpoint_unlock(void)187 checkpoint_safe_checkpoint_unlock(void) {
188     locked_cs = false;
189     toku_mutex_lock(&checkpoint_safe_mutex);
190     checkpoint_safe_lock.write_unlock();
191     toku_mutex_unlock(&checkpoint_safe_mutex);
192 }
193 
194 // toku_xxx_client_(un)lock() functions are only called from client code,
195 // never from checkpoint code, and use the "reader" interface to the lock functions.
196 
197 void
toku_multi_operation_client_lock(void)198 toku_multi_operation_client_lock(void) {
199     if (locked_mo)
200         (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_MO), 1);
201     toku_pthread_rwlock_rdlock(&multi_operation_lock);
202 }
203 
204 void
toku_multi_operation_client_unlock(void)205 toku_multi_operation_client_unlock(void) {
206     toku_pthread_rwlock_rdunlock(&multi_operation_lock);
207 }
208 
toku_low_priority_multi_operation_client_lock(void)209 void toku_low_priority_multi_operation_client_lock(void) {
210     toku_pthread_rwlock_rdlock(&low_priority_multi_operation_lock);
211 }
212 
toku_low_priority_multi_operation_client_unlock(void)213 void toku_low_priority_multi_operation_client_unlock(void) {
214     toku_pthread_rwlock_rdunlock(&low_priority_multi_operation_lock);
215 }
216 
217 void
toku_checkpoint_safe_client_lock(void)218 toku_checkpoint_safe_client_lock(void) {
219     if (locked_cs)
220         (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_CLIENT_WAIT_ON_CS), 1);
221     toku_mutex_lock(&checkpoint_safe_mutex);
222     checkpoint_safe_lock.read_lock();
223     toku_mutex_unlock(&checkpoint_safe_mutex);
224     toku_multi_operation_client_lock();
225 }
226 
227 void
toku_checkpoint_safe_client_unlock(void)228 toku_checkpoint_safe_client_unlock(void) {
229     toku_mutex_lock(&checkpoint_safe_mutex);
230     checkpoint_safe_lock.read_unlock();
231     toku_mutex_unlock(&checkpoint_safe_mutex);
232     toku_multi_operation_client_unlock();
233 }
234 
235 // Initialize the checkpoint mechanism, must be called before any client operations.
236 void
toku_checkpoint_init(void)237 toku_checkpoint_init(void) {
238     multi_operation_lock_init();
239     checkpoint_safe_lock_init();
240     initialized = true;
241 }
242 
243 void
toku_checkpoint_destroy(void)244 toku_checkpoint_destroy(void) {
245     multi_operation_lock_destroy();
246     checkpoint_safe_lock_destroy();
247     initialized = false;
248 }
249 
// Store a progress marker in the CP_FOOTPRINT status entry.  The marker is
// `x` plus the local variable `footprint_offset`, which must be in scope at
// the point of use.  The argument and the whole expansion are parenthesized
// so that an argument containing low-precedence operators (e.g. `c ? a : b`)
// is added as a unit instead of being re-associated with the `+`.
#define SET_CHECKPOINT_FOOTPRINT(x) (CP_STATUS_VAL(CP_FOOTPRINT) = footprint_offset + (x))
251 
252 
253 // Take a checkpoint of all currently open dictionaries
254 int
toku_checkpoint(CHECKPOINTER cp,TOKULOGGER logger,void (* callback_f)(void *),void * extra,void (* callback2_f)(void *),void * extra2,checkpoint_caller_t caller_id)255 toku_checkpoint(CHECKPOINTER cp, TOKULOGGER logger,
256                 void (*callback_f)(void*),  void * extra,
257                 void (*callback2_f)(void*), void * extra2,
258                 checkpoint_caller_t caller_id) {
259     int footprint_offset = (int) caller_id * 1000;
260 
261     assert(initialized);
262 
263     (void) toku_sync_fetch_and_add(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
264     checkpoint_safe_checkpoint_lock();
265     (void) toku_sync_fetch_and_sub(&CP_STATUS_VAL(CP_WAITERS_NOW), 1);
266 
267     if (CP_STATUS_VAL(CP_WAITERS_NOW) > CP_STATUS_VAL(CP_WAITERS_MAX))
268         CP_STATUS_VAL(CP_WAITERS_MAX) = CP_STATUS_VAL(CP_WAITERS_NOW);  // threadsafe, within checkpoint_safe lock
269 
270     SET_CHECKPOINT_FOOTPRINT(10);
271     multi_operation_checkpoint_lock();
272     SET_CHECKPOINT_FOOTPRINT(20);
273     toku_ft_open_close_lock();
274 
275     SET_CHECKPOINT_FOOTPRINT(30);
276     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
277     uint64_t t_checkpoint_begin_start = toku_current_time_microsec();
278     toku_cachetable_begin_checkpoint(cp, logger);
279     uint64_t t_checkpoint_begin_end = toku_current_time_microsec();
280 
281     toku_ft_open_close_unlock();
282     multi_operation_checkpoint_unlock();
283 
284     SET_CHECKPOINT_FOOTPRINT(40);
285     if (callback_f) {
286         callback_f(extra);      // callback is called with checkpoint_safe_lock still held
287     }
288 
289     uint64_t t_checkpoint_end_start = toku_current_time_microsec();
290     toku_cachetable_end_checkpoint(cp, logger, callback2_f, extra2);
291     uint64_t t_checkpoint_end_end = toku_current_time_microsec();
292 
293     SET_CHECKPOINT_FOOTPRINT(50);
294     if (logger) {
295         last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
296         toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
297         CP_STATUS_VAL(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
298     }
299 
300     SET_CHECKPOINT_FOOTPRINT(60);
301     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
302     CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN);
303     CP_STATUS_VAL(CP_CHECKPOINT_COUNT)++;
304     uint64_t duration = t_checkpoint_begin_end - t_checkpoint_begin_start;
305     CP_STATUS_VAL(CP_BEGIN_TIME) += duration;
306     if (duration >= toku_checkpoint_begin_long_threshold) {
307         CP_STATUS_VAL(CP_LONG_BEGIN_TIME) += duration;
308         CP_STATUS_VAL(CP_LONG_BEGIN_COUNT) += 1;
309     }
310     duration = t_checkpoint_end_end - t_checkpoint_end_start;
311     CP_STATUS_VAL(CP_END_TIME) += duration;
312     if (duration >= toku_checkpoint_end_long_threshold) {
313         CP_STATUS_VAL(CP_LONG_END_TIME) += duration;
314         CP_STATUS_VAL(CP_LONG_END_COUNT) += 1;
315     }
316     CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION) += (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
317     CP_STATUS_VAL(CP_TIME_CHECKPOINT_DURATION_LAST) = (uint64_t) ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_END)) - ((time_t) CP_STATUS_VAL(CP_TIME_LAST_CHECKPOINT_BEGIN));
318     CP_STATUS_VAL(CP_FOOTPRINT) = 0;
319 
320     checkpoint_safe_checkpoint_unlock();
321     return 0;
322 }
323 
324 #include <toku_race_tools.h>
325 void __attribute__((__constructor__)) toku_checkpoint_helgrind_ignore(void);
326 void
toku_checkpoint_helgrind_ignore(void)327 toku_checkpoint_helgrind_ignore(void) {
328     TOKU_VALGRIND_HG_DISABLE_CHECKING(&cp_status, sizeof cp_status);
329     TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_mo, sizeof locked_mo);
330     TOKU_VALGRIND_HG_DISABLE_CHECKING(&locked_cs, sizeof locked_cs);
331 }
332 
333 #undef SET_CHECKPOINT_FOOTPRINT
334