1 /* Copyright (c) 2018, 2020, MariaDB Corporation.
2 This program is free software; you can redistribute it and/or modify
3 it under the terms of the GNU General Public License as published by
4 the Free Software Foundation; version 2 of the License.
5
6 This program is distributed in the hope that it will be useful,
7 but WITHOUT ANY WARRANTY; without even the implied warranty of
8 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 GNU General Public License for more details.
10
11 You should have received a copy of the GNU General Public License
12 along with this program; if not, write to the Free Software
13 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
14
/*
  Implementation of BACKUP STAGE, an interface for external backup tools.

  TODO:
  - At backup_start() we call ha_prepare_for_backup() for all active
    storage engines. If someone tries to load a new storage engine
    that requires prepare_for_backup() for it to work, that storage
    engine has to be blocked from loading until the backup finishes.
    As we currently don't have any loadable storage engine that
    requires this, we have not implemented that part.
    This can easily be done by adding a
    PLUGIN_CANT_BE_LOADED_WHILE_BACKUP_IS_RUNNING flag to
    maria_declare_plugin and checking it before calling
    plugin_initialize()
*/
30
31 #include "mariadb.h"
32 #include "sql_class.h"
33 #include "sql_base.h" // flush_tables
34 #include "sql_insert.h" // kill_delayed_threads
35 #include "sql_handler.h" // mysql_ha_cleanup_no_free
36 #include <my_sys.h>
37 #include "wsrep_mysqld.h"
38
39 static const char *stage_names[]=
40 {"START", "FLUSH", "BLOCK_DDL", "BLOCK_COMMIT", "END", 0};
41
42 TYPELIB backup_stage_names=
43 { array_elements(stage_names)-1, "", stage_names, 0 };
44
45 static MDL_ticket *backup_flush_ticket;
46
47 static bool backup_start(THD *thd);
48 static bool backup_flush(THD *thd);
49 static bool backup_block_ddl(THD *thd);
50 static bool backup_block_commit(THD *thd);
51
52 /**
53 Run next stage of backup
54 */
55
backup_init()56 void backup_init()
57 {
58 backup_flush_ticket= 0;
59 }
60
run_backup_stage(THD * thd,backup_stages stage)61 bool run_backup_stage(THD *thd, backup_stages stage)
62 {
63 backup_stages next_stage;
64 DBUG_ENTER("run_backup_stage");
65
66 if (thd->current_backup_stage == BACKUP_FINISHED)
67 {
68 if (stage != BACKUP_START)
69 {
70 my_error(ER_BACKUP_NOT_RUNNING, MYF(0));
71 DBUG_RETURN(1);
72 }
73 next_stage= BACKUP_START;
74 }
75 else
76 {
77 if ((uint) thd->current_backup_stage >= (uint) stage)
78 {
79 my_error(ER_BACKUP_WRONG_STAGE, MYF(0), stage_names[stage],
80 stage_names[thd->current_backup_stage]);
81 DBUG_RETURN(1);
82 }
83 if (stage == BACKUP_END)
84 {
85 /*
86 If end is given, jump directly to stage end. This is to allow one
87 to abort backup quickly.
88 */
89 next_stage= stage;
90 }
91 else
92 {
93 /* Go trough all not used stages until we reach 'stage' */
94 next_stage= (backup_stages) ((uint) thd->current_backup_stage + 1);
95 }
96 }
97
98 do
99 {
100 bool res= false;
101 backup_stages previous_stage= thd->current_backup_stage;
102 thd->current_backup_stage= next_stage;
103 switch (next_stage) {
104 case BACKUP_START:
105 if (!(res= backup_start(thd)))
106 break;
107 /* Reset backup stage to start for next backup try */
108 previous_stage= BACKUP_FINISHED;
109 break;
110 case BACKUP_FLUSH:
111 res= backup_flush(thd);
112 break;
113 case BACKUP_WAIT_FOR_FLUSH:
114 res= backup_block_ddl(thd);
115 break;
116 case BACKUP_LOCK_COMMIT:
117 res= backup_block_commit(thd);
118 break;
119 case BACKUP_END:
120 res= backup_end(thd);
121 break;
122 case BACKUP_FINISHED:
123 DBUG_ASSERT(0);
124 }
125 if (res)
126 {
127 thd->current_backup_stage= previous_stage;
128 my_error(ER_BACKUP_STAGE_FAILED, MYF(0), stage_names[(uint) stage]);
129 DBUG_RETURN(1);
130 }
131 next_stage= (backup_stages) ((uint) next_stage + 1);
132 } while ((uint) next_stage <= (uint) stage);
133
134 DBUG_RETURN(0);
135 }
136
137
138 /**
139 Start the backup
140
141 - Wait for previous backup to stop running
142 - Start service to log changed tables (TODO)
143 - Block purge of redo files (Required at least for Aria)
144 - An handler can optionally do a checkpoint of all tables,
145 to speed up the recovery stage of the backup.
146 */
147
backup_start(THD * thd)148 static bool backup_start(THD *thd)
149 {
150 MDL_request mdl_request;
151 DBUG_ENTER("backup_start");
152
153 thd->current_backup_stage= BACKUP_FINISHED; // For next test
154 if (thd->has_read_only_protection())
155 DBUG_RETURN(1);
156 thd->current_backup_stage= BACKUP_START;
157
158 if (thd->locked_tables_mode)
159 {
160 my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
161 DBUG_RETURN(1);
162 }
163
164 MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_START,
165 MDL_EXPLICIT);
166 if (thd->mdl_context.acquire_lock(&mdl_request,
167 thd->variables.lock_wait_timeout))
168 DBUG_RETURN(1);
169
170 backup_flush_ticket= mdl_request.ticket;
171
172 ha_prepare_for_backup();
173 DBUG_RETURN(0);
174 }
175
176 /**
177 backup_flush()
178
179 - FLUSH all changes for not active non transactional tables, except
180 for statistics and log tables. Close the tables, to ensure they
181 are marked as closed after backup.
182
183 - BLOCK all NEW write locks for all non transactional tables
184 (except statistics and log tables). Already granted locks are
185 not affected (Running statements with non transaction tables will
186 continue running).
187
188 - The following DDL's doesn't have to be blocked as they can't set
189 the table in a non consistent state:
190 CREATE, RENAME, DROP
191 */
192
backup_flush(THD * thd)193 static bool backup_flush(THD *thd)
194 {
195 DBUG_ENTER("backup_flush");
196 /*
197 Lock all non transactional normal tables to be used in new DML's
198 */
199 if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
200 MDL_BACKUP_FLUSH,
201 thd->variables.lock_wait_timeout))
202 DBUG_RETURN(1);
203
204 /*
205 Free unused tables and table shares so that mariabackup knows what
206 is safe to copy
207 */
208 tc_purge();
209 tdc_purge(true);
210
211 DBUG_RETURN(0);
212 }
213
214 /**
215 backup_block_ddl()
216
217 - Kill all insert delay handlers, to ensure that all non transactional
218 tables are closed (can be improved in the future).
219
220 - Close handlers as other threads may wait for these, which can cause deadlocks.
221
222 - Wait for all statements using write locked non-transactional tables to end.
223
224 - Mark all not used active non transactional tables (except
225 statistics and log tables) to be closed with
226 handler->extra(HA_EXTRA_FLUSH)
227
228 - Block TRUNCATE TABLE, CREATE TABLE, DROP TABLE and RENAME
229 TABLE. Block also start of a new ALTER TABLE and the final rename
230 phase of ALTER TABLE. Running ALTER TABLES are not blocked. Both normal
231 and inline ALTER TABLE'S should be blocked when copying is completed but
232 before final renaming of the tables / new table is activated.
233 This will probably require a callback from the InnoDB code.
234 */
235
236 /* Retry to get inital lock for 0.1 + 0.5 + 2.25 + 11.25 + 56.25 = 70.35 sec */
237 #define MAX_RETRY_COUNT 5
238
backup_block_ddl(THD * thd)239 static bool backup_block_ddl(THD *thd)
240 {
241 uint sleep_time;
242 DBUG_ENTER("backup_block_ddl");
243
244 kill_delayed_threads();
245 mysql_ha_cleanup_no_free(thd);
246
247 /* Wait until all non trans statements has ended */
248 if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
249 MDL_BACKUP_WAIT_FLUSH,
250 thd->variables.lock_wait_timeout))
251 DBUG_RETURN(1);
252
253 /*
254 Remove not used tables from the table share. Flush all changes to
255 non transaction tables and mark those that are not in use in write
256 operations as closed. From backup purposes it's not critical if
257 flush_tables() returns an error. It's ok to continue with next
258 backup stage even if we got an error.
259 */
260 (void) flush_tables(thd, FLUSH_NON_TRANS_TABLES);
261 thd->clear_error();
262
263 #ifdef WITH_WSREP
264 /*
265 We desync the node for BACKUP STAGE because applier threads
266 bypass backup MDL locks (see MDL_lock::can_grant_lock)
267 */
268 if (WSREP_NNULL(thd))
269 {
270 Wsrep_server_state &server_state= Wsrep_server_state::instance();
271 if (server_state.desync_and_pause().is_undefined()) {
272 DBUG_RETURN(1);
273 }
274 thd->wsrep_desynced_backup_stage= true;
275 }
276 #endif /* WITH_WSREP */
277
278 /*
279 block new DDL's, in addition to all previous blocks
280 We didn't do this lock above, as we wanted DDL's to be executed while
281 we wait for non transactional tables (which may take a while).
282
283 We do this lock in a loop as we can get a deadlock if there are multi-object
284 ddl statements like
285 RENAME TABLE t1 TO t2, t3 TO t3
286 and the MDL happens in the middle of it.
287 */
288 sleep_time= 100; // Start with 0.1 seconds
289 for (uint i= 0 ; i <= MAX_RETRY_COUNT ; i++)
290 {
291 if (!thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
292 MDL_BACKUP_WAIT_DDL,
293 thd->variables.lock_wait_timeout))
294 break;
295 if (thd->get_stmt_da()->sql_errno() != ER_LOCK_DEADLOCK || thd->killed ||
296 i == MAX_RETRY_COUNT)
297 {
298 /*
299 Could be a timeout. Downgrade lock to what is was before this function
300 was called so that this function can be called again
301 */
302 backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH);
303 DBUG_RETURN(1);
304 }
305 thd->clear_error(); // Forget the DEADLOCK error
306 my_sleep(sleep_time);
307 sleep_time*= 5; // Wait a bit longer next time
308 }
309 DBUG_RETURN(0);
310 }
311
312 /**
313 backup_block_commit()
314
315 Block commits, writes to log and statistics tables and binary log
316 */
317
backup_block_commit(THD * thd)318 static bool backup_block_commit(THD *thd)
319 {
320 DBUG_ENTER("backup_block_commit");
321 if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket,
322 MDL_BACKUP_WAIT_COMMIT,
323 thd->variables.lock_wait_timeout))
324 DBUG_RETURN(1);
325
326 /* We can ignore errors from flush_tables () */
327 (void) flush_tables(thd, FLUSH_SYS_TABLES);
328
329 if (mysql_bin_log.is_open())
330 {
331 mysql_mutex_lock(mysql_bin_log.get_log_lock());
332 mysql_file_sync(mysql_bin_log.get_log_file()->file,
333 MYF(MY_WME|MY_SYNC_FILESIZE));
334 mysql_mutex_unlock(mysql_bin_log.get_log_lock());
335 }
336 thd->clear_error();
337
338 DBUG_RETURN(0);
339 }
340
341 /**
342 backup_end()
343
344 Safe to run, even if backup has not been run by this thread.
345 This is for example the case when a THD ends.
346 */
347
backup_end(THD * thd)348 bool backup_end(THD *thd)
349 {
350 DBUG_ENTER("backup_end");
351
352 if (thd->current_backup_stage != BACKUP_FINISHED)
353 {
354 ha_end_backup();
355 thd->current_backup_stage= BACKUP_FINISHED;
356 thd->mdl_context.release_lock(backup_flush_ticket);
357 #ifdef WITH_WSREP
358 if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage)
359 {
360 Wsrep_server_state &server_state= Wsrep_server_state::instance();
361 server_state.resume_and_resync();
362 thd->wsrep_desynced_backup_stage= false;
363 }
364 #endif /* WITH_WSREP */
365 }
366 DBUG_RETURN(0);
367 }
368
369
370 /**
371 backup_set_alter_copy_lock()
372
373 @param thd
374 @param table From table that is part of ALTER TABLE. This is only used
375 for the assert to ensure we use this function correctly.
376
377 Downgrades the MDL_BACKUP_DDL lock to MDL_BACKUP_ALTER_COPY to allow
378 copy of altered table to proceed under MDL_BACKUP_WAIT_DDL
379
380 Note that in some case when using non transactional tables,
381 the lock may be of type MDL_BACKUP_DML.
382 */
383
backup_set_alter_copy_lock(THD * thd,TABLE * table)384 void backup_set_alter_copy_lock(THD *thd, TABLE *table)
385 {
386 MDL_ticket *ticket= thd->mdl_backup_ticket;
387
388 /* Ticket maybe NULL in case of LOCK TABLES or for temporary tables*/
389 DBUG_ASSERT(ticket || thd->locked_tables_mode ||
390 table->s->tmp_table != NO_TMP_TABLE);
391 if (ticket)
392 ticket->downgrade_lock(MDL_BACKUP_ALTER_COPY);
393 }
394
395 /**
396 backup_reset_alter_copy_lock
397
398 Upgrade the lock of the original ALTER table MDL_BACKUP_DDL
399 Can fail if MDL lock was killed
400 */
401
backup_reset_alter_copy_lock(THD * thd)402 bool backup_reset_alter_copy_lock(THD *thd)
403 {
404 bool res= 0;
405 MDL_ticket *ticket= thd->mdl_backup_ticket;
406
407 /* Ticket maybe NULL in case of LOCK TABLES or for temporary tables*/
408 if (ticket)
409 res= thd->mdl_context.upgrade_shared_lock(ticket, MDL_BACKUP_DDL,
410 thd->variables.lock_wait_timeout);
411 return res;
412 }
413
414
/*****************************************************************************
 Backup locks
 These functions are used by mariabackup to ensure that there are no active
 DDLs on the object the backup is going to copy
*****************************************************************************/
420
421
backup_lock(THD * thd,TABLE_LIST * table)422 bool backup_lock(THD *thd, TABLE_LIST *table)
423 {
424 /* We should leave the previous table unlocked in case of errors */
425 backup_unlock(thd);
426 if (thd->locked_tables_mode)
427 {
428 my_error(ER_LOCK_OR_ACTIVE_TRANSACTION, MYF(0));
429 return 1;
430 }
431 table->mdl_request.duration= MDL_EXPLICIT;
432 if (thd->mdl_context.acquire_lock(&table->mdl_request,
433 thd->variables.lock_wait_timeout))
434 return 1;
435 thd->mdl_backup_lock= table->mdl_request.ticket;
436 return 0;
437 }
438
439
440 /* Release old backup lock if it exists */
441
backup_unlock(THD * thd)442 void backup_unlock(THD *thd)
443 {
444 if (thd->mdl_backup_lock)
445 thd->mdl_context.release_lock(thd->mdl_backup_lock);
446 thd->mdl_backup_lock= 0;
447 }
448