1 /*****************************************************************************
2 
3 Copyright (c) 2017, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/clone0snapshot.h
28  Database Physical Snapshot
29 
30  *******************************************************/
31 
32 #ifndef CLONE_SNAPSHOT_INCLUDE
33 #define CLONE_SNAPSHOT_INCLUDE
34 
#include "univ.i"

#include "arch0log.h"
#include "arch0page.h"
#include "clone0desc.h"
#include "clone0monitor.h"
#include "fil0fil.h"
#include "sql/handler.h"

#include <functional>
#include <map>
#include <set>
#include <vector>
46 
/** Vector type for storing clone files. Each element is a pointer to a
file metadata entry. */
using Clone_File_Vec = std::vector<Clone_File_Meta *>;

/** Map type for mapping space ID to clone file index. The mapped value is
an unsigned index. */
using Clone_File_Map = std::map<space_id_t, uint>;
52 
/** Page identified by space and page number. Plain aggregate of two
32-bit identifiers; ordering is provided separately by a comparator. */
struct Clone_Page {
  /** Tablespace ID */
  ib_uint32_t m_space_id;

  /** Page number within tablespace */
  ib_uint32_t m_page_no;
};
61 
62 /** Comparator for storing sorted page ID. */
63 struct Less_Clone_Page {
64   /** Less than operator for page ID.
65   @param[in]	page1	first page
66   @param[in]	page2	second page
67   @return true, if page1 is less than page2 */
operatorLess_Clone_Page68   inline bool operator()(const Clone_Page &page1,
69                          const Clone_Page &page2) const {
70     if (page1.m_space_id < page2.m_space_id) {
71       return (true);
72     }
73 
74     if (page1.m_space_id == page2.m_space_id &&
75         page1.m_page_no < page2.m_page_no) {
76       return (true);
77     }
78     return (false);
79   }
80 };
81 
/** Vector type for storing clone page IDs */
using Clone_Page_Vec = std::vector<Clone_Page>;

/** Set for storing unique page IDs. Elements are ordered and de-duplicated
using the Less_Clone_Page comparator. */
using Clone_Page_Set = std::set<Clone_Page, Less_Clone_Page>;
87 
/** Clone handle type: a handle is used either for COPY or for APPLY. */
enum Clone_Handle_Type {
  /** Clone Handle for COPY */
  CLONE_HDL_COPY = 1,

  /** Clone Handle for APPLY */
  CLONE_HDL_APPLY = 2
};
96 
/** Default chunk size, expressed as a power of 2 in units of pages.
Each thread of a multi-threaded clone reserves chunks. With a 16k page
size, 2^12 pages gives a chunk size of 64M. */
constexpr uint SNAPSHOT_DEF_CHUNK_SIZE_POW2 = 12;

/** Default block size, expressed as a power of 2 in units of pages.
The data transfer callback is invoked once for each block. This is also
the maximum size of data that would be re-sent if clone is stopped
and resumed. With a 16k page size, 2^6 pages gives a block size of 1M. */
constexpr uint SNAPSHOT_DEF_BLOCK_SIZE_POW2 = 6;

/** Maximum block size, expressed as a power of 2 in units of pages.
With a 16k page size, 2^12 pages gives a maximum block size of 64M. */
constexpr uint SNAPSHOT_MAX_BLOCK_SIZE_POW2 = 12;

/** Sleep time in microseconds while waiting for other clone/task
(100 milliseconds). */
constexpr uint SNAPSHOT_STATE_CHANGE_SLEEP = 100 * 1000;
114 
115 /** Dynamic database snapshot: Holds metadata and handle to data */
116 class Clone_Snapshot {
117  public:
118   /** Construct snapshot
119   @param[in]	hdl_type	copy, apply
120   @param[in]	clone_type	clone type
121   @param[in]	arr_idx		index in global array
122   @param[in]	snap_id		unique snapshot ID */
123   Clone_Snapshot(Clone_Handle_Type hdl_type, Ha_clone_type clone_type,
124                  uint arr_idx, ib_uint64_t snap_id);
125 
126   /** Release contexts and free heap */
127   ~Clone_Snapshot();
128 
129   /** @return estimated bytes on disk */
get_disk_estimate()130   uint64_t get_disk_estimate() const { return (m_data_bytes_disk); }
131 
132   /** Get unique snapshot identifier
133   @return snapshot ID */
get_id()134   ib_uint64_t get_id() { return (m_snapshot_id); }
135 
136   /** Get snapshot index in global array
137   @return array index */
get_index()138   uint get_index() { return (m_snapshot_arr_idx); }
139 
140   /** Get performance schema accounting object used to monitor stage
141   progress.
142   @return PFS stage object */
get_clone_monitor()143   Clone_Monitor &get_clone_monitor() { return (m_monitor); }
144 
145   /** Get snapshot heap used for allocation during clone.
146   @return heap */
lock_heap()147   mem_heap_t *lock_heap() {
148     mutex_enter(&m_snapshot_mutex);
149     return (m_snapshot_heap);
150   }
151 
152   /* Release snapshot heap */
release_heap(mem_heap_t * & heap)153   void release_heap(mem_heap_t *&heap) {
154     heap = nullptr;
155     mutex_exit(&m_snapshot_mutex);
156   }
157 
158   /** Get snapshot state
159   @return state */
get_state()160   Snapshot_State get_state() { return (m_snapshot_state); }
161 
162   /** Get the redo file size for the snapshot
163   @return redo file size */
get_redo_file_size()164   ib_uint64_t get_redo_file_size() { return (m_redo_file_size); }
165 
166   /** Get total number of chunks for current state
167   @return number of data chunks */
get_num_chunks()168   uint get_num_chunks() { return (m_num_current_chunks); }
169 
170   /** Get maximum file length seen till now
171   @return file name length */
get_max_file_name_length()172   size_t get_max_file_name_length() { return (m_max_file_name_len); }
173 
174   /** Get maximum buffer size required for clone
175   @return maximum dynamic buffer */
get_dyn_buffer_length()176   uint get_dyn_buffer_length() {
177     uint ret_len = 0;
178 
179     if (is_copy() && m_snapshot_type != HA_CLONE_BLOCKING) {
180       ret_len = static_cast<uint>(2 * UNIV_PAGE_SIZE);
181     }
182 
183     return (ret_len);
184   }
185 
186   using File_Cbk_Func = std::function<int(Clone_File_Meta *)>;
187 
188   /** Iterate through all files in current state
189   @param[in]	func	callback function
190   @return error code */
191   int iterate_files(File_Cbk_Func &&func);
192 
193   /** Fill state descriptor from snapshot
194   @param[in]	do_estimate	estimate data bytes to transfer
195   @param[out]	state_desc	snapshot state descriptor */
196   void get_state_info(bool do_estimate, Clone_Desc_State *state_desc);
197 
198   /** Set state information during apply
199   @param[in]	state_desc	snapshot state descriptor */
200   void set_state_info(Clone_Desc_State *state_desc);
201 
202   /** Get next state based on snapshot type
203   @return next state */
204   Snapshot_State get_next_state();
205 
206   /** Try to attach to snapshot
207   @param[in]	hdl_type	copy, apply
208   @param[in]	pfs_monitor	enable PFS monitoring
209   @return true if successfully attached */
210   bool attach(Clone_Handle_Type hdl_type, bool pfs_monitor);
211 
212   /** Detach from snapshot
213   @return number of clones attached */
214   uint detach();
215 
216   /** Start transition to new state
217   @param[in]	state_desc	descriptor for next state
218   @param[in]	new_state	state to move for apply
219   @param[in]	temp_buffer	buffer used for collecting page IDs
220   @param[in]	temp_buffer_len	buffer length
221   @param[in]	cbk		alter callback for long wait
222   @param[out]	pending_clones	clones yet to transit to next state
223   @return error code */
224   int change_state(Clone_Desc_State *state_desc, Snapshot_State new_state,
225                    byte *temp_buffer, uint temp_buffer_len,
226                    Clone_Alert_Func cbk, uint &pending_clones);
227 
228   /** Check if transition is complete
229   @param[in]	new_state	new state after transition
230   @param[in]	exit_on_wait	exit from transition if needs to wait
231   @return number of clones yet to transit to next state */
232   uint check_state(Snapshot_State new_state, bool exit_on_wait);
233 
234   /* Don't allow to attach new clone - Not supported
235   void stop_attach_new_clone()
236   {
237           m_allow_new_clone = false;
238   }
239   */
240 
241   /** Add file metadata entry at destination
242   @param[in,out]	file_desc	if there, set to current descriptor
243   @param[in]	data_dir	destination data directory
244   @param[in]	desc_create	create if doesn't exist
245   @param[out]	desc_exists	descriptor already exists
246   @return error code */
247   int get_file_from_desc(Clone_File_Meta *&file_desc, const char *data_dir,
248                          bool desc_create, bool &desc_exists);
249 
250   /** Add file descriptor to file list
251   @param[in,out]	file_desc	current file descriptor
252   @return true, if it is the last file. */
253   bool add_file_from_desc(Clone_File_Meta *&file_desc);
254 
255   /** Extract file information from node and add to snapshot
256   @param[in]	node	file node
257   @return error code */
258   dberr_t add_node(fil_node_t *node);
259 
260   /** Add page ID to to the set of pages in snapshot
261   @param[in]	space_id	page tablespace
262   @param[in]	page_num	page number within tablespace
263   @return error code */
264   int add_page(ib_uint32_t space_id, ib_uint32_t page_num);
265 
266   /** Add redo file to snapshot
267   @param[in]	file_name	file name
268   @param[in]	file_size	file size in bytes
269   @param[in]	file_offset	start offset
270   @return error code. */
271   int add_redo_file(char *file_name, ib_uint64_t file_size,
272                     ib_uint64_t file_offset);
273 
274   /** Get file metadata by index for current state
275   @param[in]	index	file index
276   @return file metadata entry */
277   Clone_File_Meta *get_file_by_index(uint index);
278 
279   /** Get next block of data to transfer
280   @param[in]	chunk_num	current chunk
281   @param[in,out]	block_num	current/next block
282   @param[in,out]	file_meta	current/next block file metadata
283   @param[out]	data_offset	block offset in file
284   @param[out]	data_buf	data buffer or NULL if transfer from file
285   @param[out]	data_size	size of data in bytes
286   @return error code */
287   int get_next_block(uint chunk_num, uint &block_num,
288                      Clone_File_Meta *file_meta, ib_uint64_t &data_offset,
289                      byte *&data_buf, uint &data_size);
290 
291   /** Update snapshot block size based on caller's buffer size
292   @param[in]	buff_size	buffer size for clone transfer */
293   void update_block_size(uint buff_size);
294 
295   /** Check if copy snapshot
296   @return true if snapshot is for copy */
is_copy()297   bool is_copy() const { return (m_snapshot_handle_type == CLONE_HDL_COPY); }
298 
299   /** Update file size when file is extended during page copy
300   @param[in]	file_index	current file index
301   @param[in]	file_size	new file size */
302   void update_file_size(uint32_t file_index, uint64_t file_size);
303 
304   /** Encrypt tablespace key in header page with master key.
305   @param[in]		page_size	page size descriptor
306   @param[in,out]	page_data	page data to update
307   @return true, if successful. */
308   bool encrypt_key_in_header(const page_size_t &page_size, byte *page_data);
309 
310   /** Encrypt tablespace key in header page with master key.
311   @param[in,out]	log_header	page data to update
312   @param[in]		header_len	length of log header
313   @return true, if successful. */
314   bool encrypt_key_in_log_header(byte *log_header, uint32_t header_len);
315 
316   /** Decrypt tablespace key in header page with master key.
317   @param[in]		space		tablespace
318   @param[in]		page_size	page size descriptor
319   @param[in,out]	page_data	page data to update */
320   void decrypt_key_in_header(fil_space_t *space, const page_size_t &page_size,
321                              byte *&page_data);
322 
323  private:
324   /** Synchronize snapshot with binary log and GTID.
325   @param[in]	cbk	alert callback for long wait
326   @return error code. */
327   int synchronize_binlog_gtid(Clone_Alert_Func cbk);
328 
329   /** Make sure that the trx sys page binary log position correctly reflects
330   all transactions committed to innodb. It updates binary log position
331   in transaction sys page, if required. The caller must ensure that any new
332   transaction is committed in order of binary log.
333   @return error code. */
334   int update_binlog_position();
335 
336   /** Wait for already prepared binlog transactions to end.
337   @return error code. */
338   int wait_for_binlog_prepared_trx();
339 
340   /** Wait for a transaction to end.
341   @param[in]	thd	current THD
342   @param[in]	trx_id	transaction to wait for
343   @return error code. */
344   int wait_trx_end(THD *thd, trx_id_t trx_id);
345 
346   /** Check if state transition is in progress
347   @return true during state transition */
in_transit_state()348   bool in_transit_state() {
349     mutex_own(&m_snapshot_mutex);
350     return (m_snapshot_next_state != CLONE_SNAPSHOT_NONE);
351   }
352 
353   /** Initialize current state
354   @param[in]	state_desc	descriptor for the state
355   @param[in]	temp_buffer	buffer used during page copy initialize
356   @param[in]	temp_buffer_len	buffer length
357   @param[in]	cbk		alert callback for long wait
358   @return error code */
359   int init_state(Clone_Desc_State *state_desc, byte *temp_buffer,
360                  uint temp_buffer_len, Clone_Alert_Func cbk);
361 
362   /** Initialize snapshot state for file copy
363   @return error code */
364   int init_file_copy();
365 
366   /** Initialize disk byte estimate. */
init_disk_estimate()367   void init_disk_estimate() {
368     /* Initial size is set to the redo file size on disk. */
369     m_data_bytes_disk = log_get_file_capacity(*log_sys);
370   }
371 
372   /** Initialize snapshot state for page copy
373   @param[in]	page_buffer	temporary buffer to copy page IDs
374   @param[in]	page_buffer_len	buffer length
375   @return error code */
376   int init_page_copy(byte *page_buffer, uint page_buffer_len);
377 
378   /** Initialize snapshot state for redo copy
379   @param[in]	cbk	alert callback for long wait
380   @return error code */
381   int init_redo_copy(Clone_Alert_Func cbk);
382 
383   /** Initialize state while applying cloned data
384   @param[in]	state_desc	snapshot state descriptor
385   @return error code */
386   int init_apply_state(Clone_Desc_State *state_desc);
387 
388   /** Extend and flush files after copying data
389   @param[in]	is_redo	if true flush redo, otherwise data
390   @return error code */
391   int extend_and_flush_files(bool is_redo);
392 
393   /** Create file descriptor and add to current file list
394   @param[in]	data_dir	destination data directory
395   @param[in,out]	file_desc	file descriptor
396   @return error code */
397   int create_desc(const char *data_dir, Clone_File_Meta *&file_desc);
398 
399   /** Get file metadata for current chunk
400   @param[in]	file_vector	clone file vector
401   @param[in]	num_files	total number of files
402   @param[in]	chunk_num	current chunk number
403   @param[in]	start_index	index for starting the search
404   @return file metadata */
405   Clone_File_Meta *get_file(Clone_File_Vec &file_vector, uint num_files,
406                             uint chunk_num, uint start_index);
407 
408   /** Get next page from buffer pool
409   @param[in]	chunk_num	current chunk
410   @param[in,out]	block_num	current, next block
411   @param[in]	file_meta	file metadata for page
412   @param[out]	data_offset	offset in file
413   @param[out]	data_buf	page data
414   @param[out]	data_size	page data size
415   @return error code */
416   int get_next_page(uint chunk_num, uint &block_num, Clone_File_Meta *file_meta,
417                     ib_uint64_t &data_offset, byte *&data_buf, uint &data_size);
418 
419   /** Get page from buffer pool and make ready for write
420   @param[in]	page_id		page ID chunk
421   @param[in]	page_size	page size descriptor
422   @param[in]	file_meta	file metadata for page
423   @param[out]	page_data	data page
424   @param[out]	data_size	page size in bytes
425   @return error code */
426   int get_page_for_write(const page_id_t &page_id, const page_size_t &page_size,
427                          Clone_File_Meta *file_meta, byte *&page_data,
428                          uint &data_size);
429 
430   /* Make page ready for flush by updating LSN anc checksum
431   @param[in]		page_size	page size descriptor
432   @param[in]		page_lsn	LSN to update the page with
433   @param[in,out]	page_data	data page */
434   void page_update_for_flush(const page_size_t &page_size, lsn_t page_lsn,
435                              byte *&page_data);
436 
437   /** Build file metadata entry
438   @param[in]	file_name	name of the file
439   @param[in]	file_size	file size in bytes
440   @param[in]	file_offset	start offset
441   @param[in]	num_chunks	total number of chunks in the file
442   @param[in]	copy_file_name	copy the file name or use reference
443   @return file metadata entry */
444   Clone_File_Meta *build_file(const char *file_name, uint64_t file_size,
445                               uint64_t file_offset, uint &num_chunks,
446                               bool copy_file_name);
447 
448   /** Add buffer pool dump file to the file list
449   @return error code */
450   int add_buf_pool_file();
451 
452   /** Add file to snapshot
453   @param[in]	name		file name
454   @param[in]	size_bytes	file size in bytes
455   @param[in]	alloc_bytes	allocation size on disk for sparse file
456   @param[in]	node		file node
457   @param[in]	copy_name	copy the file name or use reference
458   @return error code. */
459   int add_file(const char *name, uint64_t size_bytes, uint64_t alloc_bytes,
460                fil_node_t *node, bool copy_name);
461 
462   /** Get chunk size
463   @return chunk size in pages */
chunk_size()464   uint chunk_size() {
465     uint size;
466 
467     size = static_cast<uint>(ut_2_exp(m_chunk_size_pow2));
468     return (size);
469   }
470 
471   /** Get block size
472   @return block size in pages */
block_size()473   uint block_size() {
474     uint size;
475 
476     ut_a(m_block_size_pow2 <= SNAPSHOT_MAX_BLOCK_SIZE_POW2);
477     size = static_cast<uint>(ut_2_exp(m_block_size_pow2));
478 
479     return (size);
480   }
481 
482   /** Get number of blocks per chunk
483   @return blocks per chunk */
blocks_per_chunk()484   uint blocks_per_chunk() {
485     ut_a(m_block_size_pow2 <= m_chunk_size_pow2);
486     return (1 << (m_chunk_size_pow2 - m_block_size_pow2));
487   }
488 
489   /** Update file name in descriptor from configuration.
490   @param[in]		data_dir	clone data directory
491   @param[in,out]	file_desc	file descriptor
492   @param[in,out]	path		buffer for updated path
493   @param[in]		path_len	path buffer length
494   @return error code */
495   int update_file_name(const char *data_dir, Clone_File_Meta *file_desc,
496                        char *path, size_t path_len);
497 
498   /** Build file name along with path for cloned data files.
499   @param[in]		data_dir	clone data directory
500   @param[in]		alloc_size	new file size to be allocated
501   @param[in,out]	file_desc	file descriptor
502   @return error code */
503   int build_file_path(const char *data_dir, ulint alloc_size,
504                       Clone_File_Meta *&file_desc);
505 
506   /** Check for existing file and add clone extension.
507   @param[in]		replace		if data directory is replaced
508   @param[in,out]	file_desc	file descriptor
509   @return error code */
510   int handle_existing_file(bool replace, Clone_File_Meta *file_desc);
511 
512   /** Compute total length of cloned data file name and path.
513   @param[in]	data_dir	clone data directory
514   @param[in]	file_desc	file descriptor
515   @return total size in bytes */
516   size_t compute_path_length(const char *data_dir,
517                              const Clone_File_Meta *file_desc);
518 
519  private:
520   /** @name Snapshot type and ID */
521 
522   /** Snapshot handle type */
523   Clone_Handle_Type m_snapshot_handle_type;
524 
525   /** Clone type */
526   Ha_clone_type m_snapshot_type;
527 
528   /** Unique snapshot ID */
529   ib_uint64_t m_snapshot_id;
530 
531   /** Index in global snapshot array */
532   uint m_snapshot_arr_idx;
533 
534   /** @name Snapshot State  */
535 
536   /** Mutex to handle access by concurrent clones */
537   ib_mutex_t m_snapshot_mutex;
538 
539   /** Allow new clones to get attached to this snapshot */
540   bool m_allow_new_clone;
541 
542   /** Number of clones attached to this snapshot */
543   uint m_num_clones;
544 
545   /** Number of clones in current state */
546   uint m_num_clones_current;
547 
548   /** Number of clones moved over to next state */
549   uint m_num_clones_next;
550 
551   /** Current state */
552   Snapshot_State m_snapshot_state;
553 
554   /** Next state to move to. Set only during state transfer. */
555   Snapshot_State m_snapshot_next_state;
556 
557   /** @name Snapshot data block */
558 
559   /** Memory allocation heap */
560   mem_heap_t *m_snapshot_heap;
561 
562   /** Chunk size in power of 2 */
563   uint m_chunk_size_pow2;
564 
565   /** Block size in power of 2 */
566   uint m_block_size_pow2;
567 
568   /** Number of chunks in current state */
569   uint m_num_current_chunks;
570 
571   /** Maximum file name length observed till now. */
572   size_t m_max_file_name_len;
573 
574   /** @name Snapshot file data */
575 
576   /** All data files for transfer */
577   Clone_File_Vec m_data_file_vector;
578 
579   /** Map space ID to file vector index */
580   Clone_File_Map m_data_file_map;
581 
582   /** Number of data files to transfer */
583   uint m_num_data_files;
584 
585   /** Total number of data chunks */
586   uint m_num_data_chunks;
587 
588   /** Number of bytes on disk. */
589   uint64_t m_data_bytes_disk;
590 
591   /** Index into m_data_file_vector for all undo files. */
592   std::vector<int> m_undo_file_indexes;
593 
594   /** @name Snapshot page data */
595 
596   /** Page archiver client */
597   Page_Arch_Client_Ctx m_page_ctx;
598 
599   /** Set of unique page IDs */
600   Clone_Page_Set m_page_set;
601 
602   /** Sorted page IDs to transfer */
603   Clone_Page_Vec m_page_vector;
604 
605   /** Number of pages to transfer */
606   uint m_num_pages;
607 
608   /** Number of duplicate pages found */
609   uint m_num_duplicate_pages;
610 
611   /** @name Snapshot redo data */
612 
613   /** redo log archiver client */
614   Log_Arch_Client_Ctx m_redo_ctx;
615 
616   /** All archived redo files to transfer */
617   Clone_File_Vec m_redo_file_vector;
618 
619   /** Start offset in first redo file */
620   ib_uint64_t m_redo_start_offset;
621 
622   /** Redo header block */
623   byte *m_redo_header;
624 
625   /** Redo header size */
626   uint m_redo_header_size;
627 
628   /** Redo trailer block */
629   byte *m_redo_trailer;
630 
631   /** Redo trailer size */
632   uint m_redo_trailer_size;
633 
634   /** Redo trailer block offset */
635   ib_uint64_t m_redo_trailer_offset;
636 
637   /** Archived redo file size */
638   ib_uint64_t m_redo_file_size;
639 
640   /** Number of archived redo files to transfer */
641   uint m_num_redo_files;
642 
643   /** Total number of redo data chunks */
644   uint m_num_redo_chunks;
645 
646   /** Enable PFS monitoring */
647   bool m_enable_pfs;
648 
649   /** Performance Schema accounting object to monitor stage progess */
650   Clone_Monitor m_monitor;
651 };
652 
653 #endif /* CLONE_SNAPSHOT_INCLUDE */
654