1 /******************************************************
2 XtraBackup: hot backup tool for InnoDB
3 (c) 2009-2013 Percona LLC and/or its affiliates.
4 Originally Created 3/3/2009 Yasufumi Kinoshita
5 Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
6 Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; version 2 of the License.
11 
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
20 
21 *******************************************************/
22 
23 /* Source file cursor implementation */
24 
25 #include <my_base.h>
26 
27 #include <fil0fil.h>
28 #include <fsp0sysspace.h>
29 #include <srv0start.h>
30 #include <trx0sys.h>
31 #include <univ.i>
32 
33 #include "common.h"
34 #include "fil_cur.h"
35 #include "read_filt.h"
36 #include "xtrabackup.h"
37 
38 /***********************************************************************
39 Reads the space flags from a given data file and returns the
40 page size and whether the file is compressable. */
xb_get_zip_size(const char * file_name,pfs_os_file_t file,byte * buf,page_size_t & page_size,bool & is_compressable)41 static bool xb_get_zip_size(const char *file_name, pfs_os_file_t file,
42                             byte *buf, page_size_t &page_size,
43                             bool &is_compressable) {
44   IORequest read_request(IORequest::READ | IORequest::NO_COMPRESSION);
45   const auto ret =
46       os_file_read(read_request, file_name, file, buf, 0, UNIV_PAGE_SIZE_MIN);
47   if (!ret) {
48     return (false);
49   }
50 
51   space_id_t space = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
52   const auto flags = fsp_header_get_flags(buf);
53 
54   if (space == 0) {
55     page_size.copy_from(univ_page_size);
56   } else {
57     page_size.copy_from(page_size_t(flags));
58   }
59 
60   if (page_size.is_compressed() || FSP_FLAGS_GET_ENCRYPTION(flags)) {
61     is_compressable = false;
62   } else {
63     const auto ret = os_file_read(read_request, file_name, file, buf, 0,
64                                   page_size.physical() * 2);
65     if (!ret) {
66       return (false);
67     }
68     if (Compression::is_compressed_page(buf + page_size.physical())) {
69       is_compressable = false;
70     } else {
71       is_compressable = true;
72     }
73   }
74 
75   return (true);
76 }
77 
78 /***********************************************************************
79 Extracts the relative path ("database/table.ibd") of a tablespace from a
80 specified possibly absolute path.
81 
82 For user tablespaces both "./database/table.ibd" and
83 "/remote/dir/database/table.ibd" result in "database/table.ibd".
84 
85 For system tablepsaces (i.e. When is_system is TRUE) both "/remote/dir/ibdata1"
86 and "./ibdata1" yield "ibdata1" in the output. */
xb_get_relative_path(fil_space_t * space,const char * path)87 const char *xb_get_relative_path(
88     /*=================*/
89     fil_space_t *space, /*!< in: tablespace */
90     const char *path)   /*!< in: tablespace path (either
91                         relative or absolute) */
92 {
93   const char *next;
94   const char *cur;
95   const char *prev;
96 
97   bool is_shared =
98       space != nullptr ? FSP_FLAGS_GET_SHARED(space->flags) : false;
99   bool is_system =
100       space != nullptr ? fsp_is_system_or_temp_tablespace(space->id) : false;
101   bool is_undo = space != nullptr ? fsp_is_undo_tablespace(space->id) : false;
102 
103   prev = NULL;
104   cur = path;
105 
106   while ((next = strchr(cur, OS_PATH_SEPARATOR)) != NULL) {
107     prev = cur;
108     cur = next + 1;
109   }
110 
111   if (is_system) {
112     return (cur);
113   } else {
114     return ((prev == NULL || is_shared || is_undo) ? cur : prev);
115   }
116 }
117 
118 /************************************************************************
119 Open a source file cursor and initialize the associated read filter.
120 
121 @return XB_FIL_CUR_SUCCESS on success, XB_FIL_CUR_SKIP if the source file must
122 be skipped and XB_FIL_CUR_ERROR on error. */
xb_fil_cur_open(xb_fil_cur_t * cursor,xb_read_filt_t * read_filter,fil_node_t * node,uint thread_n)123 xb_fil_cur_result_t xb_fil_cur_open(
124     /*============*/
125     xb_fil_cur_t *cursor,        /*!< out: source file cursor */
126     xb_read_filt_t *read_filter, /*!< in/out: the read filter */
127     fil_node_t *node,            /*!< in: source tablespace node */
128     uint thread_n)               /*!< thread number for diagnostics */
129 {
130   page_size_t page_size(0, 0, false);
131   ulint page_size_shift;
132   bool success;
133 
134   /* Initialize these first so xb_fil_cur_close() handles them correctly
135   in case of error */
136   cursor->orig_buf = NULL;
137   cursor->node = NULL;
138 
139   cursor->space_id = node->space->id;
140   cursor->space_flags = node->space->flags;
141   cursor->is_system = fsp_is_system_or_temp_tablespace(node->space->id);
142   cursor->is_ibd = fsp_is_ibd_tablespace(node->space->id);
143 
144   strncpy(cursor->abs_path, node->name, sizeof(cursor->abs_path));
145 
146   /* Get the relative path for the destination tablespace name, i.e. the
147   one that can be appended to the backup root directory. Non-system
148   tablespaces may have absolute paths for remote tablespaces in MySQL
149   5.6+. We want to make "local" copies for the backup. */
150   strncpy(cursor->rel_path, xb_get_relative_path(node->space, cursor->abs_path),
151           sizeof(cursor->rel_path));
152 
153   /* In the backup mode we should already have a tablespace handle created
154   by fil_load_single_table_tablespace() unless it is a system
155   tablespace or srv_close_files is true. Otherwise we open the file here.
156   srv_close_files has an effect only on IBD tablespaces. */
157   if (cursor->is_system || !srv_backup_mode ||
158       (srv_close_files && cursor->is_ibd)) {
159     node->handle = os_file_create_simple_no_error_handling(
160         0, node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode,
161         &success);
162     if (!success) {
163       /* The following call prints an error message */
164       os_file_get_last_error(TRUE);
165 
166       msg("[%02u] xtrabackup: error: cannot open tablespace %s\n", thread_n,
167           cursor->abs_path);
168 
169       return (XB_FIL_CUR_ERROR);
170     }
171 
172     fil_node_open_file(node);
173   }
174 
175   ut_ad(node->is_open);
176 
177   cursor->node = node;
178   cursor->file = node->handle;
179 
180   if (my_fstat(cursor->file.m_file, &cursor->statinfo)) {
181     msg("[%02u] xtrabackup: error: cannot stat %s\n", thread_n,
182         cursor->abs_path);
183 
184     xb_fil_cur_close(cursor);
185 
186     return (XB_FIL_CUR_ERROR);
187   }
188 
189   if (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT ||
190       srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC) {
191     os_file_set_nocache(cursor->file.m_file, node->name, "OPEN");
192   }
193 
194   posix_fadvise(cursor->file.m_file, 0, 0, POSIX_FADV_SEQUENTIAL);
195 
196   /* Allocate read buffer */
197   ut_a(opt_read_buffer_size >= UNIV_PAGE_SIZE);
198   cursor->buf_size = opt_read_buffer_size;
199   cursor->orig_buf =
200       static_cast<byte *>(ut_malloc_nokey(cursor->buf_size + UNIV_PAGE_SIZE));
201   cursor->buf = static_cast<byte *>(ut_align(cursor->orig_buf, UNIV_PAGE_SIZE));
202 
203   /* Determine the page size */
204   if (!xb_get_zip_size(cursor->rel_path, cursor->file, cursor->buf, page_size,
205                        cursor->is_compressable)) {
206     xb_fil_cur_close(cursor);
207     return (XB_FIL_CUR_SKIP);
208   } else if (page_size.is_compressed()) {
209     page_size_shift = get_bit_shift(page_size.physical());
210     msg("[%02u] %s is compressed with page size = "
211         "%u bytes\n",
212         thread_n, node->name, (uint)page_size.physical());
213     if (page_size_shift < 10 || page_size_shift > 14) {
214       msg("[%02u] xtrabackup: Error: Invalid "
215           "page size: %u.\n",
216           thread_n, (uint)page_size.physical());
217       ut_error;
218     }
219   } else {
220     page_size_shift = UNIV_PAGE_SIZE_SHIFT;
221   }
222   cursor->page_size = page_size.physical();
223   cursor->page_size_shift = page_size_shift;
224   cursor->zip_size = page_size.is_compressed() ? page_size.physical() : 0;
225 
226   cursor->buf_read = 0;
227   cursor->buf_npages = 0;
228   cursor->buf_offset = 0;
229   cursor->buf_page_no = 0;
230   cursor->thread_n = thread_n;
231 
232   cursor->space_size = cursor->statinfo.st_size / page_size.physical();
233   cursor->block_size = node->block_size;
234 
235   cursor->read_filter = read_filter;
236   cursor->read_filter->init(&cursor->read_filter_ctxt, cursor, node->space->id);
237 
238   cursor->scratch = static_cast<byte *>(ut_malloc_nokey(cursor->page_size * 2));
239   cursor->decrypt = static_cast<byte *>(ut_malloc_nokey(cursor->page_size));
240 
241   memcpy(cursor->encryption_key, node->space->encryption_key,
242          sizeof(cursor->encryption_key));
243   memcpy(cursor->encryption_iv, node->space->encryption_iv,
244          sizeof(cursor->encryption_iv));
245   cursor->encryption_klen = node->space->encryption_klen;
246   cursor->block_size = node->block_size;
247 
248   return (XB_FIL_CUR_SUCCESS);
249 }
250 
is_page_corrupted(bool check_lsn,const byte * read_buf,const page_size_t & page_size,bool skip_checksum)251 static bool is_page_corrupted(bool check_lsn, const byte *read_buf,
252                               const page_size_t &page_size,
253                               bool skip_checksum) {
254   BlockReporter reporter =
255       BlockReporter(check_lsn, read_buf, page_size, skip_checksum);
256   return reporter.is_corrupted();
257 }
258 
259 /************************************************************************
260 Reads and verifies the next block of pages from the source
261 file. Positions the cursor after the last read non-corrupted page.
262 
263 @return XB_FIL_CUR_SUCCESS if some have been read successfully, XB_FIL_CUR_EOF
264 if there are no more pages to read and XB_FIL_CUR_ERROR on error. */
xb_fil_cur_read(xb_fil_cur_t * cursor)265 xb_fil_cur_result_t xb_fil_cur_read(
266     /*============*/
267     xb_fil_cur_t *cursor) /*!< in/out: source file cursor */
268 {
269   dberr_t err;
270   byte *page, *page_to_check;
271   ulint i;
272   ulint npages;
273   ulint retry_count;
274   xb_fil_cur_result_t ret;
275   ib_uint64_t offset;
276   ib_uint64_t to_read;
277   ulong n_read;
278   page_size_t page_size(
279       cursor->zip_size != 0 ? cursor->zip_size : cursor->page_size,
280       cursor->page_size, cursor->zip_size != 0);
281   IORequest read_request(IORequest::READ | IORequest::NO_COMPRESSION);
282 
283   cursor->read_filter->get_next_batch(&cursor->read_filter_ctxt, &offset,
284                                       &to_read);
285 
286   if (to_read == 0LL) {
287     return (XB_FIL_CUR_EOF);
288   }
289 
290   if (to_read > (ib_uint64_t)cursor->buf_size) {
291     to_read = (ib_uint64_t)cursor->buf_size;
292   }
293 
294   xb_a(to_read > 0 && to_read <= 0xFFFFFFFFLL);
295 
296   if (to_read % cursor->page_size != 0 &&
297       offset + to_read == (ib_uint64_t)cursor->statinfo.st_size) {
298     if (to_read < (ib_uint64_t)cursor->page_size) {
299       msg("[%02u] xtrabackup: Warning: junk at the end of %s:\n",
300           cursor->thread_n, cursor->abs_path);
301       msg("[%02u] xtrabackup: Warning: offset = %llu, to_read = %llu\n",
302           cursor->thread_n, (unsigned long long)offset,
303           (unsigned long long)to_read);
304 
305       return (XB_FIL_CUR_EOF);
306     }
307 
308     to_read = (ib_uint64_t)(((ulint)to_read) & ~(cursor->page_size - 1));
309   }
310 
311   xb_a(to_read % cursor->page_size == 0);
312 
313   retry_count = 10;
314   ret = XB_FIL_CUR_SUCCESS;
315 
316 read_retry:
317   xtrabackup_io_throttling();
318 
319   cursor->buf_read = 0;
320   cursor->buf_npages = 0;
321   cursor->buf_offset = offset;
322   cursor->buf_page_no = (ulint)(offset >> cursor->page_size_shift);
323 
324   err = os_file_read_no_error_handling(read_request, cursor->rel_path,
325                                        cursor->file, cursor->buf, offset,
326                                        to_read, &n_read);
327   if (err != DB_SUCCESS) {
328     if (err == DB_IO_ERROR) {
329       /* If the file is truncated by MySQL, os_file_read will
330       fail with DB_IO_ERROR, but XtraBackup must treat this
331       error as non critical. */
332       if (my_fstat(cursor->file.m_file, &cursor->statinfo)) {
333         msg("[%02u] xtrabackup: error: cannot stat %s\n", cursor->thread_n,
334             cursor->abs_path);
335         return (XB_FIL_CUR_ERROR);
336       }
337       /* Check if we reached EOF */
338       if ((ulonglong)cursor->statinfo.st_size > offset + n_read) {
339         return (XB_FIL_CUR_ERROR);
340       }
341     }
342     return (XB_FIL_CUR_ERROR);
343   }
344 
345   read_request.encryption_algorithm(Encryption::AES);
346   read_request.encryption_key(cursor->encryption_key, cursor->encryption_klen,
347                               cursor->encryption_iv);
348   read_request.block_size(cursor->block_size);
349 
350   npages = n_read >> cursor->page_size_shift;
351 
352   /* check pages for corruption and re-read if necessary. i.e. in case of
353   partially written pages */
354   for (page = cursor->buf, i = 0; i < npages; page += cursor->page_size, i++) {
355     page_to_check = page;
356     if (Encryption::is_encrypted_page(page)) {
357       Encryption encryption(read_request.encryption_algorithm());
358 
359       page_to_check = cursor->decrypt;
360       memcpy(cursor->decrypt, page, cursor->page_size);
361 
362       const auto ret =
363           encryption.decrypt(read_request, cursor->decrypt, cursor->page_size,
364                              cursor->scratch, cursor->page_size);
365       if (ret != DB_SUCCESS) {
366         goto corruption;
367       }
368 
369       if (Compression::is_compressed_page(cursor->decrypt)) {
370         if (os_file_decompress_page(false, cursor->decrypt, cursor->scratch,
371                                     cursor->page_size) != DB_SUCCESS) {
372           goto corruption;
373         }
374       }
375     }
376 
377     if (Compression::is_compressed_page(page)) {
378       page_to_check = cursor->decrypt;
379       memcpy(cursor->decrypt, page, cursor->page_size);
380       if (os_file_decompress_page(false, cursor->decrypt, cursor->scratch,
381                                   cursor->page_size) != DB_SUCCESS) {
382         goto corruption;
383       }
384     }
385 
386     if (is_page_corrupted(true, page_to_check, page_size, false)) {
387     corruption:
388 
389       ulint page_no = cursor->buf_page_no + i;
390 
391       if (cursor->is_system && page_no >= FSP_EXTENT_SIZE &&
392           page_no < FSP_EXTENT_SIZE * 3) {
393         /* skip doublewrite buffer pages */
394         xb_a(cursor->page_size == UNIV_PAGE_SIZE);
395         msg("[%02u] xtrabackup: Page %lu is a doublewrite buffer page, "
396             "skipping.\n",
397             cursor->thread_n, page_no);
398       } else {
399         retry_count--;
400         if (retry_count == 0) {
401           msg("[%02u] xtrabackup: Error: failed to read page after 10 retries. "
402               "File %s seems to be corrupted.\n",
403               cursor->thread_n, cursor->abs_path);
404           ret = XB_FIL_CUR_ERROR;
405           break;
406         }
407         msg("[%02u] xtrabackup: Database page corruption detected at page %lu, "
408             "retrying...\n",
409             cursor->thread_n, page_no);
410 
411         os_thread_sleep(100000);
412 
413         goto read_retry;
414       }
415     }
416     cursor->buf_read += cursor->page_size;
417     cursor->buf_npages++;
418   }
419 
420   cursor->read_filter->update(&cursor->read_filter_ctxt, n_read, cursor);
421 
422   posix_fadvise(cursor->file.m_file, offset, to_read, POSIX_FADV_DONTNEED);
423 
424   return (ret);
425 }
426 
427 /************************************************************************
428 Close the source file cursor opened with xb_fil_cur_open() and its
429 associated read filter. */
xb_fil_cur_close(xb_fil_cur_t * cursor)430 void xb_fil_cur_close(
431     /*=============*/
432     xb_fil_cur_t *cursor) /*!< in/out: source file cursor */
433 {
434   cursor->read_filter->deinit(&cursor->read_filter_ctxt);
435 
436   ut_free(cursor->scratch);
437   ut_free(cursor->decrypt);
438   ut_free(cursor->orig_buf);
439   if (cursor->node != NULL) {
440     fil_node_close_file(cursor->node);
441     cursor->file = XB_FILE_UNDEFINED;
442   }
443 }
444