1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file mtr/mtr0log.cc
29 Mini-transaction log routines
30 
31 Created 12/7/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #include "mtr0log.h"
35 
36 #ifdef UNIV_NONINL
37 #include "mtr0log.ic"
38 #endif
39 
40 #include "buf0buf.h"
41 #include "dict0dict.h"
42 #include "log0recv.h"
43 #include "page0page.h"
44 
45 #ifndef UNIV_HOTBACKUP
46 # include "dict0boot.h"
47 
48 /********************************************************//**
49 Catenates n bytes to the mtr log. */
50 UNIV_INTERN
51 void
mlog_catenate_string(mtr_t * mtr,const byte * str,ulint len)52 mlog_catenate_string(
53 /*=================*/
54 	mtr_t*		mtr,	/*!< in: mtr */
55 	const byte*	str,	/*!< in: string to write */
56 	ulint		len)	/*!< in: string length */
57 {
58 	dyn_array_t*	mlog;
59 
60 	if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
61 
62 		return;
63 	}
64 
65 	mlog = &(mtr->log);
66 
67 	dyn_push_string(mlog, str, len);
68 }
69 
70 /********************************************************//**
71 Writes the initial part of a log record consisting of one-byte item
72 type and four-byte space and page numbers. Also pushes info
73 to the mtr memo that a buffer page has been modified. */
74 UNIV_INTERN
75 void
mlog_write_initial_log_record(const byte * ptr,byte type,mtr_t * mtr)76 mlog_write_initial_log_record(
77 /*==========================*/
78 	const byte*	ptr,	/*!< in: pointer to (inside) a buffer
79 				frame holding the file page where
80 				modification is made */
81 	byte		type,	/*!< in: log item type: MLOG_1BYTE, ... */
82 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
83 {
84 	byte*	log_ptr;
85 
86 	ut_ad(type <= MLOG_BIGGEST_TYPE);
87 	ut_ad(type > MLOG_8BYTES);
88 
89 	log_ptr = mlog_open(mtr, 11);
90 
91 	/* If no logging is requested, we may return now */
92 	if (log_ptr == NULL) {
93 
94 		return;
95 	}
96 
97 	log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
98 
99 	mlog_close(mtr, log_ptr);
100 }
101 #endif /* !UNIV_HOTBACKUP */
102 
103 /********************************************************//**
104 Parses an initial log record written by mlog_write_initial_log_record.
105 @return	parsed record end, NULL if not a complete record */
106 UNIV_INTERN
107 byte*
mlog_parse_initial_log_record(byte * ptr,byte * end_ptr,byte * type,ulint * space,ulint * page_no)108 mlog_parse_initial_log_record(
109 /*==========================*/
110 	byte*	ptr,	/*!< in: buffer */
111 	byte*	end_ptr,/*!< in: buffer end */
112 	byte*	type,	/*!< out: log record type: MLOG_1BYTE, ... */
113 	ulint*	space,	/*!< out: space id */
114 	ulint*	page_no)/*!< out: page number */
115 {
116 	if (end_ptr < ptr + 1) {
117 
118 		return(NULL);
119 	}
120 
121 	*type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
122 	ut_ad(*type <= MLOG_BIGGEST_TYPE);
123 
124 	ptr++;
125 
126 	if (end_ptr < ptr + 2) {
127 
128 		return(NULL);
129 	}
130 
131 	ptr = mach_parse_compressed(ptr, end_ptr, space);
132 
133 	if (ptr == NULL) {
134 
135 		return(NULL);
136 	}
137 
138 	ptr = mach_parse_compressed(ptr, end_ptr, page_no);
139 
140 	return(ptr);
141 }
142 
143 /********************************************************//**
144 Parses a log record written by mlog_write_ulint or mlog_write_ull.
145 @return	parsed record end, NULL if not a complete record or a corrupt record */
146 UNIV_INTERN
147 byte*
mlog_parse_nbytes(ulint type,byte * ptr,byte * end_ptr,byte * page,void * page_zip)148 mlog_parse_nbytes(
149 /*==============*/
150 	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
151 	byte*	ptr,	/*!< in: buffer */
152 	byte*	end_ptr,/*!< in: buffer end */
153 	byte*	page,	/*!< in: page where to apply the log record, or NULL */
154 	void*	page_zip)/*!< in/out: compressed page, or NULL */
155 {
156 	ulint		offset;
157 	ulint		val;
158 	ib_uint64_t	dval;
159 
160 	ut_a(type <= MLOG_8BYTES);
161 	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
162 
163 	if (end_ptr < ptr + 2) {
164 
165 		return(NULL);
166 	}
167 
168 	offset = mach_read_from_2(ptr);
169 	ptr += 2;
170 
171 	if (offset >= UNIV_PAGE_SIZE) {
172 		recv_sys->found_corrupt_log = TRUE;
173 
174 		return(NULL);
175 	}
176 
177 	if (type == MLOG_8BYTES) {
178 		ptr = mach_ull_parse_compressed(ptr, end_ptr, &dval);
179 
180 		if (ptr == NULL) {
181 
182 			return(NULL);
183 		}
184 
185 		if (page) {
186 			if (page_zip) {
187 				mach_write_to_8
188 					(((page_zip_des_t*) page_zip)->data
189 					 + offset, dval);
190 			}
191 			mach_write_to_8(page + offset, dval);
192 		}
193 
194 		return(ptr);
195 	}
196 
197 	ptr = mach_parse_compressed(ptr, end_ptr, &val);
198 
199 	if (ptr == NULL) {
200 
201 		return(NULL);
202 	}
203 
204 	switch (type) {
205 	case MLOG_1BYTE:
206 		if (UNIV_UNLIKELY(val > 0xFFUL)) {
207 			goto corrupt;
208 		}
209 		if (page) {
210 			if (page_zip) {
211 				mach_write_to_1
212 					(((page_zip_des_t*) page_zip)->data
213 					 + offset, val);
214 			}
215 			mach_write_to_1(page + offset, val);
216 		}
217 		break;
218 	case MLOG_2BYTES:
219 		if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
220 			goto corrupt;
221 		}
222 		if (page) {
223 			if (page_zip) {
224 				mach_write_to_2
225 					(((page_zip_des_t*) page_zip)->data
226 					 + offset, val);
227 			}
228 			mach_write_to_2(page + offset, val);
229 		}
230 		break;
231 	case MLOG_4BYTES:
232 		if (page) {
233 			if (page_zip) {
234 				mach_write_to_4
235 					(((page_zip_des_t*) page_zip)->data
236 					 + offset, val);
237 			}
238 			mach_write_to_4(page + offset, val);
239 		}
240 		break;
241 	default:
242 	corrupt:
243 		recv_sys->found_corrupt_log = TRUE;
244 		ptr = NULL;
245 	}
246 
247 	return(ptr);
248 }
249 
250 /********************************************************//**
251 Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
252 record to the mini-transaction log if mtr is not NULL. */
253 UNIV_INTERN
254 void
mlog_write_ulint(byte * ptr,ulint val,byte type,mtr_t * mtr)255 mlog_write_ulint(
256 /*=============*/
257 	byte*	ptr,	/*!< in: pointer where to write */
258 	ulint	val,	/*!< in: value to write */
259 	byte	type,	/*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
260 	mtr_t*	mtr)	/*!< in: mini-transaction handle */
261 {
262 	switch (type) {
263 	case MLOG_1BYTE:
264 		mach_write_to_1(ptr, val);
265 		break;
266 	case MLOG_2BYTES:
267 		mach_write_to_2(ptr, val);
268 		break;
269 	case MLOG_4BYTES:
270 		mach_write_to_4(ptr, val);
271 		break;
272 	default:
273 		ut_error;
274 	}
275 
276 	if (mtr != 0) {
277 		byte*	log_ptr = mlog_open(mtr, 11 + 2 + 5);
278 
279 		/* If no logging is requested, we may return now */
280 
281 		if (log_ptr != 0) {
282 
283 			log_ptr = mlog_write_initial_log_record_fast(
284 				ptr, type, log_ptr, mtr);
285 
286 			mach_write_to_2(log_ptr, page_offset(ptr));
287 			log_ptr += 2;
288 
289 			log_ptr += mach_write_compressed(log_ptr, val);
290 
291 			mlog_close(mtr, log_ptr);
292 		}
293 	}
294 }
295 
296 /********************************************************//**
297 Writes 8 bytes to a file page. Writes the corresponding log
298 record to the mini-transaction log, only if mtr is not NULL */
299 UNIV_INTERN
300 void
mlog_write_ull(byte * ptr,ib_uint64_t val,mtr_t * mtr)301 mlog_write_ull(
302 /*===========*/
303 	byte*		ptr,	/*!< in: pointer where to write */
304 	ib_uint64_t	val,	/*!< in: value to write */
305 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
306 {
307 	mach_write_to_8(ptr, val);
308 
309 	if (mtr != 0) {
310 		byte*	log_ptr = mlog_open(mtr, 11 + 2 + 9);
311 
312 		/* If no logging is requested, we may return now */
313 		if (log_ptr != 0) {
314 
315 			log_ptr = mlog_write_initial_log_record_fast(
316 				ptr, MLOG_8BYTES, log_ptr, mtr);
317 
318 			mach_write_to_2(log_ptr, page_offset(ptr));
319 			log_ptr += 2;
320 
321 			log_ptr += mach_ull_write_compressed(log_ptr, val);
322 
323 			mlog_close(mtr, log_ptr);
324 		}
325 	}
326 }
327 
328 #ifndef UNIV_HOTBACKUP
329 /********************************************************//**
330 Writes a string to a file page buffered in the buffer pool. Writes the
331 corresponding log record to the mini-transaction log. */
332 UNIV_INTERN
333 void
mlog_write_string(byte * ptr,const byte * str,ulint len,mtr_t * mtr)334 mlog_write_string(
335 /*==============*/
336 	byte*		ptr,	/*!< in: pointer where to write */
337 	const byte*	str,	/*!< in: string to write */
338 	ulint		len,	/*!< in: string length */
339 	mtr_t*		mtr)	/*!< in: mini-transaction handle */
340 {
341 	ut_ad(ptr && mtr);
342 	ut_a(len < UNIV_PAGE_SIZE);
343 
344 	memcpy(ptr, str, len);
345 
346 	mlog_log_string(ptr, len, mtr);
347 }
348 
349 /********************************************************//**
350 Logs a write of a string to a file page buffered in the buffer pool.
351 Writes the corresponding log record to the mini-transaction log. */
352 UNIV_INTERN
353 void
mlog_log_string(byte * ptr,ulint len,mtr_t * mtr)354 mlog_log_string(
355 /*============*/
356 	byte*	ptr,	/*!< in: pointer written to */
357 	ulint	len,	/*!< in: string length */
358 	mtr_t*	mtr)	/*!< in: mini-transaction handle */
359 {
360 	byte*	log_ptr;
361 
362 	ut_ad(ptr && mtr);
363 	ut_ad(len <= UNIV_PAGE_SIZE);
364 
365 	log_ptr = mlog_open(mtr, 30);
366 
367 	/* If no logging is requested, we may return now */
368 	if (log_ptr == NULL) {
369 
370 		return;
371 	}
372 
373 	log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
374 						     log_ptr, mtr);
375 	mach_write_to_2(log_ptr, page_offset(ptr));
376 	log_ptr += 2;
377 
378 	mach_write_to_2(log_ptr, len);
379 	log_ptr += 2;
380 
381 	mlog_close(mtr, log_ptr);
382 
383 	mlog_catenate_string(mtr, ptr, len);
384 }
385 #endif /* !UNIV_HOTBACKUP */
386 
387 /********************************************************//**
388 Parses a log record written by mlog_write_string.
389 @return	parsed record end, NULL if not a complete record */
390 UNIV_INTERN
391 byte*
mlog_parse_string(byte * ptr,byte * end_ptr,byte * page,void * page_zip)392 mlog_parse_string(
393 /*==============*/
394 	byte*	ptr,	/*!< in: buffer */
395 	byte*	end_ptr,/*!< in: buffer end */
396 	byte*	page,	/*!< in: page where to apply the log record, or NULL */
397 	void*	page_zip)/*!< in/out: compressed page, or NULL */
398 {
399 	ulint	offset;
400 	ulint	len;
401 
402 	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
403 
404 	if (end_ptr < ptr + 4) {
405 
406 		return(NULL);
407 	}
408 
409 	offset = mach_read_from_2(ptr);
410 	ptr += 2;
411 	len = mach_read_from_2(ptr);
412 	ptr += 2;
413 
414 	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
415 	    || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
416 		recv_sys->found_corrupt_log = TRUE;
417 
418 		return(NULL);
419 	}
420 
421 	if (end_ptr < ptr + len) {
422 
423 		return(NULL);
424 	}
425 
426 	if (page) {
427 		if (page_zip) {
428 			memcpy(((page_zip_des_t*) page_zip)->data
429 				+ offset, ptr, len);
430 		}
431 		memcpy(page + offset, ptr, len);
432 	}
433 
434 	return(ptr + len);
435 }
436 
437 #ifndef UNIV_HOTBACKUP
438 /********************************************************//**
439 Opens a buffer for mlog, writes the initial log record and,
440 if needed, the field lengths of an index.
441 @return	buffer, NULL if log mode MTR_LOG_NONE */
442 UNIV_INTERN
443 byte*
mlog_open_and_write_index(mtr_t * mtr,const byte * rec,const dict_index_t * index,byte type,ulint size)444 mlog_open_and_write_index(
445 /*======================*/
446 	mtr_t*			mtr,	/*!< in: mtr */
447 	const byte*		rec,	/*!< in: index record or page */
448 	const dict_index_t*	index,	/*!< in: record descriptor */
449 	byte			type,	/*!< in: log item type */
450 	ulint			size)	/*!< in: requested buffer size in bytes
451 					(if 0, calls mlog_close() and
452 					returns NULL) */
453 {
454 	byte*		log_ptr;
455 	const byte*	log_start;
456 	const byte*	log_end;
457 
458 	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
459 
460 	if (!page_rec_is_comp(rec)) {
461 		log_start = log_ptr = mlog_open(mtr, 11 + size);
462 		if (!log_ptr) {
463 			return(NULL); /* logging is disabled */
464 		}
465 		log_ptr = mlog_write_initial_log_record_fast(rec, type,
466 							     log_ptr, mtr);
467 		log_end = log_ptr + 11 + size;
468 	} else {
469 		ulint	i;
470 		ulint	n	= dict_index_get_n_fields(index);
471 		/* total size needed */
472 		ulint	total	= 11 + size + (n + 2) * 2;
473 		ulint	alloc	= total;
474 		/* allocate at most DYN_ARRAY_DATA_SIZE at a time */
475 		if (alloc > DYN_ARRAY_DATA_SIZE) {
476 			alloc = DYN_ARRAY_DATA_SIZE;
477 		}
478 		log_start = log_ptr = mlog_open(mtr, alloc);
479 		if (!log_ptr) {
480 			return(NULL); /* logging is disabled */
481 		}
482 		log_end = log_ptr + alloc;
483 		log_ptr = mlog_write_initial_log_record_fast(rec, type,
484 							     log_ptr, mtr);
485 		mach_write_to_2(log_ptr, n);
486 		log_ptr += 2;
487 		mach_write_to_2(log_ptr,
488 				dict_index_get_n_unique_in_tree(index));
489 		log_ptr += 2;
490 		for (i = 0; i < n; i++) {
491 			dict_field_t*		field;
492 			const dict_col_t*	col;
493 			ulint			len;
494 
495 			field = dict_index_get_nth_field(index, i);
496 			col = dict_field_get_col(field);
497 			len = field->fixed_len;
498 			ut_ad(len < 0x7fff);
499 			if (len == 0
500 			    && (col->len > 255 || col->mtype == DATA_BLOB)) {
501 				/* variable-length field
502 				with maximum length > 255 */
503 				len = 0x7fff;
504 			}
505 			if (col->prtype & DATA_NOT_NULL) {
506 				len |= 0x8000;
507 			}
508 			if (log_ptr + 2 > log_end) {
509 				mlog_close(mtr, log_ptr);
510 				ut_a(total > (ulint) (log_ptr - log_start));
511 				total -= log_ptr - log_start;
512 				alloc = total;
513 				if (alloc > DYN_ARRAY_DATA_SIZE) {
514 					alloc = DYN_ARRAY_DATA_SIZE;
515 				}
516 				log_start = log_ptr = mlog_open(mtr, alloc);
517 				if (!log_ptr) {
518 					return(NULL); /* logging is disabled */
519 				}
520 				log_end = log_ptr + alloc;
521 			}
522 			mach_write_to_2(log_ptr, len);
523 			log_ptr += 2;
524 		}
525 	}
526 	if (size == 0) {
527 		mlog_close(mtr, log_ptr);
528 		log_ptr = NULL;
529 	} else if (log_ptr + size > log_end) {
530 		mlog_close(mtr, log_ptr);
531 		log_ptr = mlog_open(mtr, size);
532 	}
533 	return(log_ptr);
534 }
535 #endif /* !UNIV_HOTBACKUP */
536 
537 /********************************************************//**
538 Parses a log record written by mlog_open_and_write_index.
539 @return	parsed record end, NULL if not a complete record */
540 UNIV_INTERN
541 byte*
mlog_parse_index(byte * ptr,const byte * end_ptr,ibool comp,dict_index_t ** index)542 mlog_parse_index(
543 /*=============*/
544 	byte*		ptr,	/*!< in: buffer */
545 	const byte*	end_ptr,/*!< in: buffer end */
546 	ibool		comp,	/*!< in: TRUE=compact row format */
547 	dict_index_t**	index)	/*!< out, own: dummy index */
548 {
549 	ulint		i, n, n_uniq;
550 	dict_table_t*	table;
551 	dict_index_t*	ind;
552 
553 	ut_ad(comp == FALSE || comp == TRUE);
554 
555 	if (comp) {
556 		if (end_ptr < ptr + 4) {
557 			return(NULL);
558 		}
559 		n = mach_read_from_2(ptr);
560 		ptr += 2;
561 		n_uniq = mach_read_from_2(ptr);
562 		ptr += 2;
563 		ut_ad(n_uniq <= n);
564 		if (end_ptr < ptr + n * 2) {
565 			return(NULL);
566 		}
567 	} else {
568 		n = n_uniq = 1;
569 	}
570 	table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
571 				      comp ? DICT_TF_COMPACT : 0, 0);
572 	ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
573 				    DICT_HDR_SPACE, 0, n);
574 	ind->table = table;
575 	ind->n_uniq = (unsigned int) n_uniq;
576 	if (n_uniq != n) {
577 		ut_a(n_uniq + DATA_ROLL_PTR <= n);
578 		ind->type = DICT_CLUSTERED;
579 	}
580 	if (comp) {
581 		for (i = 0; i < n; i++) {
582 			ulint	len = mach_read_from_2(ptr);
583 			ptr += 2;
584 			/* The high-order bit of len is the NOT NULL flag;
585 			the rest is 0 or 0x7fff for variable-length fields,
586 			and 1..0x7ffe for fixed-length fields. */
587 			dict_mem_table_add_col(
588 				table, NULL, NULL,
589 				((len + 1) & 0x7fff) <= 1
590 				? DATA_BINARY : DATA_FIXBINARY,
591 				len & 0x8000 ? DATA_NOT_NULL : 0,
592 				len & 0x7fff);
593 
594 			dict_index_add_col(ind, table,
595 					   dict_table_get_nth_col(table, i),
596 					   0);
597 		}
598 		dict_table_add_system_columns(table, table->heap);
599 		if (n_uniq != n) {
600 			/* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */
601 			ut_a(DATA_TRX_ID_LEN
602 			     == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
603 						       + n_uniq)->len);
604 			ut_a(DATA_ROLL_PTR_LEN
605 			     == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
606 						       + n_uniq)->len);
607 			ind->fields[DATA_TRX_ID - 1 + n_uniq].col
608 				= &table->cols[n + DATA_TRX_ID];
609 			ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
610 				= &table->cols[n + DATA_ROLL_PTR];
611 		}
612 	}
613 	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
614 	ind->cached = TRUE;
615 	*index = ind;
616 	return(ptr);
617 }
618