1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15
16 /*
17 Locking of Maria-tables.
18 Must be first request before doing any further calls to any Maria function.
19 Is used to allow many processes to use the same non-transactional Maria table
20 */
21
22 #include "ma_ftdefs.h"
23
24 /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
25
maria_lock_database(MARIA_HA * info,int lock_type)26 int maria_lock_database(MARIA_HA *info, int lock_type)
27 {
28 int error;
29 uint count;
30 MARIA_SHARE *share= info->s;
31 DBUG_ENTER("maria_lock_database");
32 DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
33 "global_changed: %d open_count: %u name: '%s'",
34 lock_type, info->lock_type, share->r_locks,
35 share->w_locks,
36 share->global_changed, share->state.open_count,
37 share->index_file_name.str));
38 if (share->options & HA_OPTION_READ_ONLY_DATA ||
39 info->lock_type == lock_type)
40 DBUG_RETURN(0);
41 if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
42 {
43 ++share->w_locks;
44 ++share->tot_locks;
45 info->lock_type= lock_type;
46 DBUG_RETURN(0);
47 }
48
49 error=0;
50 if (!info->intern_lock_locked)
51 mysql_mutex_lock(&share->intern_lock);
52 if (share->kfile.file >= 0) /* May only be false on windows */
53 {
54 switch (lock_type) {
55 case F_UNLCK:
56 maria_ftparser_call_deinitializer(info);
57 if (info->lock_type == F_RDLCK)
58 {
59 count= --share->r_locks;
60 if (share->lock_restore_status)
61 (*share->lock_restore_status)(info);
62 }
63 else
64 {
65 count= --share->w_locks;
66 if (share->lock.update_status)
67 _ma_update_status_with_lock(info);
68 }
69 --share->tot_locks;
70 if (info->lock_type == F_WRLCK && !share->w_locks)
71 {
72 /* pages of transactional tables get flushed at Checkpoint */
73 if (!share->base.born_transactional && !share->temporary &&
74 _ma_flush_table_files(info,
75 share->delay_key_write ? MARIA_FLUSH_DATA :
76 MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
77 FLUSH_KEEP, FLUSH_KEEP))
78 error= my_errno;
79 }
80 if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
81 {
82 if (end_io_cache(&info->rec_cache))
83 {
84 error= my_errno;
85 _ma_set_fatal_error(share, error);
86 }
87 }
88 if (!count)
89 {
90 DBUG_PRINT("info",("changed: %u w_locks: %u",
91 (uint) share->changed, share->w_locks));
92 if (share->changed && !share->w_locks)
93 {
94 #ifdef HAVE_MMAP
95 if ((share->mmaped_length !=
96 share->state.state.data_file_length) &&
97 (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
98 {
99 if (share->lock_key_trees)
100 mysql_rwlock_wrlock(&share->mmap_lock);
101 _ma_remap_file(info, share->state.state.data_file_length);
102 share->nonmmaped_inserts= 0;
103 if (share->lock_key_trees)
104 mysql_rwlock_unlock(&share->mmap_lock);
105 }
106 #endif
107 #ifdef MARIA_EXTERNAL_LOCKING
108 share->state.process= share->last_process=share->this_process;
109 share->state.unique= info->last_unique= info->this_unique;
110 share->state.update_count= info->last_loop= ++info->this_loop;
111 #endif
112 /* transactional tables rather flush their state at Checkpoint */
113 if (!share->base.born_transactional)
114 {
115 if (_ma_state_info_write_sub(share->kfile.file, &share->state,
116 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
117 error= my_errno;
118 else
119 {
120 /* A value of 0 means below means "state flushed" */
121 share->changed= 0;
122 }
123 }
124 if (maria_flush)
125 {
126 if (_ma_sync_table_files(info))
127 error= my_errno;
128 }
129 else
130 share->not_flushed=1;
131 if (error)
132 _ma_set_fatal_error(share, error);
133 }
134 }
135 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
136 info->lock_type= F_UNLCK;
137 break;
138 case F_RDLCK:
139 if (info->lock_type == F_WRLCK)
140 {
141 /*
142 Change RW to READONLY
143
144 mysqld does not turn write locks to read locks,
145 so we're never here in mysqld.
146 */
147 share->w_locks--;
148 share->r_locks++;
149 info->lock_type=lock_type;
150 break;
151 }
152 #ifdef MARIA_EXTERNAL_LOCKING
153 if (!share->r_locks && !share->w_locks)
154 {
155 /* note that a transactional table should not do this */
156 if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
157 {
158 error=my_errno;
159 break;
160 }
161 }
162 #endif
163 _ma_test_if_changed(info);
164 share->r_locks++;
165 share->tot_locks++;
166 info->lock_type=lock_type;
167 break;
168 case F_WRLCK:
169 if (info->lock_type == F_RDLCK)
170 { /* Change READONLY to RW */
171 if (share->r_locks == 1)
172 {
173 share->r_locks--;
174 share->w_locks++;
175 info->lock_type=lock_type;
176 break;
177 }
178 }
179 #ifdef MARIA_EXTERNAL_LOCKING
180 if (!(share->options & HA_OPTION_READ_ONLY_DATA))
181 {
182 if (!share->w_locks)
183 {
184 if (!share->r_locks)
185 {
186 /*
187 Note that transactional tables should not do this.
188 If we enabled this code, we should make sure to skip it if
189 born_transactional is true. We should not test
190 now_transactional to decide if we can call
191 _ma_state_info_read_dsk(), because it can temporarily be 0
192 (TRUNCATE on a partitioned table) and thus it would make a state
193 modification below without mutex, confusing a concurrent
194 checkpoint running.
195 Even if this code was enabled only for non-transactional tables:
196 in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
197 state on disk read by DELETE is obsolete as it was not flushed
198 at the end of INSERT. MyISAM same. It however causes no issue as
199 maria_delete_all_rows() calls _ma_reset_status() thus is not
200 influenced by the obsolete read values.
201 */
202 if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
203 {
204 error=my_errno;
205 break;
206 }
207 }
208 }
209 }
210 #endif /* defined(MARIA_EXTERNAL_LOCKING) */
211 _ma_test_if_changed(info);
212
213 info->lock_type=lock_type;
214 info->invalidator=share->invalidator;
215 share->w_locks++;
216 share->tot_locks++;
217 break;
218 default:
219 DBUG_ASSERT(0);
220 break; /* Impossible */
221 }
222 }
223 #ifdef __WIN__
224 else
225 {
226 /*
227 Check for bad file descriptors if this table is part
228 of a merge union. Failing to capture this may cause
229 a crash on windows if the table is renamed and
230 later on referenced by the merge table.
231 */
232 if( info->owned_by_merge && (info->s)->kfile.file < 0 )
233 {
234 error = HA_ERR_NO_SUCH_TABLE;
235 }
236 }
237 #endif
238 if (!info->intern_lock_locked)
239 mysql_mutex_unlock(&share->intern_lock);
240 DBUG_RETURN(error);
241 } /* maria_lock_database */
242
243
244 /****************************************************************************
245 ** functions to read / write the state
246 ****************************************************************************/
247
/*
  Bring the in-memory table state up to date before an operation.

  SYNOPSIS
    _ma_readinfo()
    info              Maria handler
    lock_type         Lock the caller wants; only F_WRLCK is checked here
    check_keybuffer   If set and the handler holds no lock, call
                      _ma_test_if_changed()

  NOTES
    Does nothing unless MARIA_EXTERNAL_LOCKING is defined.

  RETURN
    0    ok
    1    Could not read the state from disk; my_errno is set
    -1   F_WRLCK wanted while the handler holds F_RDLCK; my_errno is EACCES
*/

int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
                 int lock_type __attribute__ ((unused)),
                 int check_keybuffer __attribute__ ((unused)))
{
#ifdef MARIA_EXTERNAL_LOCKING
  MARIA_SHARE *share;
  DBUG_ENTER("_ma_readinfo");

  if (info->lock_type != F_UNLCK)
  {
    if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
    {
      /* Not allowed to change when we only have a read lock */
      my_errno= EACCES;
      DBUG_RETURN(-1);
    }
    DBUG_RETURN(0);
  }

  share= info->s;
  /* Nobody holds a lock: re-read the state from disk */
  /* should not be done for transactional tables */
  if (!share->tot_locks &&
      _ma_state_info_read_dsk(share->kfile.file, &share->state))
  {
    if (!my_errno)
      my_errno= HA_ERR_FILE_TOO_SHORT;
    DBUG_RETURN(1);
  }
  if (check_keybuffer)
    VOID(_ma_test_if_changed(info));
  info->invalidator= share->invalidator;
  DBUG_RETURN(0);
#else
  return 0;
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
} /* _ma_readinfo */
282
283
284 /*
285 Every isam-function that updates the isam-database MUST end with this
286 request
287
288 NOTES
289 my_errno is not changed if this succeeds!
290 */
291
/*
  Write the table state to the index file, or mark it as changed.

  SYNOPSIS
    _ma_writeinfo()
    info        Maria handler
    operation   Non-zero if something was changed; 0 makes this a no-op

  NOTES
    The state is written only when no lock is held on the share
    (tot_locks == 0) and the table was not born transactional. Otherwise
    share->changed is set to 1.
    my_errno is restored to its value at entry when the write succeeds.

  RETURN
    0       ok
    other   Return value of _ma_state_info_write_sub(); my_errno holds the
            error of that call
*/

int _ma_writeinfo(register MARIA_HA *info, uint operation)
{
  int error= 0;
  int saved_errno;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_writeinfo");
  DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
                     share->tot_locks));

  if (!operation)
    DBUG_RETURN(0);

  if (share->tot_locks != 0 || share->base.born_transactional)
  {
    /* Transactional tables flush their state at Checkpoint */
    share->changed= 1;                  /* Mark keyfile changed */
    DBUG_RETURN(0);
  }

  /* Two threads can't be here */
  saved_errno= my_errno;                /* Remember last error */

#ifdef MARIA_EXTERNAL_LOCKING
  /*
    The following only makes sense if we want to allow two different
    processes to access the same table at the same time
  */
  share->state.process= share->last_process= share->this_process;
  share->state.unique= info->last_unique= info->this_unique;
  share->state.update_count= info->last_loop= ++info->this_loop;
#endif

  error= _ma_state_info_write_sub(share->kfile.file,
                                  &share->state,
                                  MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET);
  if (error)
    saved_errno= my_errno;              /* Report the write error instead */
#ifdef __WIN__
  if (maria_flush)
  {
    _commit(share->kfile.file);
    _commit(info->dfile.file);
  }
#endif
  my_errno= saved_errno;
  DBUG_RETURN(error);
} /* _ma_writeinfo */
337
338
339 /*
340 Test if an external process has changed the database
341 (Should be called after readinfo)
342 */
343
_ma_test_if_changed(register MARIA_HA * info)344 int _ma_test_if_changed(register MARIA_HA *info)
345 {
346 #ifdef MARIA_EXTERNAL_LOCKING
347 MARIA_SHARE *share= info->s;
348 if (share->state.process != share->last_process ||
349 share->state.unique != info->last_unique ||
350 share->state.update_count != info->last_loop)
351 { /* Keyfile has changed */
352 DBUG_PRINT("info",("index file changed"));
353 if (share->state.process != share->this_process)
354 VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
355 FLUSH_RELEASE));
356 share->last_process=share->state.process;
357 info->last_unique= share->state.unique;
358 info->last_loop= share->state.update_count;
359 info->update|= HA_STATE_WRITTEN; /* Must use file on next */
360 info->data_changed= 1; /* For maria_is_changed */
361 return 1;
362 }
363 #endif
364 return (!(info->update & HA_STATE_AKTIV) ||
365 (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
366 HA_STATE_KEY_CHANGED)));
367 } /* _ma_test_if_changed */
368
369
370 /*
371 Put a mark in the .MAI file that someone is updating the table
372
373 DOCUMENTATION
374 state.open_count in the .MAI file is used the following way:
375 - For the first change of the .MAI file in this process open_count is
376 incremented by _ma_mark_file_changed(). (We have a write lock on the file
377 when this happens)
378 - In maria_close() it's decremented by _ma_decrement_open_count() if it
379 was incremented in the same process.
380
381 This means that if we are the only process using the file, the open_count
382 tells us if the MARIA file wasn't properly closed. (This is true if
383 my_disable_locking is set).
384
385 open_count is not maintained on disk for temporary tables.
386 */
387
388 #define _MA_ALREADY_MARKED_FILE_CHANGED \
389 ((share->state.changed & STATE_CHANGED) && share->global_changed)
390
_ma_mark_file_changed(register MARIA_SHARE * share)391 int _ma_mark_file_changed(register MARIA_SHARE *share)
392 {
393 if (!share->base.born_transactional)
394 {
395 if (!_MA_ALREADY_MARKED_FILE_CHANGED)
396 return _ma_mark_file_changed_now(share);
397 }
398 else
399 {
400 /*
401 For transactional tables, the table is marked changed when the first page
402 is written. Here we just mark the state to be updated so that caller
403 can do 'analyze table' and find that is has changed before any pages
404 are written.
405 */
406 if (! test_all_bits(share->state.changed,
407 (STATE_CHANGED | STATE_NOT_ANALYZED |
408 STATE_NOT_OPTIMIZED_KEYS)))
409 {
410 mysql_mutex_lock(&share->intern_lock);
411 share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
412 STATE_NOT_OPTIMIZED_KEYS);
413 mysql_mutex_unlock(&share->intern_lock);
414 }
415 }
416 return 0;
417 }
418
_ma_mark_file_changed_now(register MARIA_SHARE * share)419 int _ma_mark_file_changed_now(register MARIA_SHARE *share)
420 {
421 uchar buff[3];
422 int error= 1;
423 DBUG_ENTER("_ma_mark_file_changed_now");
424
425 if (_MA_ALREADY_MARKED_FILE_CHANGED)
426 DBUG_RETURN(0);
427 mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */
428 if (! _MA_ALREADY_MARKED_FILE_CHANGED)
429 {
430 share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
431 STATE_NOT_OPTIMIZED_KEYS);
432 if (!share->global_changed)
433 {
434 share->changed= share->global_changed= 1;
435 share->state.open_count++;
436 }
437 /*
438 Temp tables don't need an open_count as they are removed on crash.
439 In theory transactional tables are fixed by log-based recovery, so don't
440 need an open_count either, but if recovery has failed and logs have been
441 removed (by maria-force-start-after-recovery-failures), we still need to
442 detect dubious tables.
443 If we didn't maintain open_count on disk for a table, after a crash
444 we wouldn't know if it was closed at crash time (thus does not need a
445 check) or not. So we would have to check all tables: overkill.
446 */
447 if (!share->temporary)
448 {
449 mi_int2store(buff,share->state.open_count);
450 buff[2]=1; /* Mark that it's changed */
451 if (my_pwrite(share->kfile.file, buff, sizeof(buff),
452 sizeof(share->state.header) +
453 MARIA_FILE_OPEN_COUNT_OFFSET,
454 MYF(MY_NABP)))
455 goto err;
456 }
457 /* Set uuid of file if not yet set (zerofilled file) */
458 if (share->base.born_transactional &&
459 !(share->state.org_changed & STATE_NOT_MOVABLE))
460 {
461 /* Lock table to current installation */
462 if (_ma_set_uuid(share, 0) ||
463 (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
464 _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
465 trnman_get_min_trid(),
466 TRUE, TRUE)))
467 goto err;
468 share->state.changed|= STATE_NOT_MOVABLE;
469 share->state.org_changed|= STATE_NOT_MOVABLE;
470 }
471 }
472 error= 0;
473 err:
474 mysql_mutex_unlock(&share->intern_lock);
475 DBUG_RETURN(error);
476 #undef _MA_ALREADY_MARKED_FILE_CHANGED
477 }
478
479 /*
480 Check that a region is all zero
481
482 SYNOPSIS
483 check_if_zero()
484 pos Start of memory to check
485 length length of memory region
486
487 NOTES
488 Used mainly to detect rows with wrong extent information
489 */
490
_ma_check_if_zero(uchar * pos,size_t length)491 my_bool _ma_check_if_zero(uchar *pos, size_t length)
492 {
493 uchar *end;
494 for (end= pos+ length; pos != end ; pos++)
495 if (pos[0] != 0)
496 return 1;
497 return 0;
498 }
499
500 /*
501 This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
502 call. In these contexts the following code should be safe!
503 */
504
/*
  Undo the open_count increment done when the table was marked as changed.

  SYNOPSIS
    _ma_decrement_open_count()
    info          Maria handler
    lock_tables   If set (and my_disable_locking is not), take F_WRLCK on
                  the table around the update and restore the old lock
                  afterwards

  NOTES
    Does nothing unless share->global_changed is set.

  RETURN
    0   ok
    1   Locking or writing the new open_count failed
*/

int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
{
  uchar open_count_buf[2];
  register MARIA_SHARE *share= info->s;
  int lock_error= 0, write_error= 0;
  uint saved_lock;
  DBUG_ENTER("_ma_decrement_open_count");

  if (!share->global_changed)
    DBUG_RETURN(0);

  saved_lock= info->lock_type;
  share->global_changed= 0;
  if (!my_disable_locking && lock_tables)
    lock_error= maria_lock_database(info, F_WRLCK);
  /* It's not fatal even if we couldn't get the lock ! */

  if (share->state.open_count > 0)
  {
    share->state.open_count--;
    share->changed= 1;                  /* We have to update state */
    /*
      For temporary tables that will just be deleted, we don't have to
      decrement the state on disk. For transactional tables the state will
      be updated in maria_close().
    */
    if (!share->temporary && !share->now_transactional)
    {
      mi_int2store(open_count_buf, share->state.open_count);
      write_error= (int) my_pwrite(share->kfile.file, open_count_buf,
                                   sizeof(open_count_buf),
                                   sizeof(share->state.header) +
                                   MARIA_FILE_OPEN_COUNT_OFFSET,
                                   MYF(MY_NABP));
    }
  }

  /* Go back to the lock we had at entry, if we managed to change it */
  if (!lock_error && !my_disable_locking && lock_tables)
    lock_error= maria_lock_database(info, saved_lock);

  DBUG_RETURN(MY_TEST(lock_error || write_error));
}
543
544
545 /** @brief mark file as crashed */
546
void _ma_mark_file_crashed(MARIA_SHARE *share)
{
  uchar changed_buf[2];
  DBUG_ENTER("_ma_mark_file_crashed");

  /* Set the flag in memory first, then store the new flags in the file */
  share->state.changed|= STATE_CRASHED;
  mi_int2store(changed_buf, share->state.changed);

  /*
    Errors from the write are deliberately ignored: if the mark failed there
    isn't anything else we can do, and the user should already have got an
    error that the table was crashed.
  */
  (void) my_pwrite(share->kfile.file, changed_buf, sizeof(changed_buf),
                   sizeof(share->state.header) + MARIA_FILE_CHANGED_OFFSET,
                   MYF(MY_NABP));
  DBUG_VOID_RETURN;
}
565
566 /*
567 Handle a fatal error
568
569 - Mark the table as crashed
570 - Print an error message, if we had not issued an error message before
571 that the table had been crashed.
572 - set my_errno to error
573 - If 'maria_assert_if_crashed_table' is set, then assert.
574 */
575
_ma_set_fatal_error(MARIA_SHARE * share,int error)576 void _ma_set_fatal_error(MARIA_SHARE *share, int error)
577 {
578 DBUG_PRINT("error", ("error: %d", error));
579 maria_mark_crashed_share(share);
580 if (!(share->state.changed & STATE_CRASHED_PRINTED))
581 {
582 share->state.changed|= STATE_CRASHED_PRINTED;
583 maria_print_error(share, error);
584 }
585 my_errno= error;
586 DBUG_ASSERT(!maria_assert_if_crashed_table);
587 }
588
589
590 /**
591 @brief Set uuid for a Maria file
592
593 @fn _ma_set_uuid()
594 @param share Maria share
595 @param reset_uuid Instead of setting file to maria_uuid, set it to
596 0 to mark it as movable
597 */
598
my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
{
  uchar zero_uuid[MY_UUID_SIZE];
  uchar *source;

  if (reset_uuid)
  {
    /* Write an all-zero uuid instead of maria_uuid */
    bzero(zero_uuid, sizeof(zero_uuid));
    source= zero_uuid;
  }
  else
    source= maria_uuid;

  /* The uuid is written at the offset given by base_pos in the header */
  return (my_bool) my_pwrite(share->kfile.file, source, MY_UUID_SIZE,
                             mi_uint2korr(share->state.header.base_pos),
                             MYF(MY_NABP));
}
613