1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
/*
  Locking of Maria-tables.
  Must be first request before doing any further calls to any Maria function.
  Is used to allow many processes to use the same non transactional Maria table
*/
21 
22 #include "ma_ftdefs.h"
23 
24 	/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
25 
/**
  Change the lock held by one Maria table handle.

  @param info       Table handle. info->lock_type is the lock the handle
                    currently holds and is updated on success.
  @param lock_type  Requested lock: F_UNLCK, F_RDLCK, F_WRLCK or F_EXTRA_LCK.

  @details
  The per-share counters r_locks, w_locks and tot_locks are adjusted to
  reflect the transition. Nothing is done when the share has
  HA_OPTION_READ_ONLY_DATA set or when the handle already holds lock_type.
  F_EXTRA_LCK is counted as a write lock and is handled before
  share->intern_lock is taken. All other transitions run under
  share->intern_lock, unless info->intern_lock_locked is set, in which case
  the mutex is neither locked nor unlocked here (presumably the caller
  already holds it - confirm against callers).

  On F_UNLCK the function may additionally flush table pages, end the record
  cache, remap the data file (HAVE_MMAP), write the state to the index file
  and sync the table files; see the comments in the body.

  @return 0 on success, otherwise an error number: my_errno of the failing
          step, or HA_ERR_NO_SUCH_TABLE from the __WIN__-only branch.
*/

int maria_lock_database(MARIA_HA *info, int lock_type)
{
  int error;
  uint count;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("maria_lock_database");
  DBUG_PRINT("enter",("lock_type: %d  old lock %d  r_locks: %u  w_locks: %u "
                      "global_changed:  %d  open_count: %u  name: '%s'",
                      lock_type, info->lock_type, share->r_locks,
                      share->w_locks,
                      share->global_changed, share->state.open_count,
                      share->index_file_name.str));
  /* Read-only data needs no locking; same lock as before is a no-op */
  if (share->options & HA_OPTION_READ_ONLY_DATA ||
      info->lock_type == lock_type)
    DBUG_RETURN(0);
  if (lock_type == F_EXTRA_LCK)                 /* Used by TMP tables */
  {
    /* Counted as a write lock; done without taking share->intern_lock */
    ++share->w_locks;
    ++share->tot_locks;
    info->lock_type= lock_type;
    DBUG_RETURN(0);
  }

  error=0;
  if (!info->intern_lock_locked)
    mysql_mutex_lock(&share->intern_lock);
  if (share->kfile.file >= 0)		/* May only be false on windows */
  {
    switch (lock_type) {
    case F_UNLCK:
      /* Release the lock currently held by this handle */
      maria_ftparser_call_deinitializer(info);
      /*
        'count' receives the number of locks of the released kind that
        remain on the share. Any lock that is not F_RDLCK (this includes
        F_EXTRA_LCK) is accounted as a write lock.
      */
      if (info->lock_type == F_RDLCK)
      {
	count= --share->r_locks;
        if (share->lock_restore_status)
          (*share->lock_restore_status)(info);
      }
      else
      {
	count= --share->w_locks;
        if (share->lock.update_status)
          _ma_update_status_with_lock(info);
      }
      --share->tot_locks;
      /* The last F_WRLCK on the share is going away */
      if (info->lock_type == F_WRLCK && !share->w_locks)
      {
        /*
          pages of transactional tables get flushed at Checkpoint.
          For other non-temporary tables flush here; with delay_key_write
          only the data pages are flushed, otherwise data and index pages.
        */
        if (!share->base.born_transactional && !share->temporary &&
            _ma_flush_table_files(info,
                                  share->delay_key_write ? MARIA_FLUSH_DATA :
                                  MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
                                  FLUSH_KEEP, FLUSH_KEEP))
          error= my_errno;
      }
      /* End the record cache if a read or write cache was in use */
      if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
      {
	if (end_io_cache(&info->rec_cache))
	{
	  error= my_errno;
          _ma_set_fatal_error(share, error);
	}
      }
      /* No lock of the released kind is left on the share */
      if (!count)
      {
	DBUG_PRINT("info",("changed: %u  w_locks: %u",
			   (uint) share->changed, share->w_locks));
        /* Only write the state if it changed and no writer remains */
	if (share->changed && !share->w_locks)
	{
#ifdef HAVE_MMAP
          /*
            Remap the data file when its length differs from the mapped
            length and more than MAX_NONMAPPED_INSERTS inserts were counted
            in nonmmaped_inserts. mmap_lock is taken only when
            lock_key_trees is set.
          */
          if ((share->mmaped_length !=
               share->state.state.data_file_length) &&
              (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
          {
            if (share->lock_key_trees)
              mysql_rwlock_wrlock(&share->mmap_lock);
            _ma_remap_file(info, share->state.state.data_file_length);
            share->nonmmaped_inserts= 0;
            if (share->lock_key_trees)
              mysql_rwlock_unlock(&share->mmap_lock);
          }
#endif
#ifdef MARIA_EXTERNAL_LOCKING
          /*
            Stamp the state with this process/handle identity; these are the
            fields _ma_test_if_changed() compares.
          */
	  share->state.process= share->last_process=share->this_process;
	  share->state.unique=   info->last_unique=  info->this_unique;
	  share->state.update_count= info->last_loop= ++info->this_loop;
#endif
          /* transactional tables rather flush their state at Checkpoint */
          if (!share->base.born_transactional)
          {
            if (_ma_state_info_write_sub(share->kfile.file, &share->state,
                                         MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
              error= my_errno;
            else
            {
              /* A value of 0 below means "state flushed" */
              share->changed= 0;
            }
          }
          /*
            Sync the table files now if maria_flush is set; otherwise only
            set share->not_flushed.
          */
	  if (maria_flush)
	  {
            if (_ma_sync_table_files(info))
	      error= my_errno;
	  }
	  else
	    share->not_flushed=1;
          /*
            Any error collected so far in this unlock (including one from
            the page flush above) marks the table as crashed.
          */
	  if (error)
            _ma_set_fatal_error(share, error);
	}
      }
      /* The record cache flags are cleared whether or not a cache existed */
      info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
      info->lock_type= F_UNLCK;
      break;
    case F_RDLCK:
      if (info->lock_type == F_WRLCK)
      {
        /*
          Change RW to READONLY

          mysqld does not turn write locks to read locks,
          so we're never here in mysqld.
        */
        /* tot_locks is unchanged: one lock is only converted */
	share->w_locks--;
	share->r_locks++;
	info->lock_type=lock_type;
	break;
      }
#ifdef MARIA_EXTERNAL_LOCKING
      /* First lock on the share: re-read the state from disk */
      if (!share->r_locks && !share->w_locks)
      {
        /* note that a transactional table should not do this */
	if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
	{
	  error=my_errno;
	  break;
	}
      }
#endif
      /* Return value intentionally not used here */
      _ma_test_if_changed(info);
      share->r_locks++;
      share->tot_locks++;
      info->lock_type=lock_type;
      break;
    case F_WRLCK:
      if (info->lock_type == F_RDLCK)
      {						/* Change READONLY to RW */
        /*
          Convert in place only when this is the single read lock on the
          share. NOTE(review): with r_locks > 1 control falls through and a
          new write lock is counted below while r_locks is left untouched -
          confirm this is intended.
        */
	if (share->r_locks == 1)
	{
	  share->r_locks--;
	  share->w_locks++;
	  info->lock_type=lock_type;
	  break;
	}
      }
#ifdef MARIA_EXTERNAL_LOCKING
      if (!(share->options & HA_OPTION_READ_ONLY_DATA))
      {
	if (!share->w_locks)
	{
	  if (!share->r_locks)
	  {
            /*
              Note that transactional tables should not do this.
              If we enabled this code, we should make sure to skip it if
              born_transactional is true. We should not test
              now_transactional to decide if we can call
              _ma_state_info_read_dsk(), because it can temporarily be 0
              (TRUNCATE on a partitioned table) and thus it would make a state
              modification below without mutex, confusing a concurrent
              checkpoint running.
              Even if this code was enabled only for non-transactional tables:
              in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
              state on disk read by DELETE is obsolete as it was not flushed
              at the end of INSERT. MyISAM same. It however causes no issue as
              maria_delete_all_rows() calls _ma_reset_status() thus is not
              influenced by the obsolete read values.
            */
	    if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
	    {
	      error=my_errno;
	      break;
	    }
	  }
	}
      }
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
      /* Return value intentionally not used here */
      _ma_test_if_changed(info);

      info->lock_type=lock_type;
      /* Copy the share's invalidator into the handle */
      info->invalidator=share->invalidator;
      share->w_locks++;
      share->tot_locks++;
      break;
    default:
      DBUG_ASSERT(0);
      break;				/* Impossible */
    }
  }
#ifdef __WIN__
  else
  {
    /*
       Check for bad file descriptors if this table is part
       of a merge union. Failing to capture this may cause
       a crash on windows if the table is renamed and
       later on referenced by the merge table.
     */
    if( info->owned_by_merge && (info->s)->kfile.file < 0 )
    {
      error = HA_ERR_NO_SUCH_TABLE;
    }
  }
#endif
  /* Release the mutex only if it was taken at the top of this function */
  if (!info->intern_lock_locked)
    mysql_mutex_unlock(&share->intern_lock);
  DBUG_RETURN(error);
} /* maria_lock_database */
242 
243 
244 /****************************************************************************
245  ** functions to read / write the state
246 ****************************************************************************/
247 
/*
  Refresh the in-memory state before an operation on the table

  SYNOPSIS
    _ma_readinfo()
    info             Maria handler
    lock_type        Lock the caller needs (F_RDLCK or F_WRLCK)
    check_keybuffer  If non-zero, also call _ma_test_if_changed()

  NOTES
    Does nothing and returns 0 unless MARIA_EXTERNAL_LOCKING is defined.
    With external locking:
    - If the handle holds no lock, the state is read from disk when nobody
      has the share locked (tot_locks == 0), and the share's invalidator is
      copied into the handle.
    - If the handle holds a read lock, a request for F_WRLCK is refused.

  RETURN
    0   ok
    1   State could not be read; my_errno is set (HA_ERR_FILE_TOO_SHORT if
        the read left it at 0)
    -1  Write access requested while holding a read lock; my_errno= EACCES
*/

int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
                 int lock_type __attribute__ ((unused)),
                 int check_keybuffer __attribute__ ((unused)))
{
#ifdef MARIA_EXTERNAL_LOCKING
  MARIA_SHARE *share;
  DBUG_ENTER("_ma_readinfo");

  if (info->lock_type != F_UNLCK)
  {
    /* Handle is already locked: only a read -> write request is an error */
    if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
    {
      my_errno= EACCES;                         /* Not allowed to change */
      DBUG_RETURN(-1);                          /* when have read_lock() */
    }
    DBUG_RETURN(0);
  }

  /* Handle is unlocked: make sure the in-memory state is current */
  share= info->s;
  /* should not be done for transactional tables */
  if (!share->tot_locks &&
      _ma_state_info_read_dsk(share->kfile.file, &share->state))
  {
    if (!my_errno)
      my_errno= HA_ERR_FILE_TOO_SHORT;
    DBUG_RETURN(1);
  }
  if (check_keybuffer)
    VOID(_ma_test_if_changed(info));
  info->invalidator= share->invalidator;
  DBUG_RETURN(0);
#else
  return 0;
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
} /* _ma_readinfo */
282 
283 
284 /*
285   Every isam-function that uppdates the isam-database MUST end with this
286   request
287 
288   NOTES
289     my_errno is not changed if this succeeds!
290 */
291 
_ma_writeinfo(register MARIA_HA * info,uint operation)292 int _ma_writeinfo(register MARIA_HA *info, uint operation)
293 {
294   int error,olderror;
295   MARIA_SHARE *share= info->s;
296   DBUG_ENTER("_ma_writeinfo");
297   DBUG_PRINT("info",("operation: %u  tot_locks: %u", operation,
298 		     share->tot_locks));
299 
300   error=0;
301   if (share->tot_locks == 0 && !share->base.born_transactional)
302   {
303     /* transactional tables flush their state at Checkpoint */
304     if (operation)
305     {					/* Two threads can't be here */
306       olderror= my_errno;               /* Remember last error */
307 
308 #ifdef MARIA_EXTERNAL_LOCKING
309       /*
310         The following only makes sense if we want to be allow two different
311         processes access the same table at the same time
312       */
313       share->state.process= share->last_process=   share->this_process;
314       share->state.unique=  info->last_unique=	   info->this_unique;
315       share->state.update_count= info->last_loop= ++info->this_loop;
316 #endif
317 
318       if ((error=
319            _ma_state_info_write_sub(share->kfile.file,
320                                     &share->state,
321                                     MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)))
322 	olderror=my_errno;
323 #ifdef __WIN__
324       if (maria_flush)
325       {
326 	_commit(share->kfile.file);
327 	_commit(info->dfile.file);
328       }
329 #endif
330       my_errno=olderror;
331     }
332   }
333   else if (operation)
334     share->changed= 1;			/* Mark keyfile changed */
335   DBUG_RETURN(error);
336 } /* _ma_writeinfo */
337 
338 
339 /*
340   Test if an external process has changed the database
341   (Should be called after readinfo)
342 */
343 
_ma_test_if_changed(register MARIA_HA * info)344 int _ma_test_if_changed(register MARIA_HA *info)
345 {
346 #ifdef MARIA_EXTERNAL_LOCKING
347   MARIA_SHARE *share= info->s;
348   if (share->state.process != share->last_process ||
349       share->state.unique  != info->last_unique ||
350       share->state.update_count != info->last_loop)
351   {						/* Keyfile has changed */
352     DBUG_PRINT("info",("index file changed"));
353     if (share->state.process != share->this_process)
354       VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
355                                   FLUSH_RELEASE));
356     share->last_process=share->state.process;
357     info->last_unique=	share->state.unique;
358     info->last_loop=	share->state.update_count;
359     info->update|=	HA_STATE_WRITTEN;	/* Must use file on next */
360     info->data_changed= 1;			/* For maria_is_changed */
361     return 1;
362   }
363 #endif
364   return (!(info->update & HA_STATE_AKTIV) ||
365 	  (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
366 			   HA_STATE_KEY_CHANGED)));
367 } /* _ma_test_if_changed */
368 
369 
/*
  Put a mark in the .MAI file that someone is updating the table

  DOCUMENTATION
  state.open_count in the .MAI file is used the following way:
  - For the first change of the .MAI file in this process open_count is
    incremented by _ma_mark_file_changed(). (We have a write lock on the file
    when this happens)
  - In maria_close() it's decremented by _ma_decrement_open_count() if it
    was incremented in the same process.

  This means that if we are the only process using the file, the open_count
  tells us if the MARIA file wasn't properly closed. (This is true if
  my_disable_locking is set).

  open_count is not maintained on disk for temporary tables.
*/
387 
/*
  True when STATE_CHANGED is already set in share->state.changed and
  share->global_changed is set. Expects a variable named 'share' to be in
  scope where it is used. The macro is #undef'ed at the end of
  _ma_mark_file_changed_now().
*/
#define _MA_ALREADY_MARKED_FILE_CHANGED                                 \
  ((share->state.changed & STATE_CHANGED) && share->global_changed)
390 
_ma_mark_file_changed(register MARIA_SHARE * share)391 int _ma_mark_file_changed(register MARIA_SHARE *share)
392 {
393   if (!share->base.born_transactional)
394   {
395     if (!_MA_ALREADY_MARKED_FILE_CHANGED)
396       return _ma_mark_file_changed_now(share);
397   }
398   else
399   {
400     /*
401       For transactional tables, the table is marked changed when the first page
402       is written. Here we just mark the state to be updated so that caller
403       can do 'analyze table' and find that is has changed before any pages
404       are written.
405     */
406     if (! test_all_bits(share->state.changed,
407                         (STATE_CHANGED | STATE_NOT_ANALYZED |
408                          STATE_NOT_OPTIMIZED_KEYS)))
409     {
410       mysql_mutex_lock(&share->intern_lock);
411       share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
412                              STATE_NOT_OPTIMIZED_KEYS);
413       mysql_mutex_unlock(&share->intern_lock);
414     }
415   }
416   return 0;
417 }
418 
_ma_mark_file_changed_now(register MARIA_SHARE * share)419 int _ma_mark_file_changed_now(register MARIA_SHARE *share)
420 {
421   uchar buff[3];
422   int error= 1;
423   DBUG_ENTER("_ma_mark_file_changed_now");
424 
425   if (_MA_ALREADY_MARKED_FILE_CHANGED)
426     DBUG_RETURN(0);
427   mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */
428   if (! _MA_ALREADY_MARKED_FILE_CHANGED)
429   {
430     share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
431 			   STATE_NOT_OPTIMIZED_KEYS);
432     if (!share->global_changed)
433     {
434       share->changed= share->global_changed= 1;
435       share->state.open_count++;
436     }
437     /*
438       Temp tables don't need an open_count as they are removed on crash.
439       In theory transactional tables are fixed by log-based recovery, so don't
440       need an open_count either, but if recovery has failed and logs have been
441       removed (by maria-force-start-after-recovery-failures), we still need to
442       detect dubious tables.
443       If we didn't maintain open_count on disk for a table, after a crash
444       we wouldn't know if it was closed at crash time (thus does not need a
445       check) or not. So we would have to check all tables: overkill.
446     */
447     if (!share->temporary)
448     {
449       mi_int2store(buff,share->state.open_count);
450       buff[2]=1;				/* Mark that it's changed */
451       if (my_pwrite(share->kfile.file, buff, sizeof(buff),
452                     sizeof(share->state.header) +
453                     MARIA_FILE_OPEN_COUNT_OFFSET,
454                     MYF(MY_NABP)))
455         goto err;
456     }
457     /* Set uuid of file if not yet set (zerofilled file) */
458     if (share->base.born_transactional &&
459         !(share->state.org_changed & STATE_NOT_MOVABLE))
460     {
461       /* Lock table to current installation */
462       if (_ma_set_uuid(share, 0) ||
463           (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
464            _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
465                                      trnman_get_min_trid(),
466                                      TRUE, TRUE)))
467         goto err;
468       share->state.changed|= STATE_NOT_MOVABLE;
469       share->state.org_changed|= STATE_NOT_MOVABLE;
470     }
471   }
472   error= 0;
473 err:
474   mysql_mutex_unlock(&share->intern_lock);
475   DBUG_RETURN(error);
476 #undef _MA_ALREADY_MARKED_FILE_CHANGED
477 }
478 
/*
  Check that a region is all zero

  SYNOPSIS
    _ma_check_if_zero()
    pos		Start of memory to check
    length	Length of memory region in bytes

  NOTES
    Used mainly to detect rows with wrong extent information

  RETURN
    0   All bytes are zero (also when length is 0)
    1   At least one byte is not zero
*/

my_bool _ma_check_if_zero(uchar *pos, size_t length)
{
  size_t idx;
  for (idx= 0; idx < length; idx++)
  {
    if (pos[idx])
      return 1;                                 /* Found a non-zero byte */
  }
  return 0;
}
499 
500 /*
501   This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
502   call.  In these context the following code should be safe!
503  */
504 
_ma_decrement_open_count(MARIA_HA * info,my_bool lock_tables)505 int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
506 {
507   uchar buff[2];
508   register MARIA_SHARE *share= info->s;
509   int lock_error=0,write_error=0;
510   DBUG_ENTER("_ma_decrement_open_count");
511 
512   if (share->global_changed)
513   {
514     uint old_lock=info->lock_type;
515     share->global_changed=0;
516     lock_error= (my_disable_locking || ! lock_tables ? 0 :
517                  maria_lock_database(info, F_WRLCK));
518     /* Its not fatal even if we couldn't get the lock ! */
519     if (share->state.open_count > 0)
520     {
521       share->state.open_count--;
522       share->changed= 1;                        /* We have to update state */
523       /*
524         For temporary tables that will just be deleted, we don't have
525         to decrement state. For transactional tables the state will be
526         updated in maria_close().
527       */
528 
529       if (!share->temporary && !share->now_transactional)
530       {
531         mi_int2store(buff,share->state.open_count);
532         write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff),
533                                      sizeof(share->state.header) +
534                                      MARIA_FILE_OPEN_COUNT_OFFSET,
535                                      MYF(MY_NABP));
536       }
537     }
538     if (!lock_error && !my_disable_locking && lock_tables)
539       lock_error=maria_lock_database(info,old_lock);
540   }
541   DBUG_RETURN(MY_TEST(lock_error || write_error));
542 }
543 
544 
545 /** @brief mark file as crashed */
546 
_ma_mark_file_crashed(MARIA_SHARE * share)547 void _ma_mark_file_crashed(MARIA_SHARE *share)
548 {
549   uchar buff[2];
550   DBUG_ENTER("_ma_mark_file_crashed");
551 
552   share->state.changed|= STATE_CRASHED;
553   mi_int2store(buff, share->state.changed);
554   /*
555     We can ignore the errors, as if the mark failed, there isn't anything
556     else we can do;  The user should already have got an error that the
557     table was crashed.
558   */
559   (void) my_pwrite(share->kfile.file, buff, sizeof(buff),
560                    sizeof(share->state.header) +
561                    MARIA_FILE_CHANGED_OFFSET,
562                    MYF(MY_NABP));
563   DBUG_VOID_RETURN;
564 }
565 
566 /*
567   Handle a fatal error
568 
569   - Mark the table as crashed
570   - Print an error message, if we had not issued an error message before
571     that the table had been crashed.
572   - set my_errno to error
573   - If 'maria_assert_if_crashed_table is set, then assert.
574 */
575 
_ma_set_fatal_error(MARIA_SHARE * share,int error)576 void _ma_set_fatal_error(MARIA_SHARE *share, int error)
577 {
578   DBUG_PRINT("error", ("error: %d", error));
579   maria_mark_crashed_share(share);
580   if (!(share->state.changed & STATE_CRASHED_PRINTED))
581   {
582     share->state.changed|= STATE_CRASHED_PRINTED;
583     maria_print_error(share, error);
584   }
585   my_errno= error;
586   DBUG_ASSERT(!maria_assert_if_crashed_table);
587 }
588 
589 
590 /**
591    @brief Set uuid of for a Maria file
592 
593    @fn _ma_set_uuid()
594    @param share		Maria share
595    @param reset_uuid    Instead of setting file to maria_uuid, set it to
596 			0 to mark it as movable
597 */
598 
_ma_set_uuid(MARIA_SHARE * share,my_bool reset_uuid)599 my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
600 {
601   uchar buff[MY_UUID_SIZE], *uuid;
602 
603   uuid= maria_uuid;
604   if (reset_uuid)
605   {
606     bzero(buff, sizeof(buff));
607     uuid= buff;
608   }
609   return (my_bool) my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE,
610                              mi_uint2korr(share->state.header.base_pos),
611                              MYF(MY_NABP));
612 }
613