1 /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 
17 #include <my_global.h>
18 #include <my_sys.h>
19 #include <m_string.h>
20 #include "trnman.h"
21 #include "ma_checkpoint.h"
22 #include "ma_control_file.h"
23 
24 /*
25   Status variables: the number of transactions currently in the active
26   list, the number currently in the committed list, and the number of
27   TRN structures allocated since startup (reset by trnman_init()).
28 */
29 uint trnman_active_transactions, trnman_committed_transactions,
30   trnman_allocated_transactions;
31 
32 #ifdef WORKAROUND_GCC_4_3_2_BUG
33 volatile
34 #endif
35 /* sentinel head/tail of the list of active transactions (trid order); the conditional 'volatile' above qualifies this declaration */
36 static TRN active_list_min, active_list_max;
37 /* sentinel head/tail of the list of committed transactions (trid order) */
38 static TRN committed_list_min, committed_list_max;
39 
40 /* a counter, used to generate transaction ids */
41 static TrID global_trid_generator;
42 
43 /*
44   The minimum existing transaction id, returned by trnman_get_min_trid().
45   The default value is used when the transaction manager is not
46   initialized; probably when called from maria_chk.
47 */
48 static TrID trid_min_read_from= MAX_TRID;
49 
50 /* the mutex for everything above */
51 static mysql_mutex_t LOCK_trn_list;
52 
53 /* LIFO pool of unused TRN structures kept for reuse */
54 static TRN *pool;
55 
56 /* a hash that maps trid to a TRN structure; trnman_new_trn() inserts every new transaction, entries are deleted when a TRN is freed in trnman_end_trn() */
57 static LF_HASH trid_to_trn;
58 
59 /* an array that maps short_id of an active transaction to a TRN structure */
60 static TRN **short_trid_to_active_trn;
61 
62 /* forward declarations of helpers defined further down in this file */
63 static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool);
64 static void trnman_free_trn(TRN *);
65 
66 my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)=
67   default_trnman_end_trans_hook;
68 
69 /*
70   Simple interface functions
71   QQ: if they stay so simple, should we make them inline?
72 */
73 
trnman_increment_locked_tables(TRN * trn)74 uint trnman_increment_locked_tables(TRN *trn)
75 {
76   return trn->locked_tables++;
77 }
78 
trnman_has_locked_tables(TRN * trn)79 uint trnman_has_locked_tables(TRN *trn)
80 {
81   return trn->locked_tables;
82 }
83 
trnman_decrement_locked_tables(TRN * trn)84 uint trnman_decrement_locked_tables(TRN *trn)
85 {
86   return --trn->locked_tables;
87 }
88 
/**
  Overwrites the locked-tables counter of a transaction.

  @param trn            transaction to update
  @param locked_tables  new counter value
*/
void trnman_reset_locked_tables(TRN *trn, uint locked_tables)
{
  trn->locked_tables= locked_tables;
}
93 
#ifdef EXTRA_DEBUG
/**
  Reads the flags of a transaction (only built with EXTRA_DEBUG).

  @param trn  transaction to inspect

  @return current value of trn->flags
*/
uint16 trnman_get_flags(TRN *trn)
{
  uint16 current_flags= trn->flags;

  return current_flags;
}

/**
  Overwrites the flags of a transaction (only built with EXTRA_DEBUG).

  @param trn    transaction to update
  @param flags  new flags value
*/
void trnman_set_flags(TRN *trn, uint16 flags)
{
  trn->flags= flags;
}
#endif
105 
106 /** Wake up threads waiting for this transaction */
wt_thd_release_self(TRN * trn)107 static void wt_thd_release_self(TRN *trn)
108 {
109   if (trn->wt)
110   {
111     WT_RESOURCE_ID rc;
112     rc.type= &ma_rc_dup_unique;
113     rc.value= (intptr)trn;
114     wt_thd_release(trn->wt, & rc);
115     trn->wt= 0;
116   }
117 }
118 
119 static my_bool
default_trnman_end_trans_hook(TRN * trn,my_bool commit,my_bool active_transactions)120 default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)),
121                               my_bool commit __attribute__ ((unused)),
122                               my_bool active_transactions
123                               __attribute__ ((unused)))
124 {
125   return 0;
126 }
127 
128 
/**
  Key extraction callback passed to lf_hash_init() for trid_to_trn.

  The hash stores TRN pointers, so the element handed in is a TRN**;
  the key is the trid field of the TRN it points to.

  @param      trn     hash element (address of a TRN pointer)
  @param[out] len     set to the key length, sizeof(TrID)
  @param      unused  ignored

  @return address of the trid inside the referenced TRN
*/
static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
                              my_bool unused __attribute__ ((unused)))
{
  TRN *element= *((TRN **)trn);

  *len= sizeof(TrID);
  return (uchar *) &element->trid;
}
135 
136 
137 /**
138    @brief Initializes transaction manager.
139 
140    @param  initial_trid        Generated TrIDs will start from initial_trid+1.
141 
142    @return Operation status
143      @retval 0      OK
144      @retval !=0    Error
145 */
146 
trnman_init(TrID initial_trid)147 int trnman_init(TrID initial_trid)
148 {
149   DBUG_ENTER("trnman_init");
150   DBUG_PRINT("enter", ("initial_trid: %lu", (ulong) initial_trid));
151 
152   short_trid_to_active_trn= (TRN **)my_malloc(PSI_INSTRUMENT_ME, SHORT_TRID_MAX*sizeof(TRN*),
153                                      MYF(MY_WME|MY_ZEROFILL));
154   if (unlikely(!short_trid_to_active_trn))
155     DBUG_RETURN(1);
156   short_trid_to_active_trn--; /* min short_id is 1 */
157 
158   /*
159     Initialize lists.
160     active_list_max.min_read_from must be larger than any trid,
161     so that when an active list is empty we would could free
162     all committed list.
163     And  committed_list_max itself can not be freed so
164     committed_list_max.commit_trid must not be smaller that
165     active_list_max.min_read_from
166   */
167 
168   active_list_max.trid= active_list_min.trid= 0;
169   active_list_max.min_read_from= MAX_TRID;
170   active_list_max.next= active_list_min.prev= 0;
171   active_list_max.prev= &active_list_min;
172   active_list_min.next= &active_list_max;
173 
174   committed_list_max.commit_trid= MAX_TRID;
175   committed_list_max.next= committed_list_min.prev= 0;
176   committed_list_max.prev= &committed_list_min;
177   committed_list_min.next= &committed_list_max;
178 
179   trnman_active_transactions= 0;
180   trnman_committed_transactions= 0;
181   trnman_allocated_transactions= 0;
182   /* This is needed for recovery and repair */
183   dummy_transaction_object.min_read_from= ~(TrID) 0;
184   dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
185 
186   pool= 0;
187   global_trid_generator= initial_trid;
188   trid_min_read_from= initial_trid;
189   lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE,
190                0, 0, trn_get_hash_key, 0);
191   DBUG_PRINT("info", ("mysql_mutex_init LOCK_trn_list"));
192   mysql_mutex_init(key_LOCK_trn_list, &LOCK_trn_list, MY_MUTEX_INIT_FAST);
193 
194   DBUG_RETURN(0);
195 }
196 
197 /*
198   NOTE
199     this could only be called in the "idle" state - no transaction can be
200     running. See asserts below.
201 */
trnman_destroy()202 void trnman_destroy()
203 {
204   DBUG_ENTER("trnman_destroy");
205 
206   if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
207     DBUG_VOID_RETURN;
208   DBUG_ASSERT(trid_to_trn.count == 0);
209   DBUG_ASSERT(trnman_active_transactions == 0);
210   DBUG_ASSERT(trnman_committed_transactions == 0);
211   DBUG_ASSERT(active_list_max.prev == &active_list_min);
212   DBUG_ASSERT(active_list_min.next == &active_list_max);
213   DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
214   DBUG_ASSERT(committed_list_min.next == &committed_list_max);
215   while (pool)
216   {
217     TRN *trn= pool;
218     pool= pool->next;
219     DBUG_ASSERT(trn->wt == NULL);
220     mysql_mutex_destroy(&trn->state_lock);
221     my_free(trn);
222   }
223   lf_hash_destroy(&trid_to_trn);
224   DBUG_PRINT("info", ("mysql_mutex_destroy LOCK_trn_list"));
225   mysql_mutex_destroy(&LOCK_trn_list);
226   my_free(short_trid_to_active_trn+1);
227   short_trid_to_active_trn= NULL;
228 
229   DBUG_VOID_RETURN;
230 }
231 
232 /*
233   NOTE
234     TrID is limited to 6 bytes. Initial value of the generator
235     is set by the recovery code - being read from the last checkpoint
236     (or 1 on a first run).
237 */
new_trid()238 static TrID new_trid()
239 {
240   DBUG_ENTER("new_trid");
241   DBUG_ASSERT(global_trid_generator < MAX_INTERNAL_TRID);
242   DBUG_PRINT("info", ("mysql_mutex_assert_owner LOCK_trn_list"));
243   mysql_mutex_assert_owner(&LOCK_trn_list);
244   DBUG_RETURN(++global_trid_generator);
245 }
246 
get_short_trid(TRN * trn)247 static uint get_short_trid(TRN *trn)
248 {
249   int i= (int) ((global_trid_generator + (intptr)trn) * 312089 %
250                 SHORT_TRID_MAX) + 1;
251   uint res=0;
252 
253   for ( ; !res ; i= 1)
254   {
255     for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
256     {
257       void *tmp= NULL;
258       if (short_trid_to_active_trn[i] == NULL &&
259           my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
260       {
261         res= i;
262         break;
263       }
264     }
265   }
266   return res;
267 }
268 
269 /**
270   Allocates and initialzies a new TRN object
271 
272   @note the 'wt' parameter can only be 0 in a single-threaded code (or,
273   generally, where threads cannot block each other), otherwise the
274   first call to the deadlock detector will sigsegv.
275 */
276 
trnman_new_trn(WT_THD * wt)277 TRN *trnman_new_trn(WT_THD *wt)
278 {
279   int res;
280   TRN *trn;
281   union { TRN *trn; void *v; } tmp;
282   DBUG_ENTER("trnman_new_trn");
283 
284   /*
285     we have a mutex, to do simple things under it - allocate a TRN,
286     increment trnman_active_transactions, set trn->min_read_from.
287 
288     Note that all the above is fast. generating short_id may be slow,
289     as it involves scanning a large array - so it's done outside of the
290     mutex.
291   */
292 
293   DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list"));
294   mysql_mutex_lock(&LOCK_trn_list);
295 
296   /* Allocating a new TRN structure */
297   tmp.trn= pool;
298   /*
299     Popping an unused TRN from the pool
300     (ABA isn't possible, we're behind a mutex
301   */
302   while (tmp.trn && !my_atomic_casptr((void **)(char*) &pool, &tmp.v,
303                                       (void *)tmp.trn->next))
304     /* no-op */;
305 
306   /* Nothing in the pool ? Allocate a new one */
307   if (!(trn= tmp.trn))
308   {
309     /*
310       trn should be completely initialized at create time to allow
311       one to keep a known state on it.
312       (Like redo_lns, which is assumed to be 0 at start of row handling
313       and reset to zero before end of row handling)
314     */
315     trn= (TRN *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRN), MYF(MY_WME | MY_ZEROFILL));
316     if (unlikely(!trn))
317     {
318       DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
319       mysql_mutex_unlock(&LOCK_trn_list);
320       return 0;
321     }
322     trnman_allocated_transactions++;
323     mysql_mutex_init(key_TRN_state_lock, &trn->state_lock, MY_MUTEX_INIT_FAST);
324   }
325   trn->wt= wt;
326   trn->pins= lf_hash_get_pins(&trid_to_trn);
327   if (!trn->pins)
328   {
329     trnman_free_trn(trn);
330     mysql_mutex_unlock(&LOCK_trn_list);
331     return 0;
332   }
333 
334   trnman_active_transactions++;
335 
336   trn->min_read_from= active_list_min.next->trid;
337 
338   trn->trid= new_trid();
339 
340   trn->next= &active_list_max;
341   trn->prev= active_list_max.prev;
342   active_list_max.prev= trn->prev->next= trn;
343   trid_min_read_from= active_list_min.next->min_read_from;
344   DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
345   mysql_mutex_unlock(&LOCK_trn_list);
346 
347   if (unlikely(!trn->min_read_from))
348   {
349     /*
350       We are the only transaction. Set min_read_from so that we can read
351       our own rows
352     */
353     trn->min_read_from= trn->trid + 1;
354   }
355 
356   /* no other transaction can read changes done by this one */
357   trn->commit_trid=  MAX_TRID;
358   trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
359   trn->used_tables= 0;
360   trn->used_instances= 0;
361 
362   trn->locked_tables= 0;
363   trn->flags= 0;
364 
365   /*
366     only after the following function TRN is considered initialized,
367     so it must be done the last
368   */
369   mysql_mutex_lock(&trn->state_lock);
370   trn->short_id= get_short_trid(trn);
371   mysql_mutex_unlock(&trn->state_lock);
372 
373   res= lf_hash_insert(&trid_to_trn, trn->pins, &trn);
374   DBUG_ASSERT(res <= 0);
375   if (res)
376   {
377     trnman_end_trn(trn, 0);
378     return 0;
379   }
380 
381   DBUG_PRINT("exit", ("trn: %p  trid: 0x%lu  min_read_from: 0x%lu",
382                       trn, (ulong) trn->trid, (ulong) trn->min_read_from));
383 
384   DBUG_RETURN(trn);
385 }
386 
387 /*
388   remove a trn from the active list.
389   if necessary - move to committed list and set commit_trid
390 
391   NOTE
392     Locks are released at the end. In particular, after placing the
393     transaction in commit list, and after setting commit_trid. It's
394     important, as commit_trid affects visibility.  Locks don't affect
395     anything they simply delay execution of other threads - they could be
396     released arbitrarily late. In other words, when locks are released it
397     serves as a start banner for other threads, they start to run. So
398     everything they may need must be ready at that point.
399 
400   RETURN
401     0  ok
402     1  error
403 */
trnman_end_trn(TRN * trn,my_bool commit)404 my_bool trnman_end_trn(TRN *trn, my_bool commit)
405 {
406   int res= 1;
407   uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */
408   TRN *free_me= 0;
409   LF_PINS *pins= trn->pins;
410   DBUG_ENTER("trnman_end_trn");
411   DBUG_PRINT("enter", ("trn: %p  commit: %d", trn, commit));
412 
413   /* if a rollback, all UNDO records should have been executed */
414   DBUG_ASSERT(commit || trn->undo_lsn == 0);
415   DBUG_ASSERT(trn != &dummy_transaction_object);
416   DBUG_ASSERT(trn->locked_tables == 0 && trn->used_instances == 0);
417   DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list"));
418 
419   mysql_mutex_lock(&LOCK_trn_list);
420 
421   /* remove from active list */
422   trn->next->prev= trn->prev;
423   trn->prev->next= trn->next;
424 
425   /*
426     if trn was the oldest active transaction, now that it goes away there
427     may be committed transactions in the list which no active transaction
428     needs to bother about - clean up the committed list
429   */
430   if (trn->prev == &active_list_min)
431   {
432     uint free_me_count;
433     TRN *t;
434     for (t= committed_list_min.next, free_me_count= 0;
435          t->commit_trid < active_list_min.next->min_read_from;
436          t= t->next, free_me_count++) /* no-op */;
437 
438     DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) ||
439                 (t == committed_list_min.next && free_me_count == 0));
440     /* found transactions committed before the oldest active one */
441     if (t != committed_list_min.next)
442     {
443       free_me= committed_list_min.next;
444       committed_list_min.next= t;
445       t->prev->next= 0;
446       t->prev= &committed_list_min;
447       trnman_committed_transactions-= free_me_count;
448     }
449   }
450 
451   mysql_mutex_lock(&trn->state_lock);
452   if (commit)
453     trn->commit_trid= global_trid_generator;
454   wt_thd_release_self(trn);
455   mysql_mutex_unlock(&trn->state_lock);
456 
457   /*
458     if transaction is committed and it was not the only active transaction -
459     add it to the committed list
460   */
461   if (commit && active_list_min.next != &active_list_max)
462   {
463     trn->next= &committed_list_max;
464     trn->prev= committed_list_max.prev;
465     trnman_committed_transactions++;
466     committed_list_max.prev= trn->prev->next= trn;
467   }
468   else
469   {
470     trn->next= free_me;
471     free_me= trn;
472   }
473   trid_min_read_from= active_list_min.next->min_read_from;
474 
475   if ((*trnman_end_trans_hook)(trn, commit,
476                                active_list_min.next != &active_list_max))
477     res= -1;
478   trnman_active_transactions--;
479 
480   DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
481   mysql_mutex_unlock(&LOCK_trn_list);
482 
483   /*
484     the rest is done outside of a critical section
485 
486     note that we don't own trn anymore, it may be in a shared list now.
487     Thus, we cannot dereference it, and must use cached_short_id below.
488   */
489   my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0);
490 
491   /*
492     we, under the mutex, removed going-in-free_me transactions from the
493     active and committed lists, thus nobody else may see them when it scans
494     those lists, and thus nobody may want to free them. Now we don't
495     need a mutex to access free_me list
496   */
497   /* QQ: send them to the purge thread */
498   while (free_me)
499   {
500     TRN *t= free_me;
501     free_me= free_me->next;
502 
503     /* ignore OOM. it's harmless, and we can do nothing here anyway */
504     (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID));
505 
506     trnman_free_trn(t);
507   }
508 
509   lf_hash_put_pins(pins);
510 
511   DBUG_RETURN(res < 0);
512 }
513 
514 /*
515   free a trn (add to the pool, that is)
516   note - we can never really free() a TRN if there's at least one other
517   running transaction - see, e.g., how lock waits are implemented in
518   lockman.c
519   The same is true for other lock-free data structures too. We may need some
520   kind of FLUSH command to reset them all - ensuring that no transactions are
521   running. It may even be called automatically on checkpoints if no
522   transactions are running.
523 */
trnman_free_trn(TRN * trn)524 static void trnman_free_trn(TRN *trn)
525 {
526   /*
527      union is to solve strict aliasing issue.
528      without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp
529      modifies the value of tmp.
530   */
531   union { TRN *trn; void *v; } tmp;
532 
533   DBUG_ASSERT(trn != &dummy_transaction_object);
534 
535   mysql_mutex_lock(&trn->state_lock);
536   trn->short_id= 0;
537   mysql_mutex_unlock(&trn->state_lock);
538 
539   tmp.trn= pool;
540 
541   do
542   {
543     /*
544       without this volatile cast gcc-3.4.4 moves the assignment
545       down after the loop at -O2
546     */
547     *(TRN * volatile *)&(trn->next)= tmp.trn;
548   } while (!my_atomic_casptr((void **)(char*)&pool, &tmp.v, trn));
549 }
550 
551 /*
552   NOTE
553     here we access the hash in a lock-free manner.
554     It's safe, a 'found' TRN can never be freed/reused before we access it.
555     In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
556     can only be removed from the hash when:
557                 found->commit_trid < ALL (trn->min_read_from)
558     that is, at least
559                 found->commit_trid < trn->min_read_from
560     but
561                 found->trid >= trn->min_read_from
562     and
563                 found->commit_trid > found->trid
564 
565   RETURN
566     1   can
567     0   cannot
568    -1   error (OOM)
569 */
trnman_can_read_from(TRN * trn,TrID trid)570 int trnman_can_read_from(TRN *trn, TrID trid)
571 {
572   TRN **found;
573   my_bool can;
574 
575   if (trid < trn->min_read_from)
576     return 1; /* Row is visible by all transactions in the system */
577 
578   if (trid >= trn->trid)
579   {
580     /*
581       We have now two cases
582       trid > trn->trid, in which case the row is from a new transaction
583       and not visible, in which case we should return 0.
584       trid == trn->trid in which case the row is from the current transaction
585       and we should return 1
586     */
587     return trid == trn->trid;
588   }
589 
590   found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
591   if (found == NULL)
592     return 0; /* not in the hash of transactions = cannot read */
593   if (found == MY_ERRPTR)
594     return -1;
595 
596   can= (*found)->commit_trid < trn->trid;
597   lf_hash_search_unpin(trn->pins);
598   return can;
599 }
600 
601 /**
602   Finds a TRN by its TrID
603 
604   @param trn    current trn. Needed for pinning pointers (see lf_pin)
605   @param trid   trid to search for
606 
607   @return found trn or 0
608 
609   @note that trn is returned with its state locked!
610 */
trnman_trid_to_trn(TRN * trn,TrID trid)611 TRN *trnman_trid_to_trn(TRN *trn, TrID trid)
612 {
613   TRN **found;
614 
615   if (trid < trn->min_read_from)
616     return 0; /* it's committed eons ago */
617 
618   found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
619   if (found == NULL || found == MY_ERRPTR)
620     return 0; /* no luck */
621 
622   /* we've found something */
623   mysql_mutex_lock(&(*found)->state_lock);
624 
625   if ((*found)->short_id == 0)
626   {
627     mysql_mutex_unlock(&(*found)->state_lock);
628     lf_hash_search_unpin(trn->pins);
629     return 0; /* but it was a ghost */
630   }
631   lf_hash_search_unpin(trn->pins);
632 
633   /* Gotcha! */
634   return *found;
635 }
636 
637 /* TODO: the stubs below are waiting for savepoints to be implemented */
638 
/**
  Stub, does nothing until savepoints are implemented.

  @param trn  ignored
*/
void trnman_new_statement(TRN *trn __attribute__ ((unused)))
{
  /* intentionally empty */
}
642 
/**
  Stub, does nothing until savepoints are implemented.

  @param trn  ignored
*/
void trnman_rollback_statement(TRN *trn __attribute__ ((unused)))
{
  /* intentionally empty */
}
646 
647 
648 /**
649    @brief Allocates buffers and stores in them some info about transactions
650 
651    Does the allocation because the caller cannot know the size itself.
652    Memory freeing is to be done by the caller (if the "str" member of the
653    LEX_STRING is not NULL).
654    The caller has the intention of doing checkpoints.
       The whole function runs with LOCK_trn_list held.
       Note that when the second allocation fails, str_act->str has already
       been allocated and is left for the caller to free.
655 
656    @param[out]  str_act    pointer to where the allocated buffer,
657                            and its size, will be put; buffer will be filled
658                            with info about active transactions
659    @param[out]  str_com    pointer to where the allocated buffer,
660                            and its size, will be put; buffer will be filled
661                            with info about committed transactions
       @param[out]  min_rec_lsn pointer to where the minimum rec_lsn of the
                               active transactions will be put (LSN_MAX if
                               none of them has one); written only on success
662    @param[out]  min_first_undo_lsn pointer to where the minimum
663                            first_undo_lsn of all transactions will be put;
                               written only on success
664 
665    @return Operation status
666      @retval 0      OK
667      @retval 1      Error (a buffer could not be allocated)
668 */
669 
trnman_collect_transactions(LEX_STRING * str_act,LEX_STRING * str_com,LSN * min_rec_lsn,LSN * min_first_undo_lsn)670 my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
671                                     LSN *min_rec_lsn, LSN *min_first_undo_lsn)
672 {
673   my_bool error;
674   TRN *trn;
675   char *ptr;
676   uint stored_transactions= 0;
677   LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX;
678   DBUG_ENTER("trnman_collect_transactions");
679 
680   DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str));
681 
682   /* validate the use of read_non_atomic() in general: */
683   compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8));
684   mysql_mutex_lock(&LOCK_trn_list);
      /*
        Upper bound for the active buffer: a fixed header plus one record per
        active transaction. Some transactions are skipped below, so the length
        is corrected after the loop.
      */
685   str_act->length= 2 + /* number of active transactions */
686     LSN_STORE_SIZE + /* minimum of their rec_lsn */
687     TRANSID_SIZE + /* current TrID generator value */
688     (2 + /* short id */
689      6 + /* long id */
690      LSN_STORE_SIZE + /* undo_lsn */
691 #ifdef MARIA_VERSIONING /* not enabled yet */
692      LSN_STORE_SIZE + /* undo_purge_lsn */
693 #endif
694      LSN_STORE_SIZE /* first_undo_lsn */
695      ) * trnman_active_transactions;
      /* exact size of the committed buffer: no committed trn is skipped */
696   str_com->length= 4 + /* number of committed transactions */
697     (6 + /* long id */
698 #ifdef MARIA_VERSIONING /* not enabled yet */
699      LSN_STORE_SIZE + /* undo_purge_lsn */
700 #endif
701      LSN_STORE_SIZE /* first_undo_lsn */
702      ) * trnman_committed_transactions;
703   if ((NULL == (str_act->str= my_malloc(PSI_INSTRUMENT_ME, str_act->length, MYF(MY_WME)))) ||
704       (NULL == (str_com->str= my_malloc(PSI_INSTRUMENT_ME, str_com->length, MYF(MY_WME)))))
705     goto err;
706   /* First, the active transactions */
      /* skip the count and minimum rec_lsn fields, they are filled in last */
707   ptr= str_act->str + 2 + LSN_STORE_SIZE;
708   transid_store(ptr, global_trid_generator);
709   ptr+= TRANSID_SIZE;
710   for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
711   {
712     uint sid;
713     LSN rec_lsn, undo_lsn, first_undo_lsn;
        /* short_id is the only field read under the trn's own state_lock */
714     mysql_mutex_lock(&trn->state_lock);
715     sid= trn->short_id;
716     mysql_mutex_unlock(&trn->state_lock);
717     if (sid == 0)
718     {
719       /*
720         Not even inited, has done nothing. Or it is the
721         dummy_transaction_object, which does only non-transactional
722         immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so
723         can be forgotten for Checkpoint.
724       */
725       continue;
726     }
727     /* needed for low-water mark calculation */
728     if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) &&
729         (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0))
730       minimum_rec_lsn= rec_lsn;
731     /*
732       If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be
733       discovered when seeing that log record which is for sure located after
734       checkpoint_start_log_horizon.
735     */
736     if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) &
737          TRANSACTION_LOGGED_LONG_ID) == 0)
738       continue;
739     /*
740       On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log
741       records; so we must include trn in the checkpoint now, because we cannot
742       count on LOGREC_LONG_TRANSACTION_ID (as we are already past it).
743     */
744     undo_lsn= trn->undo_lsn;
745     stored_transactions++;
        /* one record: short id, long id, undo_lsn, first_undo_lsn */
746     int2store(ptr, sid);
747     ptr+= 2;
748     int6store(ptr, trn->trid);
749     ptr+= 6;
750     lsn_store(ptr, undo_lsn); /* needed for rollback */
751     ptr+= LSN_STORE_SIZE;
752     /* needed for low-water mark calculation */
753     if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) &&
754         (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0))
755       minimum_first_undo_lsn= first_undo_lsn;
756     lsn_store(ptr, first_undo_lsn);
757     ptr+= LSN_STORE_SIZE;
758 #ifdef MARIA_VERSIONING /* not enabled yet */
759     /* to know where purging should start (last delete of this trn) */
760     lsn_store(ptr, trn->undo_purge_lsn);
761     ptr+= LSN_STORE_SIZE;
762 #endif
763     /**
764        @todo RECOVERY: add a comment explaining why we can dirtily read some
765        vars, inspired by the text of "assumption 8" in WL#3072
766     */
767   }
768   str_act->length= ptr - str_act->str; /* as we maybe over-estimated */
      /* go back to the start of the buffer and fill in the header */
769   ptr= str_act->str;
770   DBUG_PRINT("info",("collected %u active transactions",
771                      (uint)stored_transactions));
772   int2store(ptr, stored_transactions);
773   ptr+= 2;
774   /* this LSN influences how REDOs for any page can be ignored by Recovery */
775   lsn_store(ptr, minimum_rec_lsn);
776   /* one day there will also be a list of prepared transactions */
777   /* do the same for committed ones */
778   ptr= str_com->str;
779   int4store(ptr, trnman_committed_transactions);
780   ptr+= 4;
781   DBUG_PRINT("info",("collected %u committed transactions",
782                      (uint)trnman_committed_transactions));
783   for (trn= committed_list_min.next; trn != &committed_list_max;
784        trn= trn->next)
785   {
786     LSN first_undo_lsn;
787     int6store(ptr, trn->trid);
788     ptr+= 6;
789 #ifdef MARIA_VERSIONING /* not enabled yet */
790     lsn_store(ptr, trn->undo_purge_lsn);
791     ptr+= LSN_STORE_SIZE;
792 #endif
        /* unlike the active loop, the value is not checked for being > 0 */
793     first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn);
794     if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)
795       minimum_first_undo_lsn= first_undo_lsn;
796     lsn_store(ptr, first_undo_lsn);
797     ptr+= LSN_STORE_SIZE;
798   }
799   /*
800     TODO: if we see there exists no transaction (active and committed) we can
801     tell the lock-free structures to do some freeing (my_free()).
802   */
803   error= 0;
804   *min_rec_lsn= minimum_rec_lsn;
805   *min_first_undo_lsn= minimum_first_undo_lsn;
806   goto end;
807 err:
808   error= 1;
809 end:
      /* both the success and the error path release the list mutex here */
810   mysql_mutex_unlock(&LOCK_trn_list);
811   DBUG_RETURN(error);
812 }
813 
814 
trnman_recreate_trn_from_recovery(uint16 shortid,TrID longid)815 TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid)
816 {
817   TrID old_trid_generator= global_trid_generator;
818   TRN *trn;
819   DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
820   global_trid_generator= longid-1; /* force a correct trid in the new trn */
821   if (unlikely((trn= trnman_new_trn(NULL)) == NULL))
822     return NULL;
823   /* deallocate excessive allocations of trnman_new_trn() */
824   global_trid_generator= old_trid_generator;
825   set_if_bigger(global_trid_generator, longid);
826   short_trid_to_active_trn[trn->short_id]= 0;
827   DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
828   short_trid_to_active_trn[shortid]= trn;
829   trn->short_id= shortid;
830   return trn;
831 }
832 
833 
trnman_get_any_trn()834 TRN *trnman_get_any_trn()
835 {
836   TRN *trn= active_list_min.next;
837   return (trn != &active_list_max) ? trn : NULL;
838 }
839 
840 
841 /**
842   Returns the minimum existing transaction id. May return a too small
843   number in race conditions, but this is ok as the value is used to
844   remove not visible transid from index/rows.
845 */
846 
trnman_get_min_trid()847 TrID trnman_get_min_trid()
848 {
849   return trid_min_read_from;
850 }
851 
852 
853 /**
854   Returns the minimum possible transaction id
855 
856   @notes
857   If there is no transactions running, returns number for next running
858   transaction.
859   If one has an active transaction, the returned number will be less or
860   equal to this.  If one is not running in a transaction one will ge the
861   number for the next started transaction.  This is used in create table
862   to get a safe minimum trid to use.
863 */
864 
trnman_get_min_safe_trid()865 TrID trnman_get_min_safe_trid()
866 {
867   TrID trid;
868   mysql_mutex_lock(&LOCK_trn_list);
869   trid= MY_MIN(active_list_min.next->min_read_from,
870             global_trid_generator);
871   mysql_mutex_unlock(&LOCK_trn_list);
872   return trid;
873 }
874 
875 
876 /**
877   Returns maximum transaction id given to a transaction so far.
878 */
879 
trnman_get_max_trid()880 TrID trnman_get_max_trid()
881 {
882   TrID id;
883   if (short_trid_to_active_trn == NULL)
884     return 0;
885   mysql_mutex_lock(&LOCK_trn_list);
886   id= global_trid_generator;
887   mysql_mutex_unlock(&LOCK_trn_list);
888   return id;
889 }
890 
891 /**
892   @brief Check if there exist an active transaction between two commit_id's
893 
894   @todo
895     Improve speed of this.
896       - Store transactions in tree or skip list
897       - Have function to copying all active transaction id's to b-tree
898         and use b-tree for checking states.  This could be a big win
899         for checkpoint that will call this function for a lot of objects.
900 
901   @return
902     0   No transaction exists
903     1   There is at least on active transaction in the given range
904 */
905 
trnman_exists_active_transactions(TrID min_id,TrID max_id,my_bool trnman_is_locked)906 my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id,
907                                           my_bool trnman_is_locked)
908 {
909   TRN *trn;
910   my_bool ret= 0;
911 
912   if (!trnman_is_locked)
913     mysql_mutex_lock(&LOCK_trn_list);
914   mysql_mutex_assert_owner(&LOCK_trn_list);
915   for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
916   {
917     /*
918       We use <= for max_id as max_id is a commit_trid and trn->trid
919       is transaction id.  When calculating commit_trid we use the
920       current value of global_trid_generator.  global_trid_generator is
921       incremented for each new transaction.
922 
923       For example, assuming we have
924       min_id = 5
925       max_id = 10
926 
927       A trid of value 5 can't see the history event between 5 & 10
928       at it vas started before min_id 5 was committed.
929       A trid of value 10 can't see the next history event (max_id = 10)
930       as it started before this was committed. In this case it must use
931       the this event.
932     */
933     if (trn->trid > min_id && trn->trid <= max_id)
934     {
935       ret= 1;
936       break;
937     }
938   }
939   if (!trnman_is_locked)
940     mysql_mutex_unlock(&LOCK_trn_list);
941   return ret;
942 }
943 
944 
945 /**
946    lock transaction list
947 */
948 
trnman_lock()949 void trnman_lock()
950 {
951   mysql_mutex_lock(&LOCK_trn_list);
952 }
953 
954 
955 /**
956    unlock transaction list
957 */
958 
trnman_unlock()959 void trnman_unlock()
960 {
961   mysql_mutex_unlock(&LOCK_trn_list);
962 }
963 
964 
965 /**
966   Is trman initialized
967 */
968 
trman_is_inited()969 my_bool trman_is_inited()
970 {
971   return (short_trid_to_active_trn != NULL);
972 }
973