1 /* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15
16
17 #include <my_global.h>
18 #include <my_sys.h>
19 #include <m_string.h>
20 #include "trnman.h"
21 #include "ma_checkpoint.h"
22 #include "ma_control_file.h"
23
24 /*
25 status variables:
26 how many trns in the active list currently,
27 in the committed list currently, allocated since startup.
28 */
29 uint trnman_active_transactions, trnman_committed_transactions,
30 trnman_allocated_transactions;
31
32 #ifdef WORKAROUND_GCC_4_3_2_BUG
33 volatile
34 #endif
35 /* list of active transactions in the trid order */
36 static TRN active_list_min, active_list_max;
37 /* list of committed transactions in the trid order */
38 static TRN committed_list_min, committed_list_max;
39
40 /* a counter, used to generate transaction ids */
41 static TrID global_trid_generator;
42
43 /*
44 The minimum existing transaction id for trnman_get_min_trid()
45 The default value is used when transaction manager not initialize;
46 Probably called from maria_chk
47 */
48 static TrID trid_min_read_from= MAX_TRID;
49
50 /* the mutex for everything above */
51 static mysql_mutex_t LOCK_trn_list;
52
53 /* LIFO pool of unused TRN structured for reuse */
54 static TRN *pool;
55
56 /* a hash for committed transactions that maps trid to a TRN structure */
57 static LF_HASH trid_to_trn;
58
59 /* an array that maps short_id of an active transaction to a TRN structure */
60 static TRN **short_trid_to_active_trn;
61
62 /* locks for short_trid_to_active_trn and pool */
63 static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool);
64 static void trnman_free_trn(TRN *);
65
66 my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)=
67 default_trnman_end_trans_hook;
68
69 /*
70 Simple interface functions
71 QQ: if they stay so simple, should we make them inline?
72 */
73
trnman_increment_locked_tables(TRN * trn)74 uint trnman_increment_locked_tables(TRN *trn)
75 {
76 return trn->locked_tables++;
77 }
78
trnman_has_locked_tables(TRN * trn)79 uint trnman_has_locked_tables(TRN *trn)
80 {
81 return trn->locked_tables;
82 }
83
trnman_decrement_locked_tables(TRN * trn)84 uint trnman_decrement_locked_tables(TRN *trn)
85 {
86 return --trn->locked_tables;
87 }
88
/**
  Overwrites the locked-tables counter of a transaction.

  @param trn            transaction to update
  @param locked_tables  new value of the counter
*/
void trnman_reset_locked_tables(TRN *trn, uint locked_tables)
{
  trn->locked_tables= locked_tables;
}
93
94 #ifdef EXTRA_DEBUG
trnman_get_flags(TRN * trn)95 uint16 trnman_get_flags(TRN *trn)
96 {
97 return trn->flags;
98 }
99
/**
  Overwrites the flags of a transaction (only compiled with EXTRA_DEBUG).

  @param trn    transaction to update
  @param flags  new value of trn->flags
*/
void trnman_set_flags(TRN *trn, uint16 flags)
{
  trn->flags= flags;
}
104 #endif
105
106 /** Wake up threads waiting for this transaction */
wt_thd_release_self(TRN * trn)107 static void wt_thd_release_self(TRN *trn)
108 {
109 if (trn->wt)
110 {
111 WT_RESOURCE_ID rc;
112 rc.type= &ma_rc_dup_unique;
113 rc.value= (intptr)trn;
114 wt_thd_release(trn->wt, & rc);
115 trn->wt= 0;
116 }
117 }
118
119 static my_bool
default_trnman_end_trans_hook(TRN * trn,my_bool commit,my_bool active_transactions)120 default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)),
121 my_bool commit __attribute__ ((unused)),
122 my_bool active_transactions
123 __attribute__ ((unused)))
124 {
125 return 0;
126 }
127
128
/**
  Key extraction callback registered with lf_hash_init() for trid_to_trn.

  The hash stores TRN pointers, so the element is a TRN** and the key is
  the trid member of the pointed-to TRN.

  @param trn     pointer to the hash element (a TRN *)
  @param len     [out] set to the key length, sizeof(TrID)
  @param unused  not used

  @return address of the trid of the stored transaction
*/
static uchar *trn_get_hash_key(const uchar *trn, size_t *len,
                               my_bool unused __attribute__ ((unused)))
{
  TRN *element= *((TRN **) trn);

  *len= sizeof(TrID);
  return (uchar *) &element->trid;
}
135
136
137 /**
138 @brief Initializes transaction manager.
139
140 @param initial_trid Generated TrIDs will start from initial_trid+1.
141
142 @return Operation status
143 @retval 0 OK
144 @retval !=0 Error
145 */
146
trnman_init(TrID initial_trid)147 int trnman_init(TrID initial_trid)
148 {
149 DBUG_ENTER("trnman_init");
150 DBUG_PRINT("enter", ("initial_trid: %lu", (ulong) initial_trid));
151
152 short_trid_to_active_trn= (TRN **)my_malloc(PSI_INSTRUMENT_ME, SHORT_TRID_MAX*sizeof(TRN*),
153 MYF(MY_WME|MY_ZEROFILL));
154 if (unlikely(!short_trid_to_active_trn))
155 DBUG_RETURN(1);
156 short_trid_to_active_trn--; /* min short_id is 1 */
157
158 /*
159 Initialize lists.
160 active_list_max.min_read_from must be larger than any trid,
161 so that when an active list is empty we would could free
162 all committed list.
163 And committed_list_max itself can not be freed so
164 committed_list_max.commit_trid must not be smaller that
165 active_list_max.min_read_from
166 */
167
168 active_list_max.trid= active_list_min.trid= 0;
169 active_list_max.min_read_from= MAX_TRID;
170 active_list_max.next= active_list_min.prev= 0;
171 active_list_max.prev= &active_list_min;
172 active_list_min.next= &active_list_max;
173
174 committed_list_max.commit_trid= MAX_TRID;
175 committed_list_max.next= committed_list_min.prev= 0;
176 committed_list_max.prev= &committed_list_min;
177 committed_list_min.next= &committed_list_max;
178
179 trnman_active_transactions= 0;
180 trnman_committed_transactions= 0;
181 trnman_allocated_transactions= 0;
182 /* This is needed for recovery and repair */
183 dummy_transaction_object.min_read_from= ~(TrID) 0;
184 dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID;
185
186 pool= 0;
187 global_trid_generator= initial_trid;
188 trid_min_read_from= initial_trid;
189 lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE,
190 0, 0, trn_get_hash_key, 0);
191 DBUG_PRINT("info", ("mysql_mutex_init LOCK_trn_list"));
192 mysql_mutex_init(key_LOCK_trn_list, &LOCK_trn_list, MY_MUTEX_INIT_FAST);
193
194 DBUG_RETURN(0);
195 }
196
197 /*
198 NOTE
199 this could only be called in the "idle" state - no transaction can be
200 running. See asserts below.
201 */
trnman_destroy()202 void trnman_destroy()
203 {
204 DBUG_ENTER("trnman_destroy");
205
206 if (short_trid_to_active_trn == NULL) /* trnman already destroyed */
207 DBUG_VOID_RETURN;
208 DBUG_ASSERT(trid_to_trn.count == 0);
209 DBUG_ASSERT(trnman_active_transactions == 0);
210 DBUG_ASSERT(trnman_committed_transactions == 0);
211 DBUG_ASSERT(active_list_max.prev == &active_list_min);
212 DBUG_ASSERT(active_list_min.next == &active_list_max);
213 DBUG_ASSERT(committed_list_max.prev == &committed_list_min);
214 DBUG_ASSERT(committed_list_min.next == &committed_list_max);
215 while (pool)
216 {
217 TRN *trn= pool;
218 pool= pool->next;
219 DBUG_ASSERT(trn->wt == NULL);
220 mysql_mutex_destroy(&trn->state_lock);
221 my_free(trn);
222 }
223 lf_hash_destroy(&trid_to_trn);
224 DBUG_PRINT("info", ("mysql_mutex_destroy LOCK_trn_list"));
225 mysql_mutex_destroy(&LOCK_trn_list);
226 my_free(short_trid_to_active_trn+1);
227 short_trid_to_active_trn= NULL;
228
229 DBUG_VOID_RETURN;
230 }
231
232 /*
233 NOTE
234 TrID is limited to 6 bytes. Initial value of the generator
235 is set by the recovery code - being read from the last checkpoint
236 (or 1 on a first run).
237 */
new_trid()238 static TrID new_trid()
239 {
240 DBUG_ENTER("new_trid");
241 DBUG_ASSERT(global_trid_generator < MAX_INTERNAL_TRID);
242 DBUG_PRINT("info", ("mysql_mutex_assert_owner LOCK_trn_list"));
243 mysql_mutex_assert_owner(&LOCK_trn_list);
244 DBUG_RETURN(++global_trid_generator);
245 }
246
get_short_trid(TRN * trn)247 static uint get_short_trid(TRN *trn)
248 {
249 int i= (int) ((global_trid_generator + (intptr)trn) * 312089 %
250 SHORT_TRID_MAX) + 1;
251 uint res=0;
252
253 for ( ; !res ; i= 1)
254 {
255 for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */
256 {
257 void *tmp= NULL;
258 if (short_trid_to_active_trn[i] == NULL &&
259 my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn))
260 {
261 res= i;
262 break;
263 }
264 }
265 }
266 return res;
267 }
268
269 /**
270 Allocates and initialzies a new TRN object
271
272 @note the 'wt' parameter can only be 0 in a single-threaded code (or,
273 generally, where threads cannot block each other), otherwise the
274 first call to the deadlock detector will sigsegv.
275 */
276
trnman_new_trn(WT_THD * wt)277 TRN *trnman_new_trn(WT_THD *wt)
278 {
279 int res;
280 TRN *trn;
281 union { TRN *trn; void *v; } tmp;
282 DBUG_ENTER("trnman_new_trn");
283
284 /*
285 we have a mutex, to do simple things under it - allocate a TRN,
286 increment trnman_active_transactions, set trn->min_read_from.
287
288 Note that all the above is fast. generating short_id may be slow,
289 as it involves scanning a large array - so it's done outside of the
290 mutex.
291 */
292
293 DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list"));
294 mysql_mutex_lock(&LOCK_trn_list);
295
296 /* Allocating a new TRN structure */
297 tmp.trn= pool;
298 /*
299 Popping an unused TRN from the pool
300 (ABA isn't possible, we're behind a mutex
301 */
302 while (tmp.trn && !my_atomic_casptr((void **)(char*) &pool, &tmp.v,
303 (void *)tmp.trn->next))
304 /* no-op */;
305
306 /* Nothing in the pool ? Allocate a new one */
307 if (!(trn= tmp.trn))
308 {
309 /*
310 trn should be completely initialized at create time to allow
311 one to keep a known state on it.
312 (Like redo_lns, which is assumed to be 0 at start of row handling
313 and reset to zero before end of row handling)
314 */
315 trn= (TRN *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRN), MYF(MY_WME | MY_ZEROFILL));
316 if (unlikely(!trn))
317 {
318 DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
319 mysql_mutex_unlock(&LOCK_trn_list);
320 return 0;
321 }
322 trnman_allocated_transactions++;
323 mysql_mutex_init(key_TRN_state_lock, &trn->state_lock, MY_MUTEX_INIT_FAST);
324 }
325 trn->wt= wt;
326 trn->pins= lf_hash_get_pins(&trid_to_trn);
327 if (!trn->pins)
328 {
329 trnman_free_trn(trn);
330 mysql_mutex_unlock(&LOCK_trn_list);
331 return 0;
332 }
333
334 trnman_active_transactions++;
335
336 trn->min_read_from= active_list_min.next->trid;
337
338 trn->trid= new_trid();
339
340 trn->next= &active_list_max;
341 trn->prev= active_list_max.prev;
342 active_list_max.prev= trn->prev->next= trn;
343 trid_min_read_from= active_list_min.next->min_read_from;
344 DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
345 mysql_mutex_unlock(&LOCK_trn_list);
346
347 if (unlikely(!trn->min_read_from))
348 {
349 /*
350 We are the only transaction. Set min_read_from so that we can read
351 our own rows
352 */
353 trn->min_read_from= trn->trid + 1;
354 }
355
356 /* no other transaction can read changes done by this one */
357 trn->commit_trid= MAX_TRID;
358 trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0;
359 trn->used_tables= 0;
360 trn->used_instances= 0;
361
362 trn->locked_tables= 0;
363 trn->flags= 0;
364
365 /*
366 only after the following function TRN is considered initialized,
367 so it must be done the last
368 */
369 mysql_mutex_lock(&trn->state_lock);
370 trn->short_id= get_short_trid(trn);
371 mysql_mutex_unlock(&trn->state_lock);
372
373 res= lf_hash_insert(&trid_to_trn, trn->pins, &trn);
374 DBUG_ASSERT(res <= 0);
375 if (res)
376 {
377 trnman_end_trn(trn, 0);
378 return 0;
379 }
380
381 DBUG_PRINT("exit", ("trn: %p trid: 0x%lu min_read_from: 0x%lu",
382 trn, (ulong) trn->trid, (ulong) trn->min_read_from));
383
384 DBUG_RETURN(trn);
385 }
386
387 /*
388 remove a trn from the active list.
389 if necessary - move to committed list and set commit_trid
390
391 NOTE
392 Locks are released at the end. In particular, after placing the
393 transaction in commit list, and after setting commit_trid. It's
394 important, as commit_trid affects visibility. Locks don't affect
395 anything they simply delay execution of other threads - they could be
396 released arbitrarily late. In other words, when locks are released it
397 serves as a start banner for other threads, they start to run. So
398 everything they may need must be ready at that point.
399
400 RETURN
401 0 ok
402 1 error
403 */
trnman_end_trn(TRN * trn,my_bool commit)404 my_bool trnman_end_trn(TRN *trn, my_bool commit)
405 {
406 int res= 1;
407 uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */
408 TRN *free_me= 0;
409 LF_PINS *pins= trn->pins;
410 DBUG_ENTER("trnman_end_trn");
411 DBUG_PRINT("enter", ("trn: %p commit: %d", trn, commit));
412
413 /* if a rollback, all UNDO records should have been executed */
414 DBUG_ASSERT(commit || trn->undo_lsn == 0);
415 DBUG_ASSERT(trn != &dummy_transaction_object);
416 DBUG_ASSERT(trn->locked_tables == 0 && trn->used_instances == 0);
417 DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list"));
418
419 mysql_mutex_lock(&LOCK_trn_list);
420
421 /* remove from active list */
422 trn->next->prev= trn->prev;
423 trn->prev->next= trn->next;
424
425 /*
426 if trn was the oldest active transaction, now that it goes away there
427 may be committed transactions in the list which no active transaction
428 needs to bother about - clean up the committed list
429 */
430 if (trn->prev == &active_list_min)
431 {
432 uint free_me_count;
433 TRN *t;
434 for (t= committed_list_min.next, free_me_count= 0;
435 t->commit_trid < active_list_min.next->min_read_from;
436 t= t->next, free_me_count++) /* no-op */;
437
438 DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) ||
439 (t == committed_list_min.next && free_me_count == 0));
440 /* found transactions committed before the oldest active one */
441 if (t != committed_list_min.next)
442 {
443 free_me= committed_list_min.next;
444 committed_list_min.next= t;
445 t->prev->next= 0;
446 t->prev= &committed_list_min;
447 trnman_committed_transactions-= free_me_count;
448 }
449 }
450
451 mysql_mutex_lock(&trn->state_lock);
452 if (commit)
453 trn->commit_trid= global_trid_generator;
454 wt_thd_release_self(trn);
455 mysql_mutex_unlock(&trn->state_lock);
456
457 /*
458 if transaction is committed and it was not the only active transaction -
459 add it to the committed list
460 */
461 if (commit && active_list_min.next != &active_list_max)
462 {
463 trn->next= &committed_list_max;
464 trn->prev= committed_list_max.prev;
465 trnman_committed_transactions++;
466 committed_list_max.prev= trn->prev->next= trn;
467 }
468 else
469 {
470 trn->next= free_me;
471 free_me= trn;
472 }
473 trid_min_read_from= active_list_min.next->min_read_from;
474
475 if ((*trnman_end_trans_hook)(trn, commit,
476 active_list_min.next != &active_list_max))
477 res= -1;
478 trnman_active_transactions--;
479
480 DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list"));
481 mysql_mutex_unlock(&LOCK_trn_list);
482
483 /*
484 the rest is done outside of a critical section
485
486 note that we don't own trn anymore, it may be in a shared list now.
487 Thus, we cannot dereference it, and must use cached_short_id below.
488 */
489 my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0);
490
491 /*
492 we, under the mutex, removed going-in-free_me transactions from the
493 active and committed lists, thus nobody else may see them when it scans
494 those lists, and thus nobody may want to free them. Now we don't
495 need a mutex to access free_me list
496 */
497 /* QQ: send them to the purge thread */
498 while (free_me)
499 {
500 TRN *t= free_me;
501 free_me= free_me->next;
502
503 /* ignore OOM. it's harmless, and we can do nothing here anyway */
504 (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID));
505
506 trnman_free_trn(t);
507 }
508
509 lf_hash_put_pins(pins);
510
511 DBUG_RETURN(res < 0);
512 }
513
514 /*
515 free a trn (add to the pool, that is)
516 note - we can never really free() a TRN if there's at least one other
517 running transaction - see, e.g., how lock waits are implemented in
518 lockman.c
519 The same is true for other lock-free data structures too. We may need some
520 kind of FLUSH command to reset them all - ensuring that no transactions are
521 running. It may even be called automatically on checkpoints if no
522 transactions are running.
523 */
trnman_free_trn(TRN * trn)524 static void trnman_free_trn(TRN *trn)
525 {
526 /*
527 union is to solve strict aliasing issue.
528 without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp
529 modifies the value of tmp.
530 */
531 union { TRN *trn; void *v; } tmp;
532
533 DBUG_ASSERT(trn != &dummy_transaction_object);
534
535 mysql_mutex_lock(&trn->state_lock);
536 trn->short_id= 0;
537 mysql_mutex_unlock(&trn->state_lock);
538
539 tmp.trn= pool;
540
541 do
542 {
543 /*
544 without this volatile cast gcc-3.4.4 moves the assignment
545 down after the loop at -O2
546 */
547 *(TRN * volatile *)&(trn->next)= tmp.trn;
548 } while (!my_atomic_casptr((void **)(char*)&pool, &tmp.v, trn));
549 }
550
551 /*
552 NOTE
553 here we access the hash in a lock-free manner.
554 It's safe, a 'found' TRN can never be freed/reused before we access it.
555 In fact, it cannot be freed before 'trn' ends, because a 'found' TRN
556 can only be removed from the hash when:
557 found->commit_trid < ALL (trn->min_read_from)
558 that is, at least
559 found->commit_trid < trn->min_read_from
560 but
561 found->trid >= trn->min_read_from
562 and
563 found->commit_trid > found->trid
564
565 RETURN
566 1 can
567 0 cannot
568 -1 error (OOM)
569 */
trnman_can_read_from(TRN * trn,TrID trid)570 int trnman_can_read_from(TRN *trn, TrID trid)
571 {
572 TRN **found;
573 my_bool can;
574
575 if (trid < trn->min_read_from)
576 return 1; /* Row is visible by all transactions in the system */
577
578 if (trid >= trn->trid)
579 {
580 /*
581 We have now two cases
582 trid > trn->trid, in which case the row is from a new transaction
583 and not visible, in which case we should return 0.
584 trid == trn->trid in which case the row is from the current transaction
585 and we should return 1
586 */
587 return trid == trn->trid;
588 }
589
590 found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
591 if (found == NULL)
592 return 0; /* not in the hash of transactions = cannot read */
593 if (found == MY_ERRPTR)
594 return -1;
595
596 can= (*found)->commit_trid < trn->trid;
597 lf_hash_search_unpin(trn->pins);
598 return can;
599 }
600
601 /**
602 Finds a TRN by its TrID
603
604 @param trn current trn. Needed for pinning pointers (see lf_pin)
605 @param trid trid to search for
606
607 @return found trn or 0
608
609 @note that trn is returned with its state locked!
610 */
trnman_trid_to_trn(TRN * trn,TrID trid)611 TRN *trnman_trid_to_trn(TRN *trn, TrID trid)
612 {
613 TRN **found;
614
615 if (trid < trn->min_read_from)
616 return 0; /* it's committed eons ago */
617
618 found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid));
619 if (found == NULL || found == MY_ERRPTR)
620 return 0; /* no luck */
621
622 /* we've found something */
623 mysql_mutex_lock(&(*found)->state_lock);
624
625 if ((*found)->short_id == 0)
626 {
627 mysql_mutex_unlock(&(*found)->state_lock);
628 lf_hash_search_unpin(trn->pins);
629 return 0; /* but it was a ghost */
630 }
631 lf_hash_search_unpin(trn->pins);
632
633 /* Gotcha! */
634 return *found;
635 }
636
637 /* TODO: the stubs below are waiting for savepoints to be implemented */
638
/**
  Statement-start notification. Currently an empty stub.

  @param trn  transaction starting a statement (unused)
*/
void trnman_new_statement(TRN *trn __attribute__ ((unused)))
{
  /* intentionally empty */
}
642
/**
  Statement-rollback notification. Currently an empty stub.

  @param trn  transaction rolling back a statement (unused)
*/
void trnman_rollback_statement(TRN *trn __attribute__ ((unused)))
{
  /* intentionally empty */
}
646
647
648 /**
649 @brief Allocates buffers and stores in them some info about transactions
650
651 Does the allocation because the caller cannot know the size itself.
652 Memory freeing is to be done by the caller (if the "str" member of the
653 LEX_STRING is not NULL).
654 The caller has the intention of doing checkpoints.
655
656 @param[out] str_act pointer to where the allocated buffer,
657 and its size, will be put; buffer will be filled
658 with info about active transactions
659 @param[out] str_com pointer to where the allocated buffer,
660 and its size, will be put; buffer will be filled
661 with info about committed transactions
662 @param[out] min_first_undo_lsn pointer to where the minimum
663 first_undo_lsn of all transactions will be put
664
665 @return Operation status
666 @retval 0 OK
667 @retval 1 Error
668 */
669
trnman_collect_transactions(LEX_STRING * str_act,LEX_STRING * str_com,LSN * min_rec_lsn,LSN * min_first_undo_lsn)670 my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
671 LSN *min_rec_lsn, LSN *min_first_undo_lsn)
672 {
673 my_bool error;
674 TRN *trn;
675 char *ptr;
676 uint stored_transactions= 0;
677 LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX;
678 DBUG_ENTER("trnman_collect_transactions");
679
680 DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str));
681
682 /* validate the use of read_non_atomic() in general: */
683 compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8));
684 mysql_mutex_lock(&LOCK_trn_list);
685 str_act->length= 2 + /* number of active transactions */
686 LSN_STORE_SIZE + /* minimum of their rec_lsn */
687 TRANSID_SIZE + /* current TrID generator value */
688 (2 + /* short id */
689 6 + /* long id */
690 LSN_STORE_SIZE + /* undo_lsn */
691 #ifdef MARIA_VERSIONING /* not enabled yet */
692 LSN_STORE_SIZE + /* undo_purge_lsn */
693 #endif
694 LSN_STORE_SIZE /* first_undo_lsn */
695 ) * trnman_active_transactions;
696 str_com->length= 4 + /* number of committed transactions */
697 (6 + /* long id */
698 #ifdef MARIA_VERSIONING /* not enabled yet */
699 LSN_STORE_SIZE + /* undo_purge_lsn */
700 #endif
701 LSN_STORE_SIZE /* first_undo_lsn */
702 ) * trnman_committed_transactions;
703 if ((NULL == (str_act->str= my_malloc(PSI_INSTRUMENT_ME, str_act->length, MYF(MY_WME)))) ||
704 (NULL == (str_com->str= my_malloc(PSI_INSTRUMENT_ME, str_com->length, MYF(MY_WME)))))
705 goto err;
706 /* First, the active transactions */
707 ptr= str_act->str + 2 + LSN_STORE_SIZE;
708 transid_store(ptr, global_trid_generator);
709 ptr+= TRANSID_SIZE;
710 for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
711 {
712 uint sid;
713 LSN rec_lsn, undo_lsn, first_undo_lsn;
714 mysql_mutex_lock(&trn->state_lock);
715 sid= trn->short_id;
716 mysql_mutex_unlock(&trn->state_lock);
717 if (sid == 0)
718 {
719 /*
720 Not even inited, has done nothing. Or it is the
721 dummy_transaction_object, which does only non-transactional
722 immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so
723 can be forgotten for Checkpoint.
724 */
725 continue;
726 }
727 /* needed for low-water mark calculation */
728 if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) &&
729 (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0))
730 minimum_rec_lsn= rec_lsn;
731 /*
732 If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be
733 discovered when seeing that log record which is for sure located after
734 checkpoint_start_log_horizon.
735 */
736 if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) &
737 TRANSACTION_LOGGED_LONG_ID) == 0)
738 continue;
739 /*
740 On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log
741 records; so we must include trn in the checkpoint now, because we cannot
742 count on LOGREC_LONG_TRANSACTION_ID (as we are already past it).
743 */
744 undo_lsn= trn->undo_lsn;
745 stored_transactions++;
746 int2store(ptr, sid);
747 ptr+= 2;
748 int6store(ptr, trn->trid);
749 ptr+= 6;
750 lsn_store(ptr, undo_lsn); /* needed for rollback */
751 ptr+= LSN_STORE_SIZE;
752 /* needed for low-water mark calculation */
753 if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) &&
754 (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0))
755 minimum_first_undo_lsn= first_undo_lsn;
756 lsn_store(ptr, first_undo_lsn);
757 ptr+= LSN_STORE_SIZE;
758 #ifdef MARIA_VERSIONING /* not enabled yet */
759 /* to know where purging should start (last delete of this trn) */
760 lsn_store(ptr, trn->undo_purge_lsn);
761 ptr+= LSN_STORE_SIZE;
762 #endif
763 /**
764 @todo RECOVERY: add a comment explaining why we can dirtily read some
765 vars, inspired by the text of "assumption 8" in WL#3072
766 */
767 }
768 str_act->length= ptr - str_act->str; /* as we maybe over-estimated */
769 ptr= str_act->str;
770 DBUG_PRINT("info",("collected %u active transactions",
771 (uint)stored_transactions));
772 int2store(ptr, stored_transactions);
773 ptr+= 2;
774 /* this LSN influences how REDOs for any page can be ignored by Recovery */
775 lsn_store(ptr, minimum_rec_lsn);
776 /* one day there will also be a list of prepared transactions */
777 /* do the same for committed ones */
778 ptr= str_com->str;
779 int4store(ptr, trnman_committed_transactions);
780 ptr+= 4;
781 DBUG_PRINT("info",("collected %u committed transactions",
782 (uint)trnman_committed_transactions));
783 for (trn= committed_list_min.next; trn != &committed_list_max;
784 trn= trn->next)
785 {
786 LSN first_undo_lsn;
787 int6store(ptr, trn->trid);
788 ptr+= 6;
789 #ifdef MARIA_VERSIONING /* not enabled yet */
790 lsn_store(ptr, trn->undo_purge_lsn);
791 ptr+= LSN_STORE_SIZE;
792 #endif
793 first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn);
794 if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)
795 minimum_first_undo_lsn= first_undo_lsn;
796 lsn_store(ptr, first_undo_lsn);
797 ptr+= LSN_STORE_SIZE;
798 }
799 /*
800 TODO: if we see there exists no transaction (active and committed) we can
801 tell the lock-free structures to do some freeing (my_free()).
802 */
803 error= 0;
804 *min_rec_lsn= minimum_rec_lsn;
805 *min_first_undo_lsn= minimum_first_undo_lsn;
806 goto end;
807 err:
808 error= 1;
809 end:
810 mysql_mutex_unlock(&LOCK_trn_list);
811 DBUG_RETURN(error);
812 }
813
814
trnman_recreate_trn_from_recovery(uint16 shortid,TrID longid)815 TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid)
816 {
817 TrID old_trid_generator= global_trid_generator;
818 TRN *trn;
819 DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
820 global_trid_generator= longid-1; /* force a correct trid in the new trn */
821 if (unlikely((trn= trnman_new_trn(NULL)) == NULL))
822 return NULL;
823 /* deallocate excessive allocations of trnman_new_trn() */
824 global_trid_generator= old_trid_generator;
825 set_if_bigger(global_trid_generator, longid);
826 short_trid_to_active_trn[trn->short_id]= 0;
827 DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL);
828 short_trid_to_active_trn[shortid]= trn;
829 trn->short_id= shortid;
830 return trn;
831 }
832
833
trnman_get_any_trn()834 TRN *trnman_get_any_trn()
835 {
836 TRN *trn= active_list_min.next;
837 return (trn != &active_list_max) ? trn : NULL;
838 }
839
840
841 /**
842 Returns the minimum existing transaction id. May return a too small
843 number in race conditions, but this is ok as the value is used to
844 remove not visible transid from index/rows.
845 */
846
trnman_get_min_trid()847 TrID trnman_get_min_trid()
848 {
849 return trid_min_read_from;
850 }
851
852
853 /**
854 Returns the minimum possible transaction id
855
856 @notes
857 If there is no transactions running, returns number for next running
858 transaction.
859 If one has an active transaction, the returned number will be less or
860 equal to this. If one is not running in a transaction one will ge the
861 number for the next started transaction. This is used in create table
862 to get a safe minimum trid to use.
863 */
864
trnman_get_min_safe_trid()865 TrID trnman_get_min_safe_trid()
866 {
867 TrID trid;
868 mysql_mutex_lock(&LOCK_trn_list);
869 trid= MY_MIN(active_list_min.next->min_read_from,
870 global_trid_generator);
871 mysql_mutex_unlock(&LOCK_trn_list);
872 return trid;
873 }
874
875
876 /**
877 Returns maximum transaction id given to a transaction so far.
878 */
879
trnman_get_max_trid()880 TrID trnman_get_max_trid()
881 {
882 TrID id;
883 if (short_trid_to_active_trn == NULL)
884 return 0;
885 mysql_mutex_lock(&LOCK_trn_list);
886 id= global_trid_generator;
887 mysql_mutex_unlock(&LOCK_trn_list);
888 return id;
889 }
890
891 /**
892 @brief Check if there exist an active transaction between two commit_id's
893
894 @todo
895 Improve speed of this.
896 - Store transactions in tree or skip list
897 - Have function to copying all active transaction id's to b-tree
898 and use b-tree for checking states. This could be a big win
899 for checkpoint that will call this function for a lot of objects.
900
901 @return
902 0 No transaction exists
903 1 There is at least on active transaction in the given range
904 */
905
trnman_exists_active_transactions(TrID min_id,TrID max_id,my_bool trnman_is_locked)906 my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id,
907 my_bool trnman_is_locked)
908 {
909 TRN *trn;
910 my_bool ret= 0;
911
912 if (!trnman_is_locked)
913 mysql_mutex_lock(&LOCK_trn_list);
914 mysql_mutex_assert_owner(&LOCK_trn_list);
915 for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
916 {
917 /*
918 We use <= for max_id as max_id is a commit_trid and trn->trid
919 is transaction id. When calculating commit_trid we use the
920 current value of global_trid_generator. global_trid_generator is
921 incremented for each new transaction.
922
923 For example, assuming we have
924 min_id = 5
925 max_id = 10
926
927 A trid of value 5 can't see the history event between 5 & 10
928 at it vas started before min_id 5 was committed.
929 A trid of value 10 can't see the next history event (max_id = 10)
930 as it started before this was committed. In this case it must use
931 the this event.
932 */
933 if (trn->trid > min_id && trn->trid <= max_id)
934 {
935 ret= 1;
936 break;
937 }
938 }
939 if (!trnman_is_locked)
940 mysql_mutex_unlock(&LOCK_trn_list);
941 return ret;
942 }
943
944
945 /**
946 lock transaction list
947 */
948
trnman_lock()949 void trnman_lock()
950 {
951 mysql_mutex_lock(&LOCK_trn_list);
952 }
953
954
955 /**
956 unlock transaction list
957 */
958
trnman_unlock()959 void trnman_unlock()
960 {
961 mysql_mutex_unlock(&LOCK_trn_list);
962 }
963
964
965 /**
966 Is trman initialized
967 */
968
trman_is_inited()969 my_bool trman_is_inited()
970 {
971 return (short_trid_to_active_trn != NULL);
972 }
973