1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
27
28 /**
29 @file
These functions handle keyblock caching for ISAM and MyISAM tables.
31
32 One cache can handle many files.
33 It must contain buffers of the same blocksize.
34 init_key_cache() should be used to init cache handler.
35
36 The free list (free_block_list) is a stack like structure.
37 When a block is freed by free_block(), it is pushed onto the stack.
38 When a new block is required it is first tried to pop one from the stack.
39 If the stack is empty, it is tried to get a never-used block from the pool.
40 If this is empty too, then a block is taken from the LRU ring, flushing it
to disk, if necessary. This is handled in find_key_block().
42 With the new free list, the blocks can have three temperatures:
43 hot, warm and cold (which is free). This is remembered in the block header
44 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
temperature is necessary to correctly count the number of warm blocks,
46 which is required to decide when blocks are allowed to become hot. Whenever
47 a block is inserted to another (sub-)chain, we take the old and new
48 temperature into account to decide if we got one more or less warm block.
49 blocks_unused is the sum of never used blocks in the pool and of currently
50 free blocks. blocks_used is the number of blocks fetched from the pool and
51 as such gives the maximum number of in-use blocks at any time.
52 */
53
54 /*
55 Key Cache Locking
56 =================
57
58 All key cache locking is done with a single mutex per key cache:
59 keycache->cache_lock. This mutex is locked almost all the time
60 when executing code in this file (mf_keycache.c).
61 However it is released for I/O and some copy operations.
62
63 The cache_lock is also released when waiting for some event. Waiting
64 and signalling is done via condition variables. In most cases the
65 thread waits on its thread->suspend condition variable. Every thread
66 has a my_thread_var structure, which contains this variable and a
67 '*next' and '**prev' pointer. These pointers are used to insert the
68 thread into a wait queue.
69
70 A thread can wait for one block and thus be in one wait queue at a
71 time only.
72
73 Before starting to wait on its condition variable with
74 mysql_cond_wait(), the thread enters itself to a specific wait queue
75 with link_into_queue() (double linked with '*next' + '**prev') or
76 wait_on_queue() (single linked with '*next').
77
78 Another thread, when releasing a resource, looks up the waiting thread
79 in the related wait queue. It sends a signal with
80 mysql_cond_signal() to the waiting thread.
81
82 NOTE: Depending on the particular wait situation, either the sending
83 thread removes the waiting thread from the wait queue with
84 unlink_from_queue() or release_whole_queue() respectively, or the waiting
85 thread removes itself.
86
87 There is one exception from this locking scheme when one thread wants
88 to reuse a block for some other address. This works by first marking
89 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
90 threads that are reading the block to finish. Each block has a
91 reference to a condition variable (condvar). It holds a reference to
92 the thread->suspend condition variable for the waiting thread (if such
93 a thread exists). When that thread is signaled, the reference is
94 cleared. The number of readers of a block is registered in
95 block->hash_link->requests. See wait_for_readers() / remove_reader()
96 for details. This is similar to the above, but it clearly means that
97 only one thread can wait for a particular block. There is no queue in
this case. Strangely enough block->condvar is used for waiting for the
99 assigned hash_link only. More precisely it is used to wait for all
100 requests to be unregistered from the assigned hash_link.
101
102 The resize_queue serves two purposes:
103 1. Threads that want to do a resize wait there if in_resize is set.
104 This is not used in the server. The server refuses a second resize
105 request if one is already active. keycache->in_init is used for the
106 synchronization. See set_var.cc.
107 2. Threads that want to access blocks during resize wait here during
108 the re-initialization phase.
109 When the resize is done, all threads on the queue are signalled.
110 Hypothetical resizers can compete for resizing, and read/write
111 requests will restart to request blocks from the freshly resized
112 cache. If the cache has been resized too small, it is disabled and
113 'can_be_used' is false. In this case read/write requests bypass the
114 cache. Since they increment and decrement 'cnt_for_resize_op', the
115 next resizer can wait on the queue 'waiting_for_resize_cnt' until all
116 I/O finished.
117 */
118
119 #include "mysys_priv.h"
120 #include "mysys_err.h"
121 #include <keycache.h>
122 #include "my_static.h"
123 #include <m_string.h>
124 #include <my_bit.h>
125 #include <errno.h>
126 #include <stdarg.h>
127 #include "probes_mysql.h"
128 #include "my_thread_local.h"
129
130 #define STRUCT_PTR(TYPE, MEMBER, a) \
131 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
132
133 /* types of condition variables */
134 #define COND_FOR_REQUESTED 0
135 #define COND_FOR_SAVED 1
136 #define COND_FOR_READERS 2
137
138 typedef mysql_cond_t KEYCACHE_CONDVAR;
139
140 /* descriptor of the page in the key cache block buffer */
141 struct st_keycache_page
142 {
143 int file; /* file to which the page belongs to */
144 my_off_t filepos; /* position of the page in the file */
145 };
146 typedef struct st_keycache_page KEYCACHE_PAGE;
147
148 /* element in the chain of a hash table bucket */
149 struct st_hash_link
150 {
151 struct st_hash_link *next, **prev; /* to connect links in the same bucket */
152 struct st_block_link *block; /* reference to the block for the page: */
153 File file; /* from such a file */
154 my_off_t diskpos; /* with such an offset */
155 uint requests; /* number of requests for the page */
156 };
157
158 /* simple states of a block */
159 #define BLOCK_ERROR 1 /* an error occured when performing file i/o */
160 #define BLOCK_READ 2 /* file block is in the block buffer */
161 #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */
162 #define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */
163 #define BLOCK_IN_FLUSH 16 /* block is selected for flush */
164 #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
165 #define BLOCK_IN_USE 64 /* block is not free */
166 #define BLOCK_IN_EVICTION 128 /* block is selected for eviction */
167 #define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */
168 #define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */
169
170 /* page status, returned by find_key_block */
171 #define PAGE_READ 0
172 #define PAGE_TO_BE_READ 1
173 #define PAGE_WAIT_TO_BE_READ 2
174
175 /* block temperature determines in which (sub-)chain the block currently is */
176 enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
177
178 /* key cache block */
179 struct st_block_link
180 {
181 struct st_block_link
182 *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
183 struct st_block_link
184 *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
185 struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
186 KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
187 uint requests; /* number of requests for the block */
188 uchar *buffer; /* buffer for the block page */
189 uint offset; /* beginning of modified data in the buffer */
190 uint length; /* end of data in the buffer */
191 uint status; /* state of the block */
192 enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
193 uint hits_left; /* number of hits left until promotion */
194 ulonglong last_hit_time; /* timestamp of the last hit */
195 KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
196 };
197
198 KEY_CACHE dflt_key_cache_var;
199 KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
200
201 #define FLUSH_CACHE 2000 /* sort this many blocks at once */
202
203 static void change_key_cache_param(KEY_CACHE *keycache,
204 ulonglong division_limit,
205 ulonglong age_threshold);
206 static int flush_all_key_blocks(KEY_CACHE *keycache,
207 st_keycache_thread_var *thread_var);
208
209 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
210 mysql_mutex_t *mutex,
211 st_keycache_thread_var *thread);
212 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
213
214 static void free_block(KEY_CACHE *keycache,
215 st_keycache_thread_var *thread_var,
216 BLOCK_LINK *block);
217
218 #define KEYCACHE_HASH(f, pos) \
219 (((ulong) ((pos) / keycache->key_cache_block_size) + \
220 (ulong) (f)) & (keycache->hash_entries-1))
221 #define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
222
223 #define BLOCK_NUMBER(b) \
224 ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
225 #define HASH_LINK_NUMBER(h) \
226 ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
227
228 #if !defined(NDEBUG)
229 static int fail_block(BLOCK_LINK *block);
230 static int fail_hlink(HASH_LINK *hlink);
231 static int cache_empty(KEY_CACHE *keycache);
232 #endif
233
next_power(uint value)234 static inline uint next_power(uint value)
235 {
236 return (uint) my_round_up_to_next_power((uint32) value) << 1;
237 }
238
239
240 /*
241 Initialize a key cache
242
243 SYNOPSIS
244 init_key_cache()
245 keycache pointer to a key cache data structure
246 key_cache_block_size size of blocks to keep cached data
247 use_mem total memory to use for the key cache
248 division_limit division limit (may be zero)
249 age_threshold age threshold (may be zero)
250
251 RETURN VALUE
252 number of blocks in the key cache, if successful,
253 0 - otherwise.
254
255 NOTES.
256 if keycache->key_cache_inited != 0 we assume that the key cache
257 is already initialized. This is for now used by myisamchk, but shouldn't
258 be something that a program should rely on!
259
260 It's assumed that no two threads call this function simultaneously
261 referring to the same key cache handle.
262
263 */
264
init_key_cache(KEY_CACHE * keycache,ulonglong key_cache_block_size,size_t use_mem,ulonglong division_limit,ulonglong age_threshold)265 int init_key_cache(KEY_CACHE *keycache, ulonglong key_cache_block_size,
266 size_t use_mem, ulonglong division_limit,
267 ulonglong age_threshold)
268 {
269 ulong blocks, hash_links;
270 size_t length;
271 int error;
272 DBUG_ENTER("init_key_cache");
273 assert(key_cache_block_size >= 512);
274
275 if (keycache->key_cache_inited && keycache->disk_blocks > 0)
276 {
277 DBUG_PRINT("warning",("key cache already in use"));
278 DBUG_RETURN(0);
279 }
280
281 keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
282 keycache->global_cache_read= keycache->global_cache_write= 0;
283 keycache->disk_blocks= -1;
284 if (! keycache->key_cache_inited)
285 {
286 keycache->key_cache_inited= 1;
287 /*
288 Initialize these variables once only.
289 Their value must survive re-initialization during resizing.
290 */
291 keycache->in_resize= 0;
292 keycache->resize_in_flush= 0;
293 keycache->cnt_for_resize_op= 0;
294 keycache->waiting_for_resize_cnt.last_thread= NULL;
295 keycache->in_init= 0;
296 mysql_mutex_init(key_KEY_CACHE_cache_lock,
297 &keycache->cache_lock, MY_MUTEX_INIT_FAST);
298 keycache->resize_queue.last_thread= NULL;
299 }
300
301 keycache->key_cache_mem_size= use_mem;
302 keycache->key_cache_block_size= (uint)key_cache_block_size;
303 DBUG_PRINT("info", ("key_cache_block_size: %llu",
304 key_cache_block_size));
305
306 blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
307 sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
308 /* It doesn't make sense to have too few blocks (less than 8) */
309 if (blocks >= 8)
310 {
311 for ( ; ; )
312 {
313 /* Set my_hash_entries to the next bigger 2 power */
314 if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
315 keycache->hash_entries<<= 1;
316 hash_links= 2 * blocks;
317 while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
318 ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
319 ALIGN_SIZE(sizeof(HASH_LINK*) *
320 keycache->hash_entries))) +
321 ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
322 blocks--;
323 /* Allocate memory for cache page buffers */
324 if ((keycache->block_mem=
325 my_large_malloc(key_memory_KEY_CACHE,
326 (size_t) blocks * keycache->key_cache_block_size,
327 MYF(0))))
328 {
329 /*
330 Allocate memory for blocks, hash_links and hash entries;
331 For each block 2 hash links are allocated
332 */
333 if ((keycache->block_root= (BLOCK_LINK*) my_malloc(key_memory_KEY_CACHE,
334 length,
335 MYF(0))))
336 break;
337 my_large_free(keycache->block_mem);
338 keycache->block_mem= 0;
339 }
340 if (blocks < 8)
341 {
342 set_my_errno(ENOMEM);
343 my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
344 blocks * keycache->key_cache_block_size);
345 goto err;
346 }
347 blocks= blocks / 4*3;
348 }
349 keycache->blocks_unused= blocks;
350 keycache->disk_blocks= (int) blocks;
351 keycache->hash_links= hash_links;
352 keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
353 ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
354 keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
355 ALIGN_SIZE((sizeof(HASH_LINK*) *
356 keycache->hash_entries)));
357 memset(keycache->block_root, 0,
358 keycache->disk_blocks * sizeof(BLOCK_LINK));
359 memset(keycache->hash_root, 0,
360 keycache->hash_entries * sizeof(HASH_LINK*));
361 memset(keycache->hash_link_root, 0,
362 keycache->hash_links * sizeof(HASH_LINK));
363 keycache->hash_links_used= 0;
364 keycache->free_hash_list= NULL;
365 keycache->blocks_used= keycache->blocks_changed= 0;
366
367 keycache->global_blocks_changed= 0;
368 keycache->blocks_available=0; /* For debugging */
369
370 /* The LRU chain is empty after initialization */
371 keycache->used_last= NULL;
372 keycache->used_ins= NULL;
373 keycache->free_block_list= NULL;
374 keycache->keycache_time= 0;
375 keycache->warm_blocks= 0;
376 keycache->min_warm_blocks= (division_limit ?
377 blocks * division_limit / 100 + 1 :
378 blocks);
379 keycache->age_threshold= (age_threshold ?
380 blocks * age_threshold / 100 :
381 blocks);
382
383 keycache->can_be_used= 1;
384
385 keycache->waiting_for_hash_link.last_thread= NULL;
386 keycache->waiting_for_block.last_thread= NULL;
387 DBUG_PRINT("exit",
388 ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\
389 hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx",
390 keycache->disk_blocks, (long) keycache->block_root,
391 keycache->hash_entries, (long) keycache->hash_root,
392 keycache->hash_links, (long) keycache->hash_link_root));
393 memset(keycache->changed_blocks, 0,
394 sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
395 memset(keycache->file_blocks, 0,
396 sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
397 }
398 else
399 {
400 /* key_buffer_size is specified too small. Disable the cache. */
401 keycache->can_be_used= 0;
402 }
403
404 keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
405 DBUG_RETURN((int) keycache->disk_blocks);
406
407 err:
408 error= my_errno();
409 keycache->disk_blocks= 0;
410 keycache->blocks= 0;
411 if (keycache->block_mem)
412 {
413 my_large_free((uchar*) keycache->block_mem);
414 keycache->block_mem= NULL;
415 }
416 if (keycache->block_root)
417 {
418 my_free(keycache->block_root);
419 keycache->block_root= NULL;
420 }
421 set_my_errno(error);
422 keycache->can_be_used= 0;
423 DBUG_RETURN(0);
424 }
425
426
427 /*
428 Resize a key cache
429
430 SYNOPSIS
431 resize_key_cache()
432 keycache pointer to a key cache data structure
433 thread_var pointer to thread specific variables
434 key_cache_block_size size of blocks to keep cached data
435 use_mem total memory to use for the new key cache
436 division_limit new division limit (if not zero)
437 age_threshold new age threshold (if not zero)
438
439 RETURN VALUE
440 number of blocks in the key cache, if successful,
441 0 - otherwise.
442
443 NOTES.
444 The function first compares the memory size and the block size parameters
445 with the key cache values.
446
If they differ, the function frees the memory allocated for the
448 old key cache blocks by calling the end_key_cache function and
449 then rebuilds the key cache with new blocks by calling
450 init_key_cache.
451
452 The function starts the operation only when all other threads
performing operations with the key cache let it proceed
454 (when cnt_for_resize=0).
455 */
456
resize_key_cache(KEY_CACHE * keycache,st_keycache_thread_var * thread_var,ulonglong key_cache_block_size,size_t use_mem,ulonglong division_limit,ulonglong age_threshold)457 int resize_key_cache(KEY_CACHE *keycache,
458 st_keycache_thread_var *thread_var,
459 ulonglong key_cache_block_size,
460 size_t use_mem, ulonglong division_limit,
461 ulonglong age_threshold)
462 {
463 int blocks;
464 DBUG_ENTER("resize_key_cache");
465
466 if (!keycache->key_cache_inited)
467 DBUG_RETURN(keycache->disk_blocks);
468
469 if(key_cache_block_size == keycache->key_cache_block_size &&
470 use_mem == keycache->key_cache_mem_size)
471 {
472 change_key_cache_param(keycache, division_limit, age_threshold);
473 DBUG_RETURN(keycache->disk_blocks);
474 }
475
476 mysql_mutex_lock(&keycache->cache_lock);
477
478 /*
479 We may need to wait for another thread which is doing a resize
480 already. This cannot happen in the MySQL server though. It allows
481 one resizer only. In set_var.cc keycache->in_init is used to block
482 multiple attempts.
483 */
484 while (keycache->in_resize)
485 {
486 /* purecov: begin inspected */
487 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock,
488 thread_var);
489 /* purecov: end */
490 }
491
492 /*
493 Mark the operation in progress. This blocks other threads from doing
494 a resize in parallel. It prohibits new blocks to enter the cache.
495 Read/write requests can bypass the cache during the flush phase.
496 */
497 keycache->in_resize= 1;
498
499 /* Need to flush only if keycache is enabled. */
500 if (keycache->can_be_used)
501 {
502 /* Start the flush phase. */
503 keycache->resize_in_flush= 1;
504
505 if (flush_all_key_blocks(keycache, thread_var))
506 {
507 /* TODO: if this happens, we should write a warning in the log file ! */
508 keycache->resize_in_flush= 0;
509 blocks= 0;
510 keycache->can_be_used= 0;
511 goto finish;
512 }
513 assert(cache_empty(keycache));
514
515 /* End the flush phase. */
516 keycache->resize_in_flush= 0;
517 }
518
519 /*
520 Some direct read/write operations (bypassing the cache) may still be
521 unfinished. Wait until they are done. If the key cache can be used,
522 direct I/O is done in increments of key_cache_block_size. That is,
523 every block is checked if it is in the cache. We need to wait for
524 pending I/O before re-initializing the cache, because we may change
525 the block size. Otherwise they could check for blocks at file
526 positions where the new block division has none. We do also want to
527 wait for I/O done when (if) the cache was disabled. It must not
528 run in parallel with normal cache operation.
529 */
530 while (keycache->cnt_for_resize_op)
531 wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock,
532 thread_var);
533
534 /*
535 Free old cache structures, allocate new structures, and initialize
536 them. Note that the cache_lock mutex and the resize_queue are left
537 untouched. We do not lose the cache_lock and will release it only at
538 the end of this function.
539 */
540 end_key_cache(keycache, 0); /* Don't free mutex */
541 /* The following will work even if use_mem is 0 */
542 blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
543 division_limit, age_threshold);
544
545 finish:
546 /*
547 Mark the resize finished. This allows other threads to start a
548 resize or to request new cache blocks.
549 */
550 keycache->in_resize= 0;
551
552 /* Signal waiting threads. */
553 release_whole_queue(&keycache->resize_queue);
554
555 mysql_mutex_unlock(&keycache->cache_lock);
556 DBUG_RETURN(blocks);
557 }
558
559
560 /*
561 Increment counter blocking resize key cache operation
562 */
inc_counter_for_resize_op(KEY_CACHE * keycache)563 static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
564 {
565 keycache->cnt_for_resize_op++;
566 }
567
568
569 /*
570 Decrement counter blocking resize key cache operation;
571 Signal the operation to proceed when counter becomes equal zero
572 */
dec_counter_for_resize_op(KEY_CACHE * keycache)573 static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
574 {
575 if (!--keycache->cnt_for_resize_op)
576 release_whole_queue(&keycache->waiting_for_resize_cnt);
577 }
578
579 /*
580 Change the key cache parameters
581
582 SYNOPSIS
583 change_key_cache_param()
584 keycache pointer to a key cache data structure
585 division_limit new division limit (if not zero)
586 age_threshold new age threshold (if not zero)
587
588 RETURN VALUE
589 none
590
591 NOTES.
592 Presently the function resets the key cache parameters
593 concerning midpoint insertion strategy - division_limit and
594 age_threshold.
595 */
596
change_key_cache_param(KEY_CACHE * keycache,ulonglong division_limit,ulonglong age_threshold)597 static void change_key_cache_param(KEY_CACHE *keycache,
598 ulonglong division_limit,
599 ulonglong age_threshold)
600 {
601 DBUG_ENTER("change_key_cache_param");
602
603 mysql_mutex_lock(&keycache->cache_lock);
604 if (division_limit)
605 keycache->min_warm_blocks= (keycache->disk_blocks *
606 division_limit / 100 + 1);
607 if (age_threshold)
608 keycache->age_threshold= (keycache->disk_blocks *
609 age_threshold / 100);
610 mysql_mutex_unlock(&keycache->cache_lock);
611 DBUG_VOID_RETURN;
612 }
613
614
615 /*
616 Remove key_cache from memory
617
618 SYNOPSIS
619 end_key_cache()
620 keycache key cache handle
621 cleanup Complete free (Free also mutex for key cache)
622
623 RETURN VALUE
624 none
625 */
626
end_key_cache(KEY_CACHE * keycache,my_bool cleanup)627 void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
628 {
629 DBUG_ENTER("end_key_cache");
630 DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));
631
632 if (!keycache->key_cache_inited)
633 DBUG_VOID_RETURN;
634
635 if (keycache->disk_blocks > 0)
636 {
637 if (keycache->block_mem)
638 {
639 my_large_free((uchar*) keycache->block_mem);
640 keycache->block_mem= NULL;
641 my_free(keycache->block_root);
642 keycache->block_root= NULL;
643 }
644 keycache->disk_blocks= -1;
645 /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
646 keycache->blocks_changed= 0;
647 }
648
649 DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
650 "writes: %lu r_requests: %lu reads: %lu",
651 keycache->blocks_used, keycache->global_blocks_changed,
652 (ulong) keycache->global_cache_w_requests,
653 (ulong) keycache->global_cache_write,
654 (ulong) keycache->global_cache_r_requests,
655 (ulong) keycache->global_cache_read));
656
657 /*
658 Reset these values to be able to detect a disabled key cache.
659 See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
660 */
661 keycache->blocks_used= 0;
662 keycache->blocks_unused= 0;
663
664 if (cleanup)
665 {
666 mysql_mutex_destroy(&keycache->cache_lock);
667 keycache->key_cache_inited= keycache->can_be_used= 0;
668 }
669 DBUG_VOID_RETURN;
670 } /* end_key_cache */
671
672
673 /**
674 Link a thread into double-linked queue of waiting threads.
675
676 @param wqueue pointer to the queue structure
677 @param thread pointer to the keycache variables for the
678 thread to be added to the queue
679
680 Queue is represented by a circular list of the keycache variable structures.
681 Since each thread has its own keycache variables, this is equal to a list
682 of threads. The list is double-linked of the type (**prev,*next), accessed by
683 a pointer to the last element.
684 */
685
link_into_queue(KEYCACHE_WQUEUE * wqueue,st_keycache_thread_var * thread)686 static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
687 st_keycache_thread_var *thread)
688 {
689 st_keycache_thread_var *last;
690
691 assert(!thread->next && !thread->prev);
692 if (! (last= wqueue->last_thread))
693 {
694 /* Queue is empty */
695 thread->next= thread;
696 thread->prev= &thread->next;
697 }
698 else
699 {
700 thread->prev= last->next->prev;
701 last->next->prev= &thread->next;
702 thread->next= last->next;
703 last->next= thread;
704 }
705 wqueue->last_thread= thread;
706 }
707
708
709 /**
710 Unlink a thread from double-linked queue of waiting threads
711
712 @param wqueue pointer to the queue structure
713 @param thread pointer to the keycache variables for the
714 thread to be removed to the queue
715
716 @note See link_into_queue
717 */
718
unlink_from_queue(KEYCACHE_WQUEUE * wqueue,st_keycache_thread_var * thread)719 static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
720 st_keycache_thread_var *thread)
721 {
722 assert(thread->next && thread->prev);
723 if (thread->next == thread)
724 /* The queue contains only one member */
725 wqueue->last_thread= NULL;
726 else
727 {
728 thread->next->prev= thread->prev;
729 *thread->prev=thread->next;
730 if (wqueue->last_thread == thread)
731 wqueue->last_thread= STRUCT_PTR(st_keycache_thread_var, next,
732 thread->prev);
733 }
734 thread->next= NULL;
735 #if !defined(NDEBUG)
736 /*
737 This makes it easier to see it's not in a chain during debugging.
738 And some assert() rely on it.
739 */
740 thread->prev= NULL;
741 #endif
742 }
743
744
745 /*
746 Add a thread to single-linked queue of waiting threads
747
748 SYNOPSIS
749 wait_on_queue()
750 wqueue Pointer to the queue structure.
751 mutex Cache_lock to acquire after awake.
752 thread Thread to be added
753
754 RETURN VALUE
755 none
756
757 NOTES.
758 Queue is represented by a circular list of the thread structures
759 The list is single-linked of the type (*next), accessed by a pointer
760 to the last element.
761
762 The function protects against stray signals by verifying that the
763 current thread is unlinked from the queue when awaking. However,
764 since several threads can wait for the same event, it might be
765 necessary for the caller of the function to check again if the
766 condition for awake is indeed matched.
767 */
768
wait_on_queue(KEYCACHE_WQUEUE * wqueue,mysql_mutex_t * mutex,st_keycache_thread_var * thread)769 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
770 mysql_mutex_t *mutex,
771 st_keycache_thread_var *thread)
772 {
773 st_keycache_thread_var *last;
774
775 /* Add to queue. */
776 assert(!thread->next);
777 assert(!thread->prev); /* Not required, but must be true anyway. */
778 if (! (last= wqueue->last_thread))
779 thread->next= thread;
780 else
781 {
782 thread->next= last->next;
783 last->next= thread;
784 }
785 wqueue->last_thread= thread;
786
787 /*
788 Wait until thread is removed from queue by the signalling thread.
789 The loop protects against stray signals.
790 */
791 do
792 {
793 mysql_cond_wait(&thread->suspend, mutex);
794 }
795 while (thread->next);
796 }
797
798
799 /*
800 Remove all threads from queue signaling them to proceed
801
802 SYNOPSIS
803 release_whole_queue()
804 wqueue pointer to the queue structure
805
806 RETURN VALUE
807 none
808
809 NOTES.
810 See notes for wait_on_queue().
811 When removed from the queue each thread is signaled via condition
812 variable thread->suspend.
813 */
814
release_whole_queue(KEYCACHE_WQUEUE * wqueue)815 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
816 {
817 st_keycache_thread_var *last;
818 st_keycache_thread_var *next;
819 st_keycache_thread_var *thread;
820
821 /* Queue may be empty. */
822 if (!(last= wqueue->last_thread))
823 return;
824
825 next= last->next;
826 do
827 {
828 thread=next;
829 /* Signal the thread. */
830 mysql_cond_signal(&thread->suspend);
831 /* Take thread from queue. */
832 next=thread->next;
833 thread->next= NULL;
834 }
835 while (thread != last);
836
837 /* Now queue is definitely empty. */
838 wqueue->last_thread= NULL;
839 }
840
841
842 /*
843 Unlink a block from the chain of dirty/clean blocks
844 */
845
unlink_changed(BLOCK_LINK * block)846 static inline void unlink_changed(BLOCK_LINK *block)
847 {
848 assert(block->prev_changed && *block->prev_changed == block);
849 if (block->next_changed)
850 block->next_changed->prev_changed= block->prev_changed;
851 *block->prev_changed= block->next_changed;
852
853 #if !defined(NDEBUG)
854 /*
855 This makes it easier to see it's not in a chain during debugging.
856 And some assert() rely on it.
857 */
858 block->next_changed= NULL;
859 block->prev_changed= NULL;
860 #endif
861 }
862
863
864 /*
865 Link a block into the chain of dirty/clean blocks
866 */
867
link_changed(BLOCK_LINK * block,BLOCK_LINK ** phead)868 static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
869 {
870 assert(!block->next_changed);
871 assert(!block->prev_changed);
872 block->prev_changed= phead;
873 if ((block->next_changed= *phead))
874 (*phead)->prev_changed= &block->next_changed;
875 *phead= block;
876 }
877
878
879 /*
880 Link a block in a chain of clean blocks of a file.
881
882 SYNOPSIS
883 link_to_file_list()
884 keycache Key cache handle
885 block Block to relink
886 file File to be linked to
887 unlink If to unlink first
888
889 DESCRIPTION
890 Unlink a block from whichever chain it is linked in, if it's
891 asked for, and link it to the chain of clean blocks of the
892 specified file.
893
894 NOTE
895 Please do never set/clear BLOCK_CHANGED outside of
896 link_to_file_list() or link_to_changed_list().
897 You would risk to damage correct counting of changed blocks
898 and to find blocks in the wrong hash.
899
900 RETURN
901 void
902 */
903
link_to_file_list(KEY_CACHE * keycache,BLOCK_LINK * block,int file,my_bool unlink_block)904 static void link_to_file_list(KEY_CACHE *keycache,
905 BLOCK_LINK *block, int file,
906 my_bool unlink_block)
907 {
908 assert(block->status & BLOCK_IN_USE);
909 assert(block->hash_link && block->hash_link->block == block);
910 assert(block->hash_link->file == file);
911 if (unlink_block)
912 unlink_changed(block);
913 link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
914 if (block->status & BLOCK_CHANGED)
915 {
916 block->status&= ~BLOCK_CHANGED;
917 keycache->blocks_changed--;
918 keycache->global_blocks_changed--;
919 }
920 }
921
922
923 /*
924 Re-link a block from the clean chain to the dirty chain of a file.
925
926 SYNOPSIS
927 link_to_changed_list()
928 keycache key cache handle
929 block block to relink
930
931 DESCRIPTION
932 Unlink a block from the chain of clean blocks of a file
933 and link it to the chain of dirty blocks of the same file.
934
935 NOTE
936 Please do never set/clear BLOCK_CHANGED outside of
937 link_to_file_list() or link_to_changed_list().
938 You would risk to damage correct counting of changed blocks
939 and to find blocks in the wrong hash.
940
941 RETURN
942 void
943 */
944
link_to_changed_list(KEY_CACHE * keycache,BLOCK_LINK * block)945 static void link_to_changed_list(KEY_CACHE *keycache,
946 BLOCK_LINK *block)
947 {
948 assert(block->status & BLOCK_IN_USE);
949 assert(!(block->status & BLOCK_CHANGED));
950 assert(block->hash_link && block->hash_link->block == block);
951
952 unlink_changed(block);
953 link_changed(block,
954 &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
955 block->status|=BLOCK_CHANGED;
956 keycache->blocks_changed++;
957 keycache->global_blocks_changed++;
958 }
959
960
961 /*
962 Link a block to the LRU chain at the beginning or at the end of
963 one of two parts.
964
965 SYNOPSIS
966 link_block()
967 keycache pointer to a key cache data structure
968 block pointer to the block to link to the LRU chain
969 hot <-> to link the block into the hot subchain
970 at_end <-> to link the block at the end of the subchain
971
972 RETURN VALUE
973 none
974
975 NOTES.
976 The LRU ring is represented by a circular list of block structures.
977 The list is double-linked of the type (**prev,*next) type.
978 The LRU ring is divided into two parts - hot and warm.
979 There are two pointers to access the last blocks of these two
980 parts. The beginning of the warm part follows right after the
981 end of the hot part.
982 Only blocks of the warm part can be used for eviction.
983 The first block from the beginning of this subchain is always
984 taken for eviction (keycache->last_used->next)
985
986 LRU chain: +------+ H O T +------+
987 +----| end |----...<----| beg |----+
988 | +------+last +------+ |
989 v<-link in latest hot (new end) |
990 | link in latest warm (new end)->^
991 | +------+ W A R M +------+ |
992 +----| beg |---->...----| end |----+
993 +------+ +------+ins
994 first for eviction
995
996 It is also possible that the block is selected for eviction and thus
997 not linked in the LRU ring.
998 */
999
link_block(KEY_CACHE * keycache,BLOCK_LINK * block,my_bool hot,my_bool at_end)1000 static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
1001 my_bool at_end)
1002 {
1003 BLOCK_LINK *ins;
1004 BLOCK_LINK **pins;
1005
1006 assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1007 assert(block->hash_link); /*backptr to block NULL from free_block()*/
1008 assert(!block->requests);
1009 assert(block->prev_changed && *block->prev_changed == block);
1010 assert(!block->next_used);
1011 assert(!block->prev_used);
1012
1013 if (!hot && keycache->waiting_for_block.last_thread)
1014 {
1015 /* Signal that in the LRU warm sub-chain an available block has appeared */
1016 st_keycache_thread_var *last_thread=
1017 keycache->waiting_for_block.last_thread;
1018 st_keycache_thread_var *first_thread= last_thread->next;
1019 st_keycache_thread_var *next_thread= first_thread;
1020 HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
1021 st_keycache_thread_var *thread;
1022 do
1023 {
1024 thread= next_thread;
1025 next_thread= thread->next;
1026 /*
1027 We notify about the event all threads that ask
1028 for the same page as the first thread in the queue
1029 */
1030 if ((HASH_LINK *) thread->opt_info == hash_link)
1031 {
1032 mysql_cond_signal(&thread->suspend);
1033 unlink_from_queue(&keycache->waiting_for_block, thread);
1034 block->requests++;
1035 }
1036 }
1037 while (thread != last_thread);
1038 hash_link->block= block;
1039 /*
1040 NOTE: We assigned the block to the hash_link and signalled the
1041 requesting thread(s). But it is possible that other threads runs
1042 first. These threads see the hash_link assigned to a block which
1043 is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
1044 This can be a problem for functions that do not select the block
1045 via its hash_link: flush and free. They do only see a block which
1046 is in a "normal" state and don't know that it will be evicted soon.
1047
1048 We cannot set BLOCK_IN_SWITCH here because only one of the
1049 requesting threads must handle the eviction. All others must wait
1050 for it to complete. If we set the flag here, the threads would not
1051 know who is in charge of the eviction. Without the flag, the first
1052 thread takes the stick and sets the flag.
1053
1054 But we need to note in the block that is has been selected for
1055 eviction. It must not be freed. The evicting thread will not
1056 expect the block in the free list. Before freeing we could also
1057 check if block->requests > 1. But I think including another flag
1058 in the check of block->status is slightly more efficient and
1059 probably easier to read.
1060 */
1061 block->status|= BLOCK_IN_EVICTION;
1062 return;
1063 }
1064
1065 pins= hot ? &keycache->used_ins : &keycache->used_last;
1066 ins= *pins;
1067 if (ins)
1068 {
1069 ins->next_used->prev_used= &block->next_used;
1070 block->next_used= ins->next_used;
1071 block->prev_used= &ins->next_used;
1072 ins->next_used= block;
1073 if (at_end)
1074 *pins= block;
1075 }
1076 else
1077 {
1078 /* The LRU ring is empty. Let the block point to itself. */
1079 keycache->used_last= keycache->used_ins= block->next_used= block;
1080 block->prev_used= &block->next_used;
1081 }
1082 assert((ulong) keycache->blocks_available <=
1083 keycache->blocks_used);
1084 }
1085
1086
1087 /*
1088 Unlink a block from the LRU chain
1089
1090 SYNOPSIS
1091 unlink_block()
1092 keycache pointer to a key cache data structure
1093 block pointer to the block to unlink from the LRU chain
1094
1095 RETURN VALUE
1096 none
1097
1098 NOTES.
1099 See NOTES for link_block
1100 */
1101
unlink_block(KEY_CACHE * keycache,BLOCK_LINK * block)1102 static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
1103 {
1104 assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1105 assert(block->hash_link); /*backptr to block NULL from free_block()*/
1106 assert(!block->requests);
1107 assert(block->prev_changed && *block->prev_changed == block);
1108 assert(block->next_used && block->prev_used &&
1109 (block->next_used->prev_used == &block->next_used) &&
1110 (*block->prev_used == block));
1111 if (block->next_used == block)
1112 /* The list contains only one member */
1113 keycache->used_last= keycache->used_ins= NULL;
1114 else
1115 {
1116 block->next_used->prev_used= block->prev_used;
1117 *block->prev_used= block->next_used;
1118 if (keycache->used_last == block)
1119 keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1120 if (keycache->used_ins == block)
1121 keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1122 }
1123 block->next_used= NULL;
1124 #if !defined(NDEBUG)
1125 /*
1126 This makes it easier to see it's not in a chain during debugging.
1127 And some assert() rely on it.
1128 */
1129 block->prev_used= NULL;
1130 #endif
1131 }
1132
1133
1134 /*
1135 Register requests for a block.
1136
1137 SYNOPSIS
1138 reg_requests()
1139 keycache Pointer to a key cache data structure.
1140 block Pointer to the block to register a request on.
1141 count Number of requests. Always 1.
1142
1143 NOTE
1144 The first request unlinks the block from the LRU ring. This means
1145 that it is protected against eveiction.
1146
1147 RETURN
1148 void
1149 */
reg_requests(KEY_CACHE * keycache,BLOCK_LINK * block,int count)1150 static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
1151 {
1152 assert(block->status & BLOCK_IN_USE);
1153 assert(block->hash_link);
1154
1155 if (!block->requests)
1156 unlink_block(keycache, block);
1157 block->requests+=count;
1158 }
1159
1160
1161 /*
1162 Unregister request for a block
1163 linking it to the LRU chain if it's the last request
1164
1165 SYNOPSIS
1166 unreg_request()
1167 keycache pointer to a key cache data structure
1168 block pointer to the block to link to the LRU chain
1169 at_end <-> to link the block at the end of the LRU chain
1170
1171 RETURN VALUE
1172 none
1173
1174 NOTES.
1175 Every linking to the LRU ring decrements by one a special block
1176 counter (if it's positive). If the at_end parameter is TRUE the block is
1177 added either at the end of warm sub-chain or at the end of hot sub-chain.
1178 It is added to the hot subchain if its counter is zero and number of
1179 blocks in warm sub-chain is not less than some low limit (determined by
1180 the division_limit parameter). Otherwise the block is added to the warm
1181 sub-chain. If the at_end parameter is FALSE the block is always added
1182 at beginning of the warm sub-chain.
1183 Thus a warm block can be promoted to the hot sub-chain when its counter
1184 becomes zero for the first time.
1185 At the same time the block at the very beginning of the hot subchain
1186 might be moved to the beginning of the warm subchain if it stays untouched
1187 for a too long time (this time is determined by parameter age_threshold).
1188
1189 It is also possible that the block is selected for eviction and thus
1190 not linked in the LRU ring.
1191 */
1192
unreg_request(KEY_CACHE * keycache,BLOCK_LINK * block,int at_end)1193 static void unreg_request(KEY_CACHE *keycache,
1194 BLOCK_LINK *block, int at_end)
1195 {
1196 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1197 assert(block->hash_link); /*backptr to block NULL from free_block()*/
1198 assert(block->requests);
1199 assert(block->prev_changed && *block->prev_changed == block);
1200 assert(!block->next_used);
1201 assert(!block->prev_used);
1202 /*
1203 Unregister the request, but do not link erroneous blocks into the
1204 LRU ring.
1205 */
1206 if (!--block->requests && !(block->status & BLOCK_ERROR))
1207 {
1208 my_bool hot;
1209 if (block->hits_left)
1210 block->hits_left--;
1211 hot= !block->hits_left && at_end &&
1212 keycache->warm_blocks > keycache->min_warm_blocks;
1213 if (hot)
1214 {
1215 if (block->temperature == BLOCK_WARM)
1216 keycache->warm_blocks--;
1217 block->temperature= BLOCK_HOT;
1218 }
1219 link_block(keycache, block, hot, (my_bool)at_end);
1220 block->last_hit_time= keycache->keycache_time;
1221 keycache->keycache_time++;
1222 /*
1223 At this place, the block might be in the LRU ring or not. If an
1224 evicter was waiting for a block, it was selected for eviction and
1225 not linked in the LRU ring.
1226 */
1227
1228 /*
1229 Check if we should link a hot block to the warm block sub-chain.
1230 It is possible that we select the same block as above. But it can
1231 also be another block. In any case a block from the LRU ring is
1232 selected. In other words it works even if the above block was
1233 selected for eviction and not linked in the LRU ring. Since this
1234 happens only if the LRU ring is empty, the block selected below
1235 would be NULL and the rest of the function skipped.
1236 */
1237 block= keycache->used_ins;
1238 if (block && keycache->keycache_time - block->last_hit_time >
1239 keycache->age_threshold)
1240 {
1241 unlink_block(keycache, block);
1242 link_block(keycache, block, 0, 0);
1243 if (block->temperature != BLOCK_WARM)
1244 {
1245 keycache->warm_blocks++;
1246 block->temperature= BLOCK_WARM;
1247 }
1248 }
1249 }
1250 }
1251
1252 /*
1253 Remove a reader of the page in block
1254 */
1255
remove_reader(BLOCK_LINK * block)1256 static void remove_reader(BLOCK_LINK *block)
1257 {
1258 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1259 assert(block->hash_link && block->hash_link->block == block);
1260 assert(block->prev_changed && *block->prev_changed == block);
1261 assert(!block->next_used);
1262 assert(!block->prev_used);
1263 assert(block->hash_link->requests);
1264
1265 if (! --block->hash_link->requests && block->condvar)
1266 mysql_cond_signal(block->condvar);
1267 }
1268
1269
1270 /*
1271 Wait until the last reader of the page in block
1272 signals on its termination
1273 */
1274
wait_for_readers(KEY_CACHE * keycache,BLOCK_LINK * block,st_keycache_thread_var * thread)1275 static void wait_for_readers(KEY_CACHE *keycache,
1276 BLOCK_LINK *block,
1277 st_keycache_thread_var *thread)
1278 {
1279 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1280 assert(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
1281 assert(block->hash_link);
1282 assert(block->hash_link->block == block);
1283 /* Linked in file_blocks or changed_blocks hash. */
1284 assert(block->prev_changed && *block->prev_changed == block);
1285 /* Not linked in LRU ring. */
1286 assert(!block->next_used);
1287 assert(!block->prev_used);
1288 while (block->hash_link->requests)
1289 {
1290 /* There must be no other waiter. We have no queue here. */
1291 assert(!block->condvar);
1292 block->condvar= &thread->suspend;
1293 mysql_cond_wait(&thread->suspend, &keycache->cache_lock);
1294 block->condvar= NULL;
1295 }
1296 }
1297
1298
1299 /*
1300 Add a hash link to a bucket in the hash_table
1301 */
1302
link_hash(HASH_LINK ** start,HASH_LINK * hash_link)1303 static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
1304 {
1305 if (*start)
1306 (*start)->prev= &hash_link->next;
1307 hash_link->next= *start;
1308 hash_link->prev= start;
1309 *start= hash_link;
1310 }
1311
1312
1313 /*
1314 Remove a hash link from the hash table
1315 */
1316
unlink_hash(KEY_CACHE * keycache,HASH_LINK * hash_link)1317 static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
1318 {
1319 assert(hash_link->requests == 0);
1320 if ((*hash_link->prev= hash_link->next))
1321 hash_link->next->prev= hash_link->prev;
1322 hash_link->block= NULL;
1323
1324 if (keycache->waiting_for_hash_link.last_thread)
1325 {
1326 /* Signal that a free hash link has appeared */
1327 st_keycache_thread_var *last_thread=
1328 keycache->waiting_for_hash_link.last_thread;
1329 st_keycache_thread_var *first_thread= last_thread->next;
1330 st_keycache_thread_var *next_thread= first_thread;
1331 KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
1332 st_keycache_thread_var *thread;
1333
1334 hash_link->file= first_page->file;
1335 hash_link->diskpos= first_page->filepos;
1336 do
1337 {
1338 KEYCACHE_PAGE *page;
1339 thread= next_thread;
1340 page= (KEYCACHE_PAGE *) thread->opt_info;
1341 next_thread= thread->next;
1342 /*
1343 We notify about the event all threads that ask
1344 for the same page as the first thread in the queue
1345 */
1346 if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1347 {
1348 mysql_cond_signal(&thread->suspend);
1349 unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1350 }
1351 }
1352 while (thread != last_thread);
1353 link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1354 hash_link->diskpos)],
1355 hash_link);
1356 return;
1357 }
1358 hash_link->next= keycache->free_hash_list;
1359 keycache->free_hash_list= hash_link;
1360 }
1361
1362
1363 /*
1364 Get the hash link for a page
1365 */
1366
get_hash_link(KEY_CACHE * keycache,int file,my_off_t filepos,st_keycache_thread_var * thread)1367 static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
1368 int file, my_off_t filepos,
1369 st_keycache_thread_var *thread)
1370 {
1371 HASH_LINK *hash_link, **start;
1372 #ifndef NDEBUG
1373 int cnt;
1374 #endif
1375
1376 restart:
1377 /*
1378 Find the bucket in the hash table for the pair (file, filepos);
1379 start contains the head of the bucket list,
1380 hash_link points to the first member of the list
1381 */
1382 hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1383 #ifndef NDEBUG
1384 cnt= 0;
1385 #endif
1386 /* Look for an element for the pair (file, filepos) in the bucket chain */
1387 while (hash_link &&
1388 (hash_link->diskpos != filepos || hash_link->file != file))
1389 {
1390 hash_link= hash_link->next;
1391 #ifndef NDEBUG
1392 cnt++;
1393 assert(cnt <= keycache->hash_links_used);
1394 #endif
1395 }
1396 if (! hash_link)
1397 {
1398 /* There is no hash link in the hash table for the pair (file, filepos) */
1399 if (keycache->free_hash_list)
1400 {
1401 hash_link= keycache->free_hash_list;
1402 keycache->free_hash_list= hash_link->next;
1403 }
1404 else if (keycache->hash_links_used < keycache->hash_links)
1405 {
1406 hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1407 }
1408 else
1409 {
1410 /* Wait for a free hash link */
1411 KEYCACHE_PAGE page;
1412 page.file= file;
1413 page.filepos= filepos;
1414 thread->opt_info= (void *) &page;
1415 link_into_queue(&keycache->waiting_for_hash_link, thread);
1416 mysql_cond_wait(&thread->suspend,
1417 &keycache->cache_lock);
1418 thread->opt_info= NULL;
1419 goto restart;
1420 }
1421 hash_link->file= file;
1422 hash_link->diskpos= filepos;
1423 link_hash(start, hash_link);
1424 }
1425 /* Register the request for the page */
1426 hash_link->requests++;
1427
1428 return hash_link;
1429 }
1430
1431
1432 /*
1433 Get a block for the file page requested by a keycache read/write operation;
1434 If the page is not in the cache return a free block, if there is none
1435 return the lru block after saving its buffer if the page is dirty.
1436
1437 SYNOPSIS
1438
1439 find_key_block()
1440 keycache pointer to a key cache data structure
1441 thread pointer to thread specific variables
1442 file handler for the file to read page from
1443 filepos position of the page in the file
1444 init_hits_left how initialize the block counter for the page
1445 wrmode <-> get for writing
1446 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1447
1448 RETURN VALUE
1449 Pointer to the found block if successful, 0 - otherwise
1450
1451 NOTES.
1452 For the page from file positioned at filepos the function checks whether
1453 the page is in the key cache specified by the first parameter.
1454 If this is the case it immediately returns the block.
1455 If not, the function first chooses a block for this page. If there is
1456 no not used blocks in the key cache yet, the function takes the block
1457 at the very beginning of the warm sub-chain. It saves the page in that
1458 block if it's dirty before returning the pointer to it.
1459 The function returns in the page_st parameter the following values:
1460 PAGE_READ - if page already in the block,
1461 PAGE_TO_BE_READ - if it is to be read yet by the current thread
1462 WAIT_TO_BE_READ - if it is to be read by another thread
1463 If an error occurs THE BLOCK_ERROR bit is set in the block status.
1464 It might happen that there are no blocks in LRU chain (in warm part) -
1465 all blocks are unlinked for some read/write operations. Then the function
1466 waits until first of this operations links any block back.
1467 */
1468
find_key_block(KEY_CACHE * keycache,st_keycache_thread_var * thread,File file,my_off_t filepos,int init_hits_left,int wrmode,int * page_st)1469 static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1470 st_keycache_thread_var *thread,
1471 File file, my_off_t filepos,
1472 int init_hits_left,
1473 int wrmode, int *page_st)
1474 {
1475 HASH_LINK *hash_link;
1476 BLOCK_LINK *block;
1477 int error= 0;
1478 int page_status;
1479
1480 DBUG_ENTER("find_key_block");
1481 DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1482 file, (ulong) filepos, wrmode));
1483
1484 restart:
1485 /*
1486 If the flush phase of a resize operation fails, the cache is left
1487 unusable. This will be detected only after "goto restart".
1488 */
1489 if (!keycache->can_be_used)
1490 DBUG_RETURN(0);
1491
1492 /*
1493 Find the hash_link for the requested file block (file, filepos). We
1494 do always get a hash_link here. It has registered our request so
1495 that no other thread can use it for another file block until we
1496 release the request (which is done by remove_reader() usually). The
1497 hash_link can have a block assigned to it or not. If there is a
1498 block, it may be assigned to this hash_link or not. In cases where a
1499 block is evicted from the cache, it is taken from the LRU ring and
1500 referenced by the new hash_link. But the block can still be assigned
1501 to its old hash_link for some time if it needs to be flushed first,
1502 or if there are other threads still reading it.
1503
1504 Summary:
1505 hash_link is always returned.
1506 hash_link->block can be:
1507 - NULL or
1508 - not assigned to this hash_link or
1509 - assigned to this hash_link. If assigned, the block can have
1510 - invalid data (when freshly assigned) or
1511 - valid data. Valid data can be
1512 - changed over the file contents (dirty) or
1513 - not changed (clean).
1514 */
1515 hash_link= get_hash_link(keycache, file, filepos, thread);
1516 assert((hash_link->file == file) && (hash_link->diskpos == filepos));
1517
1518 page_status= -1;
1519 if ((block= hash_link->block) &&
1520 block->hash_link == hash_link && (block->status & BLOCK_READ))
1521 {
1522 /* Assigned block with valid (changed or unchanged) contents. */
1523 page_status= PAGE_READ;
1524 }
1525 /*
1526 else (page_status == -1)
1527 - block == NULL or
1528 - block not assigned to this hash_link or
1529 - block assigned but not yet read from file (invalid data).
1530 */
1531
1532 if (keycache->in_resize)
1533 {
1534 /* This is a request during a resize operation */
1535
1536 if (!block)
1537 {
1538 /*
1539 The file block is not in the cache. We don't need it in the
1540 cache: we are going to read or write directly to file. Cancel
1541 the request. We can simply decrement hash_link->requests because
1542 we did not release cache_lock since increasing it. So no other
1543 thread can wait for our request to become released.
1544 */
1545 if (hash_link->requests == 1)
1546 {
1547 /*
1548 We are the only one to request this hash_link (this file/pos).
1549 Free the hash_link.
1550 */
1551 hash_link->requests--;
1552 unlink_hash(keycache, hash_link);
1553 DBUG_RETURN(0);
1554 }
1555
1556 /*
1557 More requests on the hash_link. Someone tries to evict a block
1558 for this hash_link (could have started before resizing started).
1559 This means that the LRU ring is empty. Otherwise a block could
1560 be assigned immediately. Behave like a thread that wants to
1561 evict a block for this file/pos. Add to the queue of threads
1562 waiting for a block. Wait until there is one assigned.
1563
1564 Refresh the request on the hash-link so that it cannot be reused
1565 for another file/pos.
1566 */
1567 thread->opt_info= (void *) hash_link;
1568 link_into_queue(&keycache->waiting_for_block, thread);
1569 do
1570 {
1571 mysql_cond_wait(&thread->suspend,
1572 &keycache->cache_lock);
1573 } while (thread->next);
1574 thread->opt_info= NULL;
1575 /*
1576 A block should now be assigned to the hash_link. But it may
1577 still need to be evicted. Anyway, we should re-check the
1578 situation. page_status must be set correctly.
1579 */
1580 hash_link->requests--;
1581 goto restart;
1582 } /* end of if (!block) */
1583
1584 /*
1585 There is a block for this file/pos in the cache. Register a
1586 request on it. This unlinks it from the LRU ring (if it is there)
1587 and hence protects it against eviction (if not already in
1588 eviction). We need this for returning the block to the caller, for
1589 calling remove_reader() (for debugging purposes), and for calling
1590 free_block(). The only case where we don't need the request is if
1591 the block is in eviction. In that case we have to unregister the
1592 request later.
1593 */
1594 reg_requests(keycache, block, 1);
1595
1596 if (page_status != PAGE_READ)
1597 {
1598 /*
1599 - block not assigned to this hash_link or
1600 - block assigned but not yet read from file (invalid data).
1601
1602 This must be a block in eviction. It will be read soon. We need
1603 to wait here until this happened. Otherwise the caller could
1604 access a wrong block or a block which is in read. While waiting
1605 we cannot lose hash_link nor block. We have registered a request
1606 on the hash_link. Everything can happen to the block but changes
1607 in the hash_link -> block relationship. In other words:
1608 everything can happen to the block but free or another completed
1609 eviction.
1610
      Note that we behave like a secondary requestor here. We just
1612 cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1613 read requests and writes on dirty blocks that are not in flush
1614 only. Waiting here on COND_FOR_REQUESTED works in all
1615 situations.
1616 */
1617 assert(((block->hash_link != hash_link) &&
1618 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1619 ((block->hash_link == hash_link) &&
1620 !(block->status & BLOCK_READ)));
1621 wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock,
1622 thread);
1623 /*
1624 Here we can trust that the block has been assigned to this
1625 hash_link (block->hash_link == hash_link) and read into the
1626 buffer (BLOCK_READ). The worst things possible here are that the
1627 block is in free (BLOCK_REASSIGNED). But the block is still
1628 assigned to the hash_link. The freeing thread waits until we
1629 release our request on the hash_link. The block must not be
      again in eviction because we registered a request on it before
1631 starting to wait.
1632 */
1633 assert(block->hash_link == hash_link);
1634 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1635 assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1636 }
1637 /*
1638 The block is in the cache. Assigned to the hash_link. Valid data.
1639 Note that in case of page_st == PAGE_READ, the block can be marked
1640 for eviction. In any case it can be marked for freeing.
1641 */
1642
1643 if (!wrmode)
1644 {
1645 /* A reader can just read the block. */
1646 *page_st= PAGE_READ;
1647 assert((hash_link->file == file) &&
1648 (hash_link->diskpos == filepos) &&
1649 (block->hash_link == hash_link));
1650 DBUG_RETURN(block);
1651 }
1652
1653 /*
1654 This is a writer. No two writers for the same block can exist.
1655 This must be assured by locks outside of the key cache.
1656 */
1657 assert(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1658
1659 while (block->status & BLOCK_IN_FLUSH)
1660 {
1661 /*
1662 Wait until the block is flushed to file. Do not release the
1663 request on the hash_link yet to prevent that the block is freed
1664 or reassigned while we wait. While we wait, several things can
1665 happen to the block, including another flush. But the block
1666 cannot be reassigned to another hash_link until we release our
1667 request on it. But it can be marked BLOCK_REASSIGNED from free
1668 or eviction, while they wait for us to release the hash_link.
1669 */
1670 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock,
1671 thread);
1672 /*
1673 If the flush phase failed, the resize could have finished while
1674 we waited here.
1675 */
1676 if (!keycache->in_resize)
1677 {
1678 remove_reader(block);
1679 unreg_request(keycache, block, 1);
1680 goto restart;
1681 }
1682 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1683 assert(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1684 assert(block->hash_link == hash_link);
1685 }
1686
1687 if (block->status & BLOCK_CHANGED)
1688 {
1689 /*
1690 We want to write a block with changed contents. If the cache
1691 block size is bigger than the callers block size (e.g. MyISAM),
1692 the caller may replace part of the block only. Changes of the
1693 other part of the block must be preserved. Since the block has
1694 not yet been selected for flush, we can still add our changes.
1695 */
1696 *page_st= PAGE_READ;
1697 assert((hash_link->file == file) &&
1698 (hash_link->diskpos == filepos) &&
1699 (block->hash_link == hash_link));
1700 DBUG_RETURN(block);
1701 }
1702
1703 /*
1704 This is a write request for a clean block. We do not want to have
1705 new dirty blocks in the cache while resizing. We will free the
1706 block and write directly to file. If the block is in eviction or
1707 in free, we just let it go.
1708
1709 Unregister from the hash_link. This must be done before freeing
1710 the block. And it must be done if not freeing the block. Because
1711 we could have waited above, we need to call remove_reader(). Other
1712 threads could wait for us to release our request on the hash_link.
1713 */
1714 remove_reader(block);
1715
1716 /* If the block is not in eviction and not in free, we can free it. */
1717 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1718 BLOCK_REASSIGNED)))
1719 {
1720 /*
1721 Free block as we are going to write directly to file.
        Although we have an exclusive lock for the updated key part,
1723 the control can be yielded by the current thread as we might
1724 have unfinished readers of other key parts in the block
1725 buffer. Still we are guaranteed not to have any readers
1726 of the key part we are writing into until the block is
1727 removed from the cache as we set the BLOCK_REASSIGNED
1728 flag (see the code below that handles reading requests).
1729 */
1730 free_block(keycache, thread, block);
1731 }
1732 else
1733 {
1734 /*
1735 The block will be evicted/freed soon. Don't touch it in any way.
1736 Unregister the request that we registered above.
1737 */
1738 unreg_request(keycache, block, 1);
1739
1740 /*
1741 The block is still assigned to the hash_link (the file/pos that
1742 we are going to write to). Wait until the eviction/free is
1743 complete. Otherwise the direct write could complete before all
1744 readers are done with the block. So they could read outdated
1745 data.
1746
1747 Since we released our request on the hash_link, it can be reused
1748 for another file/pos. Hence we cannot just check for
1749 block->hash_link == hash_link. As long as the resize is
1750 proceeding the block cannot be reassigned to the same file/pos
1751 again. So we can terminate the loop when the block is no longer
1752 assigned to this file/pos.
1753 */
1754 do
1755 {
1756 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1757 &keycache->cache_lock, thread);
1758 /*
1759 If the flush phase failed, the resize could have finished
1760 while we waited here.
1761 */
1762 if (!keycache->in_resize)
1763 goto restart;
1764 } while (block->hash_link &&
1765 (block->hash_link->file == file) &&
1766 (block->hash_link->diskpos == filepos));
1767 }
1768 DBUG_RETURN(0);
1769 }
1770
1771 if (page_status == PAGE_READ &&
1772 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1773 BLOCK_REASSIGNED)))
1774 {
1775 /*
1776 This is a request for a block to be removed from cache. The block
1777 is assigned to this hash_link and contains valid data, but is
1778 marked for eviction or to be freed. Possible reasons why it has
1779 not yet been evicted/freed can be a flush before reassignment
1780 (BLOCK_IN_SWITCH), readers of the block have not finished yet
1781 (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1782 the block has been selected for it (BLOCK_IN_EVICTION).
1783 */
1784
1785 /*
1786 Only reading requests can proceed until the old dirty page is flushed,
1787 all others are to be suspended, then resubmitted
1788 */
1789 if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1790 {
1791 /*
1792 This is a read request and the block not yet reassigned. We can
1793 register our request and proceed. This unlinks the block from
1794 the LRU ring and protects it against eviction.
1795 */
1796 reg_requests(keycache, block, 1);
1797 }
1798 else
1799 {
1800 /*
1801 Either this is a write request for a block that is in eviction
1802 or in free. We must not use it any more. Instead we must evict
1803 another block. But we cannot do this before the eviction/free is
1804 done. Otherwise we would find the same hash_link + block again
1805 and again.
1806
1807 Or this is a read request for a block in eviction/free that does
1808 not require a flush, but waits for readers to finish with the
1809 block. We do not read this block to let the eviction/free happen
1810 as soon as possible. Again we must wait so that we don't find
1811 the same hash_link + block again and again.
1812 */
1813 assert(hash_link->requests);
1814 hash_link->requests--;
1815 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock,
1816 thread);
1817 /*
1818 The block is no longer assigned to this hash_link.
1819 Get another one.
1820 */
1821 goto restart;
1822 }
1823 }
1824 else
1825 {
1826 /*
1827 This is a request for a new block or for a block not to be removed.
1828 Either
1829 - block == NULL or
1830 - block not assigned to this hash_link or
1831 - block assigned but not yet read from file,
1832 or
1833 - block assigned with valid (changed or unchanged) data and
1834 - it will not be reassigned/freed.
1835 */
1836 if (! block)
1837 {
1838 /* No block is assigned to the hash_link yet. */
1839 if (keycache->blocks_unused)
1840 {
1841 if (keycache->free_block_list)
1842 {
1843 /* There is a block in the free list. */
1844 block= keycache->free_block_list;
1845 keycache->free_block_list= block->next_used;
1846 block->next_used= NULL;
1847 }
1848 else
1849 {
1850 size_t block_mem_offset;
1851 /* There are some never used blocks, take first of them */
1852 assert(keycache->blocks_used <
1853 (ulong) keycache->disk_blocks);
1854 block= &keycache->block_root[keycache->blocks_used];
1855 block_mem_offset=
1856 ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
1857 block->buffer= ADD_TO_PTR(keycache->block_mem,
1858 block_mem_offset,
1859 uchar*);
1860 keycache->blocks_used++;
1861 assert(!block->next_used);
1862 }
1863 assert(!block->prev_used);
1864 assert(!block->next_changed);
1865 assert(!block->prev_changed);
1866 assert(!block->hash_link);
1867 assert(!block->status);
1868 assert(!block->requests);
1869 keycache->blocks_unused--;
1870 block->status= BLOCK_IN_USE;
1871 block->length= 0;
1872 block->offset= keycache->key_cache_block_size;
1873 block->requests= 1;
1874 block->temperature= BLOCK_COLD;
1875 block->hits_left= init_hits_left;
1876 block->last_hit_time= 0;
1877 block->hash_link= hash_link;
1878 hash_link->block= block;
1879 link_to_file_list(keycache, block, file, 0);
1880 page_status= PAGE_TO_BE_READ;
1881 }
1882 else
1883 {
1884 /*
1885 There are no free blocks and no never used blocks, use a block
1886 from the LRU ring.
1887 */
1888
1889 if (! keycache->used_last)
1890 {
1891 /*
1892 The LRU ring is empty. Wait until a new block is added to
1893 it. Several threads might wait here for the same hash_link,
1894 all of them must get the same block. While waiting for a
1895 block, after a block is selected for this hash_link, other
1896 threads can run first before this one awakes. During this
1897 time interval other threads find this hash_link pointing to
1898 the block, which is still assigned to another hash_link. In
1899 this case the block is not marked BLOCK_IN_SWITCH yet, but
1900 it is marked BLOCK_IN_EVICTION.
1901 */
1902
1903 thread->opt_info= (void *) hash_link;
1904 link_into_queue(&keycache->waiting_for_block, thread);
1905 do
1906 {
1907 mysql_cond_wait(&thread->suspend,
1908 &keycache->cache_lock);
1909 }
1910 while (thread->next);
1911 thread->opt_info= NULL;
1912 /* Assert that block has a request registered. */
1913 assert(hash_link->block->requests);
1914 /* Assert that block is not in LRU ring. */
1915 assert(!hash_link->block->next_used);
1916 assert(!hash_link->block->prev_used);
1917 }
1918
1919 /*
1920 If we waited above, hash_link->block has been assigned by
1921 link_block(). Otherwise it is still NULL. In the latter case
1922 we need to grab a block from the LRU ring ourselves.
1923 */
1924 block= hash_link->block;
1925 if (! block)
1926 {
1927 /* Select the last block from the LRU ring. */
1928 block= keycache->used_last->next_used;
1929 block->hits_left= init_hits_left;
1930 block->last_hit_time= 0;
1931 hash_link->block= block;
1932 /*
1933 Register a request on the block. This unlinks it from the
1934 LRU ring and protects it against eviction.
1935 */
1936 assert(!block->requests);
1937 reg_requests(keycache, block,1);
1938 /*
1939 We do not need to set block->status|= BLOCK_IN_EVICTION here
1940 because we will set block->status|= BLOCK_IN_SWITCH
1941 immediately without releasing the lock in between. This does
1942 also support debugging. When looking at the block, one can
1943 see if the block has been selected by link_block() after the
1944 LRU ring was empty, or if it was grabbed directly from the
1945 LRU ring in this branch.
1946 */
1947 }
1948
1949 /*
1950 If we had to wait above, there is a small chance that another
1951 thread grabbed this block for the same file block already. But
1952 in most cases the first condition is true.
1953 */
1954 if (block->hash_link != hash_link &&
1955 ! (block->status & BLOCK_IN_SWITCH) )
1956 {
1957 /* this is a primary request for a new page */
1958 block->status|= BLOCK_IN_SWITCH;
1959
1960 if (block->status & BLOCK_CHANGED)
1961 {
1962 /* The block contains a dirty page - push it out of the cache */
1963
1964 if (block->status & BLOCK_IN_FLUSH)
1965 {
1966 /*
1967 The block is marked for flush. If we do not wait here,
1968 it could happen that we write the block, reassign it to
1969 another file block, then, before the new owner can read
1970 the new file block, the flusher writes the cache block
1971 (which still has the old contents) to the new file block!
1972 */
1973 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1974 &keycache->cache_lock, thread);
1975 /*
1976 The block is marked BLOCK_IN_SWITCH. It should be left
1977 alone except for reading. No free, no write.
1978 */
1979 assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1980 assert(!(block->status & (BLOCK_REASSIGNED |
1981 BLOCK_CHANGED |
1982 BLOCK_FOR_UPDATE)));
1983 }
1984 else
1985 {
1986 block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
1987 /*
1988 BLOCK_IN_EVICTION may be true or not. Other flags must
1989 have a fixed value.
1990 */
1991 assert((block->status & ~BLOCK_IN_EVICTION) ==
1992 (BLOCK_READ | BLOCK_IN_SWITCH |
1993 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1994 BLOCK_CHANGED | BLOCK_IN_USE));
1995 assert(block->hash_link);
1996
1997 mysql_mutex_unlock(&keycache->cache_lock);
1998 /*
1999 The call is thread safe because only the current
2000 thread might change the block->hash_link value
2001 */
2002 error= (int)my_pwrite(block->hash_link->file,
2003 block->buffer + block->offset,
2004 block->length - block->offset,
2005 block->hash_link->diskpos + block->offset,
2006 MYF(MY_NABP | MY_WAIT_IF_FULL));
2007 mysql_mutex_lock(&keycache->cache_lock);
2008
2009 /* Block status must not have changed. */
2010 assert((block->status & ~BLOCK_IN_EVICTION) ==
2011 (BLOCK_READ | BLOCK_IN_SWITCH |
2012 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2013 BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2014 keycache->global_cache_write++;
2015 }
2016 }
2017
2018 block->status|= BLOCK_REASSIGNED;
2019 /*
2020 The block comes from the LRU ring. It must have a hash_link
2021 assigned.
2022 */
2023 assert(block->hash_link);
2024 if (block->hash_link)
2025 {
2026 /*
2027 All pending requests for this page must be resubmitted.
2028 This must be done before waiting for readers. They could
2029 wait for the flush to complete. And we must also do it
2030 after the wait. Flushers might try to free the block while
2031 we wait. They would wait until the reassignment is
2032 complete. Also the block status must reflect the correct
2033 situation: The block is not changed nor in flush any more.
2034 Note that we must not change the BLOCK_CHANGED flag
2035 outside of link_to_file_list() so that it is always in the
2036 correct queue and the *blocks_changed counters are
2037 correct.
2038 */
2039 block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2040 link_to_file_list(keycache, block, block->hash_link->file, 1);
2041 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2042 /*
2043 The block is still assigned to its old hash_link.
2044 Wait until all pending read requests
2045 for this page are executed
2046 (we could have avoided this waiting, if we had read
2047 a page in the cache in a sweep, without yielding control)
2048 */
2049 wait_for_readers(keycache, block, thread);
2050 assert(block->hash_link && block->hash_link->block == block &&
2051 block->prev_changed);
2052 /* The reader must not have been a writer. */
2053 assert(!(block->status & BLOCK_CHANGED));
2054
2055 /* Wake flushers that might have found the block in between. */
2056 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2057
2058 /* Remove the hash link for the old file block from the hash. */
2059 unlink_hash(keycache, block->hash_link);
2060
2061 /*
2062 For sanity checks link_to_file_list() asserts that block
2063 and hash_link refer to each other. Hence we need to assign
2064 the hash_link first, but then we would not know if it was
2065 linked before. Hence we would not know if to unlink it. So
2066 unlink it here and call link_to_file_list(..., FALSE).
2067 */
2068 unlink_changed(block);
2069 }
2070 block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2071 block->length= 0;
2072 block->offset= keycache->key_cache_block_size;
2073 block->hash_link= hash_link;
2074 link_to_file_list(keycache, block, file, 0);
2075 page_status= PAGE_TO_BE_READ;
2076
2077 assert(block->hash_link->block == block);
2078 assert(hash_link->block->hash_link == hash_link);
2079 }
2080 else
2081 {
2082 /*
2083 Either (block->hash_link == hash_link),
2084 or (block->status & BLOCK_IN_SWITCH).
2085
2086 This is for secondary requests for a new file block only.
2087 Either it is already assigned to the new hash_link meanwhile
2088 (if we had to wait due to empty LRU), or it is already in
2089 eviction by another thread. Since this block has been
2090 grabbed from the LRU ring and attached to this hash_link,
2091 another thread cannot grab the same block from the LRU ring
2092 anymore. If the block is in eviction already, it must become
2093 attached to the same hash_link and as such destined for the
2094 same file block.
2095 */
2096 page_status= (((block->hash_link == hash_link) &&
2097 (block->status & BLOCK_READ)) ?
2098 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2099 }
2100 }
2101 }
2102 else
2103 {
2104 /*
2105 Block is not NULL. This hash_link points to a block.
2106 Either
2107 - block not assigned to this hash_link (yet) or
2108 - block assigned but not yet read from file,
2109 or
2110 - block assigned with valid (changed or unchanged) data and
2111 - it will not be reassigned/freed.
2112
2113 The first condition means hash_link points to a block in
2114 eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2115 But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2116 link_block(). In both cases it is destined for this hash_link
2117 and its file block address. When this hash_link got its block
2118 address, the block was removed from the LRU ring and cannot be
2119 selected for eviction (for another hash_link) again.
2120
2121 Register a request on the block. This is another protection
2122 against eviction.
2123 */
2124 assert(((block->hash_link != hash_link) &&
2125 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2126 ((block->hash_link == hash_link) &&
2127 !(block->status & BLOCK_READ)) ||
2128 ((block->status & BLOCK_READ) &&
2129 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2130 reg_requests(keycache, block, 1);
2131 page_status= (((block->hash_link == hash_link) &&
2132 (block->status & BLOCK_READ)) ?
2133 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2134 }
2135 }
2136
2137 assert(page_status != -1);
2138 /* Same assert basically, but be very sure. */
2139 assert(block);
2140 /* Assert that block has a request and is not in LRU ring. */
2141 assert(block->requests);
2142 assert(!block->next_used);
2143 assert(!block->prev_used);
2144 /* Assert that we return the correct block. */
2145 assert((page_status == PAGE_WAIT_TO_BE_READ) ||
2146 ((block->hash_link->file == file) &&
2147 (block->hash_link->diskpos == filepos)));
2148 *page_st=page_status;
2149 DBUG_RETURN(block);
2150 }
2151
2152
2153 /*
2154 Read into a key cache block buffer from disk.
2155
2156 SYNOPSIS
2157
2158 read_block()
2159 keycache pointer to a key cache data structure
2160 thread_var pointer to thread specific variables
2161 block block to which buffer the data is to be read
2162 read_length size of data to be read
2163 min_length at least so much data must be read
2164 primary <-> the current thread will read the data
2165
2166 RETURN VALUE
2167 None
2168
2169 NOTES.
2170 The function either reads a page data from file to the block buffer,
2171 or waits until another thread reads it. What page to read is determined
2172 by a block parameter - reference to a hash link for this page.
2173     If an error occurs the BLOCK_ERROR bit is set in the block status.
2174 We do not report error when the size of successfully read
2175 portion is less than read_length, but not less than min_length.
2176 */
2177
static void read_block(KEY_CACHE *keycache,
                       st_keycache_thread_var *thread_var,
                       BLOCK_LINK *block, uint read_length,
                       uint min_length, my_bool primary)
{
  size_t got_length;  /* number of bytes actually transferred by my_pread() */

  /* On entry cache_lock is locked */

  if (primary)
  {
    /*
      This code is executed only by threads that submitted primary
      requests. Until block->status contains BLOCK_READ, all other
      request for the block become secondary requests. For a primary
      request the block must be properly initialized.
    */
    assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
           fail_block(block));
    assert((block->length == 0) || fail_block(block));
    assert((block->offset == keycache->key_cache_block_size) ||
           fail_block(block));
    assert((block->requests > 0) || fail_block(block));

    keycache->global_cache_read++;
    /* Page is not in buffer yet, is to be read from disk */
    mysql_mutex_unlock(&keycache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
    mysql_mutex_lock(&keycache->cache_lock);
    /*
      The block can now have been marked for free (in case of
      FLUSH_RELEASE). Otherwise the state must be unchanged.
    */
    assert(((block->status & ~(BLOCK_REASSIGNED |
                               BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
           fail_block(block));
    assert((block->length == 0) || fail_block(block));
    assert((block->offset == keycache->key_cache_block_size) ||
           fail_block(block));
    assert((block->requests > 0) || fail_block(block));

    /*
      A short read marks the block erroneous; callers must check
      BLOCK_ERROR before using the buffer.
      NOTE(review): this relies on my_pread()'s failure return being
      numerically smaller than min_length — verify against the mysys
      my_pread contract for MYF(0).
    */
    if (got_length < min_length)
      block->status|= BLOCK_ERROR;
    else
    {
      block->status|= BLOCK_READ;
      block->length= (int)got_length;
      /*
        Do not set block->offset here. If this block is marked
        BLOCK_CHANGED later, we want to flush only the modified part. So
        only a writer may set block->offset down from
        keycache->key_cache_block_size.
      */
    }
    /* Signal that all pending requests for this page now can be processed */
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads that submitted secondary
      requests. At this point it could happen that the cache block is
      not yet assigned to the hash_link for the requested file block.
      But at awake from the wait this should be the case. Unfortunately
      we cannot assert this here because we do not know the hash_link
      for the requested file block nor the file and position. So we have
      to assert this in the caller.
    */
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock,
                  thread_var);
  }
}
2255
2256
2257 /*
2258 Read a block of data from a cached file into a buffer;
2259
2260 SYNOPSIS
2261
2262 key_cache_read()
2263 keycache pointer to a key cache data structure
2264 thread_var pointer to thread specific variables
2265 file handler for the file for the block of data to be read
2266 filepos position of the block of data in the file
2267 level determines the weight of the data
2268 buff buffer to where the data must be placed
2269 length length of the buffer
2270 block_length length of the block in the key cache buffer
2271 return_buffer return pointer to the key cache buffer with the data
2272
2273 RETURN VALUE
2274     Returns address from where the data is placed if successful, 0 - otherwise.
2275
2276 NOTES.
2277 The function ensures that a block of data of size length from file
2278 positioned at filepos is in the buffers for some key cache blocks.
2279 Then the function either copies the data into the buffer buff, or,
2280 if return_buffer is TRUE, it just returns the pointer to the key cache
2281 buffer with the data.
2282 Filepos must be a multiple of 'block_length', but it doesn't
2283 have to be a multiple of key_cache_block_size;
2284 */
2285
uchar *key_cache_read(KEY_CACHE *keycache,
                      st_keycache_thread_var *thread_var,
                      File file, my_off_t filepos, int level,
                      uchar *buff, uint length,
                      uint block_length MY_ATTRIBUTE((unused)),
                      int return_buffer MY_ATTRIBUTE((unused)))
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  uchar *start= buff;  /* remember caller's buffer for the success return */
  DBUG_ENTER("key_cache_read");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file, (ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;
    uint offset;       /* byte offset of filepos within its cache block */
    int page_st;

    if (MYSQL_KEYCACHE_READ_START_ENABLED())
    {
      MYSQL_KEYCACHE_READ_START(my_filename(file), length,
                                (ulong) (keycache->blocks_used *
                                         keycache->key_cache_block_size),
                                (ulong) (keycache->blocks_unused *
                                         keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    mysql_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase read requests are allowed to bypass the cache for
      blocks not in the cache. find_key_block() returns NULL in this
      case.

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache, I/O is done
      in chunks of key_cache_block_size. Every chunk tries to use a
      cache block first. If the block size changes in the middle, a
      block could be missed and old data could be read.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock,
                    thread_var);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Read data in key_cache_block_size increments */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
      {
        goto no_key_cache;
      }
      /* Start reading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not read beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      assert(read_length > 0);

      /*
        A pointer directly into the cache buffer can only be returned
        for an aligned request that fits in one cache block.
      */
      if (block_length > keycache->key_cache_block_size || offset)
        return_buffer=0;

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;

      MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);

      block= find_key_block(keycache, thread_var, file, filepos, level, 0,
                            &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Read directly from file.
        */
        keycache->global_cache_read++;
        mysql_mutex_unlock(&keycache->cache_lock);
        error= (my_pread(file, (uchar*) buff, read_length,
                         filepos + offset, MYF(MY_NABP)) != 0);
        mysql_mutex_lock(&keycache->cache_lock);
        goto next_block;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st != PAGE_READ)
        {
          MYSQL_KEYCACHE_READ_MISS();
          /* The requested page is to be read into the block buffer */
          read_block(keycache, thread_var, block,
                     keycache->key_cache_block_size, read_length+offset,
                     (my_bool)(page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          assert(keycache->can_be_used);
          assert(block->hash_link->file == file);
          assert(block->hash_link->diskpos == filepos);
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (block->length < read_length + offset)
        {
          /*
            Impossible if nothing goes wrong:
            this could only happen if we are using a file with
            small key blocks and are trying to read outside the file
          */
          set_my_errno(-1);
          block->status|= BLOCK_ERROR;
        }
        else
        {
          MYSQL_KEYCACHE_READ_HIT();
        }
      }

      /* block status may have added BLOCK_ERROR in the above 'if'. */
      if (!(block->status & BLOCK_ERROR))
      {
        {
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
          /*
            The copy can be done without the cache lock; the request
            registered on the block keeps it from being evicted.
          */
          mysql_mutex_unlock(&keycache->cache_lock);

          /* Copy data from the cache buffer */
          memcpy(buff, block->buffer+offset, (size_t) read_length);

          mysql_mutex_lock(&keycache->cache_lock);
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
      }

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_read_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, thread_var, block);
        error= 1;
        break;
      }

    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;  /* all iterations after the first are block-aligned */

    } while ((length-= read_length));
    if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
    {
      MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
                                        keycache->key_cache_block_size),
                               (ulong) (keycache->blocks_unused *
                                        keycache->key_cache_block_size));
    }
    goto end;
  }

no_key_cache:
  /* Key cache is not used */

  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;

  /* Drop the lock around the direct file read, if we hold it. */
  if (locked_and_incremented)
    mysql_mutex_unlock(&keycache->cache_lock);
  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
    error= 1;
  if (locked_and_incremented)
    mysql_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    mysql_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_PRINT("exit", ("error: %d", error ));
  DBUG_RETURN(error ? (uchar*) 0 : start);
}
2495
2496
2497 /*
2498 Insert a block of file data from a buffer into key cache
2499
2500 SYNOPSIS
2501 key_cache_insert()
2502 keycache pointer to a key cache data structure
2503 thread_var pointer to thread specific variables
2504 file handler for the file to insert data from
2505 filepos position of the block of data in the file to insert
2506 level determines the weight of the data
2507 buff buffer to read data from
2508 length length of the data in the buffer
2509
2510 NOTES
2511     This is used by MyISAM to move all blocks from an index file to the key
2512     cache.
2513
2514 RETURN VALUE
2515 0 if a success, 1 - otherwise.
2516 */
2517
int key_cache_insert(KEY_CACHE *keycache,
                     st_keycache_thread_var *thread_var,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  int error= 0;
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;
    uint offset;       /* byte offset of filepos within its cache block */
    int page_st;
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    mysql_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
      goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
        goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      assert(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, thread_var, file, filepos, level, 0,
                            &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
            ((page_st == PAGE_TO_BE_READ) &&
             (offset || (read_length < keycache->key_cache_block_size))))
        {
          /*
            Either

            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block() executes
            the correct wait when called with primary == FALSE.

            Or

            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block(keycache, thread_var, block,
                     keycache->key_cache_block_size,
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          assert(keycache->can_be_used);
          assert(block->hash_link->file == file);
          assert(block->hash_link->diskpos == filepos);
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          assert(block->hash_link->requests);
          assert(block->status & BLOCK_IN_USE);
          assert((page_st == PAGE_TO_BE_READ) ||
                 (block->status & BLOCK_READ));

          mysql_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */

          /* Copy data from buff */
          memcpy(block->buffer+offset, buff, (size_t) read_length);

          mysql_mutex_lock(&keycache->cache_lock);
          assert(block->status & BLOCK_IN_USE);
          assert((page_st == PAGE_TO_BE_READ) ||
                 (block->status & BLOCK_READ));
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          assert((page_st == PAGE_READ) &&
                 (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        assert(block->hash_link->file == file);
        assert(block->hash_link->diskpos == filepos);
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, thread_var, block);
        error= 1;
        break;
      }

      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;  /* all iterations after the first are block-aligned */

    } while ((length-= read_length));

  no_key_cache:
    /* cache_lock is held on every path that reaches this label. */
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    mysql_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
2731
2732
2733 /*
2734 Write a buffer into a cached file.
2735
2736 SYNOPSIS
2737
2738 key_cache_write()
2739 keycache pointer to a key cache data structure
2740 thread_var pointer to thread specific variables
2741 file handler for the file to write data to
2742 filepos position in the file to write data to
2743 level determines the weight of the data
2744 buff buffer with the data
2745 length length of the buffer
      dont_write      if it is 0 then all dirty pages involved in writing
                      should have been flushed from key cache
2748
2749 RETURN VALUE
2750 0 if a success, 1 - otherwise.
2751
2752 NOTES.
2753 The function copies the data of size length from buff into buffers
2754 for key cache blocks that are assigned to contain the portion of
2755 the file starting with position filepos.
2756 It ensures that this data is flushed to the file if dont_write is FALSE.
2757 Filepos must be a multiple of 'block_length', but it doesn't
2758 have to be a multiple of key_cache_block_size;
2759
2760 dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
2761 */
2762
int key_cache_write(KEY_CACHE *keycache,
                    st_keycache_thread_var *thread_var,
                    File file, my_off_t filepos, int level,
                    uchar *buff, uint length,
                    uint block_length MY_ATTRIBUTE((unused)),
                    int dont_write)
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  DBUG_ENTER("key_cache_write");
  DBUG_PRINT("enter",
             ("fd: %u pos: %lu length: %u block_length: %u"
              " key_block_length: %u",
              (uint) file, (ulong) filepos, length, block_length,
              keycache ? keycache->key_cache_block_size : 0));

  if (!dont_write)
  {
    /* purecov: begin inspected */
    /* Not used in the server. */
    /* Force writing from buff into disk. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);
    /* purecov: end */
  }

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
    {
      MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
                                 (ulong) (keycache->blocks_used *
                                          keycache->key_cache_block_size),
                                 (ulong) (keycache->blocks_unused *
                                          keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    mysql_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase write requests can modify dirty blocks that are
      not yet in flush. Otherwise they are allowed to bypass the cache.
      find_key_block() returns NULL in both cases (clean blocks and
      non-cached blocks).

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache I/O is done in
      chunks of key_cache_block_size. Every chunk tries to use a cache
      block first. If the block size changes in the middle, a block
      could be missed and data could be written below a cached block.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock,
                    thread_var);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Write data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
        goto no_key_cache;

      MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
      /* Start writing at the beginning of the cache block. */
      filepos-= offset;
      /*
        Do not write beyond the end of the cache block. Despite its
        name, read_length is the number of bytes copied into this
        cache block in the current iteration.
      */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      assert(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_w_requests++;
      block= find_key_block(keycache, thread_var, file, filepos, level, 1,
                            &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Write directly to file.
        */
        if (dont_write)
        {
          /* Used in the server. */
          keycache->global_cache_write++;
          /* Release the lock for the duration of the file I/O. */
          mysql_mutex_unlock(&keycache->cache_lock);
          if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
                        MYF(MY_NABP | MY_WAIT_IF_FULL)))
            error=1;
          mysql_mutex_lock(&keycache->cache_lock);
        }
        goto next_block;
      }
      /*
        Prevent block from flushing and from being selected to be
        freed. This must be set when we release the cache_lock.
        However, we must not set the status of the block before it is
        assigned to this file/pos.
      */
      if (page_st != PAGE_WAIT_TO_BE_READ)
        block->status|= BLOCK_FOR_UPDATE;
      /*
        We must read the file block first if it is not yet in the cache
        and we do not replace all of its contents.

        In cases where the cache block is big enough to contain (parts
        of) index blocks of different indexes, our request can be
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
        reading the file block. If the read completes after us, it
        overwrites our new contents with the old contents. So we have to
        wait for the other thread to complete the read of this block.
        read_block() takes care for the wait.
      */
      if (!(block->status & BLOCK_ERROR) &&
          ((page_st == PAGE_TO_BE_READ &&
            (offset || read_length < keycache->key_cache_block_size)) ||
           (page_st == PAGE_WAIT_TO_BE_READ)))
      {
        /*
          If our write covers the whole tail of the block, only the
          first 'offset' bytes need to be read from file; otherwise
          read the full block.
        */
        read_block(keycache, thread_var, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
                   offset, (page_st == PAGE_TO_BE_READ));
        assert(keycache->can_be_used);
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /*
          Prevent block from flushing and from being selected to be
          freed. This must be set when we release the cache_lock.
          Here we set it in case we could not set it above.
        */
        block->status|= BLOCK_FOR_UPDATE;
      }
      /*
        The block should always be assigned to the requested file block
        here. It need not be BLOCK_READ when overwriting the whole block.
      */
      assert(block->hash_link->file == file);
      assert(block->hash_link->diskpos == filepos);
      assert(block->status & BLOCK_IN_USE);
      assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
      /*
        The block to be written must not be marked BLOCK_REASSIGNED.
        Otherwise it could be freed in dirty state or reused without
        another flush during eviction. It must also not be in flush.
        Otherwise the old contents may have been flushed already and
        the flusher could clear BLOCK_CHANGED without flushing the
        new changes again.
      */
      assert(!(block->status & BLOCK_REASSIGNED));

      while (block->status & BLOCK_IN_FLUSHWRITE)
      {
        /*
          Another thread is flushing the block. It was dirty already.
          Wait until the block is flushed to file. Otherwise we could
          modify the buffer contents just while it is written to file.
          An unpredictable file block contents would be the result.
          While we wait, several things can happen to the block,
          including another flush. But the block cannot be reassigned to
          another hash_link until we release our request on it.
        */
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock,
                      thread_var);
        assert(keycache->can_be_used);
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /* Still must not be marked for free. */
        assert(!(block->status & BLOCK_REASSIGNED));
        assert(block->hash_link && (block->hash_link->block == block));
      }

      /*
        We could perhaps release the cache_lock during access of the
        data like in the other functions. Locks outside of the key cache
        assure that readers and a writer do not access the same range of
        data. Parallel accesses should happen only if the cache block
        contains multiple index block(fragment)s. So different parts of
        the buffer would be read/written. An attempt to flush during
        memcpy() is prevented with BLOCK_FOR_UPDATE.
      */
      if (!(block->status & BLOCK_ERROR))
      {
        mysql_mutex_unlock(&keycache->cache_lock);
        memcpy(block->buffer+offset, buff, (size_t) read_length);

        mysql_mutex_lock(&keycache->cache_lock);
      }

      if (!dont_write)
      {
        /* Not used in the server. buff has been written to disk at start. */
        if ((block->status & BLOCK_CHANGED) &&
            (!offset && read_length >= keycache->key_cache_block_size))
          link_to_file_list(keycache, block, block->hash_link->file, 1);
      }
      else if (! (block->status & BLOCK_CHANGED))
        link_to_changed_list(keycache, block);
      /* The buffer now holds valid data for at least [offset, offset+read_length). */
      block->status|=BLOCK_READ;
      /*
        Allow block to be selected to be freed. Since it is marked
        BLOCK_CHANGED too, it won't be selected to be freed without
        a flush.
      */
      block->status&= ~BLOCK_FOR_UPDATE;
      set_if_smaller(block->offset, offset);
      set_if_bigger(block->length, read_length+offset);

      /* Threads may be waiting for the changes to be complete. */
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

      /*
        If only a part of the cache block is to be replaced, and the
        rest has been read from file, then the cache lock has been
        released for I/O and it could be possible that another thread
        wants to evict or free the block and waits for it to be
        released. So we must not just decrement hash_link->requests, but
        also wake a waiting thread.
      */
      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_write_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        /* Pretend a "clean" block to avoid complications. */
        block->status&= ~(BLOCK_CHANGED);
        free_block(keycache, thread_var, block);
        error= 1;
        break;
      }

    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first block of the request can have a non-zero offset. */
      offset= 0;

    } while ((length-= read_length));
    goto end;
  }

no_key_cache:
  /* Key cache is not used */
  if (dont_write)
  {
    /* Used in the server. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (locked_and_incremented)
      mysql_mutex_unlock(&keycache->cache_lock);
    if (my_pwrite(file, (uchar*) buff, length, filepos,
                  MYF(MY_NABP | MY_WAIT_IF_FULL)))
      error=1;
    if (locked_and_incremented)
      mysql_mutex_lock(&keycache->cache_lock);
  }

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    mysql_mutex_unlock(&keycache->cache_lock);
  }

  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
  {
    MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
                                       keycache->key_cache_block_size),
                              (ulong) (keycache->blocks_unused *
                                       keycache->key_cache_block_size));
  }

  DBUG_RETURN(error);
}
3065
3066
3067 /*
3068 Free block.
3069
3070 SYNOPSIS
3071 free_block()
3072 keycache Pointer to a key cache data structure
3073 thread_var Pointer to thread specific variables
3074 block Pointer to the block to free
3075
3076 DESCRIPTION
3077 Remove reference to block from hash table.
3078 Remove block from the chain of clean blocks.
3079 Add block to the free list.
3080
3081 NOTE
3082 Block must not be free (status == 0).
3083 Block must not be in free_block_list.
3084 Block must not be in the LRU ring.
3085 Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3086 Block must not be in free (BLOCK_REASSIGNED).
3087 Block must not be in flush (BLOCK_IN_FLUSH).
3088 Block must not be dirty (BLOCK_CHANGED).
3089 Block must not be in changed_blocks (dirty) hash.
3090 Block must be in file_blocks (clean) hash.
3091 Block must refer to a hash_link.
3092 Block must have a request registered on it.
3093 */
3094
static void free_block(KEY_CACHE *keycache,
                       st_keycache_thread_var *thread_var,
                       BLOCK_LINK *block)
{
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  assert((block->status & BLOCK_IN_USE) &&
         !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                            BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                            BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  assert(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  assert(!block->next_used && !block->prev_used);
  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an assert to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  assert(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block, thread_var);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    assert(block->hash_link && block->hash_link->block == block);
    assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
                             BLOCK_REASSIGNED)) &&
           !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                              BLOCK_IN_FLUSH | BLOCK_CHANGED |
                              BLOCK_FOR_UPDATE)));
    assert(block->prev_changed && *block->prev_changed == block);
    assert(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operation in this case. Assert that there are no other requests
    registered.
  */
  assert(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Error blocks are not put into the LRU ring. */
  if (!(block->status & BLOCK_ERROR))
  {
    /* Here the block must be in the LRU ring. Unlink it again. */
    assert(block->next_used && block->prev_used &&
           *block->prev_used == block);
    unlink_block(keycache, block);
  }
  /* A freed block no longer counts towards the warm sub-chain. */
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  /* Reset to the "never used" state: no status, no valid contents. */
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;

  /* Enforced by unlink_changed(), but just to be sure. */
  assert(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  assert(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list (a stack; push on top). */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
3216
3217
/*
  qsort() comparator: order blocks by ascending file position of their
  hash links, so a write burst hits the disk sequentially.
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  my_off_t pos_a= (*a)->hash_link->diskpos;
  my_off_t pos_b= (*b)->hash_link->diskpos;

  if (pos_a < pos_b)
    return -1;
  if (pos_a > pos_b)
    return 1;
  return 0;
}
3223
3224
3225 /*
3226 Flush a portion of changed blocks to disk,
3227 free used blocks if requested
3228 */
3229
static int flush_cached_blocks(KEY_CACHE *keycache,
                               st_keycache_thread_var *thread_var,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;
  uint count= (uint) (end-cache);

  /* Don't lock the cache during the flush */
  mysql_mutex_unlock(&keycache->cache_lock);
  /*
    As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
    we are guaranteed no thread will change them.
    Sorting by disk position lets the writes below proceed
    sequentially on disk.
  */
  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  mysql_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;

    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      assert(block->hash_link);
      assert(block->hash_link->block == block);
      assert(block->hash_link->file == file);
      assert((block->status & ~BLOCK_IN_EVICTION) ==
             (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      /* Release the lock for the duration of the disk write. */
      mysql_mutex_unlock(&keycache->cache_lock);
      /* Write only the valid part of the buffer: [offset, length). */
      error= (int)my_pwrite(file, block->buffer+block->offset,
                            block->length - block->offset,
                            block->hash_link->diskpos+ block->offset,
                            MYF(MY_NABP | MY_WAIT_IF_FULL));
      mysql_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        /* Remember the first error; errno may be 0 for short writes. */
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      assert(block->hash_link);
      assert(block->hash_link->block == block);
      assert(block->hash_link->file == file);
      assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
             (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let possible waiting requests to write to the block page proceed.
      It might happen only during an operation to resize the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, thread_var, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }

  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
3332
3333
3334 /*
3335 Flush all key blocks for a file to disk, but don't do any mutex locks.
3336
3337 SYNOPSIS
3338 flush_key_blocks_int()
3339 keycache pointer to a key cache data structure
3340 thread_var pointer to thread specific variables
3341 file handler for the file to flush to
3342 flush_type type of the flush
3343
3344 NOTES
3345 This function doesn't do any mutex locks because it needs to be called both
    from flush_key_blocks and flush_all_key_blocks (the latter one does the
3347 mutex lock in the resize_key_cache() function).
3348
3349 We do only care about changed blocks that exist when the function is
3350 entered. We do not guarantee that all changed blocks of the file are
3351 flushed if more blocks change while this function is running.
3352
3353 RETURN
3354 0 ok
3355 1 error
3356 */
3357
flush_key_blocks_int(KEY_CACHE * keycache,st_keycache_thread_var * thread_var,File file,enum flush_type type)3358 static int flush_key_blocks_int(KEY_CACHE *keycache,
3359 st_keycache_thread_var *thread_var,
3360 File file, enum flush_type type)
3361 {
3362 BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3363 int last_errno= 0;
3364 int last_errcnt= 0;
3365 DBUG_ENTER("flush_key_blocks_int");
3366 DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
3367 file, keycache->blocks_used, keycache->blocks_changed));
3368
3369 cache= cache_buff;
3370 if (keycache->disk_blocks > 0)
3371 {
3372 /* Key cache exists and flush is not disabled */
3373 int error= 0;
3374 uint count= FLUSH_CACHE;
3375 BLOCK_LINK **pos,**end;
3376 BLOCK_LINK *first_in_switch= NULL;
3377 BLOCK_LINK *last_in_flush;
3378 BLOCK_LINK *last_for_update;
3379 BLOCK_LINK *block, *next;
3380 #ifndef NDEBUG
3381 uint cnt=0;
3382 #endif
3383
3384 if (type != FLUSH_IGNORE_CHANGED)
3385 {
3386 /*
3387 Count how many key blocks we have to cache to be able
3388 to flush all dirty pages with minimum seek moves
3389 */
3390 count= 0;
3391 for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3392 block ;
3393 block= block->next_changed)
3394 {
3395 if ((block->hash_link->file == file) &&
3396 !(block->status & BLOCK_IN_FLUSH))
3397 {
3398 count++;
3399 assert(count<= keycache->blocks_used);
3400 }
3401 }
3402 /*
3403 Allocate a new buffer only if its bigger than the one we have.
3404 Assure that we always have some entries for the case that new
3405 changed blocks appear while we need to wait for something.
3406 */
3407 if ((count > FLUSH_CACHE) &&
3408 !(cache= (BLOCK_LINK**) my_malloc(key_memory_KEY_CACHE,
3409 sizeof(BLOCK_LINK*)*count,
3410 MYF(0))))
3411 cache= cache_buff;
3412 /*
3413 After a restart there could be more changed blocks than now.
3414 So we should not let count become smaller than the fixed buffer.
3415 */
3416 if (cache == cache_buff)
3417 count= FLUSH_CACHE;
3418 }
3419
3420 /* Retrieve the blocks and write them to a buffer to be flushed */
3421 restart:
3422 last_in_flush= NULL;
3423 last_for_update= NULL;
3424 end= (pos= cache)+count;
3425 for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3426 block ;
3427 block= next)
3428 {
3429 #ifndef NDEBUG
3430 cnt++;
3431 assert(cnt <= keycache->blocks_used);
3432 #endif
3433 next= block->next_changed;
3434 if (block->hash_link->file == file)
3435 {
3436 if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3437 {
3438 /*
3439 Note: The special handling of BLOCK_IN_SWITCH is obsolete
3440 since we set BLOCK_IN_FLUSH if the eviction includes a
3441 flush. It can be removed in a later version.
3442 */
3443 if (!(block->status & BLOCK_IN_SWITCH))
3444 {
3445 /*
3446 We care only for the blocks for which flushing was not
3447 initiated by another thread and which are not in eviction.
3448 Registering a request on the block unlinks it from the LRU
3449 ring and protects against eviction.
3450 */
3451 reg_requests(keycache, block, 1);
3452 if (type != FLUSH_IGNORE_CHANGED)
3453 {
3454 /* It's not a temporary file */
3455 if (pos == end)
3456 {
3457 /*
3458 This should happen relatively seldom. Remove the
3459 request because we won't do anything with the block
3460 but restart and pick it again in the next iteration.
3461 */
3462 unreg_request(keycache, block, 0);
3463 /*
3464 This happens only if there is not enough
3465 memory for the big block
3466 */
3467 if ((error= flush_cached_blocks(keycache, thread_var, file,
3468 cache, end, type)))
3469 {
3470 /* Do not loop infinitely trying to flush in vain. */
3471 if ((last_errno == error) && (++last_errcnt > 5))
3472 goto err;
3473 last_errno= error;
3474 }
3475 /*
3476 Restart the scan as some other thread might have changed
3477 the changed blocks chain: the blocks that were in switch
3478 state before the flush started have to be excluded
3479 */
3480 goto restart;
3481 }
3482 /*
3483 Mark the block with BLOCK_IN_FLUSH in order not to let
3484 other threads to use it for new pages and interfere with
3485 our sequence of flushing dirty file pages. We must not
3486 set this flag before actually putting the block on the
3487 write burst array called 'cache'.
3488 */
3489 block->status|= BLOCK_IN_FLUSH;
3490 /* Add block to the array for a write burst. */
3491 *pos++= block;
3492 }
3493 else
3494 {
3495 /* It's a temporary file */
3496 assert(!(block->status & BLOCK_REASSIGNED));
3497 /*
3498 free_block() must not be called with BLOCK_CHANGED. Note
3499 that we must not change the BLOCK_CHANGED flag outside of
3500 link_to_file_list() so that it is always in the correct
3501 queue and the *blocks_changed counters are correct.
3502 */
3503 link_to_file_list(keycache, block, file, 1);
3504 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3505 {
3506 /* A request has been registered against the block above. */
3507 free_block(keycache, thread_var, block);
3508 }
3509 else
3510 {
3511 /*
3512 Link the block into the LRU ring if it's the last
3513 submitted request for the block. This enables eviction
3514 for the block. A request has been registered against
3515 the block above.
3516 */
3517 unreg_request(keycache, block, 1);
3518 }
3519 }
3520 }
3521 else
3522 {
3523 /*
3524 Link the block into a list of blocks 'in switch'.
3525
3526 WARNING: Here we introduce a place where a changed block
3527 is not in the changed_blocks hash! This is acceptable for
3528 a BLOCK_IN_SWITCH. Never try this for another situation.
3529 Other parts of the key cache code rely on changed blocks
3530 being in the changed_blocks hash.
3531 */
3532 unlink_changed(block);
3533 link_changed(block, &first_in_switch);
3534 }
3535 }
3536 else if (type != FLUSH_KEEP)
3537 {
3538 /*
3539 During the normal flush at end of statement (FLUSH_KEEP) we
3540 do not need to ensure that blocks in flush or update by
3541 other threads are flushed. They will be flushed by them
3542 later. In all other cases we must assure that we do not have
3543 any changed block of this file in the cache when this
3544 function returns.
3545 */
3546 if (block->status & BLOCK_IN_FLUSH)
3547 {
3548 /* Remember the last block found to be in flush. */
3549 last_in_flush= block;
3550 }
3551 else
3552 {
3553 /* Remember the last block found to be selected for update. */
3554 last_for_update= block;
3555 }
3556 }
3557 }
3558 }
3559 if (pos != cache)
3560 {
3561 if ((error=
3562 flush_cached_blocks(keycache, thread_var, file, cache, pos, type)))
3563 {
        /* Do not loop infinitely trying to flush in vain. */
3565 if ((last_errno == error) && (++last_errcnt > 5))
3566 goto err;
3567 last_errno= error;
3568 }
3569 /*
3570 Do not restart here during the normal flush at end of statement
3571 (FLUSH_KEEP). We have now flushed at least all blocks that were
3572 changed when entering this function. In all other cases we must
3573 assure that we do not have any changed block of this file in the
3574 cache when this function returns.
3575 */
3576 if (type != FLUSH_KEEP)
3577 goto restart;
3578 }
3579 if (last_in_flush)
3580 {
3581 /*
3582 There are no blocks to be flushed by this thread, but blocks in
3583 flush by other threads. Wait until one of the blocks is flushed.
3584 Re-check the condition for last_in_flush. We may have unlocked
3585 the cache_lock in flush_cached_blocks(). The state of the block
3586 could have changed.
3587 */
3588 if (last_in_flush->status & BLOCK_IN_FLUSH)
3589 wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3590 &keycache->cache_lock, thread_var);
3591 /* Be sure not to lose a block. They may be flushed in random order. */
3592 goto restart;
3593 }
3594 if (last_for_update)
3595 {
3596 /*
3597 There are no blocks to be flushed by this thread, but blocks for
3598 update by other threads. Wait until one of the blocks is updated.
3599 Re-check the condition for last_for_update. We may have unlocked
3600 the cache_lock in flush_cached_blocks(). The state of the block
3601 could have changed.
3602 */
3603 if (last_for_update->status & BLOCK_FOR_UPDATE)
3604 wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3605 &keycache->cache_lock, thread_var);
3606 /* The block is now changed. Flush it. */
3607 goto restart;
3608 }
3609
3610 /*
3611 Wait until the list of blocks in switch is empty. The threads that
3612 are switching these blocks will relink them to clean file chains
3613 while we wait and thus empty the 'first_in_switch' chain.
3614 */
3615 while (first_in_switch)
3616 {
3617 #ifndef NDEBUG
3618 cnt= 0;
3619 #endif
3620 wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3621 &keycache->cache_lock, thread_var);
3622 #ifndef NDEBUG
3623 cnt++;
3624 assert(cnt <= keycache->blocks_used);
3625 #endif
3626 /*
3627 Do not restart here. We have flushed all blocks that were
3628 changed when entering this function and were not marked for
3629 eviction. Other threads have now flushed all remaining blocks in
3630 the course of their eviction.
3631 */
3632 }
3633
3634 if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3635 {
3636 BLOCK_LINK *last_for_update= NULL;
3637 BLOCK_LINK *last_in_switch= NULL;
3638 uint total_found= 0;
3639 uint found;
3640
3641 /*
3642 Finally free all clean blocks for this file.
3643 During resize this may be run by two threads in parallel.
3644 */
3645 do
3646 {
3647 found= 0;
3648 for (block= keycache->file_blocks[FILE_HASH(file)] ;
3649 block ;
3650 block= next)
3651 {
3652 /* Remember the next block. After freeing we cannot get at it. */
3653 next= block->next_changed;
3654
3655 /* Changed blocks cannot appear in the file_blocks hash. */
3656 assert(!(block->status & BLOCK_CHANGED));
3657 if (block->hash_link->file == file)
3658 {
3659 /* We must skip blocks that will be changed. */
3660 if (block->status & BLOCK_FOR_UPDATE)
3661 {
3662 last_for_update= block;
3663 continue;
3664 }
3665
3666 /*
3667 We must not free blocks in eviction (BLOCK_IN_EVICTION |
3668 BLOCK_IN_SWITCH) or blocks intended to be freed
3669 (BLOCK_REASSIGNED).
3670 */
3671 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3672 BLOCK_REASSIGNED)))
3673 {
3674 struct st_hash_link *next_hash_link= NULL;
3675 my_off_t next_diskpos= 0;
3676 File next_file= 0;
3677 uint next_status= 0;
3678 uint hash_requests= 0;
3679
3680 total_found++;
3681 found++;
3682 assert(found <= keycache->blocks_used);
3683
3684 /*
3685 Register a request. This unlinks the block from the LRU
3686 ring and protects it against eviction. This is required
3687 by free_block().
3688 */
3689 reg_requests(keycache, block, 1);
3690
3691 /*
3692 free_block() may need to wait for readers of the block.
3693 This is the moment where the other thread can move the
3694 'next' block from the chain. free_block() needs to wait
3695 if there are requests for the block pending.
3696 */
3697 if (next && (hash_requests= block->hash_link->requests))
3698 {
3699 /* Copy values from the 'next' block and its hash_link. */
3700 next_status= next->status;
3701 next_hash_link= next->hash_link;
3702 next_diskpos= next_hash_link->diskpos;
3703 next_file= next_hash_link->file;
3704 assert(next == next_hash_link->block);
3705 }
3706
3707 free_block(keycache, thread_var, block);
3708 /*
3709 If we had to wait and the state of the 'next' block
3710 changed, break the inner loop. 'next' may no longer be
3711 part of the current chain.
3712
3713 We do not want to break the loop after every free_block(),
3714 not even only after waits. The chain might be quite long
3715 and contain blocks for many files. Traversing it again and
3716 again to find more blocks for this file could become quite
3717 inefficient.
3718 */
3719 if (next && hash_requests &&
3720 ((next_status != next->status) ||
3721 (next_hash_link != next->hash_link) ||
3722 (next_file != next_hash_link->file) ||
3723 (next_diskpos != next_hash_link->diskpos) ||
3724 (next != next_hash_link->block)))
3725 break;
3726 }
3727 else
3728 {
3729 last_in_switch= block;
3730 }
3731 }
3732 } /* end for block in file_blocks */
3733 } while (found);
3734
3735 /*
3736 If any clean block has been found, we may have waited for it to
3737 become free. In this case it could be possible that another clean
3738 block became dirty. This is possible if the write request existed
3739 before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3740 */
3741 if (total_found)
3742 goto restart;
3743
3744 /*
3745 To avoid an infinite loop, wait until one of the blocks marked
3746 for update is updated.
3747 */
3748 if (last_for_update)
3749 {
3750 /* We did not wait. Block must not have changed status. */
3751 assert(last_for_update->status & BLOCK_FOR_UPDATE);
3752 wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3753 &keycache->cache_lock, thread_var);
3754 goto restart;
3755 }
3756
3757 /*
3758 To avoid an infinite loop wait until one of the blocks marked
3759 for eviction is switched.
3760 */
3761 if (last_in_switch)
3762 {
3763 /* We did not wait. Block must not have changed status. */
3764 assert(last_in_switch->status & (BLOCK_IN_EVICTION |
3765 BLOCK_IN_SWITCH |
3766 BLOCK_REASSIGNED));
3767 wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
3768 &keycache->cache_lock, thread_var);
3769 goto restart;
3770 }
3771
3772 } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
3773
3774 } /* if (keycache->disk_blocks > 0 */
3775
3776 err:
3777 if (cache != cache_buff)
3778 my_free(cache);
3779 if (last_errno)
3780 errno=last_errno; /* Return first error */
3781 DBUG_RETURN(last_errno != 0);
3782 }
3783
3784
3785 /*
3786 Flush all blocks for a file to disk
3787
3788 SYNOPSIS
3789
3790 flush_key_blocks()
3791 keycache pointer to a key cache data structure
3792 thread_var pointer to thread specific variables
3793 file handler for the file to flush to
3794 flush_type type of the flush
3795
3796 RETURN
3797 0 ok
3798 1 error
3799 */
3800
int flush_key_blocks(KEY_CACHE *keycache,
                     st_keycache_thread_var *thread_var,
                     File file, enum flush_type type)
{
  int result;
  DBUG_ENTER("flush_key_blocks");
  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));

  /* Nothing to do for a cache that was never initialized. */
  if (!keycache->key_cache_inited)
    DBUG_RETURN(0);

  mysql_mutex_lock(&keycache->cache_lock);
  if (keycache->disk_blocks <= 0)
  {
    /* While we waited for the lock, the keycache could have been ended. */
    result= 0;
  }
  else
  {
    /*
      Bump the resize counter around the real work so that a concurrent
      resize operation waits until this flush has finished.
    */
    inc_counter_for_resize_op(keycache);
    result= flush_key_blocks_int(keycache, thread_var, file, type);
    dec_counter_for_resize_op(keycache);
  }
  mysql_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(result);
}
3823
3824
3825 /*
3826 Flush all blocks in the key cache to disk.
3827
3828 SYNOPSIS
3829 flush_all_key_blocks()
3830 keycache pointer to key cache root structure
3831 thread_var pointer to thread specific variables
3832
3833 DESCRIPTION
3834
3835 Flushing of the whole key cache is done in two phases.
3836
3837 1. Flush all changed blocks, waiting for them if necessary. Loop
3838 until there is no changed block left in the cache.
3839
3840 2. Free all clean blocks. Normally this means free all blocks. The
3841 changed blocks were flushed in phase 1 and became clean. However we
3842 may need to wait for blocks that are read by other threads. While we
3843 wait, a clean block could become changed if that operation started
3844 before the resize operation started. To be safe we must restart at
3845 phase 1.
3846
3847 When we can run through the changed_blocks and file_blocks hashes
3848 without finding a block any more, then we are done.
3849
3850 Note that we hold keycache->cache_lock all the time unless we need
3851 to wait for something.
3852
3853 RETURN
3854 0 OK
3855 != 0 Error
3856 */
3857
static int flush_all_key_blocks(KEY_CACHE *keycache,
                                st_keycache_thread_var *thread_var)
{
  BLOCK_LINK *blk;
  uint freed_this_round;
  uint hits;
  uint bucket;
  DBUG_ENTER("flush_all_key_blocks");

  do
  {
    mysql_mutex_assert_owner(&keycache->cache_lock);
    freed_this_round= 0;

    /*
      Phase 1: Flush all changed blocks, waiting for them if necessary.
      Repeat until a full sweep of the changed_blocks hash finds nothing.
    */
    do
    {
      hits= 0;
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        /*
          A non-empty bucket yields a file via its first block; all
          changed blocks of that file are flushed at once, so this block
          cannot reappear here on the next sweep. New writes for blocks
          are not accepted during the flush. If several files share the
          bucket, one of them is handled per iteration of the outer
          phase-1 loop.
        */
        blk= keycache->changed_blocks[bucket];
        if (blk != NULL)
        {
          hits++;
          /*
            Write the dirty blocks out but keep them; they remain usable
            for reading until every other block has been flushed too.
          */
          if (flush_key_blocks_int(keycache, thread_var,
                                   blk->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      Phase 2: Free all clean blocks (normally: all blocks, as phase 1
      turned the changed ones clean). We may have to wait for blocks
      that other threads are reading; while waiting, a clean block can
      become dirty again if its write request predates the resize. In
      that case we must restart at phase 1.
    */
    do
    {
      hits= 0;
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        /*
          A non-empty bucket yields a file via its first block; all
          blocks of that file are freed at once, so this block cannot
          reappear here on the next sweep. If several files share the
          bucket, one of them is handled per iteration of the outer
          phase-2 loop.
        */
        blk= keycache->file_blocks[bucket];
        if (blk != NULL)
        {
          freed_this_round++;
          hits++;
          if (flush_key_blocks_int(keycache, thread_var,
                                   blk->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      If anything was freed in phase 2 we may have waited, so a clean
      block could have become dirty in the meantime — re-run both
      phases until a complete pass finds nothing at all.
    */
  } while (freed_this_round);

#ifndef NDEBUG
  /* At this point no block may remain in either hash. */
  for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
  {
    assert(!keycache->changed_blocks[bucket]);
    assert(!keycache->file_blocks[bucket]);
  }
#endif

  DBUG_RETURN(0);
}
3961
3962
3963 /*
3964 Reset the counters of a key cache.
3965
3966 SYNOPSIS
3967 reset_key_cache_counters()
3968 name the name of a key cache
      key_cache           pointer to the key cache to be reset
3970
3971 DESCRIPTION
3972 This procedure is used by process_key_caches() to reset the counters of all
3973 currently used key caches, both the default one and the named ones.
3974
3975 RETURN
3976 0 on success (always because it can't fail)
3977 */
3978
int reset_key_cache_counters(const char *name MY_ATTRIBUTE((unused)),
                             KEY_CACHE *key_cache)
{
  DBUG_ENTER("reset_key_cache_counters");
  if (key_cache->key_cache_inited)
  {
    DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
    /* Each counter backs the status variable named in its comment. */
    key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
    key_cache->global_cache_r_requests= 0; /* Key_read_requests */
    key_cache->global_cache_read= 0;       /* Key_reads */
    key_cache->global_cache_w_requests= 0; /* Key_write_requests */
    key_cache->global_cache_write= 0;      /* Key_writes */
  }
  else
  {
    /* An uninitialized cache has nothing to reset; still succeed. */
    DBUG_PRINT("info", ("Key cache %s not initialized.", name));
  }
  DBUG_RETURN(0);
}
3997
3998
3999 #if !defined(NDEBUG)
4000 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4001
/*
  Print the complete state of a suspicious block via the DBUG trace.

  Used by debug-time consistency checks (e.g. cache_empty()) when a
  block that ought to be free still carries state. Each field goes out
  through the F_B_PRT macro (DBUG_PRINT under tag "assert_fail").

  RETURN
    0  always — so the call can sit inside an assert expression and
       the assertion still fails after the state has been printed.
*/
static int fail_block(BLOCK_LINK *block)
{
  F_B_PRT("block->next_used: %lx\n", (ulong) block->next_used);
  F_B_PRT("block->prev_used: %lx\n", (ulong) block->prev_used);
  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
  F_B_PRT("block->hash_link: %lx\n", (ulong) block->hash_link);
  F_B_PRT("block->status: %u\n", block->status);
  F_B_PRT("block->length: %u\n", block->length);
  F_B_PRT("block->offset: %u\n", block->offset);
  F_B_PRT("block->requests: %u\n", block->requests);
  F_B_PRT("block->temperature: %u\n", block->temperature);
  return 0; /* Let the assert fail. */
}
4016
/*
  Print the complete state of a suspicious hash link via the DBUG trace.

  Companion to fail_block(): used by debug-time consistency checks
  (e.g. cache_empty()) when a hash link that ought to be free still
  carries state.

  RETURN
    0  always — so the call can sit inside an assert expression and
       the assertion still fails after the state has been printed.
*/
static int fail_hlink(HASH_LINK *hlink)
{
  F_B_PRT("hlink->next: %lx\n", (ulong) hlink->next);
  F_B_PRT("hlink->prev: %lx\n", (ulong) hlink->prev);
  F_B_PRT("hlink->block: %lx\n", (ulong) hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file: %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
4026
/*
  Debug check that the key cache holds no used blocks or hash links.

  Scans every block slot and every hash link slot and reports each one
  that still carries state (non-zero status/requests, or an attached
  hash link/block). Diagnostics go through my_message_local() and the
  fail_block()/fail_hlink() DBUG dumps.

  RETURN
    1  cache is empty, or cache is not active (disk_blocks <= 0)
    0  at least one block or hash link is still in use
*/
static int cache_empty(KEY_CACHE *keycache)
{
  int errcnt= 0;
  int idx;
  if (keycache->disk_blocks <= 0)
    return 1;
  /* Check all block slots for leftover state. */
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      /* %d matches the signed 'idx' (was %u: -Wformat type mismatch). */
      my_message_local(INFORMATION_LEVEL, "block index: %d", idx);
      fail_block(block);
      errcnt++;
    }
  }
  /* Check all hash link slots for leftover state. */
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      /* %d matches the signed 'idx' (was %u: -Wformat type mismatch). */
      my_message_local(INFORMATION_LEVEL, "hash_link index: %d", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    my_message_local(INFORMATION_LEVEL, "blocks: %d  used: %lu",
                     keycache->disk_blocks, keycache->blocks_used);
    my_message_local(INFORMATION_LEVEL, "hash_links: %d  used: %d",
                     keycache->hash_links, keycache->hash_links_used);
  }
  return !errcnt;
}
4062 #endif
4063
4064