1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16 /**
17 @file
18 These functions handle keyblock caching for ISAM and MyISAM tables.
19
20 One cache can handle many files.
21 It must contain buffers of the same blocksize.
22 init_key_cache() should be used to init cache handler.
23
24 The free list (free_block_list) is a stack like structure.
25 When a block is freed by free_block(), it is pushed onto the stack.
26 When a new block is required it is first tried to pop one from the stack.
27 If the stack is empty, it is tried to get a never-used block from the pool.
28 If this is empty too, then a block is taken from the LRU ring, flushing it
29 to disk, if necessary. This is handled in find_key_block().
30 With the new free list, the blocks can have three temperatures:
31 hot, warm and cold (which is free). This is remembered in the block header
32 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
33 temperature is necessary to correctly count the number of warm blocks,
34 which is required to decide when blocks are allowed to become hot. Whenever
35 a block is inserted to another (sub-)chain, we take the old and new
36 temperature into account to decide if we got one more or less warm block.
37 blocks_unused is the sum of never used blocks in the pool and of currently
38 free blocks. blocks_used is the number of blocks fetched from the pool and
39 as such gives the maximum number of in-use blocks at any time.
40 */
41
42 /*
43 Key Cache Locking
44 =================
45
46 All key cache locking is done with a single mutex per key cache:
47 keycache->cache_lock. This mutex is locked almost all the time
48 when executing code in this file (mf_keycache.c).
49 However it is released for I/O and some copy operations.
50
51 The cache_lock is also released when waiting for some event. Waiting
52 and signalling is done via condition variables. In most cases the
53 thread waits on its thread->suspend condition variable. Every thread
54 has a my_thread_var structure, which contains this variable and a
55 '*next' and '**prev' pointer. These pointers are used to insert the
56 thread into a wait queue.
57
58 A thread can wait for one block and thus be in one wait queue at a
59 time only.
60
61 Before starting to wait on its condition variable with
62 mysql_cond_wait(), the thread enters itself to a specific wait queue
63 with link_into_queue() (double linked with '*next' + '**prev') or
64 wait_on_queue() (single linked with '*next').
65
66 Another thread, when releasing a resource, looks up the waiting thread
67 in the related wait queue. It sends a signal with
68 mysql_cond_signal() to the waiting thread.
69
70 NOTE: Depending on the particular wait situation, either the sending
71 thread removes the waiting thread from the wait queue with
72 unlink_from_queue() or release_whole_queue() respectively, or the waiting
73 thread removes itself.
74
75 There is one exception from this locking scheme when one thread wants
76 to reuse a block for some other address. This works by first marking
77 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
78 threads that are reading the block to finish. Each block has a
79 reference to a condition variable (condvar). It holds a reference to
80 the thread->suspend condition variable for the waiting thread (if such
81 a thread exists). When that thread is signaled, the reference is
82 cleared. The number of readers of a block is registered in
83 block->hash_link->requests. See wait_for_readers() / remove_reader()
84 for details. This is similar to the above, but it clearly means that
85 only one thread can wait for a particular block. There is no queue in
86 this case. Strangely enough block->condvar is used for waiting for the
87 assigned hash_link only. More precisely it is used to wait for all
88 requests to be unregistered from the assigned hash_link.
89
90 The resize_queue serves two purposes:
91 1. Threads that want to do a resize wait there if in_resize is set.
92 This is not used in the server. The server refuses a second resize
93 request if one is already active. keycache->in_init is used for the
94 synchronization. See set_var.cc.
95 2. Threads that want to access blocks during resize wait here during
96 the re-initialization phase.
97 When the resize is done, all threads on the queue are signalled.
98 Hypothetical resizers can compete for resizing, and read/write
99 requests will restart to request blocks from the freshly resized
100 cache. If the cache has been resized too small, it is disabled and
101 'can_be_used' is false. In this case read/write requests bypass the
102 cache. Since they increment and decrement 'cnt_for_resize_op', the
103 next resizer can wait on the queue 'waiting_for_resize_cnt' until all
104 I/O finished.
105 */
106
107 #include "mysys_priv.h"
108 #include "mysys_err.h"
109 #include <keycache.h>
110 #include "my_static.h"
111 #include <m_string.h>
112 #include <my_bit.h>
113 #include <errno.h>
114 #include <stdarg.h>
115 #include "probes_mysql.h"
116
117 /*
118 Some compilation flags have been added specifically for this module
119 to control the following:
120 - not to let a thread yield control when reading directly
121 from key cache, which might improve performance in many cases;
122 to enable this add:
123 #define SERIALIZED_READ_FROM_CACHE
124 - to set an upper bound for number of threads simultaneously
125 using the key cache; this setting helps to determine an optimal
126 size for hash table and improve performance when the number of
127 blocks in the key cache much less than the number of threads
128 accessing it;
129 to set this number equal to <N> add
130 #define MAX_THREADS <N>
131 - to substitute calls of mysql_cond_wait for calls of
132 mysql_cond_timedwait (wait with timeout set up);
133 this setting should be used only when you want to trap a deadlock
134 situation, which theoretically should not happen;
135 to set timeout equal to <T> seconds add
136 #define KEYCACHE_TIMEOUT <T>
137 - to enable the module traps and to send debug information from
138 key cache module to a special debug log add:
139 #define KEYCACHE_DEBUG
140 the name of this debug log file <LOG NAME> can be set through:
141 #define KEYCACHE_DEBUG_LOG <LOG NAME>
142 if the name is not defined, it's set by default;
143 if the KEYCACHE_DEBUG flag is not set up and we are in a debug
144 mode, i.e. when ! defined(DBUG_OFF), the debug information from the
145 module is sent to the regular debug log.
146
147 Example of the settings:
148 #define SERIALIZED_READ_FROM_CACHE
149 #define MAX_THREADS 100
150 #define KEYCACHE_TIMEOUT 1
151 #define KEYCACHE_DEBUG
152 #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
153 */
154
155 #define STRUCT_PTR(TYPE, MEMBER, a) \
156 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
157
158 /* types of condition variables */
159 #define COND_FOR_REQUESTED 0
160 #define COND_FOR_SAVED 1
161 #define COND_FOR_READERS 2
162
163 typedef mysql_cond_t KEYCACHE_CONDVAR;
164
165 /* descriptor of the page in the key cache block buffer */
166 struct st_keycache_page
167 {
168 int file; /* file to which the page belongs to */
169 my_off_t filepos; /* position of the page in the file */
170 };
171
172 /* element in the chain of a hash table bucket */
173 struct st_hash_link
174 {
175 struct st_hash_link *next, **prev; /* to connect links in the same bucket */
176 struct st_block_link *block; /* reference to the block for the page: */
177 File file; /* from such a file */
178 my_off_t diskpos; /* with such an offset */
179 uint requests; /* number of requests for the page */
180 };
181
182 /* simple states of a block */
183 #define BLOCK_ERROR 1 /* an error occured when performing file i/o */
184 #define BLOCK_READ 2 /* file block is in the block buffer */
185 #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */
186 #define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */
187 #define BLOCK_IN_FLUSH 16 /* block is selected for flush */
188 #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
189 #define BLOCK_IN_USE 64 /* block is not free */
190 #define BLOCK_IN_EVICTION 128 /* block is selected for eviction */
191 #define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */
192 #define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */
193
194 /* page status, returned by find_key_block */
195 #define PAGE_READ 0
196 #define PAGE_TO_BE_READ 1
197 #define PAGE_WAIT_TO_BE_READ 2
198
199 /* block temperature determines in which (sub-)chain the block currently is */
200 enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
201
202 /* key cache block */
203 struct st_block_link
204 {
205 struct st_block_link
206 *next_used, **prev_used; /* to connect links in the LRU chain (ring) */
207 struct st_block_link
208 *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
209 struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
210 KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
211 uint requests; /* number of requests for the block */
212 uchar *buffer; /* buffer for the block page */
213 uint offset; /* beginning of modified data in the buffer */
214 uint length; /* end of data in the buffer */
215 uint status; /* state of the block */
216 enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
217 uint hits_left; /* number of hits left until promotion */
218 ulonglong last_hit_time; /* timestamp of the last hit */
219 KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
220 };
221
222 KEY_CACHE dflt_key_cache_var;
223 KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
224
225 #define FLUSH_CACHE 2000 /* sort this many blocks at once */
226
227 static int flush_all_key_blocks(KEY_CACHE *keycache);
228
229 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
230 mysql_mutex_t *mutex);
231 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
232
233 static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
234 #if !defined(DBUG_OFF)
235 static void test_key_cache(KEY_CACHE *keycache,
236 const char *where, my_bool lock);
237 #endif
238
239 #define KEYCACHE_HASH(f, pos) \
240 (((ulong) ((pos) / keycache->key_cache_block_size) + \
241 (ulong) (f)) & (keycache->hash_entries-1))
242 #define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
243
244 #define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
245
246 #if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
247 #define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
248 #endif
249
250 #if defined(KEYCACHE_DEBUG_LOG)
251 static FILE *keycache_debug_log=NULL;
252 static void keycache_debug_print(const char *fmt,...);
253 #define KEYCACHE_DEBUG_OPEN \
254 if (!keycache_debug_log) \
255 { \
256 keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
257 (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
258 }
259
260 #define KEYCACHE_DEBUG_CLOSE \
261 if (keycache_debug_log) \
262 { \
263 fclose(keycache_debug_log); \
264 keycache_debug_log= 0; \
265 }
266 #else
267 #define KEYCACHE_DEBUG_OPEN
268 #define KEYCACHE_DEBUG_CLOSE
269 #endif /* defined(KEYCACHE_DEBUG_LOG) */
270
271 #if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
272 #define KEYCACHE_DBUG_PRINT(l, m) \
273 { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
274 keycache_debug_print m; }
275
276 #define KEYCACHE_DBUG_ASSERT(a) \
277 { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
278 assert(a); }
279 #else
280 #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
281 #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
282 #endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */
283
284 #if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
285
286 static long keycache_thread_id;
287 #define KEYCACHE_THREAD_TRACE(l) \
288 KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
289
290 #define KEYCACHE_THREAD_TRACE_BEGIN(l) \
291 { struct st_my_thread_var *thread_var= my_thread_var; \
292 keycache_thread_id= thread_var->id; \
293 KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
294
295 #define KEYCACHE_THREAD_TRACE_END(l) \
296 KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
297 #else
298 #define KEYCACHE_THREAD_TRACE_BEGIN(l)
299 #define KEYCACHE_THREAD_TRACE_END(l)
300 #define KEYCACHE_THREAD_TRACE(l)
301 #endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */
302
303 #define BLOCK_NUMBER(b) \
304 ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
305 #define HASH_LINK_NUMBER(h) \
306 ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
307
308 #if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
309 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
310 mysql_mutex_t *mutex);
311 #else
312 #define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
313 #endif
314
315 #if defined(KEYCACHE_DEBUG)
316 static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
317 static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
318 static int keycache_pthread_cond_signal(mysql_cond_t *cond);
319 #else
320 #define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
321 #define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
322 #define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
323 #endif /* defined(KEYCACHE_DEBUG) */
324
325 #if !defined(DBUG_OFF)
326 #if defined(inline)
327 #undef inline
328 #endif
329 #define inline /* disabled inline for easier debugging */
330 static int fail_block(BLOCK_LINK *block);
331 static int fail_hlink(HASH_LINK *hlink);
332 static int cache_empty(KEY_CACHE *keycache);
333 #endif
334
next_power(uint value)335 static inline uint next_power(uint value)
336 {
337 return (uint) my_round_up_to_next_power((uint32) value) << 1;
338 }
339
340
341 /*
342 Initialize a key cache
343
344 SYNOPSIS
345 init_key_cache()
346 keycache pointer to a key cache data structure
347 key_cache_block_size size of blocks to keep cached data
348 use_mem total memory to use for the key cache
349 division_limit division limit (may be zero)
350 age_threshold age threshold (may be zero)
351
352 RETURN VALUE
353 number of blocks in the key cache, if successful,
354 0 - otherwise.
355
356 NOTES.
357 if keycache->key_cache_inited != 0 we assume that the key cache
358 is already initialized. This is for now used by myisamchk, but shouldn't
359 be something that a program should rely on!
360
361 It's assumed that no two threads call this function simultaneously
362 referring to the same key cache handle.
363
364 */
365
init_key_cache(KEY_CACHE * keycache,uint key_cache_block_size,size_t use_mem,uint division_limit,uint age_threshold)366 int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
367 size_t use_mem, uint division_limit,
368 uint age_threshold)
369 {
370 ulong blocks, hash_links;
371 size_t length;
372 int error;
373 DBUG_ENTER("init_key_cache");
374 DBUG_ASSERT(key_cache_block_size >= 512);
375
376 KEYCACHE_DEBUG_OPEN;
377 if (keycache->key_cache_inited && keycache->disk_blocks > 0)
378 {
379 DBUG_PRINT("warning",("key cache already in use"));
380 DBUG_RETURN(0);
381 }
382
383 keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
384 keycache->global_cache_read= keycache->global_cache_write= 0;
385 keycache->disk_blocks= -1;
386 if (! keycache->key_cache_inited)
387 {
388 keycache->key_cache_inited= 1;
389 /*
390 Initialize these variables once only.
391 Their value must survive re-initialization during resizing.
392 */
393 keycache->in_resize= 0;
394 keycache->resize_in_flush= 0;
395 keycache->cnt_for_resize_op= 0;
396 keycache->waiting_for_resize_cnt.last_thread= NULL;
397 keycache->in_init= 0;
398 mysql_mutex_init(key_KEY_CACHE_cache_lock,
399 &keycache->cache_lock, MY_MUTEX_INIT_FAST);
400 keycache->resize_queue.last_thread= NULL;
401 }
402
403 keycache->key_cache_mem_size= use_mem;
404 keycache->key_cache_block_size= key_cache_block_size;
405 DBUG_PRINT("info", ("key_cache_block_size: %u",
406 key_cache_block_size));
407
408 blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
409 sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
410 /* It doesn't make sense to have too few blocks (less than 8) */
411 if (blocks >= 8)
412 {
413 for ( ; ; )
414 {
415 /* Set my_hash_entries to the next bigger 2 power */
416 if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
417 keycache->hash_entries<<= 1;
418 hash_links= 2 * blocks;
419 #if defined(MAX_THREADS)
420 if (hash_links < MAX_THREADS + blocks - 1)
421 hash_links= MAX_THREADS + blocks - 1;
422 #endif
423 while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
424 ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
425 ALIGN_SIZE(sizeof(HASH_LINK*) *
426 keycache->hash_entries))) +
427 ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
428 blocks--;
429 /* Allocate memory for cache page buffers */
430 if ((keycache->block_mem=
431 my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
432 MYF(0))))
433 {
434 /*
435 Allocate memory for blocks, hash_links and hash entries;
436 For each block 2 hash links are allocated
437 */
438 if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
439 MYF(0))))
440 break;
441 my_large_free(keycache->block_mem);
442 keycache->block_mem= 0;
443 }
444 if (blocks < 8)
445 {
446 my_errno= ENOMEM;
447 my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
448 blocks * keycache->key_cache_block_size);
449 goto err;
450 }
451 blocks= blocks / 4*3;
452 }
453 keycache->blocks_unused= blocks;
454 keycache->disk_blocks= (int) blocks;
455 keycache->hash_links= hash_links;
456 keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
457 ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
458 keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
459 ALIGN_SIZE((sizeof(HASH_LINK*) *
460 keycache->hash_entries)));
461 bzero((uchar*) keycache->block_root,
462 keycache->disk_blocks * sizeof(BLOCK_LINK));
463 bzero((uchar*) keycache->hash_root,
464 keycache->hash_entries * sizeof(HASH_LINK*));
465 bzero((uchar*) keycache->hash_link_root,
466 keycache->hash_links * sizeof(HASH_LINK));
467 keycache->hash_links_used= 0;
468 keycache->free_hash_list= NULL;
469 keycache->blocks_used= keycache->blocks_changed= 0;
470
471 keycache->global_blocks_changed= 0;
472 keycache->blocks_available=0; /* For debugging */
473
474 /* The LRU chain is empty after initialization */
475 keycache->used_last= NULL;
476 keycache->used_ins= NULL;
477 keycache->free_block_list= NULL;
478 keycache->keycache_time= 0;
479 keycache->warm_blocks= 0;
480 keycache->min_warm_blocks= (division_limit ?
481 blocks * division_limit / 100 + 1 :
482 blocks);
483 keycache->age_threshold= (age_threshold ?
484 blocks * age_threshold / 100 :
485 blocks);
486
487 keycache->can_be_used= 1;
488
489 keycache->waiting_for_hash_link.last_thread= NULL;
490 keycache->waiting_for_block.last_thread= NULL;
491 DBUG_PRINT("exit",
492 ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\
493 hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx",
494 keycache->disk_blocks, (long) keycache->block_root,
495 keycache->hash_entries, (long) keycache->hash_root,
496 keycache->hash_links, (long) keycache->hash_link_root));
497 bzero((uchar*) keycache->changed_blocks,
498 sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
499 bzero((uchar*) keycache->file_blocks,
500 sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
501 }
502 else
503 {
504 /* key_buffer_size is specified too small. Disable the cache. */
505 keycache->can_be_used= 0;
506 }
507
508 keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
509 DBUG_RETURN((int) keycache->disk_blocks);
510
511 err:
512 error= my_errno;
513 keycache->disk_blocks= 0;
514 keycache->blocks= 0;
515 if (keycache->block_mem)
516 {
517 my_large_free((uchar*) keycache->block_mem);
518 keycache->block_mem= NULL;
519 }
520 if (keycache->block_root)
521 {
522 my_free(keycache->block_root);
523 keycache->block_root= NULL;
524 }
525 my_errno= error;
526 keycache->can_be_used= 0;
527 DBUG_RETURN(0);
528 }
529
530
531 /*
532 Resize a key cache
533
534 SYNOPSIS
535 resize_key_cache()
536 keycache pointer to a key cache data structure
537 key_cache_block_size size of blocks to keep cached data
538 use_mem total memory to use for the new key cache
539 division_limit new division limit (if not zero)
540 age_threshold new age threshold (if not zero)
541
542 RETURN VALUE
543 number of blocks in the key cache, if successful,
544 0 - otherwise.
545
546 NOTES.
547 The function first compares the memory size and the block size parameters
548 with the key cache values.
549
550 If they differ the function frees the memory allocated for the
551 old key cache blocks by calling the end_key_cache function and
552 then rebuilds the key cache with new blocks by calling
553 init_key_cache.
554
555 The function starts the operation only when all other threads
556 performing operations with the key cache let it proceed
557 (when cnt_for_resize=0).
558 */
559
resize_key_cache(KEY_CACHE * keycache,uint key_cache_block_size,size_t use_mem,uint division_limit,uint age_threshold)560 int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
561 size_t use_mem, uint division_limit,
562 uint age_threshold)
563 {
564 int blocks;
565 DBUG_ENTER("resize_key_cache");
566
567 if (!keycache->key_cache_inited)
568 DBUG_RETURN(keycache->disk_blocks);
569
570 if(key_cache_block_size == keycache->key_cache_block_size &&
571 use_mem == keycache->key_cache_mem_size)
572 {
573 change_key_cache_param(keycache, division_limit, age_threshold);
574 DBUG_RETURN(keycache->disk_blocks);
575 }
576
577 keycache_pthread_mutex_lock(&keycache->cache_lock);
578
579 /*
580 We may need to wait for another thread which is doing a resize
581 already. This cannot happen in the MySQL server though. It allows
582 one resizer only. In set_var.cc keycache->in_init is used to block
583 multiple attempts.
584 */
585 while (keycache->in_resize)
586 {
587 /* purecov: begin inspected */
588 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
589 /* purecov: end */
590 }
591
592 /*
593 Mark the operation in progress. This blocks other threads from doing
594 a resize in parallel. It prohibits new blocks to enter the cache.
595 Read/write requests can bypass the cache during the flush phase.
596 */
597 keycache->in_resize= 1;
598
599 /* Need to flush only if keycache is enabled. */
600 if (keycache->can_be_used)
601 {
602 /* Start the flush phase. */
603 keycache->resize_in_flush= 1;
604
605 if (flush_all_key_blocks(keycache))
606 {
607 /* TODO: if this happens, we should write a warning in the log file ! */
608 keycache->resize_in_flush= 0;
609 blocks= 0;
610 keycache->can_be_used= 0;
611 goto finish;
612 }
613 DBUG_ASSERT(cache_empty(keycache));
614
615 /* End the flush phase. */
616 keycache->resize_in_flush= 0;
617 }
618
619 /*
620 Some direct read/write operations (bypassing the cache) may still be
621 unfinished. Wait until they are done. If the key cache can be used,
622 direct I/O is done in increments of key_cache_block_size. That is,
623 every block is checked if it is in the cache. We need to wait for
624 pending I/O before re-initializing the cache, because we may change
625 the block size. Otherwise they could check for blocks at file
626 positions where the new block division has none. We do also want to
627 wait for I/O done when (if) the cache was disabled. It must not
628 run in parallel with normal cache operation.
629 */
630 while (keycache->cnt_for_resize_op)
631 wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
632
633 /*
634 Free old cache structures, allocate new structures, and initialize
635 them. Note that the cache_lock mutex and the resize_queue are left
636 untouched. We do not lose the cache_lock and will release it only at
637 the end of this function.
638 */
639 end_key_cache(keycache, 0); /* Don't free mutex */
640 /* The following will work even if use_mem is 0 */
641 blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
642 division_limit, age_threshold);
643
644 finish:
645 /*
646 Mark the resize finished. This allows other threads to start a
647 resize or to request new cache blocks.
648 */
649 keycache->in_resize= 0;
650
651 /* Signal waiting threads. */
652 release_whole_queue(&keycache->resize_queue);
653
654 keycache_pthread_mutex_unlock(&keycache->cache_lock);
655 DBUG_RETURN(blocks);
656 }
657
658
659 /*
660 Increment counter blocking resize key cache operation
661 */
inc_counter_for_resize_op(KEY_CACHE * keycache)662 static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
663 {
664 keycache->cnt_for_resize_op++;
665 }
666
667
668 /*
669 Decrement counter blocking resize key cache operation;
670 Signal the operation to proceed when counter becomes equal zero
671 */
dec_counter_for_resize_op(KEY_CACHE * keycache)672 static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
673 {
674 if (!--keycache->cnt_for_resize_op)
675 release_whole_queue(&keycache->waiting_for_resize_cnt);
676 }
677
678 /*
679 Change the key cache parameters
680
681 SYNOPSIS
682 change_key_cache_param()
683 keycache pointer to a key cache data structure
684 division_limit new division limit (if not zero)
685 age_threshold new age threshold (if not zero)
686
687 RETURN VALUE
688 none
689
690 NOTES.
691 Presently the function resets the key cache parameters
692 concerning midpoint insertion strategy - division_limit and
693 age_threshold.
694 */
695
change_key_cache_param(KEY_CACHE * keycache,uint division_limit,uint age_threshold)696 void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
697 uint age_threshold)
698 {
699 DBUG_ENTER("change_key_cache_param");
700
701 keycache_pthread_mutex_lock(&keycache->cache_lock);
702 if (division_limit)
703 keycache->min_warm_blocks= (keycache->disk_blocks *
704 division_limit / 100 + 1);
705 if (age_threshold)
706 keycache->age_threshold= (keycache->disk_blocks *
707 age_threshold / 100);
708 keycache_pthread_mutex_unlock(&keycache->cache_lock);
709 DBUG_VOID_RETURN;
710 }
711
712
713 /*
714 Remove key_cache from memory
715
716 SYNOPSIS
717 end_key_cache()
718 keycache key cache handle
719 cleanup Complete free (Free also mutex for key cache)
720
721 RETURN VALUE
722 none
723 */
724
end_key_cache(KEY_CACHE * keycache,my_bool cleanup)725 void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
726 {
727 DBUG_ENTER("end_key_cache");
728 DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));
729
730 if (!keycache->key_cache_inited)
731 DBUG_VOID_RETURN;
732
733 if (keycache->disk_blocks > 0)
734 {
735 if (keycache->block_mem)
736 {
737 my_large_free((uchar*) keycache->block_mem);
738 keycache->block_mem= NULL;
739 my_free(keycache->block_root);
740 keycache->block_root= NULL;
741 }
742 keycache->disk_blocks= -1;
743 /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
744 keycache->blocks_changed= 0;
745 }
746
747 DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
748 "writes: %lu r_requests: %lu reads: %lu",
749 keycache->blocks_used, keycache->global_blocks_changed,
750 (ulong) keycache->global_cache_w_requests,
751 (ulong) keycache->global_cache_write,
752 (ulong) keycache->global_cache_r_requests,
753 (ulong) keycache->global_cache_read));
754
755 /*
756 Reset these values to be able to detect a disabled key cache.
757 See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
758 */
759 keycache->blocks_used= 0;
760 keycache->blocks_unused= 0;
761
762 if (cleanup)
763 {
764 mysql_mutex_destroy(&keycache->cache_lock);
765 keycache->key_cache_inited= keycache->can_be_used= 0;
766 KEYCACHE_DEBUG_CLOSE;
767 }
768 DBUG_VOID_RETURN;
769 } /* end_key_cache */
770
771
772 /*
773 Link a thread into double-linked queue of waiting threads.
774
775 SYNOPSIS
776 link_into_queue()
777 wqueue pointer to the queue structure
778 thread pointer to the thread to be added to the queue
779
780 RETURN VALUE
781 none
782
783 NOTES.
784 Queue is represented by a circular list of the thread structures
785 The list is double-linked of the type (**prev,*next), accessed by
786 a pointer to the last element.
787 */
788
link_into_queue(KEYCACHE_WQUEUE * wqueue,struct st_my_thread_var * thread)789 static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
790 struct st_my_thread_var *thread)
791 {
792 struct st_my_thread_var *last;
793
794 DBUG_ASSERT(!thread->next && !thread->prev);
795 if (! (last= wqueue->last_thread))
796 {
797 /* Queue is empty */
798 thread->next= thread;
799 thread->prev= &thread->next;
800 }
801 else
802 {
803 thread->prev= last->next->prev;
804 last->next->prev= &thread->next;
805 thread->next= last->next;
806 last->next= thread;
807 }
808 wqueue->last_thread= thread;
809 }
810
811 /*
812 Unlink a thread from double-linked queue of waiting threads
813
814 SYNOPSIS
815 unlink_from_queue()
816 wqueue pointer to the queue structure
817 thread pointer to the thread to be removed from the queue
818
819 RETURN VALUE
820 none
821
822 NOTES.
823 See NOTES for link_into_queue
824 */
825
unlink_from_queue(KEYCACHE_WQUEUE * wqueue,struct st_my_thread_var * thread)826 static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
827 struct st_my_thread_var *thread)
828 {
829 KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
830 DBUG_ASSERT(thread->next && thread->prev);
831 if (thread->next == thread)
832 /* The queue contains only one member */
833 wqueue->last_thread= NULL;
834 else
835 {
836 thread->next->prev= thread->prev;
837 *thread->prev=thread->next;
838 if (wqueue->last_thread == thread)
839 wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
840 thread->prev);
841 }
842 thread->next= NULL;
843 #if !defined(DBUG_OFF)
844 /*
845 This makes it easier to see it's not in a chain during debugging.
846 And some DBUG_ASSERT() rely on it.
847 */
848 thread->prev= NULL;
849 #endif
850 }
851
852
853 /*
854 Add a thread to single-linked queue of waiting threads
855
856 SYNOPSIS
857 wait_on_queue()
858 wqueue Pointer to the queue structure.
859 mutex Cache_lock to acquire after awake.
860
861 RETURN VALUE
862 none
863
864 NOTES.
865 Queue is represented by a circular list of the thread structures
866 The list is single-linked of the type (*next), accessed by a pointer
867 to the last element.
868
869 The function protects against stray signals by verifying that the
870 current thread is unlinked from the queue when awaking. However,
871 since several threads can wait for the same event, it might be
872 necessary for the caller of the function to check again if the
873 condition for awake is indeed matched.
874 */
875
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          mysql_mutex_t *mutex)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread= my_thread_var;

  /* Add to queue. */
  DBUG_ASSERT(!thread->next);
  DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
  if (! (last= wqueue->last_thread))
    thread->next= thread;               /* Empty queue: a ring of one. */
  else
  {
    /* Insert after the current last element of the circular list. */
    thread->next= last->next;
    last->next= thread;
  }
  wqueue->last_thread= thread;

  /*
    Wait until thread is removed from queue by the signalling thread.
    The loop protects against stray signals: thread->next is cleared by
    the releasing thread only when we are really unlinked.
  */
  do
  {
    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", thread->id));
    keycache_pthread_cond_wait(&thread->suspend, mutex);
  }
  while (thread->next);
}
905
906
907 /*
908 Remove all threads from queue signaling them to proceed
909
910 SYNOPSIS
911 release_whole_queue()
912 wqueue pointer to the queue structure
913
914 RETURN VALUE
915 none
916
917 NOTES.
918 See notes for wait_on_queue().
919 When removed from the queue each thread is signaled via condition
920 variable thread->suspend.
921 */
922
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *next;
  struct st_my_thread_var *thread;

  /* Queue may be empty. */
  if (!(last= wqueue->last_thread))
    return;

  /* Walk the circular list, starting at the oldest waiter (last->next). */
  next= last->next;
  do
  {
    thread=next;
    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
                        ("thread %ld", thread->id));
    /* Signal the thread. */
    keycache_pthread_cond_signal(&thread->suspend);
    /* Take thread from queue. */
    next=thread->next;
    /* Clearing 'next' releases the waiter from its wait loop. */
    thread->next= NULL;
  }
  while (thread != last);

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
950
951
952 /*
953 Unlink a block from the chain of dirty/clean blocks
954 */
955
/*
  Take a block out of the dirty/clean chain it is currently linked in.
*/
static inline void unlink_changed(BLOCK_LINK *block)
{
  BLOCK_LINK *successor= block->next_changed;

  /* The block must be a member of some changed/clean chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);

  /* Bypass 'block': make the predecessor's link point at the successor. */
  *block->prev_changed= successor;
  if (successor)
    successor->prev_changed= block->prev_changed;

#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->next_changed= NULL;
  block->prev_changed= NULL;
#endif
}
972
973
974 /*
975 Link a block into the chain of dirty/clean blocks
976 */
977
/*
  Push a block onto the front of the dirty/clean chain headed by *phead.
*/
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  BLOCK_LINK *old_head= *phead;

  /* The block must not be linked in any chain yet. */
  DBUG_ASSERT(!block->next_changed);
  DBUG_ASSERT(!block->prev_changed);

  block->next_changed= old_head;
  block->prev_changed= phead;
  if (old_head)
    old_head->prev_changed= &block->next_changed;
  *phead= block;
}
987
988
989 /*
990 Link a block in a chain of clean blocks of a file.
991
992 SYNOPSIS
993 link_to_file_list()
994 keycache Key cache handle
995 block Block to relink
996 file File to be linked to
997 unlink If to unlink first
998
999 DESCRIPTION
1000 Unlink a block from whichever chain it is linked in, if it's
1001 asked for, and link it to the chain of clean blocks of the
1002 specified file.
1003
1004 NOTE
1005 Please do never set/clear BLOCK_CHANGED outside of
1006 link_to_file_list() or link_to_changed_list().
1007 You would risk to damage correct counting of changed blocks
1008 and to find blocks in the wrong hash.
1009
1010 RETURN
1011 void
1012 */
1013
static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file,
                              my_bool unlink_block)
{
  /*
    Move the block into the chain of clean blocks of 'file'. If requested,
    unlink it from whatever chain it is in first. If the block was dirty,
    clear BLOCK_CHANGED and adjust the dirty block counters. See the NOTE
    in the preceding comment about BLOCK_CHANGED handling.
  */
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->hash_link->file == file);

  if (unlink_block)
    unlink_changed(block);
  link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);

  if (!(block->status & BLOCK_CHANGED))
    return;

  /* The block was dirty before; it is clean now. */
  block->status&= ~BLOCK_CHANGED;
  keycache->blocks_changed--;
  keycache->global_blocks_changed--;
}
1031
1032
1033 /*
1034 Re-link a block from the clean chain to the dirty chain of a file.
1035
1036 SYNOPSIS
1037 link_to_changed_list()
1038 keycache key cache handle
1039 block block to relink
1040
1041 DESCRIPTION
1042 Unlink a block from the chain of clean blocks of a file
1043 and link it to the chain of dirty blocks of the same file.
1044
1045 NOTE
1046 Please do never set/clear BLOCK_CHANGED outside of
1047 link_to_file_list() or link_to_changed_list().
1048 You would risk to damage correct counting of changed blocks
1049 and to find blocks in the wrong hash.
1050
1051 RETURN
1052 void
1053 */
1054
static void link_to_changed_list(KEY_CACHE *keycache,
                                 BLOCK_LINK *block)
{
  /*
    Move the block from the clean chain of its file into the dirty chain
    of the same file, mark it BLOCK_CHANGED and count one more changed
    block, both per-cache and globally.
  */
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);

  /* Take the block out of the clean chain of its file... */
  unlink_changed(block);
  /* ...and push it onto the dirty chain of that file. */
  link_changed(block,
               &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);

  block->status|= BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}
1069
1070
1071 /*
1072 Link a block to the LRU chain at the beginning or at the end of
1073 one of two parts.
1074
1075 SYNOPSIS
1076 link_block()
1077 keycache pointer to a key cache data structure
1078 block pointer to the block to link to the LRU chain
1079 hot <-> to link the block into the hot subchain
1080 at_end <-> to link the block at the end of the subchain
1081
1082 RETURN VALUE
1083 none
1084
1085 NOTES.
1086 The LRU ring is represented by a circular list of block structures.
    The list is double-linked of the type (**prev,*next).
1088 The LRU ring is divided into two parts - hot and warm.
1089 There are two pointers to access the last blocks of these two
1090 parts. The beginning of the warm part follows right after the
1091 end of the hot part.
1092 Only blocks of the warm part can be used for eviction.
1093 The first block from the beginning of this subchain is always
1094 taken for eviction (keycache->last_used->next)
1095
1096 LRU chain: +------+ H O T +------+
1097 +----| end |----...<----| beg |----+
1098 | +------+last +------+ |
1099 v<-link in latest hot (new end) |
1100 | link in latest warm (new end)->^
1101 | +------+ W A R M +------+ |
1102 +----| beg |---->...----| end |----+
1103 +------+ +------+ins
1104 first for eviction
1105
1106 It is also possible that the block is selected for eviction and thus
1107 not linked in the LRU ring.
1108 */
1109
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
                       my_bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);

  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_block.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
    struct st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if ((HASH_LINK *) thread->opt_info == hash_link)
      {
        KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        /* Each signalled waiter holds a request on the block. */
        block->requests++;
      }
    }
    while (thread != last_thread);
    /* Hand the block over to the page the waiters asked for. */
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads run
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They do only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that it has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u status=%x #requests=%u #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    return;
  }

  /*
    No eviction waiter takes the block: insert it into the LRU ring.
    'pins' is the end-pointer of the target sub-chain (hot or warm).
  */
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u status=%x #requests=%u #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
1211
1212
1213 /*
1214 Unlink a block from the LRU chain
1215
1216 SYNOPSIS
1217 unlink_block()
1218 keycache pointer to a key cache data structure
1219 block pointer to the block to unlink from the LRU chain
1220
1221 RETURN VALUE
1222 none
1223
1224 NOTES.
1225 See NOTES for link_block
1226 */
1227
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* The block must be properly linked in the LRU ring. */
  DBUG_ASSERT(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /*
      If the block was the end of the warm (used_last) or hot (used_ins)
      sub-chain, its predecessor becomes the new end of that sub-chain.
    */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->prev_used= NULL;
#endif

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block %u status=%x #requests=%u #available=%u",
     BLOCK_NUMBER(block), block->status,
     block->requests, keycache->blocks_available));
#endif
}
1268
1269
1270 /*
1271 Register requests for a block.
1272
1273 SYNOPSIS
1274 reg_requests()
1275 keycache Pointer to a key cache data structure.
1276 block Pointer to the block to register a request on.
1277 count Number of requests. Always 1.
1278
1279 NOTE
1280 The first request unlinks the block from the LRU ring. This means
    that it is protected against eviction.
1282
1283 RETURN
1284 void
1285 */
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  /*
    Register 'count' more requests on the block. The very first request
    takes the block out of the LRU ring, which protects it against
    eviction while requests remain registered.
  */
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link);

  if (block->requests == 0)
    unlink_block(keycache, block);
  block->requests+= count;
}
1295
1296
1297 /*
1298 Unregister request for a block
1299 linking it to the LRU chain if it's the last request
1300
1301 SYNOPSIS
1302 unreg_request()
1303 keycache pointer to a key cache data structure
1304 block pointer to the block to link to the LRU chain
1305 at_end <-> to link the block at the end of the LRU chain
1306
1307 RETURN VALUE
1308 none
1309
1310 NOTES.
1311 Every linking to the LRU ring decrements by one a special block
1312 counter (if it's positive). If the at_end parameter is TRUE the block is
1313 added either at the end of warm sub-chain or at the end of hot sub-chain.
1314 It is added to the hot subchain if its counter is zero and number of
1315 blocks in warm sub-chain is not less than some low limit (determined by
1316 the division_limit parameter). Otherwise the block is added to the warm
1317 sub-chain. If the at_end parameter is FALSE the block is always added
1318 at beginning of the warm sub-chain.
1319 Thus a warm block can be promoted to the hot sub-chain when its counter
1320 becomes zero for the first time.
1321 At the same time the block at the very beginning of the hot subchain
1322 might be moved to the beginning of the warm subchain if it stays untouched
1323 for a too long time (this time is determined by parameter age_threshold).
1324
1325 It is also possible that the block is selected for eviction and thus
1326 not linked in the LRU ring.
1327 */
1328
static void unreg_request(KEY_CACHE *keycache,
                          BLOCK_LINK *block, int at_end)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /*
    Unregister the request, but do not link erroneous blocks into the
    LRU ring.
  */
  if (!--block->requests && !(block->status & BLOCK_ERROR))
  {
    my_bool hot;
    if (block->hits_left)
      block->hits_left--;
    /*
      Promote the block to the hot sub-chain only when its hit counter
      is exhausted, linking at the end was requested, and the warm
      sub-chain is longer than the configured minimum.
    */
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    /* Stamp the block with the logical clock used for aging below. */
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      /* The oldest hot block stayed untouched too long: demote to warm. */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
  }
}
1391
1392 /*
1393 Remove a reader of the page in block
1394 */
1395
static void remove_reader(BLOCK_LINK *block)
{
  /*
    Drop one page request from the block's hash_link. If this was the
    last request and a thread is waiting for all readers to finish
    (see wait_for_readers()), wake it up.
  */
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  DBUG_ASSERT(block->hash_link->requests);

  block->hash_link->requests--;
  if (block->hash_link->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1408
1409
1410 /*
1411 Wait until the last reader of the page in block
1412 signals on its termination
1413 */
1414
static void wait_for_readers(KEY_CACHE *keycache,
                             BLOCK_LINK *block)
{
  struct st_my_thread_var *thread= my_thread_var;
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
  DBUG_ASSERT(block->hash_link);
  DBUG_ASSERT(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld block %u",
                         thread->id, BLOCK_NUMBER(block)));
    /* There must be no other waiter. We have no queue here. */
    DBUG_ASSERT(!block->condvar);
    /* remove_reader() signals this condvar when the last request goes. */
    block->condvar= &thread->suspend;
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}
1440
1441
1442 /*
1443 Add a hash link to a bucket in the hash_table
1444 */
1445
/*
  Push a hash link onto the front of the bucket chain headed by *start.
*/
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  HASH_LINK *first= *start;

  hash_link->next= first;
  hash_link->prev= start;
  if (first)
    first->prev= &hash_link->next;
  *start= hash_link;
}
1454
1455
1456 /*
1457 Remove a hash link from the hash table
1458 */
1459
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
  /* Remove the hash_link from its bucket chain. */
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;

  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
    struct st_my_thread_var *thread;

    /*
      Instead of freeing the hash_link, reuse it immediately for the
      page the first waiter asked for (published via opt_info, see
      get_hash_link()).
    */
    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;
    do
    {
      KEYCACHE_PAGE *page;
      thread= next_thread;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    /* Re-insert the reused hash_link into the bucket of its new page. */
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
                                                 hash_link->diskpos)],
              hash_link);
    return;
  }
  /* Nobody is waiting: push the hash_link onto the free list. */
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}
1507
1508
1509 /*
1510 Get the hash link for a page
1511 */
1512
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, my_off_t filepos)
{
  reg1 HASH_LINK *hash_link, **start;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
                      (uint) file,(ulong) filepos));

restart:
  /*
    Find the bucket in the hash table for the pair (file, filepos);
    start contains the head of the bucket list,
    hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
#if defined(KEYCACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
    cnt++;
    if (! (cnt <= keycache->hash_links_used))
    {
      int i;
      /* Chain longer than the number of used links: dump it for debugging. */
      for (i=0, hash_link= *start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
            (uint) hash_link->file,(ulong) hash_link->diskpos));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Take a previously freed hash link from the free list. */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take a never-used hash link from the preallocated pool. */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      page.file= file;
      page.filepos= filepos;
      /*
        Publish the requested page via opt_info so that unlink_hash()
        can hand a freed hash_link directly to this page's waiters.
      */
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                          ("suspend thread %ld", thread->id));
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
      /* The situation may have changed while waiting: search again. */
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}
1592
1593
1594 /*
1595 Get a block for the file page requested by a keycache read/write operation;
1596 If the page is not in the cache return a free block, if there is none
1597 return the lru block after saving its buffer if the page is dirty.
1598
1599 SYNOPSIS
1600
1601 find_key_block()
1602 keycache pointer to a key cache data structure
1603 file handler for the file to read page from
1604 filepos position of the page in the file
1605 init_hits_left how initialize the block counter for the page
1606 wrmode <-> get for writing
1607 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1608
1609 RETURN VALUE
1610 Pointer to the found block if successful, 0 - otherwise
1611
1612 NOTES.
1613 For the page from file positioned at filepos the function checks whether
1614 the page is in the key cache specified by the first parameter.
1615 If this is the case it immediately returns the block.
    If not, the function first chooses a block for this page. If there are
    no never-used blocks in the key cache yet, the function takes the block
1618 at the very beginning of the warm sub-chain. It saves the page in that
1619 block if it's dirty before returning the pointer to it.
1620 The function returns in the page_st parameter the following values:
1621 PAGE_READ - if page already in the block,
1622 PAGE_TO_BE_READ - if it is to be read yet by the current thread
      PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1624 If an error occurs THE BLOCK_ERROR bit is set in the block status.
1625 It might happen that there are no blocks in LRU chain (in warm part) -
1626 all blocks are unlinked for some read/write operations. Then the function
1627 waits until first of this operations links any block back.
1628 */
1629
find_key_block(KEY_CACHE * keycache,File file,my_off_t filepos,int init_hits_left,int wrmode,int * page_st)1630 static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1631 File file, my_off_t filepos,
1632 int init_hits_left,
1633 int wrmode, int *page_st)
1634 {
1635 HASH_LINK *hash_link;
1636 BLOCK_LINK *block;
1637 int error= 0;
1638 int page_status;
1639
1640 DBUG_ENTER("find_key_block");
1641 KEYCACHE_THREAD_TRACE("find_key_block:begin");
1642 DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1643 file, (ulong) filepos, wrmode));
1644 KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d",
1645 file, (ulong) filepos,
1646 wrmode));
1647 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1648 DBUG_EXECUTE("check_keycache2",
1649 test_key_cache(keycache, "start of find_key_block", 0););
1650 #endif
1651
1652 restart:
1653 /*
1654 If the flush phase of a resize operation fails, the cache is left
1655 unusable. This will be detected only after "goto restart".
1656 */
1657 if (!keycache->can_be_used)
1658 DBUG_RETURN(0);
1659
1660 /*
1661 Find the hash_link for the requested file block (file, filepos). We
1662 do always get a hash_link here. It has registered our request so
1663 that no other thread can use it for another file block until we
1664 release the request (which is done by remove_reader() usually). The
1665 hash_link can have a block assigned to it or not. If there is a
1666 block, it may be assigned to this hash_link or not. In cases where a
1667 block is evicted from the cache, it is taken from the LRU ring and
1668 referenced by the new hash_link. But the block can still be assigned
1669 to its old hash_link for some time if it needs to be flushed first,
1670 or if there are other threads still reading it.
1671
1672 Summary:
1673 hash_link is always returned.
1674 hash_link->block can be:
1675 - NULL or
1676 - not assigned to this hash_link or
1677 - assigned to this hash_link. If assigned, the block can have
1678 - invalid data (when freshly assigned) or
1679 - valid data. Valid data can be
1680 - changed over the file contents (dirty) or
1681 - not changed (clean).
1682 */
1683 hash_link= get_hash_link(keycache, file, filepos);
1684 DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));
1685
1686 page_status= -1;
1687 if ((block= hash_link->block) &&
1688 block->hash_link == hash_link && (block->status & BLOCK_READ))
1689 {
1690 /* Assigned block with valid (changed or unchanged) contents. */
1691 page_status= PAGE_READ;
1692 }
1693 /*
1694 else (page_status == -1)
1695 - block == NULL or
1696 - block not assigned to this hash_link or
1697 - block assigned but not yet read from file (invalid data).
1698 */
1699
1700 if (keycache->in_resize)
1701 {
1702 /* This is a request during a resize operation */
1703
1704 if (!block)
1705 {
1706 struct st_my_thread_var *thread;
1707
1708 /*
1709 The file block is not in the cache. We don't need it in the
1710 cache: we are going to read or write directly to file. Cancel
1711 the request. We can simply decrement hash_link->requests because
1712 we did not release cache_lock since increasing it. So no other
1713 thread can wait for our request to become released.
1714 */
1715 if (hash_link->requests == 1)
1716 {
1717 /*
1718 We are the only one to request this hash_link (this file/pos).
1719 Free the hash_link.
1720 */
1721 hash_link->requests--;
1722 unlink_hash(keycache, hash_link);
1723 DBUG_RETURN(0);
1724 }
1725
1726 /*
1727 More requests on the hash_link. Someone tries to evict a block
1728 for this hash_link (could have started before resizing started).
1729 This means that the LRU ring is empty. Otherwise a block could
1730 be assigned immediately. Behave like a thread that wants to
1731 evict a block for this file/pos. Add to the queue of threads
1732 waiting for a block. Wait until there is one assigned.
1733
1734 Refresh the request on the hash-link so that it cannot be reused
1735 for another file/pos.
1736 */
1737 thread= my_thread_var;
1738 thread->opt_info= (void *) hash_link;
1739 link_into_queue(&keycache->waiting_for_block, thread);
1740 do
1741 {
1742 KEYCACHE_DBUG_PRINT("find_key_block: wait",
1743 ("suspend thread %ld", thread->id));
1744 keycache_pthread_cond_wait(&thread->suspend,
1745 &keycache->cache_lock);
1746 } while (thread->next);
1747 thread->opt_info= NULL;
1748 /*
1749 A block should now be assigned to the hash_link. But it may
1750 still need to be evicted. Anyway, we should re-check the
1751 situation. page_status must be set correctly.
1752 */
1753 hash_link->requests--;
1754 goto restart;
1755 } /* end of if (!block) */
1756
1757 /*
1758 There is a block for this file/pos in the cache. Register a
1759 request on it. This unlinks it from the LRU ring (if it is there)
1760 and hence protects it against eviction (if not already in
1761 eviction). We need this for returning the block to the caller, for
1762 calling remove_reader() (for debugging purposes), and for calling
1763 free_block(). The only case where we don't need the request is if
1764 the block is in eviction. In that case we have to unregister the
1765 request later.
1766 */
1767 reg_requests(keycache, block, 1);
1768
1769 if (page_status != PAGE_READ)
1770 {
1771 /*
1772 - block not assigned to this hash_link or
1773 - block assigned but not yet read from file (invalid data).
1774
1775 This must be a block in eviction. It will be read soon. We need
1776 to wait here until this happened. Otherwise the caller could
1777 access a wrong block or a block which is in read. While waiting
1778 we cannot lose hash_link nor block. We have registered a request
1779 on the hash_link. Everything can happen to the block but changes
1780 in the hash_link -> block relationship. In other words:
1781 everything can happen to the block but free or another completed
1782 eviction.
1783
1784 Note that we bahave like a secondary requestor here. We just
1785 cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1786 read requests and writes on dirty blocks that are not in flush
1787 only. Waiting here on COND_FOR_REQUESTED works in all
1788 situations.
1789 */
1790 DBUG_ASSERT(((block->hash_link != hash_link) &&
1791 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1792 ((block->hash_link == hash_link) &&
1793 !(block->status & BLOCK_READ)));
1794 wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1795 /*
1796 Here we can trust that the block has been assigned to this
1797 hash_link (block->hash_link == hash_link) and read into the
1798 buffer (BLOCK_READ). The worst things possible here are that the
1799 block is in free (BLOCK_REASSIGNED). But the block is still
1800 assigned to the hash_link. The freeing thread waits until we
1801 release our request on the hash_link. The block must not be
1802 again in eviction because we registered an request on it before
1803 starting to wait.
1804 */
1805 DBUG_ASSERT(block->hash_link == hash_link);
1806 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1807 DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1808 }
1809 /*
1810 The block is in the cache. Assigned to the hash_link. Valid data.
1811 Note that in case of page_st == PAGE_READ, the block can be marked
1812 for eviction. In any case it can be marked for freeing.
1813 */
1814
1815 if (!wrmode)
1816 {
1817 /* A reader can just read the block. */
1818 *page_st= PAGE_READ;
1819 DBUG_ASSERT((hash_link->file == file) &&
1820 (hash_link->diskpos == filepos) &&
1821 (block->hash_link == hash_link));
1822 DBUG_RETURN(block);
1823 }
1824
1825 /*
1826 This is a writer. No two writers for the same block can exist.
1827 This must be assured by locks outside of the key cache.
1828 */
1829 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1830
1831 while (block->status & BLOCK_IN_FLUSH)
1832 {
1833 /*
1834 Wait until the block is flushed to file. Do not release the
1835 request on the hash_link yet to prevent that the block is freed
1836 or reassigned while we wait. While we wait, several things can
1837 happen to the block, including another flush. But the block
1838 cannot be reassigned to another hash_link until we release our
1839 request on it. But it can be marked BLOCK_REASSIGNED from free
1840 or eviction, while they wait for us to release the hash_link.
1841 */
1842 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1843 /*
1844 If the flush phase failed, the resize could have finished while
1845 we waited here.
1846 */
1847 if (!keycache->in_resize)
1848 {
1849 remove_reader(block);
1850 unreg_request(keycache, block, 1);
1851 goto restart;
1852 }
1853 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1854 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1855 DBUG_ASSERT(block->hash_link == hash_link);
1856 }
1857
1858 if (block->status & BLOCK_CHANGED)
1859 {
1860 /*
1861 We want to write a block with changed contents. If the cache
1862 block size is bigger than the callers block size (e.g. MyISAM),
1863 the caller may replace part of the block only. Changes of the
1864 other part of the block must be preserved. Since the block has
1865 not yet been selected for flush, we can still add our changes.
1866 */
1867 *page_st= PAGE_READ;
1868 DBUG_ASSERT((hash_link->file == file) &&
1869 (hash_link->diskpos == filepos) &&
1870 (block->hash_link == hash_link));
1871 DBUG_RETURN(block);
1872 }
1873
1874 /*
1875 This is a write request for a clean block. We do not want to have
1876 new dirty blocks in the cache while resizing. We will free the
1877 block and write directly to file. If the block is in eviction or
1878 in free, we just let it go.
1879
1880 Unregister from the hash_link. This must be done before freeing
1881 the block. And it must be done if not freeing the block. Because
1882 we could have waited above, we need to call remove_reader(). Other
1883 threads could wait for us to release our request on the hash_link.
1884 */
1885 remove_reader(block);
1886
1887 /* If the block is not in eviction and not in free, we can free it. */
1888 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1889 BLOCK_REASSIGNED)))
1890 {
1891 /*
1892 Free block as we are going to write directly to file.
1893 Although we have an exclusive lock for the updated key part,
1894 the control can be yielded by the current thread as we might
1895 have unfinished readers of other key parts in the block
1896 buffer. Still we are guaranteed not to have any readers
1897 of the key part we are writing into until the block is
1898 removed from the cache as we set the BLOCK_REASSIGNED
1899 flag (see the code below that handles reading requests).
1900 */
1901 free_block(keycache, block);
1902 }
1903 else
1904 {
1905 /*
1906 The block will be evicted/freed soon. Don't touch it in any way.
1907 Unregister the request that we registered above.
1908 */
1909 unreg_request(keycache, block, 1);
1910
1911 /*
1912 The block is still assigned to the hash_link (the file/pos that
1913 we are going to write to). Wait until the eviction/free is
1914 complete. Otherwise the direct write could complete before all
1915 readers are done with the block. So they could read outdated
1916 data.
1917
1918 Since we released our request on the hash_link, it can be reused
1919 for another file/pos. Hence we cannot just check for
1920 block->hash_link == hash_link. As long as the resize is
1921 proceeding the block cannot be reassigned to the same file/pos
1922 again. So we can terminate the loop when the block is no longer
1923 assigned to this file/pos.
1924 */
1925 do
1926 {
1927 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1928 &keycache->cache_lock);
1929 /*
1930 If the flush phase failed, the resize could have finished
1931 while we waited here.
1932 */
1933 if (!keycache->in_resize)
1934 goto restart;
1935 } while (block->hash_link &&
1936 (block->hash_link->file == file) &&
1937 (block->hash_link->diskpos == filepos));
1938 }
1939 DBUG_RETURN(0);
1940 }
1941
1942 if (page_status == PAGE_READ &&
1943 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1944 BLOCK_REASSIGNED)))
1945 {
1946 /*
1947 This is a request for a block to be removed from cache. The block
1948 is assigned to this hash_link and contains valid data, but is
1949 marked for eviction or to be freed. Possible reasons why it has
1950 not yet been evicted/freed can be a flush before reassignment
1951 (BLOCK_IN_SWITCH), readers of the block have not finished yet
1952 (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1953 the block has been selected for it (BLOCK_IN_EVICTION).
1954 */
1955
1956 KEYCACHE_DBUG_PRINT("find_key_block",
1957 ("request for old page in block %u "
1958 "wrmode: %d block->status: %d",
1959 BLOCK_NUMBER(block), wrmode, block->status));
1960 /*
1961 Only reading requests can proceed until the old dirty page is flushed,
1962 all others are to be suspended, then resubmitted
1963 */
1964 if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1965 {
1966 /*
1967 This is a read request and the block not yet reassigned. We can
1968 register our request and proceed. This unlinks the block from
1969 the LRU ring and protects it against eviction.
1970 */
1971 reg_requests(keycache, block, 1);
1972 }
1973 else
1974 {
1975 /*
1976 Either this is a write request for a block that is in eviction
1977 or in free. We must not use it any more. Instead we must evict
1978 another block. But we cannot do this before the eviction/free is
1979 done. Otherwise we would find the same hash_link + block again
1980 and again.
1981
1982 Or this is a read request for a block in eviction/free that does
1983 not require a flush, but waits for readers to finish with the
1984 block. We do not read this block to let the eviction/free happen
1985 as soon as possible. Again we must wait so that we don't find
1986 the same hash_link + block again and again.
1987 */
1988 DBUG_ASSERT(hash_link->requests);
1989 hash_link->requests--;
1990 KEYCACHE_DBUG_PRINT("find_key_block",
1991 ("request waiting for old page to be saved"));
1992 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1993 KEYCACHE_DBUG_PRINT("find_key_block",
1994 ("request for old page resubmitted"));
1995 /*
1996 The block is no longer assigned to this hash_link.
1997 Get another one.
1998 */
1999 goto restart;
2000 }
2001 }
2002 else
2003 {
2004 /*
2005 This is a request for a new block or for a block not to be removed.
2006 Either
2007 - block == NULL or
2008 - block not assigned to this hash_link or
2009 - block assigned but not yet read from file,
2010 or
2011 - block assigned with valid (changed or unchanged) data and
2012 - it will not be reassigned/freed.
2013 */
2014 if (! block)
2015 {
2016 /* No block is assigned to the hash_link yet. */
2017 if (keycache->blocks_unused)
2018 {
2019 if (keycache->free_block_list)
2020 {
2021 /* There is a block in the free list. */
2022 block= keycache->free_block_list;
2023 keycache->free_block_list= block->next_used;
2024 block->next_used= NULL;
2025 }
2026 else
2027 {
2028 size_t block_mem_offset;
2029 /* There are some never used blocks, take first of them */
2030 DBUG_ASSERT(keycache->blocks_used <
2031 (ulong) keycache->disk_blocks);
2032 block= &keycache->block_root[keycache->blocks_used];
2033 block_mem_offset=
2034 ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
2035 block->buffer= ADD_TO_PTR(keycache->block_mem,
2036 block_mem_offset,
2037 uchar*);
2038 keycache->blocks_used++;
2039 DBUG_ASSERT(!block->next_used);
2040 }
2041 DBUG_ASSERT(!block->prev_used);
2042 DBUG_ASSERT(!block->next_changed);
2043 DBUG_ASSERT(!block->prev_changed);
2044 DBUG_ASSERT(!block->hash_link);
2045 DBUG_ASSERT(!block->status);
2046 DBUG_ASSERT(!block->requests);
2047 keycache->blocks_unused--;
2048 block->status= BLOCK_IN_USE;
2049 block->length= 0;
2050 block->offset= keycache->key_cache_block_size;
2051 block->requests= 1;
2052 block->temperature= BLOCK_COLD;
2053 block->hits_left= init_hits_left;
2054 block->last_hit_time= 0;
2055 block->hash_link= hash_link;
2056 hash_link->block= block;
2057 link_to_file_list(keycache, block, file, 0);
2058 page_status= PAGE_TO_BE_READ;
2059 KEYCACHE_DBUG_PRINT("find_key_block",
2060 ("got free or never used block %u",
2061 BLOCK_NUMBER(block)));
2062 }
2063 else
2064 {
2065 /*
2066 There are no free blocks and no never used blocks, use a block
2067 from the LRU ring.
2068 */
2069
2070 if (! keycache->used_last)
2071 {
2072 /*
2073 The LRU ring is empty. Wait until a new block is added to
2074 it. Several threads might wait here for the same hash_link,
2075 all of them must get the same block. While waiting for a
2076 block, after a block is selected for this hash_link, other
2077 threads can run first before this one awakes. During this
2078 time interval other threads find this hash_link pointing to
2079 the block, which is still assigned to another hash_link. In
2080 this case the block is not marked BLOCK_IN_SWITCH yet, but
2081 it is marked BLOCK_IN_EVICTION.
2082 */
2083
2084 struct st_my_thread_var *thread= my_thread_var;
2085 thread->opt_info= (void *) hash_link;
2086 link_into_queue(&keycache->waiting_for_block, thread);
2087 do
2088 {
2089 KEYCACHE_DBUG_PRINT("find_key_block: wait",
2090 ("suspend thread %ld", thread->id));
2091 keycache_pthread_cond_wait(&thread->suspend,
2092 &keycache->cache_lock);
2093 }
2094 while (thread->next);
2095 thread->opt_info= NULL;
2096 /* Assert that block has a request registered. */
2097 DBUG_ASSERT(hash_link->block->requests);
2098 /* Assert that block is not in LRU ring. */
2099 DBUG_ASSERT(!hash_link->block->next_used);
2100 DBUG_ASSERT(!hash_link->block->prev_used);
2101 }
2102
2103 /*
2104 If we waited above, hash_link->block has been assigned by
2105 link_block(). Otherwise it is still NULL. In the latter case
2106 we need to grab a block from the LRU ring ourselves.
2107 */
2108 block= hash_link->block;
2109 if (! block)
2110 {
2111 /* Select the last block from the LRU ring. */
2112 block= keycache->used_last->next_used;
2113 block->hits_left= init_hits_left;
2114 block->last_hit_time= 0;
2115 hash_link->block= block;
2116 /*
2117 Register a request on the block. This unlinks it from the
2118 LRU ring and protects it against eviction.
2119 */
2120 DBUG_ASSERT(!block->requests);
2121 reg_requests(keycache, block,1);
2122 /*
2123 We do not need to set block->status|= BLOCK_IN_EVICTION here
2124 because we will set block->status|= BLOCK_IN_SWITCH
2125 immediately without releasing the lock in between. This does
2126 also support debugging. When looking at the block, one can
2127 see if the block has been selected by link_block() after the
2128 LRU ring was empty, or if it was grabbed directly from the
2129 LRU ring in this branch.
2130 */
2131 }
2132
2133 /*
2134 If we had to wait above, there is a small chance that another
2135 thread grabbed this block for the same file block already. But
2136 in most cases the first condition is true.
2137 */
2138 if (block->hash_link != hash_link &&
2139 ! (block->status & BLOCK_IN_SWITCH) )
2140 {
2141 /* this is a primary request for a new page */
2142 block->status|= BLOCK_IN_SWITCH;
2143
2144 KEYCACHE_DBUG_PRINT("find_key_block",
2145 ("got block %u for new page", BLOCK_NUMBER(block)));
2146
2147 if (block->status & BLOCK_CHANGED)
2148 {
2149 /* The block contains a dirty page - push it out of the cache */
2150
2151 KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
2152 if (block->status & BLOCK_IN_FLUSH)
2153 {
2154 /*
2155 The block is marked for flush. If we do not wait here,
2156 it could happen that we write the block, reassign it to
2157 another file block, then, before the new owner can read
2158 the new file block, the flusher writes the cache block
2159 (which still has the old contents) to the new file block!
2160 */
2161 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2162 &keycache->cache_lock);
2163 /*
2164 The block is marked BLOCK_IN_SWITCH. It should be left
2165 alone except for reading. No free, no write.
2166 */
2167 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2168 DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
2169 BLOCK_CHANGED |
2170 BLOCK_FOR_UPDATE)));
2171 }
2172 else
2173 {
2174 block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
2175 /*
2176 BLOCK_IN_EVICTION may be true or not. Other flags must
2177 have a fixed value.
2178 */
2179 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2180 (BLOCK_READ | BLOCK_IN_SWITCH |
2181 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2182 BLOCK_CHANGED | BLOCK_IN_USE));
2183 DBUG_ASSERT(block->hash_link);
2184
2185 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2186 /*
2187 The call is thread safe because only the current
2188 thread might change the block->hash_link value
2189 */
2190 error= my_pwrite(block->hash_link->file,
2191 block->buffer + block->offset,
2192 block->length - block->offset,
2193 block->hash_link->diskpos + block->offset,
2194 MYF(MY_NABP | MY_WAIT_IF_FULL));
2195 keycache_pthread_mutex_lock(&keycache->cache_lock);
2196
2197 /* Block status must not have changed. */
2198 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2199 (BLOCK_READ | BLOCK_IN_SWITCH |
2200 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2201 BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2202 keycache->global_cache_write++;
2203 }
2204 }
2205
2206 block->status|= BLOCK_REASSIGNED;
2207 /*
2208 The block comes from the LRU ring. It must have a hash_link
2209 assigned.
2210 */
2211 DBUG_ASSERT(block->hash_link);
2212 if (block->hash_link)
2213 {
2214 /*
2215 All pending requests for this page must be resubmitted.
2216 This must be done before waiting for readers. They could
2217 wait for the flush to complete. And we must also do it
2218 after the wait. Flushers might try to free the block while
2219 we wait. They would wait until the reassignment is
2220 complete. Also the block status must reflect the correct
2221 situation: The block is not changed nor in flush any more.
2222 Note that we must not change the BLOCK_CHANGED flag
2223 outside of link_to_file_list() so that it is always in the
2224 correct queue and the *blocks_changed counters are
2225 correct.
2226 */
2227 block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2228 link_to_file_list(keycache, block, block->hash_link->file, 1);
2229 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2230 /*
2231 The block is still assigned to its old hash_link.
2232 Wait until all pending read requests
2233 for this page are executed
2234 (we could have avoided this waiting, if we had read
2235 a page in the cache in a sweep, without yielding control)
2236 */
2237 wait_for_readers(keycache, block);
2238 DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
2239 block->prev_changed);
2240 /* The reader must not have been a writer. */
2241 DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
2242
2243 /* Wake flushers that might have found the block in between. */
2244 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2245
2246 /* Remove the hash link for the old file block from the hash. */
2247 unlink_hash(keycache, block->hash_link);
2248
2249 /*
2250 For sanity checks link_to_file_list() asserts that block
2251 and hash_link refer to each other. Hence we need to assign
2252 the hash_link first, but then we would not know if it was
2253 linked before. Hence we would not know if to unlink it. So
2254 unlink it here and call link_to_file_list(..., FALSE).
2255 */
2256 unlink_changed(block);
2257 }
2258 block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2259 block->length= 0;
2260 block->offset= keycache->key_cache_block_size;
2261 block->hash_link= hash_link;
2262 link_to_file_list(keycache, block, file, 0);
2263 page_status= PAGE_TO_BE_READ;
2264
2265 KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2266 KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2267 }
2268 else
2269 {
2270 /*
2271 Either (block->hash_link == hash_link),
2272 or (block->status & BLOCK_IN_SWITCH).
2273
2274 This is for secondary requests for a new file block only.
2275 Either it is already assigned to the new hash_link meanwhile
2276 (if we had to wait due to empty LRU), or it is already in
2277 eviction by another thread. Since this block has been
2278 grabbed from the LRU ring and attached to this hash_link,
2279 another thread cannot grab the same block from the LRU ring
2280 anymore. If the block is in eviction already, it must become
2281 attached to the same hash_link and as such destined for the
2282 same file block.
2283 */
2284 KEYCACHE_DBUG_PRINT("find_key_block",
2285 ("block->hash_link: %p hash_link: %p "
2286 "block->status: %u", block->hash_link,
2287 hash_link, block->status ));
2288 page_status= (((block->hash_link == hash_link) &&
2289 (block->status & BLOCK_READ)) ?
2290 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2291 }
2292 }
2293 }
2294 else
2295 {
2296 /*
2297 Block is not NULL. This hash_link points to a block.
2298 Either
2299 - block not assigned to this hash_link (yet) or
2300 - block assigned but not yet read from file,
2301 or
2302 - block assigned with valid (changed or unchanged) data and
2303 - it will not be reassigned/freed.
2304
2305 The first condition means hash_link points to a block in
2306 eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2307 But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2308 link_block(). In both cases it is destined for this hash_link
2309 and its file block address. When this hash_link got its block
2310 address, the block was removed from the LRU ring and cannot be
2311 selected for eviction (for another hash_link) again.
2312
2313 Register a request on the block. This is another protection
2314 against eviction.
2315 */
2316 DBUG_ASSERT(((block->hash_link != hash_link) &&
2317 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2318 ((block->hash_link == hash_link) &&
2319 !(block->status & BLOCK_READ)) ||
2320 ((block->status & BLOCK_READ) &&
2321 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2322 reg_requests(keycache, block, 1);
2323 KEYCACHE_DBUG_PRINT("find_key_block",
2324 ("block->hash_link: %p hash_link: %p "
2325 "block->status: %u", block->hash_link,
2326 hash_link, block->status ));
2327 page_status= (((block->hash_link == hash_link) &&
2328 (block->status & BLOCK_READ)) ?
2329 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2330 }
2331 }
2332
2333 KEYCACHE_DBUG_ASSERT(page_status != -1);
2334 /* Same assert basically, but be very sure. */
2335 KEYCACHE_DBUG_ASSERT(block);
2336 /* Assert that block has a request and is not in LRU ring. */
2337 DBUG_ASSERT(block->requests);
2338 DBUG_ASSERT(!block->next_used);
2339 DBUG_ASSERT(!block->prev_used);
2340 /* Assert that we return the correct block. */
2341 DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
2342 ((block->hash_link->file == file) &&
2343 (block->hash_link->diskpos == filepos)));
2344 *page_st=page_status;
2345 KEYCACHE_DBUG_PRINT("find_key_block",
2346 ("fd: %d pos: %lu block->status: %u page_status: %d",
2347 file, (ulong) filepos, block->status,
2348 page_status));
2349
2350 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2351 DBUG_EXECUTE("check_keycache2",
2352 test_key_cache(keycache, "end of find_key_block",0););
2353 #endif
2354 KEYCACHE_THREAD_TRACE("find_key_block:end");
2355 DBUG_RETURN(block);
2356 }
2357
2358
2359 /*
2360 Read into a key cache block buffer from disk.
2361
2362 SYNOPSIS
2363
2364 read_block()
2365 keycache pointer to a key cache data structure
2366 block block to which buffer the data is to be read
2367 read_length size of data to be read
2368 min_length at least so much data must be read
2369 primary <-> the current thread will read the data
2370
2371 RETURN VALUE
2372 None
2373
2374 NOTES.
2375 The function either reads a page data from file to the block buffer,
2376 or waits until another thread reads it. What page to read is determined
2377 by a block parameter - reference to a hash link for this page.
2378 If an error occurs, the BLOCK_ERROR bit is set in the block status.
2379 We do not report error when the size of successfully read
2380 portion is less than read_length, but not less than min_length.
2381 */
2382
static void read_block(KEY_CACHE *keycache,
                       BLOCK_LINK *block, uint read_length,
                       uint min_length, my_bool primary)
{
  size_t got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
  {
    /*
      This code is executed only by threads that submitted primary
      requests. Until block->status contains BLOCK_READ, all other
      request for the block become secondary requests. For a primary
      request the block must be properly initialized.
    */
    DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    KEYCACHE_DBUG_PRINT("read_block",
                        ("page to be read by primary request"));

    keycache->global_cache_read++;
    /* Page is not in buffer yet, is to be read from disk */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      The block can now have been marked for free (in case of
      FLUSH_RELEASE). Otherwise the state must be unchanged.
    */
    DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
                                    BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    /*
      A short read (fewer than min_length bytes) marks the block as
      erroneous. NOTE(review): with MYF(0), my_pread() reports a hard
      failure as MY_FILE_ERROR ((size_t) -1), which compares as NOT
      less than min_length -- confirm against my_pread() semantics
      that I/O errors are still detected on this path.
    */
    if (got_length < min_length)
      block->status|= BLOCK_ERROR;
    else
    {
      /* Mark the buffer valid and remember how many bytes it holds. */
      block->status|= BLOCK_READ;
      block->length= got_length;
      /*
        Do not set block->offset here. If this block is marked
        BLOCK_CHANGED later, we want to flush only the modified part. So
        only a writer may set block->offset down from
        keycache->key_cache_block_size.
      */
    }
    KEYCACHE_DBUG_PRINT("read_block",
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads that submitted secondary
      requests. At this point it could happen that the cache block is
      not yet assigned to the hash_link for the requested file block.
      But at awake from the wait this should be the case. Unfortunately
      we cannot assert this here because we do not know the hash_link
      for the requested file block nor the file and position. So we have
      to assert this in the caller.
    */
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request waiting for new page to be read"));
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request: new page in cache"));
  }
}
2468
2469
2470 /*
2471 Read a block of data from a cached file into a buffer;
2472
2473 SYNOPSIS
2474
2475 key_cache_read()
2476 keycache pointer to a key cache data structure
2477 file handler for the file for the block of data to be read
2478 filepos position of the block of data in the file
2479 level determines the weight of the data
2480 buff buffer to where the data must be placed
2481 length length of the buffer
2482 block_length length of the block in the key cache buffer
2483 return_buffer return pointer to the key cache buffer with the data
2484
2485 RETURN VALUE
2486 Returns address from where the data is placed if successful, 0 - otherwise.
2487
2488 NOTES.
2489 The function ensures that a block of data of size length from file
2490 positioned at filepos is in the buffers for some key cache blocks.
2491 Then the function either copies the data into the buffer buff, or,
2492 if return_buffer is TRUE, it just returns the pointer to the key cache
2493 buffer with the data.
2494 Filepos must be a multiple of 'block_length', but it doesn't
2495 have to be a multiple of key_cache_block_size;
2496 */
2497
uchar *key_cache_read(KEY_CACHE *keycache,
                      File file, my_off_t filepos, int level,
                      uchar *buff, uint length,
                      uint block_length __attribute__((unused)),
                      int return_buffer __attribute__((unused)))
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  uchar *start= buff;
  DBUG_ENTER("key_cache_read");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file, (ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    if (MYSQL_KEYCACHE_READ_START_ENABLED())
    {
      MYSQL_KEYCACHE_READ_START(my_filename(file), length,
                                (ulong) (keycache->blocks_used *
                                         keycache->key_cache_block_size),
                                (ulong) (keycache->blocks_unused *
                                         keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase read requests are allowed to bypass the cache for
      blocks not in the cache. find_key_block() returns NULL in this
      case.

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache, I/O is done
      in chunks of key_cache_block_size. Every chunk tries to use a
      cache block first. If the block size changes in the middle, a
      block could be missed and old data could be read.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Read data in key_cache_block_size increments */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
      {
        KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
        goto no_key_cache;
      }
      /* Start reading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not read beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* Returning the cache buffer is only possible for aligned, full
         blocks; otherwise fall back to copying into 'buff'. */
      if (block_length > keycache->key_cache_block_size || offset)
        return_buffer=0;

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;

      MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);

      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Read directly from file.
        */
        keycache->global_cache_read++;
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
        error= (my_pread(file, (uchar*) buff, read_length,
                         filepos + offset, MYF(MY_NABP)) != 0);
        keycache_pthread_mutex_lock(&keycache->cache_lock);
        goto next_block;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st != PAGE_READ)
        {
          MYSQL_KEYCACHE_READ_MISS();
          /* The requested page is to be read into the block buffer */
          read_block(keycache, block,
                     keycache->key_cache_block_size, read_length+offset,
                     (my_bool)(page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (block->length < read_length + offset)
        {
          /*
            Impossible if nothing goes wrong:
            this could only happen if we are using a file with
            small key blocks and are trying to read outside the file
          */
          my_errno= -1;
          block->status|= BLOCK_ERROR;
        }
        else
        {
          MYSQL_KEYCACHE_READ_HIT();
        }
      }

      /* block status may have added BLOCK_ERROR in the above 'if'. */
      if (!(block->status & BLOCK_ERROR))
      {
        {
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#if !defined(SERIALIZED_READ_FROM_CACHE)
          /*
            The request registered on the block by find_key_block()
            protects it against reassignment, so the cache lock can be
            released for the duration of the copy.
          */
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif

          /* Copy data from the cache buffer */
          memcpy(buff, block->buffer+offset, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#endif
        }
      }

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_read_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      /* Advance to the next cache-block-sized chunk of the request. */
      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first chunk can start at an unaligned file position. */
      offset= 0;

    } while ((length-= read_length));
    if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
    {
      MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
                                        keycache->key_cache_block_size),
                               (ulong) (keycache->blocks_unused *
                                        keycache->key_cache_block_size));
    }
    goto end;
  }
  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));

no_key_cache:
  /* Key cache is not used */

  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;

  /* Release the cache lock around the raw file read; the resize
     counter incremented above stays registered until 'end'. */
  if (locked_and_incremented)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
    error= 1;
  if (locked_and_incremented)
    keycache_pthread_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_PRINT("exit", ("error: %d", error ));
  DBUG_RETURN(error ? (uchar*) 0 : start);
}
2710
2711
2712 /*
2713 Insert a block of file data from a buffer into key cache
2714
2715 SYNOPSIS
2716 key_cache_insert()
2717 keycache pointer to a key cache data structure
2718 file handler for the file to insert data from
2719 filepos position of the block of data in the file to insert
2720 level determines the weight of the data
2721 buff buffer to read data from
2722 length length of the data in the buffer
2723
2724 NOTES
2725 This is used by MyISAM to move all blocks from a index file to the key
2726 cache
2727
  RETURN VALUE
    0 on success, 1 otherwise.
2730 */
2731
int key_cache_insert(KEY_CACHE *keycache,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  int error= 0;
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
               (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
      goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
        goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
            ((page_st == PAGE_TO_BE_READ) &&
             (offset || (read_length < keycache->key_cache_block_size))))
        {
          /*
            Either

            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block() executes
            the correct wait when called with primary == FALSE.

            Or

            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block(keycache, block, keycache->key_cache_block_size,
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          DBUG_ASSERT(block->hash_link->requests);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */
#endif

          /* Copy data from buff */
          memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          /* Re-check invariants after re-acquiring the cache lock. */
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));
#endif
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          KEYCACHE_DBUG_PRINT("key_cache_insert",
                              ("primary request: new page in cache"));
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          DBUG_ASSERT((page_st == PAGE_READ) &&
                      (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        DBUG_ASSERT(block->hash_link->file == file);
        DBUG_ASSERT(block->hash_link->diskpos == filepos);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      /*
        Release our reader registration on the hash_link and wake a
        thread that may be waiting for the block to be released.
      */
      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

      /* Advance to the next cache-block-sized chunk of the input. */
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));

  no_key_cache:
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
2948
2949
2950 /*
2951 Write a buffer into a cached file.
2952
2953 SYNOPSIS
2954
2955 key_cache_write()
2956 keycache pointer to a key cache data structure
2957 file handler for the file to write data to
2958 filepos position in the file to write data to
2959 level determines the weight of the data
2960 buff buffer with the data
2961 length length of the buffer
2962 dont_write if is 0 then all dirty pages involved in writing
2963 should have been flushed from key cache
2964
  RETURN VALUE
    0 on success, 1 otherwise.
2967
2968 NOTES.
2969 The function copies the data of size length from buff into buffers
2970 for key cache blocks that are assigned to contain the portion of
2971 the file starting with position filepos.
2972 It ensures that this data is flushed to the file if dont_write is FALSE.
2973 Filepos must be a multiple of 'block_length', but it doesn't
2974 have to be a multiple of key_cache_block_size;
2975
2976 dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
2977 */
2978
int key_cache_write(KEY_CACHE *keycache,
                    File file, my_off_t filepos, int level,
                    uchar *buff, uint length,
                    uint block_length  __attribute__((unused)),
                    int dont_write)
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  DBUG_ENTER("key_cache_write");
  DBUG_PRINT("enter",
             ("fd: %u pos: %lu length: %u block_length: %u"
              " key_block_length: %u",
              (uint) file, (ulong) filepos, length, block_length,
              keycache ? keycache->key_cache_block_size : 0));

  if (!dont_write)
  {
    /* purecov: begin inspected */
    /* Not used in the server. */
    /* Force writing from buff into disk. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);
    /* purecov: end */
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of key_cache_write", 1););
#endif

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
    {
      MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
                                 (ulong) (keycache->blocks_used *
                                          keycache->key_cache_block_size),
                                 (ulong) (keycache->blocks_unused *
                                          keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase write requests can modify dirty blocks that are
      not yet in flush. Otherwise they are allowed to bypass the cache.
      find_key_block() returns NULL in both cases (clean blocks and
      non-cached blocks).

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache I/O is done in
      chunks of key_cache_block_size. Every chunk tries to use a cache
      block first. If the block size changes in the middle, a block
      could be missed and data could be written below a cached block.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Write data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
        goto no_key_cache;

      MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
      /* Start writing at the beginning of the cache block. */
      filepos-= offset;
      /* Do not write beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_w_requests++;
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Write directly to file.
        */
        if (dont_write)
        {
          /* Used in the server. */
          keycache->global_cache_write++;
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
                        MYF(MY_NABP | MY_WAIT_IF_FULL)))
            error=1;
          keycache_pthread_mutex_lock(&keycache->cache_lock);
        }
        goto next_block;
      }
      /*
        Prevent block from flushing and from being selected to be
        freed. This must be set when we release the cache_lock.
        However, we must not set the status of the block before it is
        assigned to this file/pos.
      */
      if (page_st != PAGE_WAIT_TO_BE_READ)
        block->status|= BLOCK_FOR_UPDATE;
      /*
        We must read the file block first if it is not yet in the cache
        and we do not replace all of its contents.

        In cases where the cache block is big enough to contain (parts
        of) index blocks of different indexes, our request can be
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
        reading the file block. If the read completes after us, it
        overwrites our new contents with the old contents. So we have to
        wait for the other thread to complete the read of this block.
        read_block() takes care for the wait.
      */
      if (!(block->status & BLOCK_ERROR) &&
          ((page_st == PAGE_TO_BE_READ &&
            (offset || read_length < keycache->key_cache_block_size)) ||
           (page_st == PAGE_WAIT_TO_BE_READ)))
      {
        /*
          If the tail of the block is fully overwritten, it suffices to
          read only the first 'offset' bytes; otherwise read the whole
          cache block.
        */
        read_block(keycache, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
                   offset, (page_st == PAGE_TO_BE_READ));
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /*
          Prevent block from flushing and from being selected to be
          freed. This must be set when we release the cache_lock.
          Here we set it in case we could not set it above.
        */
        block->status|= BLOCK_FOR_UPDATE;
      }
      /*
        The block should always be assigned to the requested file block
        here. It need not be BLOCK_READ when overwriting the whole block.
      */
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT(block->hash_link->diskpos == filepos);
      DBUG_ASSERT(block->status & BLOCK_IN_USE);
      DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
      /*
        The block to be written must not be marked BLOCK_REASSIGNED.
        Otherwise it could be freed in dirty state or reused without
        another flush during eviction. It must also not be in flush.
        Otherwise the old contents may have been flushed already and
        the flusher could clear BLOCK_CHANGED without flushing the
        new changes again.
      */
      DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));

      while (block->status & BLOCK_IN_FLUSHWRITE)
      {
        /*
          Another thread is flushing the block. It was dirty already.
          Wait until the block is flushed to file. Otherwise we could
          modify the buffer contents just while it is written to file.
          An unpredictable file block contents would be the result.
          While we wait, several things can happen to the block,
          including another flush. But the block cannot be reassigned to
          another hash_link until we release our request on it.
        */
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /* Still must not be marked for free. */
        DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
        DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
      }

      /*
        We could perhaps release the cache_lock during access of the
        data like in the other functions. Locks outside of the key cache
        assure that readers and a writer do not access the same range of
        data. Parallel accesses should happen only if the cache block
        contains multiple index block(fragment)s. So different parts of
        the buffer would be read/written. An attempt to flush during
        memcpy() is prevented with BLOCK_FOR_UPDATE.
      */
      if (!(block->status & BLOCK_ERROR))
      {
#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif
        memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_lock(&keycache->cache_lock);
#endif
      }

      if (!dont_write)
      {
        /* Not used in the server. buff has been written to disk at start. */
        if ((block->status & BLOCK_CHANGED) &&
            (!offset && read_length >= keycache->key_cache_block_size))
          link_to_file_list(keycache, block, block->hash_link->file, 1);
      }
      else if (! (block->status & BLOCK_CHANGED))
        link_to_changed_list(keycache, block);
      block->status|=BLOCK_READ;
      /*
        Allow block to be selected to be freed. Since it is marked
        BLOCK_CHANGED too, it won't be selected to be freed without
        a flush.
      */
      block->status&= ~BLOCK_FOR_UPDATE;
      set_if_smaller(block->offset, offset);
      set_if_bigger(block->length, read_length+offset);

      /* Threads may be waiting for the changes to be complete. */
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

      /*
        If only a part of the cache block is to be replaced, and the
        rest has been read from file, then the cache lock has been
        released for I/O and it could be possible that another thread
        wants to evict or free the block and waits for it to be
        released. So we must not just decrement hash_link->requests, but
        also wake a waiting thread.
      */
      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_write_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        /* Pretend a "clean" block to avoid complications. */
        block->status&= ~(BLOCK_CHANGED);
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));
    goto end;
  }

no_key_cache:
  /* Key cache is not used */
  if (dont_write)
  {
    /* Used in the server. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (locked_and_incremented)
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
    if (my_pwrite(file, (uchar*) buff, length, filepos,
                  MYF(MY_NABP | MY_WAIT_IF_FULL)))
      error=1;
    if (locked_and_incremented)
      keycache_pthread_mutex_lock(&keycache->cache_lock);
  }

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }

  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
  {
    MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
                                       keycache->key_cache_block_size),
                              (ulong) (keycache->blocks_unused *
                                       keycache->key_cache_block_size));
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("exec",
               test_key_cache(keycache, "end of key_cache_write", 1););
#endif
  DBUG_RETURN(error);
}
3290
3291
3292 /*
3293 Free block.
3294
3295 SYNOPSIS
3296 free_block()
3297 keycache Pointer to a key cache data structure
3298 block Pointer to the block to free
3299
3300 DESCRIPTION
3301 Remove reference to block from hash table.
3302 Remove block from the chain of clean blocks.
3303 Add block to the free list.
3304
3305 NOTE
3306 Block must not be free (status == 0).
3307 Block must not be in free_block_list.
3308 Block must not be in the LRU ring.
3309 Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3310 Block must not be in free (BLOCK_REASSIGNED).
3311 Block must not be in flush (BLOCK_IN_FLUSH).
3312 Block must not be dirty (BLOCK_CHANGED).
3313 Block must not be in changed_blocks (dirty) hash.
3314 Block must be in file_blocks (clean) hash.
3315 Block must refer to a hash_link.
3316 Block must have a request registered on it.
3317 */
3318
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed, hash_link %p  status: %u",
                       BLOCK_NUMBER(block), block->hash_link,
                       block->status));
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
              !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                                 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an DBUG_ASSERT to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
    DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
                                  BLOCK_REASSIGNED)) &&
                !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_IN_FLUSH | BLOCK_CHANGED |
                                   BLOCK_FOR_UPDATE)));
    DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
    DBUG_ASSERT(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operation in this case. Assert that there are no other requests
    registered.
  */
  DBUG_ASSERT(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Error blocks are not put into the LRU ring. */
  if (!(block->status & BLOCK_ERROR))
  {
    /* Here the block must be in the LRU ring. Unlink it again. */
    DBUG_ASSERT(block->next_used && block->prev_used &&
                *block->prev_used == block);
    unlink_block(keycache, block);
  }
  /* A freed block is always cold; adjust the warm block count. */
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  /* Reset the block to its pristine "never used" state. */
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));

  /* Enforced by unlink_changed(), but just to be sure. */
  DBUG_ASSERT(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list. */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
3445
3446
/*
  qsort comparator: order two cache blocks by the disk position of the
  file block they hold (ascending). Used to sort blocks before flushing
  so writes hit the file in sequential order.
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  my_off_t pos_a= (*a)->hash_link->diskpos;
  my_off_t pos_b= (*b)->hash_link->diskpos;

  if (pos_a < pos_b)
    return -1;
  if (pos_a > pos_b)
    return 1;
  return 0;
}
3452
3453
3454 /*
3455 Flush a portion of changed blocks to disk,
3456 free used blocks if requested
3457 */
3458
static int flush_cached_blocks(KEY_CACHE *keycache,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;
  uint count= (uint) (end-cache);

  /* Don't lock the cache during the flush */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
    As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
    we are guaranteed no thread will change them.
    Sort by disk position so the writes below happen sequentially.
  */
  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                        ("block %u to be flushed", BLOCK_NUMBER(block)));
    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      /* Release the lock for the duration of the disk write. */
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
      /* Write only the modified part of the block (from block->offset). */
      error= my_pwrite(file, block->buffer+block->offset,
                       block->length - block->offset,
                       block->hash_link->diskpos+ block->offset,
                       MYF(MY_NABP | MY_WAIT_IF_FULL));
      keycache_pthread_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        /* Remember the first error; -1 marks a failure without errno set. */
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let possible waiting requests to write to the block page proceed.
      It might happen only during an operation to resize the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }

  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
3562
3563
3564 /*
3565 Flush all key blocks for a file to disk, but don't do any mutex locks.
3566
3567 SYNOPSIS
3568 flush_key_blocks_int()
3569 keycache pointer to a key cache data structure
3570 file handler for the file to flush to
3571 flush_type type of the flush
3572
3573 NOTES
3574 This function doesn't do any mutex locks because it needs to be called both
from flush_key_blocks and flush_all_key_blocks (the latter one does the
3576 mutex lock in the resize_key_cache() function).
3577
3578 We do only care about changed blocks that exist when the function is
3579 entered. We do not guarantee that all changed blocks of the file are
3580 flushed if more blocks change while this function is running.
3581
3582 RETURN
3583 0 ok
3584 1 error
3585 */
3586
static int flush_key_blocks_int(KEY_CACHE *keycache,
                                File file, enum flush_type type)
{
  /*
    Fixed-size burst buffer on the stack; a heap buffer is allocated
    below only when more changed blocks than FLUSH_CACHE are found.
  */
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
  int last_errno= 0;
  int last_errcnt= 0;
  DBUG_ENTER("flush_key_blocks_int");
  DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
              file, keycache->blocks_used, keycache->blocks_changed));

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of flush_key_blocks", 0););
#endif

  cache= cache_buff;
  if (keycache->disk_blocks > 0 &&
      (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
  {
    /* Key cache exists and flush is not disabled */
    int error= 0;
    uint count= FLUSH_CACHE;
    BLOCK_LINK **pos,**end;
    BLOCK_LINK *first_in_switch= NULL;
    BLOCK_LINK *last_in_flush;
    BLOCK_LINK *last_for_update;
    BLOCK_LINK *block, *next;
#if defined(KEYCACHE_DEBUG)
    uint cnt=0;
#endif

    if (type != FLUSH_IGNORE_CHANGED)
    {
      /*
        Count how many key blocks we have to cache to be able
        to flush all dirty pages with minimum seek moves
      */
      count= 0;
      for (block= keycache->changed_blocks[FILE_HASH(file)] ;
           block ;
           block= block->next_changed)
      {
        if ((block->hash_link->file == file) &&
            !(block->status & BLOCK_IN_FLUSH))
        {
          count++;
          KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
        }
      }
      /*
        Allocate a new buffer only if it's bigger than the one we have.
        Assure that we always have some entries for the case that new
        changed blocks appear while we need to wait for something.
      */
      if ((count > FLUSH_CACHE) &&
          !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
                                            MYF(0))))
        cache= cache_buff;
      /*
        After a restart there could be more changed blocks than now.
        So we should not let count become smaller than the fixed buffer.
      */
      if (cache == cache_buff)
        count= FLUSH_CACHE;
    }

    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
    last_in_flush= NULL;
    last_for_update= NULL;
    end= (pos= cache)+count;
    for (block= keycache->changed_blocks[FILE_HASH(file)] ;
         block ;
         block= next)
    {
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      /* Save the successor now; the block may leave the chain below. */
      next= block->next_changed;
      if (block->hash_link->file == file)
      {
        if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
        {
          /*
            Note: The special handling of BLOCK_IN_SWITCH is obsolete
            since we set BLOCK_IN_FLUSH if the eviction includes a
            flush. It can be removed in a later version.
          */
          if (!(block->status & BLOCK_IN_SWITCH))
          {
            /*
              We care only for the blocks for which flushing was not
              initiated by another thread and which are not in eviction.
              Registering a request on the block unlinks it from the LRU
              ring and protects against eviction.
            */
            reg_requests(keycache, block, 1);
            if (type != FLUSH_IGNORE_CHANGED)
            {
              /* It's not a temporary file */
              if (pos == end)
              {
                /*
                  This should happen relatively seldom. Remove the
                  request because we won't do anything with the block
                  but restart and pick it again in the next iteration.
                */
                unreg_request(keycache, block, 0);
                /*
                  This happens only if there is not enough
                  memory for the big block
                */
                if ((error= flush_cached_blocks(keycache, file, cache,
                                                end,type)))
                {
                  /* Do not loop infinitely trying to flush in vain. */
                  if ((last_errno == error) && (++last_errcnt > 5))
                    goto err;
                  last_errno= error;
                }
                /*
                  Restart the scan as some other thread might have changed
                  the changed blocks chain: the blocks that were in switch
                  state before the flush started have to be excluded
                */
                goto restart;
              }
              /*
                Mark the block with BLOCK_IN_FLUSH in order not to let
                other threads to use it for new pages and interfere with
                our sequence of flushing dirty file pages. We must not
                set this flag before actually putting the block on the
                write burst array called 'cache'.
              */
              block->status|= BLOCK_IN_FLUSH;
              /* Add block to the array for a write burst. */
              *pos++= block;
            }
            else
            {
              /* It's a temporary file */
              DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
              /*
                free_block() must not be called with BLOCK_CHANGED. Note
                that we must not change the BLOCK_CHANGED flag outside of
                link_to_file_list() so that it is always in the correct
                queue and the *blocks_changed counters are correct.
              */
              link_to_file_list(keycache, block, file, 1);
              if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
              {
                /* A request has been registered against the block above. */
                free_block(keycache, block);
              }
              else
              {
                /*
                  Link the block into the LRU ring if it's the last
                  submitted request for the block. This enables eviction
                  for the block. A request has been registered against
                  the block above.
                */
                unreg_request(keycache, block, 1);
              }
            }
          }
          else
          {
            /*
              Link the block into a list of blocks 'in switch'.

              WARNING: Here we introduce a place where a changed block
              is not in the changed_blocks hash! This is acceptable for
              a BLOCK_IN_SWITCH. Never try this for another situation.
              Other parts of the key cache code rely on changed blocks
              being in the changed_blocks hash.
            */
            unlink_changed(block);
            link_changed(block, &first_in_switch);
          }
        }
        else if (type != FLUSH_KEEP)
        {
          /*
            During the normal flush at end of statement (FLUSH_KEEP) we
            do not need to ensure that blocks in flush or update by
            other threads are flushed. They will be flushed by them
            later. In all other cases we must assure that we do not have
            any changed block of this file in the cache when this
            function returns.
          */
          if (block->status & BLOCK_IN_FLUSH)
          {
            /* Remember the last block found to be in flush. */
            last_in_flush= block;
          }
          else
          {
            /* Remember the last block found to be selected for update. */
            last_for_update= block;
          }
        }
      }
    } /* end of scan over the changed_blocks chain */
    if (pos != cache)
    {
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
      {
        /* Do not loop infinitely trying to flush in vain. */
        if ((last_errno == error) && (++last_errcnt > 5))
          goto err;
        last_errno= error;
      }
      /*
        Do not restart here during the normal flush at end of statement
        (FLUSH_KEEP). We have now flushed at least all blocks that were
        changed when entering this function. In all other cases we must
        assure that we do not have any changed block of this file in the
        cache when this function returns.
      */
      if (type != FLUSH_KEEP)
        goto restart;
    }
    if (last_in_flush)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks in
        flush by other threads. Wait until one of the blocks is flushed.
        Re-check the condition for last_in_flush. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_in_flush->status & BLOCK_IN_FLUSH)
        wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
      /* Be sure not to lose a block. They may be flushed in random order. */
      goto restart;
    }
    if (last_for_update)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks for
        update by other threads. Wait until one of the blocks is updated.
        Re-check the condition for last_for_update. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_for_update->status & BLOCK_FOR_UPDATE)
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
      /* The block is now changed. Flush it. */
      goto restart;
    }

    /*
      Wait until the list of blocks in switch is empty. The threads that
      are switching these blocks will relink them to clean file chains
      while we wait and thus empty the 'first_in_switch' chain.
    */
    while (first_in_switch)
    {
#if defined(KEYCACHE_DEBUG)
      cnt= 0;
#endif
      wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
                    &keycache->cache_lock);
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      /*
        Do not restart here. We have flushed all blocks that were
        changed when entering this function and were not marked for
        eviction. Other threads have now flushed all remaining blocks in
        the course of their eviction.
      */
    }

    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
      /* These shadow the outer variables of the same names on purpose. */
      BLOCK_LINK *last_for_update= NULL;
      BLOCK_LINK *last_in_switch= NULL;
      uint total_found= 0;
      uint found;

      /*
        Finally free all clean blocks for this file.
        During resize this may be run by two threads in parallel.
      */
      do
      {
        found= 0;
        for (block= keycache->file_blocks[FILE_HASH(file)] ;
             block ;
             block= next)
        {
          /* Remember the next block. After freeing we cannot get at it. */
          next= block->next_changed;

          /* Changed blocks cannot appear in the file_blocks hash. */
          DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
          if (block->hash_link->file == file)
          {
            /* We must skip blocks that will be changed. */
            if (block->status & BLOCK_FOR_UPDATE)
            {
              last_for_update= block;
              continue;
            }

            /*
              We must not free blocks in eviction (BLOCK_IN_EVICTION |
              BLOCK_IN_SWITCH) or blocks intended to be freed
              (BLOCK_REASSIGNED).
            */
            if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_REASSIGNED)))
            {
              struct st_hash_link *UNINIT_VAR(next_hash_link);
              my_off_t UNINIT_VAR(next_diskpos);
              File UNINIT_VAR(next_file);
              uint UNINIT_VAR(next_status);
              uint UNINIT_VAR(hash_requests);

              total_found++;
              found++;
              KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);

              /*
                Register a request. This unlinks the block from the LRU
                ring and protects it against eviction. This is required
                by free_block().
              */
              reg_requests(keycache, block, 1);

              /*
                free_block() may need to wait for readers of the block.
                This is the moment where the other thread can move the
                'next' block from the chain. free_block() needs to wait
                if there are requests for the block pending.
              */
              if (next && (hash_requests= block->hash_link->requests))
              {
                /* Copy values from the 'next' block and its hash_link. */
                next_status= next->status;
                next_hash_link= next->hash_link;
                next_diskpos= next_hash_link->diskpos;
                next_file= next_hash_link->file;
                DBUG_ASSERT(next == next_hash_link->block);
              }

              free_block(keycache, block);
              /*
                If we had to wait and the state of the 'next' block
                changed, break the inner loop. 'next' may no longer be
                part of the current chain.

                We do not want to break the loop after every free_block(),
                not even only after waits. The chain might be quite long
                and contain blocks for many files. Traversing it again and
                again to find more blocks for this file could become quite
                inefficient.
              */
              if (next && hash_requests &&
                  ((next_status != next->status) ||
                   (next_hash_link != next->hash_link) ||
                   (next_file != next_hash_link->file) ||
                   (next_diskpos != next_hash_link->diskpos) ||
                   (next != next_hash_link->block)))
                break;
            }
            else
            {
              last_in_switch= block;
            }
          }
        } /* end for block in file_blocks */
      } while (found);

      /*
        If any clean block has been found, we may have waited for it to
        become free. In this case it could be possible that another clean
        block became dirty. This is possible if the write request existed
        before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
      */
      if (total_found)
        goto restart;

      /*
        To avoid an infinite loop, wait until one of the blocks marked
        for update is updated.
      */
      if (last_for_update)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
        goto restart;
      }

      /*
        To avoid an infinite loop wait until one of the blocks marked
        for eviction is switched.
      */
      if (last_in_switch)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
                                              BLOCK_IN_SWITCH |
                                              BLOCK_REASSIGNED));
        wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
        goto restart;
      }

    } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */

  } /* if (keycache->disk_blocks > 0 */

#ifndef DBUG_OFF
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "end of flush_key_blocks", 0););
#endif
err:
  if (cache != cache_buff)
    my_free(cache);
  if (last_errno)
    errno=last_errno;                /* Return first error */
  DBUG_RETURN(last_errno != 0);
}
4019
4020
4021 /*
4022 Flush all blocks for a file to disk
4023
4024 SYNOPSIS
4025
4026 flush_key_blocks()
4027 keycache pointer to a key cache data structure
4028 file handler for the file to flush to
4029 flush_type type of the flush
4030
4031 RETURN
4032 0 ok
4033 1 error
4034 */
4035
/*
  Flush all blocks for a file to disk.

  A thin, locking wrapper around flush_key_blocks_int(). A key cache
  that was never initialized counts as success (nothing to flush).

  RETURN
    0   ok
    1   error
*/
int flush_key_blocks(KEY_CACHE *keycache,
                     File file, enum flush_type type)
{
  int result= 0;
  DBUG_ENTER("flush_key_blocks");
  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));

  if (!keycache->key_cache_inited)
    DBUG_RETURN(0);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /* While waiting for lock, keycache could have been ended. */
  if (keycache->disk_blocks > 0)
  {
    /* Hold off a concurrent resize for the duration of the flush. */
    inc_counter_for_resize_op(keycache);
    result= flush_key_blocks_int(keycache, file, type);
    dec_counter_for_resize_op(keycache);
  }
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(result);
}
4057
4058
4059 /*
4060 Flush all blocks in the key cache to disk.
4061
4062 SYNOPSIS
4063 flush_all_key_blocks()
4064 keycache pointer to key cache root structure
4065
4066 DESCRIPTION
4067
4068 Flushing of the whole key cache is done in two phases.
4069
4070 1. Flush all changed blocks, waiting for them if necessary. Loop
4071 until there is no changed block left in the cache.
4072
4073 2. Free all clean blocks. Normally this means free all blocks. The
4074 changed blocks were flushed in phase 1 and became clean. However we
4075 may need to wait for blocks that are read by other threads. While we
4076 wait, a clean block could become changed if that operation started
4077 before the resize operation started. To be safe we must restart at
4078 phase 1.
4079
4080 When we can run through the changed_blocks and file_blocks hashes
4081 without finding a block any more, then we are done.
4082
4083 Note that we hold keycache->cache_lock all the time unless we need
4084 to wait for something.
4085
4086 RETURN
4087 0 OK
4088 != 0 Error
4089 */
4090
/*
  Flush every block of the key cache to disk, then free all blocks.

  Phase 1 writes out all changed blocks, looping until no changed block
  remains. Phase 2 frees the (now clean) blocks. Waiting in phase 2 can
  let a clean block turn dirty again (a write that predates the resize),
  so the whole two-phase sequence repeats until a full pass finds no
  block at all.

  The cache_lock is held throughout, except while waiting inside
  flush_key_blocks_int().

  RETURN
    0     OK
    != 0  Error
*/
static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK *blk;
  uint clean_hits;
  uint hits;
  uint bucket;
  DBUG_ENTER("flush_all_key_blocks");

  do
  {
    mysql_mutex_assert_owner(&keycache->cache_lock);
    clean_hits= 0;

    /*
      Phase 1: flush changed blocks. Each non-empty bucket yields one
      file per pass; flushing that file removes all its changed blocks,
      so repeated passes terminate once no bucket has an entry left.
    */
    do
    {
      hits= 0;
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        if ((blk= keycache->changed_blocks[bucket]) != NULL)
        {
          hits++;
          /* Write the dirty blocks but keep them usable for reads. */
          if (flush_key_blocks_int(keycache, blk->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      Phase 2: free all clean blocks, one file per non-empty bucket and
      pass, exactly as above but over the file_blocks hash.
    */
    do
    {
      hits= 0;
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        if ((blk= keycache->file_blocks[bucket]) != NULL)
        {
          clean_hits++;
          hits++;
          if (flush_key_blocks_int(keycache, blk->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      If phase 2 freed anything we may have waited, and a clean block
      could have become dirty meanwhile. Start over at phase 1.
    */
  } while (clean_hits);

#ifndef DBUG_OFF
  /* Both hash arrays must be completely empty now. */
  for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
  {
    DBUG_ASSERT(!keycache->changed_blocks[bucket]);
    DBUG_ASSERT(!keycache->file_blocks[bucket]);
  }
#endif

  DBUG_RETURN(0);
}
4191
4192
4193 /*
4194 Reset the counters of a key cache.
4195
4196 SYNOPSIS
4197 reset_key_cache_counters()
4198 name the name of a key cache
key_cache pointer to the key cache to be reset
4200
4201 DESCRIPTION
4202 This procedure is used by process_key_caches() to reset the counters of all
4203 currently used key caches, both the default one and the named ones.
4204
4205 RETURN
4206 0 on success (always because it can't fail)
4207 */
4208
/*
  Reset the statistics counters of one key cache.

  Does nothing for a key cache that was never initialized. Always
  returns 0 (the process_key_caches() callback contract).
*/
int reset_key_cache_counters(const char *name __attribute__((unused)),
                             KEY_CACHE *key_cache)
{
  DBUG_ENTER("reset_key_cache_counters");
  if (key_cache->key_cache_inited)
  {
    DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
    key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
    key_cache->global_cache_r_requests= 0; /* Key_read_requests */
    key_cache->global_cache_read= 0;       /* Key_reads */
    key_cache->global_cache_w_requests= 0; /* Key_write_requests */
    key_cache->global_cache_write= 0;      /* Key_writes */
  }
  else
  {
    DBUG_PRINT("info", ("Key cache %s not initialized.", name));
  }
  DBUG_RETURN(0);
}
4227
4228
4229 #ifndef DBUG_OFF
4230 /*
4231 Test if disk-cache is ok
4232 */
/*
  Consistency check of the key cache structures (currently a stub).

  Invoked via DBUG_EXECUTE("check_keycache", ...) at the start and end
  of flush_key_blocks_int(). All parameters are ignored until the
  check is implemented.
*/
static void test_key_cache(KEY_CACHE *keycache __attribute__((unused)),
                           const char *where __attribute__((unused)),
                           my_bool lock __attribute__((unused)))
{
  /* TODO: implement the actual consistency checks. */
}
4239 #endif
4240
4241 #if defined(KEYCACHE_TIMEOUT)
4242
4243 #define KEYCACHE_DUMP_FILE "keycache_dump.txt"
4244 #define MAX_QUEUE_LEN 100
4245
4246
/*
  Dump the state of the key cache into KEYCACHE_DUMP_FILE.

  Debug-only helper (compiled under KEYCACHE_TIMEOUT). It prints the
  queue of threads waiting for a hash link, the queue of threads
  waiting for a block, the state of every used block including its two
  wait queues, and finally the LRU chain.

  Fixes relative to the previous version:
  - the fopen() result is checked before use;
  - the initial fprintf of 'thread->id' was removed: 'thread' was still
    uninitialized at that point (undefined behavior);
  - the per-block wait-queue loops used '++i', clobbering the block
    index of the enclosing loop and skipping blocks; a separate queue
    counter is used now;
  - the garbled 'block= keycache= used_last;' is restored to
    'block= keycache->used_last;' to match the loop's exit condition.
*/
static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file= fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  if (!keycache_dump_file)
    return;                                 /* nowhere to dump to */

  i=0;
  thread=last=waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(ulong) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last=waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
              thread->id, (uint) HASH_LINK_NUMBER(hash_link),
              (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      uint qlen= 0;      /* queue-length counter; must not reuse 'i' */
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          if (++qlen == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}
4336
4337 #endif /* defined(KEYCACHE_TIMEOUT) */
4338
4339 #if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4340
4341
/*
  Debug wrapper for condition waits that aborts if a wait exceeds
  KEYCACHE_TIMEOUT seconds (used to catch lost wakeups / deadlocks).
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int rc;
  struct timeval now;            /* time when we started waiting */
  struct timespec timeout;       /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;
#endif

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval uses microseconds.
    timespec uses nanoseconds.
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  cnt++;
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
    /* NOTE(review): despite the indentation, this fflush() is NOT part
       of the if above and runs on every call. */
    fflush(keycache_debug_log);
#endif
  rc= mysql_cond_timedwait(cond, mutex, &timeout);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  if (rc == ETIMEDOUT || rc == ETIME)
  {
#if defined(KEYCACHE_DEBUG)
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
#endif
    /* NOTE(review): keycache_dump() is declared above with a KEY_CACHE*
       parameter but called here without one; this debug-only path looks
       bit-rotted -- confirm against the keycache_dump() definition. */
    keycache_dump();
  }

  /* A timeout here means the cache is wedged; fail hard in debug builds. */
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
4389 #else
4390 #if defined(KEYCACHE_DEBUG)
/*
  Debug wrapper around mysql_cond_wait() that brackets the wait with
  thread-trace events (no timeout handling in this variant).
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int wait_rc;
  KEYCACHE_THREAD_TRACE_END("started waiting");
  wait_rc= mysql_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return wait_rc;
}
4400 #endif
4401 #endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4402
4403 #if defined(KEYCACHE_DEBUG)
4404
4405
/*
  Acquire 'mutex' and record a thread-trace event.

  The trace marker is emitted only after the lock is held, so trace
  events appear in lock-acquisition order.
*/
static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
  int res;
  res= mysql_mutex_lock(mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return res;
}
4413
4414
/*
  Record a thread-trace event, then release 'mutex'. The marker must be
  emitted while the mutex is still held.
*/
static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  mysql_mutex_unlock(mutex);
}
4420
4421
/*
  Record a thread-trace event, then signal one waiter on 'cond'.

  RETURN
    Result of mysql_cond_signal().
*/
static int keycache_pthread_cond_signal(mysql_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return mysql_cond_signal(cond);
}
4429
4430
4431 #if defined(KEYCACHE_DEBUG_LOG)
4432
4433
keycache_debug_print(const char * fmt,...)4434 static void keycache_debug_print(const char * fmt,...)
4435 {
4436 va_list args;
4437 va_start(args,fmt);
4438 if (keycache_debug_log)
4439 {
4440 (void) vfprintf(keycache_debug_log, fmt, args);
4441 (void) fputc('\n',keycache_debug_log);
4442 }
4443 va_end(args);
4444 }
4445 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4446
4447 #if defined(KEYCACHE_DEBUG_LOG)
4448
4449
keycache_debug_log_close(void)4450 void keycache_debug_log_close(void)
4451 {
4452 if (keycache_debug_log)
4453 fclose(keycache_debug_log);
4454 }
4455 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4456
4457 #endif /* defined(KEYCACHE_DEBUG) */
4458
4459 #if !defined(DBUG_OFF)
4460 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4461
/*
  Dump the complete state of 'block' to the debug trace and return 0,
  so the call can be embedded in a DBUG_ASSERT() expression: the state
  is printed first, then the assert fails on the zero result.
*/
static int fail_block(BLOCK_LINK *block)
{
  F_B_PRT("block->next_used: %lx\n", (ulong) block->next_used);
  F_B_PRT("block->prev_used: %lx\n", (ulong) block->prev_used);
  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
  F_B_PRT("block->hash_link: %lx\n", (ulong) block->hash_link);
  F_B_PRT("block->status: %u\n", block->status);
  F_B_PRT("block->length: %u\n", block->length);
  F_B_PRT("block->offset: %u\n", block->offset);
  F_B_PRT("block->requests: %u\n", block->requests);
  F_B_PRT("block->temperature: %u\n", block->temperature);
  return 0; /* Let the assert fail. */
}
4476
/*
  Dump the complete state of 'hlink' to the debug trace and return 0,
  so the call can be embedded in a DBUG_ASSERT() expression (state is
  printed, then the assert fails on the zero result).
*/
static int fail_hlink(HASH_LINK *hlink)
{
  F_B_PRT("hlink->next: %lx\n", (ulong) hlink->next);
  F_B_PRT("hlink->prev: %lx\n", (ulong) hlink->prev);
  F_B_PRT("hlink->block: %lx\n", (ulong) hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file: %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
4486
/*
  Verify that the key cache holds no used blocks and no used hash links.

  Any block or hash link still in use is dumped to stderr via
  fail_block()/fail_hlink(), followed by a usage summary.

  Fix: 'idx' is an int, but it was printed with "%u"; a mismatched
  format specifier is undefined behavior, so "%d" is used now.

  RETURN
    1  the cache is empty (or was never sized: disk_blocks <= 0)
    0  at least one block or hash link is still in use
*/
static int cache_empty(KEY_CACHE *keycache)
{
  int errcnt= 0;
  int idx;
  if (keycache->disk_blocks <= 0)
    return 1;
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      fprintf(stderr, "block index: %d\n", idx);
      fail_block(block);
      errcnt++;
    }
  }
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      fprintf(stderr, "hash_link index: %d\n", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    fprintf(stderr, "blocks: %d used: %lu\n",
            keycache->disk_blocks, keycache->blocks_used);
    fprintf(stderr, "hash_links: %d used: %d\n",
            keycache->hash_links, keycache->hash_links_used);
    fprintf(stderr, "\n");
  }
  return !errcnt;
}
4523 #endif
4524
4525