1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
27
28 /**
29 @file
These functions handle keyblock caching for ISAM and MyISAM tables.
31
32 One cache can handle many files.
33 It must contain buffers of the same blocksize.
34 init_key_cache() should be used to init cache handler.
35
36 The free list (free_block_list) is a stack like structure.
37 When a block is freed by free_block(), it is pushed onto the stack.
38 When a new block is required it is first tried to pop one from the stack.
39 If the stack is empty, it is tried to get a never-used block from the pool.
40 If this is empty too, then a block is taken from the LRU ring, flushing it
to disk, if necessary. This is handled in find_key_block().
42 With the new free list, the blocks can have three temperatures:
43 hot, warm and cold (which is free). This is remembered in the block header
44 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
temperature is necessary to correctly count the number of warm blocks,
46 which is required to decide when blocks are allowed to become hot. Whenever
47 a block is inserted to another (sub-)chain, we take the old and new
48 temperature into account to decide if we got one more or less warm block.
49 blocks_unused is the sum of never used blocks in the pool and of currently
50 free blocks. blocks_used is the number of blocks fetched from the pool and
51 as such gives the maximum number of in-use blocks at any time.
52 */
53
54 /*
55 Key Cache Locking
56 =================
57
58 All key cache locking is done with a single mutex per key cache:
59 keycache->cache_lock. This mutex is locked almost all the time
60 when executing code in this file (mf_keycache.c).
61 However it is released for I/O and some copy operations.
62
63 The cache_lock is also released when waiting for some event. Waiting
64 and signalling is done via condition variables. In most cases the
65 thread waits on its thread->suspend condition variable. Every thread
66 has a my_thread_var structure, which contains this variable and a
67 '*next' and '**prev' pointer. These pointers are used to insert the
68 thread into a wait queue.
69
70 A thread can wait for one block and thus be in one wait queue at a
71 time only.
72
73 Before starting to wait on its condition variable with
74 mysql_cond_wait(), the thread enters itself to a specific wait queue
75 with link_into_queue() (double linked with '*next' + '**prev') or
76 wait_on_queue() (single linked with '*next').
77
78 Another thread, when releasing a resource, looks up the waiting thread
79 in the related wait queue. It sends a signal with
80 mysql_cond_signal() to the waiting thread.
81
82 NOTE: Depending on the particular wait situation, either the sending
83 thread removes the waiting thread from the wait queue with
84 unlink_from_queue() or release_whole_queue() respectively, or the waiting
85 thread removes itself.
86
87 There is one exception from this locking scheme when one thread wants
88 to reuse a block for some other address. This works by first marking
89 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
90 threads that are reading the block to finish. Each block has a
91 reference to a condition variable (condvar). It holds a reference to
92 the thread->suspend condition variable for the waiting thread (if such
93 a thread exists). When that thread is signaled, the reference is
94 cleared. The number of readers of a block is registered in
95 block->hash_link->requests. See wait_for_readers() / remove_reader()
96 for details. This is similar to the above, but it clearly means that
97 only one thread can wait for a particular block. There is no queue in
this case. Strangely enough block->condvar is used for waiting for the
99 assigned hash_link only. More precisely it is used to wait for all
100 requests to be unregistered from the assigned hash_link.
101
102 The resize_queue serves two purposes:
103 1. Threads that want to do a resize wait there if in_resize is set.
104 This is not used in the server. The server refuses a second resize
105 request if one is already active. keycache->in_init is used for the
106 synchronization. See set_var.cc.
107 2. Threads that want to access blocks during resize wait here during
108 the re-initialization phase.
109 When the resize is done, all threads on the queue are signalled.
110 Hypothetical resizers can compete for resizing, and read/write
111 requests will restart to request blocks from the freshly resized
112 cache. If the cache has been resized too small, it is disabled and
113 'can_be_used' is false. In this case read/write requests bypass the
114 cache. Since they increment and decrement 'cnt_for_resize_op', the
115 next resizer can wait on the queue 'waiting_for_resize_cnt' until all
116 I/O finished.
117 */
118
119 #include "mysys_priv.h"
120 #include "mysys_err.h"
121 #include <keycache.h>
122 #include "my_static.h"
123 #include <m_string.h>
124 #include <my_bit.h>
125 #include <errno.h>
126 #include <stdarg.h>
127 #include "probes_mysql.h"
128
129 /*
130 Some compilation flags have been added specifically for this module
131 to control the following:
132 - not to let a thread to yield the control when reading directly
133 from key cache, which might improve performance in many cases;
134 to enable this add:
135 #define SERIALIZED_READ_FROM_CACHE
136 - to set an upper bound for number of threads simultaneously
137 using the key cache; this setting helps to determine an optimal
138 size for hash table and improve performance when the number of
139 blocks in the key cache much less than the number of threads
140 accessing it;
141 to set this number equal to <N> add
142 #define MAX_THREADS <N>
143 - to substitute calls of mysql_cond_wait for calls of
144 mysql_cond_timedwait (wait with timeout set up);
145 this setting should be used only when you want to trap a deadlock
146 situation, which theoretically should not happen;
147 to set timeout equal to <T> seconds add
148 #define KEYCACHE_TIMEOUT <T>
149 - to enable the module traps and to send debug information from
150 key cache module to a special debug log add:
151 #define KEYCACHE_DEBUG
152 the name of this debug log file <LOG NAME> can be set through:
153 #define KEYCACHE_DEBUG_LOG <LOG NAME>
154 if the name is not defined, it's set by default;
155 if the KEYCACHE_DEBUG flag is not set up and we are in a debug
156 mode, i.e. when ! defined(DBUG_OFF), the debug information from the
157 module is sent to the regular debug log.
158
159 Example of the settings:
160 #define SERIALIZED_READ_FROM_CACHE
161 #define MAX_THREADS 100
162 #define KEYCACHE_TIMEOUT 1
163 #define KEYCACHE_DEBUG
164 #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
165 */
166
167 #define STRUCT_PTR(TYPE, MEMBER, a) \
168 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
169
170 /* types of condition variables */
171 #define COND_FOR_REQUESTED 0
172 #define COND_FOR_SAVED 1
173 #define COND_FOR_READERS 2
174
175 typedef mysql_cond_t KEYCACHE_CONDVAR;
176
/*
  Descriptor of a page in the key cache block buffer.
  A page is identified by the (file, filepos) pair it was read from.
*/
struct st_keycache_page
{
  int file;            /* file descriptor the page belongs to */
  my_off_t filepos;    /* byte offset of the page within that file */
};
183
/*
  Element in the chain of a hash table bucket.
  Maps a (file, diskpos) page address to the cache block (if any) that
  currently holds the page, and counts outstanding requests for it.
*/
struct st_hash_link
{
  struct st_hash_link *next, **prev; /* to connect links in the same bucket */
  struct st_block_link *block;       /* block holding the page; NULL if none assigned */
  File file;                         /* file the cached page comes from */
  my_off_t diskpos;                  /* offset of the page within 'file' */
  uint requests;                     /* number of registered requests for the page */
};
193
194 /* simple states of a block */
195 #define BLOCK_ERROR 1 /* an error occured when performing file i/o */
196 #define BLOCK_READ 2 /* file block is in the block buffer */
197 #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */
198 #define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */
199 #define BLOCK_IN_FLUSH 16 /* block is selected for flush */
200 #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
201 #define BLOCK_IN_USE 64 /* block is not free */
202 #define BLOCK_IN_EVICTION 128 /* block is selected for eviction */
203 #define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */
204 #define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */
205
206 /* page status, returned by find_key_block */
207 #define PAGE_READ 0
208 #define PAGE_TO_BE_READ 1
209 #define PAGE_WAIT_TO_BE_READ 2
210
/*
  Block temperature determines in which (sub-)chain the block currently is:
  cold blocks are free; warm and hot blocks live in the LRU ring, with hot
  blocks in the protected sub-chain (see the file-top comment on warm-block
  accounting).
*/
enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
213
/*
  Key cache block: one buffer of key_cache_block_size bytes plus the
  bookkeeping needed to link it into the LRU ring, the per-file
  dirty/clean lists, and the hash table.
*/
struct st_block_link
{
  struct st_block_link
    *next_used, **prev_used;    /* to connect links in the LRU chain (ring) */
  struct st_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
  struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
  KEYCACHE_WQUEUE wqueue[2];    /* queues on waiting requests for new/old pages */
  uint requests;                /* number of requests for the block */
  uchar *buffer;                /* buffer for the block page */
  uint offset;                  /* beginning of modified data in the buffer */
  uint length;                  /* end of data in the buffer */
  uint status;                  /* bitmap of BLOCK_* state flags */
  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
  uint hits_left;               /* number of hits left until promotion to hot */
  ulonglong last_hit_time;      /* timestamp of the last hit */
  KEYCACHE_CONDVAR *condvar;    /* condition variable for 'no readers' event */
};
233
234 KEY_CACHE dflt_key_cache_var;
235 KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
236
237 #define FLUSH_CACHE 2000 /* sort this many blocks at once */
238
239 static int flush_all_key_blocks(KEY_CACHE *keycache);
240
241 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
242 mysql_mutex_t *mutex);
243 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
244
245 static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
246 #if !defined(DBUG_OFF)
247 static void test_key_cache(KEY_CACHE *keycache,
248 const char *where, my_bool lock);
249 #endif
250
251 #define KEYCACHE_HASH(f, pos) \
252 (((ulong) ((pos) / keycache->key_cache_block_size) + \
253 (ulong) (f)) & (keycache->hash_entries-1))
254 #define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
255
256 #define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
257
258 #if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
259 #define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
260 #endif
261
262 #if defined(KEYCACHE_DEBUG_LOG)
263 static FILE *keycache_debug_log=NULL;
264 static void keycache_debug_print(const char *fmt,...);
265 #define KEYCACHE_DEBUG_OPEN \
266 if (!keycache_debug_log) \
267 { \
268 keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
269 (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
270 }
271
272 #define KEYCACHE_DEBUG_CLOSE \
273 if (keycache_debug_log) \
274 { \
275 fclose(keycache_debug_log); \
276 keycache_debug_log= 0; \
277 }
278 #else
279 #define KEYCACHE_DEBUG_OPEN
280 #define KEYCACHE_DEBUG_CLOSE
281 #endif /* defined(KEYCACHE_DEBUG_LOG) */
282
283 #if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
284 #define KEYCACHE_DBUG_PRINT(l, m) \
285 { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
286 keycache_debug_print m; }
287
288 #define KEYCACHE_DBUG_ASSERT(a) \
289 { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
290 assert(a); }
291 #else
292 #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
293 #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
294 #endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */
295
296 #if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
297
298 static long keycache_thread_id;
299 #define KEYCACHE_THREAD_TRACE(l) \
300 KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
301
302 #define KEYCACHE_THREAD_TRACE_BEGIN(l) \
303 { struct st_my_thread_var *thread_var= my_thread_var; \
304 keycache_thread_id= thread_var->id; \
305 KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
306
307 #define KEYCACHE_THREAD_TRACE_END(l) \
308 KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
309 #else
310 #define KEYCACHE_THREAD_TRACE_BEGIN(l)
311 #define KEYCACHE_THREAD_TRACE_END(l)
312 #define KEYCACHE_THREAD_TRACE(l)
313 #endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */
314
315 #define BLOCK_NUMBER(b) \
316 ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
317 #define HASH_LINK_NUMBER(h) \
318 ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
319
320 #if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
321 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
322 mysql_mutex_t *mutex);
323 #else
324 #define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
325 #endif
326
327 #if defined(KEYCACHE_DEBUG)
328 static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
329 static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
330 static int keycache_pthread_cond_signal(mysql_cond_t *cond);
331 #else
332 #define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
333 #define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
334 #define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
335 #endif /* defined(KEYCACHE_DEBUG) */
336
337 #if !defined(DBUG_OFF)
338 #if defined(inline)
339 #undef inline
340 #endif
341 #define inline /* disabled inline for easier debugging */
342 static int fail_block(BLOCK_LINK *block);
343 static int fail_hlink(HASH_LINK *hlink);
344 static int cache_empty(KEY_CACHE *keycache);
345 #endif
346
/*
  Return my_round_up_to_next_power(value) shifted left by one bit,
  i.e. a power of two strictly greater than 'value' (assuming
  my_round_up_to_next_power() rounds up to the nearest power of two —
  see my_bit.h; TODO confirm behavior for value == 0). Used to size the
  hash table so it has more entries than blocks.
*/
static inline uint next_power(uint value)
{
  return (uint) my_round_up_to_next_power((uint32) value) << 1;
}
351
352
353 /*
354 Initialize a key cache
355
356 SYNOPSIS
357 init_key_cache()
358 keycache pointer to a key cache data structure
359 key_cache_block_size size of blocks to keep cached data
360 use_mem total memory to use for the key cache
361 division_limit division limit (may be zero)
362 age_threshold age threshold (may be zero)
363
364 RETURN VALUE
365 number of blocks in the key cache, if successful,
366 0 - otherwise.
367
368 NOTES.
369 if keycache->key_cache_inited != 0 we assume that the key cache
370 is already initialized. This is for now used by myisamchk, but shouldn't
371 be something that a program should rely on!
372
373 It's assumed that no two threads call this function simultaneously
374 referring to the same key cache handle.
375
376 */
377
int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                   size_t use_mem, uint division_limit,
                   uint age_threshold)
{
  ulong blocks, hash_links;
  size_t length;
  int error;
  DBUG_ENTER("init_key_cache");
  DBUG_ASSERT(key_cache_block_size >= 512);

  KEYCACHE_DEBUG_OPEN;
  /* An already populated cache must not be initialized again. */
  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
  {
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
  }

  /* Reset statistics counters; disk_blocks == -1 marks "initializing". */
  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
  if (! keycache->key_cache_inited)
  {
    keycache->key_cache_inited= 1;
    /*
      Initialize these variables once only.
      Their value must survive re-initialization during resizing.
    */
    keycache->in_resize= 0;
    keycache->resize_in_flush= 0;
    keycache->cnt_for_resize_op= 0;
    keycache->waiting_for_resize_cnt.last_thread= NULL;
    keycache->in_init= 0;
    mysql_mutex_init(key_KEY_CACHE_cache_lock,
                     &keycache->cache_lock, MY_MUTEX_INIT_FAST);
    keycache->resize_queue.last_thread= NULL;
  }

  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;
  DBUG_PRINT("info", ("key_cache_block_size: %u",
                      key_cache_block_size));

  /*
    Estimate the block count: each block costs one BLOCK_LINK, two
    HASH_LINKs, its share (5/4) of the hash-entry array, and the page
    buffer itself.
  */
  blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
                              sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
  /* It doesn't make sense to have too few blocks (less than 8) */
  if (blocks >= 8)
  {
    /* Allocation retry loop: shrink 'blocks' by 25% on each failure. */
    for ( ; ; )
    {
      /* Set my_hash_entries to the next bigger 2 power */
      if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
        keycache->hash_entries<<= 1;
      hash_links= 2 * blocks;
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
        hash_links= MAX_THREADS + blocks - 1;
#endif
      /* Trim 'blocks' until all structures plus buffers fit in use_mem. */
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
                       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
                       ALIGN_SIZE(sizeof(HASH_LINK*) *
                                  keycache->hash_entries))) +
             ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
        blocks--;
      /* Allocate memory for cache page buffers */
      if ((keycache->block_mem=
           my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
                           MYF(0))))
      {
        /*
          Allocate memory for blocks, hash_links and hash entries;
          For each block 2 hash links are allocated
        */
        if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
                                                           MYF(0))))
          break;
        /* Second allocation failed: release the buffers and retry smaller. */
        my_large_free(keycache->block_mem);
        keycache->block_mem= 0;
      }
      if (blocks < 8)
      {
        my_errno= ENOMEM;
        my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
                 blocks * keycache->key_cache_block_size);
        goto err;
      }
      /* Reduce the block count by 25% and try again. */
      blocks= blocks / 4*3;
    }
    keycache->blocks_unused= blocks;
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= hash_links;
    /*
      block_root holds three consecutive arrays: BLOCK_LINKs, then the
      hash-entry pointer array (hash_root), then the HASH_LINKs.
    */
    keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
                                        ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
    keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
                                            ALIGN_SIZE((sizeof(HASH_LINK*) *
                                                        keycache->hash_entries)));
    memset(keycache->block_root, 0,
           keycache->disk_blocks * sizeof(BLOCK_LINK));
    memset(keycache->hash_root, 0,
           keycache->hash_entries * sizeof(HASH_LINK*));
    memset(keycache->hash_link_root, 0,
           keycache->hash_links * sizeof(HASH_LINK));
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;

    keycache->global_blocks_changed= 0;
    keycache->blocks_available=0; /* For debugging */

    /* The LRU chain is empty after initialization */
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
    keycache->free_block_list= NULL;
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
    /* Midpoint-insertion parameters; percentages of the block count. */
    keycache->min_warm_blocks= (division_limit ?
                                blocks * division_limit / 100 + 1 :
                                blocks);
    keycache->age_threshold= (age_threshold ?
                              blocks * age_threshold / 100 :
                              blocks);

    keycache->can_be_used= 1;

    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
    DBUG_PRINT("exit",
               ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\
 hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx",
                keycache->disk_blocks, (long) keycache->block_root,
                keycache->hash_entries, (long) keycache->hash_root,
                keycache->hash_links, (long) keycache->hash_link_root));
    memset(keycache->changed_blocks, 0,
           sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
    memset(keycache->file_blocks, 0,
           sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
  }
  else
  {
    /* key_buffer_size is specified too small. Disable the cache. */
    keycache->can_be_used= 0;
  }

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
  DBUG_RETURN((int) keycache->disk_blocks);

err:
  /* Out of memory: free whatever was allocated and disable the cache. */
  error= my_errno;
  keycache->disk_blocks= 0;
  keycache->blocks= 0;
  if (keycache->block_mem)
  {
    my_large_free((uchar*) keycache->block_mem);
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
    my_free(keycache->block_root);
    keycache->block_root= NULL;
  }
  my_errno= error;
  keycache->can_be_used= 0;
  DBUG_RETURN(0);
}
541
542
543 /*
544 Resize a key cache
545
546 SYNOPSIS
547 resize_key_cache()
548 keycache pointer to a key cache data structure
549 key_cache_block_size size of blocks to keep cached data
550 use_mem total memory to use for the new key cache
551 division_limit new division limit (if not zero)
552 age_threshold new age threshold (if not zero)
553
554 RETURN VALUE
555 number of blocks in the key cache, if successful,
556 0 - otherwise.
557
558 NOTES.
559 The function first compares the memory size and the block size parameters
560 with the key cache values.
561
If they differ, the function frees the memory allocated for the
563 old key cache blocks by calling the end_key_cache function and
564 then rebuilds the key cache with new blocks by calling
565 init_key_cache.
566
567 The function starts the operation only when all other threads
568 performing operations with the key cache let her to proceed
569 (when cnt_for_resize=0).
570 */
571
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                     size_t use_mem, uint division_limit,
                     uint age_threshold)
{
  int blocks;
  DBUG_ENTER("resize_key_cache");

  /* Nothing to resize if the cache was never initialized. */
  if (!keycache->key_cache_inited)
    DBUG_RETURN(keycache->disk_blocks);

  /*
    If the geometry (block size and total memory) is unchanged, only the
    midpoint-insertion parameters need updating; no flush or
    re-allocation is required.
  */
  if(key_cache_block_size == keycache->key_cache_block_size &&
     use_mem == keycache->key_cache_mem_size)
  {
    change_key_cache_param(keycache, division_limit, age_threshold);
    DBUG_RETURN(keycache->disk_blocks);
  }

  keycache_pthread_mutex_lock(&keycache->cache_lock);

  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    /* purecov: begin inspected */
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* purecov: end */
  }

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks to enter the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      blocks= 0;
      keycache->can_be_used= 0;
      goto finish;
    }
    DBUG_ASSERT(cache_empty(keycache));

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked if it is in the cache. We need to wait for
    pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We do also want to
    wait for I/O done when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);

  /*
    Free old cache structures, allocate new structures, and initialize
    them. Note that the cache_lock mutex and the resize_queue are left
    untouched. We do not lose the cache_lock and will release it only at
    the end of this function.
  */
  end_key_cache(keycache, 0);			/* Don't free mutex */
  /* The following will work even if use_mem is 0 */
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
			 division_limit, age_threshold);

finish:
  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;

  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);

  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(blocks);
}
669
670
671 /*
672 Increment counter blocking resize key cache operation
673 */
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  /* One more cache user in flight; a pending resize must wait for it. */
  keycache->cnt_for_resize_op+= 1;
}
678
679
680 /*
681 Decrement counter blocking resize key cache operation;
682 Signal the operation to proceed when counter becomes equal zero
683 */
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  /* Wake a waiting resizer once the last pending cache operation ends. */
  if (!--keycache->cnt_for_resize_op)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}
689
690 /*
691 Change the key cache parameters
692
693 SYNOPSIS
694 change_key_cache_param()
695 keycache pointer to a key cache data structure
696 division_limit new division limit (if not zero)
697 age_threshold new age threshold (if not zero)
698
699 RETURN VALUE
700 none
701
702 NOTES.
703 Presently the function resets the key cache parameters
704 concerning midpoint insertion strategy - division_limit and
705 age_threshold.
706 */
707
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
                            uint age_threshold)
{
  DBUG_ENTER("change_key_cache_param");

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /* Both parameters are percentages of the block count; zero means "keep". */
  if (division_limit)
    keycache->min_warm_blocks= (keycache->disk_blocks *
                                division_limit / 100 + 1);
  if (age_threshold)
    keycache->age_threshold= (keycache->disk_blocks *
                              age_threshold / 100);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_VOID_RETURN;
}
723
724
725 /*
726 Remove key_cache from memory
727
728 SYNOPSIS
729 end_key_cache()
730 keycache key cache handle
731 cleanup Complete free (Free also mutex for key cache)
732
733 RETURN VALUE
734 none
735 */
736
void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{
  DBUG_ENTER("end_key_cache");
  DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));

  if (!keycache->key_cache_inited)
    DBUG_VOID_RETURN;

  if (keycache->disk_blocks > 0)
  {
    /*
      block_mem and block_root are allocated together in init_key_cache(),
      so both are freed when block_mem is set.
    */
    if (keycache->block_mem)
    {
      my_large_free((uchar*) keycache->block_mem);
      keycache->block_mem= NULL;
      my_free(keycache->block_root);
      keycache->block_root= NULL;
    }
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  DBUG_PRINT("status", ("used: %lu  changed: %lu  w_requests: %lu  "
                        "writes: %lu  r_requests: %lu  reads: %lu",
                        keycache->blocks_used, keycache->global_blocks_changed,
                        (ulong) keycache->global_cache_w_requests,
                        (ulong) keycache->global_cache_write,
                        (ulong) keycache->global_cache_r_requests,
                        (ulong) keycache->global_cache_read));

  /*
    Reset these values to be able to detect a disabled key cache.
    See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
  */
  keycache->blocks_used= 0;
  keycache->blocks_unused= 0;

  /* With cleanup == 0 (resize) the mutex and inited flag are kept. */
  if (cleanup)
  {
    mysql_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
    KEYCACHE_DEBUG_CLOSE;
  }
  DBUG_VOID_RETURN;
} /* end_key_cache */
782
783
784 /*
785 Link a thread into double-linked queue of waiting threads.
786
787 SYNOPSIS
788 link_into_queue()
789 wqueue pointer to the queue structure
790 thread pointer to the thread to be added to the queue
791
792 RETURN VALUE
793 none
794
795 NOTES.
796 Queue is represented by a circular list of the thread structures
797 The list is double-linked of the type (**prev,*next), accessed by
798 a pointer to the last element.
799 */
800
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                            struct st_my_thread_var *thread)
{
  struct st_my_thread_var *last;

  /* The thread must not already be in any wait queue. */
  DBUG_ASSERT(!thread->next && !thread->prev);
  if (! (last= wqueue->last_thread))
  {
    /* Queue is empty: the ring consists of 'thread' alone. */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    /*
      Insert 'thread' between 'last' and the first ring element
      (last->next), keeping the circular (**prev, *next) invariants.
    */
    thread->prev= last->next->prev;
    last->next->prev= &thread->next;
    thread->next= last->next;
    last->next= thread;
  }
  /* The newly linked thread becomes the tail of the queue. */
  wqueue->last_thread= thread;
}
822
823 /*
824 Unlink a thread from double-linked queue of waiting threads
825
826 SYNOPSIS
827 unlink_from_queue()
828 wqueue pointer to the queue structure
829 thread pointer to the thread to be removed from the queue
830
831 RETURN VALUE
832 none
833
834 NOTES.
835 See NOTES for link_into_queue
836 */
837
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                              struct st_my_thread_var *thread)
{
  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
  /* The thread must currently be linked into a queue. */
  DBUG_ASSERT(thread->next && thread->prev);
  if (thread->next == thread)
    /* The queue contains only one member */
    wqueue->last_thread= NULL;
  else
  {
    /* Splice 'thread' out of the circular list. */
    thread->next->prev= thread->prev;
    *thread->prev=thread->next;
    /*
      If the tail is removed, its predecessor becomes the new tail.
      thread->prev points at the predecessor's 'next' member, so
      STRUCT_PTR recovers the predecessor structure from it.
    */
    if (wqueue->last_thread == thread)
      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
                                      thread->prev);
  }
  thread->next= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  thread->prev= NULL;
#endif
}
863
864
865 /*
866 Add a thread to single-linked queue of waiting threads
867
868 SYNOPSIS
869 wait_on_queue()
870 wqueue Pointer to the queue structure.
871 mutex Cache_lock to acquire after awake.
872
873 RETURN VALUE
874 none
875
876 NOTES.
877 Queue is represented by a circular list of the thread structures
878 The list is single-linked of the type (*next), accessed by a pointer
879 to the last element.
880
881 The function protects against stray signals by verifying that the
882 current thread is unlinked from the queue when awaking. However,
883 since several threads can wait for the same event, it might be
884 necessary for the caller of the function to check again if the
885 condition for awake is indeed matched.
886 */
887
/*
  Append the calling thread to the circular single-linked wait queue
  and suspend it until a signalling thread removes it again.
*/
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          mysql_mutex_t *mutex)
{
  struct st_my_thread_var *self= my_thread_var;
  struct st_my_thread_var *tail= wqueue->last_thread;

  /* We must not already be a member of any queue. */
  DBUG_ASSERT(!self->next);
  DBUG_ASSERT(!self->prev); /* Not required, but must be true anyway. */

  if (tail == NULL)
  {
    /* Empty queue: become a ring of one element. */
    self->next= self;
  }
  else
  {
    /* Insert behind the current tail, keeping the ring closed. */
    self->next= tail->next;
    tail->next= self;
  }
  wqueue->last_thread= self;

  /*
    Sleep until a signalling thread has unlinked us (self->next == NULL).
    Looping on the link pointer protects against stray signals.
  */
  for (;;)
  {
    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", self->id));
    keycache_pthread_cond_wait(&self->suspend, mutex);
    if (!self->next)
      break;
  }
}
917
918
919 /*
920 Remove all threads from queue signaling them to proceed
921
922 SYNOPSIS
923 release_whole_queue()
924 wqueue pointer to the queue structure
925
926 RETURN VALUE
927 none
928
929 NOTES.
930 See notes for wait_on_queue().
931 When removed from the queue each thread is signaled via condition
932 variable thread->suspend.
933 */
934
/*
  Wake every thread in the wait queue, detaching each from the ring,
  then mark the queue empty.
*/
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *tail= wqueue->last_thread;
  struct st_my_thread_var *cur;

  /* Nothing to do for an empty queue. */
  if (tail == NULL)
    return;

  cur= tail->next;                        /* oldest waiter first */
  for (;;)
  {
    struct st_my_thread_var *following= NULL;

    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
                        ("thread %ld", cur->id));
    /* Wake the waiter ... */
    keycache_pthread_cond_signal(&cur->suspend);
    /* ... then detach it; save the successor before clearing the link. */
    following= cur->next;
    cur->next= NULL;
    if (cur == tail)
      break;
    cur= following;
  }

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
962
963
964 /*
965 Unlink a block from the chain of dirty/clean blocks
966 */
967
/*
  Detach a block from the dirty/clean chain it is currently linked in.
*/
static inline void unlink_changed(BLOCK_LINK *block)
{
  BLOCK_LINK *successor= block->next_changed;

  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);

  /* The successor, if any, inherits our back-pointer. */
  if (successor)
    successor->prev_changed= block->prev_changed;
  /* The predecessor's forward link now skips this block. */
  *block->prev_changed= successor;

#if !defined(DBUG_OFF)
  /*
    Poison the links so it is obvious during debugging that the block
    is in no chain. Some DBUG_ASSERT() rely on this.
  */
  block->next_changed= NULL;
  block->prev_changed= NULL;
#endif
}
984
985
986 /*
987 Link a block into the chain of dirty/clean blocks
988 */
989
/*
  Insert a block at the head of the dirty/clean chain anchored at *phead.
*/
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  BLOCK_LINK *old_head= *phead;

  /* The block must not be a member of any chain yet. */
  DBUG_ASSERT(!block->next_changed);
  DBUG_ASSERT(!block->prev_changed);

  block->prev_changed= phead;
  block->next_changed= old_head;
  if (old_head)
    old_head->prev_changed= &block->next_changed;
  *phead= block;
}
999
1000
1001 /*
1002 Link a block in a chain of clean blocks of a file.
1003
1004 SYNOPSIS
1005 link_to_file_list()
1006 keycache Key cache handle
1007 block Block to relink
1008 file File to be linked to
1009 unlink If to unlink first
1010
1011 DESCRIPTION
1012 Unlink a block from whichever chain it is linked in, if it's
1013 asked for, and link it to the chain of clean blocks of the
1014 specified file.
1015
1016 NOTE
1017 Please do never set/clear BLOCK_CHANGED outside of
1018 link_to_file_list() or link_to_changed_list().
1019 You would risk to damage correct counting of changed blocks
1020 and to find blocks in the wrong hash.
1021
1022 RETURN
1023 void
1024 */
1025
/*
  Link a block into the chain of clean blocks of the given file,
  optionally unlinking it from its current chain first, and clear
  BLOCK_CHANGED together with the dirty-block counters.

  NOTE: BLOCK_CHANGED must only be set/cleared here and in
  link_to_changed_list(); otherwise changed-block accounting and the
  hash the block is found in would be corrupted.
*/
static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file,
                              my_bool unlink_block)
{
  BLOCK_LINK **clean_chain= &keycache->file_blocks[FILE_HASH(file)];

  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->hash_link->file == file);

  /* Detach from the current chain when the caller asks for it. */
  if (unlink_block)
    unlink_changed(block);
  link_changed(block, clean_chain);

  /* A block in the clean chain must not be counted as changed. */
  if (block->status & BLOCK_CHANGED)
  {
    block->status&= ~BLOCK_CHANGED;
    keycache->blocks_changed--;
    keycache->global_blocks_changed--;
  }
}
1043
1044
1045 /*
1046 Re-link a block from the clean chain to the dirty chain of a file.
1047
1048 SYNOPSIS
1049 link_to_changed_list()
1050 keycache key cache handle
1051 block block to relink
1052
1053 DESCRIPTION
1054 Unlink a block from the chain of clean blocks of a file
1055 and link it to the chain of dirty blocks of the same file.
1056
1057 NOTE
1058 Please do never set/clear BLOCK_CHANGED outside of
1059 link_to_file_list() or link_to_changed_list().
1060 You would risk to damage correct counting of changed blocks
1061 and to find blocks in the wrong hash.
1062
1063 RETURN
1064 void
1065 */
1066
/*
  Move a block from the clean chain of its file to the dirty chain of
  the same file, setting BLOCK_CHANGED and the dirty-block counters.

  NOTE: BLOCK_CHANGED must only be set/cleared here and in
  link_to_file_list(); see the note there.
*/
static void link_to_changed_list(KEY_CACHE *keycache,
                                 BLOCK_LINK *block)
{
  BLOCK_LINK **dirty_chain=
    &keycache->changed_blocks[FILE_HASH(block->hash_link->file)];

  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);

  unlink_changed(block);
  link_changed(block, dirty_chain);

  /* Account for one more dirty block. */
  block->status|=BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}
1081
1082
1083 /*
1084 Link a block to the LRU chain at the beginning or at the end of
1085 one of two parts.
1086
1087 SYNOPSIS
1088 link_block()
1089 keycache pointer to a key cache data structure
1090 block pointer to the block to link to the LRU chain
1091 hot <-> to link the block into the hot subchain
1092 at_end <-> to link the block at the end of the subchain
1093
1094 RETURN VALUE
1095 none
1096
1097 NOTES.
1098 The LRU ring is represented by a circular list of block structures.
1099 The list is double-linked of the type (**prev,*next) type.
1100 The LRU ring is divided into two parts - hot and warm.
1101 There are two pointers to access the last blocks of these two
1102 parts. The beginning of the warm part follows right after the
1103 end of the hot part.
1104 Only blocks of the warm part can be used for eviction.
1105 The first block from the beginning of this subchain is always
1106 taken for eviction (keycache->last_used->next)
1107
1108 LRU chain: +------+ H O T +------+
1109 +----| end |----...<----| beg |----+
1110 | +------+last +------+ |
1111 v<-link in latest hot (new end) |
1112 | link in latest warm (new end)->^
1113 | +------+ W A R M +------+ |
1114 +----| beg |---->...----| end |----+
1115 +------+ +------+ins
1116 first for eviction
1117
1118 It is also possible that the block is selected for eviction and thus
1119 not linked in the LRU ring.
1120 */
1121
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
                       my_bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  /* The block must be valid, unpinned and not in the LRU ring yet. */
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);

  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_block.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
    struct st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      /* Save the successor before unlink_from_queue() clears the link. */
      next_thread= thread->next;
      /*
        We notify about the event all threads that ask
        for the same page as the first thread in the queue
      */
      if ((HASH_LINK *) thread->opt_info == hash_link)
      {
        KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        /* Register the woken thread's request so the block stays pinned. */
        block->requests++;
      }
    }
    while (thread != last_thread);
    /* Hand the block over to the page the waiters asked for. */
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads run
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They do only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that it has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u status=%x #requests=%u #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    return;
  }

  /*
    Insert into the LRU ring. For hot blocks the insertion point is
    used_ins (end of the hot sub-chain), for warm blocks it is
    used_last (end of the warm sub-chain); see the diagram above.
  */
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    /* Link the block right after *pins in the circular list. */
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      /* The block becomes the new end of its sub-chain. */
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u status=%x #requests=%u #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
1223
1224
1225 /*
1226 Unlink a block from the LRU chain
1227
1228 SYNOPSIS
1229 unlink_block()
1230 keycache pointer to a key cache data structure
1231 block pointer to the block to unlink from the LRU chain
1232
1233 RETURN VALUE
1234 none
1235
1236 NOTES.
1237 See NOTES for link_block
1238 */
1239
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  /* The block must be a valid, unpinned member of the LRU ring. */
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    /* Bypass the block: the successor inherits the predecessor link. */
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /*
      If the block marked the end of the warm or hot sub-chain, move
      the marker to its predecessor. The predecessor's address is
      recovered via STRUCT_PTR from the address of its next_used field.
    */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->prev_used= NULL;
#endif

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
      ("unlinked block %u status=%x #requests=%u #available=%u",
       BLOCK_NUMBER(block), block->status,
       block->requests, keycache->blocks_available));
#endif
}
1280
1281
1282 /*
1283 Register requests for a block.
1284
1285 SYNOPSIS
1286 reg_requests()
1287 keycache Pointer to a key cache data structure.
1288 block Pointer to the block to register a request on.
1289 count Number of requests. Always 1.
1290
1291 NOTE
1292 The first request unlinks the block from the LRU ring. This means
    that it is protected against eviction.
1294
1295 RETURN
1296 void
1297 */
/*
  Register COUNT requests on BLOCK. The first request pins the block:
  it is taken out of the LRU ring and thus protected from eviction.
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link);

  if (block->requests == 0)
    unlink_block(keycache, block);
  block->requests+= count;
}
1307
1308
1309 /*
1310 Unregister request for a block
1311 linking it to the LRU chain if it's the last request
1312
1313 SYNOPSIS
1314 unreg_request()
1315 keycache pointer to a key cache data structure
1316 block pointer to the block to link to the LRU chain
1317 at_end <-> to link the block at the end of the LRU chain
1318
1319 RETURN VALUE
1320 none
1321
1322 NOTES.
1323 Every linking to the LRU ring decrements by one a special block
1324 counter (if it's positive). If the at_end parameter is TRUE the block is
1325 added either at the end of warm sub-chain or at the end of hot sub-chain.
1326 It is added to the hot subchain if its counter is zero and number of
1327 blocks in warm sub-chain is not less than some low limit (determined by
1328 the division_limit parameter). Otherwise the block is added to the warm
1329 sub-chain. If the at_end parameter is FALSE the block is always added
1330 at beginning of the warm sub-chain.
1331 Thus a warm block can be promoted to the hot sub-chain when its counter
1332 becomes zero for the first time.
1333 At the same time the block at the very beginning of the hot subchain
1334 might be moved to the beginning of the warm subchain if it stays untouched
1335 for a too long time (this time is determined by parameter age_threshold).
1336
1337 It is also possible that the block is selected for eviction and thus
1338 not linked in the LRU ring.
1339 */
1340
static void unreg_request(KEY_CACHE *keycache,
                          BLOCK_LINK *block, int at_end)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /*
    Unregister the request, but do not link erroneous blocks into the
    LRU ring.
  */
  if (!--block->requests && !(block->status & BLOCK_ERROR))
  {
    my_bool hot;
    /* Each unpinning uses up one of the block's remaining hits. */
    if (block->hits_left)
      block->hits_left--;
    /*
      Promote to the hot sub-chain only when the hit counter is used
      up, linking at the end was requested, and the warm sub-chain is
      above its minimum size.
    */
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    /* Stamp the block with the logical clock, then advance the clock. */
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      /* The oldest hot block has aged out: demote it to warm. */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
  }
}
1403
1404 /*
1405 Remove a reader of the page in block
1406 */
1407
/*
  Drop one page-read request from the block's hash link. When the last
  reader leaves, wake a thread suspended in wait_for_readers(), if any.
*/
static void remove_reader(BLOCK_LINK *block)
{
  HASH_LINK *hlink= block->hash_link;

  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(hlink && hlink->block == block);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  DBUG_ASSERT(hlink->requests);

  hlink->requests--;
  if (hlink->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1420
1421
1422 /*
1423 Wait until the last reader of the page in block
1424 signals on its termination
1425 */
1426
/*
  Suspend the calling thread until the last reader of the page in
  BLOCK has called remove_reader(). There is no queue here: at most
  one thread may wait on a block's condvar at a time.
*/
static void wait_for_readers(KEY_CACHE *keycache,
                             BLOCK_LINK *block)
{
  struct st_my_thread_var *self= my_thread_var;

  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
  DBUG_ASSERT(block->hash_link);
  DBUG_ASSERT(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);

  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld block %u",
                         self->id, BLOCK_NUMBER(block)));
    /* There must be no other waiter. We have no queue here. */
    DBUG_ASSERT(!block->condvar);
    block->condvar= &self->suspend;
    keycache_pthread_cond_wait(&self->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}
1452
1453
1454 /*
1455 Add a hash link to a bucket in the hash_table
1456 */
1457
/*
  Push a hash link onto the front of the bucket chain anchored at *start.
*/
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  HASH_LINK *old_first= *start;

  if (old_first)
    old_first->prev= &hash_link->next;
  hash_link->next= old_first;
  hash_link->prev= start;
  *start= hash_link;
}
1466
1467
1468 /*
1469 Remove a hash link from the hash table
1470 */
1471
/*
  Remove a hash link from the hash table. If threads are waiting for a
  free hash link, recycle this one immediately for the page the oldest
  waiter asked for, wake every thread waiting for that page, and link
  it into the corresponding bucket. Otherwise put it on the free list.
*/
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);

  /* Detach from the bucket chain. */
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;

  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *thread= last_thread->next; /* oldest waiter */
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (thread->opt_info);

    /* Recycle the link for the page the oldest waiter requested. */
    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;

    for (;;)
    {
      /* Save the successor before unlink_from_queue() clears the link. */
      struct st_my_thread_var *succ= thread->next;
      KEYCACHE_PAGE *page= (KEYCACHE_PAGE *) thread->opt_info;
      /*
        We notify about the event all threads that ask
        for the same page as the first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
      if (thread == last_thread)
        break;
      thread= succ;
    }
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
                                                 hash_link->diskpos)],
              hash_link);
    return;
  }

  /* No waiters: return the link to the free list. */
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}
1519
1520
1521 /*
1522 Get the hash link for a page
1523 */
1524
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, my_off_t filepos)
{
  reg1 HASH_LINK *hash_link, **start;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
                      (uint) file,(ulong) filepos));

restart:
  /*
    Find the bucket in the hash table for the pair (file, filepos);
    start contains the head of the bucket list,
    hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
#if defined(KEYCACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
    /*
      Sanity check: the chain must not be longer than the number of
      hash links in use. Dump the chain before asserting otherwise.
    */
    cnt++;
    if (! (cnt <= keycache->hash_links_used))
    {
      int i;
      for (i=0, hash_link= *start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
            (uint) hash_link->file,(ulong) hash_link->diskpos));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Reuse a previously freed link from the free list. */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take a never-used link from the pool. */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      /*
        Publish the requested page via opt_info so that unlink_hash()
        can wake exactly the threads waiting for this page.
      */
      page.file= file;
      page.filepos= filepos;
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                        ("suspend thread %ld", thread->id));
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
      /* The situation may have changed completely: search again. */
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}
1604
1605
1606 /*
1607 Get a block for the file page requested by a keycache read/write operation;
1608 If the page is not in the cache return a free block, if there is none
1609 return the lru block after saving its buffer if the page is dirty.
1610
1611 SYNOPSIS
1612
1613 find_key_block()
1614 keycache pointer to a key cache data structure
1615 file handler for the file to read page from
1616 filepos position of the page in the file
1617 init_hits_left how initialize the block counter for the page
1618 wrmode <-> get for writing
1619 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1620
1621 RETURN VALUE
1622 Pointer to the found block if successful, 0 - otherwise
1623
1624 NOTES.
1625 For the page from file positioned at filepos the function checks whether
1626 the page is in the key cache specified by the first parameter.
1627 If this is the case it immediately returns the block.
1628 If not, the function first chooses a block for this page. If there is
1629 no not used blocks in the key cache yet, the function takes the block
1630 at the very beginning of the warm sub-chain. It saves the page in that
1631 block if it's dirty before returning the pointer to it.
1632 The function returns in the page_st parameter the following values:
1633 PAGE_READ - if page already in the block,
1634 PAGE_TO_BE_READ - if it is to be read yet by the current thread
1635 WAIT_TO_BE_READ - if it is to be read by another thread
1636 If an error occurs THE BLOCK_ERROR bit is set in the block status.
1637 It might happen that there are no blocks in LRU chain (in warm part) -
1638 all blocks are unlinked for some read/write operations. Then the function
1639 waits until first of this operations links any block back.
1640 */
1641
find_key_block(KEY_CACHE * keycache,File file,my_off_t filepos,int init_hits_left,int wrmode,int * page_st)1642 static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1643 File file, my_off_t filepos,
1644 int init_hits_left,
1645 int wrmode, int *page_st)
1646 {
1647 HASH_LINK *hash_link;
1648 BLOCK_LINK *block;
1649 int error= 0;
1650 int page_status;
1651
1652 DBUG_ENTER("find_key_block");
1653 KEYCACHE_THREAD_TRACE("find_key_block:begin");
1654 DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
1655 file, (ulong) filepos, wrmode));
1656 KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d",
1657 file, (ulong) filepos,
1658 wrmode));
1659 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1660 DBUG_EXECUTE("check_keycache2",
1661 test_key_cache(keycache, "start of find_key_block", 0););
1662 #endif
1663
1664 restart:
1665 /*
1666 If the flush phase of a resize operation fails, the cache is left
1667 unusable. This will be detected only after "goto restart".
1668 */
1669 if (!keycache->can_be_used)
1670 DBUG_RETURN(0);
1671
1672 /*
1673 Find the hash_link for the requested file block (file, filepos). We
1674 do always get a hash_link here. It has registered our request so
1675 that no other thread can use it for another file block until we
1676 release the request (which is done by remove_reader() usually). The
1677 hash_link can have a block assigned to it or not. If there is a
1678 block, it may be assigned to this hash_link or not. In cases where a
1679 block is evicted from the cache, it is taken from the LRU ring and
1680 referenced by the new hash_link. But the block can still be assigned
1681 to its old hash_link for some time if it needs to be flushed first,
1682 or if there are other threads still reading it.
1683
1684 Summary:
1685 hash_link is always returned.
1686 hash_link->block can be:
1687 - NULL or
1688 - not assigned to this hash_link or
1689 - assigned to this hash_link. If assigned, the block can have
1690 - invalid data (when freshly assigned) or
1691 - valid data. Valid data can be
1692 - changed over the file contents (dirty) or
1693 - not changed (clean).
1694 */
1695 hash_link= get_hash_link(keycache, file, filepos);
1696 DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));
1697
1698 page_status= -1;
1699 if ((block= hash_link->block) &&
1700 block->hash_link == hash_link && (block->status & BLOCK_READ))
1701 {
1702 /* Assigned block with valid (changed or unchanged) contents. */
1703 page_status= PAGE_READ;
1704 }
1705 /*
1706 else (page_status == -1)
1707 - block == NULL or
1708 - block not assigned to this hash_link or
1709 - block assigned but not yet read from file (invalid data).
1710 */
1711
1712 if (keycache->in_resize)
1713 {
1714 /* This is a request during a resize operation */
1715
1716 if (!block)
1717 {
1718 struct st_my_thread_var *thread;
1719
1720 /*
1721 The file block is not in the cache. We don't need it in the
1722 cache: we are going to read or write directly to file. Cancel
1723 the request. We can simply decrement hash_link->requests because
1724 we did not release cache_lock since increasing it. So no other
1725 thread can wait for our request to become released.
1726 */
1727 if (hash_link->requests == 1)
1728 {
1729 /*
1730 We are the only one to request this hash_link (this file/pos).
1731 Free the hash_link.
1732 */
1733 hash_link->requests--;
1734 unlink_hash(keycache, hash_link);
1735 DBUG_RETURN(0);
1736 }
1737
1738 /*
1739 More requests on the hash_link. Someone tries to evict a block
1740 for this hash_link (could have started before resizing started).
1741 This means that the LRU ring is empty. Otherwise a block could
1742 be assigned immediately. Behave like a thread that wants to
1743 evict a block for this file/pos. Add to the queue of threads
1744 waiting for a block. Wait until there is one assigned.
1745
1746 Refresh the request on the hash-link so that it cannot be reused
1747 for another file/pos.
1748 */
1749 thread= my_thread_var;
1750 thread->opt_info= (void *) hash_link;
1751 link_into_queue(&keycache->waiting_for_block, thread);
1752 do
1753 {
1754 KEYCACHE_DBUG_PRINT("find_key_block: wait",
1755 ("suspend thread %ld", thread->id));
1756 keycache_pthread_cond_wait(&thread->suspend,
1757 &keycache->cache_lock);
1758 } while (thread->next);
1759 thread->opt_info= NULL;
1760 /*
1761 A block should now be assigned to the hash_link. But it may
1762 still need to be evicted. Anyway, we should re-check the
1763 situation. page_status must be set correctly.
1764 */
1765 hash_link->requests--;
1766 goto restart;
1767 } /* end of if (!block) */
1768
1769 /*
1770 There is a block for this file/pos in the cache. Register a
1771 request on it. This unlinks it from the LRU ring (if it is there)
1772 and hence protects it against eviction (if not already in
1773 eviction). We need this for returning the block to the caller, for
1774 calling remove_reader() (for debugging purposes), and for calling
1775 free_block(). The only case where we don't need the request is if
1776 the block is in eviction. In that case we have to unregister the
1777 request later.
1778 */
1779 reg_requests(keycache, block, 1);
1780
1781 if (page_status != PAGE_READ)
1782 {
1783 /*
1784 - block not assigned to this hash_link or
1785 - block assigned but not yet read from file (invalid data).
1786
1787 This must be a block in eviction. It will be read soon. We need
1788 to wait here until this happened. Otherwise the caller could
1789 access a wrong block or a block which is in read. While waiting
1790 we cannot lose hash_link nor block. We have registered a request
1791 on the hash_link. Everything can happen to the block but changes
1792 in the hash_link -> block relationship. In other words:
1793 everything can happen to the block but free or another completed
1794 eviction.
1795
      Note that we behave like a secondary requestor here. We just
1797 cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1798 read requests and writes on dirty blocks that are not in flush
1799 only. Waiting here on COND_FOR_REQUESTED works in all
1800 situations.
1801 */
1802 DBUG_ASSERT(((block->hash_link != hash_link) &&
1803 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1804 ((block->hash_link == hash_link) &&
1805 !(block->status & BLOCK_READ)));
1806 wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1807 /*
1808 Here we can trust that the block has been assigned to this
1809 hash_link (block->hash_link == hash_link) and read into the
1810 buffer (BLOCK_READ). The worst things possible here are that the
1811 block is in free (BLOCK_REASSIGNED). But the block is still
1812 assigned to the hash_link. The freeing thread waits until we
1813 release our request on the hash_link. The block must not be
      again in eviction because we registered a request on it before
1815 starting to wait.
1816 */
1817 DBUG_ASSERT(block->hash_link == hash_link);
1818 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1819 DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1820 }
1821 /*
1822 The block is in the cache. Assigned to the hash_link. Valid data.
1823 Note that in case of page_st == PAGE_READ, the block can be marked
1824 for eviction. In any case it can be marked for freeing.
1825 */
1826
1827 if (!wrmode)
1828 {
1829 /* A reader can just read the block. */
1830 *page_st= PAGE_READ;
1831 DBUG_ASSERT((hash_link->file == file) &&
1832 (hash_link->diskpos == filepos) &&
1833 (block->hash_link == hash_link));
1834 DBUG_RETURN(block);
1835 }
1836
1837 /*
1838 This is a writer. No two writers for the same block can exist.
1839 This must be assured by locks outside of the key cache.
1840 */
1841 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1842
1843 while (block->status & BLOCK_IN_FLUSH)
1844 {
1845 /*
1846 Wait until the block is flushed to file. Do not release the
1847 request on the hash_link yet to prevent that the block is freed
1848 or reassigned while we wait. While we wait, several things can
1849 happen to the block, including another flush. But the block
1850 cannot be reassigned to another hash_link until we release our
1851 request on it. But it can be marked BLOCK_REASSIGNED from free
1852 or eviction, while they wait for us to release the hash_link.
1853 */
1854 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1855 /*
1856 If the flush phase failed, the resize could have finished while
1857 we waited here.
1858 */
1859 if (!keycache->in_resize)
1860 {
1861 remove_reader(block);
1862 unreg_request(keycache, block, 1);
1863 goto restart;
1864 }
1865 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1866 DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1867 DBUG_ASSERT(block->hash_link == hash_link);
1868 }
1869
1870 if (block->status & BLOCK_CHANGED)
1871 {
1872 /*
1873 We want to write a block with changed contents. If the cache
1874 block size is bigger than the callers block size (e.g. MyISAM),
1875 the caller may replace part of the block only. Changes of the
1876 other part of the block must be preserved. Since the block has
1877 not yet been selected for flush, we can still add our changes.
1878 */
1879 *page_st= PAGE_READ;
1880 DBUG_ASSERT((hash_link->file == file) &&
1881 (hash_link->diskpos == filepos) &&
1882 (block->hash_link == hash_link));
1883 DBUG_RETURN(block);
1884 }
1885
1886 /*
1887 This is a write request for a clean block. We do not want to have
1888 new dirty blocks in the cache while resizing. We will free the
1889 block and write directly to file. If the block is in eviction or
1890 in free, we just let it go.
1891
1892 Unregister from the hash_link. This must be done before freeing
1893 the block. And it must be done if not freeing the block. Because
1894 we could have waited above, we need to call remove_reader(). Other
1895 threads could wait for us to release our request on the hash_link.
1896 */
1897 remove_reader(block);
1898
1899 /* If the block is not in eviction and not in free, we can free it. */
1900 if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1901 BLOCK_REASSIGNED)))
1902 {
1903 /*
1904 Free block as we are going to write directly to file.
      Although we have an exclusive lock for the updated key part,
1906 the control can be yielded by the current thread as we might
1907 have unfinished readers of other key parts in the block
1908 buffer. Still we are guaranteed not to have any readers
1909 of the key part we are writing into until the block is
1910 removed from the cache as we set the BLOCK_REASSIGNED
1911 flag (see the code below that handles reading requests).
1912 */
1913 free_block(keycache, block);
1914 }
1915 else
1916 {
1917 /*
1918 The block will be evicted/freed soon. Don't touch it in any way.
1919 Unregister the request that we registered above.
1920 */
1921 unreg_request(keycache, block, 1);
1922
1923 /*
1924 The block is still assigned to the hash_link (the file/pos that
1925 we are going to write to). Wait until the eviction/free is
1926 complete. Otherwise the direct write could complete before all
1927 readers are done with the block. So they could read outdated
1928 data.
1929
1930 Since we released our request on the hash_link, it can be reused
1931 for another file/pos. Hence we cannot just check for
1932 block->hash_link == hash_link. As long as the resize is
1933 proceeding the block cannot be reassigned to the same file/pos
1934 again. So we can terminate the loop when the block is no longer
1935 assigned to this file/pos.
1936 */
1937 do
1938 {
1939 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1940 &keycache->cache_lock);
1941 /*
1942 If the flush phase failed, the resize could have finished
1943 while we waited here.
1944 */
1945 if (!keycache->in_resize)
1946 goto restart;
1947 } while (block->hash_link &&
1948 (block->hash_link->file == file) &&
1949 (block->hash_link->diskpos == filepos));
1950 }
1951 DBUG_RETURN(0);
1952 }
1953
1954 if (page_status == PAGE_READ &&
1955 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1956 BLOCK_REASSIGNED)))
1957 {
1958 /*
1959 This is a request for a block to be removed from cache. The block
1960 is assigned to this hash_link and contains valid data, but is
1961 marked for eviction or to be freed. Possible reasons why it has
1962 not yet been evicted/freed can be a flush before reassignment
1963 (BLOCK_IN_SWITCH), readers of the block have not finished yet
1964 (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1965 the block has been selected for it (BLOCK_IN_EVICTION).
1966 */
1967
1968 KEYCACHE_DBUG_PRINT("find_key_block",
1969 ("request for old page in block %u "
1970 "wrmode: %d block->status: %d",
1971 BLOCK_NUMBER(block), wrmode, block->status));
1972 /*
1973 Only reading requests can proceed until the old dirty page is flushed,
1974 all others are to be suspended, then resubmitted
1975 */
1976 if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1977 {
1978 /*
1979 This is a read request and the block not yet reassigned. We can
1980 register our request and proceed. This unlinks the block from
1981 the LRU ring and protects it against eviction.
1982 */
1983 reg_requests(keycache, block, 1);
1984 }
1985 else
1986 {
1987 /*
1988 Either this is a write request for a block that is in eviction
1989 or in free. We must not use it any more. Instead we must evict
1990 another block. But we cannot do this before the eviction/free is
1991 done. Otherwise we would find the same hash_link + block again
1992 and again.
1993
1994 Or this is a read request for a block in eviction/free that does
1995 not require a flush, but waits for readers to finish with the
1996 block. We do not read this block to let the eviction/free happen
1997 as soon as possible. Again we must wait so that we don't find
1998 the same hash_link + block again and again.
1999 */
2000 DBUG_ASSERT(hash_link->requests);
2001 hash_link->requests--;
2002 KEYCACHE_DBUG_PRINT("find_key_block",
2003 ("request waiting for old page to be saved"));
2004 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2005 KEYCACHE_DBUG_PRINT("find_key_block",
2006 ("request for old page resubmitted"));
2007 /*
2008 The block is no longer assigned to this hash_link.
2009 Get another one.
2010 */
2011 goto restart;
2012 }
2013 }
2014 else
2015 {
2016 /*
2017 This is a request for a new block or for a block not to be removed.
2018 Either
2019 - block == NULL or
2020 - block not assigned to this hash_link or
2021 - block assigned but not yet read from file,
2022 or
2023 - block assigned with valid (changed or unchanged) data and
2024 - it will not be reassigned/freed.
2025 */
2026 if (! block)
2027 {
2028 /* No block is assigned to the hash_link yet. */
2029 if (keycache->blocks_unused)
2030 {
2031 if (keycache->free_block_list)
2032 {
2033 /* There is a block in the free list. */
2034 block= keycache->free_block_list;
2035 keycache->free_block_list= block->next_used;
2036 block->next_used= NULL;
2037 }
2038 else
2039 {
2040 size_t block_mem_offset;
2041 /* There are some never used blocks, take first of them */
2042 DBUG_ASSERT(keycache->blocks_used <
2043 (ulong) keycache->disk_blocks);
2044 block= &keycache->block_root[keycache->blocks_used];
2045 block_mem_offset=
2046 ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
2047 block->buffer= ADD_TO_PTR(keycache->block_mem,
2048 block_mem_offset,
2049 uchar*);
2050 keycache->blocks_used++;
2051 DBUG_ASSERT(!block->next_used);
2052 }
2053 DBUG_ASSERT(!block->prev_used);
2054 DBUG_ASSERT(!block->next_changed);
2055 DBUG_ASSERT(!block->prev_changed);
2056 DBUG_ASSERT(!block->hash_link);
2057 DBUG_ASSERT(!block->status);
2058 DBUG_ASSERT(!block->requests);
2059 keycache->blocks_unused--;
2060 block->status= BLOCK_IN_USE;
2061 block->length= 0;
2062 block->offset= keycache->key_cache_block_size;
2063 block->requests= 1;
2064 block->temperature= BLOCK_COLD;
2065 block->hits_left= init_hits_left;
2066 block->last_hit_time= 0;
2067 block->hash_link= hash_link;
2068 hash_link->block= block;
2069 link_to_file_list(keycache, block, file, 0);
2070 page_status= PAGE_TO_BE_READ;
2071 KEYCACHE_DBUG_PRINT("find_key_block",
2072 ("got free or never used block %u",
2073 BLOCK_NUMBER(block)));
2074 }
2075 else
2076 {
2077 /*
2078 There are no free blocks and no never used blocks, use a block
2079 from the LRU ring.
2080 */
2081
2082 if (! keycache->used_last)
2083 {
2084 /*
2085 The LRU ring is empty. Wait until a new block is added to
2086 it. Several threads might wait here for the same hash_link,
2087 all of them must get the same block. While waiting for a
2088 block, after a block is selected for this hash_link, other
2089 threads can run first before this one awakes. During this
2090 time interval other threads find this hash_link pointing to
2091 the block, which is still assigned to another hash_link. In
2092 this case the block is not marked BLOCK_IN_SWITCH yet, but
2093 it is marked BLOCK_IN_EVICTION.
2094 */
2095
2096 struct st_my_thread_var *thread= my_thread_var;
2097 thread->opt_info= (void *) hash_link;
2098 link_into_queue(&keycache->waiting_for_block, thread);
2099 do
2100 {
2101 KEYCACHE_DBUG_PRINT("find_key_block: wait",
2102 ("suspend thread %ld", thread->id));
2103 keycache_pthread_cond_wait(&thread->suspend,
2104 &keycache->cache_lock);
2105 }
2106 while (thread->next);
2107 thread->opt_info= NULL;
2108 /* Assert that block has a request registered. */
2109 DBUG_ASSERT(hash_link->block->requests);
2110 /* Assert that block is not in LRU ring. */
2111 DBUG_ASSERT(!hash_link->block->next_used);
2112 DBUG_ASSERT(!hash_link->block->prev_used);
2113 }
2114
2115 /*
2116 If we waited above, hash_link->block has been assigned by
2117 link_block(). Otherwise it is still NULL. In the latter case
2118 we need to grab a block from the LRU ring ourselves.
2119 */
2120 block= hash_link->block;
2121 if (! block)
2122 {
2123 /* Select the last block from the LRU ring. */
2124 block= keycache->used_last->next_used;
2125 block->hits_left= init_hits_left;
2126 block->last_hit_time= 0;
2127 hash_link->block= block;
2128 /*
2129 Register a request on the block. This unlinks it from the
2130 LRU ring and protects it against eviction.
2131 */
2132 DBUG_ASSERT(!block->requests);
2133 reg_requests(keycache, block,1);
2134 /*
2135 We do not need to set block->status|= BLOCK_IN_EVICTION here
2136 because we will set block->status|= BLOCK_IN_SWITCH
2137 immediately without releasing the lock in between. This does
2138 also support debugging. When looking at the block, one can
2139 see if the block has been selected by link_block() after the
2140 LRU ring was empty, or if it was grabbed directly from the
2141 LRU ring in this branch.
2142 */
2143 }
2144
2145 /*
2146 If we had to wait above, there is a small chance that another
2147 thread grabbed this block for the same file block already. But
2148 in most cases the first condition is true.
2149 */
2150 if (block->hash_link != hash_link &&
2151 ! (block->status & BLOCK_IN_SWITCH) )
2152 {
2153 /* this is a primary request for a new page */
2154 block->status|= BLOCK_IN_SWITCH;
2155
2156 KEYCACHE_DBUG_PRINT("find_key_block",
2157 ("got block %u for new page", BLOCK_NUMBER(block)));
2158
2159 if (block->status & BLOCK_CHANGED)
2160 {
2161 /* The block contains a dirty page - push it out of the cache */
2162
2163 KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
2164 if (block->status & BLOCK_IN_FLUSH)
2165 {
2166 /*
2167 The block is marked for flush. If we do not wait here,
2168 it could happen that we write the block, reassign it to
2169 another file block, then, before the new owner can read
2170 the new file block, the flusher writes the cache block
2171 (which still has the old contents) to the new file block!
2172 */
2173 wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2174 &keycache->cache_lock);
2175 /*
2176 The block is marked BLOCK_IN_SWITCH. It should be left
2177 alone except for reading. No free, no write.
2178 */
2179 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2180 DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
2181 BLOCK_CHANGED |
2182 BLOCK_FOR_UPDATE)));
2183 }
2184 else
2185 {
2186 block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
2187 /*
2188 BLOCK_IN_EVICTION may be true or not. Other flags must
2189 have a fixed value.
2190 */
2191 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2192 (BLOCK_READ | BLOCK_IN_SWITCH |
2193 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2194 BLOCK_CHANGED | BLOCK_IN_USE));
2195 DBUG_ASSERT(block->hash_link);
2196
2197 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2198 /*
2199 The call is thread safe because only the current
2200 thread might change the block->hash_link value
2201 */
2202 error= my_pwrite(block->hash_link->file,
2203 block->buffer + block->offset,
2204 block->length - block->offset,
2205 block->hash_link->diskpos + block->offset,
2206 MYF(MY_NABP | MY_WAIT_IF_FULL));
2207 keycache_pthread_mutex_lock(&keycache->cache_lock);
2208
2209 /* Block status must not have changed. */
2210 DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2211 (BLOCK_READ | BLOCK_IN_SWITCH |
2212 BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2213 BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2214 keycache->global_cache_write++;
2215 }
2216 }
2217
2218 block->status|= BLOCK_REASSIGNED;
2219 /*
2220 The block comes from the LRU ring. It must have a hash_link
2221 assigned.
2222 */
2223 DBUG_ASSERT(block->hash_link);
2224 if (block->hash_link)
2225 {
2226 /*
2227 All pending requests for this page must be resubmitted.
2228 This must be done before waiting for readers. They could
2229 wait for the flush to complete. And we must also do it
2230 after the wait. Flushers might try to free the block while
2231 we wait. They would wait until the reassignment is
2232 complete. Also the block status must reflect the correct
2233 situation: The block is not changed nor in flush any more.
2234 Note that we must not change the BLOCK_CHANGED flag
2235 outside of link_to_file_list() so that it is always in the
2236 correct queue and the *blocks_changed counters are
2237 correct.
2238 */
2239 block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2240 link_to_file_list(keycache, block, block->hash_link->file, 1);
2241 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2242 /*
2243 The block is still assigned to its old hash_link.
2244 Wait until all pending read requests
2245 for this page are executed
2246 (we could have avoided this waiting, if we had read
2247 a page in the cache in a sweep, without yielding control)
2248 */
2249 wait_for_readers(keycache, block);
2250 DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
2251 block->prev_changed);
2252 /* The reader must not have been a writer. */
2253 DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
2254
2255 /* Wake flushers that might have found the block in between. */
2256 release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2257
2258 /* Remove the hash link for the old file block from the hash. */
2259 unlink_hash(keycache, block->hash_link);
2260
2261 /*
2262 For sanity checks link_to_file_list() asserts that block
2263 and hash_link refer to each other. Hence we need to assign
2264 the hash_link first, but then we would not know if it was
2265 linked before. Hence we would not know if to unlink it. So
2266 unlink it here and call link_to_file_list(..., FALSE).
2267 */
2268 unlink_changed(block);
2269 }
2270 block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2271 block->length= 0;
2272 block->offset= keycache->key_cache_block_size;
2273 block->hash_link= hash_link;
2274 link_to_file_list(keycache, block, file, 0);
2275 page_status= PAGE_TO_BE_READ;
2276
2277 KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2278 KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2279 }
2280 else
2281 {
2282 /*
2283 Either (block->hash_link == hash_link),
2284 or (block->status & BLOCK_IN_SWITCH).
2285
2286 This is for secondary requests for a new file block only.
2287 Either it is already assigned to the new hash_link meanwhile
2288 (if we had to wait due to empty LRU), or it is already in
2289 eviction by another thread. Since this block has been
2290 grabbed from the LRU ring and attached to this hash_link,
2291 another thread cannot grab the same block from the LRU ring
2292 anymore. If the block is in eviction already, it must become
2293 attached to the same hash_link and as such destined for the
2294 same file block.
2295 */
2296 KEYCACHE_DBUG_PRINT("find_key_block",
2297 ("block->hash_link: %p hash_link: %p "
2298 "block->status: %u", block->hash_link,
2299 hash_link, block->status ));
2300 page_status= (((block->hash_link == hash_link) &&
2301 (block->status & BLOCK_READ)) ?
2302 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2303 }
2304 }
2305 }
2306 else
2307 {
2308 /*
2309 Block is not NULL. This hash_link points to a block.
2310 Either
2311 - block not assigned to this hash_link (yet) or
2312 - block assigned but not yet read from file,
2313 or
2314 - block assigned with valid (changed or unchanged) data and
2315 - it will not be reassigned/freed.
2316
2317 The first condition means hash_link points to a block in
2318 eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2319 But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2320 link_block(). In both cases it is destined for this hash_link
2321 and its file block address. When this hash_link got its block
2322 address, the block was removed from the LRU ring and cannot be
2323 selected for eviction (for another hash_link) again.
2324
2325 Register a request on the block. This is another protection
2326 against eviction.
2327 */
2328 DBUG_ASSERT(((block->hash_link != hash_link) &&
2329 (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2330 ((block->hash_link == hash_link) &&
2331 !(block->status & BLOCK_READ)) ||
2332 ((block->status & BLOCK_READ) &&
2333 !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2334 reg_requests(keycache, block, 1);
2335 KEYCACHE_DBUG_PRINT("find_key_block",
2336 ("block->hash_link: %p hash_link: %p "
2337 "block->status: %u", block->hash_link,
2338 hash_link, block->status ));
2339 page_status= (((block->hash_link == hash_link) &&
2340 (block->status & BLOCK_READ)) ?
2341 PAGE_READ : PAGE_WAIT_TO_BE_READ);
2342 }
2343 }
2344
2345 KEYCACHE_DBUG_ASSERT(page_status != -1);
2346 /* Same assert basically, but be very sure. */
2347 KEYCACHE_DBUG_ASSERT(block);
2348 /* Assert that block has a request and is not in LRU ring. */
2349 DBUG_ASSERT(block->requests);
2350 DBUG_ASSERT(!block->next_used);
2351 DBUG_ASSERT(!block->prev_used);
2352 /* Assert that we return the correct block. */
2353 DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
2354 ((block->hash_link->file == file) &&
2355 (block->hash_link->diskpos == filepos)));
2356 *page_st=page_status;
2357 KEYCACHE_DBUG_PRINT("find_key_block",
2358 ("fd: %d pos: %lu block->status: %u page_status: %d",
2359 file, (ulong) filepos, block->status,
2360 page_status));
2361
2362 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2363 DBUG_EXECUTE("check_keycache2",
2364 test_key_cache(keycache, "end of find_key_block",0););
2365 #endif
2366 KEYCACHE_THREAD_TRACE("find_key_block:end");
2367 DBUG_RETURN(block);
2368 }
2369
2370
2371 /*
2372 Read into a key cache block buffer from disk.
2373
2374 SYNOPSIS
2375
2376 read_block()
2377 keycache pointer to a key cache data structure
2378 block block to which buffer the data is to be read
2379 read_length size of data to be read
2380 min_length at least so much data must be read
2381 primary <-> the current thread will read the data
2382
2383 RETURN VALUE
2384 None
2385
2386 NOTES.
2387 The function either reads a page data from file to the block buffer,
2388 or waits until another thread reads it. What page to read is determined
2389 by a block parameter - reference to a hash link for this page.
  If an error occurs, the BLOCK_ERROR bit is set in the block status.
2391 We do not report error when the size of successfully read
2392 portion is less than read_length, but not less than min_length.
2393 */
2394
static void read_block(KEY_CACHE *keycache,
                       BLOCK_LINK *block, uint read_length,
                       uint min_length, my_bool primary)
{
  size_t got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
  {
    /*
      This code is executed only by threads that submitted primary
      requests. Until block->status contains BLOCK_READ, all other
      request for the block become secondary requests. For a primary
      request the block must be properly initialized.
    */
    DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    KEYCACHE_DBUG_PRINT("read_block",
                        ("page to be read by primary request"));

    keycache->global_cache_read++;
    /* Page is not in buffer yet, is to be read from disk */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      The block can now have been marked for free (in case of
      FLUSH_RELEASE). Otherwise the state must be unchanged.
    */
    DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
                                    BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    /*
      With MYF(0), my_pread() returns the number of bytes read, or
      (size_t) -1 (MY_FILE_ERROR) on a hard read error. The error
      sentinel compares greater than any valid length, so it would
      slip past a plain 'got_length < min_length' check and the block
      would be marked BLOCK_READ with a bogus huge length. Reject both
      a too-short read and any result larger than the requested
      length (impossible on success, true for the error sentinel).
    */
    if (got_length < min_length || got_length > (size_t) read_length)
      block->status|= BLOCK_ERROR;
    else
    {
      block->status|= BLOCK_READ;
      block->length= got_length;
      /*
        Do not set block->offset here. If this block is marked
        BLOCK_CHANGED later, we want to flush only the modified part. So
        only a writer may set block->offset down from
        keycache->key_cache_block_size.
      */
    }
    KEYCACHE_DBUG_PRINT("read_block",
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads that submitted secondary
      requests. At this point it could happen that the cache block is
      not yet assigned to the hash_link for the requested file block.
      But at awake from the wait this should be the case. Unfortunately
      we cannot assert this here because we do not know the hash_link
      for the requested file block nor the file and position. So we have
      to assert this in the caller.
    */
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request waiting for new page to be read"));
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request: new page in cache"));
  }
}
2480
2481
2482 /*
2483 Read a block of data from a cached file into a buffer;
2484
2485 SYNOPSIS
2486
2487 key_cache_read()
2488 keycache pointer to a key cache data structure
2489 file handler for the file for the block of data to be read
2490 filepos position of the block of data in the file
2491 level determines the weight of the data
2492 buff buffer to where the data must be placed
2493 length length of the buffer
2494 block_length length of the block in the key cache buffer
2495 return_buffer return pointer to the key cache buffer with the data
2496
2497 RETURN VALUE
    Returns address from where the data is placed if successful, 0 - otherwise.
2499
2500 NOTES.
2501 The function ensures that a block of data of size length from file
2502 positioned at filepos is in the buffers for some key cache blocks.
2503 Then the function either copies the data into the buffer buff, or,
2504 if return_buffer is TRUE, it just returns the pointer to the key cache
2505 buffer with the data.
2506 Filepos must be a multiple of 'block_length', but it doesn't
2507 have to be a multiple of key_cache_block_size;
2508 */
2509
key_cache_read(KEY_CACHE * keycache,File file,my_off_t filepos,int level,uchar * buff,uint length,uint block_length MY_ATTRIBUTE ((unused)),int return_buffer MY_ATTRIBUTE ((unused)))2510 uchar *key_cache_read(KEY_CACHE *keycache,
2511 File file, my_off_t filepos, int level,
2512 uchar *buff, uint length,
2513 uint block_length MY_ATTRIBUTE((unused)),
2514 int return_buffer MY_ATTRIBUTE((unused)))
2515 {
2516 my_bool locked_and_incremented= FALSE;
2517 int error=0;
2518 uchar *start= buff;
2519 DBUG_ENTER("key_cache_read");
2520 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
2521 (uint) file, (ulong) filepos, length));
2522
2523 if (keycache->key_cache_inited)
2524 {
2525 /* Key cache is used */
2526 reg1 BLOCK_LINK *block;
2527 uint read_length;
2528 uint offset;
2529 int page_st;
2530
2531 if (MYSQL_KEYCACHE_READ_START_ENABLED())
2532 {
2533 MYSQL_KEYCACHE_READ_START(my_filename(file), length,
2534 (ulong) (keycache->blocks_used *
2535 keycache->key_cache_block_size),
2536 (ulong) (keycache->blocks_unused *
2537 keycache->key_cache_block_size));
2538 }
2539
2540 /*
2541 When the key cache is once initialized, we use the cache_lock to
2542 reliably distinguish the cases of normal operation, resizing, and
2543 disabled cache. We always increment and decrement
2544 'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2545 */
2546 keycache_pthread_mutex_lock(&keycache->cache_lock);
2547 /*
2548 Cache resizing has two phases: Flushing and re-initializing. In
2549 the flush phase read requests are allowed to bypass the cache for
2550 blocks not in the cache. find_key_block() returns NULL in this
2551 case.
2552
2553 After the flush phase new I/O requests must wait until the
2554 re-initialization is done. The re-initialization can be done only
2555 if no I/O request is in progress. The reason is that
2556 key_cache_block_size can change. With enabled cache, I/O is done
2557 in chunks of key_cache_block_size. Every chunk tries to use a
2558 cache block first. If the block size changes in the middle, a
2559 block could be missed and old data could be read.
2560 */
2561 while (keycache->in_resize && !keycache->resize_in_flush)
2562 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2563 /* Register the I/O for the next resize. */
2564 inc_counter_for_resize_op(keycache);
2565 locked_and_incremented= TRUE;
2566 /* Requested data may not always be aligned to cache blocks. */
2567 offset= (uint) (filepos % keycache->key_cache_block_size);
2568 /* Read data in key_cache_block_size increments */
2569 do
2570 {
2571 /* Cache could be disabled in a later iteration. */
2572 if (!keycache->can_be_used)
2573 {
2574 KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
2575 goto no_key_cache;
2576 }
2577 /* Start reading at the beginning of the cache block. */
2578 filepos-= offset;
2579 /* Do not read beyond the end of the cache block. */
2580 read_length= length;
2581 set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2582 KEYCACHE_DBUG_ASSERT(read_length > 0);
2583
2584 if (block_length > keycache->key_cache_block_size || offset)
2585 return_buffer=0;
2586
2587 /* Request the cache block that matches file/pos. */
2588 keycache->global_cache_r_requests++;
2589
2590 MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);
2591
2592 block=find_key_block(keycache, file, filepos, level, 0, &page_st);
2593 if (!block)
2594 {
2595 /*
2596 This happens only for requests submitted during key cache
2597 resize. The block is not in the cache and shall not go in.
2598 Read directly from file.
2599 */
2600 keycache->global_cache_read++;
2601 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2602 error= (my_pread(file, (uchar*) buff, read_length,
2603 filepos + offset, MYF(MY_NABP)) != 0);
2604 keycache_pthread_mutex_lock(&keycache->cache_lock);
2605 goto next_block;
2606 }
2607 if (!(block->status & BLOCK_ERROR))
2608 {
2609 if (page_st != PAGE_READ)
2610 {
2611 MYSQL_KEYCACHE_READ_MISS();
2612 /* The requested page is to be read into the block buffer */
2613 read_block(keycache, block,
2614 keycache->key_cache_block_size, read_length+offset,
2615 (my_bool)(page_st == PAGE_TO_BE_READ));
2616 /*
2617 A secondary request must now have the block assigned to the
2618 requested file block. It does not hurt to check it for
2619 primary requests too.
2620 */
2621 DBUG_ASSERT(keycache->can_be_used);
2622 DBUG_ASSERT(block->hash_link->file == file);
2623 DBUG_ASSERT(block->hash_link->diskpos == filepos);
2624 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2625 }
2626 else if (block->length < read_length + offset)
2627 {
2628 /*
2629 Impossible if nothing goes wrong:
2630 this could only happen if we are using a file with
2631 small key blocks and are trying to read outside the file
2632 */
2633 my_errno= -1;
2634 block->status|= BLOCK_ERROR;
2635 }
2636 else
2637 {
2638 MYSQL_KEYCACHE_READ_HIT();
2639 }
2640 }
2641
2642 /* block status may have added BLOCK_ERROR in the above 'if'. */
2643 if (!(block->status & BLOCK_ERROR))
2644 {
2645 {
2646 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2647 #if !defined(SERIALIZED_READ_FROM_CACHE)
2648 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2649 #endif
2650
2651 /* Copy data from the cache buffer */
2652 memcpy(buff, block->buffer+offset, (size_t) read_length);
2653
2654 #if !defined(SERIALIZED_READ_FROM_CACHE)
2655 keycache_pthread_mutex_lock(&keycache->cache_lock);
2656 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2657 #endif
2658 }
2659 }
2660
2661 remove_reader(block);
2662
2663 /* Error injection for coverage testing. */
2664 DBUG_EXECUTE_IF("key_cache_read_block_error",
2665 block->status|= BLOCK_ERROR;);
2666
2667 /* Do not link erroneous blocks into the LRU ring, but free them. */
2668 if (!(block->status & BLOCK_ERROR))
2669 {
2670 /*
2671 Link the block into the LRU ring if it's the last submitted
2672 request for the block. This enables eviction for the block.
2673 */
2674 unreg_request(keycache, block, 1);
2675 }
2676 else
2677 {
2678 free_block(keycache, block);
2679 error= 1;
2680 break;
2681 }
2682
2683 next_block:
2684 buff+= read_length;
2685 filepos+= read_length+offset;
2686 offset= 0;
2687
2688 } while ((length-= read_length));
2689 if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
2690 {
2691 MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
2692 keycache->key_cache_block_size),
2693 (ulong) (keycache->blocks_unused *
2694 keycache->key_cache_block_size));
2695 }
2696 goto end;
2697 }
2698 KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));
2699
2700 no_key_cache:
2701 /* Key cache is not used */
2702
2703 keycache->global_cache_r_requests++;
2704 keycache->global_cache_read++;
2705
2706 if (locked_and_incremented)
2707 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2708 if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
2709 error= 1;
2710 if (locked_and_incremented)
2711 keycache_pthread_mutex_lock(&keycache->cache_lock);
2712
2713 end:
2714 if (locked_and_incremented)
2715 {
2716 dec_counter_for_resize_op(keycache);
2717 keycache_pthread_mutex_unlock(&keycache->cache_lock);
2718 }
2719 DBUG_PRINT("exit", ("error: %d", error ));
2720 DBUG_RETURN(error ? (uchar*) 0 : start);
2721 }
2722
2723
2724 /*
2725 Insert a block of file data from a buffer into key cache
2726
2727 SYNOPSIS
2728 key_cache_insert()
2729 keycache pointer to a key cache data structure
2730 file handler for the file to insert data from
2731 filepos position of the block of data in the file to insert
2732 level determines the weight of the data
2733 buff buffer to read data from
2734 length length of the data in the buffer
2735
2736 NOTES
    This is used by MyISAM to move all blocks from an index file to the key
2738 cache
2739
2740 RETURN VALUE
2741 0 if a success, 1 - otherwise.
2742 */
2743
int key_cache_insert(KEY_CACHE *keycache,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  int error= 0;                        /* 0 on success, 1 on failure */
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
               (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;                  /* bytes handled in this iteration */
    uint offset;                       /* misalignment within first block */
    int page_st;                       /* page state from find_key_block() */
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
      goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
        goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
            ((page_st == PAGE_TO_BE_READ) &&
             (offset || (read_length < keycache->key_cache_block_size))))
        {
          /*
            Either

            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block() executes
            the correct wait when called with primary == FALSE.

            Or

            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block(keycache, block, keycache->key_cache_block_size,
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          DBUG_ASSERT(block->hash_link->requests);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */
#endif

          /* Copy data from buff */
          memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));
#endif
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          KEYCACHE_DBUG_PRINT("key_cache_insert",
                              ("primary request: new page in cache"));
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          DBUG_ASSERT((page_st == PAGE_READ) &&
                      (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        DBUG_ASSERT(block->hash_link->file == file);
        DBUG_ASSERT(block->hash_link->diskpos == filepos);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

      /* Advance to the next cache-block-sized chunk of the input. */
      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first iteration can be unaligned. */
      offset= 0;

    } while ((length-= read_length));

  no_key_cache:
    /* Balance inc_counter_for_resize_op(); mutex was taken unconditionally. */
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
2960
2961
2962 /*
2963 Write a buffer into a cached file.
2964
2965 SYNOPSIS
2966
2967 key_cache_write()
2968 keycache pointer to a key cache data structure
2969 file handler for the file to write data to
2970 filepos position in the file to write data to
2971 level determines the weight of the data
2972 buff buffer with the data
2973 length length of the buffer
2974 dont_write if is 0 then all dirty pages involved in writing
2975 should have been flushed from key cache
2976
2977 RETURN VALUE
2978 0 if a success, 1 - otherwise.
2979
2980 NOTES.
2981 The function copies the data of size length from buff into buffers
2982 for key cache blocks that are assigned to contain the portion of
2983 the file starting with position filepos.
2984 It ensures that this data is flushed to the file if dont_write is FALSE.
2985 Filepos must be a multiple of 'block_length', but it doesn't
    have to be a multiple of key_cache_block_size.
2987
2988 dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
2989 */
2990
int key_cache_write(KEY_CACHE *keycache,
                    File file, my_off_t filepos, int level,
                    uchar *buff, uint length,
                    uint block_length MY_ATTRIBUTE((unused)),
                    int dont_write)
{
  my_bool locked_and_incremented= FALSE;
  int error=0;                          /* 0 on success, 1 on failure */
  DBUG_ENTER("key_cache_write");
  DBUG_PRINT("enter",
             ("fd: %u pos: %lu length: %u block_length: %u"
              " key_block_length: %u",
              (uint) file, (ulong) filepos, length, block_length,
              keycache ? keycache->key_cache_block_size : 0));

  if (!dont_write)
  {
    /* purecov: begin inspected */
    /* Not used in the server. */
    /* Force writing from buff into disk. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);
    /* purecov: end */
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of key_cache_write", 1););
#endif

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;                   /* bytes handled in this iteration */
    uint offset;                        /* misalignment within first block */
    int page_st;                        /* page state from find_key_block() */

    if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
    {
      MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
                                 (ulong) (keycache->blocks_used *
                                          keycache->key_cache_block_size),
                                 (ulong) (keycache->blocks_unused *
                                          keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase write requests can modify dirty blocks that are
      not yet in flush. Otherwise they are allowed to bypass the cache.
      find_key_block() returns NULL in both cases (clean blocks and
      non-cached blocks).

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache I/O is done in
      chunks of key_cache_block_size. Every chunk tries to use a cache
      block first. If the block size changes in the middle, a block
      could be missed and data could be written below a cached block.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Write data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
        goto no_key_cache;

      MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
      /* Start writing at the beginning of the cache block. */
      filepos-= offset;
      /* Do not write beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_w_requests++;
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Write directly to file.
        */
        if (dont_write)
        {
          /* Used in the server. */
          keycache->global_cache_write++;
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
                        MYF(MY_NABP | MY_WAIT_IF_FULL)))
            error=1;
          keycache_pthread_mutex_lock(&keycache->cache_lock);
        }
        goto next_block;
      }
      /*
        Prevent block from flushing and from being selected to be
        freed. This must be set when we release the cache_lock.
        However, we must not set the status of the block before it is
        assigned to this file/pos.
      */
      if (page_st != PAGE_WAIT_TO_BE_READ)
        block->status|= BLOCK_FOR_UPDATE;
      /*
        We must read the file block first if it is not yet in the cache
        and we do not replace all of its contents.

        In cases where the cache block is big enough to contain (parts
        of) index blocks of different indexes, our request can be
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
        reading the file block. If the read completes after us, it
        overwrites our new contents with the old contents. So we have to
        wait for the other thread to complete the read of this block.
        read_block() takes care for the wait.
      */
      if (!(block->status & BLOCK_ERROR) &&
          ((page_st == PAGE_TO_BE_READ &&
            (offset || read_length < keycache->key_cache_block_size)) ||
           (page_st == PAGE_WAIT_TO_BE_READ)))
      {
        read_block(keycache, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
                   offset, (page_st == PAGE_TO_BE_READ));
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /*
          Prevent block from flushing and from being selected to be
          freed. This must be set when we release the cache_lock.
          Here we set it in case we could not set it above.
        */
        block->status|= BLOCK_FOR_UPDATE;
      }
      /*
        The block should always be assigned to the requested file block
        here. It need not be BLOCK_READ when overwriting the whole block.
      */
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT(block->hash_link->diskpos == filepos);
      DBUG_ASSERT(block->status & BLOCK_IN_USE);
      DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
      /*
        The block to be written must not be marked BLOCK_REASSIGNED.
        Otherwise it could be freed in dirty state or reused without
        another flush during eviction. It must also not be in flush.
        Otherwise the old contents may have been flushed already and
        the flusher could clear BLOCK_CHANGED without flushing the
        new changes again.
      */
      DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));

      while (block->status & BLOCK_IN_FLUSHWRITE)
      {
        /*
          Another thread is flushing the block. It was dirty already.
          Wait until the block is flushed to file. Otherwise we could
          modify the buffer contents just while it is written to file.
          An unpredictable file block contents would be the result.
          While we wait, several things can happen to the block,
          including another flush. But the block cannot be reassigned to
          another hash_link until we release our request on it.
        */
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /* Still must not be marked for free. */
        DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
        DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
      }

      /*
        We could perhaps release the cache_lock during access of the
        data like in the other functions. Locks outside of the key cache
        assure that readers and a writer do not access the same range of
        data. Parallel accesses should happen only if the cache block
        contains multiple index block(fragment)s. So different parts of
        the buffer would be read/written. An attempt to flush during
        memcpy() is prevented with BLOCK_FOR_UPDATE.
      */
      if (!(block->status & BLOCK_ERROR))
      {
#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif
        memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_lock(&keycache->cache_lock);
#endif
      }

      if (!dont_write)
      {
        /* Not used in the server. buff has been written to disk at start. */
        if ((block->status & BLOCK_CHANGED) &&
            (!offset && read_length >= keycache->key_cache_block_size))
          link_to_file_list(keycache, block, block->hash_link->file, 1);
      }
      else if (! (block->status & BLOCK_CHANGED))
        link_to_changed_list(keycache, block);
      block->status|=BLOCK_READ;
      /*
        Allow block to be selected to be freed. Since it is marked
        BLOCK_CHANGED too, it won't be selected to be freed without
        a flush.
      */
      block->status&= ~BLOCK_FOR_UPDATE;
      set_if_smaller(block->offset, offset);
      set_if_bigger(block->length, read_length+offset);

      /* Threads may be waiting for the changes to be complete. */
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

      /*
        If only a part of the cache block is to be replaced, and the
        rest has been read from file, then the cache lock has been
        released for I/O and it could be possible that another thread
        wants to evict or free the block and waits for it to be
        released. So we must not just decrement hash_link->requests, but
        also wake a waiting thread.
      */
      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_write_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        /* Pretend a "clean" block to avoid complications. */
        block->status&= ~(BLOCK_CHANGED);
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      /* Advance to the next cache-block-sized chunk of the input. */
      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first iteration can be unaligned. */
      offset= 0;

    } while ((length-= read_length));
    goto end;
  }

no_key_cache:
  /* Key cache is not used */
  if (dont_write)
  {
    /* Used in the server. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (locked_and_incremented)
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
    if (my_pwrite(file, (uchar*) buff, length, filepos,
                  MYF(MY_NABP | MY_WAIT_IF_FULL)))
      error=1;
    if (locked_and_incremented)
      keycache_pthread_mutex_lock(&keycache->cache_lock);
  }

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }

  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
  {
    MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
                                       keycache->key_cache_block_size),
                              (ulong) (keycache->blocks_unused *
                                       keycache->key_cache_block_size));
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("exec",
               test_key_cache(keycache, "end of key_cache_write", 1););
#endif
  DBUG_RETURN(error);
}
3302
3303
3304 /*
3305 Free block.
3306
3307 SYNOPSIS
3308 free_block()
3309 keycache Pointer to a key cache data structure
3310 block Pointer to the block to free
3311
3312 DESCRIPTION
3313 Remove reference to block from hash table.
3314 Remove block from the chain of clean blocks.
3315 Add block to the free list.
3316
3317 NOTE
3318 Block must not be free (status == 0).
3319 Block must not be in free_block_list.
3320 Block must not be in the LRU ring.
3321 Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3322 Block must not be in free (BLOCK_REASSIGNED).
3323 Block must not be in flush (BLOCK_IN_FLUSH).
3324 Block must not be dirty (BLOCK_CHANGED).
3325 Block must not be in changed_blocks (dirty) hash.
3326 Block must be in file_blocks (clean) hash.
3327 Block must refer to a hash_link.
3328 Block must have a request registered on it.
3329 */
3330
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed, hash_link %p status: %u",
                       BLOCK_NUMBER(block), block->hash_link,
                       block->status));
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
              !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                                 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an DBUG_ASSERT to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
    DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
                                  BLOCK_REASSIGNED)) &&
                !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_IN_FLUSH | BLOCK_CHANGED |
                                   BLOCK_FOR_UPDATE)));
    DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
    DBUG_ASSERT(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operation in this case. Assert that there are no other requests
    registered.
  */
  DBUG_ASSERT(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Error blocks are not put into the LRU ring. */
  if (!(block->status & BLOCK_ERROR))
  {
    /* Here the block must be in the LRU ring. Unlink it again. */
    DBUG_ASSERT(block->next_used && block->prev_used &&
                *block->prev_used == block);
    unlink_block(keycache, block);
  }
  /* Keep the warm-block statistic in sync before cooling the block. */
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  /* Reset the block to its pristine (never-used) state. */
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));

  /* Enforced by unlink_changed(), but just to be sure. */
  DBUG_ASSERT(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list (a stack; push on top). */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
3457
3458
/*
  qsort comparator: orders two cache blocks by the file position
  (diskpos) of their assigned hash links, ascending. Used to sort
  dirty blocks before flushing so writes hit the file sequentially.
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  my_off_t pos_a= (*a)->hash_link->diskpos;
  my_off_t pos_b= (*b)->hash_link->diskpos;

  if (pos_a < pos_b)
    return -1;
  if (pos_a > pos_b)
    return 1;
  return 0;
}
3464
3465
3466 /*
3467 Flush a portion of changed blocks to disk,
3468 free used blocks if requested
3469 */
3470
static int flush_cached_blocks(KEY_CACHE *keycache,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;                    /* first write error seen, if any */
  uint count= (uint) (end-cache);       /* number of blocks to flush */

  /* Don't lock the cache during the flush */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
    As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
    we are guaranteed no thread will change them.
    Sorting by disk position makes the writes sequential.
  */
  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                        ("block %u to be flushed", BLOCK_NUMBER(block)));
    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      /* Release the lock for the duration of the disk write. */
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
      /* Write only the modified part: [offset, length) of the buffer. */
      error= my_pwrite(file, block->buffer+block->offset,
                       block->length - block->offset,
                       block->hash_link->diskpos+ block->offset,
                       MYF(MY_NABP | MY_WAIT_IF_FULL));
      keycache_pthread_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        /* Remember the first error; later errors do not overwrite it. */
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let possible waiting requests to write to the block page proceed.
      It might happen only during an operation to resize the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }

  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
3574
3575
3576 /*
3577 Flush all key blocks for a file to disk, but don't do any mutex locks.
3578
3579 SYNOPSIS
3580 flush_key_blocks_int()
3581 keycache pointer to a key cache data structure
3582 file handler for the file to flush to
3583 flush_type type of the flush
3584
3585 NOTES
3586 This function doesn't do any mutex locks because it needs to be called both
  from flush_key_blocks and flush_all_key_blocks (the latter one does the
3588 mutex lock in the resize_key_cache() function).
3589
3590 We do only care about changed blocks that exist when the function is
3591 entered. We do not guarantee that all changed blocks of the file are
3592 flushed if more blocks change while this function is running.
3593
3594 RETURN
3595 0 ok
3596 1 error
3597 */
3598
static int flush_key_blocks_int(KEY_CACHE *keycache,
                                File file, enum flush_type type)
{
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
  int last_errno= 0;
  int last_errcnt= 0;
  DBUG_ENTER("flush_key_blocks_int");
  DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
              file, keycache->blocks_used, keycache->blocks_changed));

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of flush_key_blocks", 0););
#endif

  cache= cache_buff;
  if (keycache->disk_blocks > 0 &&
      (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
  {
    /* Key cache exists and flush is not disabled */
    int error= 0;
    uint count= FLUSH_CACHE;
    BLOCK_LINK **pos,**end;
    BLOCK_LINK *first_in_switch= NULL;
    BLOCK_LINK *last_in_flush;
    BLOCK_LINK *last_for_update;
    BLOCK_LINK *block, *next;
#if defined(KEYCACHE_DEBUG)
    uint cnt=0;
#endif

    if (type != FLUSH_IGNORE_CHANGED)
    {
      /*
        Count how many key blocks we have to cache to be able
        to flush all dirty pages with minimum seek moves
      */
      count= 0;
      for (block= keycache->changed_blocks[FILE_HASH(file)] ;
           block ;
           block= block->next_changed)
      {
        if ((block->hash_link->file == file) &&
            !(block->status & BLOCK_IN_FLUSH))
        {
          count++;
          KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
        }
      }
      /*
        Allocate a new buffer only if it's bigger than the one we have.
        Assure that we always have some entries for the case that new
        changed blocks appear while we need to wait for something.
      */
      if ((count > FLUSH_CACHE) &&
          !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
                                            MYF(0))))
        cache= cache_buff;
      /*
        After a restart there could be more changed blocks than now.
        So we should not let count become smaller than the fixed buffer.
      */
      if (cache == cache_buff)
        count= FLUSH_CACHE;
    }

    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
    last_in_flush= NULL;
    last_for_update= NULL;
    end= (pos= cache)+count;
    for (block= keycache->changed_blocks[FILE_HASH(file)] ;
         block ;
         block= next)
    {
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      next= block->next_changed;
      if (block->hash_link->file == file)
      {
        if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
        {
          /*
            Note: The special handling of BLOCK_IN_SWITCH is obsolete
            since we set BLOCK_IN_FLUSH if the eviction includes a
            flush. It can be removed in a later version.
          */
          if (!(block->status & BLOCK_IN_SWITCH))
          {
            /*
              We care only for the blocks for which flushing was not
              initiated by another thread and which are not in eviction.
              Registering a request on the block unlinks it from the LRU
              ring and protects against eviction.
            */
            reg_requests(keycache, block, 1);
            if (type != FLUSH_IGNORE_CHANGED)
            {
              /* It's not a temporary file */
              if (pos == end)
              {
                /*
                  This should happen relatively seldom. Remove the
                  request because we won't do anything with the block
                  but restart and pick it again in the next iteration.
                */
                unreg_request(keycache, block, 0);
                /*
                  This happens only if there is not enough
                  memory for the big block
                */
                if ((error= flush_cached_blocks(keycache, file, cache,
                                                end,type)))
                {
                  /* Do not loop infinitely trying to flush in vain. */
                  if ((last_errno == error) && (++last_errcnt > 5))
                    goto err;
                  last_errno= error;
                }
                /*
                  Restart the scan as some other thread might have changed
                  the changed blocks chain: the blocks that were in switch
                  state before the flush started have to be excluded
                */
                goto restart;
              }
              /*
                Mark the block with BLOCK_IN_FLUSH in order not to let
                other threads to use it for new pages and interfere with
                our sequence of flushing dirty file pages. We must not
                set this flag before actually putting the block on the
                write burst array called 'cache'.
              */
              block->status|= BLOCK_IN_FLUSH;
              /* Add block to the array for a write burst. */
              *pos++= block;
            }
            else
            {
              /* It's a temporary file */
              DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
              /*
                free_block() must not be called with BLOCK_CHANGED. Note
                that we must not change the BLOCK_CHANGED flag outside of
                link_to_file_list() so that it is always in the correct
                queue and the *blocks_changed counters are correct.
              */
              link_to_file_list(keycache, block, file, 1);
              if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
              {
                /* A request has been registered against the block above. */
                free_block(keycache, block);
              }
              else
              {
                /*
                  Link the block into the LRU ring if it's the last
                  submitted request for the block. This enables eviction
                  for the block. A request has been registered against
                  the block above.
                */
                unreg_request(keycache, block, 1);
              }
            }
          }
          else
          {
            /*
              Link the block into a list of blocks 'in switch'.

              WARNING: Here we introduce a place where a changed block
              is not in the changed_blocks hash! This is acceptable for
              a BLOCK_IN_SWITCH. Never try this for another situation.
              Other parts of the key cache code rely on changed blocks
              being in the changed_blocks hash.
            */
            unlink_changed(block);
            link_changed(block, &first_in_switch);
          }
        }
        else if (type != FLUSH_KEEP)
        {
          /*
            During the normal flush at end of statement (FLUSH_KEEP) we
            do not need to ensure that blocks in flush or update by
            other threads are flushed. They will be flushed by them
            later. In all other cases we must assure that we do not have
            any changed block of this file in the cache when this
            function returns.
          */
          if (block->status & BLOCK_IN_FLUSH)
          {
            /* Remember the last block found to be in flush. */
            last_in_flush= block;
          }
          else
          {
            /* Remember the last block found to be selected for update. */
            last_for_update= block;
          }
        }
      }
    }
    if (pos != cache)
    {
      /* Flush the blocks collected in the write burst array. */
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
      {
        /* Do not loop infinitely trying to flush in vain. */
        if ((last_errno == error) && (++last_errcnt > 5))
          goto err;
        last_errno= error;
      }
      /*
        Do not restart here during the normal flush at end of statement
        (FLUSH_KEEP). We have now flushed at least all blocks that were
        changed when entering this function. In all other cases we must
        assure that we do not have any changed block of this file in the
        cache when this function returns.
      */
      if (type != FLUSH_KEEP)
        goto restart;
    }
    if (last_in_flush)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks in
        flush by other threads. Wait until one of the blocks is flushed.
        Re-check the condition for last_in_flush. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_in_flush->status & BLOCK_IN_FLUSH)
        wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
      /* Be sure not to lose a block. They may be flushed in random order. */
      goto restart;
    }
    if (last_for_update)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks for
        update by other threads. Wait until one of the blocks is updated.
        Re-check the condition for last_for_update. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_for_update->status & BLOCK_FOR_UPDATE)
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
      /* The block is now changed. Flush it. */
      goto restart;
    }

    /*
      Wait until the list of blocks in switch is empty. The threads that
      are switching these blocks will relink them to clean file chains
      while we wait and thus empty the 'first_in_switch' chain.
    */
    while (first_in_switch)
    {
#if defined(KEYCACHE_DEBUG)
      cnt= 0;
#endif
      wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
                    &keycache->cache_lock);
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      /*
        Do not restart here. We have flushed all blocks that were
        changed when entering this function and were not marked for
        eviction. Other threads have now flushed all remaining blocks in
        the course of their eviction.
      */
    }

    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
      BLOCK_LINK *last_for_update= NULL;
      BLOCK_LINK *last_in_switch= NULL;
      uint total_found= 0;
      uint found;

      /*
        Finally free all clean blocks for this file.
        During resize this may be run by two threads in parallel.
      */
      do
      {
        found= 0;
        for (block= keycache->file_blocks[FILE_HASH(file)] ;
             block ;
             block= next)
        {
          /* Remember the next block. After freeing we cannot get at it. */
          next= block->next_changed;

          /* Changed blocks cannot appear in the file_blocks hash. */
          DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
          if (block->hash_link->file == file)
          {
            /* We must skip blocks that will be changed. */
            if (block->status & BLOCK_FOR_UPDATE)
            {
              last_for_update= block;
              continue;
            }

            /*
              We must not free blocks in eviction (BLOCK_IN_EVICTION |
              BLOCK_IN_SWITCH) or blocks intended to be freed
              (BLOCK_REASSIGNED).
            */
            if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_REASSIGNED)))
            {
              struct st_hash_link *UNINIT_VAR(next_hash_link);
              my_off_t UNINIT_VAR(next_diskpos);
              File UNINIT_VAR(next_file);
              uint UNINIT_VAR(next_status);
              uint UNINIT_VAR(hash_requests);

              total_found++;
              found++;
              KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);

              /*
                Register a request. This unlinks the block from the LRU
                ring and protects it against eviction. This is required
                by free_block().
              */
              reg_requests(keycache, block, 1);

              /*
                free_block() may need to wait for readers of the block.
                This is the moment where the other thread can move the
                'next' block from the chain. free_block() needs to wait
                if there are requests for the block pending.
              */
              if (next && (hash_requests= block->hash_link->requests))
              {
                /* Copy values from the 'next' block and its hash_link. */
                next_status= next->status;
                next_hash_link= next->hash_link;
                next_diskpos= next_hash_link->diskpos;
                next_file= next_hash_link->file;
                DBUG_ASSERT(next == next_hash_link->block);
              }

              free_block(keycache, block);
              /*
                If we had to wait and the state of the 'next' block
                changed, break the inner loop. 'next' may no longer be
                part of the current chain.

                We do not want to break the loop after every free_block(),
                not even only after waits. The chain might be quite long
                and contain blocks for many files. Traversing it again and
                again to find more blocks for this file could become quite
                inefficient.
              */
              if (next && hash_requests &&
                  ((next_status != next->status) ||
                   (next_hash_link != next->hash_link) ||
                   (next_file != next_hash_link->file) ||
                   (next_diskpos != next_hash_link->diskpos) ||
                   (next != next_hash_link->block)))
                break;
            }
            else
            {
              last_in_switch= block;
            }
          }
        } /* end for block in file_blocks */
      } while (found);

      /*
        If any clean block has been found, we may have waited for it to
        become free. In this case it could be possible that another clean
        block became dirty. This is possible if the write request existed
        before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
      */
      if (total_found)
        goto restart;

      /*
        To avoid an infinite loop, wait until one of the blocks marked
        for update is updated.
      */
      if (last_for_update)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
        goto restart;
      }

      /*
        To avoid an infinite loop wait until one of the blocks marked
        for eviction is switched.
      */
      if (last_in_switch)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
                                              BLOCK_IN_SWITCH |
                                              BLOCK_REASSIGNED));
        wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
        goto restart;
      }

    } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */

  } /* if (keycache->disk_blocks > 0 */

#ifndef DBUG_OFF
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "end of flush_key_blocks", 0););
#endif
err:
  if (cache != cache_buff)
    my_free(cache);
  if (last_errno)
    errno=last_errno;                /* Return first error */
  DBUG_RETURN(last_errno != 0);
}
4031
4032
4033 /*
4034 Flush all blocks for a file to disk
4035
4036 SYNOPSIS
4037
4038 flush_key_blocks()
4039 keycache pointer to a key cache data structure
4040 file handler for the file to flush to
4041 flush_type type of the flush
4042
4043 RETURN
4044 0 ok
4045 1 error
4046 */
4047
/*
  Flush all blocks for a file to disk.

  SYNOPSIS
    flush_key_blocks()
      keycache          pointer to a key cache data structure
      file              handler for the file to flush to
      type              type of the flush

  NOTES
    Takes cache_lock and protects the flush against a concurrent
    resize operation via the resize-op counter.

  RETURN
    0   ok
    1   error
*/

int flush_key_blocks(KEY_CACHE *keycache,
                     File file, enum flush_type type)
{
  int result= 0;
  DBUG_ENTER("flush_key_blocks");
  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));

  if (keycache->key_cache_inited)
  {
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /* While we waited for the lock, the keycache could have been ended. */
    if (keycache->disk_blocks > 0)
    {
      inc_counter_for_resize_op(keycache);
      result= flush_key_blocks_int(keycache, file, type);
      dec_counter_for_resize_op(keycache);
    }
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(result);
}
4069
4070
4071 /*
4072 Flush all blocks in the key cache to disk.
4073
4074 SYNOPSIS
4075 flush_all_key_blocks()
4076 keycache pointer to key cache root structure
4077
4078 DESCRIPTION
4079
4080 Flushing of the whole key cache is done in two phases.
4081
4082 1. Flush all changed blocks, waiting for them if necessary. Loop
4083 until there is no changed block left in the cache.
4084
4085 2. Free all clean blocks. Normally this means free all blocks. The
4086 changed blocks were flushed in phase 1 and became clean. However we
4087 may need to wait for blocks that are read by other threads. While we
4088 wait, a clean block could become changed if that operation started
4089 before the resize operation started. To be safe we must restart at
4090 phase 1.
4091
4092 When we can run through the changed_blocks and file_blocks hashes
4093 without finding a block any more, then we are done.
4094
4095 Note that we hold keycache->cache_lock all the time unless we need
4096 to wait for something.
4097
4098 RETURN
4099 0 OK
4100 != 0 Error
4101 */
4102
/*
  Flush all blocks in the key cache to disk.

  Works in two phases that repeat until a full pass finds nothing:

  1. Flush every changed block (waiting where necessary) until no
     changed block remains in the cache.
  2. Free all clean blocks. While waiting for readers here, a clean
     block may become dirty again (a write that started before the
     resize), so any progress in phase 2 forces a restart at phase 1.

  cache_lock is held by the caller for the whole time, except while
  waiting inside flush_key_blocks_int().

  RETURN
    0       OK
    != 0    Error
*/

static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK *head;
  uint redo_phase1;
  uint hits;
  uint i;
  DBUG_ENTER("flush_all_key_blocks");

  do
  {
    mysql_mutex_assert_owner(&keycache->cache_lock);
    redo_phase1= 0;

    /*
      Phase 1: flush changed blocks. Each non-empty bucket of the
      changed_blocks hash yields one file to flush per iteration;
      repeat until a whole sweep finds no changed block.
    */
    do
    {
      hits= 0;
      for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
      {
        head= keycache->changed_blocks[i];
        if (head)
        {
          hits++;
          /*
            Flush dirty blocks but do not free them yet. They can be
            used for reading until all other blocks are flushed too.
          */
          if (flush_key_blocks_int(keycache, head->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      Phase 2: free clean blocks. Each non-empty bucket of the
      file_blocks hash yields one file whose blocks are released.
      Any hit here may have involved waiting, so remember to redo
      phase 1 afterwards.
    */
    do
    {
      hits= 0;
      for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
      {
        head= keycache->file_blocks[i];
        if (head)
        {
          redo_phase1++;
          hits++;
          if (flush_key_blocks_int(keycache, head->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }
    } while (hits);

    /*
      If phase 2 found anything, a block may have become dirty while we
      waited (write request predating the resize). Re-check the hashes.
    */
  } while (redo_phase1);

#ifndef DBUG_OFF
  /* Now there should not exist any block any more. */
  for (i= 0; i < CHANGED_BLOCKS_HASH; i++)
  {
    DBUG_ASSERT(!keycache->changed_blocks[i]);
    DBUG_ASSERT(!keycache->file_blocks[i]);
  }
#endif

  DBUG_RETURN(0);
}
4203
4204
4205 /*
4206 Reset the counters of a key cache.
4207
4208 SYNOPSIS
4209 reset_key_cache_counters()
4210 name the name of a key cache
      key_cache   pointer to the key cache to be reset
4212
4213 DESCRIPTION
4214 This procedure is used by process_key_caches() to reset the counters of all
4215 currently used key caches, both the default one and the named ones.
4216
4217 RETURN
4218 0 on success (always because it can't fail)
4219 */
4220
/*
  Reset the statistics counters of a key cache.

  Used by process_key_caches() on both the default and the named
  caches. An uninitialized cache is silently skipped.

  RETURN
    0 on success (always; the function cannot fail)
*/

int reset_key_cache_counters(const char *name MY_ATTRIBUTE((unused)),
                             KEY_CACHE *key_cache)
{
  DBUG_ENTER("reset_key_cache_counters");
  if (key_cache->key_cache_inited)
  {
    DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
    key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
    key_cache->global_cache_r_requests= 0; /* Key_read_requests */
    key_cache->global_cache_read= 0;       /* Key_reads */
    key_cache->global_cache_w_requests= 0; /* Key_write_requests */
    key_cache->global_cache_write= 0;      /* Key_writes */
  }
  else
  {
    DBUG_PRINT("info", ("Key cache %s not initialized.", name));
  }
  DBUG_RETURN(0);
}
4239
4240
4241 #ifndef DBUG_OFF
4242 /*
4243 Test if disk-cache is ok
4244 */
/*
  Consistency check of the key cache (debug builds only).
  Currently a stub: invoked via DBUG_EXECUTE("check_keycache", ...)
  but performs no checks yet.
*/
static void test_key_cache(KEY_CACHE *keycache MY_ATTRIBUTE((unused)),
                           const char *where MY_ATTRIBUTE((unused)),
                           my_bool lock MY_ATTRIBUTE((unused)))
{
  /* TODO */
}
4251 #endif
4252
4253 #if defined(KEYCACHE_TIMEOUT)
4254
4255 #define KEYCACHE_DUMP_FILE "keycache_dump.txt"
4256 #define MAX_QUEUE_LEN 100
4257
4258
/*
  Dump the state of a key cache into KEYCACHE_DUMP_FILE (debugging aid,
  compiled only with KEYCACHE_TIMEOUT). Writes the wait queues, all
  block states and the LRU chain.

  Fixes over the previous version:
  - removed an fprintf() that dereferenced the uninitialized 'thread'
    pointer (undefined behavior);
  - the wait queues are members of KEY_CACHE, so they are accessed via
    'keycache->' like every other member in this function;
  - the LRU chain start was the garbled statement 'block= keycache=
    used_last;' — it now reads keycache->used_last, matching the loop
    condition below;
  - the per-block wqueue dump used the outer block index 'i' as its
    queue-length counter, truncating the block scan; it now uses a
    local counter;
  - fopen() failure is checked before use.
*/
static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file= fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  if (!keycache_dump_file)
    return;                                  /* nowhere to dump to */

  i=0;
  thread=last= keycache->waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(ulong) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last= keycache->waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
              thread->id, (uint) HASH_LINK_NUMBER(hash_link),
              (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      uint qlen= 0;                 /* local counter: don't clobber 'i' */
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          if (++qlen == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}
4348
4349 #endif /* defined(KEYCACHE_TIMEOUT) */
4350
4351 #if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4352
4353
/*
  Wait on a condition variable with a KEYCACHE_TIMEOUT-second timeout
  (debug build with KEYCACHE_TIMEOUT, non-Windows only). A timeout is
  treated as a hang of the key cache: the state is dumped and the
  process asserts.
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int rc;
  struct timeval now;            /* time when we started waiting        */
  struct timespec timeout;       /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;
#endif

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value: absolute deadline KEYCACHE_TIMEOUT s ahead. */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval uses microseconds.
    timespec uses nanoseconds.
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  cnt++;
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
    fflush(keycache_debug_log);
#endif
  rc= mysql_cond_timedwait(cond, mutex, &timeout);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  if (rc == ETIMEDOUT || rc == ETIME)
  {
#if defined(KEYCACHE_DEBUG)
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
#endif
    /*
      NOTE(review): keycache_dump() is declared above as taking a
      KEY_CACHE* argument, but no keycache pointer is available in this
      scope and none is passed here — this KEYCACHE_TIMEOUT-only code
      appears bit-rotted; verify before enabling the macro.
    */
    keycache_dump();
  }

#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
4401 #else
4402 #if defined(KEYCACHE_DEBUG)
/*
  Wait on a condition variable, bracketing the wait with the keycache
  thread-trace markers (KEYCACHE_DEBUG build without KEYCACHE_TIMEOUT).
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int result;
  KEYCACHE_THREAD_TRACE_END("started waiting");
  result= mysql_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return result;
}
4412 #endif
4413 #endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4414
4415 #if defined(KEYCACHE_DEBUG)
4416
4417
/*
  Acquire a mutex and record the event in the keycache thread trace.
*/
static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
  int result= mysql_mutex_lock(mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return result;
}
4425
4426
/*
  Record the event in the keycache thread trace, then release the mutex.
*/
static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  mysql_mutex_unlock(mutex);
}
4432
4433
/*
  Signal a condition variable, recording the event in the thread trace.
*/
static int keycache_pthread_cond_signal(mysql_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return mysql_cond_signal(cond);
}
4441
4442
4443 #if defined(KEYCACHE_DEBUG_LOG)
4444
4445
keycache_debug_print(const char * fmt,...)4446 static void keycache_debug_print(const char * fmt,...)
4447 {
4448 va_list args;
4449 va_start(args,fmt);
4450 if (keycache_debug_log)
4451 {
4452 (void) vfprintf(keycache_debug_log, fmt, args);
4453 (void) fputc('\n',keycache_debug_log);
4454 }
4455 va_end(args);
4456 }
4457 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4458
4459 #if defined(KEYCACHE_DEBUG_LOG)
4460
4461
keycache_debug_log_close(void)4462 void keycache_debug_log_close(void)
4463 {
4464 if (keycache_debug_log)
4465 fclose(keycache_debug_log);
4466 }
4467 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4468
4469 #endif /* defined(KEYCACHE_DEBUG) */
4470
4471 #if !defined(DBUG_OFF)
4472 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4473
/*
  Print the full state of a block to the debug trace before a failing
  assertion. Always returns 0 so that it can be used directly inside a
  DBUG_ASSERT() expression: DBUG_ASSERT(cond || fail_block(block)).
*/
static int fail_block(BLOCK_LINK *block)
{
  F_B_PRT("block->next_used: %lx\n", (ulong) block->next_used);
  F_B_PRT("block->prev_used: %lx\n", (ulong) block->prev_used);
  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
  F_B_PRT("block->hash_link: %lx\n", (ulong) block->hash_link);
  F_B_PRT("block->status: %u\n", block->status);
  F_B_PRT("block->length: %u\n", block->length);
  F_B_PRT("block->offset: %u\n", block->offset);
  F_B_PRT("block->requests: %u\n", block->requests);
  F_B_PRT("block->temperature: %u\n", block->temperature);
  return 0; /* Let the assert fail. */
}
4488
/*
  Print the full state of a hash link to the debug trace before a
  failing assertion. Always returns 0 so that it can be used directly
  inside a DBUG_ASSERT() expression (see fail_block()).
*/
static int fail_hlink(HASH_LINK *hlink)
{
  F_B_PRT("hlink->next: %lx\n", (ulong) hlink->next);
  F_B_PRT("hlink->prev: %lx\n", (ulong) hlink->prev);
  F_B_PRT("hlink->block: %lx\n", (ulong) hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file: %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
4498
/*
  Debug check that the key cache holds no blocks and no hash links.
  Every occupied block or hash link is reported to stderr.

  Fix: 'idx' is an int, but it was printed with the "%u" conversion —
  a printf format/argument type mismatch (undefined behavior per the C
  standard, flagged by -Wformat). Now printed with "%d".

  RETURN
    1    cache is empty (or not initialized)
    0    at least one block or hash link is still in use
*/
static int cache_empty(KEY_CACHE *keycache)
{
  int errcnt= 0;
  int idx;
  /* An uninitialized cache (no blocks) is trivially empty. */
  if (keycache->disk_blocks <= 0)
    return 1;
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      fprintf(stderr, "block index: %d\n", idx);
      fail_block(block);
      errcnt++;
    }
  }
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      fprintf(stderr, "hash_link index: %d\n", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    fprintf(stderr, "blocks: %d  used: %lu\n",
            keycache->disk_blocks, keycache->blocks_used);
    fprintf(stderr, "hash_links: %d  used: %d\n",
            keycache->hash_links, keycache->hash_links_used);
    fprintf(stderr, "\n");
  }
  return !errcnt;
}
4535 #endif
4536
4537