1 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
2 Copyright (c) 2017, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
16
17
18
19 /**
20 @file
21 The file contains the following modules:
22
23 Simple Key Cache Module
24
25 Partitioned Key Cache Module
26
27 Key Cache Interface Module
28
29 */
30
31 #include "mysys_priv.h"
32 #include "mysys_err.h"
33 #include <keycache.h>
34 #include "my_static.h"
35 #include <m_string.h>
36 #include <my_bit.h>
37 #include <errno.h>
38 #include <stdarg.h>
39 #include "probes_mysql.h"
40
41 /******************************************************************************
42 Simple Key Cache Module
43
44 The module contains implementations of all key cache interface functions
45 employed by partitioned key caches.
46
47 ******************************************************************************/
48
49 /*
  These functions handle keyblock caching for ISAM and MyISAM tables.
51
52 One cache can handle many files.
53 It must contain buffers of the same blocksize.
54
55 init_key_cache() should be used to init cache handler.
56
57 The free list (free_block_list) is a stack like structure.
58 When a block is freed by free_block(), it is pushed onto the stack.
59 When a new block is required it is first tried to pop one from the stack.
60 If the stack is empty, it is tried to get a never-used block from the pool.
61 If this is empty too, then a block is taken from the LRU ring, flushing it
62 to disk, if necessary. This is handled in find_key_block().
63 With the new free list, the blocks can have three temperatures:
64 hot, warm and cold (which is free). This is remembered in the block header
65 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
66 temperature is necessary to correctly count the number of warm blocks,
67 which is required to decide when blocks are allowed to become hot. Whenever
68 a block is inserted to another (sub-)chain, we take the old and new
69 temperature into account to decide if we got one more or less warm block.
70 blocks_unused is the sum of never used blocks in the pool and of currently
71 free blocks. blocks_used is the number of blocks fetched from the pool and
72 as such gives the maximum number of in-use blocks at any time.
73
74 Key Cache Locking
75 =================
76
77 All key cache locking is done with a single mutex per key cache:
78 keycache->cache_lock. This mutex is locked almost all the time
79 when executing code in this file (mf_keycache.c).
80 However it is released for I/O and some copy operations.
81
82 The cache_lock is also released when waiting for some event. Waiting
83 and signalling is done via condition variables. In most cases the
84 thread waits on its thread->suspend condition variable. Every thread
85 has a my_thread_var structure, which contains this variable and a
86 '*next' and '**prev' pointer. These pointers are used to insert the
87 thread into a wait queue.
88
89 A thread can wait for one block and thus be in one wait queue at a
90 time only.
91
92 Before starting to wait on its condition variable with
93 mysql_cond_wait(), the thread enters itself to a specific wait queue
94 with link_into_queue() (double linked with '*next' + '**prev') or
95 wait_on_queue() (single linked with '*next').
96
97 Another thread, when releasing a resource, looks up the waiting thread
98 in the related wait queue. It sends a signal with
99 mysql_cond_signal() to the waiting thread.
100
101 NOTE: Depending on the particular wait situation, either the sending
102 thread removes the waiting thread from the wait queue with
103 unlink_from_queue() or release_whole_queue() respectively, or the waiting
104 thread removes itself.
105
106 There is one exception from this locking scheme when one thread wants
107 to reuse a block for some other address. This works by first marking
108 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
109 threads that are reading the block to finish. Each block has a
110 reference to a condition variable (condvar). It holds a reference to
111 the thread->suspend condition variable for the waiting thread (if such
112 a thread exists). When that thread is signaled, the reference is
113 cleared. The number of readers of a block is registered in
114 block->hash_link->requests. See wait_for_readers() / remove_reader()
115 for details. This is similar to the above, but it clearly means that
116 only one thread can wait for a particular block. There is no queue in
  this case. Strangely enough block->condvar is used for waiting for the
118 assigned hash_link only. More precisely it is used to wait for all
119 requests to be unregistered from the assigned hash_link.
120
121 The resize_queue serves two purposes:
122 1. Threads that want to do a resize wait there if in_resize is set.
123 This is not used in the server. The server refuses a second resize
124 request if one is already active. keycache->in_init is used for the
125 synchronization. See set_var.cc.
126 2. Threads that want to access blocks during resize wait here during
127 the re-initialization phase.
128 When the resize is done, all threads on the queue are signalled.
129 Hypothetical resizers can compete for resizing, and read/write
130 requests will restart to request blocks from the freshly resized
131 cache. If the cache has been resized too small, it is disabled and
132 'can_be_used' is false. In this case read/write requests bypass the
133 cache. Since they increment and decrement 'cnt_for_resize_op', the
134 next resizer can wait on the queue 'waiting_for_resize_cnt' until all
135 I/O finished.
136 */
137
138 /* declare structures that is used by st_key_cache */
139
140 struct st_block_link;
141 typedef struct st_block_link BLOCK_LINK;
142 struct st_keycache_page;
143 typedef struct st_keycache_page KEYCACHE_PAGE;
144 struct st_hash_link;
145 typedef struct st_hash_link HASH_LINK;
146
/*
  Info about requests in a waiting queue.

  Threads are linked through the my_thread_var '*next'/'**prev' pointers;
  only the tail of the circular list is stored here (see wait_on_queue()
  and link_into_queue()).
*/
typedef struct st_keycache_wqueue
{
  struct st_my_thread_var *last_thread;  /* circular list of waiting threads */
} KEYCACHE_WQUEUE;
152
153 /* Default size of hash for changed files */
154 #define MIN_CHANGED_BLOCKS_HASH_SIZE 128
155
/*
  Control block for a simple (non-partitioned) key cache.

  One instance describes a complete cache: its configuration, the block
  and hash-link pools, the LRU chain, the per-file dirty/clean hashes,
  the wait queues used for synchronization and the statistics counters.
  All fields are protected by cache_lock unless noted otherwise.
*/

typedef struct st_simple_key_cache_cb
{
  my_bool key_cache_inited;      /* <=> control block is allocated */
  my_bool in_resize;             /* true during resize operation */
  my_bool resize_in_flush;       /* true during flush of resize operation */
  my_bool can_be_used;           /* usage of cache for read/write is allowed */
  size_t key_cache_mem_size;     /* specified size of the cache memory */
  size_t allocated_mem_size;     /* size of the memory actually allocated */
  uint key_cache_block_size;     /* size of the page buffer of a cache block */
  size_t min_warm_blocks;        /* min number of warm blocks; */
  size_t age_threshold;          /* age threshold for hot blocks */
  ulonglong keycache_time;       /* total number of block link operations */
  uint hash_entries;             /* max number of entries in the hash table */
  uint changed_blocks_hash_size; /* Number of hash buckets for file blocks */
  int hash_links;                /* max number of hash links */
  int hash_links_used;           /* number of hash links currently used */
  int disk_blocks;               /* max number of blocks in the cache */
  size_t blocks_used;            /* maximum number of concurrently used blocks */
  size_t blocks_unused;          /* number of currently unused blocks */
  size_t blocks_changed;         /* number of currently dirty blocks */
  size_t warm_blocks;            /* number of blocks in warm sub-chain */
  ulong cnt_for_resize_op;       /* counter to block resize operation */
  long blocks_available;         /* number of blocks available in the LRU chain */
  HASH_LINK **hash_root;         /* arr. of entries into hash table buckets */
  HASH_LINK *hash_link_root;     /* memory for hash table links */
  HASH_LINK *free_hash_list;     /* list of free hash links */
  BLOCK_LINK *free_block_list;   /* list of free blocks */
  BLOCK_LINK *block_root;        /* memory for block links */
  uchar *block_mem;              /* memory for block buffers */
  BLOCK_LINK *used_last;         /* ptr to the last block of the LRU chain */
  BLOCK_LINK *used_ins;          /* ptr to the insertion block in LRU chain */
  mysql_mutex_t cache_lock;      /* to lock access to the cache structure */
  KEYCACHE_WQUEUE resize_queue;  /* threads waiting during resize operation */
  /*
    Waiting for a zero resize count. Using a queue for symmetry though
    only one thread can wait here.
  */
  KEYCACHE_WQUEUE waiting_for_resize_cnt;
  KEYCACHE_WQUEUE waiting_for_hash_link; /* waiting for a free hash link */
  KEYCACHE_WQUEUE waiting_for_block;     /* requests waiting for a free block */
  BLOCK_LINK **changed_blocks;   /* hash for dirty file bl.*/
  BLOCK_LINK **file_blocks;      /* hash for other file bl.*/

  /* Statistics variables. These are reset in reset_key_cache_counters(). */
  ulong global_blocks_changed;      /* number of currently dirty blocks */
  ulonglong global_cache_w_requests;/* number of write requests (write hits) */
  ulonglong global_cache_write;     /* number of writes from cache to files */
  ulonglong global_cache_r_requests;/* number of read requests (read hits) */
  ulonglong global_cache_read;      /* number of reads from files to cache */

  int blocks;                    /* max number of blocks in the cache */
  uint hash_factor;              /* factor used to calculate hash function */
  my_bool in_init;               /* Set to 1 in MySQL during init/resize */
} SIMPLE_KEY_CACHE_CB;
212
213 /*
214 Some compilation flags have been added specifically for this module
215 to control the following:
216 - not to let a thread to yield the control when reading directly
217 from key cache, which might improve performance in many cases;
218 to enable this add:
219 #define SERIALIZED_READ_FROM_CACHE
220 - to set an upper bound for number of threads simultaneously
221 using the key cache; this setting helps to determine an optimal
222 size for hash table and improve performance when the number of
223 blocks in the key cache much less than the number of threads
224 accessing it;
225 to set this number equal to <N> add
226 #define MAX_THREADS <N>
227 - to substitute calls of mysql_cond_wait for calls of
228 mysql_cond_timedwait (wait with timeout set up);
229 this setting should be used only when you want to trap a deadlock
230 situation, which theoretically should not happen;
231 to set timeout equal to <T> seconds add
232 #define KEYCACHE_TIMEOUT <T>
233 - to enable the module traps and to send debug information from
234 key cache module to a special debug log add:
235 #define KEYCACHE_DEBUG
236 the name of this debug log file <LOG NAME> can be set through:
237 #define KEYCACHE_DEBUG_LOG <LOG NAME>
238 if the name is not defined, it's set by default;
239 if the KEYCACHE_DEBUG flag is not set up and we are in a debug
240 mode, i.e. when ! defined(DBUG_OFF), the debug information from the
241 module is sent to the regular debug log.
242
243 Example of the settings:
244 #define SERIALIZED_READ_FROM_CACHE
245 #define MAX_THREADS 100
246 #define KEYCACHE_TIMEOUT 1
247 #define KEYCACHE_DEBUG
248 #define KEYCACHE_DEBUG_LOG "my_key_cache_debug.log"
249 */
250
251 #define STRUCT_PTR(TYPE, MEMBER, a) \
252 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
253
254 /* types of condition variables */
255 #define COND_FOR_REQUESTED 0
256 #define COND_FOR_SAVED 1
257 #define COND_FOR_READERS 2
258
259 typedef mysql_cond_t KEYCACHE_CONDVAR;
260
/* Descriptor of the page in the key cache block buffer. */
struct st_keycache_page
{
  int file;                    /* file to which the page belongs to */
  my_off_t filepos;            /* position of the page in the file */
};
267
/*
  Element in the chain of a hash table bucket.

  Maps a (file, position) pair to the cache block that holds the page,
  and counts the requests registered for that page.
*/
struct st_hash_link
{
  struct st_hash_link *next, **prev; /* to connect links in the same bucket */
  struct st_block_link *block;       /* reference to the block for the page: */
  File file;                         /* from such a file */
  my_off_t diskpos;                  /* with such an offset */
  uint requests;                     /* number of requests for the page */
};
277
278 /* simple states of a block */
279 #define BLOCK_ERROR 1U/* an error occurred when performing file i/o */
280 #define BLOCK_READ 2U/* file block is in the block buffer */
281 #define BLOCK_IN_SWITCH 4U/* block is preparing to read new page */
282 #define BLOCK_REASSIGNED 8U/* blk does not accept requests for old page */
283 #define BLOCK_IN_FLUSH 16U/* block is selected for flush */
284 #define BLOCK_CHANGED 32U/* block buffer contains a dirty page */
285 #define BLOCK_IN_USE 64U/* block is not free */
286 #define BLOCK_IN_EVICTION 128U/* block is selected for eviction */
287 #define BLOCK_IN_FLUSHWRITE 256U/* block is in write to file */
288 #define BLOCK_FOR_UPDATE 512U/* block is selected for buffer modification */
289
290 /* page status, returned by find_key_block */
291 #define PAGE_READ 0
292 #define PAGE_TO_BE_READ 1
293 #define PAGE_WAIT_TO_BE_READ 2
294
295 /* block temperature determines in which (sub-)chain the block currently is */
296 enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
297
/*
  Key cache block.

  A block lives on the LRU ring (next_used/prev_used), on a per-file
  dirty or clean chain (next_changed/prev_changed) and, when assigned,
  points back to the hash link describing its (file, position) page.
  Its state is a combination of the BLOCK_* status bits above plus a
  temperature (see enum BLOCK_TEMPERATURE).
*/
struct st_block_link
{
  struct st_block_link
    *next_used, **prev_used;   /* to connect links in the LRU chain (ring) */
  struct st_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
  struct st_hash_link *hash_link; /* backward ptr to referring hash_link */
  KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
  uint requests;          /* number of requests for the block */
  uchar *buffer;          /* buffer for the block page */
  uint offset;            /* beginning of modified data in the buffer */
  uint length;            /* end of data in the buffer */
  uint status;            /* state of the block */
  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
  uint hits_left;         /* number of hits left until promotion */
  ulonglong last_hit_time; /* timestamp of the last hit */
  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */
};
317
318 KEY_CACHE dflt_key_cache_var;
319 KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;
320
321 #define FLUSH_CACHE 2000 /* sort this many blocks at once */
322
323 static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache);
324 static void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup);
325 static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
326 mysql_mutex_t *mutex);
327 static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
328 static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block);
329 #ifndef DBUG_OFF
330 static void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
331 const char *where, my_bool lock);
332 #endif
333 #define KEYCACHE_BASE_EXPR(f, pos) \
334 ((ulong) ((pos) / keycache->key_cache_block_size) + (ulong) (f))
335 #define KEYCACHE_HASH(f, pos) \
336 ((KEYCACHE_BASE_EXPR(f, pos) / keycache->hash_factor) & \
337 (keycache->hash_entries-1))
338 #define FILE_HASH(f, cache) ((uint) (f) & (cache->changed_blocks_hash_size-1))
339
340 #define DEFAULT_KEYCACHE_DEBUG_LOG "keycache_debug.log"
341
342 #if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
343 #define KEYCACHE_DEBUG_LOG DEFAULT_KEYCACHE_DEBUG_LOG
344 #endif
345
346 #if defined(KEYCACHE_DEBUG_LOG)
347 static FILE *keycache_debug_log=NULL;
348 static void keycache_debug_print(const char *fmt,...);
349 #define KEYCACHE_DEBUG_OPEN \
350 if (!keycache_debug_log) \
351 { \
352 keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w"); \
353 (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ); \
354 }
355
356 #define KEYCACHE_DEBUG_CLOSE \
357 if (keycache_debug_log) \
358 { \
359 fclose(keycache_debug_log); \
360 keycache_debug_log= 0; \
361 }
362 #else
363 #define KEYCACHE_DEBUG_OPEN
364 #define KEYCACHE_DEBUG_CLOSE
365 #endif /* defined(KEYCACHE_DEBUG_LOG) */
366
367 #if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
368 #define KEYCACHE_DBUG_PRINT(l, m) \
369 { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
370 keycache_debug_print m; }
371
372 #define KEYCACHE_DBUG_ASSERT(a) \
373 { if (! (a) && keycache_debug_log) fclose(keycache_debug_log); \
374 assert(a); }
375 #else
376 #define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m)
377 #define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a)
378 #endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */
379
380 #if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)
381 static long keycache_thread_id;
382 #define KEYCACHE_THREAD_TRACE(l) \
383 KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))
384
385 #define KEYCACHE_THREAD_TRACE_BEGIN(l) \
386 { struct st_my_thread_var *thread_var= my_thread_var; \
387 keycache_thread_id= thread_var->id; \
388 KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }
389
390 #define KEYCACHE_THREAD_TRACE_END(l) \
391 KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
392 #else
393 #define KEYCACHE_THREAD_TRACE_BEGIN(l)
394 #define KEYCACHE_THREAD_TRACE_END(l)
395 #define KEYCACHE_THREAD_TRACE(l)
396 #endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */
397
398 #define BLOCK_NUMBER(b) \
399 ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
400 #define HASH_LINK_NUMBER(h) \
401 ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))
402
403 #if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
404 static int keycache_pthread_cond_wait(mysql_cond_t *cond,
405 mysql_mutex_t *mutex);
406 #else
407 #define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
408 #endif
409
410 #if defined(KEYCACHE_DEBUG)
411 static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
412 static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
413 static int keycache_pthread_cond_signal(mysql_cond_t *cond);
414 #else
415 #define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
416 #define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
417 #define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
418 #endif /* defined(KEYCACHE_DEBUG) */
419
420 #if !defined(DBUG_OFF)
421 #if defined(inline)
422 #undef inline
423 #endif
424 #define inline /* disabled inline for easier debugging */
425 static int fail_hlink(HASH_LINK *hlink);
426 static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache);
427 #endif
428 #ifdef DBUG_ASSERT_EXISTS
429 static int fail_block(BLOCK_LINK *block);
430 #endif
431
next_power(uint value)432 static inline uint next_power(uint value)
433 {
434 return (uint) my_round_up_to_next_power((uint32) value) << 1;
435 }
436
437
438 /*
439 Initialize a simple key cache
440
441 SYNOPSIS
442 init_simple_key_cache()
443 keycache pointer to the control block of a simple key cache
444 key_cache_block_size size of blocks to keep cached data
      use_mem             memory to use for the key cache buffers/structures
446 division_limit division limit (may be zero)
447 age_threshold age threshold (may be zero)
448
449 DESCRIPTION
450 This function is the implementation of the init_key_cache interface
451 function that is employed by simple (non-partitioned) key caches.
452 The function builds a simple key cache and initializes the control block
453 structure of the type SIMPLE_KEY_CACHE_CB that is used for this key cache.
454 The parameter keycache is supposed to point to this structure.
455 The parameter key_cache_block_size specifies the size of the blocks in
456 the key cache to be built. The parameters division_limit and age_threshold
457 determine the initial values of those characteristics of the key cache
458 that are used for midpoint insertion strategy. The parameter use_mem
459 specifies the total amount of memory to be allocated for key cache blocks
460 and auxiliary structures.
461
462 RETURN VALUE
463 number of blocks in the key cache, if successful,
464 <= 0 - otherwise.
465
466 NOTES.
467 if keycache->key_cache_inited != 0 we assume that the key cache
468 is already initialized. This is for now used by myisamchk, but shouldn't
469 be something that a program should rely on!
470
471 It's assumed that no two threads call this function simultaneously
472 referring to the same key cache handle.
473 */
474
static
int init_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
                          uint key_cache_block_size,
                          size_t use_mem, uint division_limit,
                          uint age_threshold, uint changed_blocks_hash_size)
{
  size_t blocks, hash_links;
  size_t length;
  int error;
  DBUG_ENTER("init_simple_key_cache");
  DBUG_ASSERT(key_cache_block_size >= 512);

  KEYCACHE_DEBUG_OPEN;
  /* A cache that already has blocks is in use; refuse to re-initialize. */
  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
  {
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
  }

  keycache->blocks_used= keycache->blocks_unused= 0;
  keycache->global_blocks_changed= 0;
  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
  if (! keycache->key_cache_inited)
  {
    keycache->key_cache_inited= 1;
    keycache->hash_factor= 1;
    /*
      Initialize these variables once only.
      Their value must survive re-initialization during resizing.
    */
    keycache->in_resize= 0;
    keycache->resize_in_flush= 0;
    keycache->cnt_for_resize_op= 0;
    keycache->waiting_for_resize_cnt.last_thread= NULL;
    keycache->in_init= 0;
    mysql_mutex_init(key_KEY_CACHE_cache_lock,
                     &keycache->cache_lock, MY_MUTEX_INIT_FAST);
    keycache->resize_queue.last_thread= NULL;
  }

  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;
  DBUG_PRINT("info", ("key_cache_block_size: %u",
                      key_cache_block_size));

  /*
    First estimate of the block count: divide the budget by the per-block
    cost (block link + 2 hash links + hash-root share + page buffer).
  */
  blocks= use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
                     sizeof(HASH_LINK*) * 5/4 + key_cache_block_size);

  /* Changed blocks hash needs to be a power of 2 */
  changed_blocks_hash_size= my_round_up_to_next_power(MY_MAX(changed_blocks_hash_size,
                                                             MIN_CHANGED_BLOCKS_HASH_SIZE));

  /* It doesn't make sense to have too few blocks (less than 8) */
  if (blocks >= 8)
  {
    /*
      Allocation loop: try to allocate the auxiliary structures and the
      block buffers for the current 'blocks' estimate; on failure, retry
      with 3/4 of the blocks until success or until blocks drops below 8.
    */
    for ( ; ; )
    {
      /* Set my_hash_entries to the next bigger 2 power */
      if ((keycache->hash_entries= next_power((uint)blocks)) < blocks * 5/4)
        keycache->hash_entries<<= 1;
      hash_links= 2 * blocks;
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
        hash_links= MAX_THREADS + blocks - 1;
#endif
      /*
        Shrink 'blocks' until auxiliary structures plus page buffers fit
        into use_mem (but never below 8 blocks).
      */
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
                       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
                       ALIGN_SIZE(sizeof(HASH_LINK*) *
                                  keycache->hash_entries) +
                       sizeof(BLOCK_LINK*)* ((size_t)changed_blocks_hash_size*2))) +
             (blocks * keycache->key_cache_block_size) > use_mem && blocks > 8)
        blocks--;
      keycache->allocated_mem_size= blocks * keycache->key_cache_block_size;
      if ((keycache->block_mem= my_large_malloc(&keycache->allocated_mem_size,
                                                MYF(0))))
      {
        /*
          Allocate memory for blocks, hash_links and hash entries;
          For each block 2 hash links are allocated
        */
        if (my_multi_malloc_large(key_memory_KEY_CACHE, MYF(MY_ZEROFILL),
                                  &keycache->block_root,
                                  (ulonglong) (blocks * sizeof(BLOCK_LINK)),
                                  &keycache->hash_root,
                                  (ulonglong) (sizeof(HASH_LINK*) *
                                               keycache->hash_entries),
                                  &keycache->hash_link_root,
                                  (ulonglong) (hash_links * sizeof(HASH_LINK)),
                                  &keycache->changed_blocks,
                                  (ulonglong) (sizeof(BLOCK_LINK*) *
                                               changed_blocks_hash_size),
                                  &keycache->file_blocks,
                                  (ulonglong) (sizeof(BLOCK_LINK*) *
                                               changed_blocks_hash_size),
                                  NullS))
          break;
        /* Auxiliary structures failed: release buffers and retry smaller. */
        my_large_free(keycache->block_mem, keycache->allocated_mem_size);
        keycache->block_mem= 0;
      }
      if (blocks < 8)
      {
        my_errno= ENOMEM;
        my_error(EE_OUTOFMEMORY, MYF(ME_FATAL),
                 blocks * keycache->key_cache_block_size);
        goto err;
      }
      blocks= blocks / 4*3;
    }
    keycache->blocks_unused= blocks;
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= (int)hash_links;
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;

    keycache->global_blocks_changed= 0;
    keycache->blocks_available=0;	/* For debugging */

    /* The LRU chain is empty after initialization */
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
    keycache->free_block_list= NULL;
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
    /* division_limit/age_threshold are percentages of the block count. */
    keycache->min_warm_blocks= (division_limit ?
                                blocks * division_limit / 100 + 1 :
                                blocks);
    keycache->age_threshold= (age_threshold ?
                              blocks * age_threshold / 100 :
                              blocks);
    keycache->changed_blocks_hash_size= changed_blocks_hash_size;
    keycache->can_be_used= 1;

    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
    DBUG_PRINT("exit",
               ("disk_blocks: %d  block_root: %p  hash_entries: %d\
 hash_root: %p  hash_links: %d  hash_link_root: %p",
                keycache->disk_blocks,  keycache->block_root,
                keycache->hash_entries, keycache->hash_root,
                keycache->hash_links,   keycache->hash_link_root));
  }
  else
  {
    /* key_buffer_size is specified too small. Disable the cache. */
    keycache->can_be_used= 0;
  }

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
  DBUG_RETURN((int) keycache->disk_blocks);

err:
  /* Save errno across the cleanup calls, which may overwrite it. */
  error= my_errno;
  keycache->disk_blocks= 0;
  keycache->blocks=  0;
  if (keycache->block_mem)
  {
    my_large_free((uchar*) keycache->block_mem, keycache->allocated_mem_size);
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
    my_free(keycache->block_root);
    keycache->block_root= NULL;
  }
  my_errno= error;
  keycache->can_be_used= 0;
  DBUG_RETURN(0);
}
646
647
648 /*
649 Prepare for resizing a simple key cache
650
651 SYNOPSIS
652 prepare_resize_simple_key_cache()
653 keycache pointer to the control block of a simple key cache
654 release_lock <=> release the key cache lock before return
655
656 DESCRIPTION
657 This function flushes all dirty pages from a simple key cache and after
658 this it destroys the key cache calling end_simple_key_cache. The function
659 takes the parameter keycache as a pointer to the control block
660 structure of the type SIMPLE_KEY_CACHE_CB for this key cache.
661 The parameter release_lock says whether the key cache lock must be
662 released before return from the function.
663
664 RETURN VALUE
665 0 - on success,
666 1 - otherwise.
667
668 NOTES
669 This function is the called by resize_simple_key_cache and
670 resize_partitioned_key_cache that resize simple and partitioned key caches
671 respectively.
672 */
673
static
int prepare_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
                                    my_bool release_lock)
{
  int res= 0;
  DBUG_ENTER("prepare_resize_simple_key_cache");

  keycache_pthread_mutex_lock(&keycache->cache_lock);

  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    /* purecov: begin inspected */
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* purecov: end */
  }

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks to enter the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      /* Flush failed: disable the cache and report the error to the caller. */
      keycache->can_be_used= 0;
      res= 1;
      goto finish;
    }
    DBUG_SLOW_ASSERT(cache_empty(keycache));

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked if it is in the cache. We need to wait for
    pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We do also want to
    wait for I/O done when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);

  /* Tear down the current cache structures (cleanup=0 keeps the mutex). */
  end_simple_key_cache(keycache, 0);

finish:
  if (release_lock)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(res);
}
744
745
746 /*
747 Finalize resizing a simple key cache
748
749 SYNOPSIS
750 finish_resize_simple_key_cache()
751 keycache pointer to the control block of a simple key cache
752
753 DESCRIPTION
754 This function performs finalizing actions for the operation of
755 resizing a simple key cache. The function takes the parameter
756 keycache as a pointer to the control block structure of the type
757 SIMPLE_KEY_CACHE_CB for this key cache. The function sets the flag
758 in_resize in this structure to FALSE.
759
760 RETURN VALUE
761 none
762
763 NOTES
764 This function is the called by resize_simple_key_cache and
765 resize_partitioned_key_cache that resize simple and partitioned key caches
766 respectively.
767 */
768
static
void finish_resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache)
{
  DBUG_ENTER("finish_resize_simple_key_cache");

  /* Caller must hold cache_lock (taken in prepare_resize_simple_key_cache). */
  mysql_mutex_assert_owner(&keycache->cache_lock);

  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;


  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);


  /* Release the lock acquired by prepare_resize_simple_key_cache(). */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);

  DBUG_VOID_RETURN;
}
791
792
793 /*
794 Resize a simple key cache
795
796 SYNOPSIS
797 resize_simple_key_cache()
798 keycache pointer to the control block of a simple key cache
799 key_cache_block_size size of blocks to keep cached data
800 use_mem memory to use for the key cache buffers/structures
801 division_limit new division limit (if not zero)
802 age_threshold new age threshold (if not zero)
803
804 DESCRIPTION
805 This function is the implementation of the resize_key_cache interface
806 function that is employed by simple (non-partitioned) key caches.
807 The function takes the parameter keycache as a pointer to the
808 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
809 cache to be resized.
810 The parameter key_cache_block_size specifies the new size of the blocks in
811 the key cache. The parameters division_limit and age_threshold
812 determine the new initial values of those characteristics of the key cache
813 that are used for midpoint insertion strategy. The parameter use_mem
814 specifies the total amount of memory to be allocated for key cache blocks
815 and auxiliary structures in the new key cache.
816
817 RETURN VALUE
818 number of blocks in the key cache, if successful,
819 0 - otherwise.
820
821 NOTES.
822 The function first calls the function prepare_resize_simple_key_cache
823 to flush all dirty blocks from key cache, to free memory used
824 for key cache blocks and auxiliary structures. After this the
825 function builds a new key cache with new parameters.
826
827 This implementation doesn't block the calls and executions of other
828 functions from the key cache interface. However it assumes that the
829 calls of resize_simple_key_cache itself are serialized.
830
831 The function starts the operation only when all other threads
832 performing operations with the key cache let her to proceed
833 (when cnt_for_resize=0).
834 */
835
static
int resize_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache,
                            uint key_cache_block_size,
                            size_t use_mem, uint division_limit,
                            uint age_threshold, uint changed_blocks_hash_size)
{
  /* Number of blocks in the rebuilt cache; 0 is returned on failure. */
  int blocks= 0;
  DBUG_ENTER("resize_simple_key_cache");

  /* Resizing a cache that was never initialized is a caller bug. */
  DBUG_ASSERT(keycache->key_cache_inited);

  /*
    Note that the cache_lock mutex and the resize_queue are left untouched.
    We do not lose the cache_lock and will release it only at the end of
    this function.
  */
  if (prepare_resize_simple_key_cache(keycache, 0))
    goto finish;

  /* The following will work even if use_mem is 0 */
  blocks= init_simple_key_cache(keycache, key_cache_block_size, use_mem,
                                division_limit, age_threshold,
                                changed_blocks_hash_size);

finish:
  /* Always clear in_resize, wake waiters and release the cache lock. */
  finish_resize_simple_key_cache(keycache);

  DBUG_RETURN(blocks);
}
865
866
867 /*
868 Increment counter blocking resize key cache operation
869 */
static inline void inc_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
{
  /* One more in-flight cache operation; a resize must wait until it ends. */
  ++keycache->cnt_for_resize_op;
}
874
875
876 /*
877 Decrement counter blocking resize key cache operation;
878 Signal the operation to proceed when counter becomes equal zero
879 */
static inline void dec_counter_for_resize_op(SIMPLE_KEY_CACHE_CB *keycache)
{
  /* One in-flight operation finished. */
  keycache->cnt_for_resize_op--;
  /* When the last one completes, let a pending resize proceed. */
  if (keycache->cnt_for_resize_op == 0)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}
885
886
887 /*
888 Change key cache parameters of a simple key cache
889
890 SYNOPSIS
891 change_simple_key_cache_param()
892 keycache pointer to the control block of a simple key cache
893 division_limit new division limit (if not zero)
894 age_threshold new age threshold (if not zero)
895
896 DESCRIPTION
897 This function is the implementation of the change_key_cache_param interface
898 function that is employed by simple (non-partitioned) key caches.
899 The function takes the parameter keycache as a pointer to the
900 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
901 cache where new values of the division limit and the age threshold used
902 for midpoint insertion strategy are to be set. The parameters
903 division_limit and age_threshold provide these new values.
904
905 RETURN VALUE
906 none
907
908 NOTES.
909 Presently the function resets the key cache parameters concerning
910 midpoint insertion strategy - division_limit and age_threshold.
911 This function changes some parameters of a given key cache without
912 reformatting it. The function does not touch the contents the key
913 cache blocks.
914 */
915
916 static
change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB * keycache,uint division_limit,uint age_threshold)917 void change_simple_key_cache_param(SIMPLE_KEY_CACHE_CB *keycache, uint division_limit,
918 uint age_threshold)
919 {
920 DBUG_ENTER("change_simple_key_cache_param");
921 keycache_pthread_mutex_lock(&keycache->cache_lock);
922 if (division_limit)
923 keycache->min_warm_blocks= (keycache->disk_blocks *
924 division_limit / 100 + 1);
925 if (age_threshold)
926 keycache->age_threshold= (keycache->disk_blocks *
927 age_threshold / 100);
928 keycache_pthread_mutex_unlock(&keycache->cache_lock);
929 DBUG_VOID_RETURN;
930 }
931
932
933 /*
934 Destroy a simple key cache
935
936 SYNOPSIS
937 end_simple_key_cache()
938 keycache pointer to the control block of a simple key cache
939 cleanup <=> complete free (free also mutex for key cache)
940
941 DESCRIPTION
942 This function is the implementation of the end_key_cache interface
943 function that is employed by simple (non-partitioned) key caches.
944 The function takes the parameter keycache as a pointer to the
945 control block structure of the type SIMPLE_KEY_CACHE_CB for the simple key
946 cache to be destroyed.
947 The function frees the memory allocated for the key cache blocks and
948 auxiliary structures. If the value of the parameter cleanup is TRUE
949 then even the key cache mutex is freed.
950
951 RETURN VALUE
952 none
953 */
954
static
void end_simple_key_cache(SIMPLE_KEY_CACHE_CB *keycache, my_bool cleanup)
{
  DBUG_ENTER("end_simple_key_cache");
  DBUG_PRINT("enter", ("key_cache: %p", keycache));

  /* Nothing to do for a cache that was never initialized. */
  if (!keycache->key_cache_inited)
    DBUG_VOID_RETURN;

  if (keycache->disk_blocks > 0)
  {
    if (keycache->block_mem)
    {
      /* Free the large page buffer area and the block descriptor array. */
      my_large_free((uchar*) keycache->block_mem, keycache->allocated_mem_size);
      keycache->block_mem= NULL;
      my_free(keycache->block_root);
      keycache->block_root= NULL;
    }
    /* Negative disk_blocks marks the cache as having no blocks at all. */
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
                        "writes: %lu r_requests: %lu reads: %lu",
                        keycache->blocks_used, keycache->global_blocks_changed,
                        (ulong) keycache->global_cache_w_requests,
                        (ulong) keycache->global_cache_write,
                        (ulong) keycache->global_cache_r_requests,
                        (ulong) keycache->global_cache_read));

  /*
    Reset these values to be able to detect a disabled key cache.
    See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
  */
  keycache->blocks_used= 0;
  keycache->blocks_unused= 0;

  if (cleanup)
  {
    /* Complete destruction: free the mutex and mark the cache unusable. */
    mysql_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
    KEYCACHE_DEBUG_CLOSE;
  }
  DBUG_VOID_RETURN;
} /* end_key_cache */
1001
1002
1003 /*
1004 Link a thread into double-linked queue of waiting threads.
1005
1006 SYNOPSIS
1007 link_into_queue()
1008 wqueue pointer to the queue structure
1009 thread pointer to the thread to be added to the queue
1010
1011 RETURN VALUE
1012 none
1013
1014 NOTES.
1015 Queue is represented by a circular list of the thread structures
1016 The list is double-linked of the type (**prev,*next), accessed by
1017 a pointer to the last element.
1018 */
1019
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                            struct st_my_thread_var *thread)
{
  struct st_my_thread_var *last;
  /* The thread must not already be linked into any queue. */
  DBUG_ASSERT(!thread->next && !thread->prev);

  if (! (last= wqueue->last_thread))
  {
    /* Queue is empty */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    /* Sanity: the circular (**prev,*next) invariant holds for the head. */
    DBUG_ASSERT(last->next->prev == &last->next);
    /* Add backlink to previous element */
    thread->prev= last->next->prev;
    /* Fix first in list to point backwords to current */
    last->next->prev= &thread->next;
    /* Next should point to the first element in list */
    thread->next= last->next;
    /* Fix old element to point to new one */
    last->next= thread;
  }
  /* The queue is accessed via a pointer to its newest (last) element. */
  wqueue->last_thread= thread;
}
1046
1047 /*
1048 Unlink a thread from double-linked queue of waiting threads
1049
1050 SYNOPSIS
1051 unlink_from_queue()
1052 wqueue pointer to the queue structure
1053 thread pointer to the thread to be removed from the queue
1054
1055 RETURN VALUE
1056 none
1057
1058 NOTES.
1059 See NOTES for link_into_queue
1060 */
1061
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                              struct st_my_thread_var *thread)
{
  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", (ulong) thread->id));
  /* The thread must currently be linked into this queue. */
  DBUG_ASSERT(thread->next && thread->prev);

  if (thread->next == thread)
  {
    /* The queue contains only one member */
    wqueue->last_thread= NULL;
  }
  else
  {
    /* Remove current element from list */
    thread->next->prev= thread->prev;
    *thread->prev= thread->next;
    /* If first element, change list pointer to point to previous element */
    if (wqueue->last_thread == thread)
      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
                                      thread->prev);
  }
  /* next == NULL is what wait_on_queue() tests to detect it was dequeued. */
  thread->next= NULL;
#ifdef DBUG_ASSERT_EXISTS
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  thread->prev= NULL;
#endif
}
1092
1093
1094 /*
1095 Add a thread to single-linked queue of waiting threads
1096
1097 SYNOPSIS
1098 wait_on_queue()
1099 wqueue Pointer to the queue structure.
1100 mutex Cache_lock to acquire after awake.
1101
1102 RETURN VALUE
1103 none
1104
1105 NOTES.
1106 Queue is represented by a circular list of the thread structures
1107 The list is single-linked of the type (*next), accessed by a pointer
1108 to the last element.
1109
1110 The function protects against stray signals by verifying that the
1111 current thread is unlinked from the queue when awaking. However,
1112 since several threads can wait for the same event, it might be
1113 necessary for the caller of the function to check again if the
1114 condition for awake is indeed matched.
1115 */
1116
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          mysql_mutex_t *mutex)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread= my_thread_var;
  /* The current thread must not already be on any queue. */
  DBUG_ASSERT(!thread->next);
  DBUG_ASSERT(!thread->prev); /* Not required, but must be true anyway. */
  mysql_mutex_assert_owner(mutex);

  /* Add to queue. */
  if (! (last= wqueue->last_thread))
    thread->next= thread;
  else
  {
    thread->next= last->next;
    last->next= thread;
  }
  wqueue->last_thread= thread;

  /*
    Wait until thread is removed from queue by the signaling thread.
    The loop protects against stray signals: the signaler sets
    thread->next= NULL before signaling, so a non-NULL next means the
    wakeup was spurious. Note that cond_wait releases the mutex while
    blocked and re-acquires it before returning.
  */
  do
  {
    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", (ulong) thread->id));
    keycache_pthread_cond_wait(&thread->suspend, mutex);
  }
  while (thread->next);
}
1147
1148
1149 /*
1150 Remove all threads from queue signaling them to proceed
1151
1152 SYNOPSIS
1153 release_whole_queue()
1154 wqueue pointer to the queue structure
1155
1156 RETURN VALUE
1157 none
1158
1159 NOTES.
1160 See notes for wait_on_queue().
1161 When removed from the queue each thread is signaled via condition
1162 variable thread->suspend.
1163 */
1164
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *last;
  struct st_my_thread_var *next;
  struct st_my_thread_var *thread;

  /* Queue may be empty. */
  if (!(last= wqueue->last_thread))
    return;

  next= last->next; /* First (oldest) element */
  do
  {
    thread=next;
    DBUG_ASSERT(thread && thread->init == 1);
    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
                        ("thread %ld", (ulong) thread->id));
    /*
      Take thread from queue. Clearing thread->next BEFORE signaling is
      what lets wait_on_queue() distinguish a real wakeup from a stray one.
    */
    next= thread->next;
    thread->next= NULL;

    /* Signal the thread. */
    keycache_pthread_cond_signal(&thread->suspend);
  }
  while (thread != last);

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
1194
1195
1196 /*
1197 Unlink a block from the chain of dirty/clean blocks
1198 */
1199
static inline void unlink_changed(BLOCK_LINK *block)
{
  /* Detach block from whichever dirty/clean file chain it is linked in. */
  BLOCK_LINK *succ= block->next_changed;

  /* Block must actually be linked: its backlink must point back at it. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  if (succ)
    succ->prev_changed= block->prev_changed;
  *block->prev_changed= succ;

#ifdef DBUG_ASSERT_EXISTS
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->next_changed= NULL;
  block->prev_changed= NULL;
#endif
}
1216
1217
1218 /*
1219 Link a block into the chain of dirty/clean blocks
1220 */
1221
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  /* Push block onto the front of the chain headed by *phead. */
  BLOCK_LINK *old_head= *phead;

  /* Block must not be linked anywhere yet. */
  DBUG_ASSERT(!block->next_changed);
  DBUG_ASSERT(!block->prev_changed);

  block->prev_changed= phead;
  block->next_changed= old_head;
  if (old_head)
    old_head->prev_changed= &block->next_changed;
  *phead= block;
}
1231
1232
1233 /*
1234 Link a block in a chain of clean blocks of a file.
1235
1236 SYNOPSIS
1237 link_to_file_list()
1238 keycache Key cache handle
1239 block Block to relink
1240 file File to be linked to
1241 unlink If to unlink first
1242
1243 DESCRIPTION
1244 Unlink a block from whichever chain it is linked in, if it's
1245 asked for, and link it to the chain of clean blocks of the
1246 specified file.
1247
1248 NOTE
1249 Please do never set/clear BLOCK_CHANGED outside of
1250 link_to_file_list() or link_to_changed_list().
1251 You would risk to damage correct counting of changed blocks
1252 and to find blocks in the wrong hash.
1253
1254 RETURN
1255 void
1256 */
1257
static void link_to_file_list(SIMPLE_KEY_CACHE_CB *keycache,
                              BLOCK_LINK *block, int file,
                              my_bool unlink_block)
{
  /*
    Put the block into the clean-block chain of 'file', optionally
    detaching it from its current chain first. Clearing BLOCK_CHANGED
    must happen here (or in link_to_changed_list) only, to keep the
    changed-block counters consistent.
  */
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->hash_link->file == file);

  if (unlink_block)
    unlink_changed(block);
  link_changed(block, &keycache->file_blocks[FILE_HASH(file, keycache)]);

  if (!(block->status & BLOCK_CHANGED))
    return;

  /* Block was dirty: mark it clean and adjust the dirty-block counters. */
  block->status&= ~BLOCK_CHANGED;
  keycache->blocks_changed--;
  keycache->global_blocks_changed--;
}
1275
1276
1277 /*
1278 Re-link a block from the clean chain to the dirty chain of a file.
1279
1280 SYNOPSIS
1281 link_to_changed_list()
1282 keycache key cache handle
1283 block block to relink
1284
1285 DESCRIPTION
1286 Unlink a block from the chain of clean blocks of a file
1287 and link it to the chain of dirty blocks of the same file.
1288
1289 NOTE
1290 Please do never set/clear BLOCK_CHANGED outside of
1291 link_to_file_list() or link_to_changed_list().
1292 You would risk to damage correct counting of changed blocks
1293 and to find blocks in the wrong hash.
1294
1295 RETURN
1296 void
1297 */
1298
static void link_to_changed_list(SIMPLE_KEY_CACHE_CB *keycache,
                                 BLOCK_LINK *block)
{
  /*
    Move a clean block of a file into the dirty-block chain of the same
    file. Setting BLOCK_CHANGED must happen here (or in
    link_to_file_list) only, to keep the changed-block counters correct.
  */
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);

  /* Detach from the clean chain, then attach to this file's dirty chain. */
  unlink_changed(block);
  link_changed(block,
               &keycache->changed_blocks[FILE_HASH(block->hash_link->file,
                                                   keycache)]);

  /* Account for one more dirty block, per-cache and globally. */
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
  block->status|= BLOCK_CHANGED;
}
1313
1314
1315 /*
1316 Link a block to the LRU chain at the beginning or at the end of
1317 one of two parts.
1318
1319 SYNOPSIS
1320 link_block()
1321 keycache pointer to a key cache data structure
1322 block pointer to the block to link to the LRU chain
1323 hot <-> to link the block into the hot subchain
1324 at_end <-> to link the block at the end of the subchain
1325
1326 RETURN VALUE
1327 none
1328
1329 NOTES.
1330 The LRU ring is represented by a circular list of block structures.
1331 The list is double-linked of the type (**prev,*next) type.
1332 The LRU ring is divided into two parts - hot and warm.
1333 There are two pointers to access the last blocks of these two
1334 parts. The beginning of the warm part follows right after the
1335 end of the hot part.
1336 Only blocks of the warm part can be used for eviction.
1337 The first block from the beginning of this subchain is always
1338 taken for eviction (keycache->last_used->next)
1339
1340 LRU chain: +------+ H O T +------+
1341 +----| end |----...<----| beg |----+
1342 | +------+last +------+ |
1343 v<-link in latest hot (new end) |
1344 | link in latest warm (new end)->^
1345 | +------+ W A R M +------+ |
1346 +----| beg |---->...----| end |----+
1347 +------+ +------+ins
1348 first for eviction
1349
1350 It is also possible that the block is selected for eviction and thus
1351 not linked in the LRU ring.
1352 */
1353
static void link_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block,
                       my_bool hot, my_bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  /* Block must be a clean-state, unreferenced block linked in a file chain. */
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_block.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->keycache_link;
    struct st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if ((HASH_LINK *) thread->keycache_link == hash_link)
      {
        KEYCACHE_DBUG_PRINT("link_block: signal",
                            ("thread %ld", (ulong) thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        /* Each awakened requester holds one reference on the block. */
        block->requests++;
      }
    }
    while (thread != last_thread);
    /* Hand the block over to the page the waiters asked for. */
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads runs
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They do only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that is has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u status=%x #requests=%u #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    /* Block given away for eviction: it is NOT linked into the LRU ring. */
    return;
  }
  /* Choose the insertion point: end of hot sub-chain or end of warm one. */
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    /* Insert block right after 'ins' in the circular LRU ring. */
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u status=%x #requests=%u #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
1454
1455
1456 /*
1457 Unlink a block from the LRU chain
1458
1459 SYNOPSIS
1460 unlink_block()
1461 keycache pointer to a key cache data structure
1462 block pointer to the block to unlink from the LRU chain
1463
1464 RETURN VALUE
1465 none
1466
1467 NOTES.
1468 See NOTES for link_block
1469 */
1470
static void unlink_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
{
  /* Block must be clean-state, unreferenced and consistently linked. */
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    /* Splice the block out of the circular (**prev,*next) LRU ring. */
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /* If the block was an end-of-sub-chain marker, move the marker back. */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
#ifdef DBUG_ASSERT_EXISTS
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->prev_used= NULL;
#endif

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block %u status=%x #requests=%u #available=%u",
     BLOCK_NUMBER(block), block->status,
     block->requests, keycache->blocks_available));
#endif
}
1511
1512
1513 /*
1514 Register requests for a block.
1515
1516 SYNOPSIS
1517 reg_requests()
1518 keycache Pointer to a key cache data structure.
1519 block Pointer to the block to register a request on.
1520 count Number of requests. Always 1.
1521
1522 NOTE
1523 The first request unlinks the block from the LRU ring. This means
1524 that it is protected against eveiction.
1525
1526 RETURN
1527 void
1528 */
static void reg_requests(SIMPLE_KEY_CACHE_CB *keycache,
                         BLOCK_LINK *block, int count)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link);

  /*
    The first request takes the block out of the LRU ring, which
    protects it from being chosen for eviction while in use.
  */
  if (block->requests == 0)
    unlink_block(keycache, block);
  block->requests+= count;
}
1539
1540
1541 /*
1542 Unregister request for a block
1543 linking it to the LRU chain if it's the last request
1544
1545 SYNOPSIS
1546 unreg_request()
1547 keycache pointer to a key cache data structure
1548 block pointer to the block to link to the LRU chain
1549 at_end <-> to link the block at the end of the LRU chain
1550
1551 RETURN VALUE
1552 none
1553
1554 NOTES.
1555 Every linking to the LRU ring decrements by one a special block
1556 counter (if it's positive). If the at_end parameter is TRUE the block is
1557 added either at the end of warm sub-chain or at the end of hot sub-chain.
1558 It is added to the hot subchain if its counter is zero and number of
1559 blocks in warm sub-chain is not less than some low limit (determined by
1560 the division_limit parameter). Otherwise the block is added to the warm
1561 sub-chain. If the at_end parameter is FALSE the block is always added
1562 at beginning of the warm sub-chain.
1563 Thus a warm block can be promoted to the hot sub-chain when its counter
1564 becomes zero for the first time.
1565 At the same time the block at the very beginning of the hot subchain
1566 might be moved to the beginning of the warm subchain if it stays untouched
1567 for a too long time (this time is determined by parameter age_threshold).
1568
1569 It is also possible that the block is selected for eviction and thus
1570 not linked in the LRU ring.
1571 */
1572
static void unreg_request(SIMPLE_KEY_CACHE_CB *keycache,
                          BLOCK_LINK *block, int at_end)
{
  /* Block must be in use, referenced, file-chained and not in the LRU ring. */
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /*
    Unregister the request, but do not link erroneous blocks into the
    LRU ring.
  */
  if (!--block->requests && !(block->status & BLOCK_ERROR))
  {
    my_bool hot;
    if (block->hits_left)
      block->hits_left--;
    /*
      Promote the block to the hot sub-chain only when its hit budget is
      spent, insertion at the end was requested, and the warm sub-chain
      is still above its minimum size.
    */
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    /* Stamp the block with the current logical clock, then advance it. */
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      /* Demote an aged hot block to the beginning of the warm sub-chain. */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
  }
}
1635
1636 /*
1637 Remove a reader of the page in block
1638 */
1639
static void remove_reader(BLOCK_LINK *block)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  DBUG_ASSERT(block->hash_link->requests);

  /* Drop this reader's reference on the page. */
  block->hash_link->requests--;
  /* If that was the last reader, wake a thread parked in wait_for_readers(). */
  if (block->hash_link->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1651
1652
1653 /*
1654 Wait until the last reader of the page in block
1655 signals on its termination
1656 */
1657
static void wait_for_readers(SIMPLE_KEY_CACHE_CB *keycache,
                             BLOCK_LINK *block)
{
  struct st_my_thread_var *thread= my_thread_var;
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
  DBUG_ASSERT(block->hash_link);
  DBUG_ASSERT(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /* Loop until remove_reader() drops the request count to zero. */
  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld block %u",
                         (ulong) thread->id, BLOCK_NUMBER(block)));
    /* There must be no other waiter. We have no queue here. */
    DBUG_ASSERT(!block->condvar);
    /* Publish our condvar so the last reader knows whom to signal. */
    block->condvar= &thread->suspend;
    /* cache_lock is released while waiting and re-acquired on wakeup. */
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}
1683
1684
1685 /*
1686 Add a hash link to a bucket in the hash_table
1687 */
1688
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  /* Push hash_link onto the front of the bucket list headed by *start. */
  HASH_LINK *old_first= *start;

  hash_link->next= old_first;
  hash_link->prev= start;
  if (old_first)
    old_first->prev= &hash_link->next;
  *start= hash_link;
}
1697
1698
1699 /*
1700 Remove a hash link from the hash table
1701 */
1702
static void unlink_hash(SIMPLE_KEY_CACHE_CB *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  /* Only hash links with no pending page requests may be unlinked. */
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
  /* Splice the link out of its hash bucket chain. */
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;
  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
      keycache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->keycache_link);
    struct st_my_thread_var *thread;

    /* Re-purpose this link for the page the oldest waiter asked for. */
    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;
    do
    {
      KEYCACHE_PAGE *page;
      thread= next_thread;
      page= (KEYCACHE_PAGE *) thread->keycache_link;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal",
                            ("thread %ld", (ulong) thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    /* Insert the recycled link into the bucket of its new page. */
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
					         hash_link->diskpos)],
	      hash_link);
    return;
  }
  /* No waiters: return the hash link to the free list. */
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}
1750
1751
1752 /*
1753 Get the hash link for a page
1754 */
1755
static HASH_LINK *get_hash_link(SIMPLE_KEY_CACHE_CB *keycache,
                                int file, my_off_t filepos)
{
  reg1 HASH_LINK *hash_link, **start;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
                      (uint) file,(ulong) filepos));

restart:
  /*
    Find the bucket in the hash table for the pair (file, filepos);
    start contains the head of the bucket list,
    hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
#if defined(KEYCACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
    /* Debug build: detect a corrupted (cyclic or over-long) bucket chain. */
    cnt++;
    if (! (cnt <= keycache->hash_links_used))
    {
      int i;
      for (i=0, hash_link= *start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu",
            (uint) hash_link->file,(ulong) hash_link->diskpos));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Reuse a previously freed hash link. */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take a never-used link from the preallocated pool. */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      /*
        Publish the requested (file, filepos) via a stack-allocated page
        descriptor; unlink_hash() reads it to decide whom to wake and to
        rebind the freed link. The descriptor must stay valid until we
        are woken, which it is because we block right here.
      */
      page.file= file;
      page.filepos= filepos;
      thread->keycache_link= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                          ("suspend thread %ld", (ulong) thread->id));
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->keycache_link= NULL;
      /*
        The waker may have rebound a link to our page already; re-run
        the lookup from scratch to find it (or another thread's work).
      */
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}
1835
1836
1837 /*
1838 Get a block for the file page requested by a keycache read/write operation;
1839 If the page is not in the cache return a free block, if there is none
1840 return the lru block after saving its buffer if the page is dirty.
1841
1842 SYNOPSIS
1843
1844 find_key_block()
1845 keycache pointer to a key cache data structure
1846 file handler for the file to read page from
1847 filepos position of the page in the file
1848 init_hits_left how initialize the block counter for the page
1849 wrmode <-> get for writing
1850 page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1851
1852 RETURN VALUE
1853 Pointer to the found block if successful, 0 - otherwise
1854
1855 NOTES.
1856 For the page from file positioned at filepos the function checks whether
1857 the page is in the key cache specified by the first parameter.
1858 If this is the case it immediately returns the block.
    If not, the function first chooses a block for this page. If there are
    no unused blocks in the key cache yet, the function takes the block
1861 at the very beginning of the warm sub-chain. It saves the page in that
1862 block if it's dirty before returning the pointer to it.
1863 The function returns in the page_st parameter the following values:
1864 PAGE_READ - if page already in the block,
1865 PAGE_TO_BE_READ - if it is to be read yet by the current thread
      PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1867 If an error occurs THE BLOCK_ERROR bit is set in the block status.
    It might happen that there are no blocks in the LRU chain (in the warm
    part) - all blocks are unlinked for some read/write operations. Then the
    function waits until the first of these operations links a block back.
1871 */
1872
static BLOCK_LINK *find_key_block(SIMPLE_KEY_CACHE_CB *keycache,
                                  File file, my_off_t filepos,
                                  int init_hits_left,
                                  int wrmode, int *page_st)
{
  HASH_LINK *hash_link;
  BLOCK_LINK *block;
  int error= 0;
  int page_status;

  DBUG_ENTER("find_key_block");
  KEYCACHE_THREAD_TRACE("find_key_block:begin");
  DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d",
                       file, (ulong) filepos, wrmode));
  KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d pos: %lu wrmode: %d",
                                         file, (ulong) filepos,
                                         wrmode));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache2",
               test_key_cache(keycache, "start of find_key_block", 0););
#endif

restart:
  /*
    If the flush phase of a resize operation fails, the cache is left
    unusable. This will be detected only after "goto restart".
  */
  if (!keycache->can_be_used)
    DBUG_RETURN(0);

  /*
    Find the hash_link for the requested file block (file, filepos). We
    do always get a hash_link here. It has registered our request so
    that no other thread can use it for another file block until we
    release the request (which is done by remove_reader() usually). The
    hash_link can have a block assigned to it or not. If there is a
    block, it may be assigned to this hash_link or not. In cases where a
    block is evicted from the cache, it is taken from the LRU ring and
    referenced by the new hash_link. But the block can still be assigned
    to its old hash_link for some time if it needs to be flushed first,
    or if there are other threads still reading it.

    Summary:
      hash_link is always returned.
      hash_link->block can be:
      - NULL or
      - not assigned to this hash_link or
      - assigned to this hash_link. If assigned, the block can have
        - invalid data (when freshly assigned) or
        - valid data. Valid data can be
          - changed over the file contents (dirty) or
          - not changed (clean).
  */
  hash_link= get_hash_link(keycache, file, filepos);
  DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));

  page_status= -1;
  if ((block= hash_link->block) &&
      block->hash_link == hash_link && (block->status & BLOCK_READ))
  {
    /* Assigned block with valid (changed or unchanged) contents. */
    page_status= PAGE_READ;
  }
  /*
    else (page_status == -1)
      - block == NULL or
      - block not assigned to this hash_link or
      - block assigned but not yet read from file (invalid data).
  */

  if (keycache->in_resize)
  {
    /* This is a request during a resize operation */

    if (!block)
    {
      struct st_my_thread_var *thread;

      /*
        The file block is not in the cache. We don't need it in the
        cache: we are going to read or write directly to file. Cancel
        the request. We can simply decrement hash_link->requests because
        we did not release cache_lock since increasing it. So no other
        thread can wait for our request to become released.
      */
      if (hash_link->requests == 1)
      {
        /*
          We are the only one to request this hash_link (this file/pos).
          Free the hash_link.
        */
        hash_link->requests--;
        unlink_hash(keycache, hash_link);
        DBUG_RETURN(0);
      }

      /*
        More requests on the hash_link. Someone tries to evict a block
        for this hash_link (could have started before resizing started).
        This means that the LRU ring is empty. Otherwise a block could
        be assigned immediately. Behave like a thread that wants to
        evict a block for this file/pos. Add to the queue of threads
        waiting for a block. Wait until there is one assigned.

        Refresh the request on the hash-link so that it cannot be reused
        for another file/pos.
      */
      thread= my_thread_var;
      thread->keycache_link= (void *) hash_link;
      link_into_queue(&keycache->waiting_for_block, thread);
      do
      {
        KEYCACHE_DBUG_PRINT("find_key_block: wait",
                            ("suspend thread %ld", (ulong) thread->id));
        keycache_pthread_cond_wait(&thread->suspend,
                                   &keycache->cache_lock);
      } while (thread->next);
      thread->keycache_link= NULL;
      /*
        A block should now be assigned to the hash_link. But it may
        still need to be evicted. Anyway, we should re-check the
        situation. page_status must be set correctly.
      */
      hash_link->requests--;
      goto restart;
    } /* end of if (!block) */

    /*
      There is a block for this file/pos in the cache. Register a
      request on it. This unlinks it from the LRU ring (if it is there)
      and hence protects it against eviction (if not already in
      eviction). We need this for returning the block to the caller, for
      calling remove_reader() (for debugging purposes), and for calling
      free_block(). The only case where we don't need the request is if
      the block is in eviction. In that case we have to unregister the
      request later.
    */
    reg_requests(keycache, block, 1);

    if (page_status != PAGE_READ)
    {
      /*
        - block not assigned to this hash_link or
        - block assigned but not yet read from file (invalid data).

        This must be a block in eviction. It will be read soon. We need
        to wait here until this happened. Otherwise the caller could
        access a wrong block or a block which is in read. While waiting
        we cannot lose hash_link nor block. We have registered a request
        on the hash_link. Everything can happen to the block but changes
        in the hash_link -> block relationship. In other words:
        everything can happen to the block but free or another completed
        eviction.

        Note that we behave like a secondary requestor here. We just
        cannot return with PAGE_WAIT_TO_BE_READ. This would work for
        read requests and writes on dirty blocks that are not in flush
        only. Waiting here on COND_FOR_REQUESTED works in all
        situations.
      */
      DBUG_ASSERT(((block->hash_link != hash_link) &&
                   (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
                  ((block->hash_link == hash_link) &&
                   !(block->status & BLOCK_READ)));
      wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
      /*
        Here we can trust that the block has been assigned to this
        hash_link (block->hash_link == hash_link) and read into the
        buffer (BLOCK_READ). The worst things possible here are that the
        block is in free (BLOCK_REASSIGNED). But the block is still
        assigned to the hash_link. The freeing thread waits until we
        release our request on the hash_link. The block must not be
        again in eviction because we registered a request on it before
        starting to wait.
      */
      DBUG_ASSERT(block->hash_link == hash_link);
      DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
    }
    /*
      The block is in the cache. Assigned to the hash_link. Valid data.
      Note that in case of page_st == PAGE_READ, the block can be marked
      for eviction. In any case it can be marked for freeing.
    */

    if (!wrmode)
    {
      /* A reader can just read the block. */
      *page_st= PAGE_READ;
      DBUG_ASSERT((hash_link->file == file) &&
                  (hash_link->diskpos == filepos) &&
                  (block->hash_link == hash_link));
      DBUG_RETURN(block);
    }

    /*
      This is a writer. No two writers for the same block can exist.
      This must be assured by locks outside of the key cache.
    */
    DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));

    while (block->status & BLOCK_IN_FLUSH)
    {
      /*
        Wait until the block is flushed to file. Do not release the
        request on the hash_link yet to prevent that the block is freed
        or reassigned while we wait. While we wait, several things can
        happen to the block, including another flush. But the block
        cannot be reassigned to another hash_link until we release our
        request on it. But it can be marked BLOCK_REASSIGNED from free
        or eviction, while they wait for us to release the hash_link.
      */
      wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
      /*
        If the flush phase failed, the resize could have finished while
        we waited here.
      */
      if (!keycache->in_resize)
      {
        remove_reader(block);
        unreg_request(keycache, block, 1);
        goto restart;
      }
      DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
      DBUG_ASSERT(block->hash_link == hash_link);
    }

    if (block->status & BLOCK_CHANGED)
    {
      /*
        We want to write a block with changed contents. If the cache
        block size is bigger than the callers block size (e.g. MyISAM),
        the caller may replace part of the block only. Changes of the
        other part of the block must be preserved. Since the block has
        not yet been selected for flush, we can still add our changes.
      */
      *page_st= PAGE_READ;
      DBUG_ASSERT((hash_link->file == file) &&
                  (hash_link->diskpos == filepos) &&
                  (block->hash_link == hash_link));
      DBUG_RETURN(block);
    }

    /*
      This is a write request for a clean block. We do not want to have
      new dirty blocks in the cache while resizing. We will free the
      block and write directly to file. If the block is in eviction or
      in free, we just let it go.

      Unregister from the hash_link. This must be done before freeing
      the block. And it must be done if not freeing the block. Because
      we could have waited above, we need to call remove_reader(). Other
      threads could wait for us to release our request on the hash_link.
    */
    remove_reader(block);

    /* If the block is not in eviction and not in free, we can free it. */
    if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_REASSIGNED)))
    {
      /*
        Free block as we are going to write directly to file.
        Although we have an exclusive lock for the updated key part,
        the control can be yielded by the current thread as we might
        have unfinished readers of other key parts in the block
        buffer. Still we are guaranteed not to have any readers
        of the key part we are writing into until the block is
        removed from the cache as we set the BLOCK_REASSIGNED
        flag (see the code below that handles reading requests).
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        The block will be evicted/freed soon. Don't touch it in any way.
        Unregister the request that we registered above.
      */
      unreg_request(keycache, block, 1);

      /*
        The block is still assigned to the hash_link (the file/pos that
        we are going to write to). Wait until the eviction/free is
        complete. Otherwise the direct write could complete before all
        readers are done with the block. So they could read outdated
        data.

        Since we released our request on the hash_link, it can be reused
        for another file/pos. Hence we cannot just check for
        block->hash_link == hash_link. As long as the resize is
        proceeding the block cannot be reassigned to the same file/pos
        again. So we can terminate the loop when the block is no longer
        assigned to this file/pos.
      */
      do
      {
        wait_on_queue(&block->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
        /*
          If the flush phase failed, the resize could have finished
          while we waited here.
        */
        if (!keycache->in_resize)
          goto restart;
      } while (block->hash_link &&
               (block->hash_link->file == file) &&
               (block->hash_link->diskpos == filepos));
    }
    DBUG_RETURN(0);
  }

  if (page_status == PAGE_READ &&
      (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                        BLOCK_REASSIGNED)))
  {
    /*
      This is a request for a block to be removed from cache. The block
      is assigned to this hash_link and contains valid data, but is
      marked for eviction or to be freed. Possible reasons why it has
      not yet been evicted/freed can be a flush before reassignment
      (BLOCK_IN_SWITCH), readers of the block have not finished yet
      (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
      the block has been selected for it (BLOCK_IN_EVICTION).
    */

    KEYCACHE_DBUG_PRINT("find_key_block",
                        ("request for old page in block %u "
                         "wrmode: %d block->status: %d",
                         BLOCK_NUMBER(block), wrmode, block->status));
    /*
      Only reading requests can proceed until the old dirty page is flushed,
      all others are to be suspended, then resubmitted
    */
    if (!wrmode && !(block->status & BLOCK_REASSIGNED))
    {
      /*
        This is a read request and the block not yet reassigned. We can
        register our request and proceed. This unlinks the block from
        the LRU ring and protects it against eviction.
      */
      reg_requests(keycache, block, 1);
    }
    else
    {
      /*
        Either this is a write request for a block that is in eviction
        or in free. We must not use it any more. Instead we must evict
        another block. But we cannot do this before the eviction/free is
        done. Otherwise we would find the same hash_link + block again
        and again.

        Or this is a read request for a block in eviction/free that does
        not require a flush, but waits for readers to finish with the
        block. We do not read this block to let the eviction/free happen
        as soon as possible. Again we must wait so that we don't find
        the same hash_link + block again and again.
      */
      DBUG_ASSERT(hash_link->requests);
      hash_link->requests--;
      KEYCACHE_DBUG_PRINT("find_key_block",
                          ("request waiting for old page to be saved"));
      wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
      KEYCACHE_DBUG_PRINT("find_key_block",
                          ("request for old page resubmitted"));
      /*
        The block is no longer assigned to this hash_link.
        Get another one.
      */
      goto restart;
    }
  }
  else
  {
    /*
      This is a request for a new block or for a block not to be removed.
      Either
      - block == NULL or
      - block not assigned to this hash_link or
      - block assigned but not yet read from file,
      or
      - block assigned with valid (changed or unchanged) data and
      - it will not be reassigned/freed.
    */
    if (! block)
    {
      /* No block is assigned to the hash_link yet. */
      if (keycache->blocks_unused)
      {
        if (keycache->free_block_list)
        {
          /* There is a block in the free list. */
          block= keycache->free_block_list;
          keycache->free_block_list= block->next_used;
          block->next_used= NULL;
        }
        else
        {
          size_t block_mem_offset;
          /* There are some never used blocks, take first of them */
          DBUG_ASSERT(keycache->blocks_used <
                      (ulong) keycache->disk_blocks);
          block= &keycache->block_root[keycache->blocks_used];
          block_mem_offset=
            ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
          block->buffer= ADD_TO_PTR(keycache->block_mem,
                                    block_mem_offset,
                                    uchar*);
          keycache->blocks_used++;
          DBUG_ASSERT(!block->next_used);
        }
        DBUG_ASSERT(!block->prev_used);
        DBUG_ASSERT(!block->next_changed);
        DBUG_ASSERT(!block->prev_changed);
        DBUG_ASSERT(!block->hash_link);
        DBUG_ASSERT(!block->status);
        DBUG_ASSERT(!block->requests);
        /* Initialize the fresh block and bind it to this hash_link. */
        keycache->blocks_unused--;
        block->status= BLOCK_IN_USE;
        block->length= 0;
        block->offset= keycache->key_cache_block_size;
        block->requests= 1;
        block->temperature= BLOCK_COLD;
        block->hits_left= init_hits_left;
        block->last_hit_time= 0;
        block->hash_link= hash_link;
        hash_link->block= block;
        link_to_file_list(keycache, block, file, 0);
        page_status= PAGE_TO_BE_READ;
        KEYCACHE_DBUG_PRINT("find_key_block",
                            ("got free or never used block %u",
                             BLOCK_NUMBER(block)));
      }
      else
      {
        /*
          There are no free blocks and no never used blocks, use a block
          from the LRU ring.
        */

        if (! keycache->used_last)
        {
          /*
            The LRU ring is empty. Wait until a new block is added to
            it. Several threads might wait here for the same hash_link,
            all of them must get the same block. While waiting for a
            block, after a block is selected for this hash_link, other
            threads can run first before this one awakes. During this
            time interval other threads find this hash_link pointing to
            the block, which is still assigned to another hash_link. In
            this case the block is not marked BLOCK_IN_SWITCH yet, but
            it is marked BLOCK_IN_EVICTION.
          */

          struct st_my_thread_var *thread= my_thread_var;
          thread->keycache_link= (void *) hash_link;
          link_into_queue(&keycache->waiting_for_block, thread);
          do
          {
            KEYCACHE_DBUG_PRINT("find_key_block: wait",
                                ("suspend thread %ld", (ulong) thread->id));
            keycache_pthread_cond_wait(&thread->suspend,
                                       &keycache->cache_lock);
          }
          while (thread->next);
          thread->keycache_link= NULL;
          /* Assert that block has a request registered. */
          DBUG_ASSERT(hash_link->block->requests);
          /* Assert that block is not in LRU ring. */
          DBUG_ASSERT(!hash_link->block->next_used);
          DBUG_ASSERT(!hash_link->block->prev_used);
        }
        /*
          If we waited above, hash_link->block has been assigned by
          link_block(). Otherwise it is still NULL. In the latter case
          we need to grab a block from the LRU ring ourselves.
        */
        block= hash_link->block;
        if (! block)
        {
          /* Select the last block from the LRU ring. */
          block= keycache->used_last->next_used;
          block->hits_left= init_hits_left;
          block->last_hit_time= 0;
          hash_link->block= block;
          /*
            Register a request on the block. This unlinks it from the
            LRU ring and protects it against eviction.
          */
          DBUG_ASSERT(!block->requests);
          reg_requests(keycache, block,1);
          /*
            We do not need to set block->status|= BLOCK_IN_EVICTION here
            because we will set block->status|= BLOCK_IN_SWITCH
            immediately without releasing the lock in between. This does
            also support debugging. When looking at the block, one can
            see if the block has been selected by link_block() after the
            LRU ring was empty, or if it was grabbed directly from the
            LRU ring in this branch.
          */
        }

        /*
          If we had to wait above, there is a small chance that another
          thread grabbed this block for the same file block already. But
          in most cases the first condition is true.
        */
        if (block->hash_link != hash_link &&
            ! (block->status & BLOCK_IN_SWITCH) )
        {
          /* this is a primary request for a new page */
          block->status|= BLOCK_IN_SWITCH;

          KEYCACHE_DBUG_PRINT("find_key_block",
                              ("got block %u for new page", BLOCK_NUMBER(block)));

          if (block->status & BLOCK_CHANGED)
          {
            /* The block contains a dirty page - push it out of the cache */

            KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
            if (block->status & BLOCK_IN_FLUSH)
            {
              /*
                The block is marked for flush. If we do not wait here,
                it could happen that we write the block, reassign it to
                another file block, then, before the new owner can read
                the new file block, the flusher writes the cache block
                (which still has the old contents) to the new file block!
              */
              wait_on_queue(&block->wqueue[COND_FOR_SAVED],
                            &keycache->cache_lock);
              /*
                The block is marked BLOCK_IN_SWITCH. It should be left
                alone except for reading. No free, no write.
              */
              DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
              DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
                                             BLOCK_CHANGED |
                                             BLOCK_FOR_UPDATE)));
            }
            else
            {
              block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
              /*
                BLOCK_IN_EVICTION may be true or not. Other flags must
                have a fixed value.
              */
              DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                          (BLOCK_READ | BLOCK_IN_SWITCH |
                           BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
                           BLOCK_CHANGED | BLOCK_IN_USE));
              DBUG_ASSERT(block->hash_link);

              keycache_pthread_mutex_unlock(&keycache->cache_lock);
              /*
                The call is thread safe because only the current
                thread might change the block->hash_link value
              */
              error= (int)my_pwrite(block->hash_link->file,
                                    block->buffer + block->offset,
                                    block->length - block->offset,
                                    block->hash_link->diskpos + block->offset,
                                    MYF(MY_NABP | MY_WAIT_IF_FULL));
              keycache_pthread_mutex_lock(&keycache->cache_lock);

              /* Block status must not have changed. */
              DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                          (BLOCK_READ | BLOCK_IN_SWITCH |
                           BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
                           BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
              keycache->global_cache_write++;
            }
          }

          block->status|= BLOCK_REASSIGNED;
          /*
            The block comes from the LRU ring. It must have a hash_link
            assigned.
          */
          DBUG_ASSERT(block->hash_link);
          if (block->hash_link)
          {
            /*
              All pending requests for this page must be resubmitted.
              This must be done before waiting for readers. They could
              wait for the flush to complete. And we must also do it
              after the wait. Flushers might try to free the block while
              we wait. They would wait until the reassignment is
              complete. Also the block status must reflect the correct
              situation: The block is not changed nor in flush any more.
              Note that we must not change the BLOCK_CHANGED flag
              outside of link_to_file_list() so that it is always in the
              correct queue and the *blocks_changed counters are
              correct.
            */
            block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
            link_to_file_list(keycache, block, block->hash_link->file, 1);
            release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
            /*
              The block is still assigned to its old hash_link.
              Wait until all pending read requests
              for this page are executed
              (we could have avoided this waiting, if we had read
              a page in the cache in a sweep, without yielding control)
            */
            wait_for_readers(keycache, block);
            DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
                        block->prev_changed);
            /* The reader must not have been a writer. */
            DBUG_ASSERT(!(block->status & BLOCK_CHANGED));

            /* Wake flushers that might have found the block in between. */
            release_whole_queue(&block->wqueue[COND_FOR_SAVED]);

            /* Remove the hash link for the old file block from the hash. */
            unlink_hash(keycache, block->hash_link);

            /*
              For sanity checks link_to_file_list() asserts that block
              and hash_link refer to each other. Hence we need to assign
              the hash_link first, but then we would not know if it was
              linked before. Hence we would not know if to unlink it. So
              unlink it here and call link_to_file_list(..., FALSE).
            */
            unlink_changed(block);
          }
          /* Rebind the block to the new hash_link and reset its contents. */
          block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
          block->length= 0;
          block->offset= keycache->key_cache_block_size;
          block->hash_link= hash_link;
          link_to_file_list(keycache, block, file, 0);
          page_status= PAGE_TO_BE_READ;

          KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
          KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
        }
        else
        {
          /*
            Either (block->hash_link == hash_link),
            or (block->status & BLOCK_IN_SWITCH).

            This is for secondary requests for a new file block only.
            Either it is already assigned to the new hash_link meanwhile
            (if we had to wait due to empty LRU), or it is already in
            eviction by another thread. Since this block has been
            grabbed from the LRU ring and attached to this hash_link,
            another thread cannot grab the same block from the LRU ring
            anymore. If the block is in eviction already, it must become
            attached to the same hash_link and as such destined for the
            same file block.
          */
          KEYCACHE_DBUG_PRINT("find_key_block",
                              ("block->hash_link: %p hash_link: %p "
                               "block->status: %u", block->hash_link,
                               hash_link, block->status ));
          page_status= (((block->hash_link == hash_link) &&
                         (block->status & BLOCK_READ)) ?
                        PAGE_READ : PAGE_WAIT_TO_BE_READ);
        }
      }
    }
    else
    {
      /*
        Block is not NULL. This hash_link points to a block.
        Either
        - block not assigned to this hash_link (yet) or
        - block assigned but not yet read from file,
        or
        - block assigned with valid (changed or unchanged) data and
        - it will not be reassigned/freed.

        The first condition means hash_link points to a block in
        eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
        But then it is marked BLOCK_IN_EVICTION. See the NOTE in
        link_block(). In both cases it is destined for this hash_link
        and its file block address. When this hash_link got its block
        address, the block was removed from the LRU ring and cannot be
        selected for eviction (for another hash_link) again.

        Register a request on the block. This is another protection
        against eviction.
      */
      DBUG_ASSERT(((block->hash_link != hash_link) &&
                   (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
                  ((block->hash_link == hash_link) &&
                   !(block->status & BLOCK_READ)) ||
                  ((block->status & BLOCK_READ) &&
                   !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
      reg_requests(keycache, block, 1);
      KEYCACHE_DBUG_PRINT("find_key_block",
                          ("block->hash_link: %p hash_link: %p "
                           "block->status: %u", block->hash_link,
                           hash_link, block->status ));
      page_status= (((block->hash_link == hash_link) &&
                     (block->status & BLOCK_READ)) ?
                    PAGE_READ : PAGE_WAIT_TO_BE_READ);
    }
  }

  KEYCACHE_DBUG_ASSERT(page_status != -1);
  /* Same assert basically, but be very sure. */
  KEYCACHE_DBUG_ASSERT(block);
  /* Assert that block has a request and is not in LRU ring. */
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /* Assert that we return the correct block. */
  DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
              ((block->hash_link->file == file) &&
               (block->hash_link->diskpos == filepos)));
  *page_st=page_status;
  KEYCACHE_DBUG_PRINT("find_key_block",
                      ("fd: %d pos: %lu block->status: %u page_status: %d",
                       file, (ulong) filepos, block->status,
                       page_status));

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache2",
               test_key_cache(keycache, "end of find_key_block",0););
#endif
  KEYCACHE_THREAD_TRACE("find_key_block:end");
  DBUG_RETURN(block);
}
2599
2600
2601 /*
2602 Read into a key cache block buffer from disk.
2603
2604 SYNOPSIS
2605
2606 read_block_{primary|secondary}()
2607 keycache pointer to a key cache data structure
2608 block block to which buffer the data is to be read
2609 read_length size of data to be read
2610 min_length at least so much data must be read
2611
2612 RETURN VALUE
2613 None
2614
2615 NOTES.
2616 The function either reads a page data from file to the block buffer,
2617 or waits until another thread reads it. What page to read is determined
2618 by a block parameter - reference to a hash link for this page.
    If an error occurs, the BLOCK_ERROR bit is set in the block status.
2620 We do not report error when the size of successfully read
2621 portion is less than read_length, but not less than min_length.
2622 */
2623
/*
  Serve a primary read request: read the page for block->hash_link from
  its file into the block buffer, then wake all secondary requests that
  queued on the block while the read was in progress.

  On entry and on return keycache->cache_lock is held; it is released
  only around the actual my_pread() call. If fewer than min_length bytes
  were read, BLOCK_ERROR is set on the block instead of BLOCK_READ.
*/
static void read_block_primary(SIMPLE_KEY_CACHE_CB *keycache,
                               BLOCK_LINK *block, uint read_length,
                               uint min_length)
{
  size_t got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block_primary");

  /*
    This code is executed only by threads that submitted primary
    requests. Until block->status contains BLOCK_READ, all other
    request for the block become secondary requests. For a primary
    request the block must be properly initialized.
  */
  DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
              fail_block(block));
  DBUG_ASSERT((block->length == 0) || fail_block(block));
  DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
              fail_block(block));
  DBUG_ASSERT((block->requests > 0) || fail_block(block));

  KEYCACHE_DBUG_PRINT("read_block_primary",
                      ("page to be read by primary request"));

  keycache->global_cache_read++;
  /* Page is not in buffer yet, is to be read from disk */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
    Here other threads may step in and register as secondary readers.
    They will register in block->wqueue[COND_FOR_REQUESTED].
  */
  got_length= my_pread(block->hash_link->file, block->buffer,
                       read_length, block->hash_link->diskpos, MYF(0));
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    The block can now have been marked for free (in case of
    FLUSH_RELEASE). Otherwise the state must be unchanged.
  */
  DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
                                  BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
              fail_block(block));
  DBUG_ASSERT((block->length == 0) || fail_block(block));
  DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
              fail_block(block));
  DBUG_ASSERT((block->requests > 0) || fail_block(block));

  /*
    NOTE(review): my_pread() called with MYF(0) may return
    MY_FILE_ERROR ((size_t) -1) on a hard I/O error, which is NOT
    less than min_length, so such an error would be treated as a
    successful read here — verify against mysys/my_pread.c behavior.
  */
  if (got_length < min_length)
    block->status|= BLOCK_ERROR;
  else
  {
    block->status|= BLOCK_READ;
    block->length= (uint)got_length;
    /*
      Do not set block->offset here. If this block is marked
      BLOCK_CHANGED later, we want to flush only the modified part. So
      only a writer may set block->offset down from
      keycache->key_cache_block_size.
    */
  }
  KEYCACHE_DBUG_PRINT("read_block_primary",
                      ("primary request: new page in cache"));
  /* Signal that all pending requests for this page now can be processed */
  release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

  DBUG_ASSERT(keycache->can_be_used);
}
2692
2693
/*
  Serve a secondary read request: wait until the primary requester has
  finished reading the requested page into the cache block buffer.

  cache_lock is held on entry and on return; wait_on_queue() is given
  the lock, presumably releasing it for the duration of the wait
  (condition-wait semantics) — it is held again when the wait returns.
*/
static void read_block_secondary(SIMPLE_KEY_CACHE_CB *keycache,
                                 BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("read_block_secondary");

  /*
    This code is executed only by threads that submitted secondary
    requests. At this point it could happen that the cache block is
    not yet assigned to the hash_link for the requested file block.
    But at awake from the wait this should be the case. Unfortunately
    we cannot assert this here because we do not know the hash_link
    for the requested file block nor the file and position. So we have
    to assert this in the caller.
  */
  KEYCACHE_DBUG_PRINT("read_block_secondary",
                      ("secondary request waiting for new page to be read"));

  wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);

  KEYCACHE_DBUG_PRINT("read_block_secondary",
                      ("secondary request: new page in cache"));

  DBUG_ASSERT(keycache->can_be_used);
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
}
2719
2720
2721 /*
2722 Read a block of data from a simple key cache into a buffer
2723
2724 SYNOPSIS
2725
2726 simple_key_cache_read()
2727 keycache pointer to the control block of a simple key cache
2728 file handler for the file for the block of data to be read
2729 filepos position of the block of data in the file
2730 level determines the weight of the data
2731 buff buffer to where the data must be placed
2732 length length of the buffer
2733 block_length length of the read data from a key cache block
2734 return_buffer return pointer to the key cache buffer with the data
2735
2736 DESCRIPTION
2737 This function is the implementation of the key_cache_read interface
2738 function that is employed by simple (non-partitioned) key caches.
2739 The function takes the parameter keycache as a pointer to the
2740 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
2741 cache.
2742 In a general case the function reads a block of data from the key cache
2743 into the buffer buff of the size specified by the parameter length. The
2744 beginning of the block of data to be read is specified by the parameters
2745 file and filepos. The length of the read data is the same as the length
2746 of the buffer. The data is read into the buffer in key_cache_block_size
2747 increments. If the next portion of the data is not found in any key cache
2748 block, first it is read from file into the key cache.
2749 If the parameter return_buffer is not ignored and its value is TRUE, and
2750 the data to be read of the specified size block_length can be read from one
2751 key cache buffer, then the function returns a pointer to the data in the
2752 key cache buffer.
    The function takes into account the parameters block_length and
    return_buffer only in a single-threaded environment.
2755 The parameter 'level' is used only by the midpoint insertion strategy
2756 when the data or its portion cannot be found in the key cache.
2757
2758 RETURN VALUE
2759 Returns address from where the data is placed if successful, 0 - otherwise.
2760
2761 NOTES
2762 Filepos must be a multiple of 'block_length', but it doesn't
2763 have to be a multiple of key_cache_block_size;
2764 */
2765
uchar *simple_key_cache_read(SIMPLE_KEY_CACHE_CB *keycache,
                             File file, my_off_t filepos, int level,
                             uchar *buff, uint length,
                             uint block_length __attribute__((unused)),
                             int return_buffer __attribute__((unused)))
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  /* Remember the start of the caller's buffer: it is returned on success. */
  uchar *start= buff;
  DBUG_ENTER("simple_key_cache_read");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file, (ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    if (MYSQL_KEYCACHE_READ_START_ENABLED())
    {
      MYSQL_KEYCACHE_READ_START(my_filename(file), length,
                                (ulong) (keycache->blocks_used *
                                         keycache->key_cache_block_size),
                                (ulong) (keycache->blocks_unused *
                                         keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase read requests are allowed to bypass the cache for
      blocks not in the cache. find_key_block() returns NULL in this
      case.

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache, I/O is done
      in chunks of key_cache_block_size. Every chunk tries to use a
      cache block first. If the block size changes in the middle, a
      block could be missed and old data could be read.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Read data in key_cache_block_size increments */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
      {
        /*
          Jump out before filepos/length are adjusted for this
          iteration, so the no_key_cache path still sees the exact
          unread remainder.
        */
        KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
        goto no_key_cache;
      }
      /* Start reading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not read beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;

      MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);

      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Read directly from file.
        */
        keycache->global_cache_read++;
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
        error= (my_pread(file, (uchar*) buff, read_length,
                         filepos + offset, MYF(MY_NABP)) != 0);
        keycache_pthread_mutex_lock(&keycache->cache_lock);
        goto next_block;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st == PAGE_TO_BE_READ)
        {
          /* We are the primary requester: read the page ourselves. */
          MYSQL_KEYCACHE_READ_MISS();
          read_block_primary(keycache, block,
                             keycache->key_cache_block_size, read_length+offset);
        }
        else if (page_st == PAGE_WAIT_TO_BE_READ)
        {
          MYSQL_KEYCACHE_READ_MISS();
          /* The requested page is to be read into the block buffer */
          read_block_secondary(keycache, block);

          /*
            A secondary request must now have the block assigned to the
            requested file block.
          */
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
        }
        else if (block->length < read_length + offset)
        {
          /*
            Impossible if nothing goes wrong:
            this could only happen if we are using a file with
            small key blocks and are trying to read outside the file
          */
          my_errno= -1;
          block->status|= BLOCK_ERROR;
        }
        else
        {
          MYSQL_KEYCACHE_READ_HIT();
        }
      }

      /* block status may have added BLOCK_ERROR in the above 'if'. */
      if (!(block->status & BLOCK_ERROR))
      {
        {
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
          /*
            Unless reads are serialized, release the lock during the
            memcpy; our registered request keeps the block from being
            reassigned meanwhile.
          */
#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif

          /* Copy data from the cache buffer */
          memcpy(buff, block->buffer+offset, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#endif
        }
      }

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_read_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first chunk can start at a non-zero in-block offset. */
      offset= 0;

    } while ((length-= read_length));
    if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
    {
      MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
                                        keycache->key_cache_block_size),
                               (ulong) (keycache->blocks_unused *
                                        keycache->key_cache_block_size));
    }
    goto end;
  }
  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));

no_key_cache:
  /* Key cache is not used */

  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;

  if (locked_and_incremented)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
    error= 1;
  if (locked_and_incremented)
    keycache_pthread_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_PRINT("exit", ("error: %d", error ));
  DBUG_RETURN(error ? (uchar*) 0 : start);
}
2977
2978
2979 /*
2980 Insert a block of file data from a buffer into a simple key cache
2981
2982 SYNOPSIS
2983 simple_key_cache_insert()
2984 keycache pointer to the control block of a simple key cache
2985 file handler for the file to insert data from
2986 filepos position of the block of data in the file to insert
2987 level determines the weight of the data
2988 buff buffer to read data from
2989 length length of the data in the buffer
2990
2991 DESCRIPTION
2992 This function is the implementation of the key_cache_insert interface
2993 function that is employed by simple (non-partitioned) key caches.
2994 The function takes the parameter keycache as a pointer to the
2995 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
2996 cache.
2997 The function writes a block of file data from a buffer into the key cache.
2998 The buffer is specified with the parameters buff and length - the pointer
2999 to the beginning of the buffer and its size respectively. It's assumed
3000 the buffer contains the data from 'file' allocated from the position
3001 filepos. The data is copied from the buffer in key_cache_block_size
3002 increments.
3003 The parameter level is used to set one characteristic for the key buffers
3004 loaded with the data from buff. The characteristic is used only by the
3005 midpoint insertion strategy.
3006
3007 RETURN VALUE
3008 0 if a success, 1 - otherwise.
3009
3010 NOTES
    The function is used by MyISAM to move all blocks from an index file to
3012 the key cache. It can be performed in parallel with reading the file data
3013 from the key buffers by other threads.
3014
3015 */
3016
static
int simple_key_cache_insert(SIMPLE_KEY_CACHE_CB *keycache,
                            File file, my_off_t filepos, int level,
                            uchar *buff, uint length)
{
  int error= 0;
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
                       (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    reg1 BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
      goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
        goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st == PAGE_WAIT_TO_BE_READ)
        {
          /*
            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block_secondary()
            executes the correct wait.
          */
          read_block_secondary(keycache, block);

          /*
            A secondary request must now have the block assigned to the
            requested file block.
          */
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
        }
        else if (page_st == PAGE_TO_BE_READ &&
                 (offset || (read_length < keycache->key_cache_block_size)))
        {
          /*
            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block_primary(keycache, block, keycache->key_cache_block_size,
                             read_length + offset);
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          DBUG_ASSERT(block->hash_link->requests);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */
#endif

          /* Copy data from buff */
          memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));
#endif
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          KEYCACHE_DBUG_PRINT("key_cache_insert",
                              ("primary request: new page in cache"));
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          DBUG_ASSERT((page_st == PAGE_READ) &&
                      (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        DBUG_ASSERT(block->hash_link->file == file);
        DBUG_ASSERT(block->hash_link->diskpos == filepos);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

      buff+= read_length;
      filepos+= read_length+offset;
      /* Only the first chunk can start at a non-zero in-block offset. */
      offset= 0;

    } while ((length-= read_length));

  no_key_cache:
    /*
      Reached both on normal completion and on early bail-out; the
      counter is only decremented if it was incremented above, but the
      mutex is always held here and must be released.
    */
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
3232
3233
3234 /*
3235 Write a buffer into a simple key cache
3236
3237 SYNOPSIS
3238
3239 simple_key_cache_write()
3240 keycache pointer to the control block of a simple key cache
3241 file handler for the file to write data to
3242 file_extra maps of key cache partitions containing
3243 dirty pages from file
3244 filepos position in the file to write data to
3245 level determines the weight of the data
3246 buff buffer with the data
3247 length length of the buffer
3248 dont_write if is 0 then all dirty pages involved in writing
3249 should have been flushed from key cache
3250
3251 DESCRIPTION
3252 This function is the implementation of the key_cache_write interface
3253 function that is employed by simple (non-partitioned) key caches.
3254 The function takes the parameter keycache as a pointer to the
3255 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
3256 cache.
3257 In a general case the function copies data from a buffer into the key
3258 cache. The buffer is specified with the parameters buff and length -
3259 the pointer to the beginning of the buffer and its size respectively.
3260 It's assumed the buffer contains the data to be written into 'file'
3261 starting from the position filepos. The data is copied from the buffer
3262 in key_cache_block_size increments.
3263 If the value of the parameter dont_write is FALSE then the function
3264 also writes the data into file.
3265 The parameter level is used to set one characteristic for the key buffers
3266 filled with the data from buff. The characteristic is employed only by
3267 the midpoint insertion strategy.
3268 The parameter file_extra currently makes sense only for simple key caches
3269 that are elements of a partitioned key cache. It provides a pointer to the
    shared bitmap of the partitions that may contain dirty pages for the file.
3271 This bitmap is used to optimize the function
3272 flush_partitioned_key_cache_blocks.
3273
3274 RETURN VALUE
3275 0 if a success, 1 - otherwise.
3276
3277 NOTES
3278 This implementation exploits the fact that the function is called only
3279 when a thread has got an exclusive lock for the key file.
3280 */
3281
3282 static
simple_key_cache_write(SIMPLE_KEY_CACHE_CB * keycache,File file,void * file_extra,my_off_t filepos,int level,uchar * buff,uint length,uint block_length,int dont_write)3283 int simple_key_cache_write(SIMPLE_KEY_CACHE_CB *keycache,
3284 File file, void *file_extra __attribute__((unused)),
3285 my_off_t filepos, int level,
3286 uchar *buff, uint length,
3287 uint block_length __attribute__((unused)),
3288 int dont_write)
3289 {
3290 my_bool locked_and_incremented= FALSE;
3291 int error=0;
3292 DBUG_ENTER("simple_key_cache_write");
3293 DBUG_PRINT("enter",
3294 ("fd: %u pos: %lu length: %u block_length: %u"
3295 " key_block_length: %u",
3296 (uint) file, (ulong) filepos, length, block_length,
3297 keycache ? keycache->key_cache_block_size : 0));
3298
3299 if (!dont_write)
3300 {
3301 /* purecov: begin inspected */
3302 /* Not used in the server. */
3303 /* Force writing from buff into disk. */
3304 keycache->global_cache_w_requests++;
3305 keycache->global_cache_write++;
3306 if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
3307 DBUG_RETURN(1);
3308 /* purecov: end */
3309 }
3310
3311 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3312 DBUG_EXECUTE("check_keycache",
3313 test_key_cache(keycache, "start of key_cache_write", 1););
3314 #endif
3315
3316 if (keycache->key_cache_inited)
3317 {
3318 /* Key cache is used */
3319 reg1 BLOCK_LINK *block;
3320 uint read_length;
3321 uint offset;
3322 int page_st;
3323
3324 if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
3325 {
3326 MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
3327 (ulong) (keycache->blocks_used *
3328 keycache->key_cache_block_size),
3329 (ulong) (keycache->blocks_unused *
3330 keycache->key_cache_block_size));
3331 }
3332
3333 /*
3334 When the key cache is once initialized, we use the cache_lock to
3335 reliably distinguish the cases of normal operation, resizing, and
3336 disabled cache. We always increment and decrement
3337 'cnt_for_resize_op' so that a resizer can wait for pending I/O.
3338 */
3339 keycache_pthread_mutex_lock(&keycache->cache_lock);
3340 /*
3341 Cache resizing has two phases: Flushing and re-initializing. In
3342 the flush phase write requests can modify dirty blocks that are
3343 not yet in flush. Otherwise they are allowed to bypass the cache.
3344 find_key_block() returns NULL in both cases (clean blocks and
3345 non-cached blocks).
3346
3347 After the flush phase new I/O requests must wait until the
3348 re-initialization is done. The re-initialization can be done only
3349 if no I/O request is in progress. The reason is that
3350 key_cache_block_size can change. With enabled cache I/O is done in
3351 chunks of key_cache_block_size. Every chunk tries to use a cache
3352 block first. If the block size changes in the middle, a block
3353 could be missed and data could be written below a cached block.
3354 */
3355 while (keycache->in_resize && !keycache->resize_in_flush)
3356 wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
3357 /* Register the I/O for the next resize. */
3358 inc_counter_for_resize_op(keycache);
3359 locked_and_incremented= TRUE;
3360 /* Requested data may not always be aligned to cache blocks. */
3361 offset= (uint) (filepos % keycache->key_cache_block_size);
3362 /* Write data in key_cache_block_size increments. */
3363 do
3364 {
3365 /* Cache could be disabled in a later iteration. */
3366 if (!keycache->can_be_used)
3367 goto no_key_cache;
3368
3369 MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
3370 /* Start writing at the beginning of the cache block. */
3371 filepos-= offset;
3372 /* Do not write beyond the end of the cache block. */
3373 read_length= length;
3374 set_if_smaller(read_length, keycache->key_cache_block_size-offset);
3375 KEYCACHE_DBUG_ASSERT(read_length > 0);
3376
3377 /* Request the cache block that matches file/pos. */
3378 keycache->global_cache_w_requests++;
3379 block= find_key_block(keycache, file, filepos, level, 1, &page_st);
3380 if (!block)
3381 {
3382 /*
3383 This happens only for requests submitted during key cache
3384 resize. The block is not in the cache and shall not go in.
3385 Write directly to file.
3386 */
3387 if (dont_write)
3388 {
3389 /* Used in the server. */
3390 keycache->global_cache_write++;
3391 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3392 if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
3393 MYF(MY_NABP | MY_WAIT_IF_FULL)))
3394 error=1;
3395 keycache_pthread_mutex_lock(&keycache->cache_lock);
3396 }
3397 goto next_block;
3398 }
3399 /*
3400 Prevent block from flushing and from being selected for to be
3401 freed. This must be set when we release the cache_lock.
3402 However, we must not set the status of the block before it is
3403 assigned to this file/pos.
3404 */
3405 if (page_st != PAGE_WAIT_TO_BE_READ)
3406 block->status|= BLOCK_FOR_UPDATE;
3407 /*
3408 We must read the file block first if it is not yet in the cache
3409 and we do not replace all of its contents.
3410
3411 In cases where the cache block is big enough to contain (parts
3412 of) index blocks of different indexes, our request can be
3413 secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
3414 reading the file block. If the read completes after us, it
3415 overwrites our new contents with the old contents. So we have to
3416 wait for the other thread to complete the read of this block.
3417 read_block_primary|secondary() takes care for the wait.
3418 */
3419 if (!(block->status & BLOCK_ERROR))
3420 {
3421 if (page_st == PAGE_TO_BE_READ &&
3422 (offset || read_length < keycache->key_cache_block_size))
3423 {
3424 read_block_primary(keycache, block,
3425 offset + read_length >= keycache->key_cache_block_size?
3426 offset : keycache->key_cache_block_size,
3427 offset);
3428 /*
3429 Prevent block from flushing and from being selected for to be
3430 freed. This must be set when we release the cache_lock.
3431 Here we set it in case we could not set it above.
3432 */
3433 block->status|= BLOCK_FOR_UPDATE;
3434 }
3435 else if (page_st == PAGE_WAIT_TO_BE_READ)
3436 {
3437 read_block_secondary(keycache, block);
3438 block->status|= BLOCK_FOR_UPDATE;
3439 }
3440 }
3441 /*
3442 The block should always be assigned to the requested file block
3443 here. It need not be BLOCK_READ when overwriting the whole block.
3444 */
3445 DBUG_ASSERT(block->hash_link->file == file);
3446 DBUG_ASSERT(block->hash_link->diskpos == filepos);
3447 DBUG_ASSERT(block->status & BLOCK_IN_USE);
3448 DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
3449 /*
3450 The block to be written must not be marked BLOCK_REASSIGNED.
3451 Otherwise it could be freed in dirty state or reused without
3452 another flush during eviction. It must also not be in flush.
3453 Otherwise the old contens may have been flushed already and
3454 the flusher could clear BLOCK_CHANGED without flushing the
3455 new changes again.
3456 */
3457 DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3458
3459 while (block->status & BLOCK_IN_FLUSHWRITE)
3460 {
3461 /*
3462 Another thread is flushing the block. It was dirty already.
3463 Wait until the block is flushed to file. Otherwise we could
3464 modify the buffer contents just while it is written to file.
3465 An unpredictable file block contents would be the result.
3466 While we wait, several things can happen to the block,
3467 including another flush. But the block cannot be reassigned to
3468 another hash_link until we release our request on it.
3469 */
3470 wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
3471 DBUG_ASSERT(keycache->can_be_used);
3472 DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
3473 /* Still must not be marked for free. */
3474 DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3475 DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
3476 }
3477
3478 /*
3479 We could perhaps release the cache_lock during access of the
3480 data like in the other functions. Locks outside of the key cache
3481 assure that readers and a writer do not access the same range of
3482 data. Parallel accesses should happen only if the cache block
3483 contains multiple index block(fragment)s. So different parts of
3484 the buffer would be read/written. An attempt to flush during
3485 memcpy() is prevented with BLOCK_FOR_UPDATE.
3486 */
3487 if (!(block->status & BLOCK_ERROR))
3488 {
3489 #if !defined(SERIALIZED_READ_FROM_CACHE)
3490 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3491 #endif
3492 memcpy(block->buffer+offset, buff, (size_t) read_length);
3493
3494 #if !defined(SERIALIZED_READ_FROM_CACHE)
3495 keycache_pthread_mutex_lock(&keycache->cache_lock);
3496 #endif
3497 }
3498
3499 if (!dont_write)
3500 {
3501 /* Not used in the server. buff has been written to disk at start. */
3502 if ((block->status & BLOCK_CHANGED) &&
3503 (!offset && read_length >= keycache->key_cache_block_size))
3504 link_to_file_list(keycache, block, block->hash_link->file, 1);
3505 }
3506 else if (! (block->status & BLOCK_CHANGED))
3507 link_to_changed_list(keycache, block);
3508 block->status|=BLOCK_READ;
3509 /*
3510 Allow block to be selected for to be freed. Since it is marked
3511 BLOCK_CHANGED too, it won't be selected for to be freed without
3512 a flush.
3513 */
3514 block->status&= ~BLOCK_FOR_UPDATE;
3515 set_if_smaller(block->offset, offset);
3516 set_if_bigger(block->length, read_length+offset);
3517
3518 /* Threads may be waiting for the changes to be complete. */
3519 release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
3520
3521 /*
3522 If only a part of the cache block is to be replaced, and the
3523 rest has been read from file, then the cache lock has been
3524 released for I/O and it could be possible that another thread
3525 wants to evict or free the block and waits for it to be
3526 released. So we must not just decrement hash_link->requests, but
3527 also wake a waiting thread.
3528 */
3529 remove_reader(block);
3530
3531 /* Error injection for coverage testing. */
3532 DBUG_EXECUTE_IF("key_cache_write_block_error",
3533 block->status|= BLOCK_ERROR;);
3534
3535 /* Do not link erroneous blocks into the LRU ring, but free them. */
3536 if (!(block->status & BLOCK_ERROR))
3537 {
3538 /*
3539 Link the block into the LRU ring if it's the last submitted
3540 request for the block. This enables eviction for the block.
3541 */
3542 unreg_request(keycache, block, 1);
3543 }
3544 else
3545 {
3546 /* Pretend a "clean" block to avoid complications. */
3547 block->status&= ~(BLOCK_CHANGED);
3548 free_block(keycache, block);
3549 error= 1;
3550 break;
3551 }
3552
3553 next_block:
3554 buff+= read_length;
3555 filepos+= read_length+offset;
3556 offset= 0;
3557
3558 } while ((length-= read_length));
3559 goto end;
3560 }
3561
3562 no_key_cache:
3563 /* Key cache is not used */
3564 if (dont_write)
3565 {
3566 /* Used in the server. */
3567 keycache->global_cache_w_requests++;
3568 keycache->global_cache_write++;
3569 if (locked_and_incremented)
3570 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3571 if (my_pwrite(file, (uchar*) buff, length, filepos,
3572 MYF(MY_NABP | MY_WAIT_IF_FULL)))
3573 error=1;
3574 if (locked_and_incremented)
3575 keycache_pthread_mutex_lock(&keycache->cache_lock);
3576 }
3577
3578 end:
3579 if (locked_and_incremented)
3580 {
3581 dec_counter_for_resize_op(keycache);
3582 keycache_pthread_mutex_unlock(&keycache->cache_lock);
3583 }
3584
3585 if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
3586 {
3587 MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
3588 keycache->key_cache_block_size),
3589 (ulong) (keycache->blocks_unused *
3590 keycache->key_cache_block_size));
3591 }
3592
3593 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3594 DBUG_EXECUTE("exec",
3595 test_key_cache(keycache, "end of key_cache_write", 1););
3596 #endif
3597 DBUG_RETURN(error);
3598 }
3599
3600
3601 /*
3602 Free block.
3603
3604 SYNOPSIS
3605 free_block()
3606 keycache Pointer to a key cache data structure
3607 block Pointer to the block to free
3608
3609 DESCRIPTION
3610 Remove reference to block from hash table.
3611 Remove block from the chain of clean blocks.
3612 Add block to the free list.
3613
3614 NOTE
3615 Block must not be free (status == 0).
3616 Block must not be in free_block_list.
3617 Block must not be in the LRU ring.
3618 Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3619 Block must not be in free (BLOCK_REASSIGNED).
3620 Block must not be in flush (BLOCK_IN_FLUSH).
3621 Block must not be dirty (BLOCK_CHANGED).
3622 Block must not be in changed_blocks (dirty) hash.
3623 Block must be in file_blocks (clean) hash.
3624 Block must refer to a hash_link.
3625 Block must have a request registered on it.
3626 */
3627
static void free_block(SIMPLE_KEY_CACHE_CB *keycache, BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed, hash_link %p status: %u",
                       BLOCK_NUMBER(block), block->hash_link,
                       block->status));
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
              !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                                 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an DBUG_ASSERT to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
    DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
                                  BLOCK_REASSIGNED)) &&
                !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_IN_FLUSH | BLOCK_CHANGED |
                                   BLOCK_FOR_UPDATE)));
    DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
    DBUG_ASSERT(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operation in this case. Assert that there are no other requests
    registered.
  */
  DBUG_ASSERT(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Error blocks are not put into the LRU ring. */
  if (!(block->status & BLOCK_ERROR))
  {
    /* Here the block must be in the LRU ring. Unlink it again. */
    DBUG_ASSERT(block->next_used && block->prev_used &&
                *block->prev_used == block);
    unlink_block(keycache, block);
  }
  /* A freed block is always cold; adjust the warm-block statistics. */
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  /* Reset the block to the 'free' state: no status, empty data window. */
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));

  /* Enforced by unlink_changed(), but just to be sure. */
  DBUG_ASSERT(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list. */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
3754
3755
/*
  qsort() comparator ordering blocks by their on-disk position, so a
  write burst in flush_cached_blocks() proceeds with minimal seeking.
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  my_off_t pos_a= (*a)->hash_link->diskpos;
  my_off_t pos_b= (*b)->hash_link->diskpos;

  if (pos_a < pos_b)
    return -1;
  if (pos_a > pos_b)
    return 1;
  return 0;
}
3761
3762
3763 /*
3764 Flush a portion of changed blocks to disk,
3765 free used blocks if requested
3766 */
3767
static int flush_cached_blocks(SIMPLE_KEY_CACHE_CB *keycache,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  /* First write error encountered; returned to the caller (0 if none). */
  int last_errno= 0;
  uint count= (uint) (end-cache);

  /* Don't lock the cache during the flush */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
    As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
    we are guaranteed no thread will change them
  */
  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                        ("block %u to be flushed", BLOCK_NUMBER(block)));
    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      /*
        Release the cache lock for the duration of the disk write.
        BLOCK_IN_FLUSHWRITE keeps writers away from the buffer meanwhile.
        Only the used window [offset, length) of the buffer is written.
      */
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
      error= (int)my_pwrite(file, block->buffer + block->offset,
                            block->length - block->offset,
                            block->hash_link->diskpos + block->offset,
                            MYF(MY_NABP | MY_WAIT_IF_FULL));
      keycache_pthread_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        /* Remember the first error only; -1 if errno was not set. */
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let to proceed for possible waiting requests to write to the block page.
      It might happen only during an operation to resize the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }

  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
3871
3872
3873 /*
3874 Flush all key blocks for a file to disk, but don't do any mutex locks
3875
3876 SYNOPSIS
3877 flush_key_blocks_int()
3878 keycache pointer to a key cache data structure
3879 file handler for the file to flush to
3880 flush_type type of the flush
3881
3882 NOTES
3883 This function doesn't do any mutex locks because it needs to be called both
  from flush_key_blocks and flush_all_key_blocks (the latter one does the
3885 mutex lock in the resize_key_cache() function).
3886
3887 We do only care about changed blocks that exist when the function is
3888 entered. We do not guarantee that all changed blocks of the file are
3889 flushed if more blocks change while this function is running.
3890
3891 RETURN
3892 0 ok
3893 1 error
3894 */
3895
static int flush_key_blocks_int(SIMPLE_KEY_CACHE_CB *keycache,
                                File file, enum flush_type type)
{
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
  int last_errno= 0;
  int last_errcnt= 0;
  DBUG_ENTER("flush_key_blocks_int");
  DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
              file, keycache->blocks_used, keycache->blocks_changed));

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of flush_key_blocks", 0););
#endif

  DBUG_ASSERT(type != FLUSH_KEEP_LAZY);
  cache= cache_buff;
  if (keycache->disk_blocks > 0 &&
      (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
  {
    /* Key cache exists and flush is not disabled */
    int error= 0;
    uint count= FLUSH_CACHE;
    BLOCK_LINK **pos,**end;
    BLOCK_LINK *first_in_switch= NULL;
    BLOCK_LINK *last_in_flush;
    BLOCK_LINK *last_for_update;
    BLOCK_LINK *block, *next;
#if defined(KEYCACHE_DEBUG)
    uint cnt=0;
#endif

    if (type != FLUSH_IGNORE_CHANGED)
    {
      /*
        Count how many key blocks we have to cache to be able
        to flush all dirty pages with minimum seek moves
      */
      count= 0;
      for (block= keycache->changed_blocks[FILE_HASH(file, keycache)] ;
           block ;
           block= block->next_changed)
      {
        if ((block->hash_link->file == file) &&
            !(block->status & BLOCK_IN_FLUSH))
        {
          count++;
          KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
        }
      }
      /*
        Allocate a new buffer only if it is bigger than the one we have.
        Assure that we always have some entries for the case that new
        changed blocks appear while we need to wait for something.
      */
      if ((count > FLUSH_CACHE) &&
          !(cache= (BLOCK_LINK**) my_malloc(key_memory_KEY_CACHE,
                                            sizeof(BLOCK_LINK*)*count, MYF(0))))
        cache= cache_buff;
      /*
        After a restart there could be more changed blocks than now.
        So we should not let count become smaller than the fixed buffer.
      */
      if (cache == cache_buff)
        count= FLUSH_CACHE;
    }

    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
    last_in_flush= NULL;
    last_for_update= NULL;
    end= (pos= cache)+count;
    for (block= keycache->changed_blocks[FILE_HASH(file, keycache)] ;
         block ;
         block= next)
    {
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      next= block->next_changed;
      if (block->hash_link->file == file)
      {
        if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
        {
          /*
            Note: The special handling of BLOCK_IN_SWITCH is obsolete
            since we set BLOCK_IN_FLUSH if the eviction includes a
            flush. It can be removed in a later version.
          */
          if (!(block->status & BLOCK_IN_SWITCH))
          {
            /*
              We care only for the blocks for which flushing was not
              initiated by another thread and which are not in eviction.
              Registering a request on the block unlinks it from the LRU
              ring and protects against eviction.
            */
            reg_requests(keycache, block, 1);
            if (type != FLUSH_IGNORE_CHANGED)
            {
              /* It's not a temporary file */
              if (pos == end)
              {
                /*
                  This should happen relatively seldom. Remove the
                  request because we won't do anything with the block
                  but restart and pick it again in the next iteration.
                */
                unreg_request(keycache, block, 0);
                /*
                  This happens only if there is not enough
                  memory for the big block
                */
                if ((error= flush_cached_blocks(keycache, file, cache,
                                                end,type)))
                {
                  /* Do not loop infinitely trying to flush in vain. */
                  if ((last_errno == error) && (++last_errcnt > 5))
                    goto err;
                  last_errno= error;
                }
                /*
                  Restart the scan as some other thread might have changed
                  the changed blocks chain: the blocks that were in switch
                  state before the flush started have to be excluded
                */
                goto restart;
              }
              /*
                Mark the block with BLOCK_IN_FLUSH in order not to let
                other threads to use it for new pages and interfere with
                our sequence of flushing dirty file pages. We must not
                set this flag before actually putting the block on the
                write burst array called 'cache'.
              */
              block->status|= BLOCK_IN_FLUSH;
              /* Add block to the array for a write burst. */
              *pos++= block;
            }
            else
            {
              /* It's a temporary file */
              DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
              /*
                free_block() must not be called with BLOCK_CHANGED. Note
                that we must not change the BLOCK_CHANGED flag outside of
                link_to_file_list() so that it is always in the correct
                queue and the *blocks_changed counters are correct.
              */
              link_to_file_list(keycache, block, file, 1);
              if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
              {
                /* A request has been registered against the block above. */
                free_block(keycache, block);
              }
              else
              {
                /*
                  Link the block into the LRU ring if it's the last
                  submitted request for the block. This enables eviction
                  for the block. A request has been registered against
                  the block above.
                */
                unreg_request(keycache, block, 1);
              }
            }
          }
          else
          {
            /*
              Link the block into a list of blocks 'in switch'.

              WARNING: Here we introduce a place where a changed block
              is not in the changed_blocks hash! This is acceptable for
              a BLOCK_IN_SWITCH. Never try this for another situation.
              Other parts of the key cache code rely on changed blocks
              being in the changed_blocks hash.
            */
            unlink_changed(block);
            link_changed(block, &first_in_switch);
          }
        }
        else if (type != FLUSH_KEEP)
        {
          /*
            During the normal flush at end of statement (FLUSH_KEEP) we
            do not need to ensure that blocks in flush or update by
            other threads are flushed. They will be flushed by them
            later. In all other cases we must assure that we do not have
            any changed block of this file in the cache when this
            function returns.
          */
          if (block->status & BLOCK_IN_FLUSH)
          {
            /* Remember the last block found to be in flush. */
            last_in_flush= block;
          }
          else
          {
            /* Remember the last block found to be selected for update. */
            last_for_update= block;
          }
        }
      }
    }
    if (pos != cache)
    {
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
      {
        /* Do not loop infinitely trying to flush in vain. */
        if ((last_errno == error) && (++last_errcnt > 5))
          goto err;
        last_errno= error;
      }
      /*
        Do not restart here during the normal flush at end of statement
        (FLUSH_KEEP). We have now flushed at least all blocks that were
        changed when entering this function. In all other cases we must
        assure that we do not have any changed block of this file in the
        cache when this function returns.
      */
      if (type != FLUSH_KEEP)
        goto restart;
    }
    if (last_in_flush)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks in
        flush by other threads. Wait until one of the blocks is flushed.
        Re-check the condition for last_in_flush. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_in_flush->status & BLOCK_IN_FLUSH)
        wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
      /* Be sure not to lose a block. They may be flushed in random order. */
      goto restart;
    }
    if (last_for_update)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks for
        update by other threads. Wait until one of the blocks is updated.
        Re-check the condition for last_for_update. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_for_update->status & BLOCK_FOR_UPDATE)
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
      /* The block is now changed. Flush it. */
      goto restart;
    }

    /*
      Wait until the list of blocks in switch is empty. The threads that
      are switching these blocks will relink them to clean file chains
      while we wait and thus empty the 'first_in_switch' chain.
    */
    while (first_in_switch)
    {
#if defined(KEYCACHE_DEBUG)
      cnt= 0;
#endif
      wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
                    &keycache->cache_lock);
#if defined(KEYCACHE_DEBUG)
      cnt++;
      KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
#endif
      /*
        Do not restart here. We have flushed all blocks that were
        changed when entering this function and were not marked for
        eviction. Other threads have now flushed all remaining blocks in
        the course of their eviction.
      */
    }

    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
      BLOCK_LINK *last_in_switch= NULL;
      uint total_found= 0;
      uint found;
      last_for_update= NULL;

      /*
        Finally free all clean blocks for this file.
        During resize this may be run by two threads in parallel.
      */
      do
      {
        found= 0;
        for (block= keycache->file_blocks[FILE_HASH(file, keycache)] ;
             block ;
             block= next)
        {
          /* Remember the next block. After freeing we cannot get at it. */
          next= block->next_changed;

          /* Changed blocks cannot appear in the file_blocks hash. */
          DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
          if (block->hash_link->file == file)
          {
            /* We must skip blocks that will be changed. */
            if (block->status & BLOCK_FOR_UPDATE)
            {
              last_for_update= block;
              continue;
            }

            /*
              We must not free blocks in eviction (BLOCK_IN_EVICTION |
              BLOCK_IN_SWITCH) or blocks intended to be freed
              (BLOCK_REASSIGNED).
            */
            if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_REASSIGNED)))
            {
              struct st_hash_link *UNINIT_VAR(next_hash_link);
              my_off_t UNINIT_VAR(next_diskpos);
              File UNINIT_VAR(next_file);
              uint UNINIT_VAR(next_status);
              uint UNINIT_VAR(hash_requests);

              total_found++;
              found++;
              KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);

              /*
                Register a request. This unlinks the block from the LRU
                ring and protects it against eviction. This is required
                by free_block().
              */
              reg_requests(keycache, block, 1);

              /*
                free_block() may need to wait for readers of the block.
                This is the moment where the other thread can move the
                'next' block from the chain. free_block() needs to wait
                if there are requests for the block pending.
              */
              if (next && (hash_requests= block->hash_link->requests))
              {
                /* Copy values from the 'next' block and its hash_link. */
                next_status= next->status;
                next_hash_link= next->hash_link;
                next_diskpos= next_hash_link->diskpos;
                next_file= next_hash_link->file;
                DBUG_ASSERT(next == next_hash_link->block);
              }

              free_block(keycache, block);
              /*
                If we had to wait and the state of the 'next' block
                changed, break the inner loop. 'next' may no longer be
                part of the current chain.

                We do not want to break the loop after every free_block(),
                not even only after waits. The chain might be quite long
                and contain blocks for many files. Traversing it again and
                again to find more blocks for this file could become quite
                inefficient.
              */
              if (next && hash_requests &&
                  ((next_status != next->status) ||
                   (next_hash_link != next->hash_link) ||
                   (next_file != next_hash_link->file) ||
                   (next_diskpos != next_hash_link->diskpos) ||
                   (next != next_hash_link->block)))
                break;
            }
            else
            {
              last_in_switch= block;
            }
          }
        } /* end for block in file_blocks */
      } while (found);

      /*
        If any clean block has been found, we may have waited for it to
        become free. In this case it could be possible that another clean
        block became dirty. This is possible if the write request existed
        before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
      */
      if (total_found)
        goto restart;

      /*
        To avoid an infinite loop, wait until one of the blocks marked
        for update is updated.
      */
      if (last_for_update)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
        goto restart;
      }

      /*
        To avoid an infinite loop wait until one of the blocks marked
        for eviction is switched.
      */
      if (last_in_switch)
      {
        /* We did not wait. Block must not have changed status. */
        DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
                                              BLOCK_IN_SWITCH |
                                              BLOCK_REASSIGNED));
        wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
        goto restart;
      }

    } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */

  } /* if (keycache->disk_blocks > 0 */

  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "end of flush_key_blocks", 0););
err:
  if (cache != cache_buff)
    my_free(cache);
  if (last_errno)
    errno=last_errno;                /* Return first error */
  DBUG_RETURN(last_errno != 0);
}
4327
4328
4329 /*
4330 Flush all blocks for a file from key buffers of a simple key cache
4331
4332 SYNOPSIS
4333
4334 flush_simple_key_blocks()
4335 keycache pointer to the control block of a simple key cache
4336 file handler for the file to flush to
4337 file_extra maps of key cache partitions containing
4338 dirty pages from file (not used)
4339 flush_type type of the flush operation
4340
4341 DESCRIPTION
4342 This function is the implementation of the flush_key_blocks interface
4343 function that is employed by simple (non-partitioned) key caches.
4344 The function takes the parameter keycache as a pointer to the
  control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
4346 cache.
4347 In a general case the function flushes the data from all dirty key
4348 buffers related to the file 'file' into this file. The function does
4349 exactly this if the value of the parameter type is FLUSH_KEEP. If the
4350 value of this parameter is FLUSH_RELEASE, the function additionally
4351 releases the key buffers containing data from 'file' for new usage.
4352 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
4353 just releases the key buffers containing data from 'file'.
4354 The parameter file_extra currently is not used by this function.
4355
4356 RETURN
4357 0 ok
4358 1 error
4359
4360 NOTES
4361 This implementation exploits the fact that the function is called only
4362 when a thread has got an exclusive lock for the key file.
4363 */
4364
4365 static
flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB * keycache,File file,void * file_extra,enum flush_type type)4366 int flush_simple_key_cache_blocks(SIMPLE_KEY_CACHE_CB *keycache,
4367 File file,
4368 void *file_extra __attribute__((unused)),
4369 enum flush_type type)
4370 {
4371 int res= 0;
4372 DBUG_ENTER("flush_key_blocks");
4373 DBUG_PRINT("enter", ("keycache: %p", keycache));
4374
4375 if (!keycache->key_cache_inited)
4376 DBUG_RETURN(0);
4377
4378 keycache_pthread_mutex_lock(&keycache->cache_lock);
4379 /* While waiting for lock, keycache could have been ended. */
4380 if (keycache->disk_blocks > 0)
4381 {
4382 inc_counter_for_resize_op(keycache);
4383 res= flush_key_blocks_int(keycache, file, type);
4384 dec_counter_for_resize_op(keycache);
4385 }
4386 keycache_pthread_mutex_unlock(&keycache->cache_lock);
4387 DBUG_RETURN(res);
4388 }
4389
4390
4391 /*
4392 Flush all blocks in the key cache to disk.
4393
4394 SYNOPSIS
4395 flush_all_key_blocks()
4396 keycache pointer to key cache root structure
4397
4398 DESCRIPTION
4399
4400 Flushing of the whole key cache is done in two phases.
4401
4402 1. Flush all changed blocks, waiting for them if necessary. Loop
4403 until there is no changed block left in the cache.
4404
4405 2. Free all clean blocks. Normally this means free all blocks. The
4406 changed blocks were flushed in phase 1 and became clean. However we
4407 may need to wait for blocks that are read by other threads. While we
4408 wait, a clean block could become changed if that operation started
4409 before the resize operation started. To be safe we must restart at
4410 phase 1.
4411
4412 When we can run through the changed_blocks and file_blocks hashes
4413 without finding a block any more, then we are done.
4414
4415 Note that we hold keycache->cache_lock all the time unless we need
4416 to wait for something.
4417
4418 RETURN
4419 0 OK
4420 != 0 Error
4421 */
4422
static int flush_all_key_blocks(SIMPLE_KEY_CACHE_CB *keycache)
{
  BLOCK_LINK *blk;
  uint pass_found;                 /* blocks freed during one outer pass  */
  uint round_found;                /* blocks handled during one hash scan */
  uint bucket;
  uint hash_size= keycache->changed_blocks_hash_size;
  DBUG_ENTER("flush_all_key_blocks");

  do
  {
    mysql_mutex_assert_owner(&keycache->cache_lock);
    pass_found= 0;

    /*
      Phase 1: Flush all changed blocks, waiting for them if necessary.
      Repeat the scan until no changed block is left in the cache.
    */
    do
    {
      round_found= 0;
      for (bucket= 0; bucket < hash_size; bucket++)
      {
        /*
          A non-empty chain yields the file of its first block; flushing
          that file removes all its changed blocks, so the same block does
          not reappear here on the next scan. New writes are not accepted
          during the flush. If several files share a bucket, one of them
          is flushed per scan of phase 1.
        */
        while ((blk= keycache->changed_blocks[bucket]))
        {
          round_found++;
          /*
            Write the dirty blocks but keep them allocated; they may still
            serve reads until every other block has been flushed too.
          */
          if (flush_key_blocks_int(keycache, blk->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }
    } while (round_found);

    /*
      Phase 2: Free all clean blocks (normally: all blocks, since phase 1
      made the changed ones clean). Waiting for blocks read by other
      threads may let a clean block turn dirty again if that write started
      before the resize did; the outer loop restarts phase 1 in that case.
    */
    do
    {
      round_found= 0;
      for (bucket= 0; bucket < hash_size; bucket++)
      {
        /*
          A non-empty chain yields the file of its first block; freeing
          that file's blocks removes them from this bucket, so the same
          block does not reappear on the next scan. If several files share
          a bucket, one of them is handled per scan of phase 2.
        */
        while ((blk= keycache->file_blocks[bucket]))
        {
          pass_found++;
          round_found++;
          if (flush_key_blocks_int(keycache, blk->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }
    } while (round_found);

    /*
      If any clean block was found we may have waited for it, and another
      clean block could have become dirty meanwhile (write request issued
      before the resize started, BLOCK_FOR_UPDATE). Re-check the hashes.
    */
  } while (pass_found);

#ifndef DBUG_OFF
  /* At this point no block may remain in either hash. */
  for (bucket= 0; bucket < hash_size; bucket++)
  {
    DBUG_ASSERT(!keycache->changed_blocks[bucket]);
    DBUG_ASSERT(!keycache->file_blocks[bucket]);
  }
#endif

  DBUG_RETURN(0);
}
4522
4523
4524 /*
4525 Reset the counters of a simple key cache
4526
4527 SYNOPSIS
4528 reset_simple_key_cache_counters()
4529 name the name of a key cache
4530 keycache pointer to the control block of a simple key cache
4531
4532 DESCRIPTION
4533 This function is the implementation of the reset_key_cache_counters
4534 interface function that is employed by simple (non-partitioned) key caches.
4535 The function takes the parameter keycache as a pointer to the
4536 control block structure of the type S_KEY_CACHE_CB for a simple key cache.
4537 This function resets the values of all statistical counters for the key
4538 cache to 0.
4539 The parameter name is currently not used.
4540
4541 RETURN
4542 0 on success (always because it can't fail)
4543 */
4544
4545 static
reset_simple_key_cache_counters(const char * name,SIMPLE_KEY_CACHE_CB * keycache)4546 int reset_simple_key_cache_counters(const char *name __attribute__((unused)),
4547 SIMPLE_KEY_CACHE_CB *keycache)
4548 {
4549 DBUG_ENTER("reset_simple_key_cache_counters");
4550 if (!keycache->key_cache_inited)
4551 {
4552 DBUG_PRINT("info", ("Key cache %s not initialized.", name));
4553 DBUG_RETURN(0);
4554 }
4555 DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));
4556
4557 keycache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
4558 keycache->global_cache_r_requests= 0; /* Key_read_requests */
4559 keycache->global_cache_read= 0; /* Key_reads */
4560 keycache->global_cache_w_requests= 0; /* Key_write_requests */
4561 keycache->global_cache_write= 0; /* Key_writes */
4562 DBUG_RETURN(0);
4563 }
4564
4565
4566 #ifndef DBUG_OFF
4567 /*
4568 Test if disk-cache is ok
4569 */
static
void test_key_cache(SIMPLE_KEY_CACHE_CB *keycache __attribute__((unused)),
                    const char *where __attribute__((unused)),
                    my_bool lock __attribute__((unused)))
{
  /*
    TODO: not implemented. Invoked via
    DBUG_EXECUTE("check_keycache", test_key_cache(...)) to verify the
    consistency of the key cache structures in debug builds.
  */
}
4577 #endif
4578
4579 #if defined(KEYCACHE_TIMEOUT)
4580
4581 #define KEYCACHE_DUMP_FILE "keycache_dump.txt"
4582 #define MAX_QUEUE_LEN 100
4583
4584
/*
  Dump the state of a simple key cache into KEYCACHE_DUMP_FILE.

  Writes the queues of threads waiting for hash links and blocks, the
  per-block status and wait queues, and the LRU chain. Debugging aid,
  compiled only when KEYCACHE_TIMEOUT is defined.
*/
static void keycache_dump(SIMPLE_KEY_CACHE_CB *keycache)
{
  FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  /* Fix: do not write through a NULL stream if the dump file can't open. */
  if (!keycache_dump_file)
    return;

  /*
    Fix: the original printed thread->id while 'thread' was still
    uninitialized (undefined behavior). Report the current thread instead.
  */
  thread= my_thread_var;
  fprintf(keycache_dump_file, "thread:%lu\n", (ulong) thread->id);

  i=0;
  /* Fix: the wait queues are members of the key cache control block. */
  thread=last=keycache->waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->keycache_link;
      fprintf(keycache_dump_file,
              "thread:%lu, (file,filepos)=(%u,%lu)\n",
              (ulong) thread->id,(uint) page->file,(ulong) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last=keycache->waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->keycache_link;
      fprintf(keycache_dump_file,
              "thread:%lu hash_link:%u (file,filepos)=(%u,%lu)\n",
              (ulong) thread->id, (uint) HASH_LINK_NUMBER(hash_link),
              (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      /* Fix: use a separate counter; the original clobbered the block
         index 'i' while walking the queue. */
      uint qlen= 0;
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%lu\n", (ulong) thread->id);
          if (++qlen == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  /* Fix: the original 'block= keycache= used_last;' does not compile;
     the LRU chain tail is keycache->used_last. */
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}
4674
4675 #endif /* defined(KEYCACHE_TIMEOUT) */
4676
4677 #if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4678
4679
/*
  Debug variant of the condition wait: wait at most KEYCACHE_TIMEOUT
  seconds, dump the key cache state on timeout, and assert that the wait
  did not time out.

  NOTE(review): keycache_dump() is invoked here without arguments although
  its definition above takes a SIMPLE_KEY_CACHE_CB pointer; this looks like
  it cannot compile when KEYCACHE_TIMEOUT is enabled -- confirm before
  relying on this code path.
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int rc;
  struct timeval now;            /* time when we started waiting        */
  struct timespec timeout;       /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;
#endif

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval uses microseconds.
    timespec uses nanoseconds.
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  /* NOTE(review): cnt is function-local, so this 1-in-100 throttle never
     triggers; presumably it was meant to be static -- confirm. */
  cnt++;
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
  fflush(keycache_debug_log);
#endif
  rc= mysql_cond_timedwait(cond, mutex, &timeout);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  if (rc == ETIMEDOUT || rc == ETIME)
  {
#if defined(KEYCACHE_DEBUG)
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
#endif
    keycache_dump();
  }

#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
4727 #else
4728 #if defined(KEYCACHE_DEBUG)
/* Plain condition wait wrapped with thread-trace markers (no timeout). */
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int result;
  KEYCACHE_THREAD_TRACE_END("started waiting");
  result= mysql_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return result;
}
4738 #endif
4739 #endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4740
4741 #if defined(KEYCACHE_DEBUG)
4742
4743
/* Acquire the mutex and emit a thread-trace marker once it is held. */
static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
  int result= mysql_mutex_lock(mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return result;
}
4751
4752
/* Emit a thread-trace marker and release the mutex. */
static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  mysql_mutex_unlock(mutex);
}
4758
4759
/* Trace the signal event, then wake one waiter on the condition. */
static int keycache_pthread_cond_signal(mysql_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return mysql_cond_signal(cond);
}
4767
4768
4769 #if defined(KEYCACHE_DEBUG_LOG)
4770
4771
keycache_debug_print(const char * fmt,...)4772 static void keycache_debug_print(const char * fmt,...)
4773 {
4774 va_list args;
4775 va_start(args,fmt);
4776 if (keycache_debug_log)
4777 {
4778 (void) vfprintf(keycache_debug_log, fmt, args);
4779 (void) fputc('\n',keycache_debug_log);
4780 }
4781 va_end(args);
4782 }
4783 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4784
4785 #if defined(KEYCACHE_DEBUG_LOG)
4786
4787
keycache_debug_log_close(void)4788 void keycache_debug_log_close(void)
4789 {
4790 if (keycache_debug_log)
4791 fclose(keycache_debug_log);
4792 }
4793 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4794
4795 #endif /* defined(KEYCACHE_DEBUG) */
4796
4797 #ifdef DBUG_ASSERT_EXISTS
4798 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4799
/*
  Dump the fields of a block that failed a consistency check.

  Always returns 0 so it can be placed inside a DBUG_ASSERT expression:
  the diagnostics are printed and the assertion still fires.
*/
static int fail_block(BLOCK_LINK *block __attribute__((unused)))
{
#ifndef DBUG_OFF
  F_B_PRT("block->next_used: %p\n", block->next_used);
  F_B_PRT("block->prev_used: %p\n", block->prev_used);
  F_B_PRT("block->next_changed: %p\n", block->next_changed);
  F_B_PRT("block->prev_changed: %p\n", block->prev_changed);
  F_B_PRT("block->hash_link: %p\n", block->hash_link);
  F_B_PRT("block->status: %u\n", block->status);
  F_B_PRT("block->length: %u\n", block->length);
  F_B_PRT("block->offset: %u\n", block->offset);
  F_B_PRT("block->requests: %u\n", block->requests);
  F_B_PRT("block->temperature: %u\n", block->temperature);
#endif
  return 0; /* Let the assert fail. */
}
4816 #endif
4817
4818 #ifndef DBUG_OFF
/*
  Dump the fields of a hash link that failed a consistency check.
  Returns 0 so it can be used inside a DBUG_ASSERT expression, like
  fail_block() above.

  NOTE(review): F_B_PRT is defined under DBUG_ASSERT_EXISTS while this
  function is guarded by !DBUG_OFF; presumably both are defined together
  in debug builds -- confirm if the guards ever diverge.
*/
static int fail_hlink(HASH_LINK *hlink __attribute__((unused)))
{
  F_B_PRT("hlink->next: %p\n", hlink->next);
  F_B_PRT("hlink->prev: %p\n", hlink->prev);
  F_B_PRT("hlink->block: %p\n", hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file: %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
4828
/*
  Check that a simple key cache holds no used blocks or hash links.

  For every block or hash link still in use, a diagnostic is printed to
  stderr; a summary of the cache occupancy follows if anything was found.

  RETURN
    1  the cache is empty (or has no disk blocks at all)
    0  at least one block or hash link is still in use
*/
static int cache_empty(SIMPLE_KEY_CACHE_CB *keycache)
{
  int errcnt= 0;
  int idx;
  if (keycache->disk_blocks <= 0)
    return 1;
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      /* Fix: idx is a signed int; %u was a mismatched conversion. */
      fprintf(stderr, "block index: %d\n", idx);
      fail_block(block);
      errcnt++;
    }
  }
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      /* Fix: idx is a signed int; %u was a mismatched conversion. */
      fprintf(stderr, "hash_link index: %d\n", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    fprintf(stderr, "blocks: %d  used: %zu\n",
            keycache->disk_blocks, keycache->blocks_used);
    fprintf(stderr, "hash_links: %d  used: %d\n",
            keycache->hash_links, keycache->hash_links_used);
    fprintf(stderr, "\n");
  }
  return !errcnt;
}
4865 #endif
4866
4867
4868 /*
4869 Get statistics for a simple key cache
4870
4871 SYNOPSIS
4872 get_simple_key_cache_statistics()
4873 keycache pointer to the control block of a simple key cache
4874 partition_no partition number (not used)
4875 key_cache_stats OUT pointer to the structure for the returned statistics
4876
4877 DESCRIPTION
4878 This function is the implementation of the get_key_cache_statistics
4879 interface function that is employed by simple (non-partitioned) key caches.
4880 The function takes the parameter keycache as a pointer to the
4881 control block structure of the type SIMPLE_KEY_CACHE_CB for a simple key
4882 cache. This function returns the statistical data for the key cache.
4883 The parameter partition_no is not used by this function.
4884
4885 RETURN
4886 none
4887 */
4888
4889 static
get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB * keycache,uint partition_no,KEY_CACHE_STATISTICS * keycache_stats)4890 void get_simple_key_cache_statistics(SIMPLE_KEY_CACHE_CB *keycache,
4891 uint partition_no __attribute__((unused)),
4892 KEY_CACHE_STATISTICS *keycache_stats)
4893 {
4894 DBUG_ENTER("simple_get_key_cache_statistics");
4895
4896 keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
4897 keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
4898 keycache_stats->blocks_used= keycache->blocks_used;
4899 keycache_stats->blocks_unused= keycache->blocks_unused;
4900 keycache_stats->blocks_changed= keycache->global_blocks_changed;
4901 keycache_stats->blocks_warm= keycache->warm_blocks;
4902 keycache_stats->read_requests= keycache->global_cache_r_requests;
4903 keycache_stats->reads= keycache->global_cache_read;
4904 keycache_stats->write_requests= keycache->global_cache_w_requests;
4905 keycache_stats->writes= keycache->global_cache_write;
4906 DBUG_VOID_RETURN;
4907 }
4908
4909
4910 /*
4911 The array of pointer to the key cache interface functions used for simple
4912 key caches. Any simple key cache objects including those incorporated into
4913 partitioned keys caches exploit this array.
4914
  The current implementation of these functions allows them to be called
  directly from the MySQL server code. We do not do that, though.
4917 */
4918
/*
  Function table binding the generic key cache interface to the simple
  (non-partitioned) implementations defined above. The casts adapt the
  SIMPLE_KEY_CACHE_CB-typed functions to the generic interface prototypes.
*/
static KEY_CACHE_FUNCS simple_key_cache_funcs =
{
  (INIT_KEY_CACHE) init_simple_key_cache,
  (RESIZE_KEY_CACHE) resize_simple_key_cache,
  (CHANGE_KEY_CACHE_PARAM) change_simple_key_cache_param,
  (KEY_CACHE_READ) simple_key_cache_read,
  (KEY_CACHE_INSERT) simple_key_cache_insert,
  (KEY_CACHE_WRITE) simple_key_cache_write,
  (FLUSH_KEY_BLOCKS) flush_simple_key_cache_blocks,
  (RESET_KEY_CACHE_COUNTERS) reset_simple_key_cache_counters,
  (END_KEY_CACHE) end_simple_key_cache,
  (GET_KEY_CACHE_STATISTICS) get_simple_key_cache_statistics,
};
4932
4933
4934 /******************************************************************************
4935 Partitioned Key Cache Module
4936
4937 The module contains implementations of all key cache interface functions
4938 employed by partitioned key caches.
4939
4940 A partitioned key cache is a collection of structures for simple key caches
4941 called key cache partitions. Any page from a file can be placed into a buffer
4942 of only one partition. The number of the partition is calculated from
4943 the file number and the position of the page in the file, and it's always the
4944 same for the page. The function that maps pages into partitions takes care
4945 of even distribution of pages among partitions.
4946
  Partitioned key caches mitigate one of the major problems of a simple key
  cache: thread contention for the key cache lock (mutex). Every call of a
  key cache interface function must acquire this lock. So threads compete
  for this lock even in the case when they have acquired shared locks for
  the file and the pages they want to read from are in the key cache buffers.
4952 When working with a partitioned key cache any key cache interface function
4953 that needs only one page has to acquire the key cache lock only for the
  partition the page is ascribed to. This improves the chances that threads
  do not compete for the same key cache lock. Unfortunately, if we use a
  partitioned key cache with N partitions for B-tree indexes, we cannot say
  that the chances become N times smaller. The fact is that any index lookup
4958 operation requires reading from the root page that, for any index, is always
4959 ascribed to the same partition. To resolve this problem we should have
4960 employed more sophisticated mechanisms of working with root pages.
4961
4962 Currently the number of partitions in a partitioned key cache is limited
4963 by 64. We could increase this limit. Simultaneously we would have to increase
4964 accordingly the size of the bitmap dirty_part_map from the MYISAM_SHARE
4965 structure.
4966
4967 ******************************************************************************/
4968
4969 /* Control block for a partitioned key cache */
4970
typedef struct st_partitioned_key_cache_cb
{
  my_bool key_cache_inited;     /*<=> control block is allocated            */
  SIMPLE_KEY_CACHE_CB **partition_array; /* the key cache partitions        */
  size_t key_cache_mem_size;    /* specified size of the cache memory       */
  uint key_cache_block_size;    /* size of the page buffer of a cache block */
  uint partitions;              /* number of partitions in the key cache;
                                   limited to 64 by the width of the
                                   ulonglong dirty_part_map bitmap          */
} PARTITIONED_KEY_CACHE_CB;
4979
4980 static
4981 void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
4982 my_bool cleanup);
4983
4984 static int
4985 reset_partitioned_key_cache_counters(const char *name,
4986 PARTITIONED_KEY_CACHE_CB *keycache);
4987
4988 /*
4989 Determine the partition to which the index block to read is ascribed
4990
4991 SYNOPSIS
4992 get_key_cache_partition()
4993 keycache pointer to the control block of a partitioned key cache
4994 file handler for the file for the block of data to be read
4995 filepos position of the block of data in the file
4996
4997 DESCRIPTION
4998 The function determines the number of the partition in whose buffer the
4999 block from 'file' at the position filepos has to be placed for reading.
5000 The function returns the control block of the simple key cache for this
5001 partition to the caller.
5002
5003 RETURN VALUE
5004 The pointer to the control block of the partition to which the specified
5005 file block is ascribed.
5006 */
5007
5008 static
5009 SIMPLE_KEY_CACHE_CB *
get_key_cache_partition(PARTITIONED_KEY_CACHE_CB * keycache,File file,my_off_t filepos)5010 get_key_cache_partition(PARTITIONED_KEY_CACHE_CB *keycache,
5011 File file, my_off_t filepos)
5012 {
5013 uint i= KEYCACHE_BASE_EXPR(file, filepos) % keycache->partitions;
5014 return keycache->partition_array[i];
5015 }
5016
5017
5018 /*
5019 Determine the partition to which the index block to write is ascribed
5020
5021 SYNOPSIS
5022 get_key_cache_partition()
5023 keycache pointer to the control block of a partitioned key cache
5024 file handler for the file for the block of data to be read
5025 filepos position of the block of data in the file
5026 dirty_part_map pointer to the bitmap of dirty partitions for the file
5027
5028 DESCRIPTION
5029 The function determines the number of the partition in whose buffer the
5030 block from 'file' at the position filepos has to be placed for writing and
5031 marks the partition as dirty in the dirty_part_map bitmap.
5032 The function returns the control block of the simple key cache for this
5033 partition to the caller.
5034
5035 RETURN VALUE
5036 The pointer to the control block of the partition to which the specified
5037 file block is ascribed.
5038 */
5039
5040 static SIMPLE_KEY_CACHE_CB
get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB * keycache,File file,my_off_t filepos,ulonglong * dirty_part_map)5041 *get_key_cache_partition_for_write(PARTITIONED_KEY_CACHE_CB *keycache,
5042 File file, my_off_t filepos,
5043 ulonglong* dirty_part_map)
5044 {
5045 uint i= KEYCACHE_BASE_EXPR( file, filepos) % keycache->partitions;
5046 *dirty_part_map|= 1ULL << i;
5047 return keycache->partition_array[i];
5048 }
5049
5050
5051 /*
5052 Initialize a partitioned key cache
5053
5054 SYNOPSIS
5055 init_partitioned_key_cache()
5056 keycache pointer to the control block of a partitioned key cache
5057 key_cache_block_size size of blocks to keep cached data
5058 use_mem total memory to use for all key cache partitions
5059 division_limit division limit (may be zero)
5060 age_threshold age threshold (may be zero)
5061
5062 DESCRIPTION
5063 This function is the implementation of the init_key_cache
5064 interface function that is employed by partitioned key caches.
5065
5066 The function builds and initializes an array of simple key caches,
5067 and then initializes the control block structure of the type
5068 PARTITIONED_KEY_CACHE_CB that is used for a partitioned key
5069 cache. The parameter keycache is supposed to point to this
5070 structure. The number of partitions in the partitioned key cache
5071 to be built must be passed through the field 'partitions' of this
5072 structure.
  The parameter key_cache_block_size specifies the size of the
  blocks in the simple key caches to be built.
5075 The parameters division_limit and age_threshold determine the initial
5076 values of those characteristics of the simple key caches that are used for
5077 midpoint insertion strategy. The parameter use_mem specifies the total
5078 amount of memory to be allocated for the key cache blocks in all simple key
5079 caches and for all auxiliary structures.
5080
5081 RETURN VALUE
5082 total number of blocks in key cache partitions, if successful,
5083 <= 0 - otherwise.
5084
5085 NOTES
5086 If keycache->key_cache_inited != 0 then we assume that the memory for
5087 the array of partitions has been already allocated.
5088
5089 It's assumed that no two threads call this function simultaneously
5090 referring to the same key cache handle.
5091 */
5092
static
int init_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
                               uint key_cache_block_size,
                               size_t use_mem, uint division_limit,
                               uint age_threshold, uint changed_blocks_hash_size)
{
  int i;
  size_t mem_per_cache;                /* memory budget for one partition     */
  size_t mem_decr;                     /* budget reduction step on failure    */
  int cnt;                             /* blocks built in one partition       */
  SIMPLE_KEY_CACHE_CB *partition;
  SIMPLE_KEY_CACHE_CB **partition_ptr; /* next free slot in partition array   */
  uint partitions= keycache->partitions;
  int blocks= 0;                       /* total blocks over all partitions    */
  DBUG_ENTER("partitioned_init_key_cache");

  keycache->key_cache_block_size = key_cache_block_size;

  /* On re-initialization the array of partition pointers already exists. */
  if (keycache->key_cache_inited)
    partition_ptr= keycache->partition_array;
  else
  {
    if(!(partition_ptr=
       (SIMPLE_KEY_CACHE_CB **) my_malloc(key_memory_KEY_CACHE,
                                          sizeof(SIMPLE_KEY_CACHE_CB *) *
                                          partitions, MYF(MY_WME))))
      DBUG_RETURN(-1);
    bzero(partition_ptr, sizeof(SIMPLE_KEY_CACHE_CB *) * partitions);
    keycache->partition_array= partition_ptr;
  }

  /* Split the memory evenly among the partitions. */
  mem_per_cache = use_mem / partitions;
  mem_decr= mem_per_cache / 5;

  for (i= 0; i < (int) partitions; i++)
  {
    my_bool key_cache_inited= keycache->key_cache_inited;
    if (key_cache_inited)
      partition= *partition_ptr;
    else
    {
      if (!(partition=
              (SIMPLE_KEY_CACHE_CB *) my_malloc(key_memory_KEY_CACHE,
                                                sizeof(SIMPLE_KEY_CACHE_CB),
                                                MYF(MY_WME))))
        continue;
      partition->key_cache_inited= 0;
    }

    cnt= init_simple_key_cache(partition, key_cache_block_size, mem_per_cache,
                               division_limit, age_threshold,
                               changed_blocks_hash_size);
    if (cnt <= 0)
    {
      end_simple_key_cache(partition, 1);
      if (!key_cache_inited)
      {
        my_free(partition);
        partition= 0;
      }
      if ((i == 0 && cnt < 0) || i > 0)
      {
        /*
          Here we have two cases:
          1. i == 0 and cnt < 0
          cnt < 0 => mem_per_cache is not big enough to allocate minimal
          number of key blocks in the key cache of the partition.
          Decrease the number of the partitions by 1 and start again.
          2. i > 0
          There is not enough memory for one of the succeeding partitions.
          Just skip this partition decreasing the number of partitions in
          the key cache by one.
          Do not change the value of mem_per_cache in both cases.
        */
        if (key_cache_inited)
        {
          my_free(partition);
          partition= 0;
          /*
            NOTE(review): this inner check is redundant (we are already in
            the key_cache_inited branch), and sizeof(partition_ptr) is the
            size of a pointer, which happens to equal the element size of
            the array -- presumably sizeof(*partition_ptr) was intended;
            confirm.
          */
          if(key_cache_inited)
            memmove(partition_ptr, partition_ptr+1,
                    sizeof(partition_ptr)*(partitions-i-1));
        }
        if (!--partitions)
          break;
      }
      else
      {
        /*
          We come here when i == 0 && cnt == 0.
          cnt == 0 => the memory allocator fails to allocate a block of
          memory of the size mem_per_cache. Decrease the value of
          mem_per_cache without changing the current number of partitions
          and start again. Make sure that such a decrease may happen not
          more than 5 times in total.
        */
        if (use_mem <= mem_decr)
          break;
        use_mem-= mem_decr;
      }
      /* Retry the same slot with the adjusted budget/partition count. */
      i--;
      mem_per_cache= use_mem/partitions;
      continue;
    }
    else
    {
      blocks+= cnt;
      *partition_ptr++= partition;
    }
  }

  /* The number of partitions actually built may be below the request. */
  keycache->partitions= partitions= (uint) (partition_ptr-keycache->partition_array);
  keycache->key_cache_mem_size= mem_per_cache * partitions;
  for (i= 0; i < (int) partitions; i++)
    keycache->partition_array[i]->hash_factor= partitions;

  keycache->key_cache_inited= 1;

  /* Not a single partition could be built: report failure. */
  if (!partitions)
    blocks= -1;

  DBUG_RETURN(blocks);
}
5215
5216
5217 /*
5218 Resize a partitioned key cache
5219
5220 SYNOPSIS
5221 resize_partitioned_key_cache()
5222 keycache pointer to the control block of a partitioned key cache
5223 key_cache_block_size size of blocks to keep cached data
5224 use_mem total memory to use for the new key cache
5225 division_limit new division limit (if not zero)
5226 age_threshold new age threshold (if not zero)
5227
5228 DESCRIPTION
5229 This function is the implementation of the resize_key_cache interface
5230 function that is employed by partitioned key caches.
5231 The function takes the parameter keycache as a pointer to the
5232 control block structure of the type PARTITIONED_KEY_CACHE_CB for the
5233 partitioned key cache to be resized.
5234 The parameter key_cache_block_size specifies the new size of the blocks in
5235 the simple key caches that comprise the partitioned key cache.
5236 The parameters division_limit and age_threshold determine the new initial
5237 values of those characteristics of the simple key cache that are used for
5238 midpoint insertion strategy. The parameter use-mem specifies the total
5239 amount of memory to be allocated for the key cache blocks in all new
5240 simple key caches and for all auxiliary structures.
5241
5242 RETURN VALUE
5243 number of blocks in the key cache, if successful,
5244 0 - otherwise.
5245
5246 NOTES.
5247 The function first calls prepare_resize_simple_key_cache for each simple
5248 key cache effectively flushing all dirty pages from it and destroying
5249 the key cache. Then init_partitioned_key_cache is called. This call builds
5250 a new array of simple key caches containing the same number of elements
5251 as the old one. After this the function calls the function
5252 finish_resize_simple_key_cache for each simple key cache from this array.
5253
5254 This implementation doesn't block the calls and executions of other
5255 functions from the key cache interface. However it assumes that the
5256 calls of resize_partitioned_key_cache itself are serialized.
5257 */
5258
static
int resize_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
                                 uint key_cache_block_size,
                                 size_t use_mem, uint division_limit,
                                 uint age_threshold,
                                 uint changed_blocks_hash_size)
{
  uint i;
  uint partitions= keycache->partitions;
  my_bool cleanup= use_mem == 0;      /* use_mem == 0 requests destruction */
  int blocks= -1;
  int err= 0;
  DBUG_ENTER("partitioned_resize_key_cache");
  if (cleanup)
  {
    /* Not a resize: tear the cache down (control blocks are kept) */
    end_partitioned_key_cache(keycache, 0);
    DBUG_RETURN(-1);
  }
  /* Flush dirty pages and destroy each simple key cache partition */
  for (i= 0; i < partitions; i++)
  {
    err|= prepare_resize_simple_key_cache(keycache->partition_array[i], 1);
  }
  /* Rebuild the array of simple key caches with the new parameters */
  if (!err)
    blocks= init_partitioned_key_cache(keycache, key_cache_block_size,
                                       use_mem, division_limit, age_threshold,
                                       changed_blocks_hash_size);
  if (blocks > 0)
  {
    for (i= 0; i < partitions; i++)
    {
      /*
        NOTE(review): no matching unlock here — finish_resize_simple_key_cache
        presumably expects cache_lock held and releases it itself; confirm
        against its definition.
      */
      keycache_pthread_mutex_lock(&keycache->partition_array[i]->cache_lock);
      finish_resize_simple_key_cache(keycache->partition_array[i]);
    }
  }
  DBUG_RETURN(blocks);
}
5295
5296
5297 /*
5298 Change key cache parameters of a partitioned key cache
5299
5300 SYNOPSIS
5301 partitioned_change_key_cache_param()
5302 keycache pointer to the control block of a partitioned key cache
5303 division_limit new division limit (if not zero)
5304 age_threshold new age threshold (if not zero)
5305
5306 DESCRIPTION
5307 This function is the implementation of the change_key_cache_param interface
5308 function that is employed by partitioned key caches.
5309 The function takes the parameter keycache as a pointer to the
5310 control block structure of the type PARTITIONED_KEY_CACHE_CB for the simple
5311 key cache where new values of the division limit and the age threshold used
5312 for midpoint insertion strategy are to be set. The parameters
5313 division_limit and age_threshold provide these new values.
5314
5315 RETURN VALUE
5316 none
5317
5318 NOTES
5319 The function just calls change_simple_key_cache_param for each element from
5320 the array of simple caches that comprise the partitioned key cache.
5321 */
5322
5323 static
change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB * keycache,uint division_limit,uint age_threshold)5324 void change_partitioned_key_cache_param(PARTITIONED_KEY_CACHE_CB *keycache,
5325 uint division_limit,
5326 uint age_threshold)
5327 {
5328 uint i;
5329 uint partitions= keycache->partitions;
5330 DBUG_ENTER("partitioned_change_key_cache_param");
5331 for (i= 0; i < partitions; i++)
5332 {
5333 change_simple_key_cache_param(keycache->partition_array[i], division_limit,
5334 age_threshold);
5335 }
5336 DBUG_VOID_RETURN;
5337 }
5338
5339
5340 /*
5341 Destroy a partitioned key cache
5342
5343 SYNOPSIS
5344 end_partitioned_key_cache()
5345 keycache pointer to the control block of a partitioned key cache
5346 cleanup <=> complete free (free also control block structures
5347 for all simple key caches)
5348
5349 DESCRIPTION
5350 This function is the implementation of the end_key_cache interface
5351 function that is employed by partitioned key caches.
5352 The function takes the parameter keycache as a pointer to the
5353 control block structure of the type PARTITIONED_KEY_CACHE_CB for the
5354 partitioned key cache to be destroyed.
5355 The function frees the memory allocated for the cache blocks and
5356 auxiliary structures used by simple key caches that comprise the
5357 partitioned key cache. If the value of the parameter cleanup is TRUE
5358 then even the memory used for control blocks of the simple key caches
5359 and the array of pointers to them are freed.
5360
5361 RETURN VALUE
5362 none
5363 */
5364
5365 static
end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB * keycache,my_bool cleanup)5366 void end_partitioned_key_cache(PARTITIONED_KEY_CACHE_CB *keycache,
5367 my_bool cleanup)
5368 {
5369 uint i;
5370 uint partitions= keycache->partitions;
5371 DBUG_ENTER("partitioned_end_key_cache");
5372 DBUG_PRINT("enter", ("key_cache: %p", keycache));
5373
5374 for (i= 0; i < partitions; i++)
5375 {
5376 end_simple_key_cache(keycache->partition_array[i], cleanup);
5377 }
5378 if (cleanup)
5379 {
5380 for (i= 0; i < partitions; i++)
5381 my_free(keycache->partition_array[i]);
5382 my_free(keycache->partition_array);
5383 keycache->key_cache_inited= 0;
5384 }
5385 DBUG_VOID_RETURN;
5386 }
5387
5388
5389 /*
5390 Read a block of data from a partitioned key cache into a buffer
5391
5392 SYNOPSIS
5393
5394 partitioned_key_cache_read()
5395 keycache pointer to the control block of a partitioned key cache
5396 file handler for the file for the block of data to be read
5397 filepos position of the block of data in the file
5398 level determines the weight of the data
5399 buff buffer to where the data must be placed
5400 length length of the buffer
5401 block_length length of the read data from a key cache block
5402 return_buffer return pointer to the key cache buffer with the data
5403
5404 DESCRIPTION
5405 This function is the implementation of the key_cache_read interface
5406 function that is employed by partitioned key caches.
5407 The function takes the parameter keycache as a pointer to the
5408 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5409 partitioned key cache.
5410 In a general case the function reads a block of data from the key cache
5411 into the buffer buff of the size specified by the parameter length. The
5412 beginning of the block of data to be read is specified by the parameters
5413 file and filepos. The length of the read data is the same as the length
5414 of the buffer. The data is read into the buffer in key_cache_block_size
5415 increments. To read each portion the function first finds out in what
5416 partition of the key cache this portion(page) is to be saved, and calls
    simple_key_cache_read with the pointer to the corresponding simple key cache as
5418 its first parameter.
5419 If the parameter return_buffer is not ignored and its value is TRUE, and
5420 the data to be read of the specified size block_length can be read from one
5421 key cache buffer, then the function returns a pointer to the data in the
5422 key cache buffer.
5423 The function takes into account parameters block_length and return buffer
5424 only in a single-threaded environment.
5425 The parameter 'level' is used only by the midpoint insertion strategy
5426 when the data or its portion cannot be found in the key cache.
5427
5428 RETURN VALUE
5429 Returns address from where the data is placed if successful, 0 - otherwise.
5430 */
5431
5432 static
partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB * keycache,File file,my_off_t filepos,int level,uchar * buff,uint length,uint block_length,int return_buffer)5433 uchar *partitioned_key_cache_read(PARTITIONED_KEY_CACHE_CB *keycache,
5434 File file, my_off_t filepos, int level,
5435 uchar *buff, uint length,
5436 uint block_length __attribute__((unused)),
5437 int return_buffer __attribute__((unused)))
5438 {
5439 uint r_length;
5440 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5441 uchar *start= buff;
5442 DBUG_ENTER("partitioned_key_cache_read");
5443 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
5444 (uint) file, (ulong) filepos, length));
5445
5446
5447 /* Read data in key_cache_block_size increments */
5448 do
5449 {
5450 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
5451 file, filepos);
5452 uchar *ret_buff= 0;
5453 r_length= length;
5454 set_if_smaller(r_length, keycache->key_cache_block_size - offset);
5455 ret_buff= simple_key_cache_read((void *) partition,
5456 file, filepos, level,
5457 buff, r_length,
5458 block_length, return_buffer);
5459 if (ret_buff == 0)
5460 DBUG_RETURN(0);
5461 filepos+= r_length;
5462 buff+= r_length;
5463 offset= 0;
5464 } while ((length-= r_length));
5465
5466 DBUG_RETURN(start);
5467 }
5468
5469
5470 /*
5471 Insert a block of file data from a buffer into a partitioned key cache
5472
5473 SYNOPSIS
5474 partitioned_key_cache_insert()
5475 keycache pointer to the control block of a partitioned key cache
5476 file handler for the file to insert data from
5477 filepos position of the block of data in the file to insert
5478 level determines the weight of the data
5479 buff buffer to read data from
5480 length length of the data in the buffer
5481
5482 DESCRIPTION
5483 This function is the implementation of the key_cache_insert interface
5484 function that is employed by partitioned key caches.
5485 The function takes the parameter keycache as a pointer to the
5486 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5487 partitioned key cache.
5488 The function writes a block of file data from a buffer into the key cache.
5489 The buffer is specified with the parameters buff and length - the pointer
5490 to the beginning of the buffer and its size respectively. It's assumed
5491 that the buffer contains the data from 'file' allocated from the position
5492 filepos. The data is copied from the buffer in key_cache_block_size
5493 increments. For every portion of data the function finds out in what simple
5494 key cache from the array of partitions the data must be stored, and after
5495 this calls simple_key_cache_insert to copy the data into a key buffer of
5496 this simple key cache.
5497 The parameter level is used to set one characteristic for the key buffers
5498 loaded with the data from buff. The characteristic is used only by the
5499 midpoint insertion strategy.
5500
5501 RETURN VALUE
5502 0 if a success, 1 - otherwise.
5503
5504 NOTES
    The function is used by MyISAM to move all blocks from an index file to
5506 the key cache. It can be performed in parallel with reading the file data
5507 from the key buffers by other threads.
5508 */
5509
5510 static
partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB * keycache,File file,my_off_t filepos,int level,uchar * buff,uint length)5511 int partitioned_key_cache_insert(PARTITIONED_KEY_CACHE_CB *keycache,
5512 File file, my_off_t filepos, int level,
5513 uchar *buff, uint length)
5514 {
5515 uint w_length;
5516 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5517 DBUG_ENTER("partitioned_key_cache_insert");
5518 DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u",
5519 (uint) file,(ulong) filepos, length));
5520
5521
5522 /* Write data in key_cache_block_size increments */
5523 do
5524 {
5525 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition(keycache,
5526 file, filepos);
5527 w_length= length;
5528 set_if_smaller(w_length, keycache->key_cache_block_size - offset);
5529 if (simple_key_cache_insert((void *) partition,
5530 file, filepos, level,
5531 buff, w_length))
5532 DBUG_RETURN(1);
5533
5534 filepos+= w_length;
5535 buff+= w_length;
5536 offset = 0;
5537 } while ((length-= w_length));
5538
5539 DBUG_RETURN(0);
5540 }
5541
5542
5543 /*
5544 Write data from a buffer into a partitioned key cache
5545
5546 SYNOPSIS
5547
5548 partitioned_key_cache_write()
5549 keycache pointer to the control block of a partitioned key cache
5550 file handler for the file to write data to
5551 filepos position in the file to write data to
5552 level determines the weight of the data
5553 buff buffer with the data
5554 length length of the buffer
5555 dont_write if is 0 then all dirty pages involved in writing
5556 should have been flushed from key cache
5557 file_extra maps of key cache partitions containing
5558 dirty pages from file
5559
5560 DESCRIPTION
5561 This function is the implementation of the key_cache_write interface
5562 function that is employed by partitioned key caches.
5563 The function takes the parameter keycache as a pointer to the
5564 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5565 partitioned key cache.
5566 In a general case the function copies data from a buffer into the key
5567 cache. The buffer is specified with the parameters buff and length -
5568 the pointer to the beginning of the buffer and its size respectively.
5569 It's assumed the buffer contains the data to be written into 'file'
5570 starting from the position filepos. The data is copied from the buffer
5571 in key_cache_block_size increments. For every portion of data the
5572 function finds out in what simple key cache from the array of partitions
5573 the data must be stored, and after this calls simple_key_cache_write to
5574 copy the data into a key buffer of this simple key cache.
5575 If the value of the parameter dont_write is FALSE then the function
5576 also writes the data into file.
5577 The parameter level is used to set one characteristic for the key buffers
5578 filled with the data from buff. The characteristic is employed only by
5579 the midpoint insertion strategy.
    The parameter file_extra provides a pointer to the shared bitmap of
    the partitions that may contain dirty pages for the file. This bitmap
5582 is used to optimize the function flush_partitioned_key_cache_blocks.
5583
5584 RETURN VALUE
5585 0 if a success, 1 - otherwise.
5586
5587 NOTES
5588 This implementation exploits the fact that the function is called only
5589 when a thread has got an exclusive lock for the key file.
5590 */
5591
5592 static
partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB * keycache,File file,void * file_extra,my_off_t filepos,int level,uchar * buff,uint length,uint block_length,int dont_write)5593 int partitioned_key_cache_write(PARTITIONED_KEY_CACHE_CB *keycache,
5594 File file, void *file_extra,
5595 my_off_t filepos, int level,
5596 uchar *buff, uint length,
5597 uint block_length __attribute__((unused)),
5598 int dont_write)
5599 {
5600 uint w_length;
5601 ulonglong *part_map= (ulonglong *) file_extra;
5602 uint offset= (uint) (filepos % keycache->key_cache_block_size);
5603 DBUG_ENTER("partitioned_key_cache_write");
5604 DBUG_PRINT("enter",
5605 ("fd: %u pos: %lu length: %u block_length: %u"
5606 " key_block_length: %u",
5607 (uint) file, (ulong) filepos, length, block_length,
5608 keycache ? keycache->key_cache_block_size : 0));
5609
5610
5611 /* Write data in key_cache_block_size increments */
5612 do
5613 {
5614 SIMPLE_KEY_CACHE_CB *partition= get_key_cache_partition_for_write(keycache,
5615 file,
5616 filepos,
5617 part_map);
5618 w_length = length;
5619 set_if_smaller(w_length, keycache->key_cache_block_size - offset );
5620 if (simple_key_cache_write(partition,
5621 file, 0, filepos, level,
5622 buff, w_length, block_length,
5623 dont_write))
5624 DBUG_RETURN(1);
5625
5626 filepos+= w_length;
5627 buff+= w_length;
5628 offset= 0;
5629 } while ((length-= w_length));
5630
5631 DBUG_RETURN(0);
5632 }
5633
5634
5635 /*
5636 Flush all blocks for a file from key buffers of a partitioned key cache
5637
5638 SYNOPSIS
5639
5640 flush_partitioned_key_cache_blocks()
5641 keycache pointer to the control block of a partitioned key cache
5642 file handler for the file to flush to
5643 file_extra maps of key cache partitions containing
5644 dirty pages from file (not used)
5645 flush_type type of the flush operation
5646
5647 DESCRIPTION
5648 This function is the implementation of the flush_key_blocks interface
5649 function that is employed by partitioned key caches.
5650 The function takes the parameter keycache as a pointer to the
5651 control block structure of the type PARTITIONED_KEY_CACHE_CB for a
5652 partitioned key cache.
5653 In a general case the function flushes the data from all dirty key
5654 buffers related to the file 'file' into this file. The function does
5655 exactly this if the value of the parameter type is FLUSH_KEEP. If the
5656 value of this parameter is FLUSH_RELEASE, the function additionally
5657 releases the key buffers containing data from 'file' for new usage.
5658 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
5659 just releases the key buffers containing data from 'file'.
5660 The function performs the operation by calling the function
5661 flush_simple_key_cache_blocks for the elements of the array of the
5662 simple key caches that comprise the partitioned key_cache. If the value
5663 of the parameter type is FLUSH_KEEP s_flush_key_blocks is called only
5664 for the partitions with possibly dirty pages marked in the bitmap
5665 pointed to by the parameter file_extra.
5666
5667 RETURN
5668 0 ok
5669 1 error
5670
5671 NOTES
5672 This implementation exploits the fact that the function is called only
5673 when a thread has got an exclusive lock for the key file.
5674 */
5675
5676 static
flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB * keycache,File file,void * file_extra,enum flush_type type)5677 int flush_partitioned_key_cache_blocks(PARTITIONED_KEY_CACHE_CB *keycache,
5678 File file, void *file_extra,
5679 enum flush_type type)
5680 {
5681 uint i;
5682 uint partitions= keycache->partitions;
5683 int err= 0;
5684 ulonglong *dirty_part_map= (ulonglong *) file_extra;
5685 DBUG_ENTER("partitioned_flush_key_blocks");
5686 DBUG_PRINT("enter", ("keycache: %p", keycache));
5687
5688 for (i= 0; i < partitions; i++)
5689 {
5690 SIMPLE_KEY_CACHE_CB *partition= keycache->partition_array[i];
5691 if ((type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
5692 !((*dirty_part_map) & ((ulonglong) 1 << i)))
5693 continue;
5694 err|= MY_TEST(flush_simple_key_cache_blocks(partition, file, 0, type));
5695 }
5696 *dirty_part_map= 0;
5697
5698 DBUG_RETURN(err);
5699 }
5700
5701
5702 /*
5703 Reset the counters of a partitioned key cache
5704
5705 SYNOPSIS
5706 reset_partitioned_key_cache_counters()
5707 name the name of a key cache
5708 keycache pointer to the control block of a partitioned key cache
5709
5710 DESCRIPTION
5711 This function is the implementation of the reset_key_cache_counters
5712 interface function that is employed by partitioned key caches.
5713 The function takes the parameter keycache as a pointer to the
5714 control block structure of the type PARTITIONED_KEY_CACHE_CB for a partitioned
5715 key cache.
5716 This function resets the values of the statistical counters of the simple
5717 key caches comprising partitioned key cache to 0. It does it by calling
5718 reset_simple_key_cache_counters for each key cache partition.
5719 The parameter name is currently not used.
5720
5721 RETURN
5722 0 on success (always because it can't fail)
5723 */
5724
5725 static int
reset_partitioned_key_cache_counters(const char * name,PARTITIONED_KEY_CACHE_CB * keycache)5726 reset_partitioned_key_cache_counters(const char *name __attribute__((unused)),
5727 PARTITIONED_KEY_CACHE_CB *keycache)
5728 {
5729 uint i;
5730 uint partitions= keycache->partitions;
5731 DBUG_ENTER("partitioned_reset_key_cache_counters");
5732
5733 for (i = 0; i < partitions; i++)
5734 {
5735 reset_simple_key_cache_counters(name, keycache->partition_array[i]);
5736 }
5737 DBUG_RETURN(0);
5738 }
5739
5740
5741 /*
5742 Get statistics for a partition key cache
5743
5744 SYNOPSIS
5745 get_partitioned_key_cache_statistics()
5746 keycache pointer to the control block of a partitioned key cache
5747 partition_no partition number to get statistics for
5748 key_cache_stats OUT pointer to the structure for the returned statistics
5749
5750 DESCRIPTION
5751 This function is the implementation of the get_key_cache_statistics
5752 interface function that is employed by partitioned key caches.
5753 The function takes the parameter keycache as a pointer to the
5754 control block structure of the type PARTITIONED_KEY_CACHE_CB for
5755 a partitioned key cache.
5756 If the value of the parameter partition_no is equal to 0 then aggregated
5757 statistics for all partitions is returned in the fields of the
5758 structure key_cache_stat of the type KEY_CACHE_STATISTICS . Otherwise
5759 the function returns data for the partition number partition_no of the
5760 key cache in the structure key_cache_stat. (Here partitions are numbered
5761 starting from 1.)
5762
5763 RETURN
5764 none
5765 */
5766
5767 static
5768 void
get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB * keycache,uint partition_no,KEY_CACHE_STATISTICS * keycache_stats)5769 get_partitioned_key_cache_statistics(PARTITIONED_KEY_CACHE_CB *keycache,
5770 uint partition_no,
5771 KEY_CACHE_STATISTICS *keycache_stats)
5772 {
5773 uint i;
5774 SIMPLE_KEY_CACHE_CB *partition;
5775 uint partitions= keycache->partitions;
5776 DBUG_ENTER("get_partitioned_key_cache_statistics");
5777
5778 if (partition_no != 0)
5779 {
5780 partition= keycache->partition_array[partition_no-1];
5781 get_simple_key_cache_statistics((void *) partition, 0, keycache_stats);
5782 DBUG_VOID_RETURN;
5783 }
5784 bzero(keycache_stats, sizeof(KEY_CACHE_STATISTICS));
5785 keycache_stats->mem_size= (longlong) keycache->key_cache_mem_size;
5786 keycache_stats->block_size= (longlong) keycache->key_cache_block_size;
5787 for (i = 0; i < partitions; i++)
5788 {
5789 partition= keycache->partition_array[i];
5790 keycache_stats->blocks_used+= partition->blocks_used;
5791 keycache_stats->blocks_unused+= partition->blocks_unused;
5792 keycache_stats->blocks_changed+= partition->global_blocks_changed;
5793 keycache_stats->blocks_warm+= partition->warm_blocks;
5794 keycache_stats->read_requests+= partition->global_cache_r_requests;
5795 keycache_stats->reads+= partition->global_cache_read;
5796 keycache_stats->write_requests+= partition->global_cache_w_requests;
5797 keycache_stats->writes+= partition->global_cache_write;
5798 }
5799 DBUG_VOID_RETURN;
5800 }
5801
5802 /*
5803 The array of pointers to the key cache interface functions used by
5804 partitioned key caches. Any partitioned key cache object caches exploits
5805 this array.
5806
5807 The current implementation of these functions does not allow to call
5808 them from the MySQL server code directly. The key cache interface
5809 wrappers must be used for this purpose.
5810 */
5811
/* Interface dispatch table: the partitioned implementations of each
   key cache operation, in KEY_CACHE_FUNCS member order */
static KEY_CACHE_FUNCS partitioned_key_cache_funcs =
{
  (INIT_KEY_CACHE) init_partitioned_key_cache,
  (RESIZE_KEY_CACHE) resize_partitioned_key_cache,
  (CHANGE_KEY_CACHE_PARAM) change_partitioned_key_cache_param,
  (KEY_CACHE_READ) partitioned_key_cache_read,
  (KEY_CACHE_INSERT) partitioned_key_cache_insert,
  (KEY_CACHE_WRITE) partitioned_key_cache_write,
  (FLUSH_KEY_BLOCKS) flush_partitioned_key_cache_blocks,
  (RESET_KEY_CACHE_COUNTERS) reset_partitioned_key_cache_counters,
  (END_KEY_CACHE) end_partitioned_key_cache,
  (GET_KEY_CACHE_STATISTICS) get_partitioned_key_cache_statistics,
};
5825
5826
5827 /******************************************************************************
5828 Key Cache Interface Module
5829
5830 The module contains wrappers for all key cache interface functions.
5831
5832 Currently there are key caches of two types: simple key caches and
5833 partitioned key caches. Each type (class) has its own implementation of the
5834 basic key cache operations used the MyISAM storage engine. The pointers
5835 to the implementation functions are stored in two static structures of the
5836 type KEY_CACHE_FUNC: simple_key_cache_funcs - for simple key caches, and
5837 partitioned_key_cache_funcs - for partitioned key caches. When a key cache
5838 object is created the constructor procedure init_key_cache places a pointer
5839 to the corresponding table into one of its fields. The procedure also
5840 initializes a control block for the key cache oject and saves the pointer
5841 to this block in another field of the key cache object.
5842 When a key cache wrapper function is invoked for a key cache object to
5843 perform a basic key cache operation it looks into the interface table
5844 associated with the key cache oject and calls the corresponding
5845 implementation of the operation. It passes the saved key cache control
5846 block to this implementation. If, for some reasons, the control block
5847 has not been fully initialized yet, the wrapper function either does not
5848 do anything or, in the case when it perform a read/write operation, the
5849 function do it directly through the system i/o functions.
5850
5851 As we can see the model with which the key cache interface is supported
5852 as quite conventional for interfaces in general.
5853
5854 ******************************************************************************/
5855
5856 static
5857 int repartition_key_cache_internal(KEY_CACHE *keycache,
5858 uint key_cache_block_size, size_t use_mem,
5859 uint division_limit, uint age_threshold,
5860 uint changed_blocks_hash_size,
5861 uint partitions, my_bool use_op_lock);
5862
5863 /*
5864 Initialize a key cache : internal
5865
5866 SYNOPSIS
5867 init_key_cache_internal()
5868 keycache pointer to the key cache to be initialized
5869 key_cache_block_size size of blocks to keep cached data
5870 use_mem total memory to use for cache buffers/structures
5871 division_limit division limit (may be zero)
5872 age_threshold age threshold (may be zero)
5873 changed_blocks_hash_size Number of hash buckets to hold a link of different
5874 files. Should be proportional to number of different
                             files used.
5876 partitions Number of partitions in the key cache
5877 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
5878
5879 DESCRIPTION
5880 The function performs the actions required from init_key_cache().
5881 It has an additional parameter: use_op_lock. When the parameter
5882 is TRUE than the function initializes keycache->op_lock if needed,
5883 then locks it, and unlocks it before the return. Otherwise the actions
5884 with the lock are omitted.
5885
5886 RETURN VALUE
5887 total number of blocks in key cache partitions, if successful,
5888 <= 0 - otherwise.
5889
5890 NOTES
5891 if keycache->key_cache_inited != 0 we assume that the memory
5892 for the control block of the key cache has been already allocated.
5893 */
5894
static
int init_key_cache_internal(KEY_CACHE *keycache, uint key_cache_block_size,
                            size_t use_mem, uint division_limit,
                            uint age_threshold, uint changed_blocks_hash_size,
                            uint partitions,
                            my_bool use_op_lock)
{
  void *keycache_cb;
  int blocks;
  if (keycache->key_cache_inited)
  {
    /* Control block was allocated earlier: reuse it (under op_lock) */
    if (use_op_lock)
      pthread_mutex_lock(&keycache->op_lock);
    keycache_cb= keycache->keycache_cb;
  }
  else
  {
    if (partitions == 0)
    {
      /* partitions == 0: create a simple (non-partitioned) key cache */
      if (!(keycache_cb= (void *) my_malloc(key_memory_KEY_CACHE,
                                            sizeof(SIMPLE_KEY_CACHE_CB),
                                            MYF(0))))
        return 0;
      ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
      keycache->key_cache_type= SIMPLE_KEY_CACHE;
      keycache->interface_funcs= &simple_key_cache_funcs;
    }
    else
    {
      /* partitions != 0: create a partitioned key cache */
      if (!(keycache_cb= (void *) my_malloc(key_memory_KEY_CACHE,
                                            sizeof(PARTITIONED_KEY_CACHE_CB),
                                            MYF(0))))
        return 0;
      ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_inited= 0;
      keycache->key_cache_type= PARTITIONED_KEY_CACHE;
      keycache->interface_funcs= &partitioned_key_cache_funcs;
    }
    /*
      Initialize op_lock if it's not initialized before.
      The mutex may have been initialized before if we are being called
      from repartition_key_cache_internal().
    */
    if (use_op_lock)
      pthread_mutex_init(&keycache->op_lock, MY_MUTEX_INIT_FAST);
    keycache->keycache_cb= keycache_cb;
    keycache->key_cache_inited= 1;
    if (use_op_lock)
      pthread_mutex_lock(&keycache->op_lock);
  }

  if (partitions != 0)
  {
    /* Tell the partitioned control block how many partitions to build */
    ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions= partitions;
  }
  keycache->can_be_used= 0;   /* disabled until init() below succeeds */
  blocks= keycache->interface_funcs->init(keycache_cb, key_cache_block_size,
                                          use_mem, division_limit,
                                          age_threshold, changed_blocks_hash_size);
  /* Read the partition count back from the control block (init() may
     have adjusted it — confirm against init_partitioned_key_cache()) */
  keycache->partitions= partitions ?
                        ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->partitions :
                        0;
  DBUG_ASSERT(partitions <= MAX_KEY_CACHE_PARTITIONS);
  /* Cache the allocated size on the wrapper for cheap access */
  keycache->key_cache_mem_size=
    keycache->partitions ?
    ((PARTITIONED_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size :
    ((SIMPLE_KEY_CACHE_CB *) keycache_cb)->key_cache_mem_size;
  if (blocks > 0)
    keycache->can_be_used= 1;
  if (use_op_lock)
    pthread_mutex_unlock(&keycache->op_lock);
  return blocks;
}
5967
5968
5969 /*
5970 Initialize a key cache
5971
5972 SYNOPSIS
5973 init_key_cache()
5974 keycache pointer to the key cache to be initialized
5975 key_cache_block_size size of blocks to keep cached data
5976 use_mem total memory to use for cache buffers/structures
5977 division_limit division limit (may be zero)
5978 age_threshold age threshold (may be zero)
5979 partitions number of partitions in the key cache
5980
5981 DESCRIPTION
5982 The function creates a control block structure for a key cache and
5983 places the pointer to this block in the structure keycache.
5984 If the value of the parameter 'partitions' is 0 then a simple key cache
5985 is created. Otherwise a partitioned key cache with the specified number
5986 of partitions is created.
5987 The parameter key_cache_block_size specifies the size of the blocks in
5988 the key cache to be created. The parameters division_limit and
5989 age_threshold determine the initial values of those characteristics of
5990 the key cache that are used for midpoint insertion strategy. The parameter
5991 use_mem specifies the total amount of memory to be allocated for the
5992 key cache buffers and for all auxiliary structures.
5993 The function calls init_key_cache_internal() to perform all these actions
5994 with the last parameter set to TRUE.
5995
5996 RETURN VALUE
5997 total number of blocks in key cache partitions, if successful,
5998 <= 0 - otherwise.
5999
6000 NOTES
6001 It's assumed that no two threads call this function simultaneously
6002 referring to the same key cache handle.
6003 */
6004
init_key_cache(KEY_CACHE * keycache,uint key_cache_block_size,size_t use_mem,uint division_limit,uint age_threshold,uint changed_blocks_hash_size,uint partitions)6005 int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
6006 size_t use_mem, uint division_limit,
6007 uint age_threshold, uint changed_blocks_hash_size,
6008 uint partitions)
6009 {
6010 return init_key_cache_internal(keycache, key_cache_block_size, use_mem,
6011 division_limit, age_threshold,
6012 changed_blocks_hash_size, partitions, 1);
6013 }
6014
6015
6016 /*
6017 Resize a key cache
6018
6019 SYNOPSIS
6020 resize_key_cache()
6021 keycache pointer to the key cache to be resized
6022 key_cache_block_size size of blocks to keep cached data
6023 use_mem total memory to use for the new key cache
6024 division_limit new division limit (if not zero)
6025 age_threshold new age threshold (if not zero)
6026
6027 DESCRIPTION
6028 The function operates over the key cache key cache.
6029 The parameter key_cache_block_size specifies the new size of the block
6030 buffers in the key cache. The parameters division_limit and age_threshold
6031 determine the new initial values of those characteristics of the key cache
6032 that are used for midpoint insertion strategy. The parameter use_mem
6033 specifies the total amount of memory to be allocated for the key cache
6034 buffers and for all auxiliary structures.
6035
6036 RETURN VALUE
6037 number of blocks in the key cache, if successful,
6038 0 - otherwise.
6039
6040 NOTES
6041 The function does not block the calls and executions of other functions
6042 from the key cache interface. However it assumes that the calls of
6043 resize_key_cache itself are serialized.
6044
6045 Currently the function is called when the values of the variables
6046 key_buffer_size and/or key_cache_block_size are being reset for
6047 the key cache keycache.
6048 */
6049
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                     size_t use_mem, uint division_limit, uint age_threshold,
                     uint changed_blocks_hash_size)
{
  /* -1 signals failure; can_be_used below is derived from (blocks >= 0) */
  int blocks= -1;
  if (keycache->key_cache_inited)
  {
    pthread_mutex_lock(&keycache->op_lock);
    /*
      If the requested number of partitions differs from the current one
      (and some memory is actually requested), the resize must be done as
      a full repartition: tear the cache down and rebuild it.
      use_op_lock=0 because op_lock is already held here.
    */
    if ((uint) keycache->param_partitions != keycache->partitions && use_mem)
      blocks= repartition_key_cache_internal(keycache,
                                             key_cache_block_size, use_mem,
                                             division_limit, age_threshold,
                                             changed_blocks_hash_size,
                                             (uint) keycache->param_partitions,
                                             0);
    else
    {
      /* Same partitioning: let the underlying implementation resize */
      blocks= keycache->interface_funcs->resize(keycache->keycache_cb,
                                                key_cache_block_size,
                                                use_mem, division_limit,
                                                age_threshold,
                                                changed_blocks_hash_size);

      /*
        A partitioned cache may have changed its partition count during
        the resize; refresh the cached value from the control block.
      */
      if (keycache->partitions)
        keycache->partitions=
          ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->partitions;
    }

    /*
      Re-read the memory size from whichever control-block type is in use
      (partitioned vs simple).
    */
    keycache->key_cache_mem_size=
      keycache->partitions ?
      ((PARTITIONED_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size :
      ((SIMPLE_KEY_CACHE_CB *)(keycache->keycache_cb))->key_cache_mem_size;

    /* The cache is usable only if the resize/repartition succeeded */
    keycache->can_be_used= (blocks >= 0);
    pthread_mutex_unlock(&keycache->op_lock);
  }
  return blocks;
}
6088
6089
6090 /*
6091 Change key cache parameters of a key cache
6092
6093 SYNOPSIS
6094 change_key_cache_param()
6095 keycache pointer to the key cache to change parameters for
6096 division_limit new division limit (if not zero)
6097 age_threshold new age threshold (if not zero)
6098
6099 DESCRIPTION
6100 The function sets new values of the division limit and the age threshold
6101     used when the key cache keycache employs midpoint insertion strategy.
6102 The parameters division_limit and age_threshold provide these new values.
6103
6104 RETURN VALUE
6105 none
6106
6107 NOTES
6108 Currently the function is called when the values of the variables
6109 key_cache_division_limit and/or key_cache_age_threshold are being reset
6110 for the key cache keycache.
6111 */
6112
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
                            uint age_threshold)
{
  /* Nothing to adjust for a cache that was never initialized */
  if (!keycache->key_cache_inited)
    return;

  pthread_mutex_lock(&keycache->op_lock);
  keycache->interface_funcs->change_param(keycache->keycache_cb,
                                          division_limit, age_threshold);
  pthread_mutex_unlock(&keycache->op_lock);
}
6125
6126
6127 /*
6128 Destroy a key cache : internal
6129
6130 SYNOPSIS
6131 end_key_cache_internal()
6132 keycache pointer to the key cache to be destroyed
6133 cleanup <=> complete free
6134 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
6135
6136 DESCRIPTION
6137 The function performs the actions required from end_key_cache().
6138 It has an additional parameter: use_op_lock. When the parameter
6139     is TRUE then the function destroys keycache->op_lock if cleanup is true.
6140 Otherwise the action with the lock is omitted.
6141
6142 RETURN VALUE
6143 none
6144 */
6145
6146 static
static
void end_key_cache_internal(KEY_CACHE *keycache, my_bool cleanup,
                            my_bool use_op_lock)
{
  if (!keycache->key_cache_inited)
    return;

  /* Let the underlying (simple or partitioned) implementation shut down */
  keycache->interface_funcs->end(keycache->keycache_cb, cleanup);

  if (cleanup)
  {
    /* Release the control block, if one was ever allocated */
    if (keycache->keycache_cb)
    {
      my_free(keycache->keycache_cb);
      keycache->keycache_cb= 0;
    }
    /*
      op_lock survives when the same key cache is about to be reused,
      which is the case when we are called (with use_op_lock == FALSE)
      from repartition_key_cache_internal().
    */
    if (use_op_lock)
      pthread_mutex_destroy(&keycache->op_lock);
    keycache->key_cache_inited= 0;
  }
  keycache->can_be_used= 0;
}
6171
6172
6173 /*
6174 Destroy a key cache
6175
6176 SYNOPSIS
6177 end_key_cache()
6178 keycache pointer to the key cache to be destroyed
6179 cleanup <=> complete free
6180
6181 DESCRIPTION
6182 The function frees the memory allocated for the cache blocks and
6183 auxiliary structures used by the key cache keycache. If the value
6184 of the parameter cleanup is TRUE then all resources used by the key
6185 cache are to be freed.
6186 The function calls end_key_cache_internal() to perform all these actions
6187 with the last parameter set to TRUE.
6188
6189 RETURN VALUE
6190 none
6191 */
6192
void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{
  /* use_op_lock= 1: also destroy op_lock when cleanup is requested */
  end_key_cache_internal(keycache, cleanup, 1);
}
6197
6198
6199 /*
6200 Read a block of data from a key cache into a buffer
6201
6202 SYNOPSIS
6203
6204 key_cache_read()
6205 keycache pointer to the key cache to read data from
6206 file handler for the file for the block of data to be read
6207 filepos position of the block of data in the file
6208 level determines the weight of the data
6209 buff buffer to where the data must be placed
6210 length length of the buffer
6211 block_length length of the data read from a key cache block
6212 return_buffer return pointer to the key cache buffer with the data
6213
6214 DESCRIPTION
6215 The function operates over buffers of the key cache keycache.
6216 In a general case the function reads a block of data from the key cache
6217 into the buffer buff of the size specified by the parameter length. The
6218 beginning of the block of data to be read is specified by the parameters
6219 file and filepos. The length of the read data is the same as the length
6220 of the buffer.
6221 If the parameter return_buffer is not ignored and its value is TRUE, and
6222 the data to be read of the specified size block_length can be read from one
6223 key cache buffer, then the function returns a pointer to the data in the
6224 key cache buffer.
6225 The parameter 'level' is used only by the midpoint insertion strategy
6226 when the data or its portion cannot be found in the key cache.
6227 The function reads data into the buffer directly from file if the control
6228 block of the key cache has not been initialized yet.
6229
6230 RETURN VALUE
6231 Returns address from where the data is placed if successful, 0 - otherwise.
6232
6233 NOTES.
6234 Filepos must be a multiple of 'block_length', but it doesn't
6235 have to be a multiple of key_cache_block_size;
6236 */
6237
uchar *key_cache_read(KEY_CACHE *keycache,
                      File file, my_off_t filepos, int level,
                      uchar *buff, uint length,
                      uint block_length, int return_buffer)
{
  if (!keycache->can_be_used)
  {
    /*
      The cache is unusable: read straight from the file. No mutex is
      taken here because the key cache may not be initialized at all.
    */
    if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
      return (uchar *) 0;
    return buff;
  }

  /* Dispatch to the simple or partitioned implementation */
  return keycache->interface_funcs->read(keycache->keycache_cb,
                                         file, filepos, level,
                                         buff, length,
                                         block_length, return_buffer);
}
6256
6257
6258 /*
6259 Insert a block of file data from a buffer into a key cache
6260
6261 SYNOPSIS
6262 key_cache_insert()
6263 keycache pointer to the key cache to insert data into
6264 file handler for the file to insert data from
6265 filepos position of the block of data in the file to insert
6266 level determines the weight of the data
6267 buff buffer to read data from
6268 length length of the data in the buffer
6269
6270 DESCRIPTION
6271 The function operates over buffers of the key cache keycache.
6272 The function writes a block of file data from a buffer into the key cache.
6273 The buffer is specified with the parameters buff and length - the pointer
6274 to the beginning of the buffer and its size respectively. It's assumed
6275 that the buffer contains the data from 'file' allocated from the position
6276 filepos.
6277 The parameter level is used to set one characteristic for the key buffers
6278 loaded with the data from buff. The characteristic is used only by the
6279 midpoint insertion strategy.
6280
6281 RETURN VALUE
6282 0 if a success, 1 - otherwise.
6283
6284 NOTES
6285     The function is used by MyISAM to move all blocks from an index file to
6286 the key cache.
6287 It is assumed that it may be performed in parallel with reading the file
6288 data from the key buffers by other threads.
6289 */
6290
int key_cache_insert(KEY_CACHE *keycache,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  /* With no usable cache there is nothing to preload into: success */
  if (!keycache->can_be_used)
    return 0;

  return keycache->interface_funcs->insert(keycache->keycache_cb,
                                           file, filepos, level,
                                           buff, length);
}
6301
6302
6303 /*
6304 Write data from a buffer into a key cache
6305
6306 SYNOPSIS
6307
6308 key_cache_write()
6309 keycache pointer to the key cache to write data to
6310 file handler for the file to write data to
6311 filepos position in the file to write data to
6312 level determines the weight of the data
6313 buff buffer with the data
6314 length length of the buffer
6315 dont_write if is 0 then all dirty pages involved in writing
6316 should have been flushed from key cache
6317 file_extra pointer to optional file attributes
6318
6319 DESCRIPTION
6320 The function operates over buffers of the key cache keycache.
6321 In a general case the function writes data from a buffer into the key
6322 cache. The buffer is specified with the parameters buff and length -
6323 the pointer to the beginning of the buffer and its size respectively.
6324 It's assumed the buffer contains the data to be written into 'file'
6325 starting from the position filepos.
6326 If the value of the parameter dont_write is FALSE then the function
6327 also writes the data into file.
6328 The parameter level is used to set one characteristic for the key buffers
6329 filled with the data from buff. The characteristic is employed only by
6330 the midpoint insertion strategy.
6331     The parameter file_extra may point to additional file attributes used
6332 for optimization or other purposes.
6333 The function writes data from the buffer directly into file if the control
6334 block of the key cache has not been initialized yet.
6335
6336 RETURN VALUE
6337 0 if a success, 1 - otherwise.
6338
6339 NOTES
6340 This implementation may exploit the fact that the function is called only
6341 when a thread has got an exclusive lock for the key file.
6342 */
6343
int key_cache_write(KEY_CACHE *keycache,
                    File file, void *file_extra,
                    my_off_t filepos, int level,
                    uchar *buff, uint length,
                    uint block_length, int force_write)
{
  if (!keycache->can_be_used)
  {
    /*
      Cache unusable: write directly to the file. No mutex is taken here
      because the key cache may not be initialized at all.
    */
    return my_pwrite(file, buff, length, filepos,
                     MYF(MY_NABP | MY_WAIT_IF_FULL)) ? 1 : 0;
  }

  /* Dispatch to the simple or partitioned implementation */
  return keycache->interface_funcs->write(keycache->keycache_cb,
                                          file, file_extra,
                                          filepos, level,
                                          buff, length,
                                          block_length, force_write);
}
6363
6364
6365 /*
6366 Flush all blocks for a file from key buffers of a key cache
6367
6368 SYNOPSIS
6369
6370 flush_key_blocks()
6371 keycache pointer to the key cache whose blocks are to be flushed
6372 file handler for the file to flush to
6373 file_extra maps of key cache (used for partitioned key caches)
6374 flush_type type of the flush operation
6375
6376 DESCRIPTION
6377 The function operates over buffers of the key cache keycache.
6378 In a general case the function flushes the data from all dirty key
6379 buffers related to the file 'file' into this file. The function does
6380 exactly this if the value of the parameter type is FLUSH_KEEP. If the
6381 value of this parameter is FLUSH_RELEASE, the function additionally
6382 releases the key buffers containing data from 'file' for new usage.
6383 If the value of the parameter type is FLUSH_IGNORE_CHANGED the function
6384 just releases the key buffers containing data from 'file'.
6385 If the value of the parameter type is FLUSH_KEEP the function may use
6386 the value of the parameter file_extra pointing to possibly dirty
6387 partitions to optimize the operation for partitioned key caches.
6388
6389 RETURN
6390 0 ok
6391 1 error
6392
6393 NOTES
6394 Any implementation of the function may exploit the fact that the function
6395 is called only when a thread has got an exclusive lock for the key file.
6396 */
6397
int flush_key_blocks(KEY_CACHE *keycache,
                     int file, void *file_extra,
                     enum flush_type type)
{
  /* An unusable cache holds no blocks for the file: nothing to flush */
  if (!keycache->can_be_used)
    return 0;

  return keycache->interface_funcs->flush(keycache->keycache_cb,
                                          file, file_extra, type);
}
6407
6408
6409 /*
6410 Reset the counters of a key cache
6411
6412 SYNOPSIS
6413 reset_key_cache_counters()
6414 name the name of a key cache (unused)
6415 keycache pointer to the key cache for which to reset counters
6416
6417 DESCRIPTION
6418 This function resets the values of the statistical counters for the key
6419 cache keycache.
6420 The parameter name is currently not used.
6421
6422 RETURN
6423 0 on success (always because it can't fail)
6424
6425 NOTES
6426 This procedure is used by process_key_caches() to reset the counters of all
6427 currently used key caches, both the default one and the named ones.
6428 */
6429
int reset_key_cache_counters(const char *name __attribute__((unused)),
                             KEY_CACHE *keycache,
                             void *unused __attribute__((unused)))
{
  int res;

  /* An uninitialized cache has no counters to reset */
  if (!keycache->key_cache_inited)
    return 0;

  pthread_mutex_lock(&keycache->op_lock);
  res= keycache->interface_funcs->reset_counters(name,
                                                 keycache->keycache_cb);
  pthread_mutex_unlock(&keycache->op_lock);
  return res;
}
6444
6445
6446 /*
6447 Get statistics for a key cache
6448
6449 SYNOPSIS
6450 get_key_cache_statistics()
6451 keycache pointer to the key cache to get statistics for
6452 partition_no partition number to get statistics for
6453 key_cache_stats OUT pointer to the structure for the returned statistics
6454
6455 DESCRIPTION
6456 If the value of the parameter partition_no is equal to 0 then statistics
6457 for the whole key cache keycache (aggregated statistics) is returned in the
6458 fields of the structure key_cache_stat of the type KEY_CACHE_STATISTICS.
6459 Otherwise the value of the parameter partition_no makes sense only for
6460 a partitioned key cache. In this case the function returns statistics
6461 for the partition with the specified number partition_no.
6462
6463 RETURN
6464 none
6465 */
6466
void get_key_cache_statistics(KEY_CACHE *keycache, uint partition_no,
                              KEY_CACHE_STATISTICS *key_cache_stats)
{
  /* No statistics are available for an uninitialized cache */
  if (!keycache->key_cache_inited)
    return;

  pthread_mutex_lock(&keycache->op_lock);
  keycache->interface_funcs->get_stats(keycache->keycache_cb,
                                       partition_no, key_cache_stats);
  pthread_mutex_unlock(&keycache->op_lock);
}
6478
6479
6480 /*
6481 Repartition a key cache : internal
6482
6483 SYNOPSIS
6484 repartition_key_cache_internal()
6485 keycache pointer to the key cache to be repartitioned
6486 key_cache_block_size size of blocks to keep cached data
6487 use_mem total memory to use for the new key cache
6488 division_limit new division limit (if not zero)
6489 age_threshold new age threshold (if not zero)
6490 partitions new number of partitions in the key cache
6491 use_op_lock if TRUE use keycache->op_lock, otherwise - ignore it
6492
6493 DESCRIPTION
6494 The function performs the actions required from repartition_key_cache().
6495 It has an additional parameter: use_op_lock. When the parameter
6496 is TRUE then the function locks keycache->op_lock at start and
6497 unlocks it before the return. Otherwise the actions with the lock
6498 are omitted.
6499
6500 RETURN VALUE
6501 number of blocks in the key cache, if successful,
6502 0 - otherwise.
6503 */
6504
6505 static
repartition_key_cache_internal(KEY_CACHE * keycache,uint key_cache_block_size,size_t use_mem,uint division_limit,uint age_threshold,uint changed_blocks_hash_size,uint partitions,my_bool use_op_lock)6506 int repartition_key_cache_internal(KEY_CACHE *keycache,
6507 uint key_cache_block_size, size_t use_mem,
6508 uint division_limit, uint age_threshold,
6509 uint changed_blocks_hash_size,
6510 uint partitions, my_bool use_op_lock)
6511 {
6512 uint blocks= -1;
6513 if (keycache->key_cache_inited)
6514 {
6515 if (use_op_lock)
6516 pthread_mutex_lock(&keycache->op_lock);
6517 keycache->interface_funcs->resize(keycache->keycache_cb,
6518 key_cache_block_size, 0,
6519 division_limit, age_threshold,
6520 changed_blocks_hash_size);
6521 end_key_cache_internal(keycache, 1, 0);
6522 blocks= init_key_cache_internal(keycache, key_cache_block_size, use_mem,
6523 division_limit, age_threshold,
6524 changed_blocks_hash_size, partitions,
6525 0);
6526 if (use_op_lock)
6527 pthread_mutex_unlock(&keycache->op_lock);
6528 }
6529 return blocks;
6530 }
6531
6532 /*
6533 Repartition a key cache
6534
6535 SYNOPSIS
6536 repartition_key_cache()
6537 keycache pointer to the key cache to be repartitioned
6538 key_cache_block_size size of blocks to keep cached data
6539 use_mem total memory to use for the new key cache
6540 division_limit new division limit (if not zero)
6541 age_threshold new age threshold (if not zero)
6542 partitions new number of partitions in the key cache
6543
6544 DESCRIPTION
6545 The function operates over the key cache keycache.
6546 The parameter partitions specifies the number of partitions in the key
6547 cache after repartitioning. If the value of this parameter is 0 then
6548 a simple key cache must be created instead of the old one.
6549 The parameter key_cache_block_size specifies the new size of the block
6550 buffers in the key cache. The parameters division_limit and age_threshold
6551 determine the new initial values of those characteristics of the key cache
6552 that are used for midpoint insertion strategy. The parameter use_mem
6553 specifies the total amount of memory to be allocated for the new key
6554 cache buffers and for all auxiliary structures.
6555 The function calls repartition_key_cache_internal() to perform all these
6556 actions with the last parameter set to TRUE.
6557
6558 RETURN VALUE
6559 number of blocks in the key cache, if successful,
6560 0 - otherwise.
6561
6562 NOTES
6563 Currently the function is called when the value of the variable
6564 key_cache_partitions is being reset for the key cache keycache.
6565 */
6566
int repartition_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                          size_t use_mem, uint division_limit,
                          uint age_threshold, uint changed_blocks_hash_size,
                          uint partitions)
{
  /* use_op_lock= 1: take keycache->op_lock for the whole operation */
  int blocks= repartition_key_cache_internal(keycache, key_cache_block_size,
                                             use_mem, division_limit,
                                             age_threshold,
                                             changed_blocks_hash_size,
                                             partitions, 1);
  return blocks;
}
6577
6578