1 /* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    Without limiting anything contained in the foregoing, this file,
15    which is part of C Driver for MySQL (Connector/C), is also subject to the
16    Universal FOSS Exception, version 1.0, a copy of which can be found at
17    http://oss.oracle.com/licenses/universal-foss-exception.
18 
19    This program is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22    GNU General Public License, version 2.0, for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
27 
28 /**
29   @file
  These functions handle keyblock caching for ISAM and MyISAM tables.
31 
32   One cache can handle many files.
33   It must contain buffers of the same blocksize.
34   init_key_cache() should be used to init cache handler.
35 
36   The free list (free_block_list) is a stack like structure.
37   When a block is freed by free_block(), it is pushed onto the stack.
38   When a new block is required it is first tried to pop one from the stack.
39   If the stack is empty, it is tried to get a never-used block from the pool.
40   If this is empty too, then a block is taken from the LRU ring, flushing it
  to disk, if necessary. This is handled in find_key_block().
42   With the new free list, the blocks can have three temperatures:
43   hot, warm and cold (which is free). This is remembered in the block header
44   by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
  temperature is necessary to correctly count the number of warm blocks,
46   which is required to decide when blocks are allowed to become hot. Whenever
47   a block is inserted to another (sub-)chain, we take the old and new
48   temperature into account to decide if we got one more or less warm block.
49   blocks_unused is the sum of never used blocks in the pool and of currently
50   free blocks. blocks_used is the number of blocks fetched from the pool and
51   as such gives the maximum number of in-use blocks at any time.
52 */
53 
54 /*
55   Key Cache Locking
56   =================
57 
58   All key cache locking is done with a single mutex per key cache:
59   keycache->cache_lock. This mutex is locked almost all the time
60   when executing code in this file (mf_keycache.c).
61   However it is released for I/O and some copy operations.
62 
63   The cache_lock is also released when waiting for some event. Waiting
64   and signalling is done via condition variables. In most cases the
65   thread waits on its thread->suspend condition variable. Every thread
66   has a my_thread_var structure, which contains this variable and a
67   '*next' and '**prev' pointer. These pointers are used to insert the
68   thread into a wait queue.
69 
70   A thread can wait for one block and thus be in one wait queue at a
71   time only.
72 
73   Before starting to wait on its condition variable with
74   mysql_cond_wait(), the thread enters itself to a specific wait queue
75   with link_into_queue() (double linked with '*next' + '**prev') or
76   wait_on_queue() (single linked with '*next').
77 
78   Another thread, when releasing a resource, looks up the waiting thread
79   in the related wait queue. It sends a signal with
80   mysql_cond_signal() to the waiting thread.
81 
82   NOTE: Depending on the particular wait situation, either the sending
83   thread removes the waiting thread from the wait queue with
84   unlink_from_queue() or release_whole_queue() respectively, or the waiting
85   thread removes itself.
86 
87   There is one exception from this locking scheme when one thread wants
88   to reuse a block for some other address. This works by first marking
89   the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
90   threads that are reading the block to finish. Each block has a
91   reference to a condition variable (condvar). It holds a reference to
92   the thread->suspend condition variable for the waiting thread (if such
93   a thread exists). When that thread is signaled, the reference is
94   cleared. The number of readers of a block is registered in
95   block->hash_link->requests. See wait_for_readers() / remove_reader()
96   for details. This is similar to the above, but it clearly means that
97   only one thread can wait for a particular block. There is no queue in
  this case. Strangely enough block->condvar is used for waiting for the
99   assigned hash_link only. More precisely it is used to wait for all
100   requests to be unregistered from the assigned hash_link.
101 
102   The resize_queue serves two purposes:
103   1. Threads that want to do a resize wait there if in_resize is set.
104      This is not used in the server. The server refuses a second resize
105      request if one is already active. keycache->in_init is used for the
106      synchronization. See set_var.cc.
107   2. Threads that want to access blocks during resize wait here during
108      the re-initialization phase.
109   When the resize is done, all threads on the queue are signalled.
110   Hypothetical resizers can compete for resizing, and read/write
111   requests will restart to request blocks from the freshly resized
112   cache. If the cache has been resized too small, it is disabled and
113   'can_be_used' is false. In this case read/write requests bypass the
114   cache. Since they increment and decrement 'cnt_for_resize_op', the
115   next resizer can wait on the queue 'waiting_for_resize_cnt' until all
116   I/O finished.
117 */
118 
119 #include "mysys_priv.h"
120 #include "mysys_err.h"
121 #include <keycache.h>
122 #include "my_static.h"
123 #include <m_string.h>
124 #include <my_bit.h>
125 #include <errno.h>
126 #include <stdarg.h>
127 #include "probes_mysql.h"
128 
129 /*
130   Some compilation flags have been added specifically for this module
131   to control the following:
132   - not to let a thread to yield the control when reading directly
133     from key cache, which might improve performance in many cases;
134     to enable this add:
135     #define SERIALIZED_READ_FROM_CACHE
136   - to set an upper bound for number of threads simultaneously
137     using the key cache; this setting helps to determine an optimal
138     size for hash table and improve performance when the number of
139     blocks in the key cache much less than the number of threads
140     accessing it;
141     to set this number equal to <N> add
142       #define MAX_THREADS <N>
143   - to substitute calls of mysql_cond_wait for calls of
144     mysql_cond_timedwait (wait with timeout set up);
145     this setting should be used only when you want to trap a deadlock
146     situation, which theoretically should not happen;
147     to set timeout equal to <T> seconds add
148       #define KEYCACHE_TIMEOUT <T>
149   - to enable the module traps and to send debug information from
150     key cache module to a special debug log add:
151       #define KEYCACHE_DEBUG
152     the name of this debug log file <LOG NAME> can be set through:
153       #define KEYCACHE_DEBUG_LOG  <LOG NAME>
154     if the name is not defined, it's set by default;
155     if the KEYCACHE_DEBUG flag is not set up and we are in a debug
156     mode, i.e. when ! defined(DBUG_OFF), the debug information from the
157     module is sent to the regular debug log.
158 
159   Example of the settings:
160     #define SERIALIZED_READ_FROM_CACHE
161     #define MAX_THREADS   100
162     #define KEYCACHE_TIMEOUT  1
163     #define KEYCACHE_DEBUG
164     #define KEYCACHE_DEBUG_LOG  "my_key_cache_debug.log"
165 */
166 
/*
  Get a pointer to an enclosing TYPE object from a pointer 'a' to its
  MEMBER (the classic 'container_of' idiom, built on offsetof).
*/
#define STRUCT_PTR(TYPE, MEMBER, a)                                           \
          (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))

/* types of condition variables (indexes/ids used when waiting on a block) */
#define  COND_FOR_REQUESTED 0
#define  COND_FOR_SAVED     1
#define  COND_FOR_READERS   2

typedef mysql_cond_t KEYCACHE_CONDVAR;
176 
177 /* descriptor of the page in the key cache block buffer */
/* descriptor of the page in the key cache block buffer */
struct st_keycache_page
{
  int file;               /* file the page belongs to             */
  my_off_t filepos;       /* position of the page in the file     */
};
183 
184 /* element in the chain of a hash table bucket */
/*
  Element in the chain of a hash table bucket.
  Maps a (file, offset) page address to the cache block holding it.
*/
struct st_hash_link
{
  struct st_hash_link *next, **prev; /* to connect links in the same bucket  */
  struct st_block_link *block;       /* reference to the block for the page: */
  File file;                         /* from such a file                     */
  my_off_t diskpos;                  /* with such an offset                  */
  uint requests;                     /* number of requests for the page      */
};
193 
/* simple states of a block (bit flags, combined in BLOCK_LINK::status) */
#define BLOCK_ERROR           1 /* an error occurred when performing file i/o */
#define BLOCK_READ            2 /* file block is in the block buffer         */
#define BLOCK_IN_SWITCH       4 /* block is preparing to read new page       */
#define BLOCK_REASSIGNED      8 /* blk does not accept requests for old page */
#define BLOCK_IN_FLUSH       16 /* block is selected for flush               */
#define BLOCK_CHANGED        32 /* block buffer contains a dirty page        */
#define BLOCK_IN_USE         64 /* block is not free                         */
#define BLOCK_IN_EVICTION   128 /* block is selected for eviction            */
#define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file                 */
#define BLOCK_FOR_UPDATE    512 /* block is selected for buffer modification */

/* page status, returned by find_key_block */
#define PAGE_READ               0
#define PAGE_TO_BE_READ         1
#define PAGE_WAIT_TO_BE_READ    2

/* block temperature determines in which (sub-)chain the block currently is */
enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };
213 
214 /* key cache block */
/* key cache block */
struct st_block_link
{
  struct st_block_link
    *next_used, **prev_used;   /* to connect links in the LRU chain (ring)   */
  struct st_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks   */
  struct st_hash_link *hash_link; /* backward ptr to referring hash_link     */
  KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
  uint requests;          /* number of requests for the block                */
  uchar *buffer;          /* buffer for the block page                       */
  uint offset;            /* beginning of modified data in the buffer        */
  uint length;            /* end of data in the buffer                       */
  uint status;            /* state of the block (BLOCK_* flags above)        */
  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
  uint hits_left;         /* number of hits left until promotion             */
  ulonglong last_hit_time; /* timestamp of the last hit                      */
  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event    */
};
233 
/* The default key cache used when no explicit cache handle is given. */
KEY_CACHE dflt_key_cache_var;
KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;

#define FLUSH_CACHE         2000            /* sort this many blocks at once */

static int flush_all_key_blocks(KEY_CACHE *keycache);

static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          mysql_mutex_t *mutex);
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);

static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);
#if !defined(DBUG_OFF)
static void test_key_cache(KEY_CACHE *keycache,
                           const char *where, my_bool lock);
#endif

/*
  Map a (file, position) pair to a hash bucket.
  hash_entries is kept a power of two, so the mask works as a modulo.
*/
#define KEYCACHE_HASH(f, pos)                                                 \
(((ulong) ((pos) / keycache->key_cache_block_size) +                          \
                                     (ulong) (f)) & (keycache->hash_entries-1))
#define FILE_HASH(f)                 ((uint) (f) & (CHANGED_BLOCKS_HASH-1))

#define DEFAULT_KEYCACHE_DEBUG_LOG  "keycache_debug.log"

#if defined(KEYCACHE_DEBUG) && ! defined(KEYCACHE_DEBUG_LOG)
#define KEYCACHE_DEBUG_LOG  DEFAULT_KEYCACHE_DEBUG_LOG
#endif

#if defined(KEYCACHE_DEBUG_LOG)
static FILE *keycache_debug_log=NULL;
static void keycache_debug_print(const char *fmt,...);
/* Lazily open the debug log file, line-buffered. */
#define KEYCACHE_DEBUG_OPEN                                                   \
          if (!keycache_debug_log)                                            \
          {                                                                   \
            keycache_debug_log= fopen(KEYCACHE_DEBUG_LOG, "w");               \
            (void) setvbuf(keycache_debug_log, NULL, _IOLBF, BUFSIZ);         \
          }

#define KEYCACHE_DEBUG_CLOSE                                                  \
          if (keycache_debug_log)                                             \
          {                                                                   \
            fclose(keycache_debug_log);                                       \
            keycache_debug_log= 0;                                            \
          }
#else
#define KEYCACHE_DEBUG_OPEN
#define KEYCACHE_DEBUG_CLOSE
#endif /* defined(KEYCACHE_DEBUG_LOG) */

#if defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG)
#define KEYCACHE_DBUG_PRINT(l, m)                                             \
            { if (keycache_debug_log) fprintf(keycache_debug_log, "%s: ", l); \
              keycache_debug_print m; }

#define KEYCACHE_DBUG_ASSERT(a)                                               \
            { if (! (a) && keycache_debug_log) fclose(keycache_debug_log);    \
              assert(a); }
#else
#define KEYCACHE_DBUG_PRINT(l, m)  DBUG_PRINT(l, m)
#define KEYCACHE_DBUG_ASSERT(a)    DBUG_ASSERT(a)
#endif /* defined(KEYCACHE_DEBUG_LOG) && defined(KEYCACHE_DEBUG) */

#if defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF)

static long keycache_thread_id;
#define KEYCACHE_THREAD_TRACE(l)                                              \
             KEYCACHE_DBUG_PRINT(l,("|thread %ld",keycache_thread_id))

#define KEYCACHE_THREAD_TRACE_BEGIN(l)                                        \
            { struct st_my_thread_var *thread_var= my_thread_var;             \
              keycache_thread_id= thread_var->id;                             \
              KEYCACHE_DBUG_PRINT(l,("[thread %ld",keycache_thread_id)) }

#define KEYCACHE_THREAD_TRACE_END(l)                                          \
            KEYCACHE_DBUG_PRINT(l,("]thread %ld",keycache_thread_id))
#else
#define KEYCACHE_THREAD_TRACE_BEGIN(l)
#define KEYCACHE_THREAD_TRACE_END(l)
#define KEYCACHE_THREAD_TRACE(l)
#endif /* defined(KEYCACHE_DEBUG) || !defined(DBUG_OFF) */

/* Index of a block / hash link within its root array (for diagnostics). */
#define BLOCK_NUMBER(b)                                                       \
  ((uint) (((char*)(b)-(char *) keycache->block_root)/sizeof(BLOCK_LINK)))
#define HASH_LINK_NUMBER(h)                                                   \
  ((uint) (((char*)(h)-(char *) keycache->hash_link_root)/sizeof(HASH_LINK)))

#if (defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)) || defined(KEYCACHE_DEBUG)
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex);
#else
#define keycache_pthread_cond_wait(C, M) mysql_cond_wait(C, M)
#endif

#if defined(KEYCACHE_DEBUG)
static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex);
static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex);
static int keycache_pthread_cond_signal(mysql_cond_t *cond);
#else
#define keycache_pthread_mutex_lock(M) mysql_mutex_lock(M)
#define keycache_pthread_mutex_unlock(M) mysql_mutex_unlock(M)
#define keycache_pthread_cond_signal(C) mysql_cond_signal(C)
#endif /* defined(KEYCACHE_DEBUG) */

#if !defined(DBUG_OFF)
#if defined(inline)
#undef inline
#endif
#define inline  /* disabled inline for easier debugging */
static int fail_block(BLOCK_LINK *block);
static int fail_hlink(HASH_LINK *hlink);
static int cache_empty(KEY_CACHE *keycache);
#endif
346 
next_power(uint value)347 static inline uint next_power(uint value)
348 {
349   return (uint) my_round_up_to_next_power((uint32) value) << 1;
350 }
351 
352 
353 /*
354   Initialize a key cache
355 
356   SYNOPSIS
357     init_key_cache()
358     keycache			pointer to a key cache data structure
359     key_cache_block_size	size of blocks to keep cached data
360     use_mem                 	total memory to use for the key cache
361     division_limit		division limit (may be zero)
362     age_threshold		age threshold (may be zero)
363 
364   RETURN VALUE
365     number of blocks in the key cache, if successful,
366     0 - otherwise.
367 
368   NOTES.
369     if keycache->key_cache_inited != 0 we assume that the key cache
370     is already initialized.  This is for now used by myisamchk, but shouldn't
371     be something that a program should rely on!
372 
373     It's assumed that no two threads call this function simultaneously
374     referring to the same key cache handle.
375 
376 */
377 
int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                   size_t use_mem, uint division_limit,
                   uint age_threshold)
{
  ulong blocks, hash_links;
  size_t length;
  int error;
  DBUG_ENTER("init_key_cache");
  DBUG_ASSERT(key_cache_block_size >= 512);

  KEYCACHE_DEBUG_OPEN;
  /* Already initialized and holding blocks: nothing to do (see NOTES). */
  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
  {
    DBUG_PRINT("warning",("key cache already in use"));
    DBUG_RETURN(0);
  }

  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
  if (! keycache->key_cache_inited)
  {
    keycache->key_cache_inited= 1;
    /*
      Initialize these variables once only.
      Their value must survive re-initialization during resizing.
    */
    keycache->in_resize= 0;
    keycache->resize_in_flush= 0;
    keycache->cnt_for_resize_op= 0;
    keycache->waiting_for_resize_cnt.last_thread= NULL;
    keycache->in_init= 0;
    mysql_mutex_init(key_KEY_CACHE_cache_lock,
                     &keycache->cache_lock, MY_MUTEX_INIT_FAST);
    keycache->resize_queue.last_thread= NULL;
  }

  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;
  DBUG_PRINT("info", ("key_cache_block_size: %u",
                      key_cache_block_size));

  /*
    First estimate of the block count: each block costs its page buffer
    plus one BLOCK_LINK, two HASH_LINKs and 5/4 hash table entries.
  */
  blocks= (ulong) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
                              sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
  /* It doesn't make sense to have too few blocks (less than 8) */
  if (blocks >= 8)
  {
    for ( ; ; )
    {
      /* Set hash_entries to the next bigger power of 2 (>= blocks * 5/4) */
      if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
        keycache->hash_entries<<= 1;
      hash_links= 2 * blocks;
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
        hash_links= MAX_THREADS + blocks - 1;
#endif
      /* Shrink 'blocks' until administration structures + buffers fit. */
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
                       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
                       ALIGN_SIZE(sizeof(HASH_LINK*) *
                                  keycache->hash_entries))) +
             ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
        blocks--;
      /* Allocate memory for cache page buffers */
      if ((keycache->block_mem=
           my_large_malloc((size_t) blocks * keycache->key_cache_block_size,
                          MYF(0))))
      {
        /*
          Allocate memory for blocks, hash_links and hash entries;
          For each block 2 hash links are allocated
        */
        if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
                                                           MYF(0))))
          break;
        my_large_free(keycache->block_mem);
        keycache->block_mem= 0;
      }
      /* Give up once the retry loop has shrunk below the 8-block minimum. */
      if (blocks < 8)
      {
        my_errno= ENOMEM;
        my_error(EE_OUTOFMEMORY, MYF(ME_FATALERROR),
                 blocks * keycache->key_cache_block_size);
        goto err;
      }
      /* Allocation failed: retry with 3/4 of the blocks. */
      blocks= blocks / 4*3;
    }
    keycache->blocks_unused= blocks;
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= hash_links;
    /*
      The hash table and the hash links live in the same allocation as
      block_root, laid out consecutively after the BLOCK_LINK array.
    */
    keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
                                        ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
    keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
                                            ALIGN_SIZE((sizeof(HASH_LINK*) *
                                                        keycache->hash_entries)));
    memset(keycache->block_root, 0,
           keycache->disk_blocks * sizeof(BLOCK_LINK));
    memset(keycache->hash_root, 0,
           keycache->hash_entries * sizeof(HASH_LINK*));
    memset(keycache->hash_link_root, 0,
           keycache->hash_links * sizeof(HASH_LINK));
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;

    keycache->global_blocks_changed= 0;
    keycache->blocks_available=0;		/* For debugging */

    /* The LRU chain is empty after initialization */
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
    keycache->free_block_list= NULL;
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
    /* Midpoint insertion parameters; 0 means "use all blocks". */
    keycache->min_warm_blocks= (division_limit ?
                                blocks * division_limit / 100 + 1 :
                                blocks);
    keycache->age_threshold= (age_threshold ?
                              blocks * age_threshold / 100 :
                              blocks);

    keycache->can_be_used= 1;

    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
    DBUG_PRINT("exit",
               ("disk_blocks: %d  block_root: 0x%lx  hash_entries: %d\
 hash_root: 0x%lx  hash_links: %d  hash_link_root: 0x%lx",
                keycache->disk_blocks,  (long) keycache->block_root,
                keycache->hash_entries, (long) keycache->hash_root,
                keycache->hash_links,   (long) keycache->hash_link_root));
    memset(keycache->changed_blocks, 0,
           sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
    memset(keycache->file_blocks, 0,
           sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
  }
  else
  {
    /* key_buffer_size is specified too small. Disable the cache. */
    keycache->can_be_used= 0;
  }

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
  DBUG_RETURN((int) keycache->disk_blocks);

err:
  /* Preserve the original errno across the cleanup calls below. */
  error= my_errno;
  keycache->disk_blocks= 0;
  keycache->blocks=  0;
  if (keycache->block_mem)
  {
    my_large_free((uchar*) keycache->block_mem);
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
    my_free(keycache->block_root);
    keycache->block_root= NULL;
  }
  my_errno= error;
  keycache->can_be_used= 0;
  DBUG_RETURN(0);
}
541 
542 
543 /*
544   Resize a key cache
545 
546   SYNOPSIS
547     resize_key_cache()
548     keycache     	        pointer to a key cache data structure
549     key_cache_block_size        size of blocks to keep cached data
550     use_mem			total memory to use for the new key cache
551     division_limit		new division limit (if not zero)
552     age_threshold		new age threshold (if not zero)
553 
554   RETURN VALUE
555     number of blocks in the key cache, if successful,
556     0 - otherwise.
557 
558   NOTES.
559     The function first compares the memory size and the block size parameters
560     with the key cache values.
561 
    If they differ the function frees the memory allocated for the
563     old key cache blocks by calling the end_key_cache function and
564     then rebuilds the key cache with new blocks by calling
565     init_key_cache.
566 
567     The function starts the operation only when all other threads
    performing operations with the key cache let it proceed
569     (when cnt_for_resize=0).
570 */
571 
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
                     size_t use_mem, uint division_limit,
                     uint age_threshold)
{
  int blocks;
  DBUG_ENTER("resize_key_cache");

  if (!keycache->key_cache_inited)
    DBUG_RETURN(keycache->disk_blocks);

  /*
    Only block size and total memory changes require a full rebuild;
    the midpoint insertion parameters can be changed in place.
  */
  if(key_cache_block_size == keycache->key_cache_block_size &&
     use_mem == keycache->key_cache_mem_size)
  {
    change_key_cache_param(keycache, division_limit, age_threshold);
    DBUG_RETURN(keycache->disk_blocks);
  }

  keycache_pthread_mutex_lock(&keycache->cache_lock);

  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    /* purecov: begin inspected */
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* purecov: end */
  }

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks to enter the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      blocks= 0;
      keycache->can_be_used= 0;
      goto finish;
    }
    DBUG_ASSERT(cache_empty(keycache));

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked if it is in the cache. We need to wait for
    pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We do also want to
    wait for I/O done when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);

  /*
    Free old cache structures, allocate new structures, and initialize
    them. Note that the cache_lock mutex and the resize_queue are left
    untouched. We do not lose the cache_lock and will release it only at
    the end of this function.
  */
  end_key_cache(keycache, 0);			/* Don't free mutex */
  /* The following will work even if use_mem is 0 */
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
                         division_limit, age_threshold);

finish:
  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;

  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);

  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(blocks);
}
669 
670 
671 /*
672   Increment counter blocking resize key cache operation
673 */
/*
  Register one more pending cache operation that must finish before a
  resize of the key cache may proceed. Caller holds cache_lock.
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op+= 1;
}
678 
679 
680 /*
681   Decrement counter blocking resize key cache operation;
682   Signal the operation to proceed when counter becomes equal zero
683 */
/*
  Unregister one pending cache operation blocking a resize. When the
  counter reaches zero, wake every thread waiting to start the resize.
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op--;
  if (keycache->cnt_for_resize_op == 0)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}
689 
690 /*
691   Change the key cache parameters
692 
693   SYNOPSIS
694     change_key_cache_param()
695     keycache			pointer to a key cache data structure
696     division_limit		new division limit (if not zero)
697     age_threshold		new age threshold (if not zero)
698 
699   RETURN VALUE
700     none
701 
702   NOTES.
703     Presently the function resets the key cache parameters
704     concerning midpoint insertion strategy - division_limit and
705     age_threshold.
706 */
707 
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
                            uint age_threshold)
{
  DBUG_ENTER("change_key_cache_param");

  /*
    Recompute the midpoint insertion parameters under cache_lock.
    A zero argument means "leave the corresponding value unchanged".
  */
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  if (division_limit != 0)
  {
    keycache->min_warm_blocks= keycache->disk_blocks * division_limit / 100 + 1;
  }
  if (age_threshold != 0)
  {
    keycache->age_threshold= keycache->disk_blocks * age_threshold / 100;
  }
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_VOID_RETURN;
}
723 
724 
725 /*
726   Remove key_cache from memory
727 
728   SYNOPSIS
729     end_key_cache()
730     keycache		key cache handle
731     cleanup		Complete free (Free also mutex for key cache)
732 
733   RETURN VALUE
734     none
735 */
736 
void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{
  DBUG_ENTER("end_key_cache");
  DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) keycache));

  /* Nothing to do for a cache that was never initialized. */
  if (!keycache->key_cache_inited)
    DBUG_VOID_RETURN;

  if (keycache->disk_blocks > 0)
  {
    /* block_mem and block_root are allocated together; free both. */
    if (keycache->block_mem)
    {
      my_large_free((uchar*) keycache->block_mem);
      keycache->block_mem= NULL;
      my_free(keycache->block_root);
      keycache->block_root= NULL;
    }
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  DBUG_PRINT("status", ("used: %lu  changed: %lu  w_requests: %lu  "
                        "writes: %lu  r_requests: %lu  reads: %lu",
                        keycache->blocks_used, keycache->global_blocks_changed,
                        (ulong) keycache->global_cache_w_requests,
                        (ulong) keycache->global_cache_write,
                        (ulong) keycache->global_cache_r_requests,
                        (ulong) keycache->global_cache_read));

  /*
    Reset these values to be able to detect a disabled key cache.
    See Bug#44068 (RESTORE can disable the MyISAM Key Cache).
  */
  keycache->blocks_used= 0;
  keycache->blocks_unused= 0;

  /* Full cleanup also destroys the mutex; a resize passes cleanup=0. */
  if (cleanup)
  {
    mysql_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
    KEYCACHE_DEBUG_CLOSE;
  }
  DBUG_VOID_RETURN;
} /* end_key_cache */
782 
783 
784 /*
785   Link a thread into double-linked queue of waiting threads.
786 
787   SYNOPSIS
788     link_into_queue()
789       wqueue              pointer to the queue structure
790       thread              pointer to the thread to be added to the queue
791 
792   RETURN VALUE
793     none
794 
795   NOTES.
796     Queue is represented by a circular list of the thread structures
797     The list is double-linked of the type (**prev,*next), accessed by
798     a pointer to the last element.
799 */
800 
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                                   struct st_my_thread_var *thread)
{
  struct st_my_thread_var *tail= wqueue->last_thread;

  DBUG_ASSERT(!thread->next && !thread->prev);
  if (tail == NULL)
  {
    /* Empty queue: the thread forms a circular list with itself. */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    /* Insert after the current tail, i.e. before the first element. */
    thread->prev= tail->next->prev;
    tail->next->prev= &thread->next;
    thread->next= tail->next;
    tail->next= thread;
  }
  /* The newly added element becomes the tail of the queue. */
  wqueue->last_thread= thread;
}
822 
823 /*
824   Unlink a thread from double-linked queue of waiting threads
825 
826   SYNOPSIS
827     unlink_from_queue()
828       wqueue              pointer to the queue structure
829       thread              pointer to the thread to be removed from the queue
830 
831   RETURN VALUE
832     none
833 
834   NOTES.
835     See NOTES for link_into_queue
836 */
837 
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                                     struct st_my_thread_var *thread)
{
  KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
  DBUG_ASSERT(thread->next && thread->prev);
  if (thread->next == thread)
    /* The queue contains only one member */
    wqueue->last_thread= NULL;
  else
  {
    /* Bridge the neighbours over the removed element. */
    thread->next->prev= thread->prev;
    *thread->prev=thread->next;
    /*
      If the removed element was the tail, its predecessor becomes the
      new tail. thread->prev points at the predecessor's next member,
      so STRUCT_PTR recovers the predecessor structure itself.
    */
    if (wqueue->last_thread == thread)
      wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
                                      thread->prev);
  }
  thread->next= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  thread->prev= NULL;
#endif
}
863 
864 
865 /*
866   Add a thread to single-linked queue of waiting threads
867 
868   SYNOPSIS
869     wait_on_queue()
870       wqueue            Pointer to the queue structure.
871       mutex             Cache_lock to acquire after awake.
872 
873   RETURN VALUE
874     none
875 
876   NOTES.
877     Queue is represented by a circular list of the thread structures
878     The list is single-linked of the type (*next), accessed by a pointer
879     to the last element.
880 
881     The function protects against stray signals by verifying that the
882     current thread is unlinked from the queue when awaking. However,
883     since several threads can wait for the same event, it might be
884     necessary for the caller of the function to check again if the
885     condition for awake is indeed matched.
886 */
887 
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          mysql_mutex_t *mutex)
{
  struct st_my_thread_var *self= my_thread_var;
  struct st_my_thread_var *tail= wqueue->last_thread;

  /* Append this thread to the circular single-linked list. */
  DBUG_ASSERT(!self->next);
  DBUG_ASSERT(!self->prev); /* Not required, but must be true anyway. */
  if (tail == NULL)
    self->next= self;
  else
  {
    self->next= tail->next;
    tail->next= self;
  }
  wqueue->last_thread= self;

  /*
    Sleep until the signalling thread takes us out of the queue.
    Re-checking self->next guards against stray wakeups.
  */
  do
  {
    KEYCACHE_DBUG_PRINT("wait", ("suspend thread %ld", self->id));
    keycache_pthread_cond_wait(&self->suspend, mutex);
  }
  while (self->next);
}
917 
918 
919 /*
920   Remove all threads from queue signaling them to proceed
921 
922   SYNOPSIS
923     release_whole_queue()
924       wqueue            pointer to the queue structure
925 
926   RETURN VALUE
927     none
928 
929   NOTES.
930     See notes for wait_on_queue().
931     When removed from the queue each thread is signaled via condition
932     variable thread->suspend.
933 */
934 
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  struct st_my_thread_var *tail= wqueue->last_thread;
  struct st_my_thread_var *thread;
  struct st_my_thread_var *following;

  /* Queue may be empty. */
  if (tail == NULL)
    return;

  /* Walk the ring starting at the oldest element, ending at the tail. */
  for (thread= tail->next ; ; thread= following)
  {
    KEYCACHE_DBUG_PRINT("release_whole_queue: signal",
                        ("thread %ld", thread->id));
    /* Signal the thread. */
    keycache_pthread_cond_signal(&thread->suspend);
    /* Take thread from queue. */
    following= thread->next;
    thread->next= NULL;
    if (thread == tail)
      break;
  }

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
962 
963 
964 /*
965   Unlink a block from the chain of dirty/clean blocks
966 */
967 
static inline void unlink_changed(BLOCK_LINK *block)
{
  BLOCK_LINK *successor;

  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Bridge the neighbours over the removed block. */
  successor= block->next_changed;
  if (successor)
    successor->prev_changed= block->prev_changed;
  *block->prev_changed= successor;

#if !defined(DBUG_OFF)
  /*
    Poison the links so a block outside any chain is easy to spot in a
    debugger. Some DBUG_ASSERT() rely on it.
  */
  block->next_changed= NULL;
  block->prev_changed= NULL;
#endif
}
984 
985 
986 /*
987   Link a block into the chain of dirty/clean blocks
988 */
989 
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  BLOCK_LINK *old_head= *phead;

  DBUG_ASSERT(!block->next_changed);
  DBUG_ASSERT(!block->prev_changed);
  /* Push the block onto the front of the chain. */
  block->prev_changed= phead;
  block->next_changed= old_head;
  if (old_head)
    old_head->prev_changed= &block->next_changed;
  *phead= block;
}
999 
1000 
1001 /*
1002   Link a block in a chain of clean blocks of a file.
1003 
1004   SYNOPSIS
1005     link_to_file_list()
1006       keycache		Key cache handle
1007       block             Block to relink
1008       file              File to be linked to
1009       unlink            If to unlink first
1010 
1011   DESCRIPTION
1012     Unlink a block from whichever chain it is linked in, if it's
1013     asked for, and link it to the chain of clean blocks of the
1014     specified file.
1015 
1016   NOTE
1017     Please do never set/clear BLOCK_CHANGED outside of
1018     link_to_file_list() or link_to_changed_list().
1019     You would risk to damage correct counting of changed blocks
1020     and to find blocks in the wrong hash.
1021 
1022   RETURN
1023     void
1024 */
1025 
static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file,
                              my_bool unlink_block)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->hash_link->file == file);
  if (unlink_block)
    unlink_changed(block);
  /* Insert into the clean-blocks chain of this file. */
  link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
  if (!(block->status & BLOCK_CHANGED))
    return;
  /* The block was dirty before: clear the flag and adjust the counters. */
  block->status&= ~BLOCK_CHANGED;
  keycache->blocks_changed--;
  keycache->global_blocks_changed--;
}
1043 
1044 
1045 /*
1046   Re-link a block from the clean chain to the dirty chain of a file.
1047 
1048   SYNOPSIS
1049     link_to_changed_list()
1050       keycache		key cache handle
1051       block             block to relink
1052 
1053   DESCRIPTION
1054     Unlink a block from the chain of clean blocks of a file
1055     and link it to the chain of dirty blocks of the same file.
1056 
1057   NOTE
1058     Please do never set/clear BLOCK_CHANGED outside of
1059     link_to_file_list() or link_to_changed_list().
1060     You would risk to damage correct counting of changed blocks
1061     and to find blocks in the wrong hash.
1062 
1063   RETURN
1064     void
1065 */
1066 
static void link_to_changed_list(KEY_CACHE *keycache,
                                 BLOCK_LINK *block)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);

  /* Move the block from the clean chain into the dirty chain. */
  unlink_changed(block);
  link_changed(block,
               &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
  /* Account for one more dirty block. */
  block->status|= BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}
1081 
1082 
1083 /*
1084   Link a block to the LRU chain at the beginning or at the end of
1085   one of two parts.
1086 
1087   SYNOPSIS
1088     link_block()
1089       keycache            pointer to a key cache data structure
1090       block               pointer to the block to link to the LRU chain
1091       hot                 <-> to link the block into the hot subchain
1092       at_end              <-> to link the block at the end of the subchain
1093 
1094   RETURN VALUE
1095     none
1096 
1097   NOTES.
1098     The LRU ring is represented by a circular list of block structures.
1099     The list is double-linked of the type (**prev,*next) type.
1100     The LRU ring is divided into two parts - hot and warm.
1101     There are two pointers to access the last blocks of these two
1102     parts. The beginning of the warm part follows right after the
1103     end of the hot part.
1104     Only blocks of the warm part can be used for eviction.
1105     The first block from the beginning of this subchain is always
1106     taken for eviction (keycache->last_used->next)
1107 
1108     LRU chain:       +------+   H O T    +------+
1109                 +----| end  |----...<----| beg  |----+
1110                 |    +------+last        +------+    |
1111                 v<-link in latest hot (new end)      |
1112                 |     link in latest warm (new end)->^
1113                 |    +------+  W A R M   +------+    |
1114                 +----| beg  |---->...----| end  |----+
1115                      +------+            +------+ins
1116                   first for eviction
1117 
1118     It is also possible that the block is selected for eviction and thus
1119     not linked in the LRU ring.
1120 */
1121 
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, my_bool hot,
                       my_bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);

  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    struct st_my_thread_var *last_thread=
                               keycache->waiting_for_block.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
    struct st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if ((HASH_LINK *) thread->opt_info == hash_link)
      {
        KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        /* Each woken requester holds a request on the block. */
        block->requests++;
      }
    }
    while (thread != last_thread);
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads run
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They do only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that it has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    KEYCACHE_THREAD_TRACE("link_block: after signaling");
#if defined(KEYCACHE_DEBUG)
    KEYCACHE_DBUG_PRINT("link_block",
        ("linked,unlinked block %u  status=%x  #requests=%u  #available=%u",
         BLOCK_NUMBER(block), block->status,
         block->requests, keycache->blocks_available));
#endif
    return;
  }

  /* Insert into the hot or warm part of the LRU ring. */
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
  KEYCACHE_THREAD_TRACE("link_block");
#if defined(KEYCACHE_DEBUG)
  keycache->blocks_available++;
  KEYCACHE_DBUG_PRINT("link_block",
      ("linked block %u:%1u  status=%x  #requests=%u  #available=%u",
       BLOCK_NUMBER(block), at_end, block->status,
       block->requests, keycache->blocks_available));
  KEYCACHE_DBUG_ASSERT((ulong) keycache->blocks_available <=
                       keycache->blocks_used);
#endif
}
1223 
1224 
1225 /*
1226   Unlink a block from the LRU chain
1227 
1228   SYNOPSIS
1229     unlink_block()
1230       keycache            pointer to a key cache data structure
1231       block               pointer to the block to unlink from the LRU chain
1232 
1233   RETURN VALUE
1234     none
1235 
1236   NOTES.
1237     See NOTES for link_block
1238 */
1239 
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  DBUG_ASSERT((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(!block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    /* Bridge the neighbours over the removed block. */
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /*
      If the block was the end of the warm (used_last) or hot (used_ins)
      sub-chain, its predecessor - recovered from prev_used via
      STRUCT_PTR - becomes the new end.
    */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
#if !defined(DBUG_OFF)
  /*
    This makes it easier to see it's not in a chain during debugging.
    And some DBUG_ASSERT() rely on it.
  */
  block->prev_used= NULL;
#endif

  KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
  keycache->blocks_available--;
  KEYCACHE_DBUG_PRINT("unlink_block",
    ("unlinked block %u  status=%x   #requests=%u  #available=%u",
     BLOCK_NUMBER(block), block->status,
     block->requests, keycache->blocks_available));
#endif
}
1280 
1281 
1282 /*
1283   Register requests for a block.
1284 
1285   SYNOPSIS
1286     reg_requests()
1287       keycache          Pointer to a key cache data structure.
1288       block             Pointer to the block to register a request on.
1289       count             Number of requests. Always 1.
1290 
1291   NOTE
1292     The first request unlinks the block from the LRU ring. This means
    that it is protected against eviction.
1294 
1295   RETURN
1296     void
1297 */
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  DBUG_ASSERT(block->status & BLOCK_IN_USE);
  DBUG_ASSERT(block->hash_link);

  /*
    The first request takes the block out of the LRU ring, which
    protects it from eviction while requests are outstanding.
  */
  if (block->requests == 0)
    unlink_block(keycache, block);
  block->requests+= count;
}
1307 
1308 
1309 /*
1310   Unregister request for a block
1311   linking it to the LRU chain if it's the last request
1312 
1313   SYNOPSIS
1314     unreg_request()
1315     keycache            pointer to a key cache data structure
1316     block               pointer to the block to link to the LRU chain
1317     at_end              <-> to link the block at the end of the LRU chain
1318 
1319   RETURN VALUE
1320     none
1321 
1322   NOTES.
1323     Every linking to the LRU ring decrements by one a special block
1324     counter (if it's positive). If the at_end parameter is TRUE the block is
1325     added either at the end of warm sub-chain or at the end of hot sub-chain.
1326     It is added to the hot subchain if its counter is zero and number of
1327     blocks in warm sub-chain is not less than some low limit (determined by
1328     the division_limit parameter). Otherwise the block is added to the warm
1329     sub-chain. If the at_end parameter is FALSE the block is always added
1330     at beginning of the warm sub-chain.
1331     Thus a warm block can be promoted to the hot sub-chain when its counter
1332     becomes zero for the first time.
1333     At the same time  the block at the very beginning of the hot subchain
1334     might be moved to the beginning of the warm subchain if it stays untouched
1335     for a too long time (this time is determined by parameter age_threshold).
1336 
1337     It is also possible that the block is selected for eviction and thus
1338     not linked in the LRU ring.
1339 */
1340 
static void unreg_request(KEY_CACHE *keycache,
                          BLOCK_LINK *block, int at_end)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link); /*backptr to block NULL from free_block()*/
  DBUG_ASSERT(block->requests);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /*
    Unregister the request, but do not link erroneous blocks into the
    LRU ring.
  */
  if (!--block->requests && !(block->status & BLOCK_ERROR))
  {
    my_bool hot;
    /*
      Promote the block to the hot sub-chain only after its hit counter
      is exhausted and the warm sub-chain is large enough.
    */
    if (block->hits_left)
      block->hits_left--;
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
    link_block(keycache, block, hot, (my_bool)at_end);
    /* Timestamp the insertion with the cache's logical clock. */
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      /* Demote the aged hot block to the beginning of the warm chain. */
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
      KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu",
                           keycache->warm_blocks));
    }
  }
}
1403 
1404 /*
1405   Remove a reader of the page in block
1406 */
1407 
static void remove_reader(BLOCK_LINK *block)
{
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  DBUG_ASSERT(block->hash_link->requests);

  /* When the last page request is gone, wake a possible waiter. */
  block->hash_link->requests--;
  if (block->hash_link->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1420 
1421 
1422 /*
1423   Wait until the last reader of the page in block
1424   signals on its termination
1425 */
1426 
static void wait_for_readers(KEY_CACHE *keycache,
                             BLOCK_LINK *block)
{
  struct st_my_thread_var *thread= my_thread_var;
  DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
  DBUG_ASSERT(!(block->status & (BLOCK_IN_FLUSH | BLOCK_CHANGED)));
  DBUG_ASSERT(block->hash_link);
  DBUG_ASSERT(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  DBUG_ASSERT(!block->next_used);
  DBUG_ASSERT(!block->prev_used);
  /* remove_reader() signals block->condvar when requests drops to zero. */
  while (block->hash_link->requests)
  {
    KEYCACHE_DBUG_PRINT("wait_for_readers: wait",
                        ("suspend thread %ld  block %u",
                         thread->id, BLOCK_NUMBER(block)));
    /* There must be no other waiter. We have no queue here. */
    DBUG_ASSERT(!block->condvar);
    block->condvar= &thread->suspend;
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}
1452 
1453 
1454 /*
1455   Add a hash link to a bucket in the hash_table
1456 */
1457 
static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  HASH_LINK *first= *start;

  /* Push the hash link onto the front of the bucket chain. */
  if (first)
    first->prev= &hash_link->next;
  hash_link->next= first;
  hash_link->prev= start;
  *start= hash_link;
}
1466 
1467 
1468 /*
1469   Remove a hash link from the hash table
1470 */
1471 
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u  pos_ %lu  #requests=%u",
      (uint) hash_link->file,(ulong) hash_link->diskpos, hash_link->requests));
  KEYCACHE_DBUG_ASSERT(hash_link->requests == 0);
  /* Bridge the bucket chain over the removed element. */
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;

  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    struct st_my_thread_var *last_thread=
                               keycache->waiting_for_hash_link.last_thread;
    struct st_my_thread_var *first_thread= last_thread->next;
    struct st_my_thread_var *next_thread= first_thread;
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
    struct st_my_thread_var *thread;

    /* Re-dedicate the freed hash link to the first waiter's page. */
    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;
    do
    {
      KEYCACHE_PAGE *page;
      thread= next_thread;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      next_thread= thread->next;
      /*
         We notify about the event all threads that ask
         for the same page as the first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    /* Re-insert the hash link into the bucket of its new page. */
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
					         hash_link->diskpos)],
              hash_link);
    return;
  }
  /* No waiters: push the hash link onto the free list for reuse. */
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}
1519 
1520 
1521 /*
1522   Get the hash link for a page
1523 */
1524 
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, my_off_t filepos)
{
  HASH_LINK *hash_link, **start;
#if defined(KEYCACHE_DEBUG)
  int cnt;
#endif

  KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
                      (uint) file,(ulong) filepos));

restart:
  /*
     Find the bucket in the hash table for the pair (file, filepos);
     start contains the head of the bucket list,
     hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
#if defined(KEYCACHE_DEBUG)
  cnt= 0;
#endif
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
#if defined(KEYCACHE_DEBUG)
    /* Walking more elements than exist means the chain is corrupted. */
    cnt++;
    if (! (cnt <= keycache->hash_links_used))
    {
      int i;
      /* Dump the bucket chain before asserting, to aid debugging. */
      for (i=0, hash_link= *start ;
           i < cnt ; i++, hash_link= hash_link->next)
      {
        KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u  pos: %lu",
            (uint) hash_link->file,(ulong) hash_link->diskpos));
      }
    }
    KEYCACHE_DBUG_ASSERT(cnt <= keycache->hash_links_used);
#endif
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      /* Reuse a previously freed hash link. */
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      /* Take a never-used hash link from the preallocated pool. */
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link */
      struct st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting"));
      page.file= file;
      page.filepos= filepos;
      /* unlink_hash() reads opt_info to match waiters to pages. */
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      KEYCACHE_DBUG_PRINT("get_hash_link: wait",
                        ("suspend thread %ld", thread->id));
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
      /* The bucket may have changed while we slept: search again. */
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;

  return hash_link;
}
1604 
1605 
1606 /*
1607   Get a block for the file page requested by a keycache read/write operation;
1608   If the page is not in the cache return a free block, if there is none
1609   return the lru block after saving its buffer if the page is dirty.
1610 
1611   SYNOPSIS
1612 
1613     find_key_block()
1614       keycache            pointer to a key cache data structure
1615       file                handler for the file to read page from
1616       filepos             position of the page in the file
1617       init_hits_left      how initialize the block counter for the page
1618       wrmode              <-> get for writing
1619       page_st        out  {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1620 
1621   RETURN VALUE
1622     Pointer to the found block if successful, 0 - otherwise
1623 
1624   NOTES.
1625     For the page from file positioned at filepos the function checks whether
1626     the page is in the key cache specified by the first parameter.
1627     If this is the case it immediately returns the block.
1628     If not, the function first chooses a block for this page. If there are
1629     no unused blocks in the key cache yet, the function takes the block
1630     at the very beginning of the warm sub-chain. It saves the page in that
1631     block if it's dirty before returning the pointer to it.
1632     The function returns in the page_st parameter the following values:
1633       PAGE_READ         - if page already in the block,
1634       PAGE_TO_BE_READ   - if it is to be read yet by the current thread
1635       PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1636     If an error occurs, the BLOCK_ERROR bit is set in the block status.
1637     It might happen that there are no blocks in the LRU chain (in the warm
1638     part) - all blocks are unlinked for some read/write operations. Then the
1639     function waits until the first of these operations links a block back.
1640 */
1641 
find_key_block(KEY_CACHE * keycache,File file,my_off_t filepos,int init_hits_left,int wrmode,int * page_st)1642 static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1643                                   File file, my_off_t filepos,
1644                                   int init_hits_left,
1645                                   int wrmode, int *page_st)
1646 {
1647   HASH_LINK *hash_link;
1648   BLOCK_LINK *block;
1649   int error= 0;
1650   int page_status;
1651 
1652   DBUG_ENTER("find_key_block");
1653   KEYCACHE_THREAD_TRACE("find_key_block:begin");
1654   DBUG_PRINT("enter", ("fd: %d  pos: %lu  wrmode: %d",
1655                        file, (ulong) filepos, wrmode));
1656   KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %d  pos: %lu  wrmode: %d",
1657                                          file, (ulong) filepos,
1658                                          wrmode));
1659 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
1660   DBUG_EXECUTE("check_keycache2",
1661                test_key_cache(keycache, "start of find_key_block", 0););
1662 #endif
1663 
1664 restart:
1665   /*
1666     If the flush phase of a resize operation fails, the cache is left
1667     unusable. This will be detected only after "goto restart".
1668   */
1669   if (!keycache->can_be_used)
1670     DBUG_RETURN(0);
1671 
1672   /*
1673     Find the hash_link for the requested file block (file, filepos). We
1674     do always get a hash_link here. It has registered our request so
1675     that no other thread can use it for another file block until we
1676     release the request (which is done by remove_reader() usually). The
1677     hash_link can have a block assigned to it or not. If there is a
1678     block, it may be assigned to this hash_link or not. In cases where a
1679     block is evicted from the cache, it is taken from the LRU ring and
1680     referenced by the new hash_link. But the block can still be assigned
1681     to its old hash_link for some time if it needs to be flushed first,
1682     or if there are other threads still reading it.
1683 
1684     Summary:
1685       hash_link is always returned.
1686       hash_link->block can be:
1687       - NULL or
1688       - not assigned to this hash_link or
1689       - assigned to this hash_link. If assigned, the block can have
1690         - invalid data (when freshly assigned) or
1691         - valid data. Valid data can be
1692           - changed over the file contents (dirty) or
1693           - not changed (clean).
1694   */
1695   hash_link= get_hash_link(keycache, file, filepos);
1696   DBUG_ASSERT((hash_link->file == file) && (hash_link->diskpos == filepos));
1697 
1698   page_status= -1;
1699   if ((block= hash_link->block) &&
1700       block->hash_link == hash_link && (block->status & BLOCK_READ))
1701   {
1702     /* Assigned block with valid (changed or unchanged) contents. */
1703     page_status= PAGE_READ;
1704   }
1705   /*
1706     else (page_status == -1)
1707       - block == NULL or
1708       - block not assigned to this hash_link or
1709       - block assigned but not yet read from file (invalid data).
1710   */
1711 
1712   if (keycache->in_resize)
1713   {
1714     /* This is a request during a resize operation */
1715 
1716     if (!block)
1717     {
1718       struct st_my_thread_var *thread;
1719 
1720       /*
1721         The file block is not in the cache. We don't need it in the
1722         cache: we are going to read or write directly to file. Cancel
1723         the request. We can simply decrement hash_link->requests because
1724         we did not release cache_lock since increasing it. So no other
1725         thread can wait for our request to become released.
1726       */
1727       if (hash_link->requests == 1)
1728       {
1729         /*
1730           We are the only one to request this hash_link (this file/pos).
1731           Free the hash_link.
1732         */
1733         hash_link->requests--;
1734         unlink_hash(keycache, hash_link);
1735         DBUG_RETURN(0);
1736       }
1737 
1738       /*
1739         More requests on the hash_link. Someone tries to evict a block
1740         for this hash_link (could have started before resizing started).
1741         This means that the LRU ring is empty. Otherwise a block could
1742         be assigned immediately. Behave like a thread that wants to
1743         evict a block for this file/pos. Add to the queue of threads
1744         waiting for a block. Wait until there is one assigned.
1745 
1746         Refresh the request on the hash-link so that it cannot be reused
1747         for another file/pos.
1748       */
1749       thread= my_thread_var;
1750       thread->opt_info= (void *) hash_link;
1751       link_into_queue(&keycache->waiting_for_block, thread);
1752       do
1753       {
1754         KEYCACHE_DBUG_PRINT("find_key_block: wait",
1755                             ("suspend thread %ld", thread->id));
1756         keycache_pthread_cond_wait(&thread->suspend,
1757                                    &keycache->cache_lock);
1758       } while (thread->next);
1759       thread->opt_info= NULL;
1760       /*
1761         A block should now be assigned to the hash_link. But it may
1762         still need to be evicted. Anyway, we should re-check the
1763         situation. page_status must be set correctly.
1764       */
1765       hash_link->requests--;
1766       goto restart;
1767     } /* end of if (!block) */
1768 
1769     /*
1770       There is a block for this file/pos in the cache. Register a
1771       request on it. This unlinks it from the LRU ring (if it is there)
1772       and hence protects it against eviction (if not already in
1773       eviction). We need this for returning the block to the caller, for
1774       calling remove_reader() (for debugging purposes), and for calling
1775       free_block(). The only case where we don't need the request is if
1776       the block is in eviction. In that case we have to unregister the
1777       request later.
1778     */
1779     reg_requests(keycache, block, 1);
1780 
1781     if (page_status != PAGE_READ)
1782     {
1783       /*
1784         - block not assigned to this hash_link or
1785         - block assigned but not yet read from file (invalid data).
1786 
1787         This must be a block in eviction. It will be read soon. We need
1788         to wait here until this happened. Otherwise the caller could
1789         access a wrong block or a block which is in read. While waiting
1790         we cannot lose hash_link nor block. We have registered a request
1791         on the hash_link. Everything can happen to the block but changes
1792         in the hash_link -> block relationship. In other words:
1793         everything can happen to the block but free or another completed
1794         eviction.
1795 
1796         Note that we behave like a secondary requestor here. We just
1797         cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1798         read requests and writes on dirty blocks that are not in flush
1799         only. Waiting here on COND_FOR_REQUESTED works in all
1800         situations.
1801       */
1802       DBUG_ASSERT(((block->hash_link != hash_link) &&
1803                    (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1804                   ((block->hash_link == hash_link) &&
1805                    !(block->status & BLOCK_READ)));
1806       wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1807       /*
1808         Here we can trust that the block has been assigned to this
1809         hash_link (block->hash_link == hash_link) and read into the
1810         buffer (BLOCK_READ). The worst things possible here are that the
1811         block is in free (BLOCK_REASSIGNED). But the block is still
1812         assigned to the hash_link. The freeing thread waits until we
1813         release our request on the hash_link. The block must not be
1814         again in eviction because we registered a request on it before
1815         starting to wait.
1816       */
1817       DBUG_ASSERT(block->hash_link == hash_link);
1818       DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1819       DBUG_ASSERT(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1820     }
1821     /*
1822       The block is in the cache. Assigned to the hash_link. Valid data.
1823       Note that in case of page_st == PAGE_READ, the block can be marked
1824       for eviction. In any case it can be marked for freeing.
1825     */
1826 
1827     if (!wrmode)
1828     {
1829       /* A reader can just read the block. */
1830       *page_st= PAGE_READ;
1831       DBUG_ASSERT((hash_link->file == file) &&
1832                   (hash_link->diskpos == filepos) &&
1833                   (block->hash_link == hash_link));
1834       DBUG_RETURN(block);
1835     }
1836 
1837     /*
1838       This is a writer. No two writers for the same block can exist.
1839       This must be assured by locks outside of the key cache.
1840     */
1841     DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1842 
1843     while (block->status & BLOCK_IN_FLUSH)
1844     {
1845       /*
1846         Wait until the block is flushed to file. Do not release the
1847         request on the hash_link yet to prevent that the block is freed
1848         or reassigned while we wait. While we wait, several things can
1849         happen to the block, including another flush. But the block
1850         cannot be reassigned to another hash_link until we release our
1851         request on it. But it can be marked BLOCK_REASSIGNED from free
1852         or eviction, while they wait for us to release the hash_link.
1853       */
1854       wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1855       /*
1856         If the flush phase failed, the resize could have finished while
1857         we waited here.
1858       */
1859       if (!keycache->in_resize)
1860       {
1861         remove_reader(block);
1862         unreg_request(keycache, block, 1);
1863         goto restart;
1864       }
1865       DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
1866       DBUG_ASSERT(!(block->status & BLOCK_FOR_UPDATE) || fail_block(block));
1867       DBUG_ASSERT(block->hash_link == hash_link);
1868     }
1869 
1870     if (block->status & BLOCK_CHANGED)
1871     {
1872       /*
1873         We want to write a block with changed contents. If the cache
1874         block size is bigger than the callers block size (e.g. MyISAM),
1875         the caller may replace part of the block only. Changes of the
1876         other part of the block must be preserved. Since the block has
1877         not yet been selected for flush, we can still add our changes.
1878       */
1879       *page_st= PAGE_READ;
1880       DBUG_ASSERT((hash_link->file == file) &&
1881                   (hash_link->diskpos == filepos) &&
1882                   (block->hash_link == hash_link));
1883       DBUG_RETURN(block);
1884     }
1885 
1886     /*
1887       This is a write request for a clean block. We do not want to have
1888       new dirty blocks in the cache while resizing. We will free the
1889       block and write directly to file. If the block is in eviction or
1890       in free, we just let it go.
1891 
1892       Unregister from the hash_link. This must be done before freeing
1893       the block. And it must be done if not freeing the block. Because
1894       we could have waited above, we need to call remove_reader(). Other
1895       threads could wait for us to release our request on the hash_link.
1896     */
1897     remove_reader(block);
1898 
1899     /* If the block is not in eviction and not in free, we can free it. */
1900     if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1901                            BLOCK_REASSIGNED)))
1902     {
1903       /*
1904         Free block as we are going to write directly to file.
1905         Although we have an exclusive lock for the updated key part,
1906         the control can be yielded by the current thread as we might
1907         have unfinished readers of other key parts in the block
1908         buffer. Still we are guaranteed not to have any readers
1909         of the key part we are writing into until the block is
1910         removed from the cache as we set the BLOCK_REASSIGNED
1911         flag (see the code below that handles reading requests).
1912       */
1913       free_block(keycache, block);
1914     }
1915     else
1916     {
1917       /*
1918         The block will be evicted/freed soon. Don't touch it in any way.
1919         Unregister the request that we registered above.
1920       */
1921       unreg_request(keycache, block, 1);
1922 
1923       /*
1924         The block is still assigned to the hash_link (the file/pos that
1925         we are going to write to). Wait until the eviction/free is
1926         complete. Otherwise the direct write could complete before all
1927         readers are done with the block. So they could read outdated
1928         data.
1929 
1930         Since we released our request on the hash_link, it can be reused
1931         for another file/pos. Hence we cannot just check for
1932         block->hash_link == hash_link. As long as the resize is
1933         proceeding the block cannot be reassigned to the same file/pos
1934         again. So we can terminate the loop when the block is no longer
1935         assigned to this file/pos.
1936       */
1937       do
1938       {
1939         wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1940                       &keycache->cache_lock);
1941         /*
1942           If the flush phase failed, the resize could have finished
1943           while we waited here.
1944         */
1945         if (!keycache->in_resize)
1946           goto restart;
1947       } while (block->hash_link &&
1948                (block->hash_link->file == file) &&
1949                (block->hash_link->diskpos == filepos));
1950     }
1951     DBUG_RETURN(0);
1952   }
1953 
1954   if (page_status == PAGE_READ &&
1955       (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1956                         BLOCK_REASSIGNED)))
1957   {
1958     /*
1959       This is a request for a block to be removed from cache. The block
1960       is assigned to this hash_link and contains valid data, but is
1961       marked for eviction or to be freed. Possible reasons why it has
1962       not yet been evicted/freed can be a flush before reassignment
1963       (BLOCK_IN_SWITCH), readers of the block have not finished yet
1964       (BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1965       the block has been selected for it (BLOCK_IN_EVICTION).
1966     */
1967 
1968     KEYCACHE_DBUG_PRINT("find_key_block",
1969                         ("request for old page in block %u "
1970                          "wrmode: %d  block->status: %d",
1971                          BLOCK_NUMBER(block), wrmode, block->status));
1972     /*
1973        Only reading requests can proceed until the old dirty page is flushed,
1974        all others are to be suspended, then resubmitted
1975     */
1976     if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1977     {
1978       /*
1979         This is a read request and the block not yet reassigned. We can
1980         register our request and proceed. This unlinks the block from
1981         the LRU ring and protects it against eviction.
1982       */
1983       reg_requests(keycache, block, 1);
1984     }
1985     else
1986     {
1987       /*
1988         Either this is a write request for a block that is in eviction
1989         or in free. We must not use it any more. Instead we must evict
1990         another block. But we cannot do this before the eviction/free is
1991         done. Otherwise we would find the same hash_link + block again
1992         and again.
1993 
1994         Or this is a read request for a block in eviction/free that does
1995         not require a flush, but waits for readers to finish with the
1996         block. We do not read this block to let the eviction/free happen
1997         as soon as possible. Again we must wait so that we don't find
1998         the same hash_link + block again and again.
1999       */
2000       DBUG_ASSERT(hash_link->requests);
2001       hash_link->requests--;
2002       KEYCACHE_DBUG_PRINT("find_key_block",
2003                           ("request waiting for old page to be saved"));
2004       wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2005       KEYCACHE_DBUG_PRINT("find_key_block",
2006                           ("request for old page resubmitted"));
2007       /*
2008         The block is no longer assigned to this hash_link.
2009         Get another one.
2010       */
2011       goto restart;
2012     }
2013   }
2014   else
2015   {
2016     /*
2017       This is a request for a new block or for a block not to be removed.
2018       Either
2019       - block == NULL or
2020       - block not assigned to this hash_link or
2021       - block assigned but not yet read from file,
2022       or
2023       - block assigned with valid (changed or unchanged) data and
2024       - it will not be reassigned/freed.
2025     */
2026     if (! block)
2027     {
2028       /* No block is assigned to the hash_link yet. */
2029       if (keycache->blocks_unused)
2030       {
2031         if (keycache->free_block_list)
2032         {
2033           /* There is a block in the free list. */
2034           block= keycache->free_block_list;
2035           keycache->free_block_list= block->next_used;
2036           block->next_used= NULL;
2037         }
2038         else
2039         {
2040           size_t block_mem_offset;
2041           /* There are some never used blocks, take first of them */
2042           DBUG_ASSERT(keycache->blocks_used <
2043                       (ulong) keycache->disk_blocks);
2044           block= &keycache->block_root[keycache->blocks_used];
2045           block_mem_offset=
2046            ((size_t) keycache->blocks_used) * keycache->key_cache_block_size;
2047           block->buffer= ADD_TO_PTR(keycache->block_mem,
2048                                     block_mem_offset,
2049                                     uchar*);
2050           keycache->blocks_used++;
2051           DBUG_ASSERT(!block->next_used);
2052         }
2053         DBUG_ASSERT(!block->prev_used);
2054         DBUG_ASSERT(!block->next_changed);
2055         DBUG_ASSERT(!block->prev_changed);
2056         DBUG_ASSERT(!block->hash_link);
2057         DBUG_ASSERT(!block->status);
2058         DBUG_ASSERT(!block->requests);
2059         keycache->blocks_unused--;
2060         block->status= BLOCK_IN_USE;
2061         block->length= 0;
2062         block->offset= keycache->key_cache_block_size;
2063         block->requests= 1;
2064         block->temperature= BLOCK_COLD;
2065         block->hits_left= init_hits_left;
2066         block->last_hit_time= 0;
2067         block->hash_link= hash_link;
2068         hash_link->block= block;
2069         link_to_file_list(keycache, block, file, 0);
2070         page_status= PAGE_TO_BE_READ;
2071         KEYCACHE_DBUG_PRINT("find_key_block",
2072                             ("got free or never used block %u",
2073                              BLOCK_NUMBER(block)));
2074       }
2075       else
2076       {
2077 	/*
2078           There are no free blocks and no never used blocks, use a block
2079           from the LRU ring.
2080         */
2081 
2082         if (! keycache->used_last)
2083         {
2084           /*
2085             The LRU ring is empty. Wait until a new block is added to
2086             it. Several threads might wait here for the same hash_link,
2087             all of them must get the same block. While waiting for a
2088             block, after a block is selected for this hash_link, other
2089             threads can run first before this one awakes. During this
2090             time interval other threads find this hash_link pointing to
2091             the block, which is still assigned to another hash_link. In
2092             this case the block is not marked BLOCK_IN_SWITCH yet, but
2093             it is marked BLOCK_IN_EVICTION.
2094           */
2095 
2096           struct st_my_thread_var *thread= my_thread_var;
2097           thread->opt_info= (void *) hash_link;
2098           link_into_queue(&keycache->waiting_for_block, thread);
2099           do
2100           {
2101             KEYCACHE_DBUG_PRINT("find_key_block: wait",
2102                                 ("suspend thread %ld", thread->id));
2103             keycache_pthread_cond_wait(&thread->suspend,
2104                                        &keycache->cache_lock);
2105           }
2106           while (thread->next);
2107           thread->opt_info= NULL;
2108           /* Assert that block has a request registered. */
2109           DBUG_ASSERT(hash_link->block->requests);
2110           /* Assert that block is not in LRU ring. */
2111           DBUG_ASSERT(!hash_link->block->next_used);
2112           DBUG_ASSERT(!hash_link->block->prev_used);
2113         }
2114 
2115         /*
2116           If we waited above, hash_link->block has been assigned by
2117           link_block(). Otherwise it is still NULL. In the latter case
2118           we need to grab a block from the LRU ring ourselves.
2119         */
2120         block= hash_link->block;
2121         if (! block)
2122         {
2123           /* Select the last block from the LRU ring. */
2124           block= keycache->used_last->next_used;
2125           block->hits_left= init_hits_left;
2126           block->last_hit_time= 0;
2127           hash_link->block= block;
2128           /*
2129             Register a request on the block. This unlinks it from the
2130             LRU ring and protects it against eviction.
2131           */
2132           DBUG_ASSERT(!block->requests);
2133           reg_requests(keycache, block,1);
2134           /*
2135             We do not need to set block->status|= BLOCK_IN_EVICTION here
2136             because we will set block->status|= BLOCK_IN_SWITCH
2137             immediately without releasing the lock in between. This does
2138             also support debugging. When looking at the block, one can
2139             see if the block has been selected by link_block() after the
2140             LRU ring was empty, or if it was grabbed directly from the
2141             LRU ring in this branch.
2142           */
2143         }
2144 
2145         /*
2146           If we had to wait above, there is a small chance that another
2147           thread grabbed this block for the same file block already. But
2148           in most cases the first condition is true.
2149         */
2150         if (block->hash_link != hash_link &&
2151 	    ! (block->status & BLOCK_IN_SWITCH) )
2152         {
2153 	  /* this is a primary request for a new page */
2154           block->status|= BLOCK_IN_SWITCH;
2155 
2156           KEYCACHE_DBUG_PRINT("find_key_block",
2157                         ("got block %u for new page", BLOCK_NUMBER(block)));
2158 
2159           if (block->status & BLOCK_CHANGED)
2160           {
2161 	    /* The block contains a dirty page - push it out of the cache */
2162 
2163             KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
2164             if (block->status & BLOCK_IN_FLUSH)
2165             {
2166               /*
2167                 The block is marked for flush. If we do not wait here,
2168                 it could happen that we write the block, reassign it to
2169                 another file block, then, before the new owner can read
2170                 the new file block, the flusher writes the cache block
2171                 (which still has the old contents) to the new file block!
2172               */
2173               wait_on_queue(&block->wqueue[COND_FOR_SAVED],
2174                             &keycache->cache_lock);
2175               /*
2176                 The block is marked BLOCK_IN_SWITCH. It should be left
2177                 alone except for reading. No free, no write.
2178               */
2179               DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
2180               DBUG_ASSERT(!(block->status & (BLOCK_REASSIGNED |
2181                                              BLOCK_CHANGED |
2182                                              BLOCK_FOR_UPDATE)));
2183             }
2184             else
2185             {
2186               block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
2187               /*
2188                 BLOCK_IN_EVICTION may be true or not. Other flags must
2189                 have a fixed value.
2190               */
2191               DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2192                           (BLOCK_READ | BLOCK_IN_SWITCH |
2193                            BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2194                            BLOCK_CHANGED | BLOCK_IN_USE));
2195               DBUG_ASSERT(block->hash_link);
2196 
2197               keycache_pthread_mutex_unlock(&keycache->cache_lock);
2198               /*
2199                 The call is thread safe because only the current
2200                 thread might change the block->hash_link value
2201               */
2202               error= my_pwrite(block->hash_link->file,
2203                                block->buffer + block->offset,
2204                                block->length - block->offset,
2205                                block->hash_link->diskpos + block->offset,
2206                                MYF(MY_NABP | MY_WAIT_IF_FULL));
2207               keycache_pthread_mutex_lock(&keycache->cache_lock);
2208 
2209               /* Block status must not have changed. */
2210               DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
2211                           (BLOCK_READ | BLOCK_IN_SWITCH |
2212                            BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
2213                            BLOCK_CHANGED | BLOCK_IN_USE) || fail_block(block));
2214               keycache->global_cache_write++;
2215             }
2216           }
2217 
2218           block->status|= BLOCK_REASSIGNED;
2219           /*
2220             The block comes from the LRU ring. It must have a hash_link
2221             assigned.
2222           */
2223           DBUG_ASSERT(block->hash_link);
2224           if (block->hash_link)
2225           {
2226             /*
2227               All pending requests for this page must be resubmitted.
2228               This must be done before waiting for readers. They could
2229               wait for the flush to complete. And we must also do it
2230               after the wait. Flushers might try to free the block while
2231               we wait. They would wait until the reassignment is
2232               complete. Also the block status must reflect the correct
2233               situation: The block is not changed nor in flush any more.
2234               Note that we must not change the BLOCK_CHANGED flag
2235               outside of link_to_file_list() so that it is always in the
2236               correct queue and the *blocks_changed counters are
2237               correct.
2238             */
2239             block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
2240             link_to_file_list(keycache, block, block->hash_link->file, 1);
2241             release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2242             /*
2243               The block is still assigned to its old hash_link.
2244 	      Wait until all pending read requests
2245 	      for this page are executed
2246 	      (we could have avoided this waiting, if we had read
2247 	      a page in the cache in a sweep, without yielding control)
2248             */
2249             wait_for_readers(keycache, block);
2250             DBUG_ASSERT(block->hash_link && block->hash_link->block == block &&
2251                         block->prev_changed);
2252             /* The reader must not have been a writer. */
2253             DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
2254 
2255             /* Wake flushers that might have found the block in between. */
2256             release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
2257 
2258             /* Remove the hash link for the old file block from the hash. */
2259             unlink_hash(keycache, block->hash_link);
2260 
2261             /*
2262               For sanity checks link_to_file_list() asserts that block
2263               and hash_link refer to each other. Hence we need to assign
2264               the hash_link first, but then we would not know if it was
2265               linked before. Hence we would not know if to unlink it. So
2266               unlink it here and call link_to_file_list(..., FALSE).
2267             */
2268             unlink_changed(block);
2269           }
2270           block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
2271           block->length= 0;
2272           block->offset= keycache->key_cache_block_size;
2273           block->hash_link= hash_link;
2274           link_to_file_list(keycache, block, file, 0);
2275           page_status= PAGE_TO_BE_READ;
2276 
2277           KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
2278           KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
2279         }
2280         else
2281         {
2282           /*
2283             Either (block->hash_link == hash_link),
2284 	    or     (block->status & BLOCK_IN_SWITCH).
2285 
2286             This is for secondary requests for a new file block only.
2287             Either it is already assigned to the new hash_link meanwhile
2288             (if we had to wait due to empty LRU), or it is already in
2289             eviction by another thread. Since this block has been
2290             grabbed from the LRU ring and attached to this hash_link,
2291             another thread cannot grab the same block from the LRU ring
2292             anymore. If the block is in eviction already, it must become
2293             attached to the same hash_link and as such destined for the
2294             same file block.
2295           */
2296           KEYCACHE_DBUG_PRINT("find_key_block",
2297                               ("block->hash_link: %p  hash_link: %p  "
2298                                "block->status: %u", block->hash_link,
2299                                hash_link, block->status ));
2300           page_status= (((block->hash_link == hash_link) &&
2301                          (block->status & BLOCK_READ)) ?
2302                         PAGE_READ : PAGE_WAIT_TO_BE_READ);
2303         }
2304       }
2305     }
2306     else
2307     {
2308       /*
2309         Block is not NULL. This hash_link points to a block.
2310         Either
2311         - block not assigned to this hash_link (yet) or
2312         - block assigned but not yet read from file,
2313         or
2314         - block assigned with valid (changed or unchanged) data and
2315         - it will not be reassigned/freed.
2316 
2317         The first condition means hash_link points to a block in
2318         eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2319         But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2320         link_block(). In both cases it is destined for this hash_link
2321         and its file block address. When this hash_link got its block
2322         address, the block was removed from the LRU ring and cannot be
2323         selected for eviction (for another hash_link) again.
2324 
2325         Register a request on the block. This is another protection
2326         against eviction.
2327       */
2328       DBUG_ASSERT(((block->hash_link != hash_link) &&
2329                    (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2330                   ((block->hash_link == hash_link) &&
2331                    !(block->status & BLOCK_READ)) ||
2332                   ((block->status & BLOCK_READ) &&
2333                    !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2334       reg_requests(keycache, block, 1);
2335       KEYCACHE_DBUG_PRINT("find_key_block",
2336                           ("block->hash_link: %p  hash_link: %p  "
2337                            "block->status: %u", block->hash_link,
2338                            hash_link, block->status ));
2339       page_status= (((block->hash_link == hash_link) &&
2340                      (block->status & BLOCK_READ)) ?
2341                     PAGE_READ : PAGE_WAIT_TO_BE_READ);
2342     }
2343   }
2344 
2345   KEYCACHE_DBUG_ASSERT(page_status != -1);
2346   /* Same assert basically, but be very sure. */
2347   KEYCACHE_DBUG_ASSERT(block);
2348   /* Assert that block has a request and is not in LRU ring. */
2349   DBUG_ASSERT(block->requests);
2350   DBUG_ASSERT(!block->next_used);
2351   DBUG_ASSERT(!block->prev_used);
2352   /* Assert that we return the correct block. */
2353   DBUG_ASSERT((page_status == PAGE_WAIT_TO_BE_READ) ||
2354               ((block->hash_link->file == file) &&
2355                (block->hash_link->diskpos == filepos)));
2356   *page_st=page_status;
2357   KEYCACHE_DBUG_PRINT("find_key_block",
2358                       ("fd: %d  pos: %lu  block->status: %u  page_status: %d",
2359                        file, (ulong) filepos, block->status,
2360                        page_status));
2361 
2362 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
2363   DBUG_EXECUTE("check_keycache2",
2364                test_key_cache(keycache, "end of find_key_block",0););
2365 #endif
2366   KEYCACHE_THREAD_TRACE("find_key_block:end");
2367   DBUG_RETURN(block);
2368 }
2369 
2370 
2371 /*
2372   Read into a key cache block buffer from disk.
2373 
2374   SYNOPSIS
2375 
2376     read_block()
2377       keycache            pointer to a key cache data structure
2378       block               block to which buffer the data is to be read
2379       read_length         size of data to be read
2380       min_length          at least so much data must be read
2381       primary             <-> the current thread will read the data
2382 
2383   RETURN VALUE
2384     None
2385 
2386   NOTES.
2387     The function either reads a page data from file to the block buffer,
2388     or waits until another thread reads it. What page to read is determined
2389     by a block parameter - reference to a hash link for this page.
2390     If an error occurs THE BLOCK_ERROR bit is set in the block status.
2391     We do not report error when the size of successfully read
2392     portion is less than read_length, but not less than min_length.
2393 */
2394 
static void read_block(KEY_CACHE *keycache,
                       BLOCK_LINK *block, uint read_length,
                       uint min_length, my_bool primary)
{
  size_t got_length;

  /* On entry cache_lock is locked */

  KEYCACHE_THREAD_TRACE("read_block");
  if (primary)
  {
    /*
      This code is executed only by threads that submitted primary
      requests. Until block->status contains BLOCK_READ, all other
      request for the block become secondary requests. For a primary
      request the block must be properly initialized.
    */
    DBUG_ASSERT(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    KEYCACHE_DBUG_PRINT("read_block",
                        ("page to be read by primary request"));

    keycache->global_cache_read++;
    /* Page is not in buffer yet, is to be read from disk */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    /*
      Here other threads may step in and register as secondary readers.
      They will register in block->wqueue[COND_FOR_REQUESTED].
    */
    got_length= my_pread(block->hash_link->file, block->buffer,
                         read_length, block->hash_link->diskpos, MYF(0));
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      The block can now have been marked for free (in case of
      FLUSH_RELEASE). Otherwise the state must be unchanged.
    */
    DBUG_ASSERT(((block->status & ~(BLOCK_REASSIGNED |
                                    BLOCK_FOR_UPDATE)) == BLOCK_IN_USE) ||
                fail_block(block));
    DBUG_ASSERT((block->length == 0) || fail_block(block));
    DBUG_ASSERT((block->offset == keycache->key_cache_block_size) ||
                fail_block(block));
    DBUG_ASSERT((block->requests > 0) || fail_block(block));

    /*
      A read shorter than min_length marks the block as erroneous.
      NOTE(review): on a hard I/O error my_pread() (called with MYF(0))
      returns (size_t) -1, which is never less than min_length, so such
      a failure would not be flagged here -- presumably hard errors
      are expected to manifest as short reads; confirm against mysys.
    */
    if (got_length < min_length)
      block->status|= BLOCK_ERROR;
    else
    {
      /* Publish the data: secondary readers may use the block now. */
      block->status|= BLOCK_READ;
      block->length= got_length;
      /*
        Do not set block->offset here. If this block is marked
        BLOCK_CHANGED later, we want to flush only the modified part. So
        only a writer may set block->offset down from
        keycache->key_cache_block_size.
      */
    }
    KEYCACHE_DBUG_PRINT("read_block",
                        ("primary request: new page in cache"));
    /* Signal that all pending requests for this page now can be processed */
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
  }
  else
  {
    /*
      This code is executed only by threads that submitted secondary
      requests. At this point it could happen that the cache block is
      not yet assigned to the hash_link for the requested file block.
      But at awake from the wait this should be the case. Unfortunately
      we cannot assert this here because we do not know the hash_link
      for the requested file block nor the file and position. So we have
      to assert this in the caller.
    */
    KEYCACHE_DBUG_PRINT("read_block",
                      ("secondary request waiting for new page to be read"));
    /* Releases cache_lock while waiting; holds it again on return. */
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
    KEYCACHE_DBUG_PRINT("read_block",
                        ("secondary request: new page in cache"));
  }
}
2480 
2481 
2482 /*
2483   Read a block of data from a cached file into a buffer;
2484 
2485   SYNOPSIS
2486 
2487     key_cache_read()
2488       keycache            pointer to a key cache data structure
2489       file                handler for the file for the block of data to be read
2490       filepos             position of the block of data in the file
2491       level               determines the weight of the data
2492       buff                buffer to where the data must be placed
2493       length              length of the buffer
2494       block_length        length of the block in the key cache buffer
2495       return_buffer       return pointer to the key cache buffer with the data
2496 
2497   RETURN VALUE
    Returns address from where the data is placed if successful, 0 - otherwise.
2499 
2500   NOTES.
2501     The function ensures that a block of data of size length from file
2502     positioned at filepos is in the buffers for some key cache blocks.
2503     Then the function either copies the data into the buffer buff, or,
2504     if return_buffer is TRUE, it just returns the pointer to the key cache
2505     buffer with the data.
2506     Filepos must be a multiple of 'block_length', but it doesn't
2507     have to be a multiple of key_cache_block_size;
2508 */
2509 
uchar *key_cache_read(KEY_CACHE *keycache,
                      File file, my_off_t filepos, int level,
                      uchar *buff, uint length,
                      uint block_length MY_ATTRIBUTE((unused)),
                      int return_buffer MY_ATTRIBUTE((unused)))
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  uchar *start= buff;  /* Returned on success; caller's original buffer. */
  DBUG_ENTER("key_cache_read");
  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
               (uint) file, (ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;

    if (MYSQL_KEYCACHE_READ_START_ENABLED())
    {
      MYSQL_KEYCACHE_READ_START(my_filename(file), length,
                                (ulong) (keycache->blocks_used *
                                         keycache->key_cache_block_size),
                                (ulong) (keycache->blocks_unused *
                                         keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase read requests are allowed to bypass the cache for
      blocks not in the cache. find_key_block() returns NULL in this
      case.

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache, I/O is done
      in chunks of key_cache_block_size. Every chunk tries to use a
      cache block first. If the block size changes in the middle, a
      block could be missed and old data could be read.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Read data in key_cache_block_size increments */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
      {
        KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache cannot be used"));
        goto no_key_cache;
      }
      /* Start reading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not read beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /*
        The cache buffer can only be handed back to the caller when the
        request maps to exactly one full, aligned cache block.
      */
      if (block_length > keycache->key_cache_block_size || offset)
	return_buffer=0;

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;

      MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);

      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Read directly from file.
        */
        keycache->global_cache_read++;
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
        /* filepos + offset restores the caller's original position. */
        error= (my_pread(file, (uchar*) buff, read_length,
                         filepos + offset, MYF(MY_NABP)) != 0);
        keycache_pthread_mutex_lock(&keycache->cache_lock);
        goto next_block;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if (page_st != PAGE_READ)
        {
          MYSQL_KEYCACHE_READ_MISS();
          /* The requested page is to be read into the block buffer */
          read_block(keycache, block,
                     keycache->key_cache_block_size, read_length+offset,
                     (my_bool)(page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (block->length < read_length + offset)
        {
          /*
            Impossible if nothing goes wrong:
            this could only happen if we are using a file with
            small key blocks and are trying to read outside the file
          */
          my_errno= -1;
          block->status|= BLOCK_ERROR;
        }
        else
        {
          MYSQL_KEYCACHE_READ_HIT();
        }
      }

      /* block status may have added BLOCK_ERROR in the above 'if'. */
      if (!(block->status & BLOCK_ERROR))
      {
        {
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#if !defined(SERIALIZED_READ_FROM_CACHE)
          /*
            Drop the lock for the copy; our registered request keeps the
            block from being evicted or freed meanwhile.
          */
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif

          /* Copy data from the cache buffer */
          memcpy(buff, block->buffer+offset, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
#endif
        }
      }

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_read_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      /* Advance to the next block; all further blocks are aligned. */
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));
    if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
    {
      MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
                                        keycache->key_cache_block_size),
                               (ulong) (keycache->blocks_unused *
                                        keycache->key_cache_block_size));
    }
    goto end;
  }
  KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));

no_key_cache:
  /* Key cache is not used */

  keycache->global_cache_r_requests++;
  keycache->global_cache_read++;

  /* Do not hold cache_lock across the file read. */
  if (locked_and_incremented)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  if (my_pread(file, (uchar*) buff, length, filepos, MYF(MY_NABP)))
    error= 1;
  if (locked_and_incremented)
    keycache_pthread_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_PRINT("exit", ("error: %d", error ));
  DBUG_RETURN(error ? (uchar*) 0 : start);
}
2722 
2723 
2724 /*
2725   Insert a block of file data from a buffer into key cache
2726 
2727   SYNOPSIS
2728     key_cache_insert()
2729     keycache            pointer to a key cache data structure
2730     file                handler for the file to insert data from
2731     filepos             position of the block of data in the file to insert
2732     level               determines the weight of the data
2733     buff                buffer to read data from
2734     length              length of the data in the buffer
2735 
2736   NOTES
2737     This is used by MyISAM to move all blocks from a index file to the key
2738     cache
2739 
2740   RETURN VALUE
2741     0 if a success, 1 - otherwise.
2742 */
2743 
int key_cache_insert(KEY_CACHE *keycache,
                     File file, my_off_t filepos, int level,
                     uchar *buff, uint length)
{
  int error= 0;
  DBUG_ENTER("key_cache_insert");
  DBUG_PRINT("enter", ("fd: %u  pos: %lu  length: %u",
               (uint) file,(ulong) filepos, length));

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;
    uint offset;
    int page_st;
    my_bool locked_and_incremented= FALSE;

    /*
      When the keycache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      We do not load index data into a disabled cache nor into an
      ongoing resize.
    */
    if (!keycache->can_be_used || keycache->in_resize)
	goto no_key_cache;
    /* Register the pseudo I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Loaded data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Load data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled or resizing in a later iteration. */
      if (!keycache->can_be_used || keycache->in_resize)
	goto no_key_cache;
      /* Start loading at the beginning of the cache block. */
      filepos-= offset;
      /* Do not load beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* The block has been read by the caller already. */
      keycache->global_cache_read++;
      /* Request the cache block that matches file/pos. */
      keycache->global_cache_r_requests++;
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Stop loading index data.
        */
        goto no_key_cache;
      }
      if (!(block->status & BLOCK_ERROR))
      {
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
            ((page_st == PAGE_TO_BE_READ) &&
             (offset || (read_length < keycache->key_cache_block_size))))
        {
          /*
            Either

            this is a secondary request for a block to be read into the
            cache. The block is in eviction. It is not yet assigned to
            the requested file block (It does not point to the right
            hash_link). So we cannot call remove_reader() on the block.
            And we cannot access the hash_link directly here. We need to
            wait until the assignment is complete. read_block() executes
            the correct wait when called with primary == FALSE.

            Or

            this is a primary request for a block to be read into the
            cache and the supplied data does not fill the whole block.

            This function is called on behalf of a LOAD INDEX INTO CACHE
            statement, which is a read-only task and allows other
            readers. It is possible that a parallel running reader tries
            to access this block. If it needs more data than has been
            supplied here, it would report an error. To be sure that we
            have all data in the block that is available in the file, we
            read the block ourselves.

            Though reading again what the caller did read already is an
            expensive operation, we need to do this for correctness.
          */
          read_block(keycache, block, keycache->key_cache_block_size,
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
          /*
            A secondary request must now have the block assigned to the
            requested file block. It does not hurt to check it for
            primary requests too.
          */
          DBUG_ASSERT(keycache->can_be_used);
          DBUG_ASSERT(block->hash_link->file == file);
          DBUG_ASSERT(block->hash_link->diskpos == filepos);
          DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        }
        else if (page_st == PAGE_TO_BE_READ)
        {
          /*
            This is a new block in the cache. If we come here, we have
            data for the whole block.
          */
          DBUG_ASSERT(block->hash_link->requests);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          /*
            Here other threads may step in and register as secondary readers.
            They will register in block->wqueue[COND_FOR_REQUESTED].
          */
#endif

          /* Copy data from buff */
          memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
          keycache_pthread_mutex_lock(&keycache->cache_lock);
          DBUG_ASSERT(block->status & BLOCK_IN_USE);
          DBUG_ASSERT((page_st == PAGE_TO_BE_READ) ||
                      (block->status & BLOCK_READ));
#endif
          /*
            After the data is in the buffer, we can declare the block
            valid. Now other threads do not need to register as
            secondary readers any more. They can immediately access the
            block.
          */
          block->status|= BLOCK_READ;
          block->length= read_length+offset;
          /*
            Do not set block->offset here. If this block is marked
            BLOCK_CHANGED later, we want to flush only the modified part. So
            only a writer may set block->offset down from
            keycache->key_cache_block_size.
          */
          KEYCACHE_DBUG_PRINT("key_cache_insert",
                              ("primary request: new page in cache"));
          /* Signal all pending requests. */
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
        }
        else
        {
          /*
            page_st == PAGE_READ. The block is in the buffer. All data
            must already be present. Blocks are always read with all
            data available on file. Assert that the block does not have
            less contents than the preloader supplies. If the caller has
            data beyond block->length, it means that a file write has
            been done while this block was in cache and not extended
            with the new data. If the condition is met, we can simply
            ignore the block.
          */
          DBUG_ASSERT((page_st == PAGE_READ) &&
                      (read_length + offset <= block->length));
        }

        /*
          A secondary request must now have the block assigned to the
          requested file block. It does not hurt to check it for primary
          requests too.
        */
        DBUG_ASSERT(block->hash_link->file == file);
        DBUG_ASSERT(block->hash_link->diskpos == filepos);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
      } /* end of if (!(block->status & BLOCK_ERROR)) */

      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_insert_block_error",
                      block->status|= BLOCK_ERROR; errno=EIO;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        free_block(keycache, block);
        error= 1;
        break;
      }

      /* Advance to the next block; all further blocks are aligned. */
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));

  no_key_cache:
    /* cache_lock is held on every path that reaches this label. */
    if (locked_and_incremented)
      dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  DBUG_RETURN(error);
}
2960 
2961 
2962 /*
2963   Write a buffer into a cached file.
2964 
2965   SYNOPSIS
2966 
2967     key_cache_write()
2968       keycache            pointer to a key cache data structure
2969       file                handler for the file to write data to
2970       filepos             position in the file to write data to
2971       level               determines the weight of the data
2972       buff                buffer with the data
2973       length              length of the buffer
2974       dont_write          if is 0 then all dirty pages involved in writing
2975                           should have been flushed from key cache
2976 
2977   RETURN VALUE
2978     0 if a success, 1 - otherwise.
2979 
2980   NOTES.
2981     The function copies the data of size length from buff into buffers
2982     for key cache blocks that are  assigned to contain the portion of
2983     the file starting with position filepos.
2984     It ensures that this data is flushed to the file if dont_write is FALSE.
2985     Filepos must be a multiple of 'block_length', but it doesn't
2986     have to be a multiple of key_cache_block_size;
2987 
2988     dont_write is always TRUE in the server (info->lock_type is never F_UNLCK).
2989 */
2990 
int key_cache_write(KEY_CACHE *keycache,
                    File file, my_off_t filepos, int level,
                    uchar *buff, uint length,
                    uint block_length  MY_ATTRIBUTE((unused)),
                    int dont_write)
{
  my_bool locked_and_incremented= FALSE;
  int error=0;
  DBUG_ENTER("key_cache_write");
  DBUG_PRINT("enter",
             ("fd: %u  pos: %lu  length: %u  block_length: %u"
              "  key_block_length: %u",
              (uint) file, (ulong) filepos, length, block_length,
              keycache ? keycache->key_cache_block_size : 0));

  if (!dont_write)
  {
    /* purecov: begin inspected */
    /* Not used in the server. */
    /* Force writing from buff into disk. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (my_pwrite(file, buff, length, filepos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);
    /* purecov: end */
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("check_keycache",
               test_key_cache(keycache, "start of key_cache_write", 1););
#endif

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    BLOCK_LINK *block;
    uint read_length;          /* number of bytes handled in this iteration */
    uint offset;               /* offset of filepos within its cache block */
    int page_st;               /* page status returned by find_key_block() */

    if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
    {
      MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
                                 (ulong) (keycache->blocks_used *
                                          keycache->key_cache_block_size),
                                 (ulong) (keycache->blocks_unused *
                                          keycache->key_cache_block_size));
    }

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: Flushing and re-initializing. In
      the flush phase write requests can modify dirty blocks that are
      not yet in flush. Otherwise they are allowed to bypass the cache.
      find_key_block() returns NULL in both cases (clean blocks and
      non-cached blocks).

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With enabled cache I/O is done in
      chunks of key_cache_block_size. Every chunk tries to use a cache
      block first. If the block size changes in the middle, a block
      could be missed and data could be written below a cached block.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= TRUE;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Write data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
        goto no_key_cache;

      MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
      /* Start writing at the beginning of the cache block. */
      filepos-= offset;
      /* Do not write beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      KEYCACHE_DBUG_ASSERT(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_w_requests++;
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Write directly to file.
        */
        if (dont_write)
        {
          /* Used in the server. */
          keycache->global_cache_write++;
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          if (my_pwrite(file, (uchar*) buff, read_length, filepos + offset,
                        MYF(MY_NABP | MY_WAIT_IF_FULL)))
            error=1;
          keycache_pthread_mutex_lock(&keycache->cache_lock);
        }
        goto next_block;
      }
      /*
        Prevent block from flushing and from being selected to be
        freed. This must be set when we release the cache_lock.
        However, we must not set the status of the block before it is
        assigned to this file/pos.
      */
      if (page_st != PAGE_WAIT_TO_BE_READ)
        block->status|= BLOCK_FOR_UPDATE;
      /*
        We must read the file block first if it is not yet in the cache
        and we do not replace all of its contents.

        In cases where the cache block is big enough to contain (parts
        of) index blocks of different indexes, our request can be
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
        reading the file block. If the read completes after us, it
        overwrites our new contents with the old contents. So we have to
        wait for the other thread to complete the read of this block.
        read_block() takes care for the wait.
      */
      if (!(block->status & BLOCK_ERROR) &&
          ((page_st == PAGE_TO_BE_READ &&
            (offset || read_length < keycache->key_cache_block_size)) ||
           (page_st == PAGE_WAIT_TO_BE_READ)))
      {
        read_block(keycache, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
                   offset, (page_st == PAGE_TO_BE_READ));
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /*
          Prevent block from flushing and from being selected to be
          freed. This must be set when we release the cache_lock.
          Here we set it in case we could not set it above.
        */
        block->status|= BLOCK_FOR_UPDATE;
      }
      /*
        The block should always be assigned to the requested file block
        here. It need not be BLOCK_READ when overwriting the whole block.
      */
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT(block->hash_link->diskpos == filepos);
      DBUG_ASSERT(block->status & BLOCK_IN_USE);
      DBUG_ASSERT((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
      /*
        The block to be written must not be marked BLOCK_REASSIGNED.
        Otherwise it could be freed in dirty state or reused without
        another flush during eviction. It must also not be in flush.
        Otherwise the old contents may have been flushed already and
        the flusher could clear BLOCK_CHANGED without flushing the
        new changes again.
      */
      DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));

      while (block->status & BLOCK_IN_FLUSHWRITE)
      {
        /*
          Another thread is flushing the block. It was dirty already.
          Wait until the block is flushed to file. Otherwise we could
          modify the buffer contents just while it is written to file.
          An unpredictable file block contents would be the result.
          While we wait, several things can happen to the block,
          including another flush. But the block cannot be reassigned to
          another hash_link until we release our request on it.
        */
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
        DBUG_ASSERT(keycache->can_be_used);
        DBUG_ASSERT(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /* Still must not be marked for free. */
        DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
        DBUG_ASSERT(block->hash_link && (block->hash_link->block == block));
      }

      /*
        We could perhaps release the cache_lock during access of the
        data like in the other functions. Locks outside of the key cache
        assure that readers and a writer do not access the same range of
        data. Parallel accesses should happen only if the cache block
        contains multiple index block(fragment)s. So different parts of
        the buffer would be read/written. An attempt to flush during
        memcpy() is prevented with BLOCK_FOR_UPDATE.
      */
      if (!(block->status & BLOCK_ERROR))
      {
#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif
        memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_lock(&keycache->cache_lock);
#endif
      }

      if (!dont_write)
      {
        /* Not used in the server. buff has been written to disk at start. */
        if ((block->status & BLOCK_CHANGED) &&
            (!offset && read_length >= keycache->key_cache_block_size))
             link_to_file_list(keycache, block, block->hash_link->file, 1);
      }
      else if (! (block->status & BLOCK_CHANGED))
        link_to_changed_list(keycache, block);
      block->status|=BLOCK_READ;
      /*
        Allow block to be selected to be freed. Since it is marked
        BLOCK_CHANGED too, it won't be selected to be freed without
        a flush.
      */
      block->status&= ~BLOCK_FOR_UPDATE;
      set_if_smaller(block->offset, offset);
      set_if_bigger(block->length, read_length+offset);

      /* Threads may be waiting for the changes to be complete. */
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

      /*
        If only a part of the cache block is to be replaced, and the
        rest has been read from file, then the cache lock has been
        released for I/O and it could be possible that another thread
        wants to evict or free the block and waits for it to be
        released. So we must not just decrement hash_link->requests, but
        also wake a waiting thread.
      */
      remove_reader(block);

      /* Error injection for coverage testing. */
      DBUG_EXECUTE_IF("key_cache_write_block_error",
                      block->status|= BLOCK_ERROR;);

      /* Do not link erroneous blocks into the LRU ring, but free them. */
      if (!(block->status & BLOCK_ERROR))
      {
        /*
          Link the block into the LRU ring if it's the last submitted
          request for the block. This enables eviction for the block.
        */
        unreg_request(keycache, block, 1);
      }
      else
      {
        /* Pretend a "clean" block to avoid complications. */
        block->status&= ~(BLOCK_CHANGED);
        free_block(keycache, block);
        error= 1;
        break;
      }

    next_block:
      /* Advance to the next cache-block-sized chunk of the request. */
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));
    goto end;
  }

no_key_cache:
  /* Key cache is not used */
  if (dont_write)
  {
    /* Used in the server. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (locked_and_incremented)
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
    if (my_pwrite(file, (uchar*) buff, length, filepos,
                  MYF(MY_NABP | MY_WAIT_IF_FULL)))
      error=1;
    if (locked_and_incremented)
      keycache_pthread_mutex_lock(&keycache->cache_lock);
  }

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }

  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
  {
    MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
                                       keycache->key_cache_block_size),
                              (ulong) (keycache->blocks_unused *
                                       keycache->key_cache_block_size));
  }

#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
  DBUG_EXECUTE("exec",
               test_key_cache(keycache, "end of key_cache_write", 1););
#endif
  DBUG_RETURN(error);
}
3302 
3303 
3304 /*
3305   Free block.
3306 
3307   SYNOPSIS
3308     free_block()
3309       keycache          Pointer to a key cache data structure
3310       block             Pointer to the block to free
3311 
3312   DESCRIPTION
3313     Remove reference to block from hash table.
3314     Remove block from the chain of clean blocks.
3315     Add block to the free list.
3316 
3317   NOTE
3318     Block must not be free (status == 0).
3319     Block must not be in free_block_list.
3320     Block must not be in the LRU ring.
3321     Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
3322     Block must not be in free (BLOCK_REASSIGNED).
3323     Block must not be in flush (BLOCK_IN_FLUSH).
3324     Block must not be dirty (BLOCK_CHANGED).
3325     Block must not be in changed_blocks (dirty) hash.
3326     Block must be in file_blocks (clean) hash.
3327     Block must refer to a hash_link.
3328     Block must have a request registered on it.
3329 */
3330 
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block",
                      ("block %u to be freed, hash_link %p  status: %u",
                       BLOCK_NUMBER(block), block->hash_link,
                       block->status));
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  DBUG_ASSERT((block->status & BLOCK_IN_USE) &&
              !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                                 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an DBUG_ASSERT to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    DBUG_ASSERT(block->hash_link && block->hash_link->block == block);
    DBUG_ASSERT((block->status & (BLOCK_READ | BLOCK_IN_USE |
                                  BLOCK_REASSIGNED)) &&
                !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_IN_FLUSH | BLOCK_CHANGED |
                                   BLOCK_FOR_UPDATE)));
    DBUG_ASSERT(block->prev_changed && *block->prev_changed == block);
    DBUG_ASSERT(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operation in this case. Assert that there are no other requests
    registered.
  */
  DBUG_ASSERT(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Error blocks are not put into the LRU ring. */
  if (!(block->status & BLOCK_ERROR))
  {
    /* Here the block must be in the LRU ring. Unlink it again. */
    DBUG_ASSERT(block->next_used && block->prev_used &&
                *block->prev_used == block);
    unlink_block(keycache, block);
  }
  /* A freed block no longer counts as warm. */
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  /* Reset the block to its pristine "never used" state. */
  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;
  KEYCACHE_THREAD_TRACE("free block");
  KEYCACHE_DBUG_PRINT("free_block", ("block is freed"));

  /* Enforced by unlink_changed(), but just to be sure. */
  DBUG_ASSERT(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  DBUG_ASSERT(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list (a stack, pushed at head). */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
3457 
3458 
/*
  qsort() comparator: order two cached blocks by the disk position of
  the file block they hold, so a write burst proceeds in ascending
  file offset order (minimizes seeks).
*/
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  my_off_t pos_a= (*a)->hash_link->diskpos;
  my_off_t pos_b= (*b)->hash_link->diskpos;

  if (pos_a < pos_b)
    return -1;
  if (pos_a > pos_b)
    return 1;
  return 0;
}
3464 
3465 
3466 /*
3467   Flush a portion of changed blocks to disk,
3468   free used blocks if requested
3469 */
3470 
static int flush_cached_blocks(KEY_CACHE *keycache,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;
  uint count= (uint) (end-cache);

  /* Don't lock the cache during the flush */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
     As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
     we are guaranteed no thread will change them
  */
  my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;

    KEYCACHE_DBUG_PRINT("flush_cached_blocks",
                        ("block %u to be flushed", BLOCK_NUMBER(block)));
    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~BLOCK_IN_EVICTION) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      /* Release the lock for the duration of the disk write. */
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
      error= my_pwrite(file, block->buffer+block->offset,
                       block->length - block->offset,
                       block->hash_link->diskpos+ block->offset,
                       MYF(MY_NABP | MY_WAIT_IF_FULL));
      keycache_pthread_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        /* Remember the first error; errno may be 0 for some failures. */
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      DBUG_ASSERT(block->hash_link);
      DBUG_ASSERT(block->hash_link->block == block);
      DBUG_ASSERT(block->hash_link->file == file);
      DBUG_ASSERT((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let possible waiting requests proceed to write to the block page.
      It might happen only during an operation to resize the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_FOR_UPDATE)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }

  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
3574 
3575 
3576 /*
3577   Flush all key blocks for a file to disk, but don't do any mutex locks.
3578 
3579   SYNOPSIS
3580     flush_key_blocks_int()
3581       keycache            pointer to a key cache data structure
3582       file                handler for the file to flush to
3583       flush_type          type of the flush
3584 
3585   NOTES
3586     This function doesn't do any mutex locks because it needs to be called both
3587     from flush_key_blocks and flush_all_key_blocks (the later one does the
3588     mutex lock in the resize_key_cache() function).
3589 
3590     We do only care about changed blocks that exist when the function is
3591     entered. We do not guarantee that all changed blocks of the file are
3592     flushed if more blocks change while this function is running.
3593 
3594   RETURN
3595     0   ok
3596     1  error
3597 */
3598 
flush_key_blocks_int(KEY_CACHE * keycache,File file,enum flush_type type)3599 static int flush_key_blocks_int(KEY_CACHE *keycache,
3600 				File file, enum flush_type type)
3601 {
3602   BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3603   int last_errno= 0;
3604   int last_errcnt= 0;
3605   DBUG_ENTER("flush_key_blocks_int");
3606   DBUG_PRINT("enter",("file: %d  blocks_used: %lu  blocks_changed: %lu",
3607               file, keycache->blocks_used, keycache->blocks_changed));
3608 
3609 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
3610   DBUG_EXECUTE("check_keycache",
3611                test_key_cache(keycache, "start of flush_key_blocks", 0););
3612 #endif
3613 
3614   cache= cache_buff;
3615   if (keycache->disk_blocks > 0 &&
3616       (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
3617   {
3618     /* Key cache exists and flush is not disabled */
3619     int error= 0;
3620     uint count= FLUSH_CACHE;
3621     BLOCK_LINK **pos,**end;
3622     BLOCK_LINK *first_in_switch= NULL;
3623     BLOCK_LINK *last_in_flush;
3624     BLOCK_LINK *last_for_update;
3625     BLOCK_LINK *block, *next;
3626 #if defined(KEYCACHE_DEBUG)
3627     uint cnt=0;
3628 #endif
3629 
3630     if (type != FLUSH_IGNORE_CHANGED)
3631     {
3632       /*
3633          Count how many key blocks we have to cache to be able
3634          to flush all dirty pages with minimum seek moves
3635       */
3636       count= 0;
3637       for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3638            block ;
3639            block= block->next_changed)
3640       {
3641         if ((block->hash_link->file == file) &&
3642             !(block->status & BLOCK_IN_FLUSH))
3643         {
3644           count++;
3645           KEYCACHE_DBUG_ASSERT(count<= keycache->blocks_used);
3646         }
3647       }
3648       /*
3649         Allocate a new buffer only if its bigger than the one we have.
3650         Assure that we always have some entries for the case that new
3651         changed blocks appear while we need to wait for something.
3652       */
3653       if ((count > FLUSH_CACHE) &&
3654           !(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
3655                                             MYF(0))))
3656         cache= cache_buff;
3657       /*
3658         After a restart there could be more changed blocks than now.
3659         So we should not let count become smaller than the fixed buffer.
3660       */
3661       if (cache == cache_buff)
3662         count= FLUSH_CACHE;
3663     }
3664 
3665     /* Retrieve the blocks and write them to a buffer to be flushed */
3666 restart:
3667     last_in_flush= NULL;
3668     last_for_update= NULL;
3669     end= (pos= cache)+count;
3670     for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3671          block ;
3672          block= next)
3673     {
3674 #if defined(KEYCACHE_DEBUG)
3675       cnt++;
3676       KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
3677 #endif
3678       next= block->next_changed;
3679       if (block->hash_link->file == file)
3680       {
3681         if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3682         {
3683           /*
3684             Note: The special handling of BLOCK_IN_SWITCH is obsolete
3685             since we set BLOCK_IN_FLUSH if the eviction includes a
3686             flush. It can be removed in a later version.
3687           */
3688           if (!(block->status & BLOCK_IN_SWITCH))
3689           {
3690             /*
3691               We care only for the blocks for which flushing was not
3692               initiated by another thread and which are not in eviction.
3693               Registering a request on the block unlinks it from the LRU
3694               ring and protects against eviction.
3695             */
3696             reg_requests(keycache, block, 1);
3697             if (type != FLUSH_IGNORE_CHANGED)
3698             {
3699               /* It's not a temporary file */
3700               if (pos == end)
3701               {
3702                 /*
3703                   This should happen relatively seldom. Remove the
3704                   request because we won't do anything with the block
3705                   but restart and pick it again in the next iteration.
3706                 */
3707                 unreg_request(keycache, block, 0);
3708                 /*
3709                   This happens only if there is not enough
3710                   memory for the big block
3711                 */
3712                 if ((error= flush_cached_blocks(keycache, file, cache,
3713                                                 end,type)))
3714                 {
3715                   /* Do not loop infinitely trying to flush in vain. */
3716                   if ((last_errno == error) && (++last_errcnt > 5))
3717                     goto err;
3718                   last_errno= error;
3719                 }
3720                 /*
3721                   Restart the scan as some other thread might have changed
3722                   the changed blocks chain: the blocks that were in switch
3723                   state before the flush started have to be excluded
3724                 */
3725                 goto restart;
3726               }
3727               /*
3728                 Mark the block with BLOCK_IN_FLUSH in order not to let
3729                 other threads to use it for new pages and interfere with
3730                 our sequence of flushing dirty file pages. We must not
3731                 set this flag before actually putting the block on the
3732                 write burst array called 'cache'.
3733               */
3734               block->status|= BLOCK_IN_FLUSH;
3735               /* Add block to the array for a write burst. */
3736               *pos++= block;
3737             }
3738             else
3739             {
3740               /* It's a temporary file */
3741               DBUG_ASSERT(!(block->status & BLOCK_REASSIGNED));
3742               /*
3743                 free_block() must not be called with BLOCK_CHANGED. Note
3744                 that we must not change the BLOCK_CHANGED flag outside of
3745                 link_to_file_list() so that it is always in the correct
3746                 queue and the *blocks_changed counters are correct.
3747               */
3748               link_to_file_list(keycache, block, file, 1);
3749               if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3750               {
3751                 /* A request has been registered against the block above. */
3752                 free_block(keycache, block);
3753               }
3754               else
3755               {
3756                 /*
3757                   Link the block into the LRU ring if it's the last
3758                   submitted request for the block. This enables eviction
3759                   for the block. A request has been registered against
3760                   the block above.
3761                 */
3762                 unreg_request(keycache, block, 1);
3763               }
3764             }
3765           }
3766           else
3767           {
3768             /*
3769               Link the block into a list of blocks 'in switch'.
3770 
3771               WARNING: Here we introduce a place where a changed block
3772               is not in the changed_blocks hash! This is acceptable for
3773               a BLOCK_IN_SWITCH. Never try this for another situation.
3774               Other parts of the key cache code rely on changed blocks
3775               being in the changed_blocks hash.
3776             */
3777             unlink_changed(block);
3778             link_changed(block, &first_in_switch);
3779           }
3780         }
3781         else if (type != FLUSH_KEEP)
3782         {
3783           /*
3784             During the normal flush at end of statement (FLUSH_KEEP) we
3785             do not need to ensure that blocks in flush or update by
3786             other threads are flushed. They will be flushed by them
3787             later. In all other cases we must assure that we do not have
3788             any changed block of this file in the cache when this
3789             function returns.
3790           */
3791           if (block->status & BLOCK_IN_FLUSH)
3792           {
3793             /* Remember the last block found to be in flush. */
3794             last_in_flush= block;
3795           }
3796           else
3797           {
3798             /* Remember the last block found to be selected for update. */
3799             last_for_update= block;
3800           }
3801         }
3802       }
3803     }
3804     if (pos != cache)
3805     {
3806       if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3807       {
        /* Do not loop infinitely trying to flush in vain. */
3809         if ((last_errno == error) && (++last_errcnt > 5))
3810           goto err;
3811         last_errno= error;
3812       }
3813       /*
3814         Do not restart here during the normal flush at end of statement
3815         (FLUSH_KEEP). We have now flushed at least all blocks that were
3816         changed when entering this function. In all other cases we must
3817         assure that we do not have any changed block of this file in the
3818         cache when this function returns.
3819       */
3820       if (type != FLUSH_KEEP)
3821         goto restart;
3822     }
3823     if (last_in_flush)
3824     {
3825       /*
3826         There are no blocks to be flushed by this thread, but blocks in
3827         flush by other threads. Wait until one of the blocks is flushed.
3828         Re-check the condition for last_in_flush. We may have unlocked
3829         the cache_lock in flush_cached_blocks(). The state of the block
3830         could have changed.
3831       */
3832       if (last_in_flush->status & BLOCK_IN_FLUSH)
3833         wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3834                       &keycache->cache_lock);
3835       /* Be sure not to lose a block. They may be flushed in random order. */
3836       goto restart;
3837     }
3838     if (last_for_update)
3839     {
3840       /*
3841         There are no blocks to be flushed by this thread, but blocks for
3842         update by other threads. Wait until one of the blocks is updated.
3843         Re-check the condition for last_for_update. We may have unlocked
3844         the cache_lock in flush_cached_blocks(). The state of the block
3845         could have changed.
3846       */
3847       if (last_for_update->status & BLOCK_FOR_UPDATE)
3848         wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3849                       &keycache->cache_lock);
3850       /* The block is now changed. Flush it. */
3851       goto restart;
3852     }
3853 
3854     /*
3855       Wait until the list of blocks in switch is empty. The threads that
3856       are switching these blocks will relink them to clean file chains
3857       while we wait and thus empty the 'first_in_switch' chain.
3858     */
3859     while (first_in_switch)
3860     {
3861 #if defined(KEYCACHE_DEBUG)
3862       cnt= 0;
3863 #endif
3864       wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3865                     &keycache->cache_lock);
3866 #if defined(KEYCACHE_DEBUG)
3867       cnt++;
3868       KEYCACHE_DBUG_ASSERT(cnt <= keycache->blocks_used);
3869 #endif
3870       /*
3871         Do not restart here. We have flushed all blocks that were
3872         changed when entering this function and were not marked for
3873         eviction. Other threads have now flushed all remaining blocks in
3874         the course of their eviction.
3875       */
3876     }
3877 
3878     if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3879     {
3880       BLOCK_LINK *last_for_update= NULL;
3881       BLOCK_LINK *last_in_switch= NULL;
3882       uint total_found= 0;
3883       uint found;
3884 
3885       /*
3886         Finally free all clean blocks for this file.
3887         During resize this may be run by two threads in parallel.
3888       */
3889       do
3890       {
3891         found= 0;
3892         for (block= keycache->file_blocks[FILE_HASH(file)] ;
3893              block ;
3894              block= next)
3895         {
3896           /* Remember the next block. After freeing we cannot get at it. */
3897           next= block->next_changed;
3898 
3899           /* Changed blocks cannot appear in the file_blocks hash. */
3900           DBUG_ASSERT(!(block->status & BLOCK_CHANGED));
3901           if (block->hash_link->file == file)
3902           {
3903             /* We must skip blocks that will be changed. */
3904             if (block->status & BLOCK_FOR_UPDATE)
3905             {
3906               last_for_update= block;
3907               continue;
3908             }
3909 
3910             /*
3911               We must not free blocks in eviction (BLOCK_IN_EVICTION |
3912               BLOCK_IN_SWITCH) or blocks intended to be freed
3913               (BLOCK_REASSIGNED).
3914             */
3915             if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3916                                    BLOCK_REASSIGNED)))
3917             {
3918               struct st_hash_link *UNINIT_VAR(next_hash_link);
3919               my_off_t UNINIT_VAR(next_diskpos);
3920               File UNINIT_VAR(next_file);
3921               uint UNINIT_VAR(next_status);
3922               uint UNINIT_VAR(hash_requests);
3923 
3924               total_found++;
3925               found++;
3926               KEYCACHE_DBUG_ASSERT(found <= keycache->blocks_used);
3927 
3928               /*
3929                 Register a request. This unlinks the block from the LRU
3930                 ring and protects it against eviction. This is required
3931                 by free_block().
3932               */
3933               reg_requests(keycache, block, 1);
3934 
3935               /*
3936                 free_block() may need to wait for readers of the block.
3937                 This is the moment where the other thread can move the
3938                 'next' block from the chain. free_block() needs to wait
3939                 if there are requests for the block pending.
3940               */
3941               if (next && (hash_requests= block->hash_link->requests))
3942               {
3943                 /* Copy values from the 'next' block and its hash_link. */
3944                 next_status=    next->status;
3945                 next_hash_link= next->hash_link;
3946                 next_diskpos=   next_hash_link->diskpos;
3947                 next_file=      next_hash_link->file;
3948                 DBUG_ASSERT(next == next_hash_link->block);
3949               }
3950 
3951               free_block(keycache, block);
3952               /*
3953                 If we had to wait and the state of the 'next' block
3954                 changed, break the inner loop. 'next' may no longer be
3955                 part of the current chain.
3956 
3957                 We do not want to break the loop after every free_block(),
3958                 not even only after waits. The chain might be quite long
3959                 and contain blocks for many files. Traversing it again and
3960                 again to find more blocks for this file could become quite
3961                 inefficient.
3962               */
3963               if (next && hash_requests &&
3964                   ((next_status    != next->status) ||
3965                    (next_hash_link != next->hash_link) ||
3966                    (next_file      != next_hash_link->file) ||
3967                    (next_diskpos   != next_hash_link->diskpos) ||
3968                    (next           != next_hash_link->block)))
3969                 break;
3970             }
3971             else
3972             {
3973               last_in_switch= block;
3974             }
3975           }
3976         } /* end for block in file_blocks */
3977       } while (found);
3978 
3979       /*
3980         If any clean block has been found, we may have waited for it to
3981         become free. In this case it could be possible that another clean
3982         block became dirty. This is possible if the write request existed
3983         before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3984       */
3985       if (total_found)
3986         goto restart;
3987 
3988       /*
3989         To avoid an infinite loop, wait until one of the blocks marked
3990         for update is updated.
3991       */
3992       if (last_for_update)
3993       {
3994         /* We did not wait. Block must not have changed status. */
3995         DBUG_ASSERT(last_for_update->status & BLOCK_FOR_UPDATE);
3996         wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3997                       &keycache->cache_lock);
3998         goto restart;
3999       }
4000 
4001       /*
4002         To avoid an infinite loop wait until one of the blocks marked
4003         for eviction is switched.
4004       */
4005       if (last_in_switch)
4006       {
4007         /* We did not wait. Block must not have changed status. */
4008         DBUG_ASSERT(last_in_switch->status & (BLOCK_IN_EVICTION |
4009                                               BLOCK_IN_SWITCH |
4010                                               BLOCK_REASSIGNED));
4011         wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
4012                       &keycache->cache_lock);
4013         goto restart;
4014       }
4015 
4016     } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
4017 
4018   } /* if (keycache->disk_blocks > 0 */
4019 
4020 #ifndef DBUG_OFF
4021   DBUG_EXECUTE("check_keycache",
4022                test_key_cache(keycache, "end of flush_key_blocks", 0););
4023 #endif
4024 err:
4025   if (cache != cache_buff)
4026     my_free(cache);
4027   if (last_errno)
4028     errno=last_errno;                /* Return first error */
4029   DBUG_RETURN(last_errno != 0);
4030 }
4031 
4032 
4033 /*
4034   Flush all blocks for a file to disk
4035 
4036   SYNOPSIS
4037 
4038     flush_key_blocks()
4039       keycache            pointer to a key cache data structure
4040       file                handler for the file to flush to
4041       flush_type          type of the flush
4042 
4043   RETURN
4044     0   ok
4045     1  error
4046 */
4047 
int flush_key_blocks(KEY_CACHE *keycache,
                     File file, enum flush_type type)
{
  int result= 0;
  DBUG_ENTER("flush_key_blocks");
  DBUG_PRINT("enter", ("keycache: 0x%lx", (long) keycache));

  /* Nothing to flush for a key cache that was never initialized. */
  if (!keycache->key_cache_inited)
    DBUG_RETURN(0);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    The key cache may have been ended while we waited for the lock.
    In that case disk_blocks is no longer positive and there is
    nothing left to flush.
  */
  if (keycache->disk_blocks > 0)
  {
    inc_counter_for_resize_op(keycache);
    result= flush_key_blocks_int(keycache, file, type);
    dec_counter_for_resize_op(keycache);
  }
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  DBUG_RETURN(result);
}
4069 
4070 
4071 /*
4072   Flush all blocks in the key cache to disk.
4073 
4074   SYNOPSIS
4075     flush_all_key_blocks()
4076       keycache                  pointer to key cache root structure
4077 
4078   DESCRIPTION
4079 
4080     Flushing of the whole key cache is done in two phases.
4081 
4082     1. Flush all changed blocks, waiting for them if necessary. Loop
4083     until there is no changed block left in the cache.
4084 
4085     2. Free all clean blocks. Normally this means free all blocks. The
4086     changed blocks were flushed in phase 1 and became clean. However we
4087     may need to wait for blocks that are read by other threads. While we
4088     wait, a clean block could become changed if that operation started
4089     before the resize operation started. To be safe we must restart at
4090     phase 1.
4091 
4092     When we can run through the changed_blocks and file_blocks hashes
4093     without finding a block any more, then we are done.
4094 
4095     Note that we hold keycache->cache_lock all the time unless we need
4096     to wait for something.
4097 
4098   RETURN
4099     0           OK
4100     != 0        Error
4101 */
4102 
static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK    *block;
  uint          blocks_freed;    /* phase-2 hits; non-zero forces a restart */
  uint          found_this_pass; /* hits in the current hash sweep */
  uint          bucket;
  DBUG_ENTER("flush_all_key_blocks");

  do
  {
    mysql_mutex_assert_owner(&keycache->cache_lock);
    blocks_freed= 0;

    /*
      Phase 1: Write out all changed blocks, waiting for them if
      necessary. Repeat the sweep until no changed block is left in
      the cache.
    */
    do
    {
      found_this_pass= 0;
      /* Sweep the complete changed_blocks hash array. */
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        /*
          A non-empty chain yields a file via its first block. Flushing
          that file removes all of its changed blocks from the cache,
          so the same block cannot show up here on the next sweep. New
          writes for blocks are not accepted during the flush. If
          several files share this hash bucket, one of them is flushed
          per iteration of the outer phase-1 loop.
        */
        if ((block= keycache->changed_blocks[bucket]))
        {
          found_this_pass++;
          /*
            Write the dirty blocks but keep them in the cache. They
            remain usable for reading until all other blocks are
            flushed too.
          */
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            DBUG_RETURN(1);
        }
      }

    } while (found_this_pass);

    /*
      Phase 2: Free all clean blocks -- normally every block, as the
      changed blocks became clean in phase 1. We may have to wait for
      blocks that other threads are still reading. During such a wait a
      clean block can become dirty again if its write request started
      before the resize operation did. Hence the restart at phase 1
      whenever anything was freed here.
    */
    do
    {
      found_this_pass= 0;
      /* Sweep the complete file_blocks hash array. */
      for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
      {
        /*
          A non-empty chain yields a file via its first block. Freeing
          that file's blocks removes them from this bucket, so the same
          block cannot show up here on the next sweep. If several files
          share this hash bucket, one of them is handled per iteration
          of the outer phase-2 loop.
        */
        if ((block= keycache->file_blocks[bucket]))
        {
          blocks_freed++;
          found_this_pass++;
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_RELEASE))
            DBUG_RETURN(1);
        }
      }

    } while (found_this_pass);

    /*
      If phase 2 freed any block, we may have waited for it to become
      free, and meanwhile another clean block could have become dirty
      (possible for write requests that existed before the resize
      started, BLOCK_FOR_UPDATE). Re-check both hashes.
    */
  } while (blocks_freed);

#ifndef DBUG_OFF
  /* By now both hash arrays must be completely empty. */
  for (bucket= 0; bucket < CHANGED_BLOCKS_HASH; bucket++)
  {
    DBUG_ASSERT(!keycache->changed_blocks[bucket]);
    DBUG_ASSERT(!keycache->file_blocks[bucket]);
  }
#endif

  DBUG_RETURN(0);
}
4203 
4204 
4205 /*
4206   Reset the counters of a key cache.
4207 
4208   SYNOPSIS
4209     reset_key_cache_counters()
4210     name       the name of a key cache
4211     key_cache  pointer to the key kache to be reset
4212 
4213   DESCRIPTION
4214    This procedure is used by process_key_caches() to reset the counters of all
4215    currently used key caches, both the default one and the named ones.
4216 
4217   RETURN
4218     0 on success (always because it can't fail)
4219 */
4220 
int reset_key_cache_counters(const char *name MY_ATTRIBUTE((unused)),
                             KEY_CACHE *key_cache)
{
  DBUG_ENTER("reset_key_cache_counters");
  if (!key_cache->key_cache_inited)
  {
    DBUG_PRINT("info", ("Key cache %s not initialized.", name));
    DBUG_RETURN(0);
  }
  DBUG_PRINT("info", ("Resetting counters for key cache %s.", name));

  /* Zero the statistics behind the server status variables. */
  key_cache->global_cache_r_requests= 0; /* Key_read_requests */
  key_cache->global_cache_read= 0;       /* Key_reads */
  key_cache->global_cache_w_requests= 0; /* Key_write_requests */
  key_cache->global_cache_write= 0;      /* Key_writes */
  key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
  DBUG_RETURN(0);
}
4239 
4240 
4241 #ifndef DBUG_OFF
4242 /*
4243   Test if disk-cache is ok
4244 */
static void test_key_cache(KEY_CACHE *keycache MY_ATTRIBUTE((unused)),
                           const char *where MY_ATTRIBUTE((unused)),
                           my_bool lock MY_ATTRIBUTE((unused)))
{
  /*
    TODO: Implement a consistency check over the key cache structures.
    Currently a no-op stub; it is invoked from the
    DBUG_EXECUTE("check_keycache", ...) hooks (debug builds only),
    so there is no runtime effect yet.
  */
}
4252 
4253 #if defined(KEYCACHE_TIMEOUT)
4254 
4255 #define KEYCACHE_DUMP_FILE  "keycache_dump.txt"
4256 #define MAX_QUEUE_LEN  100
4257 
4258 
/*
  Dump the state of the key cache to KEYCACHE_DUMP_FILE.

  Written when a wait in the key cache exceeds KEYCACHE_TIMEOUT.
  Dumps the hash-link and block wait queues, each block's status and
  wait queues, and the LRU chain. Debug-only code (KEYCACHE_TIMEOUT).

  Fixes against the previous revision:
  - the fopen() result is now checked before use;
  - the stray leading fprintf() that read 'thread->id' from an
    uninitialized pointer (undefined behavior) has been removed;
  - the per-queue MAX_QUEUE_LEN cap uses its own counter instead of
    clobbering the outer block index 'i';
  - the garbled LRU-chain assignment 'block= keycache= used_last'
    is corrected to 'block= keycache->used_last'.
*/
static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  if (!keycache_dump_file)
    return;                                  /* Cannot dump anywhere. */

  i=0;
  thread=last=waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(ulong) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last=waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
        "thread:%u hash_link:%u (file,filepos)=(%u,%lu)\n",
        thread->id, (uint) HASH_LINK_NUMBER(hash_link),
        (uint) hash_link->file,(ulong) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    uint qlen;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
            i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      qlen= 0;
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          /* Cap this queue's output without touching the block index. */
          if (++qlen == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", BLOCK_NUMBER(block));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}
4348 
4349 #endif /* defined(KEYCACHE_TIMEOUT) */
4350 
4351 #if defined(KEYCACHE_TIMEOUT) && !defined(__WIN__)
4352 
4353 
/*
  Condition wait with a KEYCACHE_TIMEOUT-second deadline (debug builds
  with KEYCACHE_TIMEOUT, non-Windows only). A timeout is treated as a
  bug in the key cache: the state is dumped and an assertion fails.
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int rc;
  struct timeval  now;            /* time when we started waiting        */
  struct timespec timeout;        /* timeout value for the wait function */
  struct timezone tz;
#if defined(KEYCACHE_DEBUG)
  int cnt=0;
#endif

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value: now + KEYCACHE_TIMEOUT seconds. */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
 /*
   timeval uses microseconds, timespec uses nanoseconds;
   1 microsecond = 1000 nanoseconds, hence the * 1000.
 */
  timeout.tv_nsec= now.tv_usec * 1000;
  KEYCACHE_THREAD_TRACE_END("started waiting");
#if defined(KEYCACHE_DEBUG)
  cnt++;
  /*
    NOTE(review): The indentation suggests fflush() was meant to be in
    the if-body, but without braces it runs unconditionally. Also 'cnt'
    is a per-call local (always 1 here), so the fprintf() can never
    fire. Confirm the original intent before changing this.
  */
  if (cnt % 100 == 0)
    fprintf(keycache_debug_log, "waiting...\n");
    fflush(keycache_debug_log);
#endif
  rc= mysql_cond_timedwait(cond, mutex, &timeout);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  if (rc == ETIMEDOUT || rc == ETIME)
  {
#if defined(KEYCACHE_DEBUG)
    fprintf(keycache_debug_log,"aborted by keycache timeout\n");
    fclose(keycache_debug_log);
    abort();
#endif
    /*
      NOTE(review): keycache_dump() is defined above as taking a
      KEY_CACHE* argument but is called here with none; this cannot
      compile when both KEYCACHE_TIMEOUT code paths are enabled.
    */
    keycache_dump();
  }

  /* Any timeout means some thread waited longer than KEYCACHE_TIMEOUT. */
#if defined(KEYCACHE_DEBUG)
  KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT);
#else
  assert(rc != ETIMEDOUT);
#endif
  return rc;
}
4401 #else
4402 #if defined(KEYCACHE_DEBUG)
/*
  Condition wait wrapper that brackets the wait with key cache thread
  trace events (KEYCACHE_DEBUG build without timeout handling).
*/
static int keycache_pthread_cond_wait(mysql_cond_t *cond,
                                      mysql_mutex_t *mutex)
{
  int wait_result;
  KEYCACHE_THREAD_TRACE_END("started waiting");
  wait_result= mysql_cond_wait(cond, mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("finished waiting");
  return wait_result;
}
4412 #endif
4413 #endif /* defined(KEYCACHE_TIMEOUT) && !defined(__WIN__) */
4414 
4415 #if defined(KEYCACHE_DEBUG)
4416 
4417 
/*
  Mutex lock wrapper that emits a thread trace event once the lock has
  been acquired (KEYCACHE_DEBUG build only).
*/
static int keycache_pthread_mutex_lock(mysql_mutex_t *mutex)
{
  int lock_result= mysql_mutex_lock(mutex);
  KEYCACHE_THREAD_TRACE_BEGIN("");
  return lock_result;
}
4425 
4426 
/*
  Mutex unlock wrapper that emits a thread trace event just before the
  lock is released (KEYCACHE_DEBUG build only).
*/
static void keycache_pthread_mutex_unlock(mysql_mutex_t *mutex)
{
  KEYCACHE_THREAD_TRACE_END("");
  mysql_mutex_unlock(mutex);
}
4432 
4433 
/*
  Condition signal wrapper that records a "signal" thread trace event
  before signalling (KEYCACHE_DEBUG build only).
*/
static int keycache_pthread_cond_signal(mysql_cond_t *cond)
{
  KEYCACHE_THREAD_TRACE("signal");
  return mysql_cond_signal(cond);
}
4441 
4442 
4443 #if defined(KEYCACHE_DEBUG_LOG)
4444 
4445 
keycache_debug_print(const char * fmt,...)4446 static void keycache_debug_print(const char * fmt,...)
4447 {
4448   va_list args;
4449   va_start(args,fmt);
4450   if (keycache_debug_log)
4451   {
4452     (void) vfprintf(keycache_debug_log, fmt, args);
4453     (void) fputc('\n',keycache_debug_log);
4454   }
4455   va_end(args);
4456 }
4457 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4458 
4459 #if defined(KEYCACHE_DEBUG_LOG)
4460 
4461 
keycache_debug_log_close(void)4462 void keycache_debug_log_close(void)
4463 {
4464   if (keycache_debug_log)
4465     fclose(keycache_debug_log);
4466 }
4467 #endif /* defined(KEYCACHE_DEBUG_LOG) */
4468 
4469 #endif /* defined(KEYCACHE_DEBUG) */
4470 
4471 #if !defined(DBUG_OFF)
4472 #define F_B_PRT(_f_, _v_) DBUG_PRINT("assert_fail", (_f_, _v_))
4473 
/*
  Dump all fields of a block link to the debug trace.

  Always returns 0, so it can be OR-ed into an assertion expression:
  the block state is logged first and the assertion then fails (see
  the trailing comment on the return).
*/
static int fail_block(BLOCK_LINK *block)
{
  F_B_PRT("block->next_used:    %lx\n", (ulong) block->next_used);
  F_B_PRT("block->prev_used:    %lx\n", (ulong) block->prev_used);
  F_B_PRT("block->next_changed: %lx\n", (ulong) block->next_changed);
  F_B_PRT("block->prev_changed: %lx\n", (ulong) block->prev_changed);
  F_B_PRT("block->hash_link:    %lx\n", (ulong) block->hash_link);
  F_B_PRT("block->status:       %u\n", block->status);
  F_B_PRT("block->length:       %u\n", block->length);
  F_B_PRT("block->offset:       %u\n", block->offset);
  F_B_PRT("block->requests:     %u\n", block->requests);
  F_B_PRT("block->temperature:  %u\n", block->temperature);
  return 0; /* Let the assert fail. */
}
4488 
/*
  Dump all fields of a hash link to the debug trace.

  Always returns 0, so it can be OR-ed into an assertion expression:
  the hash link state is logged first and the assertion then fails.
*/
static int fail_hlink(HASH_LINK *hlink)
{
  F_B_PRT("hlink->next:    %lx\n", (ulong) hlink->next);
  F_B_PRT("hlink->prev:    %lx\n", (ulong) hlink->prev);
  F_B_PRT("hlink->block:   %lx\n", (ulong) hlink->block);
  F_B_PRT("hlink->diskpos: %lu\n", (ulong) hlink->diskpos);
  F_B_PRT("hlink->file:    %d\n", hlink->file);
  return 0; /* Let the assert fail. */
}
4498 
/*
  Check that the key cache is completely empty (debug builds only).

  Scans every block link and hash link. Any block that still has a
  status, pending requests, or an attached hash link -- and any hash
  link that still has requests or an attached block -- is reported to
  stderr together with summary counters.

  Fix: 'idx' is a signed int, so it is now printed with %d; the old
  %u specifier mismatched the argument type.

  RETURN
    1  the cache is empty (or not initialized: disk_blocks <= 0)
    0  at least one block or hash link is still in use
*/
static int cache_empty(KEY_CACHE *keycache)
{
  int errcnt= 0;
  int idx;
  if (keycache->disk_blocks <= 0)
    return 1;
  for (idx= 0; idx < keycache->disk_blocks; idx++)
  {
    BLOCK_LINK *block= keycache->block_root + idx;
    if (block->status || block->requests || block->hash_link)
    {
      fprintf(stderr, "block index: %d\n", idx);
      fail_block(block);
      errcnt++;
    }
  }
  for (idx= 0; idx < keycache->hash_links; idx++)
  {
    HASH_LINK *hash_link= keycache->hash_link_root + idx;
    if (hash_link->requests || hash_link->block)
    {
      fprintf(stderr, "hash_link index: %d\n", idx);
      fail_hlink(hash_link);
      errcnt++;
    }
  }
  if (errcnt)
  {
    fprintf(stderr, "blocks: %d  used: %lu\n",
            keycache->disk_blocks, keycache->blocks_used);
    fprintf(stderr, "hash_links: %d  used: %d\n",
            keycache->hash_links, keycache->hash_links_used);
    fprintf(stderr, "\n");
  }
  return !errcnt;
}
4535 #endif
4536 
4537