1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3 #ident "$Id$"
4 /*======
5 This file is part of PerconaFT.
6 
7 
8 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9 
10     PerconaFT is free software: you can redistribute it and/or modify
11     it under the terms of the GNU General Public License, version 2,
12     as published by the Free Software Foundation.
13 
14     PerconaFT is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
21 
22 ----------------------------------------
23 
24     PerconaFT is free software: you can redistribute it and/or modify
25     it under the terms of the GNU Affero General Public License, version 3,
26     as published by the Free Software Foundation.
27 
28     PerconaFT is distributed in the hope that it will be useful,
29     but WITHOUT ANY WARRANTY; without even the implied warranty of
30     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31     GNU Affero General Public License for more details.
32 
33     You should have received a copy of the GNU Affero General Public License
34     along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
35 ======= */
36 
37 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38 
39 #pragma once
40 
41 #include <fcntl.h>
42 
43 #include "ft/logger/logger.h"
44 #include "ft/serialize/block_table.h"
45 #include "ft/txn/txn.h"
46 #include "ft/ft-status.h"
47 #include "util/minicron.h"
48 
49 // Maintain a cache mapping from cachekeys to values (void*)
50 // Some of the keys can be pinned.  Don't pin too many or for too long.
51 // If the cachetable is too full, it will call the flush_callback() function with the key, the value, and the otherargs
52 // and then remove the key-value pair from the cache.
53 // The callback won't be any of the currently pinned keys.
54 // Also when flushing an object, the cachetable drops all references to it,
55 // so you may need to free() it.
56 // Note: The cachetable should use a common pool of memory, flushing things across cachetables.
57 //  (The first implementation doesn't)
58 // If you pin something twice, you must unpin it twice.
59 // table_size is the initial size of the cache table hash table (in number of entries)
60 // size limit is the upper bound of the sum of size of the entries in the cache table (total number of bytes)
61 
62 typedef BLOCKNUM CACHEKEY;
63 
64 class checkpointer;
65 typedef class checkpointer *CHECKPOINTER;
66 typedef struct cachetable *CACHETABLE;
67 typedef struct cachefile *CACHEFILE;
68 typedef struct ctpair *PAIR;
69 
// PAIR_ATTR holds size information about a value stored in the cachetable.
//
// As one can tell from the field names (leaf, nonleaf, rollback), we are
// probably violating an abstraction layer by naming the kinds of values
// that clients store.
//
// The purpose of this struct is to give the cachetable a way to
// accumulate the totals we are interested in.  Breaking the abstraction
// layer with these names was the easiest way to do that.
//
typedef struct pair_attr_s {
    long size; // size PAIR's value takes in memory
    long nonleaf_size; // size if PAIR is a nonleaf node, 0 otherwise, used only for engine status
    long leaf_size; // size if PAIR is a leaf node, 0 otherwise, used only for engine status
    long rollback_size; // size if PAIR is a rollback node, 0 otherwise, used only for engine status
    long cache_pressure_size; // amount PAIR contributes to cache pressure, is sum of buffer sizes and workdone counts
    bool is_valid; // make_pair_attr() always sets this to true
} PAIR_ATTR;

// Build a PAIR_ATTR for a value that occupies `size` bytes in memory.
// Returns: an attribute with .size == size, every other size field 0,
// and is_valid == true.
static inline PAIR_ATTR make_pair_attr(long size) {
    PAIR_ATTR result = {
        .size = size,
        .nonleaf_size = 0,
        .leaf_size = 0,
        .rollback_size = 0,
        .cache_pressure_size = 0,
        .is_valid = true
    };
    return result;
}
99 
100 void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period);
101 uint32_t toku_get_cleaner_period_unlocked (CACHETABLE ct);
102 void toku_set_cleaner_iterations (CACHETABLE ct, uint32_t new_iterations);
103 uint32_t toku_get_cleaner_iterations (CACHETABLE ct);
104 uint32_t toku_get_cleaner_iterations_unlocked (CACHETABLE ct);
105 void toku_set_enable_partial_eviction (CACHETABLE ct, bool enabled);
106 bool toku_get_enable_partial_eviction (CACHETABLE ct);
107 
108 // cachetable operations
109 
// create and initialize a cache table
// size_limit is the upper limit on the sum of the sizes of the values in the table
// pass 0 if you want the default
113 int toku_cachetable_create_ex(CACHETABLE *result, long size_limit,
114                            unsigned long client_pool_threads,
115                            unsigned long cachetable_pool_threads,
116                            unsigned long checkpoint_pool_threads,
117                            LSN initial_lsn, struct tokulogger *logger);
118 
// Convenience wrapper around toku_cachetable_create_ex() that passes 0 for
// the three *_pool_threads arguments.
//   r -> result, s -> size_limit, l -> initial_lsn, o -> logger
// Expands to a single expression with no trailing semicolon, so it can be
// used anywhere a function call can: on the right of an assignment, inside
// a condition, or as the unbraced arm of an if/else.
#define toku_cachetable_create(r, s, l, o) \
    toku_cachetable_create_ex((r), (s), 0, 0, 0, (l), (o))
121 
122 // Create a new cachetable.
123 // Effects: a new cachetable is created and initialized.
124 // The cachetable pointer is stored into result.
125 // The sum of the sizes of the memory objects is set to size_limit, in whatever
126 // units make sense to the user of the cachetable.
127 // Returns: If success, returns 0 and result points to the new cachetable. Otherwise,
128 // returns an error number.
129 
130 // Returns a pointer to the checkpointer within the given cachetable.
131 CHECKPOINTER toku_cachetable_get_checkpointer(CACHETABLE ct);
132 
133 // What is the cachefile that goes with a particular filenum?
134 // During a transaction, we cannot reuse a filenum.
135 int toku_cachefile_of_filenum (CACHETABLE t, FILENUM filenum, CACHEFILE *cf);
136 
137 // What is the cachefile that goes with a particular iname (relative to env)?
138 // During a transaction, we cannot reuse an iname.
139 int toku_cachefile_of_iname_in_env (CACHETABLE ct, const char *iname_in_env, CACHEFILE *cf);
140 
141 // Get the iname (within the cwd) associated with the cachefile
142 // Return the filename
143 char *toku_cachefile_fname_in_cwd (CACHEFILE cf);
144 
145 void toku_cachetable_begin_checkpoint (CHECKPOINTER cp, struct tokulogger *logger);
146 
147 void toku_cachetable_end_checkpoint(CHECKPOINTER cp, struct tokulogger *logger,
148                                    void (*testcallback_f)(void*),  void * testextra);
149 
150 
151 // Shuts down checkpoint thread
152 // Requires no locks be held that are taken by the checkpoint function
153 void toku_cachetable_minicron_shutdown(CACHETABLE ct);
154 
155 // Prepare to close the cachetable.  This informs the cachetable that it is about to be closed
156 // so that it can tune its checkpoint resource use.
157 void toku_cachetable_prepare_close(CACHETABLE ct);
158 
159 // Close the cachetable.
160 // Effects: All of the memory objects are flushed to disk, and the cachetable is destroyed.
161 void toku_cachetable_close(CACHETABLE *ct);
162 
163 // Open a file and bind the file to a new cachefile object. (For use by test programs only.)
164 int toku_cachetable_openf(CACHEFILE *,CACHETABLE, const char *fname_in_env, int flags, mode_t mode);
165 
166 // Bind a file to a new cachefile object.
167 int toku_cachetable_openfd(CACHEFILE *,CACHETABLE, int fd,
168                             const char *fname_relative_to_env);
169 int toku_cachetable_openfd_with_filenum (CACHEFILE *,CACHETABLE, int fd,
170                                          const char *fname_in_env,
171                                          FILENUM filenum, bool* was_open);
172 
173 // reserve a unique filenum
174 FILENUM toku_cachetable_reserve_filenum(CACHETABLE ct);
175 
176 // Effect: Reserve a fraction of the cachetable memory.
177 // Returns the amount reserved.
178 // To return the memory to the cachetable, call toku_cachetable_release_reserved_memory
179 // Requires 0<fraction<1.
180 uint64_t toku_cachetable_reserve_memory(CACHETABLE, double fraction, uint64_t upper_bound);
181 void toku_cachetable_release_reserved_memory(CACHETABLE, uint64_t);
182 
183 // cachefile operations
184 
185 // Does an fsync of a cachefile.
186 void toku_cachefile_fsync(CACHEFILE cf);
187 
// Cost class of running partial eviction on a PAIR.
enum partial_eviction_cost {
    // running partial eviction is cheap, and can be done on the client thread
    PE_CHEAP = 0,
    // running partial eviction is expensive, and should not be done on the client thread
    PE_EXPENSIVE = 1,
};
192 
// Whether a cached object is clean or dirty with respect to external
// memory (the cachefile).
enum cachetable_dirty {
    // the cached object is clean WRT the cachefile
    CACHETABLE_CLEAN = 0,
    // the cached object is dirty WRT the cachefile
    CACHETABLE_DIRTY = 1,
};
198 
// The flush callback is called when a key value pair is being written to storage and possibly removed from the cachetable.
// When write_me is true, the value should be written to storage.
// When keep_me is false, the value should be freed.
// When for_checkpoint is true, this was a 'pending' write
// Returns: nothing (the callback type returns void).
// Can access fd (fd is protected by a readlock during call)
205 typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void **disk_data, void *write_extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, bool write_me, bool keep_me, bool for_checkpoint, bool is_clone);
206 
207 // The fetch callback is called when a thread is attempting to get and pin a memory
208 // object and it is not in the cachetable.
209 // Returns: 0 if success, otherwise an error number.  The address and size of the object
210 // associated with the key are returned.
211 // Can access fd (fd is protected by a readlock during call)
212 typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, PAIR p, int fd, CACHEKEY key, uint32_t fullhash, void **value_data, void **disk_data, PAIR_ATTR *sizep, int *dirtyp, void *read_extraargs);
213 
// The cachetable calls the partial eviction estimate callback to determine whether
// partial eviction is a cheap operation that may be run on the client thread
// or whether partial eviction is expensive and should be done on a background (writer) thread.
// The callback conveys this information by setting cost to either PE_CHEAP or PE_EXPENSIVE.
// If cost is PE_EXPENSIVE, then the callback also sets bytes_freed_estimate
// to return an estimate of the number of bytes it will free
// so that the cachetable can estimate how much data is being evicted on background threads.
// If cost is PE_CHEAP, then the callback does not set bytes_freed_estimate.
222 typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *ftnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *write_extraargs);
223 
// The cachetable calls the partial eviction callback to try to partially evict pieces
// of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free
// nothing, or may choose to free as much as possible. When the partial eviction callback is finished,
// it must call finalize with the new PAIR_ATTR and the given finalize_extra. After this point, the
// write lock will be released on the PAIR and it is no longer safe to operate on any of the passed arguments.
// This is useful for doing expensive cleanup work outside of the PAIR's write lock (such as destroying objects, etc)
//
// on entry, requires a write lock to be held on the PAIR in the cachetable while this function is called
// on exit, the finalize continuation is called
233 typedef int (*CACHETABLE_PARTIAL_EVICTION_CALLBACK)(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
234                                                     void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra);
235 
236 // The cachetable calls this function to determine if get_and_pin call requires a partial fetch. If this function returns true,
237 // then the cachetable will subsequently call CACHETABLE_PARTIAL_FETCH_CALLBACK to perform
238 // a partial fetch. If this function returns false, then the PAIR's value is returned to the caller as is.
239 //
240 // An alternative to having this callback is to always call CACHETABLE_PARTIAL_FETCH_CALLBACK, and let
241 // CACHETABLE_PARTIAL_FETCH_CALLBACK decide whether to do any partial fetching or not.
242 // There is no particular reason why this alternative was not chosen.
243 // Requires: a read lock to be held on the PAIR
244 typedef bool (*CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK)(void *ftnode_pv, void *read_extraargs);
245 
246 // The cachetable calls the partial fetch callback when a thread needs to read or decompress a subset of a PAIR into memory.
247 // An example is needing to read a basement node into memory. Another example is decompressing an internal node's
248 // message buffer. The cachetable determines if a partial fetch is necessary by first calling CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK.
249 // The new PAIR_ATTR of the PAIR is returned in sizep
250 // Can access fd (fd is protected by a readlock during call)
251 // Returns: 0 if success, otherwise an error number.
252 typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *value_data, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep);
253 
// The cachetable calls the put callback during a cachetable_put command to provide the opaque PAIR.
// The PAIR can then be used to later unpin the pair.
// Returns: nothing (the callback type returns void).
257 typedef void (*CACHETABLE_PUT_CALLBACK)(CACHEKEY key, void *value_data, PAIR p);
258 
259 // TODO(leif) XXX TODO XXX
260 typedef int (*CACHETABLE_CLEANER_CALLBACK)(void *ftnode_pv, BLOCKNUM blocknum, uint32_t fullhash, void *write_extraargs);
261 
262 typedef void (*CACHETABLE_CLONE_CALLBACK)(void* value_data, void** cloned_value_data, long* clone_size, PAIR_ATTR* new_attr, bool for_checkpoint, void* write_extraargs);
263 
264 typedef void (*CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK)(void *value_data);
265 
266 typedef struct {
267     CACHETABLE_FLUSH_CALLBACK flush_callback;
268     CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback;
269     CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback;
270     CACHETABLE_CLEANER_CALLBACK cleaner_callback;
271     CACHETABLE_CLONE_CALLBACK clone_callback;
272     CACHETABLE_CHECKPOINT_COMPLETE_CALLBACK checkpoint_complete_callback;
273     void* write_extraargs; // parameter for flush_callback, pe_est_callback, pe_callback, and cleaner_callback
274 } CACHETABLE_WRITE_CALLBACK;
275 
276 typedef void (*CACHETABLE_GET_KEY_AND_FULLHASH)(CACHEKEY* cachekey, uint32_t* fullhash, void* extra);
277 
278 typedef void (*CACHETABLE_REMOVE_KEY)(CACHEKEY* cachekey, bool for_checkpoint, void* extra);
279 
280 void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
281     void (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
282     void (*close_userdata)(CACHEFILE, int, void*, bool, LSN),
283     void (*free_userdata)(CACHEFILE, void*),
284     void (*checkpoint_userdata)(CACHEFILE, int, void*),
285     void (*begin_checkpoint_userdata)(LSN, void*),
286     void (*end_checkpoint_userdata)(CACHEFILE, int, void*),
287     void (*note_pin_by_checkpoint)(CACHEFILE, void*),
288     void (*note_unpin_by_checkpoint)(CACHEFILE, void*));
// Effect: Store some cachefile-specific user data.  When the last reference to a cachefile is closed, we call close_userdata().
// Before starting a checkpoint, we call begin_checkpoint_userdata().
// When the cachefile needs to be checkpointed, we call checkpoint_userdata().
// If userdata is already non-NULL, then we simply overwrite it.
293 
294 void *toku_cachefile_get_userdata(CACHEFILE);
295 // Effect: Get the user data.
296 
297 CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf);
298 // Effect: Get the cachetable.
299 
300 CACHEFILE toku_pair_get_cachefile(PAIR);
301 // Effect: Get the cachefile of the pair
302 
303 void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair);
304 // Effect: Swaps the value_data of old_pair and new_pair.
305 // Requires: both old_pair and new_pair to be pinned with write locks.
306 
// Kind of lock requested on a PAIR by the get_and_pin family of functions.
typedef enum {
    PL_READ            = 0,
    PL_WRITE_CHEAP     = 1,
    PL_WRITE_EXPENSIVE = 2
} pair_lock_type;
312 
313 // put something into the cachetable and checkpoint dependent pairs
314 // if the checkpointing is necessary
315 void toku_cachetable_put_with_dep_pairs(
316     CACHEFILE cachefile,
317     CACHETABLE_GET_KEY_AND_FULLHASH get_key_and_fullhash,
318     void *value,
319     PAIR_ATTR attr,
320     CACHETABLE_WRITE_CALLBACK write_callback,
321     void *get_key_and_fullhash_extra,
322     uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint
323     PAIR* dependent_pairs,
324     enum cachetable_dirty* dependent_dirty, // array stating dirty/cleanness of dependent pairs
325     CACHEKEY* key,
326     uint32_t* fullhash,
327     CACHETABLE_PUT_CALLBACK put_callback
328     );
329 
330 // Put a memory object into the cachetable.
331 // Effects: Lookup the key in the cachetable. If the key is not in the cachetable,
332 // then insert the pair and pin it. Otherwise return an error.  Some of the key
333 // value pairs may be evicted from the cachetable when the cachetable gets too big.
334 void toku_cachetable_put(CACHEFILE cf, CACHEKEY key, uint32_t fullhash,
335 			void *value, PAIR_ATTR size,
336 			CACHETABLE_WRITE_CALLBACK write_callback,
337                         CACHETABLE_PUT_CALLBACK put_callback
338                         );
339 
340 // Get and pin the memory object of a PAIR, and write dependent pairs to disk
341 // if the dependent pairs are pending a checkpoint.
342 // Effects: If the memory object is in the cachetable, acquire a PAIR lock on it.
343 // Otherwise, fetch it from storage by calling the fetch callback.  If the fetch
344 // succeeded, add the memory object to the cachetable with a PAIR lock on it.
345 // Before returning to the user, if the PAIR object being retrieved, or any of the
346 // dependent pairs passed in as parameters must be written to disk for checkpoint,
347 // then the required PAIRs are written to disk for checkpoint.
348 // KEY PROPERTY OF DEPENDENT PAIRS: They are already locked by the client
349 // Returns: 0 if the memory object is in memory, otherwise an error number.
350 int toku_cachetable_get_and_pin_with_dep_pairs (
351     CACHEFILE cachefile,
352     CACHEKEY key,
353     uint32_t fullhash,
354     void**value,
355     CACHETABLE_WRITE_CALLBACK write_callback,
356     CACHETABLE_FETCH_CALLBACK fetch_callback,
357     CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
358     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
359     pair_lock_type lock_type,
360     void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
361     uint32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint
362     PAIR* dependent_pairs,
363     enum cachetable_dirty* dependent_dirty // array stating dirty/cleanness of dependent pairs
364     );
365 
366 // Get and pin a memory object.
367 // Effects: If the memory object is in the cachetable acquire the PAIR lock on it.
368 // Otherwise, fetch it from storage by calling the fetch callback.  If the fetch
369 // succeeded, add the memory object to the cachetable with a read lock on it.
370 // Returns: 0 if the memory object is in memory, otherwise an error number.
371 int toku_cachetable_get_and_pin (
372     CACHEFILE cachefile,
373     CACHEKEY key,
374     uint32_t fullhash,
375     void**value,
376     CACHETABLE_WRITE_CALLBACK write_callback,
377     CACHETABLE_FETCH_CALLBACK fetch_callback,
378     CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
379     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
380     bool may_modify_value,
381     void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback
382     );
383 
384 // does partial fetch on a pinned pair
385 void toku_cachetable_pf_pinned_pair(
386     void* value,
387     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
388     void* read_extraargs,
389     CACHEFILE cf,
390     CACHEKEY key,
391     uint32_t fullhash
392     );
393 
// One node of a singly linked list of unlock actions.  The list is passed
// to toku_cachetable_get_and_pin_nonblocking(), which is documented to call
// the functions in it before fetching a block that is not in the cachetable.
struct unlockers {
    // NOTE(review): presumably true while the lock released by f is still held -- confirm
    bool locked;
    // function to call, taking `extra` as its only argument
    void (*f)(void *extra);
    void *extra;
    // next node in the list
    struct unlockers *next;
};

typedef struct unlockers *UNLOCKERS;
401 
402 // Effect:  If the block is in the cachetable, then return it.
403 //   Otherwise call the functions in unlockers, fetch the data (but don't pin it, since we'll just end up pinning it again later), and return TOKUDB_TRY_AGAIN.
404 int toku_cachetable_get_and_pin_nonblocking (
405     CACHEFILE cf,
406     CACHEKEY key,
407     uint32_t fullhash,
408     void**value,
409     CACHETABLE_WRITE_CALLBACK write_callback,
410     CACHETABLE_FETCH_CALLBACK fetch_callback,
411     CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
412     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
413     pair_lock_type lock_type,
414     void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
415     UNLOCKERS unlockers
416     );
417 
418 int toku_cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**);
// Effect: Maybe get and pin a memory object.
//  This function is similar to the get_and_pin function except that it
//  will not attempt to fetch a memory object that is not in the cachetable or that requires any kind of blocking to get.
// Returns: If the item is already in memory, then return 0 and store it in the
// void**.  If the item is not in memory, then return a nonzero error number.
424 
425 int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, pair_lock_type, void**);
426 // Effect: Like maybe get and pin, but may pin a clean pair.
427 
428 int toku_cachetable_get_attr(CACHEFILE, CACHEKEY, uint32_t /*fullhash*/, PAIR_ATTR *);
429 // Effect: get the attributes for cachekey
430 // Returns: 0 if success, non-zero if cachekey is not cached
431 // Notes: this function exists for tests
432 
433 int toku_cachetable_unpin(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size);
434 // Effect: Unpin a memory object
435 // Modifies: If the memory object is in the cachetable, then OR the dirty flag,
436 // update the size, and release the read lock on the memory object.
437 // Returns: 0 if success, otherwise returns an error number.
438 // Requires: The ct is locked.
439 
440 int toku_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, PAIR, enum cachetable_dirty dirty, PAIR_ATTR size);
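// Effect: The same as toku_cachetable_unpin, except that the ct must not be locked.
// Requires: The ct is NOT locked.
// NOTE(review): the "ct_prelocked" name suggests the caller already holds the ct lock,
// which contradicts the requirement above -- confirm against the implementation.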
443 
444 int toku_cachetable_unpin_and_remove (CACHEFILE, PAIR, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */
445 // Effect: Remove an object from the cachetable.  Don't write it back.
446 // Requires: The object must be pinned exactly once.
447 
448 // test-only wrapper that use CACHEKEY and fullhash
449 int toku_test_cachetable_unpin(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size);
450 
451 // test-only wrapper that use CACHEKEY and fullhash
452 int toku_test_cachetable_unpin_ct_prelocked_no_flush(CACHEFILE, CACHEKEY, uint32_t fullhash, enum cachetable_dirty dirty, PAIR_ATTR size);
453 
454 // test-only wrapper that use CACHEKEY
455 int toku_test_cachetable_unpin_and_remove (CACHEFILE, CACHEKEY, CACHETABLE_REMOVE_KEY, void*); /* Removing something already present is OK. */
456 
457 int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash,
458                             CACHETABLE_WRITE_CALLBACK write_callback,
459                             CACHETABLE_FETCH_CALLBACK fetch_callback,
460                             CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
461                             CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
462                             void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
463                             bool *doing_prefetch);
464 // Effect: Prefetch a memory object for a given key into the cachetable
465 // Precondition: The cachetable mutex is NOT held.
466 // Postcondition: The cachetable mutex is NOT held.
467 // Returns: 0 if success
// Implementation note:
//  1) The pair's rwlock is acquired (for write) (there is not a deadlock here because the rwlock is a pthread_cond_wait using the cachetable mutex).
//  Case A:  Single-threaded.
//    A1)  Call cachetable_fetch_pair, which
//      a) Obtains a readlock on the cachefile's fd (to prevent multiple readers at once)
473 //      b) Unlocks the cachetable
474 //      c) Does the fetch off disk.
475 //      d) Locks the cachetable
476 //      e) Unlocks the fd lock.
477 //      f) Unlocks the pair rwlock.
478 //  Case B: Multithreaded
479 //      a) Enqueue a cachetable_reader into the workqueue.
480 //      b) Unlock the cache table.
481 //      c) The enqueue'd job later locks the cachetable, and calls cachetable_fetch_pair (doing the steps in A1 above).
482 
483 int toku_cachetable_assert_all_unpinned (CACHETABLE);
484 
485 int toku_cachefile_count_pinned (CACHEFILE, int /*printthem*/ );
486 
487 // Close the cachefile.
488 // Effects: All of the cached object associated with the cachefile are evicted from
489 // the cachetable.  The flush callback is called for each of these objects.  The
490 // close function does not return until all of the objects are evicted.  The cachefile
491 // object is freed.
492 // If oplsn_valid is true then use oplsn as the LSN of the close instead of asking the logger.  oplsn_valid being true is only allowed during recovery, and requires that you are removing the last reference (otherwise the lsn wouldn't make it in.)
493 void toku_cachefile_close (CACHEFILE*, bool oplsn_valid, LSN oplsn);
494 
495 // Return on success (different from pread and pwrite)
496 //int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, toku_off_t offset);
497 //int cachefile_pread  (CACHEFILE, void *buf, size_t count, toku_off_t offset);
498 
499 // Get the file descriptor associated with the cachefile
500 // Return the file descriptor
501 // Grabs a read lock protecting the fd
502 int toku_cachefile_get_fd (CACHEFILE);
503 
504 // Get the iname (within the environment) associated with the cachefile
505 // Return the filename
506 char * toku_cachefile_fname_in_env (CACHEFILE cf);
507 
508 void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env);
509 
510 // Make it so when the cachefile closes, the underlying file is unlinked
511 void toku_cachefile_unlink_on_close(CACHEFILE cf);
512 
513 // is this cachefile marked as unlink on close?
514 bool toku_cachefile_is_unlink_on_close(CACHEFILE cf);
515 
516 void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf);
517 void toku_cachefile_do_log_recover_on_close(CACHEFILE cf);
518 bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf);
519 
520 // Return the logger associated with the cachefile
521 struct tokulogger *toku_cachefile_logger(CACHEFILE cf);
522 
523 // Return the filenum associated with the cachefile
524 FILENUM toku_cachefile_filenum(CACHEFILE cf);
525 
526 // Effect: Return a 32-bit hash key.  The hash key shall be suitable for using with bitmasking for a table of size power-of-two.
527 uint32_t toku_cachetable_hash(CACHEFILE cf, CACHEKEY key);
528 
529 uint32_t toku_cachefile_fullhash_of_header(CACHEFILE cf);
530 
531 // debug functions
532 
533 // Print the contents of the cachetable. This is mainly used from gdb
534 void toku_cachetable_print_state (CACHETABLE ct);
535 
536 // Get the state of the cachetable. This is used to verify the cachetable
537 void toku_cachetable_get_state(CACHETABLE ct, int *num_entries_ptr, int *hash_size_ptr, long *size_current_ptr, long *size_limit_ptr);
538 
539 // Get the state of a cachetable entry by key. This is used to verify the cachetable
540 int toku_cachetable_get_key_state(CACHETABLE ct, CACHEKEY key, CACHEFILE cf,
541                                   void **value_ptr,
542 				  int *dirty_ptr,
543                                   long long *pin_ptr,
544                                   long *size_ptr);
545 
546 // Verify the whole cachetable that the cachefile is in.  Slow.
547 void toku_cachefile_verify (CACHEFILE cf);
548 
549 // Verify the cachetable. Slow.
550 void toku_cachetable_verify (CACHETABLE t);
551 
552 // Not for use in production, but useful for testing.
553 void toku_cachetable_print_hash_histogram (void) __attribute__((__visibility__("default")));
554 
555 void toku_cachetable_maybe_flush_some(CACHETABLE ct);
556 
557 // for stat64
558 uint64_t toku_cachefile_size(CACHEFILE cf);
559 
560 void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
561 
562 void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir);
563 char * toku_construct_full_name(int count, ...);
564 char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env);
565 
566 void cachefile_kibbutz_enq (CACHEFILE cf, void (*f)(void*), void *extra);
567 // Effect: Add a job to the cachetable's collection of work to do.  Note that function f must call remove_background_job_from_cf()
568 
569 void remove_background_job_from_cf (CACHEFILE cf);
570 // Effect: When a kibbutz job or cleaner thread finishes in a cachefile,
571 // the cachetable must be notified.
572 
573 // test-only function
574 int toku_cachetable_get_checkpointing_user_data_status(void);
575 
576 // test-only function
577 int toku_cleaner_thread_for_test(CACHETABLE ct);
578 int toku_cleaner_thread(void *cleaner_v);
579 
580 // test function. Exported in the ydb layer and used by tests that want to run DRD
581 // The default of 1M is too high for drd tests, so this is a mechanism to set a smaller number.
582 void toku_pair_list_set_lock_size(uint32_t num_locks);
583 
584 // Used by ft-ops.cc to figure out if it has the write lock on a pair.
585 // Pretty hacky and not accurate enough, should be improved at the frwlock
586 // layer.
587 __attribute__((const,nonnull))
588 bool toku_ctpair_is_write_locked(PAIR pair);
589