1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*-------------------------------------------------------------------------
15  *
16  * Created:             H5PB.c
17  *
18  * Purpose:             Page Buffer routines.
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 /****************/
24 /* Module Setup */
25 /****************/
26 
27 #define H5F_FRIEND        /*suppress error about including H5Fpkg      */
28 #include "H5PBmodule.h"         /* This source code file is part of the H5PB module */
29 
30 
31 /***********/
32 /* Headers */
33 /***********/
34 #include "H5private.h"        /* Generic Functions            */
35 #include "H5Eprivate.h"        /* Error handling              */
36 #include "H5Fpkg.h"        /* Files                */
37 #include "H5FDprivate.h"    /* File drivers                */
38 #include "H5Iprivate.h"        /* IDs                      */
39 #include "H5PBpkg.h"            /* File access                */
40 #include "H5SLprivate.h"    /* Skip List                */
41 
42 
43 /****************/
44 /* Local Macros */
45 /****************/
/* H5PB__PREPEND: put page_ptr at the front of the doubly linked list that is
 * described by head_ptr / tail_ptr and increment len.
 *
 * The macro never writes (page_ptr)->prev, and only writes (page_ptr)->next
 * when the list is non-empty, so the entry's links are expected to be NULL
 * on entry.  Arguments are evaluated more than once.
 */
#define H5PB__PREPEND(page_ptr, head_ptr, tail_ptr, len) {              \
        if(NULL != (head_ptr)) {                                        \
            /* Non-empty list: hook the entry in front of the old head */ \
            (page_ptr)->next = (head_ptr);                              \
            (head_ptr)->prev = (page_ptr);                              \
        } /* end if */                                                  \
        else                                                            \
            /* Empty list: the entry is the tail as well */             \
            (tail_ptr) = (page_ptr);                                    \
        (head_ptr) = (page_ptr);                                        \
        (len)++;                                                        \
} /* H5PB__PREPEND() */
58 
/* H5PB__REMOVE: unlink page_ptr from the doubly linked list described by
 * head_ptr / tail_ptr, clear the entry's own links and decrement len.
 *
 * page_ptr must currently be a member of the list: when it is not the head
 * its prev pointer is dereferenced, and when it is not the tail its next
 * pointer is dereferenced.  Arguments are evaluated more than once.
 */
#define H5PB__REMOVE(page_ptr, head_ptr, tail_ptr, len) {               \
        /* Detach from the predecessor side */                          \
        if((head_ptr) == (page_ptr)) {                                  \
            (head_ptr) = (page_ptr)->next;                              \
            if((head_ptr) != NULL)                                      \
                (head_ptr)->prev = NULL;                                \
        } /* end if */                                                  \
        else                                                            \
            (page_ptr)->prev->next = (page_ptr)->next;                  \
        /* Detach from the successor side */                            \
        if((tail_ptr) == (page_ptr)) {                                  \
            (tail_ptr) = (page_ptr)->prev;                              \
            if((tail_ptr) != NULL)                                      \
                (tail_ptr)->next = NULL;                                \
        } /* end if */                                                  \
        else                                                            \
            (page_ptr)->next->prev = (page_ptr)->prev;                  \
        /* Leave the removed entry with clean links; the argument is    \
         * parenthesized so that any pointer expression may be passed   \
         */                                                             \
        (page_ptr)->next = NULL;                                        \
        (page_ptr)->prev = NULL;                                        \
        (len)--;                                                        \
} /* H5PB__REMOVE() */
78 
/* H5PB__INSERT_LRU: add page_ptr at the head of page_buf's LRU list
 * (LRU_head_ptr / LRU_tail_ptr / LRU_list_len).
 */
#define H5PB__INSERT_LRU(page_buf, page_ptr) {                          \
        HDassert(page_buf);                                             \
        HDassert(page_ptr);                                             \
        /* New entries always go to the front of the list */            \
        H5PB__PREPEND((page_ptr),                                       \
                      (page_buf)->LRU_head_ptr,                         \
                      (page_buf)->LRU_tail_ptr,                         \
                      (page_buf)->LRU_list_len)                         \
} /* H5PB__INSERT_LRU() */
86 
/* H5PB__REMOVE_LRU: take page_ptr off page_buf's LRU list
 * (LRU_head_ptr / LRU_tail_ptr / LRU_list_len).
 */
#define H5PB__REMOVE_LRU(page_buf, page_ptr) {                          \
        HDassert(page_buf);                                             \
        HDassert(page_ptr);                                             \
        /* Unlink the entry wherever it sits in the list */             \
        H5PB__REMOVE((page_ptr),                                        \
                     (page_buf)->LRU_head_ptr,                          \
                     (page_buf)->LRU_tail_ptr,                          \
                     (page_buf)->LRU_list_len)                          \
} /* H5PB__REMOVE_LRU() */
94 
/* H5PB__MOVE_TO_TOP_LRU: make page_ptr the head of page_buf's LRU list by
 * unlinking it from its current position and prepending it again.  The list
 * length is decremented and then incremented, so it ends up unchanged.
 */
#define H5PB__MOVE_TO_TOP_LRU(page_buf, page_ptr) {                     \
        HDassert(page_buf);                                             \
        HDassert(page_ptr);                                             \
        /* Step 1: unlink the entry */                                  \
        H5PB__REMOVE((page_ptr),                                        \
                     (page_buf)->LRU_head_ptr,                          \
                     (page_buf)->LRU_tail_ptr,                          \
                     (page_buf)->LRU_list_len)                          \
        /* Step 2: put it back at the head */                           \
        H5PB__PREPEND((page_ptr),                                       \
                      (page_buf)->LRU_head_ptr,                         \
                      (page_buf)->LRU_tail_ptr,                         \
                      (page_buf)->LRU_list_len)                         \
} /* H5PB__MOVE_TO_TOP_LRU() */
104 
105 
106 /******************/
107 /* Local Typedefs */
108 /******************/
109 
110 /* Iteration context for destroying page buffer */
111 typedef struct {
112     H5PB_t *page_buf;
113     hbool_t actual_slist;
114 } H5PB_ud1_t;
115 
116 
117 /********************/
118 /* Package Typedefs */
119 /********************/
120 
121 
/********************/
/* Local Prototypes */
/********************/
/* Helpers private to this file.  H5PB__write_entry() is the routine that
 * H5PB__flush_cb() calls to write out a dirty page entry.
 * NOTE(review): going by their names and signatures, H5PB__insert_entry()
 * adds an entry to the page buffer and H5PB__make_space() frees room for a
 * new page -- confirm against their definitions.
 */
static herr_t H5PB__insert_entry(H5PB_t *page_buf, H5PB_entry_t *page_entry);
static htri_t H5PB__make_space(H5F_t *f, H5PB_t *page_buf, H5FD_mem_t inserted_type);
static herr_t H5PB__write_entry(H5F_t *f, H5PB_entry_t *page_entry);


/*********************/
/* Package Variables */
/*********************/

/* Package initialization variable */
hbool_t H5_PKG_INIT_VAR = FALSE;


/*****************************/
/* Library Private Variables */
/*****************************/


/*******************/
/* Local Variables */
/*******************/
/* Declare a free list to manage the H5PB_t struct
 * (used by H5PB_create() and H5PB_dest())
 */
H5FL_DEFINE_STATIC(H5PB_t);

/* Declare a free list to manage the H5PB_entry_t struct
 * (used wherever page entries are allocated or released)
 */
H5FL_DEFINE_STATIC(H5PB_entry_t);
151 
152 
153 
154 /*-------------------------------------------------------------------------
155  * Function:    H5PB_reset_stats
156  *
157  * Purpose:     This function was created without documentation.
158  *              What follows is my best understanding of Mohamad's intent.
159  *
160  *              Reset statistics collected for the page buffer layer.
161  *
162  * Return:      Non-negative on success/Negative on failure
163  *
164  * Programmer:    Mohamad Chaarawi
165  *
166  *-------------------------------------------------------------------------
167  */
168 herr_t
H5PB_reset_stats(H5PB_t * page_buf)169 H5PB_reset_stats(H5PB_t *page_buf)
170 {
171     FUNC_ENTER_NOAPI_NOERR
172 
173     /* Sanity checks */
174     HDassert(page_buf);
175 
176     page_buf->accesses[0] = 0;
177     page_buf->accesses[1] = 0;
178     page_buf->hits[0] = 0;
179     page_buf->hits[1] = 0;
180     page_buf->misses[0] = 0;
181     page_buf->misses[1] = 0;
182     page_buf->evictions[0] = 0;
183     page_buf->evictions[1] = 0;
184     page_buf->bypasses[0] = 0;
185     page_buf->bypasses[1] = 0;
186 
187     FUNC_LEAVE_NOAPI(SUCCEED)
188 }  /* H5PB_reset_stats() */
189 
190 
191 /*-------------------------------------------------------------------------
192  * Function:    H5PB_get_stats
193  *
194  * Purpose:     This function was created without documentation.
195  *              What follows is my best understanding of Mohamad's intent.
196  *
197  *              Retrieve statistics collected about page accesses for the page buffer layer.
198  *              --accesses: the number of metadata and raw data accesses to the page buffer layer
199  *              --hits: the number of metadata and raw data hits in the page buffer layer
200  *              --misses: the number of metadata and raw data misses in the page buffer layer
201  *              --evictions: the number of metadata and raw data evictions from the page buffer layer
202  *              --bypasses: the number of metadata and raw data accesses that bypass the page buffer layer
203  *
204  * Return:        Non-negative on success/Negative on failure
205  *
206  * Programmer:    Mohamad Chaarawi
207  *
208  *-------------------------------------------------------------------------
209  */
210 herr_t
H5PB_get_stats(const H5PB_t * page_buf,unsigned accesses[2],unsigned hits[2],unsigned misses[2],unsigned evictions[2],unsigned bypasses[2])211 H5PB_get_stats(const H5PB_t *page_buf, unsigned accesses[2], unsigned hits[2],
212     unsigned misses[2], unsigned evictions[2], unsigned bypasses[2])
213 {
214     FUNC_ENTER_NOAPI_NOERR
215 
216     /* Sanity checks */
217     HDassert(page_buf);
218 
219     accesses[0] = page_buf->accesses[0];
220     accesses[1] = page_buf->accesses[1];
221     hits[0] = page_buf->hits[0];
222     hits[1] = page_buf->hits[1];
223     misses[0] = page_buf->misses[0];
224     misses[1] = page_buf->misses[1];
225     evictions[0] = page_buf->evictions[0];
226     evictions[1] = page_buf->evictions[1];
227     bypasses[0] = page_buf->bypasses[0];
228     bypasses[1] = page_buf->bypasses[1];
229 
230     FUNC_LEAVE_NOAPI(SUCCEED)
231 }  /* H5PB_get_stats */
232 
233 
234 /*-------------------------------------------------------------------------
235  * Function:    H5PB_print_stats()
236  *
237  * Purpose:     This function was created without documentation.
238  *              What follows is my best understanding of Mohamad's intent.
239  *
240  *              Print out statistics collected for the page buffer layer.
241  *
242  * Return:        Non-negative on success/Negative on failure
243  *
244  * Programmer:    Mohamad Chaarawi
245  *
246  *-------------------------------------------------------------------------
247  */
248 herr_t
H5PB_print_stats(const H5PB_t * page_buf)249 H5PB_print_stats(const H5PB_t *page_buf)
250 {
251     FUNC_ENTER_NOAPI_NOINIT_NOERR
252 
253     HDassert(page_buf);
254 
255     HDprintf("PAGE BUFFER STATISTICS:\n");
256 
257     HDprintf("******* METADATA\n");
258     HDprintf("\t Total Accesses: %u\n", page_buf->accesses[0]);
259     HDprintf("\t Hits: %u\n", page_buf->hits[0]);
260     HDprintf("\t Misses: %u\n", page_buf->misses[0]);
261     HDprintf("\t Evictions: %u\n", page_buf->evictions[0]);
262     HDprintf("\t Bypasses: %u\n", page_buf->bypasses[0]);
263     HDprintf("\t Hit Rate = %f%%\n", ((double)page_buf->hits[0]/(page_buf->accesses[0] - page_buf->bypasses[0]))*100);
264     HDprintf("*****************\n\n");
265 
266     HDprintf("******* RAWDATA\n");
267     HDprintf("\t Total Accesses: %u\n", page_buf->accesses[1]);
268     HDprintf("\t Hits: %u\n", page_buf->hits[1]);
269     HDprintf("\t Misses: %u\n", page_buf->misses[1]);
270     HDprintf("\t Evictions: %u\n", page_buf->evictions[1]);
271     HDprintf("\t Bypasses: %u\n", page_buf->bypasses[1]);
272     HDprintf("\t Hit Rate = %f%%\n", ((double)page_buf->hits[1]/(page_buf->accesses[1]-page_buf->bypasses[0]))*100);
273     HDprintf("*****************\n\n");
274 
275     FUNC_LEAVE_NOAPI(SUCCEED)
276 } /* H5PB_print_stats */
277 
278 
279 /*-------------------------------------------------------------------------
280  * Function:    H5PB_create
281  *
282  * Purpose:    Create and setup the PB on the file.
283  *
284  * Return:    Non-negative on success/Negative on failure
285  *
286  * Programmer:    Mohamad Chaarawi
287  *
288  *-------------------------------------------------------------------------
289  */
290 herr_t
H5PB_create(H5F_t * f,size_t size,unsigned page_buf_min_meta_perc,unsigned page_buf_min_raw_perc)291 H5PB_create(H5F_t *f, size_t size, unsigned page_buf_min_meta_perc, unsigned page_buf_min_raw_perc)
292 {
293     H5PB_t *page_buf = NULL;
294     herr_t ret_value = SUCCEED;    /* Return value */
295 
296     FUNC_ENTER_NOAPI(FAIL)
297 
298     /* Sanity checks */
299     HDassert(f);
300     HDassert(f->shared);
301 
302     /* Check args */
303     if(f->shared->fs_strategy != H5F_FSPACE_STRATEGY_PAGE)
304         HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "Enabling Page Buffering requires PAGE file space strategy")
305     /* round down the size if it is larger than the page size */
306     else if(size > f->shared->fs_page_size) {
307         hsize_t temp_size;
308 
309         temp_size = (size / f->shared->fs_page_size) * f->shared->fs_page_size;
310         H5_CHECKED_ASSIGN(size, size_t, temp_size, hsize_t);
311     } /* end if */
312     else if(0 != size % f->shared->fs_page_size)
313         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINIT, FAIL, "Page Buffer size must be >= to the page size")
314 
315     /* Allocate the new page buffering structure */
316     if(NULL == (page_buf = H5FL_CALLOC(H5PB_t)))
317     HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
318 
319     page_buf->max_size = size;
320     H5_CHECKED_ASSIGN(page_buf->page_size, size_t, f->shared->fs_page_size, hsize_t);
321     page_buf->min_meta_perc = page_buf_min_meta_perc;
322     page_buf->min_raw_perc = page_buf_min_raw_perc;
323 
324     /* Calculate the minimum page count for metadata and raw data
325      * based on the fractions provided
326      */
327     page_buf->min_meta_count = (unsigned)((size * page_buf_min_meta_perc) / (f->shared->fs_page_size * 100));
328     page_buf->min_raw_count = (unsigned)((size * page_buf_min_raw_perc) / (f->shared->fs_page_size * 100));
329 
330     if(NULL == (page_buf->slist_ptr = H5SL_create(H5SL_TYPE_HADDR, NULL)))
331         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCREATE, FAIL, "can't create skip list")
332     if(NULL == (page_buf->mf_slist_ptr = H5SL_create(H5SL_TYPE_HADDR, NULL)))
333         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCREATE, FAIL, "can't create skip list")
334 
335     if(NULL == (page_buf->page_fac = H5FL_fac_init(page_buf->page_size)))
336         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINIT, FAIL, "can't create page factory")
337 
338     f->shared->page_buf = page_buf;
339 
340 done:
341     if(ret_value < 0) {
342         if(page_buf != NULL) {
343             if(page_buf->slist_ptr != NULL)
344                 H5SL_close(page_buf->slist_ptr);
345             if(page_buf->mf_slist_ptr != NULL)
346                 H5SL_close(page_buf->mf_slist_ptr);
347             if(page_buf->page_fac != NULL)
348                 H5FL_fac_term(page_buf->page_fac);
349             page_buf = H5FL_FREE(H5PB_t, page_buf);
350         } /* end if */
351     } /* end if */
352 
353     FUNC_LEAVE_NOAPI(ret_value)
354 } /* H5PB_create */
355 
356 
357 /*-------------------------------------------------------------------------
358  * Function:    H5PB__flush_cb
359  *
360  * Purpose:    Callback to flush PB skiplist entries.
361  *
362  * Return:    Non-negative on success/Negative on failure
363  *
364  * Programmer:    Mohamad Chaarawi
365  *
366  *-------------------------------------------------------------------------
367  */
368 static herr_t
H5PB__flush_cb(void * item,void H5_ATTR_UNUSED * key,void * _op_data)369 H5PB__flush_cb(void *item, void H5_ATTR_UNUSED *key, void *_op_data)
370 {
371     H5PB_entry_t *page_entry = (H5PB_entry_t *)item;    /* Pointer to page entry node */
372     H5F_t *f = (H5F_t *)_op_data;
373     herr_t  ret_value = SUCCEED;    /* Return value */
374 
375     FUNC_ENTER_STATIC
376 
377     /* Sanity checks */
378     HDassert(page_entry);
379     HDassert(f);
380 
381     /* Flush the page if it's dirty */
382     if(page_entry->is_dirty)
383         if(H5PB__write_entry(f, page_entry) < 0)
384             HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
385 
386 done:
387     FUNC_LEAVE_NOAPI(ret_value)
388 } /* H5PB__flush_cb() */
389 
390 
391 /*-------------------------------------------------------------------------
392  * Function:    H5PB_flush
393  *
394  * Purpose:    Flush/Free all the PB entries to the file.
395  *
396  * Return:    Non-negative on success/Negative on failure
397  *
398  * Programmer:    Mohamad Chaarawi
399  *
400  *-------------------------------------------------------------------------
401  */
402 herr_t
H5PB_flush(H5F_t * f)403 H5PB_flush(H5F_t *f)
404 {
405     herr_t  ret_value = SUCCEED;    /* Return value */
406 
407     FUNC_ENTER_NOAPI(FAIL)
408 
409     /* Sanity check */
410     HDassert(f);
411 
412     /* Flush all the entries in the PB skiplist, if we have write access on the file */
413     if(f->shared->page_buf && (H5F_ACC_RDWR & H5F_INTENT(f))) {
414         H5PB_t *page_buf = f->shared->page_buf;
415 
416         /* Iterate over all entries in page buffer skip list */
417         if(H5SL_iterate(page_buf->slist_ptr, H5PB__flush_cb, (void *)f))
418             HGOTO_ERROR(H5E_PAGEBUF, H5E_BADITER, FAIL, "can't flush page buffer skip list")
419     } /* end if */
420 
421 done:
422     FUNC_LEAVE_NOAPI(ret_value)
423 } /* H5PB_flush */
424 
425 
426 /*-------------------------------------------------------------------------
427  * Function:    H5PB__dest_cb
428  *
429  * Purpose:    Callback to free PB skiplist entries.
430  *
431  * Return:    Non-negative on success/Negative on failure
432  *
433  * Programmer:    Mohamad Chaarawi
434  *
435  *-------------------------------------------------------------------------
436  */
437 static herr_t
H5PB__dest_cb(void * item,void H5_ATTR_UNUSED * key,void * _op_data)438 H5PB__dest_cb(void *item, void H5_ATTR_UNUSED *key, void *_op_data)
439 {
440     H5PB_entry_t *page_entry = (H5PB_entry_t *)item;       /* Pointer to page entry node */
441     H5PB_ud1_t *op_data = (H5PB_ud1_t *)_op_data;
442 
443     FUNC_ENTER_STATIC_NOERR
444 
445     /* Sanity checking */
446     HDassert(page_entry);
447     HDassert(op_data);
448     HDassert(op_data->page_buf);
449 
450     /* Remove entry from LRU list */
451     if(op_data->actual_slist) {
452         H5PB__REMOVE_LRU(op_data->page_buf, page_entry)
453         page_entry->page_buf_ptr = H5FL_FAC_FREE(op_data->page_buf->page_fac, page_entry->page_buf_ptr);
454     } /* end if */
455 
456     /* Free page entry */
457     page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
458 
459     FUNC_LEAVE_NOAPI(SUCCEED)
460 } /* H5PB__dest_cb() */
461 
462 
463 /*-------------------------------------------------------------------------
464  * Function:    H5PB_dest
465  *
466  * Purpose:    Flush and destroy the PB on the file if it exists.
467  *
468  * Return:    Non-negative on success/Negative on failure
469  *
470  * Programmer:    Mohamad Chaarawi
471  *
472  *-------------------------------------------------------------------------
473  */
474 herr_t
H5PB_dest(H5F_t * f)475 H5PB_dest(H5F_t *f)
476 {
477     herr_t  ret_value = SUCCEED;        /* Return value */
478 
479     FUNC_ENTER_NOAPI(FAIL)
480 
481     /* Sanity checks */
482     HDassert(f);
483 
484     /* flush and destroy the page buffer, if it exists */
485     if(f->shared->page_buf) {
486         H5PB_t *page_buf = f->shared->page_buf;
487         H5PB_ud1_t op_data;                 /* Iteration context */
488 
489         if(H5PB_flush(f) < 0)
490             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTFLUSH, FAIL, "can't flush page buffer")
491 
492         /* Set up context info */
493         op_data.page_buf = page_buf;
494 
495         /* Destroy the skip list containing all the entries in the PB */
496         op_data.actual_slist = TRUE;
497         if(H5SL_destroy(page_buf->slist_ptr, H5PB__dest_cb, &op_data))
498             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCLOSEOBJ, FAIL, "can't destroy page buffer skip list")
499 
500         /* Destroy the skip list containing the new entries */
501         op_data.actual_slist = FALSE;
502         if(H5SL_destroy(page_buf->mf_slist_ptr, H5PB__dest_cb, &op_data))
503             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCLOSEOBJ, FAIL, "can't destroy page buffer skip list")
504 
505         /* Destroy the page factory */
506         if(H5FL_fac_term(page_buf->page_fac) < 0)
507             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTRELEASE, FAIL, "can't destroy page buffer page factory")
508 
509         f->shared->page_buf = H5FL_FREE(H5PB_t, page_buf);
510     } /* end if */
511 
512 done:
513     FUNC_LEAVE_NOAPI(ret_value)
514 } /* H5PB_dest */
515 
516 
517 /*-------------------------------------------------------------------------
518  * Function:    H5PB_add_new_page
519  *
520  * Purpose:    Add a new page to the new page skip list. This is called
521  *              from the MF layer when a new page is allocated to
522  *              indicate to the page buffer layer that a read of the page
523  *              from the file is not necessary since it's an empty page.
524  *
525  * Return:    Non-negative on success/Negative on failure
526  *
527  * Programmer:    Mohamad Chaarawi
528  *
529  *-------------------------------------------------------------------------
530  */
531 herr_t
H5PB_add_new_page(H5F_t * f,H5FD_mem_t type,haddr_t page_addr)532 H5PB_add_new_page(H5F_t *f, H5FD_mem_t type, haddr_t page_addr)
533 {
534     H5PB_t *page_buf = f->shared->page_buf;
535     H5PB_entry_t *page_entry = NULL;    /* pointer to the corresponding page entry */
536     herr_t ret_value = SUCCEED;    /* Return value */
537 
538     FUNC_ENTER_NOAPI(FAIL)
539 
540     /* Sanity checks */
541     HDassert(page_buf);
542 
543     /* If there is an existing page, this means that at some point the
544      * file free space manager freed and re-allocated a page at the same
545      * address.  No need to do anything here then...
546      */
547     /* MSC - to be safe, might want to dig in the MF layer and remove
548      * the page when it is freed from this list if it still exists and
549      * remove this check
550      */
551     if(NULL == H5SL_search(page_buf->mf_slist_ptr, &(page_addr))) {
552         /* Create the new PB entry */
553         if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
554             HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
555 
556         /* Initialize page fields */
557         page_entry->addr = page_addr;
558         page_entry->type = (H5F_mem_page_t)type;
559         page_entry->is_dirty = FALSE;
560 
561         /* Insert entry in skip list */
562         if(H5SL_insert(page_buf->mf_slist_ptr, page_entry, &(page_entry->addr)) < 0)
563             HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "Can't insert entry in skip list")
564     } /* end if */
565 
566 done:
567     if(ret_value < 0)
568         if(page_entry)
569             page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
570 
571     FUNC_LEAVE_NOAPI(ret_value)
572 } /* H5PB_add_new_page */
573 
574 
575 /*-------------------------------------------------------------------------
576  * Function:    H5PB_update_entry
577  *
578  * Purpose:    In PHDF5, entries that are written by other processes and just
579  *              marked clean by this process have to have their corresponding
580  *              pages updated if they exist in the page buffer.
581  *              This routine checks and update the pages.
582  *
583  * Return:    Non-negative on success/Negative on failure
584  *
585  * Programmer:    Mohamad Chaarawi
586  *
587  *-------------------------------------------------------------------------
588  */
589 herr_t
H5PB_update_entry(H5PB_t * page_buf,haddr_t addr,size_t size,const void * buf)590 H5PB_update_entry(H5PB_t *page_buf, haddr_t addr, size_t size, const void *buf)
591 {
592     H5PB_entry_t *page_entry;   /* Pointer to the corresponding page entry */
593     haddr_t page_addr;
594 
595     FUNC_ENTER_NOAPI_NOERR
596 
597     /* Sanity checks */
598     HDassert(page_buf);
599     HDassert(size <= page_buf->page_size);
600     HDassert(buf);
601 
602     /* calculate the aligned address of the first page */
603     page_addr = (addr / page_buf->page_size) * page_buf->page_size;
604 
605     /* search for the page and update if found */
606     page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&page_addr));
607     if(page_entry) {
608         haddr_t offset;
609 
610         HDassert(addr + size <= page_addr + page_buf->page_size);
611         offset = addr - page_addr;
612         HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, buf, size);
613 
614         /* move to top of LRU list */
615         H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
616     } /* end if */
617 
618     FUNC_LEAVE_NOAPI(SUCCEED)
619 } /* H5PB_update_entry */
620 
621 
622 /*-------------------------------------------------------------------------
623  * Function:    H5PB_remove_entry
624  *
625  * Purpose:     Remove possible metadata entry with ADDR from the PB cache.
626  *              This is in response to the data corruption bug from fheap.c
627  *              with page buffering + page strategy.
628  *              Note: Large metadata page bypasses the PB cache.
629  *              Note: Update of raw data page (large or small sized) is handled by the PB cache.
630  *
631  * Return:      Non-negative on success/Negative on failure
632  *
633  * Programmer:  Vailin Choi; Feb 2017
634  *
635  *-------------------------------------------------------------------------
636  */
637 herr_t
H5PB_remove_entry(const H5F_t * f,haddr_t addr)638 H5PB_remove_entry(const H5F_t *f, haddr_t addr)
639 {
640     H5PB_t *page_buf = f->shared->page_buf;
641     H5PB_entry_t *page_entry = NULL;        /* pointer to the page entry being searched */
642     herr_t ret_value = SUCCEED;             /* Return value */
643 
644     FUNC_ENTER_NOAPI(FAIL)
645 
646     /* Sanity checks */
647     HDassert(page_buf);
648 
649     /* Search for address in the skip list */
650     page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&addr));
651 
652     /* If found, remove the entry from the PB cache */
653     if(page_entry) {
654         HDassert(page_entry->type != H5F_MEM_PAGE_DRAW);
655         if(NULL == H5SL_remove(page_buf->slist_ptr, &(page_entry->addr)))
656             HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "Page Entry is not in skip list")
657 
658         /* Remove from LRU list */
659         H5PB__REMOVE_LRU(page_buf, page_entry)
660         HDassert(H5SL_count(page_buf->slist_ptr) == page_buf->LRU_list_len);
661 
662         page_buf->meta_count--;
663 
664         page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
665         page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
666     } /* end if */
667 
668 done:
669     FUNC_LEAVE_NOAPI(ret_value)
670 } /* H5PB_remove_entry */
671 
672 
673 /*-------------------------------------------------------------------------
674  * Function:    H5PB_read
675  *
676  * Purpose:    Reads in the data from the page containing it if it exists
677  *              in the PB cache; otherwise reads in the page through the VFD.
678  *
679  * Return:    Non-negative on success/Negative on failure
680  *
681  * Programmer:    Mohamad Chaarawi
682  *
683  *-------------------------------------------------------------------------
684  */
685 herr_t
H5PB_read(H5F_t * f,H5FD_mem_t type,haddr_t addr,size_t size,void * buf)686 H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, void *buf/*out*/)
687 {
688     H5PB_t *page_buf;                   /* Page buffering info for this file */
689     H5PB_entry_t *page_entry;           /* Pointer to the corresponding page entry */
690     H5FD_t *file;                       /* File driver pointer */
691     haddr_t first_page_addr, last_page_addr;    /* Addresses of the first and last pages covered by I/O */
692     haddr_t offset;
693     haddr_t search_addr;                /* Address of current page */
694     hsize_t num_touched_pages;          /* Number of pages accessed */
695     size_t access_size;
696     hbool_t bypass_pb = FALSE;          /* Whether to bypass page buffering */
697     hsize_t i;                          /* Local index variable */
698     herr_t ret_value = SUCCEED;         /* Return value */
699 
700     FUNC_ENTER_NOAPI(FAIL)
701 
702     /* Sanity checks */
703     HDassert(f);
704     HDassert(type != H5FD_MEM_GHEAP);
705 
706     /* Get pointer to page buffer info for this file */
707     page_buf = f->shared->page_buf;
708 
709 #ifdef H5_HAVE_PARALLEL
710     if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
711 #if 1
712         bypass_pb = TRUE;
713 #else
714         /* MSC - why this stopped working ? */
715         int mpi_size;
716 
717         if((mpi_size = H5F_mpi_get_size(f)) < 0)
718             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "can't retrieve MPI communicator size")
719         if(1 != mpi_size)
720             bypass_pb = TRUE;
721 #endif
722     } /* end if */
723 #endif
724 
725     /* If page buffering is disabled, or the I/O size is larger than that of a
726      * single page, or if this is a parallel raw data access, bypass page
727      * buffering.
728      */
729     if(NULL == page_buf || size >= page_buf->page_size ||
730            (bypass_pb && H5FD_MEM_DRAW == type)) {
731         if(H5F__accum_read(f, type, addr, size, buf) < 0)
732             HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "read through metadata accumulator failed")
733 
734         /* Update statistics */
735         if(page_buf) {
736             if(type == H5FD_MEM_DRAW)
737                 page_buf->bypasses[1] ++;
738             else
739                 page_buf->bypasses[0] ++;
740         } /* end if */
741 
742         /* If page buffering is disabled, or if this is a large metadata access,
743          * or if this is parallel raw data access, we are done here
744          */
745         if(NULL == page_buf || (size >= page_buf->page_size && H5FD_MEM_DRAW != type) ||
746                 (bypass_pb && H5FD_MEM_DRAW == type))
747             HGOTO_DONE(SUCCEED)
748     } /* end if */
749 
750     /* Update statistics */
751     if(page_buf) {
752         if(type == H5FD_MEM_DRAW)
753             page_buf->accesses[1]++;
754         else
755             page_buf->accesses[0]++;
756     } /* end if */
757 
758     /* Calculate the aligned address of the first page */
759     first_page_addr = (addr / page_buf->page_size) * page_buf->page_size;
760 
761     /* For Raw data calculate the aligned address of the last page and
762      * the number of pages accessed if more than 1 page is accessed
763      */
764     if(H5FD_MEM_DRAW == type) {
765         last_page_addr = ((addr + size - 1) / page_buf->page_size) * page_buf->page_size;
766 
767         /* How many pages does this write span */
768         num_touched_pages = (last_page_addr / page_buf->page_size + 1) -
769                 (first_page_addr / page_buf->page_size);
770         if(first_page_addr == last_page_addr) {
771             HDassert(1 == num_touched_pages);
772             last_page_addr = HADDR_UNDEF;
773         } /* end if */
774     } /* end if */
775     /* Otherwise set last page addr to HADDR_UNDEF */
776     else {
777         num_touched_pages = 1;
778         last_page_addr = HADDR_UNDEF;
779     } /* end else */
780 
781     /* Translate to file driver I/O info object */
782     file = f->shared->lf;
783 
784     /* Copy raw data from dirty pages into the read buffer if the read
785        request spans pages in the page buffer*/
786     if(H5FD_MEM_DRAW == type && size >= page_buf->page_size) {
787         H5SL_node_t *node;
788 
789         /* For each touched page in the page buffer, check if it
790          * exists in the page Buffer and is dirty. If it does, we
791          * update the buffer with what's in the page so we get the up
792          * to date data into the buffer after the big read from the file.
793          */
794         node = H5SL_find(page_buf->slist_ptr, (void *)(&first_page_addr));
795         for(i = 0; i < num_touched_pages; i++) {
796             search_addr = i*page_buf->page_size + first_page_addr;
797 
798             /* if we still haven't located a starting page, search again */
799             if(!node && i!=0)
800                 node = H5SL_find(page_buf->slist_ptr, (void *)(&search_addr));
801 
802             /* if the current page is in the Page Buffer, do the updates */
803             if(node) {
804                 page_entry = (H5PB_entry_t *)H5SL_item(node);
805 
806                 HDassert(page_entry);
807 
808                 /* If the current page address falls out of the access
809                    block, then there are no more pages to go over */
810                 if(page_entry->addr >= addr + size)
811                     break;
812 
813                 HDassert(page_entry->addr == search_addr);
814 
815                 if(page_entry->is_dirty) {
816                     /* special handling for the first page if it is not a full page access */
817                     if(i == 0 && first_page_addr != addr) {
818                         offset = addr - first_page_addr;
819                         HDassert(page_buf->page_size > offset);
820 
821                         HDmemcpy(buf, (uint8_t *)page_entry->page_buf_ptr + offset,
822                                  page_buf->page_size - (size_t)offset);
823 
824                         /* move to top of LRU list */
825                         H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
826                     } /* end if */
827                     /* special handling for the last page if it is not a full page access */
828                     else if(num_touched_pages > 1 && i == num_touched_pages-1 && search_addr < addr+size) {
829                         offset = (num_touched_pages-2)*page_buf->page_size +
830                             (page_buf->page_size - (addr - first_page_addr));
831 
832                         HDmemcpy((uint8_t *)buf + offset, page_entry->page_buf_ptr,
833                                  (size_t)((addr + size) - last_page_addr));
834 
835                         /* move to top of LRU list */
836                         H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
837                     } /* end else-if */
838                     /* copy the entire fully accessed pages */
839                     else {
840                         offset = i*page_buf->page_size;
841 
842                         HDmemcpy((uint8_t *)buf+(i*page_buf->page_size) , page_entry->page_buf_ptr,
843                              page_buf->page_size);
844                     } /* end else */
845                 } /* end if */
846                 node = H5SL_next(node);
847             } /* end if */
848         } /* end for */
849     } /* end if */
850     else {
851         /* A raw data access could span 1 or 2 PB entries at this point so
852            we need to handle that */
853         HDassert(1 == num_touched_pages || 2 == num_touched_pages);
854         for(i = 0 ; i < num_touched_pages; i++) {
855             haddr_t buf_offset;
856 
857             /* Calculate the aligned address of the page to search for it in the skip list */
858             search_addr = (0==i ? first_page_addr : last_page_addr);
859 
860             /* Calculate the access size if the access spans more than 1 page */
861             if(1 == num_touched_pages)
862                 access_size = size;
863             else
864                 access_size = (0 == i ? (size_t)((first_page_addr + page_buf->page_size) - addr) : (size - access_size));
865 
866             /* Lookup the page in the skip list */
867             page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
868 
869             /* if found */
870             if(page_entry) {
871                 offset = (0 == i ? addr - page_entry->addr : 0);
872                 buf_offset = (0 == i ? 0 : size - access_size);
873 
874                 /* copy the requested data from the page into the input buffer */
875                 HDmemcpy((uint8_t *)buf + buf_offset, (uint8_t *)page_entry->page_buf_ptr + offset, access_size);
876 
877                 /* Update LRU */
878                 H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
879 
880                 /* Update statistics */
881                 if(type == H5FD_MEM_DRAW)
882                     page_buf->hits[1]++;
883                 else
884                     page_buf->hits[0]++;
885             } /* end if */
886             /* if not found */
887             else {
888                 void *new_page_buf = NULL;
889                 size_t page_size = page_buf->page_size;
890                 haddr_t eoa;
891 
892                 /* make space for new entry */
893                 if((H5SL_count(page_buf->slist_ptr) * page_buf->page_size) >= page_buf->max_size) {
894                     htri_t can_make_space;
895 
896                     /* check if we can make space in page buffer */
897                     if((can_make_space = H5PB__make_space(f, page_buf, type)) < 0)
898                         HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "make space in Page buffer Failed")
899 
900                     /* if make_space returns 0, then we can't use the page
901                        buffer for this I/O and we need to bypass */
902                     if(0 == can_make_space) {
903                         /* make space can't return FALSE on second touched page since the first is of the same type */
904                         HDassert(0 == i);
905 
906                         /* read entire block from VFD and return */
907                         if(H5FD_read(file, type, addr, size, buf) < 0)
908                             HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
909 
910                         /* Break out of loop */
911                         break;
912                     } /* end if */
913                 } /* end if */
914 
915                 /* Read page from VFD */
916                 if(NULL == (new_page_buf = H5FL_FAC_MALLOC(page_buf->page_fac)))
917                     HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
918 
919                 /* Read page through the VFD layer, but make sure we don't read past the EOA. */
920 
921                 /* Retrieve the 'eoa' for the file */
922                 if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, type)))
923                     HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
924 
925                 /* If the entire page falls outside the EOA, then fail */
926                 if(search_addr > eoa)
927                     HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "reading an entire page that is outside the file EOA")
928 
929                 /* Adjust the read size to not go beyond the EOA */
930                 if(search_addr + page_size > eoa)
931                     page_size = (size_t)(eoa - search_addr);
932 
933                 /* Read page from VFD */
934                 if(H5FD_read(file, type, search_addr, page_size, new_page_buf) < 0)
935                     HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
936 
937                 /* Copy the requested data from the page into the input buffer */
938                 offset = (0 == i ? addr - search_addr : 0);
939                 buf_offset = (0 == i ? 0 : size - access_size);
940                 HDmemcpy((uint8_t *)buf + buf_offset, (uint8_t *)new_page_buf + offset, access_size);
941 
942                 /* Create the new PB entry */
943                 if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
944                     HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
945 
946                 page_entry->page_buf_ptr = new_page_buf;
947                 page_entry->addr = search_addr;
948                 page_entry->type = (H5F_mem_page_t)type;
949                 page_entry->is_dirty = FALSE;
950 
951                 /* Insert page into PB */
952                 if(H5PB__insert_entry(page_buf, page_entry) < 0)
953                     HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTSET, FAIL, "error inserting new page in page buffer")
954 
955                 /* Update statistics */
956                 if(type == H5FD_MEM_DRAW)
957                     page_buf->misses[1]++;
958                 else
959                     page_buf->misses[0]++;
960             } /* end else */
961         } /* end for */
962     } /* end else */
963 
964 done:
965     FUNC_LEAVE_NOAPI(ret_value)
966 } /* end H5PB_read() */
967 
968 
969 /*-------------------------------------------------------------------------
970  * Function:    H5PB_write
971  *
972  * Purpose: Write data into the Page Buffer. If the page exists in the
973  *          cache, update it; otherwise read it from disk, update it, and
974  *          insert into cache.
975  *
976  * Return:    Non-negative on success/Negative on failure
977  *
978  * Programmer:    Mohamad Chaarawi
979  *
980  *-------------------------------------------------------------------------
981  */
982 herr_t
H5PB_write(H5F_t * f,H5FD_mem_t type,haddr_t addr,size_t size,const void * buf)983 H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr,
984     size_t size, const void *buf)
985 {
986     H5PB_t *page_buf;                   /* Page buffering info for this file */
987     H5PB_entry_t *page_entry;           /* Pointer to the corresponding page entry */
988     H5FD_t *file;                       /* File driver pointer */
989     haddr_t first_page_addr, last_page_addr;    /* Addresses of the first and last pages covered by I/O */
990     haddr_t offset;
991     haddr_t search_addr;                /* Address of current page */
992     hsize_t num_touched_pages;          /* Number of pages accessed */
993     size_t access_size;
994     hbool_t bypass_pb = FALSE;          /* Whether to bypass page buffering */
995     hsize_t i;                          /* Local index variable */
996     herr_t  ret_value = SUCCEED;        /* Return value */
997 
998     FUNC_ENTER_NOAPI(FAIL)
999 
1000     /* Sanity checks */
1001     HDassert(f);
1002 
1003     /* Get pointer to page buffer info for this file */
1004     page_buf = f->shared->page_buf;
1005 
1006 #ifdef H5_HAVE_PARALLEL
1007     if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
1008 #if 1
1009         bypass_pb = TRUE;
1010 #else
1011         /* MSC - why this stopped working ? */
1012         int mpi_size;
1013 
1014         if((mpi_size = H5F_mpi_get_size(f)) < 0)
1015             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "can't retrieve MPI communicator size")
1016         if(1 != mpi_size)
1017             bypass_pb = TRUE;
1018 #endif
1019     } /* end if */
1020 #endif
1021 
1022     /* If page buffering is disabled, or the I/O size is larger than that of a
1023      * single page, or if this is a parallel raw data access, bypass page
1024      * buffering.
1025      */
1026     if(NULL == page_buf || size >= page_buf->page_size || bypass_pb) {
1027         if(H5F__accum_write(f, type, addr, size, buf) < 0)
1028             HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "write through metadata accumulator failed")
1029 
1030         /* Update statistics */
1031         if(page_buf) {
1032             if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
1033                 page_buf->bypasses[1]++;
1034             else
1035                 page_buf->bypasses[0]++;
1036         } /* end if */
1037 
1038         /* If page buffering is disabled, or if this is a large metadata access,
1039          * or if this is a parallel raw data access, we are done here
1040          */
1041         if(NULL == page_buf || (size >= page_buf->page_size && H5FD_MEM_DRAW != type) ||
1042                 (bypass_pb && H5FD_MEM_DRAW == type))
1043             HGOTO_DONE(SUCCEED)
1044 
1045 #ifdef H5_HAVE_PARALLEL
1046         if(bypass_pb) {
1047             if(H5PB_update_entry(page_buf, addr, size, buf) > 0)
1048                 HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTUPDATE, FAIL, "failed to update PB with metadata cache")
1049             HGOTO_DONE(SUCCEED)
1050         } /* end if */
1051 #endif
1052     } /* end if */
1053 
1054     /* Update statistics */
1055     if(page_buf) {
1056         if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
1057             page_buf->accesses[1]++;
1058         else
1059             page_buf->accesses[0]++;
1060     } /* end if */
1061 
1062     /* Calculate the aligned address of the first page */
1063     first_page_addr = (addr / page_buf->page_size) * page_buf->page_size;
1064 
1065     /* For raw data calculate the aligned address of the last page and
1066      * the number of pages accessed if more than 1 page is accessed
1067      */
1068     if(H5FD_MEM_DRAW == type) {
1069         last_page_addr = (addr + size - 1) / page_buf->page_size * page_buf->page_size;
1070 
1071         /* how many pages does this write span */
1072         num_touched_pages = (last_page_addr/page_buf->page_size + 1) -
1073             (first_page_addr / page_buf->page_size);
1074         if(first_page_addr == last_page_addr) {
1075             HDassert(1 == num_touched_pages);
1076             last_page_addr = HADDR_UNDEF;
1077         } /* end if */
1078     } /* end if */
1079     /* Otherwise set last page addr to HADDR_UNDEF */
1080     else {
1081         num_touched_pages = 1;
1082         last_page_addr = HADDR_UNDEF;
1083     } /* end else */
1084 
1085     /* Translate to file driver I/O info object */
1086     file = f->shared->lf;
1087 
1088     /* Check if existing pages for raw data need to be updated since raw data access is not atomic */
1089     if(H5FD_MEM_DRAW == type && size >= page_buf->page_size) {
1090         /* For each touched page, check if it exists in the page buffer, and
1091          * update it with the data in the buffer to keep it up to date
1092          */
1093         for(i = 0; i < num_touched_pages; i++) {
1094             search_addr = i * page_buf->page_size + first_page_addr;
1095 
1096             /* Special handling for the first page if it is not a full page update */
1097             if(i == 0 && first_page_addr != addr) {
1098                 /* Lookup the page in the skip list */
1099                 page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
1100                 if(page_entry) {
1101                     offset = addr - first_page_addr;
1102                     HDassert(page_buf->page_size > offset);
1103 
1104                     /* Update page's data */
1105                     HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, buf, page_buf->page_size - (size_t)offset);
1106 
1107                     /* Mark page dirty and push to top of LRU */
1108                     page_entry->is_dirty = TRUE;
1109                     H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
1110                 } /* end if */
1111             } /* end if */
1112             /* Special handling for the last page if it is not a full page update */
1113             else if(num_touched_pages > 1 && i == (num_touched_pages - 1) &&
1114                     (search_addr + page_buf->page_size) != (addr + size)) {
1115                 HDassert(search_addr+page_buf->page_size > addr+size);
1116 
1117                 /* Lookup the page in the skip list */
1118                 page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
1119                 if(page_entry) {
1120                     offset = (num_touched_pages - 2) * page_buf->page_size +
1121                         (page_buf->page_size - (addr - first_page_addr));
1122 
1123                     /* Update page's data */
1124                     HDmemcpy(page_entry->page_buf_ptr, (const uint8_t *)buf + offset,
1125                              (size_t)((addr + size) - last_page_addr));
1126 
1127                     /* Mark page dirty and push to top of LRU */
1128                     page_entry->is_dirty = TRUE;
1129                     H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
1130                 } /* end if */
1131             } /* end else-if */
1132             /* Discard all fully written pages from the page buffer */
1133             else {
1134                 page_entry = (H5PB_entry_t *)H5SL_remove(page_buf->slist_ptr, (void *)(&search_addr));
1135                 if(page_entry) {
1136                     /* Remove from LRU list */
1137                     H5PB__REMOVE_LRU(page_buf, page_entry)
1138 
1139                     /* Decrement page count of appropriate type */
1140                     if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
1141                         page_buf->raw_count--;
1142                     else
1143                         page_buf->meta_count--;
1144 
1145                     /* Free page info */
1146                     page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
1147                     page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
1148                 } /* end if */
1149             } /* end else */
1150         } /* end for */
1151     } /* end if */
1152     else {
1153         /* An access could span 1 or 2 PBs at this point so we need to handle that */
1154         HDassert(1 == num_touched_pages || 2 == num_touched_pages);
1155         for(i = 0; i < num_touched_pages; i++) {
1156             haddr_t buf_offset;
1157 
1158             /* Calculate the aligned address of the page to search for it in the skip list */
1159             search_addr = (0 == i ? first_page_addr : last_page_addr);
1160 
1161             /* Calculate the access size if the access spans more than 1 page */
1162             if(1 == num_touched_pages)
1163                 access_size = size;
1164             else
1165                 access_size = (0 == i ? (size_t)(first_page_addr + page_buf->page_size - addr) : (size - access_size));
1166 
1167             /* Lookup the page in the skip list */
1168             page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
1169 
1170             /* If found */
1171             if(page_entry) {
1172                 offset = (0 == i ? addr - page_entry->addr : 0);
1173                 buf_offset = (0 == i ? 0 : size - access_size);
1174 
1175                 /* Copy the requested data from the input buffer into the page */
1176                 HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, (const uint8_t *)buf + buf_offset, access_size);
1177 
1178                 /* Mark page dirty and push to top of LRU */
1179                 page_entry->is_dirty = TRUE;
1180                 H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
1181 
1182                 /* Update statistics */
1183                 if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
1184                     page_buf->hits[1]++;
1185                 else
1186                     page_buf->hits[0]++;
1187             } /* end if */
1188             /* If not found */
1189             else {
1190                 void *new_page_buf;
1191                 size_t page_size = page_buf->page_size;
1192 
1193                 /* Make space for new entry */
1194                 if((H5SL_count(page_buf->slist_ptr) * page_buf->page_size) >= page_buf->max_size) {
1195                     htri_t can_make_space;
1196 
1197                     /* Check if we can make space in page buffer */
1198                     if((can_make_space = H5PB__make_space(f, page_buf, type)) < 0)
1199                         HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "make space in Page buffer Failed")
1200 
1201                     /* If make_space returns 0, then we can't use the page
1202                      * buffer for this I/O and we need to bypass
1203                      */
1204                     if(0 == can_make_space) {
1205                         HDassert(0 == i);
1206 
1207                         /* Write to VFD and return */
1208                         if(H5FD_write(file, type, addr, size, buf) < 0)
1209                             HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "driver write request failed")
1210 
1211                         /* Break out of loop */
1212                         break;
1213                     } /* end if */
1214                 } /* end if */
1215 
1216                 /* Don't bother searching if there is no write access */
1217                 if(H5F_ACC_RDWR & H5F_INTENT(f))
1218                     /* Lookup & remove the page from the new skip list page if
1219                      * it exists to see if this is a new page from the MF layer
1220                      */
1221                     page_entry = (H5PB_entry_t *)H5SL_remove(page_buf->mf_slist_ptr, (void *)(&search_addr));
1222 
1223                 /* Calculate offset into the buffer of the page and the user buffer */
1224                 offset = (0 == i ? addr - search_addr : 0);
1225                 buf_offset = (0 == i ? 0 : size - access_size);
1226 
1227                 /* If found, then just update the buffer pointer to the newly allocate buffer */
1228                 if(page_entry) {
1229                     /* Allocate space for the page buffer */
1230                     if(NULL == (new_page_buf = H5FL_FAC_MALLOC(page_buf->page_fac)))
1231                         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
1232                     HDmemset(new_page_buf, 0, (size_t)offset);
1233                     HDmemset((uint8_t *)new_page_buf + offset + access_size, 0, page_size - ((size_t)offset + access_size));
1234 
1235                     page_entry->page_buf_ptr = new_page_buf;
1236 
1237                     /* Update statistics */
1238                     if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
1239                         page_buf->hits[1]++;
1240                     else
1241                         page_buf->hits[0]++;
1242                 } /* end if */
1243                 /* Otherwise read page through the VFD layer, but make sure we don't read past the EOA. */
1244                 else {
1245                     haddr_t eoa, eof = HADDR_UNDEF;
1246 
1247                     /* Allocate space for the page buffer */
1248                     if(NULL == (new_page_buf = H5FL_FAC_CALLOC(page_buf->page_fac)))
1249                         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
1250 
1251                     /* Create the new loaded PB entry */
1252                     if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
1253                         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed")
1254 
1255                     page_entry->page_buf_ptr = new_page_buf;
1256                     page_entry->addr = search_addr;
1257                     page_entry->type = (H5F_mem_page_t)type;
1258 
1259                     /* Retrieve the 'eoa' for the file */
1260                     if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, type)))
1261                         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
1262 
1263                     /* If the entire page falls outside the EOA, then fail */
1264                     if(search_addr > eoa)
1265                         HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "writing to a page that is outside the file EOA")
1266 
1267                     /* Retrieve the 'eof' for the file - The MPI-VFD EOF
1268                      * returned will most likely be HADDR_UNDEF, so skip
1269                      * that check.
1270                      */
1271                     if(!H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
1272                         if(HADDR_UNDEF == (eof = H5FD_get_eof(f->shared->lf, H5FD_MEM_DEFAULT)))
1273                             HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eof request failed")
1274 
1275                     /* Adjust the read size to not go beyond the EOA */
1276                     if(search_addr + page_size > eoa)
1277                         page_size = (size_t)(eoa - search_addr);
1278 
1279                     if(search_addr < eof) {
1280                         if(H5FD_read(file, type, search_addr, page_size, new_page_buf) < 0)
1281                             HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
1282 
1283                         /* Update statistics */
1284                         if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
1285                             page_buf->misses[1]++;
1286                         else
1287                             page_buf->misses[0]++;
1288                     } /* end if */
1289                 } /* end else */
1290 
1291                 /* Copy the requested data from the page into the input buffer */
1292                 HDmemcpy((uint8_t *)new_page_buf + offset, (const uint8_t *)buf+buf_offset, access_size);
1293 
1294                 /* Page is dirty now */
1295                 page_entry->is_dirty = TRUE;
1296 
1297                 /* Insert page into PB, evicting other pages as necessary */
1298                 if(H5PB__insert_entry(page_buf, page_entry) < 0)
1299                     HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTSET, FAIL, "error inserting new page in page buffer")
1300             } /* end else */
1301         } /* end for */
1302     } /* end else */
1303 
1304 done:
1305     FUNC_LEAVE_NOAPI(ret_value)
1306 } /* end H5PB_write() */
1307 
1308 
1309 /*-------------------------------------------------------------------------
1310  * Function:    H5PB__insert_entry()
1311  *
1312  * Purpose: ???
1313  *
1314  *          This function was created without documentation.
1315  *          What follows is my best understanding of Mohamad's intent.
1316  *
1317  *        Insert the supplied page into the page buffer, both the
1318  *          skip list and the LRU.
1319  *
1320  *          As best I can tell, this function imposes no limit on the
1321  *          number of entries in the page buffer beyond an assertion
1322  *          failure it the page count exceeds the limit.
1323  *
1324  *                                               JRM -- 12/22/16
1325  *
1326  *
1327  * Return:    Non-negative on success/Negative on failure
1328  *
1329  * Programmer:    Mohamad Chaarawi
1330  *
1331  *-------------------------------------------------------------------------
1332  */
1333 static herr_t
H5PB__insert_entry(H5PB_t * page_buf,H5PB_entry_t * page_entry)1334 H5PB__insert_entry(H5PB_t *page_buf, H5PB_entry_t *page_entry)
1335 {
1336     herr_t ret_value = SUCCEED;    /* Return value */
1337 
1338     FUNC_ENTER_STATIC
1339 
1340     /* Insert entry in skip list */
1341     if(H5SL_insert(page_buf->slist_ptr, page_entry, &(page_entry->addr)) < 0)
1342         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINSERT, FAIL, "can't insert entry in skip list")
1343     HDassert(H5SL_count(page_buf->slist_ptr) * page_buf->page_size <= page_buf->max_size);
1344 
1345     /* Increment appropriate page count */
1346     if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
1347         page_buf->raw_count++;
1348     else
1349         page_buf->meta_count++;
1350 
1351     /* Insert entry in LRU */
1352     H5PB__INSERT_LRU(page_buf, page_entry)
1353 
1354 done:
1355     FUNC_LEAVE_NOAPI(ret_value)
1356 } /* end H5PB__insert_entry() */
1357 
1358 
1359 /*-------------------------------------------------------------------------
1360  * Function:    H5PB__make_space()
1361  *
1362  * Purpose: ???
1363  *
1364  *          This function was created without documentation.
1365  *          What follows is my best understanding of Mohamad's intent.
1366  *
1367  *          If necessary and if possible, evict a page from the page
1368  *          buffer to make space for the supplied page.  Depending on
1369  *        the page buffer configuration and contents, and the page
1370  *          supplied this may or may not be possible.
1371  *
1372  *                                             JRM -- 12/22/16
1373  *
1374  * Return:    Non-negative on success/Negative on failure
1375  *
1376  * Programmer:    Mohamad Chaarawi
1377  *
1378  *-------------------------------------------------------------------------
1379  */
1380 static htri_t
H5PB__make_space(H5F_t * f,H5PB_t * page_buf,H5FD_mem_t inserted_type)1381 H5PB__make_space(H5F_t *f, H5PB_t *page_buf, H5FD_mem_t inserted_type)
1382 {
1383     H5PB_entry_t *page_entry;   /* Pointer to page eviction candidate */
1384     htri_t ret_value = TRUE;    /* Return value */
1385 
1386     FUNC_ENTER_STATIC
1387 
1388     /* Sanity check */
1389     HDassert(f);
1390     HDassert(page_buf);
1391 
1392     /* Get oldest entry */
1393     page_entry = page_buf->LRU_tail_ptr;
1394 
1395     if(H5FD_MEM_DRAW == inserted_type) {
1396         /* If threshould is 100% metadata and page buffer is full of
1397            metadata, then we can't make space for raw data */
1398         if(0 == page_buf->raw_count && page_buf->min_meta_count == page_buf->meta_count) {
1399             HDassert(page_buf->meta_count * page_buf->page_size == page_buf->max_size);
1400             HGOTO_DONE(FALSE)
1401         } /* end if */
1402 
1403         /* check the metadata threshold before evicting metadata items */
1404         while(1) {
1405             if(page_entry->prev && H5F_MEM_PAGE_META == page_entry->type &&
1406                     page_buf->min_meta_count >= page_buf->meta_count)
1407                 page_entry = page_entry->prev;
1408             else
1409                 break;
1410         } /* end while */
1411     } /* end if */
1412     else {
1413         /* If threshould is 100% raw data and page buffer is full of
1414            raw data, then we can't make space for meta data */
1415         if(0 == page_buf->meta_count && page_buf->min_raw_count == page_buf->raw_count) {
1416             HDassert(page_buf->raw_count * page_buf->page_size == page_buf->max_size);
1417             HGOTO_DONE(FALSE)
1418         } /* end if */
1419 
1420         /* check the raw data threshold before evicting raw data items */
1421         while(1) {
1422             if(page_entry->prev && (H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type) &&
1423                     page_buf->min_raw_count >= page_buf->raw_count)
1424                 page_entry = page_entry->prev;
1425             else
1426                 break;
1427         } /* end while */
1428     } /* end else */
1429 
1430     /* Remove from page index */
1431     if(NULL == H5SL_remove(page_buf->slist_ptr, &(page_entry->addr)))
1432         HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "Tail Page Entry is not in skip list")
1433 
1434     /* Remove entry from LRU list */
1435     H5PB__REMOVE_LRU(page_buf, page_entry)
1436     HDassert(H5SL_count(page_buf->slist_ptr) == page_buf->LRU_list_len);
1437 
1438     /* Decrement appropriate page type counter */
1439     if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
1440         page_buf->raw_count--;
1441     else
1442         page_buf->meta_count--;
1443 
1444     /* Flush page if dirty */
1445     if(page_entry->is_dirty)
1446         if(H5PB__write_entry(f, page_entry) < 0)
1447             HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
1448 
1449     /* Update statistics */
1450     if(page_entry->type == H5F_MEM_PAGE_DRAW || H5F_MEM_PAGE_GHEAP == page_entry->type)
1451         page_buf->evictions[1]++;
1452     else
1453         page_buf->evictions[0]++;
1454 
1455     /* Release page */
1456     page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
1457     page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
1458 
1459 done:
1460     FUNC_LEAVE_NOAPI(ret_value)
1461 } /* end H5PB__make_space() */
1462 
1463 
1464 /*-------------------------------------------------------------------------
1465  * Function:    H5PB__write_entry()
1466  *
1467  * Purpose: ???
1468  *
1469  *          This function was created without documentation.
1470  *          What follows is my best understanding of Mohamad's intent.
1471  *
1472  *
1473  * Return:    Non-negative on success/Negative on failure
1474  *
1475  * Programmer:    Mohamad Chaarawi
1476  *
1477  *-------------------------------------------------------------------------
1478  */
1479 static herr_t
H5PB__write_entry(H5F_t * f,H5PB_entry_t * page_entry)1480 H5PB__write_entry(H5F_t *f, H5PB_entry_t *page_entry)
1481 {
1482     haddr_t eoa;                    /* Current EOA for the file */
1483     herr_t ret_value = SUCCEED;    /* Return value */
1484 
1485     FUNC_ENTER_STATIC
1486 
1487     /* Sanity check */
1488     HDassert(f);
1489     HDassert(page_entry);
1490 
1491     /* Retrieve the 'eoa' for the file */
1492     if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, (H5FD_mem_t)page_entry->type)))
1493         HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
1494 
1495     /* If the starting address of the page is larger than
1496      * the EOA, then the entire page is discarded without writing.
1497      */
1498     if(page_entry->addr <= eoa) {
1499         H5FD_t *file;                   /* File driver I/O info */
1500         size_t page_size = f->shared->page_buf->page_size;
1501 
1502         /* Adjust the page length if it exceeds the EOA */
1503         if((page_entry->addr + page_size) > eoa)
1504             page_size = (size_t)(eoa - page_entry->addr);
1505 
1506         /* Translate to file driver I/O info object */
1507         file = f->shared->lf;
1508 
1509         if(H5FD_write(file, (H5FD_mem_t)page_entry->type, page_entry->addr, page_size, page_entry->page_buf_ptr) < 0)
1510             HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
1511     } /* end if */
1512 
1513     page_entry->is_dirty = FALSE;
1514 
1515 done:
1516     FUNC_LEAVE_NOAPI(ret_value)
1517 } /* end H5PB__write_entry() */
1518 
1519