1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Copyright by The HDF Group. *
3 * Copyright by the Board of Trustees of the University of Illinois. *
4 * All rights reserved. *
5 * *
6 * This file is part of HDF5. The full HDF5 copyright notice, including *
7 * terms governing use, modification, and redistribution, is contained in *
8 * the COPYING file, which can be found at the root of the source code *
9 * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
10 * If you do not have access to either file, you may request a copy from *
11 * help@hdfgroup.org. *
12 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13
14 /*-------------------------------------------------------------------------
15 *
16 * Created: H5ACmpio.c
17 * Jun 20 2015
18 * Quincey Koziol <koziol@hdfgroup.org>
19 *
20 * Purpose: Functions in this file implement support for parallel
21 * I/O cache functionality
22 *
23 *-------------------------------------------------------------------------
24 */
25
26 /****************/
27 /* Module Setup */
28 /****************/
29
30 #include "H5ACmodule.h" /* This source code file is part of the H5AC module */
31 #define H5F_FRIEND /*suppress error about including H5Fpkg */
32
33
34 /***********/
35 /* Headers */
36 /***********/
37 #include "H5private.h" /* Generic Functions */
38 #include "H5ACpkg.h" /* Metadata cache */
39 #include "H5Cprivate.h" /* Cache */
40 #include "H5Eprivate.h" /* Error handling */
41 #include "H5Fpkg.h" /* Files */
42 #include "H5MMprivate.h" /* Memory management */
43
44 #ifdef H5_HAVE_PARALLEL
45
46 /****************/
47 /* Local Macros */
48 /****************/
49
50
51 /******************/
52 /* Local Typedefs */
53 /******************/
54
55 /****************************************************************************
56 *
57 * structure H5AC_slist_entry_t
58 *
59 * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t
60 * and the cleaned entry list maintained via the c_slist_ptr field of
61 * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned
62 * entries. Unfortunately, the slist code makes us define a dynamically
63 * allocated structure to store these offsets in. This structure serves
64 * that purpose. Its fields are as follows:
65 *
66 * addr: file offset of a metadata entry. Entries are added to this
67 * list (if they aren't there already) when they are marked
68 * dirty in an unprotect, inserted, or moved. They are
69 * removed when they appear in a clean entries broadcast.
70 *
71 ****************************************************************************/
72 typedef struct H5AC_slist_entry_t
73 {
74 haddr_t addr;
75 } H5AC_slist_entry_t;
76
77 /* User data for address list building callbacks */
78 typedef struct H5AC_addr_list_ud_t
79 {
80 H5AC_aux_t * aux_ptr; /* 'Auxiliary' parallel cache info */
81 haddr_t * addr_buf_ptr; /* Array to store addresses */
82 unsigned u; /* Counter for position in array */
83 } H5AC_addr_list_ud_t;
84
85
86 /********************/
87 /* Local Prototypes */
88 /********************/
89
90 static herr_t H5AC__broadcast_candidate_list(H5AC_t *cache_ptr,
91 unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
92 static herr_t H5AC__broadcast_clean_list(H5AC_t *cache_ptr);
93 static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr,
94 H5AC_aux_t *aux_ptr, int sync_point_op);
95 static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr,
96 unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
97 static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id);
98 static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f,
99 hid_t dxpl_id);
100 static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr,
101 haddr_t **haddr_buf_ptr_ptr);
102 static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr,
103 unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
104 static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id);
105 static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates,
106 haddr_t *candidates_list_ptr);
107 static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id);
108 static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
109 static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id);
110 static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
111
112
113 /*********************/
114 /* Package Variables */
115 /*********************/
116
/* Free list used for allocating and releasing H5AC_aux_t structs.
 * (Declared without 'static'; it sits in the package variables section.)
 */
H5FL_DEFINE(H5AC_aux_t);
119
120
121 /*****************************/
122 /* Library Private Variables */
123 /*****************************/
124
125
126 /*******************/
127 /* Local Variables */
128 /*******************/
129
/* File-local free list used for allocating and releasing the
 * H5AC_slist_entry_t items stored in this file's skip lists.
 */
H5FL_DEFINE_STATIC(H5AC_slist_entry_t);
132
133
134
135 /*-------------------------------------------------------------------------
136 * Function: H5AC__set_sync_point_done_callback
137 *
138 * Purpose: Set the value of the sync_point_done callback. This
139 * callback is used by the parallel test code to verify
140 * that the expected writes and only the expected writes
141 * take place during a sync point.
142 *
143 * Return: Non-negative on success/Negative on failure
144 *
145 * Programmer: John Mainzer
146 * 5/9/10
147 *
148 *-------------------------------------------------------------------------
149 */
150 herr_t
H5AC__set_sync_point_done_callback(H5C_t * cache_ptr,void (* sync_point_done)(unsigned num_writes,haddr_t * written_entries_tbl))151 H5AC__set_sync_point_done_callback(H5C_t * cache_ptr,
152 void (* sync_point_done)(unsigned num_writes, haddr_t * written_entries_tbl))
153 {
154 H5AC_aux_t * aux_ptr;
155
156 FUNC_ENTER_PACKAGE_NOERR
157
158 /* Sanity checks */
159 HDassert(cache_ptr);
160 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
161 HDassert(aux_ptr != NULL);
162 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
163
164 aux_ptr->sync_point_done = sync_point_done;
165
166 FUNC_LEAVE_NOAPI(SUCCEED)
167 } /* H5AC__set_sync_point_done_callback() */
168
169
170 /*-------------------------------------------------------------------------
171 * Function: H5AC__set_write_done_callback
172 *
173 * Purpose: Set the value of the write_done callback. This callback
174 * is used to improve performance of the parallel test bed
175 * for the cache.
176 *
177 * Return: Non-negative on success/Negative on failure
178 *
179 * Programmer: John Mainzer
180 * 5/11/06
181 *
182 *-------------------------------------------------------------------------
183 */
184 herr_t
H5AC__set_write_done_callback(H5C_t * cache_ptr,void (* write_done)(void))185 H5AC__set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void))
186 {
187 H5AC_aux_t * aux_ptr;
188
189 FUNC_ENTER_PACKAGE_NOERR
190
191 /* Sanity checks */
192 HDassert(cache_ptr);
193 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
194 HDassert(aux_ptr != NULL);
195 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
196
197 aux_ptr->write_done = write_done;
198
199 FUNC_LEAVE_NOAPI(SUCCEED)
200 } /* H5AC__set_write_done_callback() */
201
202
203 /*-------------------------------------------------------------------------
204 * Function: H5AC_add_candidate()
205 *
206 * Purpose: Add the supplied metadata entry address to the candidate
207 * list. Verify that each entry added does not appear in
208 * the list prior to its insertion.
209 *
210 * This function is intended for used in constructing list
211 * of entried to be flushed during sync points. It shouldn't
212 * be called anywhere else.
213 *
214 * Return: Non-negative on success/Negative on failure
215 *
216 * Programmer: John Mainzer
217 * 3/17/10
218 *
219 *-------------------------------------------------------------------------
220 */
221 herr_t
H5AC_add_candidate(H5AC_t * cache_ptr,haddr_t addr)222 H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr)
223 {
224 H5AC_aux_t * aux_ptr;
225 H5AC_slist_entry_t * slist_entry_ptr = NULL;
226 herr_t ret_value = SUCCEED; /* Return value */
227
228 FUNC_ENTER_NOAPI(FAIL)
229
230 /* Sanity checks */
231 HDassert(cache_ptr != NULL);
232 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
233 HDassert(aux_ptr != NULL);
234 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
235 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
236 HDassert(aux_ptr->candidate_slist_ptr != NULL);
237
238 /* Construct an entry for the supplied address, and insert
239 * it into the candidate slist.
240 */
241 if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
242 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate candidate slist entry")
243 slist_entry_ptr->addr = addr;
244
245 if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
246 HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist")
247
248 done:
249 /* Clean up on error */
250 if(ret_value < 0)
251 if(slist_entry_ptr)
252 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
253
254 FUNC_LEAVE_NOAPI(ret_value)
255 } /* H5AC_add_candidate() */
256
257
258 /*-------------------------------------------------------------------------
259 *
260 * Function: H5AC__broadcast_candidate_list()
261 *
262 * Purpose: Broadcast the contents of the process 0 candidate entry
263 * slist. In passing, also remove all entries from said
264 * list. As the application of this will be handled by
265 * the same functions on all processes, construct and
266 * return a copy of the list in the same format as that
267 * received by the other processes. Note that if this
268 * copy is returned in *haddr_buf_ptr_ptr, the caller
269 * must free it.
270 *
271 * This function must only be called by the process with
272 * MPI_rank 0.
273 *
274 * Return SUCCEED on success, and FAIL on failure.
275 *
276 * Return: Non-negative on success/Negative on failure.
277 *
278 * Programmer: John Mainzer, 7/1/05
279 *
280 *-------------------------------------------------------------------------
281 */
282 static herr_t
H5AC__broadcast_candidate_list(H5AC_t * cache_ptr,unsigned * num_entries_ptr,haddr_t ** haddr_buf_ptr_ptr)283 H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, unsigned *num_entries_ptr,
284 haddr_t **haddr_buf_ptr_ptr)
285 {
286 H5AC_aux_t * aux_ptr = NULL;
287 haddr_t * haddr_buf_ptr = NULL;
288 int mpi_result;
289 unsigned num_entries;
290 herr_t ret_value = SUCCEED; /* Return value */
291
292 FUNC_ENTER_STATIC
293
294 /* Sanity checks */
295 HDassert(cache_ptr != NULL);
296 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
297 HDassert(aux_ptr != NULL);
298 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
299 HDassert(aux_ptr->mpi_rank == 0);
300 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
301 HDassert(aux_ptr->candidate_slist_ptr != NULL);
302 HDassert(num_entries_ptr != NULL);
303 HDassert(*num_entries_ptr == 0);
304 HDassert(haddr_buf_ptr_ptr != NULL);
305 HDassert(*haddr_buf_ptr_ptr == NULL);
306
307 /* First broadcast the number of entries in the list so that the
308 * receivers can set up buffers to receive them. If there aren't
309 * any, we are done.
310 */
311 num_entries = (unsigned)H5SL_count(aux_ptr->candidate_slist_ptr);
312 if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_UNSIGNED, 0, aux_ptr->mpi_comm)))
313 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
314
315 if(num_entries > 0) {
316 size_t buf_size = 0;
317 unsigned chk_num_entries = 0;
318
319 /* convert the candidate list into the format we
320 * are used to receiving from process 0, and also load it
321 * into a buffer for transmission.
322 */
323 if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0)
324 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
325 HDassert(chk_num_entries == num_entries);
326 HDassert(haddr_buf_ptr != NULL);
327
328 /* Now broadcast the list of candidate entries */
329 buf_size = sizeof(haddr_t) * num_entries;
330 if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
331 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
332 } /* end if */
333
334 /* Pass the number of entries and the buffer pointer
335 * back to the caller. Do this so that we can use the same code
336 * to apply the candidate list to all the processes.
337 */
338 *num_entries_ptr = num_entries;
339 *haddr_buf_ptr_ptr = haddr_buf_ptr;
340
341 done:
342 if(ret_value < 0)
343 if(haddr_buf_ptr)
344 haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
345
346 FUNC_LEAVE_NOAPI(ret_value)
347 } /* H5AC__broadcast_candidate_list() */
348
349
350 /*-------------------------------------------------------------------------
351 *
352 * Function: H5AC__broadcast_clean_list_cb()
353 *
354 * Purpose: Skip list callback for building array of addresses for
355 * broadcasting the clean list.
356 *
357 * Return: Non-negative on success/Negative on failure.
358 *
359 * Programmer: Quincey Koziol, 6/12/15
360 *
361 *-------------------------------------------------------------------------
362 */
363 static herr_t
H5AC__broadcast_clean_list_cb(void * _item,void H5_ATTR_UNUSED * _key,void * _udata)364 H5AC__broadcast_clean_list_cb(void *_item, void H5_ATTR_UNUSED *_key,
365 void *_udata)
366 {
367 H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
368 H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
369 haddr_t addr;
370
371 FUNC_ENTER_STATIC_NOERR
372
373 /* Sanity checks */
374 HDassert(slist_entry_ptr);
375 HDassert(udata);
376
377 /* Store the entry's address in the buffer */
378 addr = slist_entry_ptr->addr;
379 udata->addr_buf_ptr[udata->u] = addr;
380 udata->u++;
381
382 /* now release the entry */
383 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
384
385 /* and also remove the matching entry from the dirtied list
386 * if it exists.
387 */
388 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(udata->aux_ptr->d_slist_ptr, (void *)(&addr))))
389 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
390
391 FUNC_LEAVE_NOAPI(SUCCEED)
392 } /* H5AC__broadcast_clean_list_cb() */
393
394
395 /*-------------------------------------------------------------------------
396 *
397 * Function: H5AC__broadcast_clean_list()
398 *
399 * Purpose: Broadcast the contents of the process 0 cleaned entry
400 * slist. In passing, also remove all entries from said
401 * list, and also remove any matching entries from the dirtied
402 * slist.
403 *
404 * This function must only be called by the process with
405 * MPI_rank 0.
406 *
407 * Return SUCCEED on success, and FAIL on failure.
408 *
409 * Return: Non-negative on success/Negative on failure.
410 *
411 * Programmer: John Mainzer, 7/1/05
412 *
413 *-------------------------------------------------------------------------
414 */
415 static herr_t
H5AC__broadcast_clean_list(H5AC_t * cache_ptr)416 H5AC__broadcast_clean_list(H5AC_t * cache_ptr)
417 {
418 haddr_t * addr_buf_ptr = NULL;
419 H5AC_aux_t * aux_ptr;
420 int mpi_result;
421 unsigned num_entries = 0;
422 herr_t ret_value = SUCCEED; /* Return value */
423
424 FUNC_ENTER_STATIC
425
426 /* Sanity checks */
427 HDassert(cache_ptr != NULL);
428 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
429 HDassert(aux_ptr != NULL);
430 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
431 HDassert(aux_ptr->mpi_rank == 0);
432 HDassert(aux_ptr->c_slist_ptr != NULL);
433
434 /* First broadcast the number of entries in the list so that the
435 * receives can set up a buffer to receive them. If there aren't
436 * any, we are done.
437 */
438 num_entries = (unsigned)H5SL_count(aux_ptr->c_slist_ptr);
439 if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_UNSIGNED, 0, aux_ptr->mpi_comm)))
440 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
441
442 if(num_entries > 0) {
443 H5AC_addr_list_ud_t udata;
444 size_t buf_size;
445
446 /* allocate a buffer to store the list of entry base addresses in */
447 buf_size = sizeof(haddr_t) * num_entries;
448 if(NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
449 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer")
450
451 /* Set up user data for callback */
452 udata.aux_ptr = aux_ptr;
453 udata.addr_buf_ptr = addr_buf_ptr;
454 udata.u = 0;
455
456 /* Free all the clean list entries, building the address list in the callback */
457 /* (Callback also removes the matching entries from the dirtied list) */
458 if(H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0)
459 HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries")
460
461 /* Now broadcast the list of cleaned entries */
462 if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)addr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
463 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
464 } /* end if */
465
466 /* if it is defined, call the sync point done callback. Note
467 * that this callback is defined purely for testing purposes,
468 * and should be undefined under normal operating circumstances.
469 */
470 if(aux_ptr->sync_point_done)
471 (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr);
472
473 done:
474 if(addr_buf_ptr)
475 addr_buf_ptr = (haddr_t *)H5MM_xfree((void *)addr_buf_ptr);
476
477 FUNC_LEAVE_NOAPI(ret_value)
478 } /* H5AC__broadcast_clean_list() */
479
480
481 /*-------------------------------------------------------------------------
482 * Function: H5AC__construct_candidate_list()
483 *
484 * Purpose: In the parallel case when the metadata_write_strategy is
485 * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses
486 * this function to construct the list of cache entries to
487 * be flushed. This list is then propagated to the other
488 * caches, and then flushed in a distributed fashion.
489 *
490 * The sync_point_op parameter is used to determine the extent
491 * of the flush.
492 *
493 * Return: Non-negative on success/Negative on failure
494 *
495 * Programmer: John Mainzer
496 * 3/17/10
497 *
498 *-------------------------------------------------------------------------
499 */
500 static herr_t
H5AC__construct_candidate_list(H5AC_t * cache_ptr,H5AC_aux_t * aux_ptr,int sync_point_op)501 H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr,
502 int sync_point_op)
503 {
504 herr_t ret_value = SUCCEED; /* Return value */
505
506 FUNC_ENTER_STATIC
507
508 /* Sanity checks */
509 HDassert(cache_ptr != NULL);
510 HDassert(aux_ptr != NULL);
511 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
512 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
513 HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE) || (aux_ptr->mpi_rank == 0));
514 HDassert(aux_ptr->d_slist_ptr != NULL);
515 HDassert(aux_ptr->c_slist_ptr != NULL);
516 HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
517 HDassert(aux_ptr->candidate_slist_ptr != NULL);
518 HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
519 HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || (sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE));
520
521 switch(sync_point_op) {
522 case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
523 if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0)
524 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.")
525 break;
526
527 case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
528 if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0)
529 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.")
530 break;
531
532 default:
533 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.")
534 break;
535 } /* end switch */
536
537 done:
538 FUNC_LEAVE_NOAPI(ret_value)
539 } /* H5AC__construct_candidate_list() */
540
541
542 /*-------------------------------------------------------------------------
543 *
544 * Function: H5AC__copy_candidate_list_to_buffer_cb
545 *
546 * Purpose: Skip list callback for building array of addresses for
547 * broadcasting the candidate list.
548 *
549 * Return: Return SUCCEED on success, and FAIL on failure.
550 *
551 * Programmer: Quincey Koziol, 6/12/15
552 *
553 *-------------------------------------------------------------------------
554 */
555 static herr_t
H5AC__copy_candidate_list_to_buffer_cb(void * _item,void H5_ATTR_UNUSED * _key,void * _udata)556 H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key,
557 void *_udata)
558 {
559 H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
560 H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
561
562 FUNC_ENTER_STATIC_NOERR
563
564 /* Sanity checks */
565 HDassert(slist_entry_ptr);
566 HDassert(udata);
567
568 /* Store the entry's address in the buffer */
569 udata->addr_buf_ptr[udata->u] = slist_entry_ptr->addr;
570 udata->u++;
571
572 /* now release the entry */
573 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
574
575 FUNC_LEAVE_NOAPI(SUCCEED)
576 } /* H5AC__copy_candidate_list_to_buffer_cb() */
577
578
579 /*-------------------------------------------------------------------------
580 *
581 * Function: H5AC__copy_candidate_list_to_buffer
582 *
583 * Purpose: Allocate buffer(s) and copy the contents of the candidate
584 * entry slist into it (them). In passing, remove all
585 * entries from the candidate slist. Note that the
586 * candidate slist must not be empty.
587 *
588 * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer
589 * of MPI_Offset, copy the contents of the candidate
590 * entry list into it with the appropriate conversions,
591 * and return the base address of the buffer in
592 * *MPI_Offset_buf_ptr. Note that this is the buffer
593 * used by process 0 to transmit the list of entries to
594 * be flushed to all other processes (in this file group).
595 *
596 * Similarly, allocate a buffer of haddr_t, load the contents
597 * of the candidate list into this buffer, and return its
598 * base address in *haddr_buf_ptr_ptr. Note that this
599 * latter buffer is constructed unconditionally.
600 *
601 * In passing, also remove all entries from the candidate
602 * entry slist.
603 *
604 * Return: Return SUCCEED on success, and FAIL on failure.
605 *
606 * Programmer: John Mainzer, 4/19/10
607 *
608 *-------------------------------------------------------------------------
609 */
610 static herr_t
H5AC__copy_candidate_list_to_buffer(const H5AC_t * cache_ptr,unsigned * num_entries_ptr,haddr_t ** haddr_buf_ptr_ptr)611 H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, unsigned *num_entries_ptr,
612 haddr_t **haddr_buf_ptr_ptr)
613 {
614 H5AC_aux_t * aux_ptr = NULL;
615 H5AC_addr_list_ud_t udata;
616 haddr_t * haddr_buf_ptr = NULL;
617 size_t buf_size;
618 unsigned num_entries = 0;
619 herr_t ret_value = SUCCEED; /* Return value */
620
621 FUNC_ENTER_STATIC
622
623 /* Sanity checks */
624 HDassert(cache_ptr != NULL);
625 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
626 HDassert(aux_ptr != NULL);
627 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
628 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
629 HDassert(aux_ptr->candidate_slist_ptr != NULL);
630 HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) > 0);
631 HDassert(num_entries_ptr != NULL);
632 HDassert(*num_entries_ptr == 0);
633 HDassert(haddr_buf_ptr_ptr != NULL);
634 HDassert(*haddr_buf_ptr_ptr == NULL);
635
636 num_entries = (unsigned)H5SL_count(aux_ptr->candidate_slist_ptr);
637
638 /* allocate a buffer(s) to store the list of candidate entry
639 * base addresses in
640 */
641 buf_size = sizeof(haddr_t) * num_entries;
642 if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
643 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
644
645 /* Set up user data for callback */
646 udata.aux_ptr = aux_ptr;
647 udata.addr_buf_ptr = haddr_buf_ptr;
648 udata.u = 0;
649
650 /* Free all the candidate list entries, building the address list in the callback */
651 if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0)
652 HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries")
653
654 /* Pass the number of entries and the buffer pointer
655 * back to the caller.
656 */
657 *num_entries_ptr = num_entries;
658 *haddr_buf_ptr_ptr = haddr_buf_ptr;
659
660 done:
661 if(ret_value < 0)
662 if(haddr_buf_ptr)
663 haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
664
665 FUNC_LEAVE_NOAPI(ret_value)
666 } /* H5AC__copy_candidate_list_to_buffer() */
667
668
669 /*-------------------------------------------------------------------------
670 *
671 * Function: H5AC__log_deleted_entry()
672 *
673 * Purpose: Log an entry which has been deleted.
674 *
675 * Only called for mpi_rank 0. We must make sure that the entry
676 * doesn't appear in the cleaned or dirty entry lists.
677 *
678 * Return SUCCEED on success, and FAIL on failure.
679 *
680 * Return: Non-negative on success/Negative on failure.
681 *
682 * Programmer: John Mainzer, 6/29/05
683 *
684 *-------------------------------------------------------------------------
685 */
686 herr_t
H5AC__log_deleted_entry(const H5AC_info_t * entry_ptr)687 H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr)
688 {
689 H5AC_t * cache_ptr;
690 H5AC_aux_t * aux_ptr;
691 H5AC_slist_entry_t * slist_entry_ptr = NULL;
692 haddr_t addr;
693
694 FUNC_ENTER_PACKAGE_NOERR
695
696 /* Sanity checks */
697 HDassert(entry_ptr);
698 addr = entry_ptr->addr;
699 cache_ptr = entry_ptr->cache_ptr;
700 HDassert(cache_ptr != NULL);
701 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
702 HDassert(aux_ptr != NULL);
703 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
704 HDassert(aux_ptr->mpi_rank == 0);
705 HDassert(aux_ptr->d_slist_ptr != NULL);
706 HDassert(aux_ptr->c_slist_ptr != NULL);
707
708 /* if the entry appears in the dirtied entry slist, remove it. */
709 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
710 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
711
712 /* if the entry appears in the cleaned entry slist, remove it. */
713 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
714 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
715
716 FUNC_LEAVE_NOAPI(SUCCEED)
717 } /* H5AC__log_deleted_entry() */
718
719
720 /*-------------------------------------------------------------------------
721 *
722 * Function: H5AC__log_dirtied_entry()
723 *
724 * Purpose: Update the dirty_bytes count for a newly dirtied entry.
725 *
726 * If mpi_rank isn't 0, this simply means adding the size
727 * of the entries to the dirty_bytes count.
728 *
729 * If mpi_rank is 0, we must first check to see if the entry
730 * appears in the dirty entries slist. If it is, do nothing.
731 * If it isn't, add the size to the dirty_bytes count, add the
732 * entry to the dirty entries slist, and remove it from the
733 * cleaned list (if it is present there).
734 *
735 * Return: Non-negative on success/Negative on failure.
736 *
737 * Programmer: John Mainzer, 6/29/05
738 *
739 *-------------------------------------------------------------------------
740 */
741 herr_t
H5AC__log_dirtied_entry(const H5AC_info_t * entry_ptr)742 H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr)
743 {
744 H5AC_t * cache_ptr;
745 H5AC_aux_t * aux_ptr;
746 herr_t ret_value = SUCCEED; /* Return value */
747
748 FUNC_ENTER_PACKAGE
749
750 /* Sanity checks */
751 HDassert(entry_ptr);
752 HDassert(entry_ptr->is_dirty == FALSE);
753 cache_ptr = entry_ptr->cache_ptr;
754 HDassert(cache_ptr != NULL);
755 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
756 HDassert(aux_ptr != NULL);
757 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
758
759 if(aux_ptr->mpi_rank == 0) {
760 H5AC_slist_entry_t *slist_entry_ptr;
761 haddr_t addr = entry_ptr->addr;
762
763 /* Sanity checks */
764 HDassert(aux_ptr->d_slist_ptr != NULL);
765 HDassert(aux_ptr->c_slist_ptr != NULL);
766
767 if(NULL == H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) {
768 /* insert the address of the entry in the dirty entry list, and
769 * add its size to the dirty_bytes count.
770 */
771 if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
772 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
773 slist_entry_ptr->addr = addr;
774
775 if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
776 HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
777
778 aux_ptr->dirty_bytes += entry_ptr->size;
779 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
780 aux_ptr->unprotect_dirty_bytes += entry_ptr->size;
781 aux_ptr->unprotect_dirty_bytes_updates += 1;
782 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
783 } /* end if */
784
785 /* the entry is dirty. If it exists on the cleaned entries list,
786 * remove it.
787 */
788 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
789 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
790 } /* end if */
791 else {
792 aux_ptr->dirty_bytes += entry_ptr->size;
793 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
794 aux_ptr->unprotect_dirty_bytes += entry_size;
795 aux_ptr->unprotect_dirty_bytes_updates += 1;
796 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
797 } /* end else */
798
799 done:
800 FUNC_LEAVE_NOAPI(ret_value)
801 } /* H5AC__log_dirtied_entry() */
802
803
804 /*-------------------------------------------------------------------------
805 *
806 * Function: H5AC__log_cleaned_entry()
807 *
808 * Purpose: Treat this operation as a 'clear' and remove the entry
809 * from both the cleaned and dirtied lists if it is present.
810 * Reduces the dirty_bytes count by the size of the entry.
811 *
812 * Return: Non-negative on success/Negative on failure.
813 *
814 * Programmer: Quincey Koziol
815 * 7/23/16
816 *
817 *-------------------------------------------------------------------------
818 */
819 herr_t
H5AC__log_cleaned_entry(const H5AC_info_t * entry_ptr)820 H5AC__log_cleaned_entry(const H5AC_info_t *entry_ptr)
821 {
822 H5AC_t * cache_ptr;
823 H5AC_aux_t * aux_ptr;
824 herr_t ret_value = SUCCEED; /* Return value */
825
826 FUNC_ENTER_PACKAGE
827
828 /* Sanity check */
829 HDassert(entry_ptr);
830 HDassert(entry_ptr->is_dirty == FALSE);
831 cache_ptr = entry_ptr->cache_ptr;
832 HDassert(cache_ptr != NULL);
833 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
834 HDassert(aux_ptr != NULL);
835 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
836
837 if(aux_ptr->mpi_rank == 0) {
838 H5AC_slist_entry_t *slist_entry_ptr;
839 haddr_t addr = entry_ptr->addr;
840
841 /* Sanity checks */
842 HDassert(aux_ptr->d_slist_ptr != NULL);
843 HDassert(aux_ptr->c_slist_ptr != NULL);
844
845 /* Remove it from both the cleaned list and the dirtied list. */
846 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
847 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
848 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
849 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
850
851 } /* end if */
852
853 /* Decrement the dirty byte count */
854 aux_ptr->dirty_bytes -= entry_ptr->size;
855
856 done:
857 FUNC_LEAVE_NOAPI(ret_value)
858 } /* H5AC__log_cleaned_entry() */
859
860
861 /*-------------------------------------------------------------------------
862 *
863 * Function: H5AC__log_flushed_entry()
864 *
865 * Purpose: Update the clean entry slist for the flush of an entry --
866 * specifically, if the entry has been cleared, remove it
867 * from both the cleaned and dirtied lists if it is present.
868 * Otherwise, if the entry was dirty, insert the indicated
869 * entry address in the clean slist if it isn't there already.
870 *
871 * This function is only used in PHDF5, and should only
872 * be called for the process with mpi rank 0.
873 *
874 * Return SUCCEED on success, and FAIL on failure.
875 *
876 * Return: Non-negative on success/Negative on failure.
877 *
878 * Programmer: John Mainzer, 6/29/05
879 *
880 *-------------------------------------------------------------------------
881 */
882 herr_t
H5AC__log_flushed_entry(H5C_t * cache_ptr,haddr_t addr,hbool_t was_dirty,unsigned flags)883 H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty,
884 unsigned flags)
885 {
886 hbool_t cleared;
887 H5AC_aux_t * aux_ptr;
888 H5AC_slist_entry_t * slist_entry_ptr = NULL;
889 herr_t ret_value = SUCCEED; /* Return value */
890
891 FUNC_ENTER_PACKAGE
892
893 /* Sanity check */
894 HDassert(cache_ptr != NULL);
895 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
896 HDassert(aux_ptr != NULL);
897 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
898 HDassert(aux_ptr->mpi_rank == 0);
899 HDassert(aux_ptr->c_slist_ptr != NULL);
900
901 /* Set local flags */
902 cleared = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0);
903
904 if(cleared) {
905 /* If the entry has been cleared, must remove it from both the
906 * cleaned list and the dirtied list.
907 */
908 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
909 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
910 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
911 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
912 } /* end if */
913 else if(was_dirty) {
914 if(NULL == H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) {
915 /* insert the address of the entry in the clean entry list. */
916 if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
917 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate clean slist entry .")
918 slist_entry_ptr->addr = addr;
919
920 if(H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
921 HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into clean entry slist.")
922 } /* end if */
923 } /* end else-if */
924
925 done:
926 FUNC_LEAVE_NOAPI(ret_value)
927 } /* H5AC__log_flushed_entry() */
928
929
930 /*-------------------------------------------------------------------------
931 *
932 * Function: H5AC__log_inserted_entry()
933 *
934 * Purpose: Update the dirty_bytes count for a newly inserted entry.
935 *
936 * If mpi_rank isnt 0, this simply means adding the size
937 * of the entry to the dirty_bytes count.
938 *
939 * If mpi_rank is 0, we must also add the entry to the
940 * dirty entries slist.
941 *
942 * Return SUCCEED on success, and FAIL on failure.
943 *
944 * Return: Non-negative on success/Negative on failure.
945 *
946 * Programmer: John Mainzer, 6/30/05
947 *
948 *-------------------------------------------------------------------------
949 */
950 herr_t
H5AC__log_inserted_entry(const H5AC_info_t * entry_ptr)951 H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr)
952 {
953 H5AC_t * cache_ptr;
954 H5AC_aux_t * aux_ptr;
955 herr_t ret_value = SUCCEED; /* Return value */
956
957 FUNC_ENTER_PACKAGE
958
959 /* Sanity checks */
960 HDassert(entry_ptr);
961 cache_ptr = entry_ptr->cache_ptr;
962 HDassert(cache_ptr != NULL);
963 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
964 HDassert(aux_ptr != NULL);
965 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
966
967 if(aux_ptr->mpi_rank == 0) {
968 H5AC_slist_entry_t *slist_entry_ptr;
969
970 HDassert(aux_ptr->d_slist_ptr != NULL);
971 HDassert(aux_ptr->c_slist_ptr != NULL);
972
973 /* Entry to insert should not be in dirty list currently */
974 if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (const void *)(&entry_ptr->addr)))
975 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.")
976
977 /* insert the address of the entry in the dirty entry list, and
978 * add its size to the dirty_bytes count.
979 */
980 if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
981 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
982 slist_entry_ptr->addr = entry_ptr->addr;
983 if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
984 HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
985
986 /* Entry to insert should not be in clean list either */
987 if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (const void *)(&entry_ptr->addr)))
988 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.")
989 } /* end if */
990
991 aux_ptr->dirty_bytes += entry_ptr->size;
992
993 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
994 aux_ptr->insert_dirty_bytes += size;
995 aux_ptr->insert_dirty_bytes_updates += 1;
996 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
997
998 done:
999 FUNC_LEAVE_NOAPI(ret_value)
1000 } /* H5AC__log_inserted_entry() */
1001
1002
1003 /*-------------------------------------------------------------------------
1004 *
1005 * Function: H5AC__log_moved_entry()
1006 *
1007 * Purpose: Update the dirty_bytes count for a moved entry.
1008 *
1009 * WARNING
1010 *
1011 * At present, the way that the move call is used ensures
1012 * that the moved entry is present in all caches by
1013 * moving in a collective operation and immediately after
1014 * unprotecting the target entry.
1015 *
1016 * This function uses this invariant, and will cause arcane
1017 * failures if it is not met. If maintaining this invariant
1018 * becomes impossible, we will have to rework this function
1019 * extensively, and likely include a bit of IPC for
1020 * synchronization. A better option might be to subsume
1021 * move in the unprotect operation.
1022 *
1023 * Given that the target entry is in all caches, the function
1024 * proceeds as follows:
1025 *
1026 * For processes with mpi rank other 0, it simply checks to
1027 * see if the entry was dirty prior to the move, and adds
1028 * the entries size to the dirty bytes count.
1029 *
1030 * In the process with mpi rank 0, the function first checks
1031 * to see if the entry was dirty prior to the move. If it
1032 * was, and if the entry doesn't appear in the dirtied list
1033 * under its old address, it adds the entry's size to the
1034 * dirty bytes count.
1035 *
1036 * The rank 0 process then removes any references to the
1037 * entry under its old address from the cleands and dirtied
1038 * lists, and inserts an entry in the dirtied list under the
1039 * new address.
1040 *
1041 * Return SUCCEED on success, and FAIL on failure.
1042 *
1043 * Return: Non-negative on success/Negative on failure.
1044 *
1045 * Programmer: John Mainzer, 6/30/05
1046 *
1047 *-------------------------------------------------------------------------
1048 */
1049 herr_t
H5AC__log_moved_entry(const H5F_t * f,haddr_t old_addr,haddr_t new_addr)1050 H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, haddr_t new_addr)
1051 {
1052 H5AC_t * cache_ptr;
1053 H5AC_aux_t * aux_ptr;
1054 hbool_t entry_in_cache;
1055 hbool_t entry_dirty;
1056 size_t entry_size;
1057 herr_t ret_value = SUCCEED; /* Return value */
1058
1059 FUNC_ENTER_PACKAGE
1060
1061 /* Sanity checks */
1062 HDassert(f);
1063 HDassert(f->shared);
1064 cache_ptr = (H5AC_t *)f->shared->cache;
1065 HDassert(cache_ptr);
1066 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1067 HDassert(aux_ptr != NULL);
1068 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1069
1070 /* get entry status, size, etc here */
1071 if(H5C_get_entry_status(f, old_addr, &entry_size, &entry_in_cache,
1072 &entry_dirty, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
1073 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.")
1074 if(!entry_in_cache)
1075 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.")
1076
1077 if(aux_ptr->mpi_rank == 0) {
1078 H5AC_slist_entry_t * slist_entry_ptr;
1079
1080 HDassert(aux_ptr->d_slist_ptr != NULL);
1081 HDassert(aux_ptr->c_slist_ptr != NULL);
1082
1083 /* if the entry appears in the cleaned entry slist, under its old
1084 * address, remove it.
1085 */
1086 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr))))
1087 slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
1088
1089 /* if the entry appears in the dirtied entry slist under its old
1090 * address, remove it, but don't free it. Set addr to new_addr.
1091 */
1092 if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr))))
1093 slist_entry_ptr->addr = new_addr;
1094 else {
1095 /* otherwise, allocate a new entry that is ready
1096 * for insertion, and increment dirty_bytes.
1097 *
1098 * Note that the fact that the entry wasn't in the dirtied
1099 * list under its old address implies that it must have
1100 * been clean to start with.
1101 */
1102 HDassert(!entry_dirty);
1103 if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
1104 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
1105 slist_entry_ptr->addr = new_addr;
1106
1107 aux_ptr->dirty_bytes += entry_size;
1108
1109 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
1110 aux_ptr->move_dirty_bytes += entry_size;
1111 aux_ptr->move_dirty_bytes_updates += 1;
1112 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
1113 } /* end else */
1114
1115 /* insert / reinsert the entry in the dirty slist */
1116 if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
1117 HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
1118 } /* end if */
1119 else if(!entry_dirty) {
1120 aux_ptr->dirty_bytes += entry_size;
1121
1122 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
1123 aux_ptr->move_dirty_bytes += entry_size;
1124 aux_ptr->move_dirty_bytes_updates += 1;
1125 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
1126 } /* end else-if */
1127
1128 done:
1129 FUNC_LEAVE_NOAPI(ret_value)
1130 } /* H5AC__log_moved_entry() */
1131
1132
1133 /*-------------------------------------------------------------------------
1134 * Function: H5AC__propagate_and_apply_candidate_list
1135 *
1136 * Purpose: Prior to the addition of support for multiple metadata
1137 * write strategies, in PHDF5, only the metadata cache with
1138 * mpi rank 0 was allowed to write to file. All other
1139 * metadata caches on processes with rank greater than 0
1140 * were required to retain dirty entries until they were
1141 * notified that the entry was clean.
1142 *
1143 * This constraint is relaxed with the distributed
1144 * metadata write strategy, in which a list of candidate
1145 * metadata cache entries is constructed by the process 0
1146 * cache and then distributed to the caches of all the other
1147 * processes. Once the listed is distributed, many (if not
1148 * all) processes writing writing a unique subset of the
1149 * entries, and marking the remainder clean. The subsets
1150 * are chosen so that each entry in the list of candidates
1151 * is written by exactly one cache, and all entries are
1152 * marked as being clean in all caches.
1153 *
1154 * While the list of candidate cache entries is prepared
1155 * elsewhere, this function is the main routine for distributing
1156 * and applying the list. It must be run simultaniously on
1157 * all processes that have the relevant file open. To ensure
1158 * proper synchronization, there is a barrier at the beginning
1159 * of this function.
1160 *
1161 * At present, this function is called under one of two
1162 * circumstances:
1163 *
1164 * 1) Dirty byte creation exceeds some user specified value.
1165 *
1166 * While metadata reads may occur independently, all
1167 * operations writing metadata must be collective. Thus
1168 * all metadata caches see the same sequence of operations,
1169 * and therefore the same dirty data creation.
1170 *
1171 * This fact is used to synchronize the caches for purposes
1172 * of propagating the list of candidate entries, by simply
1173 * calling this function from all caches whenever some user
1174 * specified threshold on dirty data is exceeded. (the
1175 * process 0 cache creates the candidate list just before
1176 * calling this function).
1177 *
1178 * 2) Under direct user control -- this operation must be
1179 * collective.
1180 *
1181 * The operations to be managed by this function are as
1182 * follows:
1183 *
1184 * All processes:
1185 *
1186 * 1) Participate in an opening barrier.
1187 *
1188 * For the process with mpi rank 0:
1189 *
1190 * 1) Load the contents of the candidate list
1191 * (candidate_slist_ptr) into a buffer, and broadcast that
1192 * buffer to all the other caches. Clear the candidate
1193 * list in passing.
1194 *
1195 * If there is a positive number of candidates, proceed with
1196 * the following:
1197 *
1198 * 2) Apply the candidate entry list.
1199 *
1200 * 3) Particpate in a closing barrier.
1201 *
1202 * 4) Remove from the dirty list (d_slist_ptr) and from the
1203 * flushed and still clean entries list (c_slist_ptr),
1204 * all addresses that appeared in the candidate list, as
1205 * these entries are now clean.
1206 *
1207 *
1208 * For all processes with mpi rank greater than 0:
1209 *
1210 * 1) Receive the candidate entry list broadcast
1211 *
1212 * If there is a positive number of candidates, proceed with
1213 * the following:
1214 *
1215 * 2) Apply the candidate entry list.
1216 *
1217 * 3) Particpate in a closing barrier.
1218 *
1219 * Return: Success: non-negative
1220 *
1221 * Failure: negative
1222 *
1223 * Programmer: John Mainzer
1224 * 3/17/10
1225 *
1226 *-------------------------------------------------------------------------
1227 */
1228 static herr_t
H5AC__propagate_and_apply_candidate_list(H5F_t * f,hid_t dxpl_id)1229 H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id)
1230 {
1231 H5AC_t * cache_ptr;
1232 H5AC_aux_t * aux_ptr;
1233 haddr_t * candidates_list_ptr = NULL;
1234 int mpi_result;
1235 unsigned num_candidates = 0;
1236 herr_t ret_value = SUCCEED; /* Return value */
1237
1238 FUNC_ENTER_STATIC
1239
1240 /* Sanity checks */
1241 HDassert(f != NULL);
1242 cache_ptr = f->shared->cache;
1243 HDassert(cache_ptr != NULL);
1244 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1245 HDassert(aux_ptr != NULL);
1246 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1247 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
1248
1249 /* to prevent "messages from the future" we must synchronize all
1250 * processes before we write any entries.
1251 */
1252 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
1253 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
1254
1255 if(aux_ptr->mpi_rank == 0) {
1256 if(H5AC__broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
1257 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.")
1258
1259 HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
1260 } /* end if */
1261 else {
1262 if(H5AC__receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
1263 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.")
1264 } /* end else */
1265
1266 if(num_candidates > 0) {
1267 herr_t result;
1268
1269 /* all processes apply the candidate list.
1270 * H5C_apply_candidate_list() handles the details of
1271 * distributing the writes across the processes.
1272 */
1273
1274 /* Enable writes during this operation */
1275 aux_ptr->write_permitted = TRUE;
1276
1277 /* Apply the candidate list */
1278 result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates,
1279 candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
1280
1281 /* Disable writes again */
1282 aux_ptr->write_permitted = FALSE;
1283
1284 /* Check for error on the write operation */
1285 if(result < 0)
1286 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
1287
1288 /* this code exists primarily for the test bed -- it allows us to
1289 * enforce posix semantics on the server that pretends to be a
1290 * file system in our parallel tests.
1291 */
1292 if(aux_ptr->write_done)
1293 (aux_ptr->write_done)();
1294
1295 /* to prevent "messages from the past" we must synchronize all
1296 * processes again before we go on.
1297 */
1298 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
1299 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
1300
1301 /* if this is process zero, tidy up the dirtied,
1302 * and flushed and still clean lists.
1303 */
1304 if(aux_ptr->mpi_rank == 0)
1305 if(H5AC__tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0)
1306 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
1307 } /* end if */
1308
1309 /* if it is defined, call the sync point done callback. Note
1310 * that this callback is defined purely for testing purposes,
1311 * and should be undefined under normal operating circumstances.
1312 */
1313 if(aux_ptr->sync_point_done)
1314 (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr);
1315
1316 done:
1317 if(candidates_list_ptr)
1318 candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr);
1319
1320 FUNC_LEAVE_NOAPI(ret_value)
1321 } /* H5AC__propagate_and_apply_candidate_list() */
1322
1323
1324 /*-------------------------------------------------------------------------
1325 * Function: H5AC__propagate_flushed_and_still_clean_entries_list
1326 *
1327 * Purpose: In PHDF5, if the process 0 only metadata write strategy
1328 * is selected, only the metadata cache with mpi rank 0 is
1329 * allowed to write to file. All other metadata caches on
1330 * processes with rank greater than 0 must retain dirty
1331 * entries until they are notified that the entry is now
1332 * clean.
1333 *
1334 * This function is the main routine for handling this
1335 * notification proceedure. It must be called
1336 * simultaniously on all processes that have the relevant
1337 * file open. To this end, it is called only during a
1338 * sync point, with a barrier prior to the call.
1339 *
1340 * Note that any metadata entry writes by process 0 will
1341 * occur after the barrier and just before this call.
1342 *
1343 * Typicaly, calls to this function will be triggered in
1344 * one of two ways:
1345 *
1346 * 1) Dirty byte creation exceeds some user specified value.
1347 *
1348 * While metadata reads may occur independently, all
1349 * operations writing metadata must be collective. Thus
1350 * all metadata caches see the same sequence of operations,
1351 * and therefore the same dirty data creation.
1352 *
1353 * This fact is used to synchronize the caches for purposes
1354 * of propagating the list of flushed and still clean
1355 * entries, by simply calling this function from all
1356 * caches whenever some user specified threshold on dirty
1357 * data is exceeded.
1358 *
1359 * 2) Under direct user control -- this operation must be
1360 * collective.
1361 *
1362 * The operations to be managed by this function are as
1363 * follows:
1364 *
1365 * For the process with mpi rank 0:
1366 *
1367 * 1) Load the contents of the flushed and still clean entries
1368 * list (c_slist_ptr) into a buffer, and broadcast that
1369 * buffer to all the other caches.
1370 *
1371 * 2) Clear the flushed and still clean entries list
1372 * (c_slist_ptr).
1373 *
1374 *
1375 * For all processes with mpi rank greater than 0:
1376 *
1377 * 1) Receive the flushed and still clean entries list broadcast
1378 *
1379 * 2) Mark the specified entries as clean.
1380 *
1381 *
1382 * For all processes:
1383 *
1384 * 1) Reset the dirtied bytes count to 0.
1385 *
1386 * Return: Success: non-negative
1387 *
1388 * Failure: negative
1389 *
1390 * Programmer: John Mainzer
1391 * July 5, 2005
1392 *
1393 *-------------------------------------------------------------------------
1394 */
1395 static herr_t
H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t * f,hid_t dxpl_id)1396 H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id)
1397 {
1398 H5AC_t * cache_ptr;
1399 H5AC_aux_t * aux_ptr;
1400 herr_t ret_value = SUCCEED; /* Return value */
1401
1402 FUNC_ENTER_STATIC
1403
1404 /* Sanity checks */
1405 HDassert(f != NULL);
1406 cache_ptr = f->shared->cache;
1407 HDassert(cache_ptr != NULL);
1408 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1409 HDassert(aux_ptr != NULL);
1410 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1411 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
1412
1413 if(aux_ptr->mpi_rank == 0) {
1414 if(H5AC__broadcast_clean_list(cache_ptr) < 0)
1415 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.")
1416 HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
1417 } /* end if */
1418 else {
1419 if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0)
1420 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.")
1421 } /* end else */
1422
1423 done:
1424 FUNC_LEAVE_NOAPI(ret_value)
1425 } /* H5AC__propagate_flushed_and_still_clean_entries_list() */
1426
1427
1428 /*-------------------------------------------------------------------------
1429 *
1430 * Function: H5AC_receive_haddr_list()
1431 *
1432 * Purpose: Receive the list of entry addresses from process 0,
1433 * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
1434 * Note that the caller must free this buffer if it is
1435 * returned.
1436 *
1437 * This function must only be called by the process with
1438 * MPI_rank greater than 0.
1439 *
1440 * Return SUCCEED on success, and FAIL on failure.
1441 *
1442 * Return: Non-negative on success/Negative on failure.
1443 *
1444 * Programmer: Quincey Koziol, 6/11/2015
1445 *
1446 *-------------------------------------------------------------------------
1447 */
1448 static herr_t
H5AC__receive_haddr_list(MPI_Comm mpi_comm,unsigned * num_entries_ptr,haddr_t ** haddr_buf_ptr_ptr)1449 H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr,
1450 haddr_t **haddr_buf_ptr_ptr)
1451 {
1452 haddr_t * haddr_buf_ptr = NULL;
1453 int mpi_result;
1454 unsigned num_entries;
1455 herr_t ret_value = SUCCEED; /* Return value */
1456
1457 FUNC_ENTER_STATIC
1458
1459 /* Sanity checks */
1460 HDassert(num_entries_ptr != NULL);
1461 HDassert(*num_entries_ptr == 0);
1462 HDassert(haddr_buf_ptr_ptr != NULL);
1463 HDassert(*haddr_buf_ptr_ptr == NULL);
1464
1465 /* First receive the number of entries in the list so that we
1466 * can set up a buffer to receive them. If there aren't
1467 * any, we are done.
1468 */
1469 if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_UNSIGNED, 0, mpi_comm)))
1470 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
1471
1472 if(num_entries > 0) {
1473 size_t buf_size;
1474
1475 /* allocate buffers to store the list of entry base addresses in */
1476 buf_size = sizeof(haddr_t) * num_entries;
1477 if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
1478 HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
1479
1480 /* Now receive the list of candidate entries */
1481 if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, mpi_comm)))
1482 HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
1483 } /* end if */
1484
1485 /* finally, pass the number of entries and the buffer pointer
1486 * back to the caller.
1487 */
1488 *num_entries_ptr = num_entries;
1489 *haddr_buf_ptr_ptr = haddr_buf_ptr;
1490
1491 done:
1492 if(ret_value < 0)
1493 if(haddr_buf_ptr)
1494 haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
1495
1496 FUNC_LEAVE_NOAPI(ret_value)
1497 } /* H5AC_receive_haddr_list() */
1498
1499
1500 /*-------------------------------------------------------------------------
1501 *
1502 * Function: H5AC__receive_and_apply_clean_list()
1503 *
1504 * Purpose: Receive the list of cleaned entries from process 0,
1505 * and mark the specified entries as clean.
1506 *
1507 * This function must only be called by the process with
1508 * MPI_rank greater than 0.
1509 *
1510 * Return SUCCEED on success, and FAIL on failure.
1511 *
1512 * Return: Non-negative on success/Negative on failure.
1513 *
1514 * Programmer: John Mainzer, 7/4/05
1515 *
1516 *-------------------------------------------------------------------------
1517 */
1518 static herr_t
H5AC__receive_and_apply_clean_list(H5F_t * f,hid_t dxpl_id)1519 H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id)
1520 {
1521 H5AC_t * cache_ptr;
1522 H5AC_aux_t * aux_ptr;
1523 haddr_t * haddr_buf_ptr = NULL;
1524 unsigned num_entries = 0;
1525 herr_t ret_value = SUCCEED; /* Return value */
1526
1527 FUNC_ENTER_STATIC
1528
1529 /* Sanity check */
1530 HDassert(f != NULL);
1531 cache_ptr = f->shared->cache;
1532 HDassert(cache_ptr != NULL);
1533 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1534 HDassert(aux_ptr != NULL);
1535 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1536 HDassert(aux_ptr->mpi_rank != 0);
1537
1538 /* Retrieve the clean list from process 0 */
1539 if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, &num_entries, &haddr_buf_ptr) < 0)
1540 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
1541
1542 if(num_entries > 0)
1543 /* mark the indicated entries as clean */
1544 if(H5C_mark_entries_as_clean(f, dxpl_id, num_entries, haddr_buf_ptr) < 0)
1545 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.")
1546
1547 /* if it is defined, call the sync point done callback. Note
1548 * that this callback is defined purely for testing purposes,
1549 * and should be undefined under normal operating circumstances.
1550 */
1551 if(aux_ptr->sync_point_done)
1552 (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
1553
1554 done:
1555 if(haddr_buf_ptr)
1556 haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
1557
1558 FUNC_LEAVE_NOAPI(ret_value)
1559 } /* H5AC__receive_and_apply_clean_list() */
1560
1561
1562 /*-------------------------------------------------------------------------
1563 *
1564 * Function: H5AC__receive_candidate_list()
1565 *
1566 * Purpose: Receive the list of candidate entries from process 0,
1567 * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
1568 * Note that the caller must free this buffer if it is
1569 * returned.
1570 *
1571 * This function must only be called by the process with
1572 * MPI_rank greater than 0.
1573 *
1574 * Return SUCCEED on success, and FAIL on failure.
1575 *
1576 * Return: Non-negative on success/Negative on failure.
1577 *
1578 * Programmer: John Mainzer, 3/17/10
1579 *
1580 *-------------------------------------------------------------------------
1581 */
1582 static herr_t
H5AC__receive_candidate_list(const H5AC_t * cache_ptr,unsigned * num_entries_ptr,haddr_t ** haddr_buf_ptr_ptr)1583 H5AC__receive_candidate_list(const H5AC_t *cache_ptr, unsigned *num_entries_ptr,
1584 haddr_t **haddr_buf_ptr_ptr)
1585 {
1586 H5AC_aux_t * aux_ptr;
1587 herr_t ret_value = SUCCEED; /* Return value */
1588
1589 FUNC_ENTER_STATIC
1590
1591 /* Sanity checks */
1592 HDassert(cache_ptr != NULL);
1593 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1594 HDassert(aux_ptr != NULL);
1595 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1596 HDassert(aux_ptr->mpi_rank != 0);
1597 HDassert(aux_ptr-> metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
1598 HDassert(num_entries_ptr != NULL);
1599 HDassert(*num_entries_ptr == 0);
1600 HDassert(haddr_buf_ptr_ptr != NULL);
1601 HDassert(*haddr_buf_ptr_ptr == NULL);
1602
1603 /* Retrieve the candidate list from process 0 */
1604 if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, num_entries_ptr, haddr_buf_ptr_ptr) < 0)
1605 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
1606
1607 done:
1608 FUNC_LEAVE_NOAPI(ret_value)
1609 } /* H5AC__receive_candidate_list() */
1610
1611
1612 /*-------------------------------------------------------------------------
1613 * Function: H5AC__rsp__dist_md_write__flush
1614 *
1615 * Purpose: Routine for handling the details of running a sync point
1616 * that is triggered by a flush -- which in turn must have been
1617 * triggered by either a flush API call or a file close --
1618 * when the distributed metadata write strategy is selected.
1619 *
1620 * Upon entry, each process generates it own candidate list,
1621 * being a sorted list of all dirty metadata entries currently
1622 * in the metadata cache. Note that this list must be idendical
1623 * across all processes, as all processes see the same stream
1624 * of dirty metadata coming in, and use the same lists of
1625 * candidate entries at each sync point. (At first glance, this
1626 * argument sounds circular, but think of it in the sense of
1627 * a recursive proof).
1628 *
1629 * If this this list is empty, we are done, and the function
1630 * returns
1631 *
1632 * Otherwise, after the sorted list dirty metadata entries is
1633 * constructed, each process uses the same algorithm to assign
1634 * each entry on the candidate list to exactly one process for
1635 * flushing.
1636 *
1637 * At this point, all processes participate in a barrier to
1638 * avoid messages from the past/future bugs.
1639 *
1640 * Each process then flushes the entries assigned to it, and
1641 * marks all other entries on the candidate list as clean.
1642 *
1643 * Finally, all processes participate in a second barrier to
1644 * avoid messages from the past/future bugs.
1645 *
1646 * At the end of this process, process 0 and only process 0
1647 * must tidy up its lists of dirtied and cleaned entries.
1648 * These lists are not used in the distributed metadata write
1649 * strategy, but they must be maintained should we shift
1650 * to a strategy that uses them.
1651 *
1652 * Return: Success: non-negative
1653 *
1654 * Failure: negative
1655 *
1656 * Programmer: John Mainzer
1657 * April 28, 2010
1658 *
1659 *-------------------------------------------------------------------------
1660 */
1661 static herr_t
H5AC__rsp__dist_md_write__flush(H5F_t * f,hid_t dxpl_id)1662 H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id)
1663 {
1664 H5AC_t * cache_ptr;
1665 H5AC_aux_t * aux_ptr;
1666 haddr_t * haddr_buf_ptr = NULL;
1667 int mpi_result;
1668 unsigned num_entries = 0;
1669 herr_t ret_value = SUCCEED; /* Return value */
1670
1671 FUNC_ENTER_STATIC
1672
1673 /* Sanity checks */
1674 HDassert(f != NULL);
1675 cache_ptr = f->shared->cache;
1676 HDassert(cache_ptr != NULL);
1677 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1678 HDassert(aux_ptr != NULL);
1679 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1680 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
1681
1682 /* first construct the candidate list -- initially, this will be in the
1683 * form of a skip list. We will convert it later.
1684 */
1685 if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0)
1686 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
1687
1688 if(H5SL_count(aux_ptr->candidate_slist_ptr) > 0) {
1689 herr_t result;
1690
1691 /* convert the candidate list into the format we
1692 * are used to receiving from process 0.
1693 */
1694 if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0)
1695 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
1696
1697 /* initial sync point barrier */
1698 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
1699 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
1700
1701 /* Enable writes during this operation */
1702 aux_ptr->write_permitted = TRUE;
1703
1704 /* Apply the candidate list */
1705 result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries,
1706 haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
1707
1708 /* Disable writes again */
1709 aux_ptr->write_permitted = FALSE;
1710
1711 /* Check for error on the write operation */
1712 if(result < 0)
1713 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
1714
1715 /* this code exists primarily for the test bed -- it allows us to
1716 * enforce posix semantics on the server that pretends to be a
1717 * file system in our parallel tests.
1718 */
1719 if(aux_ptr->write_done)
1720 (aux_ptr->write_done)();
1721
1722 /* final sync point barrier */
1723 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
1724 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
1725
1726 /* if this is process zero, tidy up the dirtied,
1727 * and flushed and still clean lists.
1728 */
1729 if(aux_ptr->mpi_rank == 0)
1730 if(H5AC__tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0)
1731 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
1732 } /* end if */
1733
1734 /* if it is defined, call the sync point done callback. Note
1735 * that this callback is defined purely for testing purposes,
1736 * and should be undefined under normal operating circumstances.
1737 */
1738 if(aux_ptr->sync_point_done)
1739 (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
1740
1741 done:
1742 if(haddr_buf_ptr)
1743 haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
1744
1745 FUNC_LEAVE_NOAPI(ret_value)
1746 } /* H5AC__rsp__dist_md_write__flush() */
1747
1748
1749 /*-------------------------------------------------------------------------
1750 * Function: H5AC__rsp__dist_md_write__flush_to_min_clean
1751 *
1752 * Purpose: Routine for handling the details of running a sync point
1753 * triggered by the accumulation of dirty metadata (as
1754 * opposed to a flush call to the API) when the distributed
1755 * metadata write strategy is selected.
1756 *
1757 * After invocation and initial sanity checking this function
1758 * first checks to see if evictions are enabled -- if they
1759 * are not, the function does nothing and returns.
1760 *
1761 * Otherwise, process zero constructs a list of entries to
1762 * be flushed in order to bring the process zero cache back
1763 * within its min clean requirement. Note that this list
1764 * (the candidate list) may be empty.
1765 *
1766 * Then, all processes participate in a barrier.
1767 *
1768 * After the barrier, process 0 broadcasts the number of
1769 * entries in the candidate list prepared above, and all
1770 * other processes receive this number.
1771 *
1772 * If this number is zero, we are done, and the function
1773 * returns without further action.
1774 *
1775 * Otherwise, process 0 broadcasts the sorted list of
1776 * candidate entries, and all other processes receive it.
1777 *
1778 * Then, each process uses the same algorithm to assign
1779 * each entry on the candidate list to exactly one process
1780 * for flushing.
1781 *
1782 * Each process then flushes the entries assigned to it, and
1783 * marks all other entries on the candidate list as clean.
1784 *
1785 * Finally, all processes participate in a second barrier to
1786 * avoid messages from the past/future bugs.
1787 *
1788 * At the end of this process, process 0 and only process 0
1789 * must tidy up its lists of dirtied and cleaned entries.
1790 * These lists are not used in the distributed metadata write
1791 * strategy, but they must be maintained should we shift
1792 * to a strategy that uses them.
1793 *
1794 * Return: Success: non-negative
1795 *
1796 * Failure: negative
1797 *
1798 * Programmer: John Mainzer
1799 * April 28, 2010
1800 *
1801 *-------------------------------------------------------------------------
1802 */
1803 static herr_t
H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t * f,hid_t dxpl_id)1804 H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
1805 {
1806 H5AC_t * cache_ptr;
1807 H5AC_aux_t * aux_ptr;
1808 hbool_t evictions_enabled;
1809 herr_t ret_value = SUCCEED; /* Return value */
1810
1811 FUNC_ENTER_STATIC
1812
1813 /* Sanity checks */
1814 HDassert(f != NULL);
1815 cache_ptr = f->shared->cache;
1816 HDassert(cache_ptr != NULL);
1817 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1818 HDassert(aux_ptr != NULL);
1819 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1820 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
1821
1822 /* Query if evictions are allowed */
1823 if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
1824 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
1825
1826 if(evictions_enabled) {
1827 /* construct candidate list -- process 0 only */
1828 if(aux_ptr->mpi_rank == 0)
1829 if(H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
1830 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
1831
1832 /* propagate and apply candidate list -- all processes */
1833 if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0)
1834 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.")
1835 } /* evictions enabled */
1836
1837 done:
1838 FUNC_LEAVE_NOAPI(ret_value)
1839 } /* H5AC__rsp__dist_md_write__flush_to_min_clean() */
1840
1841
1842 /*-------------------------------------------------------------------------
1843 * Function: H5AC__rsp__p0_only__flush
1844 *
1845 * Purpose: Routine for handling the details of running a sync point
 *              that is triggered by a flush -- which in turn must have been
1847 * triggered by either a flush API call or a file close --
1848 * when the process 0 only metadata write strategy is selected.
1849 *
1850 * First, all processes participate in a barrier.
1851 *
1852 * Then process zero flushes all dirty entries, and broadcasts
 *              the number of clean entries (if any) to all the other
1854 * caches.
1855 *
1856 * If this number is zero, we are done.
1857 *
1858 * Otherwise, process 0 broadcasts the list of cleaned
1859 * entries, and all other processes which are part of this
1860 * file group receive it, and mark the listed entries as
1861 * clean in their caches.
1862 *
1863 * Since all processes have the same set of dirty
1864 * entries at the beginning of the sync point, and all
1865 * entries that will be written are written before
1866 * process zero broadcasts the number of cleaned entries,
1867 * there is no need for a closing barrier.
1868 *
1869 * Return: Success: non-negative
1870 *
1871 * Failure: negative
1872 *
1873 * Programmer: John Mainzer
1874 * April 28, 2010
1875 *
1876 *-------------------------------------------------------------------------
1877 */
1878 static herr_t
H5AC__rsp__p0_only__flush(H5F_t * f,hid_t dxpl_id)1879 H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id)
1880 {
1881 H5AC_t * cache_ptr;
1882 H5AC_aux_t * aux_ptr;
1883 int mpi_result;
1884 herr_t ret_value = SUCCEED; /* Return value */
1885
1886 FUNC_ENTER_STATIC
1887
1888 /* Sanity checks */
1889 HDassert(f != NULL);
1890 cache_ptr = f->shared->cache;
1891 HDassert(cache_ptr != NULL);
1892 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1893 HDassert(aux_ptr != NULL);
1894 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1895 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
1896
1897 /* to prevent "messages from the future" we must
1898 * synchronize all processes before we start the flush.
1899 * Hence the following barrier.
1900 */
1901 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
1902 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
1903
1904 /* Flush data to disk, from rank 0 process */
1905 if(aux_ptr->mpi_rank == 0) {
1906 herr_t result;
1907
1908 /* Enable writes during this operation */
1909 aux_ptr->write_permitted = TRUE;
1910
1911 /* Flush the cache */
1912 result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET);
1913
1914 /* Disable writes again */
1915 aux_ptr->write_permitted = FALSE;
1916
1917 /* Check for error on the write operation */
1918 if(result < 0)
1919 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
1920
1921 /* this code exists primarily for the test bed -- it allows us to
1922 * enforce posix semantics on the server that pretends to be a
1923 * file system in our parallel tests.
1924 */
1925 if(aux_ptr->write_done)
1926 (aux_ptr->write_done)();
1927 } /* end if */
1928
1929 /* Propagate cleaned entries to other ranks. */
1930 if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0)
1931 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
1932
1933 done:
1934 FUNC_LEAVE_NOAPI(ret_value)
1935 } /* H5AC__rsp__p0_only__flush() */
1936
1937
1938 /*-------------------------------------------------------------------------
1939 * Function: H5AC__rsp__p0_only__flush_to_min_clean
1940 *
1941 * Purpose: Routine for handling the details of running a sync point
1942 * triggered by the accumulation of dirty metadata (as
1943 * opposed to a flush call to the API) when the process 0
1944 * only metadata write strategy is selected.
1945 *
1946 * After invocation and initial sanity checking this function
1947 * first checks to see if evictions are enabled -- if they
1948 * are not, the function does nothing and returns.
1949 *
1950 * Otherwise, all processes participate in a barrier.
1951 *
1952 * After the barrier, if this is process 0, the function
1953 * causes the cache to flush sufficient entries to get the
1954 * cache back within its minimum clean fraction, and broadcast
1955 * the number of entries which have been flushed since
1956 * the last sync point, and are still clean.
1957 *
1958 * If this number is zero, we are done.
1959 *
1960 * Otherwise, process 0 broadcasts the list of cleaned
1961 * entries, and all other processes which are part of this
1962 * file group receive it, and mark the listed entries as
1963 * clean in their caches.
1964 *
1965 * Since all processes have the same set of dirty
1966 * entries at the beginning of the sync point, and all
1967 * entries that will be written are written before
1968 * process zero broadcasts the number of cleaned entries,
1969 * there is no need for a closing barrier.
1970 *
1971 * Return: Success: non-negative
1972 *
1973 * Failure: negative
1974 *
1975 * Programmer: John Mainzer
1976 * April 28, 2010
1977 *
1978 *-------------------------------------------------------------------------
1979 */
1980 static herr_t
H5AC__rsp__p0_only__flush_to_min_clean(H5F_t * f,hid_t dxpl_id)1981 H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
1982 {
1983 H5AC_t * cache_ptr;
1984 H5AC_aux_t * aux_ptr;
1985 hbool_t evictions_enabled;
1986 herr_t ret_value = SUCCEED; /* Return value */
1987
1988 FUNC_ENTER_STATIC
1989
1990 /* Sanity checks */
1991 HDassert(f != NULL);
1992 cache_ptr = f->shared->cache;
1993 HDassert(cache_ptr != NULL);
1994 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
1995 HDassert(aux_ptr != NULL);
1996 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
1997 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
1998
1999 /* Query if evictions are allowed */
2000 if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
2001 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
2002
2003 /* Flush if evictions are allowed -- following call
2004 * will cause process 0 to flush to min clean size,
2005 * and then propagate the newly clean entries to the
2006 * other processes.
2007 *
2008 * Otherwise, do nothing.
2009 */
2010 if(evictions_enabled) {
2011 int mpi_result;
2012
2013 /* to prevent "messages from the future" we must synchronize all
2014 * processes before we start the flush. This synchronization may
2015 * already be done -- hence the do_barrier parameter.
2016 */
2017 if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
2018 HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
2019
2020 if(0 == aux_ptr->mpi_rank) {
2021 herr_t result;
2022
2023 /* here, process 0 flushes as many entries as necessary to
2024 * comply with the currently specified min clean size.
2025 * Note that it is quite possible that no entries will be
2026 * flushed.
2027 */
2028
2029 /* Enable writes during this operation */
2030 aux_ptr->write_permitted = TRUE;
2031
2032 /* Flush the cache */
2033 result = H5C_flush_to_min_clean(f, dxpl_id);
2034
2035 /* Disable writes again */
2036 aux_ptr->write_permitted = FALSE;
2037
2038 /* Check for error on the write operation */
2039 if(result < 0)
2040 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.")
2041
2042 /* this call exists primarily for the test code -- it is used
2043 * to enforce POSIX semantics on the process used to simulate
2044 * reads and writes in t_cache.c.
2045 */
2046 if(aux_ptr->write_done)
2047 (aux_ptr->write_done)();
2048 } /* end if */
2049
2050 if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0)
2051 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
2052 } /* end if */
2053
2054 done:
2055 FUNC_LEAVE_NOAPI(ret_value)
2056 } /* H5AC__rsp__p0_only__flush_to_min_clean() */
2057
2058
2059 /*-------------------------------------------------------------------------
2060 * Function: H5AC__run_sync_point
2061 *
2062 * Purpose: Top level routine for managing a sync point between all
2063 * meta data caches in the parallel case. Since all caches
2064 * see the same sequence of dirty metadata, we simply count
2065 * bytes of dirty metadata, and run a sync point whenever the
2066 * number of dirty bytes of metadata seen since the last
2067 * sync point exceeds a threshold that is common across all
2068 * processes. We also run sync points in response to
2069 * HDF5 API calls triggering either a flush or a file close.
2070 *
2071 * In earlier versions of PHDF5, only the metadata cache with
2072 * mpi rank 0 was allowed to write to file. All other
2073 * metadata caches on processes with rank greater than 0 were
2074 * required to retain dirty entries until they were notified
 *              that the entry was clean.
2076 *
2077 * This function was created to make it easier for us to
2078 * experiment with other options, as it is a single point
2079 * for the execution of sync points.
2080 *
2081 * Return: Success: non-negative
2082 *
2083 * Failure: negative
2084 *
2085 * Programmer: John Mainzer
2086 * March 11, 2010
2087 *
2088 *-------------------------------------------------------------------------
2089 */
2090 herr_t
H5AC__run_sync_point(H5F_t * f,hid_t dxpl_id,int sync_point_op)2091 H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op)
2092 {
2093 H5AC_t * cache_ptr;
2094 H5AC_aux_t * aux_ptr;
2095 herr_t ret_value = SUCCEED; /* Return value */
2096
2097 FUNC_ENTER_PACKAGE
2098
2099 /* Sanity checks */
2100 HDassert(f != NULL);
2101 cache_ptr = f->shared->cache;
2102 HDassert(cache_ptr != NULL);
2103 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
2104 HDassert(aux_ptr != NULL);
2105 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
2106 HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) ||
2107 (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED));
2108
2109 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
2110 HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n",
2111 aux_ptr->mpi_rank,
2112 aux_ptr->dirty_bytes_propagations,
2113 aux_ptr->unprotect_dirty_bytes,
2114 aux_ptr->unprotect_dirty_bytes_updates,
2115 aux_ptr->insert_dirty_bytes,
2116 aux_ptr->insert_dirty_bytes_updates,
2117 aux_ptr->rename_dirty_bytes,
2118 aux_ptr->rename_dirty_bytes_updates);
2119 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
2120
2121 /* clear collective access flag on half of the entries in the
2122 cache and mark them as independent in case they need to be
2123 evicted later. All ranks are guranteed to mark the same entries
2124 since we don't modify the order of the collectively accessed
2125 entries except through collective access. */
2126 if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
2127 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
2128
2129 switch(aux_ptr->metadata_write_strategy) {
2130 case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
2131 switch(sync_point_op) {
2132 case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
2133 if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0)
2134 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.")
2135 break;
2136
2137 case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
2138 if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0)
2139 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.")
2140 break;
2141
2142 default:
2143 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
2144 break;
2145 } /* end switch */
2146 break;
2147
2148 case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
2149 switch(sync_point_op) {
2150 case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
2151 if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0)
2152 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.")
2153 break;
2154
2155 case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
2156 if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0)
2157 HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.")
2158 break;
2159
2160 default:
2161 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
2162 break;
2163 } /* end switch */
2164 break;
2165
2166 default:
2167 HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.")
2168 break;
2169 } /* end switch */
2170
2171 /* reset the dirty bytes count */
2172 aux_ptr->dirty_bytes = 0;
2173
2174 #if H5AC_DEBUG_DIRTY_BYTES_CREATION
2175 aux_ptr->dirty_bytes_propagations += 1;
2176 aux_ptr->unprotect_dirty_bytes = 0;
2177 aux_ptr->unprotect_dirty_bytes_updates = 0;
2178 aux_ptr->insert_dirty_bytes = 0;
2179 aux_ptr->insert_dirty_bytes_updates = 0;
2180 aux_ptr->rename_dirty_bytes = 0;
2181 aux_ptr->rename_dirty_bytes_updates = 0;
2182 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
2183
2184 done:
2185 FUNC_LEAVE_NOAPI(ret_value)
2186 } /* H5AC__run_sync_point() */
2187
2188
2189 /*-------------------------------------------------------------------------
2190 * Function: H5AC__tidy_cache_0_lists()
2191 *
2192 * Purpose: In the distributed metadata write strategy, not all dirty
2193 * entries are written by process 0 -- thus we must tidy
2194 * up the dirtied, and flushed and still clean lists
2195 * maintained by process zero after each sync point.
2196 *
2197 * This procedure exists to tend to this issue.
2198 *
2199 * At this point, all entries that process 0 cleared should
2200 * have been removed from both the dirty and flushed and
2201 * still clean lists, and entries that process 0 has flushed
2202 * should have been removed from the dirtied list and added
2203 * to the flushed and still clean list.
2204 *
2205 * However, since the distributed metadata write strategy
2206 * doesn't make use of these lists, the objective is simply
2207 * to maintain these lists in consistent state that allows
2208 * them to be used should the metadata write strategy change
2209 * to one that uses these lists.
2210 *
2211 * Thus for our purposes, all we need to do is remove from
2212 * the dirtied and flushed and still clean lists all
2213 * references to entries that appear in the candidate list.
2214 *
2215 * Return: Success: non-negative
2216 *
2217 * Failure: negative
2218 *
2219 * Programmer: John Mainzer
2220 * 4/20/10
2221 *
2222 *-------------------------------------------------------------------------
2223 */
2224 static herr_t
H5AC__tidy_cache_0_lists(H5AC_t * cache_ptr,unsigned num_candidates,haddr_t * candidates_list_ptr)2225 H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates,
2226 haddr_t *candidates_list_ptr)
2227 {
2228 H5AC_aux_t * aux_ptr;
2229 unsigned u;
2230
2231 FUNC_ENTER_STATIC_NOERR
2232
2233 /* Sanity checks */
2234 HDassert(cache_ptr != NULL);
2235 aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
2236 HDassert(aux_ptr != NULL);
2237 HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
2238 HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
2239 HDassert(aux_ptr->mpi_rank == 0);
2240 HDassert(num_candidates > 0);
2241 HDassert(candidates_list_ptr != NULL);
2242
2243 /* clean up dirtied and flushed and still clean lists by removing
2244 * all entries on the candidate list. Cleared entries should
2245 * have been removed from both the dirty and cleaned lists at
2246 * this point, flushed entries should have been added to the
2247 * cleaned list. However, for this metadata write strategy,
2248 * we just want to remove all references to the candidate entries.
2249 */
2250 for(u = 0; u < num_candidates; u++) {
2251 H5AC_slist_entry_t * d_slist_entry_ptr;
2252 H5AC_slist_entry_t * c_slist_entry_ptr;
2253 haddr_t addr;
2254
2255 addr = candidates_list_ptr[u];
2256
2257 /* addr may be either on the dirtied list, or on the flushed
2258 * and still clean list. Remove it.
2259 */
2260 if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr)))
2261 d_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr);
2262 if(NULL != (c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)&addr)))
2263 c_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr);
2264 } /* end for */
2265
2266 FUNC_LEAVE_NOAPI(SUCCEED)
2267 } /* H5AC__tidy_cache_0_lists() */
2268
2269
2270 /*-------------------------------------------------------------------------
2271 * Function: H5AC__flush_entries
2272 *
2273 * Purpose: Flush the metadata cache associated with the specified file,
2274 * only writing from rank 0, but propagating the cleaned entries
2275 * to all ranks.
2276 *
2277 * Return: Non-negative on success/Negative on failure if there was a
2278 * request to flush all items and something was protected.
2279 *
2280 * Programmer: Quincey Koziol
2281 * koziol@hdfgroup.org
2282 * Aug 22 2009
2283 *
2284 *-------------------------------------------------------------------------
2285 */
2286 herr_t
H5AC__flush_entries(H5F_t * f,hid_t dxpl_id)2287 H5AC__flush_entries(H5F_t *f, hid_t dxpl_id)
2288 {
2289 herr_t ret_value = SUCCEED; /* Return value */
2290
2291 FUNC_ENTER_PACKAGE
2292
2293 /* Sanity checks */
2294 HDassert(f);
2295 HDassert(f->shared->cache);
2296
2297 /* Check if we have >1 ranks */
2298 if(H5C_get_aux_ptr(f->shared->cache))
2299 if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0)
2300 HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
2301
2302 done:
2303 FUNC_LEAVE_NOAPI(ret_value)
2304 } /* H5AC__flush_entries() */
2305 #endif /* H5_HAVE_PARALLEL */
2306
2307