1 /*============================================================================
2  * File and directory operations, with parallel file I/O
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 #include "cs_defs.h"
28 
29 /*----------------------------------------------------------------------------*/
30 
/*
  Force LARGEFILE_SOURCE if largefiles enabled under 32-bit Linux or Blue Gene
  (otherwise, we may encounter bugs with glibc 2.3 due to fseeko and ftello
  not being correctly defined). Compiling with -D_GNU_SOURCE instead
  of -D_POSIX_C_SOURCE=200112L seems to be another way to solve the problem.
*/
37 
38 #if (SIZEOF_LONG < 8) && (_FILE_OFFSET_BITS == 64)
39 # if defined(__linux__)
40 #  if !defined(_POSIX_SOURCE)
41 #    define _GNU_SOURCE 1
42 #  endif
43 #  if !defined(_GNU_SOURCE) && !defined(_LARGEFILE_SOURCE)
44 #   define _LARGEFILE_SOURCE 1
45 #  endif
46 # endif
47 #endif
48 
49 /*----------------------------------------------------------------------------
50  * Standard C library headers
51  *----------------------------------------------------------------------------*/
52 
53 #include <assert.h>
54 #include <errno.h>
55 #include <limits.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 
60 #if defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_STAT_H)
61 # include <sys/stat.h>
62 # include <sys/types.h>
63 # if defined(HAVE_UNISTD_H)
64 #  include <unistd.h>
65 # endif
66 #endif /* defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_STAT_H) */
67 
68 #if defined(HAVE_DIRENT_H)
69 #include <dirent.h>
70 #endif
71 
72 #if defined(WIN32) || defined(_WIN32)
73 #include <io.h>
74 #endif
75 
76 #if defined(HAVE_MPI_IO)
77 #include <limits.h>
78 #endif
79 
80 #if defined(HAVE_ZLIB)
81 #include <zlib.h>
82 #endif
83 
84 /*----------------------------------------------------------------------------
85  * Local headers
86  *----------------------------------------------------------------------------*/
87 
88 #include "bft_mem.h"
89 #include "bft_error.h"
90 #include "bft_printf.h"
91 #include "cs_log.h"
92 
93 /*----------------------------------------------------------------------------
94  * Header for the current file
95  *----------------------------------------------------------------------------*/
96 
97 #include "cs_file.h"
98 
99 /*----------------------------------------------------------------------------*/
100 
101 BEGIN_C_DECLS
102 
103 /*=============================================================================
104  * Additional doxygen documentation
105  *============================================================================*/
106 
/*!
  \file cs_file.c
        File and directory operations, with parallel IO.

  \typedef cs_file_t
           File descriptor (opaque object)

  \typedef cs_file_off_t
           Offset for file position indicator

  \enum cs_file_mode_t

  \brief File access modes

  \var CS_FILE_MODE_READ
       Read mode
  \var CS_FILE_MODE_WRITE
       Write mode
  \var CS_FILE_MODE_APPEND
       Append

  \enum cs_file_seek_t

  \brief seek semantics (third argument of \ref cs_file_seek)

  \var CS_FILE_SEEK_SET
       Seek from beginning of file
  \var CS_FILE_SEEK_CUR
       Seek from current position
  \var CS_FILE_SEEK_END
       Seek from end of file

  \enum cs_file_access_t

  \brief Shared file access methods

  \var CS_FILE_DEFAULT
       Default IO option
  \var CS_FILE_STDIO_SERIAL
       Serial standard C IO (funnelled through rank 0 in parallel)
  \var CS_FILE_STDIO_PARALLEL
       Per-process standard C IO (for reading only)
  \var CS_FILE_MPI_INDEPENDENT
       Non-collective MPI-IO with independent file open and close
       (for reading only)
  \var CS_FILE_MPI_NON_COLLECTIVE
       Non-collective MPI-IO with collective file open and close
  \var CS_FILE_MPI_COLLECTIVE
       Collective MPI-IO

  \enum cs_file_mpi_positioning_t

  \brief MPI-IO positioning methods
  \details It is not always known whether a performance or robustness
          difference is to be expected using explicit file offsets
          or individual file pointers. Perusal of a sampling of ROMIO
          code would seem to indicate that no difference is to be
          expected, but this might change with MPI IO variants
          or file systems, so an advanced setting is made possible.

  \var CS_FILE_MPI_EXPLICIT_OFFSETS
       Use explicit offsets positioning with MPI-IO
  \var CS_FILE_MPI_INDIVIDUAL_POINTERS
       Use individual file pointer positioning with MPI-IO
*/
172 
173 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
174 
175 /*=============================================================================
176  * Macro definitions
177  *============================================================================*/
178 
/* MPI tag for file operations: the sum of the character codes of "CS_FILE".
   The whole expansion is parenthesized so that it always behaves as a single
   operand (e.g. under sizeof or next to a postfix operator). */
#define CS_FILE_MPI_TAG  ((int)('C'+'S'+'_'+'F'+'I'+'L'+'E'))
181 
182 /*============================================================================
183  * Type definitions
184  *============================================================================*/
185 
186 /* File descriptor */
187 
188 struct _cs_file_t {
189 
190   char              *name;         /* File name */
191   cs_file_mode_t     mode;         /* File mode */
192   cs_file_access_t   method;       /* File access method */
193   int                rank;         /* MPI rank */
194   int                n_ranks;      /* MPI rank */
195   bool               swap_endian;  /* Swap big-endian and little-endian ? */
196 
197   FILE              *sh;           /* Serial file handle */
198 
199 #if defined(HAVE_ZLIB)
200   gzFile             gzh;          /* Zlib (serial) file handle */
201 #endif
202 
203 #if defined(HAVE_MPI)
204   int                rank_step;    /* Rank step between ranks and io ranks */
205   cs_gnum_t         *block_size;   /* Block sizes on IO ranks in case
206                                       of rank stepping */
207   MPI_Comm           comm;         /* Associated MPI communicator */
208   MPI_Comm           io_comm;      /* Associated MPI-IO communicator */
209 #endif
210 #if defined(HAVE_MPI_IO)
211   MPI_File           fh;           /* MPI file handle */
212   MPI_Info           info;         /* MPI file info */
213   MPI_Offset         offset;       /* MPI file offset */
214 #else
215   cs_file_off_t      offset;       /* File offset */
216 #endif
217 
218 };
219 
220 /* Associated typedef documentation (for cs_file.h) */
221 
222 /*!
223  * \typedef cs_file_t
224  * \brief Pointer to opaque file descriptor
225  */
226 
227 #if defined(HAVE_MPI)
228 
229 /* Helper structure for IO serialization */
230 
231 struct _cs_file_serializer_t {
232 
233   int          rank_id;        /* Local rank in communicator */
234   int          n_ranks;        /* Number of ranks in communicator */
235 
236   cs_gnum_t    range[2];       /* Global start and past-the-end numbers
237                                   for local rank */
238 
239   size_t       size;           /* datatype size (may include stride) */
240 
241   cs_gnum_t    next_g_num;     /* Next global number */
242   int          next_rank_id;   /* Next rank with which we will communicate */
243 
244   cs_lnum_t   *count;          /* Number of elements in each block */
245 
246   void        *buf;            /* pointer to external buffer */
247   void        *recv_buf;       /* pointer to external buffer if
248                                   buf_block_size >= max_block_size,
249                                   or to buf otherwise */
250 
251   MPI_Comm     comm;           /* Associated MPI communicator */
252 };
253 
254 #endif /* defined(HAVE_MPI) */
255 
/* Offset type for zlib */

#if defined(HAVE_ZLIB)

/* Zlib API may be broken when using large file support, as z_off_t
   is based on current off_t, and not on a value fixed at compilation time.
   We redefine prototypes for gzseek() and gztell() ;
   This is ugly, but not as wrong as zlib's logic, and should work with an
   unmodified Zlib (as of Zlib 1.2.11). */

/* Select an integer type matching the size of z_off_t as configured.
   Every branch either defines _cs_z_off_t or stops compilation with an
   explicit message, so a size mismatch cannot silently leave the type
   undefined. */

#if defined (SIZEOF_Z_OFF_T)
#  if (SIZEOF_Z_OFF_T == SIZEOF_LONG)
typedef long _cs_z_off_t;
#  elif defined (HAVE_LONG_LONG)
#    if (SIZEOF_Z_OFF_T == SIZEOF_LONG_LONG)
typedef long long _cs_z_off_t;
#    else
#      error "z_off_t returned by zlibCompileFlags() neither long nor long long"
#    endif
#  else
#    error "z_off_t is not the size of long, and long long is not available"
#  endif
#else
typedef z_off_t _cs_z_off_t;
#endif

/* Function types with which gzseek() and gztell() are called,
   using the offset type selected above */

typedef _cs_z_off_t (cs_gzseek_t) (gzFile file,
                                   _cs_z_off_t offset,
                                   int whence);

typedef _cs_z_off_t (cs_gztell_t) (gzFile file);

#endif /* HAVE_ZLIB */
287 
288 /*============================================================================
289  * Static global variables
290  *============================================================================*/
291 
292 /* Default access */
293 
294 static cs_file_mpi_positioning_t
295   _mpi_io_positioning = CS_FILE_MPI_EXPLICIT_OFFSETS;
296 
297 static cs_file_access_t _default_access_r = CS_FILE_DEFAULT;
298 static cs_file_access_t _default_access_w = CS_FILE_DEFAULT;
299 
300 /* Communicator and hints used for file operations */
301 
302 #if defined(HAVE_MPI)
303 
304 static bool     _mpi_defaults_are_set = false;
305 static int      _mpi_rank_step = 1;
306 static MPI_Comm _mpi_comm = MPI_COMM_NULL;
307 static MPI_Comm _mpi_io_comm = MPI_COMM_NULL;
308 static MPI_Info _mpi_io_hints_r = MPI_INFO_NULL;
309 static MPI_Info _mpi_io_hints_w = MPI_INFO_NULL;
310 
311 #endif
312 
313 #if defined(HAVE_ZLIB)
314 
315 /* Zlib API broken offset size workaround, continued... */
316 
317 static cs_gzseek_t  *_cs_gzseek = (cs_gzseek_t *)gzseek;
318 static cs_gztell_t  *_cs_gztell = (cs_gztell_t *)gztell;
319 
320 #endif /* HAVE_ZLIB */
321 
322 /*============================================================================
323  * Global variables
324  *============================================================================*/
325 
326 /* names associated with file I/O methods */
327 
328 const char  *cs_file_access_name[]
329   = {N_("default"),
330      N_("standard input and output, serial access"),
331      N_("standard input and output, parallel access"),
332      N_("non-collective MPI-IO, independent file open/close"),
333      N_("non-collective MPI-IO, collective file open/close"),
334      N_("collective MPI-IO")};
335 
336 /* names associated with MPI-IO positioning */
337 
338 #if defined(HAVE_MPI_IO)
339 const char *cs_file_mpi_positioning_name[] = {N_("explicit offsets"),
340                                               N_("individual file pointers")};
341 #endif
342 
343 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
344 
345 /*============================================================================
346  * Private function definitions
347  *============================================================================*/
348 
349 /*----------------------------------------------------------------------------
350  * Evaluate an access method, transforming default to actual value.
351  *
352  * parameters:
353  *   m <-- access method
354  *   w <-- true if write access (false for readonly)
355  *
356  * returns:
357  *   actual access method
358  *----------------------------------------------------------------------------*/
359 
360 static cs_file_access_t
_access_method(cs_file_access_t m,bool w)361 _access_method(cs_file_access_t  m,
362                bool              w)
363 {
364   cs_file_access_t  _m = m;
365 
366   /* Handle default */
367 
368   if (_m == CS_FILE_DEFAULT) {
369 
370 #if defined(HAVE_MPI)
371 #  if defined(HAVE_MPI_IO)
372     _m = CS_FILE_MPI_COLLECTIVE;
373 #  else
374     _m = CS_FILE_STDIO_PARALLEL;
375 #  endif
376 #else
377     _m = CS_FILE_STDIO_SERIAL;
378 #endif
379 
380   }
381 
382   /* Restrict to possible values */
383 
384 #if defined(HAVE_MPI)
385 #  if !defined(HAVE_MPI_IO)
386   _m = CS_MAX(_m, CS_FILE_STDIO_PARALLEL);
387 #  endif
388   if (cs_glob_mpi_comm == MPI_COMM_NULL)
389     _m = CS_FILE_STDIO_SERIAL;
390 #else
391   _m = CS_FILE_STDIO_SERIAL;
392 #endif
393 
394   if (w && _m == CS_FILE_STDIO_PARALLEL)
395     _m = CS_FILE_STDIO_SERIAL;
396 
397   return _m;
398 }
399 
400 #if defined(HAVE_MPI)
401 
402 /*----------------------------------------------------------------------------
403  * Initialize an cs_file_serializer_t structure.
404  *
405  * The buf_block_size argument is optional, and may be used when the buffer
406  * on rank 0 is larger than (global_num_end - global_num_start)*size*stride
407  * bytes. If zero, a block size of (global_num_end - global_num_start) on
408  * rank 0 is assumed; a buffer may not be smaller than this, as it must
409  * initially contain all data on rank 0's block.
410  *
411  * parameters:
412  *   s                <-> pointer to structure that should be initialized
413  *   size             <-- size of each item of data in bytes
414  *   global_num_start <-- global number of first block item (1 to n numbering)
415  *   global_num_end   <-- global number of past-the end block item
416  *                        (1 to n numbering)
417  *   buf_block_size   <-- Local data buffer block size, or 0 for default
418  *                        global_num_end - global_num_start
419  *                        (only useful on rank 0)
420  *   buf              <-- pointer to local block data buffer
421  *   comm             <-- associated MPI communicator
422  *----------------------------------------------------------------------------*/
423 
424 static void
_serializer_init(cs_file_serializer_t * s,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end,size_t buf_block_size,void * buf,MPI_Comm comm)425 _serializer_init(cs_file_serializer_t  *s,
426                  size_t                 size,
427                  cs_gnum_t              global_num_start,
428                  cs_gnum_t              global_num_end,
429                  size_t                 buf_block_size,
430                  void                  *buf,
431                  MPI_Comm               comm)
432 {
433   cs_lnum_t l_count = 0;
434 
435   s->range[0] = global_num_start;
436   s->range[1] = global_num_end;
437 
438   s->size = size;
439 
440   if (s->range[1] > s->range[0])
441     l_count = s->range[1] - s->range[0];
442 
443   /* Get local rank and size of the current MPI communicator */
444 
445   if (comm != MPI_COMM_NULL) {
446 
447     MPI_Comm_rank(comm, &(s->rank_id));
448     MPI_Comm_size(comm, &(s->n_ranks));
449 
450     s->next_rank_id = 0;
451     s->next_g_num = global_num_start;
452 
453     /* Initialize counter */
454 
455     if (s->rank_id == 0)
456       BFT_MALLOC(s->count, s->n_ranks, cs_lnum_t);
457     else
458       s->count = NULL;
459 
460     MPI_Gather(&l_count, 1, CS_MPI_LNUM, s->count, 1, CS_MPI_LNUM, 0, comm);
461 
462     /* Allocate local buffer if necessary, or point to external buffer */
463 
464     s->buf = buf;
465     s->recv_buf = NULL;
466 
467     if (s->rank_id == 0) {
468       int i;
469       cs_lnum_t _max_block_size = 0;
470       cs_lnum_t _buf_block_size = CS_MAX((cs_lnum_t)buf_block_size, l_count);
471       for (i = 0; i < s->n_ranks; i++)
472         _max_block_size = CS_MAX(_max_block_size, s->count[i]);
473       if (_max_block_size > _buf_block_size)
474         BFT_MALLOC(s->recv_buf, _max_block_size*size, unsigned char);
475       else
476         s->recv_buf = buf;
477     }
478 
479   }
480 
481   else { /* if (comm == MPI_COMM_NULL) */
482 
483     s->rank_id = -1;
484     s->n_ranks = 0;
485 
486     s->next_rank_id = 0;
487     s->next_g_num = 0;
488 
489     s->count = NULL;
490 
491     s->buf = buf;
492     s->recv_buf = NULL;
493 
494   }
495 
496   s->comm = comm;
497 }
498 
499 /*----------------------------------------------------------------------------
500  * Finalize an cs_file_serializer_t structure.
501  *
502  * parameters:
503  *   s <-- pointer to structure that should be finalized
504  *----------------------------------------------------------------------------*/
505 
506 static void
_serializer_finalize(cs_file_serializer_t * s)507 _serializer_finalize(cs_file_serializer_t  *s)
508 {
509   s->next_rank_id = 0;
510   s->next_g_num = 1;
511 
512   if (s->count != NULL)
513     BFT_FREE(s->count);
514 
515   if (s->recv_buf != s->buf && s->recv_buf != NULL)
516     BFT_FREE(s->recv_buf);
517 }
518 
519 #endif /* defined(HAVE_MPI) */
520 
/*----------------------------------------------------------------------------
 * Convert data from "little-endian" to "big-endian" or the reverse.
 *
 * The byte order of each item is reversed. The memory areas pointed to by
 * src and dest should overlap either exactly or not at all.
 *
 * When dest and src differ, every byte of dest is written, including the
 * middle byte of items of odd size (for which size 1 is simply a copy).
 *
 * parameters:
 *   dest --> pointer to converted data location.
 *   src  <-- pointer to source data location.
 *   size <-- size of each item of data in bytes.
 *   ni   <-- number of data items.
 *----------------------------------------------------------------------------*/

static void
_swap_endian(void        *dest,
             const void  *src,
             size_t       size,
             size_t       ni)
{
  size_t  i, ib;

  const size_t  half = size / 2;
  const int  odd = (size % 2 == 1);

  unsigned char  *pdest = (unsigned char *)dest;
  const unsigned char  *psrc = (const unsigned char *)src;

  for (i = 0; i < ni; i++) {

    unsigned char  *d = pdest + i*size;
    const unsigned char  *s = psrc + i*size;

    for (ib = 0; ib < half; ib++) {

      /* Save the low byte first, as d may alias s (in-place swap) */

      const unsigned char  tmpswap = s[ib];
      d[ib] = s[(size - 1) - ib];
      d[(size - 1) - ib] = tmpswap;

    }

    /* The middle byte of an odd-sized item keeps its position, but must
       still reach dest (a harmless self-assignment when swapping in place) */

    if (odd)
      d[half] = s[half];

  }
}
563 
564 /*----------------------------------------------------------------------------
565  * Open a file using standard C IO.
566  *
567  * parameters:
568  *   f    <-- pointer to file handler
569  *
570  * returns:
571  *   0 in case of success, error number in case of failure
572  *----------------------------------------------------------------------------*/
573 
574 static int
_file_open(cs_file_t * f)575 _file_open(cs_file_t  *f)
576 {
577   int retval = 0;
578 
579   assert(f != NULL);
580 
581   if (f->sh != NULL)
582     return 0;
583 
584   /* Compressed with gzip ? (currently for reading only) */
585 
586 #if defined(HAVE_ZLIB)
587 
588   if (f->gzh != NULL)
589     return 0;
590 
591   if (f->mode == CS_FILE_MODE_READ) {
592 
593     bool gzipped = false;
594 
595     size_t l = strlen(f->name);
596     if (l > 3 && (strncmp((f->name + l-3), ".gz", 3) == 0))
597       gzipped = true;
598 
599     if (gzipped) {
600       f->gzh = gzopen(f->name, "r");
601 
602       if (f->gzh == NULL) {
603         const char *err_str
604           = (errno == 0) ? zError(Z_MEM_ERROR) : strerror(errno);
605         retval = (errno == 0) ? Z_MEM_ERROR : errno;
606         bft_error(__FILE__, __LINE__, 0,
607                   _("Error opening file \"%s\":\n\n"
608                     "  %s"), f->name, err_str);
609       }
610       return retval;
611     }
612 
613   }
614 
615 #endif
616 
617   /* The file handler exists and the corresponding file is closed */
618 
619   switch (f->mode) {
620   case CS_FILE_MODE_APPEND:
621     if (f->rank == 0)
622       f->sh = fopen(f->name, "ab");
623     else
624       f->sh = fopen(f->name, "a+b");
625     break;
626   case CS_FILE_MODE_WRITE:
627     if (f->rank == 0)
628       f->sh = fopen(f->name, "wb");
629     else
630       f->sh = fopen(f->name, "a+b");
631     break;
632   default:
633     assert(f->mode == CS_FILE_MODE_READ);
634     f->sh = fopen(f->name, "rb");
635   }
636 
637   if (f->sh == NULL) {
638     bft_error(__FILE__, __LINE__, 0,
639               _("Error opening file \"%s\":\n\n"
640                 "  %s"), f->name, strerror(errno));
641     retval = errno;
642   }
643 
644   return retval;
645 }
646 
647 /*----------------------------------------------------------------------------
648  * Close a file using standard C IO.
649  *
650  * parameters:
651  *   f <-> pointer to file handler
652  *
653  * returns:
654  *   0 in case of success, -1 in case of failure
655  *----------------------------------------------------------------------------*/
656 
657 static int
_file_close(cs_file_t * f)658 _file_close(cs_file_t  *f)
659 {
660   int retval = 0;
661 
662   if (f->sh != NULL)
663     retval = fclose(f->sh);
664 
665   /* Compressed with gzip ? (currently for reading only) */
666 
667 #if defined(HAVE_ZLIB)
668 
669   else if (f->gzh != NULL) {
670     retval = gzclose(f->gzh);
671     if (retval != 0) {
672       bft_error(__FILE__, __LINE__, 0,
673                 _("Error closing file \"%s\":\n\n"
674                   "  %s"), f->name, gzerror(f->gzh, &retval));
675       return retval;
676     }
677     f->gzh = NULL;
678   }
679 
680 #endif
681 
682   if (retval != 0) {
683     bft_error(__FILE__, __LINE__, 0,
684               _("Error closing file \"%s\":\n\n"
685                 "  %s"), f->name, strerror(errno));
686     retval = errno;
687   }
688   f->sh = NULL;
689 
690   return retval;
691 }
692 
693 /*----------------------------------------------------------------------------
694  * Read data to a buffer using standard C IO.
695  *
696  * parameters:
697  *   f    <-- cs_file_t descriptor
698  *   buf  --> pointer to location receiving data
699  *   size <-- size of each item of data in bytes
700  *   ni   <-- number of items to read
701  *
702  * returns:
703  *   the (local) number of items (not bytes) sucessfully read;
704  *----------------------------------------------------------------------------*/
705 
706 static size_t
_file_read(cs_file_t * f,void * buf,size_t size,size_t ni)707 _file_read(cs_file_t  *f,
708            void       *buf,
709            size_t      size,
710            size_t      ni)
711 {
712   size_t retval = 0;
713 
714   if (f->sh != NULL) {
715 
716     if (ni != 0)
717       retval = fread(buf, size, ni, f->sh);
718 
719     /* In case of error, determine error type */
720 
721     if (retval != ni) {
722       int err_num = ferror(f->sh);
723       if (err_num != 0)
724         bft_error(__FILE__, __LINE__, 0,
725                   _("Error reading file \"%s\":\n\n  %s"),
726                   f->name, strerror(err_num));
727       else if (feof(f->sh) != 0)
728         bft_error(__FILE__, __LINE__, 0,
729                   _("Premature end of file \"%s\""), f->name);
730       else
731         bft_error(__FILE__, __LINE__, 0,
732                   _("Error reading file \"%s\""), f->name);
733     }
734 
735     return retval;
736   }
737 
738 #if defined(HAVE_ZLIB)
739 
740   else if (f->gzh != NULL) {
741 
742     if (ni != 0)
743       retval = fread(buf, size, ni, f->sh);
744 
745     size_t rec_size = size * ni;
746 
747     retval = ((size_t)gzread(f->gzh, buf, rec_size)) / size;
748 
749     if (retval != ni) {
750       int err_num = 0;
751       const char *err_str = gzerror(f->gzh, &err_num);
752       if (err_num != 0)
753         bft_error(__FILE__, __LINE__, 0,
754                   _("Error reading file \"%s\":\n\n  %s"),
755                   f->name, err_str);
756       else if (gzeof(f->gzh) != 0)
757         bft_error(__FILE__, __LINE__, 0,
758                   _("Premature end of file \"%s\""), f->name);
759       else
760         bft_error(__FILE__, __LINE__, 0,
761                   _("Error reading file \"%s\""), f->name);
762     }
763 
764     return retval;
765 
766   }
767 
768 #endif /* defined(HAVE_ZLIB) */
769 
770   assert(0);
771 
772   return retval;
773 }
774 
775 /*----------------------------------------------------------------------------
776  * Write data to a file using standard C IO.
777  *
778  * parameters:
779  *   f    <-- cs_file_t descriptor
780  *   buf  --> pointer to location receiving data
781  *   size <-- size of each item of data in bytes
782  *   ni   <-- number of items to read
783  *
784  * returns:
785  *   the (local) number of items (not bytes) sucessfully read;
786  *----------------------------------------------------------------------------*/
787 
788 static size_t
_file_write(cs_file_t * f,const void * buf,size_t size,size_t ni)789 _file_write(cs_file_t   *f,
790             const void  *buf,
791             size_t       size,
792             size_t       ni)
793 {
794   size_t retval = 0;
795 
796   assert(f->sh != NULL);
797 
798   if (ni != 0)
799     retval = fwrite(buf, size, ni, f->sh);
800 
801   /* In case of error, determine error type */
802 
803   if (retval != ni) {
804     int err_num = ferror(f->sh);
805     if (err_num != 0)
806       bft_error(__FILE__, __LINE__, 0,
807                 _("Error writing file \"%s\":\n\n  %s"),
808                 f->name, strerror(err_num));
809     else
810       bft_error(__FILE__, __LINE__, 0,
811                 _("Error writing file \"%s\""), f->name);
812   }
813 
814   return retval;
815 }
816 
817 /*----------------------------------------------------------------------------
818  * Sets a file's position indicator using standard C IO.
819  *
820  * This function may call the libc's fseek() or fseeko() function.
821  * The C 99 standard specifies that for a text file, the offset
822  * argument to fseek() should be zero or a value returned by an earlier
823  * successful call to ftell().
824  *
825  * A successful call to this function clears the end-of-file indicator for
826  * this file.
827  *
828  * parameters:
829  *   f      <-> file descriptor.
830  *   offset <-- add to position specified to whence to obtain new
831  *              position, measured in characters from the beginning of
832  *              the file.
833  *   whence <-- beginning if CS_FILE_SEEK_SET, current if
834  *              CS_FILE_SEEK_CUR, or end-of-file if CS_FILE_SEEK_END.
835  *
836  * returns:
837  *   0 upon success, nonzero otherwise.
838  *----------------------------------------------------------------------------*/
839 
840 static int
_file_seek(cs_file_t * f,cs_file_off_t offset,cs_file_seek_t whence)841 _file_seek(cs_file_t       *f,
842            cs_file_off_t    offset,
843            cs_file_seek_t   whence)
844 {
845   static int _stdio_seek[3] = {SEEK_SET, SEEK_CUR, SEEK_END};
846 
847   int _whence = _stdio_seek[whence];
848   int retval = 0;
849 
850   const char err_fmt[] = "Error setting position in file \"%s\":\n\n  %s";
851 
852   /* Convert cs_file_seek to stdio values */
853 
854   assert(f != NULL);
855 
856   if (f->sh != NULL) {
857 
858 #if (SIZEOF_LONG < 8)
859 
860     /* For 32-bit systems, large file support may be necessary */
861 
862 # if defined(HAVE_FSEEKO) && (_FILE_OFFSET_BITS == 64)
863 
864     retval = fseeko(f->sh, (off_t)offset, _whence);
865 
866     if (retval != 0)
867       bft_error(__FILE__, __LINE__, errno, _(err_fmt),
868                 f->name, strerror(errno));
869 # else
870 
871     /* Test if offset larger than allowed */
872 
873     long _offset = offset;
874 
875     if (_offset == offset) {
876       retval = fseek(f->sh, (long)offset, _whence);
877       if (retval != 0)
878         bft_error(__FILE__, __LINE__, errno, _(err_fmt),
879                   f->name, strerror(errno));
880     }
881     else {
882       retval = -1;
883       bft_error
884         (__FILE__, __LINE__, 0, _(err_fmt),
885          f->name,
886          _("sizeof(off_t) > sizeof(long) but fseeko() not available"));
887     }
888 
889 # endif /* defined(HAVE_FSEEKO) && (_FILE_OFFSET_BITS == 64) */
890 
891 #else /* SIZEOF_LONG >= 8 */
892 
893     /* For 64-bit systems, standard fseek should be enough */
894 
895     retval = fseek(f->sh, (long)offset, _whence);
896     if (retval != 0)
897       bft_error(__FILE__, __LINE__, errno, _(err_fmt),
898                 f->name, strerror(errno));
899 
900 #endif /* SIZEOF_LONG */
901   }
902 
903 #if defined(HAVE_ZLIB)
904 
905   else if (f->gzh != NULL) {
906 
907     retval = _cs_gzseek(f->gzh, (_cs_z_off_t)offset, _whence);
908 
909     if (retval != 0) {
910       int err_num = 0;
911       const char *err_str = gzerror(f->gzh, &err_num);
912       if (err_num == 0)
913         err_str = "";
914 
915       bft_error(__FILE__, __LINE__, 0, _(err_fmt),
916                 f->name, err_str);
917     }
918   }
919 
920 #endif
921 
922   return retval;
923 }
924 
925 /*----------------------------------------------------------------------------
926  * Obtain the current value of a file's position indicator.
927  *
928  * parameters:
929  *   f  <-- file descriptor.
930  *
931  * returns:
932  *   current value of the file's position indicator, or -1 in case of failure.
933  *----------------------------------------------------------------------------*/
934 
935 static cs_file_off_t
_file_tell(cs_file_t * f)936 _file_tell(cs_file_t  *f)
937 {
938   cs_file_off_t offset = 0;
939 
940   assert(f != NULL);
941 
942   if (f->sh != NULL) {
943 
944     /* For 32-bit systems, large file support may be necessary */
945 
946 #if (SIZEOF_LONG < 8)
947 
948 # if defined(HAVE_FSEEKO) && (_FILE_OFFSET_BITS == 64)
949     offset = ftello(f->sh);
950 # else
951     /*
952       Without ftello, ftell will fail above 2 Gigabytes, in which case
953       offset == -1 and errno == EOVERFLOW, but should work on smaller
954       files. We prefer not to be too strict about fseeko availability, as
955       the only 32-bit case without ftello we have encountered is Cygwin
956       (for which ftello requires additional non-default libraries), which
957       is expected to be used mainly for small cases.
958     */
959     offset = ftell(f->sh);
960 # endif
961 
962     /* For 64-bit systems, standard ftell should be enough */
963 
964 #else /* SIZEOF_LONG >= 8 */
965     offset = ftell(f->sh);
966 #endif
967 
968   }
969 
970   if (offset < 0)
971     bft_error(__FILE__, __LINE__, 0,
972               _("Error obtaining position in file \"%s\":\n\n  %s"),
973               f->name, strerror(errno));
974 
975 #if defined(HAVE_ZLIB)
976 
977   else if (f->gzh != NULL) {
978     offset = (cs_file_off_t)_cs_gztell(f->gzh);
979 
980     if (offset < 0) {
981       int err_num = 0;
982       const char *err_str = gzerror(f->gzh, &err_num);
983       if (err_num == 0)
984         err_str = "";
985 
986       bft_error(__FILE__, __LINE__, 0,
987                 _("Error obtaining position in file \"%s\":\n\n  %s"),
988                 f->name, err_str);
989     }
990   }
991 
992 #endif
993 
994   return offset;
995 }
996 
997 /*----------------------------------------------------------------------------
998  * Formatted input from a text file if possible (as fgets()).
999  *
1000  * This function is the base for ecs_file_gets() and ecs_file_gets_try();
1001  * depending on the allow_eof parameter, failure to read a line due to
1002  * an end-of-file condition is considered an error or not.
1003  *
1004  * parameters:
1005  *   s:         --> buffer to which string is to be read.
1006  *   size:      <-- maximum number of characters to be read plus one.
1007  *   f:         <-- ecs_file_t descriptor.
1008  *   line:      <-> file line number if available, or NULL.
1009  *   allow_eof: <-- 1 if EOF is allowed, 0 if considered an error.
1010  *
1011  * returns:
1012  *   s on success, NULL on error or when end of file occurs and
1013  *   no characters have been read.
1014  *----------------------------------------------------------------------------*/
1015 
1016 static char *
_cs_file_gets(char * s,const int size,const cs_file_t * f,int * line,const int allow_eof)1017 _cs_file_gets(char             *s,
1018               const int         size,
1019               const cs_file_t  *f,
1020               int              *line,
1021               const int         allow_eof)
1022 {
1023   char *retval = NULL;
1024 
1025   assert(f != NULL);
1026 
1027   if (f->sh != NULL)
1028     retval = fgets(s, size, f->sh);
1029 
1030 #if defined(HAVE_ZLIB)
1031 
1032   else if (f->gzh != NULL)
1033     retval = gzgets(f->gzh, s, size);
1034 
1035 #endif /* defined(HAVE_ZLIB) */
1036 
1037   else {
1038     if (cs_glob_n_ranks > 1)
1039       bft_error(__FILE__, __LINE__, 0,
1040                 _("Error: reading from file \"%s\",\n"
1041                   "       which is not open on rank %d."),
1042               f->name, cs_glob_rank_id);
1043     else
1044       bft_error(__FILE__, __LINE__, 0,
1045                 _("Error: reading from file \"%s\",\n"
1046                   "       which is not open."),
1047                 f->name);
1048   }
1049 
1050   if (retval != NULL) {
1051 
1052     /* Convert Windows type line ending to Unix type line ending if needed */
1053     int i = strlen(s) - 2;
1054     if (i > 0) {
1055       if (s[i] == '\r' && s[i+1] == '\n') {
1056         s[i] = '\n';
1057         s[i+1] = '\0';
1058       }
1059     }
1060 
1061     if (line != NULL)
1062       *line += 1;
1063 
1064     return retval;
1065   }
1066 
1067   /* We should reach this point only in case of a failed read */
1068 
1069   assert(retval == NULL);
1070 
1071   int is_eof = 0;
1072   if (allow_eof) {
1073     if (feof(f->sh) != 0)
1074       is_eof = 1;
1075 
1076 #if defined(HAVE_ZLIB)
1077     else if (gzeof(f->gzh) != 0)
1078       is_eof = 1;
1079 #endif
1080   }
1081 
1082   if (allow_eof == 0 || is_eof == 0) {
1083 
1084     const char *err_str = cs_empty_string;
1085 
1086     if (f->sh != NULL) {
1087       int err_num = ferror(f->sh);
1088       if (err_num != 0)
1089         err_str = strerror(err_num);
1090     }
1091 
1092 #if defined(HAVE_ZLIB)
1093 
1094     else if (f->gzh != NULL) {
1095       int err_num = 0;
1096       err_str = gzerror(f->gzh, &err_num);
1097       if (err_num == 0)
1098         err_str = cs_empty_string;
1099     }
1100 
1101 #endif /* defined(HAVE_ZLIB) */
1102 
1103     if (line != NULL)
1104       bft_error(__FILE__, __LINE__, 0,
1105                 _("Error reading line %d of file \"%s\":\n\n  %s"),
1106                 *line, f->name, err_str);
1107     else
1108       bft_error(__FILE__, __LINE__, 0,
1109                 _("Error reading text file \"%s\":\n\n  %s"),
1110                 f->name, err_str);
1111   }
1112 
1113   return retval;
1114 }
1115 
1116 /*----------------------------------------------------------------------------
1117  * Read data to a buffer, distributing a contiguous part of it to each
1118  * process associated with a file.
1119  *
1120  * Each process should receive a (possibly empty) block of the data,
1121  * and we should have:
1122  *   global_num_start at rank 0 = 1
1123  *   global_num_start at rank i+1 = global_num_end at rank i.
1124  * Otherwise, behavior (especially positioning for future reads) is undefined.
1125  *
1126  * This version does not use MPI-IO
1127  *
1128  * parameters:
1129  *   f                <-- cs_file_t descriptor
1130  *   buf              --> pointer to location receiving data
1131  *   size             <-- size of each item of data in bytes
1132  *   global_num_start <-- global number of first block item (1 to n numbering)
1133  *   global_num_end   <-- global number of past-the end block item
1134  *                        (1 to n numbering)
1135  *
1136  * returns:
1137  *   the (local) number of items (not bytes) sucessfully read;
1138  *----------------------------------------------------------------------------*/
1139 
1140 static size_t
_file_read_block_s(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)1141 _file_read_block_s(cs_file_t  *f,
1142                    void       *buf,
1143                    size_t      size,
1144                    cs_gnum_t   global_num_start,
1145                    cs_gnum_t   global_num_end)
1146 {
1147   size_t retval = 0;
1148 
1149   if (f->rank == 0)
1150     retval = _file_read(f,
1151                         buf,
1152                         size,
1153                         (size_t)(global_num_end - global_num_start));
1154 
1155 #if defined(HAVE_MPI)
1156 
1157   if (f->comm != MPI_COMM_NULL) {
1158 
1159     MPI_Status status;
1160 
1161     cs_lnum_t loc_count = global_num_end - global_num_start;
1162     int _counts[64];
1163     int *counts = NULL;
1164 
1165     MPI_Datatype ent_type = MPI_BYTE;
1166     size_t _size = size;
1167 
1168     if (f->rank == 0) {
1169       if (f->n_ranks < 64)
1170         counts = _counts;
1171       else
1172         BFT_MALLOC(counts, f->n_ranks, int);
1173     }
1174 
1175     /* Exchange counts */
1176 
1177     MPI_Gather(&loc_count, 1, MPI_INT, counts, 1, MPI_INT, 0, f->comm);
1178 
1179     /* Rank 0 reads data for other ranks from file and distributes it */
1180 
1181     if (f->rank == 0) {
1182 
1183       int dist_rank;
1184       cs_lnum_t _buf_size = global_num_end - global_num_start;
1185       unsigned char *_buf = NULL;
1186 
1187       /* Allocate exchange buffer */
1188 
1189       for (dist_rank = 1; dist_rank < f->n_ranks; dist_rank++)
1190         _buf_size = CS_MAX(_buf_size, counts[dist_rank]);
1191 
1192       BFT_MALLOC(_buf, _buf_size*size, unsigned char);
1193 
1194       if (_buf_size*size > INT_MAX) {
1195         MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1196         MPI_Type_commit(&ent_type);
1197         _size = 1;
1198       }
1199 
1200       /* Loop on distant ranks */
1201 
1202       for (dist_rank = 1; dist_rank < f->n_ranks; dist_rank++) {
1203 
1204         if (counts[dist_rank] == 0)
1205           continue;
1206 
1207         /* Read data from file */
1208 
1209         counts[dist_rank]
1210           = (int)_file_read(f, _buf, size, (size_t)counts[dist_rank]);
1211 
1212         /* Send to corresponding rank */
1213 
1214         MPI_Send(_buf, counts[dist_rank]*_size, ent_type, dist_rank,
1215                  CS_FILE_MPI_TAG, f->comm);
1216 
1217       } /* End of loop on distant ranks */
1218 
1219       BFT_FREE(_buf);
1220 
1221     }
1222 
1223     /* Other ranks receive data from rank 0 */
1224 
1225     else if (loc_count > 0) {
1226 
1227       if (loc_count*size > INT_MAX) {
1228         MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1229         MPI_Type_commit(&ent_type);
1230         _size = 1;
1231       }
1232 
1233       /* Receive data */
1234 
1235       MPI_Recv(buf, (int)(loc_count*_size), ent_type, 0,
1236                CS_FILE_MPI_TAG, f->comm, &status);
1237 
1238       MPI_Get_count(&status, ent_type, &loc_count);
1239       retval = loc_count / _size;
1240 
1241     }
1242 
1243     if (ent_type != MPI_BYTE)
1244       MPI_Type_free(&ent_type);
1245 
1246     if (counts != NULL && counts != _counts)
1247       BFT_FREE(counts);
1248   }
1249 
1250 #endif /* defined(HAVE_MPI) */
1251 
1252   return retval;
1253 }
1254 
1255 /*----------------------------------------------------------------------------
1256  * Read data to a buffer, distributing a contiguous part of it to each
1257  * process associated with a file.
1258  *
1259  * Each process should receive a (possibly empty) block of the data,
1260  * and we should have:
1261  *   global_num_start at rank 0 = 1
1262  *   global_num_start at rank i+1 = global_num_end at rank i.
1263  * Otherwise, behavior (especially positioning for future reads) is undefined.
1264  *
1265  * This version does not use MPI-IO
1266  *
1267  * parameters:
1268  *   f                <-- cs_file_t descriptor
1269  *   buf              --> pointer to location receiving data
1270  *   size             <-- size of each item of data in bytes
1271  *   global_num_start <-- global number of first block item (1 to n numbering)
1272  *   global_num_end   <-- global number of past-the end block item
1273  *                        (1 to n numbering)
1274  *
1275  * returns:
1276  *   the (local) number of items (not bytes) sucessfully read;
1277  *----------------------------------------------------------------------------*/
1278 
1279 static size_t
_file_read_block_p(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)1280 _file_read_block_p(cs_file_t  *f,
1281                    void       *buf,
1282                    size_t      size,
1283                    cs_gnum_t   global_num_start,
1284                    cs_gnum_t   global_num_end)
1285 {
1286   size_t retval = 0;
1287   cs_gnum_t loc_count = global_num_end - global_num_start;
1288 
1289   if (loc_count > 0) {
1290 
1291     /* Only rank 0 initially opened (to check existence/rights, and
1292        as all ranks might not participate), so open here if needed */
1293 
1294     cs_file_off_t offset = f->offset + ((global_num_start - 1) * size);
1295 
1296     if (f->sh == NULL)
1297       _file_open(f);
1298 
1299     if (_file_seek(f, offset, CS_FILE_SEEK_SET) == 0)
1300       retval = _file_read(f, buf, size, (size_t)loc_count);
1301 
1302   }
1303 
1304   return retval;
1305 }
1306 
1307 /*----------------------------------------------------------------------------
1308  * Write data to a file, each associated process providing a contiguous part
1309  * of this data.
1310  *
1311  * Each process should provide a (possibly empty) block of the data,
1312  * and we should have:
1313  *   global_num_start at rank 0 = 1
1314  *   global_num_start at rank i+1 = global_num_end at rank i.
1315  * Otherwise, behavior (especially positioning for future reads) is undefined.
1316  *
1317  * This version does not use MPI-IO
1318  *
1319  * parameters:
1320  *   f                <-- cs_file_t descriptor
1321  *   buf              <-> pointer to location containing data
1322  *   size             <-- size of each item of data in bytes
1323  *   global_num_start <-- global number of first block item (1 to n numbering)
1324  *   global_num_end   <-- global number of past-the end block item
1325  *                        (1 to n numbering)
1326  *
1327  * returns:
1328  *   the (local) number of items (not bytes) sucessfully written;
1329  *----------------------------------------------------------------------------*/
1330 
1331 static size_t
_file_write_block_s(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)1332 _file_write_block_s(cs_file_t  *f,
1333                     void       *buf,
1334                     size_t      size,
1335                     cs_gnum_t   global_num_start,
1336                     cs_gnum_t   global_num_end)
1337 {
1338   size_t retval = 0;
1339 
1340   if (f->n_ranks == 1)
1341     retval = _file_write(f,
1342                          buf,
1343                          size,
1344                          (size_t)(global_num_end - global_num_start));
1345 
1346 #if defined(HAVE_MPI)
1347 
1348   if (f->n_ranks > 1) {
1349 
1350     cs_file_serializer_t  s;
1351     cs_lnum_t  local_count;
1352     cs_lnum_t *count = NULL;
1353     void  *write_buf = NULL;
1354 
1355     _serializer_init(&s,
1356                      size,
1357                      global_num_start,
1358                      global_num_end,
1359                      0,
1360                      buf,
1361                      f->io_comm);
1362 
1363     do {
1364 
1365       int dist_rank = s.next_rank_id;
1366 
1367       write_buf = cs_file_serializer_advance(&s, NULL);
1368 
1369       if (write_buf != NULL) /* only on rank 0 */
1370         s.count[dist_rank]
1371           = (cs_lnum_t)_file_write(f,
1372                                    write_buf,
1373                                    size,
1374                                    (size_t)(s.count[dist_rank]));
1375 
1376     } while (write_buf != NULL);
1377 
1378     /* Exchange return codes */
1379 
1380     if (s.rank_id == 0)
1381       count = s.count;
1382     else
1383       BFT_MALLOC(count, s.n_ranks, cs_lnum_t);
1384 
1385     MPI_Scatter(count, 1, CS_MPI_LNUM,
1386                 &local_count, 1, CS_MPI_LNUM,
1387                 0, f->comm);
1388     retval = local_count;
1389 
1390     if (s.rank_id != 0)
1391       BFT_FREE(count);
1392 
1393     _serializer_finalize(&s);
1394   }
1395 
1396 #endif /* defined(HAVE_MPI) */
1397 
1398   return retval;
1399 }
1400 
1401 /*----------------------------------------------------------------------------
1402  * Write data to a file, each associated process providing a contiguous part
1403  * of this data.
1404  *
1405  * Each process should provide a (possibly empty) block of the data,
1406  * and we should have:
1407  *   global_num_start at rank 0 = 1
1408  *   global_num_start at rank i+1 = global_num_end at rank i.
1409  * Otherwise, behavior (especially positioning for future reads) is undefined.
1410  *
1411  * This version does not use MPI-IO
1412  *
1413  * parameters:
1414  *   f                <-- cs_file_t descriptor
1415  *   buf              <-> pointer to location containing data
1416  *   size             <-- size of each item of data in bytes
1417  *   global_num_start <-- global number of first block item (1 to n numbering)
1418  *   global_num_end   <-- global number of past-the end block item
1419  *                        (1 to n numbering)
1420  *
1421  * returns:
1422  *   the (local) number of items (not bytes) sucessfully written;
1423  *----------------------------------------------------------------------------*/
1424 
1425 static size_t
_file_write_block_p(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)1426 _file_write_block_p(cs_file_t  *f,
1427                     void       *buf,
1428                     size_t      size,
1429                     cs_gnum_t   global_num_start,
1430                     cs_gnum_t   global_num_end)
1431 {
1432   size_t retval = 0;
1433   cs_gnum_t loc_count = 0;
1434 
1435   if (global_num_end > global_num_start) {
1436 
1437     loc_count = global_num_end - global_num_start;
1438 
1439     if (f->n_ranks == 1)
1440       retval = _file_write(f, buf, size, (size_t)loc_count);
1441 
1442 #if defined(HAVE_MPI)
1443 
1444     if (f->n_ranks > 1) {
1445 
1446       cs_file_off_t offset = f->offset + ((global_num_start - 1) * size);
1447 
1448       /* Only rank 0 initially opened (to check existence/rights, as
1449          all ranks might not participate), so open here if needed */
1450 
1451       if (f->sh == NULL)
1452         _file_open(f);
1453 
1454       if (_file_seek(f, offset, SEEK_SET) == 0)
1455         retval = _file_write(f, buf, size, (size_t)loc_count);
1456 
1457     }
1458 
1459 #endif /* defined(HAVE_MPI) */
1460 
1461   }
1462 
1463   return retval;
1464 }
1465 
1466 #if defined(HAVE_MPI)
1467 
1468 /*----------------------------------------------------------------------------
1469  * Gather blocks sizes across several ranks and allocates matching buffer
1470  *
1471  * The caller is responsible for freeing the returned buffer once it is
1472  * no longer needed.
1473  *
1474  * parameters:
1475  *   f                <-- cs_file_t descriptor
1476  *   size             <-- size of each item of data in bytes
1477  *   global_num_start <-- global number of first block item (1 to n numbering)
1478  *   global_num_end   <-> pointer to global number of past-the end block item
1479  *                        (1 to n numbering)
1480  *
1481  * returns:
1482  *   pointer to gathered values buffer for gathering rank, NULL for others
1483  *----------------------------------------------------------------------------*/
1484 
1485 static void *
_gather_block_sizes(cs_file_t * f,size_t size,cs_gnum_t global_num_start,cs_gnum_t * global_num_end)1486 _gather_block_sizes(cs_file_t   *f,
1487                     size_t       size,
1488                     cs_gnum_t    global_num_start,
1489                     cs_gnum_t   *global_num_end)
1490 {
1491   unsigned char *gather_buf = NULL;
1492 
1493   assert(f != NULL);
1494 
1495   cs_gnum_t _global_num_end = *global_num_end;
1496 
1497   static const int tag = 'f'+'a'+'g'+'g'+'r'+'e'+'g'+'a'+'t'+'e';
1498 
1499   /* Aggregator rank */
1500 
1501   if (f->rank % f->rank_step == 0) {
1502 
1503     f->block_size[0] = _global_num_end - global_num_start;
1504     size_t block_size = f->block_size[0];
1505 
1506     int rank_end = f->rank + f->rank_step;
1507     if (rank_end >= f->n_ranks)
1508       rank_end = f->n_ranks;
1509 
1510     int n_aggr = rank_end - f->rank;
1511 
1512     /* Receive counts */
1513 
1514     for (int i = 1; i < n_aggr; i++) {
1515       int src_rank = f->rank + i;
1516       MPI_Status status;
1517       MPI_Recv(f->block_size + i, 1, CS_MPI_GNUM,
1518                src_rank, tag, f->comm, &status);
1519       block_size += f->block_size[i];
1520     }
1521 
1522     /* Allocate buffer */
1523 
1524     size_t alloc_size = size * (size_t)block_size;
1525     BFT_MALLOC(gather_buf, alloc_size, unsigned char);
1526 
1527     *global_num_end = global_num_start + block_size;
1528   }
1529 
1530   /* Sending rank */
1531 
1532   else {
1533 
1534     int dest_rank = f->rank - (f->rank % f->rank_step);
1535     cs_gnum_t block_size = _global_num_end - global_num_start;
1536     f->block_size[0] = block_size;
1537 
1538     /* Send counts */
1539 
1540     MPI_Send(&block_size, 1, CS_MPI_GNUM,
1541              dest_rank, tag, f->comm);
1542 
1543     *global_num_end = global_num_start;  /* For empty message */
1544   }
1545 
1546   return (void *)gather_buf;
1547 }
1548 
1549 /*----------------------------------------------------------------------------
1550  * Gather blocks across several ranks
1551  *
1552  * The caller is responsible for freeing the returned buffer once it is
1553  * no longer needed.
1554  *
1555  * parameters:
1556  * parameters:
1557  *   f                <-- cs_file_t descriptor
1558  *   buf              <-> pointer to location containing data
1559  *   size             <-- size of each item of data in bytes
1560  *   global_num_start <-- global number of first block item (1 to n numbering)
1561  *   global_num_end   <-> pointer to global number of past-the end block item
1562  *                        (1 to n numbering)
1563  *
1564  * returns:
1565  *   pointer to gathered values buffer for gathering rank, NULL for others
1566  *----------------------------------------------------------------------------*/
1567 
1568 static void *
_gather_blocks(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t * global_num_end)1569 _gather_blocks(cs_file_t   *f,
1570                void        *buf,
1571                size_t       size,
1572                cs_gnum_t    global_num_start,
1573                cs_gnum_t   *global_num_end)
1574 {
1575   assert(f != NULL);
1576 
1577   unsigned char *gather_buf = _gather_block_sizes(f,
1578                                                   size,
1579                                                   global_num_start,
1580                                                   global_num_end);
1581 
1582   MPI_Datatype ent_type = MPI_BYTE;
1583   size_t _size = size;
1584 
1585   static const int tag = 'f'+'a'+'g'+'g'+'r'+'e'+'g'+'a'+'t'+'e';
1586 
1587   /* Aggregator rank */
1588 
1589   if (f->rank % f->rank_step == 0) {
1590 
1591     int rank_end = f->rank + f->rank_step;
1592     if (rank_end >= f->n_ranks)
1593       rank_end = f->n_ranks;
1594 
1595     int n_aggr = rank_end - f->rank;
1596 
1597     /* Precaution for large messages */
1598 
1599     for (int i = 1; i < n_aggr; i++) {
1600       if (_size > 1 && (size * (size_t)(f->block_size[i])) > INT_MAX) {
1601         MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1602         MPI_Type_commit(&ent_type);
1603         _size = 1;
1604       }
1605     }
1606 
1607     /* Copy local data to gather buffer */
1608 
1609     size_t gather_buf_count = size * (size_t)(f->block_size[0]);
1610     memcpy(gather_buf, buf, gather_buf_count);
1611 
1612     /* Receive data */
1613 
1614     for (int i = 1; i < n_aggr; i++) {
1615       int src_rank = f->rank + i;
1616       MPI_Status status;
1617       size_t add_buf_count = size * (size_t)(f->block_size[i]);
1618       int recv_count = f->block_size[i];
1619       if (recv_count == 0)
1620         continue;
1621       if (size * (size_t)recv_count > INT_MAX) {
1622         MPI_Recv(gather_buf + gather_buf_count, recv_count, ent_type,
1623                  src_rank, tag, f->comm, &status);
1624       }
1625       else {
1626         recv_count *= size;
1627         MPI_Recv(gather_buf + gather_buf_count, recv_count, MPI_BYTE,
1628                  src_rank, tag, f->comm, &status);
1629       }
1630       gather_buf_count += add_buf_count;
1631     }
1632 
1633   }
1634 
1635   /* Sending rank */
1636 
1637   else {
1638 
1639     int dest_rank = f->rank - (f->rank % f->rank_step);
1640     cs_gnum_t block_size = f->block_size[0];
1641 
1642     size_t message_size = _size * (size_t)block_size;
1643 
1644     /* Precaution for large messages */
1645 
1646     if (message_size > INT_MAX) {
1647       MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1648       MPI_Type_commit(&ent_type);
1649       _size = 1;
1650     }
1651 
1652     int send_count = _size * block_size;
1653 
1654     /* Send data */
1655 
1656     if (send_count > 0)
1657       MPI_Send(buf, send_count, ent_type, dest_rank, tag, f->comm);
1658   }
1659 
1660   if (ent_type != MPI_BYTE)
1661     MPI_Type_free(&ent_type);
1662 
1663   return (void *)gather_buf;
1664 }
1665 
1666 /*----------------------------------------------------------------------------
1667  * Gather blocks across several ranks
1668  *
1669  * The caller is responsible for freeing the returned buffer once it is
1670  * no longer needed.
1671  *
1672  * parameters:
1673  * parameters:
1674  *   f                <-- cs_file_t descriptor
1675  *   io_buf           <-> pointer to location containing read data
1676  *   buf              <-> pointer to location containing scattered data
1677  *   size             <-- size of each item of data in bytes
1678  *
1679  * returns:
1680  *   number of values in block after scatter
1681  *----------------------------------------------------------------------------*/
1682 
1683 static int
_scatter_blocks(cs_file_t * f,void * io_buf,void * buf,size_t size)1684 _scatter_blocks(cs_file_t   *f,
1685                 void        *io_buf,
1686                 void        *buf,
1687                 size_t       size)
1688 {
1689   assert(f != NULL);
1690 
1691   MPI_Datatype ent_type = MPI_BYTE;
1692   size_t _size = size;
1693 
1694   static const int tag = 'f'+'a'+'g'+'g'+'r'+'e'+'g'+'a'+'t'+'e';
1695 
1696   /* Aggregator rank */
1697 
1698   if (f->rank % f->rank_step == 0) {
1699 
1700     int rank_end = f->rank + f->rank_step;
1701     if (rank_end >= f->n_ranks)
1702       rank_end = f->n_ranks;
1703 
1704     int n_aggr = rank_end - f->rank;
1705 
1706     /* Precaution for large messages */
1707 
1708     for (int i = 1; i < n_aggr; i++) {
1709       if (_size > 1 && (size * (size_t)(f->block_size[i])) > INT_MAX) {
1710         MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1711         MPI_Type_commit(&ent_type);
1712         _size = 1;
1713       }
1714     }
1715 
1716     /* Send local data to destination buffer */
1717 
1718     unsigned char *scatter_buf = io_buf;
1719     size_t scatter_buf_count = size * (size_t)(f->block_size[0]);
1720     memcpy(buf, io_buf, scatter_buf_count);
1721 
1722     /* Send data */
1723 
1724     for (int i = 1; i < n_aggr; i++) {
1725       int src_rank = f->rank + i;
1726       size_t add_buf_count = size * (size_t)(f->block_size[i]);
1727       int send_count = f->block_size[i];
1728       if (send_count == 0)
1729         continue;
1730       if (size * (size_t)send_count > INT_MAX) {
1731         MPI_Send(scatter_buf + scatter_buf_count, send_count, ent_type,
1732                  src_rank, tag, f->comm);
1733       }
1734       else {
1735         send_count *= size;
1736         MPI_Send(scatter_buf + scatter_buf_count, send_count, MPI_BYTE,
1737                  src_rank, tag, f->comm);
1738       }
1739       scatter_buf_count += add_buf_count;
1740     }
1741 
1742   }
1743 
1744   /* Receving rank */
1745 
1746   else {
1747 
1748     int dest_rank = f->rank - (f->rank % f->rank_step);
1749     cs_gnum_t block_size = f->block_size[0];
1750 
1751     size_t message_size = _size * (size_t)block_size;
1752 
1753     /* Precaution for large messages */
1754 
1755     if (message_size > INT_MAX) {
1756       MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1757       MPI_Type_commit(&ent_type);
1758       _size = 1;
1759     }
1760 
1761     int recv_count = _size * block_size;
1762     MPI_Status status;
1763 
1764     /* Receive data */
1765 
1766     if (recv_count > 0)
1767       MPI_Recv(buf, recv_count, ent_type, dest_rank, tag, f->comm, &status);
1768   }
1769 
1770   if (ent_type != MPI_BYTE)
1771     MPI_Type_free(&ent_type);
1772 
1773   return f->block_size[0];
1774 }
1775 
1776 #endif /* defined(HAVE_MPI) */
1777 
1778 #if defined(HAVE_MPI_IO)
1779 
1780 /*----------------------------------------------------------------------------
1781  * Output MPI error message.
1782  *
1783  * This supposes that the default MPI errorhandler is not used
1784  *
1785  * parameters:
1786  *   file_name  <-- file name
1787  *   error_code <-- associated MPI error code
1788  *
1789  * returns:
1790  *   0 in case of success, system error code in case of failure
1791  *----------------------------------------------------------------------------*/
1792 
1793 static void
_mpi_io_error_message(const char * file_name,int error_code)1794 _mpi_io_error_message
1795 (
1796  const char  *file_name,
1797  int          error_code
1798 )
1799 {
1800   char buffer[MPI_MAX_ERROR_STRING];
1801   int  buffer_len;
1802 
1803   MPI_Error_string(error_code, buffer, &buffer_len);
1804 
1805   bft_error(__FILE__, __LINE__, 0,
1806             _("MPI IO error for file: %s\n"
1807               "Error type: %s"), file_name, buffer);
1808 }
1809 
1810 /*----------------------------------------------------------------------------
1811  * Open a file using MPI IO.
1812  *
1813  * parameters:
1814  *   f     <-- pointer to file handler
1815  *   mode  <-- file access mode: read, write, or append
1816  *
1817  * returns:
1818  *   MPI_SUCCESS in case of success, MPI error code in case of failure
1819  *----------------------------------------------------------------------------*/
1820 
1821 static int
_mpi_file_open(cs_file_t * f,cs_file_mode_t mode)1822 _mpi_file_open(cs_file_t       *f,
1823                cs_file_mode_t   mode)
1824 {
1825   int amode = MPI_MODE_RDWR;
1826   int retval = 0;
1827 
1828   assert(f != NULL);
1829 
1830   if (f->fh != MPI_FILE_NULL)
1831     return 0;
1832 
1833   /* Set access mode */
1834 
1835   f->mode = mode;
1836 
1837   if (f->mode == CS_FILE_MODE_APPEND)
1838     amode = MPI_MODE_WRONLY | MPI_MODE_APPEND;
1839 
1840   else if (f->mode == CS_FILE_MODE_WRITE) {
1841     int rank;
1842     if (f->method == CS_FILE_MPI_INDEPENDENT && f->rank > 0)
1843       amode = MPI_MODE_WRONLY;
1844     else
1845       amode = MPI_MODE_WRONLY | MPI_MODE_CREATE;
1846     MPI_Comm_rank(f->comm, &rank);
1847     if (rank < 1)
1848       cs_file_remove(f->name);
1849   }
1850 
1851   else if (f->mode == CS_FILE_MODE_READ)
1852     amode = MPI_MODE_RDONLY;
1853 
1854   /* Open file (for independent access, only on rank 0 initially) */
1855 
1856   if (f->io_comm != MPI_COMM_NULL) {
1857     retval = MPI_File_open(f->io_comm, f->name, amode, f->info, &(f->fh));
1858     if (retval == MPI_SUCCESS)
1859       retval = MPI_File_get_position(f->fh, &(f->offset));
1860   }
1861 
1862   if (retval != MPI_SUCCESS)
1863     _mpi_io_error_message(f->name, retval);
1864 
1865   if (f->mode == CS_FILE_MODE_APPEND)
1866     f->offset = cs_file_tell(f);
1867 
1868   return retval;
1869 }
1870 
1871 /*----------------------------------------------------------------------------
1872  * Open a file independently of other ranks if required using MPI IO.
1873  *
1874  * This function is used in the case of independent file IO, to allow
1875  * files to be opened only on ranks reading/writing nonempty blocks.
1876  *
1877  * parameters:
1878  *   f     <-- pointer to file handler
1879  *
1880  * returns:
1881  *   MPI_SUCCESS in case of success, MPI error code in case of failure
1882  *----------------------------------------------------------------------------*/
1883 
1884 static int
_mpi_file_ensure_isopen(cs_file_t * f)1885 _mpi_file_ensure_isopen(cs_file_t *f)
1886 {
1887   int retval = 0;
1888 
1889   assert(f != NULL);
1890 
1891   if (f->io_comm != MPI_COMM_NULL && f->fh == MPI_FILE_NULL) {
1892 
1893     int amode = MPI_MODE_RDWR;
1894     if (f->mode == CS_FILE_MODE_APPEND)
1895       amode = MPI_MODE_WRONLY | MPI_MODE_APPEND;
1896     else if (f->mode == CS_FILE_MODE_WRITE)
1897       amode = MPI_MODE_WRONLY | MPI_MODE_CREATE;
1898     else if (f->mode == CS_FILE_MODE_READ)
1899       amode = MPI_MODE_RDONLY;
1900 
1901     retval = MPI_File_open(MPI_COMM_SELF, f->name, amode, f->info, &(f->fh));
1902     if (retval != MPI_SUCCESS)
1903       _mpi_io_error_message(f->name, retval);
1904 
1905   }
1906 
1907   return retval;
1908 }
1909 
1910 /*----------------------------------------------------------------------------
1911  * Close a file using MPI IO.
1912  *
1913  * parameters:
1914  *   f <-> pointer to file handler
1915  *
1916  * returns:
1917  *   MPI_SUCCESS in case of success, MPI error code in case of failure
1918  *----------------------------------------------------------------------------*/
1919 
1920 static int
_mpi_file_close(cs_file_t * f)1921 _mpi_file_close(cs_file_t  *f)
1922 {
1923   int retval = 0;
1924 
1925   assert(f != NULL);
1926 
1927   if (f->fh == MPI_FILE_NULL)
1928     return 0;
1929 
1930   /* Close file */
1931 
1932   retval = MPI_File_close(&(f->fh));
1933 
1934   if (retval != MPI_SUCCESS)
1935     _mpi_io_error_message(f->name, retval);
1936 
1937   return retval;
1938 }
1939 
1940 /*----------------------------------------------------------------------------
1941  * Read data to a buffer, distributing a contiguous part of it to each
1942  * process associated with a file.
1943  *
1944  * Each process should receive a block of the data, and we should have:
1945  *   global_num_start at rank 0 = 1
1946  *   global_num_start at rank i+1 = global_num_end at rank i.
1947  * Otherwise, behavior (especially positioning for future reads) is undefined.
1948  *
1949  * There are 3 variants, depending on the semantics:
1950  *   _mpi_file_read_block_noncoll (non-collective)
1951  *   _mpi_file_read_block_eo (using explicit offsets)
1952  *   _mpi_file_read_block_ip (using individual pointers, setting a file view)
1953  *
1954  * parameters:
1955  *   f                <-- cs_file_t descriptor
1956  *   buf              --> pointer to location receiving data
1957  *   size             <-- size of each item of data in bytes
1958  *   global_num_start <-- global number of first block item (1 to n numbering)
1959  *   global_num_end   <-- global number of past-the end block item
1960  *                        (1 to n numbering)
1961  *
1962  * returns:
1963  *   the (local) number of items (not bytes) sucessfully read;
1964  *----------------------------------------------------------------------------*/
1965 
1966 static size_t
_mpi_file_read_block_noncoll(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)1967 _mpi_file_read_block_noncoll(cs_file_t  *f,
1968                              void       *buf,
1969                              size_t      size,
1970                              cs_gnum_t   global_num_start,
1971                              cs_gnum_t   global_num_end)
1972 {
1973   cs_gnum_t gcount = (global_num_end - global_num_start)*size;
1974   size_t retval = 0;
1975 
1976   if (f->fh == MPI_FILE_NULL)
1977     return retval;
1978 
1979   if (gcount > 0) {
1980 
1981     int errcode, count;
1982     MPI_Status status;
1983 
1984     MPI_Offset disp = f->offset + ((global_num_start - 1) * size);
1985     MPI_Datatype ent_type = MPI_BYTE;
1986 
1987     if (gcount > INT_MAX) {
1988       MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
1989       MPI_Type_commit(&ent_type);
1990       count = global_num_end - global_num_start;
1991     }
1992     else
1993       count = gcount;
1994 
1995     errcode = _mpi_file_ensure_isopen(f);
1996 
1997     if (errcode == MPI_SUCCESS) {
1998 
1999       if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS)
2000         errcode = MPI_File_read_at(f->fh, disp, buf, count, ent_type, &status);
2001 
2002       else {
2003         errcode = MPI_File_seek(f->fh, disp, MPI_SEEK_SET);
2004         if (errcode == MPI_SUCCESS)
2005           errcode = MPI_File_read(f->fh, buf, count, ent_type, &status);
2006       }
2007 
2008     }
2009 
2010     if (errcode != MPI_SUCCESS)
2011       _mpi_io_error_message(f->name, errcode);
2012 
2013     MPI_Get_count(&status, ent_type, &count);
2014 
2015     if (ent_type != MPI_BYTE) {
2016       MPI_Type_free(&ent_type);
2017       retval = count;
2018     }
2019     else
2020       retval = count / size;
2021 
2022   }
2023 
2024   return retval;
2025 }
2026 
2027 static size_t
_mpi_file_read_block_eo(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2028 _mpi_file_read_block_eo(cs_file_t  *f,
2029                         void       *buf,
2030                         size_t      size,
2031                         cs_gnum_t   global_num_start,
2032                         cs_gnum_t   global_num_end)
2033 {
2034   MPI_Status status;
2035   int errcode, count;
2036   cs_gnum_t gcount = (global_num_end - global_num_start)*size;
2037   MPI_Datatype ent_type = MPI_BYTE;
2038   MPI_Offset disp = f->offset + ((global_num_start - 1) * size);
2039 
2040   size_t retval = 0;
2041 
2042   assert(gcount == 0 || f->fh != MPI_FILE_NULL);
2043 
2044   if (f->fh == MPI_FILE_NULL)
2045     return retval;
2046 
2047   if (gcount > INT_MAX) {
2048     MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
2049     MPI_Type_commit(&ent_type);
2050     count = global_num_end - global_num_start;
2051   }
2052   else
2053     count = gcount;
2054 
2055   errcode = MPI_File_read_at_all(f->fh, disp, buf, count, ent_type, &status);
2056 
2057   if (errcode != MPI_SUCCESS)
2058     _mpi_io_error_message(f->name, errcode);
2059 
2060   if (count > 0)
2061     MPI_Get_count(&status, ent_type, &count);
2062 
2063   if (ent_type != MPI_BYTE) {
2064     MPI_Type_free(&ent_type);
2065     retval = count;
2066   }
2067   else {
2068     if (count > 0)
2069       retval = count / size;
2070     else
2071       retval = 0;
2072   }
2073 
2074   return retval;
2075 }
2076 
2077 static size_t
_mpi_file_read_block_ip(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2078 _mpi_file_read_block_ip(cs_file_t  *f,
2079                         void       *buf,
2080                         size_t      size,
2081                         cs_gnum_t   global_num_start,
2082                         cs_gnum_t   global_num_end)
2083 {
2084   int errcode;
2085   int lengths[1];
2086   MPI_Aint disps[1];
2087   MPI_Status status;
2088   MPI_Datatype file_type;
2089 
2090   int count = 0;
2091   char datarep[] = "native";
2092   MPI_Datatype ent_type = MPI_BYTE;
2093   cs_gnum_t gcount = (global_num_end - global_num_start) * size;
2094   cs_gnum_t gdisp = (global_num_start - 1) * size;
2095 
2096   size_t retval = 0;
2097 
2098   assert(gcount == 0 || f->fh != MPI_FILE_NULL);
2099 
2100   if (f->fh == MPI_FILE_NULL)
2101     return retval;
2102 
2103   if (gcount > INT_MAX || gdisp > INT_MAX) {
2104     MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
2105     MPI_Type_commit(&ent_type);
2106     lengths[0] = global_num_end - global_num_start;
2107     disps[0] = global_num_start - 1;
2108   }
2109   else {
2110     lengths[0] = gcount;
2111     disps[0] = gdisp;
2112   }
2113 
2114   MPI_Type_create_hindexed(1, lengths, disps, ent_type, &file_type);
2115   MPI_Type_commit(&file_type);
2116 
2117   MPI_File_set_view(f->fh, f->offset, ent_type, file_type, datarep, f->info);
2118 
2119   errcode = MPI_File_read_all(f->fh, buf, lengths[0], ent_type, &status);
2120 
2121   if (errcode != MPI_SUCCESS)
2122     _mpi_io_error_message(f->name, errcode);
2123 
2124   MPI_Type_free(&file_type);
2125 
2126   if (lengths[0] > 0)
2127     MPI_Get_count(&status, ent_type, &count);
2128 
2129   if (ent_type != MPI_BYTE) {
2130     MPI_Type_free(&ent_type);
2131     retval = count;
2132   }
2133   else
2134     retval = count / size;
2135 
2136   return retval;
2137 }
2138 
2139 /*----------------------------------------------------------------------------
2140  * Write data to a file, each associated process providing a contiguous part
2141  * of this data.
2142  *
2143  * Each process should provide a (possibly empty) block of the data,
2144  * and we should have:
2145  *   global_num_start at rank 0 = 1
2146  *   global_num_start at rank i+1 = global_num_end at rank i.
2147  * Otherwise, behavior (especially positioning for future reads) is undefined.
2148  *
2149  * There are 3 variants, depending on the semantics:
2150  *   _mpi_file_write_block_noncoll (non-collective)
2151  *   _mpi_file_write_block_eo (using explicit offsets)
2152  *   _mpi_file_write_block_ip (using individual pointers, setting a file view)
2153  *
2154  * parameters:
2155  *   f                <-- cs_file_t descriptor
2156  *   buf              --> pointer to location receiving data
2157  *   size             <-- size of each item of data in bytes
2158  *   global_num_start <-- global number of first block item (1 to n numbering)
2159  *   global_num_end   <-- global number of past-the end block item
2160  *                        (1 to n numbering)
2161  *
2162  * returns:
2163  *   the (local) number of items (not bytes) sucessfully read;
2164  *----------------------------------------------------------------------------*/
2165 
2166 static size_t
_mpi_file_write_block_noncoll(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2167 _mpi_file_write_block_noncoll(cs_file_t  *f,
2168                               void       *buf,
2169                               size_t      size,
2170                               cs_gnum_t   global_num_start,
2171                               cs_gnum_t   global_num_end)
2172 {
2173   cs_gnum_t gcount = (global_num_end - global_num_start)*size;
2174   size_t retval = 0;
2175 
2176   if (f->fh == MPI_FILE_NULL)
2177     return retval;
2178 
2179   if (gcount > 0) {
2180 
2181     int errcode, count;
2182     MPI_Status status;
2183     MPI_Offset disp = f->offset + ((global_num_start - 1) * size);
2184     MPI_Datatype ent_type = MPI_BYTE;
2185 
2186     if (gcount > INT_MAX) {
2187       MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
2188       MPI_Type_commit(&ent_type);
2189       count = global_num_end - global_num_start;
2190     }
2191     else
2192       count = gcount;
2193 
2194     errcode = _mpi_file_ensure_isopen(f);
2195 
2196     if (errcode == MPI_SUCCESS) {
2197 
2198       if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS)
2199         errcode = MPI_File_write_at(f->fh, disp, buf, count, ent_type, &status);
2200 
2201       else {
2202         errcode = MPI_File_seek(f->fh, disp, MPI_SEEK_SET);
2203         if (errcode == MPI_SUCCESS)
2204           errcode = MPI_File_write(f->fh, buf, count, ent_type, &status);
2205       }
2206 
2207     }
2208 
2209     if (errcode != MPI_SUCCESS)
2210       _mpi_io_error_message(f->name, errcode);
2211 
2212     if (count > 0)
2213       MPI_Get_count(&status, ent_type, &count);
2214 
2215     if (ent_type != MPI_BYTE) {
2216       MPI_Type_free(&ent_type);
2217       retval = count;
2218     }
2219     else
2220       retval = count / size;
2221 
2222   }
2223 
2224   return retval;
2225 }
2226 
2227 static size_t
_mpi_file_write_block_eo(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2228 _mpi_file_write_block_eo(cs_file_t  *f,
2229                          void       *buf,
2230                          size_t      size,
2231                          cs_gnum_t   global_num_start,
2232                          cs_gnum_t   global_num_end)
2233 {
2234   MPI_Status status;
2235   int errcode, count;
2236 
2237   MPI_Datatype ent_type = MPI_BYTE;
2238   MPI_Offset disp = f->offset + ((global_num_start - 1) * size);
2239   cs_gnum_t gcount = (global_num_end - global_num_start)*size;
2240 
2241   size_t retval = 0;
2242 
2243   assert(gcount == 0 || f->fh != MPI_FILE_NULL);
2244 
2245   if (f->fh == MPI_FILE_NULL)
2246     return retval;
2247 
2248   if (gcount > INT_MAX) {
2249     MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
2250     MPI_Type_commit(&ent_type);
2251     count = global_num_end - global_num_start;
2252   }
2253   else
2254     count = gcount;
2255 
2256   errcode = MPI_File_write_at_all(f->fh, disp, buf, count, ent_type, &status);
2257 
2258   if (errcode != MPI_SUCCESS)
2259     _mpi_io_error_message(f->name, errcode);
2260 
2261   if (count > 0)
2262     MPI_Get_count(&status, ent_type, &count);
2263 
2264   if (ent_type != MPI_BYTE) {
2265     MPI_Type_free(&ent_type);
2266     retval = count;
2267   }
2268   else
2269     retval = count / size;
2270 
2271   return retval;
2272 }
2273 
2274 static size_t
_mpi_file_write_block_ip(cs_file_t * f,void * buf,size_t size,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2275 _mpi_file_write_block_ip(cs_file_t  *f,
2276                          void       *buf,
2277                          size_t      size,
2278                          cs_gnum_t   global_num_start,
2279                          cs_gnum_t   global_num_end)
2280 {
2281   int lengths[1];
2282   MPI_Aint disps[1];
2283   MPI_Status status;
2284   MPI_Datatype file_type;
2285 
2286   int errcode = MPI_SUCCESS, count = 0;
2287   char datarep[] = "native";
2288   MPI_Datatype ent_type = MPI_BYTE;
2289   cs_gnum_t gcount = (global_num_end - global_num_start) * size;
2290   cs_gnum_t gdisp = (global_num_start - 1) * size;
2291 
2292   size_t retval = 0;
2293 
2294   assert(gcount == 0 || f->fh != MPI_FILE_NULL);
2295 
2296   if (f->fh == MPI_FILE_NULL)
2297     return retval;
2298 
2299   if (gcount > INT_MAX || gdisp > INT_MAX) {
2300     MPI_Type_contiguous(size, MPI_BYTE, &ent_type);
2301     MPI_Type_commit(&ent_type);
2302     lengths[0] = global_num_end - global_num_start;
2303     disps[0] = global_num_start - 1;
2304   }
2305   else {
2306     lengths[0] = gcount;
2307     disps[0] = gdisp;
2308   }
2309 
2310   MPI_Type_create_hindexed(1, lengths, disps, ent_type, &file_type);
2311   MPI_Type_commit(&file_type);
2312 
2313   MPI_File_set_view(f->fh, f->offset, ent_type, file_type, datarep, f->info);
2314 
2315   errcode = MPI_File_write_all(f->fh, buf, (int)(lengths[0]), ent_type,
2316                                &status);
2317 
2318   if (errcode != MPI_SUCCESS)
2319     _mpi_io_error_message(f->name, errcode);
2320 
2321   MPI_Type_free(&file_type);
2322 
2323   if (lengths[0] > 0)
2324     MPI_Get_count(&status, ent_type, &count);
2325 
2326   if (ent_type != MPI_BYTE) {
2327     MPI_Type_free(&ent_type);
2328     retval = count;
2329   }
2330   else
2331     retval = count / size;
2332 
2333   return retval;
2334 }
2335 
2336 #endif /* defined(HAVE_MPI_IO) */
2337 
/*----------------------------------------------------------------------------
 * Ordering function for arrays of C strings (usable with qsort).
 *
 * Each argument is the address of an array element, i.e. a pointer to
 * a pointer to char.
 *
 * parameters:
 *   a <-- pointer to first string pointer
 *   b <-- pointer to second string pointer
 *
 * returns:
 *   result of strcmp() on the two strings
 *----------------------------------------------------------------------------*/

static int
_cs_file_compare_names(const void  *a,
                       const void  *b)
{
  const char *const *name_a = a;
  const char *const *name_b = b;

  return strcmp(*name_a, *name_b);
}
2355 
2356 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
2357 
2358 /*=============================================================================
2359  * Public function definitions
2360  *============================================================================*/
2361 
2362 #if defined(HAVE_MPI)
2363 
2364 /*----------------------------------------------------------------------------*/
2365 /*!
2366  * \brief Create a file descriptor and open the associated file.
2367  *
2368  * By default, data is written or read as native data. This behavior may be
2369  * modified by cs_file_set_swap_endian().
2370  *
2371  * \param[in]  name        file name
2372  * \param[in]  mode        file acces mode: read, write, or append
2373  * \param[in]  method      file access method
2374  * \param[in]  hints       associated hints for MPI-IO, or MPI_INFO_NULL
2375  * \param[in]  block_comm  handle to MPI communicator used for distributed file
2376  *                         block access (may be a subset of comm if some ranks
2377  *                         do not directly access distributed data blocks)
2378  * \param[in]  comm        handle to main MPI communicator
2379  *
2380  * \return pointer to cs_file_t file descriptor (NULL in case of failure);
2381  *   currently, errors are fatal.
2382  */
2383 /*----------------------------------------------------------------------------*/
2384 
2385 #else
2386 
2387 /*----------------------------------------------------------------------------*/
2388 /*!
2389  * \brief Create a file descriptor and open the associated file.
2390  *
2391  * By default, data is written or read as native data. This behavior may be
2392  * modified by cs_file_set_swap_endian().
2393  *
2394  * \param[in]  name        file name
2395  * \param[in]  mode        file access mode: read, write, or append
2396  * \param[in]  method      file access method (currently only C standard-IO
2397  *                         when built without MPI)
2398  * \param[in]  hints       associated hints for MPI-IO, or MPI_INFO_NULL
2399  * \param[in]  block_comm  handle to MPI communicator used for distributed
2400  *                         file block access (may be a subset of comm if some
2401  *                         ranks do not directly access distributed data blocks)
2402  * \param[in]  comm        handle to main MPI communicator
2403  *
2404  * \return pointer to cs_file_t file descriptor (NULL in case of failure);
2405  *   currently, errors are fatal.
2406  */
2407 /*----------------------------------------------------------------------------*/
2408 
2409 #endif
2410 
2411 #if defined(HAVE_MPI)
2412 
2413 cs_file_t *
cs_file_open(const char * name,cs_file_mode_t mode,cs_file_access_t method,MPI_Info hints,MPI_Comm block_comm,MPI_Comm comm)2414 cs_file_open(const char        *name,
2415              cs_file_mode_t     mode,
2416              cs_file_access_t   method,
2417              MPI_Info           hints,
2418              MPI_Comm           block_comm,
2419              MPI_Comm           comm)
2420 
2421 #else
2422 
2423 cs_file_t *
2424 cs_file_open(const char        *name,
2425              cs_file_mode_t     mode,
2426              cs_file_access_t   method)
2427 
2428 #endif
2429 {
2430   int errcode = 0;
2431   cs_file_t * f = NULL;
2432 
2433   BFT_MALLOC(f, 1, cs_file_t);
2434 
2435   f->sh = NULL;
2436 
2437 #if defined(HAVE_ZLIB)
2438   f->gzh = NULL;
2439 #endif
2440 
2441 #if defined(HAVE_MPI)
2442   f->comm = MPI_COMM_NULL;
2443   f->io_comm = MPI_COMM_NULL;
2444 #if defined(HAVE_MPI_IO)
2445   f->fh = MPI_FILE_NULL;
2446   f->info = hints;
2447 #endif
2448 #endif
2449 
2450   f->offset = 0;
2451 
2452   BFT_MALLOC(f->name, strlen(name) + 1, char);
2453   strcpy(f->name, name);
2454 
2455   f->mode = mode;
2456   f->method = method = _access_method(method, (mode != CS_FILE_MODE_READ));
2457 
2458   f->rank = 0;
2459   f->n_ranks = 1;
2460 
2461   f->swap_endian = false; /* Use native endianness by default */
2462 
2463   /* Set communicator */
2464 
2465 #if defined(HAVE_MPI)
2466   {
2467     int n_io_ranks = f->n_ranks;
2468 
2469     if (comm != MPI_COMM_NULL) {
2470       MPI_Comm_size(comm, &(f->n_ranks));
2471       if (f->n_ranks > 1) {
2472         f->comm = comm;
2473         f->io_comm = block_comm;
2474         MPI_Comm_rank(f->comm, &(f->rank));
2475         if (f->io_comm != f->comm) {
2476           int _n_io_ranks = 0;
2477           if (f->io_comm != MPI_COMM_NULL)
2478             MPI_Comm_size(f->io_comm, &_n_io_ranks);
2479           MPI_Allreduce(&_n_io_ranks, &n_io_ranks, 1, MPI_INT, MPI_MAX,
2480                         f->comm);
2481         }
2482       }
2483       else {
2484         f->comm = MPI_COMM_NULL;
2485         f->io_comm = MPI_COMM_NULL;
2486       }
2487     }
2488 
2489     if (n_io_ranks < 1)
2490       n_io_ranks = 1;
2491     f->rank_step = f->n_ranks / n_io_ranks;
2492     if (f->n_ranks % n_io_ranks)
2493       f->rank_step += 1;
2494 
2495     f->block_size = NULL;
2496     if (f->rank_step > 1) {
2497       if (f->io_comm != MPI_COMM_NULL)
2498         BFT_MALLOC(f->block_size, f->rank_step, cs_gnum_t);
2499       else
2500         BFT_MALLOC(f->block_size, 1, cs_gnum_t);
2501     }
2502 
2503     if (f->comm == MPI_COMM_NULL)
2504       f->method = CS_FILE_STDIO_SERIAL;
2505   }
2506 #else
2507   f->method = CS_FILE_STDIO_SERIAL;
2508 #endif
2509 
2510   /* Use MPI IO ? */
2511 
2512 #if !defined(HAVE_MPI_IO)
2513   if (f->method > CS_FILE_STDIO_PARALLEL)
2514     bft_error(__FILE__, __LINE__, 0,
2515               _("Error opening file:\n%s\n"
2516                 "MPI-IO is requested, but not available."),
2517               name);
2518 #endif
2519 
2520   /* Open file. In case of failure, destroy the allocated structure;
2521      this is only useful with a non-default error handler,
2522      as the program is terminated by default */
2523 
2524   if (f->method <= CS_FILE_STDIO_PARALLEL && f->rank == 0)
2525     errcode = _file_open(f);
2526 
2527 #if defined(HAVE_MPI_IO)
2528   if (f->method == CS_FILE_MPI_INDEPENDENT) {
2529     f->io_comm = MPI_COMM_SELF;
2530     if (f->rank == 0)
2531       errcode = _mpi_file_open(f, f->mode);
2532   }
2533   else if (f->method > CS_FILE_MPI_INDEPENDENT)
2534     errcode = _mpi_file_open(f, f->mode);
2535 #endif
2536 
2537   if (errcode != 0)
2538     f = cs_file_free(f);
2539 
2540   return f;
2541 }
2542 
2543 /*----------------------------------------------------------------------------*/
2544 /*!
2545  * \brief Create a file descriptor and open the associated file, using the
2546  *        default file communicator and access method.
2547  *
2548  * By default, data is written or read as native data. This behavior may be
2549  * modified by cs_file_set_swap_endian().
2550  *
2551  * \param[in]  name   file name
2552  * \param[in]  mode   file access mode: read, write, or append
2553  *
2554  * \return pointer to cs_file_t file descriptor (NULL in case of failure);
2555  *   currently, errors are fatal.
2556  */
2557 /*----------------------------------------------------------------------------*/
2558 
2559 cs_file_t *
cs_file_open_default(const char * name,cs_file_mode_t mode)2560 cs_file_open_default(const char      *name,
2561                      cs_file_mode_t   mode)
2562 {
2563   cs_file_t *f = NULL;
2564 
2565   if (mode == CS_FILE_MODE_READ) {
2566 #if defined(HAVE_MPI)
2567     f = cs_file_open(name,
2568                      mode,
2569                      _default_access_r,
2570                      _mpi_io_hints_r,
2571                      _mpi_io_comm,
2572                      cs_glob_mpi_comm);
2573 #else
2574     f = cs_file_open(name,
2575                      mode,
2576                      _default_access_r);
2577 #endif
2578   }
2579   else {
2580 #if defined(HAVE_MPI)
2581     f = cs_file_open(name,
2582                      mode,
2583                      _default_access_w,
2584                      _mpi_io_hints_w,
2585                      _mpi_io_comm,
2586                      cs_glob_mpi_comm);
2587 #else
2588     f = cs_file_open(name,
2589                      mode,
2590                      _default_access_w);
2591 #endif
2592   }
2593 
2594   return f;
2595 }
2596 
2597 /*----------------------------------------------------------------------------*/
2598 /*!
2599  * \brief Create a file descriptor and open the associated file, using the
2600  *        serial IO on the root rank.
2601  *
2602  * By default, data is written or read as native data. This behavior may be
2603  * modified by cs_file_set_swap_endian().
2604  *
2605  * \param[in]  name   file name
2606  * \param[in]  mode   file access mode: read, write, or append
2607  *
2608  * \return pointer to cs_file_t file descriptor (NULL in case of failure);
2609  *   currently, errors are fatal.
2610  */
2611 /*----------------------------------------------------------------------------*/
2612 
2613 cs_file_t *
cs_file_open_serial(const char * name,cs_file_mode_t mode)2614 cs_file_open_serial(const char      *name,
2615                     cs_file_mode_t   mode)
2616 {
2617   cs_file_t *f = NULL;
2618 
2619 #if defined(HAVE_MPI)
2620   f = cs_file_open(name,
2621                    mode,
2622                    CS_FILE_STDIO_SERIAL,
2623                    MPI_INFO_NULL,
2624                    MPI_COMM_NULL,
2625                    cs_glob_mpi_comm);
2626 #else
2627   f = cs_file_open(name,
2628                    mode,
2629                    CS_FILE_STDIO_SERIAL);
2630 #endif
2631 
2632   return f;
2633 }
2634 
2635 /*----------------------------------------------------------------------------*/
2636 /*!
2637  * \brief Destroy a file descriptor and close the associated file.
2638  *
2639  * \param[in, out]  f  file descriptor to destroy
2640  */
2641 /*----------------------------------------------------------------------------*/
2642 
2643 cs_file_t *
cs_file_free(cs_file_t * f)2644 cs_file_free(cs_file_t  *f)
2645 {
2646   cs_file_t  *_f = f;
2647 
2648   if (_f->sh != NULL)
2649     _file_close(_f);
2650 
2651 #if defined(HAVE_MPI_IO)
2652   else if (_f->fh != MPI_FILE_NULL)
2653     _mpi_file_close(_f);
2654   BFT_FREE(f->block_size);
2655 #endif
2656 
2657   BFT_FREE(_f->name);
2658   BFT_FREE(_f);
2659 
2660   return NULL;
2661 }
2662 
2663 /*----------------------------------------------------------------------------*/
2664 /*!
2665  * \brief Return a file's name.
2666  *
2667  * \param[in]  f  cs_file_t descriptor
2668  *
2669  * \return pointer to the file's name.
2670  */
2671 /*----------------------------------------------------------------------------*/
2672 
2673 const char *
cs_file_get_name(const cs_file_t * f)2674 cs_file_get_name(const cs_file_t  *f)
2675 {
2676   assert(f != NULL);
2677 
2678   return f->name;
2679 }
2680 
2681 /*----------------------------------------------------------------------------*/
2682 /*!
2683  * \brief Ensure that data is read or written in big-endian
2684  * (network standard) format.
2685  *
2686  * \param[in, out]  f  cs_file_t descriptor
2687  */
2688 /*----------------------------------------------------------------------------*/
2689 
2690 void
cs_file_set_big_endian(cs_file_t * f)2691 cs_file_set_big_endian(cs_file_t  *f)
2692 {
2693   unsigned  int_endian;
2694 
2695   /* Check if system is "big-endian" or "little-endian" */
2696 
2697   int_endian = 0;
2698   *((char *)(&int_endian)) = '\1';
2699 
2700   if (int_endian == 1)
2701     f->swap_endian = 1;
2702 
2703 #if defined(DEBUG) && !defined(NDEBUG)
2704 
2705   else {
2706     int_endian = 0;
2707     *((char *) (&int_endian) + sizeof(unsigned) - 1) = '\1';
2708     assert(int_endian == 1);
2709   }
2710 
2711 #endif
2712 }
2713 
2714 /*----------------------------------------------------------------------------*/
2715 /*!
2716  * \brief Return a file's byte-swapping behavior.
2717  *
2718  * \param[in]  f  cs_file_t descriptor
2719  *
2720  * \return 0 if file's endianness is the same as the system's, 1 otherwise.
2721  */
2722 /*----------------------------------------------------------------------------*/
2723 
2724 int
cs_file_get_swap_endian(const cs_file_t * f)2725 cs_file_get_swap_endian(const cs_file_t  *f)
2726 {
2727   assert(f != NULL);
2728 
2729   return f->swap_endian;
2730 }
2731 
2732 /*----------------------------------------------------------------------------*/
2733 /*!
2734  * \brief Set a file's byte-swapping behavior.
2735  *
2736  * \param[in, out]  f     cs_file_t descriptor
2737  * \param[in]       swap  1 if bytes must be swapped, 0 otherwise
2738  */
2739 /*----------------------------------------------------------------------------*/
2740 
2741 void
cs_file_set_swap_endian(cs_file_t * f,int swap)2742 cs_file_set_swap_endian(cs_file_t  *f,
2743                         int         swap)
2744 {
2745   assert(f != NULL);
2746 
2747   f->swap_endian = swap;
2748 }
2749 
2750 /*----------------------------------------------------------------------------*/
2751 /*!
2752  * \brief Read global data from a file, distributing it to all processes
2753  * associated with that file.
2754  *
2755  * \param[in]  f     cs_file_t descriptor
2756  * \param[out] buf   pointer to location receiving data
2757  * \param[in]  size  size of each item of data in bytes
2758  * \param[in]  ni    number of items to read
2759  *
2760  * \return the number of items (not bytes) sucessfully read;
2761  *         currently, errors are fatal.
2762  */
2763 /*----------------------------------------------------------------------------*/
2764 
2765 size_t
cs_file_read_global(cs_file_t * f,void * buf,size_t size,size_t ni)2766 cs_file_read_global(cs_file_t  *f,
2767                     void       *buf,
2768                     size_t      size,
2769                     size_t      ni)
2770 {
2771   size_t retval = 0;
2772 
2773   if (f->method <= CS_FILE_STDIO_PARALLEL) {
2774     if (f->rank == 0) {
2775       if (_file_seek(f, f->offset, CS_FILE_SEEK_SET) == 0)
2776         retval = _file_read(f, buf, size, ni);
2777     }
2778   }
2779 
2780 #if defined(HAVE_MPI_IO)
2781 
2782   else if ((f->method > CS_FILE_STDIO_PARALLEL)) {
2783 
2784     MPI_Status status;
2785     int errcode = MPI_SUCCESS, count = 0;
2786 
2787     if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS) {
2788       if (f->rank == 0) {
2789         errcode = MPI_File_read_at(f->fh,
2790                                    f->offset,
2791                                    buf,
2792                                    size*ni,
2793                                    MPI_BYTE,
2794                                    &status);
2795         MPI_Get_count(&status, MPI_BYTE, &count);
2796       }
2797     }
2798 
2799     else {
2800       MPI_Datatype file_type;
2801       MPI_Aint disps[1];
2802       int lengths[1];
2803       char datarep[] = "native";
2804       lengths[0] = ni * size;
2805       disps[0] = 0;
2806       MPI_Type_create_hindexed(1, lengths, disps, MPI_BYTE, &file_type);
2807       MPI_Type_commit(&file_type);
2808       MPI_File_set_view(f->fh, f->offset, MPI_BYTE, file_type,
2809                         datarep, f->info);
2810       if (f->rank == 0) {
2811         errcode = MPI_File_read(f->fh, buf, size*ni, MPI_BYTE, &status);
2812         MPI_Get_count(&status, MPI_BYTE, &count);
2813       }
2814       MPI_Type_free(&file_type);
2815     }
2816 
2817     if (errcode != MPI_SUCCESS)
2818       _mpi_io_error_message(f->name, errcode);
2819 
2820     retval = count / size;
2821 
2822   }
2823 
2824 #endif /* defined(HAVE_MPI_IO) */
2825 
2826 #if defined(HAVE_MPI)
2827   if (f->comm != MPI_COMM_NULL) {
2828     long _retval = retval;
2829     MPI_Bcast(buf, size*ni, MPI_BYTE, 0, f->comm);
2830     MPI_Bcast(&_retval, 1, MPI_LONG, 0, f->comm);
2831     retval = _retval;
2832   }
2833 #endif
2834 
2835   /* Update offset */
2836 
2837   f->offset += (cs_file_off_t)ni * (cs_file_off_t)size;
2838 
2839   if (f->swap_endian == true && size > 1)
2840     _swap_endian(buf, buf, size, retval);
2841 
2842   return retval;
2843 }
2844 
2845 /*----------------------------------------------------------------------------*/
2846 /*!
2847  * \brief Write global data to a file.
2848  *
2849  * Under MPI, data is only written by the associated communicator's root
2850  * rank. The buffers on other ranks are ignored, though the file offset
2851  * is updated (i.e. the call to this function is collective).
2852  *
2853  * \param[in]  f     cs_file_t descriptor
2854  * \param[in]  buf   pointer to location containing data
2855  * \param[in]  size  size of each item of data in bytes
2856  * \param[in]  ni    number of items to write
2857  *
2858  * \return the number of items (not bytes) sucessfully written;
2859  *         currently, errors are fatal.
2860  */
2861 /*----------------------------------------------------------------------------*/
2862 
2863 size_t
cs_file_write_global(cs_file_t * f,const void * buf,size_t size,size_t ni)2864 cs_file_write_global(cs_file_t   *f,
2865                      const void  *buf,
2866                      size_t       size,
2867                      size_t       ni)
2868 {
2869   size_t retval = ni;
2870 
2871   unsigned char _copybuf[1024];
2872   unsigned char *copybuf = _copybuf;
2873   const void *_buf = buf;
2874 
2875   /* Copy contents to ensure buffer constedness if necessary */
2876 
2877   if (   f->rank == 0
2878       && (   (f->swap_endian == true && size > 1)
2879           || (f->method > CS_FILE_STDIO_PARALLEL))) {
2880 
2881     if (size*ni > sizeof(_copybuf))
2882       BFT_MALLOC(copybuf, size*ni, unsigned char);
2883     memcpy(copybuf, buf, size*ni);
2884 
2885     if (f->swap_endian == true && size > 1)
2886       _swap_endian(copybuf, copybuf, size, ni);
2887 
2888     _buf = copybuf;
2889   }
2890 
2891   if (f->rank == 0 && f->sh != NULL && f->method <= CS_FILE_STDIO_PARALLEL) {
2892     if (f->method == CS_FILE_STDIO_PARALLEL) {
2893       if (_file_seek(f, f->offset, CS_FILE_SEEK_SET) != 0)
2894         retval = 0;
2895     }
2896     if (retval != 0)
2897       retval = _file_write(f, _buf, size, ni);
2898   }
2899 
2900 #if defined(HAVE_MPI_IO)
2901 
2902   else if ((f->method > CS_FILE_STDIO_PARALLEL)) {
2903 
2904     MPI_Status status;
2905     int errcode = MPI_SUCCESS, count = 0;
2906 
2907     if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS) {
2908       if (f->rank == 0) {
2909         errcode = MPI_File_write_at(f->fh,
2910                                     f->offset,
2911                                     copybuf,
2912                                     size*ni,
2913                                     MPI_BYTE,
2914                                     &status);
2915         MPI_Get_count(&status, MPI_BYTE, &count);
2916       }
2917     }
2918 
2919     else {
2920       MPI_Datatype file_type;
2921       MPI_Aint disps[1];
2922       int lengths[1];
2923       char datarep[] = "native";
2924       lengths[0] = ni * size;
2925       disps[0] = 0;
2926       MPI_Type_create_hindexed(1, lengths, disps, MPI_BYTE, &file_type);
2927       MPI_Type_commit(&file_type);
2928       MPI_File_set_view(f->fh, f->offset, MPI_BYTE,
2929                         file_type, datarep, f->info);
2930       if (f->rank == 0) {
2931         errcode = MPI_File_write(f->fh,
2932                                  copybuf,
2933                                  size*ni,
2934                                  MPI_BYTE,
2935                                  &status);
2936         MPI_Get_count(&status, MPI_BYTE, &count);
2937       }
2938       MPI_Type_free(&file_type);
2939     }
2940 
2941     if (errcode != MPI_SUCCESS)
2942       _mpi_io_error_message(f->name, errcode);
2943 
2944     retval = count / size;
2945 
2946   }
2947 
2948 #endif /* defined(HAVE_MPI_IO) */
2949 
2950   if (copybuf != _copybuf) /* Free allocated memory if necessary */
2951     BFT_FREE(copybuf);
2952 
2953 #if defined(HAVE_MPI)
2954   if (f->comm != MPI_COMM_NULL) {
2955     long _retval = retval;
2956     MPI_Bcast(&_retval, 1, MPI_LONG, 0, f->comm);
2957     retval = _retval;
2958   }
2959 #endif
2960 
2961   /* Update offset */
2962 
2963   f->offset += (cs_file_off_t)ni * (cs_file_off_t)size;
2964 
2965   return retval;
2966 }
2967 
2968 /*----------------------------------------------------------------------------*/
2969 /*!
2970  * \brief Read data to a buffer, distributing a contiguous part of it to each
2971  * process associated with a file.
2972  *
2973  * Each process should receive a (possibly empty) block of the data,
2974  * and we should have:
2975  *   global_num_start at rank 0 = 1
2976  *   global_num_start at rank i+1 = global_num_end at rank i.
2977  * Otherwise, behavior (especially positioning for future reads) is undefined.
2978  *
2979  * \param[in]  f                 cs_file_t descriptor
2980  * \param[out] buf               pointer to location receiving data
2981  * \param[in]  size              size of each item of data in bytes
2982  * \param[in]  stride            number of (interlaced) values per block item
2983  * \param[in]  global_num_start  global number of first block item
2984  *                               (1 to n numbering)
2985  * \param[in]  global_num_end    global number of past-the end block item
2986  *                               (1 to n numbering)
2987  *
2988  * \return the (local) number of items (not bytes) sucessfully read;
2989  *         currently, errors are fatal.
2990  */
2991 /*----------------------------------------------------------------------------*/
2992 
2993 size_t
cs_file_read_block(cs_file_t * f,void * buf,size_t size,size_t stride,cs_gnum_t global_num_start,cs_gnum_t global_num_end)2994 cs_file_read_block(cs_file_t  *f,
2995                    void       *buf,
2996                    size_t      size,
2997                    size_t      stride,
2998                    cs_gnum_t   global_num_start,
2999                    cs_gnum_t   global_num_end)
3000 {
3001   size_t retval = 0;
3002 
3003   cs_gnum_t global_num_end_last = global_num_end;
3004 
3005   cs_gnum_t _global_num_start = (global_num_start-1)*stride + 1;
3006   cs_gnum_t _global_num_end = (global_num_end-1)*stride + 1;
3007 
3008   if (_global_num_end < _global_num_start)
3009     _global_num_end = _global_num_start;
3010 
3011   void *_buf = buf;
3012 
3013 #if defined(HAVE_MPI)
3014   if (f->rank_step > 1)
3015     _buf = _gather_block_sizes(f,
3016                                size,
3017                                _global_num_start,
3018                                &_global_num_end);
3019 #endif
3020 
3021   assert(global_num_end >= global_num_start);
3022 
3023   switch(f->method) {
3024 
3025   case CS_FILE_STDIO_SERIAL:
3026     retval = _file_read_block_s(f,
3027                                 _buf,
3028                                 size,
3029                                 _global_num_start,
3030                                 _global_num_end);
3031     break;
3032 
3033   case CS_FILE_STDIO_PARALLEL:
3034     retval = _file_read_block_p(f,
3035                                 _buf,
3036                                 size,
3037                                 _global_num_start,
3038                                 _global_num_end);
3039     break;
3040 
3041 #if defined(HAVE_MPI_IO)
3042 
3043   case CS_FILE_MPI_INDEPENDENT:
3044   case CS_FILE_MPI_NON_COLLECTIVE:
3045     retval = _mpi_file_read_block_noncoll(f,
3046                                           _buf,
3047                                           size,
3048                                           _global_num_start,
3049                                           _global_num_end);
3050     break;
3051 
3052   case CS_FILE_MPI_COLLECTIVE:
3053 
3054     if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS)
3055       retval = _mpi_file_read_block_eo(f,
3056                                        _buf,
3057                                        size,
3058                                        _global_num_start,
3059                                        _global_num_end);
3060     else
3061       retval = _mpi_file_read_block_ip(f,
3062                                        _buf,
3063                                        size,
3064                                        _global_num_start,
3065                                        _global_num_end);
3066     break;
3067 
3068 #endif /* defined(HAVE_MPI_IO) */
3069 
3070   default:
3071     assert(0);
3072   }
3073 
3074   /* Update offset */
3075 
3076   assert(f->rank > 0 || global_num_start == 1);
3077 
3078 #if defined(HAVE_MPI)
3079   if (f->n_ranks > 1)
3080     MPI_Bcast(&global_num_end_last, 1, CS_MPI_GNUM, f->n_ranks-1, f->comm);
3081 #endif
3082 
3083   f->offset += ((global_num_end_last - 1) * size * stride);
3084 
3085 #if defined(HAVE_MPI)
3086   if (f->rank_step > 1) {
3087     retval = _scatter_blocks(f, _buf, buf, size);
3088     if (_buf != buf)
3089       BFT_FREE(_buf);
3090   }
3091 #endif
3092 
3093   if (f->swap_endian == true && size > 1)
3094     _swap_endian(buf, buf, size, retval);
3095 
3096   return retval;
3097 }
3098 
3099 /*----------------------------------------------------------------------------*/
3100 /*!
3101  * \brief Write data to a file, each associated process providing a
3102  * contiguous part of this data.
3103  *
3104  * Each process should provide a (possibly empty) block of the data,
3105  * and we should have:
3106  *   global_num_start at rank 0 = 1
3107  *   global_num_start at rank i+1 = global_num_end at rank i.
3108  * Otherwise, behavior (especially positioning for future reads) is undefined.
3109  *
3110  * This function may require an internal copy of the data to ensure that
3111  * the buffer contents are not modified, so if the buffer contents are
3112  * temporary values, to be deleted after writing, using
3113  * cs_file_write_block_buffer() instead may be used to avoid an unneeded
3114  * memory allocation and copy.
3115  *
3116  * \param[in]  f                 cs_file_t descriptor
3117  * \param[in]  buf               pointer to location containing data
3118  * \param[in]  size              size of each item of data in bytes
3119  * \param[in]  stride            number of (interlaced) values per block item
3120  * \param[in]  global_num_start  global number of first block item
3121  *                               (1 to n numbering)
3122  * \param[in]  global_num_end    global number of past-the end block item
3123  *                               (1 to n numbering)
3124  *
3125  * \return the (local) number of items (not bytes) sucessfully written;
3126  *         currently, errors are fatal.
3127  */
3128 /*----------------------------------------------------------------------------*/
3129 
3130 size_t
cs_file_write_block(cs_file_t * f,const void * buf,size_t size,size_t stride,cs_gnum_t global_num_start,cs_gnum_t global_num_end)3131 cs_file_write_block(cs_file_t   *f,
3132                     const void  *buf,
3133                     size_t       size,
3134                     size_t       stride,
3135                     cs_gnum_t    global_num_start,
3136                     cs_gnum_t    global_num_end)
3137 {
3138   size_t retval = 0;
3139 
3140   const size_t bufsize = (global_num_end - global_num_start)*stride*size;
3141 
3142   /* Copy contents to ensure buffer constedness if necessary */
3143 
3144   bool direct_w = true;
3145 
3146   if (f->swap_endian == true && size > 1)
3147     direct_w = false;
3148 
3149 #if defined(HAVE_MPI)
3150   if (f->n_ranks > 1) {
3151     if (f->rank_step > 1 || f->method != CS_FILE_STDIO_PARALLEL)
3152       direct_w = false;
3153   }
3154 #endif
3155 
3156   if (direct_w == false) {
3157 
3158     unsigned char *copybuf = NULL;
3159 
3160     BFT_MALLOC(copybuf, bufsize, unsigned char);
3161 
3162     if (copybuf != NULL)
3163       memcpy(copybuf, buf, bufsize);
3164 
3165     retval = cs_file_write_block_buffer(f,
3166                                         copybuf,
3167                                         size,
3168                                         stride,
3169                                         global_num_start,
3170                                         global_num_end);
3171 
3172     BFT_FREE(copybuf);
3173   }
3174 
3175   /* Using Standard IO with no byte-swapping or serialization, write directly */
3176 
3177   else {
3178 
3179     cs_gnum_t global_num_end_last = global_num_end;
3180 
3181     const cs_gnum_t _global_num_start = (global_num_start-1)*stride + 1;
3182     const cs_gnum_t _global_num_end = (global_num_end-1)*stride + 1;
3183 
3184     if (_global_num_end > _global_num_start) {
3185 
3186       if (f->sh == NULL)
3187         _file_open(f);
3188 
3189       retval = _file_write(f,
3190                            buf,
3191                            size,
3192                            (_global_num_end - _global_num_start));
3193 
3194     }
3195 
3196     /* Update offset */
3197 
3198 #if defined(HAVE_MPI)
3199     if (f->n_ranks > 1)
3200       MPI_Bcast(&global_num_end_last, 1, CS_MPI_GNUM, f->n_ranks-1, f->comm);
3201 #endif
3202 
3203     f->offset += ((global_num_end_last - 1) * size * stride);
3204 
3205   }
3206 
3207   return retval;
3208 }
3209 
3210 /*----------------------------------------------------------------------------*/
3211 /*!
3212  * \brief Write data to a file, each associated process providing a
3213  * contiguous part of this data.
3214  *
3215  * Each process should provide a (possibly empty) block of the data,
3216  * and we should have:
3217  *   global_num_start at rank 0 = 1
3218  *   global_num_start at rank i+1 = global_num_end at rank i.
3219  * Otherwise, behavior (especially positioning for future reads) is undefined.
3220  *
3221  * This function is intended to be used mainly data that is already a
3222  * copy of original data (such as data that has been redistributed across
3223  * processors just for the sake of output), or that is to be deleted after
3224  * writing, so it may modify the values in its input buffer (notably to
3225  * convert from little-endian to big-endian of vice-versa if necessary).
3226  *
3227  * \param[in]  f                 cs_file_t descriptor
3228  * \param[in, out]  buf          pointer to location containing data
3229  * \param[in]  size              size of each item of data in bytes
3230  * \param[in]  stride            number of (interlaced) values per block item
3231  * \param[in]  global_num_start  global number of first block item
3232  *                               (1 to n numbering)
3233  * \param[in]  global_num_end    global number of past-the end block item
3234  *                               (1 to n numbering)
3235  *
3236  * \return the (local) number of items (not bytes) sucessfully written;
3237  *         currently, errors are fatal.
3238  */
3239 /*----------------------------------------------------------------------------*/
3240 
3241 size_t
cs_file_write_block_buffer(cs_file_t * f,void * buf,size_t size,size_t stride,cs_gnum_t global_num_start,cs_gnum_t global_num_end)3242 cs_file_write_block_buffer(cs_file_t  *f,
3243                            void       *buf,
3244                            size_t      size,
3245                            size_t      stride,
3246                            cs_gnum_t   global_num_start,
3247                            cs_gnum_t   global_num_end)
3248 {
3249   size_t retval = 0;
3250 
3251   cs_gnum_t global_num_end_last = global_num_end;
3252 
3253   cs_gnum_t _global_num_start = (global_num_start-1)*stride + 1;
3254   cs_gnum_t _global_num_end = (global_num_end-1)*stride + 1;
3255 
3256   if (_global_num_end < _global_num_start)
3257     _global_num_end = _global_num_start;
3258 
3259   void *_buf = buf;
3260 
3261   /* Swap bytes prior to writing if necessary */
3262 
3263   if (f->swap_endian == true && size > 1)
3264     _swap_endian(buf,
3265                  buf,
3266                  size,
3267                  (_global_num_end - _global_num_start));
3268 
3269 #if defined(HAVE_MPI)
3270    if (f->rank_step > 1)
3271     _buf = _gather_blocks(f, buf, size, _global_num_start,
3272                           &_global_num_end);
3273 #endif
3274 
3275   /* Write to file using chosen method */
3276 
3277   switch(f->method) {
3278 
3279   case CS_FILE_STDIO_SERIAL:
3280     retval = _file_write_block_s(f,
3281                                  _buf,
3282                                  size,
3283                                  _global_num_start,
3284                                  _global_num_end);
3285     break;
3286 
3287   case CS_FILE_STDIO_PARALLEL:
3288     retval = _file_write_block_p(f,
3289                                  _buf,
3290                                  size,
3291                                  _global_num_start,
3292                                  _global_num_end);
3293     break;
3294 
3295 #if defined(HAVE_MPI_IO)
3296 
3297   case CS_FILE_MPI_INDEPENDENT:
3298   case CS_FILE_MPI_NON_COLLECTIVE:
3299       retval = _mpi_file_write_block_noncoll(f,
3300                                              _buf,
3301                                              size,
3302                                              _global_num_start,
3303                                              _global_num_end);
3304       break;
3305 
3306   case CS_FILE_MPI_COLLECTIVE:
3307     if (_mpi_io_positioning == CS_FILE_MPI_EXPLICIT_OFFSETS)
3308       retval = _mpi_file_write_block_eo(f,
3309                                         _buf,
3310                                         size,
3311                                         _global_num_start,
3312                                         _global_num_end);
3313     else
3314       retval = _mpi_file_write_block_ip(f,
3315                                         _buf,
3316                                         size,
3317                                         _global_num_start,
3318                                         _global_num_end);
3319     break;
3320 
3321 #endif /* defined(HAVE_MPI_IO) */
3322 
3323   default:
3324     assert(0);
3325   }
3326 
3327 #if defined(HAVE_MPI)
3328   if (f->rank_step > 1) {
3329     if (f->rank % f->rank_step == 0) {
3330       /* Check for inconsistent read sizes */
3331       int rank_end = f->rank + f->rank_step;
3332       if (rank_end >= f->n_ranks)
3333         rank_end = f->n_ranks;
3334       int n_aggr = rank_end - f->rank;
3335       cs_gnum_t retval_cmp = 0;
3336       for (int i = 0; i < n_aggr; i++)
3337         retval_cmp += f->block_size[i];
3338       if (retval_cmp != retval)  /* Error in this case */
3339         f->block_size[0] = retval_cmp;
3340     }
3341     retval = f->block_size[0];
3342     if (_buf != buf)
3343       BFT_FREE(_buf);
3344   }
3345 
3346   /* Update offset */
3347 
3348   if (f->n_ranks > 1)
3349     MPI_Bcast(&global_num_end_last, 1, CS_MPI_GNUM, f->n_ranks-1, f->comm);
3350 #endif
3351 
3352   f->offset += ((global_num_end_last - 1) * size * stride);
3353 
3354   return retval;
3355 }
3356 
3357 /*----------------------------------------------------------------------------*/
3358 /*!
3359  * \brief Update the file pointer according to whence.
3360  *
3361  * \param[in, out]  f       cs_file_t descriptor
3362  * \param[in]       offset  add to position specified to whence to obtain
3363  *                          new position, measured in characters from the
3364  *                          beginning of the file
3365  * \param[in]       whence  beginning if CS_FILE_SEEK_SET,
3366  *                          current if CS_FILE_SEEK_CUR,
3367  *                          or end-of-file if CS_FILE_SEEK_END
3368  *
3369  * \return 0 upon success, nonzero otherwise; currently, errors are fatal.
3370  */
3371 /*----------------------------------------------------------------------------*/
3372 
3373 int
cs_file_seek(cs_file_t * f,cs_file_off_t offset,cs_file_seek_t whence)3374 cs_file_seek(cs_file_t       *f,
3375              cs_file_off_t    offset,
3376              cs_file_seek_t   whence)
3377 {
3378   int retval = 0;
3379 
3380   /* Always update f->offset, regardless of mode */
3381 
3382   switch(whence) {
3383 
3384   case CS_FILE_SEEK_SET:
3385 
3386     f->offset = offset;
3387     break;
3388 
3389   case CS_FILE_SEEK_CUR:
3390 
3391     f->offset += offset;
3392     break;
3393 
3394   case CS_FILE_SEEK_END:
3395 
3396     if (f->sh != NULL)
3397       f->offset = cs_file_tell(f) + offset;
3398 
3399 #if defined(HAVE_MPI_IO)
3400     if (f->fh != MPI_FILE_NULL) {
3401       MPI_Offset f_size = 0;
3402       retval = MPI_File_get_size(f->fh, &f_size);
3403       f->offset = f_size + offset;
3404     }
3405 #endif
3406 
3407 #if defined(HAVE_MPI)
3408   if (f->comm != MPI_COMM_NULL) {
3409 #if defined(MPI_LONG_LONG)
3410     long long offset_g;
3411     long long offset_l = f->offset;
3412     MPI_Datatype  _mpi_datatype_offset = MPI_LONG_LONG;
3413 #else
3414     long offset_g;
3415     long offset_l = f->offset;
3416     MPI_Datatype  _mpi_datatype_offset = MPI_LONG_INT;
3417 #endif
3418     MPI_Allreduce(&offset_l, &offset_g, 1, _mpi_datatype_offset, MPI_MAX,
3419                   f->comm);
3420     f->offset = offset_g;
3421   }
3422 #endif
3423 
3424   break;
3425   }
3426 
3427   /* Now update actual file position */
3428 
3429   if (f->sh != NULL)
3430       retval = _file_seek(f, offset, whence);
3431 
3432 #if defined(HAVE_MPI_IO)
3433 
3434   else if (   f->fh != MPI_FILE_NULL
3435            && _mpi_io_positioning == CS_FILE_MPI_INDIVIDUAL_POINTERS) {
3436 
3437     retval = MPI_File_seek(f->fh, f->offset, MPI_SEEK_SET);
3438 
3439     if (retval != MPI_SUCCESS)
3440       _mpi_io_error_message(f->name, retval);
3441 
3442   }
3443 
3444 #endif /* defined(HAVE_MPI_IO) */
3445 
3446   return retval;
3447 }
3448 
3449 /*----------------------------------------------------------------------------*/
3450 /*!
3451  * \brief Return the position of the file pointer.
3452  *
3453  * In parallel, we consider the file pointer to be equal to the highest
3454  * value of the individual file pointers.
3455  *
3456  * \param[in]  f  cs_file_t descriptor
3457  *
3458  * \return current position of the file pointer.
3459  */
3460 /*----------------------------------------------------------------------------*/
3461 
3462 cs_file_off_t
cs_file_tell(cs_file_t * f)3463 cs_file_tell(cs_file_t  *f)
3464 {
3465   cs_file_off_t retval = f->offset;
3466 
3467   if (f->method == CS_FILE_STDIO_SERIAL && f->rank == 0 && f->sh != NULL)
3468     retval = _file_tell(f);
3469 
3470 #if defined(HAVE_MPI)
3471   if (f->comm != MPI_COMM_NULL) {
3472 #if defined(MPI_LONG_LONG)
3473     long long _offset = retval;
3474     MPI_Datatype  _mpi_datatype_offset = MPI_LONG_LONG;
3475 #else
3476     long _offset = retval;
3477     MPI_Datatype  _mpi_datatype_offset = MPI_LONG_INT;
3478 #endif
3479     MPI_Bcast(&_offset, 1, _mpi_datatype_offset, 0, f->comm);
3480     retval = _offset;
3481   }
3482 #endif
3483 
3484   /*
3485     Note that in case of individual file pointers, using
3486     MPI_File_get_position() and MPI_File_get_byte_offset() should also
3487     work, but fail after certain collective writes with some processes
3488     writing zero values (at least on Open MPI 1.2.6), so we prefer to keep
3489     track of the global offset (which we need for seeking or views anyways).
3490   */
3491 
3492   return retval;
3493 }
3494 
3495 /*----------------------------------------------------------------------------*/
3496 /*!
3497  * \brief Formatted input from a text file (as fgets()).
3498  *
3499  * \param [out]      s     buffer to which string is to be read.
3500  * \param [in]       size  maximum number of characters to be read plus one.
3501  * \param [in]       f     ecs_file_t descriptor.
3502  * \param [in, out]  line  file line number if available, or NULL.
3503  *
3504  * \return s on success, NULL on error or when end of file occurs and
3505  *         no characters have been read.
3506  */
3507 /*----------------------------------------------------------------------------*/
3508 
3509 char *
cs_file_gets(char * s,const int size,const cs_file_t * f,int * line)3510 cs_file_gets(char             *s,
3511              const int         size,
3512              const cs_file_t  *f,
3513              int              *line)
3514 {
3515   return _cs_file_gets(s, size, f, line, 0);
3516 }
3517 
3518 /*----------------------------------------------------------------------------*/
3519 /*!
3520  * \brief Formatted input from a text file if possible (as fgets()).
3521  *
3522  * This function is similar to cs_file_gets(), but failure to read
3523  * a line due to an end-of-file condition is not considered an error with
3524  * this variant, which may be used to read text files or sections thereof
3525  * of unknown length.
3526  *
3527  * \param [out]      s     buffer to which string is to be read.
3528  * \param [in]       size  maximum number of characters to be read plus one.
3529  * \param [in]       f     cs_file_t descriptor.
3530  * \param [in, out]  line  file line number if available, or NULL.
3531  *
3532  * \return s on success, NULL on error or when end of file occurs and
3533  *         no characters have been read.
3534  */
3535 /*----------------------------------------------------------------------------*/
3536 
3537 char *
cs_file_gets_try(char * s,const int size,const cs_file_t * f,int * line)3538 cs_file_gets_try(char             *s,
3539                  const int         size,
3540                  const cs_file_t  *f,
3541                  int              *line)
3542 {
3543   return _cs_file_gets(s, size, f, line, 1);
3544 }
3545 
3546 /*----------------------------------------------------------------------------*/
3547 /*!
3548  * \brief Dump the metadata of a file structure in human readable form.
3549  *
3550  * \param[in]  f  cs_file_t descriptor
3551  */
3552 /*----------------------------------------------------------------------------*/
3553 
3554 void
cs_file_dump(const cs_file_t * f)3555 cs_file_dump(const cs_file_t  *f)
3556 {
3557   const char *mode_name[] = {"CS_FILE_MODE_READ",
3558                              "CS_FILE_MODE_WRITE",
3559                              "CS_FILE_MODE_APPEND"};
3560   const char *access_name[] = {"CS_FILE_STDIO_SERIAL",
3561                                "CS_FILE_STDIO_PARALLEL",
3562                                "CS_FILE_MPI_INDEPENDENT",
3563                                "CS_FILE_MPI_NON_COLLECTIVE",
3564                                "CS_FILE_MPI_COLLECTIVE"};
3565 
3566   if (f == NULL) {
3567     bft_printf("\n"
3568                "Null file dump:\n");
3569     return;
3570   }
3571 
3572 #if defined(HAVE_MPI)
3573   bft_printf("\n"
3574              "File name:                   \"%s\"\n"
3575              "Access mode:                 %s\n"
3576              "Access method:               %s\n"
3577              "Rank:                        %d\n"
3578              "N ranks:                     %d\n"
3579              "rank step:                   %d\n"
3580              "Swap endian:                 %d\n"
3581              "Serial handle:               %p\n",
3582              f->name, mode_name[f->mode], access_name[f->method-1],
3583              f->rank, f->n_ranks, f->rank_step, (int)(f->swap_endian),
3584              (const void *)f->sh);
3585 #else
3586   bft_printf("\n"
3587              "File name:                   \"%s\"\n"
3588              "Access mode:                 %s\n"
3589              "Access method:               %s\n"
3590              "Rank:                        %d\n"
3591              "N ranks:                     %d\n"
3592              "Swap endian:                 %d\n"
3593              "Serial handle:               %p\n",
3594              f->name, mode_name[f->mode], access_name[f->method-1],
3595              f->rank, f->n_ranks, (int)(f->swap_endian), (const void *)f->sh);
3596 #endif
3597 
3598 #if defined(HAVE_MPI)
3599   bft_printf("Associated io communicator:  %llu\n",
3600              (unsigned long long)(f->io_comm));
3601   bft_printf("Associated communicator:     %llu\n",
3602              (unsigned long long)(f->comm));
3603 #if defined(HAVE_MPI_IO)
3604   bft_printf("MPI file handle:             %llu\n"
3605              "MPI file offset:             %llu\n",
3606              (unsigned long long)(f->fh),
3607              (unsigned long long)(f->offset));
3608 #endif
3609 #endif
3610 
3611   bft_printf("\n");
3612 }
3613 
3614 /*----------------------------------------------------------------------------*/
3615 /*!
3616  * \brief Free the default options for file access.
3617  */
3618 /*----------------------------------------------------------------------------*/
3619 
3620 void
cs_file_free_defaults(void)3621 cs_file_free_defaults(void)
3622 {
3623   _mpi_io_positioning = CS_FILE_MPI_EXPLICIT_OFFSETS;
3624 
3625   _default_access_r = CS_FILE_DEFAULT;
3626   _default_access_w = CS_FILE_DEFAULT;
3627 
3628   /* Communicator and hints used for file operations */
3629 
3630 #if defined(HAVE_MPI)
3631   _mpi_defaults_are_set = false;
3632   _mpi_rank_step = 1;
3633   _mpi_comm = MPI_COMM_NULL;
3634 
3635   if (_mpi_io_comm != MPI_COMM_NULL) {
3636     MPI_Comm_free(&_mpi_io_comm);
3637     _mpi_io_comm = MPI_COMM_NULL;
3638   }
3639 #endif
3640 
3641 #if defined(HAVE_MPI_IO)
3642 #  if MPI_VERSION > 1
3643   if (_mpi_io_hints_r != MPI_INFO_NULL)
3644     MPI_Info_free(&_mpi_io_hints_r);
3645   if (_mpi_io_hints_w != MPI_INFO_NULL)
3646     MPI_Info_free(&_mpi_io_hints_w);
3647 #  endif /* MPI_VERSION > 1 */
3648 #endif /* defined(HAVE_MPI_IO) */
3649 }
3650 
3651 #if defined(HAVE_MPI)
3652 
3653 /*----------------------------------------------------------------------------*/
3654 /*!
3655  * \brief Get the default options for file access.
3656  *
3657  * \param[in]    mode    file mode for which the default is queried
3658  *                       (write and append use the same method, and are
3659  *                       interchangeable here)
3660  * \param[out]   method  default file access method, or NULL
3661  * \param[out]   hints   MPI-IO hints, or NULL
3662  */
3663 /*----------------------------------------------------------------------------*/
3664 
3665 void
cs_file_get_default_access(cs_file_mode_t mode,cs_file_access_t * method,MPI_Info * hints)3666 cs_file_get_default_access(cs_file_mode_t     mode,
3667                            cs_file_access_t  *method,
3668                            MPI_Info          *hints)
3669 {
3670   if (mode == CS_FILE_MODE_READ) {
3671     if (method != NULL)
3672       *method = _access_method(_default_access_r, false);
3673     if (hints != NULL)
3674       *hints = _mpi_io_hints_r;
3675   }
3676   else {
3677     if (method != NULL)
3678       *method = _access_method(_default_access_w, true);
3679     if (hints != NULL)
3680       *hints = _mpi_io_hints_w;
3681   }
3682 }
3683 
3684 #else /* if !defined(HAVE_MPI) */
3685 
3686 /*----------------------------------------------------------------------------*/
3687 /*!
3688  * \brief Get the default options for file access.
3689  *
3690  * \param[in]    mode    file mode for which the default is queried
3691  *                       (write and append use the same method, and are
3692  *                       interchangeable here)
3693  * \param[out]   method  default file access method, or NULL
3694  */
3695 /*----------------------------------------------------------------------------*/
3696 
3697 void
cs_file_get_default_access(cs_file_mode_t mode,cs_file_access_t * method)3698 cs_file_get_default_access(cs_file_mode_t     mode,
3699                            cs_file_access_t  *method)
3700 {
3701   if (mode == CS_FILE_MODE_READ) {
3702     if (method != NULL)
3703       *method = _access_method(_default_access_r, false);
3704   }
3705   else {
3706     if (method != NULL)
3707       *method = _access_method(_default_access_w, true);
3708   }
3709 }
3710 
3711 #endif /* defined(HAVE_MPI) */
3712 
3713 #if defined(HAVE_MPI)
3714 
3715 /*----------------------------------------------------------------------------*/
3716 /*!
3717  * \brief Set the default options for file access.
3718  *
3719  * If the method given contains incompatible values, such as when setting
3720  * MPI-IO methods when MPI-IO is not available, a "reasonable" default
3721  * is used instead.
3722  *
3723  * \param[in]  mode       file mode for which the default is being set
3724  *                        (write and append use the same method, and are
3725  *                        interchangeable here)
3726  * \param[in]  method     default access method to set
3727  * \param[in]  hints      MPI-IO hints, or MPI_INFO_NULL
3728  */
3729 /*----------------------------------------------------------------------------*/
3730 
3731 void
cs_file_set_default_access(cs_file_mode_t mode,cs_file_access_t method,MPI_Info hints)3732 cs_file_set_default_access(cs_file_mode_t    mode,
3733                            cs_file_access_t  method,
3734                            MPI_Info          hints)
3735 {
3736   cs_file_access_t  _method;
3737 
3738   if (mode == CS_FILE_MODE_READ) {
3739     _method = _access_method(method, false);
3740     _default_access_r = _method;
3741   }
3742   else { /* if (mode == CS_FILE_MODE_WRITE || mode == CS_FILE_MODE_APPEND) */
3743     _method = _access_method(method, true);
3744     _default_access_w = _method;
3745   }
3746 
3747 #if defined(HAVE_MPI_IO)
3748 #  if MPI_VERSION > 1
3749 
3750   /* Free previous info objects */
3751 
3752   if (mode == CS_FILE_MODE_READ && _mpi_io_hints_r != MPI_INFO_NULL)
3753     MPI_Info_free(&_mpi_io_hints_r);
3754   else if (    (mode == CS_FILE_MODE_WRITE || mode == CS_FILE_MODE_APPEND)
3755            && _mpi_io_hints_w != MPI_INFO_NULL)
3756     MPI_Info_free(&_mpi_io_hints_w);
3757 
3758   /* Set info objects */
3759 
3760   if (_method > CS_FILE_STDIO_PARALLEL && hints != MPI_INFO_NULL) {
3761     if (mode == CS_FILE_MODE_READ)
3762       MPI_Info_dup(hints, &_mpi_io_hints_r);
3763     else if (mode == CS_FILE_MODE_WRITE || mode == CS_FILE_MODE_APPEND)
3764       MPI_Info_dup(hints, &_mpi_io_hints_w);
3765   }
3766 
3767 #  endif /* MPI_VERSION > 1 */
3768 #endif /* defined(HAVE_MPI_IO) */
3769 }
3770 
3771 #else /* if !defined(HAVE_MPI) */
3772 
3773 /*----------------------------------------------------------------------------*/
3774 /*!
3775  * \brief Set the default options for file access.
3776  *
3777  * If the method given contains incompatible values, such as when setting
3778  * MPI-IO methods when MPI-IO is not available, a "reasonable" default
3779  * is used instead.
3780  *
3781  * \param[in]  mode       file mode for which the default is being set
3782  *                        (write and append use the same method, and are
3783  *                        interchangeable here)
3784  * \param[in]  method     default access method to set
3785  */
3786 /*----------------------------------------------------------------------------*/
3787 
3788 void
cs_file_set_default_access(cs_file_mode_t mode,cs_file_access_t method)3789 cs_file_set_default_access(cs_file_mode_t    mode,
3790                            cs_file_access_t  method)
3791 {
3792   if (mode == CS_FILE_MODE_READ)
3793     _default_access_r = _access_method(method, false);
3794   else if (mode == CS_FILE_MODE_WRITE || mode == CS_FILE_MODE_APPEND)
3795     _default_access_w = _access_method(method, true);
3796 }
3797 
3798 #endif /* defined(HAVE_MPI) */
3799 
3800 #if defined(HAVE_MPI)
3801 
3802 /*----------------------------------------------------------------------------*/
3803 /*!
3804  * \brief Get default MPI communicator values for file access.
3805  *
3806  * A block rank stepping value may be used, allowing the use of a reduced
3807  * communicator for distributed block reads and writes.
3808  * If this value is greater than 1, ranks not a multiple of this step must be
3809  * guaranteed to be empty for block reads and writes with files opened using
3810  * this default.
3811  *
3812  * \param[out]   block_rank_step  MPI rank stepping between non-empty
3813  *                                distributed blocks, or NULL
3814  * \param[out]   block_comm       Handle to MPI communicator used for
3815  *                                distributed file block access, or NULL
3816  * \param[out]   comm             Handle to main MPI communicator, or NULL
3817  */
3818 /*----------------------------------------------------------------------------*/
3819 
3820 void
cs_file_get_default_comm(int * block_rank_step,MPI_Comm * block_comm,MPI_Comm * comm)3821 cs_file_get_default_comm(int       *block_rank_step,
3822                          MPI_Comm  *block_comm,
3823                          MPI_Comm  *comm)
3824 {
3825   /* Initialize defauts if not already done */
3826 
3827   if (_mpi_defaults_are_set == false && cs_glob_mpi_comm != MPI_COMM_NULL) {
3828     cs_file_set_default_comm(0, MPI_COMM_SELF);
3829     _mpi_defaults_are_set = true;
3830   }
3831 
3832   /* Return defaults */
3833 
3834   if (block_rank_step != NULL)
3835     *block_rank_step = _mpi_rank_step;
3836 
3837   if (block_comm != NULL) {
3838     if (_mpi_comm != MPI_COMM_NULL)
3839       *block_comm = _mpi_io_comm;
3840     else
3841       *block_comm = cs_glob_mpi_comm;
3842   }
3843 
3844   if (comm != NULL) {
3845     if (_mpi_comm != MPI_COMM_NULL)
3846       *comm = _mpi_comm;
3847     else
3848       *comm = cs_glob_mpi_comm;
3849   }
3850 }
3851 
3852 /*----------------------------------------------------------------------------*/
3853 /*!
3854  * \brief Set default MPI communicator values for file access.
3855  *
3856  * A block rank stepping value may be used, allowing the use of a reduced
3857  * communicator for distributed block reads and writes.
3858  * If this value is greater than 1, ranks not a multiple of this step must be
3859  * guaranteed to be empty for block reads and writes with files opened using
3860  * this default.
3861  *
3862  * For each argument, an "out of range" value may be used to avoid modifying
3863  * the previous default for that argument.
3864  *
3865  * \param[in]  block_rank_step  MPI rank stepping between non-empty blocks for
3866  *                              file block reads and writes (not set if <= 0)
3867  * \param[in]  comm             Handle to main MPI communicator
3868  *                              (not set if MPI_COMM_SELF)
3869  */
3870 /*----------------------------------------------------------------------------*/
3871 
3872 void
cs_file_set_default_comm(int block_rank_step,MPI_Comm comm)3873 cs_file_set_default_comm(int       block_rank_step,
3874                          MPI_Comm  comm)
3875 {
3876   if (block_rank_step > 0) {
3877     if (block_rank_step > cs_glob_n_ranks)
3878       block_rank_step = cs_glob_n_ranks;
3879     _mpi_rank_step = block_rank_step;
3880   }
3881 
3882   if (comm != MPI_COMM_SELF)
3883     _mpi_comm = comm;
3884   else if (_mpi_defaults_are_set == false)
3885     _mpi_comm = cs_glob_mpi_comm;
3886 
3887   if (   comm != MPI_COMM_SELF
3888       || block_rank_step > 0
3889       || _mpi_defaults_are_set == false) {
3890 
3891     if (_mpi_io_comm != MPI_COMM_NULL) {
3892       MPI_Comm_free(&_mpi_io_comm);
3893       _mpi_io_comm = MPI_COMM_NULL;
3894     }
3895 
3896     if (_mpi_comm != MPI_COMM_NULL) {
3897 
3898       if (_mpi_rank_step < 2) {
3899         _mpi_rank_step = 1;
3900         MPI_Comm_dup(_mpi_comm, &_mpi_io_comm);
3901       }
3902 
3903       else /* Create reduced communicator */
3904         _mpi_io_comm = cs_file_block_comm(_mpi_rank_step, _mpi_comm);
3905 
3906     }
3907 
3908   }
3909 
3910   _mpi_defaults_are_set = true;
3911 }
3912 
3913 /*----------------------------------------------------------------------------*/
3914 /*!
3915  * \brief Create an MPI communicator for distributed block parallel IO.
3916  *
3917  * \param[in]  block_rank_step  MPI rank stepping between non-empty blocks
3918  * \param[in]  comm             Handle to main MPI communicator
3919  *
3920  * \return communicator associated with IO, MPI_COMM_NULL for ranks not
3921  *         participating in parallel IO (including ranks participating in IO
3922  *         where communicator size would be 1)
3923  */
3924 /*----------------------------------------------------------------------------*/
3925 
3926 MPI_Comm
cs_file_block_comm(int block_rank_step,MPI_Comm comm)3927 cs_file_block_comm(int       block_rank_step,
3928                    MPI_Comm  comm)
3929 {
3930   MPI_Comm  new_comm = MPI_COMM_NULL;
3931 
3932   if (comm == MPI_COMM_NULL)
3933     return new_comm;
3934 
3935   int rank_id, n_ranks;
3936   MPI_Comm_rank(comm, &rank_id);
3937   MPI_Comm_size(comm, &n_ranks);
3938   if (n_ranks < 2) {
3939     new_comm = MPI_COMM_NULL;
3940     return new_comm;
3941   }
3942 
3943   if (block_rank_step > n_ranks)
3944     block_rank_step = n_ranks;
3945 
3946   if (block_rank_step < 2)
3947     MPI_Comm_dup(comm, &new_comm);
3948 
3949   /* Create reduced communicator in more general case */
3950 
3951   else {
3952 
3953     int ranges[1][3];
3954     MPI_Group old_group, new_group;
3955 
3956     MPI_Comm_group(comm, &old_group);
3957 
3958     MPI_Barrier(comm); /* For debugging */
3959 
3960     ranges[0][0] = 0;
3961     ranges[0][1] = n_ranks - 1;
3962     ranges[0][2] = block_rank_step;
3963 
3964     MPI_Group_range_incl(old_group, 1, ranges, &new_group);
3965     MPI_Comm_create(comm, new_group, &new_comm);
3966     MPI_Group_free(&new_group);
3967 
3968     MPI_Group_free(&old_group);
3969 
3970     if (rank_id % block_rank_step)
3971       new_comm = MPI_COMM_NULL;
3972 
3973     MPI_Barrier(comm); /* For debugging */
3974 
3975   }
3976 
3977   return new_comm;
3978 }
3979 
3980 #endif /* defined(HAVE_MPI) */
3981 
3982 /*----------------------------------------------------------------------------*/
3983 /*!
3984  * \brief Get the positioning method for MPI-IO
3985  *
3986  * For details, see \ref cs_file_set_mpi_io_positioning.
3987  *
3988  * \return  positioning method for MPI-IO
3989  */
3990 /*----------------------------------------------------------------------------*/
3991 
3992 cs_file_mpi_positioning_t
cs_file_get_mpi_io_positioning(void)3993 cs_file_get_mpi_io_positioning(void)
3994 {
3995   return _mpi_io_positioning;
3996 }
3997 
3998 /*----------------------------------------------------------------------------*/
3999 /*!
4000  * \brief Set the positioning method for MPI-IO
4001  *
4002  * It is not always known whether a performance or robustness difference is
4003  * to be expected using explicit file offsets or individual file pointers.
4004  * Perusal of a sampling of ROMIO code would seem to indicate that no
4005  * difference is to be expected, but this might change with MPI IO variants
4006  * or file systems, so this advanced setting is made possible.
4007  *
4008  * This setting is not available on a per-file basis, though this could be
4009  * done in the future in the unexpected case of performance results
4010  * showing this would be useful.
4011  *
4012  * \param[in]  positioning  chosen positioning method for MPI-IO
4013  */
4014 /*----------------------------------------------------------------------------*/
4015 
4016 void
cs_file_set_mpi_io_positioning(cs_file_mpi_positioning_t positioning)4017 cs_file_set_mpi_io_positioning(cs_file_mpi_positioning_t  positioning)
4018 {
4019   _mpi_io_positioning = positioning;
4020 }
4021 
4022 /*----------------------------------------------------------------------------*/
4023 /*!
4024  * \brief Print information on default options for file access.
4025  */
4026 /*----------------------------------------------------------------------------*/
4027 
void
cs_file_defaults_info(void)
{
#if defined(HAVE_MPI)

  /* Everything below is written to both of these logs */

  const int  n_logs = 2;
  cs_log_t logs[] = {CS_LOG_DEFAULT, CS_LOG_PERFORMANCE};

  /* Formats indexed by mode (0: read, 1: write); the variants at
     index mode + 2 also print the MPI-IO positioning method. */

  const char *fmt[4] = {N_("  I/O read method:     %s\n"),
                        N_("  I/O write method:    %s\n"),
                        N_("  I/O read method:     %s (%s)\n"),
                        N_("  I/O write method:    %s (%s)\n")};

  for (int log_id = 0; log_id < n_logs; log_id++)
    cs_log_printf(logs[log_id], "\n");

  /* Loop on read, then write mode (append is not listed separately) */

  for (cs_file_mode_t mode = CS_FILE_MODE_READ;
       mode < CS_FILE_MODE_APPEND;
       mode++) {

    MPI_Info hints;
    cs_file_access_t method;

    cs_file_get_default_access(mode, &method, &hints);

    /* Access method, with positioning method for MPI-IO variants */

#if defined(HAVE_MPI_IO)
    if (method > CS_FILE_STDIO_PARALLEL) {
      for (int log_id = 0; log_id < n_logs; log_id++)
        cs_log_printf(logs[log_id],
                      _(fmt[mode + 2]),
                      _(cs_file_access_name[method]),
                      _(cs_file_mpi_positioning_name[_mpi_io_positioning]));
    }
#endif
    if (method <= CS_FILE_STDIO_PARALLEL) {
      for (int log_id = 0; log_id < n_logs; log_id++)
        cs_log_printf(logs[log_id],
                      _(fmt[mode]), _(cs_file_access_name[method]));
    }

#if MPI_VERSION > 1

    /* MPI-IO hints associated with this mode, if any */

    if (hints != MPI_INFO_NULL) {
      int n_keys, flag;
      char *val;
      char key[MPI_MAX_INFO_KEY + 1];
      BFT_MALLOC(val, MPI_MAX_INFO_VAL + 1, char);
      MPI_Info_get_nkeys(hints, &n_keys);

      /* The header goes to the same logs as the key/value lines below,
         so that neither log shows hint values without their header. */
      if (n_keys > 0) {
        for (int log_id = 0; log_id < n_logs; log_id++)
          cs_log_printf(logs[log_id], _("    hints:\n"));
      }

      for (int i = 0; i < n_keys; i++) {
        MPI_Info_get_nthkey(hints, i, key);
        MPI_Info_get(hints, key, MPI_MAX_INFO_VAL, val, &flag);
        if (flag) {
          val[MPI_MAX_INFO_VAL] = '\0'; /* force termination */
          for (int log_id = 0; log_id < n_logs; log_id++)
            cs_log_printf(logs[log_id],
                          _("      %s: %s\n"), key, val);
        }
      }
      BFT_FREE(val);
    }

#endif /* MPI_VERSION > 1 */

  }

  /* Rank step, only reported when running on several ranks */

  if (cs_glob_n_ranks > 1) {
    int block_rank_step;
    cs_file_get_default_comm(&block_rank_step, NULL, NULL);
    for (int log_id = 0; log_id < n_logs; log_id++)
      cs_log_printf(logs[log_id],
                    _("  I/O rank step:        %d\n"), block_rank_step);
  }

  cs_log_printf(CS_LOG_PERFORMANCE, "\n");
  cs_log_separator(CS_LOG_PERFORMANCE);

#endif
}
4108 
4109 #if defined(HAVE_MPI)
4110 
4111 /*----------------------------------------------------------------------------*/
4112 /*!
4113  * \brief Create a cs_file_serializer_t structure.
4114  *
4115  * The buf_block_size argument is optional, and may be used when the buffer
4116  * on rank 0 is larger than (global_num_end - global_num_start)*size*stride
4117  * bytes. If zero, a block size of (global_num_end - global_num_start) on
4118  * rank 0 is assumed; a buffer may not be smaller than this, as it must
4119  * initially contain all data on rank 0's block.
4120  *
4121  * \param[in]  size              size of each item of data in bytes
4122  * \param[in]  stride            number of (interlaced) values per block item
4123  * \param[in]  global_num_start  global number of first block item
4124  *                               (1 to n numbering)
4125  * \param[in]  global_num_end    global number of past-the end block item
4126  *                               (1 to n numbering)
4127  * \param[in]  buf_block_size    Local data buffer block size, or 0 for
4128  *                               default global_num_end - global_num_start
4129  *                               (only useful on rank 0)
4130  * \param[in]  buf               pointer to local block data buffer
4131  * \param[in]  comm              associated MPI communicator
4132  *
4133  * \return pointer to new serializer structure.
4134  */
4135 /*----------------------------------------------------------------------------*/
4136 
4137 cs_file_serializer_t *
cs_file_serializer_create(size_t size,size_t stride,cs_gnum_t global_num_start,cs_gnum_t global_num_end,size_t buf_block_size,void * buf,MPI_Comm comm)4138 cs_file_serializer_create(size_t       size,
4139                           size_t       stride,
4140                           cs_gnum_t    global_num_start,
4141                           cs_gnum_t    global_num_end,
4142                           size_t       buf_block_size,
4143                           void        *buf,
4144                           MPI_Comm     comm)
4145 {
4146   cs_file_serializer_t  *s = NULL;
4147 
4148   BFT_MALLOC(s, 1, cs_file_serializer_t);
4149 
4150   _serializer_init(s,
4151                    size * stride,
4152                    global_num_start,
4153                    global_num_end,
4154                    buf_block_size,
4155                    buf,
4156                    comm);
4157 
4158   return s;
4159 }
4160 
4161 /*----------------------------------------------------------------------------*/
4162 /*!
4163  * \brief Destroy a cs_file_serializer_t structure.
4164  *
4165  * \param[in, out]  s  pointer to pointer structure that should be destroyed
4166  */
4167 /*----------------------------------------------------------------------------*/
4168 
4169 void
cs_file_serializer_destroy(cs_file_serializer_t ** s)4170 cs_file_serializer_destroy(cs_file_serializer_t  **s)
4171 {
4172   if (s != NULL) {
4173     _serializer_finalize(*s);
4174     BFT_FREE(*s);
4175   }
4176 }
4177 
4178 /*----------------------------------------------------------------------------*/
4179 /*!
4180  * \brief Advance a cs_file_serializer_t structure.
4181  *
4182  * Data from the buffer of the next communicating rank is copied
4183  * to rank 0 (this is a no-op the first time this function is called,
4184  * as rank 0 already has its data).
4185  *
4186  * On rank 0, the return value may point to the buffer defined when
 * initializing the serializer, or to an additional buffer if the former is
4188  * too small to receive data from all ranks.
4189  *
4190  * Note also that for ranks > 0, this function always returns NULL,
4191  * as only one call is needed for those ranks.
4192  *
4193  * \param[in]   s          pointer to serializer structure
4194  * \param[out]  cur_range  optional start and past-the end global numbers for
4195  *                         the current block (size: 2), or NULL; only on rank 0
4196  *
4197  * \return a pointer to the buffer containing new data (first call counts as
4198  *          new), or NULL if we are finished; always NULL on ranks > 0.
4199  */
4200 /*----------------------------------------------------------------------------*/
4201 
4202 void *
cs_file_serializer_advance(cs_file_serializer_t * s,cs_gnum_t cur_range[2])4203 cs_file_serializer_advance(cs_file_serializer_t  *s,
4204                            cs_gnum_t              cur_range[2])
4205 {
4206   MPI_Status status;
4207   cs_gnum_t sync_range[2] = {s->next_g_num, 0};
4208 
4209   void *retval = NULL;
4210 
4211   /* Rank 0 receives data */
4212 
4213   if (s->rank_id == 0) {
4214 
4215     int count = 0;
4216 
4217     while (count == 0) {
4218 
4219       int dist_rank = s->next_rank_id;
4220 
4221       count = 0;
4222 
4223       if (s->next_rank_id >= s->n_ranks)
4224         return NULL;
4225 
4226       else if (s->next_rank_id != 0) {
4227 
4228         count = s->count[dist_rank];
4229 
4230         /* Forced synchronization */
4231         sync_range[1] = sync_range[0] + count;
4232         MPI_Send(&sync_range, 2, CS_MPI_GNUM, dist_rank, CS_FILE_MPI_TAG, s->comm);
4233 
4234         /* Receive data */
4235         MPI_Recv(s->recv_buf, (count * s->size), MPI_BYTE, dist_rank,
4236                  CS_FILE_MPI_TAG, s->comm, &status);
4237 
4238         retval = s->recv_buf;
4239       }
4240 
4241       else { /* First call, rank id 0 */
4242         count = s->count[dist_rank];
4243         retval = s->buf;
4244       }
4245 
4246       /* Update status */
4247 
4248       s->next_rank_id += 1;
4249       while (s->next_rank_id < s->n_ranks) {
4250         if (s->count[s->next_rank_id] > 0)
4251           break;
4252         else
4253           s->next_rank_id += 1;
4254       }
4255 
4256       if (cur_range != NULL) {
4257         cur_range[0] = s->next_g_num;
4258         cur_range[1] = cur_range[0] + count;
4259       }
4260 
4261       s->next_g_num += count;
4262 
4263     };
4264 
4265   }
4266 
4267   /* Other ranks send data */
4268 
4269   else {
4270 
4271     int count = s->range[1] - s->range[0];
4272 
4273     if (count > 0) {
4274 
4275       assert(s->rank_id > -1);
4276 
4277       /* Forced synchronization */
4278       MPI_Recv(&sync_range, 2, CS_MPI_GNUM, 0, CS_FILE_MPI_TAG, s->comm, &status);
4279       count = (sync_range[1] - sync_range[0]);
4280 
4281       if (sync_range[0] != s->range[0] || sync_range[1] != s->range[1])
4282         bft_error(__FILE__, __LINE__, 0,
4283                   _("Error serializing data:\n\n"
4284                     "  requested range: [%llu, %llu[\n"
4285                     "  local range:     [%llu, %llu["),
4286                   (unsigned long long)sync_range[0],
4287                   (unsigned long long)sync_range[1],
4288                   (unsigned long long)(s->range[0]),
4289                   (unsigned long long)(s->range[1]));
4290 
4291       /* Send data */
4292       MPI_Send(s->buf, (count * s->size), MPI_BYTE, 0, CS_FILE_MPI_TAG, s->comm);
4293 
4294     }
4295 
4296   }
4297 
4298 #if 0 && defined(DEBUG) && !defined(NDEBUG)
4299   MPI_Barrier(comm);
4300 #endif
4301 
4302   return retval;
4303 }
4304 
4305 #endif /* defined(HAVE_MPI) */
4306 
4307 /*----------------------------------------------------------------------------*/
4308 /*!
4309  * \brief Create a new directory using default permissions.
4310  *
4311  * This function is similar to the POSIX function mkdir(), except that
4312  * it has no "mode" argument: by default, on a POSIX type system,
4313  * permissions include read, write, and execute access for the user,
4314  * group and others, modified by the users umask value (so with a
4315  * typical configuration, the user will have read, write, and execute
 * permission, the group and others will only have read and execute
4317  * permission, but this behavior may be modified).
4318  *
4319  * Also, contrary to the usual mkdir(), if the directory already
4320  * exists (and is truly a directory), this is considered a success
4321  * and not a failure, and 0 is returned: the aim of this function
4322  * is to make a directory available, so if it already exists,
4323  * this is considered acceptable.
4324  *
4325  * \param[in]  path  name of new directory.
4326  *
 * \return 0 on success, -1 if an error occurred (in which case errno
4328  *         contains the appropriate error code). If the underlying
4329  *         system has no mkdir() function or it was not detected
4330  *         upon BFT configuration, 1 is returned.
4331  */
4332 /*----------------------------------------------------------------------------*/
4333 
int
cs_file_mkdir_default(const char  *path)
{
#if defined(HAVE_MKDIR)

#if defined(WIN32) || defined(_WIN32)

  /* Single-argument mkdir() variant; its result is not checked,
     and success is always reported. */

  mkdir(path);

#else

  static const char  *str_fail = N_("Failure to create "
                                    "directory \"%s\":\n\n%s");

  if (mkdir(path, S_IRWXU|S_IRWXG|S_IRWXO) != 0) {

    /* Save errno immediately, as later calls may overwrite it */

    const int mkdir_errno = errno;

    /* Failure for any reason other than an existing entry:
       report the actual system error */

    if (mkdir_errno != EEXIST) {
      bft_error(__FILE__, __LINE__, 0, _(str_fail),
                path,
                strerror(mkdir_errno));
      errno = mkdir_errno; /* In case modified while reporting */
      return -1;
    }

    /* An entry with this name already exists: this is a success
       if (and only if) it is a directory */

#if defined(HAVE_SYS_STAT_H)

    struct stat buf;

    if (stat(path, &buf) != 0)
      bft_error(__FILE__, __LINE__, 0, _(str_fail),
                path,
                _("  A similarly named file or directory exists "
                  "and its status is\n  not available."));
    else if (!S_ISDIR(buf.st_mode)) /* S_ISDIR is only non-zero if true */
      bft_error(__FILE__, __LINE__, 0, _(str_fail),
                path,
                _("  A similarly named file exists and is "
                  "not a directory."));
    else
      return 0;

#endif

    errno = EEXIST; /* In case modified by stat() */

    return -1;

  } /* End of directory creation failure case */

#endif

  return 0;

#else /* #if defined(HAVE_MKDIR) */

  /* No mkdir() function available */

  return 1;

#endif /* #if defined(HAVE_MKDIR) */

}
4398 
4399 /*----------------------------------------------------------------------------*/
4400 /*!
4401  * \brief Check if a file exists and is a regular file.
4402  *
4403  * \param[in]  path  file path.
4404  *
4405  * \return 1 if file exists and is a regular file, 0 otherwise.
4406  */
4407 /*----------------------------------------------------------------------------*/
4408 
int
cs_file_isreg(const char  *path)
{
  int retval = 0;

#if defined(HAVE_SYS_STAT_H)

  struct stat s;

  if (stat(path, &s) == 0) {
    if (S_ISREG(s.st_mode) != 0)
      retval = 1;
  }
  else if (errno != ENOENT) {
    /* A missing file is not an error here (0 is returned);
       any other failure to query the path is reported. */
    bft_error(__FILE__, __LINE__, errno,
              _("Error querying information for file:\n%s."),
              path);
  }

#else /* defined(HAVE_SYS_STAT_H) */

  /* If Posix-type API is not available, revert to basic method:
     the path is assumed to be a regular file if it can be opened
     for reading. */

  FILE *f = fopen(path, "r");

  if (f != NULL) {
    retval = 1;
    fclose(f);
  }

#endif /* defined(HAVE_SYS_STAT_H) */

  return retval;
}
4444 
4445 /*----------------------------------------------------------------------------*/
4446 /*!
4447  * \brief Check if a directory exists.
4448  *
4449  * \param[in]  path  directory path.
4450  *
4451  * \return 1 if directory exists, 0 otherwise.
4452  */
4453 /*----------------------------------------------------------------------------*/
4454 
int
cs_file_isdir(const char  *path)
{
#if defined(HAVE_SYS_STAT_H)

  struct stat info;

  if (stat(path, &info) == 0)
    return (S_ISDIR(info.st_mode) != 0) ? 1 : 0;

  /* A missing path simply means "not a directory";
     any other failure to query the path is reported. */

  if (errno != ENOENT)
    bft_error(__FILE__, __LINE__, errno,
              _("Error querying information for directory:\n%s."),
              path);

  return 0;

#else /* defined(HAVE_SYS_STAT_H) */

  /* If Posix-type API is not available,
     consider that directories are not available either */

  return 0;

#endif /* defined(HAVE_SYS_STAT_H) */
}
4486 
4487 /*----------------------------------------------------------------------------*/
4488 /*!
4489  * \brief List files inside a directory.
4490  *
4491  * The array returned must be freed by the caller using BFT_FREE,
4492  * as well as the individual entries in the array.
4493  *
4494  * \param[in]  path name of directory.
4495  *
4496  * \return an array of file names in a directory. The last entry is
4497  *         set to NULL. If no means to list the directory or an error
 *         occurred, the return value is simply NULL.
4499  */
4500 /*----------------------------------------------------------------------------*/
4501 
char **
cs_file_listdir(const char  *path)
{
  char **dirnames = NULL;

#if defined(HAVE_SYS_TYPES_H) && defined(HAVE_DIRENT_H)

  struct dirent *ent;
  int n_max = 0;   /* number of entries found by the counting pass */
  int n_ent = 0;   /* number of entries actually stored */
  DIR *d = opendir(path);

  if (d == NULL) {
    bft_error(__FILE__, __LINE__, 0,
              _("Error opening directory \"%s\":\n\n"
                "  %s"), path, strerror(errno));
    return NULL;
  }

  /* Counting pass */

  while (readdir(d) != NULL)
    n_max += 1;

  rewinddir(d);

  /* One extra slot for the NULL terminator */

  BFT_MALLOC(dirnames, n_max + 1, char *);

  /* Filling pass, bounded by the counted size: if entries are added to
     the directory between the two passes, the extra entries are ignored
     instead of being written past the end of the array. */

  while (n_ent < n_max && (ent = readdir(d)) != NULL) {
    BFT_MALLOC(dirnames[n_ent], strlen(ent->d_name) + 1, char);
    strcpy(dirnames[n_ent], ent->d_name);
    n_ent += 1;
  }
  dirnames[n_ent] = NULL;

  closedir(d);

  /* Sort the stored names (the NULL terminator is not included) */

  qsort(dirnames, n_ent, sizeof(char *), &_cs_file_compare_names);

#endif /* defined(HAVE_SYS_TYPES_H) && defined(HAVE_DIRENT_H) */

  return dirnames;
}
4545 
4546 /*----------------------------------------------------------------------------*/
4547 /*!
4548  * \brief Return the size of a file.
4549  *
4550  * If the file does not exist, 0 is returned.
4551  *
4552  * Note also that for some special files, such as files in the Linux /proc
4553  * directory, this may return 0.
4554  *
4555  * \param[in]  path  file path.
4556  *
4557  * \return size of file.
4558  */
4559 /*----------------------------------------------------------------------------*/
4560 
4561 cs_file_off_t
cs_file_size(const char * path)4562 cs_file_size(const char  *path)
4563 {
4564   cs_file_off_t retval = 0;
4565 
4566 #if defined(HAVE_SYS_STAT_H)
4567 
4568   struct stat s;
4569 
4570   if (stat(path, &s) != 0) {
4571     if (errno != ENOENT)
4572       bft_error(__FILE__, __LINE__, errno,
4573                 _("Error querying information for file:\n%s."),
4574                 path);
4575   }
4576   else
4577     retval = s.st_size;
4578 
4579 #else /* defined(HAVE_SYS_STAT_H) */
4580 
4581   /* If Posix-type API is not available, revert to basic method */
4582 
4583   FILE *f;
4584 
4585   if ((f = fopen(fic_path, "r")) != NULL) {
4586 
4587 # if defined(HAVE_FSEEKO) && (_FILE_OFFSET_BITS == 64)
4588     if (fseeko(f, 0, SEEK_END) == 0)
4589       retval = ftello(f);
4590 # else
4591     if (fseek(f, 0, SEEK_END) == 0)
4592       retval = ftell(f);
4593 # endif
4594 
4595     fclose(f);
4596   }
4597 
4598 #endif /* defined(HAVE_SYS_STAT_H) */
4599 
4600   return retval;
4601 }
4602 
4603 /*----------------------------------------------------------------------------*/
4604 /*!
4605  * \brief Remove a file if it exists and is a regular file or an empty
4606  *        directory.
4607  *
4608  * \param[in]  path  file path.
4609  *
4610  * \return 0 in case of success or if file does not exist,  not 0 otherwise.
4611  */
4612 /*----------------------------------------------------------------------------*/
4613 
int
cs_file_remove(const char  *path)
{
  int retval = 0;

#if defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_STAT_H) \
                              && defined(HAVE_UNISTD_H)

  struct stat s;

  /* A path whose status cannot be queried (such as a missing file)
     is left alone, and is not an error. */

  if (stat(path, &s) == 0) {

    if (S_ISREG(s.st_mode) != 0) {
      retval = unlink(path);
      /* Some error types are accepted:
         the file may have been removed in the meantime */
      if (retval != 0 && errno == ENOENT)
        retval = 0;
    }

    else if (S_ISDIR(s.st_mode) != 0) {
      retval = rmdir(path);
      /* Some error types are accepted:
         a non-empty or busy directory is simply kept */
      if (   retval != 0
          && (   errno == ENOTDIR || errno == EEXIST
              || errno == ENOTEMPTY || errno == EBUSY))
        retval = 0;
    }

  }

#else

  /* If Posix-type API is not available, revert to basic method.
     NOTE(review): opening in "w" mode creates the file when it does
     not exist yet, before it is removed again -- kept as is. */

  FILE *f = fopen(path, "w");

  if (f != NULL) {
    fclose(f);
    retval = remove(path); /* remove() takes the path, not the stream */
    /* Some error types are accepted */
    if (retval != 0 && errno == ENOENT)
      retval = 0;
  }

#endif

  if (retval != 0)
    bft_error(__FILE__, __LINE__, 0,
              _("Error removing file \"%s\":\n\n"
                "  %s"), path, strerror(errno));

  return retval;
}
4667 
4668 /*----------------------------------------------------------------------------*/
4669 /*!
4670  * \brief Check if file name/path ends with a specific string
4671  *
4672  * The function returns an int: 1 if the file name ends with the
4673  * given string, 0 otherwise.
4674  *
4675  * \param[in]  path  name of file
4676  * \param[in]  end   end string to test
4677  *
4678  * \return  1 if the path ends with the given string, 0 otherwise.
4679  */
4680 /*----------------------------------------------------------------------------*/
4681 
int
cs_file_endswith(const char  *path,
                 const char  *end)
{
  /* If either pointer is NULL, there is nothing to compare */

  if (path == NULL || end == NULL)
    return 0;

  /* Lengths are kept as size_t, avoiding any narrowing of the
     values returned by strlen() */

  const size_t lpath = strlen(path);
  const size_t lend  = strlen(end);

  /* If either string is empty, or if the path is shorter than the end
     string, the path cannot end with it */

  if (lpath == 0 || lend == 0 || lend > lpath)
    return 0;

  /* Compare the last lend characters of the path to the end string */

  return (strcmp(path + (lpath - lend), end) == 0);
}
4711 
4712 /*----------------------------------------------------------------------------*/
4713 END_C_DECLS
4714