1 /* -*- c++ -*- ----------------------------------------------------------
2    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3    https://www.lammps.org/, Sandia National Laboratories
4    Steve Plimpton, sjplimp@sandia.gov
5 
6    Copyright (2003) Sandia Corporation.  Under the terms of Contract
7    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8    certain rights in this software.  This software is distributed under
9    the GNU General Public License.
10 
11    See the README file in the top-level LAMMPS directory.
12 ------------------------------------------------------------------------- */
13 
14 #ifndef LMP_UTILS_H
15 #define LMP_UTILS_H
16 
17 /*! \file utils.h */
18 
19 #include "fmt/format.h"
20 #include "lmptype.h"
21 
22 #include <mpi.h>
23 
24 #include <vector> // IWYU pragma: export
25 
26 namespace LAMMPS_NS {
27 
28 // forward declarations
29 class Error;
30 class LAMMPS;
31 
32 namespace utils {
33 
34   /*! Match text against a simplified regex pattern
35    *
36    *  \param text the text to be matched against the pattern
37    *  \param pattern the search pattern, which may contain regexp markers
38    *  \return true if the pattern matches, false if not */
39 
40   bool strmatch(const std::string &text, const std::string &pattern);
41 
42   /*! Find sub-string that matches a simplified regex pattern
43    *
44    *  \param text the text to be matched against the pattern
45    *  \param pattern the search pattern, which may contain regexp markers
46    *  \return the string that matches the pattern or an empty one */
47 
48   std::string strfind(const std::string &text, const std::string &pattern);
49 
50   /* Internal function handling the argument list for logmesg(). */
51 
52   void fmtargs_logmesg(LAMMPS *lmp, fmt::string_view format, fmt::format_args args);
53 
54   /*! Send formatted message to screen and logfile, if available
55    *
56    * This function simplifies the repetitive task of outputting some
57    * message to both the screen and/or the log file. The template
58    * wrapper with fmtlib format and argument processing allows
59    * this function to work similar to ``fmt::print()``.
60    *
61    *  \param lmp    pointer to LAMMPS class instance
62    *  \param format format string of message to be printed
63    *  \param args   arguments to format string */
64 
logmesg(LAMMPS * lmp,const S & format,Args &&...args)65   template <typename S, typename... Args> void logmesg(LAMMPS *lmp, const S &format, Args &&...args)
66   {
67     fmtargs_logmesg(lmp, format, fmt::make_args_checked<Args...>(format, args...));
68   }
69 
70   /*! \overload
71    *
72    *  \param lmp    pointer to LAMMPS class instance
73    *  \param mesg   string with message to be printed */
74 
75   void logmesg(LAMMPS *lmp, const std::string &mesg);
76 
77   /*! Return a string representing the current system error status
78    *
79    *  This is a wrapper around calling strerror(errno).
80    *
81    *  \return  error string */
82 
83   std::string getsyserror();
84 
85   /*! Wrapper around fgets() which reads whole lines but truncates the
86    *  data to the buffer size and ensures a newline char at the end.
87    *
88    *  This function is useful for reading line based text files with
89    *  possible comments that should be parsed later. This applies to
90    *  data files, potential files, atomfile variable files and so on.
91    *  It is used instead of fgets() by utils::read_lines_from_file().
92    *
93    *  \param s        buffer for storing the result of fgets()
94    *  \param size     size of buffer s (max number of bytes returned)
95    *  \param fp       file pointer used by fgets() */
96 
97   char *fgets_trunc(char *s, int size, FILE *fp);
98 
99   /*! Safe wrapper around fgets() which aborts on errors
100    *  or EOF and prints a suitable error message to help debugging.
101    *
102    *  Use nullptr as the error parameter to avoid the abort on EOF or error.
103    *
104    *  \param srcname  name of the calling source file (from FLERR macro)
105    *  \param srcline  line in the calling source file (from FLERR macro)
106    *  \param s        buffer for storing the result of fgets()
107    *  \param size     size of buffer s (max number of bytes read by fgets())
108    *  \param fp       file pointer used by fgets()
109    *  \param filename file name associated with fp (may be a null pointer; then LAMMPS will try to detect)
110    *  \param error    pointer to Error class instance (for abort) or nullptr */
111 
112   void sfgets(const char *srcname, int srcline, char *s, int size, FILE *fp, const char *filename,
113               Error *error);
114 
115   /*! Safe wrapper around fread() which aborts on errors
116    *  or EOF and prints a suitable error message to help debugging.
117    *
118    *  Use nullptr as the error parameter to avoid the abort on EOF or error.
119    *
120    *  \param srcname  name of the calling source file (from FLERR macro)
121    *  \param srcline  line in the calling source file (from FLERR macro)
122    *  \param s        buffer for storing the result of fread()
123    *  \param size     size of data elements read by fread()
124    *  \param num      number of data elements read by fread()
125    *  \param fp       file pointer used by fread()
126    *  \param filename file name associated with fp (may be a null pointer; then LAMMPS will try to detect)
127    *  \param error    pointer to Error class instance (for abort) or nullptr */
128 
129   void sfread(const char *srcname, int srcline, void *s, size_t size, size_t num, FILE *fp,
130               const char *filename, Error *error);
131 
132   /*! Read N lines of text from file into buffer and broadcast them
133    *
134    * This function uses repeated calls to fread() to fill a buffer with
135    * newline terminated text.  If a line does not end in a newline (e.g.
136    * at the end of a file), it is added.  The caller has to allocate an
137    * nlines by nmax sized buffer for storing the text data.
138    * Reading is done by MPI rank 0 of the given communicator only, and
139    * thus only MPI rank 0 needs to provide a valid file pointer.
140    *
141    *  \param fp       file pointer used by fread
142    *  \param nlines   number of lines to be read
143    *  \param nmax     maximum length of a single line
144    *  \param buffer   buffer for storing the data.
145    *  \param me       MPI rank of calling process in MPI communicator
146    *  \param comm     MPI communicator for broadcast
147    *  \return         1 if the read was short, 0 if read was successful */
148 
149   int read_lines_from_file(FILE *fp, int nlines, int nmax, char *buffer, int me, MPI_Comm comm);
150 
151   /*! Report if a requested style is in a package or may have a typo
152    *
153    *  \param style type of style that is to be checked for
154    *  \param name  name of style that was not found
155    *  \param lmp   pointer to top-level LAMMPS class instance
156    *  \return string usable for error messages */
157 
158   std::string check_packages_for_style(const std::string &style, const std::string &name,
159                                        LAMMPS *lmp);
160 
161   /*! Convert a string to a floating point number while checking
162    *  if it is a valid floating point or integer number
163    *
164    *  \param file     name of source file for error message
165    *  \param line     line number in source file for error message
166    *  \param str      string to be converted to number
167    *  \param do_abort determines whether to call Error::one() or Error::all()
168    *  \param lmp      pointer to top-level LAMMPS class instance
169    *  \return         double precision floating point number */
170 
171   double numeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp);
172 
173   /*! Convert a string to an integer number while checking
174    *  if it is a valid integer number (regular int)
175    *
176    *  \param file     name of source file for error message
177    *  \param line     line number in source file for error message
178    *  \param str      string to be converted to number
179    *  \param do_abort determines whether to call Error::one() or Error::all()
180    *  \param lmp      pointer to top-level LAMMPS class instance
181    *  \return         integer number (regular int)  */
182 
183   int inumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp);
184 
185   /*! Convert a string to an integer number while checking
186    *  if it is a valid integer number (bigint)
187    *
188    *  \param file     name of source file for error message
189    *  \param line     line number in source file for error message
190    *  \param str      string to be converted to number
191    *  \param do_abort determines whether to call Error::one() or Error::all()
192    *  \param lmp      pointer to top-level LAMMPS class instance
193    *  \return         integer number (bigint) */
194 
195   bigint bnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp);
196 
197   /*! Convert a string to an integer number while checking
198    *  if it is a valid integer number (tagint)
199    *
200    * \param file     name of source file for error message
201    * \param line     line number in source file for error message
202    * \param str      string to be converted to number
203    * \param do_abort determines whether to call Error::one() or Error::all()
204    * \param lmp      pointer to top-level LAMMPS class instance
205    * \return         integer number (tagint) */
206 
207   tagint tnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp);
208 
209   /*! Compute index bounds derived from a string with a possible wildcard
210    *
211    * This function processes the string in *str* and sets the values of *nlo*
212    * and *nhi* according to the following five cases:
213    *
214    * - a single number, i: nlo = i; nhi = i;
215    * - a single asterisk, \*: nlo = nmin; nhi = nmax;
216    * - a single number followed by an asterisk, i\*: nlo = i; nhi = nmax;
217    * - a single asterisk followed by a number, \*i: nlo = nmin; nhi = i;
218    * - two numbers with an asterisk in between. i\*j: nlo = i; nhi = j;
219    *
220    * \param file     name of source file for error message
221    * \param line     line number in source file for error message
222    * \param str      string to be processed
223    * \param nmin     smallest possible lower bound
224    * \param nmax     largest allowed upper bound
225    * \param nlo      lower bound
226    * \param nhi      upper bound
227    * \param error    pointer to Error class for out-of-bounds messages */
228 
229   template <typename TYPE>
230   void bounds(const char *file, int line, const std::string &str, bigint nmin, bigint nmax,
231               TYPE &nlo, TYPE &nhi, Error *error);
232 
233   /*! Expand list of arguments when containing fix/compute wildcards
234    *
235    *  This function searches the list of arguments in *arg* for strings
236    *  of the kind c_ID[*] or f_ID[*] referring to computes or fixes.
237    *  Any such strings are replaced by one or more strings with the
238    *  '*' character replaced by the corresponding possible numbers as
239    *  determined from the fix or compute instance.  Other strings are
240    *  just copied. If the *mode* parameter is set to 0, expand global
241    *  vectors, but not global arrays; if it is set to 1, expand global
242    *  arrays (by column) but not global vectors.
243    *
244    *  If any expansion happens, the earg list and all its
245    *  strings are new allocations and must be freed explicitly by the
246    *  caller. Otherwise arg and earg will point to the same address
247    *  and no explicit de-allocation is needed by the caller.
248    *
249    * \param file  name of source file for error message
250    * \param line  line number in source file for error message
251    * \param narg  number of arguments in current list
252    * \param arg   argument list, possibly containing wildcards
253    * \param mode  select between global vectors(=0) and arrays (=1)
254    * \param earg  new argument list with wildcards expanded
255    * \param lmp   pointer to top-level LAMMPS class instance
256    * \return      number of arguments in expanded list */
257 
258   int expand_args(const char *file, int line, int narg, char **arg, int mode, char **&earg,
259                   LAMMPS *lmp);
260 
261   /*! Make C-style copy of string in new storage
262    *
263    * This allocates a storage buffer and copies the C-style or
264    * C++ style string into it.  The buffer is allocated with "new"
265    * and thus needs to be deallocated with "delete[]".
266    *
267    * \param text  string that should be copied
268    * \return new buffer with copy of string */
269 
270   char *strdup(const std::string &text);
271 
272   /*! Trim leading and trailing whitespace. Like TRIM() in Fortran.
273    *
274    * \param line  string that should be trimmed
275    * \return new string without whitespace (string) */
276 
277   std::string trim(const std::string &line);
278 
279   /*! Return string with anything from '#' onward removed
280    *
281    * \param line  string that should be trimmed
282    * \return new string without comment (string) */
283 
284   std::string trim_comment(const std::string &line);
285 
286   /*! Check if a string will likely have UTF-8 encoded characters
287    *
288    * UTF-8 uses the 7-bit standard ASCII table for the first 127 characters and
289    * all other characters are encoded as multiple bytes.  For the multi-byte
290    * characters the first byte has either the highest two, three, or four bits
291    * set followed by a zero bit and followed by one, two, or three more bytes,
292    * respectively, where the highest bit is set and the second highest bit set
293    * to 0.  The remaining bits combined are the character code, which is thus
294    * limited to 21-bits.
295    *
296    * For the sake of efficiency this test only checks if a character in the string
297    * has the highest bit set and thus is very likely an UTF-8 character.  It will
298    * not be able to tell whether this is a valid UTF-8 character or whether it is a
299    * 2-byte, 3-byte, or 4-byte character.
300    *
301 \verbatim embed:rst
302 
303 *See also*
304    :cpp:func:`utils::utf8_subst`
305 
306 \endverbatim
307    * \param line  string that should be checked
308    * \return true if string contains UTF-8 encoded characters (bool) */
309 
// Scan the string byte by byte and report whether any byte has its most
// significant bit (0x80) set.  An empty string yields false.
inline bool has_utf8(const std::string &line)
{
  for (const char c : line) {
    // look at the raw byte value so the bit test reads the same
    // whether plain char is signed or unsigned
    if (static_cast<unsigned char>(c) & 0x80U) return true;
  }
  return false;
}
316 
317   /*! Replace known UTF-8 characters with ASCII equivalents
318    *
319 \verbatim embed:rst
320 
321 *See also*
322    :cpp:func:`utils::has_utf8`
323 
324 \endverbatim
325    * \param line  string that should be converted
326    * \return new string with ascii replacements (string) */
327 
328   std::string utf8_subst(const std::string &line);
329 
330   /*! Count words in string with custom choice of separating characters
331    *
332    * \param text string that should be searched
333    * \param separators string containing characters that will be treated as whitespace
334    * \return number of words found */
335 
336   size_t count_words(const std::string &text, const std::string &separators);
337 
338   /*! Count words in string, ignore any whitespace matching " \t\r\n\f"
339    *
340    * \param text string that should be searched
341    * \return number of words found */
342 
343   size_t count_words(const std::string &text);
344 
345   /*! Count words in C-string, ignore any whitespace matching " \t\r\n\f"
346    *
347    * \param text string that should be searched
348    * \return number of words found */
349 
350   size_t count_words(const char *text);
351 
352   /*! Count words in a single line, trim anything from '#' onward
353    *
354    * \param text string that should be trimmed and searched
355    * \param separators string containing characters that will be treated as whitespace
356    * \return number of words found */
357 
358   size_t trim_and_count_words(const std::string &text, const std::string &separators = " \t\r\n\f");
359 
360   /*! Take text and split into non-whitespace words.
361    *
362    * This can handle strings with single and double quotes, escaped quotes,
363    * and escaped codes within quotes, but due to using an STL container and
364    * STL strings is rather slow because of making copies. Designed for
365    * parsing command lines and similar text and not for time critical
366    * processing.  Use a tokenizer class if performance matters.
367    *
368 \verbatim embed:rst
369 
370 *See also*
371    :cpp:class:`Tokenizer`, :cpp:class:`ValueTokenizer`
372 
373 \endverbatim
374    * \param text string that should be split
375    * \return STL vector with the words */
376 
377   std::vector<std::string> split_words(const std::string &text);
378 
379   /*! Take multi-line text and split into lines
380    *
381    * \param text string that should be split
382    * \return STL vector with the lines */
383   std::vector<std::string> split_lines(const std::string &text);
384 
385   /*! Check if string can be converted to valid integer
386    *
387    * \param str string that should be checked
388    * \return true, if string contains a valid integer, false otherwise */
389 
390   bool is_integer(const std::string &str);
391 
392   /*! Check if string can be converted to valid floating-point number
393    *
394    * \param str string that should be checked
395    * \return true, if string contains valid number, false otherwise */
396 
397   bool is_double(const std::string &str);
398 
399   /*! Check if string is a valid ID
400    * ID strings may contain only letters, numbers, and underscores.
401    *
402    * \param str string that should be checked
403    * \return true, if string contains valid id, false otherwise */
404 
405   bool is_id(const std::string &str);
406 
407   /*! Try to detect pathname from FILE pointer.
408    *
409    * Currently supported on Linux, MacOS, and Windows, otherwise will report "(unknown)".
410    *
411    *  \param buf  storage buffer for pathname. output will be truncated if not large enough
412    *  \param len  size of storage buffer. output will be truncated to this length - 1
413    *  \param fp   FILE pointer struct from STDIO library for which we want to detect the name
414    *  \return pointer to the storage buffer, i.e. buf */
415 
416   const char *guesspath(char *buf, int len, FILE *fp);
417 
418   /*! Strip off leading part of path, return just the filename
419    *
420    * \param path file path
421    * \return file name */
422 
423   std::string path_basename(const std::string &path);
424 
425   /*! Return the directory part of a path. Return "." if empty
426    *
427    * \param path file path
428    * \return directory name */
429 
430   std::string path_dirname(const std::string &path);
431 
432   /*! Join two pathname segments
433    *
434    * This uses the forward slash '/' character unless LAMMPS is compiled
435    * for Windows where it uses the equivalent backward slash '\\'.
436    *
437    * \param   a  first path
438    * \param   b  second path
439    * \return     combined path */
440 
441   std::string path_join(const std::string &a, const std::string &b);
442 
443   /*! Check if file exists and is readable
444    *
445    * \param path file path
446    * \return true if file exists and is readable */
447 
448   bool file_is_readable(const std::string &path);
449 
450   /*! Determine full path of potential file. If file is not found in current directory,
451    *  search directories listed in LAMMPS_POTENTIALS environment variable
452    *
453    * \param path file path
454    * \return full path to potential file */
455 
456   std::string get_potential_file_path(const std::string &path);
457 
458   /*! Read potential file and return DATE field if it is present
459    *
460    * \param path file path
461    * \param potential_name name of potential that is being read
462    * \return DATE field if present */
463 
464   std::string get_potential_date(const std::string &path, const std::string &potential_name);
465 
466   /*! Read potential file and return UNITS field if it is present
467    *
468    * \param path file path
469    * \param potential_name name of potential that is being read
470    * \return UNITS field if present */
471 
472   std::string get_potential_units(const std::string &path, const std::string &potential_name);
473 
// Unit conversion selectors.  METAL2REAL and REAL2METAL occupy distinct
// bits; NOCONVERT is the empty mask.
enum {
  NOCONVERT = 0,
  METAL2REAL = 1 << 0,
  REAL2METAL = 1 << 1
};

// Property identifiers (presumably the values accepted as the "property"
// argument of the conversion functions -- confirm against the callers).
enum {
  UNKNOWN = 0,
  ENERGY = 1
};
476 
477   /*! Return bitmask of available conversion factors for a given property
478    *
479    * \param property property to be converted
480    * \return bitmask indicating available conversions */
481 
482   int get_supported_conversions(const int property);
483 
484   /*! Return unit conversion factor for given property and selected from/to units
485    *
486    * \param property property to be converted
487    * \param conversion constant indicating the conversion
488    * \return conversion factor */
489 
490   double get_conversion_factor(const int property, const int conversion);
491 
492   /*! Open a potential file as specified by *name*
493    *
494    * If opening the file directly fails, the function will search for
495    * it in the list of folders pointed to by the environment variable
496    * ``LAMMPS_POTENTIALS`` (if it is set).
497    *
498    * If the potential file has a ``UNITS`` tag in the first line, the
499    * tag's value is compared to the current unit style setting.
500    * The behavior of the function then depends on the value of the
501    * *auto_convert* parameter.  If it is a null pointer, then the unit
502    * values must match or else the open will fail with an error.  Otherwise
503    * the bitmask that *auto_convert* points to is used to check for
504    * compatibility with possible automatic conversions by the calling
505    * function.  If compatible, the bitmask is set to the required
506    * conversion or ``utils::NOCONVERT``.
507    *
508    * \param name          file- or pathname of the potential file
509    * \param lmp           pointer to top-level LAMMPS class instance
510    * \param auto_convert  pointer to unit conversion bitmask or ``nullptr``
511    * \return              FILE pointer of the opened potential file or ``nullptr`` */
512 
513   FILE *open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert);
514 
515   /*! Convert a time string to seconds
516    *
517    * The strings "off" and "unlimited" result in -1
518    *
519    * \param timespec a string in the following format: ([[HH:]MM:]SS)
520    * \return total in seconds */
521 
522   double timespec2seconds(const std::string &timespec);
523 
524   /*! Convert a LAMMPS version date to a number
525    *
526    * This will generate a number YYYYMMDD from a date string
527    * (with or without blanks) that is suitable for numerical
528    * comparisons, i.e. later dates will generate a larger number.
529    *
530    * The day may or may not have a leading zero, the month
531    * is identified by the first 3 letters (so there may be more)
532    * and the year may be 2 or 4 digits (the missing 2 digits will
533    * be assumed as 20. That is 04 corresponds to 2004).
534    *
535    * No check is made whether the date is valid.
536    *
537    * \param  date  string in the format (Day Month Year)
538    * \return       date code */
539 
540   int date2num(const std::string &date);
541 
542   /*! Return current date as string
543    *
544    * This will generate a string containing the current date in YYYY-MM-DD format.
545    *
546    * \return       string with current date */
547 
548   std::string current_date();
549 
550   /*! Binary search in a vector of ascending doubles of length N
551    *
552    * If the value is smaller than the smallest value in the vector, 0 is returned.
553    * If the value is larger than or equal to the largest value in the vector, N-1 is returned.
554    * Otherwise the index that satisfies the condition
555    *
556    * haystack[index] <= value < haystack[index+1]
557    *
558    * is returned, i.e. a value from 1 to N-2. Note that if there are tied values in the
559    * haystack, always the larger index is returned as only that satisfies the condition.
560    *
561    * \param  needle    search value for which we are looking for the closest index
562    * \param  n         size of the haystack array
563    * \param  haystack  array with data in ascending order.
564    * \return           index of value in the haystack array smaller or equal to needle */
565   int binary_search(const double needle, const int n, const double *haystack);
566 
567   /*! Custom merge sort implementation
568    *
569    * This function provides a custom upward hybrid merge sort
570    * implementation with support to pass an opaque pointer to
571    * the comparison function, e.g. for access to class members.
572    * This avoids having to use global variables.  For improved
573    * performance, it uses an in-place insertion sort on initial
574    * chunks of up to 64 elements and switches to merge sort from
575    * then on.
576    *
577    * \param  index  Array with indices to be sorted
578    * \param  num    Length of the index array
579    * \param  ptr    Pointer to opaque object passed to comparison function
580    * \param  comp   Pointer to comparison function */
581 
582   void merge_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *));
583 }    // namespace utils
584 }    // namespace LAMMPS_NS
585 
586 #endif
587 
588 /* ERROR/WARNING messages:
589 
590 */
591