1 /* -*- c++ -*- ---------------------------------------------------------- 2 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator 3 https://www.lammps.org/, Sandia National Laboratories 4 Steve Plimpton, sjplimp@sandia.gov 5 6 Copyright (2003) Sandia Corporation. Under the terms of Contract 7 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains 8 certain rights in this software. This software is distributed under 9 the GNU General Public License. 10 11 See the README file in the top-level LAMMPS directory. 12 ------------------------------------------------------------------------- */ 13 14 #ifndef LMP_UTILS_H 15 #define LMP_UTILS_H 16 17 /*! \file utils.h */ 18 19 #include "fmt/format.h" 20 #include "lmptype.h" 21 22 #include <mpi.h> 23 24 #include <vector> // IWYU pragma: export 25 26 namespace LAMMPS_NS { 27 28 // forward declarations 29 class Error; 30 class LAMMPS; 31 32 namespace utils { 33 34 /*! Match text against a simplified regex pattern 35 * 36 * \param text the text to be matched against the pattern 37 * \param pattern the search pattern, which may contain regexp markers 38 * \return true if the pattern matches, false if not */ 39 40 bool strmatch(const std::string &text, const std::string &pattern); 41 42 /*! Find sub-string that matches a simplified regex pattern 43 * 44 * \param text the text to be matched against the pattern 45 * \param pattern the search pattern, which may contain regexp markers 46 * \return the string that matches the pattern or an empty one */ 47 48 std::string strfind(const std::string &text, const std::string &pattern); 49 50 /* Internal function handling the argument list for logmesg(). */ 51 52 void fmtargs_logmesg(LAMMPS *lmp, fmt::string_view format, fmt::format_args args); 53 54 /*! Send formatted message to screen and logfile, if available 55 * 56 * This function simplifies the repetitive task of outputting some 57 * message to both the screen and/or the log file. The template 58 * wrapper with fmtlib format and argument processing allows 59 * this function to work similar to ``fmt::print()``. 60 * 61 * \param lmp pointer to LAMMPS class instance 62 * \param format format string of message to be printed 63 * \param args arguments to format string */ 64 logmesg(LAMMPS * lmp,const S & format,Args &&...args)65 template <typename S, typename... Args> void logmesg(LAMMPS *lmp, const S &format, Args &&...args) 66 { 67 fmtargs_logmesg(lmp, format, fmt::make_args_checked<Args...>(format, args...)); 68 } 69 70 /*! \overload 71 * 72 * \param lmp pointer to LAMMPS class instance 73 * \param mesg string with message to be printed */ 74 75 void logmesg(LAMMPS *lmp, const std::string &mesg); 76 77 /*! Return a string representing the current system error status 78 * 79 * This is a wrapper around calling strerror(errno). 80 * 81 * \return error string */ 82 83 std::string getsyserror(); 84 85 /*! Wrapper around fgets() which reads whole lines but truncates the 86 * data to the buffer size and ensures a newline char at the end. 87 * 88 * This function is useful for reading line based text files with 89 * possible comments that should be parsed later. This applies to 90 * data files, potential files, atomfile variable files and so on. 91 * It is used instead of fgets() by utils::read_lines_from_file(). 92 * 93 * \param s buffer for storing the result of fgets() 94 * \param size size of buffer s (max number of bytes returned) 95 * \param fp file pointer used by fgets() */ 96 97 char *fgets_trunc(char *s, int size, FILE *fp); 98 99 /*! Safe wrapper around fgets() which aborts on errors 100 * or EOF and prints a suitable error message to help debugging. 101 * 102 * Use nullptr as the error parameter to avoid the abort on EOF or error. 103 * 104 * \param srcname name of the calling source file (from FLERR macro) 105 * \param srcline line in the calling source file (from FLERR macro) 106 * \param s buffer for storing the result of fgets() 107 * \param size size of buffer s (max number of bytes read by fgets()) 108 * \param fp file pointer used by fgets() 109 * \param filename file name associated with fp (may be a null pointer; then LAMMPS will try to detect) 110 * \param error pointer to Error class instance (for abort) or nullptr */ 111 112 void sfgets(const char *srcname, int srcline, char *s, int size, FILE *fp, const char *filename, 113 Error *error); 114 115 /*! Safe wrapper around fread() which aborts on errors 116 * or EOF and prints a suitable error message to help debugging. 117 * 118 * Use nullptr as the error parameter to avoid the abort on EOF or error. 119 * 120 * \param srcname name of the calling source file (from FLERR macro) 121 * \param srcline line in the calling source file (from FLERR macro) 122 * \param s buffer for storing the result of fread() 123 * \param size size of data elements read by fread() 124 * \param num number of data elements read by fread() 125 * \param fp file pointer used by fread() 126 * \param filename file name associated with fp (may be a null pointer; then LAMMPS will try to detect) 127 * \param error pointer to Error class instance (for abort) or nullptr */ 128 129 void sfread(const char *srcname, int srcline, void *s, size_t size, size_t num, FILE *fp, 130 const char *filename, Error *error); 131 132 /*! Read N lines of text from file into buffer and broadcast them 133 * 134 * This function uses repeated calls to fread() to fill a buffer with 135 * newline terminated text. If a line does not end in a newline (e.g. 136 * at the end of a file), it is added. The caller has to allocate an 137 * nlines by nmax sized buffer for storing the text data. 138 * Reading is done by MPI rank 0 of the given communicator only, and 139 * thus only MPI rank 0 needs to provide a valid file pointer. 140 * 141 * \param fp file pointer used by fread 142 * \param nlines number of lines to be read 143 * \param nmax maximum length of a single line 144 * \param buffer buffer for storing the data. 145 * \param me MPI rank of calling process in MPI communicator 146 * \param comm MPI communicator for broadcast 147 * \return 1 if the read was short, 0 if read was successful */ 148 149 int read_lines_from_file(FILE *fp, int nlines, int nmax, char *buffer, int me, MPI_Comm comm); 150 151 /*! Report if a requested style is in a package or may have a typo 152 * 153 * \param style type of style that is to be checked for 154 * \param name name of style that was not found 155 * \param lmp pointer to top-level LAMMPS class instance 156 * \return string usable for error messages */ 157 158 std::string check_packages_for_style(const std::string &style, const std::string &name, 159 LAMMPS *lmp); 160 161 /*! Convert a string to a floating point number while checking 162 * if it is a valid floating point or integer number 163 * 164 * \param file name of source file for error message 165 * \param line line number in source file for error message 166 * \param str string to be converted to number 167 * \param do_abort determines whether to call Error::one() or Error::all() 168 * \param lmp pointer to top-level LAMMPS class instance 169 * \return double precision floating point number */ 170 171 double numeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp); 172 173 /*! Convert a string to an integer number while checking 174 * if it is a valid integer number (regular int) 175 * 176 * \param file name of source file for error message 177 * \param line line number in source file for error message 178 * \param str string to be converted to number 179 * \param do_abort determines whether to call Error::one() or Error::all() 180 * \param lmp pointer to top-level LAMMPS class instance 181 * \return integer number (regular int) */ 182 183 int inumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp); 184 185 /*! Convert a string to an integer number while checking 186 * if it is a valid integer number (bigint) 187 * 188 * \param file name of source file for error message 189 * \param line line number in source file for error message 190 * \param str string to be converted to number 191 * \param do_abort determines whether to call Error::one() or Error::all() 192 * \param lmp pointer to top-level LAMMPS class instance 193 * \return integer number (bigint) */ 194 195 bigint bnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp); 196 197 /*! Convert a string to an integer number while checking 198 * if it is a valid integer number (tagint) 199 * 200 * \param file name of source file for error message 201 * \param line line number in source file for error message 202 * \param str string to be converted to number 203 * \param do_abort determines whether to call Error::one() or Error::all() 204 * \param lmp pointer to top-level LAMMPS class instance 205 * \return integer number (tagint) */ 206 207 tagint tnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp); 208 209 /*! Compute index bounds derived from a string with a possible wildcard 210 * 211 * This functions processes the string in *str* and set the values of *nlo* 212 * and *nhi* according to the following five cases: 213 * 214 * - a single number, i: nlo = i; nhi = i; 215 * - a single asterisk, \*: nlo = nmin; nhi = nmax; 216 * - a single number followed by an asterisk, i\*: nlo = i; nhi = nmax; 217 * - a single asterisk followed by a number, \*i: nlo = nmin; nhi = i; 218 * - two numbers with an asterisk in between. i\*j: nlo = i; nhi = j; 219 * 220 * \param file name of source file for error message 221 * \param line line number in source file for error message 222 * \param str string to be processed 223 * \param nmin smallest possible lower bound 224 * \param nmax largest allowed upper bound 225 * \param nlo lower bound 226 * \param nhi upper bound 227 * \param error pointer to Error class for out-of-bounds messages */ 228 229 template <typename TYPE> 230 void bounds(const char *file, int line, const std::string &str, bigint nmin, bigint nmax, 231 TYPE &nlo, TYPE &nhi, Error *error); 232 233 /*! Expand list of arguments when containing fix/compute wildcards 234 * 235 * This function searches the list of arguments in *arg* for strings 236 * of the kind c_ID[*] or f_ID[*] referring to computes or fixes. 237 * Any such strings are replaced by one or more strings with the 238 * '*' character replaced by the corresponding possible numbers as 239 * determined from the fix or compute instance. Other strings are 240 * just copied. If the *mode* parameter is set to 0, expand global 241 * vectors, but not global arrays; if it is set to 1, expand global 242 * arrays (by column) but not global vectors. 243 * 244 * If any expansion happens, the earg list and all its 245 * strings are new allocations and must be freed explicitly by the 246 * caller. Otherwise arg and earg will point to the same address 247 * and no explicit de-allocation is needed by the caller. 248 * 249 * \param file name of source file for error message 250 * \param line line number in source file for error message 251 * \param narg number of arguments in current list 252 * \param arg argument list, possibly containing wildcards 253 * \param mode select between global vectors(=0) and arrays (=1) 254 * \param earg new argument list with wildcards expanded 255 * \param lmp pointer to top-level LAMMPS class instance 256 * \return number of arguments in expanded list */ 257 258 int expand_args(const char *file, int line, int narg, char **arg, int mode, char **&earg, 259 LAMMPS *lmp); 260 261 /*! Make C-style copy of string in new storage 262 * 263 * This allocates a storage buffer and copies the C-style or 264 * C++ style string into it. The buffer is allocated with "new" 265 * and thus needs to be deallocated with "delete[]". 266 * 267 * \param text string that should be copied 268 * \return new buffer with copy of string */ 269 270 char *strdup(const std::string &text); 271 272 /*! Trim leading and trailing whitespace. Like TRIM() in Fortran. 273 * 274 * \param line string that should be trimmed 275 * \return new string without whitespace (string) */ 276 277 std::string trim(const std::string &line); 278 279 /*! Return string with anything from '#' onward removed 280 * 281 * \param line string that should be trimmed 282 * \return new string without comment (string) */ 283 284 std::string trim_comment(const std::string &line); 285 286 /*! Check if a string will likely have UTF-8 encoded characters 287 * 288 * UTF-8 uses the 7-bit standard ASCII table for the first 127 characters and 289 * all other characters are encoded as multiple bytes. For the multi-byte 290 * characters the first byte has either the highest two, three, or four bits 291 * set followed by a zero bit and followed by one, two, or three more bytes, 292 * respectively, where the highest bit is set and the second highest bit set 293 * to 0. The remaining bits combined are the character code, which is thus 294 * limited to 21-bits. 295 * 296 * For the sake of efficiency this test only checks if a character in the string 297 * has the highest bit set and thus is very likely an UTF-8 character. It will 298 * not be able to tell this this is a valid UTF-8 character or whether it is a 299 * 2-byte, 3-byte, or 4-byte character. 300 * 301 \verbatim embed:rst 302 303 *See also* 304 :cpp:func:`utils::utf8_subst` 305 306 \endverbatim 307 * \param line string that should be checked 308 * \return true if string contains UTF-8 encoded characters (bool) */ 309 has_utf8(const std::string & line)310 inline bool has_utf8(const std::string &line) 311 { 312 for (auto c : line) 313 if (c & 0x80U) return true; 314 return false; 315 } 316 317 /*! Replace known UTF-8 characters with ASCII equivalents 318 * 319 \verbatim embed:rst 320 321 *See also* 322 :cpp:func:`utils::has_utf8` 323 324 \endverbatim 325 * \param line string that should be converted 326 * \return new string with ascii replacements (string) */ 327 328 std::string utf8_subst(const std::string &line); 329 330 /*! Count words in string with custom choice of separating characters 331 * 332 * \param text string that should be searched 333 * \param separators string containing characters that will be treated as whitespace 334 * \return number of words found */ 335 336 size_t count_words(const std::string &text, const std::string &separators); 337 338 /*! Count words in string, ignore any whitespace matching " \t\r\n\f" 339 * 340 * \param text string that should be searched 341 * \return number of words found */ 342 343 size_t count_words(const std::string &text); 344 345 /*! Count words in C-string, ignore any whitespace matching " \t\r\n\f" 346 * 347 * \param text string that should be searched 348 * \return number of words found */ 349 350 size_t count_words(const char *text); 351 352 /*! Count words in a single line, trim anything from '#' onward 353 * 354 * \param text string that should be trimmed and searched 355 * \param separators string containing characters that will be treated as whitespace 356 * \return number of words found */ 357 358 size_t trim_and_count_words(const std::string &text, const std::string &separators = " \t\r\n\f"); 359 360 /*! Take text and split into non-whitespace words. 361 * 362 * This can handle strings with single and double quotes, escaped quotes, 363 * and escaped codes within quotes, but due to using an STL container and 364 * STL strings is rather slow because of making copies. Designed for 365 * parsing command lines and similar text and not for time critical 366 * processing. Use a tokenizer class if performance matters. 367 * 368 \verbatim embed:rst 369 370 *See also* 371 :cpp:class:`Tokenizer`, :cpp:class:`ValueTokenizer` 372 373 \endverbatim 374 * \param text string that should be split 375 * \return STL vector with the words */ 376 377 std::vector<std::string> split_words(const std::string &text); 378 379 /*! Take multi-line text and split into lines 380 * 381 * \param text string that should be split 382 * \return STL vector with the lines */ 383 std::vector<std::string> split_lines(const std::string &text); 384 385 /*! Check if string can be converted to valid integer 386 * 387 * \param str string that should be checked 388 * \return true, if string contains valid a integer, false otherwise */ 389 390 bool is_integer(const std::string &str); 391 392 /*! Check if string can be converted to valid floating-point number 393 * 394 * \param str string that should be checked 395 * \return true, if string contains valid number, false otherwise */ 396 397 bool is_double(const std::string &str); 398 399 /*! Check if string is a valid ID 400 * ID strings may contain only letters, numbers, and underscores. 401 * 402 * \param str string that should be checked 403 * \return true, if string contains valid id, false otherwise */ 404 405 bool is_id(const std::string &str); 406 407 /*! Try to detect pathname from FILE pointer. 408 * 409 * Currently supported on Linux, MacOS, and Windows, otherwise will report "(unknown)". 410 * 411 * \param buf storage buffer for pathname. output will be truncated if not large enough 412 * \param len size of storage buffer. output will be truncated to this length - 1 413 * \param fp FILE pointer struct from STDIO library for which we want to detect the name 414 * \return pointer to the storage buffer, i.e. buf */ 415 416 const char *guesspath(char *buf, int len, FILE *fp); 417 418 /*! Strip off leading part of path, return just the filename 419 * 420 * \param path file path 421 * \return file name */ 422 423 std::string path_basename(const std::string &path); 424 425 /*! Return the directory part of a path. Return "." if empty 426 * 427 * \param path file path 428 * \return directory name */ 429 430 std::string path_dirname(const std::string &path); 431 432 /*! Join two pathname segments 433 * 434 * This uses the forward slash '/' character unless LAMMPS is compiled 435 * for Windows where it used the equivalent backward slash '\\'. 436 * 437 * \param a first path 438 * \param b second path 439 * \return combined path */ 440 441 std::string path_join(const std::string &a, const std::string &b); 442 443 /*! Check if file exists and is readable 444 * 445 * \param path file path 446 * \return true if file exists and is readable */ 447 448 bool file_is_readable(const std::string &path); 449 450 /*! Determine full path of potential file. If file is not found in current directory, 451 * search directories listed in LAMMPS_POTENTIALS environment variable 452 * 453 * \param path file path 454 * \return full path to potential file */ 455 456 std::string get_potential_file_path(const std::string &path); 457 458 /*! Read potential file and return DATE field if it is present 459 * 460 * \param path file path 461 * \param potential_name name of potential that is being read 462 * \return DATE field if present */ 463 464 std::string get_potential_date(const std::string &path, const std::string &potential_name); 465 466 /*! Read potential file and return UNITS field if it is present 467 * 468 * \param path file path 469 * \param potential_name name of potential that is being read 470 * \return UNITS field if present */ 471 472 std::string get_potential_units(const std::string &path, const std::string &potential_name); 473 474 enum { NOCONVERT = 0, METAL2REAL = 1, REAL2METAL = 1 << 1 }; 475 enum { UNKNOWN = 0, ENERGY }; 476 477 /*! Return bitmask of available conversion factors for a given property 478 * 479 * \param property property to be converted 480 * \return bitmask indicating available conversions */ 481 482 int get_supported_conversions(const int property); 483 484 /*! Return unit conversion factor for given property and selected from/to units 485 * 486 * \param property property to be converted 487 * \param conversion constant indicating the conversion 488 * \return conversion factor */ 489 490 double get_conversion_factor(const int property, const int conversion); 491 492 /*! Open a potential file as specified by *name* 493 * 494 * If opening the file directly fails, the function will search for 495 * it in the list of folder pointed to by the environment variable 496 * ``LAMMPS_POTENTIALS`` (if it is set). 497 * 498 * If the potential file has a ``UNITS`` tag in the first line, the 499 * tag's value is compared to the current unit style setting. 500 * The behavior of the function then depends on the value of the 501 * *auto_convert* parameter. If it is a null pointer, then the unit 502 * values must match or else the open will fail with an error. Otherwise 503 * the bitmask that *auto_convert* points to is used check for 504 * compatibility with possible automatic conversions by the calling 505 * function. If compatible, the bitmask is set to the required 506 * conversion or ``utils::NOCONVERT``. 507 * 508 * \param name file- or pathname of the potential file 509 * \param lmp pointer to top-level LAMMPS class instance 510 * \param auto_convert pointer to unit conversion bitmask or ``nullptr`` 511 * \return FILE pointer of the opened potential file or ``nullptr`` */ 512 513 FILE *open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert); 514 515 /*! Convert a time string to seconds 516 * 517 * The strings "off" and "unlimited" result in -1 518 * 519 * \param timespec a string in the following format: ([[HH:]MM:]SS) 520 * \return total in seconds */ 521 522 double timespec2seconds(const std::string ×pec); 523 524 /*! Convert a LAMMPS version date to a number 525 * 526 * This will generate a number YYYYMMDD from a date string 527 * (with or without blanks) that is suitable for numerical 528 * comparisons, i.e. later dates will generate a larger number. 529 * 530 * The day may or may not have a leading zero, the month 531 * is identified by the first 3 letters (so there may be more) 532 * and the year may be 2 or 4 digits (the missing 2 digits will 533 * be assumed as 20. That is 04 corresponds to 2004). 534 * 535 * No check is made whether the date is valid. 536 * 537 * \param date string in the format (Day Month Year) 538 * \return date code */ 539 540 int date2num(const std::string &date); 541 542 /*! Return current date as string 543 * 544 * This will generate a string containing the current date in YYYY-MM-DD format. 545 * 546 * \return string with current date */ 547 548 std::string current_date(); 549 550 /*! Binary search in a vector of ascending doubles of length N 551 * 552 * If the value is smaller than the smallest value in the vector, 0 is returned. 553 * If the value is larger or equal than the largest value in the vector, N-1 is returned. 554 * Otherwise the index that satisfies the condition 555 * 556 * haystack[index] <= value < haystack[index+1] 557 * 558 * is returned, i.e. a value from 1 to N-2. Note that if there are tied values in the 559 * haystack, always the larger index is returned as only that satisfied the condition. 560 * 561 * \param needle search value for which are are looking for the closest index 562 * \param n size of the haystack array 563 * \param haystack array with data in ascending order. 564 * \return index of value in the haystack array smaller or equal to needle */ 565 int binary_search(const double needle, const int n, const double *haystack); 566 567 /*! Custom merge sort implementation 568 * 569 * This function provides a custom upward hybrid merge sort 570 * implementation with support to pass an opaque pointer to 571 * the comparison function, e.g. for access to class members. 572 * This avoids having to use global variables. For improved 573 * performance, it uses an in-place insertion sort on initial 574 * chunks of up to 64 elements and switches to merge sort from 575 * then on. 576 * 577 * \param index Array with indices to be sorted 578 * \param num Length of the index array 579 * \param ptr Pointer to opaque object passed to comparison function 580 * \param comp Pointer to comparison function */ 581 582 void merge_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *)); 583 } // namespace utils 584 } // namespace LAMMPS_NS 585 586 #endif 587 588 /* ERROR/WARNING messages: 589 590 */ 591