1 /* ----------------------------------------------------------------------
2    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3    https://www.lammps.org/, Sandia National Laboratories
4    Steve Plimpton, sjplimp@sandia.gov
5 
6    Copyright (2003) Sandia Corporation.  Under the terms of Contract
7    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8    certain rights in this software.  This software is distributed under
9    the GNU General Public License.
10 
11    See the README file in the top-level LAMMPS directory.
12 ------------------------------------------------------------------------- */
13 
14 #include "utils.h"
15 
16 #include "atom.h"
17 #include "comm.h"
18 #include "compute.h"
19 #include "error.h"
20 #include "fix.h"
21 #include "fmt/chrono.h"
22 #include "memory.h"
23 #include "modify.h"
24 #include "text_file_reader.h"
25 #include "tokenizer.h"
26 #include "update.h"
27 
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <ctime>
32 
33 #if defined(__linux__)
34 #include <unistd.h>    // for readlink
35 #endif
36 
37 #if defined(__APPLE__)
38 #include <fcntl.h>    // for fcntl
39 #include <sys/syslimits.h>
40 #endif
41 
42 #if defined(_WIN32)
43 // target Windows version is Windows 7 and later
44 #if defined(_WIN32_WINNT)
45 #undef _WIN32_WINNT
46 #endif
47 #define _WIN32_WINNT _WIN32_WINNT_WIN7
48 #include <io.h>
49 #include <windows.h>
50 #endif
51 
52 /*! \file utils.cpp */
53 
54 /*
55  * Mini regex-module adapted from https://github.com/kokke/tiny-regex-c
56  * which is in the public domain.
57  *
58  * Supports:
59  * ---------
60  *   '.'        Dot, matches any character
61  *   '^'        Start anchor, matches beginning of string
62  *   '$'        End anchor, matches end of string
63  *   '*'        Asterisk, match zero or more (greedy)
64  *   '+'        Plus, match one or more (greedy)
65  *   '?'        Question, match zero or one (non-greedy)
66  *   '[abc]'    Character class, match if one of {'a', 'b', 'c'}
67  *   '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
68  *   '\s'       Whitespace, \t \f \r \n \v and spaces
69  *   '\S'       Non-whitespace
70  *   '\w'       Alphanumeric, [a-zA-Z0-9_]
71  *   '\W'       Non-alphanumeric
72  *   '\d'       Digits, [0-9]
73  *   '\D'       Non-digits
74  *   '\i'       Integer chars, [0-9], '+' and '-'
75  *   '\I'       Non-integers
76  *   '\f'       Floating point number chars, [0-9], '.', 'e', 'E', '+' and '-'
77  *   '\F'       Non-floats
78  *
79  * *NOT* supported:
80  *   '[^abc]'   Inverted class
81  *   'a|b'      Branches
82  *   '(abc)+'   Groups
83  */
84 
extern "C" {
/** Match text against a (simplified) regular expression
   * (regexp will be compiled automatically).
   * Callers in this file treat a return value >= 0 as the offset of the
   * match within text and a negative return value as "no match". */
static int re_match(const char *text, const char *pattern);

/** Find the substring that matches a (simplified) regular expression
   * (regexp will be compiled automatically).
   * Callers in this file treat a return value >= 0 as the offset of the
   * match within text and read the length of the match from *matchlen. */
static int re_find(const char *text, const char *pattern, int *matchlen);
}
94 
95 ////////////////////////////////////////////////////////////////////////
96 // Merge sort support functions
97 
// Forward declarations only; the definitions are located elsewhere in this file.
// NOTE(review): judging by the names, do_merge() merges the index ranges
// llo..lhi and rlo..rhi of idx using buf as scratch space, and insertion_sort()
// sorts num entries of index. comp and ptr look like a comparison callback and
// its opaque user data - confirm against the definitions.
static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, void *ptr,
                     int (*comp)(int, int, void *));
static void insertion_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *));
101 
102 ////////////////////////////////////////////////////////////////////////
103 
104 using namespace LAMMPS_NS;
105 
106 /** More flexible and specific matching of a string against a pattern.
107  *  This function is supposed to be a more safe, more specific and
108  *  simple to use API to find pattern matches. The purpose is to replace
109  *  uses of either strncmp() or strstr() in the code base to find
110  *  sub-strings safely. With strncmp() finding prefixes, the number of
111  *  characters to match must be counted, which can lead to errors,
112  *  while using "^pattern" will do the same with less problems.
113  *  Matching for suffixes using strstr() is not as specific as 'pattern$',
114  *  and complex matches, e.g. "^rigid.*\/small.*", to match all small
115  *  body optimized rigid fixes require only one test.
116  *
117  *  The use of std::string arguments allows for simple concatenation
118  *  even with char * type variables.
119  *  Example: utils::strmatch(text, std::string("^") + charptr)
120  */
strmatch(const std::string & text,const std::string & pattern)121 bool utils::strmatch(const std::string &text, const std::string &pattern)
122 {
123   const int pos = re_match(text.c_str(), pattern.c_str());
124   return (pos >= 0);
125 }
126 
/** This function is a companion function to utils::strmatch(). Arguments
 *  and logic are the same, but instead of a boolean, it returns the
 *  sub-string that matches the regex pattern.  There can be only one match.
 *  This can be used as a more flexible alternative to strstr().
 */
strfind(const std::string & text,const std::string & pattern)132 std::string utils::strfind(const std::string &text, const std::string &pattern)
133 {
134   int matchlen;
135   const int pos = re_find(text.c_str(), pattern.c_str(), &matchlen);
136   if ((pos >= 0) && (matchlen > 0))
137     return text.substr(pos, matchlen);
138   else
139     return "";
140 }
141 
142 /* specialization for the case of just a single string argument */
143 
logmesg(LAMMPS * lmp,const std::string & mesg)144 void utils::logmesg(LAMMPS *lmp, const std::string &mesg)
145 {
146   if (lmp->screen) fputs(mesg.c_str(), lmp->screen);
147   if (lmp->logfile) fputs(mesg.c_str(), lmp->logfile);
148 }
149 
fmtargs_logmesg(LAMMPS * lmp,fmt::string_view format,fmt::format_args args)150 void utils::fmtargs_logmesg(LAMMPS *lmp, fmt::string_view format, fmt::format_args args)
151 {
152   try {
153     logmesg(lmp, fmt::vformat(format, args));
154   } catch (fmt::format_error &e) {
155     logmesg(lmp, std::string(e.what()) + "\n");
156   }
157 }
158 
159 /* define this here, so we won't have to include the headers
160    everywhere and utils.h will more likely be included anyway. */
161 
getsyserror()162 std::string utils::getsyserror()
163 {
164   return std::string(strerror(errno));
165 }
166 
167 /** On Linux the folder /proc/self/fd holds symbolic links to the actual
168  * pathnames associated with each open file descriptor of the current process.
169  * On MacOS the same kind of information can be obtained using ``fcntl(fd,F_GETPATH,buf)``.
170  * On Windows we use ``GetFinalPathNameByHandleA()`` which is available with
171  * Windows Vista and later.
172  *
173  * This function is used to provide a filename with error messages in functions
174  * where the filename is not passed as an argument, but the FILE * pointer.
175  */
guesspath(char * buf,int len,FILE * fp)176 const char *utils::guesspath(char *buf, int len, FILE *fp)
177 {
178   memset(buf, 0, len);
179 
180 #if defined(__linux__)
181   int fd = fileno(fp);
182   // get pathname from /proc or copy (unknown)
183   if (readlink(fmt::format("/proc/self/fd/{}", fd).c_str(), buf, len - 1) <= 0)
184     strncpy(buf, "(unknown)", len - 1);
185 #elif defined(__APPLE__)
186   int fd = fileno(fp);
187   char filepath[PATH_MAX];
188   if (fcntl(fd, F_GETPATH, filepath) != -1)
189     strncpy(buf, filepath, len - 1);
190   else
191     strncpy(buf, "(unknown)", len - 1);
192 #elif defined(_WIN32)
193   char filepath[MAX_PATH];
194   HANDLE h = (HANDLE) _get_osfhandle(_fileno(fp));
195   if (GetFinalPathNameByHandleA(h, filepath, PATH_MAX, FILE_NAME_NORMALIZED) > 0)
196     strncpy(buf, filepath, len - 1);
197   else
198     strncpy(buf, "(unknown)", len - 1);
199 #else
200   strncpy(buf, "(unknown)", len - 1);
201 #endif
202   return buf;
203 }
204 
205 // read line into buffer. if line is too long keep reading until EOL or EOF
206 // but return only the first part with a newline at the end.
207 
fgets_trunc(char * buf,int size,FILE * fp)208 char *utils::fgets_trunc(char *buf, int size, FILE *fp)
209 {
210   constexpr int MAXDUMMY = 256;
211   char dummy[MAXDUMMY];
212   char *ptr = fgets(buf, size, fp);
213 
214   // EOF?
215   if (!ptr) return nullptr;
216 
217   int n = strlen(buf);
218 
219   // check the string being read in:
220   // - if string is shorter than the buffer make sure it has a final newline and return
221   // - if string is exactly the size of the buffer and has a final newline return
222   // - otherwise truncate with final newline and read into dummy buffer until EOF or newline is found
223   if (n < size - 1) {
224     if (buf[n - 1] != '\n') {
225       buf[n] = '\n';
226       buf[n + 1] = '\0';
227     }
228     return buf;
229   } else if (buf[n - 1] == '\n') {
230     return buf;
231   } else
232     buf[size - 2] = '\n';
233 
234   // continue reading into dummy buffer until end of line or file
235   do {
236     ptr = fgets(dummy, MAXDUMMY, fp);
237     if (ptr)
238       n = strlen(ptr);
239     else
240       n = 0;
241   } while (n == MAXDUMMY - 1 && ptr[MAXDUMMY - 1] != '\n');
242 
243   // return truncated chunk
244   return buf;
245 }
246 
247 /* like fgets() but aborts with an error or EOF is encountered */
sfgets(const char * srcname,int srcline,char * s,int size,FILE * fp,const char * filename,Error * error)248 void utils::sfgets(const char *srcname, int srcline, char *s, int size, FILE *fp,
249                    const char *filename, Error *error)
250 {
251   constexpr int MAXPATHLENBUF = 1024;
252   char *rv = fgets(s, size, fp);
253   if (rv == nullptr) {    // something went wrong
254     char buf[MAXPATHLENBUF];
255     std::string errmsg;
256 
257     // try to figure out the file name from the file pointer
258     if (!filename) filename = guesspath(buf, MAXPATHLENBUF, fp);
259 
260     if (feof(fp)) {
261       errmsg = "Unexpected end of file while reading file '";
262     } else if (ferror(fp)) {
263       errmsg = "Unexpected error while reading file '";
264     } else {
265       errmsg = "Unexpected short read while reading file '";
266     }
267     errmsg += filename;
268     errmsg += "'";
269 
270     if (error) error->one(srcname, srcline, errmsg);
271     if (s) *s = '\0';    // truncate string to empty in case error is null pointer
272   }
273   return;
274 }
275 
276 /* like fread() but aborts with an error or EOF is encountered */
sfread(const char * srcname,int srcline,void * s,size_t size,size_t num,FILE * fp,const char * filename,Error * error)277 void utils::sfread(const char *srcname, int srcline, void *s, size_t size, size_t num, FILE *fp,
278                    const char *filename, Error *error)
279 {
280   constexpr int MAXPATHLENBUF = 1024;
281   size_t rv = fread(s, size, num, fp);
282   if (rv != num) {    // something went wrong
283     char buf[MAXPATHLENBUF];
284     std::string errmsg;
285 
286     // try to figure out the file name from the file pointer
287     if (!filename) filename = guesspath(buf, MAXPATHLENBUF, fp);
288 
289     if (feof(fp)) {
290       errmsg = "Unexpected end of file while reading file '";
291     } else if (ferror(fp)) {
292       errmsg = "Unexpected error while reading file '";
293     } else {
294       errmsg = "Unexpected short read while reading file '";
295     }
296     errmsg += filename;
297     errmsg += "'";
298 
299     if (error) error->one(srcname, srcline, errmsg);
300   }
301   return;
302 }
303 
304 /* ------------------------------------------------------------------ */
305 
306 /* read N lines and broadcast */
read_lines_from_file(FILE * fp,int nlines,int nmax,char * buffer,int me,MPI_Comm comm)307 int utils::read_lines_from_file(FILE *fp, int nlines, int nmax, char *buffer, int me, MPI_Comm comm)
308 {
309   char *ptr = buffer;
310   *ptr = '\0';
311 
312   if (me == 0) {
313     if (fp) {
314       for (int i = 0; i < nlines; i++) {
315         ptr = fgets_trunc(ptr, nmax, fp);
316         if (!ptr) break;    // EOF?
317         // advance ptr to end of string
318         ptr += strlen(ptr);
319         // ensure buffer is null terminated. null char is start of next line.
320         *ptr = '\0';
321       }
322     }
323   }
324 
325   int n = strlen(buffer);
326   MPI_Bcast(&n, 1, MPI_INT, 0, comm);
327   if (n == 0) return 1;
328   MPI_Bcast(buffer, n + 1, MPI_CHAR, 0, comm);
329   return 0;
330 }
331 
332 /* ------------------------------------------------------------------ */
333 
check_packages_for_style(const std::string & style,const std::string & name,LAMMPS * lmp)334 std::string utils::check_packages_for_style(const std::string &style, const std::string &name,
335                                             LAMMPS *lmp)
336 {
337   std::string errmsg = "Unrecognized " + style + " style '" + name + "'";
338   const char *pkg = lmp->match_style(style.c_str(), name.c_str());
339 
340   if (pkg) {
341     errmsg += fmt::format(" is part of the {} package", pkg);
342     if (lmp->is_installed_pkg(pkg))
343       errmsg += ", but seems to be missing because of a dependency";
344     else
345       errmsg += " which is not enabled in this LAMMPS binary.";
346   }
347   return errmsg;
348 }
349 
350 /* ----------------------------------------------------------------------
351    read a floating point value from a string
352    generate an error if not a legitimate floating point value
353    called by various commands to check validity of their arguments
354 ------------------------------------------------------------------------- */
355 
numeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)356 double utils::numeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
357 {
358   int n = 0;
359 
360   if (str) n = strlen(str);
361   if (n == 0) {
362     const char msg[] = "Expected floating point parameter instead of"
363                        " NULL or empty string in input script or data file";
364     if (do_abort)
365       lmp->error->one(file, line, msg);
366     else
367       lmp->error->all(file, line, msg);
368   }
369 
370   std::string buf(str);
371   if (has_utf8(buf)) buf = utf8_subst(buf);
372 
373   if (buf.find_first_not_of("0123456789-+.eE") != std::string::npos) {
374     std::string msg("Expected floating point parameter instead of '");
375     msg += buf + "' in input script or data file";
376     if (do_abort)
377       lmp->error->one(file, line, msg);
378     else
379       lmp->error->all(file, line, msg);
380   }
381 
382   return atof(buf.c_str());
383 }
384 
385 /* ----------------------------------------------------------------------
386    read an integer value from a string
387    generate an error if not a legitimate integer value
388    called by various commands to check validity of their arguments
389 ------------------------------------------------------------------------- */
390 
inumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)391 int utils::inumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
392 {
393   int n = 0;
394 
395   if (str) n = strlen(str);
396   if (n == 0) {
397     const char msg[] = "Expected integer parameter instead of"
398                        " NULL or empty string in input script or data file";
399     if (do_abort)
400       lmp->error->one(file, line, msg);
401     else
402       lmp->error->all(file, line, msg);
403   }
404 
405   std::string buf(str);
406   if (has_utf8(buf)) buf = utf8_subst(buf);
407 
408   if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
409     std::string msg("Expected integer parameter instead of '");
410     msg += buf + "' in input script or data file";
411     if (do_abort)
412       lmp->error->one(file, line, msg);
413     else
414       lmp->error->all(file, line, msg);
415   }
416 
417   return atoi(buf.c_str());
418 }
419 
420 /* ----------------------------------------------------------------------
421    read a big integer value from a string
422    generate an error if not a legitimate integer value
423    called by various commands to check validity of their arguments
424 ------------------------------------------------------------------------- */
425 
bnumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)426 bigint utils::bnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
427 {
428   int n = 0;
429 
430   if (str) n = strlen(str);
431   if (n == 0) {
432     const char msg[] = "Expected integer parameter instead of"
433                        " NULL or empty string in input script or data file";
434     if (do_abort)
435       lmp->error->one(file, line, msg);
436     else
437       lmp->error->all(file, line, msg);
438   }
439 
440   std::string buf(str);
441   if (has_utf8(buf)) buf = utf8_subst(buf);
442 
443   if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
444     std::string msg("Expected integer parameter instead of '");
445     msg += buf + "' in input script or data file";
446     if (do_abort)
447       lmp->error->one(file, line, msg);
448     else
449       lmp->error->all(file, line, msg);
450   }
451 
452   return ATOBIGINT(buf.c_str());
453 }
454 
455 /* ----------------------------------------------------------------------
456    read a tag integer value from a string
457    generate an error if not a legitimate integer value
458    called by various commands to check validity of their arguments
459 ------------------------------------------------------------------------- */
460 
tnumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)461 tagint utils::tnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
462 {
463   int n = 0;
464 
465   if (str) n = strlen(str);
466   if (n == 0) {
467     const char msg[] = "Expected integer parameter instead of"
468                        " NULL or empty string in input script or data file";
469     if (do_abort)
470       lmp->error->one(file, line, msg);
471     else
472       lmp->error->all(file, line, msg);
473   }
474 
475   std::string buf(str);
476   if (has_utf8(buf)) buf = utf8_subst(buf);
477 
478   if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
479     std::string msg("Expected integer parameter instead of '");
480     msg += buf + "' in input script or data file";
481     if (do_abort)
482       lmp->error->one(file, line, msg);
483     else
484       lmp->error->all(file, line, msg);
485   }
486 
487   return ATOTAGINT(buf.c_str());
488 }
489 
490 /* ----------------------------------------------------------------------
491    compute bounds implied by numeric str with a possible wildcard asterisk
492 ------------------------------------------------------------------------- */
493 // clang-format off
494 template <typename TYPE>
bounds(const char * file,int line,const std::string & str,bigint nmin,bigint nmax,TYPE & nlo,TYPE & nhi,Error * error)495 void utils::bounds(const char *file, int line, const std::string &str,
496                    bigint nmin, bigint nmax, TYPE &nlo, TYPE &nhi, Error *error)
497 {
498   nlo = nhi = -1;
499 
500   // check for illegal charcters
501   size_t found = str.find_first_not_of("*-0123456789");
502   if (found != std::string::npos) {
503     if (error) error->all(file, line, fmt::format("Invalid range string: {}", str));
504     return;
505   }
506 
507   found = str.find_first_of('*');
508   if (found == std::string::npos) {    // contains no '*'
509     nlo = nhi = strtol(str.c_str(), nullptr, 10);
510   } else if (str.size() == 1) {    // is only '*'
511     nlo = nmin;
512     nhi = nmax;
513   } else if (found == 0) {    // is '*j'
514     nlo = nmin;
515     nhi = strtol(str.substr(1).c_str(), nullptr, 10);
516   } else if (str.size() == found + 1) {    // is 'i*'
517     nlo = strtol(str.c_str(), nullptr, 10);
518     nhi = nmax;
519   } else {    // is 'i*j'
520     nlo = strtol(str.c_str(), nullptr, 10);
521     nhi = strtol(str.substr(found + 1).c_str(), nullptr, 10);
522   }
523 
524   if (error) {
525     if ((nlo <= 0) || (nhi <= 0))
526       error->all(file, line, fmt::format("Invalid range string: {}", str));
527 
528     if (nlo < nmin)
529       error->all(file, line, fmt::format("Numeric index {} is out of bounds "
530                              "({}-{})", nlo, nmin, nmax));
531     else if (nhi > nmax)
532       error->all(file, line, fmt::format("Numeric index {} is out of bounds "
533                              "({}-{})", nhi, nmin, nmax));
534     else if (nlo > nhi)
535       error->all(file, line, fmt::format("Numeric index {} is out of bounds "
536                              "({}-{})", nlo, nmin, nhi));
537   }
538 }
539 
540 template void utils::bounds<>(const char *, int, const std::string &,
541                               bigint, bigint, int &, int &, Error *);
542 template void utils::bounds<>(const char *, int, const std::string &,
543                               bigint, bigint, long &, long &, Error *);
544 template void utils::bounds<>(const char *, int, const std::string &,
545                               bigint, bigint, long long &, long long &, Error *);
546 // clang-format on
547 
548 /* -------------------------------------------------------------------------
549    Expand list of arguments in arg to earg if arg contains wildcards
550 ------------------------------------------------------------------------- */
551 
expand_args(const char * file,int line,int narg,char ** arg,int mode,char ** & earg,LAMMPS * lmp)552 int utils::expand_args(const char *file, int line, int narg, char **arg, int mode, char **&earg,
553                        LAMMPS *lmp)
554 {
555   int iarg;
556 
557   char *ptr = nullptr;
558   for (iarg = 0; iarg < narg; iarg++) {
559     ptr = strchr(arg[iarg], '*');
560     if (ptr) break;
561   }
562 
563   if (!ptr) {
564     earg = arg;
565     return narg;
566   }
567 
568   // maxarg should always end up equal to newarg, so caller can free earg
569 
570   int maxarg = narg - iarg;
571   earg = (char **) lmp->memory->smalloc(maxarg * sizeof(char *), "input:earg");
572 
573   int newarg = 0, expandflag, nlo, nhi, nmax;
574   std::string id, wc, tail;
575 
576   for (iarg = 0; iarg < narg; iarg++) {
577     std::string word(arg[iarg]);
578     expandflag = 0;
579 
580     // match compute, fix, or custom property array reference with a '*' wildcard
581     // number range in the first pair of square brackets
582 
583     if (strmatch(word, "^[cf]_\\w+\\[\\d*\\*\\d*\\]") ||
584         strmatch(word, "^[id]2_\\w+\\[\\d*\\*\\d*\\]")) {
585 
586       // split off the compute/fix/property ID, the wildcard and trailing text
587 
588       size_t first = word.find('[');
589       size_t second = word.find(']', first + 1);
590       if (word[1] == '2')
591         id = word.substr(3, first - 3);
592       else
593         id = word.substr(2, first - 2);
594 
595       wc = word.substr(first + 1, second - first - 1);
596       tail = word.substr(second + 1);
597 
598       // compute
599 
600       if (word[0] == 'c') {
601         int icompute = lmp->modify->find_compute(id);
602 
603         // check for global vector/array, peratom array, local array
604 
605         if (icompute >= 0) {
606           Compute *compute = lmp->modify->compute[icompute];
607           if (mode == 0 && compute->vector_flag) {
608             nmax = compute->size_vector;
609             expandflag = 1;
610           } else if (mode == 1 && compute->array_flag) {
611             nmax = compute->size_array_cols;
612             expandflag = 1;
613           } else if (compute->peratom_flag && compute->size_peratom_cols) {
614             nmax = compute->size_peratom_cols;
615             expandflag = 1;
616           } else if (compute->local_flag && compute->size_local_cols) {
617             nmax = compute->size_local_cols;
618             expandflag = 1;
619           }
620         }
621 
622         // fix
623 
624       } else if (word[0] == 'f') {
625         int ifix = lmp->modify->find_fix(id);
626 
627         // check for global vector/array, peratom array, local array
628 
629         if (ifix >= 0) {
630           Fix *fix = lmp->modify->fix[ifix];
631 
632           if (mode == 0 && fix->vector_flag) {
633             nmax = fix->size_vector;
634             expandflag = 1;
635           } else if (mode == 1 && fix->array_flag) {
636             nmax = fix->size_array_cols;
637             expandflag = 1;
638           } else if (fix->peratom_flag && fix->size_peratom_cols) {
639             nmax = fix->size_peratom_cols;
640             expandflag = 1;
641           } else if (fix->local_flag && fix->size_local_cols) {
642             nmax = fix->size_local_cols;
643             expandflag = 1;
644           }
645         }
646 
647         // only match custom array reference with a '*' wildcard
648         // number range in the first pair of square brackets
649 
650       } else if ((word[0] == 'i') || (word[0] == 'd')) {
651         int flag, cols;
652         int icustom = lmp->atom->find_custom(id.c_str(), flag, cols);
653 
654         if ((icustom >= 0) && (mode == 1) && (cols > 0)) {
655 
656           // check for custom per-atom array
657 
658           if (((word[0] == 'i') && (flag == 0)) || ((word[0] == 'd') && (flag == 1))) {
659             nmax = cols;
660             expandflag = 1;
661           }
662         }
663       }
664     }
665 
666     // expansion will take place
667 
668     if (expandflag) {
669 
670       // expand wild card string to nlo/nhi numbers
671       utils::bounds(file, line, wc, 1, nmax, nlo, nhi, lmp->error);
672 
673       if (newarg + nhi - nlo + 1 > maxarg) {
674         maxarg += nhi - nlo + 1;
675         earg = (char **) lmp->memory->srealloc(earg, maxarg * sizeof(char *), "input:earg");
676       }
677 
678       for (int index = nlo; index <= nhi; index++) {
679         earg[newarg] = utils::strdup(fmt::format("{}2_{}[{}]{}", word[0], id, index, tail));
680         newarg++;
681       }
682     } else {
683       // no expansion: duplicate original string
684       if (newarg == maxarg) {
685         maxarg++;
686         earg = (char **) lmp->memory->srealloc(earg, maxarg * sizeof(char *), "input:earg");
687       }
688       earg[newarg] = utils::strdup(word);
689       newarg++;
690     }
691   }
692 
693   //printf("NEWARG %d\n",newarg);
694   //for (int i = 0; i < newarg; i++)
695   //  printf("  arg %d: %s\n",i,earg[i]);
696 
697   return newarg;
698 }
699 
700 /* ----------------------------------------------------------------------
701    Make copy of string in new storage. Works like the (non-portable)
702    C-style strdup() but also accepts a C++ string as argument.
703 ------------------------------------------------------------------------- */
704 
strdup(const std::string & text)705 char *utils::strdup(const std::string &text)
706 {
707   char *tmp = new char[text.size() + 1];
708   strcpy(tmp, text.c_str());    // NOLINT
709   return tmp;
710 }
711 
712 /* ----------------------------------------------------------------------
713    Return string without leading or trailing whitespace
714 ------------------------------------------------------------------------- */
715 
trim(const std::string & line)716 std::string utils::trim(const std::string &line)
717 {
718   int beg = re_match(line.c_str(), "\\S+");
719   int end = re_match(line.c_str(), "\\s+$");
720   if (beg < 0) beg = 0;
721   if (end < 0) end = line.size();
722 
723   return line.substr(beg, end - beg);
724 }
725 
726 /* ----------------------------------------------------------------------
727    Return string without trailing # comment
728 ------------------------------------------------------------------------- */
729 
trim_comment(const std::string & line)730 std::string utils::trim_comment(const std::string &line)
731 {
732   auto end = line.find_first_of('#');
733   if (end != std::string::npos) { return line.substr(0, end); }
734   return std::string(line);
735 }
736 
737 /* ----------------------------------------------------------------------
738    Replace UTF-8 encoded chars with known ASCII equivalents
739 ------------------------------------------------------------------------- */
740 
utf8_subst(const std::string & line)741 std::string utils::utf8_subst(const std::string &line)
742 {
743   const unsigned char *const in = (const unsigned char *) line.c_str();
744   const int len = line.size();
745   std::string out;
746 
747   for (int i = 0; i < len; ++i) {
748 
749     // UTF-8 2-byte character
750     if ((in[i] & 0xe0U) == 0xc0U) {
751       if ((i + 1) < len) {
752         // NON-BREAKING SPACE (U+00A0)
753         if ((in[i] == 0xc2U) && (in[i + 1] == 0xa0U)) out += ' ', ++i;
754         // MODIFIER LETTER PLUS SIGN (U+02D6)
755         if ((in[i] == 0xcbU) && (in[i + 1] == 0x96U)) out += '+', ++i;
756         // MODIFIER LETTER MINUS SIGN (U+02D7)
757         if ((in[i] == 0xcbU) && (in[i + 1] == 0x97U)) out += '-', ++i;
758       }
759       // UTF-8 3-byte character
760     } else if ((in[i] & 0xf0U) == 0xe0U) {
761       if ((i + 2) < len) {
762         // EN QUAD (U+2000)
763         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x80U)) out += ' ', i += 2;
764         // EM QUAD (U+2001)
765         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x81U)) out += ' ', i += 2;
766         // EN SPACE (U+2002)
767         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x82U)) out += ' ', i += 2;
768         // EM SPACE (U+2003)
769         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x83U)) out += ' ', i += 2;
770         // THREE-PER-EM SPACE (U+2004)
771         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x84U)) out += ' ', i += 2;
772         // FOUR-PER-EM SPACE (U+2005)
773         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x85U)) out += ' ', i += 2;
774         // SIX-PER-EM SPACE (U+2006)
775         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x86U)) out += ' ', i += 2;
776         // FIGURE SPACE (U+2007)
777         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x87U)) out += ' ', i += 2;
778         // PUNCTUATION SPACE (U+2008)
779         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x88U)) out += ' ', i += 2;
780         // THIN SPACE (U+2009)
781         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x89U)) out += ' ', i += 2;
782         // HAIR SPACE (U+200A)
783         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x8aU)) out += ' ', i += 2;
784         // ZERO WIDTH SPACE (U+200B)
785         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x8bU)) out += ' ', i += 2;
786         // LEFT SINGLE QUOTATION MARK (U+2018)
787         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x98U)) out += '\'', i += 2;
788         // RIGHT SINGLE QUOTATION MARK (U+2019)
789         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x99U)) out += '\'', i += 2;
790         // LEFT DOUBLE QUOTATION MARK (U+201C)
791         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x9cU)) out += '"', i += 2;
792         // RIGHT DOUBLE QUOTATION MARK (U+201D)
793         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x9dU)) out += '"', i += 2;
794         // NARROW NO-BREAK SPACE (U+202F)
795         if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0xafU)) out += ' ', i += 2;
796         // WORD JOINER (U+2060)
797         if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa0U)) out += ' ', i += 2;
798         // INVISIBLE SEPARATOR (U+2063)
799         if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa3U)) out += ' ', i += 2;
800         // INVISIBLE PLUS (U+2064)
801         if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa4U)) out += '+', i += 2;
802         // MINUS SIGN (U+2212)
803         if ((in[i] == 0xe2U) && (in[i + 1] == 0x88U) && (in[i + 2] == 0x92U)) out += '-', i += 2;
804         // ZERO WIDTH NO-BREAK SPACE (U+FEFF)
805         if ((in[i] == 0xefU) && (in[i + 1] == 0xbbU) && (in[i + 2] == 0xbfU)) out += ' ', i += 2;
806       }
807       // UTF-8 4-byte character
808     } else if ((in[i] & 0xf8U) == 0xf0U) {
809       if ((i + 3) < len) { ; }
810     } else
811       out += in[i];
812   }
813   return out;
814 }
815 
816 /* ----------------------------------------------------------------------
817    return number of words
818 ------------------------------------------------------------------------- */
819 
count_words(const char * text)820 size_t utils::count_words(const char *text)
821 {
822   size_t count = 0;
823   const char *buf = text;
824   char c = *buf;
825 
826   while (c) {
827     if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') {
828       c = *++buf;
829       continue;
830     };
831 
832     ++count;
833     c = *++buf;
834 
835     while (c) {
836       if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') { break; }
837       c = *++buf;
838     }
839   }
840 
841   return count;
842 }
843 
844 /* ----------------------------------------------------------------------
845    return number of words
846 ------------------------------------------------------------------------- */
847 
count_words(const std::string & text)848 size_t utils::count_words(const std::string &text)
849 {
850   return utils::count_words(text.c_str());
851 }
852 
853 /* ----------------------------------------------------------------------
854    Return number of words
855 ------------------------------------------------------------------------- */
856 
count_words(const std::string & text,const std::string & separators)857 size_t utils::count_words(const std::string &text, const std::string &separators)
858 {
859   size_t count = 0;
860   size_t start = text.find_first_not_of(separators);
861 
862   while (start != std::string::npos) {
863     size_t end = text.find_first_of(separators, start);
864     ++count;
865 
866     if (end == std::string::npos) {
867       return count;
868     } else {
869       start = text.find_first_not_of(separators, end + 1);
870     }
871   }
872   return count;
873 }
874 
875 /* ----------------------------------------------------------------------
876    Trim comment from string and return number of words
877 ------------------------------------------------------------------------- */
878 
trim_and_count_words(const std::string & text,const std::string & separators)879 size_t utils::trim_and_count_words(const std::string &text, const std::string &separators)
880 {
881   return utils::count_words(utils::trim_comment(text), separators);
882 }
883 
884 /* ----------------------------------------------------------------------
885    Convert string into words on whitespace while handling single and
886    double quotes.
887 ------------------------------------------------------------------------- */
split_words(const std::string & text)888 std::vector<std::string> utils::split_words(const std::string &text)
889 {
890   std::vector<std::string> list;
891   const char *buf = text.c_str();
892   std::size_t beg = 0;
893   std::size_t len = 0;
894   std::size_t add = 0;
895   char c = *buf;
896 
897   while (c) {
898     // leading whitespace
899     if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') {
900       c = *++buf;
901       ++beg;
902       continue;
903     };
904     len = 0;
905 
906   // handle escaped/quoted text.
907   quoted:
908 
909     // handle single quote
910     if (c == '\'') {
911       ++beg;
912       add = 1;
913       c = *++buf;
914       while (((c != '\'') && (c != '\0')) || ((c == '\\') && (buf[1] == '\''))) {
915         if ((c == '\\') && (buf[1] == '\'')) {
916           ++buf;
917           ++len;
918         }
919         c = *++buf;
920         ++len;
921       }
922       if (c != '\'') ++len;
923       c = *++buf;
924 
925       // handle triple double quotation marks
926     } else if ((c == '"') && (buf[1] == '"') && (buf[2] == '"') && (buf[3] != '"')) {
927       len = 3;
928       add = 1;
929       buf += 3;
930       c = *buf;
931 
932       // handle double quote
933     } else if (c == '"') {
934       ++beg;
935       add = 1;
936       c = *++buf;
937       while (((c != '"') && (c != '\0')) || ((c == '\\') && (buf[1] == '"'))) {
938         if ((c == '\\') && (buf[1] == '"')) {
939           ++buf;
940           ++len;
941         }
942         c = *++buf;
943         ++len;
944       }
945       if (c != '"') ++len;
946       c = *++buf;
947     }
948 
949     // unquoted
950     while (1) {
951       if ((c == '\'') || (c == '"')) goto quoted;
952       // skip escaped quote
953       if ((c == '\\') && ((buf[1] == '\'') || (buf[1] == '"'))) {
954         ++buf;
955         ++len;
956         c = *++buf;
957         ++len;
958       }
959       if ((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n') || (c == '\f') || (c == '\0')) {
960         list.push_back(text.substr(beg, len));
961         beg += len + add;
962         break;
963       }
964       c = *++buf;
965       ++len;
966     }
967   }
968   return list;
969 }
970 
971 /* ----------------------------------------------------------------------
972    Convert multi-line string into lines
973 ------------------------------------------------------------------------- */
split_lines(const std::string & text)974 std::vector<std::string> utils::split_lines(const std::string &text)
975 {
976   return Tokenizer(text, "\n").as_vector();
977 }
978 
979 /* ----------------------------------------------------------------------
980    Return whether string is a valid integer number
981 ------------------------------------------------------------------------- */
982 
is_integer(const std::string & str)983 bool utils::is_integer(const std::string &str)
984 {
985   if (str.empty()) return false;
986 
987   for (auto c : str) {
988     if (isdigit(c) || c == '-' || c == '+') continue;
989     return false;
990   }
991   return true;
992 }
993 
994 /* ----------------------------------------------------------------------
995    Return whether string is a valid floating-point number
996 ------------------------------------------------------------------------- */
997 
is_double(const std::string & str)998 bool utils::is_double(const std::string &str)
999 {
1000   if (str.empty()) return false;
1001 
1002   for (auto c : str) {
1003     if (isdigit(c)) continue;
1004     if (c == '-' || c == '+' || c == '.') continue;
1005     if (c == 'e' || c == 'E') continue;
1006     return false;
1007   }
1008   return true;
1009 }
1010 
1011 /* ----------------------------------------------------------------------
1012    Return whether string is a valid ID string
1013 ------------------------------------------------------------------------- */
1014 
is_id(const std::string & str)1015 bool utils::is_id(const std::string &str)
1016 {
1017   if (str.empty()) return false;
1018 
1019   for (auto c : str) {
1020     if (isalnum(c) || (c == '_')) continue;
1021     return false;
1022   }
1023   return true;
1024 }
1025 
1026 /* ----------------------------------------------------------------------
1027    strip off leading part of path, return just the filename
1028 ------------------------------------------------------------------------- */
1029 
path_basename(const std::string & path)1030 std::string utils::path_basename(const std::string &path)
1031 {
1032 #if defined(_WIN32)
1033   size_t start = path.find_last_of("/\\");
1034 #else
1035   size_t start = path.find_last_of('/');
1036 #endif
1037 
1038   if (start == std::string::npos) {
1039     start = 0;
1040   } else {
1041     start += 1;
1042   }
1043 
1044   return path.substr(start);
1045 }
1046 
1047 /* ----------------------------------------------------------------------
1048    Return only the leading part of a path, return just the directory
1049 ------------------------------------------------------------------------- */
1050 
path_dirname(const std::string & path)1051 std::string utils::path_dirname(const std::string &path)
1052 {
1053 #if defined(_WIN32)
1054   size_t start = path.find_last_of("/\\");
1055 #else
1056   size_t start = path.find_last_of('/');
1057 #endif
1058 
1059   if (start == std::string::npos) return ".";
1060 
1061   return path.substr(0, start);
1062 }
1063 
1064 /* ----------------------------------------------------------------------
1065    join two paths
1066 ------------------------------------------------------------------------- */
1067 
path_join(const std::string & a,const std::string & b)1068 std::string utils::path_join(const std::string &a, const std::string &b)
1069 {
1070 #if defined(_WIN32)
1071   return fmt::format("{}\\{}", a, b);
1072 #else
1073   return fmt::format("{}/{}", a, b);
1074 #endif
1075 }
1076 
1077 /* ----------------------------------------------------------------------
1078    try to open file for reading
1079 ------------------------------------------------------------------------- */
1080 
file_is_readable(const std::string & path)1081 bool utils::file_is_readable(const std::string &path)
1082 {
1083   FILE *fp = fopen(path.c_str(), "r");
1084   if (fp) {
1085     fclose(fp);
1086     return true;
1087   }
1088   return false;
1089 }
1090 
1091 /* ----------------------------------------------------------------------
1092    try to find potential file as specified by name
1093    search current directory and the LAMMPS_POTENTIALS directory if
1094    specified
1095 ------------------------------------------------------------------------- */
1096 
get_potential_file_path(const std::string & path)1097 std::string utils::get_potential_file_path(const std::string &path)
1098 {
1099   std::string filepath = path;
1100   std::string filename = utils::path_basename(path);
1101 
1102   if (utils::file_is_readable(filepath)) {
1103     return filepath;
1104   } else {
1105     // try the environment variable directory
1106     const char *var = getenv("LAMMPS_POTENTIALS");
1107 
1108     if (var != nullptr) {
1109 #if defined(_WIN32)
1110       Tokenizer dirs(var, ";");
1111 #else
1112       Tokenizer dirs(var, ":");
1113 #endif
1114       while (dirs.has_next()) {
1115         auto pot = utils::path_basename(filepath);
1116         auto dir = dirs.next();
1117         filepath = utils::path_join(dir, pot);
1118 
1119         if (utils::file_is_readable(filepath)) { return filepath; }
1120       }
1121     }
1122   }
1123   return "";
1124 }
1125 
1126 /* ----------------------------------------------------------------------
1127    read first line of potential file
1128    if it has a DATE field, return the following word
1129 ------------------------------------------------------------------------- */
1130 
get_potential_date(const std::string & path,const std::string & potential_name)1131 std::string utils::get_potential_date(const std::string &path, const std::string &potential_name)
1132 {
1133   TextFileReader reader(path, potential_name);
1134   reader.ignore_comments = false;
1135 
1136   char *line = reader.next_line();
1137   if (line == nullptr) return "";
1138   Tokenizer words(line);
1139   while (words.has_next()) {
1140     if (words.next() == "DATE:") {
1141       if (words.has_next()) return words.next();
1142     }
1143   }
1144   return "";
1145 }
1146 
1147 /* ----------------------------------------------------------------------
1148    read first line of potential file
1149    if it has UNITS field, return following word
1150 ------------------------------------------------------------------------- */
1151 
get_potential_units(const std::string & path,const std::string & potential_name)1152 std::string utils::get_potential_units(const std::string &path, const std::string &potential_name)
1153 {
1154   TextFileReader reader(path, potential_name);
1155   reader.ignore_comments = false;
1156 
1157   char *line = reader.next_line();
1158   if (line == nullptr) return "";
1159   Tokenizer words(line);
1160   while (words.has_next()) {
1161     if (words.next() == "UNITS:") {
1162       if (words.has_next()) return words.next();
1163     }
1164   }
1165   return "";
1166 }
1167 
1168 /* ----------------------------------------------------------------------
1169    return bitmask of supported conversions for a given property
1170 ------------------------------------------------------------------------- */
get_supported_conversions(const int property)1171 int utils::get_supported_conversions(const int property)
1172 {
1173   if (property == ENERGY)
1174     return METAL2REAL | REAL2METAL;
1175   else
1176     return NOCONVERT;
1177 }
1178 
1179 /* ----------------------------------------------------------------------
1180    return conversion factor for a given property and conversion setting
1181    return 0.0 if unknown.
1182 ------------------------------------------------------------------------- */
1183 
get_conversion_factor(const int property,const int conversion)1184 double utils::get_conversion_factor(const int property, const int conversion)
1185 {
1186   if (property == ENERGY) {
1187     if (conversion == NOCONVERT) {
1188       return 1.0;
1189     } else if (conversion == METAL2REAL) {
1190       return 23.060549;
1191     } else if (conversion == REAL2METAL) {
1192       return 1.0 / 23.060549;
1193     }
1194   }
1195   return 0.0;
1196 }
1197 
1198 /* ----------------------------------------------------------------------
1199    open a potential file as specified by name
1200    if fails, search in dir specified by env variable LAMMPS_POTENTIALS
1201 ------------------------------------------------------------------------- */
1202 
open_potential(const std::string & name,LAMMPS * lmp,int * auto_convert)1203 FILE *utils::open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert)
1204 {
1205   auto error = lmp->error;
1206   auto me = lmp->comm->me;
1207 
1208   std::string filepath = get_potential_file_path(name);
1209 
1210   if (!filepath.empty()) {
1211     std::string unit_style = lmp->update->unit_style;
1212     std::string date = get_potential_date(filepath, "potential");
1213     std::string units = get_potential_units(filepath, "potential");
1214 
1215     if (!date.empty() && (me == 0))
1216       logmesg(lmp, "Reading potential file {} with DATE: {}\n", name, date);
1217 
1218     if (auto_convert == nullptr) {
1219       if (!units.empty() && (units != unit_style) && (me == 0)) {
1220         error->one(FLERR, "Potential file {} requires {} units but {} units are in use", name,
1221                    units, unit_style);
1222         return nullptr;
1223       }
1224     } else {
1225       if (units.empty() || units == unit_style) {
1226         *auto_convert = NOCONVERT;
1227       } else {
1228         if ((units == "metal") && (unit_style == "real") && (*auto_convert & METAL2REAL)) {
1229           *auto_convert = METAL2REAL;
1230         } else if ((units == "real") && (unit_style == "metal") && (*auto_convert & REAL2METAL)) {
1231           *auto_convert = REAL2METAL;
1232         } else {
1233           error->one(FLERR, "Potential file {} requires {} units but {} units are in use", name,
1234                      units, unit_style);
1235           return nullptr;
1236         }
1237       }
1238       if ((*auto_convert != NOCONVERT) && (me == 0))
1239         error->warning(FLERR, "Converting potential file in {} units to {} units", units,
1240                        unit_style);
1241     }
1242     return fopen(filepath.c_str(), "r");
1243   }
1244   return nullptr;
1245 }
1246 
1247 /* ----------------------------------------------------------------------
1248    convert a timespec ([[HH:]MM:]SS) to seconds
1249    the strings "off" and "unlimited" result in -1.0;
1250 ------------------------------------------------------------------------- */
1251 
timespec2seconds(const std::string & timespec)1252 double utils::timespec2seconds(const std::string &timespec)
1253 {
1254   double vals[3];
1255   int i = 0;
1256 
1257   // first handle allowed textual inputs
1258   if (timespec == "off") return -1.0;
1259   if (timespec == "unlimited") return -1.0;
1260 
1261   vals[0] = vals[1] = vals[2] = 0;
1262 
1263   ValueTokenizer values(timespec, ":");
1264 
1265   try {
1266     for (i = 0; i < 3; i++) {
1267       if (!values.has_next()) break;
1268       vals[i] = values.next_int();
1269     }
1270   } catch (TokenizerException &e) {
1271     return -1.0;
1272   }
1273 
1274   if (i == 3)
1275     return (vals[0] * 60 + vals[1]) * 60 + vals[2];
1276   else if (i == 2)
1277     return vals[0] * 60 + vals[1];
1278   return vals[0];
1279 }
1280 
1281 /* ----------------------------------------------------------------------
1282    convert a LAMMPS version date (1Jan01) to a number
1283 ------------------------------------------------------------------------- */
1284 
date2num(const std::string & date)1285 int utils::date2num(const std::string &date)
1286 {
1287   std::size_t found = date.find_first_not_of("0123456789 ");
1288   int num = strtol(date.substr(0, found).c_str(), nullptr, 10);
1289   auto month = date.substr(found);
1290   found = month.find_first_of("0123456789 ");
1291   num += strtol(month.substr(found).c_str(), nullptr, 10) * 10000;
1292   if (num < 1000000) num += 20000000;
1293 
1294   if (strmatch(month, "^Jan"))
1295     num += 100;
1296   else if (strmatch(month, "^Feb"))
1297     num += 200;
1298   else if (strmatch(month, "^Mar"))
1299     num += 300;
1300   else if (strmatch(month, "^Apr"))
1301     num += 400;
1302   else if (strmatch(month, "^May"))
1303     num += 500;
1304   else if (strmatch(month, "^Jun"))
1305     num += 600;
1306   else if (strmatch(month, "^Jul"))
1307     num += 700;
1308   else if (strmatch(month, "^Aug"))
1309     num += 800;
1310   else if (strmatch(month, "^Sep"))
1311     num += 900;
1312   else if (strmatch(month, "^Oct"))
1313     num += 1000;
1314   else if (strmatch(month, "^Nov"))
1315     num += 1100;
1316   else if (strmatch(month, "^Dec"))
1317     num += 1200;
1318   return num;
1319 }
1320 
1321 /* ----------------------------------------------------------------------
1322    get formatted string of current date from fmtlib
1323 ------------------------------------------------------------------------- */
1324 
current_date()1325 std::string utils::current_date()
1326 {
1327   time_t tv = time(nullptr);
1328   std::tm today = fmt::localtime(tv);
1329   return fmt::format("{:%Y-%m-%d}", today);
1330 }
1331 
1332 /* ----------------------------------------------------------------------
1333    binary search in vector of ascending doubles
1334 ------------------------------------------------------------------------- */
1335 
binary_search(const double needle,const int n,const double * haystack)1336 int utils::binary_search(const double needle, const int n, const double *haystack)
1337 {
1338   int lo = 0;
1339   int hi = n - 1;
1340 
1341   if (needle < haystack[lo]) return lo;
1342   if (needle >= haystack[hi]) return hi;
1343 
1344   // insure haystack[lo] <= needle < haystack[hi] at every iteration
1345   // done when lo,hi are adjacent
1346 
1347   int index = (lo + hi) / 2;
1348   while (lo < hi - 1) {
1349     if (needle < haystack[index])
1350       hi = index;
1351     else if (needle >= haystack[index])
1352       lo = index;
1353     index = (lo + hi) / 2;
1354   }
1355 
1356   return index;
1357 }
1358 
1359 /* ----------------------------------------------------------------------
1360  * Merge sort part 1: Loop over sublists doubling in size with each iteration.
1361  * Pre-sort small sublists with insertion sort for better overall performance.
1362 ------------------------------------------------------------------------- */
1363 
merge_sort(int * index,int num,void * ptr,int (* comp)(int,int,void *))1364 void utils::merge_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *))
1365 {
1366   if (num < 2) return;
1367 
1368   int chunk, i, j;
1369 
1370   // do insertion sort on chunks of up to 64 elements
1371 
1372   chunk = 64;
1373   for (i = 0; i < num; i += chunk) {
1374     j = (i + chunk > num) ? num - i : chunk;
1375     insertion_sort(index + i, j, ptr, comp);
1376   }
1377 
1378   // already done?
1379 
1380   if (chunk >= num) return;
1381 
1382   // continue with merge sort on the pre-sorted chunks.
1383   // we need an extra buffer for temporary storage and two
1384   // pointers to operate on, so we can swap the pointers
1385   // rather than copying to the hold buffer in each pass
1386 
1387   int *buf = new int[num];
1388   int *dest = index;
1389   int *hold = buf;
1390 
1391   while (chunk < num) {
1392     int m;
1393 
1394     // swap hold and destination buffer
1395 
1396     int *tmp = dest;
1397     dest = hold;
1398     hold = tmp;
1399 
1400     // merge from hold array to destination array
1401 
1402     for (i = 0; i < num - 1; i += 2 * chunk) {
1403       j = i + 2 * chunk;
1404       if (j > num) j = num;
1405       m = i + chunk;
1406       if (m > num) m = num;
1407       do_merge(dest, hold, i, m, m, j, ptr, comp);
1408     }
1409 
1410     // copy all indices not handled by the chunked merge sort loop
1411 
1412     for (; i < num; i++) dest[i] = hold[i];
1413     chunk *= 2;
1414   }
1415 
1416   // if the final sorted data is in buf, copy back to index
1417 
1418   if (dest == buf) memcpy(index, buf, sizeof(int) * num);
1419 
1420   delete[] buf;
1421 }
1422 
1423 /* ------------------------------------------------------------------ */
1424 
/* ----------------------------------------------------------------------
 * Merge sort part 2: Insertion sort for pre-sorting of small chunks.
 * Sorts the num entries of index in place. comp(a, b, ptr) must return a
 * value > 0 when entry a has to be placed after entry b.
------------------------------------------------------------------------- */

void insertion_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *))
{
  if (num < 2) return;

  for (int i = 1; i < num; ++i) {
    const int moving = index[i];

    // shift every larger entry of the sorted prefix one slot to the right
    int slot = i - 1;
    while ((slot >= 0) && ((*comp)(index[slot], moving, ptr) > 0)) {
      index[slot + 1] = index[slot];
      --slot;
    }

    // slot is now one below the position the entry belongs to
    index[slot + 1] = moving;
  }
}
1445 
/* ----------------------------------------------------------------------
 * Merge sort part 3: Merge two sublists.
 * Merges buf[llo,lhi) and buf[rlo,rhi) into idx, starting at idx[llo].
 * An entry is taken from the left list only when comp(left, right, ptr)
 * is negative; otherwise the entry of the right list is taken.
------------------------------------------------------------------------- */

static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, void *ptr,
                     int (*comp)(int, int, void *))
{
  int out = llo;
  int left = llo;
  int right = rlo;

  for (;;) {
    if (left >= lhi) {
      // left list used up: the rest of the right list follows as is
      while (right < rhi) idx[out++] = buf[right++];
      return;
    }
    if (right >= rhi) {
      // right list used up: the rest of the left list follows as is
      while (left < lhi) idx[out++] = buf[left++];
      return;
    }

    const bool take_left = (*comp)(buf[left], buf[right], ptr) < 0;
    if (take_left)
      idx[out++] = buf[left++];
    else
      idx[out++] = buf[right++];
  }
}
1466 
1467 /* ------------------------------------------------------------------ */
1468 
1469 extern "C" {
1470 
/* Pointer typedefs so that the regex code can pass around compiled
 * patterns (re_t) and compile contexts (re_ctx_t) as opaque handles. */
typedef struct regex_t *re_t;
typedef struct regex_context_t *re_ctx_t;

/* Compile regex string pattern to a regex_t-array stored in context.
 * Returns 0 if the pattern is rejected as invalid. */
static re_t re_compile(re_ctx_t context, const char *pattern);

/* Find matches of the compiled pattern inside text.
 * Returns the index of the match or -1; *matchlen is reset to 0 on entry. */
static int re_matchp(const char *text, re_t pattern, int *matchlen);

/* Definitions: */

#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 40 /* Size of the buffer shared by all character classes of one expression. */
1485 
/* Symbol types stored in regex_t::type by re_compile(). */
enum {
  RX_UNUSED,         /* sentinel that marks the end of a compiled pattern */
  RX_DOT,            /* '.'  */
  RX_BEGIN,          /* '^'  */
  RX_END,            /* '$'  */
  RX_QUESTIONMARK,   /* '?'  */
  RX_STAR,           /* '*'  */
  RX_PLUS,           /* '+'  */
  RX_CHAR,           /* literal character, stored in u.ch */
  RX_CHAR_CLASS,     /* '[...]', characters stored via u.ccl */
  RX_INV_CHAR_CLASS, /* '[^...]', characters stored via u.ccl */
  RX_DIGIT,          /* '\d' */
  RX_NOT_DIGIT,      /* '\D' */
  RX_INTEGER,        /* '\i' */
  RX_NOT_INTEGER,    /* '\I' */
  RX_FLOAT,          /* '\f' */
  RX_NOT_FLOAT,      /* '\F' */
  RX_ALPHA,          /* '\w' */
  RX_NOT_ALPHA,      /* '\W' */
  RX_WHITESPACE,     /* '\s' */
  RX_NOT_WHITESPACE /*, BRANCH */ /* '\S' */
};
1508 
/* One compiled regex symbol. Which union member is meaningful depends on
 * type: re_compile() sets u.ch for RX_CHAR and u.ccl for the two
 * character-class types. */
typedef struct regex_t {
  unsigned char type; /* CHAR, STAR, etc.                      */
  union {
    unsigned char ch;   /*      the character itself             */
    unsigned char *ccl; /*  OR  a pointer to characters in class */
  } u;
} regex_t;

/* Storage for one compiled expression; re_compile() fills it and returns
 * a pointer into re_compiled, so the context has to outlive that pointer. */
typedef struct regex_context_t {
  /* MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
     MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all
     char-classes in the expression. */
  regex_t re_compiled[MAX_REGEXP_OBJECTS];
  unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
} regex_context_t;
1523 
re_match(const char * text,const char * pattern)1524 int re_match(const char *text, const char *pattern)
1525 {
1526   regex_context_t context;
1527   int dummy;
1528   return re_matchp(text, re_compile(&context, pattern), &dummy);
1529 }
1530 
re_find(const char * text,const char * pattern,int * matchlen)1531 int re_find(const char *text, const char *pattern, int *matchlen)
1532 {
1533   regex_context_t context;
1534   return re_matchp(text, re_compile(&context, pattern), matchlen);
1535 }
1536 
/* Private function declarations: forward declarations of the file-local
 * helpers of the regex code. */

/* helpers that take a compiled pattern and/or a text to match against */
static int matchpattern(regex_t *pattern, const char *text, int *matchlen);
static int matchcharclass(char c, const char *str);
static int matchstar(regex_t p, regex_t *pattern, const char *text, int *matchlen);
static int matchplus(regex_t p, regex_t *pattern, const char *text, int *matchlen);
static int matchone(regex_t p, char c);
/* predicates on a single character */
static int matchdigit(char c);
static int matchint(char c);
static int matchfloat(char c);
static int matchalpha(char c);
static int matchwhitespace(char c);
/* helpers that check a character against (part of) a class string */
static int matchmetachar(char c, const char *str);
static int matchrange(char c, const char *str);
static int matchdot(char c);
static int ismetachar(char c);
1552 
1553 /* Semi-public functions: */
re_matchp(const char * text,re_t pattern,int * matchlen)1554 int re_matchp(const char *text, re_t pattern, int *matchlen)
1555 {
1556   *matchlen = 0;
1557   if (pattern != 0) {
1558     if (pattern[0].type == RX_BEGIN) {
1559       return ((matchpattern(&pattern[1], text, matchlen)) ? 0 : -1);
1560     } else {
1561       int idx = -1;
1562 
1563       do {
1564         idx += 1;
1565 
1566         if (matchpattern(pattern, text, matchlen)) {
1567           if (text[0] == '\0') return -1;
1568 
1569           return idx;
1570         }
1571       } while (*text++ != '\0');
1572     }
1573   }
1574   return -1;
1575 }
1576 
re_compile(re_ctx_t context,const char * pattern)1577 re_t re_compile(re_ctx_t context, const char *pattern)
1578 {
1579   regex_t *const re_compiled = context->re_compiled;
1580   unsigned char *const ccl_buf = context->ccl_buf;
1581   int ccl_bufidx = 1;
1582 
1583   char c;    /* current char in pattern   */
1584   int i = 0; /* index into pattern        */
1585   int j = 0; /* index into re_compiled    */
1586 
1587   while (pattern[i] != '\0' && (j + 1 < MAX_REGEXP_OBJECTS)) {
1588     c = pattern[i];
1589 
1590     switch (c) {
1591         /* Meta-characters: */
1592       case '^': {
1593         re_compiled[j].type = RX_BEGIN;
1594       } break;
1595       case '$': {
1596         re_compiled[j].type = RX_END;
1597       } break;
1598       case '.': {
1599         re_compiled[j].type = RX_DOT;
1600       } break;
1601       case '*': {
1602         re_compiled[j].type = RX_STAR;
1603       } break;
1604       case '+': {
1605         re_compiled[j].type = RX_PLUS;
1606       } break;
1607       case '?': {
1608         re_compiled[j].type = RX_QUESTIONMARK;
1609       } break;
1610 
1611         /* Escaped character-classes (\s \w ...): */
1612       case '\\': {
1613         if (pattern[i + 1] != '\0') {
1614           /* Skip the escape-char '\\' */
1615           i += 1;
1616           /* ... and check the next */
1617           switch (pattern[i]) {
1618               /* Meta-character: */
1619             case 'd': {
1620               re_compiled[j].type = RX_DIGIT;
1621             } break;
1622             case 'D': {
1623               re_compiled[j].type = RX_NOT_DIGIT;
1624             } break;
1625             case 'i': {
1626               re_compiled[j].type = RX_INTEGER;
1627             } break;
1628             case 'I': {
1629               re_compiled[j].type = RX_NOT_INTEGER;
1630             } break;
1631             case 'f': {
1632               re_compiled[j].type = RX_FLOAT;
1633             } break;
1634             case 'F': {
1635               re_compiled[j].type = RX_NOT_FLOAT;
1636             } break;
1637             case 'w': {
1638               re_compiled[j].type = RX_ALPHA;
1639             } break;
1640             case 'W': {
1641               re_compiled[j].type = RX_NOT_ALPHA;
1642             } break;
1643             case 's': {
1644               re_compiled[j].type = RX_WHITESPACE;
1645             } break;
1646             case 'S': {
1647               re_compiled[j].type = RX_NOT_WHITESPACE;
1648             } break;
1649 
1650               /* Escaped character, e.g. '.' or '$' */
1651             default: {
1652               re_compiled[j].type = RX_CHAR;
1653               re_compiled[j].u.ch = pattern[i];
1654             } break;
1655           }
1656         }
1657         /* '\\' as last char in pattern -> invalid regular expression. */
1658       } break;
1659 
1660         /* Character class: */
1661       case '[': {
1662         /* Remember where the char-buffer starts. */
1663         int buf_begin = ccl_bufidx;
1664 
1665         /* Look-ahead to determine if negated */
1666         if (pattern[i + 1] == '^') {
1667           re_compiled[j].type = RX_INV_CHAR_CLASS;
1668           i += 1;                  /* Increment i to avoid including '^' in the char-buffer */
1669           if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '^' */
1670           {
1671             return 0;
1672           }
1673         } else {
1674           re_compiled[j].type = RX_CHAR_CLASS;
1675         }
1676 
1677         /* Copy characters inside [..] to buffer */
1678         while ((pattern[++i] != ']') && (pattern[i] != '\0')) {
1679           /* Missing ] */
1680           if (pattern[i] == '\\') {
1681             if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1) { return 0; }
1682             if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '\\' */
1683             {
1684               return 0;
1685             }
1686             ccl_buf[ccl_bufidx++] = pattern[i++];
1687           } else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) {
1688             return 0;
1689           }
1690           ccl_buf[ccl_bufidx++] = pattern[i];
1691         }
1692         if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) {
1693           /* Catches cases such as [00000000000000000000000000000000000000][ */
1694           return 0;
1695         }
1696         /* Null-terminate string end */
1697         ccl_buf[ccl_bufidx++] = 0;
1698         re_compiled[j].u.ccl = &ccl_buf[buf_begin];
1699       } break;
1700 
1701         /* Other characters: */
1702       default: {
1703         re_compiled[j].type = RX_CHAR;
1704         re_compiled[j].u.ch = c;
1705       } break;
1706     }
1707     /* no buffer-out-of-bounds access on invalid patterns -
1708      * see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */
1709     if (pattern[i] == 0) { return 0; }
1710 
1711     i += 1;
1712     j += 1;
1713   }
1714   /* 'RX_UNUSED' is a sentinel used to indicate end-of-pattern */
1715   re_compiled[j].type = RX_UNUSED;
1716 
1717   return (re_t) re_compiled;
1718 }
1719 
1720 /* Private functions: */
/* Return 1 if c is one of the ASCII decimal digits '0'..'9', otherwise 0. */
static int matchdigit(char c)
{
  /* A negative difference wraps to a large unsigned value and is rejected. */
  const unsigned offset = static_cast<unsigned>(c - '0');
  return (offset < 10u) ? 1 : 0;
}
1725 
matchint(char c)1726 static int matchint(char c)
1727 {
1728   return (matchdigit(c) || (c == '-') || (c == '+'));
1729 }
1730 
matchfloat(char c)1731 static int matchfloat(char c)
1732 {
1733   return (matchint(c) || (c == '.') || (c == 'e') || (c == 'E'));
1734 }
1735 
/* Return 1 if c is an ASCII letter ('a'..'z' or 'A'..'Z'), otherwise 0. */
static int matchalpha(char c)
{
  const int is_lower = (c >= 'a') && (c <= 'z');
  const int is_upper = (c >= 'A') && (c <= 'Z');
  return (is_lower || is_upper) ? 1 : 0;
}
1740 
/* Return 1 if c is a blank, horizontal tab, newline, carriage return,
 * form feed or vertical tab.  Return 0 for every other character. */
static int matchwhitespace(char c)
{
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
      return 1;
    default:
      return 0;
  }
}
1745 
matchalphanum(char c)1746 static int matchalphanum(char c)
1747 {
1748   return ((c == '_') || matchalpha(c) || matchdigit(c));
1749 }
1750 
/* Test whether str begins with a range of the form "x-y" and c lies
 * inside it (both bounds inclusive).
 *
 *   c    character under test; a '-' never matches a range
 *   str  current position inside a NUL-terminated class string
 *
 * Returns 1 on a match and 0 otherwise.  The characters of str are
 * inspected strictly left to right, and a later one is only read when
 * the earlier ones did not already rule out a range. */
static int matchrange(char c, const char *str)
{
  if (c == '-') { return 0; }

  const char lower = str[0];
  if ((lower == '\0') || (lower == '-')) { return 0; }

  /* The second character must be the range operator. */
  if (str[1] != '-') { return 0; }

  const char upper = str[2];
  if (upper == '\0') { return 0; }

  return ((c >= lower) && (c <= upper)) ? 1 : 0;
}
1756 
/* Decide whether the '.' wildcard accepts c.
 *
 * When RE_DOT_MATCHES_NEWLINE is defined as 1 every character is
 * accepted.  Otherwise newline and carriage return are rejected and
 * all other characters are accepted.  Returns 1 or 0. */
static int matchdot(char c)
{
#if defined(RE_DOT_MATCHES_NEWLINE) && (RE_DOT_MATCHES_NEWLINE == 1)
  static_cast<void>(c);
  return 1;
#else
  const bool is_line_break = (c == '\n') || (c == '\r');
  return is_line_break ? 0 : 1;
#endif
}
1766 
/* Return 1 if c is a letter that selects a character-class shorthand
 * when it follows a backslash, 0 otherwise.
 *
 * The set has to agree with the selectors handled by matchmetachar():
 * d/D (digit), i/I (integer), f/F (float), w/W (word) and s/S
 * (whitespace).  matchcharclass() consults this function before it
 * falls back to a literal comparison of an escaped character; leaving
 * a selector out of the set lets e.g. the letter 'i' match "[\i]"
 * although 'i' is not an integer character. */
static int ismetachar(char c)
{
  switch (c) {
    case 'd':
    case 'D':
    case 'i':
    case 'I':
    case 'f':
    case 'F':
    case 'w':
    case 'W':
    case 's':
    case 'S':
      return 1;
    default:
      return 0;
  }
}
1771 
matchmetachar(char c,const char * str)1772 static int matchmetachar(char c, const char *str)
1773 {
1774   switch (str[0]) {
1775     case 'd':
1776       return matchdigit(c);
1777     case 'D':
1778       return !matchdigit(c);
1779     case 'i':
1780       return matchint(c);
1781     case 'I':
1782       return !matchint(c);
1783     case 'f':
1784       return matchfloat(c);
1785     case 'F':
1786       return !matchfloat(c);
1787     case 'w':
1788       return matchalphanum(c);
1789     case 'W':
1790       return !matchalphanum(c);
1791     case 's':
1792       return matchwhitespace(c);
1793     case 'S':
1794       return !matchwhitespace(c);
1795     default:
1796       return (c == str[0]);
1797   }
1798 }
1799 
matchcharclass(char c,const char * str)1800 static int matchcharclass(char c, const char *str)
1801 {
1802   do {
1803     if (matchrange(c, str)) {
1804       return 1;
1805     } else if (str[0] == '\\') {
1806       /* Escape-char: increment str-ptr and match on next char */
1807       str += 1;
1808       if (matchmetachar(c, str)) {
1809         return 1;
1810       } else if ((c == str[0]) && !ismetachar(c)) {
1811         return 1;
1812       }
1813     } else if (c == str[0]) {
1814       if (c == '-') {
1815         return ((str[-1] == '\0') || (str[1] == '\0'));
1816       } else {
1817         return 1;
1818       }
1819     }
1820   } while (*str++ != '\0');
1821 
1822   return 0;
1823 }
1824 
matchone(regex_t p,char c)1825 static int matchone(regex_t p, char c)
1826 {
1827   switch (p.type) {
1828     case RX_DOT:
1829       return matchdot(c);
1830     case RX_CHAR_CLASS:
1831       return matchcharclass(c, (const char *) p.u.ccl);
1832     case RX_INV_CHAR_CLASS:
1833       return !matchcharclass(c, (const char *) p.u.ccl);
1834     case RX_DIGIT:
1835       return matchdigit(c);
1836     case RX_NOT_DIGIT:
1837       return !matchdigit(c);
1838     case RX_INTEGER:
1839       return matchint(c);
1840     case RX_NOT_INTEGER:
1841       return !matchint(c);
1842     case RX_FLOAT:
1843       return matchfloat(c);
1844     case RX_NOT_FLOAT:
1845       return !matchfloat(c);
1846     case RX_ALPHA:
1847       return matchalphanum(c);
1848     case RX_NOT_ALPHA:
1849       return !matchalphanum(c);
1850     case RX_WHITESPACE:
1851       return matchwhitespace(c);
1852     case RX_NOT_WHITESPACE:
1853       return !matchwhitespace(c);
1854     default:
1855       return (p.u.ch == c);
1856   }
1857 }
1858 
matchstar(regex_t p,regex_t * pattern,const char * text,int * matchlen)1859 static int matchstar(regex_t p, regex_t *pattern, const char *text, int *matchlen)
1860 {
1861   int prelen = *matchlen;
1862   const char *prepos = text;
1863   while ((text[0] != '\0') && matchone(p, *text)) {
1864     text++;
1865     (*matchlen)++;
1866   }
1867   while (text >= prepos) {
1868     if (matchpattern(pattern, text--, matchlen)) return 1;
1869     (*matchlen)--;
1870   }
1871 
1872   *matchlen = prelen;
1873   return 0;
1874 }
1875 
matchplus(regex_t p,regex_t * pattern,const char * text,int * matchlen)1876 static int matchplus(regex_t p, regex_t *pattern, const char *text, int *matchlen)
1877 {
1878   const char *prepos = text;
1879   while ((text[0] != '\0') && matchone(p, *text)) {
1880     text++;
1881     (*matchlen)++;
1882   }
1883   while (text > prepos) {
1884     if (matchpattern(pattern, text--, matchlen)) return 1;
1885     (*matchlen)--;
1886   }
1887   return 0;
1888 }
1889 
matchquestion(regex_t p,regex_t * pattern,const char * text,int * matchlen)1890 static int matchquestion(regex_t p, regex_t *pattern, const char *text, int *matchlen)
1891 {
1892   if (p.type == RX_UNUSED) return 1;
1893   if (matchpattern(pattern, text, matchlen)) return 1;
1894   if (*text && matchone(p, *text++)) {
1895     if (matchpattern(pattern, text, matchlen)) {
1896       (*matchlen)++;
1897       return 1;
1898     }
1899   }
1900   return 0;
1901 }
1902 
1903 /* Iterative matching */
matchpattern(regex_t * pattern,const char * text,int * matchlen)1904 static int matchpattern(regex_t *pattern, const char *text, int *matchlen)
1905 {
1906   int pre = *matchlen;
1907   do {
1908     if ((pattern[0].type == RX_UNUSED) || (pattern[1].type == RX_QUESTIONMARK)) {
1909       return matchquestion(pattern[0], &pattern[2], text, matchlen);
1910     } else if (pattern[1].type == RX_STAR) {
1911       return matchstar(pattern[0], &pattern[2], text, matchlen);
1912     } else if (pattern[1].type == RX_PLUS) {
1913       return matchplus(pattern[0], &pattern[2], text, matchlen);
1914     } else if ((pattern[0].type == RX_END) && pattern[1].type == RX_UNUSED) {
1915       return (text[0] == '\0');
1916     }
1917     (*matchlen)++;
1918   } while ((text[0] != '\0') && matchone(*pattern++, *text++));
1919 
1920   *matchlen = pre;
1921   return 0;
1922 }
1923 }
1924