1 /* ----------------------------------------------------------------------
2 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3 https://www.lammps.org/, Sandia National Laboratories
4 Steve Plimpton, sjplimp@sandia.gov
5
6 Copyright (2003) Sandia Corporation. Under the terms of Contract
7 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8 certain rights in this software. This software is distributed under
9 the GNU General Public License.
10
11 See the README file in the top-level LAMMPS directory.
12 ------------------------------------------------------------------------- */
13
14 #include "utils.h"
15
16 #include "atom.h"
17 #include "comm.h"
18 #include "compute.h"
19 #include "error.h"
20 #include "fix.h"
21 #include "fmt/chrono.h"
22 #include "memory.h"
23 #include "modify.h"
24 #include "text_file_reader.h"
25 #include "tokenizer.h"
26 #include "update.h"
27
28 #include <cctype>
29 #include <cerrno>
30 #include <cstring>
31 #include <ctime>
32
33 #if defined(__linux__)
34 #include <unistd.h> // for readlink
35 #endif
36
37 #if defined(__APPLE__)
38 #include <fcntl.h> // for fcntl
39 #include <sys/syslimits.h>
40 #endif
41
42 #if defined(_WIN32)
43 // target Windows version is Windows 7 and later
44 #if defined(_WIN32_WINNT)
45 #undef _WIN32_WINNT
46 #endif
47 #define _WIN32_WINNT _WIN32_WINNT_WIN7
48 #include <io.h>
49 #include <windows.h>
50 #endif
51
52 /*! \file utils.cpp */
53
54 /*
55 * Mini regex-module adapted from https://github.com/kokke/tiny-regex-c
56 * which is in the public domain.
57 *
58 * Supports:
59 * ---------
60 * '.' Dot, matches any character
61 * '^' Start anchor, matches beginning of string
62 * '$' End anchor, matches end of string
63 * '*' Asterisk, match zero or more (greedy)
64 * '+' Plus, match one or more (greedy)
65 * '?' Question, match zero or one (non-greedy)
66 * '[abc]' Character class, match if one of {'a', 'b', 'c'}
67 * '[a-zA-Z]' Character ranges, the character set of the ranges { a-z | A-Z }
68 * '\s' Whitespace, \t \f \r \n \v and spaces
69 * '\S' Non-whitespace
70 * '\w' Alphanumeric, [a-zA-Z0-9_]
71 * '\W' Non-alphanumeric
72 * '\d' Digits, [0-9]
73 * '\D' Non-digits
74 * '\i' Integer chars, [0-9], '+' and '-'
75 * '\I' Non-integers
76 * '\f' Floating point number chars, [0-9], '.', 'e', 'E', '+' and '-'
77 * '\F' Non-floats
78 *
79 * *NOT* supported:
80 * '[^abc]' Inverted class
81 * 'a|b' Branches
82 * '(abc)+' Groups
83 */
84
85 extern "C" {
86 /** Match text against a (simplified) regular expression
87 * (regexp will be compiled automatically). */
88 static int re_match(const char *text, const char *pattern);
89
90 /** Find the substring that matches a (simplified) regular expression
91 * (regexp will be compiled automatically). */
92 static int re_find(const char *text, const char *pattern, int *matchlen);
93 }
94
95 ////////////////////////////////////////////////////////////////////////
96 // Merge sort support functions
97
98 static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, void *ptr,
99 int (*comp)(int, int, void *));
100 static void insertion_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *));
101
102 ////////////////////////////////////////////////////////////////////////
103
104 using namespace LAMMPS_NS;
105
106 /** More flexible and specific matching of a string against a pattern.
107 * This function is supposed to be a more safe, more specific and
108 * simple to use API to find pattern matches. The purpose is to replace
109 * uses of either strncmp() or strstr() in the code base to find
110 * sub-strings safely. With strncmp() finding prefixes, the number of
111 * characters to match must be counted, which can lead to errors,
112 * while using "^pattern" will do the same with less problems.
113 * Matching for suffixes using strstr() is not as specific as 'pattern$',
114 * and complex matches, e.g. "^rigid.*\/small.*", to match all small
115 * body optimized rigid fixes require only one test.
116 *
117 * The use of std::string arguments allows for simple concatenation
118 * even with char * type variables.
119 * Example: utils::strmatch(text, std::string("^") + charptr)
120 */
strmatch(const std::string & text,const std::string & pattern)121 bool utils::strmatch(const std::string &text, const std::string &pattern)
122 {
123 const int pos = re_match(text.c_str(), pattern.c_str());
124 return (pos >= 0);
125 }
126
127 /** This function is a companion function to utils::strmatch(). Arguments
128 * and logic is the same, but instead of a boolean, it returns the
129 * sub-string that matches the regex pattern. There can be only one match.
130 * This can be used as a more flexible alternative to strstr().
131 */
strfind(const std::string & text,const std::string & pattern)132 std::string utils::strfind(const std::string &text, const std::string &pattern)
133 {
134 int matchlen;
135 const int pos = re_find(text.c_str(), pattern.c_str(), &matchlen);
136 if ((pos >= 0) && (matchlen > 0))
137 return text.substr(pos, matchlen);
138 else
139 return "";
140 }
141
142 /* specialization for the case of just a single string argument */
143
logmesg(LAMMPS * lmp,const std::string & mesg)144 void utils::logmesg(LAMMPS *lmp, const std::string &mesg)
145 {
146 if (lmp->screen) fputs(mesg.c_str(), lmp->screen);
147 if (lmp->logfile) fputs(mesg.c_str(), lmp->logfile);
148 }
149
fmtargs_logmesg(LAMMPS * lmp,fmt::string_view format,fmt::format_args args)150 void utils::fmtargs_logmesg(LAMMPS *lmp, fmt::string_view format, fmt::format_args args)
151 {
152 try {
153 logmesg(lmp, fmt::vformat(format, args));
154 } catch (fmt::format_error &e) {
155 logmesg(lmp, std::string(e.what()) + "\n");
156 }
157 }
158
159 /* define this here, so we won't have to include the headers
160 everywhere and utils.h will more likely be included anyway. */
161
getsyserror()162 std::string utils::getsyserror()
163 {
164 return std::string(strerror(errno));
165 }
166
167 /** On Linux the folder /proc/self/fd holds symbolic links to the actual
168 * pathnames associated with each open file descriptor of the current process.
169 * On MacOS the same kind of information can be obtained using ``fcntl(fd,F_GETPATH,buf)``.
170 * On Windows we use ``GetFinalPathNameByHandleA()`` which is available with
171 * Windows Vista and later.
172 *
173 * This function is used to provide a filename with error messages in functions
174 * where the filename is not passed as an argument, but the FILE * pointer.
175 */
guesspath(char * buf,int len,FILE * fp)176 const char *utils::guesspath(char *buf, int len, FILE *fp)
177 {
178 memset(buf, 0, len);
179
180 #if defined(__linux__)
181 int fd = fileno(fp);
182 // get pathname from /proc or copy (unknown)
183 if (readlink(fmt::format("/proc/self/fd/{}", fd).c_str(), buf, len - 1) <= 0)
184 strncpy(buf, "(unknown)", len - 1);
185 #elif defined(__APPLE__)
186 int fd = fileno(fp);
187 char filepath[PATH_MAX];
188 if (fcntl(fd, F_GETPATH, filepath) != -1)
189 strncpy(buf, filepath, len - 1);
190 else
191 strncpy(buf, "(unknown)", len - 1);
192 #elif defined(_WIN32)
193 char filepath[MAX_PATH];
194 HANDLE h = (HANDLE) _get_osfhandle(_fileno(fp));
195 if (GetFinalPathNameByHandleA(h, filepath, PATH_MAX, FILE_NAME_NORMALIZED) > 0)
196 strncpy(buf, filepath, len - 1);
197 else
198 strncpy(buf, "(unknown)", len - 1);
199 #else
200 strncpy(buf, "(unknown)", len - 1);
201 #endif
202 return buf;
203 }
204
205 // read line into buffer. if line is too long keep reading until EOL or EOF
206 // but return only the first part with a newline at the end.
207
fgets_trunc(char * buf,int size,FILE * fp)208 char *utils::fgets_trunc(char *buf, int size, FILE *fp)
209 {
210 constexpr int MAXDUMMY = 256;
211 char dummy[MAXDUMMY];
212 char *ptr = fgets(buf, size, fp);
213
214 // EOF?
215 if (!ptr) return nullptr;
216
217 int n = strlen(buf);
218
219 // check the string being read in:
220 // - if string is shorter than the buffer make sure it has a final newline and return
221 // - if string is exactly the size of the buffer and has a final newline return
222 // - otherwise truncate with final newline and read into dummy buffer until EOF or newline is found
223 if (n < size - 1) {
224 if (buf[n - 1] != '\n') {
225 buf[n] = '\n';
226 buf[n + 1] = '\0';
227 }
228 return buf;
229 } else if (buf[n - 1] == '\n') {
230 return buf;
231 } else
232 buf[size - 2] = '\n';
233
234 // continue reading into dummy buffer until end of line or file
235 do {
236 ptr = fgets(dummy, MAXDUMMY, fp);
237 if (ptr)
238 n = strlen(ptr);
239 else
240 n = 0;
241 } while (n == MAXDUMMY - 1 && ptr[MAXDUMMY - 1] != '\n');
242
243 // return truncated chunk
244 return buf;
245 }
246
247 /* like fgets() but aborts with an error or EOF is encountered */
sfgets(const char * srcname,int srcline,char * s,int size,FILE * fp,const char * filename,Error * error)248 void utils::sfgets(const char *srcname, int srcline, char *s, int size, FILE *fp,
249 const char *filename, Error *error)
250 {
251 constexpr int MAXPATHLENBUF = 1024;
252 char *rv = fgets(s, size, fp);
253 if (rv == nullptr) { // something went wrong
254 char buf[MAXPATHLENBUF];
255 std::string errmsg;
256
257 // try to figure out the file name from the file pointer
258 if (!filename) filename = guesspath(buf, MAXPATHLENBUF, fp);
259
260 if (feof(fp)) {
261 errmsg = "Unexpected end of file while reading file '";
262 } else if (ferror(fp)) {
263 errmsg = "Unexpected error while reading file '";
264 } else {
265 errmsg = "Unexpected short read while reading file '";
266 }
267 errmsg += filename;
268 errmsg += "'";
269
270 if (error) error->one(srcname, srcline, errmsg);
271 if (s) *s = '\0'; // truncate string to empty in case error is null pointer
272 }
273 return;
274 }
275
276 /* like fread() but aborts with an error or EOF is encountered */
sfread(const char * srcname,int srcline,void * s,size_t size,size_t num,FILE * fp,const char * filename,Error * error)277 void utils::sfread(const char *srcname, int srcline, void *s, size_t size, size_t num, FILE *fp,
278 const char *filename, Error *error)
279 {
280 constexpr int MAXPATHLENBUF = 1024;
281 size_t rv = fread(s, size, num, fp);
282 if (rv != num) { // something went wrong
283 char buf[MAXPATHLENBUF];
284 std::string errmsg;
285
286 // try to figure out the file name from the file pointer
287 if (!filename) filename = guesspath(buf, MAXPATHLENBUF, fp);
288
289 if (feof(fp)) {
290 errmsg = "Unexpected end of file while reading file '";
291 } else if (ferror(fp)) {
292 errmsg = "Unexpected error while reading file '";
293 } else {
294 errmsg = "Unexpected short read while reading file '";
295 }
296 errmsg += filename;
297 errmsg += "'";
298
299 if (error) error->one(srcname, srcline, errmsg);
300 }
301 return;
302 }
303
304 /* ------------------------------------------------------------------ */
305
306 /* read N lines and broadcast */
read_lines_from_file(FILE * fp,int nlines,int nmax,char * buffer,int me,MPI_Comm comm)307 int utils::read_lines_from_file(FILE *fp, int nlines, int nmax, char *buffer, int me, MPI_Comm comm)
308 {
309 char *ptr = buffer;
310 *ptr = '\0';
311
312 if (me == 0) {
313 if (fp) {
314 for (int i = 0; i < nlines; i++) {
315 ptr = fgets_trunc(ptr, nmax, fp);
316 if (!ptr) break; // EOF?
317 // advance ptr to end of string
318 ptr += strlen(ptr);
319 // ensure buffer is null terminated. null char is start of next line.
320 *ptr = '\0';
321 }
322 }
323 }
324
325 int n = strlen(buffer);
326 MPI_Bcast(&n, 1, MPI_INT, 0, comm);
327 if (n == 0) return 1;
328 MPI_Bcast(buffer, n + 1, MPI_CHAR, 0, comm);
329 return 0;
330 }
331
332 /* ------------------------------------------------------------------ */
333
check_packages_for_style(const std::string & style,const std::string & name,LAMMPS * lmp)334 std::string utils::check_packages_for_style(const std::string &style, const std::string &name,
335 LAMMPS *lmp)
336 {
337 std::string errmsg = "Unrecognized " + style + " style '" + name + "'";
338 const char *pkg = lmp->match_style(style.c_str(), name.c_str());
339
340 if (pkg) {
341 errmsg += fmt::format(" is part of the {} package", pkg);
342 if (lmp->is_installed_pkg(pkg))
343 errmsg += ", but seems to be missing because of a dependency";
344 else
345 errmsg += " which is not enabled in this LAMMPS binary.";
346 }
347 return errmsg;
348 }
349
350 /* ----------------------------------------------------------------------
351 read a floating point value from a string
352 generate an error if not a legitimate floating point value
353 called by various commands to check validity of their arguments
354 ------------------------------------------------------------------------- */
355
numeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)356 double utils::numeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
357 {
358 int n = 0;
359
360 if (str) n = strlen(str);
361 if (n == 0) {
362 const char msg[] = "Expected floating point parameter instead of"
363 " NULL or empty string in input script or data file";
364 if (do_abort)
365 lmp->error->one(file, line, msg);
366 else
367 lmp->error->all(file, line, msg);
368 }
369
370 std::string buf(str);
371 if (has_utf8(buf)) buf = utf8_subst(buf);
372
373 if (buf.find_first_not_of("0123456789-+.eE") != std::string::npos) {
374 std::string msg("Expected floating point parameter instead of '");
375 msg += buf + "' in input script or data file";
376 if (do_abort)
377 lmp->error->one(file, line, msg);
378 else
379 lmp->error->all(file, line, msg);
380 }
381
382 return atof(buf.c_str());
383 }
384
385 /* ----------------------------------------------------------------------
386 read an integer value from a string
387 generate an error if not a legitimate integer value
388 called by various commands to check validity of their arguments
389 ------------------------------------------------------------------------- */
390
inumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)391 int utils::inumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
392 {
393 int n = 0;
394
395 if (str) n = strlen(str);
396 if (n == 0) {
397 const char msg[] = "Expected integer parameter instead of"
398 " NULL or empty string in input script or data file";
399 if (do_abort)
400 lmp->error->one(file, line, msg);
401 else
402 lmp->error->all(file, line, msg);
403 }
404
405 std::string buf(str);
406 if (has_utf8(buf)) buf = utf8_subst(buf);
407
408 if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
409 std::string msg("Expected integer parameter instead of '");
410 msg += buf + "' in input script or data file";
411 if (do_abort)
412 lmp->error->one(file, line, msg);
413 else
414 lmp->error->all(file, line, msg);
415 }
416
417 return atoi(buf.c_str());
418 }
419
420 /* ----------------------------------------------------------------------
421 read a big integer value from a string
422 generate an error if not a legitimate integer value
423 called by various commands to check validity of their arguments
424 ------------------------------------------------------------------------- */
425
bnumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)426 bigint utils::bnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
427 {
428 int n = 0;
429
430 if (str) n = strlen(str);
431 if (n == 0) {
432 const char msg[] = "Expected integer parameter instead of"
433 " NULL or empty string in input script or data file";
434 if (do_abort)
435 lmp->error->one(file, line, msg);
436 else
437 lmp->error->all(file, line, msg);
438 }
439
440 std::string buf(str);
441 if (has_utf8(buf)) buf = utf8_subst(buf);
442
443 if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
444 std::string msg("Expected integer parameter instead of '");
445 msg += buf + "' in input script or data file";
446 if (do_abort)
447 lmp->error->one(file, line, msg);
448 else
449 lmp->error->all(file, line, msg);
450 }
451
452 return ATOBIGINT(buf.c_str());
453 }
454
455 /* ----------------------------------------------------------------------
456 read a tag integer value from a string
457 generate an error if not a legitimate integer value
458 called by various commands to check validity of their arguments
459 ------------------------------------------------------------------------- */
460
tnumeric(const char * file,int line,const char * str,bool do_abort,LAMMPS * lmp)461 tagint utils::tnumeric(const char *file, int line, const char *str, bool do_abort, LAMMPS *lmp)
462 {
463 int n = 0;
464
465 if (str) n = strlen(str);
466 if (n == 0) {
467 const char msg[] = "Expected integer parameter instead of"
468 " NULL or empty string in input script or data file";
469 if (do_abort)
470 lmp->error->one(file, line, msg);
471 else
472 lmp->error->all(file, line, msg);
473 }
474
475 std::string buf(str);
476 if (has_utf8(buf)) buf = utf8_subst(buf);
477
478 if (buf.find_first_not_of("0123456789-+") != std::string::npos) {
479 std::string msg("Expected integer parameter instead of '");
480 msg += buf + "' in input script or data file";
481 if (do_abort)
482 lmp->error->one(file, line, msg);
483 else
484 lmp->error->all(file, line, msg);
485 }
486
487 return ATOTAGINT(buf.c_str());
488 }
489
490 /* ----------------------------------------------------------------------
491 compute bounds implied by numeric str with a possible wildcard asterisk
492 ------------------------------------------------------------------------- */
493 // clang-format off
494 template <typename TYPE>
bounds(const char * file,int line,const std::string & str,bigint nmin,bigint nmax,TYPE & nlo,TYPE & nhi,Error * error)495 void utils::bounds(const char *file, int line, const std::string &str,
496 bigint nmin, bigint nmax, TYPE &nlo, TYPE &nhi, Error *error)
497 {
498 nlo = nhi = -1;
499
500 // check for illegal charcters
501 size_t found = str.find_first_not_of("*-0123456789");
502 if (found != std::string::npos) {
503 if (error) error->all(file, line, fmt::format("Invalid range string: {}", str));
504 return;
505 }
506
507 found = str.find_first_of('*');
508 if (found == std::string::npos) { // contains no '*'
509 nlo = nhi = strtol(str.c_str(), nullptr, 10);
510 } else if (str.size() == 1) { // is only '*'
511 nlo = nmin;
512 nhi = nmax;
513 } else if (found == 0) { // is '*j'
514 nlo = nmin;
515 nhi = strtol(str.substr(1).c_str(), nullptr, 10);
516 } else if (str.size() == found + 1) { // is 'i*'
517 nlo = strtol(str.c_str(), nullptr, 10);
518 nhi = nmax;
519 } else { // is 'i*j'
520 nlo = strtol(str.c_str(), nullptr, 10);
521 nhi = strtol(str.substr(found + 1).c_str(), nullptr, 10);
522 }
523
524 if (error) {
525 if ((nlo <= 0) || (nhi <= 0))
526 error->all(file, line, fmt::format("Invalid range string: {}", str));
527
528 if (nlo < nmin)
529 error->all(file, line, fmt::format("Numeric index {} is out of bounds "
530 "({}-{})", nlo, nmin, nmax));
531 else if (nhi > nmax)
532 error->all(file, line, fmt::format("Numeric index {} is out of bounds "
533 "({}-{})", nhi, nmin, nmax));
534 else if (nlo > nhi)
535 error->all(file, line, fmt::format("Numeric index {} is out of bounds "
536 "({}-{})", nlo, nmin, nhi));
537 }
538 }
539
540 template void utils::bounds<>(const char *, int, const std::string &,
541 bigint, bigint, int &, int &, Error *);
542 template void utils::bounds<>(const char *, int, const std::string &,
543 bigint, bigint, long &, long &, Error *);
544 template void utils::bounds<>(const char *, int, const std::string &,
545 bigint, bigint, long long &, long long &, Error *);
546 // clang-format on
547
548 /* -------------------------------------------------------------------------
549 Expand list of arguments in arg to earg if arg contains wildcards
550 ------------------------------------------------------------------------- */
551
expand_args(const char * file,int line,int narg,char ** arg,int mode,char ** & earg,LAMMPS * lmp)552 int utils::expand_args(const char *file, int line, int narg, char **arg, int mode, char **&earg,
553 LAMMPS *lmp)
554 {
555 int iarg;
556
557 char *ptr = nullptr;
558 for (iarg = 0; iarg < narg; iarg++) {
559 ptr = strchr(arg[iarg], '*');
560 if (ptr) break;
561 }
562
563 if (!ptr) {
564 earg = arg;
565 return narg;
566 }
567
568 // maxarg should always end up equal to newarg, so caller can free earg
569
570 int maxarg = narg - iarg;
571 earg = (char **) lmp->memory->smalloc(maxarg * sizeof(char *), "input:earg");
572
573 int newarg = 0, expandflag, nlo, nhi, nmax;
574 std::string id, wc, tail;
575
576 for (iarg = 0; iarg < narg; iarg++) {
577 std::string word(arg[iarg]);
578 expandflag = 0;
579
580 // match compute, fix, or custom property array reference with a '*' wildcard
581 // number range in the first pair of square brackets
582
583 if (strmatch(word, "^[cf]_\\w+\\[\\d*\\*\\d*\\]") ||
584 strmatch(word, "^[id]2_\\w+\\[\\d*\\*\\d*\\]")) {
585
586 // split off the compute/fix/property ID, the wildcard and trailing text
587
588 size_t first = word.find('[');
589 size_t second = word.find(']', first + 1);
590 if (word[1] == '2')
591 id = word.substr(3, first - 3);
592 else
593 id = word.substr(2, first - 2);
594
595 wc = word.substr(first + 1, second - first - 1);
596 tail = word.substr(second + 1);
597
598 // compute
599
600 if (word[0] == 'c') {
601 int icompute = lmp->modify->find_compute(id);
602
603 // check for global vector/array, peratom array, local array
604
605 if (icompute >= 0) {
606 Compute *compute = lmp->modify->compute[icompute];
607 if (mode == 0 && compute->vector_flag) {
608 nmax = compute->size_vector;
609 expandflag = 1;
610 } else if (mode == 1 && compute->array_flag) {
611 nmax = compute->size_array_cols;
612 expandflag = 1;
613 } else if (compute->peratom_flag && compute->size_peratom_cols) {
614 nmax = compute->size_peratom_cols;
615 expandflag = 1;
616 } else if (compute->local_flag && compute->size_local_cols) {
617 nmax = compute->size_local_cols;
618 expandflag = 1;
619 }
620 }
621
622 // fix
623
624 } else if (word[0] == 'f') {
625 int ifix = lmp->modify->find_fix(id);
626
627 // check for global vector/array, peratom array, local array
628
629 if (ifix >= 0) {
630 Fix *fix = lmp->modify->fix[ifix];
631
632 if (mode == 0 && fix->vector_flag) {
633 nmax = fix->size_vector;
634 expandflag = 1;
635 } else if (mode == 1 && fix->array_flag) {
636 nmax = fix->size_array_cols;
637 expandflag = 1;
638 } else if (fix->peratom_flag && fix->size_peratom_cols) {
639 nmax = fix->size_peratom_cols;
640 expandflag = 1;
641 } else if (fix->local_flag && fix->size_local_cols) {
642 nmax = fix->size_local_cols;
643 expandflag = 1;
644 }
645 }
646
647 // only match custom array reference with a '*' wildcard
648 // number range in the first pair of square brackets
649
650 } else if ((word[0] == 'i') || (word[0] == 'd')) {
651 int flag, cols;
652 int icustom = lmp->atom->find_custom(id.c_str(), flag, cols);
653
654 if ((icustom >= 0) && (mode == 1) && (cols > 0)) {
655
656 // check for custom per-atom array
657
658 if (((word[0] == 'i') && (flag == 0)) || ((word[0] == 'd') && (flag == 1))) {
659 nmax = cols;
660 expandflag = 1;
661 }
662 }
663 }
664 }
665
666 // expansion will take place
667
668 if (expandflag) {
669
670 // expand wild card string to nlo/nhi numbers
671 utils::bounds(file, line, wc, 1, nmax, nlo, nhi, lmp->error);
672
673 if (newarg + nhi - nlo + 1 > maxarg) {
674 maxarg += nhi - nlo + 1;
675 earg = (char **) lmp->memory->srealloc(earg, maxarg * sizeof(char *), "input:earg");
676 }
677
678 for (int index = nlo; index <= nhi; index++) {
679 earg[newarg] = utils::strdup(fmt::format("{}2_{}[{}]{}", word[0], id, index, tail));
680 newarg++;
681 }
682 } else {
683 // no expansion: duplicate original string
684 if (newarg == maxarg) {
685 maxarg++;
686 earg = (char **) lmp->memory->srealloc(earg, maxarg * sizeof(char *), "input:earg");
687 }
688 earg[newarg] = utils::strdup(word);
689 newarg++;
690 }
691 }
692
693 //printf("NEWARG %d\n",newarg);
694 //for (int i = 0; i < newarg; i++)
695 // printf(" arg %d: %s\n",i,earg[i]);
696
697 return newarg;
698 }
699
700 /* ----------------------------------------------------------------------
701 Make copy of string in new storage. Works like the (non-portable)
702 C-style strdup() but also accepts a C++ string as argument.
703 ------------------------------------------------------------------------- */
704
strdup(const std::string & text)705 char *utils::strdup(const std::string &text)
706 {
707 char *tmp = new char[text.size() + 1];
708 strcpy(tmp, text.c_str()); // NOLINT
709 return tmp;
710 }
711
712 /* ----------------------------------------------------------------------
713 Return string without leading or trailing whitespace
714 ------------------------------------------------------------------------- */
715
trim(const std::string & line)716 std::string utils::trim(const std::string &line)
717 {
718 int beg = re_match(line.c_str(), "\\S+");
719 int end = re_match(line.c_str(), "\\s+$");
720 if (beg < 0) beg = 0;
721 if (end < 0) end = line.size();
722
723 return line.substr(beg, end - beg);
724 }
725
726 /* ----------------------------------------------------------------------
727 Return string without trailing # comment
728 ------------------------------------------------------------------------- */
729
trim_comment(const std::string & line)730 std::string utils::trim_comment(const std::string &line)
731 {
732 auto end = line.find_first_of('#');
733 if (end != std::string::npos) { return line.substr(0, end); }
734 return std::string(line);
735 }
736
737 /* ----------------------------------------------------------------------
738 Replace UTF-8 encoded chars with known ASCII equivalents
739 ------------------------------------------------------------------------- */
740
utf8_subst(const std::string & line)741 std::string utils::utf8_subst(const std::string &line)
742 {
743 const unsigned char *const in = (const unsigned char *) line.c_str();
744 const int len = line.size();
745 std::string out;
746
747 for (int i = 0; i < len; ++i) {
748
749 // UTF-8 2-byte character
750 if ((in[i] & 0xe0U) == 0xc0U) {
751 if ((i + 1) < len) {
752 // NON-BREAKING SPACE (U+00A0)
753 if ((in[i] == 0xc2U) && (in[i + 1] == 0xa0U)) out += ' ', ++i;
754 // MODIFIER LETTER PLUS SIGN (U+02D6)
755 if ((in[i] == 0xcbU) && (in[i + 1] == 0x96U)) out += '+', ++i;
756 // MODIFIER LETTER MINUS SIGN (U+02D7)
757 if ((in[i] == 0xcbU) && (in[i + 1] == 0x97U)) out += '-', ++i;
758 }
759 // UTF-8 3-byte character
760 } else if ((in[i] & 0xf0U) == 0xe0U) {
761 if ((i + 2) < len) {
762 // EN QUAD (U+2000)
763 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x80U)) out += ' ', i += 2;
764 // EM QUAD (U+2001)
765 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x81U)) out += ' ', i += 2;
766 // EN SPACE (U+2002)
767 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x82U)) out += ' ', i += 2;
768 // EM SPACE (U+2003)
769 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x83U)) out += ' ', i += 2;
770 // THREE-PER-EM SPACE (U+2004)
771 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x84U)) out += ' ', i += 2;
772 // FOUR-PER-EM SPACE (U+2005)
773 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x85U)) out += ' ', i += 2;
774 // SIX-PER-EM SPACE (U+2006)
775 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x86U)) out += ' ', i += 2;
776 // FIGURE SPACE (U+2007)
777 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x87U)) out += ' ', i += 2;
778 // PUNCTUATION SPACE (U+2008)
779 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x88U)) out += ' ', i += 2;
780 // THIN SPACE (U+2009)
781 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x89U)) out += ' ', i += 2;
782 // HAIR SPACE (U+200A)
783 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x8aU)) out += ' ', i += 2;
784 // ZERO WIDTH SPACE (U+200B)
785 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x8bU)) out += ' ', i += 2;
786 // LEFT SINGLE QUOTATION MARK (U+2018)
787 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x98U)) out += '\'', i += 2;
788 // RIGHT SINGLE QUOTATION MARK (U+2019)
789 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x99U)) out += '\'', i += 2;
790 // LEFT DOUBLE QUOTATION MARK (U+201C)
791 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x9cU)) out += '"', i += 2;
792 // RIGHT DOUBLE QUOTATION MARK (U+201D)
793 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0x9dU)) out += '"', i += 2;
794 // NARROW NO-BREAK SPACE (U+202F)
795 if ((in[i] == 0xe2U) && (in[i + 1] == 0x80U) && (in[i + 2] == 0xafU)) out += ' ', i += 2;
796 // WORD JOINER (U+2060)
797 if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa0U)) out += ' ', i += 2;
798 // INVISIBLE SEPARATOR (U+2063)
799 if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa3U)) out += ' ', i += 2;
800 // INVISIBLE PLUS (U+2064)
801 if ((in[i] == 0xe2U) && (in[i + 1] == 0x81U) && (in[i + 2] == 0xa4U)) out += '+', i += 2;
802 // MINUS SIGN (U+2212)
803 if ((in[i] == 0xe2U) && (in[i + 1] == 0x88U) && (in[i + 2] == 0x92U)) out += '-', i += 2;
804 // ZERO WIDTH NO-BREAK SPACE (U+FEFF)
805 if ((in[i] == 0xefU) && (in[i + 1] == 0xbbU) && (in[i + 2] == 0xbfU)) out += ' ', i += 2;
806 }
807 // UTF-8 4-byte character
808 } else if ((in[i] & 0xf8U) == 0xf0U) {
809 if ((i + 3) < len) { ; }
810 } else
811 out += in[i];
812 }
813 return out;
814 }
815
816 /* ----------------------------------------------------------------------
817 return number of words
818 ------------------------------------------------------------------------- */
819
count_words(const char * text)820 size_t utils::count_words(const char *text)
821 {
822 size_t count = 0;
823 const char *buf = text;
824 char c = *buf;
825
826 while (c) {
827 if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') {
828 c = *++buf;
829 continue;
830 };
831
832 ++count;
833 c = *++buf;
834
835 while (c) {
836 if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') { break; }
837 c = *++buf;
838 }
839 }
840
841 return count;
842 }
843
844 /* ----------------------------------------------------------------------
845 return number of words
846 ------------------------------------------------------------------------- */
847
count_words(const std::string & text)848 size_t utils::count_words(const std::string &text)
849 {
850 return utils::count_words(text.c_str());
851 }
852
853 /* ----------------------------------------------------------------------
854 Return number of words
855 ------------------------------------------------------------------------- */
856
count_words(const std::string & text,const std::string & separators)857 size_t utils::count_words(const std::string &text, const std::string &separators)
858 {
859 size_t count = 0;
860 size_t start = text.find_first_not_of(separators);
861
862 while (start != std::string::npos) {
863 size_t end = text.find_first_of(separators, start);
864 ++count;
865
866 if (end == std::string::npos) {
867 return count;
868 } else {
869 start = text.find_first_not_of(separators, end + 1);
870 }
871 }
872 return count;
873 }
874
875 /* ----------------------------------------------------------------------
876 Trim comment from string and return number of words
877 ------------------------------------------------------------------------- */
878
trim_and_count_words(const std::string & text,const std::string & separators)879 size_t utils::trim_and_count_words(const std::string &text, const std::string &separators)
880 {
881 return utils::count_words(utils::trim_comment(text), separators);
882 }
883
/* ----------------------------------------------------------------------
   Convert string into words on whitespace while handling single and
   double quotes.

   Words are separated by blank, tab, CR, LF, or form feed.  Text enclosed
   in '...' or "..." is kept together and returned without the enclosing
   quotes; a backslash-escaped quote does not end a quoted section.
   A leading """ (triple double quote) is kept in the word as-is.

   The start offset of the next word is re-derived from the read pointer
   after each word, so that the offset cannot drift after quoted words,
   and an unterminated quote ends at the end of the string instead of
   reading beyond the terminating NUL.
------------------------------------------------------------------------- */
std::vector<std::string> utils::split_words(const std::string &text)
{
  std::vector<std::string> list;
  const char *const start = text.c_str();
  const char *buf = start;
  std::size_t beg = 0;    // offset in text where the current word begins
  std::size_t len = 0;    // number of characters of the current word
  char c = *buf;

  while (c) {
    // leading whitespace
    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') {
      c = *++buf;
      ++beg;
      continue;
    };
    len = 0;

  // handle escaped/quoted text.
  quoted:

    // handle single quote
    if (c == '\'') {
      ++beg;    // drop the opening quote from the word
      c = *++buf;
      while (((c != '\'') && (c != '\0')) || ((c == '\\') && (buf[1] == '\''))) {
        if ((c == '\\') && (buf[1] == '\'')) {
          ++buf;
          ++len;
        }
        c = *++buf;
        ++len;
      }
      if (c != '\'') ++len;
      // step over the closing quote, but never past the terminating NUL
      if (c != '\0') c = *++buf;

      // handle triple double quotation marks
    } else if ((c == '"') && (buf[1] == '"') && (buf[2] == '"') && (buf[3] != '"')) {
      len = 3;
      buf += 3;
      c = *buf;

      // handle double quote
    } else if (c == '"') {
      ++beg;    // drop the opening quote from the word
      c = *++buf;
      while (((c != '"') && (c != '\0')) || ((c == '\\') && (buf[1] == '"'))) {
        if ((c == '\\') && (buf[1] == '"')) {
          ++buf;
          ++len;
        }
        c = *++buf;
        ++len;
      }
      if (c != '"') ++len;
      // step over the closing quote, but never past the terminating NUL
      if (c != '\0') c = *++buf;
    }

    // unquoted
    while (1) {
      if ((c == '\'') || (c == '"')) goto quoted;
      // skip escaped quote
      if ((c == '\\') && ((buf[1] == '\'') || (buf[1] == '"'))) {
        ++buf;
        ++len;
        c = *++buf;
        ++len;
      }
      if ((c == ' ') || (c == '\t') || (c == '\r') || (c == '\n') || (c == '\f') || (c == '\0')) {
        list.push_back(text.substr(beg, len));
        // buf sits on the character that ended the word: resume counting from there
        beg = static_cast<std::size_t>(buf - start);
        break;
      }
      c = *++buf;
      ++len;
    }
  }
  return list;
}
970
971 /* ----------------------------------------------------------------------
972 Convert multi-line string into lines
973 ------------------------------------------------------------------------- */
split_lines(const std::string & text)974 std::vector<std::string> utils::split_lines(const std::string &text)
975 {
976 return Tokenizer(text, "\n").as_vector();
977 }
978
/* ----------------------------------------------------------------------
   Return whether string consists only of characters that may appear in
   an integer number: decimal digits, '-', and '+'.
   An empty string returns false.  This is a character-level check only;
   the order of the characters is not validated.
------------------------------------------------------------------------- */

bool utils::is_integer(const std::string &str)
{
  if (str.empty()) return false;

  for (auto c : str) {
    // the <cctype> classifiers are only defined for unsigned char values,
    // so a plain (possibly negative) char must be converted first
    if (isdigit(static_cast<unsigned char>(c)) || c == '-' || c == '+') continue;
    return false;
  }
  return true;
}
993
/* ----------------------------------------------------------------------
   Return whether string consists only of characters that may appear in
   a floating-point number: decimal digits, '-', '+', '.', 'e', and 'E'.
   An empty string returns false.  This is a character-level check only;
   the order of the characters is not validated.
------------------------------------------------------------------------- */

bool utils::is_double(const std::string &str)
{
  if (str.empty()) return false;

  for (auto c : str) {
    // the <cctype> classifiers are only defined for unsigned char values,
    // so a plain (possibly negative) char must be converted first
    if (isdigit(static_cast<unsigned char>(c))) continue;
    if (c == '-' || c == '+' || c == '.') continue;
    if (c == 'e' || c == 'E') continue;
    return false;
  }
  return true;
}
1010
/* ----------------------------------------------------------------------
   Return whether string is a valid ID string, i.e. it is not empty and
   contains only alphanumeric characters and underscores
------------------------------------------------------------------------- */

bool utils::is_id(const std::string &str)
{
  if (str.empty()) return false;

  for (auto c : str) {
    // the <cctype> classifiers are only defined for unsigned char values,
    // so a plain (possibly negative) char must be converted first
    if (isalnum(static_cast<unsigned char>(c)) || (c == '_')) continue;
    return false;
  }
  return true;
}
1025
/* ----------------------------------------------------------------------
   Return the part of path that follows the last directory separator
   ('/', and also '\' on Windows); a path without separator is returned
   unchanged
------------------------------------------------------------------------- */

std::string utils::path_basename(const std::string &path)
{
#if defined(_WIN32)
  const size_t sep = path.find_last_of("/\\");
#else
  const size_t sep = path.find_last_of('/');
#endif

  // no separator at all: the whole path is the file name
  return (sep == std::string::npos) ? path.substr(0) : path.substr(sep + 1);
}
1046
/* ----------------------------------------------------------------------
   Return the part of path in front of the last directory separator
   ('/', and also '\' on Windows); "." is returned if path contains no
   separator
------------------------------------------------------------------------- */

std::string utils::path_dirname(const std::string &path)
{
#if defined(_WIN32)
  const size_t sep = path.find_last_of("/\\");
#else
  const size_t sep = path.find_last_of('/');
#endif

  return (sep == std::string::npos) ? std::string(".") : path.substr(0, sep);
}
1063
1064 /* ----------------------------------------------------------------------
1065 join two paths
1066 ------------------------------------------------------------------------- */
1067
path_join(const std::string & a,const std::string & b)1068 std::string utils::path_join(const std::string &a, const std::string &b)
1069 {
1070 #if defined(_WIN32)
1071 return fmt::format("{}\\{}", a, b);
1072 #else
1073 return fmt::format("{}/{}", a, b);
1074 #endif
1075 }
1076
/* ----------------------------------------------------------------------
   Check whether a file can be read by trying to open it in read mode;
   the file is closed again right away
------------------------------------------------------------------------- */

bool utils::file_is_readable(const std::string &path)
{
  FILE *probe = fopen(path.c_str(), "r");
  if (probe == nullptr) return false;

  fclose(probe);
  return true;
}
1090
1091 /* ----------------------------------------------------------------------
1092 try to find potential file as specified by name
1093 search current directory and the LAMMPS_POTENTIALS directory if
1094 specified
1095 ------------------------------------------------------------------------- */
1096
get_potential_file_path(const std::string & path)1097 std::string utils::get_potential_file_path(const std::string &path)
1098 {
1099 std::string filepath = path;
1100 std::string filename = utils::path_basename(path);
1101
1102 if (utils::file_is_readable(filepath)) {
1103 return filepath;
1104 } else {
1105 // try the environment variable directory
1106 const char *var = getenv("LAMMPS_POTENTIALS");
1107
1108 if (var != nullptr) {
1109 #if defined(_WIN32)
1110 Tokenizer dirs(var, ";");
1111 #else
1112 Tokenizer dirs(var, ":");
1113 #endif
1114 while (dirs.has_next()) {
1115 auto pot = utils::path_basename(filepath);
1116 auto dir = dirs.next();
1117 filepath = utils::path_join(dir, pot);
1118
1119 if (utils::file_is_readable(filepath)) { return filepath; }
1120 }
1121 }
1122 }
1123 return "";
1124 }
1125
1126 /* ----------------------------------------------------------------------
1127 read first line of potential file
1128 if it has a DATE field, return the following word
1129 ------------------------------------------------------------------------- */
1130
get_potential_date(const std::string & path,const std::string & potential_name)1131 std::string utils::get_potential_date(const std::string &path, const std::string &potential_name)
1132 {
1133 TextFileReader reader(path, potential_name);
1134 reader.ignore_comments = false;
1135
1136 char *line = reader.next_line();
1137 if (line == nullptr) return "";
1138 Tokenizer words(line);
1139 while (words.has_next()) {
1140 if (words.next() == "DATE:") {
1141 if (words.has_next()) return words.next();
1142 }
1143 }
1144 return "";
1145 }
1146
1147 /* ----------------------------------------------------------------------
1148 read first line of potential file
1149 if it has UNITS field, return following word
1150 ------------------------------------------------------------------------- */
1151
get_potential_units(const std::string & path,const std::string & potential_name)1152 std::string utils::get_potential_units(const std::string &path, const std::string &potential_name)
1153 {
1154 TextFileReader reader(path, potential_name);
1155 reader.ignore_comments = false;
1156
1157 char *line = reader.next_line();
1158 if (line == nullptr) return "";
1159 Tokenizer words(line);
1160 while (words.has_next()) {
1161 if (words.next() == "UNITS:") {
1162 if (words.has_next()) return words.next();
1163 }
1164 }
1165 return "";
1166 }
1167
1168 /* ----------------------------------------------------------------------
1169 return bitmask of supported conversions for a given property
1170 ------------------------------------------------------------------------- */
get_supported_conversions(const int property)1171 int utils::get_supported_conversions(const int property)
1172 {
1173 if (property == ENERGY)
1174 return METAL2REAL | REAL2METAL;
1175 else
1176 return NOCONVERT;
1177 }
1178
1179 /* ----------------------------------------------------------------------
1180 return conversion factor for a given property and conversion setting
1181 return 0.0 if unknown.
1182 ------------------------------------------------------------------------- */
1183
get_conversion_factor(const int property,const int conversion)1184 double utils::get_conversion_factor(const int property, const int conversion)
1185 {
1186 if (property == ENERGY) {
1187 if (conversion == NOCONVERT) {
1188 return 1.0;
1189 } else if (conversion == METAL2REAL) {
1190 return 23.060549;
1191 } else if (conversion == REAL2METAL) {
1192 return 1.0 / 23.060549;
1193 }
1194 }
1195 return 0.0;
1196 }
1197
1198 /* ----------------------------------------------------------------------
1199 open a potential file as specified by name
1200 if fails, search in dir specified by env variable LAMMPS_POTENTIALS
1201 ------------------------------------------------------------------------- */
1202
open_potential(const std::string & name,LAMMPS * lmp,int * auto_convert)1203 FILE *utils::open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert)
1204 {
1205 auto error = lmp->error;
1206 auto me = lmp->comm->me;
1207
1208 std::string filepath = get_potential_file_path(name);
1209
1210 if (!filepath.empty()) {
1211 std::string unit_style = lmp->update->unit_style;
1212 std::string date = get_potential_date(filepath, "potential");
1213 std::string units = get_potential_units(filepath, "potential");
1214
1215 if (!date.empty() && (me == 0))
1216 logmesg(lmp, "Reading potential file {} with DATE: {}\n", name, date);
1217
1218 if (auto_convert == nullptr) {
1219 if (!units.empty() && (units != unit_style) && (me == 0)) {
1220 error->one(FLERR, "Potential file {} requires {} units but {} units are in use", name,
1221 units, unit_style);
1222 return nullptr;
1223 }
1224 } else {
1225 if (units.empty() || units == unit_style) {
1226 *auto_convert = NOCONVERT;
1227 } else {
1228 if ((units == "metal") && (unit_style == "real") && (*auto_convert & METAL2REAL)) {
1229 *auto_convert = METAL2REAL;
1230 } else if ((units == "real") && (unit_style == "metal") && (*auto_convert & REAL2METAL)) {
1231 *auto_convert = REAL2METAL;
1232 } else {
1233 error->one(FLERR, "Potential file {} requires {} units but {} units are in use", name,
1234 units, unit_style);
1235 return nullptr;
1236 }
1237 }
1238 if ((*auto_convert != NOCONVERT) && (me == 0))
1239 error->warning(FLERR, "Converting potential file in {} units to {} units", units,
1240 unit_style);
1241 }
1242 return fopen(filepath.c_str(), "r");
1243 }
1244 return nullptr;
1245 }
1246
1247 /* ----------------------------------------------------------------------
1248 convert a timespec ([[HH:]MM:]SS) to seconds
1249 the strings "off" and "unlimited" result in -1.0;
1250 ------------------------------------------------------------------------- */
1251
timespec2seconds(const std::string & timespec)1252 double utils::timespec2seconds(const std::string ×pec)
1253 {
1254 double vals[3];
1255 int i = 0;
1256
1257 // first handle allowed textual inputs
1258 if (timespec == "off") return -1.0;
1259 if (timespec == "unlimited") return -1.0;
1260
1261 vals[0] = vals[1] = vals[2] = 0;
1262
1263 ValueTokenizer values(timespec, ":");
1264
1265 try {
1266 for (i = 0; i < 3; i++) {
1267 if (!values.has_next()) break;
1268 vals[i] = values.next_int();
1269 }
1270 } catch (TokenizerException &e) {
1271 return -1.0;
1272 }
1273
1274 if (i == 3)
1275 return (vals[0] * 60 + vals[1]) * 60 + vals[2];
1276 else if (i == 2)
1277 return vals[0] * 60 + vals[1];
1278 return vals[0];
1279 }
1280
/* ----------------------------------------------------------------------
   convert a LAMMPS version date (1Jan01) to a number

   The result has the form YYYYMMDD.  The leading digits are the day,
   the digits after the month name are the year (two-digit years are
   taken to be in the 2000s), and the month is recognized by its
   three-letter English abbreviation at the start of the month name.
   Parts that are missing from the string contribute zero, so malformed
   or empty input does not throw.
------------------------------------------------------------------------- */

int utils::date2num(const std::string &date)
{
  static const char *const months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};

  // the leading run of digits (and blanks) is the day of the month
  std::size_t found = date.find_first_not_of("0123456789 ");
  int num = strtol(date.substr(0, found).c_str(), nullptr, 10);

  // without any non-digit character there is no month or year to parse,
  // and substr(npos) would throw std::out_of_range
  const std::string month = (found == std::string::npos) ? std::string() : date.substr(found);

  // the year starts at the first digit or blank after the month name
  found = month.find_first_of("0123456789 ");
  if (found != std::string::npos) num += strtol(month.substr(found).c_str(), nullptr, 10) * 10000;
  if (num < 1000000) num += 20000000;

  for (int m = 0; m < 12; ++m) {
    if (month.compare(0, 3, months[m]) == 0) {
      num += (m + 1) * 100;
      break;
    }
  }
  return num;
}
1320
1321 /* ----------------------------------------------------------------------
1322 get formatted string of current date from fmtlib
1323 ------------------------------------------------------------------------- */
1324
current_date()1325 std::string utils::current_date()
1326 {
1327 time_t tv = time(nullptr);
1328 std::tm today = fmt::localtime(tv);
1329 return fmt::format("{:%Y-%m-%d}", today);
1330 }
1331
/* ----------------------------------------------------------------------
   binary search in vector of ascending doubles

   Returns the index i with haystack[i] <= needle < haystack[i+1].
   A needle below the first element returns 0, a needle at or above the
   last element returns n-1.  The haystack must hold n >= 1 elements.
   A NaN needle compares false against everything; the search still
   terminates and returns an index inside the haystack.
------------------------------------------------------------------------- */

int utils::binary_search(const double needle, const int n, const double *haystack)
{
  int lo = 0;
  int hi = n - 1;

  if (needle < haystack[lo]) return lo;
  if (needle >= haystack[hi]) return hi;

  // ensure haystack[lo] <= needle < haystack[hi] at every iteration
  // done when lo,hi are adjacent

  while (lo < hi - 1) {
    const int mid = lo + (hi - lo) / 2;
    // a plain else guarantees that the interval shrinks in every iteration
    if (needle < haystack[mid])
      hi = mid;
    else
      lo = mid;
  }

  return lo;
}
1358
1359 /* ----------------------------------------------------------------------
1360 * Merge sort part 1: Loop over sublists doubling in size with each iteration.
1361 * Pre-sort small sublists with insertion sort for better overall performance.
1362 ------------------------------------------------------------------------- */
1363
merge_sort(int * index,int num,void * ptr,int (* comp)(int,int,void *))1364 void utils::merge_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *))
1365 {
1366 if (num < 2) return;
1367
1368 int chunk, i, j;
1369
1370 // do insertion sort on chunks of up to 64 elements
1371
1372 chunk = 64;
1373 for (i = 0; i < num; i += chunk) {
1374 j = (i + chunk > num) ? num - i : chunk;
1375 insertion_sort(index + i, j, ptr, comp);
1376 }
1377
1378 // already done?
1379
1380 if (chunk >= num) return;
1381
1382 // continue with merge sort on the pre-sorted chunks.
1383 // we need an extra buffer for temporary storage and two
1384 // pointers to operate on, so we can swap the pointers
1385 // rather than copying to the hold buffer in each pass
1386
1387 int *buf = new int[num];
1388 int *dest = index;
1389 int *hold = buf;
1390
1391 while (chunk < num) {
1392 int m;
1393
1394 // swap hold and destination buffer
1395
1396 int *tmp = dest;
1397 dest = hold;
1398 hold = tmp;
1399
1400 // merge from hold array to destination array
1401
1402 for (i = 0; i < num - 1; i += 2 * chunk) {
1403 j = i + 2 * chunk;
1404 if (j > num) j = num;
1405 m = i + chunk;
1406 if (m > num) m = num;
1407 do_merge(dest, hold, i, m, m, j, ptr, comp);
1408 }
1409
1410 // copy all indices not handled by the chunked merge sort loop
1411
1412 for (; i < num; i++) dest[i] = hold[i];
1413 chunk *= 2;
1414 }
1415
1416 // if the final sorted data is in buf, copy back to index
1417
1418 if (dest == buf) memcpy(index, buf, sizeof(int) * num);
1419
1420 delete[] buf;
1421 }
1422
1423 /* ------------------------------------------------------------------ */
1424
/* ----------------------------------------------------------------------
 * Merge sort part 2: Insertion sort for pre-sorting of small chunks.
 * Sorts the num entries of index in place; an entry moves in front of
 * another only if comp(other, entry, ptr) is positive.
------------------------------------------------------------------------- */

void insertion_sort(int *index, int num, void *ptr, int (*comp)(int, int, void *))
{
  for (int i = 1; i < num; ++i) {
    const int moving = index[i];

    // shift larger entries one slot up until the position of moving is found
    int slot = i;
    while ((slot > 0) && ((*comp)(index[slot - 1], moving, ptr) > 0)) {
      index[slot] = index[slot - 1];
      --slot;
    }
    index[slot] = moving;
  }
}
1445
/* ----------------------------------------------------------------------
 * Merge sort part 3: Merge the sorted sublists buf[llo,lhi) and
 * buf[rlo,rhi) into idx, starting at idx[llo].  The left entry is taken
 * only if comp(left, right, ptr) is negative.
------------------------------------------------------------------------- */

static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, void *ptr,
                     int (*comp)(int, int, void *))
{
  int out = llo;
  int left = llo;
  int right = rlo;

  // pick from the two sublists while both still have entries
  while ((left < lhi) && (right < rhi)) {
    const bool take_left = (*comp)(buf[left], buf[right], ptr) < 0;
    if (take_left) {
      idx[out] = buf[left];
      ++left;
    } else {
      idx[out] = buf[right];
      ++right;
    }
    ++out;
  }

  // at most one of the sublists has entries left; append them
  for (; left < lhi; ++left) idx[out++] = buf[left];
  for (; right < rhi; ++right) idx[out++] = buf[right];
}
1466
1467 /* ------------------------------------------------------------------ */
1468
1469 extern "C" {
1470
/* Typedef'd pointers to get abstract datatypes:
   re_t points to the first node of a compiled pattern,
   re_ctx_t points to the storage a pattern is compiled into. */
typedef struct regex_t *re_t;
typedef struct regex_context_t *re_ctx_t;

/* Compile regex string pattern to a regex_t-array stored in context.
   Returns 0 for a pattern it rejects. */
static re_t re_compile(re_ctx_t context, const char *pattern);

/* Find matches of the compiled pattern inside text.
   Returns the index of the match or -1, and stores the length of
   the match in *matchlen. */
static int re_matchp(const char *text, re_t pattern, int *matchlen);
1480
1481 /* Definitions: */
1482
#define MAX_REGEXP_OBJECTS 30 /* Max number of regex symbols in expression. */
#define MAX_CHAR_CLASS_LEN 40 /* Max length of the buffer shared by all character classes. */
1485
/* Node types of a compiled pattern; one of these is stored in regex_t::type.
   The characters in the comments are the pattern syntax that re_compile()
   translates into the respective type. */
enum {
  RX_UNUSED,         /* sentinel that marks the end of the compiled pattern */
  RX_DOT,            /* '.' */
  RX_BEGIN,          /* '^' */
  RX_END,            /* '$' */
  RX_QUESTIONMARK,   /* '?' */
  RX_STAR,           /* '*' */
  RX_PLUS,           /* '+' */
  RX_CHAR,           /* literal character, stored in u.ch */
  RX_CHAR_CLASS,     /* [...], class characters referenced by u.ccl */
  RX_INV_CHAR_CLASS, /* [^...], class characters referenced by u.ccl */
  RX_DIGIT,          /* \d */
  RX_NOT_DIGIT,      /* \D */
  RX_INTEGER,        /* \i */
  RX_NOT_INTEGER,    /* \I */
  RX_FLOAT,          /* \f */
  RX_NOT_FLOAT,      /* \F */
  RX_ALPHA,          /* \w */
  RX_NOT_ALPHA,      /* \W */
  RX_WHITESPACE,     /* \s */
  RX_NOT_WHITESPACE /*, BRANCH */ /* \S */
};
1508
/* One node of a compiled pattern. Which member of the union is valid
   depends on type: u.ch for RX_CHAR, u.ccl for the two character-class types. */
typedef struct regex_t {
  unsigned char type; /* CHAR, STAR, etc. */
  union {
    unsigned char ch;   /*      the character itself             */
    unsigned char *ccl; /*  OR  a pointer to characters in class */
  } u;
} regex_t;
1516
/* Storage for one compiled pattern. re_match() and re_find() create it as a
   local variable, so every call compiles into its own storage. */
typedef struct regex_context_t {
  /* MAX_REGEXP_OBJECTS is the max number of symbols in the expression.
     MAX_CHAR_CLASS_LEN determines the size of buffer for chars in all char-classes in the expression. */
  regex_t re_compiled[MAX_REGEXP_OBJECTS];
  unsigned char ccl_buf[MAX_CHAR_CLASS_LEN];
} regex_context_t;
1523
re_match(const char * text,const char * pattern)1524 int re_match(const char *text, const char *pattern)
1525 {
1526 regex_context_t context;
1527 int dummy;
1528 return re_matchp(text, re_compile(&context, pattern), &dummy);
1529 }
1530
re_find(const char * text,const char * pattern,int * matchlen)1531 int re_find(const char *text, const char *pattern, int *matchlen)
1532 {
1533 regex_context_t context;
1534 return re_matchp(text, re_compile(&context, pattern), matchlen);
1535 }
1536
/* Private function declarations: */

/* matchers for a sequence of pattern nodes and for the quantifiers '*' and '+' */
static int matchpattern(regex_t *pattern, const char *text, int *matchlen);
static int matchcharclass(char c, const char *str);
static int matchstar(regex_t p, regex_t *pattern, const char *text, int *matchlen);
static int matchplus(regex_t p, regex_t *pattern, const char *text, int *matchlen);
/* test of a single character against a single pattern node */
static int matchone(regex_t p, char c);
/* predicates for single characters */
static int matchdigit(char c);
static int matchint(char c);
static int matchfloat(char c);
static int matchalpha(char c);
static int matchwhitespace(char c);
static int matchmetachar(char c, const char *str);
static int matchrange(char c, const char *str);
static int matchdot(char c);
static int ismetachar(char c);
1552
1553 /* Semi-public functions: */
re_matchp(const char * text,re_t pattern,int * matchlen)1554 int re_matchp(const char *text, re_t pattern, int *matchlen)
1555 {
1556 *matchlen = 0;
1557 if (pattern != 0) {
1558 if (pattern[0].type == RX_BEGIN) {
1559 return ((matchpattern(&pattern[1], text, matchlen)) ? 0 : -1);
1560 } else {
1561 int idx = -1;
1562
1563 do {
1564 idx += 1;
1565
1566 if (matchpattern(pattern, text, matchlen)) {
1567 if (text[0] == '\0') return -1;
1568
1569 return idx;
1570 }
1571 } while (*text++ != '\0');
1572 }
1573 }
1574 return -1;
1575 }
1576
re_compile(re_ctx_t context,const char * pattern)1577 re_t re_compile(re_ctx_t context, const char *pattern)
1578 {
1579 regex_t *const re_compiled = context->re_compiled;
1580 unsigned char *const ccl_buf = context->ccl_buf;
1581 int ccl_bufidx = 1;
1582
1583 char c; /* current char in pattern */
1584 int i = 0; /* index into pattern */
1585 int j = 0; /* index into re_compiled */
1586
1587 while (pattern[i] != '\0' && (j + 1 < MAX_REGEXP_OBJECTS)) {
1588 c = pattern[i];
1589
1590 switch (c) {
1591 /* Meta-characters: */
1592 case '^': {
1593 re_compiled[j].type = RX_BEGIN;
1594 } break;
1595 case '$': {
1596 re_compiled[j].type = RX_END;
1597 } break;
1598 case '.': {
1599 re_compiled[j].type = RX_DOT;
1600 } break;
1601 case '*': {
1602 re_compiled[j].type = RX_STAR;
1603 } break;
1604 case '+': {
1605 re_compiled[j].type = RX_PLUS;
1606 } break;
1607 case '?': {
1608 re_compiled[j].type = RX_QUESTIONMARK;
1609 } break;
1610
1611 /* Escaped character-classes (\s \w ...): */
1612 case '\\': {
1613 if (pattern[i + 1] != '\0') {
1614 /* Skip the escape-char '\\' */
1615 i += 1;
1616 /* ... and check the next */
1617 switch (pattern[i]) {
1618 /* Meta-character: */
1619 case 'd': {
1620 re_compiled[j].type = RX_DIGIT;
1621 } break;
1622 case 'D': {
1623 re_compiled[j].type = RX_NOT_DIGIT;
1624 } break;
1625 case 'i': {
1626 re_compiled[j].type = RX_INTEGER;
1627 } break;
1628 case 'I': {
1629 re_compiled[j].type = RX_NOT_INTEGER;
1630 } break;
1631 case 'f': {
1632 re_compiled[j].type = RX_FLOAT;
1633 } break;
1634 case 'F': {
1635 re_compiled[j].type = RX_NOT_FLOAT;
1636 } break;
1637 case 'w': {
1638 re_compiled[j].type = RX_ALPHA;
1639 } break;
1640 case 'W': {
1641 re_compiled[j].type = RX_NOT_ALPHA;
1642 } break;
1643 case 's': {
1644 re_compiled[j].type = RX_WHITESPACE;
1645 } break;
1646 case 'S': {
1647 re_compiled[j].type = RX_NOT_WHITESPACE;
1648 } break;
1649
1650 /* Escaped character, e.g. '.' or '$' */
1651 default: {
1652 re_compiled[j].type = RX_CHAR;
1653 re_compiled[j].u.ch = pattern[i];
1654 } break;
1655 }
1656 }
1657 /* '\\' as last char in pattern -> invalid regular expression. */
1658 } break;
1659
1660 /* Character class: */
1661 case '[': {
1662 /* Remember where the char-buffer starts. */
1663 int buf_begin = ccl_bufidx;
1664
1665 /* Look-ahead to determine if negated */
1666 if (pattern[i + 1] == '^') {
1667 re_compiled[j].type = RX_INV_CHAR_CLASS;
1668 i += 1; /* Increment i to avoid including '^' in the char-buffer */
1669 if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '^' */
1670 {
1671 return 0;
1672 }
1673 } else {
1674 re_compiled[j].type = RX_CHAR_CLASS;
1675 }
1676
1677 /* Copy characters inside [..] to buffer */
1678 while ((pattern[++i] != ']') && (pattern[i] != '\0')) {
1679 /* Missing ] */
1680 if (pattern[i] == '\\') {
1681 if (ccl_bufidx >= MAX_CHAR_CLASS_LEN - 1) { return 0; }
1682 if (pattern[i + 1] == 0) /* incomplete pattern, missing non-zero char after '\\' */
1683 {
1684 return 0;
1685 }
1686 ccl_buf[ccl_bufidx++] = pattern[i++];
1687 } else if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) {
1688 return 0;
1689 }
1690 ccl_buf[ccl_bufidx++] = pattern[i];
1691 }
1692 if (ccl_bufidx >= MAX_CHAR_CLASS_LEN) {
1693 /* Catches cases such as [00000000000000000000000000000000000000][ */
1694 return 0;
1695 }
1696 /* Null-terminate string end */
1697 ccl_buf[ccl_bufidx++] = 0;
1698 re_compiled[j].u.ccl = &ccl_buf[buf_begin];
1699 } break;
1700
1701 /* Other characters: */
1702 default: {
1703 re_compiled[j].type = RX_CHAR;
1704 re_compiled[j].u.ch = c;
1705 } break;
1706 }
1707 /* no buffer-out-of-bounds access on invalid patterns -
1708 * see https://github.com/kokke/tiny-regex-c/commit/1a279e04014b70b0695fba559a7c05d55e6ee90b */
1709 if (pattern[i] == 0) { return 0; }
1710
1711 i += 1;
1712 j += 1;
1713 }
1714 /* 'RX_UNUSED' is a sentinel used to indicate end-of-pattern */
1715 re_compiled[j].type = RX_UNUSED;
1716
1717 return (re_t) re_compiled;
1718 }
1719
1720 /* Private functions: */
/* Return 1 if c is one of the characters '0' to '9', else 0. */
static int matchdigit(char c)
{
  if ((c < '0') || (c > '9')) return 0;
  return 1;
}
1725
matchint(char c)1726 static int matchint(char c)
1727 {
1728 return (matchdigit(c) || (c == '-') || (c == '+'));
1729 }
1730
matchfloat(char c)1731 static int matchfloat(char c)
1732 {
1733 return (matchint(c) || (c == '.') || (c == 'e') || (c == 'E'));
1734 }
1735
/* Return 1 if c is a letter in 'a'-'z' or 'A'-'Z', else 0. */
static int matchalpha(char c)
{
  const bool lower = (c >= 'a') && (c <= 'z');
  const bool upper = (c >= 'A') && (c <= 'Z');
  return lower || upper;
}
1740
/* Return 1 if c is a blank, tab, newline, carriage return, form feed,
   or vertical tab, else 0. */
static int matchwhitespace(char c)
{
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
      return 1;
    default:
      return 0;
  }
}
1745
matchalphanum(char c)1746 static int matchalphanum(char c)
1747 {
1748 return ((c == '_') || matchalpha(c) || matchdigit(c));
1749 }
1750
/* Return 1 if str begins with a range of the form "x-y" and c lies
   within it (bounds included), else 0. A '-' never matches a range. */
static int matchrange(char c, const char *str)
{
  if (c == '-') return 0;

  /* the range needs a lower bound that is not itself a '-' ... */
  if ((str[0] == '\0') || (str[0] == '-')) return 0;
  /* ... followed by '-' and an upper bound */
  if (str[1] != '-') return 0;
  if (str[2] == '\0') return 0;

  return (c >= str[0]) && (c <= str[2]);
}
1756
/* Return 1 if the '.' symbol matches c. Newline and carriage return are
   only matched if RE_DOT_MATCHES_NEWLINE is defined as 1. */
static int matchdot(char c)
{
#if defined(RE_DOT_MATCHES_NEWLINE) && (RE_DOT_MATCHES_NEWLINE == 1)
  (void) c;
  return 1;
#else
  return !((c == '\n') || (c == '\r'));
#endif
}
1766
/* Return 1 if c is a letter that forms a meta-character when it follows a
   backslash, else 0. The list covers every letter that re_compile() and
   matchmetachar() treat as a class (s, w, d, i, f and their upper-case
   negations), so that an escaped class inside [...] does not also match
   its own letter literally. */
static int ismetachar(char c)
{
  switch (c) {
    case 's':
    case 'S':
    case 'w':
    case 'W':
    case 'd':
    case 'D':
    case 'i':
    case 'I':
    case 'f':
    case 'F':
      return 1;
    default:
      return 0;
  }
}
1771
matchmetachar(char c,const char * str)1772 static int matchmetachar(char c, const char *str)
1773 {
1774 switch (str[0]) {
1775 case 'd':
1776 return matchdigit(c);
1777 case 'D':
1778 return !matchdigit(c);
1779 case 'i':
1780 return matchint(c);
1781 case 'I':
1782 return !matchint(c);
1783 case 'f':
1784 return matchfloat(c);
1785 case 'F':
1786 return !matchfloat(c);
1787 case 'w':
1788 return matchalphanum(c);
1789 case 'W':
1790 return !matchalphanum(c);
1791 case 's':
1792 return matchwhitespace(c);
1793 case 'S':
1794 return !matchwhitespace(c);
1795 default:
1796 return (c == str[0]);
1797 }
1798 }
1799
/* Return 1 if c is matched by the NUL-terminated character class str
   (the text between '[' and ']' as stored by re_compile()), else 0.
   The class is scanned from left to right; at each position a range,
   an escaped symbol, or a literal character is tried. */
static int matchcharclass(char c, const char *str)
{
  do {
    if (matchrange(c, str)) {
      return 1;
    } else if (str[0] == '\\') {
      /* Escape-char: increment str-ptr and match on next char */
      str += 1;
      if (matchmetachar(c, str)) {
        return 1;
      } else if ((c == str[0]) && !ismetachar(c)) {
        /* escaped character that is not a class letter: literal match */
        return 1;
      }
    } else if (c == str[0]) {
      if (c == '-') {
        /* a literal '-' only counts as the first or last character of the class;
           str[-1] is the byte stored in front of this position in the class buffer,
           which is expected to be NUL when '-' is the first class character */
        return ((str[-1] == '\0') || (str[1] == '\0'));
      } else {
        return 1;
      }
    }
  } while (*str++ != '\0');

  return 0;
}
1824
matchone(regex_t p,char c)1825 static int matchone(regex_t p, char c)
1826 {
1827 switch (p.type) {
1828 case RX_DOT:
1829 return matchdot(c);
1830 case RX_CHAR_CLASS:
1831 return matchcharclass(c, (const char *) p.u.ccl);
1832 case RX_INV_CHAR_CLASS:
1833 return !matchcharclass(c, (const char *) p.u.ccl);
1834 case RX_DIGIT:
1835 return matchdigit(c);
1836 case RX_NOT_DIGIT:
1837 return !matchdigit(c);
1838 case RX_INTEGER:
1839 return matchint(c);
1840 case RX_NOT_INTEGER:
1841 return !matchint(c);
1842 case RX_FLOAT:
1843 return matchfloat(c);
1844 case RX_NOT_FLOAT:
1845 return !matchfloat(c);
1846 case RX_ALPHA:
1847 return matchalphanum(c);
1848 case RX_NOT_ALPHA:
1849 return !matchalphanum(c);
1850 case RX_WHITESPACE:
1851 return matchwhitespace(c);
1852 case RX_NOT_WHITESPACE:
1853 return !matchwhitespace(c);
1854 default:
1855 return (p.u.ch == c);
1856 }
1857 }
1858
/* Match zero or more occurrences of node p followed by the rest of the
   pattern. Returns 1 on success with *matchlen advanced by the matched
   length; returns 0 and restores *matchlen on failure. */
static int matchstar(regex_t p, regex_t *pattern, const char *text, int *matchlen)
{
  int prelen = *matchlen;
  const char *prepos = text;
  /* greedy: consume as many characters as p accepts */
  while ((text[0] != '\0') && matchone(p, *text)) {
    text++;
    (*matchlen)++;
  }
  /* then give characters back one at a time until the rest of the pattern matches;
     text-- passes the current position and steps back afterwards */
  while (text >= prepos) {
    if (matchpattern(pattern, text--, matchlen)) return 1;
    (*matchlen)--;
  }

  *matchlen = prelen;
  return 0;
}
1875
/* Match one or more occurrences of node p followed by the rest of the
   pattern. Returns 1 on success with *matchlen advanced by the matched
   length, else 0. */
static int matchplus(regex_t p, regex_t *pattern, const char *text, int *matchlen)
{
  const char *prepos = text;
  /* greedy: consume as many characters as p accepts */
  while ((text[0] != '\0') && matchone(p, *text)) {
    text++;
    (*matchlen)++;
  }
  /* give characters back one at a time, but keep at least one for p
     (text > prepos, in contrast to matchstar) */
  while (text > prepos) {
    if (matchpattern(pattern, text--, matchlen)) return 1;
    (*matchlen)--;
  }
  return 0;
}
1889
/* Match zero or one occurrence of node p followed by the rest of the
   pattern. Also called by matchpattern() with the end-of-pattern node,
   in which case it reports success right away. */
static int matchquestion(regex_t p, regex_t *pattern, const char *text, int *matchlen)
{
  /* end of pattern reached: everything before it has matched */
  if (p.type == RX_UNUSED) return 1;
  /* first try to match the rest of the pattern without consuming a character for p */
  if (matchpattern(pattern, text, matchlen)) return 1;
  /* then let p consume one character and try the rest again */
  if (*text && matchone(p, *text++)) {
    if (matchpattern(pattern, text, matchlen)) {
      (*matchlen)++;
      return 1;
    }
  }
  return 0;
}
1902
/* Iterative matching */
/* Match the node sequence starting at pattern against the beginning of text.
   Plain nodes are matched one character at a time in the loop; a node that is
   followed by '?', '*', or '+' is handed to the respective quantifier function
   together with the remainder of the pattern.
   Returns 1 on success with *matchlen advanced by the matched length;
   returns 0 and restores *matchlen on failure. */
static int matchpattern(regex_t *pattern, const char *text, int *matchlen)
{
  int pre = *matchlen;
  do {
    if ((pattern[0].type == RX_UNUSED) || (pattern[1].type == RX_QUESTIONMARK)) {
      /* also taken at the end of the pattern: matchquestion() returns 1 for RX_UNUSED */
      return matchquestion(pattern[0], &pattern[2], text, matchlen);
    } else if (pattern[1].type == RX_STAR) {
      return matchstar(pattern[0], &pattern[2], text, matchlen);
    } else if (pattern[1].type == RX_PLUS) {
      return matchplus(pattern[0], &pattern[2], text, matchlen);
    } else if ((pattern[0].type == RX_END) && pattern[1].type == RX_UNUSED) {
      /* '$' as the last symbol only matches at the end of the text */
      return (text[0] == '\0');
    }
    /* count the character that the loop condition is about to test */
    (*matchlen)++;
  } while ((text[0] != '\0') && matchone(*pattern++, *text++));

  *matchlen = pre;
  return 0;
}
1923 }
1924