1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2011-2018, The GROMACS development team.
5 * Copyright (c) 2019, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
9 *
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
14 *
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
32 *
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
35 */
36 /*! \internal \file
37 * \brief
38 * Implements functions and classes in stringutil.h.
39 *
40 * \author Teemu Murtola <teemu.murtola@gmail.com>
41 * \ingroup module_utility
42 */
43 #include "gmxpre.h"
44
45 #include "stringutil.h"
46
47 #include <cctype>
48 #include <cstdarg>
49 #include <cstdio>
50 #include <cstring>
51
52 #include <algorithm>
53 #include <sstream>
54 #include <string>
55 #include <vector>
56
57 #include "gromacs/utility/exceptions.h"
58 #include "gromacs/utility/gmxassert.h"
59
60 namespace gmx
61 {
62
// Counts the words in the NUL-terminated string s. A word is a maximal run of
// characters for which std::isalnum() is true; every other character acts as
// a separator. s must not be null, since it is passed to std::strlen().
std::size_t countWords(const char* s)
{
    // The <cctype> classifiers have undefined behavior for negative arguments
    // (other than EOF), which plain char can produce for non-ASCII bytes, so
    // each character is converted to unsigned char before classification.
    const auto isWordCharacter = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0;
    };

    std::size_t nWords = 0;
    // Compute the length once to avoid N^2 complexity from calling strlen(s) every iteration
    const std::size_t length = std::strlen(s);

    for (std::size_t i = 0; i < length; i++)
    {
        // If we found a new word, increase counter and step through the word
        if (isWordCharacter(s[i]))
        {
            ++nWords;
            // The terminating '\0' is not alphanumerical, so this stops at the string end
            while (isWordCharacter(s[i]))
            {
                // This might increment i to the string end, and then the outer
                // loop will increment i one unit beyond that, but since
                // we compare to the string length in the outer loop this is fine.
                i++;
            }
        }
    }
    return nWords;
}
87
88
// Counts the words in str by handing its C-string view to the
// countWords(const char*) overload.
std::size_t countWords(const std::string& str)
{
    // Under our beautiful C++ interface hides an ugly c-string implementation :-)
    const char* const text = str.c_str();
    return countWords(text);
}
94
endsWith(const char * str,const char * suffix)95 bool endsWith(const char* str, const char* suffix)
96 {
97 if (isNullOrEmpty(suffix))
98 {
99 return true;
100 }
101 const size_t strLength = std::strlen(str);
102 const size_t suffixLength = std::strlen(suffix);
103 return (strLength >= suffixLength && std::strcmp(&str[strLength - suffixLength], suffix) == 0);
104 }
105
stripSuffixIfPresent(const std::string & str,const char * suffix)106 std::string stripSuffixIfPresent(const std::string& str, const char* suffix)
107 {
108 if (suffix != nullptr)
109 {
110 size_t suffixLength = std::strlen(suffix);
111 if (suffixLength > 0 && endsWith(str, suffix))
112 {
113 return str.substr(0, str.length() - suffixLength);
114 }
115 }
116 return str;
117 }
118
// Returns a copy of str with all leading and trailing characters for which
// std::isspace() is true removed. A string consisting only of such characters
// yields an empty string.
std::string stripString(const std::string& str)
{
    // std::isspace() has undefined behavior for negative arguments (other than
    // EOF), which plain char can produce for non-ASCII bytes, so convert to
    // unsigned char before classification.
    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };

    std::string::const_iterator start = str.begin();
    std::string::const_iterator end   = str.end();
    while (start != end && isSpace(*start))
    {
        ++start;
    }
    // end is one past the last kept character, so inspect the one before it.
    while (start != end && isSpace(*(end - 1)))
    {
        --end;
    }
    return std::string(start, end);
}
133
formatString(gmx_fmtstr const char * fmt,...)134 std::string formatString(gmx_fmtstr const char* fmt, ...)
135 {
136 va_list ap;
137 va_start(ap, fmt);
138 std::string result = formatStringV(fmt, ap);
139 va_end(ap);
140 return result;
141 }
142
// Formats fmt with the arguments in ap (vsnprintf semantics) and returns the
// result as a std::string. Formatting is first attempted into a 1024-byte
// stack buffer; if that is too small, a heap buffer is grown until the output
// fits.
std::string formatStringV(const char* fmt, va_list ap)
{
    char              stackBuffer[1024];
    std::vector<char> heapBuffer;
    char*             target   = stackBuffer;
    int               capacity = 1024;

    // TODO: There may be a better way of doing this on Windows, Microsoft
    // provides their own way of doing things...
    for (;;)
    {
        // Every attempt formats from a fresh copy of ap, leaving ap itself
        // available for a retry with a larger buffer.
        va_list attempt;
        va_copy(attempt, ap);
        const int written = vsnprintf(target, capacity, fmt, attempt);
        va_end(attempt);

        if (written >= 0 && written < capacity)
        {
            // Output and its terminating '\0' fit into the buffer.
            return std::string(target);
        }

        // A non-negative return value is the number of characters needed
        // (excluding the terminator); a negative one gives no size hint, so
        // the buffer is simply doubled.
        capacity = (written >= 0) ? written + 1 : capacity * 2;
        heapBuffer.resize(capacity);
        target = heapBuffer.data();
    }
}
175
// Splits str into tokens separated by runs of characters for which
// std::isspace() is true. Leading and trailing whitespace produces no empty
// tokens; an empty or all-whitespace input yields an empty vector.
std::vector<std::string> splitString(const std::string& str)
{
    // std::isspace() has undefined behavior for negative arguments (other than
    // EOF), which plain char can produce for non-ASCII bytes, so convert to
    // unsigned char before classification.
    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };

    std::vector<std::string>          result;
    std::string::const_iterator       currPos = str.begin();
    const std::string::const_iterator end     = str.end();
    while (currPos != end)
    {
        // Skip the separator run, then consume one token.
        const std::string::const_iterator startPos = std::find_if_not(currPos, end, isSpace);
        currPos                                    = std::find_if(startPos, end, isSpace);
        // startPos == end means only trailing whitespace was left.
        if (startPos != end)
        {
            result.emplace_back(startPos, currPos);
        }
    }
    return result;
}
199
// Splits str at every occurrence of delim and returns the pieces in order.
// Empty pieces are kept, so a leading, trailing or doubled delimiter produces
// empty strings in the result. An empty input produces an empty vector.
std::vector<std::string> splitDelimitedString(const std::string& str, char delim)
{
    std::vector<std::string> fields;
    if (str.empty())
    {
        return fields;
    }

    size_t fieldStart = 0;
    for (;;)
    {
        const size_t delimPos = str.find(delim, fieldStart);
        if (delimPos == std::string::npos)
        {
            // No further delimiter: the remainder (possibly empty) is the last field.
            fields.push_back(str.substr(fieldStart));
            break;
        }
        fields.push_back(str.substr(fieldStart, delimPos - fieldStart));
        fieldStart = delimPos + 1;
    }
    return fields;
}
217
splitAndTrimDelimitedString(const std::string & str,char delim)218 std::vector<std::string> splitAndTrimDelimitedString(const std::string& str, char delim)
219 {
220 std::vector<std::string> result;
221
222 result = splitDelimitedString(str, delim);
223 std::transform(result.begin(), result.end(), result.begin(), stripString);
224 return result;
225 }
226
227 namespace
228 {
229
/*! \brief
 * Helper function to identify word boundaries for replaceAllWords().
 *
 * A character is part of a word if std::isalnum() accepts it or if it is
 * a hyphen or an underscore.
 *
 * \returns  `true` if the character is considered part of a word.
 *
 * \ingroup module_utility
 */
bool isWordChar(char c)
{
    // std::isalnum() has undefined behavior for negative arguments (other than
    // EOF), which plain char can produce for non-ASCII bytes, so convert to
    // unsigned char before classification.
    return (std::isalnum(static_cast<unsigned char>(c)) != 0) || c == '-' || c == '_';
}
241
242 /*! \brief
243 * Common implementation for string replacement functions.
244 *
245 * \param[in] input Input string.
246 * \param[in] from String to find.
247 * \param[in] to String to use to replace \p from.
248 * \param[in] bWholeWords Whether to only consider matches to whole words.
249 * \returns \p input with all occurrences of \p from replaced with \p to.
250 * \throws std::bad_alloc if out of memory.
251 *
252 * \ingroup module_utility
253 */
replaceInternal(const std::string & input,const char * from,const char * to,bool bWholeWords)254 std::string replaceInternal(const std::string& input, const char* from, const char* to, bool bWholeWords)
255 {
256 GMX_RELEASE_ASSERT(from != nullptr && to != nullptr, "Replacement strings must not be NULL");
257 size_t matchLength = std::strlen(from);
258 std::string result;
259 size_t inputPos = 0;
260 size_t matchPos = input.find(from);
261 while (matchPos < input.length())
262 {
263 size_t matchEnd = matchPos + matchLength;
264 if (bWholeWords)
265 {
266 if (!((matchPos == 0 || !isWordChar(input[matchPos - 1]))
267 && (matchEnd == input.length() || !isWordChar(input[matchEnd]))))
268 {
269 matchPos = input.find(from, matchPos + 1);
270 continue;
271 }
272 }
273 result.append(input, inputPos, matchPos - inputPos);
274 result.append(to);
275 inputPos = matchEnd;
276 matchPos = input.find(from, inputPos);
277 }
278 result.append(input, inputPos, matchPos - inputPos);
279 return result;
280 }
281
282 } // namespace
283
replaceAll(const std::string & input,const char * from,const char * to)284 std::string replaceAll(const std::string& input, const char* from, const char* to)
285 {
286 return replaceInternal(input, from, to, false);
287 }
288
replaceAll(const std::string & input,const std::string & from,const std::string & to)289 std::string replaceAll(const std::string& input, const std::string& from, const std::string& to)
290 {
291 return replaceInternal(input, from.c_str(), to.c_str(), false);
292 }
293
replaceAllWords(const std::string & input,const char * from,const char * to)294 std::string replaceAllWords(const std::string& input, const char* from, const char* to)
295 {
296 return replaceInternal(input, from, to, true);
297 }
298
replaceAllWords(const std::string & input,const std::string & from,const std::string & to)299 std::string replaceAllWords(const std::string& input, const std::string& from, const std::string& to)
300 {
301 return replaceInternal(input, from.c_str(), to.c_str(), true);
302 }
303
// Returns true if source and target have the same length and are equal
// character by character after conversion with std::tolower().
bool equalCaseInsensitive(const std::string& source, const std::string& target)
{
    // std::tolower() has undefined behavior for negative arguments (other than
    // EOF), which plain char can produce for non-ASCII bytes, so convert to
    // unsigned char first.
    const auto sameIgnoringCase = [](char s, char t) {
        return std::tolower(static_cast<unsigned char>(s))
               == std::tolower(static_cast<unsigned char>(t));
    };
    // The length check comes first so that std::equal never reads past target.
    return source.length() == target.length()
           && std::equal(source.begin(), source.end(), target.begin(), sameIgnoringCase);
}
311
// Case-insensitive comparison of at most maxLengthOfComparison leading
// characters of source and target (characters are compared after conversion
// with std::tolower()).
//
// If source is shorter than maxLengthOfComparison, the strings must have the
// same length and match completely. Otherwise target must contain at least
// maxLengthOfComparison characters and the first maxLengthOfComparison
// characters of both strings must match.
bool equalCaseInsensitive(const std::string& source, const std::string& target, size_t maxLengthOfComparison)
{
    // std::tolower() has undefined behavior for negative arguments (other than
    // EOF), which plain char can produce for non-ASCII bytes, so convert to
    // unsigned char first.
    const auto sameIgnoringCase = [](char s, char t) {
        return std::tolower(static_cast<unsigned char>(s))
               == std::tolower(static_cast<unsigned char>(t));
    };

    std::string::const_iterator comparisonEnd;
    if (source.length() < maxLengthOfComparison)
    {
        // The whole of source takes part, so target must be exactly as long.
        if (source.length() != target.length())
        {
            return false;
        }
        comparisonEnd = source.end();
    }
    else
    {
        // Only a prefix takes part; target must be long enough to provide it.
        if (maxLengthOfComparison > target.length())
        {
            return false;
        }
        comparisonEnd = source.begin() + maxLengthOfComparison;
    }
    return std::equal(source.begin(), comparisonEnd, target.begin(), sameIgnoringCase);
}
334
335 /********************************************************************
336 * TextLineWrapperSettings
337 */
338
TextLineWrapperSettings()339 TextLineWrapperSettings::TextLineWrapperSettings() :
340 maxLength_(0),
341 indent_(0),
342 firstLineIndent_(-1),
343 bKeepFinalSpaces_(false),
344 continuationChar_('\0')
345 {
346 }
347
348
349 /********************************************************************
350 * TextLineWrapper
351 */
352
isTrivial() const353 bool TextLineWrapper::isTrivial() const
354 {
355 return settings_.lineLength() == 0 && settings_.indent() == 0 && settings_.firstLineIndent_ <= 0;
356 }
357
findNextLine(const char * input,size_t lineStart) const358 size_t TextLineWrapper::findNextLine(const char* input, size_t lineStart) const
359 {
360 size_t inputLength = std::strlen(input);
361 bool bFirstLine = (lineStart == 0 || input[lineStart - 1] == '\n');
362 // Ignore leading whitespace if necessary.
363 if (!bFirstLine)
364 {
365 lineStart += std::strspn(input + lineStart, " ");
366 if (lineStart >= inputLength)
367 {
368 return inputLength;
369 }
370 }
371
372 int indent = (bFirstLine ? settings_.firstLineIndent() : settings_.indent());
373 size_t lastAllowedBreakPoint =
374 (settings_.lineLength() > 0 ? std::min(lineStart + settings_.lineLength() - indent, inputLength)
375 : inputLength);
376 // Ignore trailing whitespace.
377 lastAllowedBreakPoint += std::strspn(input + lastAllowedBreakPoint, " ");
378 size_t lineEnd = lineStart;
379 do
380 {
381 const char* nextBreakPtr = std::strpbrk(input + lineEnd, " \n");
382 size_t nextBreak = (nextBreakPtr != nullptr ? nextBreakPtr - input : inputLength);
383 if (nextBreak > lastAllowedBreakPoint && lineEnd > lineStart)
384 {
385 break;
386 }
387 lineEnd = nextBreak + 1;
388 } while (lineEnd < lastAllowedBreakPoint && input[lineEnd - 1] != '\n');
389 return (lineEnd < inputLength ? lineEnd : inputLength);
390 }
391
findNextLine(const std::string & input,size_t lineStart) const392 size_t TextLineWrapper::findNextLine(const std::string& input, size_t lineStart) const
393 {
394 return findNextLine(input.c_str(), lineStart);
395 }
396
formatLine(const std::string & input,size_t lineStart,size_t lineEnd) const397 std::string TextLineWrapper::formatLine(const std::string& input, size_t lineStart, size_t lineEnd) const
398 {
399 size_t inputLength = input.length();
400 bool bFirstLine = (lineStart == 0 || input[lineStart - 1] == '\n');
401 // Strip leading whitespace if necessary.
402 if (!bFirstLine)
403 {
404 lineStart = input.find_first_not_of(' ', lineStart);
405 if (lineStart >= inputLength)
406 {
407 return std::string();
408 }
409 }
410 int indent = (bFirstLine ? settings_.firstLineIndent() : settings_.indent());
411 bool bContinuation = (lineEnd < inputLength && input[lineEnd - 1] != '\n');
412 // Remove explicit line breaks in input
413 // (the returned line should not contain line breaks).
414 while (lineEnd > lineStart && input[lineEnd - 1] == '\n')
415 {
416 --lineEnd;
417 }
418 // Strip trailing whitespace, unless they are explicit in the input and it
419 // has been requested to keep them.
420 if (bContinuation || !settings_.bKeepFinalSpaces_)
421 {
422 while (lineEnd > lineStart && std::isspace(input[lineEnd - 1]))
423 {
424 --lineEnd;
425 }
426 }
427
428 const size_t lineLength = lineEnd - lineStart;
429 if (lineLength == 0)
430 {
431 return std::string();
432 }
433 std::string result(indent, ' ');
434 result.append(input, lineStart, lineLength);
435 if (bContinuation && settings_.continuationChar_ != '\0')
436 {
437 result.append(1, ' ');
438 result.append(1, settings_.continuationChar_);
439 }
440 return result;
441 }
442
wrapToString(const std::string & input) const443 std::string TextLineWrapper::wrapToString(const std::string& input) const
444 {
445 std::string result;
446 size_t lineStart = 0;
447 size_t length = input.length();
448 while (lineStart < length)
449 {
450 size_t nextLineStart = findNextLine(input, lineStart);
451 result.append(formatLine(input, lineStart, nextLineStart));
452 if (nextLineStart < length || (nextLineStart == length && input[length - 1] == '\n'))
453 {
454 result.append("\n");
455 }
456 lineStart = nextLineStart;
457 }
458 return result;
459 }
460
wrapToVector(const std::string & input) const461 std::vector<std::string> TextLineWrapper::wrapToVector(const std::string& input) const
462 {
463 std::vector<std::string> result;
464 size_t lineStart = 0;
465 size_t length = input.length();
466 while (lineStart < length)
467 {
468 size_t nextLineStart = findNextLine(input, lineStart);
469 result.push_back(formatLine(input, lineStart, nextLineStart));
470 lineStart = nextLineStart;
471 }
472 return result;
473 }
474
475 } // namespace gmx
476