1 /*
2  * This program source code file is part of KiCad, a free EDA CAD application.
3  *
4  * Copyright (C) 2004-2021 KiCad Developers, see change_log.txt for contributors.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you may find one here:
18  * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
19  * or you may search the http://www.gnu.org website for the version 2 license,
20  * or you may write to the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
22  */
23 
24 #ifndef STRING_UTILS_H
25 #define STRING_UTILS_H
26 
27 #include "config.h"
28 
29 #include <string>
30 #include <vector>
31 #include <wx/string.h>
32 #include <wx/filename.h>
33 
34 
35 void ConvertMarkdown2Html( const wxString& aMarkdownInput, wxString& aHtmlOutput );
36 
37 /**
38  * Convert the old `~...~` overbar notation to the new `~{...}` one.
39  */
40 wxString ConvertToNewOverbarNotation( const wxString& aOldStr );
41 
42 /**
43  * Convert curly quotes and em/en dashes to straight quotes and dashes.
44  *
45  * @return true if any characters required conversion.
46  */
47 bool ConvertSmartQuotesAndDashes( wxString* aString );
48 
/**
 * Selects the context for which EscapeString() encodes reserved characters.
 *
 * The enumerators are unscoped and keep their implicit, consecutive values.
 */
enum ESCAPE_CONTEXT
{
    CTX_NETNAME,
    CTX_LIBID,
    CTX_QUOTED_STR,
    CTX_LINE,
    CTX_FILENAME,
    CTX_NO_SPACE    ///< to replace spaces in names that do not accept spaces
};
61 
62 /**
63  * The Escape/Unescape routines use HTML-entity-reference-style encoding to handle
64  * characters which are:
65  *   (a) not legal in filenames
66  *   (b) used as control characters in LIB_IDs
67  *   (c) used to delineate hierarchical paths
68  */
69 wxString EscapeString( const wxString& aSource, ESCAPE_CONTEXT aContext );
70 
71 wxString UnescapeString( const wxString& aSource );
72 
73 /**
74  * Remove markup (such as overbar or subscript) that we can't render to menu items.
75  */
76 wxString PrettyPrintForMenu( const wxString& aString );
77 
78 /**
79  * Capitalize the first letter in each word.
80  */
81 wxString TitleCaps( const wxString& aString );
82 
83 /**
84  * Copy bytes from @a aSource delimited string segment to @a aDest buffer.
85  *
86  * The extracted string will be null terminated even if truncation is necessary
87  * because aDestSize was not large enough.
88  *
89  * @param aDest is the destination byte buffer.
90  * @param aSource is the source bytes as a C string.
91  * @param aDestSize is the size of the destination byte buffer.
92  * @return the number of bytes read from source, which may be more than the number copied,
93  *         due to escaping of double quotes and the escape byte itself.
94  * @deprecated should use the one which fetches a wxString, below.
95  */
96 int ReadDelimitedText( char* aDest, const char* aSource, int aDestSize );
97 
98 /**
99  * Copy bytes from @a aSource delimited string segment to @a aDest wxString.
100  *
101  * @param aDest is the destination wxString.
102  * @param aSource is the source C string holding utf8 encoded bytes.
103  * @return the number of bytes read from source, which may be more than the number copied,
104  *         due to escaping of double quotes and the escape byte itself.
105  */
106 int ReadDelimitedText( wxString* aDest, const char* aSource );
107 
108 /**
109  * Return an 8 bit UTF8 string given aString in Unicode form.
110  *
111  * Any double quoted or back slashes are prefixed with a '\\' byte and the form
112  * of this UTF8 byte string is compatible with function ReadDelimitedText().
113  *
114  * @param aString is the input string to convert.
115  * @return the escaped input text, without the wrapping double quotes.
116  */
117 std::string EscapedUTF8( const wxString& aString );
118 
119 /**
120  * Return a new wxString escaped for embedding in HTML.
121  */
122 wxString EscapeHTML( const wxString& aString );
123 
124 /**
 * Read one line from \a aFile.
126  *
 * @return a pointer to the first useful line read by eliminating blank lines and comments.
128  */
129 char* GetLine( FILE* aFile, char* Line, int* LineNum = nullptr, int SizeLine = 255 );
130 
131 /**
132  * Return true if the string is empty or contains only whitespace.
133  */
134 bool NoPrintableChars( const wxString& aString );
135 
136 /**
137  * Return the number of printable (ie: non-formatting) chars.  Used to approximate rendered
138  * text size when speed is more important than accuracy.
139  */
140 int PrintableCharCount( const wxString& aString );
141 
142 /**
 * Remove leading and trailing spaces, tabs and end of line chars in \a text
144  *
145  * @return a pointer on the first n char in text
146  */
147 char* StrPurge( char* text );
148 
149 /**
150  * @return a string giving the current date and time.
151  */
152 wxString DateAndTime();
153 
154 /**
155  * Compare two strings with alphanumerical content.
156  *
157  * This function is equivalent to strncmp() or strncasecmp() if \a aIgnoreCase is true
158  * except that strings containing numbers are compared by their integer value not
159  * by their ASCII code.  In other words U10 would be greater than U2.
160  *
161  * @param aString1 A wxString reference to the reference string.
162  * @param aString2 A wxString reference to the comparison string.
163  * @param aIgnoreCase Use true to make the comparison case insensitive.
164  * @return An integer value of -1 if \a aString1 is less than \a aString2, 0 if
165  *         \a aString1 is equal to \a aString2, or 1 if \a aString1 is greater
166  *         than \a aString2.
167  */
168 int StrNumCmp( const wxString& aString1, const wxString& aString2, bool aIgnoreCase = false );
169 
170 /**
171  * Compare a string against wild card (* and ?) pattern using the usual rules.
172  *
173  * @return true if pattern matched otherwise false.
174  */
175 bool WildCompareString( const wxString& pattern,
176                         const wxString& string_to_tst,
177                         bool            case_sensitive = true );
178 
179 /**
180  * Compare strings like the strcmp function but handle numbers and modifiers within the
181  * string text correctly for sorting.  eg. 1mF > 55uF
182  *
183  * @return -1 if first string is less than the second, 0 if the strings are equal, or
184  *          1 if the first string is greater than the second.
185  */
186 int ValueStringCompare( const wxString& strFWord, const wxString& strSWord );
187 
188 /**
 * Break a string into three parts: the alphabetic preamble, the numeric part, and any
190  * alphabetic ending.
191  *
192  * For example C10A is split to C 10 A
193  */
194 int SplitString( const wxString& strToSplit,
195                  wxString* strBeginning,
196                  wxString* strDigits,
197                  wxString* strEnd );
198 
199 /**
200  * Gets the trailing int, if any, from a string.
201  *
202  * @param aStr the string to check.
203  * @return the trailing int or 0 if none found.
204  */
205 int GetTrailingInt( const wxString& aStr );
206 
207 /**
208  * @return a wxString object containing the illegal file name characters for all platforms.
209  */
210 wxString GetIllegalFileNameWxChars();
211 
212 /**
213  * Checks \a aName for illegal file name characters.
214  *
215  * The Windows (DOS) file system forbidden characters already include the forbidden file
216  * name characters for both Posix and OSX systems.  The characters \/?*|"\<\> are illegal
 * and are replaced with %xx where xx is the hexadecimal equivalent of the replaced character.
218  * This replacement may not be as elegant as using an underscore ('_') or hyphen ('-') but
219  * it guarantees that there will be no naming conflicts when fixing footprint library names.
 * However, if aReplaceChar is given, it will replace the illegal chars.
221  *
 * @param aName is a pointer to a std::string object containing the footprint name to verify.
223  * @param aReplaceChar (if not 0) is the replacement char.
224  * @return true if any characters have been replaced in \a aName.
225  */
226 bool ReplaceIllegalFileNameChars( std::string* aName, int aReplaceChar = 0 );
227 bool ReplaceIllegalFileNameChars( wxString& aName, int aReplaceChar = 0 );
228 
229 #ifndef HAVE_STRTOKR
230 // common/strtok_r.c optionally:
231 extern "C" char* strtok_r( char* str, const char* delim, char** nextp );
232 #endif
233 
234 
235 /**
236  * A helper for sorting strings from the rear.
237  *
238  * Useful for things like 3D model names where they tend to be largely repetitious at the front.
239  */
240 struct rsort_wxString
241 {
operatorrsort_wxString242     bool operator() ( const wxString& strA, const wxString& strB ) const
243     {
244         wxString::const_reverse_iterator sA = strA.rbegin();
245         wxString::const_reverse_iterator eA = strA.rend();
246 
247         wxString::const_reverse_iterator sB = strB.rbegin();
248         wxString::const_reverse_iterator eB = strB.rend();
249 
250         if( strA.empty() )
251         {
252             if( strB.empty() )
253                 return false;
254 
255             // note: this rule implies that a null string is first in the sort order
256             return true;
257         }
258 
259         if( strB.empty() )
260             return false;
261 
262         while( sA != eA && sB != eB )
263         {
264             if( ( *sA ) == ( *sB ) )
265             {
266                 ++sA;
267                 ++sB;
268                 continue;
269             }
270 
271             if( ( *sA ) < ( *sB ) )
272                 return true;
273             else
274                 return false;
275         }
276 
277         if( sB == eB )
278             return false;
279 
280         return true;
281     }
282 };
283 
/**
 * Cut \a aStr into pieces at every character that appears in \a aDelim.
 *
 * @note Each delimiter ends one record, so consecutive delimiters yield empty strings
 *       and a trailing delimiter yields a final empty string.  An empty input yields
 *       an empty vector.
 *
 * @param aStr is the input string, holding 0 or more delimiters.
 * @param aDelim is the set of delimiter characters; each character is an alternative
 *               delimiter.
 * @return the pieces in the order they appear in \a aStr.
 */
static inline std::vector<std::string> split( const std::string& aStr, const std::string& aDelim )
{
    std::vector<std::string> pieces;
    const size_t             total = aStr.size();

    // 'start' is the first character of the current piece, 'cut' the position just past it.
    for( size_t start = 0, cut = 0; cut < total; start = cut + 1 )
    {
        const size_t hit = aStr.find_first_of( aDelim, start );

        // Without a further delimiter the current piece runs to the end of the input.
        cut = ( hit != std::string::npos ) ? hit : total;

        pieces.push_back( aStr.substr( start, cut - start ) );
    }

    return pieces;
}
317 
318 /// Utility to build comma separated lists in messages
AccumulateDescription(wxString & aDesc,const wxString & aItem)319 inline void AccumulateDescription( wxString& aDesc, const wxString& aItem )
320 {
321     if( !aDesc.IsEmpty() )
322         aDesc << wxT( ", " );
323 
324     aDesc << aItem;
325 }
326 
327 /**
328  * Split \a aString to a string list separated at \a aSplitter.
329  *
330  * @param aText is the text to split.
331  * @param aStrings will contain the split lines.
332  * @param aSplitter is the 'split' character.
333  */
334 void wxStringSplit( const wxString& aText, wxArrayString& aStrings, wxChar aSplitter );
335 
336 /**
337  * Remove trailing zeros from a string containing a converted float number.
338  *
339  * The trailing zeros are removed if the mantissa has more than \a aTrailingZeroAllowed
340  * digits and some trailing zeros.
341  */
342 void StripTrailingZeros( wxString& aStringValue, unsigned aTrailingZeroAllowed = 1 );
343 
344 /**
345  * Print a float number without using scientific notation and no trailing 0
346  * We want to avoid scientific notation in S-expr files (not easy to read)
347  * for floating numbers.
348  *
349  * We cannot always just use the %g or the %f format to print a fp number
350  * this helper function uses the %f format when needed, or %g when %f is
351  * not well working and then removes trailing 0
352  */
353 std::string Double2Str( double aValue );
354 
355 /**
356  * A helper to convert the \a double \a aAngle (in internal unit) to a string in degrees.
357  */
358 wxString AngleToStringDegrees( double aAngle );
359 
360 #endif  // STRING_UTILS_H
361