1 // OpenNN: Open Neural Networks Library
2 // www.opennn.net
3 //
4 // O P E N N N S T R I N G S
5 //
6 // Artificial Intelligence Techniques, SL
7 // artelnics@artelnics.com
8
9 #include "opennn_strings.h"
10 namespace OpenNN
11 {
12
13 /// Returns the number of strings delimited by separator.
14 /// If separator does not match anywhere in the string, this method returns 0.
15 /// @param str String to be tokenized.
16
count_tokens(string & str,const char & separator)17 Index count_tokens(string& str, const char& separator)
18 {
19 // if(!(this->find(separator) != string::npos))
20 // {
21 // ostringstream buffer;
22 //
23 // buffer << "OpenNN Exception:\n"
24 // << "string class.\n"
25 // << "inline Index count_tokens(const string&) const method.\n"
26 // << "Separator not found in string: \"" << separator << "\".\n";
27 //
28 // throw logic_error(buffer.str());
29 // }
30
31 trim(str);
32
33 Index tokens_count = 0;
34
35 // Skip delimiters at beginning.
36
37 string::size_type last_pos = str.find_first_not_of(separator, 0);
38
39 // Find first "non-delimiter".
40
41 string::size_type pos = str.find_first_of(separator, last_pos);
42
43 while(string::npos != pos || string::npos != last_pos)
44 {
45 // Found a token, add it to the vector
46
47 tokens_count++;
48
49 // Skip delimiters. Note the "not_of"
50
51 last_pos = str.find_first_not_of(separator, pos);
52
53 // Find next "non-delimiter"
54
55 pos = str.find_first_of(separator, last_pos);
56 }
57
58 return tokens_count;
59 }
60
61
count_tokens(const string & s,const char & c)62 Index count_tokens(const string& s, const char& c)
63 {
64 return static_cast<Index>(count(s.begin(), s.end(), c) + 1);
65 }
66
67
68 /// Splits the string into substrings(tokens) wherever separator occurs, and returns a vector with those strings.
69 /// If separator does not match anywhere in the string, this method returns a single-element list containing this string.
70 /// @param str String to be tokenized.
71
get_tokens(const string & str,const char & separator)72 Tensor<string, 1> get_tokens(const string& str, const char& separator)
73 {
74 // const string new_string = get_trimmed(str);
75 const Index tokens_number = count_tokens(str, separator);
76
77 Tensor<string, 1> tokens(tokens_number);
78
79 // Skip delimiters at beginning.
80
81 string::size_type lastPos = str.find_first_not_of(separator, 0);
82
83 // Find first "non-delimiter"
84
85 Index index = 0;
86 Index old_pos;
87
88 string::size_type pos = str.find_first_of(separator, lastPos);
89
90 while(string::npos != pos || string::npos != lastPos)
91 {
92
93 if((lastPos-old_pos != 1) && index!= 0){
94 tokens[index] = "";
95 index++;
96 old_pos = old_pos+1;
97 continue;
98 }
99 else{
100 // Found a token, add it to the vector
101 tokens[index] = str.substr(lastPos, pos - lastPos);
102 }
103
104 old_pos = pos;
105
106 // Skip delimiters. Note the "not_of"
107 lastPos = str.find_first_not_of(separator, pos);
108
109 // Find next "non-delimiter"
110 pos = str.find_first_of(separator, lastPos);
111
112 index++;
113
114 }
115
116 return tokens;
117 }
118
119
120 /// Splits the string into substrings(tokens) wherever separator occurs, and returns a vector with those strings.
121 /// If separator does not match anywhere in the string, this method returns a single-element list containing this string.
122 /// @param str String to be tokenized.
123
fill_tokens(const string & str,const char & separator,Tensor<string,1> & tokens)124 void fill_tokens(const string& str, const char& separator, Tensor<string, 1>& tokens)
125 {
126 tokens.setConstant("");
127
128 // Skip delimiters at beginning.
129
130 string::size_type last_position = str.find_first_not_of(separator, 0);
131
132 string::size_type position = str.find_first_of(separator, last_position);
133
134 // Find first "non-delimiter"
135
136 Index index = 0;
137
138 Index old_pos;
139
140
141 while(string::npos != position || string::npos != last_position)
142 {
143 // Found a token, add it to the vector
144
145 if((last_position-old_pos != 1) && index!= 0)
146 {
147 tokens[index] = "";
148 index++;
149 old_pos = old_pos+1;
150 continue;
151 }
152 else
153 {
154 // Found a token, add it to the vector
155
156 tokens[index] = str.substr(last_position, position - last_position);
157 }
158
159 old_pos = position;
160
161 // Skip delimiters. Note the "not_of"
162
163 last_position = str.find_first_not_of(separator, position);
164
165 // Find next "non-delimiter"
166
167 position = str.find_first_of(separator, last_position);
168
169 index++;
170 }
171 }
172
173
174 /// Returns a new vector with the elements of this string vector casted to type.
175
to_type_vector(const string & str,const char & separator)176 Tensor<type, 1> to_type_vector(const string& str, const char& separator)
177 {
178 const Tensor<string, 1> tokens = get_tokens(str, separator);
179
180 const Index tokens_size = tokens.dimension(0);
181
182 Tensor<type, 1> type_vector(tokens_size);
183
184 for(Index i = 0; i < tokens_size; i++)
185 {
186 try
187 {
188 stringstream buffer;
189
190 buffer << tokens[i];
191
192 type_vector(i) = stof(buffer.str());
193 }
194 catch(const logic_error&)
195 {
196 type_vector(i) = static_cast<type>(nan(""));
197 }
198 }
199
200 return type_vector;
201 }
202
203
204 /// Returns true if the string passed as argument represents a number, and false otherwise.
205 /// @param str String to be checked.
206
is_numeric_string(const string & str)207 bool is_numeric_string(const string& str)
208 {
209 std::string::size_type index;
210
211 std::istringstream iss(str.data());
212
213 type dTestSink;
214
215 iss >> dTestSink;
216
217 // was any input successfully consumed/converted?
218
219 if(!iss)
220 {
221 return false;
222 }
223
224 // was all the input successfully consumed/converted?
225 try
226 {
227 stod(str, &index);
228
229 if(index == str.size() || (str.find("%") != std::string::npos && index+1 == str.size()))
230 {
231 return true;
232 }
233 else
234 {
235 return false;
236 }
237 }
238 catch (exception)
239 {
240 return false;
241 }
242
243 // if(!std::isdigit(str[0])) return false;
244 // return !str.empty() && std::find_if(str.begin(),
245 // str.end(), [](unsigned char c) { return (!std::isdigit(c) && !std::isspace(c) && c != '-' && c != '+' && c != '.' && c != 'e' && c != 'E'); }) == str.end();
246 }
247
248
249 /// Returns true if given string vector is constant, false otherwise.
250 /// @param str vector to be checked.
251 ///
is_constant_string(const Tensor<string,1> & str)252 bool is_constant_string(const Tensor<string, 1>& str)
253 {
254 const string str0 = str[0];
255 string str1;
256
257 for (int i = 1; i < str.size(); i++)
258 {
259 str1 = str[i];
260 if (str1.compare(str0) != 0)
261 return false;
262 }
263 return true;
264 }
265
266 /// Returns true if given numeric vector is constant, false otherwise.
267 /// @param str vector to be checked.
268
is_constant_numeric(const Tensor<type,1> & str)269 bool is_constant_numeric(const Tensor<type, 1>& str)
270 {
271 const type a0 = str[0];
272
273 for (int i = 1; i < str.size(); i++)
274 {
275 if (abs(str[i]-a0)>1e-3 || ::isnan(str[i]) || ::isnan(a0))
276 return false;
277 }
278 return true;
279 }
280
281 /// Returns true if given string is a date, false otherwise.
282 /// @param str String to be checked.
283
is_date_time_string(const string & str)284 bool is_date_time_string(const string& str)
285 {
286 if(is_numeric_string(str))return false;
287
288 const regex regular_expression("20[0-9][0-9]|19[0-9][0-9]+[-|/|.](0[1-9]|1[0-2])"
289 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])"
290 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
291 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])"
292 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])"
293 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
294 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])"
295 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.] (0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])"
296 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
297 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])"
298 "|([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+ (0[1-9]|1[0-9]|2[0-9]|3[0-1])+[| ][,|.| ](201[0-9]|202[0-9]|19[0-9][0-9])"
299 "|([0-2][0-9])+[:]([0-5][0-9])+[:]([0-5][0-9])"
300 "|([1-9]|0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])+[,| ||-][AP]M"
301 );
302
303 if(regex_match(str,regular_expression))
304 {
305 return true;
306 }
307 else
308 {
309 return false;
310 }
311 }
312
313
314 /// Transforms human date into timestamp.
315 /// @param date Date in string fortmat to be converted.
316 /// @param gmt Greenwich Mean Time.
317
date_to_timestamp(const string & date,const Index & gmt)318 time_t date_to_timestamp(const string& date, const Index& gmt)
319 {
320 struct tm time_structure;
321
322 smatch month;
323
324 const regex months("([Jj]an(?:uary)?)|([Ff]eb(?:ruary)?)|([Mm]ar(?:ch)?)|([Aa]pr(?:il)?)|([Mm]ay)|([Jj]un(?:e)?)|([Jj]ul(?:y)?)"
325 "|([Aa]ug(?:gust)?)|([Ss]ep(?:tember)?)|([Oo]ct(?:ober)?)|([Nn]ov(?:ember)?)|([Dd]ec(?:ember)?)");
326
327 smatch matchs;
328
329 const string format_1 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])";
330 const string format_2 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])";
331 const string format_3 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])";
332 const string format_4 = "(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])";
333 const string format_5 = "(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])";
334 const string format_6 = "(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])";
335 const string format_7 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])";
336 const string format_8 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])";
337 const string format_9 = "(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])";
338 const string format_10 = "([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+ (0[1-9]|1[0-9]|2[0-9]|3[0-1])+[| ][,|.| ](201[0-9]|202[0-9]|19[0-9][0-9])";
339 const string format_11 = "(20[0-9][0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])";
340 const string format_12 = "([0-2][0-9])+[:]([0-5][0-9])+[:]([0-5][0-9])";
341 const string format_13 = "([1-9]|0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+[,| ||-]([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])+[,| ||-][AP]M";
342
343
344 const regex regular_expression(format_1 + "|" + format_2 + "|" + format_3 + "|" + format_4 + "|" + format_5 + "|" + format_6 + "|" + format_7 + "|" + format_8
345 + "|" + format_9 + "|" + format_10 + "|" + format_11 +"|" + format_12 +"|" + format_13);
346
347 const regex regular("(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])"
348 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
349 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])"
350 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
351 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
352 "|(0[1-9]|1[0-9]|2[0-9]|3[0-1])+[-|\\s|/|.](0[1-9]|1[0-2])+[-|\\s|/|.](201[0-9]|202[0-9]|19[0-9][0-9])"
353 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])+[:]([0-5][0-9])"
354 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])+ ([0-1][0-9]|2[0-3])+[:]([0-5][0-9])"
355 "|(201[0-9]|202[0-9]|19[0-9][0-9])+[-|/|.]([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+[-|/|.](0[1-9]|1[0-9]|2[0-9]|3[0-1])"
356 "|([Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj}un(?:e)?|[Jj]ul(?:y)|[Aa]ug(?:gust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)+ (0[1-9]|1[0-9]|2[0-9]|3[0-1])+[,|.| ](201[0-9]|202[0-9]|19[0-9][0-9])"
357 "|(20[0-9][0-9]|19[0-9][0-9])+[-|/|.](0[1-9]|1[0-2])");
358
359 regex_search(date, matchs, regular_expression);
360
361
362 if(matchs[1] != "") // yyyy/mm/dd hh:mm:ss
363 {
364 if(stoi(matchs[1].str()) < 1970)
365 {
366 ostringstream buffer;
367
368 buffer << "OpenNN Exception: DataSet Class.\n"
369 << "time_t date_to_timestamp(const string&) method.\n"
370 << "Cannot convert dates below 1970.\n";
371
372 throw logic_error(buffer.str());
373 }
374 else
375 {
376
377 time_structure.tm_year = stoi(matchs[1].str())-1900;
378 time_structure.tm_mon = stoi(matchs[2].str())-1;
379 time_structure.tm_mday = stoi(matchs[3].str());
380 time_structure.tm_hour = stoi(matchs[4].str()) - static_cast<int>(gmt);
381 time_structure.tm_min = stoi(matchs[5].str());
382 time_structure.tm_sec = stoi(matchs[6].str());
383
384 }
385 }
386 else if (matchs[7] != "") // yyyy/mm/dd hh:mm
387 {
388 if(stoi(matchs[7].str()) < 1970)
389 {
390 ostringstream buffer;
391
392 buffer << "OpenNN Exception: DataSet Class.\n"
393 << "time_t date_to_timestamp(const string&) method.\n"
394 << "Cannot convert dates below 1970.\n";
395
396 throw logic_error(buffer.str());
397 }
398 else
399 {
400 time_structure.tm_year = stoi(matchs[7].str())-1900;
401 time_structure.tm_mon = stoi(matchs[8].str())-1;
402 time_structure.tm_mday = stoi(matchs[9].str());
403 time_structure.tm_hour = stoi(matchs[10].str()) - static_cast<int>(gmt);
404 time_structure.tm_min = stoi(matchs[11].str());
405 time_structure.tm_sec = 0;
406 }
407 }
408 else if (matchs[12] != "") // yyyy/mm/dd
409 {
410 if(stoi(matchs[12].str()) < 1970)
411 {
412 ostringstream buffer;
413
414 buffer << "OpenNN Exception: DataSet Class.\n"
415 << "time_t date_to_timestamp(const string&) method.\n"
416 << "Cannot convert dates below 1970.\n";
417
418 throw logic_error(buffer.str());
419 }
420 else
421 {
422 time_structure.tm_year = stoi(matchs[12].str())-1900;
423 time_structure.tm_mon = stoi(matchs[13].str())-1;
424 time_structure.tm_mday = stoi(matchs[14].str());
425 time_structure.tm_hour = 0;
426 time_structure.tm_min = 0;
427 time_structure.tm_sec = 0;
428
429 }
430 }
431 else if (matchs[15] != "") // dd/mm/yyyy hh:mm:ss
432 {
433 if(stoi(matchs[17].str()) < 1970)
434 {
435 ostringstream buffer;
436
437 buffer << "OpenNN Exception: DataSet Class.\n"
438 << "time_t date_to_timestamp(const string&) method.\n"
439 << "Cannot convert dates below 1970.\n";
440
441 throw logic_error(buffer.str());
442 }
443 else
444 {
445 time_structure.tm_year = stoi(matchs[17].str())-1900;
446 time_structure.tm_mon = stoi(matchs[16].str())-1;
447 time_structure.tm_mday = stoi(matchs[15].str());
448 time_structure.tm_hour = stoi(matchs[18].str()) - static_cast<int>(gmt);
449 time_structure.tm_min = stoi(matchs[19].str());
450 time_structure.tm_sec = stoi(matchs[20].str());
451 }
452 }
453 else if (matchs[21] != "") // dd/mm/yyyy hh:mm
454 {
455 if(stoi(matchs[23].str()) < 1970)
456 {
457 ostringstream buffer;
458
459 buffer << "OpenNN Exception: DataSet Class.\n"
460 << "time_t date_to_timestamp(const string&) method.\n"
461 << "Cannot convert dates below 1970.\n";
462
463 throw logic_error(buffer.str());
464 }
465 else
466 {
467 time_structure.tm_year = stoi(matchs[23].str())-1900;
468 time_structure.tm_mon = stoi(matchs[22].str())-1;
469 time_structure.tm_mday = stoi(matchs[21].str());
470 time_structure.tm_hour = stoi(matchs[24].str()) - static_cast<int>(gmt);
471 time_structure.tm_min = stoi(matchs[25].str());
472 time_structure.tm_sec = 0;
473 }
474 }
475 else if (matchs[26] != "") // dd/mm/yyyy
476 {
477 if(stoi(matchs[28].str()) < 1970)
478 {
479 ostringstream buffer;
480
481 buffer << "OpenNN Exception: DataSet Class.\n"
482 << "time_t date_to_timestamp(const string&) method.\n"
483 << "Cannot convert dates below 1970.\n";
484
485 throw logic_error(buffer.str());
486 }
487 else
488 {
489 time_structure.tm_year = stoi(matchs[28].str())-1900;
490 time_structure.tm_mon = stoi(matchs[27].str())-1;
491 time_structure.tm_mday = stoi(matchs[26].str());
492 time_structure.tm_hour = 0;
493 time_structure.tm_min = 0;
494 time_structure.tm_sec = 0;
495 }
496 }
497 else if (matchs[29] != "") // yyyy/mmm|mmmm/dd hh:mm:ss
498 {
499 if(stoi(matchs[29].str()) < 1970)
500 {
501 ostringstream buffer;
502
503 buffer << "OpenNN Exception: DataSet Class.\n"
504 << "time_t date_to_timestamp(const string&) method.\n"
505 << "Cannot convert dates below 1970.\n";
506
507 throw logic_error(buffer.str());
508 }
509 else
510 {
511 regex_search(date, month, months);
512
513 Index month_number = 0;
514
515 if(!month.empty())
516 {
517 for(Index i = 1; i < 13; i++)
518 {
519 if(month[static_cast<size_t>(i)] != "") month_number = i;
520 }
521 }
522
523 time_structure.tm_year = stoi(matchs[29].str())-1900;
524 time_structure.tm_mon = static_cast<int>(month_number) - 1;
525 time_structure.tm_mday = stoi(matchs[31].str());
526 time_structure.tm_hour = stoi(matchs[32].str()) - static_cast<int>(gmt);
527 time_structure.tm_min = stoi(matchs[33].str());
528 time_structure.tm_sec = stoi(matchs[34].str());
529 }
530 }
531 else if (matchs[35] != "") // yyyy/mmm|mmmm/dd hh:mm
532 {
533 if(stoi(matchs[35].str()) < 1970)
534 {
535 ostringstream buffer;
536
537 buffer << "OpenNN Exception: DataSet Class.\n"
538 << "time_t date_to_timestamp(const string&) method.\n"
539 << "Cannot convert dates below 1970.\n";
540
541 throw logic_error(buffer.str());
542 }
543 else
544 {
545 regex_search(date, month, months);
546
547 Index month_number = 0;
548 if(!month.empty())
549 {
550 for(Index i =1 ; i<13 ; i++)
551 {
552 if(month[static_cast<size_t>(i)] != "") month_number = i;
553 }
554 }
555
556 time_structure.tm_year = stoi(matchs[35].str())-1900;
557 time_structure.tm_mon = static_cast<int>(month_number) - 1;
558 time_structure.tm_mday = stoi(matchs[37].str());
559 time_structure.tm_hour = stoi(matchs[38].str())- static_cast<int>(gmt);
560 time_structure.tm_min = stoi(matchs[39].str());
561 time_structure.tm_sec = 0;
562 }
563 }
564 else if(matchs[40] != "") // yyyy/mmm|mmmm/dd
565 {
566 if(stoi(matchs[40].str()) < 1970)
567 {
568 ostringstream buffer;
569
570 buffer << "OpenNN Exception: DataSet Class.\n"
571 << "time_t date_to_timestamp(const string&) method.\n"
572 << "Cannot convert dates below 1970.\n";
573
574 throw logic_error(buffer.str());
575 }
576 else
577 {
578 regex_search(date, month, months);
579
580 Index month_number = 0;
581 if(!month.empty())
582 {
583 for(Index i =1 ; i<13 ; i++)
584 {
585 if(month[static_cast<size_t>(i)] != "") month_number = i;
586 }
587 }
588
589 time_structure.tm_year = stoi(matchs[40].str())-1900;
590 time_structure.tm_mon = static_cast<int>(month_number)-1;
591 time_structure.tm_mday = stoi(matchs[42].str())- static_cast<int>(gmt);
592 time_structure.tm_hour = 0;
593 time_structure.tm_min = 0;
594 time_structure.tm_sec = 0;
595 }
596 }
597 else if (matchs[43] != "") // mmm dd, yyyy
598 {
599 if(stoi(matchs[45].str()) < 1970)
600 {
601 ostringstream buffer;
602
603 buffer << "OpenNN Exception: DataSet Class.\n"
604 << "time_t date_to_timestamp(const string&) method.\n"
605 << "Cannot convert dates below 1970.\n";
606
607 throw logic_error(buffer.str());
608 }
609 else
610 {
611 regex_search(date,month,months);
612
613 Index month_number = 0;
614
615 if(!month.empty())
616 {
617 for(Index i =1 ; i<13 ; i++)
618 {
619 if(month[static_cast<size_t>(i)] != "") month_number = i;
620 }
621 }
622
623 time_structure.tm_year = stoi(matchs[45].str())-1900;
624 time_structure.tm_mon = static_cast<int>(month_number)-1;
625 time_structure.tm_mday = stoi(matchs[44].str());
626 time_structure.tm_hour = 0;
627 time_structure.tm_min = 0;
628 time_structure.tm_sec = 0;
629 }
630 }
631 else if(matchs[46] != "") // yyyy/ mm
632 {
633 if(stoi(matchs[46].str()) < 1970)
634 {
635 ostringstream buffer;
636
637 buffer << "OpenNN Exception: DataSet Class.\n"
638 << "time_t date_to_timestamp(const string&) method.\n"
639 << "Cannot convert dates below 1970.\n";
640
641 throw logic_error(buffer.str());
642 }
643 else
644 {
645 time_structure.tm_year = stoi(matchs[46].str())-1900;
646 time_structure.tm_mon = stoi(matchs[47].str())-1;
647 time_structure.tm_mday = 1;
648 time_structure.tm_hour = 0;
649 time_structure.tm_min = 0;
650 time_structure.tm_sec = 0;
651 }
652 }
653 else if(matchs[48] != "") // hh:mm:ss
654 {
655 time_structure.tm_year = 70;
656 time_structure.tm_mon = 0;
657 time_structure.tm_mday = 1;
658 time_structure.tm_hour = stoi(matchs[48].str());
659 time_structure.tm_min = stoi(matchs[49].str());
660 time_structure.tm_sec = stoi(matchs[50].str());
661
662 }
663 else if(matchs[51] != "") // mm/dd/yyyy hh:mm:ss [AP]M
664 {
665 time_structure.tm_year = stoi(matchs[53].str())-1900;
666 time_structure.tm_mon = stoi(matchs[51].str());
667 time_structure.tm_mday = stoi(matchs[52].str());
668 time_structure.tm_min = stoi(matchs[55].str());
669 time_structure.tm_sec = stoi(matchs[56].str());
670 if(matchs[57].str()=="PM"){
671 time_structure.tm_hour = stoi(matchs[54].str())+12;
672 }
673 else{
674 time_structure.tm_hour = stoi(matchs[54].str());
675 }
676
677 }
678 else if(is_numeric_string(date)){
679 }
680 else
681 {
682 ostringstream buffer;
683
684 buffer << "OpenNN Exception: DataSet Class.\n"
685 << "time_t date_to_timestamp(const string&) method.\n"
686 << "Date format (" << date << ") is not implemented.\n";
687
688 throw logic_error(buffer.str());
689 }
690
691 if(is_numeric_string(date)){
692 time_t time_t_date = stoi(date);
693 return(time_t_date);
694 }
695 else{
696 return mktime(&time_structure);
697 }
698 }
699
700
/// Tells whether sub_str occurs anywhere inside str.
/// An empty sub_str is found in any string.
/// @param str String to be searched.
/// @param sub_str Substring to look for.

bool contains_substring(const string& str, const string& sub_str)
{
    return str.find(sub_str) != string::npos;
}
713
714
/// Removes the spaces at the start and at the end of the string passed as argument.
/// Only the space character " " is removed; tabs, line breaks and other whitespace are kept.
/// @param str String to be trimmed in place.

void trim(string& str)
{
    const string::size_type first = str.find_first_not_of(' ');

    // Empty string, or nothing but spaces.

    if(first == string::npos)
    {
        str.clear();
        return;
    }

    // Trailing spaces go first, so that the index of the first character stays valid.

    const string::size_type last = str.find_last_not_of(' ');

    str.erase(last + 1);

    str.erase(0, first);
}
729
730
/// Removes every occurrence of a character from a string.
/// @param s String to be modified in place.
/// @param c Character to be removed. It may refer to a character of s.

void erase(string& s, const char& c)
{
    // c is a reference and may point into s. It is copied so that the value being searched
    // does not change while the characters of s are shifted.

    const char target = c;

    s.erase(remove(s.begin(), s.end(), target), s.end());
}
735
736
/// Returns a copy of the string without the spaces at its start and at its end.
/// Only the space character " " is removed; tabs, line breaks and other whitespace are kept.
/// @param str String to be trimmed. It is not modified.

string get_trimmed(const string& str)
{
    const string::size_type first = str.find_first_not_of(' ');

    // Empty string, or nothing but spaces.

    if(first == string::npos) return string();

    const string::size_type last = str.find_last_not_of(' ');

    return str.substr(first, last - first + 1);
}
755
756
/// Returns a new string made of pre followed by str.
/// @param pre String to be placed at the beginning.
/// @param str Original string.

string prepend(const string& pre, const string& str)
{
    string result;

    result.reserve(pre.size() + str.size());

    result += pre;
    result += str;

    return result;
}
769
770
771 /// Returns true if all the elements in a string list are numeric, and false otherwise.
772 /// @param v String list to be checked.
773
is_numeric_string_vector(const Tensor<string,1> & v)774 bool is_numeric_string_vector(const Tensor<string, 1>& v)
775 {
776 for(Index i = 0; i < v.size(); i++)
777 {
778 if(!is_numeric_string(v[i])) return false;
779 }
780
781 return true;
782 }
783
784
has_numbers(const Tensor<string,1> & v)785 bool has_numbers(const Tensor<string, 1>& v)
786 {
787 for(Index i = 0; i < v.size(); i++)
788 {
789 if(is_numeric_string(v[i])) return true;
790 }
791
792 return false;
793 }
794
795
has_strings(const Tensor<string,1> & v)796 bool has_strings(const Tensor<string, 1>& v)
797 {
798 for(Index i = 0; i < v.size(); i++)
799 {
800 if(!is_numeric_string(v[i])) return true;
801 }
802
803 return false;
804 }
805
806 /// Returns true if none element in a string list is numeric, and false otherwise.
807 /// @param v String list to be checked.
808
is_not_numeric(const Tensor<string,1> & v)809 bool is_not_numeric(const Tensor<string, 1>& v)
810 {
811 for(Index i = 0; i < v.size(); i++)
812 {
813 if(is_numeric_string(v[i])) return false;
814 }
815
816 return true;
817 }
818
819
820 /// Returns true if some the elements in a string list are numeric and some others are not numeric.
821 /// @param v String list to be checked.
822
is_mixed(const Tensor<string,1> & v)823 bool is_mixed(const Tensor<string, 1>& v)
824 {
825 unsigned count_numeric = 0;
826 unsigned count_not_numeric = 0;
827
828 for(Index i = 0; i < v.size(); i++)
829 {
830 if(is_numeric_string(v[i]))
831 {
832 count_numeric++;
833 }
834 else
835 {
836 count_not_numeric++;
837 }
838 }
839
840 if(count_numeric > 0 && count_not_numeric > 0)
841 {
842 return true;
843 }
844 else
845 {
846 return false;
847 }
848 }
849
850
851 /// Replaces a substring by another one in each element of this vector.
852 /// @param find_what String to be replaced.
853 /// @param replace_with String to be put instead.
854
replace_substring(Tensor<string,1> & vector,const string & find_what,const string & replace_with)855 void replace_substring(Tensor<string, 1>& vector, const string& find_what, const string& replace_with)
856 {
857 const Index size = vector.dimension(0);
858
859 for(Index i = 0; i < size; i++)
860 {
861 Index position = 0;
862
863 while((position = vector(i).find(find_what, position)) != string::npos)
864 {
865 vector(i).replace(position, find_what.length(), replace_with);
866
867 position += replace_with.length();
868 }
869 }
870 }
871
872
/// Replaces every occurrence of a substring by another one in a string.
/// Replaced text is not searched again, so replace_with may contain find_what.
/// If find_what is empty the string is left unchanged.
/// @param source String to be modified in place.
/// @param find_what String to be replaced.
/// @param replace_with String to be put instead.

void replace(string& source, const string& find_what, const string& replace_with)
{
    // An empty pattern is found at every position, so the search below would never finish.

    if(find_what.empty()) return;

    string::size_type position = 0;

    while((position = source.find(find_what, position)) != string::npos)
    {
        source.replace(position, find_what.length(), replace_with);

        // Continue after the inserted text.

        position += replace_with.length();
    }
}
884 }
885
886 // OpenNN: Open Neural Networks Library.
887 // Copyright(C) 2005-2020 Artificial Intelligence Techniques, SL.
888 //
889 // This library is free software; you can redistribute it and/or
890 // modify it under the terms of the GNU Lesser General Public
891 // License as published by the Free Software Foundation; either
892 // version 2.1 of the License, or any later version.
893 //
894 // This library is distributed in the hope that it will be useful,
895 // but WITHOUT ANY WARRANTY; without even the implied warranty of
896 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
897 // Lesser General Public License for more details.
898
899 // You should have received a copy of the GNU Lesser General Public
900 // License along with this library; if not, write to the Free Software
901 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
902