1 #ifndef _DATE_TIME_DATE_PARSING_HPP___ 2 #define _DATE_TIME_DATE_PARSING_HPP___ 3 4 /* Copyright (c) 2002,2003,2005 CrystalClear Software, Inc. 5 * Use, modification and distribution is subject to the 6 * Boost Software License, Version 1.0. (See accompanying 7 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 8 * Author: Jeff Garland, Bart Garst 9 * $Date$ 10 */ 11 12 #include <map> 13 #include <string> 14 #include <sstream> 15 #include <iterator> 16 #include <algorithm> 17 #include <boost/tokenizer.hpp> 18 #include <boost/lexical_cast.hpp> 19 #include <boost/date_time/compiler_config.hpp> 20 #include <boost/date_time/parse_format_base.hpp> 21 #include <boost/date_time/period.hpp> 22 23 #if defined(BOOST_DATE_TIME_NO_LOCALE) 24 #include <cctype> // ::tolower(int) 25 #else 26 #include <locale> // std::tolower(char, locale) 27 #endif 28 29 namespace boost { 30 namespace date_time { 31 32 //! A function to replace the std::transform( , , ,tolower) construct 33 /*! This function simply takes a string, and changes all the characters 34 * in that string to lowercase (according to the default system locale). 35 * In the event that a compiler does not support locales, the old 36 * C style tolower() is used. 37 */ 38 inline 39 std::string convert_to_lower(std::string inp)40 convert_to_lower(std::string inp) 41 { 42 #if !defined(BOOST_DATE_TIME_NO_LOCALE) 43 const std::locale loc(std::locale::classic()); 44 #endif 45 std::string::size_type i = 0, n = inp.length(); 46 for (; i < n; ++i) { 47 inp[i] = 48 #if defined(BOOST_DATE_TIME_NO_LOCALE) 49 static_cast<char>(std::tolower(inp[i])); 50 #else 51 // tolower and others were brought in to std for borland >= v564 52 // in compiler_config.hpp 53 std::tolower(inp[i], loc); 54 #endif 55 } 56 return inp; 57 } 58 59 //! Helper function for parse_date. 60 template<class month_type> 61 inline unsigned short month_str_to_ushort(std::string const & s)62 month_str_to_ushort(std::string const& s) { 63 if((s.at(0) >= '0') && (s.at(0) <= '9')) { 64 return boost::lexical_cast<unsigned short>(s); 65 } 66 else { 67 std::string str = convert_to_lower(s); 68 //c++98 support 69 #if defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX) 70 static std::map<std::string, unsigned short> month_map; 71 typedef std::map<std::string, unsigned short>::value_type vtype; 72 if( month_map.empty() ) { 73 month_map.insert( vtype("jan", static_cast<unsigned short>(1)) ); 74 month_map.insert( vtype("january", static_cast<unsigned short>(1)) ); 75 month_map.insert( vtype("feb", static_cast<unsigned short>(2)) ); 76 month_map.insert( vtype("february", static_cast<unsigned short>(2)) ); 77 month_map.insert( vtype("mar", static_cast<unsigned short>(3)) ); 78 month_map.insert( vtype("march", static_cast<unsigned short>(3)) ); 79 month_map.insert( vtype("apr", static_cast<unsigned short>(4)) ); 80 month_map.insert( vtype("april", static_cast<unsigned short>(4)) ); 81 month_map.insert( vtype("may", static_cast<unsigned short>(5)) ); 82 month_map.insert( vtype("jun", static_cast<unsigned short>(6)) ); 83 month_map.insert( vtype("june", static_cast<unsigned short>(6)) ); 84 month_map.insert( vtype("jul", static_cast<unsigned short>(7)) ); 85 month_map.insert( vtype("july", static_cast<unsigned short>(7)) ); 86 month_map.insert( vtype("aug", static_cast<unsigned short>(8)) ); 87 month_map.insert( vtype("august", static_cast<unsigned short>(8)) ); 88 month_map.insert( vtype("sep", static_cast<unsigned short>(9)) ); 89 month_map.insert( vtype("september", static_cast<unsigned short>(9)) ); 90 month_map.insert( vtype("oct", static_cast<unsigned short>(10)) ); 91 month_map.insert( vtype("october", static_cast<unsigned short>(10)) ); 92 month_map.insert( vtype("nov", static_cast<unsigned short>(11)) ); 93 month_map.insert( vtype("november", static_cast<unsigned short>(11)) ); 94 month_map.insert( vtype("dec", static_cast<unsigned short>(12)) ); 95 month_map.insert( vtype("december", static_cast<unsigned short>(12)) ); 96 } 97 #else //c+11 and beyond 98 static std::map<std::string, unsigned short> month_map = 99 { { "jan", static_cast<unsigned short>(1) }, { "january", static_cast<unsigned short>(1) }, 100 { "feb", static_cast<unsigned short>(2) }, { "february", static_cast<unsigned short>(2) }, 101 { "mar", static_cast<unsigned short>(3) }, { "march", static_cast<unsigned short>(3) }, 102 { "apr", static_cast<unsigned short>(4) }, { "april", static_cast<unsigned short>(4) }, 103 { "may", static_cast<unsigned short>(5) }, 104 { "jun", static_cast<unsigned short>(6) }, { "june", static_cast<unsigned short>(6) }, 105 { "jul", static_cast<unsigned short>(7) }, { "july", static_cast<unsigned short>(7) }, 106 { "aug", static_cast<unsigned short>(8) }, { "august", static_cast<unsigned short>(8) }, 107 { "sep", static_cast<unsigned short>(9) }, { "september", static_cast<unsigned short>(9) }, 108 { "oct", static_cast<unsigned short>(10) }, { "october", static_cast<unsigned short>(10)}, 109 { "nov", static_cast<unsigned short>(11) }, { "november", static_cast<unsigned short>(11)}, 110 { "dec", static_cast<unsigned short>(12) }, { "december", static_cast<unsigned short>(12)} 111 }; 112 #endif 113 std::map<std::string, unsigned short>::const_iterator mitr = month_map.find( str ); 114 if ( mitr != month_map.end() ) { 115 return mitr->second; 116 } 117 } 118 return 13; // intentionally out of range - name not found 119 } 120 121 122 //! Generic function to parse a delimited date (eg: 2002-02-10) 123 /*! Accepted formats are: "2003-02-10" or " 2003-Feb-10" or 124 * "2003-Feburary-10" 125 * The order in which the Month, Day, & Year appear in the argument 126 * string can be accomodated by passing in the appropriate ymd_order_spec 127 */ 128 template<class date_type> 129 date_type parse_date(const std::string & s,int order_spec=ymd_order_iso)130 parse_date(const std::string& s, int order_spec = ymd_order_iso) { 131 std::string spec_str; 132 if(order_spec == ymd_order_iso) { 133 spec_str = "ymd"; 134 } 135 else if(order_spec == ymd_order_dmy) { 136 spec_str = "dmy"; 137 } 138 else { // (order_spec == ymd_order_us) 139 spec_str = "mdy"; 140 } 141 142 typedef typename date_type::month_type month_type; 143 unsigned pos = 0; 144 unsigned short year(0), month(0), day(0); 145 typedef typename std::basic_string<char>::traits_type traits_type; 146 typedef boost::char_separator<char, traits_type> char_separator_type; 147 typedef boost::tokenizer<char_separator_type, 148 std::basic_string<char>::const_iterator, 149 std::basic_string<char> > tokenizer; 150 typedef boost::tokenizer<char_separator_type, 151 std::basic_string<char>::const_iterator, 152 std::basic_string<char> >::iterator tokenizer_iterator; 153 // may need more delimiters, these work for the regression tests 154 const char sep_char[] = {',','-','.',' ','/','\0'}; 155 char_separator_type sep(sep_char); 156 tokenizer tok(s,sep); 157 for(tokenizer_iterator beg=tok.begin(); 158 beg!=tok.end() && pos < spec_str.size(); 159 ++beg, ++pos) { 160 switch(spec_str.at(pos)) { 161 case 'y': 162 { 163 year = boost::lexical_cast<unsigned short>(*beg); 164 break; 165 } 166 case 'm': 167 { 168 month = month_str_to_ushort<month_type>(*beg); 169 break; 170 } 171 case 'd': 172 { 173 day = boost::lexical_cast<unsigned short>(*beg); 174 break; 175 } 176 default: break; 177 } //switch 178 } 179 return date_type(year, month, day); 180 } 181 182 //! Generic function to parse undelimited date (eg: 20020201) 183 template<class date_type> 184 date_type parse_undelimited_date(const std::string & s)185 parse_undelimited_date(const std::string& s) { 186 int offsets[] = {4,2,2}; 187 int pos = 0; 188 //typename date_type::ymd_type ymd((year_type::min)(),1,1); 189 unsigned short y = 0, m = 0, d = 0; 190 191 /* The two bool arguments state that parsing will not wrap 192 * (only the first 8 characters will be parsed) and partial 193 * strings will not be parsed. 194 * Ex: 195 * "2005121" will parse 2005 & 12, but not the "1" */ 196 boost::offset_separator osf(offsets, offsets+3, false, false); 197 198 typedef typename boost::tokenizer<boost::offset_separator, 199 std::basic_string<char>::const_iterator, 200 std::basic_string<char> > tokenizer_type; 201 tokenizer_type tok(s, osf); 202 for(typename tokenizer_type::iterator ti=tok.begin(); ti!=tok.end();++ti) { 203 unsigned short i = boost::lexical_cast<unsigned short>(*ti); 204 switch(pos) { 205 case 0: y = i; break; 206 case 1: m = i; break; 207 case 2: d = i; break; 208 default: break; 209 } 210 pos++; 211 } 212 return date_type(y,m,d); 213 } 214 215 //! Helper function for 'date gregorian::from_stream()' 216 /*! Creates a string from the iterators that reference the 217 * begining & end of a char[] or string. All elements are 218 * used in output string */ 219 template<class date_type, class iterator_type> 220 inline 221 date_type from_stream_type(iterator_type & beg,iterator_type const & end,char)222 from_stream_type(iterator_type& beg, 223 iterator_type const& end, 224 char) 225 { 226 std::ostringstream ss; 227 while(beg != end) { 228 ss << *beg++; 229 } 230 return parse_date<date_type>(ss.str()); 231 } 232 233 //! Helper function for 'date gregorian::from_stream()' 234 /*! Returns the first string found in the stream referenced by the 235 * begining & end iterators */ 236 template<class date_type, class iterator_type> 237 inline 238 date_type from_stream_type(iterator_type & beg,iterator_type const &,std::string const &)239 from_stream_type(iterator_type& beg, 240 iterator_type const& /* end */, 241 std::string const&) 242 { 243 return parse_date<date_type>(*beg); 244 } 245 246 /* I believe the wchar stuff would be best elsewhere, perhaps in 247 * parse_date<>()? In the mean time this gets us started... */ 248 //! Helper function for 'date gregorian::from_stream()' 249 /*! Creates a string from the iterators that reference the 250 * begining & end of a wstring. All elements are 251 * used in output string */ 252 template<class date_type, class iterator_type> 253 inline from_stream_type(iterator_type & beg,iterator_type const & end,wchar_t)254 date_type from_stream_type(iterator_type& beg, 255 iterator_type const& end, 256 wchar_t) 257 { 258 std::ostringstream ss; 259 #if !defined(BOOST_DATE_TIME_NO_LOCALE) 260 std::locale loc; 261 std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc); 262 while(beg != end) { 263 ss << fac.narrow(*beg++, 'X'); // 'X' will cause exception to be thrown 264 } 265 #else 266 while(beg != end) { 267 char c = 'X'; // 'X' will cause exception to be thrown 268 const wchar_t wc = *beg++; 269 if (wc >= 0 && wc <= 127) 270 c = static_cast< char >(wc); 271 ss << c; 272 } 273 #endif 274 return parse_date<date_type>(ss.str()); 275 } 276 #ifndef BOOST_NO_STD_WSTRING 277 //! Helper function for 'date gregorian::from_stream()' 278 /*! Creates a string from the first wstring found in the stream 279 * referenced by the begining & end iterators */ 280 template<class date_type, class iterator_type> 281 inline 282 date_type from_stream_type(iterator_type & beg,iterator_type const &,std::wstring const &)283 from_stream_type(iterator_type& beg, 284 iterator_type const& /* end */, 285 std::wstring const&) { 286 std::wstring ws = *beg; 287 std::ostringstream ss; 288 std::wstring::iterator wsb = ws.begin(), wse = ws.end(); 289 #if !defined(BOOST_DATE_TIME_NO_LOCALE) 290 std::locale loc; 291 std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc); 292 while(wsb != wse) { 293 ss << fac.narrow(*wsb++, 'X'); // 'X' will cause exception to be thrown 294 } 295 #else 296 while(wsb != wse) { 297 char c = 'X'; // 'X' will cause exception to be thrown 298 const wchar_t wc = *wsb++; 299 if (wc >= 0 && wc <= 127) 300 c = static_cast< char >(wc); 301 ss << c; 302 } 303 #endif 304 return parse_date<date_type>(ss.str()); 305 } 306 #endif // BOOST_NO_STD_WSTRING 307 #if (defined(BOOST_MSVC) && (_MSC_VER < 1300)) 308 // This function cannot be compiled with MSVC 6.0 due to internal compiler shorcomings 309 #else 310 //! function called by wrapper functions: date_period_from_(w)string() 311 template<class date_type, class charT> 312 period<date_type, typename date_type::duration_type> from_simple_string_type(const std::basic_string<charT> & s)313 from_simple_string_type(const std::basic_string<charT>& s){ 314 typedef typename std::basic_string<charT>::traits_type traits_type; 315 typedef typename boost::char_separator<charT, traits_type> char_separator; 316 typedef typename boost::tokenizer<char_separator, 317 typename std::basic_string<charT>::const_iterator, 318 std::basic_string<charT> > tokenizer; 319 const charT sep_list[4] = {'[','/',']','\0'}; 320 char_separator sep(sep_list); 321 tokenizer tokens(s, sep); 322 typename tokenizer::iterator tok_it = tokens.begin(); 323 std::basic_string<charT> date_string = *tok_it; 324 // get 2 string iterators and generate a date from them 325 typename std::basic_string<charT>::iterator date_string_start = date_string.begin(), 326 date_string_end = date_string.end(); 327 typedef typename std::iterator_traits<typename std::basic_string<charT>::iterator>::value_type value_type; 328 date_type d1 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); 329 date_string = *(++tok_it); // next token 330 date_string_start = date_string.begin(), date_string_end = date_string.end(); 331 date_type d2 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); 332 return period<date_type, typename date_type::duration_type>(d1, d2); 333 } 334 #endif 335 336 } } //namespace date_time 337 338 339 340 341 #endif 342 343