1 #ifndef _DATE_TIME_DATE_PARSING_HPP___
2 #define _DATE_TIME_DATE_PARSING_HPP___
3 
4 /* Copyright (c) 2002,2003,2005 CrystalClear Software, Inc.
5  * Use, modification and distribution is subject to the
6  * Boost Software License, Version 1.0. (See accompanying
7  * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8  * Author: Jeff Garland, Bart Garst
9  * $Date$
10  */
11 
#include <string>
#include <sstream>   // std::ostringstream
#include <stdexcept> // std::out_of_range
#include <iterator>
#include <algorithm>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/parse_format_base.hpp>
19 
20 #if defined(BOOST_DATE_TIME_NO_LOCALE)
21 #include <cctype> // ::tolower(int)
22 #else
23 #include <locale> // std::tolower(char, locale)
24 #endif
25 
26 namespace boost {
27 namespace date_time {
28 
//! Lowercase every character of a string.
/*! Replaces the std::transform( , , ,tolower) construct. The argument is
 * taken by value, converted in place and returned, so the caller's string
 * is never modified.
 *
 * The conversion uses the classic "C" locale (std::locale::classic()),
 * not the global locale. When BOOST_DATE_TIME_NO_LOCALE is defined the
 * C style tolower(int) is used instead.
 *
 * \param inp text to convert
 * \return copy of \a inp with each character lowercased
 */
inline
std::string
convert_to_lower(std::string inp)
{
#if !defined(BOOST_DATE_TIME_NO_LOCALE)
  const std::locale loc(std::locale::classic());
#endif
  const std::string::size_type n = inp.length();
  for (std::string::size_type i = 0; i < n; ++i) {
    inp[i] =
#if defined(BOOST_DATE_TIME_NO_LOCALE)
      // tolower(int) is only defined for values representable as
      // unsigned char (or EOF). A plain char may be negative, so convert
      // through unsigned char before widening to int.
      static_cast<char>(std::tolower(static_cast<unsigned char>(inp[i])));
#else
      // tolower and others were brought in to std for borland >= v564
      // in compiler_config.hpp
      std::tolower(inp[i], loc);
#endif
  }
  return inp;
}
55 
56     //! Helper function for parse_date.
57     /* Used by-value parameter because we change the string and may
58      * want to preserve the original argument */
59     template<class month_type>
60     inline unsigned short
month_str_to_ushort(std::string const & s)61     month_str_to_ushort(std::string const& s) {
62       if((s.at(0) >= '0') && (s.at(0) <= '9')) {
63         return boost::lexical_cast<unsigned short>(s);
64       }
65       else {
66         std::string str = convert_to_lower(s);
67         typename month_type::month_map_ptr_type ptr = month_type::get_month_map_ptr();
68         typename month_type::month_map_type::iterator iter = ptr->find(str);
69         if(iter != ptr->end()) { // required for STLport
70           return iter->second;
71         }
72       }
73       return 13; // intentionally out of range - name not found
74     }
75 
//! Find the index of a string in either of 2 parallel arrays
/*! Entry i of short_names and entry i of long_names are each compared
 * with 's'; the first index at which either one is equal is returned.
 * Both arrays must contain 'size' elements. If no entry matches, 'size'
 * itself is returned, which is one past the last valid index.
 * Ex. "Jan" returns 0, "Dec" returns 11, "Tue" returns 2.
 * 'size' can be sent in with: (greg_month::max)() (which is 12),
 * (greg_weekday::max)() + 1 (which is 7) or date_time::NumSpecialValues
 */
template<class charT>
short find_match(const charT* const* short_names,
                 const charT* const* long_names,
                 short size,
                 const std::basic_string<charT>& s) {
  short idx = 0;
  while (idx < size) {
    const bool is_short_name = (s == short_names[idx]);
    // the long name is only examined when the short one did not match
    if (is_short_name || s == long_names[idx]) {
      return idx;
    }
    ++idx;
  }
  return size; // not-found, return a value out of range
}
95 
96     //! Generic function to parse a delimited date (eg: 2002-02-10)
97     /*! Accepted formats are: "2003-02-10" or " 2003-Feb-10" or
98      * "2003-Feburary-10"
99      * The order in which the Month, Day, & Year appear in the argument
100      * string can be accomodated by passing in the appropriate ymd_order_spec
101      */
102     template<class date_type>
103     date_type
parse_date(const std::string & s,int order_spec=ymd_order_iso)104     parse_date(const std::string& s, int order_spec = ymd_order_iso) {
105       std::string spec_str;
106       if(order_spec == ymd_order_iso) {
107         spec_str = "ymd";
108       }
109       else if(order_spec == ymd_order_dmy) {
110         spec_str = "dmy";
111       }
112       else { // (order_spec == ymd_order_us)
113         spec_str = "mdy";
114       }
115 
116       typedef typename date_type::month_type month_type;
117       unsigned pos = 0;
118       unsigned short year(0), month(0), day(0);
119       typedef typename std::basic_string<char>::traits_type traits_type;
120       typedef boost::char_separator<char, traits_type> char_separator_type;
121       typedef boost::tokenizer<char_separator_type,
122                                std::basic_string<char>::const_iterator,
123                                std::basic_string<char> > tokenizer;
124       typedef boost::tokenizer<char_separator_type,
125                                std::basic_string<char>::const_iterator,
126                                std::basic_string<char> >::iterator tokenizer_iterator;
127       // may need more delimiters, these work for the regression tests
128       const char sep_char[] = {',','-','.',' ','/','\0'};
129       char_separator_type sep(sep_char);
130       tokenizer tok(s,sep);
131       for(tokenizer_iterator beg=tok.begin();
132           beg!=tok.end() && pos < spec_str.size();
133           ++beg, ++pos) {
134         switch(spec_str.at(pos)) {
135           case 'y':
136           {
137             year = boost::lexical_cast<unsigned short>(*beg);
138             break;
139           }
140           case 'm':
141           {
142             month = month_str_to_ushort<month_type>(*beg);
143             break;
144           }
145           case 'd':
146           {
147             day = boost::lexical_cast<unsigned short>(*beg);
148             break;
149           }
150           default: break;
151         } //switch
152       }
153       return date_type(year, month, day);
154     }
155 
156     //! Generic function to parse undelimited date (eg: 20020201)
157     template<class date_type>
158     date_type
parse_undelimited_date(const std::string & s)159     parse_undelimited_date(const std::string& s) {
160       int offsets[] = {4,2,2};
161       int pos = 0;
162       //typename date_type::ymd_type ymd((year_type::min)(),1,1);
163       unsigned short y = 0, m = 0, d = 0;
164 
165       /* The two bool arguments state that parsing will not wrap
166        * (only the first 8 characters will be parsed) and partial
167        * strings will not be parsed.
168        * Ex:
169        * "2005121" will parse 2005 & 12, but not the "1" */
170       boost::offset_separator osf(offsets, offsets+3, false, false);
171 
172       typedef typename boost::tokenizer<boost::offset_separator,
173                                         std::basic_string<char>::const_iterator,
174                                         std::basic_string<char> > tokenizer_type;
175       tokenizer_type tok(s, osf);
176       for(typename tokenizer_type::iterator ti=tok.begin(); ti!=tok.end();++ti) {
177         unsigned short i = boost::lexical_cast<unsigned short>(*ti);
178         switch(pos) {
179         case 0: y = i; break;
180         case 1: m = i; break;
181         case 2: d = i; break;
182         default:       break;
183         }
184         pos++;
185       }
186       return date_type(y,m,d);
187     }
188 
189     //! Helper function for 'date gregorian::from_stream()'
190     /*! Creates a string from the iterators that reference the
191      * begining & end of a char[] or string. All elements are
192      * used in output string */
193     template<class date_type, class iterator_type>
194     inline
195     date_type
from_stream_type(iterator_type & beg,iterator_type const & end,char)196     from_stream_type(iterator_type& beg,
197                      iterator_type const& end,
198                      char)
199     {
200       std::ostringstream ss;
201       while(beg != end) {
202         ss << *beg++;
203       }
204       return parse_date<date_type>(ss.str());
205     }
206 
207     //! Helper function for 'date gregorian::from_stream()'
208     /*! Returns the first string found in the stream referenced by the
209      * begining & end iterators */
210     template<class date_type, class iterator_type>
211     inline
212     date_type
from_stream_type(iterator_type & beg,iterator_type const &,std::string const &)213     from_stream_type(iterator_type& beg,
214                      iterator_type const& /* end */,
215                      std::string const&)
216     {
217       return parse_date<date_type>(*beg);
218     }
219 
220     /* I believe the wchar stuff would be best elsewhere, perhaps in
221      * parse_date<>()? In the mean time this gets us started... */
222     //! Helper function for 'date gregorian::from_stream()'
223     /*! Creates a string from the iterators that reference the
224      * begining & end of a wstring. All elements are
225      * used in output string */
226     template<class date_type, class iterator_type>
227     inline
from_stream_type(iterator_type & beg,iterator_type const & end,wchar_t)228     date_type from_stream_type(iterator_type& beg,
229                                iterator_type const& end,
230                                wchar_t)
231     {
232       std::ostringstream ss;
233 #if !defined(BOOST_DATE_TIME_NO_LOCALE)
234       std::locale loc;
235       std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc);
236       while(beg != end) {
237         ss << fac.narrow(*beg++, 'X'); // 'X' will cause exception to be thrown
238       }
239 #else
240       while(beg != end) {
241         char c = 'X'; // 'X' will cause exception to be thrown
242         const wchar_t wc = *beg++;
243         if (wc >= 0 && wc <= 127)
244           c = static_cast< char >(wc);
245         ss << c;
246       }
247 #endif
248       return parse_date<date_type>(ss.str());
249     }
250 #ifndef BOOST_NO_STD_WSTRING
251     //! Helper function for 'date gregorian::from_stream()'
252     /*! Creates a string from the first wstring found in the stream
253      * referenced by the begining & end iterators */
254     template<class date_type, class iterator_type>
255     inline
256     date_type
from_stream_type(iterator_type & beg,iterator_type const &,std::wstring const &)257     from_stream_type(iterator_type& beg,
258                      iterator_type const& /* end */,
259                      std::wstring const&) {
260       std::wstring ws = *beg;
261       std::ostringstream ss;
262       std::wstring::iterator wsb = ws.begin(), wse = ws.end();
263 #if !defined(BOOST_DATE_TIME_NO_LOCALE)
264       std::locale loc;
265       std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc);
266       while(wsb != wse) {
267         ss << fac.narrow(*wsb++, 'X'); // 'X' will cause exception to be thrown
268       }
269 #else
270       while(wsb != wse) {
271         char c = 'X'; // 'X' will cause exception to be thrown
272         const wchar_t wc = *wsb++;
273         if (wc >= 0 && wc <= 127)
274           c = static_cast< char >(wc);
275         ss << c;
276       }
277 #endif
278       return parse_date<date_type>(ss.str());
279     }
280 #endif // BOOST_NO_STD_WSTRING
281 #if (defined(BOOST_MSVC) && (_MSC_VER < 1300))
282     // This function cannot be compiled with MSVC 6.0 due to internal compiler shorcomings
283 #else
284     //! function called by wrapper functions: date_period_from_(w)string()
285     template<class date_type, class charT>
286     period<date_type, typename date_type::duration_type>
from_simple_string_type(const std::basic_string<charT> & s)287     from_simple_string_type(const std::basic_string<charT>& s){
288       typedef typename std::basic_string<charT>::traits_type traits_type;
289       typedef typename boost::char_separator<charT, traits_type> char_separator;
290       typedef typename boost::tokenizer<char_separator,
291                                         typename std::basic_string<charT>::const_iterator,
292                                         std::basic_string<charT> > tokenizer;
293       const charT sep_list[4] = {'[','/',']','\0'};
294       char_separator sep(sep_list);
295       tokenizer tokens(s, sep);
296       typename tokenizer::iterator tok_it = tokens.begin();
297       std::basic_string<charT> date_string = *tok_it;
298       // get 2 string iterators and generate a date from them
299       typename std::basic_string<charT>::iterator date_string_start = date_string.begin(),
300                                                   date_string_end = date_string.end();
301       typedef typename std::iterator_traits<typename std::basic_string<charT>::iterator>::value_type value_type;
302       date_type d1 = from_stream_type<date_type>(date_string_start, date_string_end, value_type());
303       date_string = *(++tok_it); // next token
304       date_string_start = date_string.begin(), date_string_end = date_string.end();
305       date_type d2 = from_stream_type<date_type>(date_string_start, date_string_end, value_type());
306       return period<date_type, typename date_type::duration_type>(d1, d2);
307     }
308 #endif
309 
310 } } //namespace date_time
311 
312 
313 
314 
315 #endif
316 
317