1 #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2 #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
3 
4 /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5  * Use, modification and distribution is subject to the
6  * Boost Software License, Version 1.0. (See accompanying
7  * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8  * Author: Jeff Garland, Bart Garst
9  * $Date$
10  */
11 
12 
13 #include <boost/algorithm/string/case_conv.hpp>
14 #include <cctype>
15 #include <map>
16 #include <string>
17 #include <vector>
18 #include <ostream>
19 #include <iterator>
20 #include <algorithm>
21 
22 namespace boost { namespace date_time {
23 
24 
25 template<typename charT>
26 struct parse_match_result
27 {
parse_match_resultboost::date_time::parse_match_result28   parse_match_result() :
29     match_depth(0),
30     current_match(PARSE_ERROR)
31   {}
32   typedef std::basic_string<charT> string_type;
remainingboost::date_time::parse_match_result33   string_type remaining() const
34   {
35     if (match_depth == cache.size()) {
36       return string_type();
37     }
38     if (current_match == PARSE_ERROR) {
39       return cache;
40     }
41     //some of the cache was used return the rest
42     return string_type(cache, match_depth);
43   }
last_charboost::date_time::parse_match_result44   charT last_char() const
45   {
46     return cache[cache.size()-1];
47   }
48   //! Returns true if more characters were parsed than was necessary
49   /*! Should be used in conjunction with last_char()
50    *  to get the remaining character.
51    */
has_remainingboost::date_time::parse_match_result52   bool has_remaining() const
53   {
54     return (cache.size() > match_depth);
55   }
56 
57   // cache will hold characters that have been read from the stream
58   string_type cache;
59   unsigned short match_depth;
60   short current_match;
61   enum PARSE_STATE { PARSE_ERROR = -1 };
62 };
63 
64   //for debug -- really only char streams...
65 template<typename charT>
66 std::basic_ostream<charT>&
operator <<(std::basic_ostream<charT> & os,parse_match_result<charT> & mr)67 operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
68 {
69   os << "cm: " << mr.current_match
70      << " C: '" << mr.cache
71      << "' md: " << mr.match_depth
72      << " R: " << mr.remaining();
73   return os;
74 }
75 
76 
77 
78 //! Recursive data structure to allow efficient parsing of various strings
79 /*! This class provides a quick lookup by building what amounts to a
80  *  tree data structure.  It also features a match function which can
81  *  can handle nasty input interators by caching values as it recurses
82  *  the tree so that it can backtrack as needed.
83  */
84 template<typename charT>
85 struct string_parse_tree
86 {
87 #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) )
88   typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
89 #else
90   typedef std::multimap<charT, string_parse_tree > ptree_coll;
91 #endif
92   typedef typename ptree_coll::value_type value_type;
93   typedef typename ptree_coll::iterator iterator;
94   typedef typename ptree_coll::const_iterator const_iterator;
95   typedef std::basic_string<charT> string_type;
96   typedef std::vector<std::basic_string<charT> > collection_type;
97   typedef parse_match_result<charT> parse_match_result_type;
98 
99   /*! Parameter "starting_point" designates where the numbering begins.
100    * A starting_point of zero will start the numbering at zero
101    * (Sun=0, Mon=1, ...) were a starting_point of one starts the
102    * numbering at one (Jan=1, Feb=2, ...). The default is zero,
103    * negative vaules are not allowed */
string_parse_treeboost::date_time::string_parse_tree104   string_parse_tree(collection_type names, unsigned int starting_point=0) :
105     m_value(parse_match_result_type::PARSE_ERROR)
106   {
107     // iterate thru all the elements and build the tree
108     unsigned short index = 0;
109     while (index != names.size() ) {
110       string_type s = boost::algorithm::to_lower_copy(names[index]);
111       insert(s, static_cast<unsigned short>(index + starting_point));
112       index++;
113     }
114     //set the last tree node = index+1  indicating a value
115     index++;
116   }
117 
118 
string_parse_treeboost::date_time::string_parse_tree119   string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) :
120     m_value(value)
121   {}
122   ptree_coll m_next_chars;
123   short m_value;
124 
insertboost::date_time::string_parse_tree125   void insert(const string_type& s, unsigned short value)
126   {
127     unsigned int i = 0;
128     iterator ti;
129     while(i < s.size()) {
130       if (i==0) {
131         if (i == (s.size()-1)) {
132           ti = m_next_chars.insert(value_type(s[i],
133                                               string_parse_tree<charT>(value)));
134         }
135         else {
136           ti = m_next_chars.insert(value_type(s[i],
137                                               string_parse_tree<charT>()));
138         }
139       }
140       else {
141         if (i == (s.size()-1)) {
142           ti = ti->second.m_next_chars.insert(value_type(s[i],
143                                                          string_parse_tree<charT>(value)));
144         }
145 
146         else {
147           ti = ti->second.m_next_chars.insert(value_type(s[i],
148                                                          string_parse_tree<charT>()));
149         }
150 
151       }
152       i++;
153     }
154   }
155 
156 
157   //! Recursive function that finds a matching string in the tree.
158   /*! Must check match_results::has_remaining() after match() is
159    * called. This is required so the user can determine if
160    * stream iterator is already pointing to the expected
161    * character or not (match() might advance sitr to next char in stream).
162    *
163    * A parse_match_result that has been returned from a failed match
164    * attempt can be sent in to the match function of a different
165    * string_parse_tree to attempt a match there. Use the iterators
166    * for the partially consumed stream, the parse_match_result object,
167    * and '0' for the level parameter. */
168   short
matchboost::date_time::string_parse_tree169   match(std::istreambuf_iterator<charT>& sitr,
170         std::istreambuf_iterator<charT>& stream_end,
171         parse_match_result_type& result,
172         unsigned int& level)  const
173   {
174 
175     level++;
176     charT c;
177     // if we conditionally advance sitr, we won't have
178     // to consume the next character past the input
179     bool adv_itr = true;
180     if (level > result.cache.size()) {
181       if (sitr == stream_end) return 0; //bail - input exhausted
182       c = static_cast<charT>(std::tolower(*sitr));
183       //result.cache += c;
184       //sitr++;
185     }
186     else {
187       // if we're looking for characters from the cache,
188       // we don't want to increment sitr
189       adv_itr = false;
190       c = static_cast<charT>(std::tolower(result.cache[level-1]));
191     }
192     const_iterator litr = m_next_chars.lower_bound(c);
193     const_iterator uitr = m_next_chars.upper_bound(c);
194     while (litr != uitr) { // equal if not found
195       if(adv_itr) {
196         sitr++;
197         result.cache += c;
198       }
199       if (litr->second.m_value != -1) { // -1 is default value
200         if (result.match_depth < level) {
201           result.current_match = litr->second.m_value;
202           result.match_depth = static_cast<unsigned short>(level);
203         }
204         litr->second.match(sitr, stream_end,
205                            result, level);
206         level--;
207       }
208       else {
209         litr->second.match(sitr, stream_end,
210                            result, level);
211         level--;
212       }
213 
214       if(level <= result.cache.size()) {
215         adv_itr = false;
216       }
217 
218       litr++;
219     }
220     return result.current_match;
221 
222   }
223 
224   /*! Must check match_results::has_remaining() after match() is
225    * called. This is required so the user can determine if
226    * stream iterator is already pointing to the expected
227    * character or not (match() might advance sitr to next char in stream).
228    */
229   parse_match_result_type
matchboost::date_time::string_parse_tree230   match(std::istreambuf_iterator<charT>& sitr,
231         std::istreambuf_iterator<charT>& stream_end) const
232   {
233     // lookup to_lower of char in tree.
234     unsigned int level = 0;
235     //    string_type cache;
236     parse_match_result_type result;
237     match(sitr, stream_end, result, level);
238     return result;
239   }
240 
printmeboost::date_time::string_parse_tree241   void printme(std::ostream& os, int& level)
242   {
243     level++;
244     iterator itr = m_next_chars.begin();
245     iterator end = m_next_chars.end();
246     //    os << "starting level: " << level << std::endl;
247     while (itr != end) {
248       os << "level:  " << level
249          << " node:  " << itr->first
250          << " value: " << itr->second.m_value
251          << std::endl;
252       itr->second.printme(os, level);
253       itr++;
254     }
255     level--;
256   }
257 
printboost::date_time::string_parse_tree258   void print(std::ostream& os)
259   {
260     int level = 0;
261     printme(os, level);
262   }
263 
printmatchboost::date_time::string_parse_tree264   void printmatch(std::ostream& os, charT c)
265   {
266     iterator litr = m_next_chars.lower_bound(c);
267     iterator uitr = m_next_chars.upper_bound(c);
268     os << "matches for: " << c << std::endl;
269     while (litr != uitr) {
270       os << " node:  " << litr->first
271          << " value: " << litr->second.m_value
272          << std::endl;
273       litr++;
274     }
275   }
276 
277 };
278 
279 
280 } } //namespace
281 #endif
282