1 #ifndef DATE_TIME_TZ_DB_BASE_HPP__ 2 #define DATE_TIME_TZ_DB_BASE_HPP__ 3 4 /* Copyright (c) 2003-2005 CrystalClear Software, Inc. 5 * Subject to the Boost Software License, Version 1.0. 6 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) 7 * Author: Jeff Garland, Bart Garst 8 * $Date$ 9 */ 10 11 #include <map> 12 #include <vector> 13 #include <string> 14 #include <sstream> 15 #include <fstream> 16 #include <stdexcept> 17 #include <boost/tokenizer.hpp> 18 #include <boost/shared_ptr.hpp> 19 #include <boost/throw_exception.hpp> 20 #include <boost/date_time/compiler_config.hpp> 21 #include <boost/date_time/time_zone_names.hpp> 22 #include <boost/date_time/time_zone_base.hpp> 23 #include <boost/date_time/time_parsing.hpp> 24 25 namespace boost { 26 namespace date_time { 27 28 //! Exception thrown when tz database cannot locate requested data file 29 class data_not_accessible : public std::logic_error 30 { 31 public: data_not_accessible()32 data_not_accessible() : 33 std::logic_error(std::string("Unable to locate or access the required datafile.")) 34 {} data_not_accessible(const std::string & filespec)35 data_not_accessible(const std::string& filespec) : 36 std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec)) 37 {} 38 }; 39 40 //! Exception thrown when tz database locates incorrect field structure in data file 41 class bad_field_count : public std::out_of_range 42 { 43 public: bad_field_count(const std::string & s)44 bad_field_count(const std::string& s) : 45 std::out_of_range(s) 46 {} 47 }; 48 49 //! Creates a database of time_zones from csv datafile 50 /*! The csv file containing the zone_specs used by the 51 * tz_db_base is intended to be customized by the 52 * library user. When customizing this file (or creating your own) the 53 * file must follow a specific format. 54 * 55 * This first line is expected to contain column headings and is therefore 56 * not processed by the tz_db_base. 57 * 58 * Each record (line) must have eleven fields. Some of those fields can 59 * be empty. Every field (even empty ones) must be enclosed in 60 * double-quotes. 61 * Ex: 62 * @code 63 * "America/Phoenix" <- string enclosed in quotes 64 * "" <- empty field 65 * @endcode 66 * 67 * Some fields represent a length of time. The format of these fields 68 * must be: 69 * @code 70 * "{+|-}hh:mm[:ss]" <- length-of-time format 71 * @endcode 72 * Where the plus or minus is mandatory and the seconds are optional. 73 * 74 * Since some time zones do not use daylight savings it is not always 75 * necessary for every field in a zone_spec to contain a value. All 76 * zone_specs must have at least ID and GMT offset. Zones that use 77 * daylight savings must have all fields filled except: 78 * STD ABBR, STD NAME, DST NAME. You should take note 79 * that DST ABBR is mandatory for zones that use daylight savings 80 * (see field descriptions for further details). 81 * 82 * ******* Fields and their description/details ********* 83 * 84 * ID: 85 * Contains the identifying string for the zone_spec. Any string will 86 * do as long as it's unique. No two ID's can be the same. 87 * 88 * STD ABBR: 89 * STD NAME: 90 * DST ABBR: 91 * DST NAME: 92 * These four are all the names and abbreviations used by the time 93 * zone being described. While any string will do in these fields, 94 * care should be taken. These fields hold the strings that will be 95 * used in the output of many of the local_time classes. 96 * Ex: 97 * @code 98 * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); 99 * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); 100 * cout << ny_time.to_long_string() << endl; 101 * // 2004-Aug-30 00:00:00 Eastern Daylight Time 102 * cout << ny_time.to_short_string() << endl; 103 * // 2004-Aug-30 00:00:00 EDT 104 * @endcode 105 * 106 * NOTE: The exact format/function names may vary - see local_time 107 * documentation for further details. 108 * 109 * GMT offset: 110 * This is the number of hours added to utc to get the local time 111 * before any daylight savings adjustments are made. Some examples 112 * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. 113 * The format must follow the length-of-time format described above. 114 * 115 * DST adjustment: 116 * The amount of time added to gmt_offset when daylight savings is in 117 * effect. The format must follow the length-of-time format described 118 * above. 119 * 120 * DST Start Date rule: 121 * This is a specially formatted string that describes the day of year 122 * in which the transition take place. It holds three fields of it's own, 123 * separated by semicolons. 124 * The first field indicates the "nth" weekday of the month. The possible 125 * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), 126 * and -1 (last). 127 * The second field indicates the day-of-week from 0-6 (Sun=0). 128 * The third field indicates the month from 1-12 (Jan=1). 129 * 130 * Examples are: "-1;5;9"="Last Friday of September", 131 * "2;1;3"="Second Monday of March" 132 * 133 * Start time: 134 * Start time is the number of hours past midnight, on the day of the 135 * start transition, the transition takes place. More simply put, the 136 * time of day the transition is made (in 24 hours format). The format 137 * must follow the length-of-time format described above with the 138 * exception that it must always be positive. 139 * 140 * DST End date rule: 141 * See DST Start date rule. The difference here is this is the day 142 * daylight savings ends (transition to STD). 143 * 144 * End time: 145 * Same as Start time. 146 */ 147 template<class time_zone_type, class rule_type> 148 class tz_db_base { 149 public: 150 /* Having CharT as a template parameter created problems 151 * with posix_time::duration_from_string. Templatizing 152 * duration_from_string was not possible at this time, however, 153 * it should be possible in the future (when poor compilers get 154 * fixed or stop being used). 155 * Since this class was designed to use CharT as a parameter it 156 * is simply typedef'd here to ease converting in back to a 157 * parameter the future */ 158 typedef char char_type; 159 160 typedef typename time_zone_type::base_type time_zone_base_type; 161 typedef typename time_zone_type::time_duration_type time_duration_type; 162 typedef time_zone_names_base<char_type> time_zone_names; 163 typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; 164 typedef std::basic_string<char_type> string_type; 165 166 //! Constructs an empty database tz_db_base()167 tz_db_base() {} 168 169 //! Process csv data file, may throw exceptions 170 /*! May throw bad_field_count exceptions */ load_from_stream(std::istream & in)171 void load_from_stream(std::istream &in) 172 { 173 std::string buff; 174 while( std::getline(in, buff)) { 175 parse_string(buff); 176 } 177 } 178 179 //! Process csv data file, may throw exceptions 180 /*! May throw data_not_accessible, or bad_field_count exceptions */ load_from_file(const std::string & pathspec)181 void load_from_file(const std::string& pathspec) 182 { 183 std::string buff; 184 185 std::ifstream ifs(pathspec.c_str()); 186 if(!ifs){ 187 boost::throw_exception(data_not_accessible(pathspec)); 188 } 189 std::getline(ifs, buff); // first line is column headings 190 this->load_from_stream(ifs); 191 } 192 193 //! returns true if record successfully added to map 194 /*! Takes a region name in the form of "America/Phoenix", and a 195 * time_zone object for that region. The id string must be a unique 196 * name that does not already exist in the database. */ add_record(const string_type & region,boost::shared_ptr<time_zone_base_type> tz)197 bool add_record(const string_type& region, 198 boost::shared_ptr<time_zone_base_type> tz) 199 { 200 typename map_type::value_type p(region, tz); 201 return (m_zone_map.insert(p)).second; 202 } 203 204 //! Returns a time_zone object built from the specs for the given region 205 /*! Returns a time_zone object built from the specs for the given 206 * region. If region does not exist a local_time::record_not_found 207 * exception will be thrown */ 208 boost::shared_ptr<time_zone_base_type> time_zone_from_region(const string_type & region) const209 time_zone_from_region(const string_type& region) const 210 { 211 // get the record 212 typename map_type::const_iterator record = m_zone_map.find(region); 213 if(record == m_zone_map.end()){ 214 return boost::shared_ptr<time_zone_base_type>(); //null pointer 215 } 216 return record->second; 217 } 218 219 //! Returns a vector of strings holding the time zone regions in the database region_list() const220 std::vector<std::string> region_list() const 221 { 222 typedef std::vector<std::string> vector_type; 223 vector_type regions; 224 typename map_type::const_iterator itr = m_zone_map.begin(); 225 while(itr != m_zone_map.end()) { 226 regions.push_back(itr->first); 227 ++itr; 228 } 229 return regions; 230 } 231 232 private: 233 typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; 234 map_type m_zone_map; 235 236 // start and end rule are of the same type 237 typedef typename rule_type::start_rule::week_num week_num; 238 239 /* TODO: mechanisms need to be put in place to handle different 240 * types of rule specs. parse_rules() only handles nth_kday 241 * rule types. */ 242 243 //! parses rule specs for transition day rules parse_rules(const string_type & sr,const string_type & er) const244 rule_type* parse_rules(const string_type& sr, const string_type& er) const 245 { 246 using namespace gregorian; 247 // start and end rule are of the same type, 248 // both are included here for readability 249 typedef typename rule_type::start_rule start_rule; 250 typedef typename rule_type::end_rule end_rule; 251 252 // these are: [start|end] nth, day, month 253 int s_nth = 0, s_d = 0, s_m = 0; 254 int e_nth = 0, e_d = 0, e_m = 0; 255 split_rule_spec(s_nth, s_d, s_m, sr); 256 split_rule_spec(e_nth, e_d, e_m, er); 257 258 typename start_rule::week_num s_wn, e_wn; 259 s_wn = get_week_num(s_nth); 260 e_wn = get_week_num(e_nth); 261 262 263 return new rule_type(start_rule(s_wn, 264 static_cast<unsigned short>(s_d), 265 static_cast<unsigned short>(s_m)), 266 end_rule(e_wn, 267 static_cast<unsigned short>(e_d), 268 static_cast<unsigned short>(e_m))); 269 } 270 //! helper function for parse_rules() get_week_num(int nth) const271 week_num get_week_num(int nth) const 272 { 273 typedef typename rule_type::start_rule start_rule; 274 switch(nth){ 275 case 1: 276 return start_rule::first; 277 case 2: 278 return start_rule::second; 279 case 3: 280 return start_rule::third; 281 case 4: 282 return start_rule::fourth; 283 case 5: 284 case -1: 285 return start_rule::fifth; 286 default: 287 // shouldn't get here - add error handling later 288 break; 289 } 290 return start_rule::fifth; // silence warnings 291 } 292 293 //! splits the [start|end]_date_rule string into 3 ints split_rule_spec(int & nth,int & d,int & m,string_type rule) const294 void split_rule_spec(int& nth, int& d, int& m, string_type rule) const 295 { 296 typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; 297 typedef boost::tokenizer<char_separator_type, 298 std::basic_string<char_type>::const_iterator, 299 std::basic_string<char_type> > tokenizer; 300 typedef boost::tokenizer<char_separator_type, 301 std::basic_string<char_type>::const_iterator, 302 std::basic_string<char_type> >::iterator tokenizer_iterator; 303 304 const char_type sep_char[] = { ';', '\0'}; 305 char_separator_type sep(sep_char); 306 tokenizer tokens(rule, sep); // 3 fields 307 308 if ( std::distance ( tokens.begin(), tokens.end ()) != 3 ) { 309 std::ostringstream msg; 310 msg << "Expecting 3 fields, got " 311 << std::distance ( tokens.begin(), tokens.end ()) 312 << " fields in line: " << rule; 313 boost::throw_exception(bad_field_count(msg.str())); 314 } 315 316 tokenizer_iterator tok_iter = tokens.begin(); 317 nth = std::atoi(tok_iter->c_str()); ++tok_iter; 318 d = std::atoi(tok_iter->c_str()); ++tok_iter; 319 m = std::atoi(tok_iter->c_str()); 320 } 321 322 323 //! Take a line from the csv, turn it into a time_zone_type. 324 /*! Take a line from the csv, turn it into a time_zone_type, 325 * and add it to the map. Zone_specs in csv file are expected to 326 * have eleven fields that describe the time zone. Returns true if 327 * zone_spec successfully added to database */ parse_string(string_type & s)328 bool parse_string(string_type& s) 329 { 330 std::vector<string_type> result; 331 typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; 332 333 token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); 334 335 token_iter_type end; 336 while (i != end) { 337 result.push_back(*i); 338 i++; 339 } 340 341 enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, 342 DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, 343 END_TIME, FIELD_COUNT }; 344 345 //take a shot at fixing gcc 4.x error 346 const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); 347 if (result.size() != expected_fields) { 348 std::ostringstream msg; 349 msg << "Expecting " << FIELD_COUNT << " fields, got " 350 << result.size() << " fields in line: " << s; 351 boost::throw_exception(bad_field_count(msg.str())); 352 BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach 353 } 354 355 // initializations 356 bool has_dst = true; 357 if(result[DSTABBR] == std::string()){ 358 has_dst = false; 359 } 360 361 362 // start building components of a time_zone 363 time_zone_names names(result[STDNAME], result[STDABBR], 364 result[DSTNAME], result[DSTABBR]); 365 366 time_duration_type utc_offset = 367 str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); 368 369 dst_adjustment_offsets adjust(time_duration_type(0,0,0), 370 time_duration_type(0,0,0), 371 time_duration_type(0,0,0)); 372 373 boost::shared_ptr<rule_type> rules; 374 375 if(has_dst){ 376 adjust = dst_adjustment_offsets( 377 str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), 378 str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), 379 str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) 380 ); 381 382 rules = 383 boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], 384 result[END_DATE_RULE])); 385 } 386 string_type id(result[ID]); 387 boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); 388 return (add_record(id, zone)); 389 390 } 391 392 }; 393 394 } } // namespace 395 396 #endif // DATE_TIME_TZ_DB_BASE_HPP__ 397