1 // 2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See 5 // accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED 9 #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED 10 #include <boost/locale/config.hpp> 11 #ifdef BOOST_MSVC 12 # pragma warning(push) 13 # pragma warning(disable : 4275 4251 4231 4660) 14 #endif 15 #include <locale> 16 #include <string> 17 #include <iosfwd> 18 #include <iterator> 19 20 21 namespace boost { 22 namespace locale { 23 namespace boundary { 24 /// \cond INTERNAL 25 namespace details { 26 template<typename LeftIterator,typename RightIterator> compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end)27 int compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end) 28 { 29 typedef LeftIterator left_iterator; 30 typedef RightIterator right_iterator; 31 typedef typename std::iterator_traits<left_iterator>::value_type char_type; 32 typedef std::char_traits<char_type> traits; 33 while(l_begin!=l_end && r_begin!=r_end) { 34 char_type lchar = *l_begin++; 35 char_type rchar = *r_begin++; 36 if(traits::eq(lchar,rchar)) 37 continue; 38 if(traits::lt(lchar,rchar)) 39 return -1; 40 else 41 return 1; 42 } 43 if(l_begin==l_end && r_begin==r_end) 44 return 0; 45 if(l_begin==l_end) 46 return -1; 47 else 48 return 1; 49 } 50 51 52 template<typename Left,typename Right> compare_text(Left const & l,Right const & r)53 int compare_text(Left const &l,Right const &r) 54 { 55 return compare_text(l.begin(),l.end(),r.begin(),r.end()); 56 } 57 58 template<typename Left,typename Char> compare_string(Left const & l,Char const * begin)59 int compare_string(Left const &l,Char const *begin) 60 { 61 Char const *end = begin; 62 while(*end!=0) 63 end++; 64 return compare_text(l.begin(),l.end(),begin,end); 65 } 66 67 template<typename Right,typename Char> compare_string(Char const * begin,Right const & r)68 int compare_string(Char const *begin,Right const &r) 69 { 70 Char const *end = begin; 71 while(*end!=0) 72 end++; 73 return compare_text(begin,end,r.begin(),r.end()); 74 } 75 76 } 77 /// \endcond 78 79 /// 80 /// \addtogroup boundary 81 /// @{ 82 83 /// 84 /// \brief a segment object that represents a pair of two iterators that define the range where 85 /// this segment exits and a rule that defines it. 86 /// 87 /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function 88 /// you can get a specific rule this segment was selected with. For example, when you use 89 /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref word_kana)!=0 90 /// For a sentence analysis you can check if the sentence is selected because a sentence terminator is found (\ref sentence_term) or 91 /// there is a line break (\ref sentence_sep). 92 /// 93 /// This object can be automatically converted to std::basic_string with the same type of character. It is also 94 /// valid range that has begin() and end() member functions returning iterators on the location of the segment. 95 /// 96 /// \see 97 /// 98 /// - \ref segment_index 99 /// - \ref boundary_point 100 /// - \ref boundary_point_index 101 /// 102 template<typename IteratorType> 103 class segment : public std::pair<IteratorType,IteratorType> { 104 public: 105 /// 106 /// The type of the underlying character 107 /// 108 typedef typename std::iterator_traits<IteratorType>::value_type char_type; 109 /// 110 /// The type of the string it is converted to 111 /// 112 typedef std::basic_string<char_type> string_type; 113 /// 114 /// The value that iterators return - the character itself 115 /// 116 typedef char_type value_type; 117 /// 118 /// The iterator that allows to iterate the range 119 /// 120 typedef IteratorType iterator; 121 /// 122 /// The iterator that allows to iterate the range 123 /// 124 typedef IteratorType const_iterator; 125 /// 126 /// The type that represent a difference between two iterators 127 /// 128 typedef typename std::iterator_traits<IteratorType>::difference_type difference_type; 129 130 /// 131 /// Default constructor 132 /// segment()133 segment() {} 134 /// 135 /// Create a segment using two iterators and a rule that represents this point 136 /// segment(iterator b,iterator e,rule_type r)137 segment(iterator b,iterator e,rule_type r) : 138 std::pair<IteratorType,IteratorType>(b,e), 139 rule_(r) 140 { 141 } 142 /// 143 /// Set the start of the range 144 /// begin(iterator const & v)145 void begin(iterator const &v) 146 { 147 this->first = v; 148 } 149 /// 150 /// Set the end of the range 151 /// end(iterator const & v)152 void end(iterator const &v) 153 { 154 this->second = v; 155 } 156 157 /// 158 /// Get the start of the range 159 /// begin() const160 IteratorType begin() const 161 { 162 return this->first; 163 } 164 /// 165 /// Set the end of the range 166 /// end() const167 IteratorType end() const 168 { 169 return this->second; 170 } 171 172 /// 173 /// Convert the range to a string automatically 174 /// 175 template <class T, class A> operator std::basic_string<char_type,T,A>() const176 operator std::basic_string<char_type, T, A> ()const 177 { 178 return std::basic_string<char_type, T, A>(this->first, this->second); 179 } 180 181 /// 182 /// Create a string from the range explicitly 183 /// str() const184 string_type str() const 185 { 186 return string_type(begin(),end()); 187 } 188 189 /// 190 /// Get the length of the text chunk 191 /// 192 length() const193 size_t length() const 194 { 195 return std::distance(begin(),end()); 196 } 197 198 /// 199 /// Check if the segment is empty 200 /// empty() const201 bool empty() const 202 { 203 return begin() == end(); 204 } 205 206 /// 207 /// Get the rule that is used for selection of this segment. 208 /// rule() const209 rule_type rule() const 210 { 211 return rule_; 212 } 213 /// 214 /// Set a rule that is used for segment selection 215 /// rule(rule_type r)216 void rule(rule_type r) 217 { 218 rule_ = r; 219 } 220 221 // make sure we override std::pair's operator== 222 223 /// Compare two segments operator ==(segment const & other)224 bool operator==(segment const &other) 225 { 226 return details::compare_text(*this,other) == 0; 227 } 228 229 /// Compare two segments operator !=(segment const & other)230 bool operator!=(segment const &other) 231 { 232 return details::compare_text(*this,other) != 0; 233 } 234 235 private: 236 rule_type rule_; 237 238 }; 239 240 241 /// Compare two segments 242 template<typename IteratorL,typename IteratorR> operator ==(segment<IteratorL> const & l,segment<IteratorR> const & r)243 bool operator==(segment<IteratorL> const &l,segment<IteratorR> const &r) 244 { 245 return details::compare_text(l,r) == 0; 246 } 247 /// Compare two segments 248 template<typename IteratorL,typename IteratorR> operator !=(segment<IteratorL> const & l,segment<IteratorR> const & r)249 bool operator!=(segment<IteratorL> const &l,segment<IteratorR> const &r) 250 { 251 return details::compare_text(l,r) != 0; 252 } 253 254 /// Compare two segments 255 template<typename IteratorL,typename IteratorR> operator <(segment<IteratorL> const & l,segment<IteratorR> const & r)256 bool operator<(segment<IteratorL> const &l,segment<IteratorR> const &r) 257 { 258 return details::compare_text(l,r) < 0; 259 } 260 /// Compare two segments 261 template<typename IteratorL,typename IteratorR> operator <=(segment<IteratorL> const & l,segment<IteratorR> const & r)262 bool operator<=(segment<IteratorL> const &l,segment<IteratorR> const &r) 263 { 264 return details::compare_text(l,r) <= 0; 265 } 266 /// Compare two segments 267 template<typename IteratorL,typename IteratorR> operator >(segment<IteratorL> const & l,segment<IteratorR> const & r)268 bool operator>(segment<IteratorL> const &l,segment<IteratorR> const &r) 269 { 270 return details::compare_text(l,r) > 0; 271 } 272 /// Compare two segments 273 template<typename IteratorL,typename IteratorR> operator >=(segment<IteratorL> const & l,segment<IteratorR> const & r)274 bool operator>=(segment<IteratorL> const &l,segment<IteratorR> const &r) 275 { 276 return details::compare_text(l,r) >= 0; 277 } 278 279 /// Compare string and segment 280 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator ==(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)281 bool operator==(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 282 { 283 return details::compare_text(l,r) == 0; 284 } 285 /// Compare string and segment 286 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator !=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)287 bool operator!=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 288 { 289 return details::compare_text(l,r) != 0; 290 } 291 292 /// Compare string and segment 293 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator <(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)294 bool operator<(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 295 { 296 return details::compare_text(l,r) < 0; 297 } 298 /// Compare string and segment 299 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator <=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)300 bool operator<=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 301 { 302 return details::compare_text(l,r) <= 0; 303 } 304 /// Compare string and segment 305 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator >(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)306 bool operator>(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 307 { 308 return details::compare_text(l,r) > 0; 309 } 310 /// Compare string and segment 311 template<typename CharType,typename Traits,typename Alloc,typename IteratorR> operator >=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)312 bool operator>=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r) 313 { 314 return details::compare_text(l,r) >= 0; 315 } 316 317 /// Compare string and segment 318 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator ==(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)319 bool operator==(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 320 { 321 return details::compare_text(l,r) == 0; 322 } 323 /// Compare string and segment 324 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator !=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)325 bool operator!=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 326 { 327 return details::compare_text(l,r) != 0; 328 } 329 330 /// Compare string and segment 331 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator <(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)332 bool operator<(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 333 { 334 return details::compare_text(l,r) < 0; 335 } 336 /// Compare string and segment 337 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator <=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)338 bool operator<=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 339 { 340 return details::compare_text(l,r) <= 0; 341 } 342 /// Compare string and segment 343 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator >(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)344 bool operator>(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 345 { 346 return details::compare_text(l,r) > 0; 347 } 348 /// Compare string and segment 349 template<typename Iterator,typename CharType,typename Traits,typename Alloc> operator >=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)350 bool operator>=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r) 351 { 352 return details::compare_text(l,r) >= 0; 353 } 354 355 356 /// Compare C string and segment 357 template<typename CharType,typename IteratorR> operator ==(CharType const * l,segment<IteratorR> const & r)358 bool operator==(CharType const *l,segment<IteratorR> const &r) 359 { 360 return details::compare_string(l,r) == 0; 361 } 362 /// Compare C string and segment 363 template<typename CharType,typename IteratorR> operator !=(CharType const * l,segment<IteratorR> const & r)364 bool operator!=(CharType const *l,segment<IteratorR> const &r) 365 { 366 return details::compare_string(l,r) != 0; 367 } 368 369 /// Compare C string and segment 370 template<typename CharType,typename IteratorR> operator <(CharType const * l,segment<IteratorR> const & r)371 bool operator<(CharType const *l,segment<IteratorR> const &r) 372 { 373 return details::compare_string(l,r) < 0; 374 } 375 /// Compare C string and segment 376 template<typename CharType,typename IteratorR> operator <=(CharType const * l,segment<IteratorR> const & r)377 bool operator<=(CharType const *l,segment<IteratorR> const &r) 378 { 379 return details::compare_string(l,r) <= 0; 380 } 381 /// Compare C string and segment 382 template<typename CharType,typename IteratorR> operator >(CharType const * l,segment<IteratorR> const & r)383 bool operator>(CharType const *l,segment<IteratorR> const &r) 384 { 385 return details::compare_string(l,r) > 0; 386 } 387 /// Compare C string and segment 388 template<typename CharType,typename IteratorR> operator >=(CharType const * l,segment<IteratorR> const & r)389 bool operator>=(CharType const *l,segment<IteratorR> const &r) 390 { 391 return details::compare_string(l,r) >= 0; 392 } 393 394 /// Compare C string and segment 395 template<typename Iterator,typename CharType> operator ==(segment<Iterator> const & l,CharType const * r)396 bool operator==(segment<Iterator> const &l,CharType const *r) 397 { 398 return details::compare_string(l,r) == 0; 399 } 400 /// Compare C string and segment 401 template<typename Iterator,typename CharType> operator !=(segment<Iterator> const & l,CharType const * r)402 bool operator!=(segment<Iterator> const &l,CharType const *r) 403 { 404 return details::compare_string(l,r) != 0; 405 } 406 407 /// Compare C string and segment 408 template<typename Iterator,typename CharType> operator <(segment<Iterator> const & l,CharType const * r)409 bool operator<(segment<Iterator> const &l,CharType const *r) 410 { 411 return details::compare_string(l,r) < 0; 412 } 413 /// Compare C string and segment 414 template<typename Iterator,typename CharType> operator <=(segment<Iterator> const & l,CharType const * r)415 bool operator<=(segment<Iterator> const &l,CharType const *r) 416 { 417 return details::compare_string(l,r) <= 0; 418 } 419 /// Compare C string and segment 420 template<typename Iterator,typename CharType> operator >(segment<Iterator> const & l,CharType const * r)421 bool operator>(segment<Iterator> const &l,CharType const *r) 422 { 423 return details::compare_string(l,r) > 0; 424 } 425 /// Compare C string and segment 426 template<typename Iterator,typename CharType> operator >=(segment<Iterator> const & l,CharType const * r)427 bool operator>=(segment<Iterator> const &l,CharType const *r) 428 { 429 return details::compare_string(l,r) >= 0; 430 } 431 432 433 434 435 436 437 typedef segment<std::string::const_iterator> ssegment; ///< convenience typedef 438 typedef segment<std::wstring::const_iterator> wssegment; ///< convenience typedef 439 #ifdef BOOST_HAS_CHAR16_T 440 typedef segment<std::u16string::const_iterator> u16ssegment;///< convenience typedef 441 #endif 442 #ifdef BOOST_HAS_CHAR32_T 443 typedef segment<std::u32string::const_iterator> u32ssegment;///< convenience typedef 444 #endif 445 446 typedef segment<char const *> csegment; ///< convenience typedef 447 typedef segment<wchar_t const *> wcsegment; ///< convenience typedef 448 #ifdef BOOST_HAS_CHAR16_T 449 typedef segment<char16_t const *> u16csegment; ///< convenience typedef 450 #endif 451 #ifdef BOOST_HAS_CHAR32_T 452 typedef segment<char32_t const *> u32csegment; ///< convenience typedef 453 #endif 454 455 456 457 458 459 /// 460 /// Write the segment to the stream character by character 461 /// 462 template<typename CharType,typename TraitsType,typename Iterator> operator <<(std::basic_ostream<CharType,TraitsType> & out,segment<Iterator> const & tok)463 std::basic_ostream<CharType,TraitsType> &operator<<( 464 std::basic_ostream<CharType,TraitsType> &out, 465 segment<Iterator> const &tok) 466 { 467 for(Iterator p=tok.begin(),e=tok.end();p!=e;++p) 468 out << *p; 469 return out; 470 } 471 472 /// @} 473 474 } // boundary 475 } // locale 476 } // boost 477 478 #ifdef BOOST_MSVC 479 #pragma warning(pop) 480 #endif 481 482 #endif 483 484 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 485