1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
9 #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
10 #include <boost/locale/config.hpp>
11 #ifdef BOOST_MSVC
12 #  pragma warning(push)
13 #  pragma warning(disable : 4275 4251 4231 4660)
14 #endif
15 #include <locale>
16 #include <string>
17 #include <iosfwd>
18 #include <iterator>
19 
20 
21 namespace boost {
22 namespace locale {
23 namespace boundary {
24     /// \cond INTERNAL
25     namespace details {
/// Lexicographically compare the character ranges [l_begin,l_end) and [r_begin,r_end).
///
/// Characters are compared pairwise using std::char_traits of the left range's value type;
/// characters read from the right range are converted to that type before comparison.
/// If one range is a proper prefix of the other, the shorter range orders first.
///
/// \return 0 if both ranges are equal, -1 if the left range orders before the right one, 1 otherwise
template<typename LeftIterator,typename RightIterator>
int compare_text(LeftIterator l_begin,LeftIterator l_end,RightIterator r_begin,RightIterator r_end)
{
    typedef typename std::iterator_traits<LeftIterator>::value_type char_type;
    typedef std::char_traits<char_type> traits;
    for(; l_begin!=l_end && r_begin!=r_end; ++l_begin, ++r_begin) {
        char_type const lchar = *l_begin;
        char_type const rchar = *r_begin;
        if(traits::eq(lchar,rchar))
            continue;
        // First mismatching position decides the order
        return traits::lt(lchar,rchar) ? -1 : 1;
    }
    // At least one range is exhausted here: equal length means equal text,
    // otherwise the range that ended first is the smaller one
    if(l_begin==l_end)
        return r_begin==r_end ? 0 : -1;
    return 1;
}
50 
51 
/// Lexicographically compare two ranges that provide begin() and end() member functions.
///
/// Forwards to the iterator based compare_text overload.
///
/// \return 0 if both ranges are equal, -1 if \a l orders before \a r, 1 otherwise
template<typename Left,typename Right>
int compare_text(Left const &l,Right const &r)
{
    return compare_text(l.begin(),l.end(),r.begin(),r.end());
}
57 
/// Lexicographically compare a range \a l with a zero terminated string starting at \a begin.
///
/// The string is scanned up to, but not including, its first zero character,
/// so \a begin must point to a zero terminated sequence.
///
/// \return 0 if both texts are equal, -1 if \a l orders before the string, 1 otherwise
template<typename Left,typename Char>
int compare_string(Left const &l,Char const *begin)
{
    // Locate the terminator to get a [begin,end) pair for compare_text
    Char const *end = begin;
    while(*end!=0)
        ++end;
    return compare_text(l.begin(),l.end(),begin,end);
}
66 
/// Lexicographically compare a zero terminated string starting at \a begin with a range \a r.
///
/// The string is scanned up to, but not including, its first zero character,
/// so \a begin must point to a zero terminated sequence.
///
/// \return 0 if both texts are equal, -1 if the string orders before \a r, 1 otherwise
template<typename Right,typename Char>
int compare_string(Char const *begin,Right const &r)
{
    // Locate the terminator to get a [begin,end) pair for compare_text
    Char const *end = begin;
    while(*end!=0)
        ++end;
    return compare_text(begin,end,r.begin(),r.end());
}
75 
76     }
77     /// \endcond
78 
79     ///
80     /// \addtogroup boundary
81     /// @{
82 
83     ///
84     /// \brief a segment object that represents a pair of two iterators that define the range where
85     /// this segment exits and a rule that defines it.
86     ///
87     /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function
88     /// you can get a specific rule this segment was selected with. For example, when you use
89     /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref word_kana)!=0
90     /// For a sentence analysis you can check if the sentence is selected because a sentence terminator is found (\ref sentence_term) or
91     /// there is a line break (\ref sentence_sep).
92     ///
93     /// This object can be automatically converted to std::basic_string with the same type of character. It is also
94     /// valid range that has begin() and end() member functions returning iterators on the location of the segment.
95     ///
96     /// \see
97     ///
98     /// - \ref segment_index
99     /// - \ref boundary_point
100     /// - \ref boundary_point_index
101     ///
102     template<typename IteratorType>
103     class segment : public std::pair<IteratorType,IteratorType> {
104     public:
105         ///
106         /// The type of the underlying character
107         ///
108         typedef typename std::iterator_traits<IteratorType>::value_type char_type;
109         ///
110         /// The type of the string it is converted to
111         ///
112         typedef std::basic_string<char_type> string_type;
113         ///
114         /// The value that iterators return  - the character itself
115         ///
116         typedef char_type value_type;
117         ///
118         /// The iterator that allows to iterate the range
119         ///
120         typedef IteratorType iterator;
121         ///
122         /// The iterator that allows to iterate the range
123         ///
124         typedef IteratorType const_iterator;
125         ///
126         /// The type that represent a difference between two iterators
127         ///
128         typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;
129 
130         ///
131         /// Default constructor
132         ///
segment()133         segment() {}
134         ///
135         /// Create a segment using two iterators and a rule that represents this point
136         ///
segment(iterator b,iterator e,rule_type r)137         segment(iterator b,iterator e,rule_type r) :
138             std::pair<IteratorType,IteratorType>(b,e),
139             rule_(r)
140         {
141         }
142         ///
143         /// Set the start of the range
144         ///
begin(iterator const & v)145         void begin(iterator const &v)
146         {
147             this->first = v;
148         }
149         ///
150         /// Set the end of the range
151         ///
end(iterator const & v)152          void end(iterator const &v)
153         {
154             this->second = v;
155         }
156 
157         ///
158         /// Get the start of the range
159         ///
begin() const160         IteratorType begin() const
161         {
162             return this->first;
163         }
164         ///
165         /// Set the end of the range
166         ///
end() const167         IteratorType end() const
168         {
169             return this->second;
170         }
171 
172         ///
173         /// Convert the range to a string automatically
174         ///
175         template <class T, class A>
operator std::basic_string<char_type,T,A>() const176         operator std::basic_string<char_type, T, A> ()const
177         {
178             return std::basic_string<char_type, T, A>(this->first, this->second);
179         }
180 
181         ///
182         /// Create a string from the range explicitly
183         ///
str() const184         string_type str() const
185         {
186             return string_type(begin(),end());
187         }
188 
189         ///
190         /// Get the length of the text chunk
191         ///
192 
length() const193         size_t length() const
194         {
195             return std::distance(begin(),end());
196         }
197 
198         ///
199         /// Check if the segment is empty
200         ///
empty() const201         bool empty() const
202         {
203             return begin() == end();
204         }
205 
206         ///
207         /// Get the rule that is used for selection of this segment.
208         ///
rule() const209         rule_type rule() const
210         {
211             return rule_;
212         }
213         ///
214         /// Set a rule that is used for segment selection
215         ///
rule(rule_type r)216         void rule(rule_type r)
217         {
218             rule_ = r;
219         }
220 
221         // make sure we override std::pair's operator==
222 
223         /// Compare two segments
operator ==(segment const & other)224         bool operator==(segment const &other)
225         {
226             return details::compare_text(*this,other) == 0;
227         }
228 
229         /// Compare two segments
operator !=(segment const & other)230         bool operator!=(segment const &other)
231         {
232             return details::compare_text(*this,other) != 0;
233         }
234 
235     private:
236         rule_type rule_;
237 
238     };
239 
240 
241     /// Compare two segments
242     template<typename IteratorL,typename IteratorR>
operator ==(segment<IteratorL> const & l,segment<IteratorR> const & r)243     bool operator==(segment<IteratorL> const &l,segment<IteratorR> const &r)
244     {
245         return details::compare_text(l,r) == 0;
246     }
247     /// Compare two segments
248     template<typename IteratorL,typename IteratorR>
operator !=(segment<IteratorL> const & l,segment<IteratorR> const & r)249     bool operator!=(segment<IteratorL> const &l,segment<IteratorR> const &r)
250     {
251         return details::compare_text(l,r) != 0;
252     }
253 
254     /// Compare two segments
255     template<typename IteratorL,typename IteratorR>
operator <(segment<IteratorL> const & l,segment<IteratorR> const & r)256     bool operator<(segment<IteratorL> const &l,segment<IteratorR> const &r)
257     {
258         return details::compare_text(l,r) < 0;
259     }
260     /// Compare two segments
261     template<typename IteratorL,typename IteratorR>
operator <=(segment<IteratorL> const & l,segment<IteratorR> const & r)262     bool operator<=(segment<IteratorL> const &l,segment<IteratorR> const &r)
263     {
264         return details::compare_text(l,r) <= 0;
265     }
266     /// Compare two segments
267     template<typename IteratorL,typename IteratorR>
operator >(segment<IteratorL> const & l,segment<IteratorR> const & r)268     bool operator>(segment<IteratorL> const &l,segment<IteratorR> const &r)
269     {
270         return details::compare_text(l,r) > 0;
271     }
272     /// Compare two segments
273     template<typename IteratorL,typename IteratorR>
operator >=(segment<IteratorL> const & l,segment<IteratorR> const & r)274     bool operator>=(segment<IteratorL> const &l,segment<IteratorR> const &r)
275     {
276         return details::compare_text(l,r) >= 0;
277     }
278 
279     /// Compare string and segment
280     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator ==(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)281     bool operator==(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
282     {
283         return details::compare_text(l,r) == 0;
284     }
285     /// Compare string and segment
286     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator !=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)287     bool operator!=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
288     {
289         return details::compare_text(l,r) != 0;
290     }
291 
292     /// Compare string and segment
293     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator <(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)294     bool operator<(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
295     {
296         return details::compare_text(l,r) < 0;
297     }
298     /// Compare string and segment
299     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator <=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)300     bool operator<=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
301     {
302         return details::compare_text(l,r) <= 0;
303     }
304     /// Compare string and segment
305     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator >(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)306     bool operator>(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
307     {
308         return details::compare_text(l,r) > 0;
309     }
310     /// Compare string and segment
311     template<typename CharType,typename Traits,typename Alloc,typename IteratorR>
operator >=(std::basic_string<CharType,Traits,Alloc> const & l,segment<IteratorR> const & r)312     bool operator>=(std::basic_string<CharType,Traits,Alloc> const &l,segment<IteratorR> const &r)
313     {
314         return details::compare_text(l,r) >= 0;
315     }
316 
317     /// Compare string and segment
318     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator ==(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)319     bool operator==(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
320     {
321         return details::compare_text(l,r) == 0;
322     }
323     /// Compare string and segment
324     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator !=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)325     bool operator!=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
326     {
327         return details::compare_text(l,r) != 0;
328     }
329 
330     /// Compare string and segment
331     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator <(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)332     bool operator<(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
333     {
334         return details::compare_text(l,r) < 0;
335     }
336     /// Compare string and segment
337     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator <=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)338     bool operator<=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
339     {
340         return details::compare_text(l,r) <= 0;
341     }
342     /// Compare string and segment
343     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator >(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)344     bool operator>(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
345     {
346         return details::compare_text(l,r) > 0;
347     }
348     /// Compare string and segment
349     template<typename Iterator,typename CharType,typename Traits,typename Alloc>
operator >=(segment<Iterator> const & l,std::basic_string<CharType,Traits,Alloc> const & r)350     bool operator>=(segment<Iterator> const &l,std::basic_string<CharType,Traits,Alloc> const &r)
351     {
352         return details::compare_text(l,r) >= 0;
353     }
354 
355 
356     /// Compare C string and segment
357     template<typename CharType,typename IteratorR>
operator ==(CharType const * l,segment<IteratorR> const & r)358     bool operator==(CharType const *l,segment<IteratorR> const &r)
359     {
360         return details::compare_string(l,r) == 0;
361     }
362     /// Compare C string and segment
363     template<typename CharType,typename IteratorR>
operator !=(CharType const * l,segment<IteratorR> const & r)364     bool operator!=(CharType const *l,segment<IteratorR> const &r)
365     {
366         return details::compare_string(l,r) != 0;
367     }
368 
369     /// Compare C string and segment
370     template<typename CharType,typename IteratorR>
operator <(CharType const * l,segment<IteratorR> const & r)371     bool operator<(CharType const *l,segment<IteratorR> const &r)
372     {
373         return details::compare_string(l,r) < 0;
374     }
375     /// Compare C string and segment
376     template<typename CharType,typename IteratorR>
operator <=(CharType const * l,segment<IteratorR> const & r)377     bool operator<=(CharType const *l,segment<IteratorR> const &r)
378     {
379         return details::compare_string(l,r) <= 0;
380     }
381     /// Compare C string and segment
382     template<typename CharType,typename IteratorR>
operator >(CharType const * l,segment<IteratorR> const & r)383     bool operator>(CharType const *l,segment<IteratorR> const &r)
384     {
385         return details::compare_string(l,r) > 0;
386     }
387     /// Compare C string and segment
388     template<typename CharType,typename IteratorR>
operator >=(CharType const * l,segment<IteratorR> const & r)389     bool operator>=(CharType const *l,segment<IteratorR> const &r)
390     {
391         return details::compare_string(l,r) >= 0;
392     }
393 
394     /// Compare C string and segment
395     template<typename Iterator,typename CharType>
operator ==(segment<Iterator> const & l,CharType const * r)396     bool operator==(segment<Iterator> const &l,CharType const *r)
397     {
398         return details::compare_string(l,r) == 0;
399     }
400     /// Compare C string and segment
401     template<typename Iterator,typename CharType>
operator !=(segment<Iterator> const & l,CharType const * r)402     bool operator!=(segment<Iterator> const &l,CharType const *r)
403     {
404         return details::compare_string(l,r) != 0;
405     }
406 
407     /// Compare C string and segment
408     template<typename Iterator,typename CharType>
operator <(segment<Iterator> const & l,CharType const * r)409     bool operator<(segment<Iterator> const &l,CharType const *r)
410     {
411         return details::compare_string(l,r) < 0;
412     }
413     /// Compare C string and segment
414     template<typename Iterator,typename CharType>
operator <=(segment<Iterator> const & l,CharType const * r)415     bool operator<=(segment<Iterator> const &l,CharType const *r)
416     {
417         return details::compare_string(l,r) <= 0;
418     }
419     /// Compare C string and segment
420     template<typename Iterator,typename CharType>
operator >(segment<Iterator> const & l,CharType const * r)421     bool operator>(segment<Iterator> const &l,CharType const *r)
422     {
423         return details::compare_string(l,r) > 0;
424     }
425     /// Compare C string and segment
426     template<typename Iterator,typename CharType>
operator >=(segment<Iterator> const & l,CharType const * r)427     bool operator>=(segment<Iterator> const &l,CharType const *r)
428     {
429         return details::compare_string(l,r) >= 0;
430     }
431 
432 
433 
434 
435 
436 
437     typedef segment<std::string::const_iterator> ssegment;      ///< convenience typedef
438     typedef segment<std::wstring::const_iterator> wssegment;    ///< convenience typedef
439     #ifdef BOOST_HAS_CHAR16_T
440     typedef segment<std::u16string::const_iterator> u16ssegment;///< convenience typedef
441     #endif
442     #ifdef BOOST_HAS_CHAR32_T
443     typedef segment<std::u32string::const_iterator> u32ssegment;///< convenience typedef
444     #endif
445 
446     typedef segment<char const *> csegment;                     ///< convenience typedef
447     typedef segment<wchar_t const *> wcsegment;                 ///< convenience typedef
448     #ifdef BOOST_HAS_CHAR16_T
449     typedef segment<char16_t const *> u16csegment;              ///< convenience typedef
450     #endif
451     #ifdef BOOST_HAS_CHAR32_T
452     typedef segment<char32_t const *> u32csegment;              ///< convenience typedef
453     #endif
454 
455 
456 
457 
458 
459     ///
460     /// Write the segment to the stream character by character
461     ///
462     template<typename CharType,typename TraitsType,typename Iterator>
operator <<(std::basic_ostream<CharType,TraitsType> & out,segment<Iterator> const & tok)463     std::basic_ostream<CharType,TraitsType> &operator<<(
464             std::basic_ostream<CharType,TraitsType> &out,
465             segment<Iterator> const &tok)
466     {
467         for(Iterator p=tok.begin(),e=tok.end();p!=e;++p)
468             out << *p;
469         return out;
470     }
471 
472     /// @}
473 
474 } // boundary
475 } // locale
476 } // boost
477 
478 #ifdef BOOST_MSVC
479 #pragma warning(pop)
480 #endif
481 
482 #endif
483 
484 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
485