1 //
2 //  Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
9 #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
10 
11 #include <boost/locale/config.hpp>
12 #include <boost/cstdint.hpp>
13 #include <boost/assert.hpp>
14 #ifdef BOOST_MSVC
15 #  pragma warning(push)
16 #  pragma warning(disable : 4275 4251 4231 4660)
17 #endif
18 
19 
20 namespace boost {
21 
22     namespace locale {
23 
24         ///
        /// \brief This namespace contains all operations required for boundary analysis of text
26         ///
27         namespace boundary {
28             ///
29             /// \defgroup boundary Boundary Analysis
30             ///
            /// This module contains all operations required for boundary analysis of text: character, word, line and sentence boundaries
32             ///
33             /// @{
34             ///
35 
36             ///
37             /// This type describes a possible boundary analysis alternatives.
38             ///
39             enum boundary_type {
40                 character,  ///< Analyse the text for character boundaries
41                 word,       ///< Analyse the text for word boundaries
42                 sentence,   ///< Analyse the text for Find sentence boundaries
43                 line        ///< Analyse the text for positions suitable for line breaks
44             };
45 
            ///
            /// \brief Flags used with boundary analysis -- the type of the word, line or sentence boundary found.
            ///
            /// It is a bit-mask, stored in a 32-bit unsigned integer, that represents various
            /// combinations of rules used to select this specific boundary.
            ///
            typedef uint32_t rule_type;
52 
53             ///
54             /// \anchor bl_boundary_word_rules
55             /// \name Flags that describe a type of word selected
56             /// @{
57             static const rule_type
58                 word_none       =  0x0000F,   ///< Not a word, like white space or punctuation mark
59                 word_number     =  0x000F0,   ///< Word that appear to be a number
60                 word_letter     =  0x00F00,   ///< Word that contains letters, excluding kana and ideographic characters
61                 word_kana       =  0x0F000,   ///< Word that contains kana characters
62                 word_ideo       =  0xF0000,   ///< Word that contains ideographic characters
63                 word_any        =  0xFFFF0,   ///< Any word including numbers, 0 is special flag, equivalent to 15
64                 word_letters    =  0xFFF00,   ///< Any word, excluding numbers but including letters, kana and ideograms.
65                 word_kana_ideo  =  0xFF000,   ///< Word that includes kana or ideographic characters
66                 word_mask       =  0xFFFFF;   ///< Full word mask - select all possible variants
67             /// @}
68 
69             ///
70             /// \anchor bl_boundary_line_rules
71             /// \name Flags that describe a type of line break
72             /// @{
73             static const rule_type
74                 line_soft       =  0x0F,   ///< Soft line break: optional but not required
75                 line_hard       =  0xF0,   ///< Hard line break: like break is required (as per CR/LF)
76                 line_any        =  0xFF,   ///< Soft or Hard line break
77                 line_mask       =  0xFF;   ///< Select all types of line breaks
78 
79             /// @}
80 
81             ///
82             /// \anchor bl_boundary_sentence_rules
83             /// \name Flags that describe a type of sentence break
84             ///
85             /// @{
86             static const rule_type
87                 sentence_term   =  0x0F,    ///< \brief The sentence was terminated with a sentence terminator
88                                             ///  like ".", "!" possible followed by hard separator like CR, LF, PS
89                 sentence_sep    =  0xF0,    ///< \brief The sentence does not contain terminator like ".", "!" but ended with hard separator
90                                             ///  like CR, LF, PS or end of input.
91                 sentence_any    =  0xFF,    ///< Either first or second sentence break type;.
92                 sentence_mask   =  0xFF;    ///< Select all sentence breaking points
93 
94             ///@}
95 
96             ///
97             /// \name  Flags that describe a type of character break.
98             ///
99             /// At this point break iterator does not distinguish different
100             /// kinds of characters so it is used for consistency.
101             ///@{
102             static const rule_type
103                 character_any   =  0xF,     ///< Not in use, just for consistency
104                 character_mask  =  0xF;     ///< Select all character breaking points
105 
106             ///@}
107 
108             ///
109             /// This function returns the mask that covers all variants for specific boundary type
110             ///
boundary_rule(boundary_type t)111             inline rule_type boundary_rule(boundary_type t)
112             {
113                 switch(t) {
114                 case character: return character_mask;
115                 case word:      return word_mask;
116                 case sentence:  return sentence_mask;
117                 case line:      return line_mask;
118                 default:        return 0;
119                 }
120             }
121 
122             ///
123             ///@}
124             ///
125 
126         } // boundary
127     } // locale
128 } // boost
129 
130 
131 #ifdef BOOST_MSVC
132 #pragma warning(pop)
133 #endif
134 
135 #endif
136 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
137