1 //     Copyright Toru Niina 2017.
2 // Distributed under the MIT License.
3 #ifndef TOML11_COMBINATOR_HPP
4 #define TOML11_COMBINATOR_HPP
5 #include <cassert>
6 #include <cctype>
7 #include <cstdio>
8 
9 #include <array>
10 #include <iomanip>
11 #include <iterator>
12 #include <limits>
13 #include <type_traits>
14 
15 #include "region.hpp"
16 #include "result.hpp"
17 #include "traits.hpp"
18 #include "utility.hpp"
19 
// The combinators in this file scan characters and return a region if the
// input matches their condition. When they fail, they do not change the
// location. They are used in lexer.hpp.
23 
24 namespace toml
25 {
26 namespace detail
27 {
28 
29 // to output character as an error message.
show_char(const char c)30 inline std::string show_char(const char c)
31 {
32     // It supress an error that occurs only in Debug mode of MSVC++ on Windows.
33     // I'm not completely sure but they check the value of char to be in the
34     // range [0, 256) and some of the COMPLETELY VALID utf-8 character sometimes
35     // has negative value (if char has sign). So here it re-interprets c as
36     // unsigned char through pointer. In general, converting pointer to a
37     // pointer that has different type cause UB, but `(signed|unsigned)?char`
38     // are one of the exceptions. Converting pointer only to char and std::byte
39     // (c++17) are valid.
40     if(std::isgraph(*reinterpret_cast<unsigned char const*>(std::addressof(c))))
41     {
42         return std::string(1, c);
43     }
44     else
45     {
46         std::array<char, 5> buf;
47         buf.fill('\0');
48         const auto r = std::snprintf(
49                 buf.data(), buf.size(), "0x%02x", static_cast<int>(c) & 0xFF);
50         (void) r; // Unused variable warning
51         assert(r == static_cast<int>(buf.size()) - 1);
52         return std::string(buf.data());
53     }
54 }
55 
56 template<char C>
57 struct character
58 {
59     static constexpr char target = C;
60 
61     static result<region, none_t>
invoketoml::detail::character62     invoke(location& loc)
63     {
64         if(loc.iter() == loc.end()) {return none();}
65         const auto first = loc.iter();
66 
67         const char c = *(loc.iter());
68         if(c != target)
69         {
70             return none();
71         }
72         loc.advance(); // update location
73 
74         return ok(region(loc, first, loc.iter()));
75     }
76 };
77 template<char C>
78 constexpr char character<C>::target;
79 
80 // closed interval [Low, Up]. both Low and Up are included.
81 template<char Low, char Up>
82 struct in_range
83 {
84     // assuming ascii part of UTF-8...
85     static_assert(Low <= Up, "lower bound should be less than upper bound.");
86 
87     static constexpr char upper = Up;
88     static constexpr char lower = Low;
89 
90     static result<region, none_t>
invoketoml::detail::in_range91     invoke(location& loc)
92     {
93         if(loc.iter() == loc.end()) {return none();}
94         const auto first = loc.iter();
95 
96         const char c = *(loc.iter());
97         if(c < lower || upper < c)
98         {
99             return none();
100         }
101 
102         loc.advance();
103         return ok(region(loc, first, loc.iter()));
104     }
105 };
106 template<char L, char U> constexpr char in_range<L, U>::upper;
107 template<char L, char U> constexpr char in_range<L, U>::lower;
108 
109 // keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char.
110 // for detecting invalid characters, like control sequences in toml string.
111 template<typename Combinator>
112 struct exclude
113 {
114     static result<region, none_t>
invoketoml::detail::exclude115     invoke(location& loc)
116     {
117         if(loc.iter() == loc.end()) {return none();}
118         auto first = loc.iter();
119 
120         auto rslt = Combinator::invoke(loc);
121         if(rslt.is_ok())
122         {
123             loc.reset(first);
124             return none();
125         }
126         loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
127         return ok(region(loc, first, loc.iter()));
128     }
129 };
130 
131 // increment `iter`, if matches. otherwise, just return empty string.
132 template<typename Combinator>
133 struct maybe
134 {
135     static result<region, none_t>
invoketoml::detail::maybe136     invoke(location& loc)
137     {
138         const auto rslt = Combinator::invoke(loc);
139         if(rslt.is_ok())
140         {
141             return rslt;
142         }
143         return ok(region(loc));
144     }
145 };
146 
147 template<typename ... Ts>
148 struct sequence;
149 
150 template<typename Head, typename ... Tail>
151 struct sequence<Head, Tail...>
152 {
153     static result<region, none_t>
invoketoml::detail::sequence154     invoke(location& loc)
155     {
156         const auto first = loc.iter();
157         const auto rslt = Head::invoke(loc);
158         if(rslt.is_err())
159         {
160             loc.reset(first);
161             return none();
162         }
163         return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
164     }
165 
166     // called from the above function only, recursively.
167     template<typename Iterator>
168     static result<region, none_t>
invoketoml::detail::sequence169     invoke(location& loc, region reg, Iterator first)
170     {
171         const auto rslt = Head::invoke(loc);
172         if(rslt.is_err())
173         {
174             loc.reset(first);
175             return none();
176         }
177         reg += rslt.unwrap(); // concat regions
178         return sequence<Tail...>::invoke(loc, std::move(reg), first);
179     }
180 };
181 
182 template<typename Head>
183 struct sequence<Head>
184 {
185     // would be called from sequence<T ...>::invoke only.
186     template<typename Iterator>
187     static result<region, none_t>
invoketoml::detail::sequence188     invoke(location& loc, region reg, Iterator first)
189     {
190         const auto rslt = Head::invoke(loc);
191         if(rslt.is_err())
192         {
193             loc.reset(first);
194             return none();
195         }
196         reg += rslt.unwrap(); // concat regions
197         return ok(reg);
198     }
199 };
200 
201 template<typename ... Ts>
202 struct either;
203 
204 template<typename Head, typename ... Tail>
205 struct either<Head, Tail...>
206 {
207     static result<region, none_t>
invoketoml::detail::either208     invoke(location& loc)
209     {
210         const auto rslt = Head::invoke(loc);
211         if(rslt.is_ok()) {return rslt;}
212         return either<Tail...>::invoke(loc);
213     }
214 };
215 template<typename Head>
216 struct either<Head>
217 {
218     static result<region, none_t>
invoketoml::detail::either219     invoke(location& loc)
220     {
221         return Head::invoke(loc);
222     }
223 };
224 
225 template<typename T, typename N>
226 struct repeat;
227 
228 template<std::size_t N> struct exactly{};
229 template<std::size_t N> struct at_least{};
230 struct unlimited{};
231 
232 template<typename T, std::size_t N>
233 struct repeat<T, exactly<N>>
234 {
235     static result<region, none_t>
invoketoml::detail::repeat236     invoke(location& loc)
237     {
238         region retval(loc);
239         const auto first = loc.iter();
240         for(std::size_t i=0; i<N; ++i)
241         {
242             auto rslt = T::invoke(loc);
243             if(rslt.is_err())
244             {
245                 loc.reset(first);
246                 return none();
247             }
248             retval += rslt.unwrap();
249         }
250         return ok(std::move(retval));
251     }
252 };
253 
254 template<typename T, std::size_t N>
255 struct repeat<T, at_least<N>>
256 {
257     static result<region, none_t>
invoketoml::detail::repeat258     invoke(location& loc)
259     {
260         region retval(loc);
261 
262         const auto first = loc.iter();
263         for(std::size_t i=0; i<N; ++i)
264         {
265             auto rslt = T::invoke(loc);
266             if(rslt.is_err())
267             {
268                 loc.reset(first);
269                 return none();
270             }
271             retval += rslt.unwrap();
272         }
273         while(true)
274         {
275             auto rslt = T::invoke(loc);
276             if(rslt.is_err())
277             {
278                 return ok(std::move(retval));
279             }
280             retval += rslt.unwrap();
281         }
282     }
283 };
284 
285 template<typename T>
286 struct repeat<T, unlimited>
287 {
288     static result<region, none_t>
invoketoml::detail::repeat289     invoke(location& loc)
290     {
291         region retval(loc);
292         while(true)
293         {
294             auto rslt = T::invoke(loc);
295             if(rslt.is_err())
296             {
297                 return ok(std::move(retval));
298             }
299             retval += rslt.unwrap();
300         }
301     }
302 };
303 
304 } // detail
305 } // toml
306 #endif// TOML11_COMBINATOR_HPP
307