xref: /openbsd/gnu/usr.bin/perl/invlist_inline.h (revision e0680481)
1 /*    invlist_inline.h
2  *
3  *    Copyright (C) 2012 by Larry Wall and others
4  *
5  *    You may distribute under the terms of either the GNU General Public
6  *    License or the Artistic License, as specified in the README file.
7  */
8 
9 #ifndef PERL_INVLIST_INLINE_H_
10 #define PERL_INVLIST_INLINE_H_
11 
12 #if defined(PERL_IN_UTF8_C)             \
13  || defined(PERL_IN_REGCOMP_ANY)        \
14  || defined(PERL_IN_REGEXEC_C)          \
15  || defined(PERL_IN_TOKE_C)             \
16  || defined(PERL_IN_PP_C)               \
17  || defined(PERL_IN_OP_C)               \
18  || defined(PERL_IN_DOOP_C)
19 
/* The element at an even-numbered index (0, 2, 4, ...) begins a range that is
 * in the inversion list; the element at an odd-numbered index does not.  The
 * second macro is simply the negation of the first. */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (((i) & 1) == 0)
#define PREV_RANGE_MATCHES_INVLIST(i) (((i) & 1) != 0)
24 
/* The SV code measures buffers in bytes, whereas inversion lists count UVs.
 * These translate an element count to a byte count and back again. */
#define TO_INTERNAL_SIZE(x) (sizeof(UV) * (x))
#define FROM_INTERNAL_SIZE(x) ((x) / sizeof(UV))
28 
29 PERL_STATIC_INLINE bool
S_is_invlist(const SV * const invlist)30 S_is_invlist(const SV* const invlist)
31 {
32     return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST;
33 }
34 
35 PERL_STATIC_INLINE bool*
S_get_invlist_offset_addr(SV * invlist)36 S_get_invlist_offset_addr(SV* invlist)
37 {
38     /* Return the address of the field that says whether the inversion list is
39      * offset (it contains 1) or not (contains 0) */
40     PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR;
41 
42     assert(is_invlist(invlist));
43 
44     return &(((XINVLIST*) SvANY(invlist))->is_offset);
45 }
46 
47 PERL_STATIC_INLINE UV
S__invlist_len(SV * const invlist)48 S__invlist_len(SV* const invlist)
49 {
50     /* Returns the current number of elements stored in the inversion list's
51      * array */
52 
53     PERL_ARGS_ASSERT__INVLIST_LEN;
54 
55     assert(is_invlist(invlist));
56 
57     return (SvCUR(invlist) == 0)
58            ? 0
59            : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist);
60 }
61 
62 PERL_STATIC_INLINE bool
S__invlist_contains_cp(SV * const invlist,const UV cp)63 S__invlist_contains_cp(SV* const invlist, const UV cp)
64 {
65     /* Does <invlist> contain code point <cp> as part of the set? */
66 
67     IV index = _invlist_search(invlist, cp);
68 
69     PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;
70 
71     return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
72 }
73 
74 PERL_STATIC_INLINE UV*
S_invlist_array(SV * const invlist)75 S_invlist_array(SV* const invlist)
76 {
77     /* Returns the pointer to the inversion list's array.  Every time the
78      * length changes, this needs to be called in case malloc or realloc moved
79      * it */
80 
81     PERL_ARGS_ASSERT_INVLIST_ARRAY;
82 
83     /* Must not be empty.  If these fail, you probably didn't check for <len>
84      * being non-zero before trying to get the array */
85     assert(_invlist_len(invlist));
86 
87     /* The very first element always contains zero, The array begins either
88      * there, or if the inversion list is offset, at the element after it.
89      * The offset header field determines which; it contains 0 or 1 to indicate
90      * how much additionally to add */
91     assert(0 == *(SvPVX(invlist)));
92     return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
93 }
94 
95 #endif
96 #if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
97 
98 PERL_STATIC_INLINE void
S_invlist_extend(pTHX_ SV * const invlist,const UV new_max)99 S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
100 {
101     /* Grow the maximum size of an inversion list */
102 
103     PERL_ARGS_ASSERT_INVLIST_EXTEND;
104 
105     assert(SvTYPE(invlist) == SVt_INVLIST);
106 
107     /* Add one to account for the zero element at the beginning which may not
108      * be counted by the calling parameters */
109     SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
110 }
111 
112 PERL_STATIC_INLINE void
S_invlist_set_len(pTHX_ SV * const invlist,const UV len,const bool offset)113 S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
114 {
115     /* Sets the current number of elements stored in the inversion list.
116      * Updates SvCUR correspondingly */
117     PERL_UNUSED_CONTEXT;
118     PERL_ARGS_ASSERT_INVLIST_SET_LEN;
119 
120     assert(SvTYPE(invlist) == SVt_INVLIST);
121 
122     SvCUR_set(invlist,
123               (len == 0)
124                ? 0
125                : TO_INTERNAL_SIZE(len + offset));
126     assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
127 }
128 
129 PERL_STATIC_INLINE SV*
S_add_cp_to_invlist(pTHX_ SV * invlist,const UV cp)130 S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
131     return _add_range_to_invlist(invlist, cp, cp);
132 }
133 
134 PERL_STATIC_INLINE UV
S_invlist_highest(SV * const invlist)135 S_invlist_highest(SV* const invlist)
136 {
137     /* Returns the highest code point that matches an inversion list.  This API
138      * has an ambiguity, as it returns 0 under either the highest is actually
139      * 0, or if the list is empty.  If this distinction matters to you, check
140      * for emptiness before calling this function */
141 
142     UV len = _invlist_len(invlist);
143     UV *array;
144 
145     PERL_ARGS_ASSERT_INVLIST_HIGHEST;
146 
147     if (len == 0) {
148         return 0;
149     }
150 
151     array = invlist_array(invlist);
152 
153     /* The last element in the array in the inversion list always starts a
154      * range that goes to infinity.  That range may be for code points that are
155      * matched in the inversion list, or it may be for ones that aren't
156      * matched.  In the latter case, the highest code point in the set is one
157      * less than the beginning of this range; otherwise it is the final element
158      * of this range: infinity */
159     return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
160            ? UV_MAX
161            : array[len - 1] - 1;
162 }
163 
164 #  if defined(PERL_IN_REGCOMP_ANY)
165 
166 PERL_STATIC_INLINE UV
S_invlist_highest_range_start(SV * const invlist)167 S_invlist_highest_range_start(SV* const invlist)
168 {
169     /* Returns the lowest code point of the highest range in the inversion
170      * list parameter.  This API has an ambiguity: it returns 0 either when
171      * the lowest such point is actually 0 or when the list is empty.  If this
172      * distinction matters to you, check for emptiness before calling this
173      * function. */
174 
175     UV len = _invlist_len(invlist);
176     UV *array;
177 
178     PERL_ARGS_ASSERT_INVLIST_HIGHEST_RANGE_START;
179 
180     if (len == 0) {
181         return 0;
182     }
183 
184     array = invlist_array(invlist);
185 
186     /* The last element in the array in the inversion list always starts a
187      * range that goes to infinity.  That range may be for code points that are
188      * matched in the inversion list, or it may be for ones that aren't
189      * matched.  In the first case, the lowest code point in the matching range
190      * is that the one that started the range.  If the other case, the final
191      * matching range begins at the next element down (which may be 0 in the
192      * edge case). */
193     return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
194            ? array[len - 1]
195            : len == 1
196              ? 0
197              : array[len - 2];
198 }
199 
200 #  endif
201 #endif
202 #if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
203 
204 PERL_STATIC_INLINE STRLEN*
S_get_invlist_iter_addr(SV * invlist)205 S_get_invlist_iter_addr(SV* invlist)
206 {
207     /* Return the address of the UV that contains the current iteration
208      * position */
209 
210     PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
211 
212     assert(is_invlist(invlist));
213 
214     return &(((XINVLIST*) SvANY(invlist))->iterator);
215 }
216 
217 PERL_STATIC_INLINE void
S_invlist_iterinit(SV * invlist)218 S_invlist_iterinit(SV* invlist)	/* Initialize iterator for invlist */
219 {
220     PERL_ARGS_ASSERT_INVLIST_ITERINIT;
221 
222     *get_invlist_iter_addr(invlist) = 0;
223 }
224 
225 PERL_STATIC_INLINE void
S_invlist_iterfinish(SV * invlist)226 S_invlist_iterfinish(SV* invlist)
227 {
228     /* Terminate iterator for invlist.  This is to catch development errors.
229      * Any iteration that is interrupted before completed should call this
230      * function.  Functions that add code points anywhere else but to the end
231      * of an inversion list assert that they are not in the middle of an
232      * iteration.  If they were, the addition would make the iteration
233      * problematical: if the iteration hadn't reached the place where things
234      * were being added, it would be ok */
235 
236     PERL_ARGS_ASSERT_INVLIST_ITERFINISH;
237 
238     *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
239 }
240 
241 STATIC bool
S_invlist_iternext(SV * invlist,UV * start,UV * end)242 S_invlist_iternext(SV* invlist, UV* start, UV* end)
243 {
244     /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
245      * This call sets in <*start> and <*end>, the next range in <invlist>.
246      * Returns <TRUE> if successful and the next call will return the next
247      * range; <FALSE> if was already at the end of the list.  If the latter,
248      * <*start> and <*end> are unchanged, and the next call to this function
249      * will start over at the beginning of the list */
250 
251     STRLEN* pos = get_invlist_iter_addr(invlist);
252     UV len = _invlist_len(invlist);
253     UV *array;
254 
255     PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
256 
257     if (*pos >= len) {
258         *pos = (STRLEN) UV_MAX;	/* Force iterinit() to be required next time */
259         return FALSE;
260     }
261 
262     array = invlist_array(invlist);
263 
264     *start = array[(*pos)++];
265 
266     if (*pos >= len) {
267         *end = UV_MAX;
268     }
269     else {
270         *end = array[(*pos)++] - 1;
271     }
272 
273     return TRUE;
274 }
275 
276 #endif
277 
278 #ifndef PERL_IN_REGCOMP_ANY
279 
280 /* These symbols are only needed later in regcomp.c */
281 #       undef TO_INTERNAL_SIZE
282 #       undef FROM_INTERNAL_SIZE
283 #endif
284 
285 #ifdef PERL_IN_REGCOMP_ANY
286 PERL_STATIC_INLINE
287 bool
S_invlist_is_iterating(const SV * const invlist)288 S_invlist_is_iterating(const SV* const invlist)
289 {
290     PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
291 
292     /* get_invlist_iter_addr()'s sv is non-const only because it returns a
293      * value that can be used to modify the invlist, it doesn't modify the
294      * invlist itself */
295     return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
296 }
297 
298 PERL_STATIC_INLINE
299 SV *
S_invlist_contents(pTHX_ SV * const invlist,const bool traditional_style)300 S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
301 {
302     /* Get the contents of an inversion list into a string SV so that they can
303      * be printed out.  If 'traditional_style' is TRUE, it uses the format
304      * traditionally done for debug tracing; otherwise it uses a format
305      * suitable for just copying to the output, with blanks between ranges and
306      * a dash between range components */
307 
308     UV start, end;
309     SV* output;
310     const char intra_range_delimiter = (traditional_style ? '\t' : '-');
311     const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
312 
313     if (traditional_style) {
314         output = newSVpvs("\n");
315     }
316     else {
317         output = newSVpvs("");
318     }
319 
320     PERL_ARGS_ASSERT_INVLIST_CONTENTS;
321 
322     assert(! invlist_is_iterating(invlist));
323 
324     invlist_iterinit(invlist);
325     while (invlist_iternext(invlist, &start, &end)) {
326         if (end == UV_MAX) {
327             Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
328                                           start, intra_range_delimiter,
329                                                  inter_range_delimiter);
330         }
331         else if (end != start) {
332             Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
333                                           start,
334                                                    intra_range_delimiter,
335                                                   end, inter_range_delimiter);
336         }
337         else {
338             Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
339                                           start, inter_range_delimiter);
340         }
341     }
342 
343     if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
344         SvCUR_set(output, SvCUR(output) - 1);
345     }
346 
347     return output;
348 }
349 
350 PERL_STATIC_INLINE
351 UV
S_invlist_lowest(SV * const invlist)352 S_invlist_lowest(SV* const invlist)
353 {
354     /* Returns the lowest code point that matches an inversion list.  This API
355      * has an ambiguity, as it returns 0 under either the lowest is actually
356      * 0, or if the list is empty.  If this distinction matters to you, check
357      * for emptiness before calling this function */
358 
359     UV len = _invlist_len(invlist);
360     UV *array;
361 
362     PERL_ARGS_ASSERT_INVLIST_LOWEST;
363 
364     if (len == 0) {
365         return 0;
366     }
367 
368     array = invlist_array(invlist);
369 
370     return array[0];
371 }
372 
373 #endif
374 
375 #endif /* PERL_INVLIST_INLINE_H_ */
376