1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  utf16.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999sep09
16 *   created by: Markus W. Scherer
17 */
18 
19 /**
20  * \file
21  * \brief C API: 16-bit Unicode handling macros
22  *
23  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
24  *
25  * For more information see utf.h and the ICU User Guide Strings chapter
26  * (https://unicode-org.github.io/icu/userguide/strings).
27  *
28  * <em>Usage:</em>
29  * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies, and all macro statements should be terminated with a semicolon.
32  */
33 
34 #ifndef __UTF16_H__
35 #define __UTF16_H__
36 
37 #include <stdbool.h>
38 #include "unicode/umachine.h"
39 #ifndef __UTF_H__
40 #   include "unicode/utf.h"
41 #endif
42 
43 /* single-code point definitions -------------------------------------------- */
44 
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * This is the logical negation of U_IS_SURROGATE(c).
 * The expansion is enclosed in parentheses so that it stays a single
 * operand regardless of the operators surrounding the macro call.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
52 
/**
 * Tests whether a code unit is a lead (first/high) surrogate, U+d800..U+dbff.
 * All bits above the low ten are compared, so values wider than 16 bits
 * whose upper bits are set do not match.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==(0xfffffc00&(c)))
60 
/**
 * Tests whether a code unit is a trail (second/low) surrogate, U+dc00..U+dfff.
 * All bits above the low ten are compared, so values wider than 16 bits
 * whose upper bits are set do not match.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==(0xfffffc00&(c)))
68 
/**
 * Tests whether a code unit is any surrogate, lead or trail (U+d800..U+dfff).
 * UTF-16 spelling of U_IS_SURROGATE(c), to which it forwards.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
76 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is the lead one.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (0==((c)&0x400))
85 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is the trail one.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * non-surrogate input.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
94 
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It is the value of (lead<<10)+trail for the first surrogate pair
 * (0xd800, 0xdc00) minus the first supplementary code point 0x10000,
 * i.e. the amount to subtract from (lead<<10)+trail to get the code point.
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10)-(0x10000-0xdc00))
100 
/**
 * Combine a lead and a trail surrogate into the supplementary
 * code point (U+10000..U+10ffff) that the pair encodes.
 * No validation is performed: the result is undefined if the input
 * values are not a lead and a trail surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((UChar32)(trail)+((UChar32)(lead)<<10)-U16_SURROGATE_OFFSET)
114 
115 
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is enclosed in parentheses so that the UChar cast cannot
 * be split from its operand by surrounding syntax (for example sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
124 
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is enclosed in parentheses so that the UChar cast cannot
 * be split from its operand by surrounding syntax (for example sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
133 
/**
 * Number of 16-bit code units needed to encode a Unicode code point:
 * 2 if the value, viewed as unsigned 32-bit, is above 0xffff, otherwise 1.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
142 
/**
 * Upper bound on the number of 16-bit code units that any single
 * Unicode code point (U+0000..U+10ffff) occupies in UTF-16.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
149 
/**
 * Random-access read of the code point at an offset; the offset itself
 * is left unchanged.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may designate either half of a surrogate pair: for a trail
 * unit the preceding unit is read as its lead, for a lead unit the
 * following unit is read as its trail.
 * No bounds or pairing checks are made, so the result is undefined
 * if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* second half of a pair: the lead unit is just before it */ \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } else { \
            /* first half of a pair: the trail unit is just after it */ \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
177 
/**
 * Random-access read of the code point at an offset; the offset itself
 * is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may designate either half of a surrogate pair, in which case
 * the adjacent matching surrogate is read as well: the unit before a trail
 * is consulted only if i>start, the unit after a lead only if i+1!=length.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If no matching partner is found (unpaired surrogate), c is left set
 * to that surrogate code unit.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: pair it with a lead unit just before it, if any */ \
            if((start)<(i)) { \
                __c2=(s)[(i)-1]; \
                if(U16_IS_LEAD(__c2)) { \
                    (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
                } \
            } \
        } else if((length)!=(i)+1) { \
            /* lead unit: pair it with a trail unit just after it, if any */ \
            __c2=(s)[(i)+1]; \
            if(U16_IS_TRAIL(__c2)) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
216 
/**
 * Random-access read of the code point at an offset; the offset itself
 * is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may designate either half of a surrogate pair, in which case
 * the adjacent matching surrogate is read as well: the unit before a trail
 * is consulted only if i>start, the unit after a lead only if i+1!=length.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If no matching partner is found (unpaired surrogate), c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: needs a lead unit just before it, inside the string */ \
            (c)=((start)<(i) && U16_IS_LEAD(__c2=(s)[(i)-1])) ? \
                U16_GET_SUPPLEMENTARY(__c2, (c)) : 0xfffd; \
        } else { \
            /* lead unit: needs a trail unit just after it, inside the string */ \
            (c)=((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) ? \
                U16_GET_SUPPLEMENTARY((c), __c2) : 0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
259 
260 /* definitions with forward iteration --------------------------------------- */
261 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset past it, to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * If the offset points to a lead surrogate, the following unit is read
 * as its trail surrogate without any check, and the offset advances by two.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[i]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
287 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset past it, to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a lead surrogate and the next unit lies inside
 * the string (offset after the lead != length) and is a trail surrogate,
 * both units are consumed and combined.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired
 * surrogate and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        /* peek at the following unit without consuming it yet */ \
        uint16_t __c2=(s)[i]; \
        if(U16_IS_TRAIL(__c2)) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
319 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset past it, to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a lead surrogate and the next unit lies inside
 * the string (offset after the lead != length) and is a trail surrogate,
 * both units are consumed and combined.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD
 * and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* unpaired: a trail unit, a lead at the end, or a lead not followed by a trail */ \
        if(U16_IS_SURROGATE_TRAIL(c) || (i)==(length) || !U16_IS_TRAIL(__c2=(s)[i])) { \
            (c)=0xfffd; \
        } else { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
353 
/**
 * Write a code point into a string buffer at the given offset, storing
 * 1 code unit (value <= 0xffff) or 2 code units (a surrogate pair).
 * The offset points to the current end of the string contents
 * and is advanced past the written units (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary: lead surrogate first, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
375 
/**
 * Write a code point into a string buffer at the given offset, storing
 * 1 code unit (value <= 0xffff) or 2 code units (a surrogate pair).
 * The offset points to the current end of the string contents
 * and is advanced past the written units (post-increment).
 * "Safe" macro, checks for a valid code point.
 * The capacity is consulted only when a surrogate pair is to be written;
 * a single code unit is stored without a capacity check (i<capacity is
 * a precondition).
 * If the code point is above 0x10ffff or a trail surrogate does not fit,
 * then isError is set to true and nothing is written.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)>0x10ffff || (i)+1>=(capacity)) { \
            /* c>0x10ffff or not enough space */ \
            (isError)=true; \
        } else { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
403 
/**
 * Step the string offset forward over one code point, from one
 * code point boundary to the next: by two units if the unit at the
 * offset is a lead surrogate, otherwise by one.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[i])) { \
        /* extra step for the trail unit assumed to follow */ \
        ++(i); \
    } \
    ++(i); \
} UPRV_BLOCK_MACRO_END
419 
/**
 * Step the string offset forward over one code point, from one
 * code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset always advances by one unit; it advances by a second unit
 * only when the first was a lead surrogate, the new offset is not equal
 * to length, and the unit there is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[i])) { \
        ++(i); \
        if((length)!=(i) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } else { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
438 
/**
 * Step the string offset forward over n code points, from one
 * code point boundary to the n-th next one, by applying
 * U16_FWD_1_UNSAFE n times. Nothing happens if n<=0.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
458 
/**
 * Step the string offset forward over up to n code points, from one
 * code point boundary to the n-th next one, by applying U16_FWD_1
 * repeatedly. Stepping stops early at the end of the string: when the
 * offset reaches length or, for a negative length, when the unit at
 * the offset is NUL. Nothing happens if n<=0.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); \
            __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
            --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
481 
/**
 * Move a random-access offset back to the start of the code point
 * it falls into.
 * If the unit at the offset is a trail surrogate, the offset is
 * decremented by one without looking at the preceding unit.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        (i)-=1; \
    } \
} UPRV_BLOCK_MACRO_END
500 
/**
 * Move a random-access offset back to the start of the code point
 * it falls into.
 * The offset is decremented only if the unit at the offset is a trail
 * surrogate, i>start, and the unit before it is a lead surrogate.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        if((start)<(i) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
520 
521 /* definitions with backward iteration -------------------------------------- */
522 
/**
 * Step the string offset back from one code point boundary to the
 * previous one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the unit before
 * that is read as its lead surrogate without any check, and the offset
 * moves back by two.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c)) { \
        --(i); \
        (c)=U16_GET_SUPPLEMENTARY((s)[i], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
549 
/**
 * Step the string offset back from one code point boundary to the
 * previous one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the decremented
 * offset is still greater than start, and the unit before it is a lead
 * surrogate, both units are consumed and combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate and only
 * that one unit is consumed.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (start)<(i)) { \
        /* peek at the preceding unit without consuming it yet */ \
        uint16_t __c2=(s)[(i)-1]; \
        if(U16_IS_LEAD(__c2)) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
580 
/**
 * Step the string offset back from one code point boundary to the
 * previous one and read the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the decremented
 * offset is still greater than start, and the unit before it is a lead
 * surrogate, both units are consumed and combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD and only that one unit
 * is consumed.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* unpaired: a lead unit, a trail at the start, or a trail not preceded by a lead */ \
        if(U16_IS_SURROGATE_LEAD(c) || (i)<=(start) || !U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
613 
/**
 * Step the string offset back over one code point, from one
 * code point boundary to the previous one: by two units if the unit
 * before the offset is a trail surrogate, otherwise by one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        /* extra step for the lead unit assumed to precede */ \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
630 
/**
 * Step the string offset back over one code point, from one
 * code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset always moves back by one unit; it moves back by a second
 * unit only when the first was a trail surrogate, the new offset is
 * greater than start, and the unit before it is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        if((start)<(i) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
648 
/**
 * Step the string offset back over n code points, from one
 * code point boundary to the n-th one before it, by applying
 * U16_BACK_1_UNSAFE n times. Nothing happens if n<=0.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
669 
/**
 * Step the string offset back over up to n code points, from one
 * code point boundary to the n-th one before it, by applying U16_BACK_1
 * repeatedly. Stepping stops early once the offset is no longer
 * greater than start. Nothing happens if n<=0.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
691 
/**
 * Move a random-access offset forward to the boundary after the
 * code point it falls into.
 * If the unit before the offset is a lead surrogate, the offset is
 * incremented by one without looking at the unit at the offset.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        (i)+=1; \
    } \
} UPRV_BLOCK_MACRO_END
710 
/**
 * Move a random-access offset forward to the boundary after the
 * code point it falls into.
 * The offset is incremented only if it lies strictly inside the string
 * (start<i, and i<length or length<0), the unit before it is a lead
 * surrogate, and the unit at the offset is a trail surrogate.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        /* both neighbours are inside the string: check that they form a pair */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
733 
734 #endif
735