1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2010, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  propsvec.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002feb22
16 *   created by: Markus W. Scherer
17 *
18 *   Store bits (Unicode character properties) in bit set vectors.
19 */
20 
21 #ifndef __UPROPSVEC_H__
22 #define __UPROPSVEC_H__
23 
24 #include "unicode/utypes.h"
25 #include "utrie.h"
26 #include "utrie2.h"
27 
28 U_CDECL_BEGIN
29 
/**
 * Unicode Properties Vectors associated with code point ranges.
 *
 * Opaque type: only this forward declaration is visible in this header;
 * objects are obtained from upvec_open().
 *
 * Rows of uint32_t integers in a contiguous array store
 * the range limits and the properties vectors.
 *
 * Logically, each row has a certain number of uint32_t values,
 * which is set via the upvec_open() "columns" parameter.
 *
 * Internally, two additional columns are stored.
 * In each internal row,
 * row[0] contains the start code point and
 * row[1] contains the limit code point,
 * which is the start of the next range.
 *
 * Initially, there is only one "normal" row for the half-open
 * range [0..0x110000[ (that is, 0x110000 itself is excluded) with values 0.
 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
 *
 * It would be possible to store only one range boundary per row,
 * but self-contained rows make it possible to sort them by contents later.
 */
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
54 
/*
 * Pseudo code points for the special rows that hold the initialValue and
 * the errorValue, which are used to initialize a UTrie2 or similar.
 *
 * They start right at the limit of the normal range [0..0x110000[ and are
 * expressed relative to UPVEC_FIRST_SPECIAL_CP so that the first/last
 * relationships stay visible:
 * - UPVEC_INITIAL_VALUE_CP is the first special pseudo code point
 * - UPVEC_ERROR_VALUE_CP follows it directly
 * - UPVEC_MAX_CP is the last special pseudo code point currently defined
 */
#define UPVEC_FIRST_SPECIAL_CP 0x110000
#define UPVEC_INITIAL_VALUE_CP UPVEC_FIRST_SPECIAL_CP
#define UPVEC_ERROR_VALUE_CP (UPVEC_FIRST_SPECIAL_CP + 1)
#define UPVEC_MAX_CP UPVEC_ERROR_VALUE_CP
63 
/*
 * Special pseudo code point used in upvec_compact(), signalling the end of
 * delivering special values and the beginning of delivering real ones:
 * between the two phases the handler is called once with
 * start==end==UPVEC_START_REAL_VALUES_CP.
 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
 */
#define UPVEC_START_REAL_VALUES_CP 0x200000
70 
/*
 * Open a UPropsVectors object.
 * @param columns Number of value integers (uint32_t) per row.
 *                The two columns that are stored internally for the range
 *                boundaries are in addition to this number.
 * @param pErrorCode ICU error code pointer.
 *                NOTE(review): presumably an in/out parameter that is set on
 *                failure, as is usual for ICU -- confirm in the implementation.
 * @return The opened object.
 *         NOTE(review): presumably NULL on failure, and to be released with
 *         upvec_close() -- confirm in the implementation.
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode);
77 
/*
 * Close a UPropsVectors object.
 * NOTE(review): presumably the counterpart of upvec_open() which releases the
 * object; whether pv may be NULL is not visible in this header -- confirm.
 */
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv);
80 
/*
 * In rows for code points [start..end], select the column,
 * reset the mask bits and set the value bits (ANDed with the mask).
 * In other words, for each affected row the selected column becomes
 * (old & ~mask) | (value & mask).
 *
 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
 *
 * @param pv     The properties vectors object to modify.
 * @param start  First code point of the range.
 * @param end    Last code point of the range.
 *               NOTE(review): appears to be inclusive, judging by the closed
 *               bracket notation [start..end] -- confirm.
 * @param column Index of the value column to modify.
 *               NOTE(review): presumably counted among the "columns" given to
 *               upvec_open(), not among the internal columns -- confirm.
 * @param value  New bits; only the bits that are also set in mask are used.
 * @param mask   Selects the bits of the column that are replaced.
 * @param pErrorCode ICU error code pointer.
 *               NOTE(review): presumably where U_NO_WRITE_PERMISSION and other
 *               errors are reported -- confirm.
 */
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
               UChar32 start, UChar32 end,
               int32_t column,
               uint32_t value, uint32_t mask,
               UErrorCode *pErrorCode);
93 
/*
 * Get the value stored for a code point in one column.
 *
 * Logically const but must not be used on the same pv concurrently!
 * NOTE(review): this warning suggests that the function updates internal
 * state of pv despite the const qualifier -- confirm in the implementation.
 *
 * Always returns 0 if called after upvec_compact().
 *
 * @param pv     The properties vectors object to read from.
 * @param c      The code point to look up.
 * @param column Index of the value column to read.
 * @return The uint32_t value of that column for c.
 *         NOTE(review): the result for an out-of-range c or column is not
 *         visible in this header -- confirm.
 */
U_CAPI uint32_t U_EXPORT2
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
100 
/*
 * Get a row by its index, optionally together with its code point range.
 *
 * pRangeStart and pRangeEnd can be NULL.
 * @param pv          The properties vectors object to read from.
 * @param rowIndex    Index of the requested row.
 * @param pRangeStart If not NULL, receives the start of the row's range.
 * @param pRangeEnd   If not NULL, receives the end of the row's range.
 *                    NOTE(review): whether this is the last code point or the
 *                    internal limit (start of the next range) is not visible
 *                    here -- confirm.
 * @return NULL if rowIndex out of range and for illegal arguments,
 *         or if called after upvec_compact();
 *         otherwise a pointer to the row.
 *         NOTE(review): whether the pointer addresses the value columns or
 *         the internal row including its two range columns is not visible
 *         here; note also that it is non-const although pv is const -- confirm.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
             UChar32 *pRangeStart, UChar32 *pRangeEnd);
109 
/*
 * Handler function type for upvec_compact().
 *
 * Compact the vectors:
 * - modify the memory
 * - keep only unique vectors
 * - store them contiguously from the beginning of the memory
 * - for each (non-unique) row, call the handler function
 *
 * The handler's rowIndex is the index of the row in the compacted
 * memory block.
 * (Therefore, it starts at 0 and increases in increments of the columns value.)
 *
 * In a first phase, only special values are delivered (each exactly once),
 * with start==end both equalling a special pseudo code point.
 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
 * where rowIndex is the length of the compacted array,
 * and the row is arbitrary (but not NULL).
 * Then, in the second phase, the handler is called for each row of real values.
 *
 * @param context  The context pointer that was given to upvec_compact().
 *                 NOTE(review): presumably passed through unchanged -- confirm.
 * @param start    Start of the code point range of the row,
 *                 or a special pseudo code point (see above).
 * @param end      End of the code point range of the row; equal to start
 *                 for the special calls described above.
 * @param rowIndex Index of the row in the compacted memory block; for the
 *                 UPVEC_START_REAL_VALUES_CP call, the length of the
 *                 compacted array.
 * @param row      The row's values; never NULL.
 * @param columns  Number of uint32_t values per row.
 *                 NOTE(review): presumably the logical columns count from
 *                 upvec_open() -- confirm.
 * @param pErrorCode ICU error code pointer.
 *                 NOTE(review): presumably lets the handler report a failure
 *                 back to upvec_compact() -- confirm.
 */
typedef void U_CALLCONV
UPVecCompactHandler(void *context,
                    UChar32 start, UChar32 end,
                    int32_t rowIndex, uint32_t *row, int32_t columns,
                    UErrorCode *pErrorCode);
133 
/*
 * Compact the vectors of pv and deliver the rows to the handler.
 * See the description above UPVecCompactHandler for what compacting does
 * and for the order and the arguments of the handler calls.
 *
 * As documented for the other functions in this header, after this call
 * upvec_setValue() sets U_NO_WRITE_PERMISSION, upvec_getValue() returns 0
 * and upvec_getRow() returns NULL, while upvec_getArray() and
 * upvec_cloneArray() no longer return NULL for this reason.
 *
 * @param pv       The properties vectors object to compact.
 * @param handler  Function to call for the rows.
 * @param context  Pointer for the handler's context argument.
 * @param pErrorCode ICU error code pointer.
 *                 NOTE(review): presumably an in/out parameter that is set on
 *                 failure -- confirm in the implementation.
 */
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
136 
/*
 * Get the vectors array after calling upvec_compact().
 * The caller must neither modify nor release the returned array.
 * Returns NULL if called before upvec_compact().
 *
 * @param pv       The compacted properties vectors object.
 * @param pRows    NOTE(review): presumably receives the number of rows of the
 *                 returned array; whether NULL is accepted is not visible
 *                 here -- confirm.
 * @param pColumns NOTE(review): presumably receives the number of uint32_t
 *                 values per row; whether NULL is accepted is not visible
 *                 here -- confirm.
 * @return The compacted vectors array, still owned by pv.
 */
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
144 
/*
 * Get a clone of the vectors array after calling upvec_compact().
 * The caller owns the returned array and must uprv_free() it.
 * Returns NULL if called before upvec_compact().
 *
 * @param pv       The compacted properties vectors object.
 * @param pRows    NOTE(review): presumably receives the number of rows of the
 *                 returned array; whether NULL is accepted is not visible
 *                 here -- confirm.
 * @param pColumns NOTE(review): presumably receives the number of uint32_t
 *                 values per row; whether NULL is accepted is not visible
 *                 here -- confirm.
 * @param pErrorCode ICU error code pointer.
 *                 NOTE(review): presumably set if the clone cannot be
 *                 allocated -- confirm in the implementation.
 * @return A newly allocated copy of the compacted vectors array.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
153 
/*
 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
 * vectors array, and freeze the trie.
 *
 * @param pv       The properties vectors object; it is compacted by this call.
 * @param pErrorCode ICU error code pointer.
 *                 NOTE(review): presumably an in/out parameter that is set on
 *                 failure -- confirm in the implementation.
 * @return The frozen trie.
 *         NOTE(review): ownership of the trie and the result on failure are
 *         not visible in this header -- confirm.
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
160 
161 struct UPVecToUTrie2Context {
162     UTrie2 *trie;
163     int32_t initialValue;
164     int32_t errorValue;
165     int32_t maxValue;
166 };
167 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
168 
/*
 * context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values
 *
 * The parameter list and return type are the same as those of
 * UPVecCompactHandler, so this function can serve as the handler argument
 * of upvec_compact().
 * NOTE(review): context presumably has to point to a UPVecToUTrie2Context
 * provided by the caller; how its fields must be initialized is not visible
 * in this header -- confirm in the implementation.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
                             UChar32 start, UChar32 end,
                             int32_t rowIndex, uint32_t *row, int32_t columns,
                             UErrorCode *pErrorCode);
175 
176 U_CDECL_END
177 
178 #endif
179