1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: bytestriebuilder.h 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010sep25 14 * created by: Markus W. Scherer 15 */ 16 17 /** 18 * \file 19 * \brief C++ API: Builder for icu::BytesTrie 20 */ 21 22 #ifndef __BYTESTRIEBUILDER_H__ 23 #define __BYTESTRIEBUILDER_H__ 24 25 #include "unicode/utypes.h" 26 27 #if U_SHOW_CPLUSPLUS_API 28 29 #include "unicode/bytestrie.h" 30 #include "unicode/stringpiece.h" 31 #include "unicode/stringtriebuilder.h" 32 33 class BytesTrieTest; 34 35 U_NAMESPACE_BEGIN 36 37 class BytesTrieElement; 38 class CharString; 39 /** 40 * Builder class for BytesTrie. 41 * 42 * This class is not intended for public subclassing. 43 * @stable ICU 4.8 44 */ 45 class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 46 public: 47 /** 48 * Constructs an empty builder. 49 * @param errorCode Standard ICU error code. 50 * @stable ICU 4.8 51 */ 52 BytesTrieBuilder(UErrorCode &errorCode); 53 54 /** 55 * Destructor. 56 * @stable ICU 4.8 57 */ 58 virtual ~BytesTrieBuilder(); 59 60 /** 61 * Adds a (byte sequence, value) pair. 62 * The byte sequence must be unique. 63 * The bytes will be copied; the builder does not keep 64 * a reference to the input StringPiece or its data(). 65 * @param s The input byte sequence. 66 * @param value The value associated with this byte sequence. 67 * @param errorCode Standard ICU error code. Its input value must 68 * pass the U_SUCCESS() test, or else the function returns 69 * immediately. Check for U_FAILURE() on output or use with 70 * function chaining. (See User Guide for details.) 71 * @return *this 72 * @stable ICU 4.8 73 */ 74 BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode); 75 76 /** 77 * Builds a BytesTrie for the add()ed data. 78 * Once built, no further data can be add()ed until clear() is called. 79 * 80 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 81 * must have been add()ed. 82 * 83 * This method passes ownership of the builder's internal result array to the new trie object. 84 * Another call to any build() variant will re-serialize the trie. 85 * After clear() has been called, a new array will be used as well. 86 * @param buildOption Build option, see UStringTrieBuildOption. 87 * @param errorCode Standard ICU error code. Its input value must 88 * pass the U_SUCCESS() test, or else the function returns 89 * immediately. Check for U_FAILURE() on output or use with 90 * function chaining. (See User Guide for details.) 91 * @return A new BytesTrie for the add()ed data. 92 * @stable ICU 4.8 93 */ 94 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 95 96 /** 97 * Builds a BytesTrie for the add()ed data and byte-serializes it. 98 * Once built, no further data can be add()ed until clear() is called. 99 * 100 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 101 * must have been add()ed. 102 * 103 * Multiple calls to buildStringPiece() return StringPieces referring to the 104 * builder's same byte array, without rebuilding. 105 * If buildStringPiece() is called after build(), the trie will be 106 * re-serialized into a new array (because build() passes on ownership). 107 * If build() is called after buildStringPiece(), the trie object returned 108 * by build() will become the owner of the underlying string for the 109 * previously returned StringPiece. 110 * After clear() has been called, a new array will be used as well. 111 * @param buildOption Build option, see UStringTrieBuildOption. 112 * @param errorCode Standard ICU error code. Its input value must 113 * pass the U_SUCCESS() test, or else the function returns 114 * immediately. Check for U_FAILURE() on output or use with 115 * function chaining. (See User Guide for details.) 116 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 117 * @stable ICU 4.8 118 */ 119 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 120 121 /** 122 * Removes all (byte sequence, value) pairs. 123 * New data can then be add()ed and a new trie can be built. 124 * @return *this 125 * @stable ICU 4.8 126 */ 127 BytesTrieBuilder &clear(); 128 129 private: 130 friend class ::BytesTrieTest; 131 132 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 133 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 134 135 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 136 137 virtual int32_t getElementStringLength(int32_t i) const; 138 virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; 139 virtual int32_t getElementValue(int32_t i) const; 140 141 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 142 143 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 144 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 145 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; 146 matchNodesCanHaveValues()147 virtual UBool matchNodesCanHaveValues() const { return false; } 148 getMaxBranchLinearSubNodeLength()149 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } getMinLinearMatch()150 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } getMaxLinearMatchLength()151 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 152 153 /** 154 * @internal (private) 155 */ 156 class BTLinearMatchNode : public LinearMatchNode { 157 public: 158 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 159 virtual UBool operator==(const Node &other) const; 160 virtual void write(StringTrieBuilder &builder); 161 private: 162 const char *s; 163 }; 164 165 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 166 Node *nextNode) const; 167 168 UBool ensureCapacity(int32_t length); 169 virtual int32_t write(int32_t byte); 170 int32_t write(const char *b, int32_t length); 171 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 172 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 173 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 174 virtual int32_t writeDeltaTo(int32_t jumpTarget); 175 static int32_t internalEncodeDelta(int32_t i, char intBytes[]); 176 177 CharString *strings; // Pointer not object so we need not #include internal charstr.h. 178 BytesTrieElement *elements; 179 int32_t elementsCapacity; 180 int32_t elementsLength; 181 182 // Byte serialization of the trie. 183 // Grows from the back: bytesLength measures from the end of the buffer! 184 char *bytes; 185 int32_t bytesCapacity; 186 int32_t bytesLength; 187 }; 188 189 U_NAMESPACE_END 190 191 #endif /* U_SHOW_CPLUSPLUS_API */ 192 193 #endif // __BYTESTRIEBUILDER_H__ 194