1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2009-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  n2builder.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2009nov25
16 *   created by: Markus W. Scherer
17 */
18 
19 #ifndef __N2BUILDER_H__
20 #define __N2BUILDER_H__
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_NORMALIZATION
25 
26 #include "unicode/errorcode.h"
27 #include "unicode/umutablecptrie.h"
28 #include "unicode/unistr.h"
29 #include "normalizer2impl.h"  // for IX_COUNT
30 #include "toolutil.h"
31 #include "norms.h"
32 
33 U_NAMESPACE_BEGIN
34 
35 extern UBool beVerbose, haveCopyright;
36 
37 class Normalizer2DataBuilder {
38 public:
39     Normalizer2DataBuilder(UErrorCode &errorCode);
40     ~Normalizer2DataBuilder();
41 
42     enum OverrideHandling {
43         OVERRIDE_NONE,
44         OVERRIDE_ANY,
45         OVERRIDE_PREVIOUS
46     };
47 
48     void setOverrideHandling(OverrideHandling oh);
49 
50     enum Optimization {
51         OPTIMIZE_NORMAL,
52         OPTIMIZE_FAST
53     };
54 
setOptimization(Optimization opt)55     void setOptimization(Optimization opt) { optimization=opt; }
56 
57     void setCC(UChar32 c, uint8_t cc);
58     void setOneWayMapping(UChar32 c, const UnicodeString &m);
59     void setRoundTripMapping(UChar32 c, const UnicodeString &m);
60     void removeMapping(UChar32 c);
61 
62     void setUnicodeVersion(const char *v);
63 
64     void writeBinaryFile(const char *filename);
65     void writeCSourceFile(const char *filename);
66     void writeDataFile(const char *filename, bool writeRemoved) const;
67 
68     static void computeDiff(const Normalizer2DataBuilder &b1,
69                             const Normalizer2DataBuilder &b2,
70                             Normalizer2DataBuilder &diff);
71 
72 private:
73     friend class Norm16Writer;
74 
75     Normalizer2DataBuilder(const Normalizer2DataBuilder &other) = delete;
76     Normalizer2DataBuilder &operator=(const Normalizer2DataBuilder &other) = delete;
77 
78     Norm *checkNormForMapping(Norm *p, UChar32 c);  // check for permitted overrides
79 
80     /**
81      * A starter character with a mapping does not have a composition boundary after it
82      * if the character itself combines-forward (which is tested by the caller of this function),
83      * or it is deleted (mapped to the empty string),
84      * or its mapping contains no starter,
85      * or the last starter combines-forward.
86      */
87     UBool mappingHasCompBoundaryAfter(const BuilderReorderingBuffer &buffer,
88                                       Norm::MappingType mappingType) const;
89     /** Returns TRUE if the mapping by itself recomposes, that is, it is not comp-normalized. */
90     UBool mappingRecomposes(const BuilderReorderingBuffer &buffer) const;
91     void postProcess(Norm &norm);
92 
93     void setSmallFCD(UChar32 c);
getMinNoNoDelta()94     int32_t getMinNoNoDelta() const {
95         return indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]-
96             ((2*Normalizer2Impl::MAX_DELTA+1)<<Normalizer2Impl::DELTA_SHIFT);
97     }
98     void writeNorm16(UMutableCPTrie *norm16Trie, UChar32 start, UChar32 end, Norm &norm);
99     void setHangulData(UMutableCPTrie *norm16Trie);
100     LocalUCPTriePointer processData();
101 
102     Norms norms;
103 
104     int32_t phase;
105     OverrideHandling overrideHandling;
106 
107     Optimization optimization;
108 
109     int32_t indexes[Normalizer2Impl::IX_COUNT];
110     uint8_t *norm16TrieBytes;
111     int32_t norm16TrieLength;
112     UnicodeString extraData;
113     uint8_t smallFCD[0x100];
114 
115     UVersionInfo unicodeVersion;
116 };
117 
118 U_NAMESPACE_END
119 
120 #endif // #if !UCONFIG_NO_NORMALIZATION
121 
122 #endif  // __N2BUILDER_H__
123