1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
* File DTPTNGEN_IMPL.H
10 *
11 *******************************************************************************
12 */
13 
14 #ifndef __DTPTNGEN_IMPL_H__
15 #define __DTPTNGEN_IMPL_H__
16 
17 #include "unicode/udatpg.h"
18 
19 #include "unicode/strenum.h"
20 #include "unicode/unistr.h"
21 #include "uvector.h"
22 
23 // TODO(claireho): Split off Builder class.
24 // TODO(claireho): If splitting off Builder class: As subclass or independent?
25 
// Array sizes, limits and flag values shared by the pattern-generator implementation.
// Only two of these are referenced inside this header; the rest are presumably used
// by the implementation file (TODO confirm).
#define MAX_PATTERN_ENTRIES     52       // number of slots in PatternMap::boot
#define MAX_CLDR_FIELD_LEN      60
#define MAX_DT_TOKEN            50       // number of slots in FormatParser::items
#define MAX_RESOURCE_FIELD      12
#define MAX_AVAILABLE_FORMATS   12
#define NONE                    0
#define EXTRA_FIELD             0x10000
#define MISSING_FIELD           0x1000
#define MAX_STRING_ENUMERATION  200
// Named UTF-16 code units (as UChar) for ASCII characters.

// --- Punctuation and symbols ---
#define SINGLE_QUOTE      ((UChar)0x0027)   // U+0027 apostrophe
#define FORWARDSLASH      ((UChar)0x002F)   // U+002F solidus
#define BACKSLASH         ((UChar)0x005C)   // U+005C reverse solidus
#define SPACE             ((UChar)0x0020)   // U+0020 space
#define QUOTATION_MARK    ((UChar)0x0022)   // U+0022 double quote
#define ASTERISK          ((UChar)0x002A)   // U+002A *
#define PLUSSITN          ((UChar)0x002B)   // U+002B plus sign (macro name kept as spelled)
#define COMMA             ((UChar)0x002C)   // U+002C ,
#define HYPHEN            ((UChar)0x002D)   // U+002D hyphen-minus
#define DOT               ((UChar)0x002E)   // U+002E full stop
#define COLON             ((UChar)0x003A)   // U+003A :

// --- Capital letters (no macros are defined here for I, N, P and R) ---
#define CAP_A             ((UChar)0x0041)   // A
#define CAP_B             ((UChar)0x0042)   // B
#define CAP_C             ((UChar)0x0043)   // C
#define CAP_D             ((UChar)0x0044)   // D
#define CAP_E             ((UChar)0x0045)   // E
#define CAP_F             ((UChar)0x0046)   // F
#define CAP_G             ((UChar)0x0047)   // G
#define CAP_H             ((UChar)0x0048)   // H
#define CAP_J             ((UChar)0x004A)   // J
#define CAP_K             ((UChar)0x004B)   // K
#define CAP_L             ((UChar)0x004C)   // L
#define CAP_M             ((UChar)0x004D)   // M
#define CAP_O             ((UChar)0x004F)   // O
#define CAP_Q             ((UChar)0x0051)   // Q
#define CAP_S             ((UChar)0x0053)   // S
#define CAP_T             ((UChar)0x0054)   // T
#define CAP_U             ((UChar)0x0055)   // U
#define CAP_V             ((UChar)0x0056)   // V
#define CAP_W             ((UChar)0x0057)   // W
#define CAP_X             ((UChar)0x0058)   // X
#define CAP_Y             ((UChar)0x0059)   // Y
#define CAP_Z             ((UChar)0x005A)   // Z

// --- Underscore ---
#define LOWLINE           ((UChar)0x005F)   // U+005F low line

// --- Small letters (complete a-z) ---
#define LOW_A             ((UChar)0x0061)   // a
#define LOW_B             ((UChar)0x0062)   // b
#define LOW_C             ((UChar)0x0063)   // c
#define LOW_D             ((UChar)0x0064)   // d
#define LOW_E             ((UChar)0x0065)   // e
#define LOW_F             ((UChar)0x0066)   // f
#define LOW_G             ((UChar)0x0067)   // g
#define LOW_H             ((UChar)0x0068)   // h
#define LOW_I             ((UChar)0x0069)   // i
#define LOW_J             ((UChar)0x006A)   // j
#define LOW_K             ((UChar)0x006B)   // k
#define LOW_L             ((UChar)0x006C)   // l
#define LOW_M             ((UChar)0x006D)   // m
#define LOW_N             ((UChar)0x006E)   // n
#define LOW_O             ((UChar)0x006F)   // o
#define LOW_P             ((UChar)0x0070)   // p
#define LOW_Q             ((UChar)0x0071)   // q
#define LOW_R             ((UChar)0x0072)   // r
#define LOW_S             ((UChar)0x0073)   // s
#define LOW_T             ((UChar)0x0074)   // t
#define LOW_U             ((UChar)0x0075)   // u
#define LOW_V             ((UChar)0x0076)   // v
#define LOW_W             ((UChar)0x0077)   // w
#define LOW_X             ((UChar)0x0078)   // x
#define LOW_Y             ((UChar)0x0079)   // y
#define LOW_Z             ((UChar)0x007A)   // z
// DT_* values: four negative codes and two positive ones.
// NOTE(review): presumably these are the values stored in dtTypeElem::type
// (negative for text widths, DT_NUMERIC as the numeric base, DT_DELTA as a
// step) -- confirm against the type table in the implementation file.
// The negative replacement lists are parenthesized so that each macro always
// expands to a single primary expression, whatever operator appears next to it.
#define DT_NARROW         (-0x101)
#define DT_SHORTER        (-0x102)
#define DT_SHORT          (-0x103)
#define DT_LONG           (-0x104)
#define DT_NUMERIC         0x100
#define DT_DELTA           0x10
101 
102 U_NAMESPACE_BEGIN
103 
104 const int32_t UDATPG_FRACTIONAL_MASK = 1<<UDATPG_FRACTIONAL_SECOND_FIELD;
105 const int32_t UDATPG_SECOND_AND_FRACTIONAL_MASK = (1<<UDATPG_SECOND_FIELD) | (1<<UDATPG_FRACTIONAL_SECOND_FIELD);
106 
// Selector passed as the `type` argument of the DTSkeletonEnumeration constructor.
// NOTE(review): presumably chooses which string (base skeleton, skeleton or
// pattern) the enumeration collects -- confirm in the implementation file.
enum dtStrEnum {
    DT_BASESKELETON = 0,
    DT_SKELETON     = 1,
    DT_PATTERN      = 2
};
112 
113 typedef struct dtTypeElem {
114     UChar                  patternChar;
115     UDateTimePatternField  field;
116     int16_t                type;
117     int16_t                minLen;
118     int16_t                weight;
119 } dtTypeElem;
120 
121 // A compact storage mechanism for skeleton field strings.  Several dozen of these will be created
122 // for a typical DateTimePatternGenerator instance.
123 class SkeletonFields : public UMemory {
124 public:
125     SkeletonFields();
126     void clear();
127     void copyFrom(const SkeletonFields& other);
128     void clearField(int32_t field);
129     UChar getFieldChar(int32_t field) const;
130     int32_t getFieldLength(int32_t field) const;
131     void populate(int32_t field, const UnicodeString& value);
132     void populate(int32_t field, UChar repeatChar, int32_t repeatCount);
133     UBool isFieldEmpty(int32_t field) const;
134     UnicodeString& appendTo(UnicodeString& string) const;
135     UnicodeString& appendFieldTo(int32_t field, UnicodeString& string) const;
136     UChar getFirstChar() const;
137     inline UBool operator==(const SkeletonFields& other) const;
138     inline UBool operator!=(const SkeletonFields& other) const;
139 
140 private:
141     int8_t chars[UDATPG_FIELD_COUNT];
142     int8_t lengths[UDATPG_FIELD_COUNT];
143 };
144 
145 inline UBool SkeletonFields::operator==(const SkeletonFields& other) const {
146     return (uprv_memcmp(chars, other.chars, sizeof(chars)) == 0
147         && uprv_memcmp(lengths, other.lengths, sizeof(lengths)) == 0);
148 }
149 
150 inline UBool SkeletonFields::operator!=(const SkeletonFields& other) const {
151     return (! operator==(other));
152 }
153 
154 class PtnSkeleton : public UMemory {
155 public:
156     int32_t type[UDATPG_FIELD_COUNT];
157     SkeletonFields original;
158     SkeletonFields baseOriginal;
159     UBool addedDefaultDayPeriod;
160 
161     PtnSkeleton();
162     PtnSkeleton(const PtnSkeleton& other);
163     void copyFrom(const PtnSkeleton& other);
164     void clear();
165     UBool equals(const PtnSkeleton& other) const;
166     UnicodeString getSkeleton() const;
167     UnicodeString getBaseSkeleton() const;
168     UChar getFirstChar() const;
169 
170     // TODO: Why is this virtual, as well as the other destructors in this file? We don't want
171     // vtables when we don't use class objects polymorphically.
172     virtual ~PtnSkeleton();
173 };
174 
175 class PtnElem : public UMemory {
176 public:
177     UnicodeString basePattern;
178     LocalPointer<PtnSkeleton> skeleton;
179     UnicodeString pattern;
180     UBool         skeletonWasSpecified; // if specified in availableFormats, not derived
181     LocalPointer<PtnElem> next;
182 
183     PtnElem(const UnicodeString &basePattern, const UnicodeString &pattern);
184     virtual ~PtnElem();
185 };
186 
187 class FormatParser : public UMemory {
188 public:
189     UnicodeString items[MAX_DT_TOKEN];
190     int32_t itemNumber;
191 
192     FormatParser();
193     virtual ~FormatParser();
194     void set(const UnicodeString& patternString);
195     void getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex);
196     UBool isPatternSeparator(const UnicodeString& field) const;
197     static UBool isQuoteLiteral(const UnicodeString& s);
getCanonicalIndex(const UnicodeString & s)198     static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, true); }
199     static int32_t getCanonicalIndex(const UnicodeString& s, UBool strict);
200 
201 private:
202    typedef enum TokenStatus {
203        START,
204        ADD_TOKEN,
205        SYNTAX_ERROR,
206        DONE
207    } TokenStatus;
208 
209    TokenStatus status;
210    virtual TokenStatus setTokens(const UnicodeString& pattern, int32_t startPos, int32_t *len);
211 };
212 
213 class DistanceInfo : public UMemory {
214 public:
215     int32_t missingFieldMask;
216     int32_t extraFieldMask;
217 
DistanceInfo()218     DistanceInfo() {}
219     virtual ~DistanceInfo();
clear()220     void clear() { missingFieldMask = extraFieldMask = 0; }
221     void setTo(const DistanceInfo& other);
addMissing(int32_t field)222     void addMissing(int32_t field) { missingFieldMask |= (1<<field); }
addExtra(int32_t field)223     void addExtra(int32_t field) { extraFieldMask |= (1<<field); }
224 };
225 
226 class DateTimeMatcher: public UMemory {
227 public:
228     PtnSkeleton skeleton;
229 
230     void getBasePattern(UnicodeString& basePattern);
231     UnicodeString getPattern();
232     void set(const UnicodeString& pattern, FormatParser* fp);
233     void set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeleton);
234     void copyFrom(const PtnSkeleton& skeleton);
235     void copyFrom();
236     PtnSkeleton* getSkeletonPtr();
237     UBool equals(const DateTimeMatcher* other) const;
238     int32_t getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const;
239     DateTimeMatcher();
240     DateTimeMatcher(const DateTimeMatcher& other);
241     DateTimeMatcher& operator=(const DateTimeMatcher& other);
242     virtual ~DateTimeMatcher();
243     int32_t getFieldMask() const;
244 };
245 
246 class PatternMap : public UMemory {
247 public:
248     PtnElem *boot[MAX_PATTERN_ENTRIES];
249     PatternMap();
250     virtual  ~PatternMap();
251     void  add(const UnicodeString& basePattern, const PtnSkeleton& skeleton, const UnicodeString& value, UBool skeletonWasSpecified, UErrorCode& status);
252     const UnicodeString* getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const;
253     const UnicodeString* getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr = 0) const;
254     void copyFrom(const PatternMap& other, UErrorCode& status);
255     PtnElem* getHeader(UChar baseChar) const;
256     UBool equals(const PatternMap& other) const;
257 private:
258     UBool isDupAllowed;
259     PtnElem*  getDuplicateElem(const UnicodeString& basePattern, const PtnSkeleton& skeleton, PtnElem *baseElem);
260 }; // end  PatternMap
261 
262 class PatternMapIterator : public UMemory {
263 public:
264     PatternMapIterator(UErrorCode &status);
265     virtual ~PatternMapIterator();
266     void set(PatternMap& patternMap);
267     PtnSkeleton* getSkeleton() const;
268     UBool hasNext() const;
269     DateTimeMatcher& next();
270 private:
271     int32_t bootIndex;
272     PtnElem *nodePtr;
273     LocalPointer<DateTimeMatcher> matcher;
274     PatternMap *patternMap;
275 };
276 
277 class DTSkeletonEnumeration : public StringEnumeration {
278 public:
279     DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status);
280     virtual ~DTSkeletonEnumeration();
281     static UClassID U_EXPORT2 getStaticClassID(void);
282     virtual UClassID getDynamicClassID(void) const;
283     virtual const UnicodeString* snext(UErrorCode& status);
284     virtual void reset(UErrorCode& status);
285     virtual int32_t count(UErrorCode& status) const;
286 private:
287     int32_t pos;
288     UBool isCanonicalItem(const UnicodeString& item);
289     LocalPointer<UVector> fSkeletons;
290 };
291 
292 class DTRedundantEnumeration : public StringEnumeration {
293 public:
294     DTRedundantEnumeration();
295     virtual ~DTRedundantEnumeration();
296     static UClassID U_EXPORT2 getStaticClassID(void);
297     virtual UClassID getDynamicClassID(void) const;
298     virtual const UnicodeString* snext(UErrorCode& status);
299     virtual void reset(UErrorCode& status);
300     virtual int32_t count(UErrorCode& status) const;
301     void add(const UnicodeString &pattern, UErrorCode& status);
302 private:
303     int32_t pos;
304     UBool isCanonicalItem(const UnicodeString& item) const;
305     LocalPointer<UVector> fPatterns;
306 };
307 
308 U_NAMESPACE_END
309 
310 #endif
311