1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: October 30 2002
8 * Since: ICU 2.4
9 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
10 **********************************************************************
11 */
12 #include "propname.h"
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
16 #include "umutex.h"
17 #include "cmemory.h"
18 #include "cstring.h"
19 #include "uarrsort.h"
20 #include "uinvchar.h"
21 
22 #define INCLUDED_FROM_PROPNAME_CPP
23 #include "propname_data.h"
24 
25 U_CDECL_BEGIN
26 
27 /**
28  * Get the next non-ignorable ASCII character from a property name
29  * and lowercases it.
30  * @return ((advance count for the name)<<8)|character
31  */
32 static inline int32_t
getASCIIPropertyNameChar(const char * name)33 getASCIIPropertyNameChar(const char *name) {
34     int32_t i;
35     char c;
36 
37     /* Ignore delimiters '-', '_', and ASCII White_Space */
38     for(i=0;
39         (c=name[i++])==0x2d || c==0x5f ||
40         c==0x20 || (0x09<=c && c<=0x0d);
41     ) {}
42 
43     if(c!=0) {
44         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
45     } else {
46         return i<<8;
47     }
48 }
49 
50 /**
51  * Get the next non-ignorable EBCDIC character from a property name
52  * and lowercases it.
53  * @return ((advance count for the name)<<8)|character
54  */
55 static inline int32_t
getEBCDICPropertyNameChar(const char * name)56 getEBCDICPropertyNameChar(const char *name) {
57     int32_t i;
58     char c;
59 
60     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
61     for(i=0;
62         (c=name[i++])==0x60 || c==0x6d ||
63         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
64     ) {}
65 
66     if(c!=0) {
67         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
68     } else {
69         return i<<8;
70     }
71 }
72 
73 /**
74  * Unicode property names and property value names are compared "loosely".
75  *
76  * UCD.html 4.0.1 says:
77  *   For all property names, property value names, and for property values for
78  *   Enumerated, Binary, or Catalog properties, use the following
79  *   loose matching rule:
80  *
81  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
82  *
83  * This function does just that, for (char *) name strings.
84  * It is almost identical to ucnv_compareNames() but also ignores
85  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
86  *
87  * @internal
88  */
89 
90 U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char * name1,const char * name2)91 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
92     int32_t rc, r1, r2;
93 
94     for(;;) {
95         r1=getASCIIPropertyNameChar(name1);
96         r2=getASCIIPropertyNameChar(name2);
97 
98         /* If we reach the ends of both strings then they match */
99         if(((r1|r2)&0xff)==0) {
100             return 0;
101         }
102 
103         /* Compare the lowercased characters */
104         if(r1!=r2) {
105             rc=(r1&0xff)-(r2&0xff);
106             if(rc!=0) {
107                 return rc;
108             }
109         }
110 
111         name1+=r1>>8;
112         name2+=r2>>8;
113     }
114 }
115 
116 U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char * name1,const char * name2)117 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
118     int32_t rc, r1, r2;
119 
120     for(;;) {
121         r1=getEBCDICPropertyNameChar(name1);
122         r2=getEBCDICPropertyNameChar(name2);
123 
124         /* If we reach the ends of both strings then they match */
125         if(((r1|r2)&0xff)==0) {
126             return 0;
127         }
128 
129         /* Compare the lowercased characters */
130         if(r1!=r2) {
131             rc=(r1&0xff)-(r2&0xff);
132             if(rc!=0) {
133                 return rc;
134             }
135         }
136 
137         name1+=r1>>8;
138         name2+=r2>>8;
139     }
140 }
141 
142 U_CDECL_END
143 
144 U_NAMESPACE_BEGIN
145 
findProperty(int32_t property)146 int32_t PropNameData::findProperty(int32_t property) {
147     int32_t i=1;  // valueMaps index, initially after numRanges
148     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
149         // Read and skip the start and limit of this range.
150         int32_t start=valueMaps[i];
151         int32_t limit=valueMaps[i+1];
152         i+=2;
153         if(property<start) {
154             break;
155         }
156         if(property<limit) {
157             return i+(property-start)*2;
158         }
159         i+=(limit-start)*2;  // Skip all entries for this range.
160     }
161     return 0;
162 }
163 
findPropertyValueNameGroup(int32_t valueMapIndex,int32_t value)164 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
165     if(valueMapIndex==0) {
166         return 0;  // The property does not have named values.
167     }
168     ++valueMapIndex;  // Skip the BytesTrie offset.
169     int32_t numRanges=valueMaps[valueMapIndex++];
170     if(numRanges<0x10) {
171         // Ranges of values.
172         for(; numRanges>0; --numRanges) {
173             // Read and skip the start and limit of this range.
174             int32_t start=valueMaps[valueMapIndex];
175             int32_t limit=valueMaps[valueMapIndex+1];
176             valueMapIndex+=2;
177             if(value<start) {
178                 break;
179             }
180             if(value<limit) {
181                 return valueMaps[valueMapIndex+value-start];
182             }
183             valueMapIndex+=limit-start;  // Skip all entries for this range.
184         }
185     } else {
186         // List of values.
187         int32_t valuesStart=valueMapIndex;
188         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
189         do {
190             int32_t v=valueMaps[valueMapIndex];
191             if(value<v) {
192                 break;
193             }
194             if(value==v) {
195                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
196             }
197         } while(++valueMapIndex<nameGroupOffsetsStart);
198     }
199     return 0;
200 }
201 
getName(const char * nameGroup,int32_t nameIndex)202 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
203     int32_t numNames=*nameGroup++;
204     if(nameIndex<0 || numNames<=nameIndex) {
205         return NULL;
206     }
207     // Skip nameIndex names.
208     for(; nameIndex>0; --nameIndex) {
209         nameGroup=uprv_strchr(nameGroup, 0)+1;
210     }
211     if(*nameGroup==0) {
212         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
213     }
214     return nameGroup;
215 }
216 
containsName(BytesTrie & trie,const char * name)217 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
218     if(name==NULL) {
219         return FALSE;
220     }
221     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
222     char c;
223     while((c=*name++)!=0) {
224         c=uprv_invCharToLowercaseAscii(c);
225         // Ignore delimiters '-', '_', and ASCII White_Space.
226         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
227             continue;
228         }
229         if(!USTRINGTRIE_HAS_NEXT(result)) {
230             return FALSE;
231         }
232         result=trie.next((uint8_t)c);
233     }
234     return USTRINGTRIE_HAS_VALUE(result);
235 }
236 
getPropertyName(int32_t property,int32_t nameChoice)237 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
238     int32_t valueMapIndex=findProperty(property);
239     if(valueMapIndex==0) {
240         return NULL;  // Not a known property.
241     }
242     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
243 }
244 
getPropertyValueName(int32_t property,int32_t value,int32_t nameChoice)245 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
246     int32_t valueMapIndex=findProperty(property);
247     if(valueMapIndex==0) {
248         return NULL;  // Not a known property.
249     }
250     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
251     if(nameGroupOffset==0) {
252         return NULL;
253     }
254     return getName(nameGroups+nameGroupOffset, nameChoice);
255 }
256 
getPropertyOrValueEnum(int32_t bytesTrieOffset,const char * alias)257 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
258     BytesTrie trie(bytesTries+bytesTrieOffset);
259     if(containsName(trie, alias)) {
260         return trie.getValue();
261     } else {
262         return UCHAR_INVALID_CODE;
263     }
264 }
265 
getPropertyEnum(const char * alias)266 int32_t PropNameData::getPropertyEnum(const char *alias) {
267     return getPropertyOrValueEnum(0, alias);
268 }
269 
getPropertyValueEnum(int32_t property,const char * alias)270 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
271     int32_t valueMapIndex=findProperty(property);
272     if(valueMapIndex==0) {
273         return UCHAR_INVALID_CODE;  // Not a known property.
274     }
275     valueMapIndex=valueMaps[valueMapIndex+1];
276     if(valueMapIndex==0) {
277         return UCHAR_INVALID_CODE;  // The property does not have named values.
278     }
279     // valueMapIndex is the start of the property's valueMap,
280     // where the first word is the BytesTrie offset.
281     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
282 }
283 U_NAMESPACE_END
284 
285 //----------------------------------------------------------------------
286 // Public API implementation
287 
288 U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,UPropertyNameChoice nameChoice)289 u_getPropertyName(UProperty property,
290                   UPropertyNameChoice nameChoice) {
291     U_NAMESPACE_USE
292     return PropNameData::getPropertyName(property, nameChoice);
293 }
294 
295 U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char * alias)296 u_getPropertyEnum(const char* alias) {
297     U_NAMESPACE_USE
298     return (UProperty)PropNameData::getPropertyEnum(alias);
299 }
300 
301 U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,int32_t value,UPropertyNameChoice nameChoice)302 u_getPropertyValueName(UProperty property,
303                        int32_t value,
304                        UPropertyNameChoice nameChoice) {
305     U_NAMESPACE_USE
306     return PropNameData::getPropertyValueName(property, value, nameChoice);
307 }
308 
309 U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,const char * alias)310 u_getPropertyValueEnum(UProperty property,
311                        const char* alias) {
312     U_NAMESPACE_USE
313     return PropNameData::getPropertyValueEnum(property, alias);
314 }
315 
316 U_CAPI const char*  U_EXPORT2
uscript_getName(UScriptCode scriptCode)317 uscript_getName(UScriptCode scriptCode){
318     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
319                                   U_LONG_PROPERTY_NAME);
320 }
321 
322 U_CAPI const char*  U_EXPORT2
uscript_getShortName(UScriptCode scriptCode)323 uscript_getShortName(UScriptCode scriptCode){
324     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
325                                   U_SHORT_PROPERTY_NAME);
326 }
327