1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ***************************************************************************
5  * Copyright (C) 2008-2015, International Business Machines Corporation
6  * and others. All Rights Reserved.
7  ***************************************************************************
8  *   file name:  uspoof_build.cpp
9  *   encoding:   UTF-8
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2008 Dec 8
14  *   created by: Andy Heninger
15  *
16  *   Unicode Spoof Detection Data Builder
17  *   Builder-related functions are kept in separate files so that applications not needing
18  *   the builder can more easily exclude them, typically by means of static linking.
19  *
20  *   There are three relatively independent sets of Spoof data,
21  *      Confusables,
22  *      Whole Script Confusables
23  *      ID character extensions.
24  *
25  *   The data tables for each are built separately, each from its own definitions
26  */
27 
28 #include "unicode/utypes.h"
29 #include "unicode/uspoof.h"
30 #include "unicode/unorm.h"
31 #include "unicode/uregex.h"
32 #include "unicode/ustring.h"
33 #include "cmemory.h"
34 #include "uspoof_impl.h"
35 #include "uhash.h"
36 #include "uvector.h"
37 #include "uassert.h"
38 #include "uarrsort.h"
39 #include "uspoof_conf.h"
40 
41 #if !UCONFIG_NO_NORMALIZATION
42 
43 U_NAMESPACE_USE
44 
45 // Defined in uspoof.cpp, initializes file-static variables.
46 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
47 
48 // The main data building function
49 
50 U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSource(const char * confusables,int32_t confusablesLen,const char *,int32_t,int32_t * errorType,UParseError * pe,UErrorCode * status)51 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
52                       const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/,
53                       int32_t *errorType, UParseError *pe, UErrorCode *status) {
54     uspoof_internalInitStatics(status);
55     if (U_FAILURE(*status)) {
56         return NULL;
57     }
58 #if UCONFIG_NO_REGULAR_EXPRESSIONS
59     *status = U_UNSUPPORTED_ERROR;
60     return NULL;
61 #else
62     if (errorType!=NULL) {
63         *errorType = 0;
64     }
65     if (pe != NULL) {
66         pe->line = 0;
67         pe->offset = 0;
68         pe->preContext[0] = 0;
69         pe->postContext[0] = 0;
70     }
71 
72     // Set up a shell of a spoof detector, with empty data.
73     SpoofData *newSpoofData = new SpoofData(*status);
74 
75     if (newSpoofData == NULL) {
76         *status = U_MEMORY_ALLOCATION_ERROR;
77         return NULL;
78     }
79 
80     if (U_FAILURE(*status)) {
81         delete newSpoofData;
82         return NULL;
83     }
84     SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
85 
86     if (This == NULL) {
87         *status = U_MEMORY_ALLOCATION_ERROR;
88         delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called.
89         return NULL;
90     }
91 
92     if (U_FAILURE(*status)) {
93         delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it.
94         return NULL;
95     }
96 
97     // Compile the binary data from the source (text) format.
98     ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
99 
100     if (U_FAILURE(*status)) {
101         delete This;
102         This = NULL;
103     }
104     return (USpoofChecker *)This;
105 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
106 }
107 
108 #endif
109