1 /*
2    Copyright (C) 2020 MariaDB Corporation
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17    MA 02110-1301, USA. */
18 #ifndef COLLATION_H_INCLUDED
19 #define COLLATION_H_INCLUDED
20 
21 #include "mcsconfig.h"
22 #include "exceptclasses.h"
23 #include "conststring.h"
24 
25 /*
26   Redefine definitions used by MariaDB m_ctype.h.
27   This is needed to avoid including <mariadb.h> and <my_sys.h>,
28   which conflict with many MCS and boost headers.
29 */
30 
31 #ifndef FALSE
32 #define FALSE (0)
33 #endif
34 
35 #ifndef TRUE
36 #define TRUE  (1)
37 #endif
38 
39 #ifndef DBUG_ASSERT
40 #define DBUG_ASSERT(x)  idbassert(x)
41 #define DBUG_ASSERT_TEMPORARILY_DEFINED
42 #endif
43 
44 #ifndef MYSQL_PLUGIN_IMPORT
45 #if (defined(_WIN32) && defined(MYSQL_DYNAMIC_PLUGIN))
46 #define MYSQL_PLUGIN_IMPORT __declspec(dllimport)
47 #else
48 #define MYSQL_PLUGIN_IMPORT
49 #endif
50 #endif
51 
52 typedef long long int longlong;
53 typedef unsigned long long int ulonglong;
54 typedef uint32_t uint32;
55 typedef uint16_t uint16;
56 typedef char my_bool;
57 typedef unsigned char uchar;
58 
59 #if defined(__GNUC__) && !defined(_lint)
60 typedef char	pchar;		/* Mixed prototypes can take char */
61 typedef char	puchar;		/* Mixed prototypes can take char */
62 typedef char	pbool;		/* Mixed prototypes can take char */
63 typedef short	pshort;		/* Mixed prototypes can take short int */
64 typedef float	pfloat;		/* Mixed prototypes can take float */
65 #else
66 typedef int	pchar;		/* Mixed prototypes can't take char */
67 typedef uint	puchar;		/* Mixed prototypes can't take char */
68 typedef int	pbool;		/* Mixed prototypes can't take char */
69 typedef int	pshort;		/* Mixed prototypes can't take short int */
70 typedef double	pfloat;		/* Mixed prototypes can't take float */
71 #endif
72 
73 typedef const struct charset_info_st CHARSET_INFO;
74 extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info;
75 
76 #include "my_compiler.h"
77 #include "m_ctype.h"
78 
79 #undef FALSE
80 #undef TRUE
81 
82 #ifdef DBUG_ASSERT_TEMPORARILY_DEFINED
83 #undef DBUG_ASSERT
84 #endif
85 
86 
87 namespace datatypes
88 {
89 
90 class MariaDBHasher
91 {
92     static const ulong mPart1DefValue = 1;
93     static const ulong mPart2DefValue = 4;
94 
95     ulong mPart1;
96     ulong mPart2;
97 public:
MariaDBHasher()98     MariaDBHasher()
99         :mPart1(mPart1DefValue), mPart2(mPart2DefValue)
100     { }
add(CHARSET_INFO * cs,const char * str,size_t length)101     MariaDBHasher & add(CHARSET_INFO * cs, const char *str, size_t length)
102     {
103         cs->hash_sort((const uchar *) str, length, &mPart1, &mPart2);
104         return *this;
105     }
add(CHARSET_INFO * cs,const utils::ConstString & str)106     MariaDBHasher & add(CHARSET_INFO *cs, const utils::ConstString &str)
107     {
108         return add(cs, str.str(), str.length());
109     }
finalize()110     uint64_t finalize() const
111     {
112         return mPart1;
113     }
wasUsed()114     bool wasUsed() const
115     {
116         return mPart1 != mPart1DefValue || mPart2 != mPart2DefValue;
117     }
118 };
119 
120 
121 // A reference to MariaDB CHARSET_INFO.
122 
123 class Charset
124 {
125 protected:
126     const struct charset_info_st * mCharset;
127 public:
Charset(CHARSET_INFO & cs)128     Charset(CHARSET_INFO & cs) :mCharset(&cs) { }
Charset(CHARSET_INFO * cs)129     Charset(CHARSET_INFO *cs)
130        :mCharset(cs ? cs : &my_charset_bin)
131     { }
132     Charset(uint32_t charsetNumber);
getCharset()133     CHARSET_INFO & getCharset() const { return *mCharset; }
hash(const char * data,uint64_t len)134     uint32_t hash(const char *data, uint64_t len) const
135     {
136         return MariaDBHasher().add(mCharset, data, len).finalize();
137     }
eq(const std::string & str1,const std::string & str2)138     bool eq(const std::string & str1, const std::string & str2) const
139     {
140         return mCharset->strnncollsp(str1.data(), str1.length(),
141                                      str2.data(), str2.length()) == 0;
142     }
strnncollsp(const utils::ConstString & str1,const utils::ConstString & str2)143     int strnncollsp(const utils::ConstString &str1,
144                     const utils::ConstString &str2) const
145     {
146         return mCharset->strnncollsp(str1.str(), str1.length(),
147                                      str2.str(), str2.length());
148     }
strnncollsp(const char * str1,size_t length1,const char * str2,size_t length2)149     int strnncollsp(const char *str1, size_t length1,
150                     const char *str2, size_t length2) const
151     {
152       return mCharset->strnncollsp(str1, length1, str2, length2);
153     }
strnncollsp(const unsigned char * str1,size_t length1,const unsigned char * str2,size_t length2)154     int strnncollsp(const unsigned char *str1, size_t length1,
155                     const unsigned char *str2, size_t length2) const
156     {
157       return mCharset->strnncollsp((const char *) str1, length1,
158                                    (const char *) str2, length2);
159     }
test_if_important_data(const char * str,const char * end)160     bool test_if_important_data(const char *str, const char *end) const
161     {
162         if (mCharset->state & MY_CS_NOPAD)
163           return str < end;
164         return str + mCharset->scan(str, end, MY_SEQ_SPACES) < end;
165 
166     }
like(bool neg,const utils::ConstString & subject,const utils::ConstString & pattern)167     bool like(bool neg,
168               const utils::ConstString &subject,
169               const utils::ConstString &pattern) const
170     {
171       bool res= !mCharset->wildcmp(subject.str(), subject.end(),
172                                    pattern.str(), pattern.end(),
173                                    '\\','_','%');
174       return neg ? !res : res;
175     }
176 };
177 
178 
179 class CollationAwareHasher: public Charset
180 {
181 public:
CollationAwareHasher(const Charset & cs)182     CollationAwareHasher(const Charset &cs)
183         :Charset(cs)
184     { }
operator()185     inline uint32_t operator()(const std::string& s) const
186     {
187         return operator()(s.data(), s.length());
188     }
operator()189     inline uint32_t operator()(const char* data, uint64_t len) const
190     {
191         return Charset::hash(data, len);
192     }
193 };
194 
195 
196 class CollationAwareComparator: public Charset
197 {
198 public:
CollationAwareComparator(const Charset & cs)199     CollationAwareComparator(const Charset &cs)
200        :Charset(cs)
201     { }
operator()202     bool operator()(const std::string & str1, const std::string & str2) const
203     {
204         return Charset::eq(str1, str2);
205     }
206 };
207 
208 
209 
210 } // end of namespace datatypes
211 
212 #endif
213