1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2019, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file include/data0type.h
22 Data types
23 
24 Created 1/16/1996 Heikki Tuuri
25 *******************************************************/
26 
27 #ifndef data0type_h
28 #define data0type_h
29 
30 #include "univ.i"
31 
/** Special length indicating a missing instantly added column */
#define UNIV_SQL_DEFAULT (UNIV_SQL_NULL - 1)

/** Check whether a length denotes data that is actually stored in a field,
that is, it is neither of the special markers UNIV_SQL_NULL and
UNIV_SQL_DEFAULT.
The argument is parenthesized so that expressions with operators of lower
precedence than != (such as x & mask) are compared as a whole.
NOTE: the argument is evaluated twice; do not pass expressions that have
side effects.
@param len	length to check
@return whether a length is actually stored in a field */
#define len_is_stored(len) \
	((len) != UNIV_SQL_NULL && (len) != UNIV_SQL_DEFAULT)
37 
38 extern ulint	data_mysql_default_charset_coll;
39 #define DATA_MYSQL_BINARY_CHARSET_COLL 63
40 
41 /* SQL data type struct */
42 struct dtype_t;
43 
/** Comparison types of the SQL LIKE operator */
enum ib_like_t {
	/** exact pattern, e.g. STRING */
	IB_LIKE_EXACT,
	/** prefix pattern, e.g. STRING% */
	IB_LIKE_PREFIX
};
49 
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */

/** missing column */
#define DATA_MISSING	0
/** character varying of the latin1_swedish_ci charset-collation; note that
the MySQL format for this, DATA_BINARY, DATA_VARMYSQL, is also affected by
whether the 'precise type' contains DATA_MYSQL_TRUE_VARCHAR */
#define DATA_VARCHAR	1
/** fixed length character of the latin1_swedish_ci charset-collation */
#define DATA_CHAR	2
/** binary string of fixed length */
#define DATA_FIXBINARY	3
/** binary string */
#define DATA_BINARY	4
/** binary large object, or a TEXT type; if prtype & DATA_BINARY_TYPE == 0,
then this is actually a TEXT column (or a BLOB created with < 4.0.14; since
column prefix indexes came only in 4.0.14, the missing flag in BLOBs created
before that does not cause any harm) */
#define DATA_BLOB	5
/** integer: can be any size 1 - 8 bytes */
#define DATA_INT	6
/** address of the child page in node pointer */
#define DATA_SYS_CHILD	7
/** system column */
#define DATA_SYS	8

/* Data types >= DATA_FLOAT must be compared using the whole field, not as
binary strings */

/** floating point number (single precision) */
#define DATA_FLOAT	9
/** floating point number (double precision) */
#define DATA_DOUBLE	10
/** decimal number stored as an ASCII string */
#define DATA_DECIMAL	11
/** any charset varying length char */
#define DATA_VARMYSQL	12
/** any charset fixed length char.
NOTE that 4.1.1 used DATA_MYSQL and DATA_VARMYSQL for all character sets,
and the charset-collation for tables created with it can also be
latin1_swedish_ci */
#define DATA_MYSQL	13

/** geometry datatype of variable length.
DATA_GEOMETRY includes all standard geometry datatypes as described in
OGC standard(point, line_string, polygon, multi_point, multi_polygon,
multi_line_string, geometry_collection, geometry).
Currently, geometry data is stored in the standard Well-Known Binary(WKB)
format (http://www.opengeospatial.org/standards/sfa).
We use BLOB as the underlying datatype. */
#define DATA_GEOMETRY	14
/** upper bound for main type codes:
dtype_store_for_order_and_null_size() requires the values are <= 63 */
#define DATA_MTYPE_MAX	63

/** minimum value of mtype */
#define DATA_MTYPE_CURRENT_MIN	DATA_VARCHAR
/** maximum value of mtype */
#define DATA_MTYPE_CURRENT_MAX	DATA_GEOMETRY
98 /*-------------------------------------------*/
99 /* The 'PRECISE TYPE' of a column */
100 /*
101 Tables created by a MySQL user have the following convention:
102 
103 - In the least significant byte in the precise type we store the MySQL type
104 code (not applicable for system columns).
105 
106 - In the second least significant byte we OR flags DATA_NOT_NULL,
107 DATA_UNSIGNED, DATA_BINARY_TYPE.
108 
109 - In the third least significant byte of the precise type of string types we
110 store the MySQL charset-collation code. In DATA_BLOB columns created with
111 < 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
112 are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
113 problem, though.
114 
115 Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
116 precise type, since the charset was always the default charset of the MySQL
117 installation. If the stored charset code is 0 in the system table SYS_COLUMNS
118 of InnoDB, that means that the default charset of this MySQL installation
119 should be used.
120 
121 When loading a table definition from the system tables to the InnoDB data
122 dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
123 if the stored charset-collation is 0, and if that is the case and the type is
124 a non-binary string, replace that 0 by the default charset-collation code of
125 this MySQL installation. In short, in old tables, the charset-collation code
126 in the system tables on disk can be 0, but in in-memory data structures
127 (dtype_t), the charset-collation code is always != 0 for non-binary string
128 types.
129 
130 In new tables, in binary string types, the charset-collation code is the
131 MySQL code for the 'binary charset', that is, != 0.
132 
133 For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
134 DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
135 InnoDB performs all comparisons internally, without resorting to the MySQL
136 comparison functions. This is to save CPU time.
137 
138 InnoDB's own internal system tables have different precise types for their
139 columns, and for them the precise type is usually not used at all.
140 */
141 
/** English language character string: this is a relic from pre-MySQL time
and only used for InnoDB's own system tables */
#define DATA_ENGLISH	4
/** another relic from pre-MySQL time */
#define DATA_ERROR	111

/** AND with this mask to extract the MySQL type from the precise type */
#define DATA_MYSQL_TYPE_MASK 255U
/** MySQL type code for the >= 5.0.3 format true VARCHAR */
#define DATA_MYSQL_TRUE_VARCHAR 15
151 
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */

/** row id: a 48-bit integer */
#define DATA_ROW_ID	0
/** stored length for row id */
#define DATA_ROW_ID_LEN	6

/** transaction id: 6 bytes */
#define DATA_TRX_ID	1
/** stored length for transaction id */
#define DATA_TRX_ID_LEN	6

/** rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR	2
/** stored length for rollback data pointer */
#define DATA_ROLL_PTR_LEN 7

/** number of system columns defined above */
#define DATA_N_SYS_COLS 3

/** Used as FTS DOC ID column */
#define DATA_FTS_DOC_ID	3

/** mask to extract the above from prtype */
#define DATA_SYS_PRTYPE_MASK 0xFU
169 
/* Flags ORed to the precise data type */

/** this is ORed to the precise type when the column is declared as
NOT NULL */
#define DATA_NOT_NULL	256U
/** this is ORed to the precise type when we have an unsigned integer type */
#define DATA_UNSIGNED	512U
/** if the data type is a binary character string, this is ORed to the
precise type: this only holds for tables created with >= MySQL-4.0.14 */
#define DATA_BINARY_TYPE 1024U

/* The value 2048 was formerly DATA_NONLATIN1, a relic from < 4.1.2 and
< 5.0.1. In earlier versions this was set for some BLOB columns. */
/** Used as GIS MBR column */
#define DATA_GIS_MBR	2048U
/** GIS MBR length in bytes: two double values for each of the SPDIMS
dimensions. The expansion is parenthesized so that the macro can be used
safely as an operand of /, % or any other operator. */
#define DATA_MBR_LEN	(SPDIMS * 2 * sizeof(double))
185 
/** this is ORed to the precise data type when the column is true VARCHAR
where MySQL uses 2 bytes to store the data len; for shorter VARCHARs MySQL
uses only 1 byte */
#define DATA_LONG_TRUE_VARCHAR 4096U
/** Virtual column */
#define DATA_VIRTUAL	8192U

/* System Versioning */

/** start system field */
#define DATA_VERS_START	16384U
/** end system field */
#define DATA_VERS_END	32768U
/** system-versioned user data column: both versioning flags set */
#define DATA_VERSIONED (DATA_VERS_START|DATA_VERS_END)
197 
/** Check whether locking is disabled for a table.
This always yields false; the argument is not evaluated.
@param table	table (ignored)
@return false */
#define dict_table_is_locking_disabled(table) false
200 
201 /*-------------------------------------------*/
202 
/** Number of bytes needed to store the type information that affects the
alphabetical order of a single field and decides the storage size of an
SQL null */
#define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
/** In the >= 4.1.x storage format we add 2 bytes more so that we can also
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6

/** Maximum multi-byte character length in bytes, plus 1 */
#define DATA_MBMAX	8
213 
/** Check whether a main type is the GEOMETRY datatype.
@param mtype	main type code */
#define DATA_GEOMETRY_MTYPE(mtype)	((mtype) == DATA_GEOMETRY)

/** Check whether a main type is BLOB or GEOMETRY; BLOB is used as the
underlying datatype of GEOMETRY data.
@param mtype	main type code (may be evaluated twice) */
#define DATA_LARGE_MTYPE(mtype)						\
	((mtype) == DATA_BLOB || DATA_GEOMETRY_MTYPE(mtype))

/** Check whether a data type is a big length data type: either the length
exceeds 255 or the main type is BLOB or GEOMETRY.
@param len	length
@param mtype	main type code */
#define DATA_BIG_LEN_MTYPE(len, mtype)					\
	((len) > 255 || DATA_LARGE_MTYPE(mtype))

/** Check whether a column is a big length column.
@param col	pointer to an object that has the members len and mtype */
#define DATA_BIG_COL(col) DATA_BIG_LEN_MTYPE((col)->len, (col)->mtype)

/** Check whether a data type is a large binary data type.
NOTE(review): the DATA_BLOB arm matches when DATA_BINARY_TYPE is NOT set in
prtype; confirm against the callers that this is the intended condition.
@param mtype	main type code
@param prtype	precise type */
#define DATA_LARGE_BINARY(mtype,prtype)					\
	(DATA_GEOMETRY_MTYPE(mtype)					\
	 || ((mtype) == DATA_BLOB && ((prtype) & DATA_BINARY_TYPE) == 0))
231 
/** Largest supported collation number: 15 bits are available, so the
maximum is 32767 */
#define MAX_CHAR_COLL_NUM	32767

/** Mask to get the Charset Collation number (0x7fff) */
#define CHAR_COLL_MASK		MAX_CHAR_COLL_NUM
237 
238 /*********************************************************************//**
239 Gets the MySQL type code from a dtype.
240 @return MySQL type code; this is NOT an InnoDB type code! */
241 UNIV_INLINE
242 ulint
243 dtype_get_mysql_type(
244 /*=================*/
245 	const dtype_t*	type);	/*!< in: type struct */
246 /*********************************************************************//**
247 Determine how many bytes the first n characters of the given string occupy.
248 If the string is shorter than n characters, returns the number of bytes
249 the characters in the string occupy.
250 @return length of the prefix, in bytes */
251 ulint
252 dtype_get_at_most_n_mbchars(
253 /*========================*/
254 	ulint		prtype,		/*!< in: precise type */
255 	ulint		mbminlen,	/*!< in: minimum length of
256 					a multi-byte character, in bytes */
257 	ulint		mbmaxlen,	/*!< in: maximum length of
258 					a multi-byte character, in bytes */
259 	ulint		prefix_len,	/*!< in: length of the requested
260 					prefix, in characters, multiplied by
261 					dtype_get_mbmaxlen(dtype) */
262 	ulint		data_len,	/*!< in: length of str (in bytes) */
263 	const char*	str);		/*!< in: the string whose prefix
264 					length is being determined */
265 /*********************************************************************//**
266 Checks if a data main type is a string type. Also a BLOB is considered a
267 string type.
268 @return TRUE if string type */
269 ibool
270 dtype_is_string_type(
271 /*=================*/
272 	ulint	mtype);	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
273 /*********************************************************************//**
274 Checks if a type is a binary string type. Note that for tables created with
275 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
276 those DATA_BLOB columns this function currently returns FALSE.
277 @return TRUE if binary string type */
278 ibool
279 dtype_is_binary_string_type(
280 /*========================*/
281 	ulint	mtype,	/*!< in: main data type */
282 	ulint	prtype);/*!< in: precise type */
283 /*********************************************************************//**
284 Checks if a type is a non-binary string type. That is, dtype_is_string_type is
285 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
286 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
287 For those DATA_BLOB columns this function currently returns TRUE.
288 @return TRUE if non-binary string type */
289 ibool
290 dtype_is_non_binary_string_type(
291 /*============================*/
292 	ulint	mtype,	/*!< in: main data type */
293 	ulint	prtype);/*!< in: precise type */
294 /*********************************************************************//**
295 Sets a data type structure. */
296 UNIV_INLINE
297 void
298 dtype_set(
299 /*======*/
300 	dtype_t*	type,	/*!< in: type struct to init */
301 	ulint		mtype,	/*!< in: main data type */
302 	ulint		prtype,	/*!< in: precise type */
303 	ulint		len);	/*!< in: precision of type */
304 /*********************************************************************//**
305 Copies a data type structure. */
306 UNIV_INLINE
307 void
308 dtype_copy(
309 /*=======*/
310 	dtype_t*	type1,	/*!< in: type struct to copy to */
311 	const dtype_t*	type2);	/*!< in: type struct to copy from */
312 /*********************************************************************//**
313 Gets the SQL main data type.
314 @return SQL main data type */
315 UNIV_INLINE
316 ulint
317 dtype_get_mtype(
318 /*============*/
319 	const dtype_t*	type);	/*!< in: data type */
320 /*********************************************************************//**
321 Gets the precise data type.
322 @return precise data type */
323 UNIV_INLINE
324 ulint
325 dtype_get_prtype(
326 /*=============*/
327 	const dtype_t*	type);	/*!< in: data type */
328 
329 /*********************************************************************//**
330 Compute the mbminlen and mbmaxlen members of a data type structure. */
331 UNIV_INLINE
332 void
333 dtype_get_mblen(
334 /*============*/
335 	ulint	mtype,		/*!< in: main type */
336 	ulint	prtype,		/*!< in: precise type (and collation) */
337 	ulint*	mbminlen,	/*!< out: minimum length of a
338 				multi-byte character */
339 	ulint*	mbmaxlen);	/*!< out: maximum length of a
340 				multi-byte character */
341 /*********************************************************************//**
342 Gets the MySQL charset-collation code for MySQL string types.
343 @return MySQL charset-collation code */
344 UNIV_INLINE
345 ulint
346 dtype_get_charset_coll(
347 /*===================*/
348 	ulint	prtype);/*!< in: precise data type */
349 /** Form a precise type from the < 4.1.2 format precise type plus the
350 charset-collation code.
351 @param[in]	old_prtype	MySQL type code and the flags
352 				DATA_BINARY_TYPE etc.
353 @param[in]	charset_coll	character-set collation code
354 @return precise type, including the charset-collation code */
355 UNIV_INLINE
356 uint32_t
dtype_form_prtype(ulint old_prtype,ulint charset_coll)357 dtype_form_prtype(ulint old_prtype, ulint charset_coll)
358 {
359 	ut_ad(old_prtype < 256 * 256);
360 	ut_ad(charset_coll <= MAX_CHAR_COLL_NUM);
361 	return(uint32_t(old_prtype + (charset_coll << 16)));
362 }
363 
364 /*********************************************************************//**
365 Determines if a MySQL string type is a subset of UTF-8.  This function
366 may return false negatives, in case further character-set collation
367 codes are introduced in MySQL later.
368 @return whether a subset of UTF-8 */
369 UNIV_INLINE
370 bool
371 dtype_is_utf8(
372 /*==========*/
373 	ulint	prtype);/*!< in: precise data type */
374 /*********************************************************************//**
375 Gets the type length.
376 @return fixed length of the type, in bytes, or 0 if variable-length */
377 UNIV_INLINE
378 ulint
379 dtype_get_len(
380 /*==========*/
381 	const dtype_t*	type);	/*!< in: data type */
382 
383 /*********************************************************************//**
384 Gets the minimum length of a character, in bytes.
385 @return minimum length of a char, in bytes, or 0 if this is not a
386 character type */
387 UNIV_INLINE
388 ulint
389 dtype_get_mbminlen(
390 /*===============*/
391 	const dtype_t*	type);	/*!< in: type */
392 /*********************************************************************//**
393 Gets the maximum length of a character, in bytes.
394 @return maximum length of a char, in bytes, or 0 if this is not a
395 character type */
396 UNIV_INLINE
397 ulint
398 dtype_get_mbmaxlen(
399 /*===============*/
400 	const dtype_t*	type);	/*!< in: type */
401 /***********************************************************************//**
402 Returns the size of a fixed size data type, 0 if not a fixed size type.
403 @return fixed size, or 0 */
404 UNIV_INLINE
405 ulint
406 dtype_get_fixed_size_low(
407 /*=====================*/
408 	ulint	mtype,		/*!< in: main type */
409 	ulint	prtype,		/*!< in: precise type */
410 	ulint	len,		/*!< in: length */
411 	ulint	mbminlen,	/*!< in: minimum length of a
412 				multibyte character, in bytes */
413 	ulint	mbmaxlen,	/*!< in: maximum length of a
414 				multibyte character, in bytes */
415 	ulint	comp);		/*!< in: nonzero=ROW_FORMAT=COMPACT  */
416 
417 /***********************************************************************//**
418 Returns the minimum size of a data type.
419 @return minimum size */
420 UNIV_INLINE
421 ulint
422 dtype_get_min_size_low(
423 /*===================*/
424 	ulint	mtype,		/*!< in: main type */
425 	ulint	prtype,		/*!< in: precise type */
426 	ulint	len,		/*!< in: length */
427 	ulint	mbminlen,	/*!< in: minimum length of a character */
428 	ulint	mbmaxlen);	/*!< in: maximum length of a character */
429 /***********************************************************************//**
430 Returns the maximum size of a data type. Note: types in system tables may be
431 incomplete and return incorrect information.
432 @return maximum size */
433 UNIV_INLINE
434 ulint
435 dtype_get_max_size_low(
436 /*===================*/
437 	ulint	mtype,		/*!< in: main type */
438 	ulint	len);		/*!< in: length */
439 /***********************************************************************//**
440 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
441 For fixed length types it is the fixed length of the type, otherwise 0.
442 @return SQL null storage size in ROW_FORMAT=REDUNDANT */
443 UNIV_INLINE
444 ulint
445 dtype_get_sql_null_size(
446 /*====================*/
447 	const dtype_t*	type,	/*!< in: type */
448 	ulint		comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT  */
449 
450 /**********************************************************************//**
451 Reads to a type the stored information which determines its alphabetical
452 ordering and the storage size of an SQL NULL value. */
453 UNIV_INLINE
454 void
455 dtype_read_for_order_and_null_size(
456 /*===============================*/
457 	dtype_t*	type,	/*!< in: type struct */
458 	const byte*	buf);	/*!< in: buffer for the stored order info */
459 /**********************************************************************//**
460 Stores for a type the information which determines its alphabetical ordering
461 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
462 format. */
463 UNIV_INLINE
464 void
465 dtype_new_store_for_order_and_null_size(
466 /*====================================*/
467 	byte*		buf,	/*!< in: buffer for
468 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
469 				bytes where we store the info */
470 	const dtype_t*	type,	/*!< in: type struct */
471 	ulint		prefix_len);/*!< in: prefix length to
472 				replace type->len, or 0 */
473 /**********************************************************************//**
474 Reads to a type the stored information which determines its alphabetical
475 ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
476 format. */
477 UNIV_INLINE
478 void
479 dtype_new_read_for_order_and_null_size(
480 /*===================================*/
481 	dtype_t*	type,	/*!< in: type struct */
482 	const byte*	buf);	/*!< in: buffer for stored type order info */
483 
484 /*********************************************************************//**
485 Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
486 @return the SQL type name */
487 UNIV_INLINE
488 char*
489 dtype_sql_name(
490 /*===========*/
491 	unsigned	mtype,	/*!< in: mtype */
492 	unsigned	prtype,	/*!< in: prtype */
493 	unsigned	len,	/*!< in: len */
494 	char*		name,	/*!< out: SQL name */
495 	unsigned	name_sz);/*!< in: size of the name buffer */
496 
497 /*********************************************************************//**
498 Validates a data type structure.
499 @return TRUE if ok */
500 ibool
501 dtype_validate(
502 /*===========*/
503 	const dtype_t*	type);	/*!< in: type struct to validate */
#ifdef UNIV_DEBUG
/** Print a data type structure. Declared only when UNIV_DEBUG is defined.
@param[in]	type	data type */
void
dtype_print(const dtype_t* type);
#endif /* UNIV_DEBUG */
511 
512 /* Structure for an SQL data type.
513 If you add fields to this structure, be sure to initialize them everywhere.
514 This structure is initialized in the following functions:
515 dtype_set()
516 dtype_read_for_order_and_null_size()
517 dtype_new_read_for_order_and_null_size()
518 sym_tab_add_null_lit() */
519 
520 struct dtype_t{
521 	unsigned	prtype:32;	/*!< precise type; MySQL data
522 					type, charset code, flags to
523 					indicate nullability,
524 					signedness, whether this is a
525 					binary string, whether this is
526 					a true VARCHAR where MySQL
527 					uses 2 bytes to store the length */
528 	unsigned	mtype:8;	/*!< main data type */
529 
530 	/* the remaining fields do not affect alphabetical ordering: */
531 
532 	unsigned	len:16;		/*!< length; for MySQL data this
533 					is field->pack_length(),
534 					except that for a >= 5.0.3
535 					type true VARCHAR this is the
536 					maximum byte length of the
537 					string data (in addition to
538 					the string, MySQL uses 1 or 2
539 					bytes to store the string length) */
540 	unsigned	mbminlen:3;	/*!< minimum length of a character,
541 					in bytes */
542 	unsigned	mbmaxlen:3;	/*!< maximum length of a character,
543 					in bytes */
544 
545 	/** @return whether this is system versioned user field */
is_versioneddtype_t546 	bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
547 	/** @return whether this is the system field start */
vers_sys_startdtype_t548 	bool vers_sys_start() const
549 	{
550 		return (prtype & DATA_VERSIONED) == DATA_VERS_START;
551 	}
552 	/** @return whether this is the system field end */
vers_sys_enddtype_t553 	bool vers_sys_end() const
554 	{
555 		return (prtype & DATA_VERSIONED) == DATA_VERS_END;
556 	}
557 };
558 
559 /** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
560 extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
561 
562 #include "data0type.inl"
563 
564 #endif
565