1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file include/data0type.h
29 Data types
30 
31 Created 1/16/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #ifndef data0type_h
35 #define data0type_h
36 
37 #include "univ.i"
38 
39 extern ulint	data_mysql_default_charset_coll; /* presumably the default charset-collation code of this MySQL installation; defined in another translation unit */
40 #define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8 /* MySQL charset-collation code of latin1_swedish_ci */
41 #define DATA_MYSQL_BINARY_CHARSET_COLL 63 /* MySQL charset-collation code of the 'binary charset' */
42 
43 /* SQL data type struct */
44 struct dtype_t;
45 
46 /* Comparison modes for the SQL LIKE operator, by wildcard position */
47 enum ib_like_t {
48 	IB_LIKE_EXACT,                  /* no wildcard, e.g., STRING */
49 	IB_LIKE_PREFIX,                 /* trailing wildcard, e.g., STRING% */
50 	IB_LIKE_SUFFIX,                 /* leading wildcard, e.g., %STRING */
51 	IB_LIKE_SUBSTR,                 /* wildcard on both sides, e.g., %STRING% */
52 	IB_LIKE_REGEXP                  /* reserved for future use */
53 };
54 
55 /*-------------------------------------------*/
56 /* The 'MAIN TYPE' of a column */
57 #define DATA_MISSING	0	/* missing column */
58 #define	DATA_VARCHAR	1	/* character varying of the
59 				latin1_swedish_ci charset-collation; note
60 				that the MySQL format for this type, and for
61 				DATA_BINARY and DATA_VARMYSQL, is also affected
62 				by whether the 'precise type' contains
63 				DATA_MYSQL_TRUE_VARCHAR */
64 #define DATA_CHAR	2	/* fixed length character of the
65 				latin1_swedish_ci charset-collation */
66 #define DATA_FIXBINARY	3	/* binary string of fixed length */
67 #define DATA_BINARY	4	/* binary string */
68 #define DATA_BLOB	5	/* binary large object, or a TEXT type;
69 				if (prtype & DATA_BINARY_TYPE) == 0, then this
70 				is actually a TEXT column (or a BLOB created
71 				with < 4.0.14; since column prefix indexes
72 				came only in 4.0.14, the missing flag in BLOBs
73 				created before that does not cause any harm) */
74 #define	DATA_INT	6	/* integer: can be any size 1 - 8 bytes */
75 #define	DATA_SYS_CHILD	7	/* address of the child page in node pointer */
76 #define	DATA_SYS	8	/* system column */
77 
78 /* Data types >= DATA_FLOAT must be compared using the whole field, not as
79 binary strings */
80 
81 #define DATA_FLOAT	9
82 #define DATA_DOUBLE	10
83 #define DATA_DECIMAL	11	/* decimal number stored as an ASCII string */
84 #define	DATA_VARMYSQL	12	/* any charset varying length char */
85 #define	DATA_MYSQL	13	/* any charset fixed length char */
86 				/* NOTE that 4.1.1 used DATA_MYSQL and
87 				DATA_VARMYSQL for all character sets, and the
88 				charset-collation for tables created with it
89 				can also be latin1_swedish_ci */
90 #define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
91 				requires that main type values are <= 63 */
92 /*-------------------------------------------*/
93 /* The 'PRECISE TYPE' of a column */
94 /*
95 Tables created by a MySQL user have the following convention:
96 
97 - In the least significant byte in the precise type we store the MySQL type
98 code (not applicable for system columns).
99 
100 - In the second least significant byte we OR flags DATA_NOT_NULL,
101 DATA_UNSIGNED, DATA_BINARY_TYPE.
102 
103 - In the third least significant byte of the precise type of string types we
104 store the MySQL charset-collation code. In DATA_BLOB columns created with
105 < 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
106 are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
107 problem, though.
108 
109 Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
110 precise type, since the charset was always the default charset of the MySQL
111 installation. If the stored charset code is 0 in the system table SYS_COLUMNS
112 of InnoDB, that means that the default charset of this MySQL installation
113 should be used.
114 
115 When loading a table definition from the system tables to the InnoDB data
116 dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
117 if the stored charset-collation is 0, and if that is the case and the type is
118 a non-binary string, replace that 0 by the default charset-collation code of
119 this MySQL installation. In short, in old tables, the charset-collation code
120 in the system tables on disk can be 0, but in in-memory data structures
121 (dtype_t), the charset-collation code is always != 0 for non-binary string
122 types.
123 
124 In new tables, in binary string types, the charset-collation code is the
125 MySQL code for the 'binary charset', that is, != 0.
126 
127 For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
128 DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
129 InnoDB performs all comparisons internally, without resorting to the MySQL
130 comparison functions. This is to save CPU time.
131 
132 InnoDB's own internal system tables have different precise types for their
133 columns, and for them the precise type is usually not used at all.
134 */
135 
136 #define DATA_ENGLISH	4	/* English language character string: this
137 				is a relic from pre-MySQL time and only used
138 				for InnoDB's own system tables */
139 #define DATA_ERROR	111	/* another relic from pre-MySQL time */
140 
141 #define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
142 				 type code (lowest byte) from the precise type */
143 #define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
144 				   format true VARCHAR */
145 
146 /* Precise data types for system columns and the length of those columns;
147 NOTE: the values must run from 0 up in the order given! All codes must
148 be less than 256 */
149 #define	DATA_ROW_ID	0	/* row id: a 48-bit integer */
150 #define DATA_ROW_ID_LEN	6	/* stored length for row id */
151 
152 #define DATA_TRX_ID	1	/* transaction id: 6 bytes */
153 #define DATA_TRX_ID_LEN	6	/* stored length for transaction id */
154 
155 #define	DATA_ROLL_PTR	2	/* rollback data pointer: 7 bytes */
156 #define DATA_ROLL_PTR_LEN 7	/* stored length for rollback data pointer */
157 
158 #define	DATA_N_SYS_COLS 3	/* number of system columns defined above */
159 
160 #define DATA_FTS_DOC_ID	3	/* Used as FTS DOC ID column; note that this value equals DATA_N_SYS_COLS */
161 
162 #define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
163 
164 /* Flags ORed to the precise data type */
165 #define DATA_NOT_NULL	256	/* this is ORed to the precise type when
166 				the column is declared as NOT NULL */
167 #define DATA_UNSIGNED	512	/* this is ORed to the precise type when
168 				we have an unsigned integer type */
169 #define	DATA_BINARY_TYPE 1024	/* if the data type is a binary character
170 				string, this is ORed to the precise type:
171 				this only holds for tables created with
172 				>= MySQL-4.0.14 */
173 /* #define	DATA_NONLATIN1	2048 This is a relic from < 4.1.2 and < 5.0.1.
174 				In earlier versions this was set for some
175 				BLOB columns.
176 */
177 #define	DATA_LONG_TRUE_VARCHAR 4096	/* this is ORed to the precise data
178 				type when the column is true VARCHAR where
179 				MySQL uses 2 bytes to store the data len;
180 				for shorter VARCHARs MySQL uses only 1 byte */
181 #define	DATA_COMPRESSED	16384	/* this is ORed to the precise data
182 				type when the column has the COLUMN_FORMAT =
183 				COMPRESSED attribute */
184 /*-------------------------------------------*/
185 
186 /* This many bytes we need to store the type information affecting the
187 alphabetical order for a single field and decide the storage size of an
188 SQL NULL */
189 #define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
190 /* In the >= 4.1.x storage format we add 2 bytes more so that we can also
191 store the charset-collation number; one byte is left unused, though */
192 #define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
193 
194 /* Maximum multi-byte character length in bytes, plus 1 */
195 #define DATA_MBMAX	5
196 
197 /* Pack mbminlen, mbmaxlen to mbminmaxlen: mbmaxlen * DATA_MBMAX + mbminlen. */
198 #define DATA_MBMINMAXLEN(mbminlen, mbmaxlen)	\
199 	((mbmaxlen) * DATA_MBMAX + (mbminlen))
200 /* Get mbminlen from mbminmaxlen (remainder modulo DATA_MBMAX). Cast the result
201 of UNIV_EXPECT to ulint because in GCC it returns a long. */
202 #define DATA_MBMINLEN(mbminmaxlen) ((ulint) \
203                                     UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), \
204                                                 1))
205 /* Get mbmaxlen from mbminmaxlen (quotient by DATA_MBMAX). */
206 #define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
207 
208 /* Collation numbers may use up to 15 bits (values up to 32767) */
209 #define MAX_CHAR_COLL_NUM	32767
210 
211 /* Mask to get the charset-collation number (0x7fff) */
212 #define CHAR_COLL_MASK		MAX_CHAR_COLL_NUM
213 
214 #ifndef UNIV_HOTBACKUP
215 /*********************************************************************//**
216 Gets the MySQL type code from a dtype.
217 @return	MySQL type code (not an InnoDB main type code such as DATA_INT) */
218 UNIV_INLINE
219 ulint
220 dtype_get_mysql_type(
221 /*=================*/
222 	const dtype_t*	type);	/*!< in: type struct */
223 /*********************************************************************//**
224 Determine how many bytes the first n characters of the given string occupy,
225 where n is prefix_len divided by the maximum character length. If the string
226 is shorter than n characters, returns the number of bytes the characters in the string occupy.
227 @return	length of the prefix, in bytes */
228 UNIV_INTERN
229 ulint
230 dtype_get_at_most_n_mbchars(
231 /*========================*/
232 	ulint		prtype,		/*!< in: precise type */
233 	ulint		mbminmaxlen,	/*!< in: minimum and maximum length of
234 					a multi-byte character */
235 	ulint		prefix_len,	/*!< in: length of the requested
236 					prefix, in characters, multiplied by
237 					dtype_get_mbmaxlen(dtype) */
238 	ulint		data_len,	/*!< in: length of str (in bytes) */
239 	const char*	str);		/*!< in: the string whose prefix
240 					length is being determined */
241 #endif /* !UNIV_HOTBACKUP */
242 /*********************************************************************//**
243 Checks if a data main type is a string type. Also a BLOB is considered a
244 string type.
245 @return	TRUE if mtype is a string type (a BLOB counts as one) */
246 UNIV_INTERN
247 ibool
248 dtype_is_string_type(
249 /*=================*/
250 	ulint	mtype);	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
251 /*********************************************************************//**
252 Checks if a type is a binary string type. Note that for tables created with
253 < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
254 those DATA_BLOB columns this function currently returns FALSE.
255 @return	TRUE if binary string type */
256 UNIV_INTERN
257 ibool
258 dtype_is_binary_string_type(
259 /*========================*/
260 	ulint	mtype,	/*!< in: InnoDB main data type code */
261 	ulint	prtype);/*!< in: precise type */
262 /*********************************************************************//**
263 Checks if a type is a non-binary string type. That is, dtype_is_string_type is
264 TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
265 with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
266 For those DATA_BLOB columns this function currently returns TRUE.
267 @return	TRUE if non-binary string type */
268 UNIV_INTERN
269 ibool
270 dtype_is_non_binary_string_type(
271 /*============================*/
272 	ulint	mtype,	/*!< in: InnoDB main data type code */
273 	ulint	prtype);/*!< in: precise type */
274 /*********************************************************************//**
275 Sets a data type structure. */
276 UNIV_INLINE
277 void
278 dtype_set(
279 /*======*/
280 	dtype_t*	type,	/*!< out: type struct to init */
281 	ulint		mtype,	/*!< in: main data type */
282 	ulint		prtype,	/*!< in: precise type */
283 	ulint		len);	/*!< in: precision of type */
284 /*********************************************************************//**
285 Copies a data type structure. */
286 UNIV_INLINE
287 void
288 dtype_copy(
289 /*=======*/
290 	dtype_t*	type1,	/*!< out: type struct to copy to */
291 	const dtype_t*	type2);	/*!< in: type struct to copy from */
292 /*********************************************************************//**
293 Gets the SQL main data type.
294 @return	SQL main data type (the 'MAIN TYPE' code of the column) */
295 UNIV_INLINE
296 ulint
297 dtype_get_mtype(
298 /*============*/
299 	const dtype_t*	type);	/*!< in: data type */
300 /*********************************************************************//**
301 Gets the precise data type.
302 @return	precise data type (the 'PRECISE TYPE' of the column) */
303 UNIV_INLINE
304 ulint
305 dtype_get_prtype(
306 /*=============*/
307 	const dtype_t*	type);	/*!< in: data type */
308 #ifndef UNIV_HOTBACKUP
309 /*********************************************************************//**
310 Computes mbminlen and mbmaxlen for the given main and precise type. */
311 UNIV_INLINE
312 void
313 dtype_get_mblen(
314 /*============*/
315 	ulint	mtype,		/*!< in: main type */
316 	ulint	prtype,		/*!< in: precise type (and collation) */
317 	ulint*	mbminlen,	/*!< out: minimum length of a
318 				multi-byte character */
319 	ulint*	mbmaxlen);	/*!< out: maximum length of a
320 				multi-byte character */
321 /*********************************************************************//**
322 Gets the MySQL charset-collation code for MySQL string types.
323 @return	MySQL charset-collation code */
324 UNIV_INLINE
325 ulint
326 dtype_get_charset_coll(
327 /*===================*/
328 	ulint	prtype);/*!< in: precise data type (carries the collation code) */
329 /*********************************************************************//**
330 Forms a precise type from the < 4.1.2 format precise type plus the
331 charset-collation code.
332 @return precise type, including the charset-collation code */
333 UNIV_INTERN
334 ulint
335 dtype_form_prtype(
336 /*==============*/
337 	ulint	old_prtype,	/*!< in: < 4.1.2 format precise type: the MySQL
338 				type code and the flags DATA_BINARY_TYPE etc. */
339 	ulint	charset_coll);	/*!< in: MySQL charset-collation code */
340 /*********************************************************************//**
341 Determines if a MySQL string type is a subset of UTF-8.  This function
342 may return false negatives, in case further character-set collation
343 codes are introduced in MySQL later.
344 @return	TRUE if a subset of UTF-8 */
345 UNIV_INLINE
346 ibool
347 dtype_is_utf8(
348 /*==========*/
349 	ulint	prtype);/*!< in: precise data type of a MySQL string type */
350 #endif /* !UNIV_HOTBACKUP */
351 /*********************************************************************//**
352 Gets the type length.
353 @return	fixed length of the type, in bytes, or 0 if variable-length */
354 UNIV_INLINE
355 ulint
356 dtype_get_len(
357 /*==========*/
358 	const dtype_t*	type);	/*!< in: data type whose length is read */
359 #ifndef UNIV_HOTBACKUP
360 /*********************************************************************//**
361 Gets the minimum length of a character, in bytes.
362 @return minimum length of a char, in bytes, or 0 if this is not a
363 character type */
364 UNIV_INLINE
365 ulint
366 dtype_get_mbminlen(
367 /*===============*/
368 	const dtype_t*	type);	/*!< in: data type */
369 /*********************************************************************//**
370 Gets the maximum length of a character, in bytes.
371 @return maximum length of a char, in bytes, or 0 if this is not a
372 character type */
373 UNIV_INLINE
374 ulint
375 dtype_get_mbmaxlen(
376 /*===============*/
377 	const dtype_t*	type);	/*!< in: data type */
378 /*********************************************************************//**
379 Sets the minimum and maximum length of a character, in bytes, for a type. */
380 UNIV_INLINE
381 void
382 dtype_set_mbminmaxlen(
383 /*==================*/
384 	dtype_t*	type,		/*!< in/out: type */
385 	ulint		mbminlen,	/*!< in: minimum length of a char,
386 					in bytes, or 0 if this is not
387 					a character type */
388 	ulint		mbmaxlen);	/*!< in: maximum length of a char,
389 					in bytes, or 0 if this is not
390 					a character type */
391 /*********************************************************************//**
392 Gets the padding character code for the type.
393 @return	padding character code, or ULINT_UNDEFINED if no padding is specified */
394 UNIV_INLINE
395 ulint
396 dtype_get_pad_char(
397 /*===============*/
398 	ulint	mtype,		/*!< in: main type */
399 	ulint	prtype);	/*!< in: precise type */
400 #endif /* !UNIV_HOTBACKUP */
401 /***********************************************************************//**
402 Returns the size of a fixed size data type, 0 if not a fixed size type.
403 @return	fixed size, or 0 */
404 UNIV_INLINE
405 ulint
406 dtype_get_fixed_size_low(
407 /*=====================*/
408 	ulint	mtype,		/*!< in: main type */
409 	ulint	prtype,		/*!< in: precise type */
410 	ulint	len,		/*!< in: length */
411 	ulint	mbminmaxlen,	/*!< in: minimum and maximum length of a
412 				multibyte character, in bytes */
413 	ulint	comp);		/*!< in: nonzero=ROW_FORMAT=COMPACT */
414 #ifndef UNIV_HOTBACKUP
415 /***********************************************************************//**
416 Returns the minimum size of a data type.
417 @return	minimum size */
418 UNIV_INLINE
419 ulint
420 dtype_get_min_size_low(
421 /*===================*/
422 	ulint	mtype,		/*!< in: main type */
423 	ulint	prtype,		/*!< in: precise type */
424 	ulint	len,		/*!< in: length */
425 	ulint	mbminmaxlen);	/*!< in: minimum and maximum length of a
426 				multibyte character, in bytes */
427 /***********************************************************************//**
428 Returns the maximum size of a data type. Note: types in system tables may be
429 incomplete, so the result may be incorrect for them.
430 @return	maximum size */
431 UNIV_INLINE
432 ulint
433 dtype_get_max_size_low(
434 /*===================*/
435 	ulint	mtype,		/*!< in: main type */
436 	ulint	len);		/*!< in: length */
437 #endif /* !UNIV_HOTBACKUP */
438 /***********************************************************************//**
439 Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
440 For fixed length types it is the fixed length of the type, otherwise 0.
441 @return	SQL NULL storage size in ROW_FORMAT=REDUNDANT */
442 UNIV_INLINE
443 ulint
444 dtype_get_sql_null_size(
445 /*====================*/
446 	const dtype_t*	type,	/*!< in: type */
447 	ulint		comp);	/*!< in: nonzero=ROW_FORMAT=COMPACT */
448 #ifndef UNIV_HOTBACKUP
449 /**********************************************************************//**
450 Reads to a type the stored information which determines its alphabetical
451 ordering and the storage size of an SQL NULL value. */
452 UNIV_INLINE
453 void
454 dtype_read_for_order_and_null_size(
455 /*===============================*/
456 	dtype_t*	type,	/*!< out: type struct that receives the info */
457 	const byte*	buf);	/*!< in: buffer for the stored order info */
458 /**********************************************************************//**
459 Stores for a type the information which determines its alphabetical ordering
460 and the storage size of an SQL NULL value. This is the >= 4.1.x storage
461 format. */
462 UNIV_INLINE
463 void
464 dtype_new_store_for_order_and_null_size(
465 /*====================================*/
466 	byte*		buf,	/*!< out: buffer of
467 				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
468 				bytes where we store the info */
469 	const dtype_t*	type,	/*!< in: type struct */
470 	ulint		prefix_len);/*!< in: prefix length to
471 				replace type->len, or 0 */
472 /**********************************************************************//**
473 Reads to a type the stored information which determines its alphabetical
474 ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
475 storage format. */
476 UNIV_INLINE
477 void
478 dtype_new_read_for_order_and_null_size(
479 /*===================================*/
480 	dtype_t*	type,	/*!< out: type struct that receives the info */
481 	const byte*	buf);	/*!< in: buffer for stored type order info */
482 
483 /*********************************************************************//**
484 Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
485 @return the SQL type name */
486 UNIV_INLINE
487 char*
488 dtype_sql_name(
489 /*===========*/
490 	unsigned	mtype,	/*!< in: main type */
491 	unsigned	prtype,	/*!< in: precise type */
492 	unsigned	len,	/*!< in: length */
493 	char*		name,	/*!< out: buffer for the SQL name */
494 	unsigned	name_sz);/*!< in: size of the name buffer */
495 
496 #endif /* !UNIV_HOTBACKUP */
497 
498 /*********************************************************************//**
499 Validates a data type structure.
500 @return	TRUE if the type struct is ok */
501 UNIV_INTERN
502 ibool
503 dtype_validate(
504 /*===========*/
505 	const dtype_t*	type);	/*!< in: type struct to validate */
506 /*********************************************************************//**
507 Prints a data type structure. */
508 UNIV_INTERN
509 void
510 dtype_print(
511 /*========*/
512 	const dtype_t*	type);	/*!< in: type struct to print */
513 
514 /**
515 Calculates the number of extra bytes needed for compression header
516 depending on precise column type.
517 @retval 0 if prtype does not include DATA_COMPRESSED flag
518 @retval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag
519 */
520 UNIV_INLINE
521 ulint
522 prtype_get_compression_extra(
523 	ulint		prtype);	/*!< in: precise type */
524 
525 /* Structure for an SQL data type.
526 If you add fields to this structure, be sure to initialize them everywhere.
527 This structure is initialized in the following functions:
528 dtype_set()
529 dtype_read_for_order_and_null_size()
530 dtype_new_read_for_order_and_null_size()
531 sym_tab_add_null_lit() */
532 
533 struct dtype_t{
534 	unsigned	prtype:32;	/*!< precise type; MySQL data
535 					type, charset code, flags to
536 					indicate nullability,
537 					signedness, whether this is a
538 					binary string, whether this is
539 					a true VARCHAR where MySQL
540 					uses 2 bytes to store the length */
541 	unsigned	mtype:8;	/*!< main data type */
542 
543 	/* the remaining fields do not affect alphabetical ordering: */
544 
545 	unsigned	len:16;		/*!< length; for MySQL data this
546 					is field->pack_length(),
547 					except that for a >= 5.0.3
548 					type true VARCHAR this is the
549 					maximum byte length of the
550 					string data (in addition to
551 					the string, MySQL uses 1 or 2
552 					bytes to store the string length) */
553 #ifndef UNIV_HOTBACKUP
554 	unsigned	mbminmaxlen:5;	/*!< minimum and maximum length of a
555 					character, in bytes;
556 					DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
557 					mbminlen=DATA_MBMINLEN(mbminmaxlen);
558 					mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */
559 #endif /* !UNIV_HOTBACKUP */
560 };
561 
562 #ifndef UNIV_NONINL
563 #include "data0type.ic"
564 #endif
565 
566 #endif
567