1 /* Copyright (C) 2004-2008 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 Copyright (C) 2008-2009 Sun Microsystems, Inc.
3 Copyright (c) 2009, 2017, MariaDB Corporation.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
17
18
19 #ifdef USE_PRAGMA_IMPLEMENTATION
20 #pragma implementation // gcc: Class implementation
21 #endif
22
23 #define MYSQL_SERVER 1
24 #include <my_global.h>
25 #include <m_ctype.h>
26 #include <my_dir.h>
27 #include <myisampack.h>
28 #include <my_bit.h>
29 #include "ha_maria.h"
30 #include "trnman_public.h"
31 #include "trnman.h"
32
33 C_MODE_START
34 #include "maria_def.h"
35 #include "ma_rt_index.h"
36 #include "ma_blockrec.h"
37 #include "ma_checkpoint.h"
38 #include "ma_recovery.h"
39 C_MODE_END
40 #include "ma_trnman.h"
41
42 //#include "sql_priv.h"
43 #include "protocol.h"
44 #include "sql_class.h"
45 #include "key.h"
46 #include "log.h"
47 #include "sql_parse.h"
48
49 /*
50 Note that in future versions, only *transactional* Maria tables can
51 rollback, so this flag should be up or down conditionally.
52 */
/*
  With MARIA_CANNOT_ROLLBACK defined, CANNOT_ROLLBACK_FLAG expands to
  HA_NO_TRANSACTIONS and every trans_register_ha() call in this file is
  replaced by an empty statement. Otherwise the flag contributes nothing.
*/
#ifdef MARIA_CANNOT_ROLLBACK
#define CANNOT_ROLLBACK_FLAG HA_NO_TRANSACTIONS
#define trans_register_ha(A, B, C)  do { /* nothing */ } while(0)
#else
#define CANNOT_ROLLBACK_FLAG 0
#endif
/*
  The TRN pointer kept in the slot that thd_ha_data() returns for maria_hton.
  The macro expands to an lvalue and relies on a variable named 'thd' being
  in scope where it is used.
*/
#define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton))
60
61 ulong pagecache_division_limit, pagecache_age_threshold, pagecache_file_hash_size;
62 ulonglong pagecache_buffer_size;
63 const char *zerofill_error_msg=
64 "Table is from another system and must be zerofilled or repaired to be "
65 "usable on this system";
66
67 /**
68 As the auto-repair is initiated when opened from the SQL layer
69 (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
70 Recovery internally opens the table to apply log records to it, which is
71 good. It would happen only after Recovery, if the table is still
72 corrupted.
73 */
74 ulonglong maria_recover_options= HA_RECOVER_NONE;
75 handlerton *maria_hton;
76
77 /* bits in maria_recover_options */
78 const char *maria_recover_names[]=
79 {
80 /*
81 Compared to MyISAM, "default" was renamed to "normal" as it collided with
82 SET var=default which sets to the var's default i.e. what happens when the
83 var is not set i.e. HA_RECOVER_NONE.
84 OFF flag is ignored.
85 */
86 "NORMAL", "BACKUP", "FORCE", "QUICK", "OFF", NullS
87 };
88 TYPELIB maria_recover_typelib=
89 {
90 array_elements(maria_recover_names) - 1, "",
91 maria_recover_names, NULL
92 };
93
94 const char *maria_stats_method_names[]=
95 {
96 "nulls_unequal", "nulls_equal",
97 "nulls_ignored", NullS
98 };
99 TYPELIB maria_stats_method_typelib=
100 {
101 array_elements(maria_stats_method_names) - 1, "",
102 maria_stats_method_names, NULL
103 };
104
105 /* transactions log purge mode */
106 const char *maria_translog_purge_type_names[]=
107 {
108 "immediate", "external", "at_flush", NullS
109 };
110 TYPELIB maria_translog_purge_type_typelib=
111 {
112 array_elements(maria_translog_purge_type_names) - 1, "",
113 maria_translog_purge_type_names, NULL
114 };
115
116 /* transactional log directory sync */
117 const char *maria_sync_log_dir_names[]=
118 {
119 "NEVER", "NEWFILE", "ALWAYS", NullS
120 };
121 TYPELIB maria_sync_log_dir_typelib=
122 {
123 array_elements(maria_sync_log_dir_names) - 1, "",
124 maria_sync_log_dir_names, NULL
125 };
126
127 /* transactional log group commit */
128 const char *maria_group_commit_names[]=
129 {
130 "none", "hard", "soft", NullS
131 };
132 TYPELIB maria_group_commit_typelib=
133 {
134 array_elements(maria_group_commit_names) - 1, "",
135 maria_group_commit_names, NULL
136 };
137
138 /** Interval between background checkpoints in seconds */
139 static ulong checkpoint_interval;
140 static void update_checkpoint_interval(MYSQL_THD thd,
141 struct st_mysql_sys_var *var,
142 void *var_ptr, const void *save);
143 static void update_maria_group_commit(MYSQL_THD thd,
144 struct st_mysql_sys_var *var,
145 void *var_ptr, const void *save);
146 static void update_maria_group_commit_interval(MYSQL_THD thd,
147 struct st_mysql_sys_var *var,
148 void *var_ptr, const void *save);
149 /** After that many consecutive recovery failures, remove logs */
150 static ulong force_start_after_recovery_failures;
151 static void update_log_file_size(MYSQL_THD thd,
152 struct st_mysql_sys_var *var,
153 void *var_ptr, const void *save);
154
155 static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
156 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
157 "Block size to be used for Aria index pages.", 0, 0,
158 MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
159 MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
160
161 static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
162 PLUGIN_VAR_RQCMDARG,
163 "Interval between tries to do an automatic checkpoints. In seconds; 0 means"
164 " 'no automatic checkpoints' which makes sense only for testing.",
165 NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);
166
167 static MYSQL_SYSVAR_ULONG(checkpoint_log_activity, maria_checkpoint_min_log_activity,
168 PLUGIN_VAR_RQCMDARG,
169 "Number of bytes that the transaction log has to grow between checkpoints before a new "
170 "checkpoint is written to the log.",
171 NULL, NULL, 1024*1024, 0, UINT_MAX, 1);
172
173 static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
174 force_start_after_recovery_failures,
175 /*
176 Read-only because setting it on the fly has no useful effect,
177 should be set on command-line.
178 */
179 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
180 "Number of consecutive log recovery failures after which logs will be"
181 " automatically deleted to cure the problem; 0 (the default) disables"
182 " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);
183
184 static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
185 "Maintain page checksums (can be overridden per table "
186 "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);
187
188 /* It is only command line argument */
189 static MYSQL_SYSVAR_STR(log_dir_path, maria_data_root,
190 PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
191 "Path to the directory where to store transactional log",
192 NULL, NULL, mysql_real_data_home);
193
194
195 static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
196 PLUGIN_VAR_RQCMDARG,
197 "Limit for transaction log size",
198 NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
199 TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);
200
201 static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit,
202 PLUGIN_VAR_RQCMDARG,
203 "Specifies Aria group commit mode. "
204 "Possible values are \"none\" (no group commit), "
205 "\"hard\" (with waiting to actual commit), "
206 "\"soft\" (no wait for commit (DANGEROUS!!!))",
207 NULL, update_maria_group_commit,
208 TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);
209
210 static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval,
211 PLUGIN_VAR_RQCMDARG,
212 "Interval between commite in microseconds (1/1000000c)."
213 " 0 stands for no waiting"
214 " for other threads to come and do a commit in \"hard\" mode and no"
215 " sync()/commit at all in \"soft\" mode. Option has only an effect"
216 " if aria_group_commit is used",
217 NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1);
218
219 static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
220 PLUGIN_VAR_RQCMDARG,
221 "Specifies how Aria transactional log will be purged",
222 NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
223 &maria_translog_purge_type_typelib);
224
225 static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
226 maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
227 "Don't use the fast sort index method to created index if the "
228 "temporary file would get bigger than this.",
229 0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
230 0, MAX_FILE_SIZE, 1*MB);
231
232 static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
233 pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
234 "This characterizes the number of hits a hot block has to be untouched "
235 "until it is considered aged enough to be downgraded to a warm block. "
236 "This specifies the percentage ratio of that number of hits to the "
237 "total number of blocks in the page cache.", 0, 0,
238 300, 100, ~ (ulong) 0L, 100);
239
240 static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
241 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
242 "The size of the buffer used for index blocks for Aria tables. "
243 "Increase this to get better index handling (for all reads and "
244 "multiple writes) to as much as you can afford.", 0, 0,
245 KEY_CACHE_SIZE, 8192*16L, ~(ulonglong) 0, 1);
246
247 static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
248 PLUGIN_VAR_RQCMDARG,
249 "The minimum percentage of warm blocks in key cache", 0, 0,
250 100, 1, 100, 1);
251
252 static MYSQL_SYSVAR_ULONG(pagecache_file_hash_size, pagecache_file_hash_size,
253 PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
254 "Number of hash buckets for open and changed files. If you have a lot of Aria "
255 "files open you should increase this for faster flush of changes. A good "
256 "value is probably 1/10 of number of possible open Aria files.", 0,0,
257 512, 128, 16384, 1);
258
259 static MYSQL_SYSVAR_SET(recover_options, maria_recover_options, PLUGIN_VAR_OPCMDARG,
260 "Specifies how corrupted tables should be automatically repaired",
261 NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &maria_recover_typelib);
262
263 static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
264 "Number of threads to use when repairing Aria tables. The value of 1 "
265 "disables parallel repair.",
266 0, 0, 1, 1, 128, 1);
267
268 static MYSQL_THDVAR_ULONGLONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
269 "The buffer that is allocated when sorting the index when doing a "
270 "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.", NULL, NULL,
271 SORT_BUFFER_INIT, MIN_SORT_BUFFER, SIZE_T_MAX, 1);
272
273 static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
274 "Specifies how Aria index statistics collection code should treat "
275 "NULLs", 0, 0, 0, &maria_stats_method_typelib);
276
277 static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
278 "Controls syncing directory after log file growth and new file "
279 "creation", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
280 &maria_sync_log_dir_typelib);
281
282 #ifdef USE_ARIA_FOR_TMP_TABLES
283 #define USE_ARIA_FOR_TMP_TABLES_VAL 1
284 #else
285 #define USE_ARIA_FOR_TMP_TABLES_VAL 0
286 #endif
287 my_bool use_maria_for_temp_tables= USE_ARIA_FOR_TMP_TABLES_VAL;
288
289 static MYSQL_SYSVAR_BOOL(used_for_temp_tables,
290 use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
291 "Whether temporary tables should be MyISAM or Aria", 0, 0,
292 1);
293
294 static MYSQL_SYSVAR_BOOL(encrypt_tables, maria_encrypt_tables, PLUGIN_VAR_OPCMDARG,
295 "Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) "
296 "and not FIXED/DYNAMIC)",
297 0, 0, 0);
298
299 #if defined HAVE_PSI_INTERFACE && !defined EMBEDDED_LIBRARY
300
301 static PSI_mutex_info all_aria_mutexes[]=
302 {
303 { &key_THR_LOCK_maria, "THR_LOCK_maria", PSI_FLAG_GLOBAL},
304 { &key_LOCK_soft_sync, "LOCK_soft_sync", PSI_FLAG_GLOBAL},
305 { &key_LOCK_trn_list, "LOCK_trn_list", PSI_FLAG_GLOBAL},
306 { &key_SHARE_BITMAP_lock, "SHARE::bitmap::bitmap_lock", 0},
307 { &key_SORT_INFO_mutex, "SORT_INFO::mutex", 0},
308 { &key_TRANSLOG_BUFFER_mutex, "TRANSLOG_BUFFER::mutex", 0},
309 { &key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock, "TRANSLOG_DESCRIPTOR::dirty_buffer_mask_lock", 0},
310 { &key_TRANSLOG_DESCRIPTOR_sent_to_disk_lock, "TRANSLOG_DESCRIPTOR::sent_to_disk_lock", 0},
311 { &key_TRANSLOG_DESCRIPTOR_log_flush_lock, "TRANSLOG_DESCRIPTOR::log_flush_lock", 0},
312 { &key_TRANSLOG_DESCRIPTOR_file_header_lock, "TRANSLOG_DESCRIPTOR::file_header_lock", 0},
313 { &key_TRANSLOG_DESCRIPTOR_unfinished_files_lock, "TRANSLOG_DESCRIPTOR::unfinished_files_lock", 0},
314 { &key_TRANSLOG_DESCRIPTOR_purger_lock, "TRANSLOG_DESCRIPTOR::purger_lock", 0},
315 { &key_SHARE_intern_lock, "SHARE::intern_lock", 0},
316 { &key_SHARE_key_del_lock, "SHARE::key_del_lock", 0},
317 { &key_SHARE_close_lock, "SHARE::close_lock", 0},
318 { &key_SERVICE_THREAD_CONTROL_lock, "SERVICE_THREAD_CONTROL::LOCK_control", 0},
319 { &key_TRN_state_lock, "TRN::state_lock", 0},
320 { &key_PAGECACHE_cache_lock, "PAGECACHE::cache_lock", 0}
321 };
322
323 static PSI_cond_info all_aria_conds[]=
324 {
325 { &key_COND_soft_sync, "COND_soft_sync", PSI_FLAG_GLOBAL},
326 { &key_SHARE_key_del_cond, "SHARE::key_del_cond", 0},
327 { &key_SERVICE_THREAD_CONTROL_cond, "SERVICE_THREAD_CONTROL::COND_control", 0},
328 { &key_SORT_INFO_cond, "SORT_INFO::cond", 0},
329 { &key_SHARE_BITMAP_cond, "BITMAP::bitmap_cond", 0},
330 { &key_TRANSLOG_BUFFER_waiting_filling_buffer, "TRANSLOG_BUFFER::waiting_filling_buffer", 0},
331 { &key_TRANSLOG_BUFFER_prev_sent_to_disk_cond, "TRANSLOG_BUFFER::prev_sent_to_disk_cond", 0},
332 { &key_TRANSLOG_DESCRIPTOR_log_flush_cond, "TRANSLOG_DESCRIPTOR::log_flush_cond", 0},
333 { &key_TRANSLOG_DESCRIPTOR_new_goal_cond, "TRANSLOG_DESCRIPTOR::new_goal_cond", 0}
334 };
335
336 static PSI_rwlock_info all_aria_rwlocks[]=
337 {
338 { &key_KEYINFO_root_lock, "KEYINFO::root_lock", 0},
339 { &key_SHARE_mmap_lock, "SHARE::mmap_lock", 0},
340 { &key_TRANSLOG_DESCRIPTOR_open_files_lock, "TRANSLOG_DESCRIPTOR::open_files_lock", 0}
341 };
342
343 static PSI_thread_info all_aria_threads[]=
344 {
345 { &key_thread_checkpoint, "checkpoint_background", PSI_FLAG_GLOBAL},
346 { &key_thread_soft_sync, "soft_sync_background", PSI_FLAG_GLOBAL},
347 { &key_thread_find_all_keys, "thr_find_all_keys", 0}
348 };
349
350 static PSI_file_info all_aria_files[]=
351 {
352 { &key_file_translog, "translog", 0},
353 { &key_file_kfile, "MAI", 0},
354 { &key_file_dfile, "MAD", 0},
355 { &key_file_control, "control", PSI_FLAG_GLOBAL}
356 };
357
358 # ifdef HAVE_PSI_STAGE_INTERFACE
359 static PSI_stage_info *all_aria_stages[]=
360 {
361 & stage_waiting_for_a_resource
362 };
363 # endif /* HAVE_PSI_STAGE_INTERFACE */
364
init_aria_psi_keys(void)365 static void init_aria_psi_keys(void)
366 {
367 const char* category= "aria";
368 int count;
369
370 count= array_elements(all_aria_mutexes);
371 mysql_mutex_register(category, all_aria_mutexes, count);
372
373 count= array_elements(all_aria_rwlocks);
374 mysql_rwlock_register(category, all_aria_rwlocks, count);
375
376 count= array_elements(all_aria_conds);
377 mysql_cond_register(category, all_aria_conds, count);
378
379 count= array_elements(all_aria_threads);
380 mysql_thread_register(category, all_aria_threads, count);
381
382 count= array_elements(all_aria_files);
383 mysql_file_register(category, all_aria_files, count);
384 # ifdef HAVE_PSI_STAGE_INTERFACE
385 count= array_elements(all_aria_stages);
386 mysql_stage_register(category, all_aria_stages, count);
387 # endif /* HAVE_PSI_STAGE_INTERFACE */
388 }
389 #else
390 #define init_aria_psi_keys() /* no-op */
391 #endif /* HAVE_PSI_INTERFACE */
392
393 /*****************************************************************************
394 ** MARIA tables
395 *****************************************************************************/
396
maria_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)397 static handler *maria_create_handler(handlerton *hton,
398 TABLE_SHARE * table,
399 MEM_ROOT *mem_root)
400 {
401 return new (mem_root) ha_maria(hton, table);
402 }
403
404
405 // collect errors printed by maria_check routines
406
_ma_check_print_msg(HA_CHECK * param,const char * msg_type,const char * fmt,va_list args)407 static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
408 const char *fmt, va_list args)
409 {
410 THD *thd= (THD *) param->thd;
411 Protocol *protocol= thd->protocol;
412 size_t length, msg_length;
413 char msgbuf[MYSQL_ERRMSG_SIZE];
414 char name[NAME_LEN * 2 + 2];
415
416 if (param->testflag & T_SUPPRESS_ERR_HANDLING)
417 return;
418
419 msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
420 msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia
421
422 DBUG_PRINT(msg_type, ("message: %s", msgbuf));
423
424 if (!thd->vio_ok())
425 {
426 sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
427 return;
428 }
429
430 if (param->testflag &
431 (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
432 {
433 my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
434 if (thd->variables.log_warnings > 2)
435 sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
436 return;
437 }
438 length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
439 NullS) - name);
440 /*
441 TODO: switch from protocol to push_warning here. The main reason we didn't
442 it yet is parallel repair, which threads have no THD object accessible via
443 current_thd.
444
445 Also we likely need to lock mutex here (in both cases with protocol and
446 push_warning).
447 */
448 protocol->prepare_for_resend();
449 protocol->store(name, (uint)length, system_charset_info);
450 protocol->store(param->op_name, system_charset_info);
451 protocol->store(msg_type, system_charset_info);
452 protocol->store(msgbuf, (uint)msg_length, system_charset_info);
453 if (protocol->write())
454 sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n",
455 param->db_name, param->table_name, msgbuf);
456 else if (thd->variables.log_warnings > 2)
457 sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
458
459 return;
460 }
461
462
463 /*
464 Convert TABLE object to Maria key and column definition
465
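  SYNOPSIS
    table2maria()
      table_arg    in   TABLE object.
      row_type     in   Row format; BLOCK_RECORD implies
                        HA_OPTION_PACK_RECORD for the column type choice.
      keydef_out   out  Maria key definition.
      recinfo_out  out  Maria column definition.
      records_out  out  Number of column definitions stored in *recinfo_out.
      create_info  out  Only null_bytes is set here (from the table share).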
472
473 DESCRIPTION
474 This function will allocate and initialize Maria key and column
475 definition for further use in ma_create or for a check for underlying
476 table conformance in merge engine.
477
478 The caller needs to free *recinfo_out after use. Since *recinfo_out
479 and *keydef_out are allocated with a my_multi_malloc, *keydef_out
480 is freed automatically when *recinfo_out is freed.
481
482 RETURN VALUE
483 0 OK
484 # error code
485 */
486
static int table2maria(TABLE *table_arg, data_file_type row_type,
                       MARIA_KEYDEF **keydef_out,
                       MARIA_COLUMNDEF **recinfo_out, uint *records_out,
                       MARIA_CREATE_INFO *create_info)
{
  uint i, j, recpos, minpos, fieldpos, temp_length, length;
  enum ha_base_keytype type= HA_KEYTYPE_BINARY;
  uchar *record;
  KEY *pos;
  MARIA_KEYDEF *keydef;
  MARIA_COLUMNDEF *recinfo, *recinfo_pos;
  HA_KEYSEG *keyseg;
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
  DBUG_ENTER("table2maria");

  /*
    Only the local copy of the options is changed; it influences the key
    packing and column type decisions made below.
  */
  if (row_type == BLOCK_RECORD)
    options|= HA_OPTION_PACK_RECORD;

  /*
    A single allocation holds the column definitions, the key definitions
    and all key segments. *recinfo_out is the pointer that is passed to
    my_free() on the error path below.
  */
  if (!(my_multi_malloc(MYF(MY_WME),
          recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
          keydef_out, share->keys * sizeof(MARIA_KEYDEF),
          &keyseg,
          (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
          NullS)))
    DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
  keydef= *keydef_out;
  recinfo= *recinfo_out;
  pos= table_arg->key_info;
  /* Pass 1: translate every KEY of the table into a MARIA_KEYDEF */
  for (i= 0; i < share->keys; i++, pos++)
  {
    keydef[i].flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
                                            HA_SPATIAL));
    /* No explicit algorithm: RTREE for spatial keys, BTREE otherwise */
    keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
      (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
      pos->algorithm;
    keydef[i].block_length= pos->block_size;
    /* Segments of this key start at the current position of 'keyseg' */
    keydef[i].seg= keyseg;
    keydef[i].keysegs= pos->user_defined_key_parts;
    for (j= 0; j < pos->user_defined_key_parts; j++)
    {
      Field *field= pos->key_part[j].field;

      /*
        A key part on a column that is not stored in the table is rejected.
        The shared allocation is released before returning.
      */
      if (!table_arg->field[field->field_index]->stored_in_db())
      {
        my_free(*recinfo_out);
        my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
        DBUG_RETURN(HA_ERR_UNSUPPORTED);
      }

      type= field->key_type();
      keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;

      /* Key packing is considered only when requested by table or key */
      if (options & HA_OPTION_PACK_KEYS ||
          (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
                         HA_SPACE_PACK_USED)))
      {
        if (pos->key_part[j].length > 8 &&
            (type == HA_KEYTYPE_TEXT ||
             type == HA_KEYTYPE_NUM ||
             (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
        {
          /* No blobs here */
          /* The key-level pack flag is decided by the first key part only */
          if (j == 0)
            keydef[i].flag|= HA_PACK_KEY;
          if (!(field->flags & ZEROFILL_FLAG) &&
              (field->type() == MYSQL_TYPE_STRING ||
               field->type() == MYSQL_TYPE_VAR_STRING ||
               ((int) (pos->key_part[j].length - field->decimals())) >= 4))
            keydef[i].seg[j].flag|= HA_SPACE_PACK;
        }
        else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
          keydef[i].flag|= HA_BINARY_PACK_KEY;
      }
      keydef[i].seg[j].type= (int) type;
      keydef[i].seg[j].start= pos->key_part[j].offset;
      keydef[i].seg[j].length= pos->key_part[j].length;
      keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_length= 0;
      keydef[i].seg[j].bit_pos= 0;
      keydef[i].seg[j].language= field->charset()->number;

      /* null_pos is the offset of the null byte from the start of record[0] */
      if (field->null_ptr)
      {
        keydef[i].seg[j].null_bit= field->null_bit;
        keydef[i].seg[j].null_pos= (uint) (field->null_ptr-
                                           (uchar*) table_arg->record[0]);
      }
      else
      {
        keydef[i].seg[j].null_bit= 0;
        keydef[i].seg[j].null_pos= 0;
      }
      if (field->type() == MYSQL_TYPE_BLOB ||
          field->type() == MYSQL_TYPE_GEOMETRY)
      {
        keydef[i].seg[j].flag|= HA_BLOB_PART;
        /* save number of bytes used to pack length */
        keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
                                            portable_sizeof_char_ptr);
      }
      else if (field->type() == MYSQL_TYPE_BIT)
      {
        /* bit_pos is the offset of bit_ptr from the start of record[0] */
        keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
        keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
        keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
                                          (uchar*) table_arg->record[0]);
      }
    }
    /* Move to the first unused segment for the next key */
    keyseg+= pos->user_defined_key_parts;
  }
  /*
    Flag the key at share->next_number_index when the table has a
    next-number field (presumably the auto-increment column - confirm).
  */
  if (table_arg->found_next_number_field)
    keydef[share->next_number_index].flag|= HA_AUTO_KEY;
  record= table_arg->record[0];
  recpos= 0;
  recinfo_pos= recinfo;
  create_info->null_bytes= table_arg->s->null_bytes;

  /*
    Pass 2: emit column definitions in record-offset order. Each iteration
    scans all fields for the one with the lowest offset that is >= recpos
    (the shorter field wins when offsets are equal), emits it, and then
    continues after its end.
  */
  while (recpos < (uint) share->stored_rec_length)
  {
    Field **field, *found= 0;
    minpos= share->reclength;
    length= 0;

    for (field= table_arg->field; *field; field++)
    {
      if ((fieldpos= (*field)->offset(record)) >= recpos &&
          fieldpos <= minpos)
      {
        /* skip null fields */
        if (!(temp_length= (*field)->pack_length_in_rec()))
          continue; /* Skip null-fields */
        if (! found || fieldpos < minpos ||
            (fieldpos == minpos && temp_length < length))
        {
          minpos= fieldpos;
          found= *field;
          length= temp_length;
        }
      }
    }
    DBUG_PRINT("loop", ("found: %p  recpos: %d  minpos: %d  length: %d",
                        found, recpos, minpos, length));
    /* No field at or after recpos: all column definitions are done */
    if (!found)
      break;

    /*
      Choose the Maria column type. The checks are ordered: blob, timestamp
      and varchar are decided first; the skip-zero / skip-space types are
      only used when HA_OPTION_PACK_RECORD is set.
    */
    if (found->flags & BLOB_FLAG)
      recinfo_pos->type= FIELD_BLOB;
    else if (found->type() == MYSQL_TYPE_TIMESTAMP)
      recinfo_pos->type= FIELD_NORMAL;
    else if (found->type() == MYSQL_TYPE_VARCHAR)
      recinfo_pos->type= FIELD_VARCHAR;
    else if (!(options & HA_OPTION_PACK_RECORD) ||
             (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
      recinfo_pos->type= FIELD_NORMAL;
    else if (found->zero_pack())
      recinfo_pos->type= FIELD_SKIP_ZERO;
    else
      recinfo_pos->type= ((length <= 3 ||
                           (found->flags & ZEROFILL_FLAG)) ?
                          FIELD_NORMAL :
                          found->type() == MYSQL_TYPE_STRING ||
                          found->type() == MYSQL_TYPE_VAR_STRING ?
                          FIELD_SKIP_ENDSPACE :
                          FIELD_SKIP_PRESPACE);
    if (found->null_ptr)
    {
      recinfo_pos->null_bit= found->null_bit;
      recinfo_pos->null_pos= (uint) (found->null_ptr -
                                     (uchar*) table_arg->record[0]);
    }
    else
    {
      recinfo_pos->null_bit= 0;
      recinfo_pos->null_pos= 0;
    }
    /* Store the length and step to the next column definition slot */
    (recinfo_pos++)->length= (uint16) length;
    recpos= minpos + length;
    DBUG_PRINT("loop", ("length: %d  type: %d",
                        recinfo_pos[-1].length,recinfo_pos[-1].type));
  }
  /* Number of column definitions written above */
  *records_out= (uint) (recinfo_pos - recinfo);
  DBUG_RETURN(0);
}
670
671
672 /*
673 Check for underlying table conformance
674
675 SYNOPSIS
676 maria_check_definition()
677 t1_keyinfo in First table key definition
678 t1_recinfo in First table record definition
679 t1_keys in Number of keys in first table
680 t1_recs in Number of records in first table
681 t2_keyinfo in Second table key definition
682 t2_recinfo in Second table record definition
683 t2_keys in Number of keys in second table
684 t2_recs in Number of records in second table
685 strict in Strict check switch
686
687 DESCRIPTION
688 This function compares two Maria definitions. By intention it was done
689 to compare merge table definition against underlying table definition.
690 It may also be used to compare dot-frm and MAI definitions of Maria
691 table as well to compare different Maria table definitions.
692
693 For merge table it is not required that number of keys in merge table
694 must exactly match number of keys in underlying table. When calling this
695 function for underlying table conformance check, 'strict' flag must be
696 set to false, and converted merge definition must be passed as t1_*.
697
698 Otherwise 'strict' flag must be set to 1 and it is not required to pass
699 converted dot-frm definition as t1_*.
700
701 RETURN VALUE
702 0 - Equal definitions.
703 1 - Different definitions.
704
705 TODO
706 - compare FULLTEXT keys;
707 - compare SPATIAL keys;
708 - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
709 (should be correctly detected in table2maria).
710
711 FIXME:
712 maria_check_definition() is never used! CHECK TABLE does not detect the
713 corruption! Do maria_check_definition() like check_definition() is done
714 by MyISAM (related to MDEV-25803).
715 */
716
maria_check_definition(MARIA_KEYDEF * t1_keyinfo,MARIA_COLUMNDEF * t1_recinfo,uint t1_keys,uint t1_recs,MARIA_KEYDEF * t2_keyinfo,MARIA_COLUMNDEF * t2_recinfo,uint t2_keys,uint t2_recs,bool strict)717 int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
718 MARIA_COLUMNDEF *t1_recinfo,
719 uint t1_keys, uint t1_recs,
720 MARIA_KEYDEF *t2_keyinfo,
721 MARIA_COLUMNDEF *t2_recinfo,
722 uint t2_keys, uint t2_recs, bool strict)
723 {
724 uint i, j;
725 DBUG_ENTER("maria_check_definition");
726 if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys))
727 {
728 DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
729 t1_keys, t2_keys));
730 DBUG_RETURN(1);
731 }
732 if (t1_recs != t2_recs)
733 {
734 DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
735 t1_recs, t2_recs));
736 DBUG_RETURN(1);
737 }
738 for (i= 0; i < t1_keys; i++)
739 {
740 HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
741 HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
742 if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
743 continue;
744 else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
745 t2_keyinfo[i].flag & HA_FULLTEXT)
746 {
747 DBUG_PRINT("error", ("Key %d has different definition", i));
748 DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
749 MY_TEST(t1_keyinfo[i].flag & HA_FULLTEXT),
750 MY_TEST(t2_keyinfo[i].flag & HA_FULLTEXT)));
751 DBUG_RETURN(1);
752 }
753 if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
754 continue;
755 else if (t1_keyinfo[i].flag & HA_SPATIAL ||
756 t2_keyinfo[i].flag & HA_SPATIAL)
757 {
758 DBUG_PRINT("error", ("Key %d has different definition", i));
759 DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
760 MY_TEST(t1_keyinfo[i].flag & HA_SPATIAL),
761 MY_TEST(t2_keyinfo[i].flag & HA_SPATIAL)));
762 DBUG_RETURN(1);
763 }
764 if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
765 t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
766 {
767 DBUG_PRINT("error", ("Key %d has different definition", i));
768 DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
769 t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
770 DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
771 t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
772 DBUG_RETURN(1);
773 }
774 for (j= t1_keyinfo[i].keysegs; j--;)
775 {
776 uint8 t1_keysegs_j__type= t1_keysegs[j].type;
777 /*
778 Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
779 always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
780 HA_KEYTYPE_VARTEXT1. Since we treat both the same on MyISAM
781 level, we can ignore a mismatch between these types.
782 */
783 if ((t1_keysegs[j].flag & HA_BLOB_PART) &&
784 (t2_keysegs[j].flag & HA_BLOB_PART))
785 {
786 if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
787 (t2_keysegs[j].type == HA_KEYTYPE_VARTEXT1))
788 t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
789 else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
790 (t2_keysegs[j].type == HA_KEYTYPE_VARBINARY1))
791 t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
792 }
793
794 if (t1_keysegs_j__type != t2_keysegs[j].type ||
795 t1_keysegs[j].language != t2_keysegs[j].language ||
796 t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
797 t1_keysegs[j].length != t2_keysegs[j].length)
798 {
799 DBUG_PRINT("error", ("Key segment %d (key %d) has different "
800 "definition", j, i));
801 DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
802 "t1_length=%d",
803 t1_keysegs[j].type, t1_keysegs[j].language,
804 t1_keysegs[j].null_bit, t1_keysegs[j].length));
805 DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
806 "t2_length=%d",
807 t2_keysegs[j].type, t2_keysegs[j].language,
808 t2_keysegs[j].null_bit, t2_keysegs[j].length));
809
810 DBUG_RETURN(1);
811 }
812 }
813 }
814
815 for (i= 0; i < t1_recs; i++)
816 {
817 MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
818 MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
819 /*
820 FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
821 see NOTE1 in ma_create.c
822 */
823 if ((t1_rec->type != t2_rec->type &&
824 !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
825 t1_rec->length == 1 &&
826 t2_rec->type == (int) FIELD_NORMAL)) ||
827 t1_rec->length != t2_rec->length ||
828 t1_rec->null_bit != t2_rec->null_bit)
829 {
830 DBUG_PRINT("error", ("Field %d has different definition", i));
831 DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
832 t1_rec->type, t1_rec->length, t1_rec->null_bit));
833 DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
834 t2_rec->type, t2_rec->length, t2_rec->null_bit));
835 DBUG_RETURN(1);
836 }
837 }
838 DBUG_RETURN(0);
839 }
840
841
842 extern "C" {
843
_ma_killed_ptr(HA_CHECK * param)844 int _ma_killed_ptr(HA_CHECK *param)
845 {
846 if (likely(thd_killed((THD*)param->thd)) == 0)
847 return 0;
848 my_errno= HA_ERR_ABORTED_BY_USER;
849 return 1;
850 }
851
852
853 /*
854 Report progress to mysqld
855
856 This is a bit more complex than what a normal progress report
857 function normally is.
858
859 The reason is that this is called by enable_index/repair which
860 is one stage in ALTER TABLE and we can't use the external
861 stage/max_stage for this.
862
863 thd_progress_init/thd_progress_next_stage is to be called by
864 high level commands like CHECK TABLE or REPAIR TABLE, not
865 by sub commands like enable_index().
866
867 In ma_check.c it's easier to work with stages than with a total
868 progress, so we use internal stage/max_stage here to keep the
869 code simple.
870 */
871
_ma_report_progress(HA_CHECK * param,ulonglong progress,ulonglong max_progress)872 void _ma_report_progress(HA_CHECK *param, ulonglong progress,
873 ulonglong max_progress)
874 {
875 thd_progress_report((THD*)param->thd,
876 progress + max_progress * param->stage,
877 max_progress * param->max_stage);
878 }
879
880
_ma_check_print_error(HA_CHECK * param,const char * fmt,...)881 void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
882 {
883 va_list args;
884 DBUG_ENTER("_ma_check_print_error");
885 param->error_printed |= 1;
886 param->out_flag |= O_DATA_LOST;
887 if (param->testflag & T_SUPPRESS_ERR_HANDLING)
888 DBUG_VOID_RETURN;
889 va_start(args, fmt);
890 _ma_check_print_msg(param, "error", fmt, args);
891 va_end(args);
892 DBUG_VOID_RETURN;
893 }
894
895
_ma_check_print_info(HA_CHECK * param,const char * fmt,...)896 void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
897 {
898 va_list args;
899 DBUG_ENTER("_ma_check_print_info");
900 va_start(args, fmt);
901 _ma_check_print_msg(param, "info", fmt, args);
902 va_end(args);
903 DBUG_VOID_RETURN;
904 }
905
906
_ma_check_print_warning(HA_CHECK * param,const char * fmt,...)907 void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
908 {
909 va_list args;
910 DBUG_ENTER("_ma_check_print_warning");
911 param->warning_printed= 1;
912 param->out_flag |= O_DATA_LOST;
913 va_start(args, fmt);
914 _ma_check_print_msg(param, "warning", fmt, args);
915 va_end(args);
916 DBUG_VOID_RETURN;
917 }
918
919 /*
920 Create a transaction object
921
922 SYNOPSIS
923 info Maria handler
924
925 RETURN
926 0 ok
927 # Error number (HA_ERR_OUT_OF_MEM)
928 */
929
maria_create_trn_for_mysql(MARIA_HA * info)930 static int maria_create_trn_for_mysql(MARIA_HA *info)
931 {
932 THD *thd= ((TABLE*) info->external_ref)->in_use;
933 TRN *trn= THD_TRN;
934 DBUG_ENTER("maria_create_trn_for_mysql");
935
936 if (!trn) /* no transaction yet - open it now */
937 {
938 trn= trnman_new_trn(& thd->transaction.wt);
939 if (unlikely(!trn))
940 DBUG_RETURN(HA_ERR_OUT_OF_MEM);
941 THD_TRN= trn;
942 if (thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
943 trans_register_ha(thd, TRUE, maria_hton);
944 }
945 _ma_set_trn_for_table(info, trn);
946 if (!trnman_increment_locked_tables(trn))
947 {
948 trans_register_ha(thd, FALSE, maria_hton);
949 trnman_new_statement(trn);
950 }
951 #ifdef EXTRA_DEBUG
952 if (info->lock_type == F_WRLCK &&
953 ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
954 {
955 trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
956 TRN_STATE_TABLES_CAN_CHANGE);
957 (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
958 (uchar*) thd->query(),
959 thd->query_length());
960 }
961 else
962 {
963 DBUG_PRINT("info", ("lock_type: %d trnman_flags: %u",
964 info->lock_type, trnman_get_flags(trn)));
965 }
966
967 #endif
968 DBUG_RETURN(0);
969 }
970
ma_killed_in_mariadb(MARIA_HA * info)971 my_bool ma_killed_in_mariadb(MARIA_HA *info)
972 {
973 return (((TABLE*) (info->external_ref))->in_use->killed != 0);
974 }
975
976 } /* extern "C" */
977
/*
  Bulk insert modes. ha_maria::bulk_insert_single_undo is initialized to
  BULK_INSERT_NONE by the ha_maria constructor.
*/

/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and with repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR    1
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and without repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
/**
  Neither BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
  BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR applies.
*/
#define BULK_INSERT_NONE      0
993
ha_maria(handlerton * hton,TABLE_SHARE * table_arg)994 ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
995 handler(hton, table_arg), file(0),
996 int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
997 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
998 HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
999 HA_FILE_BASED | HA_CAN_GEOMETRY | CANNOT_ROLLBACK_FLAG |
1000 HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_CAN_REPAIR |
1001 HA_CAN_VIRTUAL_COLUMNS | HA_CAN_EXPORT |
1002 HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT |
1003 HA_CAN_TABLES_WITHOUT_ROLLBACK),
1004 can_enable_indexes(1), bulk_insert_single_undo(BULK_INSERT_NONE)
1005 {}
1006
1007
clone(const char * name,MEM_ROOT * mem_root)1008 handler *ha_maria::clone(const char *name __attribute__((unused)),
1009 MEM_ROOT *mem_root)
1010 {
1011 ha_maria *new_handler=
1012 static_cast <ha_maria *>(handler::clone(file->s->open_file_name.str,
1013 mem_root));
1014 if (new_handler)
1015 {
1016 new_handler->file->state= file->state;
1017 /* maria_create_trn_for_mysql() is never called for clone() tables */
1018 new_handler->file->trn= file->trn;
1019 }
1020 return new_handler;
1021 }
1022
1023
/*
  File name extensions used by this engine: the MARIA_NAME_IEXT and
  MARIA_NAME_DEXT extensions. The list is terminated by NullS.
*/
static const char *ha_maria_exts[]=
{
  MARIA_NAME_IEXT,
  MARIA_NAME_DEXT,
  NullS
};
1030
1031
index_type(uint key_number)1032 const char *ha_maria::index_type(uint key_number)
1033 {
1034 return ((table->key_info[key_number].flags & HA_FULLTEXT) ?
1035 "FULLTEXT" :
1036 (table->key_info[key_number].flags & HA_SPATIAL) ?
1037 "SPATIAL" :
1038 (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ?
1039 "RTREE" : "BTREE");
1040 }
1041
1042
index_flags(uint inx,uint part,bool all_parts) const1043 ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
1044 {
1045 ulong flags;
1046 if (table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT)
1047 flags= 0;
1048 else
1049 if ((table_share->key_info[inx].flags & HA_SPATIAL ||
1050 table_share->key_info[inx].algorithm == HA_KEY_ALG_RTREE))
1051 {
1052 /* All GIS scans are non-ROR scans. We also disable IndexConditionPushdown */
1053 flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1054 HA_READ_ORDER | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
1055 }
1056 else
1057 {
1058 flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1059 HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN;
1060 }
1061 return flags;
1062 }
1063
1064
scan_time()1065 double ha_maria::scan_time()
1066 {
1067 if (file->s->data_file_type == BLOCK_RECORD)
1068 return ulonglong2double(stats.data_file_length - file->s->block_size) / MY_MAX(file->s->block_size / 2, IO_SIZE) + 2;
1069 return handler::scan_time();
1070 }
1071
/*
  Return the longest key this engine supports, as reported by
  maria_max_key_length().

  We need to be able to store at least 2 keys on an index page as the
  splitting algorithm depends on this. (With only one key on a page
  we also can't use any compression, which may make the index file much
  larger.)
  We use HA_MAX_KEY_LENGTH as this is a stack restriction imposed by the
  handler interface. If we want to increase this, we also have to
  increase HA_MARIA_KEY_BUFF and MARIA_MAX_KEY_BUFF as the buffer needs
  to be able to store the extra length bytes that are part of the stored
  key.

  We also need to reserve space for a record pointer (8) and 3 bytes
  per key segment to store the length of the segment + possible null bytes.
  These extra bytes are required here so that maria_create() will surely
  accept any key created with the returned key data storage length.
*/

uint ha_maria::max_supported_key_length() const
{
  return maria_max_key_length();
}
1093
/*
  Open a table and prepare the handler for using it

  SYNOPSIS
    open()
    name                Table name. Name is here without an extension
    mode                Open mode, passed on to maria_open()
    test_if_locked      HA_OPEN_... flags. HA_OPEN_ABORT_IF_CRASHED is added
                        if maria_recover_options has HA_RECOVER_ANY set;
                        HA_OPEN_FROM_SQL_LAYER is always added

  NOTES
    On success 'file' points to the opened table, int_table_flags is
    extended with flags that depend on the table, and
    table->key_info[].block_size is set from the opened table.

  RETURN
    0   ok
    #   my_errno from maria_open(), or -1 if my_errno was not set
*/

int ha_maria::open(const char *name, int mode, uint test_if_locked)
{
  uint i;

#ifdef NOT_USED
  /*
    If the user wants to have memory mapped data files, add an
    open_flag. Do not memory map temporary tables because they are
    expected to be inserted and thus extended a lot. Memory mapping is
    efficient for files that keep their size, but very inefficient for
    growing files. Using an open_flag instead of calling ma_extra(...
    HA_EXTRA_MMAP ...) after maria_open() has the advantage that the
    mapping is not repeated for every open, but just done on the initial
    open, when the Maria share is created. Every time the server
    requires to open a new instance of a table it calls this method. We
    will always supply HA_OPEN_MMAP for a permanent table. However, the
    Maria storage engine will ignore this flag if this is a secondary
    open of a table that is in use by other threads already (if the
    Maria share exists already).
  */
  if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
    test_if_locked|= HA_OPEN_MMAP;
#endif

  if (maria_recover_options & HA_RECOVER_ANY)
  {
    /* user asked to trigger a repair if table was not properly closed */
    test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
  }

  if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
  {
    /* For HA_ERR_OLD_FILE also give the user a note with zerofill_error_msg */
    if (my_errno == HA_ERR_OLD_FILE)
    {
      push_warning(current_thd, Sql_condition::WARN_LEVEL_NOTE,
                   ER_CRASHED_ON_USAGE,
                   zerofill_error_msg);
    }
    /* Never return 0 on failure, even if my_errno was not set */
    return (my_errno ? my_errno : -1);
  }

  file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
  /* Set external_ref, mainly for temporary tables */
  file->external_ref= (void*) table;            // For ma_killed()

  if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
    maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0);

  /*
    NOTE(review): info() is called between the NO_WAIT_LOCK and WAIT_LOCK
    calls; presumably so that it does not wait for locks - confirm.
  */
  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
  if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
    maria_extra(file, HA_EXTRA_WAIT_LOCK, 0);
  /* Remember the row format in the handler object */
  if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
    int_table_flags |= HA_REC_NOT_IN_SEQ;
  if (!file->s->base.born_transactional)
  {
    /*
      INSERT DELAYED cannot work with transactional tables (because it cannot
      stand up to "when client gets ok the data is safe on disk": the record
      may not even be inserted). In the future, we could enable it back (as a
      client doing INSERT DELAYED knows the specificities; but we then should
      make sure to regularly commit in the delayed_insert thread).
    */
    int_table_flags|= HA_CAN_INSERT_DELAYED;
  }
  if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
    int_table_flags |= HA_HAS_NEW_CHECKSUM;

  /*
    For static size rows, tell MariaDB that we will access all bytes
    in the record when writing it.  This signals MariaDB to initialize
    the full row to ensure we don't get any errors from valgrind and
    that all bytes in the row are properly reset.
  */
  if (file->s->data_file_type == STATIC_RECORD &&
      (file->s->has_varchar_fields || file->s->has_null_fields))
    int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;

  /*
    For each key: hook up the parser plugin for keys that have
    HA_USES_PARSER set, and copy the key block length to the TABLE
  */
  for (i= 0; i < table->s->keys; i++)
  {
    plugin_ref parser= table->key_info[i].parser;
    if (table->key_info[i].flags & HA_USES_PARSER)
      file->s->keyinfo[i].parser=
        (struct st_mysql_ftparser *)plugin_decl(parser)->info;
    table->key_info[i].block_size= file->s->keyinfo[i].block_length;
  }
  my_errno= 0;

  /* Count statistics of usage for newly open normal files */
  if (file->s->reopen == 1 && ! (test_if_locked & HA_OPEN_TMP_TABLE))
  {
    if (file->s->delay_key_write)
      feature_files_opened_with_delayed_keys++;
  }

  return my_errno;
}
1192
1193
close(void)1194 int ha_maria::close(void)
1195 {
1196 MARIA_HA *tmp= file;
1197 if (!tmp)
1198 return 0;
1199 file= 0;
1200 return maria_close(tmp);
1201 }
1202
1203
write_row(uchar * buf)1204 int ha_maria::write_row(uchar * buf)
1205 {
1206 /*
1207 If we have an auto_increment column and we are writing a changed row
1208 or a new row, then update the auto_increment value in the record.
1209 */
1210 if (table->next_number_field && buf == table->record[0])
1211 {
1212 int error;
1213 if ((error= update_auto_increment()))
1214 return error;
1215 }
1216 return maria_write(file, buf);
1217 }
1218
1219
check(THD * thd,HA_CHECK_OPT * check_opt)1220 int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
1221 {
1222 int error;
1223 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1224 MARIA_SHARE *share= file->s;
1225 const char *old_proc_info;
1226 TRN *old_trn= file->trn;
1227
1228 if (!file || !param) return HA_ADMIN_INTERNAL_ERROR;
1229
1230 unmap_file(file);
1231 maria_chk_init(param);
1232 param->thd= thd;
1233 param->op_name= "check";
1234 param->db_name= table->s->db.str;
1235 param->table_name= table->alias.c_ptr();
1236 param->testflag= check_opt->flags | T_CHECK | T_SILENT;
1237 param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1238
1239 if (!(table->db_stat & HA_READ_ONLY))
1240 param->testflag |= T_STATISTICS;
1241 param->using_global_keycache= 1;
1242
1243 if (!maria_is_crashed(file) &&
1244 (((param->testflag & T_CHECK_ONLY_CHANGED) &&
1245 !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
1246 STATE_IN_REPAIR)) &&
1247 share->state.open_count == 0) ||
1248 ((param->testflag & T_FAST) && (share->state.open_count ==
1249 (uint) (share->global_changed ? 1 :
1250 0)))))
1251 return HA_ADMIN_ALREADY_DONE;
1252
1253 maria_chk_init_for_check(param, file);
1254
1255 if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
1256 STATE_MOVED)
1257 {
1258 _ma_check_print_error(param, "%s", zerofill_error_msg);
1259 return HA_ADMIN_CORRUPT;
1260 }
1261
1262 old_proc_info= thd_proc_info(thd, "Checking status");
1263 thd_progress_init(thd, 3);
1264 error= maria_chk_status(param, file); // Not fatal
1265 /* maria_chk_size() will flush the page cache for this file */
1266 if (maria_chk_size(param, file))
1267 error= 1;
1268 if (!error)
1269 error|= maria_chk_del(param, file, param->testflag);
1270 thd_proc_info(thd, "Checking keys");
1271 thd_progress_next_stage(thd);
1272 if (!error)
1273 error= maria_chk_key(param, file);
1274 thd_proc_info(thd, "Checking data");
1275 thd_progress_next_stage(thd);
1276 if (!error)
1277 {
1278 if ((!(param->testflag & T_QUICK) &&
1279 ((share->options &
1280 (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
1281 (param->testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
1282 {
1283 ulonglong old_testflag= param->testflag;
1284 param->testflag |= T_MEDIUM;
1285 if (!(error= init_io_cache(¶m->read_cache, file->dfile.file,
1286 my_default_record_cache_size, READ_CACHE,
1287 share->pack.header_length, 1, MYF(MY_WME))))
1288 {
1289 error= maria_chk_data_link(param, file,
1290 MY_TEST(param->testflag & T_EXTEND));
1291 end_io_cache(¶m->read_cache);
1292 }
1293 param->testflag= old_testflag;
1294 }
1295 }
1296 if (!error)
1297 {
1298 if ((share->state.changed & (STATE_CHANGED |
1299 STATE_CRASHED_FLAGS |
1300 STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
1301 (param->testflag & T_STATISTICS) || maria_is_crashed(file))
1302 {
1303 file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1304 mysql_mutex_lock(&share->intern_lock);
1305 DBUG_PRINT("info", ("Reseting crashed state"));
1306 share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
1307 STATE_IN_REPAIR);
1308 if (!(table->db_stat & HA_READ_ONLY))
1309 error= maria_update_state_info(param, file,
1310 UPDATE_TIME | UPDATE_OPEN_COUNT |
1311 UPDATE_STAT);
1312 mysql_mutex_unlock(&share->intern_lock);
1313 info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
1314 HA_STATUS_CONST);
1315 }
1316 }
1317 else if (!maria_is_crashed(file) && !thd->killed)
1318 {
1319 maria_mark_crashed(file);
1320 file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1321 }
1322
1323 /* Reset trn, that may have been set by repair */
1324 if (old_trn && old_trn != file->trn)
1325 _ma_set_trn_for_table(file, old_trn);
1326 thd_proc_info(thd, old_proc_info);
1327 thd_progress_end(thd);
1328 return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1329 }
1330
1331
1332 /*
1333 Analyze the key distribution in the table
1334 As the table may be only locked for read, we have to take into account that
1335 two threads may do an analyze at the same time!
1336 */
1337
analyze(THD * thd,HA_CHECK_OPT * check_opt)1338 int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
1339 {
1340 int error= 0;
1341 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1342 MARIA_SHARE *share= file->s;
1343 const char *old_proc_info;
1344
1345 if (!param)
1346 return HA_ADMIN_INTERNAL_ERROR;
1347
1348 maria_chk_init(param);
1349 param->thd= thd;
1350 param->op_name= "analyze";
1351 param->db_name= table->s->db.str;
1352 param->table_name= table->alias.c_ptr();
1353 param->testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
1354 T_DONT_CHECK_CHECKSUM);
1355 param->using_global_keycache= 1;
1356 param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1357
1358 if (!(share->state.changed & STATE_NOT_ANALYZED))
1359 return HA_ADMIN_ALREADY_DONE;
1360
1361 old_proc_info= thd_proc_info(thd, "Scanning");
1362 thd_progress_init(thd, 1);
1363 error= maria_chk_key(param, file);
1364 if (!error)
1365 {
1366 mysql_mutex_lock(&share->intern_lock);
1367 error= maria_update_state_info(param, file, UPDATE_STAT);
1368 mysql_mutex_unlock(&share->intern_lock);
1369 }
1370 else if (!maria_is_crashed(file) && !thd->killed)
1371 maria_mark_crashed(file);
1372 thd_proc_info(thd, old_proc_info);
1373 thd_progress_end(thd);
1374 return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1375 }
1376
repair(THD * thd,HA_CHECK_OPT * check_opt)1377 int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
1378 {
1379 int error;
1380 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1381 ha_rows start_records;
1382 const char *old_proc_info;
1383
1384 if (!file || !param)
1385 return HA_ADMIN_INTERNAL_ERROR;
1386
1387 maria_chk_init(param);
1388 param->thd= thd;
1389 param->op_name= "repair";
1390 param->testflag= ((check_opt->flags & ~(T_EXTEND)) |
1391 T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
1392 (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
1393 param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1394 param->backup_time= check_opt->start_time;
1395 start_records= file->state->records;
1396 old_proc_info= thd_proc_info(thd, "Checking table");
1397 thd_progress_init(thd, 1);
1398 while ((error= repair(thd, param, 0)) && param->retry_repair)
1399 {
1400 param->retry_repair= 0;
1401 file->state->records= start_records;
1402 if (test_all_bits(param->testflag,
1403 (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
1404 {
1405 param->testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
1406 /* Ensure we don't loose any rows when retrying without quick */
1407 param->testflag|= T_SAFE_REPAIR;
1408 if (thd->vio_ok())
1409 _ma_check_print_info(param, "Retrying repair without quick");
1410 else
1411 sql_print_information("Retrying repair of: '%s' without quick",
1412 table->s->path.str);
1413 continue;
1414 }
1415 param->testflag &= ~T_QUICK;
1416 if (param->testflag & T_REP_BY_SORT)
1417 {
1418 param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
1419 if (thd->vio_ok())
1420 _ma_check_print_info(param, "Retrying repair with keycache");
1421 sql_print_information("Retrying repair of: '%s' with keycache",
1422 table->s->path.str);
1423 continue;
1424 }
1425 break;
1426 }
1427 if (!error && start_records != file->state->records &&
1428 !(check_opt->flags & T_VERY_SILENT))
1429 {
1430 char llbuff[22], llbuff2[22];
1431 sql_print_information("Found %s of %s rows when repairing '%s'",
1432 llstr(file->state->records, llbuff),
1433 llstr(start_records, llbuff2),
1434 table->s->path.str);
1435 }
1436 thd_proc_info(thd, old_proc_info);
1437 thd_progress_end(thd);
1438 return error;
1439 }
1440
zerofill(THD * thd,HA_CHECK_OPT * check_opt)1441 int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
1442 {
1443 int error;
1444 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1445 TRN *old_trn;
1446 MARIA_SHARE *share= file->s;
1447
1448 if (!file || !param)
1449 return HA_ADMIN_INTERNAL_ERROR;
1450
1451 unmap_file(file);
1452 old_trn= file->trn;
1453 maria_chk_init(param);
1454 param->thd= thd;
1455 param->op_name= "zerofill";
1456 param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
1457 param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1458 error=maria_zerofill(param, file, share->open_file_name.str);
1459
1460 /* Reset trn, that may have been set by repair */
1461 if (old_trn && old_trn != file->trn)
1462 _ma_set_trn_for_table(file, old_trn);
1463
1464 if (!error)
1465 {
1466 TrID create_trid= trnman_get_min_safe_trid();
1467 mysql_mutex_lock(&share->intern_lock);
1468 share->state.changed|= STATE_NOT_MOVABLE;
1469 maria_update_state_info(param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
1470 _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, create_trid,
1471 TRUE, TRUE);
1472 mysql_mutex_unlock(&share->intern_lock);
1473 }
1474 return error;
1475 }
1476
optimize(THD * thd,HA_CHECK_OPT * check_opt)1477 int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
1478 {
1479 int error;
1480 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1481
1482 if (!file || !param)
1483 return HA_ADMIN_INTERNAL_ERROR;
1484
1485 maria_chk_init(param);
1486 param->thd= thd;
1487 param->op_name= "optimize";
1488 param->testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
1489 T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
1490 param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1491 thd_progress_init(thd, 1);
1492 if ((error= repair(thd, param, 1)) && param->retry_repair)
1493 {
1494 sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
1495 my_errno, param->db_name, param->table_name);
1496 param->testflag &= ~T_REP_BY_SORT;
1497 error= repair(thd, param, 0);
1498 }
1499 thd_progress_end(thd);
1500 return error;
1501 }
1502
1503
/*
  Do the actual work for repair and optimize

  SYNOPSIS
    repair()
    thd           Connection running the operation
    param         Check descriptor prepared by the caller; testflag selects
                  what to do (T_REP_BY_SORT, T_QUICK, T_SORT_INDEX,
                  T_STATISTICS, ...)
    do_optimize   If set, the rows/keys are only rebuilt if the table state
                  says that there is something to optimize

  NOTES
    Called by repair(THD*, HA_CHECK_OPT*) and optimize(). The steps are:
    lock the table (unless LOCK TABLES is used or T_NO_LOCKS is set),
    repair rows and keys, optionally sort the index and update key
    statistics, and finally save the new table state.

  RETURN
    HA_ADMIN_FAILED         the data file is not open, the table could not
                            be locked, one of the steps failed, or
                            write_log_record_for_repair() failed
    HA_ADMIN_OK             something was done and it succeeded
    HA_ADMIN_ALREADY_DONE   nothing had to be done
*/

int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
{
  int error= 0;
  /* Flags as requested by the caller; param->testflag is changed below */
  ulonglong local_testflag= param->testflag;
  bool optimize_done= !do_optimize, statistics_done= 0, full_repair_done= 0;
  const char *old_proc_info= thd->proc_info;
  char fixed_name[FN_REFLEN];
  MARIA_SHARE *share= file->s;
  /* Row count before repair, used for the "rows changed" warning */
  ha_rows rows= file->state->records;
  TRN *old_trn= file->trn;
  my_bool locking= 0;
  DBUG_ENTER("ha_maria::repair");

  /*
    Normally this method is entered with a properly opened table. If the
    repair fails, it can be repeated with more elaborate options. Under
    special circumstances it can happen that a repair fails so that it
    closed the data file and cannot re-open it. In this case file->dfile
    is set to -1. We must not try another repair without an open data
    file. (Bug #25289)
  */
  if (file->dfile.file == -1)
  {
    sql_print_information("Retrying repair of: '%s' failed. "
                          "Please try REPAIR EXTENDED or aria_chk",
                          table->s->path.str);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /*
    If transactions were not enabled for a transactional table then
    file->s->status is not up to date. This is needed for repair_by_sort
    to work
  */
  if (share->base.born_transactional && !share->now_transactional)
    _ma_copy_nontrans_state_information(file);

  param->db_name= table->s->db.str;
  param->table_name= table->alias.c_ptr();
  param->tmpfile_createflag= O_RDWR | O_TRUNC;
  param->using_global_keycache= 1;
  param->thd= thd;
  param->tmpdir= &mysql_tmpdir_list;
  param->out_flag= 0;
  share->state.dupp_key= MI_MAX_KEY;
  strmov(fixed_name, share->open_file_name.str);
  unmap_file(file);

  /*
    Don't lock tables if we have used LOCK TABLE or if we come from
    enable_index()
  */
  if (!thd->locked_tables_mode && ! (param->testflag & T_NO_LOCKS))
  {
    locking= 1;
    if (maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
    {
      _ma_check_print_error(param, ER_THD(thd, ER_CANT_LOCK), my_errno);
      DBUG_RETURN(HA_ADMIN_FAILED);
    }
  }

  /*
    Rebuild rows and keys. For a plain repair this is always done. For
    optimize it is only done if the table state shows something to
    optimize and either T_QUICK is not set or keys/rows are marked as not
    optimized.
  */
  if (!do_optimize ||
      (((share->data_file_type == BLOCK_RECORD) ?
        (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) :
        (file->state->del ||
         share->state.split != file->state->records)) &&
       (!(param->testflag & T_QUICK) ||
        (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
                                 STATE_NOT_OPTIMIZED_ROWS)))))
  {
    ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
                        maria_get_mask_all_keys_active(share->base.keys) :
                        share->state.key_map);
    ulonglong save_testflag= param->testflag;
    if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
        (local_testflag & T_REP_BY_SORT))
    {
      local_testflag |= T_STATISTICS;
      param->testflag |= T_STATISTICS;           // We get this for free
      statistics_done= 1;
      /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
      if (THDVAR(thd,repair_threads) > 1 &&
          share->data_file_type != BLOCK_RECORD)
      {
        char buf[40];
        /* TODO: respect maria_repair_threads variable */
        my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
        thd_proc_info(thd, buf);
        param->testflag|= T_REP_PARALLEL;
        error= maria_repair_parallel(param, file, fixed_name,
                                     MY_TEST(param->testflag & T_QUICK));
        /* to reset proc_info, as it was pointing to local buffer */
        thd_proc_info(thd, "Repair done");
      }
      else
      {
        thd_proc_info(thd, "Repair by sorting");
        param->testflag|= T_REP_BY_SORT;
        error= maria_repair_by_sort(param, file, fixed_name,
                                    MY_TEST(param->testflag & T_QUICK));
      }
      /* Report the duplicate key if a unique index could not be created */
      if (error && file->create_unique_index_by_sort &&
          share->state.dupp_key != MAX_KEY)
      {
        my_errno= HA_ERR_FOUND_DUPP_KEY;
        print_keydup_error(table, &table->key_info[share->state.dupp_key],
                           MYF(0));
      }
    }
    else
    {
      thd_proc_info(thd, "Repair with keycache");
      param->testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
      error= maria_repair(param, file, fixed_name,
                          MY_TEST(param->testflag & T_QUICK));
    }
    /*
      Restore the flags we had before the repair, but keep
      T_RETRY_WITHOUT_QUICK if the repair code set it
    */
    param->testflag= save_testflag | (param->testflag & T_RETRY_WITHOUT_QUICK);
    optimize_done= 1;
    /*
      set full_repair_done if we re-wrote all rows and all keys
      (and thus removed all transid's from the table)
    */
    full_repair_done= !MY_TEST(param->testflag & T_QUICK);
  }
  if (!error)
  {
    if ((local_testflag & T_SORT_INDEX) &&
        (share->state.changed & STATE_NOT_SORTED_PAGES))
    {
      optimize_done= 1;
      thd_proc_info(thd, "Sorting index");
      error= maria_sort_index(param, file, fixed_name);
    }
    if (!error && !statistics_done && (local_testflag & T_STATISTICS))
    {
      if (share->state.changed & STATE_NOT_ANALYZED)
      {
        optimize_done= 1;
        thd_proc_info(thd, "Analyzing");
        error= maria_chk_key(param, file);
      }
      else
        local_testflag &= ~T_STATISTICS;        // Don't update statistics
    }
  }
  thd_proc_info(thd, "Saving state");
  if (full_repair_done && !error &&
      !(param->testflag & T_NO_CREATE_RENAME_LSN))
  {
    /* Set trid (needed if the table was moved from another system) */
    share->state.create_trid= trnman_get_min_safe_trid();
  }
  mysql_mutex_lock(&share->intern_lock);
  if (!error)
  {
    if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
    {
      DBUG_PRINT("info", ("Reseting crashed state"));
      share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
                               STATE_IN_REPAIR | STATE_MOVED);
      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    }
    /*
      repair updates share->state.state. Ensure that file->state is up to date
    */
    if (file->state != &share->state.state)
      *file->state= share->state.state;

    if (share->base.auto_key)
      _ma_update_auto_increment_key(param, file, 1);
    if (optimize_done)
      error= maria_update_state_info(param, file,
                                     UPDATE_TIME | UPDATE_OPEN_COUNT |
                                     (local_testflag &
                                      T_STATISTICS ? UPDATE_STAT : 0));
    /* File is repaired; Mark the file as moved to this system */
    (void) _ma_set_uuid(share, 0);

    info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
         HA_STATUS_CONST);
    if (rows != file->state->records && !(param->testflag & T_VERY_SILENT))
    {
      char llbuff[22], llbuff2[22];
      _ma_check_print_warning(param, "Number of rows changed from %s to %s",
                              llstr(rows, llbuff),
                              llstr(file->state->records, llbuff2));
    }
  }
  else
  {
    /* Something failed: leave the table marked as crashed on repair */
    maria_mark_crashed_on_repair(file);
    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    maria_update_state_info(param, file, 0);
  }
  mysql_mutex_unlock(&share->intern_lock);
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);                        // Mark done
  if (locking)
    maria_lock_database(file, F_UNLCK);

  /* Reset trn, that may have been set by repair */
  if (old_trn && old_trn != file->trn)
    _ma_set_trn_for_table(file, old_trn);
  /* Map the result to a HA_ADMIN_... code; see RETURN in the header */
  error= error ? HA_ADMIN_FAILED :
    (optimize_done ?
     (write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED :
      HA_ADMIN_OK) : HA_ADMIN_ALREADY_DONE);
  DBUG_RETURN(error);
}
1714
1715
1716 /*
1717 Assign table indexes to a specific key cache.
1718 */
1719
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1720 int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
1721 {
1722 #if 0 && NOT_IMPLEMENTED
1723 PAGECACHE *new_pagecache= check_opt->pagecache;
1724 const char *errmsg= 0;
1725 int error= HA_ADMIN_OK;
1726 ulonglong map;
1727 TABLE_LIST *table_list= table->pos_in_table_list;
1728 DBUG_ENTER("ha_maria::assign_to_keycache");
1729
1730 table->keys_in_use_for_query.clear_all();
1731
1732 if (table_list->process_index_hints(table))
1733 DBUG_RETURN(HA_ADMIN_FAILED);
1734 map= ~(ulonglong) 0;
1735 if (!table->keys_in_use_for_query.is_clear_all())
1736 /* use all keys if there's no list specified by the user through hints */
1737 map= table->keys_in_use_for_query.to_ulonglong();
1738
1739 if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
1740 {
1741 char buf[STRING_BUFFER_USUAL_SIZE];
1742 my_snprintf(buf, sizeof(buf),
1743 "Failed to flush to index file (errno: %d)", error);
1744 errmsg= buf;
1745 error= HA_ADMIN_CORRUPT;
1746 }
1747
1748 if (error != HA_ADMIN_OK)
1749 {
1750 /* Send error to user */
1751 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1752 if (!param)
1753 return HA_ADMIN_INTERNAL_ERROR;
1754
1755 maria_chk_init(param);
1756 param->thd= thd;
1757 param->op_name= "assign_to_keycache";
1758 param->db_name= table->s->db.str;
1759 param->table_name= table->s->table_name.str;
1760 param->testflag= 0;
1761 _ma_check_print_error(param, errmsg);
1762 }
1763 DBUG_RETURN(error);
1764 #else
1765 return HA_ADMIN_NOT_IMPLEMENTED;
1766 #endif
1767 }
1768
1769
1770 /*
1771 Preload pages of the index file for a table into the key cache.
1772 */
1773
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1774 int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
1775 {
1776 ulonglong map;
1777 TABLE_LIST *table_list= table->pos_in_table_list;
1778
1779 DBUG_ENTER("ha_maria::preload_keys");
1780
1781 table->keys_in_use_for_query.clear_all();
1782
1783 if (table_list->process_index_hints(table))
1784 DBUG_RETURN(HA_ADMIN_FAILED);
1785
1786 map= ~(ulonglong) 0;
1787 /* Check validity of the index references */
1788 if (!table->keys_in_use_for_query.is_clear_all())
1789 /* use all keys if there's no list specified by the user through hints */
1790 map= table->keys_in_use_for_query.to_ulonglong();
1791
1792 maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
1793 (void*) &thd->variables.preload_buff_size);
1794
1795 int error;
1796
1797 if ((error= maria_preload(file, map, table_list->ignore_leaves)))
1798 {
1799 char buf[MYSQL_ERRMSG_SIZE+20];
1800 const char *errmsg;
1801
1802 switch (error) {
1803 case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
1804 errmsg= "Indexes use different block sizes";
1805 break;
1806 case HA_ERR_OUT_OF_MEM:
1807 errmsg= "Failed to allocate buffer";
1808 break;
1809 default:
1810 my_snprintf(buf, sizeof(buf),
1811 "Failed to read from index file (errno: %d)", my_errno);
1812 errmsg= buf;
1813 }
1814
1815 HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1816 if (!param)
1817 return HA_ADMIN_INTERNAL_ERROR;
1818
1819 maria_chk_init(param);
1820 param->thd= thd;
1821 param->op_name= "preload_keys";
1822 param->db_name= table->s->db.str;
1823 param->table_name= table->s->table_name.str;
1824 param->testflag= 0;
1825 _ma_check_print_error(param, "%s", errmsg);
1826 DBUG_RETURN(HA_ADMIN_FAILED);
1827 }
1828 DBUG_RETURN(HA_ADMIN_OK);
1829 }
1830
1831
1832 /*
1833 Disable indexes, making it persistent if requested.
1834
1835 SYNOPSIS
1836 disable_indexes()
1837 mode mode of operation:
1838 HA_KEY_SWITCH_NONUNIQ disable all non-unique keys
1839 HA_KEY_SWITCH_ALL disable all keys
1840 HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
1841 HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent
1842
1843 IMPLEMENTATION
1844 HA_KEY_SWITCH_NONUNIQ is not implemented.
1845 HA_KEY_SWITCH_ALL_SAVE is not implemented.
1846
1847 RETURN
1848 0 ok
1849 HA_ERR_WRONG_COMMAND mode not implemented.
1850 */
1851
disable_indexes(uint mode)1852 int ha_maria::disable_indexes(uint mode)
1853 {
1854 int error;
1855
1856 if (mode == HA_KEY_SWITCH_ALL)
1857 {
1858 /* call a storage engine function to switch the key map */
1859 error= maria_disable_indexes(file);
1860 }
1861 else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
1862 {
1863 maria_extra(file, HA_EXTRA_NO_KEYS, 0);
1864 info(HA_STATUS_CONST); // Read new key info
1865 error= 0;
1866 }
1867 else
1868 {
1869 /* mode not implemented */
1870 error= HA_ERR_WRONG_COMMAND;
1871 }
1872 return error;
1873 }
1874
1875
/*
  Enable indexes, making it persistent if requested.

  SYNOPSIS
    enable_indexes()
    mode        mode of operation:
                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
                HA_KEY_SWITCH_ALL          enable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent

  DESCRIPTION
    Enable indexes, which might have been disabled by disable_indexes()
    before.
    The modes without _SAVE work only if both data and indexes are empty,
    since the MARIA repair would enable them persistently.
    To be sure in these cases, call handler::delete_all_rows() before.

  IMPLEMENTATION
    HA_KEY_SWITCH_NONUNIQ is not implemented.
    HA_KEY_SWITCH_ALL_SAVE is not implemented.
    HA_KEY_SWITCH_NONUNIQ_SAVE rebuilds the missing keys through repair();
    if the repair by sort fails and param->retry_repair is set, a second
    repair without T_REP_BY_SORT is tried.

  RETURN
    0   ok (also when all keys were already active)
    !=0 Error, among others:
    HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
    HA_ERR_WRONG_COMMAND  mode not implemented.
    HA_ADMIN_INTERNAL_ERROR  the HA_CHECK block could not be allocated.
    1   repair() did not return HA_ADMIN_OK (HA_KEY_SWITCH_NONUNIQ_SAVE).
*/

int ha_maria::enable_indexes(uint mode)
{
  int error;
  /* Remembered so the row count can be restored before a retried repair */
  ha_rows start_rows= file->state->records;
  DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));
  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
  {
    /* All indexes are enabled already. */
    return 0;
  }

  if (mode == HA_KEY_SWITCH_ALL)
  {
    error= maria_enable_indexes(file);
    /*
       Do not try to repair on error,
       as this could make the enabled state persistent,
       but mode==HA_KEY_SWITCH_ALL forbids it.
    */
  }
  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
  {
    THD *thd= table->in_use;
    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
    if (!param)
      return HA_ADMIN_INTERNAL_ERROR;

    /* Show "Creating index" as the thread state; restored further down */
    const char *save_proc_info= thd_proc_info(thd, "Creating index");

    maria_chk_init(param);
    param->op_name= "recreating_index";
    param->testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
                      T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
    /*
      Don't lock and unlock table if it's locked.
      Normally table should be locked.  This test is mostly for safety.
    */
    if (likely(file->lock_type != F_UNLCK))
      param->testflag|= T_NO_LOCKS;

    if (file->create_unique_index_by_sort)
      param->testflag|= T_CREATE_UNIQUE_BY_SORT;

    if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
    {
      /* Record that a repair is now part of this bulk insert */
      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
      /*
        Don't bump create_rename_lsn, because UNDO_BULK_INSERT
        should not be skipped in case of crash during repair.
      */
      param->testflag|= T_NO_CREATE_RENAME_LSN;
    }

    param->myf_rw &= ~MY_WAIT_IF_FULL;
    param->sort_buffer_length= THDVAR(thd,sort_buffer_size);
    param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
    param->tmpdir= &mysql_tmpdir_list;
    /* error becomes 0 or 1 here: it holds the result of the comparison */
    if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair)
    {
      sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
                        "retrying",
                        my_errno, param->db_name, param->table_name);
      /* This should never fail normally */
      DBUG_ASSERT(thd->killed != 0);
      /* Repairing by sort failed. Now try standard repair method. */
      param->testflag &= ~T_REP_BY_SORT;
      file->state->records= start_rows;
      error= (repair(thd, param, 0) != HA_ADMIN_OK);
      /*
        If the standard repair succeeded, clear all error messages which
        might have been set by the first repair. They can still be seen
        with SHOW WARNINGS then.
      */
      if (!error)
        thd->clear_error();
    }
    /* Re-read the constant table information (same call as disable_indexes) */
    info(HA_STATUS_CONST);
    thd_proc_info(thd, save_proc_info);
  }
  else
  {
    /* mode not implemented */
    error= HA_ERR_WRONG_COMMAND;
  }
  /* Debug-only hooks: flush the whole log and/or crash the server here */
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_enable_index",
                  {
                    DBUG_PRINT("maria_crash_enable_index", ("now"));
                    DBUG_SUICIDE();
                  });
  return error;
}
2000
2001
2002 /*
2003 Test if indexes are disabled.
2004
2005
2006 SYNOPSIS
2007 indexes_are_disabled()
2008 no parameters
2009
2010
2011 RETURN
2012 0 indexes are not disabled
2013 1 all indexes are disabled
2014 [2 non-unique indexes are disabled - NOT YET IMPLEMENTED]
2015 */
2016
indexes_are_disabled(void)2017 int ha_maria::indexes_are_disabled(void)
2018 {
2019 return maria_indexes_are_disabled(file);
2020 }
2021
2022
2023 /*
2024 prepare for a many-rows insert operation
2025 e.g. - disable indexes (if they can be recreated fast) or
2026 activate special bulk-insert optimizations
2027
2028 SYNOPSIS
2029 start_bulk_insert(rows, flags)
2030 rows Rows to be inserted
2031 0 if we don't know
2032 flags Flags to control index creation
2033
2034 NOTICE
2035 Do not forget to call end_bulk_insert() later!
2036 */
2037
start_bulk_insert(ha_rows rows,uint flags)2038 void ha_maria::start_bulk_insert(ha_rows rows, uint flags)
2039 {
2040 DBUG_ENTER("ha_maria::start_bulk_insert");
2041 THD *thd= table->in_use;
2042 MARIA_SHARE *share= file->s;
2043 DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));
2044
2045 /* don't enable row cache if too few rows */
2046 if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
2047 {
2048 ulonglong size= thd->variables.read_buff_size, tmp;
2049 if (rows)
2050 {
2051 if (file->state->records)
2052 {
2053 MARIA_INFO maria_info;
2054 maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
2055 set_if_smaller(size, maria_info.mean_reclength * rows);
2056 }
2057 else if (table->s->avg_row_length)
2058 set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
2059 }
2060 tmp= (ulong) size; // Safe becasue of limits
2061 maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp);
2062 }
2063
2064 can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
2065 share->base.keys));
2066 bulk_insert_single_undo= BULK_INSERT_NONE;
2067
2068 if (!(specialflag & SPECIAL_SAFE_MODE))
2069 {
2070 /*
2071 Only disable old index if the table was empty and we are inserting
2072 a lot of rows.
2073 We should not do this for only a few rows as this is slower and
2074 we don't want to update the key statistics based of only a few rows.
2075 Index file rebuild requires an exclusive lock, so if versioning is on
2076 don't do it (see how ha_maria::store_lock() tries to predict repair).
2077 We can repair index only if we have an exclusive (TL_WRITE) lock or
2078 if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK.
2079
2080 To see if table is empty, we shouldn't rely on the old record
2081 count from our transaction's start (if that old count is 0 but
2082 now there are records in the table, we would wrongly destroy
2083 them). So we need to look at share->state.state.records. As a
2084 safety net for now, we don't remove the test of
2085 file->state->records, because there is uncertainty on what will
2086 happen during repair if the two states disagree.
2087
2088 We also have to check in case of transactional tables that the
2089 user has not used LOCK TABLE on the table twice.
2090 */
2091 if ((file->state->records == 0) &&
2092 (share->state.state.records == 0) && can_enable_indexes &&
2093 (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
2094 (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK) &&
2095 (!share->have_versioning || !share->now_transactional ||
2096 file->used_tables->use_count == 1))
2097 {
2098 /**
2099 @todo for a single-row INSERT SELECT, we will go into repair, which
2100 is more costly (flushes, syncs) than a row write.
2101 */
2102 if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
2103 {
2104 /* Internal table; If we get a duplicate something is very wrong */
2105 file->update|= HA_STATE_CHANGED;
2106 maria_clear_all_keys_active(file->s->state.key_map);
2107 }
2108 else
2109 {
2110 my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
2111 maria_disable_indexes_for_rebuild(file, rows, all_keys);
2112 }
2113 if (share->now_transactional)
2114 {
2115 bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
2116 write_log_record_for_bulk_insert(file);
2117 _ma_tmp_disable_logging_for_table(file, TRUE);
2118 /*
2119 Pages currently in the page cache have type PAGECACHE_LSN_PAGE, we
2120 are not allowed to overwrite them with PAGECACHE_PLAIN_PAGE, so
2121 throw them away. It is not losing data, because we just wrote and
2122 forced an UNDO which will for sure empty the table if we crash. The
2123 upcoming unique-key insertions however need a proper index, so we
2124 cannot leave the corrupted on-disk index file, thus we truncate it.
2125 */
2126 maria_delete_all_rows(file);
2127 }
2128 }
2129 else if (!file->bulk_insert &&
2130 (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
2131 {
2132 maria_init_bulk_insert(file,
2133 (size_t) thd->variables.bulk_insert_buff_size,
2134 rows);
2135 }
2136 }
2137 DBUG_VOID_RETURN;
2138 }
2139
2140
2141 /*
2142 end special bulk-insert optimizations,
2143 which have been activated by start_bulk_insert().
2144
2145 SYNOPSIS
2146 end_bulk_insert()
2147 no arguments
2148
2149 RETURN
2150 0 OK
2151 != 0 Error
2152 */
2153
end_bulk_insert()2154 int ha_maria::end_bulk_insert()
2155 {
2156 int first_error, error;
2157 my_bool abort= file->s->deleting;
2158 DBUG_ENTER("ha_maria::end_bulk_insert");
2159
2160 if ((first_error= maria_end_bulk_insert(file, abort)))
2161 abort= 1;
2162
2163 if ((error= maria_extra(file, HA_EXTRA_NO_CACHE, 0)))
2164 {
2165 first_error= first_error ? first_error : error;
2166 abort= 1;
2167 }
2168
2169 if (!abort && can_enable_indexes)
2170 if ((error= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE)))
2171 first_error= first_error ? first_error : error;
2172
2173 if (bulk_insert_single_undo != BULK_INSERT_NONE)
2174 {
2175 DBUG_ASSERT(can_enable_indexes);
2176 /*
2177 Table was transactional just before start_bulk_insert().
2178 No need to flush pages if we did a repair (which already flushed).
2179 */
2180 if ((error= _ma_reenable_logging_for_table(file,
2181 bulk_insert_single_undo ==
2182 BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)))
2183 first_error= first_error ? first_error : error;
2184 bulk_insert_single_undo= BULK_INSERT_NONE; // Safety
2185 }
2186 DBUG_RETURN(first_error);
2187 }
2188
2189
check_and_repair(THD * thd)2190 bool ha_maria::check_and_repair(THD *thd)
2191 {
2192 int error, crashed;
2193 HA_CHECK_OPT check_opt;
2194 const CSET_STRING query_backup= thd->query_string;
2195 DBUG_ENTER("ha_maria::check_and_repair");
2196
2197 check_opt.init();
2198 check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
2199
2200 error= 1;
2201 if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
2202 STATE_MOVED)
2203 {
2204 /* Remove error about crashed table */
2205 thd->get_stmt_da()->clear_warning_info(thd->query_id);
2206 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
2207 ER_CRASHED_ON_USAGE,
2208 "Zerofilling moved table %s", table->s->path.str);
2209 sql_print_information("Zerofilling moved table: '%s'",
2210 table->s->path.str);
2211 if (!(error= zerofill(thd, &check_opt)))
2212 DBUG_RETURN(0);
2213 }
2214
2215 /*
2216 if we got this far - the table is crashed.
2217 but don't auto-repair if maria_recover_options is not set
2218 */
2219 if (!maria_recover_options)
2220 DBUG_RETURN(error);
2221
2222 error= 0;
2223 // Don't use quick if deleted rows
2224 if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
2225 check_opt.flags |= T_QUICK;
2226
2227 thd->set_query((char*) table->s->table_name.str,
2228 (uint) table->s->table_name.length, system_charset_info);
2229
2230 if (!(crashed= maria_is_crashed(file)))
2231 {
2232 sql_print_warning("Checking table: '%s'", table->s->path.str);
2233 crashed= check(thd, &check_opt);
2234 }
2235
2236 if (crashed)
2237 {
2238 bool save_log_all_errors;
2239 sql_print_warning("Recovering table: '%s'", table->s->path.str);
2240 save_log_all_errors= thd->log_all_errors;
2241 thd->log_all_errors|= (thd->variables.log_warnings > 2);
2242 check_opt.flags=
2243 ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
2244 (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
2245 T_AUTO_REPAIR);
2246 if (repair(thd, &check_opt))
2247 error= 1;
2248 thd->log_all_errors= save_log_all_errors;
2249 }
2250 thd->set_query(query_backup);
2251 DBUG_RETURN(error);
2252 }
2253
2254
is_crashed() const2255 bool ha_maria::is_crashed() const
2256 {
2257 return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) ||
2258 (my_disable_locking && file->s->state.open_count));
2259 }
2260
/*
  Guard for operations that are refused under a concurrent-insert lock.

  If the handler holds a TL_WRITE_CONCURRENT_INSERT lock and the table is
  not a sequence, reports ER_CHECK_NOT_IMPLEMENTED with 'msg' and makes the
  ENCLOSING function return 1. Expects 'file' and 'table' to be in scope.
*/
#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
  do { \
    if (file->lock.type == TL_WRITE_CONCURRENT_INSERT && !table->s->sequence) \
    { \
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
      return 1; \
    } \
  } while(0)
2269
update_row(const uchar * old_data,const uchar * new_data)2270 int ha_maria::update_row(const uchar * old_data, const uchar * new_data)
2271 {
2272 CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
2273 return maria_update(file, old_data, new_data);
2274 }
2275
2276
delete_row(const uchar * buf)2277 int ha_maria::delete_row(const uchar * buf)
2278 {
2279 CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
2280 return maria_delete(file, buf);
2281 }
2282
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2283 int ha_maria::index_read_map(uchar * buf, const uchar * key,
2284 key_part_map keypart_map,
2285 enum ha_rkey_function find_flag)
2286 {
2287 DBUG_ASSERT(inited == INDEX);
2288 int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
2289 return error;
2290 }
2291
2292
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2293 int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
2294 key_part_map keypart_map,
2295 enum ha_rkey_function find_flag)
2296 {
2297 int error;
2298 /* Use the pushed index condition if it matches the index we're scanning */
2299 end_range= NULL;
2300 if (index == pushed_idx_cond_keyno)
2301 ma_set_index_cond_func(file, handler_index_cond_check, this);
2302
2303 error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
2304
2305 ma_set_index_cond_func(file, NULL, 0);
2306 return error;
2307 }
2308
2309
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)2310 int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
2311 key_part_map keypart_map)
2312 {
2313 DBUG_ENTER("ha_maria::index_read_last_map");
2314 DBUG_ASSERT(inited == INDEX);
2315 int error= maria_rkey(file, buf, active_index, key, keypart_map,
2316 HA_READ_PREFIX_LAST);
2317 DBUG_RETURN(error);
2318 }
2319
2320
index_next(uchar * buf)2321 int ha_maria::index_next(uchar * buf)
2322 {
2323 DBUG_ASSERT(inited == INDEX);
2324 int error= maria_rnext(file, buf, active_index);
2325 return error;
2326 }
2327
2328
index_prev(uchar * buf)2329 int ha_maria::index_prev(uchar * buf)
2330 {
2331 DBUG_ASSERT(inited == INDEX);
2332 int error= maria_rprev(file, buf, active_index);
2333 return error;
2334 }
2335
2336
index_first(uchar * buf)2337 int ha_maria::index_first(uchar * buf)
2338 {
2339 DBUG_ASSERT(inited == INDEX);
2340 int error= maria_rfirst(file, buf, active_index);
2341 return error;
2342 }
2343
2344
index_last(uchar * buf)2345 int ha_maria::index_last(uchar * buf)
2346 {
2347 DBUG_ASSERT(inited == INDEX);
2348 int error= maria_rlast(file, buf, active_index);
2349 return error;
2350 }
2351
2352
index_next_same(uchar * buf,const uchar * key,uint length)2353 int ha_maria::index_next_same(uchar * buf,
2354 const uchar *key __attribute__ ((unused)),
2355 uint length __attribute__ ((unused)))
2356 {
2357 int error;
2358 DBUG_ASSERT(inited == INDEX);
2359 /*
2360 TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
2361 happens
2362 */
2363 do
2364 {
2365 error= maria_rnext_same(file,buf);
2366 } while (error == HA_ERR_RECORD_DELETED);
2367 return error;
2368 }
2369
2370
index_init(uint idx,bool sorted)2371 int ha_maria::index_init(uint idx, bool sorted)
2372 {
2373 active_index=idx;
2374 if (pushed_idx_cond_keyno == idx)
2375 ma_set_index_cond_func(file, handler_index_cond_check, this);
2376 return 0;
2377 }
2378
2379
/*
  Stop using the active index: mark no index as active, remove the index
  condition function, clear in_range_check_pushed_down and close the
  DS-MRR state. Always returns 0.
*/

int ha_maria::index_end()
{
  active_index=MAX_KEY;
  ma_set_index_cond_func(file, NULL, 0);
  in_range_check_pushed_down= FALSE;
  ds_mrr.dsmrr_close();
  return 0;
}
2388
2389
rnd_init(bool scan)2390 int ha_maria::rnd_init(bool scan)
2391 {
2392 if (scan)
2393 return maria_scan_init(file);
2394 return maria_reset(file); // Free buffers
2395 }
2396
2397
/*
  End reading rows without an index: close the DS-MRR state and end the
  table scan. Always returns 0.
*/

int ha_maria::rnd_end()
{
  ds_mrr.dsmrr_close();
  /* Safe to call even if we don't have started a scan */
  maria_scan_end(file);
  return 0;
}
2405
2406
rnd_next(uchar * buf)2407 int ha_maria::rnd_next(uchar *buf)
2408 {
2409 int error= maria_scan(file, buf);
2410 return error;
2411 }
2412
2413
remember_rnd_pos()2414 int ha_maria::remember_rnd_pos()
2415 {
2416 return (*file->s->scan_remember_pos)(file, &remember_pos);
2417 }
2418
2419
restart_rnd_next(uchar * buf)2420 int ha_maria::restart_rnd_next(uchar *buf)
2421 {
2422 int error;
2423 if ((error= (*file->s->scan_restore_pos)(file, remember_pos)))
2424 return error;
2425 return rnd_next(buf);
2426 }
2427
2428
rnd_pos(uchar * buf,uchar * pos)2429 int ha_maria::rnd_pos(uchar *buf, uchar *pos)
2430 {
2431 int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
2432 return error;
2433 }
2434
2435
position(const uchar * record)2436 void ha_maria::position(const uchar *record)
2437 {
2438 my_off_t row_position= maria_position(file);
2439 my_store_ptr(ref, ref_length, row_position);
2440 }
2441
2442
info(uint flag)2443 int ha_maria::info(uint flag)
2444 {
2445 MARIA_INFO maria_info;
2446 char name_buff[FN_REFLEN];
2447
2448 (void) maria_status(file, &maria_info, flag);
2449 if (flag & HA_STATUS_VARIABLE)
2450 {
2451 stats.records= maria_info.records;
2452 stats.deleted= maria_info.deleted;
2453 stats.data_file_length= maria_info.data_file_length;
2454 stats.index_file_length= maria_info.index_file_length;
2455 stats.delete_length= maria_info.delete_length;
2456 stats.check_time= maria_info.check_time;
2457 stats.mean_rec_length= maria_info.mean_reclength;
2458 stats.checksum= file->state->checksum;
2459 }
2460 if (flag & HA_STATUS_CONST)
2461 {
2462 TABLE_SHARE *share= table->s;
2463 stats.max_data_file_length= maria_info.max_data_file_length;
2464 stats.max_index_file_length= maria_info.max_index_file_length;
2465 stats.create_time= maria_info.create_time;
2466 ref_length= maria_info.reflength;
2467 share->db_options_in_use= maria_info.options;
2468 stats.block_size= maria_block_size;
2469 stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
2470
2471 /* Update share */
2472 share->keys_in_use.set_prefix(share->keys);
2473 share->keys_in_use.intersect_extended(maria_info.key_map);
2474 share->keys_for_keyread.intersect(share->keys_in_use);
2475 share->db_record_offset= maria_info.record_offset;
2476 if (share->key_parts)
2477 {
2478 ulong *to= table->key_info[0].rec_per_key, *end;
2479 double *from= maria_info.rec_per_key;
2480 for (end= to+ share->key_parts ; to < end ; to++, from++)
2481 *to= (ulong) (*from + 0.5);
2482 }
2483
2484 /*
2485 Set data_file_name and index_file_name to point at the symlink value
2486 if table is symlinked (Ie; Real name is not same as generated name)
2487 */
2488 data_file_name= index_file_name= 0;
2489 fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
2490 MY_APPEND_EXT | MY_UNPACK_FILENAME);
2491 if (strcmp(name_buff, maria_info.data_file_name))
2492 data_file_name =maria_info.data_file_name;
2493 fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
2494 MY_APPEND_EXT | MY_UNPACK_FILENAME);
2495 if (strcmp(name_buff, maria_info.index_file_name))
2496 index_file_name=maria_info.index_file_name;
2497 }
2498 if (flag & HA_STATUS_ERRKEY)
2499 {
2500 errkey= maria_info.errkey;
2501 my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
2502 }
2503 if (flag & HA_STATUS_TIME)
2504 stats.update_time= maria_info.update_time;
2505 if (flag & HA_STATUS_AUTO)
2506 stats.auto_increment_value= maria_info.auto_increment;
2507
2508 return 0;
2509 }
2510
2511
extra(enum ha_extra_function operation)2512 int ha_maria::extra(enum ha_extra_function operation)
2513 {
2514 int tmp;
2515 TRN *old_trn= file->trn;
2516 if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
2517 return 0;
2518 #ifdef NOT_USED
2519 if (operation == HA_EXTRA_MMAP && !opt_maria_use_mmap)
2520 return 0;
2521 #endif
2522
2523 /*
2524 We have to set file->trn here because in some cases we call
2525 extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
2526 without calling commit/rollback in between. If file->trn is not set
2527 we can't remove file->share from the transaction list in the extra() call.
2528
2529 In current code we don't have to do this for HA_EXTRA_PREPARE_FOR_RENAME
2530 as this is only used the intermediate table used by ALTER TABLE which
2531 is not part of the transaction (it's not in the TRN list). Better to
2532 keep this for now, to not break anything in a stable release.
2533 When HA_EXTRA_PREPARE_FOR_RENAME is not handled below, we can change
2534 the warnings in _ma_remove_table_from_trnman() to asserts.
2535
2536 table->in_use is not set in the case this is a done as part of closefrm()
2537 as part of drop table.
2538 */
2539
2540 if (file->s->now_transactional && table->in_use &&
2541 (operation == HA_EXTRA_PREPARE_FOR_DROP ||
2542 operation == HA_EXTRA_PREPARE_FOR_RENAME ||
2543 operation == HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
2544 {
2545 THD *thd= table->in_use;
2546 TRN *trn= THD_TRN;
2547 _ma_set_tmp_trn_for_table(file, trn);
2548 }
2549 DBUG_ASSERT(file->s->base.born_transactional || file->trn == 0 ||
2550 file->trn == &dummy_transaction_object);
2551
2552 tmp= maria_extra(file, operation, 0);
2553 file->trn= old_trn; // Reset trn if was used
2554 return tmp;
2555 }
2556
reset(void)2557 int ha_maria::reset(void)
2558 {
2559 ma_set_index_cond_func(file, NULL, 0);
2560 ds_mrr.dsmrr_close();
2561 if (file->trn)
2562 {
2563 /* Next statement is a new statement. Ensure it's logged */
2564 trnman_set_flags(file->trn,
2565 trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED);
2566 }
2567 return maria_reset(file);
2568 }
2569
2570 /* To be used with WRITE_CACHE and EXTRA_CACHE */
2571
extra_opt(enum ha_extra_function operation,ulong cache_size)2572 int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
2573 {
2574 if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
2575 return 0;
2576 return maria_extra(file, operation, (void*) &cache_size);
2577 }
2578
2579
delete_all_rows()2580 int ha_maria::delete_all_rows()
2581 {
2582 THD *thd= table->in_use;
2583 TRN *trn= file->trn;
2584 CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("TRUNCATE in WRITE CONCURRENT");
2585 #ifdef EXTRA_DEBUG
2586 if (trn && ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
2587 {
2588 trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
2589 TRN_STATE_TABLES_CAN_CHANGE);
2590 (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2591 (uchar*) thd->query(), thd->query_length());
2592 }
2593 #endif
2594 /*
2595 If we are under LOCK TABLES, we have to do a commit as
2596 delete_all_rows() can't be rolled back
2597 */
2598 if (table->in_use->locked_tables_mode && trn &&
2599 trnman_has_locked_tables(trn))
2600 {
2601 int error;
2602 if ((error= implicit_commit(thd, 1)))
2603 return error;
2604 }
2605
2606 /* Note that this can't be rolled back */
2607 return maria_delete_all_rows(file);
2608 }
2609
2610
delete_table(const char * name)2611 int ha_maria::delete_table(const char *name)
2612 {
2613 THD *thd= current_thd;
2614 (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
2615 (uchar*) thd->query(), thd->query_length());
2616 return maria_delete_table(name);
2617 }
2618
2619
/*
  Close the handler and delete the files of the table 'name'.

  This is mainly for temporary tables, so no logging necessary.
  Errors from ha_close() and maria_delete_table_files() are ignored.
*/

void ha_maria::drop_table(const char *name)
{
  /* Only expected to be called for temporary tables */
  DBUG_ASSERT(file->s->temporary);
  (void) ha_close();
  (void) maria_delete_table_files(name, 1, 0);
}
2628
2629
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2630 void ha_maria::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2631 {
2632 handler::change_table_ptr(table_arg, share);
2633 if (file)
2634 file->external_ref= table_arg;
2635 }
2636
2637
/*
  Lock or unlock the table for the given thread.

  SYNOPSIS
    external_lock()
    thd         Thread handle
    lock_type   F_UNLCK to unlock, otherwise the lock to take

  DESCRIPTION
    For tables that were created transactional (base.born_transactional):
    - on lock: counts the table in file->trn if one is already attached,
      and disables logging for the table when thd->transaction.on is
      not set;
    - on unlock: re-enables logging, detaches the transaction from the
      table, and when the last locked table of the transaction is released
      ends the transaction (commit if MARIA_CANNOT_ROLLBACK is defined,
      otherwise rollback unless a multi-statement transaction is active).
    Finally maria_lock_database() is called for every kind of table.

  RETURN
    1 if re-enabling logging or the commit failed, otherwise the result
    of maria_lock_database()
*/

int ha_maria::external_lock(THD *thd, int lock_type)
{
  DBUG_ENTER("ha_maria::external_lock");
  file->external_ref= (void*) table;            // For ma_killed()
  /*
    We don't test now_transactional because it may vary between lock/unlock
    and thus confuse our reference counting.
    It is critical to skip non-transactional tables: user-visible temporary
    tables get an external_lock() when read/written for the first time, but no
    corresponding unlock (they just stay locked and are later dropped while
    locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp"
    would never commit as its "locked_tables" count would stay 1.
    When Maria has has_transactions()==TRUE, open_temporary_table()
    (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
    external_lock(F_UNLCK) will happen and we can then allow the user to
    create transactional temporary tables.
  */
  if (file->s->base.born_transactional)
  {
    /* Transactional table */
    if (lock_type != F_UNLCK)
    {
      if (file->trn)
      {
        /* This can only happen with tables created with clone() */
        DBUG_PRINT("info",("file->trn: %p", file->trn));
        trnman_increment_locked_tables(file->trn);
      }

      if (!thd->transaction.on)
      {
        /*
          No need to log REDOs/UNDOs. If this is an internal temporary table
          which will be renamed to a permanent table (like in ALTER TABLE),
          the rename happens after unlocking so will be durable (and the table
          will get its create_rename_lsn).
          Note: if we wanted to enable users to have an old backup and apply
          tons of archived logs to roll-forward, we could then not disable
          REDOs/UNDOs in this case.
        */
        DBUG_PRINT("info", ("Disabling logging for table"));
        _ma_tmp_disable_logging_for_table(file, TRUE);
      }
    }
    else
    {
      /*
        We have to test for THD_TRN to protect against implicit commits.
        trn must be read here, before file->trn is reset below.
      */
      TRN *trn= (file->trn != &dummy_transaction_object && THD_TRN ? file->trn : 0);
      /* End of transaction */

      /*
        We always re-enable, don't rely on thd->transaction.on as it is
        sometimes reset to true after unlocking (see mysql_truncate() for a
        partitioned table based on Maria).
        Note that we can come here without having an exclusive lock on the
        table, for example in this case:
        external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
        abortion; external_lock(F_UNLCK). Fortunately, the re-enabling happens
        only if we were the thread which disabled logging.
      */
      if (_ma_reenable_logging_for_table(file, TRUE))
        DBUG_RETURN(1);
      _ma_reset_trn_for_table(file);
      /*
        Ensure that file->state points to the current number of rows. This
        is needed if someone calls maria_info() without first doing an
        external lock of the table
      */
      file->state= &file->s->state.state;
      if (trn)
      {
        DBUG_PRINT("info",
                   ("locked_tables: %u", trnman_has_locked_tables(trn)));
        DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
        /* True only when this unlock released the last locked table */
        if (trnman_has_locked_tables(trn) &&
            !trnman_decrement_locked_tables(trn))
        {
          /*
            OK should not have been sent to client yet (ACID).
            This is a bit excessive, ACID requires this only if there are some
            changes to commit (rollback shouldn't be tested).
          */
          DBUG_ASSERT(!thd->get_stmt_da()->is_sent() ||
                      thd->killed);
          /* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
          if (ma_commit(trn))
            DBUG_RETURN(1);
          THD_TRN= 0;
#else
          if (!(thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
          {
            trnman_rollback_trn(trn);
            DBUG_PRINT("info", ("THD_TRN set to 0x0"));
            THD_TRN= 0;
          }
#endif
        }
        /* Make sure the next statement gets logged again */
        trnman_set_flags(trn, trnman_get_flags(trn) & ~ TRN_STATE_INFO_LOGGED);
      }
    }
  } /* if transactional table */
  /* Temporary tables get F_EXTRA_LCK instead of the requested lock type */
  int result = maria_lock_database(file, !table->s->tmp_table ?
                                   lock_type : ((lock_type == F_UNLCK) ?
                                                F_UNLCK : F_EXTRA_LCK));
  if (!file->s->base.born_transactional)
    file->state= &file->s->state.state;         // Restore state if clone
  DBUG_RETURN(result);
}
2747
start_stmt(THD * thd,thr_lock_type lock_type)2748 int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
2749 {
2750 TRN *trn;
2751 if (file->s->base.born_transactional)
2752 {
2753 trn= THD_TRN;
2754 DBUG_ASSERT(trn); // this may be called only after external_lock()
2755 DBUG_ASSERT(trnman_has_locked_tables(trn));
2756 DBUG_ASSERT(lock_type != TL_UNLOCK);
2757 DBUG_ASSERT(file->trn == trn);
2758
2759 /*
2760 As external_lock() was already called, don't increment locked_tables.
2761 Note that we call the function below possibly several times when
2762 statement starts (once per table). This is ok as long as that function
2763 does cheap operations. Otherwise, we will need to do it only on first
2764 call to start_stmt().
2765 */
2766 trnman_new_statement(trn);
2767
2768 #ifdef EXTRA_DEBUG
2769 if (!(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED) &&
2770 trnman_get_flags(trn) & TRN_STATE_TABLES_CAN_CHANGE)
2771 {
2772 trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED);
2773 (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2774 (uchar*) thd->query(),
2775 thd->query_length());
2776 }
2777 #endif
2778 }
2779 return 0;
2780 }
2781
2782
2783 /*
2784 Reset THD_TRN and all file->trn related to the transaction
2785 This is needed as some calls, like extra() or external_lock() may access
2786 it before next transaction is started
2787 */
2788
reset_thd_trn(THD * thd,MARIA_HA * first_table)2789 static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
2790 {
2791 DBUG_ENTER("reset_thd_trn");
2792 THD_TRN= NULL;
2793 for (MARIA_HA *table= first_table; table ;
2794 table= table->trn_next)
2795 {
2796 _ma_reset_trn_for_table(table);
2797
2798 /*
2799 If table has changed by this statement, invalidate it from the query
2800 cache
2801 */
2802 if (table->row_changes != table->start_row_changes)
2803 {
2804 table->start_row_changes= table->row_changes;
2805 DBUG_ASSERT(table->s->chst_invalidator != NULL);
2806 (*table->s->chst_invalidator)(table->s->data_file_name.str);
2807 }
2808 }
2809 DBUG_VOID_RETURN;
2810 }
2811
2812
2813 /**
2814 Performs an implicit commit of the Maria transaction and creates a new
2815 one.
2816
2817 This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
2818 be participant in the connection's transaction and so the implicit commits
2819 (ha_commit()) (like in end_active_trans()) will do the implicit commit
2820 without need to call this function which can then be removed.
2821
2822 @param thd THD object
2823 @param new_trn if a new transaction should be created; a new
2824 transaction is not needed when we know that the
2825 tables will be unlocked very soon.
2826 */
2827
int ha_maria::implicit_commit(THD *thd, bool new_trn)
{
#ifndef MARIA_CANNOT_ROLLBACK
#error this method should be removed
#endif
  /*
    Return value: 0 when there is nothing to commit or all went well,
    1 when ma_commit() failed, HA_ERR_OUT_OF_MEM when the replacement
    transaction or a table's live state could not be set up (this value
    overrides an earlier 1).
  */
  TRN *trn;
  int error;
  uint locked_tables;
  extern my_bool plugins_are_initialized;
  MARIA_HA *used_tables, *trn_next;
  DBUG_ENTER("ha_maria::implicit_commit");

  /* Nothing to do if the engine is not up or no transaction is attached */
  if (!maria_hton || !plugins_are_initialized || !(trn= THD_TRN))
    DBUG_RETURN(0);
  if (!new_trn && (thd->locked_tables_mode == LTM_LOCK_TABLES ||
                   thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
  {
    /*
      No commit inside LOCK TABLES.

      Note that we come here only at the end of the top statement
      (dispatch_command()), we are never committing inside a sub-statement.
    */
    DBUG_PRINT("info", ("locked_tables, skipping"));
    DBUG_RETURN(0);
  }

  /* Remember the lock count so it can be carried over to the new trn */
  locked_tables= trnman_has_locked_tables(trn);

  /* Save the list head before the commit; it is needed afterwards */
  used_tables= (MARIA_HA*) trn->used_instances;
  error= 0;
  if (unlikely(ma_commit(trn)))
    error= 1;
  if (!new_trn)
  {
    /* Caller does not want a replacement: detach everything and leave */
    reset_thd_trn(thd, used_tables);
    goto end;
  }

  /*
    We need to create a new transaction and put it in THD_TRN. Indeed,
    tables may be under LOCK TABLES, and so they will start the next
    statement assuming they have a trn (see ha_maria::start_stmt()).
  */
  trn= trnman_new_trn(& thd->transaction.wt);
  THD_TRN= trn;
  if (unlikely(trn == NULL))
  {
    reset_thd_trn(thd, used_tables);
    error= HA_ERR_OUT_OF_MEM;
    goto end;
  }
  /*
    Move all locked tables to the new transaction
    We must do it here as otherwise file->thd and file->state may be
    stale pointers. We can't do this in start_stmt() as we don't know
    when we should call _ma_setup_live_state() and in some cases, like
    in check table, we use the table without calling start_stmt().
  */

  for (MARIA_HA *handler= used_tables; handler ;
       handler= trn_next)
  {
    /* Read the link first: the calls below re-attach the handler */
    trn_next= handler->trn_next;
    DBUG_ASSERT(handler->s->base.born_transactional);

    /* If handler uses versioning */
    if (handler->s->lock_key_trees)
    {
      /* _ma_set_trn_for_table() will be called indirectly */
      if (_ma_setup_live_state(handler))
        error= HA_ERR_OUT_OF_MEM;
    }
    else
      _ma_set_trn_for_table(handler, trn);
  }
  /* This is just a commit, tables stay locked if they were: */
  trnman_reset_locked_tables(trn, locked_tables);

end:
  DBUG_RETURN(error);
}
2910
2911
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)2912 THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
2913 THR_LOCK_DATA **to,
2914 enum thr_lock_type lock_type)
2915 {
2916 /* Test if we can fix test below */
2917 DBUG_ASSERT(lock_type != TL_UNLOCK &&
2918 (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));
2919 if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
2920 {
2921 const enum enum_sql_command sql_command= thd->lex->sql_command;
2922 /*
2923 We have to disable concurrent inserts for INSERT ... SELECT or
2924 INSERT/UPDATE/DELETE with sub queries if we are using statement based
2925 logging. We take the safe route here and disable this for all commands
2926 that only does reading that are not SELECT.
2927 */
2928 if (lock_type <= TL_READ_HIGH_PRIORITY &&
2929 !thd->is_current_stmt_binlog_format_row() &&
2930 (sql_command != SQLCOM_SELECT &&
2931 sql_command != SQLCOM_LOCK_TABLES) &&
2932 (thd->variables.option_bits & OPTION_BIN_LOG) &&
2933 mysql_bin_log.is_open())
2934 lock_type= TL_READ_NO_INSERT;
2935 else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
2936 {
2937 const enum enum_duplicates duplicates= thd->lex->duplicates;
2938 /*
2939 Explanation for the 3 conditions below, in order:
2940
2941 - Bulk insert may use repair, which will cause problems if other
2942 threads try to read/insert to the table: disable versioning.
2943 Note that our read of file->state->records is incorrect, as such
2944 variable may have changed when we come to start_bulk_insert() (worse
2945 case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
2946 uses repair). This is prevented because start_bulk_insert() will not
2947 try repair if we enabled versioning.
2948 - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
2949 TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
2950 update/delete of a row and versioning doesn't support that
2951 - same for LOAD DATA CONCURRENT REPLACE.
2952 */
2953 if ((file->state->records == 0) ||
2954 (sql_command == SQLCOM_INSERT_SELECT && duplicates == DUP_UPDATE) ||
2955 (sql_command == SQLCOM_LOAD && duplicates == DUP_REPLACE))
2956 lock_type= TL_WRITE;
2957 }
2958 file->lock.type= lock_type;
2959 }
2960 *to++= &file->lock;
2961 return to;
2962 }
2963
2964
update_create_info(HA_CREATE_INFO * create_info)2965 void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
2966 {
2967 ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
2968 if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
2969 {
2970 create_info->auto_increment_value= stats.auto_increment_value;
2971 }
2972 create_info->data_file_name= data_file_name;
2973 create_info->index_file_name= index_file_name;
2974 /*
2975 Keep user-specified row_type for ALTER,
2976 but show the actually used one in SHOW
2977 */
2978 if (create_info->row_type != ROW_TYPE_DEFAULT &&
2979 !(thd_sql_command(ha_thd()) == SQLCOM_ALTER_TABLE))
2980 create_info->row_type= get_row_type();
2981 /*
2982 Show always page checksums, as this can be forced with
2983 maria_page_checksums variable
2984 */
2985 if (create_info->page_checksum == HA_CHOICE_UNDEF)
2986 create_info->page_checksum=
2987 (file->s->options & HA_OPTION_PAGE_CHECKSUM) ? HA_CHOICE_YES :
2988 HA_CHOICE_NO;
2989 }
2990
2991
get_row_type() const2992 enum row_type ha_maria::get_row_type() const
2993 {
2994 switch (file->s->data_file_type) {
2995 case STATIC_RECORD: return ROW_TYPE_FIXED;
2996 case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC;
2997 case BLOCK_RECORD: return ROW_TYPE_PAGE;
2998 case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
2999 default: return ROW_TYPE_NOT_USED;
3000 }
3001 }
3002
3003
maria_row_type(HA_CREATE_INFO * info)3004 static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
3005 {
3006 if (info->transactional == HA_CHOICE_YES)
3007 return BLOCK_RECORD;
3008 switch (info->row_type) {
3009 case ROW_TYPE_FIXED: return STATIC_RECORD;
3010 case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD;
3011 default: return BLOCK_RECORD;
3012 }
3013 }
3014
3015
/*
  Create a new Aria table.

  Translates the SQL layer's table definition and create options into the
  structures and flags expected by maria_create() and calls it.

  @param name            Table name (path); passed through fn_format()
  @param table_arg       Table definition as seen by the SQL layer
  @param ha_create_info  Create options; 'transactional' is overwritten with
                         HA_CHOICE_NO for sequence tables

  @return 0 on success, otherwise the error from table2maria() or
          maria_create()
*/

int ha_maria::create(const char *name, TABLE *table_arg,
                     HA_CREATE_INFO *ha_create_info)
{
  int error;
  uint create_flags= 0, record_count= 0, i;
  char buff[FN_REFLEN];
  MARIA_KEYDEF *keydef;
  MARIA_COLUMNDEF *recinfo;
  MARIA_CREATE_INFO create_info;
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
  enum data_file_type row_type;
  THD *thd= current_thd;
  DBUG_ENTER("ha_maria::create");

  /* One key using a parser is enough to set the flag */
  for (i= 0; i < share->keys; i++)
  {
    if (table_arg->key_info[i].flags & HA_USES_PARSER)
    {
      create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
      break;
    }
  }
  /* Note: BLOCK_RECORD is used if table is transactional */
  row_type= maria_row_type(ha_create_info);
  /* Tell the user when TRANSACTIONAL=1 overrides an explicit row format */
  if (ha_create_info->transactional == HA_CHOICE_YES &&
      ha_create_info->row_type != ROW_TYPE_PAGE &&
      ha_create_info->row_type != ROW_TYPE_NOT_USED &&
      ha_create_info->row_type != ROW_TYPE_DEFAULT)
    push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
                 ER_ILLEGAL_HA_CREATE_OPTION,
                 "Row format set to PAGE because of TRANSACTIONAL=1 option");

  if (share->table_type == TABLE_TYPE_SEQUENCE)
  {
    /* For sequences, the simplest record type is appropriate */
    row_type= STATIC_RECORD;
    ha_create_info->transactional= HA_CHOICE_NO;
  }

  bzero((char*) &create_info, sizeof(create_info));
  if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
                          &record_count, &create_info)))
    DBUG_RETURN(error); /* purecov: inspected */
  create_info.max_rows= share->max_rows;
  create_info.reloc_rows= share->min_rows;
  create_info.with_auto_increment= share->next_number_key_offset == 0;
  /* Stored value is one less than the requested start value (0 if none) */
  create_info.auto_increment= (ha_create_info->auto_increment_value ?
                               ha_create_info->auto_increment_value -1 :
                               (ulonglong) 0);
  create_info.data_file_length= ((ulonglong) share->max_rows *
                                 share->avg_row_length);
  create_info.data_file_name= ha_create_info->data_file_name;
  create_info.index_file_name= ha_create_info->index_file_name;
  create_info.language= share->table_charset->number;

  /*
    Table is transactional:
    - If the user specify that table is transactional (in this case
      row type is forced to BLOCK_RECORD)
    - If they specify BLOCK_RECORD without specifying transactional behaviour

    Shouldn't this test be pushed down to maria_create()? Because currently,
    ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
    born_transactional==1, which confuses some recovery-related code.
  */
  create_info.transactional= (row_type == BLOCK_RECORD &&
                              ha_create_info->transactional != HA_CHOICE_NO);

  /* Temporary tables are never transactional and delay key writes */
  if (ha_create_info->tmp_table())
  {
    create_flags|= HA_CREATE_TMP_TABLE | HA_CREATE_DELAY_KEY_WRITE;
    create_info.transactional= 0;
  }
  /* Map the remaining table options to maria_create() flags */
  if (ha_create_info->options & HA_CREATE_KEEP_FILES)
    create_flags|= HA_CREATE_KEEP_FILES;
  if (options & HA_OPTION_PACK_RECORD)
    create_flags|= HA_PACK_RECORD;
  if (options & HA_OPTION_CHECKSUM)
    create_flags|= HA_CREATE_CHECKSUM;
  if (options & HA_OPTION_DELAY_KEY_WRITE)
    create_flags|= HA_CREATE_DELAY_KEY_WRITE;
  /* Page checksums: explicit YES, or unspecified with the global default on */
  if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
       maria_page_checksums) ||
       ha_create_info->page_checksum == HA_CHOICE_YES)
    create_flags|= HA_CREATE_PAGE_CHECKSUM;

  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
                                 (uchar*) thd->query(), thd->query_length());

  /* TODO: Check that the following fn_format is really needed */
  error=
    maria_create(fn_format(buff, name, "", "",
                           MY_UNPACK_FILENAME | MY_APPEND_EXT),
                 row_type, share->keys, keydef,
                 record_count, recinfo,
                 0, (MARIA_UNIQUEDEF *) 0,
                 &create_info, create_flags);

  /*
    NOTE(review): only recinfo is freed here; presumably table2maria()
    allocates keydef in the same memory block - confirm.
  */
  my_free(recinfo);
  DBUG_RETURN(error);
}
3118
3119
rename_table(const char * from,const char * to)3120 int ha_maria::rename_table(const char *from, const char *to)
3121 {
3122 THD *thd= current_thd;
3123 (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3124 (uchar*) thd->query(), thd->query_length());
3125 return maria_rename(from, to);
3126 }
3127
3128
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3129 void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
3130 ulonglong nb_desired_values,
3131 ulonglong *first_value,
3132 ulonglong *nb_reserved_values)
3133 {
3134 ulonglong nr;
3135 int error;
3136 uchar key[MARIA_MAX_KEY_BUFF];
3137
3138 if (!table->s->next_number_key_offset)
3139 { // Autoincrement at key-start
3140 ha_maria::info(HA_STATUS_AUTO);
3141 *first_value= stats.auto_increment_value;
3142 /* Maria has only table-level lock for now, so reserves to +inf */
3143 *nb_reserved_values= ULONGLONG_MAX;
3144 return;
3145 }
3146
3147 /* it's safe to call the following if bulk_insert isn't on */
3148 maria_flush_bulk_insert(file, table->s->next_number_index);
3149
3150 (void) extra(HA_EXTRA_KEYREAD);
3151 key_copy(key, table->record[0],
3152 table->key_info + table->s->next_number_index,
3153 table->s->next_number_key_offset);
3154 error= maria_rkey(file, table->record[1], (int) table->s->next_number_index,
3155 key, make_prev_keypart_map(table->s->next_number_keypart),
3156 HA_READ_PREFIX_LAST);
3157 if (error)
3158 nr= 1;
3159 else
3160 {
3161 /* Get data from record[1] */
3162 nr= ((ulonglong) table->next_number_field->
3163 val_int_offset(table->s->rec_buff_length) + 1);
3164 }
3165 extra(HA_EXTRA_NO_KEYREAD);
3166 *first_value= nr;
3167 /*
3168 MySQL needs to call us for next row: assume we are inserting ("a",null)
3169 here, we return 3, and next this statement will want to insert ("b",null):
3170 there is no reason why ("b",3+1) would be the good row to insert: maybe it
3171 already exists, maybe 3+1 is too large...
3172 */
3173 *nb_reserved_values= 1;
3174 }
3175
3176
3177 /*
  Find out how many rows there are in the given range
3179
3180 SYNOPSIS
3181 records_in_range()
3182 inx Index to use
3183 min_key Start of range. Null pointer if from first key
3184 max_key End of range. Null pointer if to last key
3185
3186 NOTES
3187 min_key.flag can have one of the following values:
3188 HA_READ_KEY_EXACT Include the key in the range
3189 HA_READ_AFTER_KEY Don't include key in range
3190
3191 max_key.flag can have one of the following values:
3192 HA_READ_BEFORE_KEY Don't include key in range
3193 HA_READ_AFTER_KEY Include all 'end_key' values in the range
3194
3195 RETURN
3196 HA_POS_ERROR Something is wrong with the index tree.
    0                   There are no matching keys in the given range
    number > 0          There are approximately 'number' matching rows in
                        the range.
3200 */
3201
records_in_range(uint inx,key_range * min_key,key_range * max_key)3202 ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
3203 key_range *max_key)
3204 {
3205 return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
3206 }
3207
3208
ft_read(uchar * buf)3209 int ha_maria::ft_read(uchar * buf)
3210 {
3211 int error;
3212
3213 if (!ft_handler)
3214 return -1;
3215
3216 thread_safe_increment(table->in_use->status_var.ha_read_next_count,
3217 &LOCK_status); // why ?
3218
3219 error= ft_handler->please->read_next(ft_handler, (char*) buf);
3220
3221 return error;
3222 }
3223
3224
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)3225 bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
3226 uint table_changes)
3227 {
3228 DBUG_ENTER("check_if_incompatible_data");
3229 uint options= table->s->db_options_in_use;
3230 enum ha_choice page_checksum= table->s->page_checksum;
3231
3232 if (page_checksum == HA_CHOICE_UNDEF)
3233 page_checksum= file->s->options & HA_OPTION_PAGE_CHECKSUM ? HA_CHOICE_YES
3234 : HA_CHOICE_NO;
3235
3236 if (create_info->auto_increment_value != stats.auto_increment_value ||
3237 create_info->data_file_name != data_file_name ||
3238 create_info->index_file_name != index_file_name ||
3239 create_info->page_checksum != page_checksum ||
3240 create_info->transactional != table->s->transactional ||
3241 (maria_row_type(create_info) != data_file_type &&
3242 create_info->row_type != ROW_TYPE_DEFAULT) ||
3243 table_changes == IS_EQUAL_NO ||
3244 (table_changes & IS_EQUAL_PACK_LENGTH)) // Not implemented yet
3245 DBUG_RETURN(COMPATIBLE_DATA_NO);
3246
3247 if ((options & (HA_OPTION_CHECKSUM |
3248 HA_OPTION_DELAY_KEY_WRITE)) !=
3249 (create_info->table_options & (HA_OPTION_CHECKSUM |
3250 HA_OPTION_DELAY_KEY_WRITE)))
3251 DBUG_RETURN(COMPATIBLE_DATA_NO);
3252 DBUG_RETURN(COMPATIBLE_DATA_YES);
3253 }
3254
3255
maria_hton_panic(handlerton * hton,ha_panic_function flag)3256 static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
3257 {
3258 /* If no background checkpoints, we need to do one now */
3259 int ret=0;
3260
3261 if (!checkpoint_interval)
3262 ret= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);
3263
3264 ret|= maria_panic(flag);
3265
3266 maria_hton= 0;
3267 return ret;
3268 }
3269
3270
maria_commit(handlerton * hton,THD * thd,bool all)3271 static int maria_commit(handlerton *hton __attribute__ ((unused)),
3272 THD *thd, bool all)
3273 {
3274 TRN *trn= THD_TRN;
3275 int res;
3276 MARIA_HA *used_instances= (MARIA_HA*) trn->used_instances;
3277 DBUG_ENTER("maria_commit");
3278
3279 trnman_reset_locked_tables(trn, 0);
3280 trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
3281
3282 /* statement or transaction ? */
3283 if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
3284 !all)
3285 DBUG_RETURN(0); // end of statement
3286 res= ma_commit(trn);
3287 reset_thd_trn(thd, used_instances);
3288 DBUG_RETURN(res);
3289 }
3290
3291
maria_rollback(handlerton * hton,THD * thd,bool all)3292 static int maria_rollback(handlerton *hton __attribute__ ((unused)),
3293 THD *thd, bool all)
3294 {
3295 TRN *trn= THD_TRN;
3296 DBUG_ENTER("maria_rollback");
3297 trnman_reset_locked_tables(trn, 0);
3298 /* statement or transaction ? */
3299 if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all)
3300 {
3301 trnman_rollback_statement(trn);
3302 DBUG_RETURN(0); // end of statement
3303 }
3304 reset_thd_trn(thd, (MARIA_HA*) trn->used_instances);
3305 DBUG_RETURN(trnman_rollback_trn(trn) ?
3306 HA_ERR_OUT_OF_MEM : 0); // end of transaction
3307 }
3308
3309
3310
3311 /**
3312 @brief flush log handler
3313
3314 @param hton maria handlerton (unused)
3315
3316 @retval FALSE OK
3317 @retval TRUE Error
3318 */
3319
maria_flush_logs(handlerton * hton)3320 bool maria_flush_logs(handlerton *hton)
3321 {
3322 return MY_TEST(translog_purge_at_flush());
3323 }
3324
3325
maria_checkpoint_state(handlerton * hton,bool disabled)3326 int maria_checkpoint_state(handlerton *hton, bool disabled)
3327 {
3328 maria_checkpoint_disabled= (my_bool) disabled;
3329 return 0;
3330 }
3331
3332
3333
3334 #define SHOW_MSG_LEN (FN_REFLEN + 20)
3335 /**
3336 @brief show status handler
3337
3338 @param hton maria handlerton
3339 @param thd thread handler
3340 @param print print function
3341 @param stat type of status
3342 */
3343
maria_show_status(handlerton * hton,THD * thd,stat_print_fn * print,enum ha_stat_type stat)3344 bool maria_show_status(handlerton *hton,
3345 THD *thd,
3346 stat_print_fn *print,
3347 enum ha_stat_type stat)
3348 {
3349 const LEX_CSTRING *engine_name= hton_name(hton);
3350 switch (stat) {
3351 case HA_ENGINE_LOGS:
3352 {
3353 TRANSLOG_ADDRESS horizon= translog_get_horizon();
3354 uint32 last_file= LSN_FILE_NO(horizon);
3355 uint32 first_needed= translog_get_first_needed_file();
3356 uint32 first_file= translog_get_first_file(horizon);
3357 uint32 i;
3358 const char unknown[]= "unknown";
3359 const char needed[]= "in use";
3360 const char unneeded[]= "free";
3361 char path[FN_REFLEN];
3362
3363 if (first_file == 0)
3364 {
3365 const char error[]= "error";
3366 print(thd, engine_name->str, engine_name->length,
3367 STRING_WITH_LEN(""), error, sizeof(error) - 1);
3368 break;
3369 }
3370
3371 for (i= first_file; i <= last_file; i++)
3372 {
3373 char *file;
3374 const char *status;
3375 size_t length, status_len;
3376 MY_STAT stat_buff, *stat;
3377 const char error[]= "can't stat";
3378 char object[SHOW_MSG_LEN];
3379 file= translog_filename_by_fileno(i, path);
3380 if (!(stat= mysql_file_stat(key_file_translog, file, &stat_buff, MYF(0))))
3381 {
3382 status= error;
3383 status_len= sizeof(error) - 1;
3384 length= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s", file);
3385 }
3386 else
3387 {
3388 if (first_needed == 0)
3389 {
3390 status= unknown;
3391 status_len= sizeof(unknown) - 1;
3392 }
3393 else if (i < first_needed)
3394 {
3395 status= unneeded;
3396 status_len= sizeof(unneeded) - 1;
3397 }
3398 else
3399 {
3400 status= needed;
3401 status_len= sizeof(needed) - 1;
3402 }
3403 length= my_snprintf(object, SHOW_MSG_LEN, "Size %12llu ; %s",
3404 (ulonglong) stat->st_size, file);
3405 }
3406
3407 print(thd, engine_name->str, engine_name->length,
3408 object, length, status, status_len);
3409 }
3410 break;
3411 }
3412 case HA_ENGINE_STATUS:
3413 case HA_ENGINE_MUTEX:
3414 default:
3415 break;
3416 }
3417 return 0;
3418 }
3419
3420
3421 /**
3422 Callback to delete all logs in directory. This is lower-level than other
3423 functions in ma_loghandler.c which delete logs, as it does not rely on
3424 translog_init() having been called first.
3425
3426 @param directory directory where file is
3427 @param filename base name of the file to delete
3428 */
3429
translog_callback_delete_all(const char * directory,const char * filename)3430 static my_bool translog_callback_delete_all(const char *directory,
3431 const char *filename)
3432 {
3433 char complete_name[FN_REFLEN];
3434 fn_format(complete_name, filename, directory, "", MYF(MY_UNPACK_FILENAME));
3435 return mysql_file_delete(key_file_translog, complete_name, MYF(MY_WME));
3436 }
3437
3438
3439 /**
3440 Helper function for option aria-force-start-after-recovery-failures.
3441 Deletes logs if too many failures. Otherwise, increments the counter of
3442 failures in the control file.
3443 Notice how this has to be called _before_ translog_init() (if log is
3444 corrupted, translog_init() might crash the server, so we need to remove logs
3445 before).
3446
3447 @param log_dir directory where logs to be deleted are
3448 */
3449
mark_recovery_start(const char * log_dir)3450 static int mark_recovery_start(const char* log_dir)
3451 {
3452 int res;
3453 DBUG_ENTER("mark_recovery_start");
3454 if (!(maria_recover_options & HA_RECOVER_ANY))
3455 ma_message_no_user(ME_JUST_WARNING, "Please consider using option"
3456 " --aria-recover-options[=...] to automatically check and"
3457 " repair tables when logs are removed by option"
3458 " --aria-force-start-after-recovery-failures=#");
3459 if (recovery_failures >= force_start_after_recovery_failures)
3460 {
3461 /*
3462 Remove logs which cause the problem; keep control file which has
3463 critical info like uuid, max_trid (removing control file may make
3464 correct tables look corrupted!).
3465 */
3466 char msg[100];
3467 res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
3468 my_snprintf(msg, sizeof(msg),
3469 "%s logs after %u consecutive failures of"
3470 " recovery from logs",
3471 (res ? "failed to remove some" : "removed all"),
3472 recovery_failures);
3473 ma_message_no_user((res ? 0 : ME_JUST_WARNING), msg);
3474 }
3475 else
3476 res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3477 max_trid_in_control_file,
3478 recovery_failures + 1);
3479 DBUG_RETURN(res);
3480 }
3481
3482
3483 /**
3484 Helper function for option aria-force-start-after-recovery-failures.
3485 Records in the control file that recovery was a success, so that it's not
3486 counted for aria-force-start-after-recovery-failures.
3487 */
3488
mark_recovery_success(void)3489 static int mark_recovery_success(void)
3490 {
3491 /* success of recovery, reset recovery_failures: */
3492 int res;
3493 DBUG_ENTER("mark_recovery_success");
3494 res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3495 max_trid_in_control_file, 0);
3496 DBUG_RETURN(res);
3497 }
3498
3499
3500 /*
3501 Return 1 if table has changed during the current transaction
3502 */
3503
is_changed() const3504 bool ha_maria::is_changed() const
3505 {
3506 return file->state->changed;
3507 }
3508
3509
/*
  Plugin initialization function for the Aria storage engine.

  Fills in the handlerton passed by the server and then runs the engine's
  start-up sequence (see the comment at the 'res=' assignment).

  @param p  Pointer to the handlerton to initialize

  @return 0 on success, HA_ERR_INITIALIZATION if any start-up step failed
*/

static int ha_maria_init(void *p)
{
  int res;
  const char *log_dir= maria_data_root;

#ifdef HAVE_PSI_INTERFACE
  init_aria_psi_keys();
#endif

  /* Publish the handlerton and register the engine's hooks */
  maria_hton= (handlerton *)p;
  maria_hton->state= SHOW_OPTION_YES;
  maria_hton->db_type= DB_TYPE_ARIA;
  maria_hton->create= maria_create_handler;
  maria_hton->panic= maria_hton_panic;
  maria_hton->tablefile_extensions= ha_maria_exts;
  maria_hton->commit= maria_commit;
  maria_hton->rollback= maria_rollback;
  maria_hton->checkpoint_state= maria_checkpoint_state;
#ifdef MARIA_CANNOT_ROLLBACK
  /* Without rollback support the commit hook is cleared again */
  maria_hton->commit= 0;
#endif
  maria_hton->flush_logs= maria_flush_logs;
  maria_hton->show_status= maria_show_status;
  /* TODO: decide if we support Maria being used for log tables */
  maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
  bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
  maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
  /*
    Start-up sequence. The steps are chained with ||, so they run in the
    order written and the first one that reports failure stops the chain
    and makes res non-zero:
    - upgrade check, library init, open of the control file
    - mark_recovery_start(), only if force_start_after_recovery_failures
      is set
    - the two page caches (a zero return from init_pagecache() is treated
      as failure)
    - transaction log init and recovery from the log
    - mark_recovery_success(), only if the force-start option is set,
      recovery changed data, or earlier failures were recorded
    - start of checkpointing with the configured interval
  */
  res= maria_upgrade() || maria_init() || ma_control_file_open(TRUE, TRUE) ||
    ((force_start_after_recovery_failures != 0) &&
     mark_recovery_start(log_dir)) ||
    !init_pagecache(maria_pagecache,
                    (size_t) pagecache_buffer_size, pagecache_division_limit,
                    pagecache_age_threshold, maria_block_size, pagecache_file_hash_size,
                    0) ||
    !init_pagecache(maria_log_pagecache,
                    TRANSLOG_PAGECACHE_SIZE, 0, 0,
                    TRANSLOG_PAGE_SIZE, 0, 0) ||
    translog_init(maria_data_root, log_file_size,
                  MYSQL_VERSION_ID, server_id, maria_log_pagecache,
                  TRANSLOG_DEFAULT_FLAGS, 0) ||
    maria_recovery_from_log() ||
    ((force_start_after_recovery_failures != 0 ||
      maria_recovery_changed_data || recovery_failures) &&
     mark_recovery_success()) ||
    ma_checkpoint_init(checkpoint_interval);
  /* The settings below are applied whether or not start-up succeeded */
  maria_multi_threaded= maria_in_ha_maria= TRUE;
  maria_create_trn_hook= maria_create_trn_for_mysql;
  maria_pagecache->extra_debug= 1;
  maria_assert_if_crashed_table= debug_assert_if_crashed_table;

  /* On failure, make the engine look unavailable to the rest of the file */
  if (res)
    maria_hton= 0;

  ma_killed= ma_killed_in_mariadb;

  return res ? HA_ERR_INITIALIZATION : 0;
}
3567
3568
3569 #ifdef HAVE_QUERY_CACHE
3570 /**
3571 @brief Register a named table with a call back function to the query cache.
3572
3573 @param thd The thread handle
3574 @param table_key A pointer to the table name in the table cache
3575 @param key_length The length of the table name
3576 @param[out] engine_callback The pointer to the storage engine call back
3577 function, currently 0
3578 @param[out] engine_data Engine data will be set to 0.
3579
3580 @note Despite the name of this function, it is used to check each statement
3581 before it is cached and not to register a table or callback function.
3582
3583 @see handler::register_query_cache_table
3584
3585 @return The error code. The engine_data and engine_callback will be set to 0.
3586 @retval TRUE Success
3587 @retval FALSE An error occurred
3588 */
3589
register_query_cache_table(THD * thd,const char * table_name,uint table_name_len,qc_engine_callback * engine_callback,ulonglong * engine_data)3590 my_bool ha_maria::register_query_cache_table(THD *thd, const char *table_name,
3591 uint table_name_len,
3592 qc_engine_callback
3593 *engine_callback,
3594 ulonglong *engine_data)
3595 {
3596 ulonglong actual_data_file_length;
3597 ulonglong current_data_file_length;
3598 DBUG_ENTER("ha_maria::register_query_cache_table");
3599
3600 /*
3601 No call back function is needed to determine if a cached statement
3602 is valid or not.
3603 */
3604 *engine_callback= 0;
3605
3606 /*
3607 No engine data is needed.
3608 */
3609 *engine_data= 0;
3610
3611 if (file->s->now_transactional && file->s->have_versioning)
3612 DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn);
3613
3614 /*
3615 If a concurrent INSERT has happened just before the currently processed
3616 SELECT statement, the total size of the table is unknown.
3617
3618 To determine if the table size is known, the current thread's snap shot of
3619 the table size with the actual table size are compared.
3620
3621 If the table size is unknown the SELECT statement can't be cached.
3622 */
3623
3624 /*
3625 POSIX visibility rules specify that "2. Whatever memory values a
3626 thread can see when it unlocks a mutex <...> can also be seen by any
3627 thread that later locks the same mutex". In this particular case,
3628 concurrent insert thread had modified the data_file_length in
3629 MYISAM_SHARE before it has unlocked (or even locked)
3630 structure_guard_mutex. So, here we're guaranteed to see at least that
3631 value after we've locked the same mutex. We can see a later value
3632 (modified by some other thread) though, but it's ok, as we only want
3633 to know if the variable was changed, the actual new value doesn't matter
3634 */
3635 actual_data_file_length= file->s->state.state.data_file_length;
3636 current_data_file_length= file->state->data_file_length;
3637
3638 /* Return whether is ok to try to cache current statement. */
3639 DBUG_RETURN(!(file->s->non_transactional_concurrent_insert &&
3640 current_data_file_length != actual_data_file_length));
3641 }
3642 #endif
3643
/*
  NULL-terminated list of the system variables of this engine. Each entry
  refers, through MYSQL_SYSVAR(), to a variable declared elsewhere in this
  file.
*/
struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(block_size),
  MYSQL_SYSVAR(checkpoint_interval),
  MYSQL_SYSVAR(checkpoint_log_activity),
  MYSQL_SYSVAR(force_start_after_recovery_failures),
  MYSQL_SYSVAR(group_commit),
  MYSQL_SYSVAR(group_commit_interval),
  MYSQL_SYSVAR(log_dir_path),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_purge_type),
  MYSQL_SYSVAR(max_sort_file_size),
  MYSQL_SYSVAR(page_checksum),
  MYSQL_SYSVAR(pagecache_age_threshold),
  MYSQL_SYSVAR(pagecache_buffer_size),
  MYSQL_SYSVAR(pagecache_division_limit),
  MYSQL_SYSVAR(pagecache_file_hash_size),
  MYSQL_SYSVAR(recover_options),
  MYSQL_SYSVAR(repair_threads),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(stats_method),
  MYSQL_SYSVAR(sync_log_dir),
  MYSQL_SYSVAR(used_for_temp_tables),
  MYSQL_SYSVAR(encrypt_tables),
  NULL                                          /* End-of-list marker */
};
3669
3670
3671 /**
3672 @brief Updates the checkpoint interval and restarts the background thread.
3673 */
3674
update_checkpoint_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3675 static void update_checkpoint_interval(MYSQL_THD thd,
3676 struct st_mysql_sys_var *var,
3677 void *var_ptr, const void *save)
3678 {
3679 ma_checkpoint_end();
3680 ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
3681 }
3682
3683
3684 /**
3685 @brief Updates group commit mode
3686 */
3687
update_maria_group_commit(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3688 static void update_maria_group_commit(MYSQL_THD thd,
3689 struct st_mysql_sys_var *var,
3690 void *var_ptr, const void *save)
3691 {
3692 ulong value= (ulong)*((long *)var_ptr);
3693 DBUG_ENTER("update_maria_group_commit");
3694 DBUG_PRINT("enter", ("old value: %lu new value %lu rate %lu",
3695 value, (ulong)(*(long *)save),
3696 maria_group_commit_interval));
3697 /* old value */
3698 switch (value) {
3699 case TRANSLOG_GCOMMIT_NONE:
3700 break;
3701 case TRANSLOG_GCOMMIT_HARD:
3702 translog_hard_group_commit(FALSE);
3703 break;
3704 case TRANSLOG_GCOMMIT_SOFT:
3705 translog_soft_sync(FALSE);
3706 if (maria_group_commit_interval)
3707 translog_soft_sync_end();
3708 break;
3709 default:
3710 DBUG_ASSERT(0); /* impossible */
3711 }
3712 value= *(ulong *)var_ptr= (ulong)(*(long *)save);
3713 translog_sync();
3714 /* new value */
3715 switch (value) {
3716 case TRANSLOG_GCOMMIT_NONE:
3717 break;
3718 case TRANSLOG_GCOMMIT_HARD:
3719 translog_hard_group_commit(TRUE);
3720 break;
3721 case TRANSLOG_GCOMMIT_SOFT:
3722 translog_soft_sync(TRUE);
3723 /* variable change made under global lock so we can just read it */
3724 if (maria_group_commit_interval)
3725 translog_soft_sync_start();
3726 break;
3727 default:
3728 DBUG_ASSERT(0); /* impossible */
3729 }
3730 DBUG_VOID_RETURN;
3731 }
3732
3733 /**
3734 @brief Updates group commit interval
3735 */
3736
update_maria_group_commit_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3737 static void update_maria_group_commit_interval(MYSQL_THD thd,
3738 struct st_mysql_sys_var *var,
3739 void *var_ptr, const void *save)
3740 {
3741 ulong new_value= (ulong)*((long *)save);
3742 ulong *value_ptr= (ulong*) var_ptr;
3743 DBUG_ENTER("update_maria_group_commit_interval");
3744 DBUG_PRINT("enter", ("old value: %lu new value %lu group commit %lu",
3745 *value_ptr, new_value, maria_group_commit));
3746
3747 /* variable change made under global lock so we can just read it */
3748 switch (maria_group_commit) {
3749 case TRANSLOG_GCOMMIT_NONE:
3750 *value_ptr= new_value;
3751 translog_set_group_commit_interval(new_value);
3752 break;
3753 case TRANSLOG_GCOMMIT_HARD:
3754 *value_ptr= new_value;
3755 translog_set_group_commit_interval(new_value);
3756 break;
3757 case TRANSLOG_GCOMMIT_SOFT:
3758 if (*value_ptr)
3759 translog_soft_sync_end();
3760 translog_set_group_commit_interval(new_value);
3761 if ((*value_ptr= new_value))
3762 translog_soft_sync_start();
3763 break;
3764 default:
3765 DBUG_ASSERT(0); /* impossible */
3766 }
3767 DBUG_VOID_RETURN;
3768 }
3769
3770 /**
3771 @brief Updates the transaction log file limit.
3772 */
3773
update_log_file_size(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3774 static void update_log_file_size(MYSQL_THD thd,
3775 struct st_mysql_sys_var *var,
3776 void *var_ptr, const void *save)
3777 {
3778 uint32 size= (uint32)((ulong)(*(long *)save));
3779 translog_set_file_size(size);
3780 *(ulong *)var_ptr= size;
3781 }
3782
3783
/*
  Table of the status variables exposed by this plugin.

  Each entry gives the variable name, the address of the counter it shows
  (fields of maria_pagecache_var, or translog_syncs) and the SHOW_* type
  used to display it. The list is terminated by an entry whose name is
  NullS.
*/
SHOW_VAR status_variables[]= {
  {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG},
  {"pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG},
  {"pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG},
  {"pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
  {"pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
  {"pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
  {"pagecache_writes", (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
  {"transaction_log_syncs", (char*) &translog_syncs, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}                     /* end-of-list marker */
};
3795
3796 /****************************************************************************
3797 * Maria MRR implementation: use DS-MRR
3798 ***************************************************************************/
3799
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)3800 int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
3801 uint n_ranges, uint mode,
3802 HANDLER_BUFFER *buf)
3803 {
3804 return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
3805 }
3806
multi_range_read_next(range_id_t * range_info)3807 int ha_maria::multi_range_read_next(range_id_t *range_info)
3808 {
3809 return ds_mrr.dsmrr_next(range_info);
3810 }
3811
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)3812 ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
3813 void *seq_init_param,
3814 uint n_ranges, uint *bufsz,
3815 uint *flags, Cost_estimate *cost)
3816 {
3817 /*
3818 This call is here because there is no location where this->table would
3819 already be known.
3820 TODO: consider moving it into some per-query initialization call.
3821 */
3822 ds_mrr.init(this, table);
3823 return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
3824 flags, cost);
3825 }
3826
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)3827 ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
3828 uint key_parts, uint *bufsz,
3829 uint *flags, Cost_estimate *cost)
3830 {
3831 ds_mrr.init(this, table);
3832 return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
3833 }
3834
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)3835 int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
3836 size_t size)
3837 {
3838 return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
3839 }
/* Maria MRR implementation ends */
3841
3842
3843 /* Index condition pushdown implementation*/
3844
3845
idx_cond_push(uint keyno_arg,Item * idx_cond_arg)3846 Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
3847 {
3848 /*
3849 Check if the key contains a blob field. If it does then MyISAM
3850 should not accept the pushed index condition since MyISAM will not
3851 read the blob field from the index entry during evaluation of the
3852 pushed index condition and the BLOB field might be part of the
3853 range evaluation done by the ICP code.
3854 */
3855 const KEY *key= &table_share->key_info[keyno_arg];
3856
3857 for (uint k= 0; k < key->user_defined_key_parts; ++k)
3858 {
3859 const KEY_PART_INFO *key_part= &key->key_part[k];
3860 if (key_part->key_part_flag & HA_BLOB_PART)
3861 {
3862 /* Let the server handle the index condition */
3863 return idx_cond_arg;
3864 }
3865 }
3866
3867 pushed_idx_cond_keyno= keyno_arg;
3868 pushed_idx_cond= idx_cond_arg;
3869 in_range_check_pushed_down= TRUE;
3870 if (active_index == pushed_idx_cond_keyno)
3871 ma_set_index_cond_func(file, handler_index_cond_check, this);
3872 return NULL;
3873 }
3874
/**
  Find record by unique constraint (used in temporary tables)

  @param record          (IN|OUT) the record to find
  @param constrain_no    (IN) number of the constraint (for this engine)

  @note It is like hp_search, but uses functions that work on the row where
        hp_search uses functions that work on the index.

  @retval  0 OK
  @retval  1 Not found
  @retval -1 Error
*/
3888
find_unique_row(uchar * record,uint constrain_no)3889 int ha_maria::find_unique_row(uchar *record, uint constrain_no)
3890 {
3891 int rc;
3892 if (file->s->state.header.uniques)
3893 {
3894 DBUG_ASSERT(file->s->state.header.uniques > constrain_no);
3895 MARIA_UNIQUEDEF *def= file->s->uniqueinfo + constrain_no;
3896 ha_checksum unique_hash= _ma_unique_hash(def, record);
3897 rc= _ma_check_unique(file, def, record, unique_hash, HA_OFFSET_ERROR);
3898 if (rc)
3899 {
3900 file->cur_row.lastpos= file->dup_key_pos;
3901 if ((*file->read_record)(file, record, file->cur_row.lastpos))
3902 return -1;
3903 file->update|= HA_STATE_AKTIV; /* Record is read */
3904 }
3905 // invert logic
3906 rc= !MY_TEST(rc);
3907 }
3908 else
3909 {
3910 /*
3911 It is case when just unique index used instead unicue constrain
3912 (conversion from heap table).
3913 */
3914 DBUG_ASSERT(file->s->state.header.keys > constrain_no);
3915 MARIA_KEY key;
3916 file->once_flags|= USE_PACKED_KEYS;
3917 (*file->s->keyinfo[constrain_no].make_key)
3918 (file, &key, constrain_no, file->lastkey_buff2, record, 0, 0);
3919 rc= maria_rkey(file, record, constrain_no, key.data, key.data_length,
3920 HA_READ_KEY_EXACT);
3921 rc= MY_TEST(rc);
3922 }
3923 return rc;
3924 }
3925
/*
  Storage engine descriptor; its only initializer is the handlerton
  interface version this file was built against.
*/
struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
3928
/*
  Plugin declaration for the "Aria" storage engine.

  The descriptor names ha_maria_init as the init function, provides no
  deinit function, and publishes the status_variables and system_variables
  tables. Numeric version 0x0105 matches the string version "1.5".
*/
maria_declare_plugin(aria)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &maria_storage_engine,
  "Aria",
  "Monty Program Ab",
  "Crash-safe tables with MyISAM heritage",
  PLUGIN_LICENSE_GPL,
  ha_maria_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0105, /* 1.5 */
  status_variables, /* status variables */
  system_variables, /* system variables */
  "1.5", /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
maria_declare_plugin_end;
3946