1 /* Copyright (C) 2004-2008 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2    Copyright (C) 2008-2009 Sun Microsystems, Inc.
3    Copyright (c) 2009, 2021, MariaDB Corporation Ab
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; version 2 of the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
17 
18 
19 #ifdef USE_PRAGMA_IMPLEMENTATION
20 #pragma implementation                          // gcc: Class implementation
21 #endif
22 
23 #define MYSQL_SERVER 1
24 #include <my_global.h>
25 #include <m_ctype.h>
26 #include <my_dir.h>
27 #include <myisampack.h>
28 #include <my_bit.h>
29 #include "ha_maria.h"
30 #include "trnman_public.h"
31 #include "trnman.h"
32 
33 C_MODE_START
34 #include "maria_def.h"
35 #include "ma_rt_index.h"
36 #include "ma_blockrec.h"
37 #include "ma_checkpoint.h"
38 #include "ma_recovery.h"
39 C_MODE_END
40 #include "ma_trnman.h"
41 
42 //#include "sql_priv.h"
43 #include "protocol.h"
44 #include "sql_class.h"
45 #include "key.h"
46 #include "log.h"
47 #include "sql_parse.h"
48 
/*
  TRANSACTION_STATE expands to HA_NO_TRANSACTIONS unless
  ARIA_HAS_TRANSACTIONS is defined, in which case it expands to nothing.

  Note that in future versions, only *transactional* Maria tables can
  rollback, so this flag should be up or down conditionally.
*/
#ifndef ARIA_HAS_TRANSACTIONS
#define TRANSACTION_STATE HA_NO_TRANSACTIONS
#else
#define TRANSACTION_STATE
#endif
58 
/*
  Value returned by thd_get_ha_data(thd, maria_hton), cast to TRN*.
  Relies on a variable named 'thd' being in scope where the macro is used.
  The expansion is fully parenthesized so that it binds as one operand
  inside larger expressions (for example THD_TRN->member).
*/
#define THD_TRN ((TRN*) thd_get_ha_data(thd, maria_hton))
60 
61 ulong pagecache_division_limit, pagecache_age_threshold, pagecache_file_hash_size;
62 ulonglong pagecache_buffer_size;
63 const char *zerofill_error_msg=
64   "Table is probably from another system and must be zerofilled or repaired ('REPAIR TABLE table_name') to be usable on this system";
65 
/**
   Bitmap of automatic recovery options (bit names are listed in
   maria_recover_names[]); starts as HA_RECOVER_NONE.

   As the auto-repair is initiated when opened from the SQL layer
   (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
   Recovery internally opens the table to apply log records to it, which is
   good. It would happen only after Recovery, if the table is still
   corrupted.
*/
ulonglong maria_recover_options= HA_RECOVER_NONE;
/** Handlerton pointer of this engine; used by THD_TRN as the thd_get_ha_data() key */
handlerton *maria_hton;
75 
/* bits in maria_recover_options */
const char *maria_recover_names[]=
{
  /*
    Compared to MyISAM, "default" was renamed to "normal" as it collided with
    SET var=default which sets to the var's default i.e. what happens when the
    var is not set i.e. HA_RECOVER_NONE.
    OFF flag is ignored.
  */
  "NORMAL", "BACKUP", "FORCE", "QUICK", "OFF", NullS
};
/*
  TYPELIB over maria_recover_names; the element count excludes the
  terminating NullS entry.
*/
TYPELIB maria_recover_typelib=
{
  array_elements(maria_recover_names) - 1, "",
  maria_recover_names, NULL
};
92 
/* Names accepted by the stats_method variable (how NULLs are treated) */
const char *maria_stats_method_names[]=
{
  "nulls_unequal", "nulls_equal",
  "nulls_ignored", NullS
};
/*
  TYPELIB over maria_stats_method_names; the element count excludes the
  terminating NullS entry.
*/
TYPELIB maria_stats_method_typelib=
{
  array_elements(maria_stats_method_names) - 1, "",
  maria_stats_method_names, NULL
};
103 
/* Transaction log purge modes (names accepted by the log_purge_type variable) */
const char *maria_translog_purge_type_names[]=
{
  "immediate", "external", "at_flush", NullS
};
/*
  TYPELIB over maria_translog_purge_type_names; the element count excludes
  the terminating NullS entry.
*/
TYPELIB maria_translog_purge_type_typelib=
{
  array_elements(maria_translog_purge_type_names) - 1, "",
  maria_translog_purge_type_names, NULL
};
114 
/* Transactional log directory sync modes (names accepted by sync_log_dir) */
const char *maria_sync_log_dir_names[]=
{
  "NEVER", "NEWFILE", "ALWAYS", NullS
};
/*
  TYPELIB over maria_sync_log_dir_names; the element count excludes the
  terminating NullS entry.
*/
TYPELIB maria_sync_log_dir_typelib=
{
  array_elements(maria_sync_log_dir_names) - 1, "",
  maria_sync_log_dir_names, NULL
};
125 
/* Transactional log group commit modes (names accepted by group_commit) */
const char *maria_group_commit_names[]=
{
  "none", "hard", "soft", NullS
};
/*
  TYPELIB over maria_group_commit_names; the element count excludes the
  terminating NullS entry.
*/
TYPELIB maria_group_commit_typelib=
{
  array_elements(maria_group_commit_names) - 1, "",
  maria_group_commit_names, NULL
};
136 
/** Interval between background checkpoints in seconds */
static ulong checkpoint_interval;
/*
  Forward declarations of the update callbacks that are passed to the
  system variable declarations below; they are defined elsewhere in this
  file.
*/
static void update_checkpoint_interval(MYSQL_THD thd,
                                       struct st_mysql_sys_var *var,
                                       void *var_ptr, const void *save);
static void update_maria_group_commit(MYSQL_THD thd,
                                      struct st_mysql_sys_var *var,
                                      void *var_ptr, const void *save);
static void update_maria_group_commit_interval(MYSQL_THD thd,
                                           struct st_mysql_sys_var *var,
                                           void *var_ptr, const void *save);
/** After that many consecutive recovery failures, remove logs */
static ulong force_start_after_recovery_failures;
static void update_log_file_size(MYSQL_THD thd,
                                 struct st_mysql_sys_var *var,
                                 void *var_ptr, const void *save);
153 
/*
  System variable declarations, part 1: block size, checkpointing,
  recovery, page checksums and transaction log settings.
  Each declaration binds an option name to the variable named in its
  second argument; the string argument is the user-visible help text.
*/

/* The 4096 is there because of MariaDB privilege tables */
static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Block size to be used for Aria index pages.", 0, 0,
       MARIA_KEY_BLOCK_LENGTH, 4096,
       MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);

/* Changes are passed to update_checkpoint_interval() */
static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
       PLUGIN_VAR_RQCMDARG,
       "Interval between tries to do an automatic checkpoints. In seconds; 0 means"
       " 'no automatic checkpoints' which makes sense only for testing.",
       NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);

static MYSQL_SYSVAR_ULONG(checkpoint_log_activity, maria_checkpoint_min_log_activity,
       PLUGIN_VAR_RQCMDARG,
       "Number of bytes that the transaction log has to grow between checkpoints before a new "
       "checkpoint is written to the log.",
       NULL, NULL, 1024*1024, 0, UINT_MAX, 1);

static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
       force_start_after_recovery_failures,
       /*
         Read-only because setting it on the fly has no useful effect,
         should be set on command-line.
       */
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Number of consecutive log recovery failures after which logs will be"
       " automatically deleted to cure the problem; 0 (the default) disables"
       " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);

static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
       "Maintain page checksums (can be overridden per table "
       "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);

/* This is only a command-line argument (note PLUGIN_VAR_NOSYSVAR) */
static MYSQL_SYSVAR_CONST_STR(log_dir_path, maria_data_root,
       PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Path to the directory where to store transactional log",
       NULL, NULL, mysql_real_data_home);

/* Changes are passed to update_log_file_size() */
static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
       PLUGIN_VAR_RQCMDARG,
       "Limit for transaction log size",
       NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
       TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);

/* Changes are passed to update_maria_group_commit() */
static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit,
       PLUGIN_VAR_RQCMDARG,
       "Specifies Aria group commit mode. "
       "Possible values are \"none\" (no group commit), "
       "\"hard\" (with waiting to actual commit), "
       "\"soft\" (no wait for commit (DANGEROUS!!!))",
       NULL, update_maria_group_commit,
       TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);

/* Changes are passed to update_maria_group_commit_interval() */
static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval,
       PLUGIN_VAR_RQCMDARG,
       "Interval between commits in microseconds (1/1000000 sec)."
       " 0 stands for no waiting"
       " for other threads to come and do a commit in \"hard\" mode and no"
       " sync()/commit at all in \"soft\" mode.  Option has only an effect"
       " if aria_group_commit is used",
       NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1);

static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
       PLUGIN_VAR_RQCMDARG,
       "Specifies how Aria transactional log will be purged",
       NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
       &maria_translog_purge_type_typelib);

static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
       maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
       "Don't use the fast sort index method to created index if the "
       "temporary file would get bigger than this.",
       0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
       0, MAX_FILE_SIZE, 1*MB);
230 
/*
  System variable declarations, part 2: page cache tuning, automatic
  recovery options, repair/sort settings (per-connection THDVARs), index
  statistics method and log directory syncing.
*/
static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
       pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
       "This characterizes the number of hits a hot block has to be untouched "
       "until it is considered aged enough to be downgraded to a warm block. "
       "This specifies the percentage ratio of that number of hits to the "
       "total number of blocks in the page cache.", 0, 0,
       300, 100, ~ (ulong) 0L, 100);

static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "The size of the buffer used for index blocks for Aria tables. "
       "Increase this to get better index handling (for all reads and "
       "multiple writes) to as much as you can afford.", 0, 0,
       KEY_CACHE_SIZE, 8192*16L, ~(ulonglong) 0, 1);

static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
       PLUGIN_VAR_RQCMDARG,
       "The minimum percentage of warm blocks in key cache", 0, 0,
       100,  1, 100, 1);

static MYSQL_SYSVAR_ULONG(pagecache_file_hash_size, pagecache_file_hash_size,
       PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
       "Number of hash buckets for open and changed files.  If you have a lot of Aria "
       "files open you should increase this for faster flush of changes. A good "
       "value is probably 1/10 of number of possible open Aria files.", 0,0,
       512, 128, 16384, 1);

/* Valid values come from maria_recover_typelib */
static MYSQL_SYSVAR_SET(recover_options, maria_recover_options, PLUGIN_VAR_OPCMDARG,
       "Specifies how corrupted tables should be automatically repaired",
       NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &maria_recover_typelib);

static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
       "Number of threads to use when repairing Aria tables. The value of 1 "
       "disables parallel repair.",
       0, 0, 1, 1, 128, 1);

static MYSQL_THDVAR_ULONGLONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
       "The buffer that is allocated when sorting the index when doing a "
       "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
       NULL, NULL,
       SORT_BUFFER_INIT, MIN_SORT_BUFFER, SIZE_T_MAX/2, 1);

/* Valid values come from maria_stats_method_typelib */
static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
       "Specifies how Aria index statistics collection code should treat "
       "NULLs", 0, 0, 0, &maria_stats_method_typelib);

/* Valid values come from maria_sync_log_dir_typelib */
static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
       "Controls syncing directory after log file growth and new file "
       "creation", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
       &maria_sync_log_dir_typelib);
281 
/*
  Turn the compile-time switch USE_ARIA_FOR_TMP_TABLES into a 0/1 value
  that initializes use_maria_for_temp_tables.
*/
#ifdef USE_ARIA_FOR_TMP_TABLES
#define USE_ARIA_FOR_TMP_TABLES_VAL 1
#else
#define USE_ARIA_FOR_TMP_TABLES_VAL 0
#endif
my_bool use_maria_for_temp_tables= USE_ARIA_FOR_TMP_TABLES_VAL;

/* Read-only and not settable from the command line (PLUGIN_VAR_NOCMDOPT) */
static MYSQL_SYSVAR_BOOL(used_for_temp_tables,
       use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
       "Whether temporary tables should be MyISAM or Aria", 0, 0,
       1);

static MYSQL_SYSVAR_BOOL(encrypt_tables, maria_encrypt_tables, PLUGIN_VAR_OPCMDARG,
       "Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) "
       "and not FIXED/DYNAMIC)",
       0, 0, 0);
298 
299 #if defined HAVE_PSI_INTERFACE && !defined EMBEDDED_LIBRARY
300 
/*
  Performance schema instrumentation descriptors for Aria. Each entry pairs
  a key variable with its instrument name and flags. All of these arrays
  are registered by init_aria_psi_keys() under the "aria" category.
*/

/* Mutex instruments */
static PSI_mutex_info all_aria_mutexes[]=
{
  { &key_THR_LOCK_maria, "THR_LOCK_maria", PSI_FLAG_GLOBAL},
  { &key_LOCK_soft_sync, "LOCK_soft_sync", PSI_FLAG_GLOBAL},
  { &key_LOCK_trn_list, "LOCK_trn_list", PSI_FLAG_GLOBAL},
  { &key_SHARE_BITMAP_lock, "SHARE::bitmap::bitmap_lock", 0},
  { &key_SORT_INFO_mutex, "SORT_INFO::mutex", 0},
  { &key_TRANSLOG_BUFFER_mutex, "TRANSLOG_BUFFER::mutex", 0},
  { &key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock, "TRANSLOG_DESCRIPTOR::dirty_buffer_mask_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_sent_to_disk_lock, "TRANSLOG_DESCRIPTOR::sent_to_disk_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_log_flush_lock, "TRANSLOG_DESCRIPTOR::log_flush_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_file_header_lock, "TRANSLOG_DESCRIPTOR::file_header_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_unfinished_files_lock, "TRANSLOG_DESCRIPTOR::unfinished_files_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_purger_lock, "TRANSLOG_DESCRIPTOR::purger_lock", 0},
  { &key_SHARE_intern_lock, "SHARE::intern_lock", 0},
  { &key_SHARE_key_del_lock, "SHARE::key_del_lock", 0},
  { &key_SHARE_close_lock, "SHARE::close_lock", 0},
  { &key_SERVICE_THREAD_CONTROL_lock, "SERVICE_THREAD_CONTROL::LOCK_control", 0},
  { &key_TRN_state_lock, "TRN::state_lock", 0},
  { &key_PAGECACHE_cache_lock, "PAGECACHE::cache_lock", 0}
};

/* Condition variable instruments */
static PSI_cond_info all_aria_conds[]=
{
  { &key_COND_soft_sync, "COND_soft_sync", PSI_FLAG_GLOBAL},
  { &key_SHARE_key_del_cond, "SHARE::key_del_cond", 0},
  { &key_SERVICE_THREAD_CONTROL_cond, "SERVICE_THREAD_CONTROL::COND_control", 0},
  { &key_SORT_INFO_cond, "SORT_INFO::cond", 0},
  { &key_SHARE_BITMAP_cond, "BITMAP::bitmap_cond", 0},
  { &key_TRANSLOG_BUFFER_waiting_filling_buffer, "TRANSLOG_BUFFER::waiting_filling_buffer", 0},
  { &key_TRANSLOG_BUFFER_prev_sent_to_disk_cond, "TRANSLOG_BUFFER::prev_sent_to_disk_cond", 0},
  { &key_TRANSLOG_DESCRIPTOR_log_flush_cond, "TRANSLOG_DESCRIPTOR::log_flush_cond", 0},
  { &key_TRANSLOG_DESCRIPTOR_new_goal_cond, "TRANSLOG_DESCRIPTOR::new_goal_cond", 0}
};

/* Read/write lock instruments */
static PSI_rwlock_info all_aria_rwlocks[]=
{
  { &key_KEYINFO_root_lock, "KEYINFO::root_lock", 0},
  { &key_SHARE_mmap_lock, "SHARE::mmap_lock", 0},
  { &key_TRANSLOG_DESCRIPTOR_open_files_lock, "TRANSLOG_DESCRIPTOR::open_files_lock", 0}
};

/* Thread instruments */
static PSI_thread_info all_aria_threads[]=
{
  { &key_thread_checkpoint, "checkpoint_background", PSI_FLAG_GLOBAL},
  { &key_thread_soft_sync, "soft_sync_background", PSI_FLAG_GLOBAL},
  { &key_thread_find_all_keys, "thr_find_all_keys", 0}
};

/* File instruments */
static PSI_file_info all_aria_files[]=
{
  { &key_file_translog, "translog", 0},
  { &key_file_kfile, "MAI", 0},
  { &key_file_dfile, "MAD", 0},
  { &key_file_control, "control", PSI_FLAG_GLOBAL}
};

/* Stage instruments; only compiled when the stage interface is available */
# ifdef HAVE_PSI_STAGE_INTERFACE
static PSI_stage_info *all_aria_stages[]=
{
  & stage_waiting_for_a_resource
};
# endif /* HAVE_PSI_STAGE_INTERFACE */
364 
init_aria_psi_keys(void)365 static void init_aria_psi_keys(void)
366 {
367   const char* category= "aria";
368   int count;
369 
370   count= array_elements(all_aria_mutexes);
371   mysql_mutex_register(category, all_aria_mutexes, count);
372 
373   count= array_elements(all_aria_rwlocks);
374   mysql_rwlock_register(category, all_aria_rwlocks, count);
375 
376   count= array_elements(all_aria_conds);
377   mysql_cond_register(category, all_aria_conds, count);
378 
379   count= array_elements(all_aria_threads);
380   mysql_thread_register(category, all_aria_threads, count);
381 
382   count= array_elements(all_aria_files);
383   mysql_file_register(category, all_aria_files, count);
384 # ifdef HAVE_PSI_STAGE_INTERFACE
385   count= array_elements(all_aria_stages);
386   mysql_stage_register(category, all_aria_stages, count);
387 # endif /* HAVE_PSI_STAGE_INTERFACE */
388 }
389 #else
390 #define init_aria_psi_keys() /* no-op */
391 #endif /* HAVE_PSI_INTERFACE */
392 
/*
  Message type tags for check/repair messages. _ma_check_print() and
  _ma_check_print_msg() compare msg_type against these variables by pointer
  value (not by string content), so callers must pass these exact pointers.
*/
const char *MA_CHECK_INFO= "info";
const char *MA_CHECK_WARNING= "warning";
const char *MA_CHECK_ERROR= "error";
396 
397 /*****************************************************************************
398 ** MARIA tables
399 *****************************************************************************/
400 
maria_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)401 static handler *maria_create_handler(handlerton *hton,
402                                      TABLE_SHARE * table,
403                                      MEM_ROOT *mem_root)
404 {
405   return new (mem_root) ha_maria(hton, table);
406 }
407 
408 
_ma_check_print(HA_CHECK * param,const char * msg_type,const char * msgbuf)409 static void _ma_check_print(HA_CHECK *param, const char* msg_type,
410                             const char *msgbuf)
411 {
412   if (msg_type == MA_CHECK_INFO)
413     sql_print_information("%s.%s: %s", param->db_name, param->table_name,
414                           msgbuf);
415   else if (msg_type == MA_CHECK_WARNING)
416     sql_print_warning("%s.%s: %s", param->db_name, param->table_name,
417                       msgbuf);
418   else
419     sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
420 }
421 
422 
423 // collect errors printed by maria_check routines
424 
_ma_check_print_msg(HA_CHECK * param,const char * msg_type,const char * fmt,va_list args)425 static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
426                                 const char *fmt, va_list args)
427 {
428   THD *thd= (THD *) param->thd;
429   Protocol *protocol= thd->protocol;
430   size_t length, msg_length;
431   char msgbuf[MYSQL_ERRMSG_SIZE];
432   char name[NAME_LEN * 2 + 2];
433 
434   if (param->testflag & T_SUPPRESS_ERR_HANDLING)
435     return;
436 
437   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
438   msgbuf[sizeof(msgbuf) - 1]= 0;                // healthy paranoia
439 
440   DBUG_PRINT(msg_type, ("message: %s", msgbuf));
441 
442   if (!thd->vio_ok())
443   {
444     _ma_check_print(param, msg_type, msgbuf);
445     return;
446   }
447 
448   if (param->testflag &
449       (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
450   {
451     myf flag= 0;
452     if (msg_type == MA_CHECK_INFO)
453       flag= ME_NOTE;
454     else if (msg_type == MA_CHECK_WARNING)
455       flag= ME_WARNING;
456     my_message(ER_NOT_KEYFILE, msgbuf, MYF(flag));
457     if (thd->variables.log_warnings > 2)
458       _ma_check_print(param, msg_type, msgbuf);
459     return;
460   }
461   length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
462                           NullS) - name);
463   /*
464     TODO: switch from protocol to push_warning here. The main reason we didn't
465     it yet is parallel repair, which threads have no THD object accessible via
466     current_thd.
467 
468     Also we likely need to lock mutex here (in both cases with protocol and
469     push_warning).
470   */
471   protocol->prepare_for_resend();
472   protocol->store(name, (uint)length, system_charset_info);
473   protocol->store(param->op_name, system_charset_info);
474   protocol->store(msg_type, system_charset_info);
475   protocol->store(msgbuf, (uint)msg_length, system_charset_info);
476   if (protocol->write())
477     sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n",
478                     param->db_name, param->table_name, msgbuf);
479   else if (thd->variables.log_warnings > 2)
480     _ma_check_print(param, msg_type, msgbuf);
481 
482   return;
483 }
484 
485 
486 /*
487   Convert TABLE object to Maria key and column definition
488 
489   SYNOPSIS
490     table2maria()
491       table_arg   in     TABLE object.
492       keydef_out  out    Maria key definition.
493       recinfo_out out    Maria column definition.
494       records_out out    Number of fields.
495 
496   DESCRIPTION
497     This function will allocate and initialize Maria key and column
498     definition for further use in ma_create or for a check for underlying
499     table conformance in merge engine.
500 
501     The caller needs to free *recinfo_out after use. Since *recinfo_out
502     and *keydef_out are allocated with a my_multi_malloc, *keydef_out
503     is freed automatically when *recinfo_out is freed.
504 
505   RETURN VALUE
506     0  OK
507     # error code
508 */
509 
table2maria(TABLE * table_arg,data_file_type row_type,MARIA_KEYDEF ** keydef_out,MARIA_COLUMNDEF ** recinfo_out,uint * records_out,MARIA_CREATE_INFO * create_info)510 static int table2maria(TABLE *table_arg, data_file_type row_type,
511                        MARIA_KEYDEF **keydef_out,
512                        MARIA_COLUMNDEF **recinfo_out, uint *records_out,
513                        MARIA_CREATE_INFO *create_info)
514 {
515   uint i, j, recpos, minpos, fieldpos, temp_length, length;
516   enum ha_base_keytype type= HA_KEYTYPE_BINARY;
517   uchar *record;
518   KEY *pos;
519   MARIA_KEYDEF *keydef;
520   MARIA_COLUMNDEF *recinfo, *recinfo_pos;
521   HA_KEYSEG *keyseg;
522   TABLE_SHARE *share= table_arg->s;
523   uint options= share->db_options_in_use;
524   DBUG_ENTER("table2maria");
525 
526   if (row_type == BLOCK_RECORD)
527     options|= HA_OPTION_PACK_RECORD;
528 
529   if (!(my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
530           recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
531           keydef_out, share->keys * sizeof(MARIA_KEYDEF),
532           &keyseg,
533           (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
534           NullS)))
535     DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
536   keydef= *keydef_out;
537   recinfo= *recinfo_out;
538   pos= table_arg->key_info;
539   for (i= 0; i < share->keys; i++, pos++)
540   {
541     keydef[i].flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
542                                             HA_SPATIAL));
543     keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
544       (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
545       pos->algorithm;
546     keydef[i].block_length= pos->block_size;
547     keydef[i].seg= keyseg;
548     keydef[i].keysegs= pos->user_defined_key_parts;
549     for (j= 0; j < pos->user_defined_key_parts; j++)
550     {
551       Field *field= pos->key_part[j].field;
552 
553       if (!table_arg->field[field->field_index]->stored_in_db())
554       {
555         my_free(*recinfo_out);
556         if (table_arg->s->long_unique_table)
557         {
558           my_error(ER_TOO_LONG_KEY, MYF(0), table_arg->file->max_key_length());
559           DBUG_RETURN(HA_ERR_INDEX_COL_TOO_LONG);
560         }
561         my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
562         DBUG_RETURN(HA_ERR_UNSUPPORTED);
563       }
564 
565       type= field->key_type();
566       keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
567 
568       if (options & HA_OPTION_PACK_KEYS ||
569           (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
570                          HA_SPACE_PACK_USED)))
571       {
572         if (pos->key_part[j].length > 8 &&
573             (type == HA_KEYTYPE_TEXT ||
574              type == HA_KEYTYPE_NUM ||
575              (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
576         {
577           /* No blobs here */
578           if (j == 0)
579             keydef[i].flag|= HA_PACK_KEY;
580           if (!(field->flags & ZEROFILL_FLAG) &&
581               (field->type() == MYSQL_TYPE_STRING ||
582                field->type() == MYSQL_TYPE_VAR_STRING ||
583                ((int) (pos->key_part[j].length - field->decimals())) >= 4))
584             keydef[i].seg[j].flag|= HA_SPACE_PACK;
585         }
586         else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
587           keydef[i].flag|= HA_BINARY_PACK_KEY;
588       }
589       keydef[i].seg[j].type= (int) type;
590       keydef[i].seg[j].start= pos->key_part[j].offset;
591       keydef[i].seg[j].length= pos->key_part[j].length;
592       keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_length= 0;
593       keydef[i].seg[j].bit_pos= 0;
594       keydef[i].seg[j].language= field->charset()->number;
595 
596       if (field->null_ptr)
597       {
598         keydef[i].seg[j].null_bit= field->null_bit;
599         keydef[i].seg[j].null_pos= (uint) (field->null_ptr-
600                                            (uchar*) table_arg->record[0]);
601       }
602       else
603       {
604         keydef[i].seg[j].null_bit= 0;
605         keydef[i].seg[j].null_pos= 0;
606       }
607       if (field->type() == MYSQL_TYPE_BLOB ||
608           field->type() == MYSQL_TYPE_GEOMETRY)
609       {
610         keydef[i].seg[j].flag|= HA_BLOB_PART;
611         /* save number of bytes used to pack length */
612         keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
613                                             portable_sizeof_char_ptr);
614       }
615       else if (field->type() == MYSQL_TYPE_BIT)
616       {
617         keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
618         keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
619         keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
620                                           (uchar*) table_arg->record[0]);
621       }
622     }
623     keyseg+= pos->user_defined_key_parts;
624   }
625   if (table_arg->found_next_number_field)
626     keydef[share->next_number_index].flag|= HA_AUTO_KEY;
627   record= table_arg->record[0];
628   recpos= 0;
629   recinfo_pos= recinfo;
630   create_info->null_bytes= table_arg->s->null_bytes;
631 
632   while (recpos < (uint) share->stored_rec_length)
633   {
634     Field **field, *found= 0;
635     minpos= share->reclength;
636     length= 0;
637 
638     for (field= table_arg->field; *field; field++)
639     {
640       if ((fieldpos= (*field)->offset(record)) >= recpos &&
641           fieldpos <= minpos)
642       {
643         /* skip null fields */
644         if (!(temp_length= (*field)->pack_length_in_rec()))
645           continue; /* Skip null-fields */
646         if (! found || fieldpos < minpos ||
647             (fieldpos == minpos && temp_length < length))
648         {
649           minpos= fieldpos;
650           found= *field;
651           length= temp_length;
652         }
653       }
654     }
655     DBUG_PRINT("loop", ("found: %p  recpos: %d  minpos: %d  length: %d",
656                         found, recpos, minpos, length));
657     if (!found)
658       break;
659 
660     if (found->flags & BLOB_FLAG)
661       recinfo_pos->type= FIELD_BLOB;
662     else if (found->type() == MYSQL_TYPE_TIMESTAMP)
663       recinfo_pos->type= FIELD_NORMAL;
664     else if (found->type() == MYSQL_TYPE_VARCHAR)
665       recinfo_pos->type= FIELD_VARCHAR;
666     else if (!(options & HA_OPTION_PACK_RECORD) ||
667              (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
668       recinfo_pos->type= FIELD_NORMAL;
669     else if (found->zero_pack())
670       recinfo_pos->type= FIELD_SKIP_ZERO;
671     else
672       recinfo_pos->type= ((length <= 3 ||
673                            (found->flags & ZEROFILL_FLAG)) ?
674                           FIELD_NORMAL :
675                           found->type() == MYSQL_TYPE_STRING ||
676                           found->type() == MYSQL_TYPE_VAR_STRING ?
677                           FIELD_SKIP_ENDSPACE :
678                           FIELD_SKIP_PRESPACE);
679     if (found->null_ptr)
680     {
681       recinfo_pos->null_bit= found->null_bit;
682       recinfo_pos->null_pos= (uint) (found->null_ptr -
683                                      (uchar*) table_arg->record[0]);
684     }
685     else
686     {
687       recinfo_pos->null_bit= 0;
688       recinfo_pos->null_pos= 0;
689     }
690     (recinfo_pos++)->length= (uint16) length;
691     recpos= minpos + length;
692     DBUG_PRINT("loop", ("length: %d  type: %d",
693                         recinfo_pos[-1].length,recinfo_pos[-1].type));
694   }
695   *records_out= (uint) (recinfo_pos - recinfo);
696   DBUG_RETURN(0);
697 }
698 
699 
700 /*
701   Check for underlying table conformance
702 
703   SYNOPSIS
704     maria_check_definition()
705       t1_keyinfo       in    First table key definition
706       t1_recinfo       in    First table record definition
707       t1_keys          in    Number of keys in first table
708       t1_recs          in    Number of records in first table
709       t2_keyinfo       in    Second table key definition
710       t2_recinfo       in    Second table record definition
711       t2_keys          in    Number of keys in second table
712       t2_recs          in    Number of records in second table
713       strict           in    Strict check switch
714 
715   DESCRIPTION
716     This function compares two Maria definitions. By intention it was done
717     to compare merge table definition against underlying table definition.
718     It may also be used to compare dot-frm and MAI definitions of Maria
719     table as well to compare different Maria table definitions.
720 
721     For merge table it is not required that number of keys in merge table
722     must exactly match number of keys in underlying table. When calling this
723     function for underlying table conformance check, 'strict' flag must be
724     set to false, and converted merge definition must be passed as t1_*.
725 
726     Otherwise 'strict' flag must be set to 1 and it is not required to pass
727     converted dot-frm definition as t1_*.
728 
729   RETURN VALUE
730     0 - Equal definitions.
731     1 - Different definitions.
732 
733   TODO
734     - compare FULLTEXT keys;
735     - compare SPATIAL keys;
736     - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
737       (should be correctly detected in table2maria).
738 
739   FIXME:
740     maria_check_definition() is never used! CHECK TABLE does not detect the
741     corruption! Do maria_check_definition() like check_definition() is done
742     by MyISAM (related to MDEV-25803).
743 */
744 
maria_check_definition(MARIA_KEYDEF * t1_keyinfo,MARIA_COLUMNDEF * t1_recinfo,uint t1_keys,uint t1_recs,MARIA_KEYDEF * t2_keyinfo,MARIA_COLUMNDEF * t2_recinfo,uint t2_keys,uint t2_recs,bool strict)745 int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
746                            MARIA_COLUMNDEF *t1_recinfo,
747                            uint t1_keys, uint t1_recs,
748                            MARIA_KEYDEF *t2_keyinfo,
749                            MARIA_COLUMNDEF *t2_recinfo,
750                            uint t2_keys, uint t2_recs, bool strict)
751 {
752   uint i, j;
753   DBUG_ENTER("maria_check_definition");
754   if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys))
755   {
756     DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
757                          t1_keys, t2_keys));
758     DBUG_RETURN(1);
759   }
760   if (t1_recs != t2_recs)
761   {
762     DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
763                          t1_recs, t2_recs));
764     DBUG_RETURN(1);
765   }
766   for (i= 0; i < t1_keys; i++)
767   {
768     HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
769     HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
770     if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
771       continue;
772     else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
773              t2_keyinfo[i].flag & HA_FULLTEXT)
774     {
775        DBUG_PRINT("error", ("Key %d has different definition", i));
776        DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
777                             MY_TEST(t1_keyinfo[i].flag & HA_FULLTEXT),
778                             MY_TEST(t2_keyinfo[i].flag & HA_FULLTEXT)));
779        DBUG_RETURN(1);
780     }
781     if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
782       continue;
783     else if (t1_keyinfo[i].flag & HA_SPATIAL ||
784              t2_keyinfo[i].flag & HA_SPATIAL)
785     {
786        DBUG_PRINT("error", ("Key %d has different definition", i));
787        DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
788                             MY_TEST(t1_keyinfo[i].flag & HA_SPATIAL),
789                             MY_TEST(t2_keyinfo[i].flag & HA_SPATIAL)));
790        DBUG_RETURN(1);
791     }
792     if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
793         t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
794     {
795       DBUG_PRINT("error", ("Key %d has different definition", i));
796       DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
797                            t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
798       DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
799                            t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
800       DBUG_RETURN(1);
801     }
802     for (j=  t1_keyinfo[i].keysegs; j--;)
803     {
804       uint8 t1_keysegs_j__type= t1_keysegs[j].type;
805       /*
806         Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
807         always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
808         HA_KEYTYPE_VARTEXT1. Since we treat both the same on MyISAM
809         level, we can ignore a mismatch between these types.
810       */
811       if ((t1_keysegs[j].flag & HA_BLOB_PART) &&
812           (t2_keysegs[j].flag & HA_BLOB_PART))
813       {
814         if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
815             (t2_keysegs[j].type == HA_KEYTYPE_VARTEXT1))
816           t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
817         else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
818                  (t2_keysegs[j].type == HA_KEYTYPE_VARBINARY1))
819           t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
820       }
821 
822       if (t1_keysegs_j__type != t2_keysegs[j].type ||
823           t1_keysegs[j].language != t2_keysegs[j].language ||
824           t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
825           t1_keysegs[j].length != t2_keysegs[j].length)
826       {
827         DBUG_PRINT("error", ("Key segment %d (key %d) has different "
828                              "definition", j, i));
829         DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
830                              "t1_length=%d",
831                              t1_keysegs[j].type, t1_keysegs[j].language,
832                              t1_keysegs[j].null_bit, t1_keysegs[j].length));
833         DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
834                              "t2_length=%d",
835                              t2_keysegs[j].type, t2_keysegs[j].language,
836                              t2_keysegs[j].null_bit, t2_keysegs[j].length));
837 
838         DBUG_RETURN(1);
839       }
840     }
841   }
842 
843   for (i= 0; i < t1_recs; i++)
844   {
845     MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
846     MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
847     /*
848       FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
849       see NOTE1 in ma_create.c
850     */
851     if ((t1_rec->type != t2_rec->type &&
852          !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
853            t1_rec->length == 1 &&
854            t2_rec->type == (int) FIELD_NORMAL)) ||
855         t1_rec->length != t2_rec->length ||
856         t1_rec->null_bit != t2_rec->null_bit)
857     {
858       DBUG_PRINT("error", ("Field %d has different definition", i));
859       DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
860                            t1_rec->type, t1_rec->length, t1_rec->null_bit));
861       DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
862                            t2_rec->type, t2_rec->length, t2_rec->null_bit));
863       DBUG_RETURN(1);
864     }
865   }
866   DBUG_RETURN(0);
867 }
868 
869 
870 extern "C" {
871 
_ma_killed_ptr(HA_CHECK * param)872 int _ma_killed_ptr(HA_CHECK *param)
873 {
874   if (likely(thd_killed((THD*)param->thd)) == 0)
875     return 0;
876   my_errno= HA_ERR_ABORTED_BY_USER;
877   return 1;
878 }
879 
880 
881 /*
882   Report progress to mysqld
883 
884   This is a bit more complex than what a normal progress report
885   function normally is.
886 
887   The reason is that this is called by enable_index/repair which
888   is one stage in ALTER TABLE and we can't use the external
889   stage/max_stage for this.
890 
891   thd_progress_init/thd_progress_next_stage is to be called by
892   high level commands like CHECK TABLE or REPAIR TABLE, not
893   by sub commands like enable_index().
894 
895   In ma_check.c it's easier to work with stages than with a total
896   progress, so we use internal stage/max_stage here to keep the
897   code simple.
898 */
899 
_ma_report_progress(HA_CHECK * param,ulonglong progress,ulonglong max_progress)900 void _ma_report_progress(HA_CHECK *param, ulonglong progress,
901                          ulonglong max_progress)
902 {
903   thd_progress_report((THD*)param->thd,
904                       progress + max_progress * param->stage,
905                       max_progress * param->max_stage);
906 }
907 
908 
_ma_check_print_error(HA_CHECK * param,const char * fmt,...)909 void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
910 {
911   va_list args;
912   DBUG_ENTER("_ma_check_print_error");
913   param->error_printed++;
914   param->out_flag |= O_DATA_LOST;
915   if (param->testflag & T_SUPPRESS_ERR_HANDLING)
916     DBUG_VOID_RETURN;
917   va_start(args, fmt);
918   _ma_check_print_msg(param, MA_CHECK_ERROR, fmt, args);
919   va_end(args);
920   DBUG_VOID_RETURN;
921 }
922 
923 
_ma_check_print_info(HA_CHECK * param,const char * fmt,...)924 void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
925 {
926   va_list args;
927   DBUG_ENTER("_ma_check_print_info");
928   va_start(args, fmt);
929   _ma_check_print_msg(param, MA_CHECK_INFO, fmt, args);
930   va_end(args);
931   DBUG_VOID_RETURN;
932 }
933 
934 
_ma_check_print_warning(HA_CHECK * param,const char * fmt,...)935 void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
936 {
937   va_list args;
938   DBUG_ENTER("_ma_check_print_warning");
939   param->warning_printed++;
940   param->out_flag |= O_DATA_LOST;
941   va_start(args, fmt);
942   _ma_check_print_msg(param, MA_CHECK_WARNING, fmt, args);
943   va_end(args);
944   DBUG_VOID_RETURN;
945 }
946 
947 /*
948   Create a transaction object
949 
950   SYNOPSIS
951     info	Maria handler
952 
953   RETURN
954     0 		ok
955     #		Error number (HA_ERR_OUT_OF_MEM)
956 */
957 
maria_create_trn_for_mysql(MARIA_HA * info)958 static int maria_create_trn_for_mysql(MARIA_HA *info)
959 {
960   THD *thd= ((TABLE*) info->external_ref)->in_use;
961   TRN *trn= THD_TRN;
962   DBUG_ENTER("maria_create_trn_for_mysql");
963 
964   if (!trn)  /* no transaction yet - open it now */
965   {
966     trn= trnman_new_trn(& thd->transaction->wt);
967     if (unlikely(!trn))
968       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
969     thd_set_ha_data(thd, maria_hton, trn);
970     if (thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
971       trans_register_ha(thd, TRUE, maria_hton, trn->trid);
972   }
973   _ma_set_trn_for_table(info, trn);
974   if (!trnman_increment_locked_tables(trn))
975   {
976     trans_register_ha(thd, FALSE, maria_hton, trn->trid);
977     trnman_new_statement(trn);
978   }
979 #ifdef EXTRA_DEBUG
980   if (info->lock_type == F_WRLCK &&
981       ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
982   {
983     trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
984                      TRN_STATE_TABLES_CAN_CHANGE);
985     (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
986                                    (uchar*) thd->query(),
987                                    thd->query_length());
988   }
989   else
990   {
991     DBUG_PRINT("info", ("lock_type: %d  trnman_flags: %u",
992                         info->lock_type, trnman_get_flags(trn)));
993   }
994 
995 #endif
996   DBUG_RETURN(0);
997 }
998 
ma_killed_in_mariadb(MARIA_HA * info)999 my_bool ma_killed_in_mariadb(MARIA_HA *info)
1000 {
1001   return (((TABLE*) (info->external_ref))->in_use->killed != 0);
1002 }
1003 
1004 } /* extern "C" */
1005 
/*
  Bulk insert modes. The ha_maria constructor initializes its
  bulk_insert_single_undo member to BULK_INSERT_NONE.
*/

/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and with repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR    1
/**
  Transactional table doing bulk insert with one single UNDO
  (UNDO_BULK_INSERT) and without repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
/**
  Neither BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
  BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR.
*/
#define BULK_INSERT_NONE      0
1021 
/*
  Construct a handler instance that has no table open yet (file is 0).

  int_table_flags is initialized with the capability flags that do not
  depend on the opened table; ha_maria::open() adds more flags to it.

  NOTE(review): TRANSACTION_STATE is defined at the top of this file as an
  empty macro when ARIA_HAS_TRANSACTIONS is defined. In that configuration
  the '|' chain below would contain an empty operand and not compile -
  confirm that ARIA_HAS_TRANSACTIONS is never defined.
*/
ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
handler(hton, table_arg), file(0),
int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
                HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
                HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
                HA_FILE_BASED | HA_CAN_GEOMETRY | TRANSACTION_STATE |
                HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_CAN_REPAIR |
                HA_CAN_VIRTUAL_COLUMNS | HA_CAN_EXPORT |
                HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT |
                HA_CAN_TABLES_WITHOUT_ROLLBACK),
can_enable_indexes(0), bulk_insert_single_undo(BULK_INSERT_NONE)
{}
1034 
1035 
clone(const char * name,MEM_ROOT * mem_root)1036 handler *ha_maria::clone(const char *name __attribute__((unused)),
1037                          MEM_ROOT *mem_root)
1038 {
1039   ha_maria *new_handler=
1040     static_cast <ha_maria *>(handler::clone(file->s->open_file_name.str,
1041                                             mem_root));
1042   if (new_handler)
1043   {
1044     new_handler->file->state= file->state;
1045     /* maria_create_trn_for_mysql() is never called for clone() tables */
1046     new_handler->file->trn= file->trn;
1047     DBUG_ASSERT(new_handler->file->trn_prev == 0 &&
1048                 new_handler->file->trn_next == 0);
1049   }
1050   return new_handler;
1051 }
1052 
1053 
/*
  NullS terminated list built from MARIA_NAME_IEXT and MARIA_NAME_DEXT
  (presumably the index and data file name extensions - confirm against
  their definitions).
*/
static const char *ha_maria_exts[]=
{
  MARIA_NAME_IEXT,
  MARIA_NAME_DEXT,
  NullS
};
1060 
1061 
index_type(uint key_number)1062 const char *ha_maria::index_type(uint key_number)
1063 {
1064   return ((table->key_info[key_number].flags & HA_FULLTEXT) ?
1065           "FULLTEXT" :
1066           (table->key_info[key_number].flags & HA_SPATIAL) ?
1067           "SPATIAL" :
1068           (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ?
1069           "RTREE" : "BTREE");
1070 }
1071 
1072 
index_flags(uint inx,uint part,bool all_parts) const1073 ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
1074 {
1075   ulong flags;
1076   if (table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT)
1077     flags= 0;
1078   else
1079   if ((table_share->key_info[inx].flags & HA_SPATIAL ||
1080       table_share->key_info[inx].algorithm == HA_KEY_ALG_RTREE))
1081   {
1082     /* All GIS scans are non-ROR scans. We also disable IndexConditionPushdown */
1083     flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1084            HA_READ_ORDER | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
1085   }
1086   else
1087   {
1088     flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1089           HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN;
1090   }
1091   return flags;
1092 }
1093 
1094 
scan_time()1095 double ha_maria::scan_time()
1096 {
1097   if (file->s->data_file_type == BLOCK_RECORD)
1098     return (ulonglong2double(stats.data_file_length - file->s->block_size) /
1099             file->s->block_size) + 2;
1100   return handler::scan_time();
1101 }
1102 
1103 /*
1104   We need to be able to store at least 2 keys on an index page as the
1105   splitting algorithms depends on this. (With only one key on a page
1106   we also can't use any compression, which may make the index file much
1107   larger)
1108   We use MARIA_MAX_KEY_LENGTH to limit the key size as we don't want to use
1109   too much stack when searching in the b_tree.
1110 
1111   We also need to reserve place for a record pointer (8) and 3 bytes
1112   per key segment to store the length of the segment + possible null bytes.
  These extra bytes are required here so that maria_create() will surely
  accept any keys created with the returned key data storage length.
1115 */
1116 
max_supported_key_length() const1117 uint ha_maria::max_supported_key_length() const
1118 {
1119   return maria_max_key_length();
1120 }
1121 
1122 /* Name is here without an extension */
1123 
/*
  Open the table 'name' and set up the handler for it.

  After a successful maria_open() the table dependent bits of
  int_table_flags are set and the per key parser and block size
  information is filled in.

  RETURN
    0   ok
    #   maria_open() failed: my_errno, or -1 if my_errno is 0
*/
int ha_maria::open(const char *name, int mode, uint test_if_locked)
{
  uint i;

#ifdef NOT_USED
  /*
    If the user wants to have memory mapped data files, add an
    open_flag. Do not memory map temporary tables because they are
    expected to be inserted and thus extended a lot. Memory mapping is
    efficient for files that keep their size, but very inefficient for
    growing files. Using an open_flag instead of calling ma_extra(...
    HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the
    mapping is not repeated for every open, but just done on the initial
    open, when the MyISAM share is created. Every time the server
    requires to open a new instance of a table it calls this method. We
    will always supply HA_OPEN_MMAP for a permanent table. However, the
    Maria storage engine will ignore this flag if this is a secondary
    open of a table that is in use by other threads already (if the
    Maria share exists already).
  */
  if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
    test_if_locked|= HA_OPEN_MMAP;
#endif

  if (maria_recover_options & HA_RECOVER_ANY)
  {
    /* user asked to trigger a repair if table was not properly closed */
    test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
  }

  if (aria_readonly)
    test_if_locked|= HA_OPEN_IGNORE_MOVED_STATE;

  if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER,
                         s3_open_args())))
  {
    /* HA_ERR_OLD_FILE additionally gives the user the zerofill message */
    if (my_errno == HA_ERR_OLD_FILE)
    {
      push_warning(current_thd, Sql_condition::WARN_LEVEL_NOTE,
                   ER_CRASHED_ON_USAGE,
                   zerofill_error_msg);
    }
    return (my_errno ? my_errno : -1);
  }
  if (aria_readonly)
    file->s->options|= HA_OPTION_READ_ONLY_DATA;

  file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
  /* Set external_ref, mainly for temporary tables */
  file->external_ref= (void*) table;            // For ma_killed()

  if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
    maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0);

  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
  if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
    maria_extra(file, HA_EXTRA_WAIT_LOCK, 0);
  /* Remember the row format; only STATIC_RECORD lacks HA_REC_NOT_IN_SEQ */
  if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
    int_table_flags |= HA_REC_NOT_IN_SEQ;
  if (!file->s->base.born_transactional)
  {
    /*
      INSERT DELAYED cannot work with transactional tables (because it cannot
      stand up to "when client gets ok the data is safe on disk": the record
      may not even be inserted). In the future, we could enable it back (as a
      client doing INSERT DELAYED knows the specificities; but we then should
      make sure to regularly commit in the delayed_insert thread).
    */
    int_table_flags|= HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS;
  }
  else
    int_table_flags|= HA_CRASH_SAFE;

  if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
    int_table_flags |= HA_HAS_NEW_CHECKSUM;

  /*
    We can only do online backup on transactional tables with checksum.
    Checksums are needed to avoid half writes.
  */
  if (file->s->options & HA_OPTION_PAGE_CHECKSUM &&
      file->s->base.born_transactional)
    int_table_flags |= HA_CAN_ONLINE_BACKUPS;

  /*
    For static size rows, tell MariaDB that we will access all bytes
    in the record when writing it.  This signals MariaDB to initialize
    the full row to ensure we don't get any errors from valgrind and
    that all bytes in the row is properly reset.
  */
  if (file->s->data_file_type == STATIC_RECORD &&
      (file->s->has_varchar_fields || file->s->has_null_fields))
    int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;

  /*
    For every key: hand the parser of keys flagged HA_USES_PARSER over to
    the engine key definition and copy the engine block length back to
    the key definition of the table.
  */
  for (i= 0; i < table->s->keys; i++)
  {
    plugin_ref parser= table->key_info[i].parser;
    if (table->key_info[i].flags & HA_USES_PARSER)
      file->s->keyinfo[i].parser=
        (struct st_mysql_ftparser *)plugin_decl(parser)->info;
    table->key_info[i].block_size= file->s->keyinfo[i].block_length;
  }
  /* my_errno is reset here and is the value returned below */
  my_errno= 0;

  /* Count statistics of usage for newly open normal files */
  if (file->s->reopen == 1 && ! (test_if_locked & HA_OPEN_TMP_TABLE))
  {
    if (file->s->delay_key_write)
      feature_files_opened_with_delayed_keys++;
  }

  return my_errno;
}
1237 
1238 
close(void)1239 int ha_maria::close(void)
1240 {
1241   MARIA_HA *tmp= file;
1242   if (!tmp)
1243     return 0;
1244   DBUG_ASSERT(file->trn == 0 || file->trn == &dummy_transaction_object);
1245   DBUG_ASSERT(file->trn_next == 0 && file->trn_prev == 0);
1246   file= 0;
1247   return maria_close(tmp);
1248 }
1249 
1250 
write_row(const uchar * buf)1251 int ha_maria::write_row(const uchar * buf)
1252 {
1253   /*
1254      If we have an auto_increment column and we are writing a changed row
1255      or a new row, then update the auto_increment value in the record.
1256   */
1257   if (table->next_number_field && buf == table->record[0])
1258   {
1259     int error;
1260     if ((error= update_auto_increment()))
1261       return error;
1262   }
1263   return maria_write(file, buf);
1264 }
1265 
1266 
check(THD * thd,HA_CHECK_OPT * check_opt)1267 int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
1268 {
1269   int error, fatal_error;
1270   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1271   MARIA_SHARE *share= file->s;
1272   const char *old_proc_info;
1273   TRN *old_trn= file->trn;
1274 
1275   if (!file || !param) return HA_ADMIN_INTERNAL_ERROR;
1276 
1277   unmap_file(file);
1278   register_handler(file);
1279   maria_chk_init(param);
1280   param->thd= thd;
1281   param->op_name= "check";
1282   param->db_name= table->s->db.str;
1283   param->table_name= table->alias.c_ptr();
1284   param->testflag= check_opt->flags | T_CHECK | T_SILENT;
1285   param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1286 
1287   if (!(table->db_stat & HA_READ_ONLY))
1288     param->testflag |= T_STATISTICS;
1289   param->using_global_keycache= 1;
1290 
1291   if (!maria_is_crashed(file) &&
1292       (((param->testflag & T_CHECK_ONLY_CHANGED) &&
1293         !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
1294                                   STATE_IN_REPAIR)) &&
1295         share->state.open_count == 0) ||
1296        ((param->testflag & T_FAST) && (share->state.open_count ==
1297                                       (uint) (share->global_changed ? 1 :
1298                                               0)))))
1299     return HA_ADMIN_ALREADY_DONE;
1300 
1301   maria_chk_init_for_check(param, file);
1302   param->max_allowed_lsn= translog_get_horizon();
1303 
1304   if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
1305       STATE_MOVED)
1306   {
1307     _ma_check_print_error(param, "%s", zerofill_error_msg);
1308     return HA_ADMIN_CORRUPT;
1309   }
1310 
1311   old_proc_info= thd_proc_info(thd, "Checking status");
1312   thd_progress_init(thd, 3);
1313   error= maria_chk_status(param, file);                // Not fatal
1314   /* maria_chk_size() will flush the page cache for this file */
1315   if (maria_chk_size(param, file))
1316     error= 1;
1317   if (!error)
1318     error|= maria_chk_del(param, file, param->testflag);
1319   thd_proc_info(thd, "Checking keys");
1320   thd_progress_next_stage(thd);
1321   if (!error)
1322     error= maria_chk_key(param, file);
1323   thd_proc_info(thd, "Checking data");
1324   thd_progress_next_stage(thd);
1325   if (!error)
1326   {
1327     if ((!(param->testflag & T_QUICK) &&
1328          ((share->options &
1329            (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
1330           (param->testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
1331     {
1332       ulonglong old_testflag= param->testflag;
1333       param->testflag |= T_MEDIUM;
1334 
1335       /* BLOCK_RECORD does not need a cache as it is using the page cache */
1336       if (file->s->data_file_type != BLOCK_RECORD)
1337         error= init_io_cache(&param->read_cache, file->dfile.file,
1338                              my_default_record_cache_size, READ_CACHE,
1339                              share->pack.header_length, 1, MYF(MY_WME));
1340       if (!error)
1341         error= maria_chk_data_link(param, file,
1342                                    MY_TEST(param->testflag & T_EXTEND));
1343 
1344       if (file->s->data_file_type != BLOCK_RECORD)
1345         end_io_cache(&param->read_cache);
1346       param->testflag= old_testflag;
1347     }
1348   }
1349   fatal_error= error;
1350   if (param->error_printed &&
1351       param->error_printed == (param->skip_lsn_error_count +
1352                                param->not_visible_rows_found) &&
1353       !(share->state.changed & (STATE_CRASHED_FLAGS | STATE_IN_REPAIR)))
1354   {
1355     _ma_check_print_error(param, "%s", zerofill_error_msg);
1356     /* This ensures that a future REPAIR TABLE will only do a zerofill */
1357     file->update|= STATE_MOVED;
1358     share->state.changed|= STATE_MOVED;
1359     fatal_error= 0;
1360   }
1361   if (!fatal_error)
1362   {
1363     if ((share->state.changed & (STATE_CHANGED | STATE_MOVED |
1364                                  STATE_CRASHED_FLAGS |
1365                                  STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
1366         (param->testflag & T_STATISTICS) || maria_is_crashed(file))
1367     {
1368       file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1369       mysql_mutex_lock(&share->intern_lock);
1370       DBUG_PRINT("info", ("Resetting crashed state"));
1371       share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
1372                                STATE_IN_REPAIR);
1373       if (!(table->db_stat & HA_READ_ONLY))
1374       {
1375         int tmp;
1376         if ((tmp= maria_update_state_info(param, file,
1377                                           UPDATE_TIME | UPDATE_OPEN_COUNT |
1378                                           UPDATE_STAT)))
1379           error= tmp;
1380       }
1381       mysql_mutex_unlock(&share->intern_lock);
1382       info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
1383            HA_STATUS_CONST);
1384 
1385       /*
1386         Write a 'table is ok' message to error log if table is ok and
1387         we have written to error log that table was getting checked
1388       */
1389       if (!error && !(table->db_stat & HA_READ_ONLY) &&
1390           !maria_is_crashed(file) && thd->error_printed_to_log &&
1391           (param->warning_printed || param->error_printed ||
1392            param->note_printed))
1393         _ma_check_print_info(param, "Table is fixed");
1394     }
1395   }
1396   else if (!maria_is_crashed(file) && !thd->killed)
1397   {
1398     maria_mark_crashed(file);
1399     file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1400   }
1401 
1402   /* Reset trn, that may have been set by repair */
1403   if (old_trn && old_trn != file->trn)
1404   {
1405     DBUG_ASSERT(old_trn->used_instances == 0);
1406     _ma_set_trn_for_table(file, old_trn);
1407   }
1408   thd_proc_info(thd, old_proc_info);
1409   thd_progress_end(thd);
1410   return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1411 }
1412 
1413 
1414 /*
1415   Analyze the key distribution in the table
1416   As the table may be only locked for read, we have to take into account that
1417   two threads may do an analyze at the same time!
1418 */
1419 
analyze(THD * thd,HA_CHECK_OPT * check_opt)1420 int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
1421 {
1422   int error= 0;
1423   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1424   MARIA_SHARE *share= file->s;
1425   const char *old_proc_info;
1426 
1427   if (!param)
1428     return HA_ADMIN_INTERNAL_ERROR;
1429 
1430   maria_chk_init(param);
1431   param->thd= thd;
1432   param->op_name= "analyze";
1433   param->db_name= table->s->db.str;
1434   param->table_name= table->alias.c_ptr();
1435   param->testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
1436                    T_DONT_CHECK_CHECKSUM);
1437   param->using_global_keycache= 1;
1438   param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1439 
1440   if (!(share->state.changed & STATE_NOT_ANALYZED))
1441     return HA_ADMIN_ALREADY_DONE;
1442 
1443   old_proc_info= thd_proc_info(thd, "Scanning");
1444   thd_progress_init(thd, 1);
1445   error= maria_chk_key(param, file);
1446   if (!error)
1447   {
1448     mysql_mutex_lock(&share->intern_lock);
1449     error= maria_update_state_info(param, file, UPDATE_STAT);
1450     mysql_mutex_unlock(&share->intern_lock);
1451   }
1452   else if (!maria_is_crashed(file) && !thd->killed)
1453     maria_mark_crashed(file);
1454   thd_proc_info(thd, old_proc_info);
1455   thd_progress_end(thd);
1456   return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1457 }
1458 
/*
  Handler part of REPAIR TABLE.

  A table that is only marked as moved gets a zerofill instead of a repair.
  Otherwise repair(thd, param, 0) is called, and called again with less
  aggressive options for as long as it fails with param->retry_repair set
  and there is still an option left to drop.

  RETURN
    0                       ok
    HA_ADMIN_INTERNAL_ERROR no open table or out of memory
    HA_ADMIN_COMMIT_ERROR   the implicit commit failed
    #                       result of the last repair attempt or zerofill()
*/
int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
{
  int error;
  HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
  ha_rows start_records;
  const char *old_proc_info;

  if (!file || !param)
    return HA_ADMIN_INTERNAL_ERROR;

  maria_chk_init(param);
  param->thd= thd;
  param->op_name= "repair";

  /*
    The following can only be true if the table was marked as STATE_MOVED
    during a CHECK TABLE and the table has not been used since then
  */
  if ((file->s->state.changed & STATE_MOVED) &&
      !(file->s->state.changed & STATE_CRASHED_FLAGS))
  {
    param->db_name= table->s->db.str;
    param->table_name= table->alias.c_ptr();
    _ma_check_print_info(param, "Running zerofill on moved table");
    return zerofill(thd, check_opt);
  }

  /* T_EXTEND selects T_REP; otherwise repair by sort is tried first */
  param->testflag= ((check_opt->flags & ~(T_EXTEND)) |
                   T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
                   (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
  param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size);
  param->backup_time= check_opt->start_time;
  start_records= file->state->records;
  old_proc_info= thd_proc_info(thd, "Checking table");
  thd_progress_init(thd, 1);
  while ((error= repair(thd, param, 0)) && param->retry_repair)
  {
    /* Restore the row count before the next attempt */
    param->retry_repair= 0;
    file->state->records= start_records;
    if (test_all_bits(param->testflag,
                      (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
    {
      param->testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
      /* Ensure we don't lose any rows when retrying without quick */
      param->testflag|= T_SAFE_REPAIR;
      if (thd->vio_ok())
        _ma_check_print_info(param, "Retrying repair without quick");
      else
        sql_print_information("Retrying repair of: '%s' without quick",
                              table->s->path.str);
      continue;
    }
    param->testflag &= ~T_QUICK;
    if (param->testflag & T_REP_BY_SORT)
    {
      param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
      /*
        NOTE(review): unlike the "without quick" branch above there is no
        'else' here, so the message is written to the error log even when
        it was already sent to the client - confirm this is intended.
      */
      if (thd->vio_ok())
        _ma_check_print_info(param, "Retrying repair with keycache");
      sql_print_information("Retrying repair of: '%s' with keycache",
                            table->s->path.str);
      continue;
    }
    /* Nothing left to retry with */
    break;
  }
  /*
    Commit is needed in the case of tables are locked to ensure that repair
    is registered in the recovery log
  */
  if (implicit_commit(thd, TRUE))
    error= HA_ADMIN_COMMIT_ERROR;

  /* Report a changed row count to the error log unless told to be silent */
  if (!error && start_records != file->state->records &&
      !(check_opt->flags & T_VERY_SILENT))
  {
    char llbuff[22], llbuff2[22];
    sql_print_information("Found %s of %s rows when repairing '%s'",
                          llstr(file->state->records, llbuff),
                          llstr(start_records, llbuff2),
                          table->s->path.str);
  }
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);
  return error;
}
1543 
zerofill(THD * thd,HA_CHECK_OPT * check_opt)1544 int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
1545 {
1546   int error;
1547   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1548   TRN *old_trn;
1549   MARIA_SHARE *share= file->s;
1550 
1551   if (!file || !param)
1552     return HA_ADMIN_INTERNAL_ERROR;
1553 
1554   unmap_file(file);
1555   old_trn= file->trn;
1556   maria_chk_init(param);
1557   param->thd= thd;
1558   param->op_name= "zerofill";
1559   param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
1560   param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size);
1561   param->db_name= table->s->db.str;
1562   param->table_name= table->alias.c_ptr();
1563 
1564   error=maria_zerofill(param, file, share->open_file_name.str);
1565 
1566   /* Reset trn, that may have been set by repair */
1567   if (old_trn && old_trn != file->trn)
1568     _ma_set_trn_for_table(file, old_trn);
1569 
1570   if (!error)
1571   {
1572     TrID create_trid= trnman_get_min_safe_trid();
1573     mysql_mutex_lock(&share->intern_lock);
1574     share->state.changed|= STATE_NOT_MOVABLE;
1575     maria_update_state_info(param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
1576     _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, create_trid,
1577                               TRUE, TRUE);
1578     mysql_mutex_unlock(&share->intern_lock);
1579   }
1580   return error;
1581 }
1582 
optimize(THD * thd,HA_CHECK_OPT * check_opt)1583 int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
1584 {
1585   int error;
1586   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1587 
1588   if (!file || !param)
1589     return HA_ADMIN_INTERNAL_ERROR;
1590 
1591   maria_chk_init(param);
1592   param->thd= thd;
1593   param->op_name= "optimize";
1594   param->testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
1595                    T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
1596   param->orig_sort_buffer_length= THDVAR(thd, sort_buffer_size);
1597   thd_progress_init(thd, 1);
1598   if ((error= repair(thd, param, 1)) && param->retry_repair)
1599   {
1600     sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
1601                       my_errno, param->db_name, param->table_name);
1602     param->testflag &= ~T_REP_BY_SORT;
1603     error= repair(thd, param, 0);
1604   }
1605   thd_progress_end(thd);
1606   return error;
1607 }
1608 
1609 
/*
  Worker that performs the actual repair / optimize of the open table.

  SYNOPSIS
    repair()
    thd          Thread handle
    param        Check parameters; param->testflag selects the repair method
                 and which extra steps (index sort, statistics) are wanted
    do_optimize  If set, the row/key rebuild is only done when the state
                 flags tested below say that the table needs it

  RETURN
    HA_ADMIN_OK            Something was done and
                           write_log_record_for_repair() succeeded
    HA_ADMIN_ALREADY_DONE  No step had to be executed
    HA_ADMIN_FAILED        Error (data file not open, lock failure, repair
                           error, or failure to write the repair log record)
*/
int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
{
  int error= 0;
  /* Private copy of the flags; param->testflag is modified and restored */
  ulonglong local_testflag= param->testflag;
  /* For a plain repair (do_optimize == 0) work is always considered done */
  bool optimize_done= !do_optimize, statistics_done= 0, full_repair_done= 0;
  const char *old_proc_info= thd->proc_info;
  char fixed_name[FN_REFLEN];
  MARIA_SHARE *share= file->s;
  /* Row count on entry, compared with the count after repair */
  ha_rows rows= file->state->records;
  TRN *old_trn= file->trn;
  /* Set if this function took the table lock and thus has to release it */
  my_bool locking= 0;
  DBUG_ENTER("ha_maria::repair");

  /*
    Normally this method is entered with a properly opened table. If the
    repair fails, it can be repeated with more elaborate options. Under
    special circumstances it can happen that a repair fails so that it
    closed the data file and cannot re-open it. In this case file->dfile
    is set to -1. We must not try another repair without an open data
    file. (Bug #25289)
  */
  if (file->dfile.file == -1)
  {
    sql_print_information("Retrying repair of: '%s' failed. "
                          "Please try REPAIR EXTENDED or aria_chk",
                          table->s->path.str);
    DBUG_RETURN(HA_ADMIN_FAILED);
  }

  /*
    If transactions was not enabled for a transactional table then
    file->s->status is not up to date. This is needed for repair_by_sort
    to work
  */
  if (share->base.born_transactional && !share->now_transactional)
    _ma_copy_nontrans_state_information(file);

  param->db_name= table->s->db.str;
  param->table_name= table->alias.c_ptr();
  param->tmpfile_createflag= O_RDWR | O_TRUNC;
  param->using_global_keycache= 1;
  param->thd= thd;
  param->tmpdir= &mysql_tmpdir_list;
  param->out_flag= 0;
  /* Reset; tested after repair by sort to detect a duplicate key */
  share->state.dupp_key= MI_MAX_KEY;
  strmov(fixed_name, share->open_file_name.str);
  unmap_file(file);

  /*
    Don't lock tables if we have used LOCK TABLE or if we come from
    enable_indexes()
  */
  if (!thd->locked_tables_mode && ! (param->testflag & T_NO_LOCKS))
  {
    locking= 1;
    if (maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
    {
      _ma_check_print_error(param, ER_THD(thd, ER_CANT_LOCK), my_errno);
      DBUG_RETURN(HA_ADMIN_FAILED);
    }
  }

  /*
    Rebuild rows/keys always for a plain repair. For optimize, only if the
    table has something to optimize (non-optimized rows for BLOCK_RECORD,
    deleted or split rows for other formats) and, when T_QUICK is given,
    only if the state is flagged as having non-optimized keys or rows.
  */
  if (!do_optimize ||
      (((share->data_file_type == BLOCK_RECORD) ?
        (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) :
        (file->state->del ||
         share->state.split != file->state->records)) &&
       (!(param->testflag & T_QUICK) ||
        (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
                                 STATE_NOT_OPTIMIZED_ROWS)))))
  {
    /* Keys to rebuild: all keys, or only the ones currently active */
    ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
                        maria_get_mask_all_keys_active(share->base.keys) :
                        share->state.key_map);
    /* Saved so that method-specific flag changes below can be undone */
    ulonglong save_testflag= param->testflag;
    if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
        (local_testflag & T_REP_BY_SORT))
    {
      local_testflag |= T_STATISTICS;
      param->testflag |= T_STATISTICS;           // We get this for free
      statistics_done= 1;
      /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
      if (THDVAR(thd,repair_threads) > 1 &&
          share->data_file_type != BLOCK_RECORD)
      {
        char buf[40];
        /* TODO: respect maria_repair_threads variable */
        my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
        thd_proc_info(thd, buf);
        param->testflag|= T_REP_PARALLEL;
        error= maria_repair_parallel(param, file, fixed_name,
                                     MY_TEST(param->testflag & T_QUICK));
        /* to reset proc_info, as it was pointing to local buffer */
        thd_proc_info(thd, "Repair done");
      }
      else
      {
        thd_proc_info(thd, "Repair by sorting");
        param->testflag|= T_REP_BY_SORT;
        error= maria_repair_by_sort(param, file, fixed_name,
                                    MY_TEST(param->testflag & T_QUICK));
      }
      /*
        Unique index creation by sort failed on a duplicate key:
        report it as a duplicate key error for that key
      */
      if (error && file->create_unique_index_by_sort &&
          share->state.dupp_key != MAX_KEY)
      {
        my_errno= HA_ERR_FOUND_DUPP_KEY;
        print_keydup_error(table, &table->key_info[share->state.dupp_key],
                           MYF(0));
      }
    }
    else
    {
      thd_proc_info(thd, "Repair with keycache");
      param->testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
      error= maria_repair(param, file, fixed_name,
                          MY_TEST(param->testflag & T_QUICK));
    }
    /*
      Restore the caller's flags, keeping only T_RETRY_WITHOUT_QUICK if it
      was set in param->testflag during the repair
    */
    param->testflag= save_testflag | (param->testflag & T_RETRY_WITHOUT_QUICK);
    optimize_done= 1;
    /*
      set full_repair_done if we re-wrote all rows and all keys
      (and thus removed all transid's from the table)
    */
    full_repair_done= !MY_TEST(param->testflag & T_QUICK);
  }
  if (!error)
  {
    /* Optional index sort, only if the pages are flagged as not sorted */
    if ((local_testflag & T_SORT_INDEX) &&
        (share->state.changed & STATE_NOT_SORTED_PAGES))
    {
      optimize_done= 1;
      thd_proc_info(thd, "Sorting index");
      error= maria_sort_index(param, file, fixed_name);
    }
    /* Optional statistics, unless repair by sort already produced them */
    if (!error && !statistics_done && (local_testflag & T_STATISTICS))
    {
      if (share->state.changed & STATE_NOT_ANALYZED)
      {
        optimize_done= 1;
        thd_proc_info(thd, "Analyzing");
        error= maria_chk_key(param, file);
      }
      else
        local_testflag &= ~T_STATISTICS;        // Don't update statistics
    }
  }
  thd_proc_info(thd, "Saving state");
  if (full_repair_done && !error &&
      !(param->testflag & T_NO_CREATE_RENAME_LSN))
  {
    /* Set trid (needed if the table was moved from another system) */
    share->state.create_trid= trnman_get_min_safe_trid();
  }
  /* The state update below is done under the share mutex */
  mysql_mutex_lock(&share->intern_lock);
  if (!error)
  {
    if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
    {
      DBUG_PRINT("info", ("Resetting crashed state"));
      share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
                               STATE_IN_REPAIR | STATE_MOVED);
      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    }
    /*
      repair updates share->state.state. Ensure that file->state is up to date
    */
    if (file->state != &share->state.state)
      *file->state= share->state.state;

    if (share->base.auto_key)
      _ma_update_auto_increment_key(param, file, 1);
    /* Write the state only if some step was executed */
    if (optimize_done)
      error= maria_update_state_info(param, file,
                                     UPDATE_TIME | UPDATE_OPEN_COUNT |
                                     (local_testflag &
                                      T_STATISTICS ? UPDATE_STAT : 0));
    /* File is repaired; Mark the file as moved to this system */
    (void) _ma_set_uuid(share, 0);

    info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
         HA_STATUS_CONST);
    /* Warn if the number of rows differs from the count on entry */
    if (rows != file->state->records && !(param->testflag & T_VERY_SILENT))
    {
      char llbuff[22], llbuff2[22];
      _ma_check_print_warning(param, "Number of rows changed from %s to %s",
                              llstr(rows, llbuff),
                              llstr(file->state->records, llbuff2));
    }
  }
  else
  {
    /* Repair failed: mark the table as crashed and save that state */
    maria_mark_crashed_on_repair(file);
    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
    maria_update_state_info(param, file, 0);
  }
  mysql_mutex_unlock(&share->intern_lock);
  thd_proc_info(thd, old_proc_info);
  thd_progress_end(thd);                        // Mark done
  /* Release the lock only if it was taken by this function */
  if (locking)
    maria_lock_database(file, F_UNLCK);

  /* Reset trn, that may have been set by repair */
  if (old_trn && old_trn != file->trn)
    _ma_set_trn_for_table(file, old_trn);
  /*
    Map to HA_ADMIN codes. The repair log record is only written when
    there was no error and something was actually done.
  */
  error= error ? HA_ADMIN_FAILED :
    (optimize_done ?
     (write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED :
      HA_ADMIN_OK) : HA_ADMIN_ALREADY_DONE);
  DBUG_RETURN(error);
}
1820 
1821 
1822 /*
1823   Assign table indexes to a specific key cache.
1824 */
1825 
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1826 int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
1827 {
1828 #if 0 && NOT_IMPLEMENTED
1829   PAGECACHE *new_pagecache= check_opt->pagecache;
1830   const char *errmsg= 0;
1831   int error= HA_ADMIN_OK;
1832   ulonglong map;
1833   TABLE_LIST *table_list= table->pos_in_table_list;
1834   DBUG_ENTER("ha_maria::assign_to_keycache");
1835 
1836   table->keys_in_use_for_query.clear_all();
1837 
1838   if (table_list->process_index_hints(table))
1839     DBUG_RETURN(HA_ADMIN_FAILED);
1840   map= ~(ulonglong) 0;
1841   if (!table->keys_in_use_for_query.is_clear_all())
1842     /* use all keys if there's no list specified by the user through hints */
1843     map= table->keys_in_use_for_query.to_ulonglong();
1844 
1845   if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
1846   {
1847     char buf[STRING_BUFFER_USUAL_SIZE];
1848     my_snprintf(buf, sizeof(buf),
1849                 "Failed to flush to index file (errno: %d)", error);
1850     errmsg= buf;
1851     error= HA_ADMIN_CORRUPT;
1852   }
1853 
1854   if (error != HA_ADMIN_OK)
1855   {
1856     /* Send error to user */
1857     HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1858     if (!param)
1859       return HA_ADMIN_INTERNAL_ERROR;
1860 
1861     maria_chk_init(param);
1862     param->thd= thd;
1863     param->op_name= "assign_to_keycache";
1864     param->db_name= table->s->db.str;
1865     param->table_name= table->s->table_name.str;
1866     param->testflag= 0;
1867     _ma_check_print_error(param, errmsg);
1868   }
1869   DBUG_RETURN(error);
1870 #else
1871   return  HA_ADMIN_NOT_IMPLEMENTED;
1872 #endif
1873 }
1874 
1875 
1876 /*
1877   Preload pages of the index file for a table into the key cache.
1878 */
1879 
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1880 int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
1881 {
1882   ulonglong map;
1883   TABLE_LIST *table_list= table->pos_in_table_list;
1884 
1885   DBUG_ENTER("ha_maria::preload_keys");
1886 
1887   table->keys_in_use_for_query.clear_all();
1888 
1889   if (table_list->process_index_hints(table))
1890     DBUG_RETURN(HA_ADMIN_FAILED);
1891 
1892   map= ~(ulonglong) 0;
1893   /* Check validity of the index references */
1894   if (!table->keys_in_use_for_query.is_clear_all())
1895     /* use all keys if there's no list specified by the user through hints */
1896     map= table->keys_in_use_for_query.to_ulonglong();
1897 
1898   maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
1899               (void*) &thd->variables.preload_buff_size);
1900 
1901   int error;
1902 
1903   if ((error= maria_preload(file, map, table_list->ignore_leaves)))
1904   {
1905     char buf[MYSQL_ERRMSG_SIZE+20];
1906     const char *errmsg;
1907 
1908     switch (error) {
1909     case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
1910       errmsg= "Indexes use different block sizes";
1911       break;
1912     case HA_ERR_OUT_OF_MEM:
1913       errmsg= "Failed to allocate buffer";
1914       break;
1915     default:
1916       my_snprintf(buf, sizeof(buf),
1917                   "Failed to read from index file (errno: %d)", my_errno);
1918       errmsg= buf;
1919     }
1920 
1921     HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1922     if (!param)
1923       return HA_ADMIN_INTERNAL_ERROR;
1924 
1925     maria_chk_init(param);
1926     param->thd= thd;
1927     param->op_name= "preload_keys";
1928     param->db_name= table->s->db.str;
1929     param->table_name= table->s->table_name.str;
1930     param->testflag= 0;
1931     _ma_check_print_error(param, "%s", errmsg);
1932     DBUG_RETURN(HA_ADMIN_FAILED);
1933   }
1934   DBUG_RETURN(HA_ADMIN_OK);
1935 }
1936 
1937 
1938 /*
1939   Disable indexes, making it persistent if requested.
1940 
1941   SYNOPSIS
1942     disable_indexes()
1943     mode        mode of operation:
1944                 HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
1945                 HA_KEY_SWITCH_ALL          disable all keys
1946                 HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
1947                 HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent
1948 
1949   IMPLEMENTATION
1950     HA_KEY_SWITCH_NONUNIQ       is not implemented.
1951     HA_KEY_SWITCH_ALL_SAVE      is not implemented.
1952 
1953   RETURN
1954     0  ok
1955     HA_ERR_WRONG_COMMAND  mode not implemented.
1956 */
1957 
disable_indexes(uint mode)1958 int ha_maria::disable_indexes(uint mode)
1959 {
1960   int error;
1961 
1962   if (mode == HA_KEY_SWITCH_ALL)
1963   {
1964     /* call a storage engine function to switch the key map */
1965     error= maria_disable_indexes(file);
1966   }
1967   else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
1968   {
1969     maria_extra(file, HA_EXTRA_NO_KEYS, 0);
1970     info(HA_STATUS_CONST);                      // Read new key info
1971     error= 0;
1972   }
1973   else
1974   {
1975     /* mode not implemented */
1976     error= HA_ERR_WRONG_COMMAND;
1977   }
1978   return error;
1979 }
1980 
1981 
1982 /*
1983   Enable indexes, making it persistent if requested.
1984 
1985   SYNOPSIS
1986     enable_indexes()
1987     mode        mode of operation:
1988                 HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
1989                 HA_KEY_SWITCH_ALL          enable all keys
1990                 HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
1991                 HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent
1992 
1993   DESCRIPTION
    Enable indexes, which might have been disabled by disable_indexes() before.
1995     The modes without _SAVE work only if both data and indexes are empty,
1996     since the MARIA repair would enable them persistently.
1997     To be sure in these cases, call handler::delete_all_rows() before.
1998 
1999   IMPLEMENTATION
2000     HA_KEY_SWITCH_NONUNIQ       is not implemented.
2001     HA_KEY_SWITCH_ALL_SAVE      is not implemented.
2002 
2003   RETURN
2004     0  ok
2005     !=0  Error, among others:
2006     HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
2007     HA_ERR_WRONG_COMMAND  mode not implemented.
2008 */
2009 
enable_indexes(uint mode)2010 int ha_maria::enable_indexes(uint mode)
2011 {
2012   int error;
2013   ha_rows start_rows= file->state->records;
2014   DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));
2015   if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
2016   {
2017     /* All indexes are enabled already. */
2018     return 0;
2019   }
2020 
2021   if (mode == HA_KEY_SWITCH_ALL)
2022   {
2023     error= maria_enable_indexes(file);
2024     /*
2025        Do not try to repair on error,
2026        as this could make the enabled state persistent,
2027        but mode==HA_KEY_SWITCH_ALL forbids it.
2028     */
2029   }
2030   else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
2031   {
2032     THD *thd= table->in_use;
2033     HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
2034     if (!param)
2035       return HA_ADMIN_INTERNAL_ERROR;
2036 
2037     const char *save_proc_info= thd_proc_info(thd, "Creating index");
2038 
2039     maria_chk_init(param);
2040     param->op_name= "recreating_index";
2041     param->testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
2042                      T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
2043     /*
2044       Don't lock and unlock table if it's locked.
2045       Normally table should be locked.  This test is mostly for safety.
2046     */
2047     if (likely(file->lock_type != F_UNLCK))
2048       param->testflag|= T_NO_LOCKS;
2049 
2050     if (file->create_unique_index_by_sort)
2051       param->testflag|= T_CREATE_UNIQUE_BY_SORT;
2052 
2053     if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
2054     {
2055       bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
2056       /*
2057         Don't bump create_rename_lsn, because UNDO_BULK_INSERT
2058         should not be skipped in case of crash during repair.
2059       */
2060       param->testflag|= T_NO_CREATE_RENAME_LSN;
2061     }
2062 
2063     param->myf_rw &= ~MY_WAIT_IF_FULL;
2064     param->orig_sort_buffer_length= THDVAR(thd,sort_buffer_size);
2065     param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
2066     param->tmpdir= &mysql_tmpdir_list;
2067     if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair)
2068     {
2069       sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
2070                         "retrying",
2071                         my_errno, param->db_name, param->table_name);
2072       /* This should never fail normally */
2073       DBUG_ASSERT(thd->killed != 0);
2074       /* Repairing by sort failed. Now try standard repair method. */
2075       param->testflag &= ~T_REP_BY_SORT;
2076       file->state->records= start_rows;
2077       error= (repair(thd, param, 0) != HA_ADMIN_OK);
2078       /*
2079         If the standard repair succeeded, clear all error messages which
2080         might have been set by the first repair. They can still be seen
2081         with SHOW WARNINGS then.
2082       */
2083       if (!error)
2084         thd->clear_error();
2085     }
2086     info(HA_STATUS_CONST);
2087     thd_proc_info(thd, save_proc_info);
2088   }
2089   else
2090   {
2091     /* mode not implemented */
2092     error= HA_ERR_WRONG_COMMAND;
2093   }
2094   DBUG_EXECUTE_IF("maria_flush_whole_log",
2095                   {
2096                     DBUG_PRINT("maria_flush_whole_log", ("now"));
2097                     translog_flush(translog_get_horizon());
2098                   });
2099   DBUG_EXECUTE_IF("maria_crash_enable_index",
2100                   {
2101                     DBUG_PRINT("maria_crash_enable_index", ("now"));
2102                     DBUG_SUICIDE();
2103                   });
2104   return error;
2105 }
2106 
2107 
2108 /*
2109   Test if indexes are disabled.
2110 
2111 
2112   SYNOPSIS
2113     indexes_are_disabled()
2114       no parameters
2115 
2116 
2117   RETURN
2118     0  indexes are not disabled
2119     1  all indexes are disabled
2120    [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
2121 */
2122 
indexes_are_disabled(void)2123 int ha_maria::indexes_are_disabled(void)
2124 {
2125   return maria_indexes_are_disabled(file);
2126 }
2127 
2128 
2129 /*
2130   prepare for a many-rows insert operation
2131   e.g. - disable indexes (if they can be recreated fast) or
2132   activate special bulk-insert optimizations
2133 
2134   SYNOPSIS
2135    start_bulk_insert(rows, flags)
2136    rows        Rows to be inserted
2137                 0 if we don't know
2138    flags       Flags to control index creation
2139 
2140   NOTICE
2141     Do not forget to call end_bulk_insert() later!
2142 */
2143 
void ha_maria::start_bulk_insert(ha_rows rows, uint flags)
{
  DBUG_ENTER("ha_maria::start_bulk_insert");
  THD *thd= table->in_use;
  MARIA_SHARE *share= file->s;
  /* Set if at least one index was deactivated by this call */
  bool index_disabled= 0;
  DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));

  /* don't enable row cache if too few rows */
  if ((!rows || rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE) && !has_long_unique())
  {
    /*
      Write cache size: read_buff_size, reduced to the estimated size of
      the rows to insert when the number of rows is known
    */
    ulonglong size= thd->variables.read_buff_size, tmp;
    if (rows)
    {
      if (file->state->records)
      {
        MARIA_INFO maria_info;
        maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
        set_if_smaller(size, maria_info.mean_reclength * rows);
      }
      else if (table->s->avg_row_length)
        set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
    }
    tmp= (ulong) size;                          // Safe because of limits
    maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp);
  }

  /* Indexes can only be disabled below if all of them are active now */
  can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
                                                share->base.keys));
  bulk_insert_single_undo= BULK_INSERT_NONE;

  if (!(specialflag & SPECIAL_SAFE_MODE))
  {
    /*
       Only disable old index if the table was empty and we are inserting
       a lot of rows.
       We should not do this for only a few rows as this is slower and
       we don't want to update the key statistics based of only a few rows.
       Index file rebuild requires an exclusive lock, so if versioning is on
       don't do it (see how ha_maria::store_lock() tries to predict repair).
       We can repair index only if we have an exclusive (TL_WRITE) lock or
       if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK.

       To see if table is empty, we shouldn't rely on the old record
       count from our transaction's start (if that old count is 0 but
       now there are records in the table, we would wrongly destroy
       them).  So we need to look at share->state.state.records.  As a
       safety net for now, we don't remove the test of
       file->state->records, because there is uncertainty on what will
       happen during repair if the two states disagree.

       We also have to check in case of transactional tables that the
       user has not used LOCK TABLE on the table twice.
    */
    if ((file->state->records == 0) &&
        (share->state.state.records == 0) && can_enable_indexes &&
        (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
        (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK) &&
        (!share->have_versioning || !share->now_transactional ||
         file->used_tables->use_count == 1))
    {
      /**
         @todo for a single-row INSERT SELECT, we will go into repair, which
         is more costly (flushes, syncs) than a row write.
      */
      if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
      {
        /* Internal table; If we get a duplicate something is very wrong */
        file->update|= HA_STATE_CHANGED;
        index_disabled= share->base.keys > 0;
        maria_clear_all_keys_active(file->s->state.key_map);
      }
      else
      {
        /* If set, unique keys are disabled too and recreated by sort */
        my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
        /*
          Deactivate all indexes that can be recreated fast.
          These include packed keys on which sorting will use more temporary
          space than the max allowed file length or for which the unpacked keys
          will take much more space than packed keys.
          Note that 'rows' may be zero for the case when we don't know how many
          rows we will put into the file.
        */
        /* NOTE(review): shadows the outer 'share'; both are file->s */
        MARIA_SHARE *share= file->s;
        MARIA_KEYDEF    *key=share->keyinfo;
        uint          i;

        DBUG_ASSERT(share->state.state.records == 0 &&
                    (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
        for (i=0 ; i < share->base.keys ; i++,key++)
        {
          /*
            Skip spatial, auto-increment, R-tree and long-hash keys, keys
            too big for sorting, and unique keys unless all_keys is set
          */
          if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX)) &&
              ! maria_too_big_key_for_sort(key,rows) && share->base.auto_key != i+1 &&
              (all_keys || !(key->flag & HA_NOSAME)) &&
              table->key_info[i].algorithm != HA_KEY_ALG_LONG_HASH)
          {
            maria_clear_key_active(share->state.key_map, i);
            index_disabled= 1;
            file->update|= HA_STATE_CHANGED;
            file->create_unique_index_by_sort= all_keys;
          }
        }
      }
      if (share->now_transactional)
      {
        /*
          Log one record for the whole bulk insert and disable logging
          for the table until end_bulk_insert()
        */
        bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
        write_log_record_for_bulk_insert(file);
        _ma_tmp_disable_logging_for_table(file, TRUE);
        /*
          Pages currently in the page cache have type PAGECACHE_LSN_PAGE, we
          are not allowed to overwrite them with PAGECACHE_PLAIN_PAGE, so
          throw them away. It is not losing data, because we just wrote and
          forced an UNDO which will for sure empty the table if we crash. The
          upcoming unique-key insertions however need a proper index, so we
          cannot leave the corrupted on-disk index file, thus we truncate it.
        */
        maria_delete_all_rows(file);
      }
    }
    else if (!file->bulk_insert &&
             (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
    {
      /* Indexes stay enabled: use the bulk insert buffer instead */
      maria_init_bulk_insert(file,
                             (size_t) thd->variables.bulk_insert_buff_size,
                             rows);
    }
  }
  /* end_bulk_insert() re-enables indexes only if some were disabled here */
  can_enable_indexes= index_disabled;
  DBUG_VOID_RETURN;
}
2274 
2275 
2276 /*
2277   end special bulk-insert optimizations,
2278   which have been activated by start_bulk_insert().
2279 
2280   SYNOPSIS
2281     end_bulk_insert()
2282     no arguments
2283 
2284   RETURN
2285     0     OK
2286     != 0  Error
2287 */
2288 
end_bulk_insert()2289 int ha_maria::end_bulk_insert()
2290 {
2291   int first_error, error;
2292   my_bool abort= file->s->deleting;
2293   DBUG_ENTER("ha_maria::end_bulk_insert");
2294 
2295   if ((first_error= maria_end_bulk_insert(file, abort)))
2296     abort= 1;
2297 
2298   if ((error= maria_extra(file, HA_EXTRA_NO_CACHE, 0)))
2299   {
2300     first_error= first_error ? first_error : error;
2301     abort= 1;
2302   }
2303 
2304   if (!abort && can_enable_indexes)
2305     if ((error= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE)))
2306       first_error= first_error ? first_error : error;
2307 
2308   if (bulk_insert_single_undo != BULK_INSERT_NONE)
2309   {
2310     /*
2311       Table was transactional just before start_bulk_insert().
2312       No need to flush pages if we did a repair (which already flushed).
2313     */
2314     if ((error= _ma_reenable_logging_for_table(file,
2315                                                bulk_insert_single_undo ==
2316                                                BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)))
2317       first_error= first_error ? first_error : error;
2318     bulk_insert_single_undo= BULK_INSERT_NONE;  // Safety
2319   }
2320   can_enable_indexes= 0;
2321   DBUG_RETURN(first_error);
2322 }
2323 
2324 
check_and_repair(THD * thd)2325 bool ha_maria::check_and_repair(THD *thd)
2326 {
2327   int error, crashed;
2328   HA_CHECK_OPT check_opt;
2329   const CSET_STRING query_backup= thd->query_string;
2330   DBUG_ENTER("ha_maria::check_and_repair");
2331 
2332   check_opt.init();
2333   check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
2334 
2335   error= 1;
2336   if (!aria_readonly &&
2337       (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
2338       STATE_MOVED)
2339   {
2340     /* Remove error about crashed table */
2341     thd->get_stmt_da()->clear_warning_info(thd->query_id);
2342     push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
2343                         ER_CRASHED_ON_USAGE,
2344                         "Zerofilling moved table %s", table->s->path.str);
2345     sql_print_information("Zerofilling moved table:  '%s'",
2346                           table->s->path.str);
2347     if (!(error= zerofill(thd, &check_opt)))
2348       DBUG_RETURN(0);
2349   }
2350 
2351   /*
2352     if we got this far - the table is crashed.
2353     but don't auto-repair if maria_recover_options is not set
2354   */
2355   if (!maria_recover_options)
2356     DBUG_RETURN(error);
2357 
2358   error= 0;
2359   // Don't use quick if deleted rows
2360   if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
2361     check_opt.flags |= T_QUICK;
2362 
2363   thd->set_query((char*) table->s->table_name.str,
2364                  (uint) table->s->table_name.length, system_charset_info);
2365 
2366   if (!(crashed= maria_is_crashed(file)))
2367   {
2368     sql_print_warning("Checking table:   '%s'", table->s->path.str);
2369     crashed= check(thd, &check_opt);
2370   }
2371 
2372   if (crashed)
2373   {
2374     bool save_log_all_errors;
2375     sql_print_warning("Recovering table: '%s'", table->s->path.str);
2376     save_log_all_errors= thd->log_all_errors;
2377     thd->log_all_errors|= (thd->variables.log_warnings > 2);
2378     check_opt.flags=
2379       ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
2380        (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
2381        T_AUTO_REPAIR);
2382     if (repair(thd, &check_opt))
2383       error= 1;
2384     thd->log_all_errors= save_log_all_errors;
2385   }
2386   thd->set_query(query_backup);
2387   DBUG_RETURN(error);
2388 }
2389 
2390 
is_crashed() const2391 bool ha_maria::is_crashed() const
2392 {
2393   return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) ||
2394           (my_disable_locking && file->s->state.open_count));
2395 }
2396 
/*
  Guard for operations that are not supported under a concurrent-insert
  lock. If the table lock is TL_WRITE_CONCURRENT_INSERT and the table is
  not a sequence, raise ER_CHECK_NOT_IMPLEMENTED with 'msg' and make the
  ENCLOSING function return 1. Expects 'file' and 'table' to be in scope.
*/
#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
  do { \
    if (file->lock.type == TL_WRITE_CONCURRENT_INSERT && !table->s->sequence) \
    { \
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
      return 1; \
    } \
  } while(0)
2405 
update_row(const uchar * old_data,const uchar * new_data)2406 int ha_maria::update_row(const uchar * old_data, const uchar * new_data)
2407 {
2408   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
2409   return maria_update(file, old_data, new_data);
2410 }
2411 
2412 
delete_row(const uchar * buf)2413 int ha_maria::delete_row(const uchar * buf)
2414 {
2415   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
2416   return maria_delete(file, buf);
2417 }
2418 
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2419 int ha_maria::index_read_map(uchar * buf, const uchar * key,
2420 			     key_part_map keypart_map,
2421 			     enum ha_rkey_function find_flag)
2422 {
2423   DBUG_ASSERT(inited == INDEX);
2424   register_handler(file);
2425   int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
2426   return error;
2427 }
2428 
2429 
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2430 int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
2431 				 key_part_map keypart_map,
2432 				 enum ha_rkey_function find_flag)
2433 {
2434   int error;
2435   register_handler(file);
2436 
2437   /* Use the pushed index condition if it matches the index we're scanning */
2438   end_range= NULL;
2439   if (index == pushed_idx_cond_keyno)
2440     ma_set_index_cond_func(file, handler_index_cond_check, this);
2441 
2442   error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
2443 
2444   ma_set_index_cond_func(file, NULL, 0);
2445   return error;
2446 }
2447 
2448 
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)2449 int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
2450 				  key_part_map keypart_map)
2451 {
2452   DBUG_ENTER("ha_maria::index_read_last_map");
2453   DBUG_ASSERT(inited == INDEX);
2454   register_handler(file);
2455   int error= maria_rkey(file, buf, active_index, key, keypart_map,
2456                         HA_READ_PREFIX_LAST);
2457   DBUG_RETURN(error);
2458 }
2459 
2460 
index_next(uchar * buf)2461 int ha_maria::index_next(uchar * buf)
2462 {
2463   DBUG_ASSERT(inited == INDEX);
2464   register_handler(file);
2465   int error= maria_rnext(file, buf, active_index);
2466   return error;
2467 }
2468 
2469 
index_prev(uchar * buf)2470 int ha_maria::index_prev(uchar * buf)
2471 {
2472   DBUG_ASSERT(inited == INDEX);
2473   register_handler(file);
2474   int error= maria_rprev(file, buf, active_index);
2475   return error;
2476 }
2477 
2478 
index_first(uchar * buf)2479 int ha_maria::index_first(uchar * buf)
2480 {
2481   DBUG_ASSERT(inited == INDEX);
2482   register_handler(file);
2483   int error= maria_rfirst(file, buf, active_index);
2484   return error;
2485 }
2486 
2487 
index_last(uchar * buf)2488 int ha_maria::index_last(uchar * buf)
2489 {
2490   DBUG_ASSERT(inited == INDEX);
2491   register_handler(file);
2492   int error= maria_rlast(file, buf, active_index);
2493   return error;
2494 }
2495 
2496 
index_next_same(uchar * buf,const uchar * key,uint length)2497 int ha_maria::index_next_same(uchar * buf,
2498                               const uchar *key __attribute__ ((unused)),
2499                               uint length __attribute__ ((unused)))
2500 {
2501   int error;
2502   DBUG_ASSERT(inited == INDEX);
2503   register_handler(file);
2504   /*
2505     TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
2506     happens
2507   */
2508   do
2509   {
2510     error= maria_rnext_same(file,buf);
2511   } while (error == HA_ERR_RECORD_DELETED);
2512   return error;
2513 }
2514 
2515 
index_init(uint idx,bool sorted)2516 int ha_maria::index_init(uint idx, bool sorted)
2517 {
2518   active_index=idx;
2519   if (pushed_idx_cond_keyno == idx)
2520     ma_set_index_cond_func(file, handler_index_cond_check, this);
2521   return 0;
2522 }
2523 
2524 
index_end()2525 int ha_maria::index_end()
2526 {
2527   active_index=MAX_KEY;
2528   ma_set_index_cond_func(file, NULL, 0);
2529   in_range_check_pushed_down= FALSE;
2530   ds_mrr.dsmrr_close();
2531   return 0;
2532 }
2533 
2534 
rnd_init(bool scan)2535 int ha_maria::rnd_init(bool scan)
2536 {
2537   if (scan)
2538     return maria_scan_init(file);
2539   return maria_reset(file);                        // Free buffers
2540 }
2541 
2542 
rnd_end()2543 int ha_maria::rnd_end()
2544 {
2545   ds_mrr.dsmrr_close();
2546   /* Safe to call even if we don't have started a scan */
2547   maria_scan_end(file);
2548   return 0;
2549 }
2550 
2551 
rnd_next(uchar * buf)2552 int ha_maria::rnd_next(uchar *buf)
2553 {
2554   register_handler(file);
2555   return maria_scan(file, buf);
2556 }
2557 
2558 
remember_rnd_pos()2559 int ha_maria::remember_rnd_pos()
2560 {
2561   register_handler(file);
2562   return (*file->s->scan_remember_pos)(file, &remember_pos);
2563 }
2564 
2565 
restart_rnd_next(uchar * buf)2566 int ha_maria::restart_rnd_next(uchar *buf)
2567 {
2568   int error;
2569   register_handler(file);
2570   if ((error= (*file->s->scan_restore_pos)(file, remember_pos)))
2571     return error;
2572   return rnd_next(buf);
2573 }
2574 
2575 
rnd_pos(uchar * buf,uchar * pos)2576 int ha_maria::rnd_pos(uchar *buf, uchar *pos)
2577 {
2578   register_handler(file);
2579   int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
2580   return error;
2581 }
2582 
2583 
position(const uchar * record)2584 void ha_maria::position(const uchar *record)
2585 {
2586   my_off_t row_position= maria_position(file);
2587   my_store_ptr(ref, ref_length, row_position);
2588 }
2589 
2590 
info(uint flag)2591 int ha_maria::info(uint flag)
2592 {
2593   MARIA_INFO maria_info;
2594   char name_buff[FN_REFLEN];
2595 
2596   (void) maria_status(file, &maria_info, flag);
2597   if (flag & HA_STATUS_VARIABLE)
2598   {
2599     stats.records=           maria_info.records;
2600     stats.deleted=           maria_info.deleted;
2601     stats.data_file_length=  maria_info.data_file_length;
2602     stats.index_file_length= maria_info.index_file_length;
2603     stats.delete_length=     maria_info.delete_length;
2604     stats.check_time=        maria_info.check_time;
2605     stats.mean_rec_length=   maria_info.mean_reclength;
2606     stats.checksum=          file->state->checksum;
2607   }
2608   if (flag & HA_STATUS_CONST)
2609   {
2610     TABLE_SHARE *share= table->s;
2611     stats.max_data_file_length=  maria_info.max_data_file_length;
2612     stats.max_index_file_length= maria_info.max_index_file_length;
2613     stats.create_time= maria_info.create_time;
2614     ref_length= maria_info.reflength;
2615     share->db_options_in_use= maria_info.options;
2616     stats.block_size= maria_block_size;
2617     stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
2618 
2619     /* Update share */
2620     share->keys_in_use.set_prefix(share->keys);
2621     share->keys_in_use.intersect_extended(maria_info.key_map);
2622     share->keys_for_keyread.intersect(share->keys_in_use);
2623     share->db_record_offset= maria_info.record_offset;
2624     if (share->key_parts)
2625     {
2626       ulong *to= table->key_info[0].rec_per_key, *end;
2627       double *from= maria_info.rec_per_key;
2628       for (end= to+ share->key_parts ; to < end ; to++, from++)
2629         *to= (ulong) (*from + 0.5);
2630     }
2631 
2632     /*
2633        Set data_file_name and index_file_name to point at the symlink value
2634        if table is symlinked (Ie;  Real name is not same as generated name)
2635     */
2636     data_file_name= index_file_name= 0;
2637     fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
2638               MY_APPEND_EXT | MY_UNPACK_FILENAME);
2639     if (strcmp(name_buff, maria_info.data_file_name) &&
2640         maria_info.data_file_name[0])
2641       data_file_name= maria_info.data_file_name;
2642     fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
2643               MY_APPEND_EXT | MY_UNPACK_FILENAME);
2644     if (strcmp(name_buff, maria_info.index_file_name) &&
2645         maria_info.index_file_name[0])
2646       index_file_name=maria_info.index_file_name;
2647   }
2648   if (flag & HA_STATUS_ERRKEY)
2649   {
2650     errkey= maria_info.errkey;
2651     my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
2652   }
2653   if (flag & HA_STATUS_TIME)
2654     stats.update_time= maria_info.update_time;
2655   if (flag & HA_STATUS_AUTO)
2656     stats.auto_increment_value= maria_info.auto_increment;
2657 
2658   return 0;
2659 }
2660 
2661 
/*
  Forward an "extra" hint to the Aria layer.

  Some operations are ignored (0 is returned without calling maria_extra()):
    - HA_EXTRA_KEYREAD when SPECIAL_SAFE_MODE is set in specialflag
    - HA_EXTRA_WRITE_CACHE when has_long_unique() is true

  For the PREPARE_FOR_DROP / PREPARE_FOR_RENAME / PREPARE_FOR_FORCED_CLOSE
  operations, file->trn is temporarily pointed at the THD's transaction; it
  is always restored to its value on entry before returning.

  Returns the result of maria_extra() for all operations that are not
  ignored.
*/
int ha_maria::extra(enum ha_extra_function operation)
{
  int tmp;
  /* Remembered so that file->trn can be restored before returning */
  TRN *old_trn= file->trn;
  if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
    return 0;
#ifdef NOT_USED
  if (operation == HA_EXTRA_MMAP && !opt_maria_use_mmap)
    return 0;
#endif
  if (operation == HA_EXTRA_WRITE_CACHE && has_long_unique())
    return 0;

  /*
    We have to set file->trn here because in some cases we call
    extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
    without calling commit/rollback in between.  If file->trn is not set
    we can't remove file->share from the transaction list in the extra() call.

    In current code we don't have to do this for HA_EXTRA_PREPARE_FOR_RENAME
    as this is only used for the intermediate table used by ALTER TABLE which
    is not part of the transaction (it's not in the TRN list). Better to
    keep this for now, to not break anything in a stable release.
    When HA_EXTRA_PREPARE_FOR_RENAME is not handled below, we can change
    the warnings in _ma_remove_table_from_trnman() to asserts.

    table->in_use is not set in the case this is done as part of closefrm()
    as part of drop table.
  */

  if (file->s->now_transactional && table->in_use &&
      (operation == HA_EXTRA_PREPARE_FOR_DROP ||
       operation == HA_EXTRA_PREPARE_FOR_RENAME ||
       operation == HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
  {
    /* The THD_TRN macro expects a local variable named 'thd' */
    THD *thd= table->in_use;
    file->trn= THD_TRN;
  }
  DBUG_ASSERT(file->s->base.born_transactional || file->trn == 0 ||
              file->trn == &dummy_transaction_object);

  tmp= maria_extra(file, operation, 0);
  /*
    Restore trn if it was changed above.
    Note that table could be removed from trn->used_tables and
    trn->used_instances if trn was set and some of the above operations
    was used. This is ok as the table should not be part of any transaction
    after this and thus doesn't need to be part of any of the above lists.
  */
  file->trn= old_trn;
  return tmp;
}
2714 
reset(void)2715 int ha_maria::reset(void)
2716 {
2717   ma_set_index_cond_func(file, NULL, 0);
2718   ds_mrr.dsmrr_close();
2719   if (file->trn)
2720   {
2721     /* Next statement is a new statement. Ensure it's logged */
2722     trnman_set_flags(file->trn,
2723                      trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED);
2724   }
2725   return maria_reset(file);
2726 }
2727 
2728 /* To be used with WRITE_CACHE and EXTRA_CACHE */
2729 
extra_opt(enum ha_extra_function operation,ulong cache_size)2730 int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
2731 {
2732   if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
2733     return 0;
2734   return maria_extra(file, operation, (void*) &cache_size);
2735 }
2736 
2737 
auto_repair(int error) const2738 bool ha_maria::auto_repair(int error) const
2739 {
2740   /* Always auto-repair moved tables (error == HA_ERR_OLD_FILE) */
2741   return ((MY_TEST(maria_recover_options & HA_RECOVER_ANY) &&
2742            error == HA_ERR_CRASHED_ON_USAGE) ||
2743           error == HA_ERR_OLD_FILE);
2744 
2745 }
2746 
2747 
delete_all_rows()2748 int ha_maria::delete_all_rows()
2749 {
2750   THD *thd= table->in_use;
2751   TRN *trn= file->trn;
2752   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("TRUNCATE in WRITE CONCURRENT");
2753 #ifdef EXTRA_DEBUG
2754   if (trn && ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
2755   {
2756     trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
2757                      TRN_STATE_TABLES_CAN_CHANGE);
2758     (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2759                                    (uchar*) thd->query(), thd->query_length());
2760   }
2761 #endif
2762   /*
2763     If we are under LOCK TABLES, we have to do a commit as
2764     delete_all_rows() can't be rolled back
2765   */
2766   if (table->in_use->locked_tables_mode && trn &&
2767       trnman_has_locked_tables(trn))
2768   {
2769     int error;
2770     if ((error= implicit_commit(thd, 1)))
2771       return error;
2772   }
2773 
2774   /* Note that this can't be rolled back */
2775   return maria_delete_all_rows(file);
2776 }
2777 
2778 
delete_table(const char * name)2779 int ha_maria::delete_table(const char *name)
2780 {
2781   THD *thd= current_thd;
2782   (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
2783                                  (uchar*) thd->query(), thd->query_length());
2784   return maria_delete_table(name);
2785 }
2786 
2787 
2788 /* This is mainly for temporary tables, so no logging necessary */
2789 
drop_table(const char * name)2790 void ha_maria::drop_table(const char *name)
2791 {
2792   DBUG_ASSERT(!file || file->s->temporary);
2793   (void) ha_close();
2794   (void) maria_delete_table_files(name, 1, MY_WME);
2795 }
2796 
2797 
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2798 void ha_maria::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2799 {
2800   handler::change_table_ptr(table_arg, share);
2801   if (file)
2802     file->external_ref= table_arg;
2803 }
2804 
2805 
/*
  Lock or unlock the table for the given thread.

  lock_type == F_UNLCK means unlock, any other value means lock.

  For tables that were created transactional (born_transactional):
    - on lock:   the locked-tables counter of an already attached
                 transaction is incremented, logging is disabled if
                 thd->transaction->on is false, and file->autocommit is set
    - on unlock: logging is re-enabled, the handle is detached from its
                 transaction, the locked-tables counter is decremented and,
                 when it reaches zero and file->autocommit is set, the
                 transaction is committed and removed from the THD

  Finally maria_lock_database() is called for all tables.

  Returns 0 on success, 1 if logging could not be re-enabled,
  HA_ERR_COMMIT_ERROR if the commit failed, or the non-zero result of
  maria_lock_database() (which takes precedence over a commit error).
*/
int ha_maria::external_lock(THD *thd, int lock_type)
{
  int result= 0, result2;
  DBUG_ENTER("ha_maria::external_lock");
  file->external_ref= (void*) table;            // For ma_killed()
  /*
    We don't test now_transactional because it may vary between lock/unlock
    and thus confuse our reference counting.
    It is critical to skip non-transactional tables: user-visible temporary
    tables get an external_lock() when read/written for the first time, but no
    corresponding unlock (they just stay locked and are later dropped while
    locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp"
    would never commit as its "locked_tables" count would stay 1.
    When Maria has has_transactions()==TRUE, open_temporary_table()
    (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
    external_lock(F_UNLCK) will happen and we can then allow the user to
    create transactional temporary tables.
  */
  if (file->s->base.born_transactional)
  {
    /* Transactional table */
    if (lock_type != F_UNLCK)
    {
      if (file->trn)
      {
        /* This can only happen with tables created with clone() */
        DBUG_PRINT("info",("file->trn: %p", file->trn));
        trnman_increment_locked_tables(file->trn);
      }

      if (!thd->transaction->on)
      {
        /*
          No need to log REDOs/UNDOs. If this is an internal temporary table
          which will be renamed to a permanent table (like in ALTER TABLE),
          the rename happens after unlocking so will be durable (and the table
          will get its create_rename_lsn).
          Note: if we wanted to enable users to have an old backup and apply
          tons of archived logs to roll-forward, we could then not disable
          REDOs/UNDOs in this case.
        */
        DBUG_PRINT("info", ("Disabling logging for table"));
        _ma_tmp_disable_logging_for_table(file, TRUE);
        file->autocommit= 0;
      }
      else
        file->autocommit= !(thd->variables.option_bits &
                            (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
#ifndef ARIA_HAS_TRANSACTIONS
      /*
        Until Aria has full transactions support, including MVCC support for
        delete and update and purging of old states, we have to commit for
        every statement. This overrides the value assigned just above.
      */
      file->autocommit=1;
#endif
    }
    else
    {
      /* We have to test for THD_TRN to protect against implicit commits */
      TRN *trn= (file->trn != &dummy_transaction_object && THD_TRN ? file->trn : 0);
      /* End of transaction */

      /*
        We always re-enable, don't rely on thd->transaction->on as it is
        sometimes reset to true after unlocking (see mysql_truncate() for a
        partitioned table based on Maria).
        Note that we can come here without having an exclusive lock on the
        table, for example in this case:
        external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
        abortion; external_lock(F_UNLCK). Fortunately, the re-enabling happens
        only if we were the thread which disabled logging.
      */
      if (_ma_reenable_logging_for_table(file, TRUE))
        DBUG_RETURN(1);
      _ma_reset_trn_for_table(file);
      /*
        Ensure that file->state points to the current number of rows. This
        is needed if someone calls maria_info() without first doing an
        external lock of the table
      */
      file->state= &file->s->state.state;
      if (trn)
      {
        DBUG_PRINT("info",
                   ("locked_tables: %u", trnman_has_locked_tables(trn)));
        DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
        /* Only act when this unlock brings the counter down to zero */
        if (trnman_has_locked_tables(trn) &&
            !trnman_decrement_locked_tables(trn))
        {
          /*
            OK should not have been sent to client yet (ACID).
            This is a bit excessive, ACID requires this only if there are some
            changes to commit (rollback shouldn't be tested).
          */
          DBUG_ASSERT(!thd->get_stmt_da()->is_sent() ||
                      thd->killed);
          /*
            If autocommit, commit transaction. This can happen when open and
            lock tables as part of creating triggers, in which case commit
            is not called.
            As long as ARIA_HAS_TRANSACTIONS is not defined, always commit.
          */
          if (file->autocommit)
          {
            if (ma_commit(trn))
              result= HA_ERR_COMMIT_ERROR;
            /* The THD no longer has an Aria transaction attached */
            thd_set_ha_data(thd, maria_hton, 0);
          }
        }
        trnman_set_flags(trn, trnman_get_flags(trn) & ~ TRN_STATE_INFO_LOGGED);
      }
    }
  } /* if transactional table */
  /*
    Temporary tables are locked with F_EXTRA_LCK instead of the requested
    lock type; unlock requests are passed through unchanged.
  */
  if ((result2= maria_lock_database(file, !table->s->tmp_table ?
                                    lock_type : ((lock_type == F_UNLCK) ?
                                                 F_UNLCK : F_EXTRA_LCK))))
    result= result2;
  if (!file->s->base.born_transactional)
    file->state= &file->s->state.state;         // Restore state if clone

  /* Remember stack end for this thread */
  file->stack_end_ptr= &ha_thd()->mysys_var->stack_ends_here;
  DBUG_RETURN(result);
}
2931 
start_stmt(THD * thd,thr_lock_type lock_type)2932 int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
2933 {
2934   TRN *trn;
2935   if (file->s->base.born_transactional)
2936   {
2937     trn= THD_TRN;
2938     DBUG_ASSERT(trn); // this may be called only after external_lock()
2939     DBUG_ASSERT(trnman_has_locked_tables(trn));
2940     DBUG_ASSERT(lock_type != TL_UNLOCK);
2941     DBUG_ASSERT(file->trn == trn);
2942 
2943     /*
2944       As external_lock() was already called, don't increment locked_tables.
2945       Note that we call the function below possibly several times when
2946       statement starts (once per table). This is ok as long as that function
2947       does cheap operations. Otherwise, we will need to do it only on first
2948       call to start_stmt().
2949     */
2950     trnman_new_statement(trn);
2951 
2952 #ifdef EXTRA_DEBUG
2953     if (!(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED) &&
2954         trnman_get_flags(trn) & TRN_STATE_TABLES_CAN_CHANGE)
2955     {
2956       trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED);
2957       (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2958                                      (uchar*) thd->query(),
2959                                      thd->query_length());
2960     }
2961 #endif
2962   }
2963   return 0;
2964 }
2965 
2966 
2967 /*
2968   Reset THD_TRN and all file->trn related to the transaction
2969   This is needed as some calls, like extra() or external_lock() may access
2970   it before next transaction is started
2971 */
2972 
reset_thd_trn(THD * thd,MARIA_HA * first_table)2973 static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
2974 {
2975   DBUG_ENTER("reset_thd_trn");
2976   thd_set_ha_data(thd, maria_hton, 0);
2977   MARIA_HA *next;
2978   for (MARIA_HA *table= first_table; table ; table= next)
2979   {
2980     next= table->trn_next;
2981     _ma_reset_trn_for_table(table);
2982 
2983     /*
2984       If table has changed by this statement, invalidate it from the query
2985       cache
2986     */
2987     if (table->row_changes != table->start_row_changes)
2988     {
2989       table->start_row_changes= table->row_changes;
2990       DBUG_ASSERT(table->s->chst_invalidator != NULL);
2991       (*table->s->chst_invalidator)(table->s->data_file_name.str);
2992     }
2993   }
2994   DBUG_VOID_RETURN;
2995 }
2996 
has_active_transaction(THD * thd)2997 bool ha_maria::has_active_transaction(THD *thd)
2998 {
2999   return (maria_hton && THD_TRN);
3000 }
3001 
/**
  Performs an implicit commit of the Maria transaction and creates a new
  one.

  This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
  be participant in the connection's transaction and so the implicit commits
  (ha_commit()) (like in end_active_trans()) will do the implicit commit
  without need to call this function which can then be removed.

  Nothing is done (and 0 is returned) if the Aria handlerton is not set,
  plugins are not initialized or the THD has no Aria transaction. The same
  applies if new_trn is false while the THD is in LTM_LOCK_TABLES or
  LTM_PRELOCKED_UNDER_LOCK_TABLES mode.

  @param  thd              THD object
  @param  new_trn          if a new transaction should be created; a new
                           transaction is not needed when we know that the
                           tables will be unlocked very soon.

  @return 0 on success, HA_ERR_COMMIT_ERROR if the commit failed,
          HA_ERR_OUT_OF_MEM if the new transaction or the live state of a
          table could not be set up (this replaces an earlier commit error)
*/

int ha_maria::implicit_commit(THD *thd, bool new_trn)
{
#ifndef MARIA_CANNOT_ROLLBACK
#error this method should be removed
#endif
  TRN *trn;
  int error;
  uint locked_tables;
  extern my_bool plugins_are_initialized;
  MARIA_HA *used_tables, *trn_next;
  DBUG_ENTER("ha_maria::implicit_commit");

  if (!maria_hton || !plugins_are_initialized || !(trn= THD_TRN))
    DBUG_RETURN(0);
  if (!new_trn && (thd->locked_tables_mode == LTM_LOCK_TABLES ||
                   thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
  {
    /*
      No commit inside LOCK TABLES.

      Note that we come here only at the end of the top statement
      (dispatch_command()), we are never committing inside a sub-statement.
    */
    DBUG_PRINT("info", ("locked_tables, skipping"));
    DBUG_RETURN(0);
  }

  /* Prepare to move used_instances and locked tables to new TRN object */
  locked_tables= trnman_has_locked_tables(trn);
  trnman_reset_locked_tables(trn, 0);
  relink_trn_used_instances(&used_tables, trn);

  error= 0;
  if (unlikely(ma_commit(trn)))
    error= HA_ERR_COMMIT_ERROR;
  if (!new_trn)
  {
    /* No new transaction wanted: detach the THD and all used handles */
    reset_thd_trn(thd, used_tables);
    goto end;
  }

  /*
    We need to create a new transaction and put it in THD_TRN. Indeed,
    tables may be under LOCK TABLES, and so they will start the next
    statement assuming they have a trn (see ha_maria::start_stmt()).
  */
  trn= trnman_new_trn(& thd->transaction->wt);
  thd_set_ha_data(thd, maria_hton, trn);
  if (unlikely(trn == NULL))
  {
    reset_thd_trn(thd, used_tables);
    error= HA_ERR_OUT_OF_MEM;
    goto end;
  }
  /*
    Move all locked tables to the new transaction
    We must do it here as otherwise file->thd and file->state may be
    stale pointers. We can't do this in start_stmt() as we don't know
    when we should call _ma_setup_live_state() and in some cases, like
    in check table, we use the table without calling start_stmt().
  */

  for (MARIA_HA *handler= used_tables; handler ;
       handler= trn_next)
  {
    /* Fetch the link first, before the handle is attached to the new trn */
    trn_next= handler->trn_next;
    DBUG_ASSERT(handler->s->base.born_transactional);

    /* If handler uses versioning */
    if (handler->s->lock_key_trees)
    {
      /* _ma_set_trn_for_table() will be called indirectly */
      if (_ma_setup_live_state(handler))
        error= HA_ERR_OUT_OF_MEM;
    }
    else
      _ma_set_trn_for_table(handler, trn);
  }
  /* This is just a commit, tables stay locked if they were: */
  trnman_reset_locked_tables(trn, locked_tables);

end:
  DBUG_RETURN(error);
}
3101 
3102 
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)3103 THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
3104                                      THR_LOCK_DATA **to,
3105                                      enum thr_lock_type lock_type)
3106 {
3107   /* Test if we can fix test below */
3108   DBUG_ASSERT(lock_type != TL_UNLOCK &&
3109               (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));
3110   if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
3111   {
3112     const enum enum_sql_command sql_command= thd->lex->sql_command;
3113     /*
3114       We have to disable concurrent inserts for INSERT ... SELECT or
3115       INSERT/UPDATE/DELETE with sub queries if we are using statement based
3116       logging.  We take the safe route here and disable this for all commands
3117       that only does reading that are not SELECT.
3118     */
3119     if (lock_type <= TL_READ_HIGH_PRIORITY &&
3120         !thd->is_current_stmt_binlog_format_row() &&
3121         (sql_command != SQLCOM_SELECT &&
3122          sql_command != SQLCOM_LOCK_TABLES) &&
3123         (thd->variables.option_bits & OPTION_BIN_LOG) &&
3124         mysql_bin_log.is_open())
3125       lock_type= TL_READ_NO_INSERT;
3126     else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
3127     {
3128       const enum enum_duplicates duplicates= thd->lex->duplicates;
3129       /*
3130         Explanation for the 3 conditions below, in order:
3131 
3132         - Bulk insert may use repair, which will cause problems if other
3133         threads try to read/insert to the table: disable versioning.
3134         Note that our read of file->state->records is incorrect, as such
3135         variable may have changed when we come to start_bulk_insert() (worse
3136         case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
3137         uses repair). This is prevented because start_bulk_insert() will not
3138         try repair if we enabled versioning.
3139         - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
3140         TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
3141         update/delete of a row and versioning doesn't support that
3142         - same for LOAD DATA CONCURRENT REPLACE.
3143       */
3144       if ((file->state->records == 0) ||
3145           (sql_command == SQLCOM_INSERT_SELECT && duplicates == DUP_UPDATE) ||
3146           (sql_command == SQLCOM_LOAD && duplicates == DUP_REPLACE))
3147         lock_type= TL_WRITE;
3148     }
3149     file->lock.type= lock_type;
3150   }
3151   *to++= &file->lock;
3152   return to;
3153 }
3154 
3155 
update_create_info(HA_CREATE_INFO * create_info)3156 void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
3157 {
3158   ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
3159   if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
3160   {
3161     create_info->auto_increment_value= stats.auto_increment_value;
3162   }
3163   create_info->data_file_name= data_file_name;
3164   create_info->index_file_name= index_file_name;
3165   /*
3166     Keep user-specified row_type for ALTER,
3167     but show the actually used one in SHOW
3168   */
3169   if (create_info->row_type != ROW_TYPE_DEFAULT &&
3170       !(thd_sql_command(ha_thd()) == SQLCOM_ALTER_TABLE))
3171     create_info->row_type= get_row_type();
3172   /*
3173     Show always page checksums, as this can be forced with
3174     maria_page_checksums variable
3175   */
3176   if (create_info->page_checksum == HA_CHOICE_UNDEF)
3177     create_info->page_checksum=
3178       (file->s->options & HA_OPTION_PAGE_CHECKSUM) ? HA_CHOICE_YES :
3179       HA_CHOICE_NO;
3180 }
3181 
3182 
get_row_type() const3183 enum row_type ha_maria::get_row_type() const
3184 {
3185   switch (file->s->data_file_type) {
3186   case STATIC_RECORD:     return ROW_TYPE_FIXED;
3187   case DYNAMIC_RECORD:    return ROW_TYPE_DYNAMIC;
3188   case BLOCK_RECORD:      return ROW_TYPE_PAGE;
3189   case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
3190   default:                return ROW_TYPE_NOT_USED;
3191   }
3192 }
3193 
3194 
maria_row_type(HA_CREATE_INFO * info)3195 static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
3196 {
3197   if (info->transactional == HA_CHOICE_YES)
3198     return BLOCK_RECORD;
3199   switch (info->row_type) {
3200   case ROW_TYPE_FIXED:   return STATIC_RECORD;
3201   case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD;
3202   default:               return BLOCK_RECORD;
3203   }
3204 }
3205 
3206 
create(const char * name,TABLE * table_arg,HA_CREATE_INFO * ha_create_info)3207 int ha_maria::create(const char *name, TABLE *table_arg,
3208                      HA_CREATE_INFO *ha_create_info)
3209 {
3210   int error;
3211   uint create_flags= 0, record_count= 0, i;
3212   char buff[FN_REFLEN];
3213   MARIA_KEYDEF *keydef;
3214   MARIA_COLUMNDEF *recinfo;
3215   MARIA_CREATE_INFO create_info;
3216   TABLE_SHARE *share= table_arg->s;
3217   uint options= share->db_options_in_use;
3218   ha_table_option_struct *table_options= table_arg->s->option_struct;
3219   enum data_file_type row_type;
3220   THD *thd= current_thd;
3221   DBUG_ENTER("ha_maria::create");
3222 
3223   for (i= 0; i < share->keys; i++)
3224   {
3225     if (table_arg->key_info[i].flags & HA_USES_PARSER)
3226     {
3227       create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
3228       break;
3229     }
3230   }
3231   /* Note: BLOCK_RECORD is used if table is transactional */
3232   row_type= maria_row_type(ha_create_info);
3233   if (ha_create_info->transactional == HA_CHOICE_YES &&
3234       ha_create_info->row_type != ROW_TYPE_PAGE &&
3235       ha_create_info->row_type != ROW_TYPE_NOT_USED &&
3236       ha_create_info->row_type != ROW_TYPE_DEFAULT)
3237     push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
3238                  ER_ILLEGAL_HA_CREATE_OPTION,
3239                  "Row format set to PAGE because of TRANSACTIONAL=1 option");
3240 
3241   if (share->table_type == TABLE_TYPE_SEQUENCE)
3242   {
3243     /* For sequences, the simples record type is appropriate */
3244     row_type= STATIC_RECORD;
3245     ha_create_info->transactional= HA_CHOICE_NO;
3246   }
3247 
3248   bzero((char*) &create_info, sizeof(create_info));
3249   if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
3250                           &record_count, &create_info)))
3251     DBUG_RETURN(error); /* purecov: inspected */
3252   create_info.max_rows= share->max_rows;
3253   create_info.reloc_rows= share->min_rows;
3254   create_info.with_auto_increment= share->next_number_key_offset == 0;
3255   create_info.auto_increment= (ha_create_info->auto_increment_value ?
3256                                ha_create_info->auto_increment_value -1 :
3257                                (ulonglong) 0);
3258   create_info.data_file_length= ((ulonglong) share->max_rows *
3259                                  share->avg_row_length);
3260   create_info.data_file_name= ha_create_info->data_file_name;
3261   create_info.index_file_name= ha_create_info->index_file_name;
3262   create_info.language= share->table_charset->number;
3263   if (ht != maria_hton)
3264   {
3265     /* S3 engine */
3266     create_info.s3_block_size= (ulong) table_options->s3_block_size;
3267     create_info.compression_algorithm= table_options->compression_algorithm;
3268   }
3269 
3270   /*
3271     Table is transactional:
3272     - If the user specify that table is transactional (in this case
3273       row type is forced to BLOCK_RECORD)
3274     - If they specify BLOCK_RECORD without specifying transactional behaviour
3275 
3276     Shouldn't this test be pushed down to maria_create()? Because currently,
3277     ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
3278     born_transactional==1, which confuses some recovery-related code.
3279   */
3280   create_info.transactional= (row_type == BLOCK_RECORD &&
3281                               ha_create_info->transactional != HA_CHOICE_NO);
3282 
3283   if (ha_create_info->tmp_table())
3284   {
3285     create_flags|= HA_CREATE_TMP_TABLE | HA_CREATE_DELAY_KEY_WRITE;
3286     create_info.transactional= 0;
3287   }
3288   if (ha_create_info->options & HA_CREATE_KEEP_FILES)
3289     create_flags|= HA_CREATE_KEEP_FILES;
3290   if (options & HA_OPTION_PACK_RECORD)
3291     create_flags|= HA_PACK_RECORD;
3292   if (options & HA_OPTION_CHECKSUM)
3293     create_flags|= HA_CREATE_CHECKSUM;
3294   if (options & HA_OPTION_DELAY_KEY_WRITE)
3295     create_flags|= HA_CREATE_DELAY_KEY_WRITE;
3296   if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
3297        maria_page_checksums) ||
3298        ha_create_info->page_checksum ==  HA_CHOICE_YES)
3299     create_flags|= HA_CREATE_PAGE_CHECKSUM;
3300 
3301   (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3302                                  (uchar*) thd->query(), thd->query_length());
3303 
3304   create_info.encrypted= maria_encrypt_tables && ht == maria_hton;
3305   /* TODO: Check that the following fn_format is really needed */
3306   error=
3307     maria_create(fn_format(buff, name, "", "",
3308                            MY_UNPACK_FILENAME | MY_APPEND_EXT),
3309                  row_type, share->keys, keydef,
3310                  record_count,  recinfo,
3311                  0, (MARIA_UNIQUEDEF *) 0,
3312                  &create_info, create_flags);
3313 
3314   my_free(recinfo);
3315   DBUG_RETURN(error);
3316 }
3317 
3318 
rename_table(const char * from,const char * to)3319 int ha_maria::rename_table(const char *from, const char *to)
3320 {
3321   THD *thd= current_thd;
3322   (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3323                                  (uchar*) thd->query(), thd->query_length());
3324   return maria_rename(from, to);
3325 }
3326 
3327 
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3328 void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
3329                                   ulonglong nb_desired_values,
3330                                   ulonglong *first_value,
3331                                   ulonglong *nb_reserved_values)
3332 {
3333   ulonglong nr;
3334   int error;
3335   uchar key[MARIA_MAX_KEY_BUFF];
3336 
3337   if (!table->s->next_number_key_offset)
3338   {                                             // Autoincrement at key-start
3339     ha_maria::info(HA_STATUS_AUTO);
3340     *first_value= stats.auto_increment_value;
3341     /* Maria has only table-level lock for now, so reserves to +inf */
3342     *nb_reserved_values= ULONGLONG_MAX;
3343     return;
3344   }
3345 
3346   /* it's safe to call the following if bulk_insert isn't on */
3347   maria_flush_bulk_insert(file, table->s->next_number_index);
3348 
3349   (void) extra(HA_EXTRA_KEYREAD);
3350   key_copy(key, table->record[0],
3351            table->key_info + table->s->next_number_index,
3352            table->s->next_number_key_offset);
3353   error= maria_rkey(file, table->record[1], (int) table->s->next_number_index,
3354                     key, make_prev_keypart_map(table->s->next_number_keypart),
3355                     HA_READ_PREFIX_LAST);
3356   if (error)
3357     nr= 1;
3358   else
3359   {
3360     /* Get data from record[1] */
3361     nr= ((ulonglong) table->next_number_field->
3362          val_int_offset(table->s->rec_buff_length) + 1);
3363   }
3364   extra(HA_EXTRA_NO_KEYREAD);
3365   *first_value= nr;
3366   /*
3367     MySQL needs to call us for next row: assume we are inserting ("a",null)
3368     here, we return 3, and next this statement will want to insert ("b",null):
3369     there is no reason why ("b",3+1) would be the good row to insert: maybe it
3370     already exists, maybe 3+1 is too large...
3371   */
3372   *nb_reserved_values= 1;
3373 }
3374 
3375 
3376 /*
3377   Find out how many rows there is in the given range
3378 
3379   SYNOPSIS
3380     records_in_range()
3381     inx                 Index to use
3382     min_key             Start of range.  Null pointer if from first key
3383     max_key             End of range. Null pointer if to last key
3384     pages               Store first and last page for the range in case of
3385                         b-trees. In other cases it's not touched.
3386 
3387   NOTES
3388     min_key.flag can have one of the following values:
3389       HA_READ_KEY_EXACT         Include the key in the range
3390       HA_READ_AFTER_KEY         Don't include key in range
3391 
3392     max_key.flag can have one of the following values:
3393       HA_READ_BEFORE_KEY        Don't include key in range
3394       HA_READ_AFTER_KEY         Include all 'end_key' values in the range
3395 
3396   RETURN
3397    HA_POS_ERROR         Something is wrong with the index tree.
3398    0                    There is no matching keys in the given range
3399    number > 0           There is approximately 'number' matching rows in
3400                         the range.
3401 */
3402 
records_in_range(uint inx,const key_range * min_key,const key_range * max_key,page_range * pages)3403 ha_rows ha_maria::records_in_range(uint inx, const key_range *min_key,
3404                                    const key_range *max_key, page_range *pages)
3405 {
3406   register_handler(file);
3407   return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key,
3408                                           pages);
3409 }
3410 
3411 
ft_init_ext(uint flags,uint inx,String * key)3412 FT_INFO *ha_maria::ft_init_ext(uint flags, uint inx, String * key)
3413 {
3414   return maria_ft_init_search(flags, file, inx,
3415                               (uchar *) key->ptr(), key->length(),
3416                               key->charset(), table->record[0]);
3417 }
3418 
3419 
ft_read(uchar * buf)3420 int ha_maria::ft_read(uchar * buf)
3421 {
3422   int error;
3423 
3424   if (!ft_handler)
3425     return -1;
3426 
3427   register_handler(file);
3428 
3429   thread_safe_increment(table->in_use->status_var.ha_read_next_count,
3430                         &LOCK_status);  // why ?
3431 
3432   error= ft_handler->please->read_next(ft_handler, (char*) buf);
3433 
3434   return error;
3435 }
3436 
3437 
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)3438 bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
3439                                           uint table_changes)
3440 {
3441   DBUG_ENTER("check_if_incompatible_data");
3442   uint options= table->s->db_options_in_use;
3443   enum ha_choice page_checksum= table->s->page_checksum;
3444 
3445   if (page_checksum == HA_CHOICE_UNDEF)
3446     page_checksum= file->s->options & HA_OPTION_PAGE_CHECKSUM ? HA_CHOICE_YES
3447                                                               : HA_CHOICE_NO;
3448 
3449   if (create_info->auto_increment_value != stats.auto_increment_value ||
3450       create_info->data_file_name != data_file_name ||
3451       create_info->index_file_name != index_file_name ||
3452       create_info->page_checksum != page_checksum ||
3453       create_info->transactional != table->s->transactional ||
3454       (maria_row_type(create_info) != data_file_type &&
3455        create_info->row_type != ROW_TYPE_DEFAULT) ||
3456       table_changes == IS_EQUAL_NO ||
3457       (table_changes & IS_EQUAL_PACK_LENGTH)) // Not implemented yet
3458     DBUG_RETURN(COMPATIBLE_DATA_NO);
3459 
3460   if ((options & (HA_OPTION_CHECKSUM |
3461                   HA_OPTION_DELAY_KEY_WRITE)) !=
3462       (create_info->table_options & (HA_OPTION_CHECKSUM |
3463                               HA_OPTION_DELAY_KEY_WRITE)))
3464     DBUG_RETURN(COMPATIBLE_DATA_NO);
3465   DBUG_RETURN(COMPATIBLE_DATA_YES);
3466 }
3467 
3468 
maria_hton_panic(handlerton * hton,ha_panic_function flag)3469 static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
3470 {
3471   /* If no background checkpoints, we need to do one now */
3472   int ret=0;
3473 
3474   if (!checkpoint_interval && !aria_readonly)
3475     ret= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);
3476 
3477   ret|= maria_panic(flag);
3478 
3479   maria_hton= 0;
3480   return ret;
3481 }
3482 
3483 
maria_commit(handlerton * hton,THD * thd,bool all)3484 static int maria_commit(handlerton *hton __attribute__ ((unused)),
3485                         THD *thd, bool all)
3486 {
3487   TRN *trn= THD_TRN;
3488   int res= 0;
3489   MARIA_HA *used_instances;
3490   DBUG_ENTER("maria_commit");
3491 
3492   /* No commit inside lock_tables() */
3493   if ((!trn ||
3494        thd->locked_tables_mode == LTM_LOCK_TABLES ||
3495        thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
3496     DBUG_RETURN(0);
3497 
3498   /* statement or transaction ? */
3499   if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
3500       !all)
3501     DBUG_RETURN(0); // end of statement
3502 
3503   used_instances= (MARIA_HA*) trn->used_instances;
3504   trnman_reset_locked_tables(trn, 0);
3505   trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
3506   trn->used_instances= 0;
3507   if (ma_commit(trn))
3508     res= HA_ERR_COMMIT_ERROR;
3509   reset_thd_trn(thd, used_instances);
3510   thd_set_ha_data(thd, maria_hton, 0);
3511   DBUG_RETURN(res);
3512 }
3513 
3514 #ifdef MARIA_CANNOT_ROLLBACK
maria_rollback(handlerton * hton,THD * thd,bool all)3515 static int maria_rollback(handlerton *hton, THD *thd, bool all)
3516 {
3517   TRN *trn= THD_TRN;
3518   DBUG_ENTER("maria_rollback");
3519   if (!trn)
3520     DBUG_RETURN(0);
3521   if (trn->undo_lsn)
3522     push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
3523                         ER_DATA_WAS_COMMITED_UNDER_ROLLBACK,
3524                         ER_THD(thd, ER_DATA_WAS_COMMITED_UNDER_ROLLBACK),
3525                         "Aria");
3526   if (all)
3527     DBUG_RETURN(maria_commit(hton, thd, all));
3528   /* Statement rollbacks are ignored. Commit will happen in external_lock */
3529   DBUG_RETURN(0);
3530 }
3531 
3532 #else
3533 
maria_rollback(handlerton * hton,THD * thd,bool all)3534 static int maria_rollback(handlerton *hton __attribute__ ((unused)),
3535                           THD *thd, bool all)
3536 {
3537   TRN *trn= THD_TRN;
3538   DBUG_ENTER("maria_rollback");
3539 
3540   DBUG_ASSERT(trnman_has_locked_tables(trn) == 0);
3541   trnman_reset_locked_tables(trn, 0);
3542   /* statement or transaction ? */
3543   if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
3544       !all)
3545   {
3546     trnman_rollback_statement(trn);
3547     DBUG_RETURN(0); // end of statement
3548   }
3549   reset_thd_trn(thd, (MARIA_HA*) trn->used_instances);
3550   DBUG_RETURN(trnman_rollback_trn(trn) ?
3551               HA_ERR_OUT_OF_MEM : 0); // end of transaction
3552 }
3553 #endif /* MARIA_CANNOT_ROLLBACK */
3554 
3555 
3556 /**
3557   @brief flush log handler
3558 
3559   @param hton            maria handlerton (unused)
3560 
3561   @retval FALSE OK
3562   @retval TRUE  Error
3563 */
3564 
maria_flush_logs(handlerton * hton)3565 bool maria_flush_logs(handlerton *hton)
3566 {
3567   return MY_TEST(translog_purge_at_flush());
3568 }
3569 
3570 
maria_checkpoint_state(handlerton * hton,bool disabled)3571 int maria_checkpoint_state(handlerton *hton, bool disabled)
3572 {
3573   maria_checkpoint_disabled= (my_bool) disabled;
3574   return 0;
3575 }
3576 
3577 
3578 /*
3579   Handle backup calls
3580 */
3581 
maria_prepare_for_backup()3582 void maria_prepare_for_backup()
3583 {
3584   translog_disable_purge();
3585 }
3586 
maria_end_backup()3587 void maria_end_backup()
3588 {
3589   translog_enable_purge();
3590 }
3591 
3592 
3593 
3594 #define SHOW_MSG_LEN (FN_REFLEN + 20)
3595 /**
3596   @brief show status handler
3597 
3598   @param hton            maria handlerton
3599   @param thd             thread handler
3600   @param print           print function
3601   @param stat            type of status
3602 */
3603 
maria_show_status(handlerton * hton,THD * thd,stat_print_fn * print,enum ha_stat_type stat)3604 bool maria_show_status(handlerton *hton,
3605                        THD *thd,
3606                        stat_print_fn *print,
3607                        enum ha_stat_type stat)
3608 {
3609   const LEX_CSTRING *engine_name= hton_name(hton);
3610   switch (stat) {
3611   case HA_ENGINE_LOGS:
3612   {
3613     TRANSLOG_ADDRESS horizon= translog_get_horizon();
3614     uint32 last_file= LSN_FILE_NO(horizon);
3615     uint32 first_needed= translog_get_first_needed_file();
3616     uint32 first_file= translog_get_first_file(horizon);
3617     uint32 i;
3618     const char unknown[]= "unknown";
3619     const char needed[]= "in use";
3620     const char unneeded[]= "free";
3621     char path[FN_REFLEN];
3622 
3623     if (first_file == 0)
3624     {
3625       const char error[]= "error";
3626       print(thd, engine_name->str, engine_name->length,
3627             STRING_WITH_LEN(""), error, sizeof(error) - 1);
3628       break;
3629     }
3630 
3631     for (i= first_file; i <= last_file; i++)
3632     {
3633       char *file;
3634       const char *status;
3635       size_t length, status_len;
3636       MY_STAT stat_buff, *stat;
3637       const char error[]= "can't stat";
3638       char object[SHOW_MSG_LEN];
3639       file= translog_filename_by_fileno(i, path);
3640       if (!(stat= mysql_file_stat(key_file_translog, file, &stat_buff, MYF(0))))
3641       {
3642         status= error;
3643         status_len= sizeof(error) - 1;
3644         length= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s", file);
3645       }
3646       else
3647       {
3648         if (first_needed == 0)
3649         {
3650           status= unknown;
3651           status_len= sizeof(unknown) - 1;
3652         }
3653         else if (i < first_needed)
3654         {
3655           status= unneeded;
3656           status_len= sizeof(unneeded) - 1;
3657         }
3658         else
3659         {
3660           status= needed;
3661           status_len= sizeof(needed) - 1;
3662         }
3663         length= my_snprintf(object, SHOW_MSG_LEN, "Size %12llu ; %s",
3664                             (ulonglong) stat->st_size, file);
3665       }
3666 
3667       print(thd, engine_name->str, engine_name->length,
3668             object, length, status, status_len);
3669     }
3670     break;
3671   }
3672   case HA_ENGINE_STATUS:
3673   case HA_ENGINE_MUTEX:
3674   default:
3675     break;
3676   }
3677   return 0;
3678 }
3679 
3680 
3681 /**
3682   Callback to delete all logs in directory. This is lower-level than other
3683   functions in ma_loghandler.c which delete logs, as it does not rely on
3684   translog_init() having been called first.
3685 
3686   @param  directory        directory where file is
3687   @param  filename         base name of the file to delete
3688 */
3689 
translog_callback_delete_all(const char * directory,const char * filename)3690 static my_bool translog_callback_delete_all(const char *directory,
3691                                             const char *filename)
3692 {
3693   char complete_name[FN_REFLEN];
3694   fn_format(complete_name, filename, directory, "", MYF(MY_UNPACK_FILENAME));
3695   return mysql_file_delete(key_file_translog, complete_name, MYF(MY_WME));
3696 }
3697 
3698 
3699 /**
3700   Helper function for option aria-force-start-after-recovery-failures.
3701   Deletes logs if too many failures. Otherwise, increments the counter of
3702   failures in the control file.
3703   Notice how this has to be called _before_ translog_init() (if log is
3704   corrupted, translog_init() might crash the server, so we need to remove logs
3705   before).
3706 
3707   @param  log_dir          directory where logs to be deleted are
3708 */
3709 
mark_recovery_start(const char * log_dir)3710 static int mark_recovery_start(const char* log_dir)
3711 {
3712   int res;
3713   DBUG_ENTER("mark_recovery_start");
3714   if (!(maria_recover_options & HA_RECOVER_ANY))
3715     ma_message_no_user(ME_WARNING, "Please consider using option"
3716                        " --aria-recover-options[=...] to automatically check and"
3717                        " repair tables when logs are removed by option"
3718                        " --aria-force-start-after-recovery-failures=#");
3719   if (recovery_failures >= force_start_after_recovery_failures)
3720   {
3721     /*
3722       Remove logs which cause the problem; keep control file which has
3723       critical info like uuid, max_trid (removing control file may make
3724       correct tables look corrupted!).
3725     */
3726     char msg[100];
3727     res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
3728     my_snprintf(msg, sizeof(msg),
3729                 "%s logs after %u consecutive failures of"
3730                 " recovery from logs",
3731                 (res ? "failed to remove some" : "removed all"),
3732                 recovery_failures);
3733     ma_message_no_user((res ? 0 : ME_WARNING), msg);
3734   }
3735   else
3736     res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3737                                          max_trid_in_control_file,
3738                                          recovery_failures + 1);
3739   DBUG_RETURN(res);
3740 }
3741 
3742 
3743 /**
3744   Helper function for option aria-force-start-after-recovery-failures.
3745   Records in the control file that recovery was a success, so that it's not
3746   counted for aria-force-start-after-recovery-failures.
3747 */
3748 
mark_recovery_success(void)3749 static int mark_recovery_success(void)
3750 {
3751   /* success of recovery, reset recovery_failures: */
3752   int res;
3753   DBUG_ENTER("mark_recovery_success");
3754   res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3755                                        max_trid_in_control_file, 0);
3756   DBUG_RETURN(res);
3757 }
3758 
3759 
3760 /*
3761   Return 1 if table has changed during the current transaction
3762 */
3763 
is_changed() const3764 bool ha_maria::is_changed() const
3765 {
3766   return file->state->changed;
3767 }
3768 
3769 
ha_maria_init(void * p)3770 static int ha_maria_init(void *p)
3771 {
3772   int res= 0, tmp;
3773   const char *log_dir= maria_data_root;
3774 
3775   /*
3776     If aria_readonly is set, then we don't run recovery and we don't allow
3777     opening of tables that are crashed. Used by mysqld --help
3778    */
3779   if ((aria_readonly= opt_help != 0))
3780   {
3781     maria_recover_options= 0;
3782     checkpoint_interval= 0;
3783   }
3784 
3785 #ifdef HAVE_PSI_INTERFACE
3786   init_aria_psi_keys();
3787 #endif
3788 
3789   maria_hton= (handlerton *)p;
3790   maria_hton->db_type= DB_TYPE_ARIA;
3791   maria_hton->create= maria_create_handler;
3792   maria_hton->panic= maria_hton_panic;
3793   maria_hton->tablefile_extensions= ha_maria_exts;
3794   maria_hton->commit= maria_commit;
3795   maria_hton->rollback= maria_rollback;
3796   maria_hton->checkpoint_state= maria_checkpoint_state;
3797   maria_hton->flush_logs= maria_flush_logs;
3798   maria_hton->show_status= maria_show_status;
3799   maria_hton->prepare_for_backup= maria_prepare_for_backup;
3800   maria_hton->end_backup= maria_end_backup;
3801 
3802   /* TODO: decide if we support Maria being used for log tables */
3803   maria_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES |
3804                       HTON_NO_ROLLBACK |
3805                       HTON_TRANSACTIONAL_AND_NON_TRANSACTIONAL);
3806   bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
3807   maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
3808 
3809   if (!aria_readonly)
3810     res= maria_upgrade();
3811   res= res || maria_init();
3812   tmp= ma_control_file_open(!aria_readonly, !aria_readonly, !aria_readonly);
3813   res= res || aria_readonly ? tmp == CONTROL_FILE_LOCKED : tmp != 0;
3814   res= res ||
3815     ((force_start_after_recovery_failures != 0 && !aria_readonly) &&
3816      mark_recovery_start(log_dir)) ||
3817     !init_pagecache(maria_pagecache,
3818                     (size_t) pagecache_buffer_size, pagecache_division_limit,
3819                     pagecache_age_threshold, maria_block_size, pagecache_file_hash_size,
3820                     0) ||
3821     !init_pagecache(maria_log_pagecache,
3822                     TRANSLOG_PAGECACHE_SIZE, 0, 0,
3823                     TRANSLOG_PAGE_SIZE, 0, 0) ||
3824     (!aria_readonly &&
3825      translog_init(maria_data_root, log_file_size,
3826                    MYSQL_VERSION_ID, server_id, maria_log_pagecache,
3827                    TRANSLOG_DEFAULT_FLAGS, 0)) ||
3828     (!aria_readonly &&
3829      (maria_recovery_from_log() ||
3830       ((force_start_after_recovery_failures != 0 ||
3831         maria_recovery_changed_data || recovery_failures) &&
3832        mark_recovery_success()))) ||
3833     (aria_readonly && trnman_init(MAX_INTERNAL_TRID-16)) ||
3834     ma_checkpoint_init(checkpoint_interval);
3835   maria_multi_threaded= maria_in_ha_maria= TRUE;
3836   maria_create_trn_hook= maria_create_trn_for_mysql;
3837   maria_pagecache->extra_debug= 1;
3838   maria_assert_if_crashed_table= debug_assert_if_crashed_table;
3839 
3840   if (res)
3841   {
3842     maria_hton= 0;
3843     maria_panic(HA_PANIC_CLOSE);
3844   }
3845 
3846   ma_killed= ma_killed_in_mariadb;
3847   if (res)
3848     maria_panic(HA_PANIC_CLOSE);
3849 
3850   return res ? HA_ERR_INITIALIZATION : 0;
3851 }
3852 
3853 
3854 #ifdef HAVE_QUERY_CACHE
3855 /**
3856   @brief Register a named table with a call back function to the query cache.
3857 
3858   @param thd The thread handle
3859   @param table_key A pointer to the table name in the table cache
3860   @param key_length The length of the table name
3861   @param[out] engine_callback The pointer to the storage engine call back
3862     function, currently 0
3863   @param[out] engine_data Engine data will be set to 0.
3864 
3865   @note Despite the name of this function, it is used to check each statement
3866     before it is cached and not to register a table or callback function.
3867 
3868   @see handler::register_query_cache_table
3869 
3870   @return The error code. The engine_data and engine_callback will be set to 0.
3871     @retval TRUE Success
3872     @retval FALSE An error occurred
3873 */
3874 
register_query_cache_table(THD * thd,const char * table_name,uint table_name_len,qc_engine_callback * engine_callback,ulonglong * engine_data)3875 my_bool ha_maria::register_query_cache_table(THD *thd, const char *table_name,
3876 					     uint table_name_len,
3877 					     qc_engine_callback
3878 					     *engine_callback,
3879 					     ulonglong *engine_data)
3880 {
3881   ulonglong actual_data_file_length;
3882   ulonglong current_data_file_length;
3883   DBUG_ENTER("ha_maria::register_query_cache_table");
3884 
3885   /*
3886     No call back function is needed to determine if a cached statement
3887     is valid or not.
3888   */
3889   *engine_callback= 0;
3890 
3891   /*
3892     No engine data is needed.
3893   */
3894   *engine_data= 0;
3895 
3896   if (file->s->now_transactional && file->s->have_versioning)
3897     DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn);
3898 
3899   /*
3900     If a concurrent INSERT has happened just before the currently processed
3901     SELECT statement, the total size of the table is unknown.
3902 
3903     To determine if the table size is known, the current thread's snap shot of
3904     the table size with the actual table size are compared.
3905 
3906     If the table size is unknown the SELECT statement can't be cached.
3907   */
3908 
3909   /*
3910     POSIX visibility rules specify that "2. Whatever memory values a
3911     thread can see when it unlocks a mutex <...> can also be seen by any
3912     thread that later locks the same mutex". In this particular case,
3913     concurrent insert thread had modified the data_file_length in
3914     MYISAM_SHARE before it has unlocked (or even locked)
3915     structure_guard_mutex. So, here we're guaranteed to see at least that
3916     value after we've locked the same mutex. We can see a later value
3917     (modified by some other thread) though, but it's ok, as we only want
3918     to know if the variable was changed, the actual new value doesn't matter
3919   */
3920   actual_data_file_length= file->s->state.state.data_file_length;
3921   current_data_file_length= file->state->data_file_length;
3922 
3923   /* Return whether is ok to try to cache current statement. */
3924   DBUG_RETURN(!(file->s->non_transactional_concurrent_insert &&
3925                 current_data_file_length != actual_data_file_length));
3926 }
3927 #endif
3928 
3929 static struct st_mysql_sys_var *system_variables[]= {
3930   MYSQL_SYSVAR(block_size),
3931   MYSQL_SYSVAR(checkpoint_interval),
3932   MYSQL_SYSVAR(checkpoint_log_activity),
3933   MYSQL_SYSVAR(force_start_after_recovery_failures),
3934   MYSQL_SYSVAR(group_commit),
3935   MYSQL_SYSVAR(group_commit_interval),
3936   MYSQL_SYSVAR(log_dir_path),
3937   MYSQL_SYSVAR(log_file_size),
3938   MYSQL_SYSVAR(log_purge_type),
3939   MYSQL_SYSVAR(max_sort_file_size),
3940   MYSQL_SYSVAR(page_checksum),
3941   MYSQL_SYSVAR(pagecache_age_threshold),
3942   MYSQL_SYSVAR(pagecache_buffer_size),
3943   MYSQL_SYSVAR(pagecache_division_limit),
3944   MYSQL_SYSVAR(pagecache_file_hash_size),
3945   MYSQL_SYSVAR(recover_options),
3946   MYSQL_SYSVAR(repair_threads),
3947   MYSQL_SYSVAR(sort_buffer_size),
3948   MYSQL_SYSVAR(stats_method),
3949   MYSQL_SYSVAR(sync_log_dir),
3950   MYSQL_SYSVAR(used_for_temp_tables),
3951   MYSQL_SYSVAR(encrypt_tables),
3952   NULL
3953 };
3954 
3955 
3956 /**
3957    @brief Updates the checkpoint interval and restarts the background thread.
3958 */
3959 
update_checkpoint_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3960 static void update_checkpoint_interval(MYSQL_THD thd,
3961                                         struct st_mysql_sys_var *var,
3962                                         void *var_ptr, const void *save)
3963 {
3964   ma_checkpoint_end();
3965   ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
3966 }
3967 
3968 
3969 /**
3970    @brief Updates group commit mode
3971 */
3972 
update_maria_group_commit(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3973 static void update_maria_group_commit(MYSQL_THD thd,
3974                                       struct st_mysql_sys_var *var,
3975                                       void *var_ptr, const void *save)
3976 {
3977   ulong value= (ulong)*((long *)var_ptr);
3978   DBUG_ENTER("update_maria_group_commit");
3979   DBUG_PRINT("enter", ("old value: %lu  new value %lu  rate %lu",
3980                        value, (ulong)(*(long *)save),
3981                        maria_group_commit_interval));
3982   /* old value */
3983   switch (value) {
3984   case TRANSLOG_GCOMMIT_NONE:
3985     break;
3986   case TRANSLOG_GCOMMIT_HARD:
3987     translog_hard_group_commit(FALSE);
3988     break;
3989   case TRANSLOG_GCOMMIT_SOFT:
3990     translog_soft_sync(FALSE);
3991     if (maria_group_commit_interval)
3992       translog_soft_sync_end();
3993     break;
3994   default:
3995     DBUG_ASSERT(0); /* impossible */
3996   }
3997   value= *(ulong *)var_ptr= (ulong)(*(long *)save);
3998   translog_sync();
3999   /* new value */
4000   switch (value) {
4001   case TRANSLOG_GCOMMIT_NONE:
4002     break;
4003   case TRANSLOG_GCOMMIT_HARD:
4004     translog_hard_group_commit(TRUE);
4005     break;
4006   case TRANSLOG_GCOMMIT_SOFT:
4007     translog_soft_sync(TRUE);
4008     /* variable change made under global lock so we can just read it */
4009     if (maria_group_commit_interval)
4010       translog_soft_sync_start();
4011     break;
4012   default:
4013     DBUG_ASSERT(0); /* impossible */
4014   }
4015   DBUG_VOID_RETURN;
4016 }
4017 
4018 /**
4019    @brief Updates group commit interval
4020 */
4021 
update_maria_group_commit_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)4022 static void update_maria_group_commit_interval(MYSQL_THD thd,
4023                                                struct st_mysql_sys_var *var,
4024                                                void *var_ptr, const void *save)
4025 {
4026   ulong new_value= (ulong)*((long *)save);
4027   ulong *value_ptr= (ulong*) var_ptr;
4028   DBUG_ENTER("update_maria_group_commit_interval");
4029   DBUG_PRINT("enter", ("old value: %lu  new value %lu  group commit %lu",
4030                         *value_ptr, new_value, maria_group_commit));
4031 
4032   /* variable change made under global lock so we can just read it */
4033   switch (maria_group_commit) {
4034     case TRANSLOG_GCOMMIT_NONE:
4035       *value_ptr= new_value;
4036       translog_set_group_commit_interval(new_value);
4037       break;
4038     case TRANSLOG_GCOMMIT_HARD:
4039       *value_ptr= new_value;
4040       translog_set_group_commit_interval(new_value);
4041       break;
4042     case TRANSLOG_GCOMMIT_SOFT:
4043       if (*value_ptr)
4044         translog_soft_sync_end();
4045       translog_set_group_commit_interval(new_value);
4046       if ((*value_ptr= new_value))
4047         translog_soft_sync_start();
4048       break;
4049     default:
4050       DBUG_ASSERT(0); /* impossible */
4051   }
4052   DBUG_VOID_RETURN;
4053 }
4054 
4055 /**
4056    @brief Updates the transaction log file limit.
4057 */
4058 
update_log_file_size(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)4059 static void update_log_file_size(MYSQL_THD thd,
4060                                  struct st_mysql_sys_var *var,
4061                                  void *var_ptr, const void *save)
4062 {
4063   uint32 size= (uint32)((ulong)(*(long *)save));
4064   translog_set_file_size(size);
4065   *(ulong *)var_ptr= size;
4066 }
4067 
4068 
/*
  Status variables of the engine; this array is what the plugin declaration
  at the end of the file passes as its "status variables" member.

  Each entry gives the variable name, the address of the counter holding its
  value and the SHOW type used to read it. Counters come from
  maria_pagecache_var, except transaction_log_syncs which reads
  translog_syncs. The final all-NullS entry presumably acts as the
  end-of-list marker.
*/
static SHOW_VAR status_variables[]= {
  {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG},
  {"pagecache_blocks_unused",      (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG},
  {"pagecache_blocks_used",        (char*) &maria_pagecache_var.blocks_used, SHOW_LONG},
  {"pagecache_read_requests",      (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
  {"pagecache_reads",              (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
  {"pagecache_write_requests",     (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
  {"pagecache_writes",             (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
  {"transaction_log_syncs",        (char*) &translog_syncs, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
4080 
4081 /****************************************************************************
4082  * Maria MRR implementation: use DS-MRR
4083  ***************************************************************************/
4084 
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)4085 int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
4086                                     uint n_ranges, uint mode,
4087                                     HANDLER_BUFFER *buf)
4088 {
4089   return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
4090 }
4091 
multi_range_read_next(range_id_t * range_info)4092 int ha_maria::multi_range_read_next(range_id_t *range_info)
4093 {
4094   return ds_mrr.dsmrr_next(range_info);
4095 }
4096 
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)4097 ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
4098                                                void *seq_init_param,
4099                                                uint n_ranges, uint *bufsz,
4100                                                uint *flags, Cost_estimate *cost)
4101 {
4102   /*
4103     This call is here because there is no location where this->table would
4104     already be known.
4105     TODO: consider moving it into some per-query initialization call.
4106   */
4107   ds_mrr.init(this, table);
4108   return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
4109                                  flags, cost);
4110 }
4111 
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)4112 ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
4113                                        uint key_parts, uint *bufsz,
4114                                        uint *flags, Cost_estimate *cost)
4115 {
4116   ds_mrr.init(this, table);
4117   return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
4118 }
4119 
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)4120 int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
4121                                             size_t size)
4122 {
4123   return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
4124 }
/* Maria MRR implementation ends */
4126 
4127 
4128 /* Index condition pushdown implementation*/
4129 
4130 
idx_cond_push(uint keyno_arg,Item * idx_cond_arg)4131 Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
4132 {
4133   /*
4134     Check if the key contains a blob field. If it does then MyISAM
4135     should not accept the pushed index condition since MyISAM will not
4136     read the blob field from the index entry during evaluation of the
4137     pushed index condition and the BLOB field might be part of the
4138     range evaluation done by the ICP code.
4139   */
4140   const KEY *key= &table_share->key_info[keyno_arg];
4141 
4142   for (uint k= 0; k < key->user_defined_key_parts; ++k)
4143   {
4144     const KEY_PART_INFO *key_part= &key->key_part[k];
4145     if (key_part->key_part_flag & HA_BLOB_PART)
4146     {
4147       /* Let the server handle the index condition */
4148       return idx_cond_arg;
4149     }
4150   }
4151 
4152   pushed_idx_cond_keyno= keyno_arg;
4153   pushed_idx_cond= idx_cond_arg;
4154   in_range_check_pushed_down= TRUE;
4155   if (active_index == pushed_idx_cond_keyno)
4156     ma_set_index_cond_func(file, handler_index_cond_check, this);
4157   return NULL;
4158 }
4159 
4160 /**
4161   Find record by unique constrain (used in temporary tables)
4162 
4163   @param record          (IN|OUT) the record to find
4164   @param constrain_no    (IN) number of constrain (for this engine)
4165 
4166   @note It is like hp_search but uses function for raw where hp_search
4167         uses functions for index.
4168 
4169   @retval  0 OK
4170   @retval  1 Not found
4171   @retval -1 Error
4172 */
4173 
find_unique_row(uchar * record,uint constrain_no)4174 int ha_maria::find_unique_row(uchar *record, uint constrain_no)
4175 {
4176   int rc;
4177   register_handler(file);
4178   if (file->s->state.header.uniques)
4179   {
4180     DBUG_ASSERT(file->s->state.header.uniques > constrain_no);
4181     MARIA_UNIQUEDEF *def= file->s->uniqueinfo + constrain_no;
4182     ha_checksum unique_hash= _ma_unique_hash(def, record);
4183     rc= _ma_check_unique(file, def, record, unique_hash, HA_OFFSET_ERROR);
4184     if (rc)
4185     {
4186       file->cur_row.lastpos= file->dup_key_pos;
4187       if ((*file->read_record)(file, record, file->cur_row.lastpos))
4188         return -1;
4189       file->update|= HA_STATE_AKTIV;                     /* Record is read */
4190     }
4191     // invert logic
4192     rc= !MY_TEST(rc);
4193   }
4194   else
4195   {
4196     /*
4197      It is case when just unique index used instead unicue constrain
4198      (conversion from heap table).
4199      */
4200     DBUG_ASSERT(file->s->state.header.keys > constrain_no);
4201     MARIA_KEY key;
4202     file->once_flags|= USE_PACKED_KEYS;
4203     (*file->s->keyinfo[constrain_no].make_key)
4204       (file, &key, constrain_no, file->lastkey_buff2, record, 0, 0);
4205     rc= maria_rkey(file, record, constrain_no, key.data, key.data_length,
4206                    HA_READ_KEY_EXACT);
4207     rc= MY_TEST(rc);
4208   }
4209   return rc;
4210 }
4211 
4212 
4213 /**
4214    Check if a table needs to be repaired
4215 */
4216 
check_for_upgrade(HA_CHECK_OPT * check)4217 int ha_maria::check_for_upgrade(HA_CHECK_OPT *check)
4218 {
4219   if (table->s->mysql_version && table->s->mysql_version <= 100509 &&
4220       (file->s->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
4221   {
4222     /*
4223       Encrypted tables before 10.5.9 had a bug where LSN was not
4224       stored on the pages. These must be repaired!
4225     */
4226     return HA_ADMIN_NEEDS_ALTER;
4227   }
4228   return HA_ADMIN_OK;
4229 }
4230 
4231 
/*
  Storage engine descriptor for the plugin; its address is the second
  member of the plugin declaration that follows. It is initialized with
  the handlerton interface version.
*/
struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
4234 
/*
  Plugin declaration for the "Aria" storage engine.

  Members are positional. The engine is initialized through ha_maria_init,
  has no deinit function, and exposes the status_variables and
  system_variables arrays. Numeric version 0x0105 corresponds to the
  string version "1.5".
*/
maria_declare_plugin(aria)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &maria_storage_engine,
  "Aria",
  "MariaDB Corporation Ab",
  "Crash-safe tables with MyISAM heritage. Used for internal temporary tables and privilege tables",
  PLUGIN_LICENSE_GPL,
  ha_maria_init,                /* Plugin Init      */
  NULL,                         /* Plugin Deinit    */
  0x0105,                       /* 1.5              */
  status_variables,             /* status variables */
  system_variables,             /* system variables */
  "1.5",                        /* string version   */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity         */
}
maria_declare_plugin_end;
4252