1 /* Copyright (C) 2004-2008 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2    Copyright (C) 2008-2009 Sun Microsystems, Inc.
3    Copyright (c) 2009, 2017, MariaDB Corporation.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; version 2 of the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
17 
18 
19 #ifdef USE_PRAGMA_IMPLEMENTATION
20 #pragma implementation                          // gcc: Class implementation
21 #endif
22 
23 #define MYSQL_SERVER 1
24 #include <my_global.h>
25 #include <m_ctype.h>
26 #include <my_dir.h>
27 #include <myisampack.h>
28 #include <my_bit.h>
29 #include "ha_maria.h"
30 #include "trnman_public.h"
31 #include "trnman.h"
32 
33 C_MODE_START
34 #include "maria_def.h"
35 #include "ma_rt_index.h"
36 #include "ma_blockrec.h"
37 #include "ma_checkpoint.h"
38 #include "ma_recovery.h"
39 C_MODE_END
40 #include "ma_trnman.h"
41 
42 //#include "sql_priv.h"
43 #include "protocol.h"
44 #include "sql_class.h"
45 #include "key.h"
46 #include "log.h"
47 #include "sql_parse.h"
48 
49 /*
50   Note that in future versions, only *transactional* Maria tables can
51   rollback, so this flag should be up or down conditionally.
52 */
53 #ifdef MARIA_CANNOT_ROLLBACK
54 #define CANNOT_ROLLBACK_FLAG HA_NO_TRANSACTIONS
55 #define trans_register_ha(A, B, C)  do { /* nothing */ } while(0)
56 #else
57 #define CANNOT_ROLLBACK_FLAG 0
58 #endif
59 #define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton))
60 
61 ulong pagecache_division_limit, pagecache_age_threshold, pagecache_file_hash_size;
62 ulonglong pagecache_buffer_size;
63 const char *zerofill_error_msg=
64   "Table is from another system and must be zerofilled or repaired to be "
65   "usable on this system";
66 
67 /**
68    As the auto-repair is initiated when opened from the SQL layer
69    (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
70    Recovery internally opens the table to apply log records to it, which is
71    good. It would happen only after Recovery, if the table is still
72    corrupted.
73 */
74 ulonglong maria_recover_options= HA_RECOVER_NONE;
75 handlerton *maria_hton;
76 
77 /* bits in maria_recover_options */
78 const char *maria_recover_names[]=
79 {
80   /*
81     Compared to MyISAM, "default" was renamed to "normal" as it collided with
82     SET var=default which sets to the var's default i.e. what happens when the
83     var is not set i.e. HA_RECOVER_NONE.
84     OFF flag is ignored.
85   */
86   "NORMAL", "BACKUP", "FORCE", "QUICK", "OFF", NullS
87 };
88 TYPELIB maria_recover_typelib=
89 {
90   array_elements(maria_recover_names) - 1, "",
91   maria_recover_names, NULL
92 };
93 
94 const char *maria_stats_method_names[]=
95 {
96   "nulls_unequal", "nulls_equal",
97   "nulls_ignored", NullS
98 };
99 TYPELIB maria_stats_method_typelib=
100 {
101   array_elements(maria_stats_method_names) - 1, "",
102   maria_stats_method_names, NULL
103 };
104 
105 /* transactions log purge mode */
106 const char *maria_translog_purge_type_names[]=
107 {
108   "immediate", "external", "at_flush", NullS
109 };
110 TYPELIB maria_translog_purge_type_typelib=
111 {
112   array_elements(maria_translog_purge_type_names) - 1, "",
113   maria_translog_purge_type_names, NULL
114 };
115 
116 /* transactional log directory sync */
117 const char *maria_sync_log_dir_names[]=
118 {
119   "NEVER", "NEWFILE", "ALWAYS", NullS
120 };
121 TYPELIB maria_sync_log_dir_typelib=
122 {
123   array_elements(maria_sync_log_dir_names) - 1, "",
124   maria_sync_log_dir_names, NULL
125 };
126 
127 /* transactional log group commit */
128 const char *maria_group_commit_names[]=
129 {
130   "none", "hard", "soft", NullS
131 };
132 TYPELIB maria_group_commit_typelib=
133 {
134   array_elements(maria_group_commit_names) - 1, "",
135   maria_group_commit_names, NULL
136 };
137 
138 /** Interval between background checkpoints in seconds */
139 static ulong checkpoint_interval;
140 static void update_checkpoint_interval(MYSQL_THD thd,
141                                        struct st_mysql_sys_var *var,
142                                        void *var_ptr, const void *save);
143 static void update_maria_group_commit(MYSQL_THD thd,
144                                       struct st_mysql_sys_var *var,
145                                       void *var_ptr, const void *save);
146 static void update_maria_group_commit_interval(MYSQL_THD thd,
147                                            struct st_mysql_sys_var *var,
148                                            void *var_ptr, const void *save);
149 /** After that many consecutive recovery failures, remove logs */
150 static ulong force_start_after_recovery_failures;
151 static void update_log_file_size(MYSQL_THD thd,
152                                  struct st_mysql_sys_var *var,
153                                  void *var_ptr, const void *save);
154 
155 static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
156        PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
157        "Block size to be used for Aria index pages.", 0, 0,
158        MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
159        MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
160 
161 static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
162        PLUGIN_VAR_RQCMDARG,
163        "Interval between tries to do an automatic checkpoints. In seconds; 0 means"
164        " 'no automatic checkpoints' which makes sense only for testing.",
165        NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1);
166 
167 static MYSQL_SYSVAR_ULONG(checkpoint_log_activity, maria_checkpoint_min_log_activity,
168        PLUGIN_VAR_RQCMDARG,
169        "Number of bytes that the transaction log has to grow between checkpoints before a new "
170        "checkpoint is written to the log.",
171        NULL, NULL, 1024*1024, 0, UINT_MAX, 1);
172 
173 static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
174        force_start_after_recovery_failures,
175        /*
176          Read-only because setting it on the fly has no useful effect,
177          should be set on command-line.
178        */
179        PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
180        "Number of consecutive log recovery failures after which logs will be"
181        " automatically deleted to cure the problem; 0 (the default) disables"
182        " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);
183 
184 static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
185        "Maintain page checksums (can be overridden per table "
186        "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1);
187 
188 /* It is only command line argument */
189 static MYSQL_SYSVAR_STR(log_dir_path, maria_data_root,
190        PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
191        "Path to the directory where to store transactional log",
192        NULL, NULL, mysql_real_data_home);
193 
194 
195 static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
196        PLUGIN_VAR_RQCMDARG,
197        "Limit for transaction log size",
198        NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
199        TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE);
200 
201 static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit,
202        PLUGIN_VAR_RQCMDARG,
203        "Specifies Aria group commit mode. "
204        "Possible values are \"none\" (no group commit), "
205        "\"hard\" (with waiting to actual commit), "
206        "\"soft\" (no wait for commit (DANGEROUS!!!))",
207        NULL, update_maria_group_commit,
208        TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib);
209 
210 static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval,
211        PLUGIN_VAR_RQCMDARG,
212        "Interval between commite in microseconds (1/1000000c)."
213        " 0 stands for no waiting"
214        " for other threads to come and do a commit in \"hard\" mode and no"
215        " sync()/commit at all in \"soft\" mode.  Option has only an effect"
216        " if aria_group_commit is used",
217        NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1);
218 
219 static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
220        PLUGIN_VAR_RQCMDARG,
221        "Specifies how Aria transactional log will be purged",
222        NULL, NULL, TRANSLOG_PURGE_IMMIDIATE,
223        &maria_translog_purge_type_typelib);
224 
225 static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
226        maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
227        "Don't use the fast sort index method to created index if the "
228        "temporary file would get bigger than this.",
229        0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)),
230        0, MAX_FILE_SIZE, 1*MB);
231 
232 static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
233        pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
234        "This characterizes the number of hits a hot block has to be untouched "
235        "until it is considered aged enough to be downgraded to a warm block. "
236        "This specifies the percentage ratio of that number of hits to the "
237        "total number of blocks in the page cache.", 0, 0,
238        300, 100, ~ (ulong) 0L, 100);
239 
240 static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
241        PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
242        "The size of the buffer used for index blocks for Aria tables. "
243        "Increase this to get better index handling (for all reads and "
244        "multiple writes) to as much as you can afford.", 0, 0,
245        KEY_CACHE_SIZE, 8192*16L, ~(ulonglong) 0, 1);
246 
247 static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
248        PLUGIN_VAR_RQCMDARG,
249        "The minimum percentage of warm blocks in key cache", 0, 0,
250        100,  1, 100, 1);
251 
252 static MYSQL_SYSVAR_ULONG(pagecache_file_hash_size, pagecache_file_hash_size,
253        PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
254        "Number of hash buckets for open and changed files.  If you have a lot of Aria "
255        "files open you should increase this for faster flush of changes. A good "
256        "value is probably 1/10 of number of possible open Aria files.", 0,0,
257        512, 128, 16384, 1);
258 
259 static MYSQL_SYSVAR_SET(recover_options, maria_recover_options, PLUGIN_VAR_OPCMDARG,
260        "Specifies how corrupted tables should be automatically repaired",
261        NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &maria_recover_typelib);
262 
263 static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG,
264        "Number of threads to use when repairing Aria tables. The value of 1 "
265        "disables parallel repair.",
266        0, 0, 1, 1, 128, 1);
267 
268 static MYSQL_THDVAR_ULONGLONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
269        "The buffer that is allocated when sorting the index when doing a "
270        "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.", NULL, NULL,
271        SORT_BUFFER_INIT, MIN_SORT_BUFFER, SIZE_T_MAX, 1);
272 
273 static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
274        "Specifies how Aria index statistics collection code should treat "
275        "NULLs", 0, 0, 0, &maria_stats_method_typelib);
276 
277 static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
278        "Controls syncing directory after log file growth and new file "
279        "creation", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
280        &maria_sync_log_dir_typelib);
281 
282 #ifdef USE_ARIA_FOR_TMP_TABLES
283 #define USE_ARIA_FOR_TMP_TABLES_VAL 1
284 #else
285 #define USE_ARIA_FOR_TMP_TABLES_VAL 0
286 #endif
287 my_bool use_maria_for_temp_tables= USE_ARIA_FOR_TMP_TABLES_VAL;
288 
289 static MYSQL_SYSVAR_BOOL(used_for_temp_tables,
290        use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
291        "Whether temporary tables should be MyISAM or Aria", 0, 0,
292        1);
293 
294 static MYSQL_SYSVAR_BOOL(encrypt_tables, maria_encrypt_tables, PLUGIN_VAR_OPCMDARG,
295        "Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) "
296        "and not FIXED/DYNAMIC)",
297        0, 0, 0);
298 
299 #if defined HAVE_PSI_INTERFACE && !defined EMBEDDED_LIBRARY
300 
301 static PSI_mutex_info all_aria_mutexes[]=
302 {
303   { &key_THR_LOCK_maria, "THR_LOCK_maria", PSI_FLAG_GLOBAL},
304   { &key_LOCK_soft_sync, "LOCK_soft_sync", PSI_FLAG_GLOBAL},
305   { &key_LOCK_trn_list, "LOCK_trn_list", PSI_FLAG_GLOBAL},
306   { &key_SHARE_BITMAP_lock, "SHARE::bitmap::bitmap_lock", 0},
307   { &key_SORT_INFO_mutex, "SORT_INFO::mutex", 0},
308   { &key_TRANSLOG_BUFFER_mutex, "TRANSLOG_BUFFER::mutex", 0},
309   { &key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock, "TRANSLOG_DESCRIPTOR::dirty_buffer_mask_lock", 0},
310   { &key_TRANSLOG_DESCRIPTOR_sent_to_disk_lock, "TRANSLOG_DESCRIPTOR::sent_to_disk_lock", 0},
311   { &key_TRANSLOG_DESCRIPTOR_log_flush_lock, "TRANSLOG_DESCRIPTOR::log_flush_lock", 0},
312   { &key_TRANSLOG_DESCRIPTOR_file_header_lock, "TRANSLOG_DESCRIPTOR::file_header_lock", 0},
313   { &key_TRANSLOG_DESCRIPTOR_unfinished_files_lock, "TRANSLOG_DESCRIPTOR::unfinished_files_lock", 0},
314   { &key_TRANSLOG_DESCRIPTOR_purger_lock, "TRANSLOG_DESCRIPTOR::purger_lock", 0},
315   { &key_SHARE_intern_lock, "SHARE::intern_lock", 0},
316   { &key_SHARE_key_del_lock, "SHARE::key_del_lock", 0},
317   { &key_SHARE_close_lock, "SHARE::close_lock", 0},
318   { &key_SERVICE_THREAD_CONTROL_lock, "SERVICE_THREAD_CONTROL::LOCK_control", 0},
319   { &key_TRN_state_lock, "TRN::state_lock", 0},
320   { &key_PAGECACHE_cache_lock, "PAGECACHE::cache_lock", 0}
321 };
322 
323 static PSI_cond_info all_aria_conds[]=
324 {
325   { &key_COND_soft_sync, "COND_soft_sync", PSI_FLAG_GLOBAL},
326   { &key_SHARE_key_del_cond, "SHARE::key_del_cond", 0},
327   { &key_SERVICE_THREAD_CONTROL_cond, "SERVICE_THREAD_CONTROL::COND_control", 0},
328   { &key_SORT_INFO_cond, "SORT_INFO::cond", 0},
329   { &key_SHARE_BITMAP_cond, "BITMAP::bitmap_cond", 0},
330   { &key_TRANSLOG_BUFFER_waiting_filling_buffer, "TRANSLOG_BUFFER::waiting_filling_buffer", 0},
331   { &key_TRANSLOG_BUFFER_prev_sent_to_disk_cond, "TRANSLOG_BUFFER::prev_sent_to_disk_cond", 0},
332   { &key_TRANSLOG_DESCRIPTOR_log_flush_cond, "TRANSLOG_DESCRIPTOR::log_flush_cond", 0},
333   { &key_TRANSLOG_DESCRIPTOR_new_goal_cond, "TRANSLOG_DESCRIPTOR::new_goal_cond", 0}
334 };
335 
336 static PSI_rwlock_info all_aria_rwlocks[]=
337 {
338   { &key_KEYINFO_root_lock, "KEYINFO::root_lock", 0},
339   { &key_SHARE_mmap_lock, "SHARE::mmap_lock", 0},
340   { &key_TRANSLOG_DESCRIPTOR_open_files_lock, "TRANSLOG_DESCRIPTOR::open_files_lock", 0}
341 };
342 
343 static PSI_thread_info all_aria_threads[]=
344 {
345   { &key_thread_checkpoint, "checkpoint_background", PSI_FLAG_GLOBAL},
346   { &key_thread_soft_sync, "soft_sync_background", PSI_FLAG_GLOBAL},
347   { &key_thread_find_all_keys, "thr_find_all_keys", 0}
348 };
349 
350 static PSI_file_info all_aria_files[]=
351 {
352   { &key_file_translog, "translog", 0},
353   { &key_file_kfile, "MAI", 0},
354   { &key_file_dfile, "MAD", 0},
355   { &key_file_control, "control", PSI_FLAG_GLOBAL}
356 };
357 
358 # ifdef HAVE_PSI_STAGE_INTERFACE
359 static PSI_stage_info *all_aria_stages[]=
360 {
361   & stage_waiting_for_a_resource
362 };
363 # endif /* HAVE_PSI_STAGE_INTERFACE */
364 
init_aria_psi_keys(void)365 static void init_aria_psi_keys(void)
366 {
367   const char* category= "aria";
368   int count;
369 
370   count= array_elements(all_aria_mutexes);
371   mysql_mutex_register(category, all_aria_mutexes, count);
372 
373   count= array_elements(all_aria_rwlocks);
374   mysql_rwlock_register(category, all_aria_rwlocks, count);
375 
376   count= array_elements(all_aria_conds);
377   mysql_cond_register(category, all_aria_conds, count);
378 
379   count= array_elements(all_aria_threads);
380   mysql_thread_register(category, all_aria_threads, count);
381 
382   count= array_elements(all_aria_files);
383   mysql_file_register(category, all_aria_files, count);
384 # ifdef HAVE_PSI_STAGE_INTERFACE
385   count= array_elements(all_aria_stages);
386   mysql_stage_register(category, all_aria_stages, count);
387 # endif /* HAVE_PSI_STAGE_INTERFACE */
388 }
389 #else
390 #define init_aria_psi_keys() /* no-op */
391 #endif /* HAVE_PSI_INTERFACE */
392 
393 /*****************************************************************************
394 ** MARIA tables
395 *****************************************************************************/
396 
maria_create_handler(handlerton * hton,TABLE_SHARE * table,MEM_ROOT * mem_root)397 static handler *maria_create_handler(handlerton *hton,
398                                      TABLE_SHARE * table,
399                                      MEM_ROOT *mem_root)
400 {
401   return new (mem_root) ha_maria(hton, table);
402 }
403 
404 
405 // collect errors printed by maria_check routines
406 
_ma_check_print_msg(HA_CHECK * param,const char * msg_type,const char * fmt,va_list args)407 static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
408                                 const char *fmt, va_list args)
409 {
410   THD *thd= (THD *) param->thd;
411   Protocol *protocol= thd->protocol;
412   size_t length, msg_length;
413   char msgbuf[MYSQL_ERRMSG_SIZE];
414   char name[NAME_LEN * 2 + 2];
415 
416   if (param->testflag & T_SUPPRESS_ERR_HANDLING)
417     return;
418 
419   msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
420   msgbuf[sizeof(msgbuf) - 1]= 0;                // healthy paranoia
421 
422   DBUG_PRINT(msg_type, ("message: %s", msgbuf));
423 
424   if (!thd->vio_ok())
425   {
426     sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
427     return;
428   }
429 
430   if (param->testflag &
431       (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
432   {
433     my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME));
434     if (thd->variables.log_warnings > 2)
435       sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
436     return;
437   }
438   length= (uint) (strxmov(name, param->db_name, ".", param->table_name,
439                           NullS) - name);
440   /*
441     TODO: switch from protocol to push_warning here. The main reason we didn't
442     it yet is parallel repair, which threads have no THD object accessible via
443     current_thd.
444 
445     Also we likely need to lock mutex here (in both cases with protocol and
446     push_warning).
447   */
448   protocol->prepare_for_resend();
449   protocol->store(name, (uint)length, system_charset_info);
450   protocol->store(param->op_name, system_charset_info);
451   protocol->store(msg_type, system_charset_info);
452   protocol->store(msgbuf, (uint)msg_length, system_charset_info);
453   if (protocol->write())
454     sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n",
455                     param->db_name, param->table_name, msgbuf);
456   else if (thd->variables.log_warnings > 2)
457     sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
458 
459   return;
460 }
461 
462 
463 /*
464   Convert TABLE object to Maria key and column definition
465 
466   SYNOPSIS
467     table2maria()
468       table_arg   in     TABLE object.
469       keydef_out  out    Maria key definition.
470       recinfo_out out    Maria column definition.
471       records_out out    Number of fields.
472 
473   DESCRIPTION
474     This function will allocate and initialize Maria key and column
475     definition for further use in ma_create or for a check for underlying
476     table conformance in merge engine.
477 
478     The caller needs to free *recinfo_out after use. Since *recinfo_out
479     and *keydef_out are allocated with a my_multi_malloc, *keydef_out
480     is freed automatically when *recinfo_out is freed.
481 
482   RETURN VALUE
483     0  OK
484     # error code
485 */
486 
table2maria(TABLE * table_arg,data_file_type row_type,MARIA_KEYDEF ** keydef_out,MARIA_COLUMNDEF ** recinfo_out,uint * records_out,MARIA_CREATE_INFO * create_info)487 static int table2maria(TABLE *table_arg, data_file_type row_type,
488                        MARIA_KEYDEF **keydef_out,
489                        MARIA_COLUMNDEF **recinfo_out, uint *records_out,
490                        MARIA_CREATE_INFO *create_info)
491 {
492   uint i, j, recpos, minpos, fieldpos, temp_length, length;
493   enum ha_base_keytype type= HA_KEYTYPE_BINARY;
494   uchar *record;
495   KEY *pos;
496   MARIA_KEYDEF *keydef;
497   MARIA_COLUMNDEF *recinfo, *recinfo_pos;
498   HA_KEYSEG *keyseg;
499   TABLE_SHARE *share= table_arg->s;
500   uint options= share->db_options_in_use;
501   DBUG_ENTER("table2maria");
502 
503   if (row_type == BLOCK_RECORD)
504     options|= HA_OPTION_PACK_RECORD;
505 
506   if (!(my_multi_malloc(MYF(MY_WME),
507           recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
508           keydef_out, share->keys * sizeof(MARIA_KEYDEF),
509           &keyseg,
510           (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
511           NullS)))
512     DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
513   keydef= *keydef_out;
514   recinfo= *recinfo_out;
515   pos= table_arg->key_info;
516   for (i= 0; i < share->keys; i++, pos++)
517   {
518     keydef[i].flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
519                                             HA_SPATIAL));
520     keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
521       (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
522       pos->algorithm;
523     keydef[i].block_length= pos->block_size;
524     keydef[i].seg= keyseg;
525     keydef[i].keysegs= pos->user_defined_key_parts;
526     for (j= 0; j < pos->user_defined_key_parts; j++)
527     {
528       Field *field= pos->key_part[j].field;
529 
530       if (!table_arg->field[field->field_index]->stored_in_db())
531       {
532         my_free(*recinfo_out);
533         my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
534         DBUG_RETURN(HA_ERR_UNSUPPORTED);
535       }
536 
537       type= field->key_type();
538       keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
539 
540       if (options & HA_OPTION_PACK_KEYS ||
541           (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
542                          HA_SPACE_PACK_USED)))
543       {
544         if (pos->key_part[j].length > 8 &&
545             (type == HA_KEYTYPE_TEXT ||
546              type == HA_KEYTYPE_NUM ||
547              (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
548         {
549           /* No blobs here */
550           if (j == 0)
551             keydef[i].flag|= HA_PACK_KEY;
552           if (!(field->flags & ZEROFILL_FLAG) &&
553               (field->type() == MYSQL_TYPE_STRING ||
554                field->type() == MYSQL_TYPE_VAR_STRING ||
555                ((int) (pos->key_part[j].length - field->decimals())) >= 4))
556             keydef[i].seg[j].flag|= HA_SPACE_PACK;
557         }
558         else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
559           keydef[i].flag|= HA_BINARY_PACK_KEY;
560       }
561       keydef[i].seg[j].type= (int) type;
562       keydef[i].seg[j].start= pos->key_part[j].offset;
563       keydef[i].seg[j].length= pos->key_part[j].length;
564       keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_length= 0;
565       keydef[i].seg[j].bit_pos= 0;
566       keydef[i].seg[j].language= field->charset()->number;
567 
568       if (field->null_ptr)
569       {
570         keydef[i].seg[j].null_bit= field->null_bit;
571         keydef[i].seg[j].null_pos= (uint) (field->null_ptr-
572                                            (uchar*) table_arg->record[0]);
573       }
574       else
575       {
576         keydef[i].seg[j].null_bit= 0;
577         keydef[i].seg[j].null_pos= 0;
578       }
579       if (field->type() == MYSQL_TYPE_BLOB ||
580           field->type() == MYSQL_TYPE_GEOMETRY)
581       {
582         keydef[i].seg[j].flag|= HA_BLOB_PART;
583         /* save number of bytes used to pack length */
584         keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
585                                             portable_sizeof_char_ptr);
586       }
587       else if (field->type() == MYSQL_TYPE_BIT)
588       {
589         keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
590         keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
591         keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
592                                           (uchar*) table_arg->record[0]);
593       }
594     }
595     keyseg+= pos->user_defined_key_parts;
596   }
597   if (table_arg->found_next_number_field)
598     keydef[share->next_number_index].flag|= HA_AUTO_KEY;
599   record= table_arg->record[0];
600   recpos= 0;
601   recinfo_pos= recinfo;
602   create_info->null_bytes= table_arg->s->null_bytes;
603 
604   while (recpos < (uint) share->stored_rec_length)
605   {
606     Field **field, *found= 0;
607     minpos= share->reclength;
608     length= 0;
609 
610     for (field= table_arg->field; *field; field++)
611     {
612       if ((fieldpos= (*field)->offset(record)) >= recpos &&
613           fieldpos <= minpos)
614       {
615         /* skip null fields */
616         if (!(temp_length= (*field)->pack_length_in_rec()))
617           continue; /* Skip null-fields */
618         if (! found || fieldpos < minpos ||
619             (fieldpos == minpos && temp_length < length))
620         {
621           minpos= fieldpos;
622           found= *field;
623           length= temp_length;
624         }
625       }
626     }
627     DBUG_PRINT("loop", ("found: %p  recpos: %d  minpos: %d  length: %d",
628                         found, recpos, minpos, length));
629     if (!found)
630       break;
631 
632     if (found->flags & BLOB_FLAG)
633       recinfo_pos->type= FIELD_BLOB;
634     else if (found->type() == MYSQL_TYPE_TIMESTAMP)
635       recinfo_pos->type= FIELD_NORMAL;
636     else if (found->type() == MYSQL_TYPE_VARCHAR)
637       recinfo_pos->type= FIELD_VARCHAR;
638     else if (!(options & HA_OPTION_PACK_RECORD) ||
639              (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
640       recinfo_pos->type= FIELD_NORMAL;
641     else if (found->zero_pack())
642       recinfo_pos->type= FIELD_SKIP_ZERO;
643     else
644       recinfo_pos->type= ((length <= 3 ||
645                            (found->flags & ZEROFILL_FLAG)) ?
646                           FIELD_NORMAL :
647                           found->type() == MYSQL_TYPE_STRING ||
648                           found->type() == MYSQL_TYPE_VAR_STRING ?
649                           FIELD_SKIP_ENDSPACE :
650                           FIELD_SKIP_PRESPACE);
651     if (found->null_ptr)
652     {
653       recinfo_pos->null_bit= found->null_bit;
654       recinfo_pos->null_pos= (uint) (found->null_ptr -
655                                      (uchar*) table_arg->record[0]);
656     }
657     else
658     {
659       recinfo_pos->null_bit= 0;
660       recinfo_pos->null_pos= 0;
661     }
662     (recinfo_pos++)->length= (uint16) length;
663     recpos= minpos + length;
664     DBUG_PRINT("loop", ("length: %d  type: %d",
665                         recinfo_pos[-1].length,recinfo_pos[-1].type));
666   }
667   *records_out= (uint) (recinfo_pos - recinfo);
668   DBUG_RETURN(0);
669 }
670 
671 
672 /*
673   Check for underlying table conformance
674 
675   SYNOPSIS
676     maria_check_definition()
677       t1_keyinfo       in    First table key definition
678       t1_recinfo       in    First table record definition
679       t1_keys          in    Number of keys in first table
680       t1_recs          in    Number of records in first table
681       t2_keyinfo       in    Second table key definition
682       t2_recinfo       in    Second table record definition
683       t2_keys          in    Number of keys in second table
684       t2_recs          in    Number of records in second table
685       strict           in    Strict check switch
686 
687   DESCRIPTION
    This function compares two Maria definitions. By intention it was done
    to compare merge table definition against underlying table definition.
    It may also be used to compare dot-frm and MAI definitions of Maria
    table as well as to compare different Maria table definitions.
692 
693     For merge table it is not required that number of keys in merge table
694     must exactly match number of keys in underlying table. When calling this
695     function for underlying table conformance check, 'strict' flag must be
696     set to false, and converted merge definition must be passed as t1_*.
697 
698     Otherwise 'strict' flag must be set to 1 and it is not required to pass
699     converted dot-frm definition as t1_*.
700 
701   RETURN VALUE
702     0 - Equal definitions.
703     1 - Different definitions.
704 
705   TODO
706     - compare FULLTEXT keys;
707     - compare SPATIAL keys;
708     - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
709       (should be correctly detected in table2maria).
710 
711   FIXME:
712     maria_check_definition() is never used! CHECK TABLE does not detect the
713     corruption! Do maria_check_definition() like check_definition() is done
714     by MyISAM (related to MDEV-25803).
715 */
716 
maria_check_definition(MARIA_KEYDEF * t1_keyinfo,MARIA_COLUMNDEF * t1_recinfo,uint t1_keys,uint t1_recs,MARIA_KEYDEF * t2_keyinfo,MARIA_COLUMNDEF * t2_recinfo,uint t2_keys,uint t2_recs,bool strict)717 int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
718                            MARIA_COLUMNDEF *t1_recinfo,
719                            uint t1_keys, uint t1_recs,
720                            MARIA_KEYDEF *t2_keyinfo,
721                            MARIA_COLUMNDEF *t2_recinfo,
722                            uint t2_keys, uint t2_recs, bool strict)
723 {
724   uint i, j;
725   DBUG_ENTER("maria_check_definition");
726   if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys))
727   {
728     DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
729                          t1_keys, t2_keys));
730     DBUG_RETURN(1);
731   }
732   if (t1_recs != t2_recs)
733   {
734     DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
735                          t1_recs, t2_recs));
736     DBUG_RETURN(1);
737   }
738   for (i= 0; i < t1_keys; i++)
739   {
740     HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
741     HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
742     if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
743       continue;
744     else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
745              t2_keyinfo[i].flag & HA_FULLTEXT)
746     {
747        DBUG_PRINT("error", ("Key %d has different definition", i));
748        DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
749                             MY_TEST(t1_keyinfo[i].flag & HA_FULLTEXT),
750                             MY_TEST(t2_keyinfo[i].flag & HA_FULLTEXT)));
751        DBUG_RETURN(1);
752     }
753     if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
754       continue;
755     else if (t1_keyinfo[i].flag & HA_SPATIAL ||
756              t2_keyinfo[i].flag & HA_SPATIAL)
757     {
758        DBUG_PRINT("error", ("Key %d has different definition", i));
759        DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
760                             MY_TEST(t1_keyinfo[i].flag & HA_SPATIAL),
761                             MY_TEST(t2_keyinfo[i].flag & HA_SPATIAL)));
762        DBUG_RETURN(1);
763     }
764     if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
765         t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
766     {
767       DBUG_PRINT("error", ("Key %d has different definition", i));
768       DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
769                            t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
770       DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
771                            t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
772       DBUG_RETURN(1);
773     }
774     for (j=  t1_keyinfo[i].keysegs; j--;)
775     {
776       uint8 t1_keysegs_j__type= t1_keysegs[j].type;
777       /*
778         Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
779         always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
780         HA_KEYTYPE_VARTEXT1. Since we treat both the same on MyISAM
781         level, we can ignore a mismatch between these types.
782       */
783       if ((t1_keysegs[j].flag & HA_BLOB_PART) &&
784           (t2_keysegs[j].flag & HA_BLOB_PART))
785       {
786         if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
787             (t2_keysegs[j].type == HA_KEYTYPE_VARTEXT1))
788           t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
789         else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
790                  (t2_keysegs[j].type == HA_KEYTYPE_VARBINARY1))
791           t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
792       }
793 
794       if (t1_keysegs_j__type != t2_keysegs[j].type ||
795           t1_keysegs[j].language != t2_keysegs[j].language ||
796           t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
797           t1_keysegs[j].length != t2_keysegs[j].length)
798       {
799         DBUG_PRINT("error", ("Key segment %d (key %d) has different "
800                              "definition", j, i));
801         DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
802                              "t1_length=%d",
803                              t1_keysegs[j].type, t1_keysegs[j].language,
804                              t1_keysegs[j].null_bit, t1_keysegs[j].length));
805         DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
806                              "t2_length=%d",
807                              t2_keysegs[j].type, t2_keysegs[j].language,
808                              t2_keysegs[j].null_bit, t2_keysegs[j].length));
809 
810         DBUG_RETURN(1);
811       }
812     }
813   }
814 
815   for (i= 0; i < t1_recs; i++)
816   {
817     MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
818     MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
819     /*
820       FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
821       see NOTE1 in ma_create.c
822     */
823     if ((t1_rec->type != t2_rec->type &&
824          !(t1_rec->type == (int) FIELD_SKIP_ZERO &&
825            t1_rec->length == 1 &&
826            t2_rec->type == (int) FIELD_NORMAL)) ||
827         t1_rec->length != t2_rec->length ||
828         t1_rec->null_bit != t2_rec->null_bit)
829     {
830       DBUG_PRINT("error", ("Field %d has different definition", i));
831       DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
832                            t1_rec->type, t1_rec->length, t1_rec->null_bit));
833       DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
834                            t2_rec->type, t2_rec->length, t2_rec->null_bit));
835       DBUG_RETURN(1);
836     }
837   }
838   DBUG_RETURN(0);
839 }
840 
841 
842 extern "C" {
843 
_ma_killed_ptr(HA_CHECK * param)844 int _ma_killed_ptr(HA_CHECK *param)
845 {
846   if (likely(thd_killed((THD*)param->thd)) == 0)
847     return 0;
848   my_errno= HA_ERR_ABORTED_BY_USER;
849   return 1;
850 }
851 
852 
853 /*
854   Report progress to mysqld
855 
856   This is a bit more complex than what a normal progress report
857   function normally is.
858 
859   The reason is that this is called by enable_index/repair which
860   is one stage in ALTER TABLE and we can't use the external
861   stage/max_stage for this.
862 
863   thd_progress_init/thd_progress_next_stage is to be called by
864   high level commands like CHECK TABLE or REPAIR TABLE, not
865   by sub commands like enable_index().
866 
867   In ma_check.c it's easier to work with stages than with a total
868   progress, so we use internal stage/max_stage here to keep the
869   code simple.
870 */
871 
_ma_report_progress(HA_CHECK * param,ulonglong progress,ulonglong max_progress)872 void _ma_report_progress(HA_CHECK *param, ulonglong progress,
873                          ulonglong max_progress)
874 {
875   thd_progress_report((THD*)param->thd,
876                       progress + max_progress * param->stage,
877                       max_progress * param->max_stage);
878 }
879 
880 
_ma_check_print_error(HA_CHECK * param,const char * fmt,...)881 void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
882 {
883   va_list args;
884   DBUG_ENTER("_ma_check_print_error");
885   param->error_printed |= 1;
886   param->out_flag |= O_DATA_LOST;
887   if (param->testflag & T_SUPPRESS_ERR_HANDLING)
888     DBUG_VOID_RETURN;
889   va_start(args, fmt);
890   _ma_check_print_msg(param, "error", fmt, args);
891   va_end(args);
892   DBUG_VOID_RETURN;
893 }
894 
895 
_ma_check_print_info(HA_CHECK * param,const char * fmt,...)896 void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
897 {
898   va_list args;
899   DBUG_ENTER("_ma_check_print_info");
900   va_start(args, fmt);
901   _ma_check_print_msg(param, "info", fmt, args);
902   va_end(args);
903   DBUG_VOID_RETURN;
904 }
905 
906 
_ma_check_print_warning(HA_CHECK * param,const char * fmt,...)907 void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
908 {
909   va_list args;
910   DBUG_ENTER("_ma_check_print_warning");
911   param->warning_printed= 1;
912   param->out_flag |= O_DATA_LOST;
913   va_start(args, fmt);
914   _ma_check_print_msg(param, "warning", fmt, args);
915   va_end(args);
916   DBUG_VOID_RETURN;
917 }
918 
919 /*
920   Create a transaction object
921 
922   SYNOPSIS
923     info	Maria handler
924 
925   RETURN
926     0 		ok
927     #		Error number (HA_ERR_OUT_OF_MEM)
928 */
929 
maria_create_trn_for_mysql(MARIA_HA * info)930 static int maria_create_trn_for_mysql(MARIA_HA *info)
931 {
932   THD *thd= ((TABLE*) info->external_ref)->in_use;
933   TRN *trn= THD_TRN;
934   DBUG_ENTER("maria_create_trn_for_mysql");
935 
936   if (!trn)  /* no transaction yet - open it now */
937   {
938     trn= trnman_new_trn(& thd->transaction.wt);
939     if (unlikely(!trn))
940       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
941     THD_TRN= trn;
942     if (thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
943       trans_register_ha(thd, TRUE, maria_hton);
944   }
945   _ma_set_trn_for_table(info, trn);
946   if (!trnman_increment_locked_tables(trn))
947   {
948     trans_register_ha(thd, FALSE, maria_hton);
949     trnman_new_statement(trn);
950   }
951 #ifdef EXTRA_DEBUG
952   if (info->lock_type == F_WRLCK &&
953       ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
954   {
955     trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
956                      TRN_STATE_TABLES_CAN_CHANGE);
957     (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
958                                    (uchar*) thd->query(),
959                                    thd->query_length());
960   }
961   else
962   {
963     DBUG_PRINT("info", ("lock_type: %d  trnman_flags: %u",
964                         info->lock_type, trnman_get_flags(trn)));
965   }
966 
967 #endif
968   DBUG_RETURN(0);
969 }
970 
ma_killed_in_mariadb(MARIA_HA * info)971 my_bool ma_killed_in_mariadb(MARIA_HA *info)
972 {
973   return (((TABLE*) (info->external_ref))->in_use->killed != 0);
974 }
975 
976 } /* extern "C" */
977 
/**
  Bulk insert is not using a single UNDO record; i.e. neither
  BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
  BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR applies.
*/
#define BULK_INSERT_NONE      0
/**
  A transactional table is doing a bulk insert that is covered by one
  single UNDO record (UNDO_BULK_INSERT), and a repair is part of it.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR    1
/**
  A transactional table is doing a bulk insert that is covered by one
  single UNDO record (UNDO_BULK_INSERT), without a repair.
*/
#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
993 
ha_maria(handlerton * hton,TABLE_SHARE * table_arg)994 ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
995 handler(hton, table_arg), file(0),
996 int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
997                 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
998                 HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
999                 HA_FILE_BASED | HA_CAN_GEOMETRY | CANNOT_ROLLBACK_FLAG |
1000                 HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | HA_CAN_REPAIR |
1001                 HA_CAN_VIRTUAL_COLUMNS | HA_CAN_EXPORT |
1002                 HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT |
1003                 HA_CAN_TABLES_WITHOUT_ROLLBACK),
1004 can_enable_indexes(1), bulk_insert_single_undo(BULK_INSERT_NONE)
1005 {}
1006 
1007 
clone(const char * name,MEM_ROOT * mem_root)1008 handler *ha_maria::clone(const char *name __attribute__((unused)),
1009                          MEM_ROOT *mem_root)
1010 {
1011   ha_maria *new_handler=
1012     static_cast <ha_maria *>(handler::clone(file->s->open_file_name.str,
1013                                             mem_root));
1014   if (new_handler)
1015   {
1016     new_handler->file->state= file->state;
1017     /* maria_create_trn_for_mysql() is never called for clone() tables */
1018     new_handler->file->trn= file->trn;
1019   }
1020   return new_handler;
1021 }
1022 
1023 
1024 static const char *ha_maria_exts[]=
1025 {
1026   MARIA_NAME_IEXT,
1027   MARIA_NAME_DEXT,
1028   NullS
1029 };
1030 
1031 
index_type(uint key_number)1032 const char *ha_maria::index_type(uint key_number)
1033 {
1034   return ((table->key_info[key_number].flags & HA_FULLTEXT) ?
1035           "FULLTEXT" :
1036           (table->key_info[key_number].flags & HA_SPATIAL) ?
1037           "SPATIAL" :
1038           (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ?
1039           "RTREE" : "BTREE");
1040 }
1041 
1042 
index_flags(uint inx,uint part,bool all_parts) const1043 ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
1044 {
1045   ulong flags;
1046   if (table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT)
1047     flags= 0;
1048   else
1049   if ((table_share->key_info[inx].flags & HA_SPATIAL ||
1050       table_share->key_info[inx].algorithm == HA_KEY_ALG_RTREE))
1051   {
1052     /* All GIS scans are non-ROR scans. We also disable IndexConditionPushdown */
1053     flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1054            HA_READ_ORDER | HA_KEYREAD_ONLY | HA_KEY_SCAN_NOT_ROR;
1055   }
1056   else
1057   {
1058     flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
1059           HA_READ_ORDER | HA_KEYREAD_ONLY | HA_DO_INDEX_COND_PUSHDOWN;
1060   }
1061   return flags;
1062 }
1063 
1064 
scan_time()1065 double ha_maria::scan_time()
1066 {
1067   if (file->s->data_file_type == BLOCK_RECORD)
1068     return ulonglong2double(stats.data_file_length - file->s->block_size) / MY_MAX(file->s->block_size / 2, IO_SIZE) + 2;
1069   return handler::scan_time();
1070 }
1071 
/*
  We need to be able to store at least 2 keys on an index page as the
  splitting algorithms depend on this. (With only one key on a page
  we also can't use any compression, which may make the index file much
  larger.)
  We use HA_MAX_KEY_LENGTH as this is a stack restriction imposed by the
  handler interface.  If we want to increase this, we also have to
  increase HA_MARIA_KEY_BUFF and MARIA_MAX_KEY_BUFF, as the buffer needs
  to be able to store the extra length bytes that are part of the stored
  key.

  We also need to reserve space for a record pointer (8) and 3 bytes
  per key segment to store the length of the segment + possible null bytes.
  These extra bytes are required here so that maria_create() will surely
  accept any key created with the returned key data storage length.
*/
1088 
max_supported_key_length() const1089 uint ha_maria::max_supported_key_length() const
1090 {
1091   return maria_max_key_length();
1092 }
1093 
1094 /* Name is here without an extension */
1095 
open(const char * name,int mode,uint test_if_locked)1096 int ha_maria::open(const char *name, int mode, uint test_if_locked)
1097 {
1098   uint i;
1099 
1100 #ifdef NOT_USED
1101   /*
1102     If the user wants to have memory mapped data files, add an
1103     open_flag. Do not memory map temporary tables because they are
1104     expected to be inserted and thus extended a lot. Memory mapping is
1105     efficient for files that keep their size, but very inefficient for
1106     growing files. Using an open_flag instead of calling ma_extra(...
1107     HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the
1108     mapping is not repeated for every open, but just done on the initial
1109     open, when the MyISAM share is created. Every time the server
1110     requires to open a new instance of a table it calls this method. We
1111     will always supply HA_OPEN_MMAP for a permanent table. However, the
1112     Maria storage engine will ignore this flag if this is a secondary
1113     open of a table that is in use by other threads already (if the
1114     Maria share exists already).
1115   */
1116   if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
1117     test_if_locked|= HA_OPEN_MMAP;
1118 #endif
1119 
1120   if (maria_recover_options & HA_RECOVER_ANY)
1121   {
1122     /* user asked to trigger a repair if table was not properly closed */
1123     test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
1124   }
1125 
1126   if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
1127   {
1128     if (my_errno == HA_ERR_OLD_FILE)
1129     {
1130       push_warning(current_thd, Sql_condition::WARN_LEVEL_NOTE,
1131                    ER_CRASHED_ON_USAGE,
1132                    zerofill_error_msg);
1133     }
1134     return (my_errno ? my_errno : -1);
1135   }
1136 
1137   file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
1138   /* Set external_ref, mainly for temporary tables */
1139   file->external_ref= (void*) table;            // For ma_killed()
1140 
1141   if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
1142     maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0);
1143 
1144   info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
1145   if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
1146     maria_extra(file, HA_EXTRA_WAIT_LOCK, 0);
1147   if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
1148     int_table_flags |= HA_REC_NOT_IN_SEQ;
1149   if (!file->s->base.born_transactional)
1150   {
1151     /*
1152       INSERT DELAYED cannot work with transactional tables (because it cannot
1153       stand up to "when client gets ok the data is safe on disk": the record
1154       may not even be inserted). In the future, we could enable it back (as a
1155       client doing INSERT DELAYED knows the specificities; but we then should
1156       make sure to regularly commit in the delayed_insert thread).
1157     */
1158     int_table_flags|= HA_CAN_INSERT_DELAYED;
1159   }
1160   if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
1161     int_table_flags |= HA_HAS_NEW_CHECKSUM;
1162 
1163   /*
1164     For static size rows, tell MariaDB that we will access all bytes
1165     in the record when writing it.  This signals MariaDB to initialize
1166     the full row to ensure we don't get any errors from valgrind and
1167     that all bytes in the row is properly reset.
1168   */
1169   if (file->s->data_file_type == STATIC_RECORD &&
1170       (file->s->has_varchar_fields || file->s->has_null_fields))
1171     int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
1172 
1173   for (i= 0; i < table->s->keys; i++)
1174   {
1175     plugin_ref parser= table->key_info[i].parser;
1176     if (table->key_info[i].flags & HA_USES_PARSER)
1177       file->s->keyinfo[i].parser=
1178         (struct st_mysql_ftparser *)plugin_decl(parser)->info;
1179     table->key_info[i].block_size= file->s->keyinfo[i].block_length;
1180   }
1181   my_errno= 0;
1182 
1183   /* Count statistics of usage for newly open normal files */
1184   if (file->s->reopen == 1 && ! (test_if_locked & HA_OPEN_TMP_TABLE))
1185   {
1186     if (file->s->delay_key_write)
1187       feature_files_opened_with_delayed_keys++;
1188   }
1189 
1190   return my_errno;
1191 }
1192 
1193 
close(void)1194 int ha_maria::close(void)
1195 {
1196   MARIA_HA *tmp= file;
1197   if (!tmp)
1198     return 0;
1199   file= 0;
1200   return maria_close(tmp);
1201 }
1202 
1203 
write_row(uchar * buf)1204 int ha_maria::write_row(uchar * buf)
1205 {
1206   /*
1207      If we have an auto_increment column and we are writing a changed row
1208      or a new row, then update the auto_increment value in the record.
1209   */
1210   if (table->next_number_field && buf == table->record[0])
1211   {
1212     int error;
1213     if ((error= update_auto_increment()))
1214       return error;
1215   }
1216   return maria_write(file, buf);
1217 }
1218 
1219 
check(THD * thd,HA_CHECK_OPT * check_opt)1220 int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
1221 {
1222   int error;
1223   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1224   MARIA_SHARE *share= file->s;
1225   const char *old_proc_info;
1226   TRN *old_trn= file->trn;
1227 
1228   if (!file || !param) return HA_ADMIN_INTERNAL_ERROR;
1229 
1230   unmap_file(file);
1231   maria_chk_init(param);
1232   param->thd= thd;
1233   param->op_name= "check";
1234   param->db_name= table->s->db.str;
1235   param->table_name= table->alias.c_ptr();
1236   param->testflag= check_opt->flags | T_CHECK | T_SILENT;
1237   param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1238 
1239   if (!(table->db_stat & HA_READ_ONLY))
1240     param->testflag |= T_STATISTICS;
1241   param->using_global_keycache= 1;
1242 
1243   if (!maria_is_crashed(file) &&
1244       (((param->testflag & T_CHECK_ONLY_CHANGED) &&
1245         !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
1246                                   STATE_IN_REPAIR)) &&
1247         share->state.open_count == 0) ||
1248        ((param->testflag & T_FAST) && (share->state.open_count ==
1249                                       (uint) (share->global_changed ? 1 :
1250                                               0)))))
1251     return HA_ADMIN_ALREADY_DONE;
1252 
1253   maria_chk_init_for_check(param, file);
1254 
1255   if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
1256       STATE_MOVED)
1257   {
1258     _ma_check_print_error(param, "%s", zerofill_error_msg);
1259     return HA_ADMIN_CORRUPT;
1260   }
1261 
1262   old_proc_info= thd_proc_info(thd, "Checking status");
1263   thd_progress_init(thd, 3);
1264   error= maria_chk_status(param, file);                // Not fatal
1265   /* maria_chk_size() will flush the page cache for this file */
1266   if (maria_chk_size(param, file))
1267     error= 1;
1268   if (!error)
1269     error|= maria_chk_del(param, file, param->testflag);
1270   thd_proc_info(thd, "Checking keys");
1271   thd_progress_next_stage(thd);
1272   if (!error)
1273     error= maria_chk_key(param, file);
1274   thd_proc_info(thd, "Checking data");
1275   thd_progress_next_stage(thd);
1276   if (!error)
1277   {
1278     if ((!(param->testflag & T_QUICK) &&
1279          ((share->options &
1280            (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
1281           (param->testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
1282     {
1283       ulonglong old_testflag= param->testflag;
1284       param->testflag |= T_MEDIUM;
1285       if (!(error= init_io_cache(&param->read_cache, file->dfile.file,
1286                                  my_default_record_cache_size, READ_CACHE,
1287                                  share->pack.header_length, 1, MYF(MY_WME))))
1288       {
1289         error= maria_chk_data_link(param, file,
1290                                    MY_TEST(param->testflag & T_EXTEND));
1291         end_io_cache(&param->read_cache);
1292       }
1293       param->testflag= old_testflag;
1294     }
1295   }
1296   if (!error)
1297   {
1298     if ((share->state.changed & (STATE_CHANGED |
1299                                  STATE_CRASHED_FLAGS |
1300                                  STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
1301         (param->testflag & T_STATISTICS) || maria_is_crashed(file))
1302     {
1303       file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1304       mysql_mutex_lock(&share->intern_lock);
1305       DBUG_PRINT("info", ("Reseting crashed state"));
1306       share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
1307                                STATE_IN_REPAIR);
1308       if (!(table->db_stat & HA_READ_ONLY))
1309         error= maria_update_state_info(param, file,
1310                                        UPDATE_TIME | UPDATE_OPEN_COUNT |
1311                                        UPDATE_STAT);
1312       mysql_mutex_unlock(&share->intern_lock);
1313       info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
1314            HA_STATUS_CONST);
1315     }
1316   }
1317   else if (!maria_is_crashed(file) && !thd->killed)
1318   {
1319     maria_mark_crashed(file);
1320     file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1321   }
1322 
1323   /* Reset trn, that may have been set by repair */
1324   if (old_trn && old_trn != file->trn)
1325     _ma_set_trn_for_table(file, old_trn);
1326   thd_proc_info(thd, old_proc_info);
1327   thd_progress_end(thd);
1328   return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1329 }
1330 
1331 
1332 /*
1333   Analyze the key distribution in the table
1334   As the table may be only locked for read, we have to take into account that
1335   two threads may do an analyze at the same time!
1336 */
1337 
analyze(THD * thd,HA_CHECK_OPT * check_opt)1338 int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
1339 {
1340   int error= 0;
1341   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1342   MARIA_SHARE *share= file->s;
1343   const char *old_proc_info;
1344 
1345   if (!param)
1346     return HA_ADMIN_INTERNAL_ERROR;
1347 
1348   maria_chk_init(param);
1349   param->thd= thd;
1350   param->op_name= "analyze";
1351   param->db_name= table->s->db.str;
1352   param->table_name= table->alias.c_ptr();
1353   param->testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
1354                    T_DONT_CHECK_CHECKSUM);
1355   param->using_global_keycache= 1;
1356   param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
1357 
1358   if (!(share->state.changed & STATE_NOT_ANALYZED))
1359     return HA_ADMIN_ALREADY_DONE;
1360 
1361   old_proc_info= thd_proc_info(thd, "Scanning");
1362   thd_progress_init(thd, 1);
1363   error= maria_chk_key(param, file);
1364   if (!error)
1365   {
1366     mysql_mutex_lock(&share->intern_lock);
1367     error= maria_update_state_info(param, file, UPDATE_STAT);
1368     mysql_mutex_unlock(&share->intern_lock);
1369   }
1370   else if (!maria_is_crashed(file) && !thd->killed)
1371     maria_mark_crashed(file);
1372   thd_proc_info(thd, old_proc_info);
1373   thd_progress_end(thd);
1374   return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
1375 }
1376 
repair(THD * thd,HA_CHECK_OPT * check_opt)1377 int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
1378 {
1379   int error;
1380   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1381   ha_rows start_records;
1382   const char *old_proc_info;
1383 
1384   if (!file || !param)
1385     return HA_ADMIN_INTERNAL_ERROR;
1386 
1387   maria_chk_init(param);
1388   param->thd= thd;
1389   param->op_name= "repair";
1390   param->testflag= ((check_opt->flags & ~(T_EXTEND)) |
1391                    T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
1392                    (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
1393   param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1394   param->backup_time= check_opt->start_time;
1395   start_records= file->state->records;
1396   old_proc_info= thd_proc_info(thd, "Checking table");
1397   thd_progress_init(thd, 1);
1398   while ((error= repair(thd, param, 0)) && param->retry_repair)
1399   {
1400     param->retry_repair= 0;
1401     file->state->records= start_records;
1402     if (test_all_bits(param->testflag,
1403                       (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
1404     {
1405       param->testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
1406       /* Ensure we don't loose any rows when retrying without quick */
1407       param->testflag|= T_SAFE_REPAIR;
1408       if (thd->vio_ok())
1409         _ma_check_print_info(param, "Retrying repair without quick");
1410       else
1411         sql_print_information("Retrying repair of: '%s' without quick",
1412                               table->s->path.str);
1413       continue;
1414     }
1415     param->testflag &= ~T_QUICK;
1416     if (param->testflag & T_REP_BY_SORT)
1417     {
1418       param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
1419       if (thd->vio_ok())
1420         _ma_check_print_info(param, "Retrying repair with keycache");
1421       sql_print_information("Retrying repair of: '%s' with keycache",
1422                             table->s->path.str);
1423       continue;
1424     }
1425     break;
1426   }
1427   if (!error && start_records != file->state->records &&
1428       !(check_opt->flags & T_VERY_SILENT))
1429   {
1430     char llbuff[22], llbuff2[22];
1431     sql_print_information("Found %s of %s rows when repairing '%s'",
1432                           llstr(file->state->records, llbuff),
1433                           llstr(start_records, llbuff2),
1434                           table->s->path.str);
1435   }
1436   thd_proc_info(thd, old_proc_info);
1437   thd_progress_end(thd);
1438   return error;
1439 }
1440 
zerofill(THD * thd,HA_CHECK_OPT * check_opt)1441 int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
1442 {
1443   int error;
1444   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1445   TRN *old_trn;
1446   MARIA_SHARE *share= file->s;
1447 
1448   if (!file || !param)
1449     return HA_ADMIN_INTERNAL_ERROR;
1450 
1451   unmap_file(file);
1452   old_trn= file->trn;
1453   maria_chk_init(param);
1454   param->thd= thd;
1455   param->op_name= "zerofill";
1456   param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
1457   param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1458   error=maria_zerofill(param, file, share->open_file_name.str);
1459 
1460   /* Reset trn, that may have been set by repair */
1461   if (old_trn && old_trn != file->trn)
1462     _ma_set_trn_for_table(file, old_trn);
1463 
1464   if (!error)
1465   {
1466     TrID create_trid= trnman_get_min_safe_trid();
1467     mysql_mutex_lock(&share->intern_lock);
1468     share->state.changed|= STATE_NOT_MOVABLE;
1469     maria_update_state_info(param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
1470     _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, create_trid,
1471                               TRUE, TRUE);
1472     mysql_mutex_unlock(&share->intern_lock);
1473   }
1474   return error;
1475 }
1476 
optimize(THD * thd,HA_CHECK_OPT * check_opt)1477 int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
1478 {
1479   int error;
1480   HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1481 
1482   if (!file || !param)
1483     return HA_ADMIN_INTERNAL_ERROR;
1484 
1485   maria_chk_init(param);
1486   param->thd= thd;
1487   param->op_name= "optimize";
1488   param->testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
1489                    T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
1490   param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
1491   thd_progress_init(thd, 1);
1492   if ((error= repair(thd, param, 1)) && param->retry_repair)
1493   {
1494     sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
1495                       my_errno, param->db_name, param->table_name);
1496     param->testflag &= ~T_REP_BY_SORT;
1497     error= repair(thd, param, 0);
1498   }
1499   thd_progress_end(thd);
1500   return error;
1501 }
1502 
1503 
repair(THD * thd,HA_CHECK * param,bool do_optimize)1504 int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
1505 {
1506   int error= 0;
1507   ulonglong local_testflag= param->testflag;
1508   bool optimize_done= !do_optimize, statistics_done= 0, full_repair_done= 0;
1509   const char *old_proc_info= thd->proc_info;
1510   char fixed_name[FN_REFLEN];
1511   MARIA_SHARE *share= file->s;
1512   ha_rows rows= file->state->records;
1513   TRN *old_trn= file->trn;
1514   my_bool locking= 0;
1515   DBUG_ENTER("ha_maria::repair");
1516 
1517   /*
1518     Normally this method is entered with a properly opened table. If the
1519     repair fails, it can be repeated with more elaborate options. Under
1520     special circumstances it can happen that a repair fails so that it
1521     closed the data file and cannot re-open it. In this case file->dfile
1522     is set to -1. We must not try another repair without an open data
1523     file. (Bug #25289)
1524   */
1525   if (file->dfile.file == -1)
1526   {
1527     sql_print_information("Retrying repair of: '%s' failed. "
1528                           "Please try REPAIR EXTENDED or aria_chk",
1529                           table->s->path.str);
1530     DBUG_RETURN(HA_ADMIN_FAILED);
1531   }
1532 
1533   /*
1534     If transactions was not enabled for a transactional table then
1535     file->s->status is not up to date. This is needed for repair_by_sort
1536     to work
1537   */
1538   if (share->base.born_transactional && !share->now_transactional)
1539     _ma_copy_nontrans_state_information(file);
1540 
1541   param->db_name= table->s->db.str;
1542   param->table_name= table->alias.c_ptr();
1543   param->tmpfile_createflag= O_RDWR | O_TRUNC;
1544   param->using_global_keycache= 1;
1545   param->thd= thd;
1546   param->tmpdir= &mysql_tmpdir_list;
1547   param->out_flag= 0;
1548   share->state.dupp_key= MI_MAX_KEY;
1549   strmov(fixed_name, share->open_file_name.str);
1550   unmap_file(file);
1551 
1552   /*
1553     Don't lock tables if we have used LOCK TABLE or if we come from
1554     enable_index()
1555   */
1556   if (!thd->locked_tables_mode && ! (param->testflag & T_NO_LOCKS))
1557   {
1558     locking= 1;
1559     if (maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
1560     {
1561       _ma_check_print_error(param, ER_THD(thd, ER_CANT_LOCK), my_errno);
1562       DBUG_RETURN(HA_ADMIN_FAILED);
1563     }
1564   }
1565 
1566   if (!do_optimize ||
1567       (((share->data_file_type == BLOCK_RECORD) ?
1568         (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) :
1569         (file->state->del ||
1570          share->state.split != file->state->records)) &&
1571        (!(param->testflag & T_QUICK) ||
1572         (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
1573                                  STATE_NOT_OPTIMIZED_ROWS)))))
1574   {
1575     ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
1576                         maria_get_mask_all_keys_active(share->base.keys) :
1577                         share->state.key_map);
1578     ulonglong save_testflag= param->testflag;
1579     if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
1580         (local_testflag & T_REP_BY_SORT))
1581     {
1582       local_testflag |= T_STATISTICS;
1583       param->testflag |= T_STATISTICS;           // We get this for free
1584       statistics_done= 1;
1585       /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
1586       if (THDVAR(thd,repair_threads) > 1 &&
1587           share->data_file_type != BLOCK_RECORD)
1588       {
1589         char buf[40];
1590         /* TODO: respect maria_repair_threads variable */
1591         my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
1592         thd_proc_info(thd, buf);
1593         param->testflag|= T_REP_PARALLEL;
1594         error= maria_repair_parallel(param, file, fixed_name,
1595                                      MY_TEST(param->testflag & T_QUICK));
1596         /* to reset proc_info, as it was pointing to local buffer */
1597         thd_proc_info(thd, "Repair done");
1598       }
1599       else
1600       {
1601         thd_proc_info(thd, "Repair by sorting");
1602         param->testflag|= T_REP_BY_SORT;
1603         error= maria_repair_by_sort(param, file, fixed_name,
1604                                     MY_TEST(param->testflag & T_QUICK));
1605       }
1606       if (error && file->create_unique_index_by_sort &&
1607           share->state.dupp_key != MAX_KEY)
1608       {
1609         my_errno= HA_ERR_FOUND_DUPP_KEY;
1610         print_keydup_error(table, &table->key_info[share->state.dupp_key],
1611                            MYF(0));
1612       }
1613     }
1614     else
1615     {
1616       thd_proc_info(thd, "Repair with keycache");
1617       param->testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
1618       error= maria_repair(param, file, fixed_name,
1619                           MY_TEST(param->testflag & T_QUICK));
1620     }
1621     param->testflag= save_testflag | (param->testflag & T_RETRY_WITHOUT_QUICK);
1622     optimize_done= 1;
1623     /*
1624       set full_repair_done if we re-wrote all rows and all keys
1625       (and thus removed all transid's from the table
1626     */
1627     full_repair_done= !MY_TEST(param->testflag & T_QUICK);
1628   }
1629   if (!error)
1630   {
1631     if ((local_testflag & T_SORT_INDEX) &&
1632         (share->state.changed & STATE_NOT_SORTED_PAGES))
1633     {
1634       optimize_done= 1;
1635       thd_proc_info(thd, "Sorting index");
1636       error= maria_sort_index(param, file, fixed_name);
1637     }
1638     if (!error && !statistics_done && (local_testflag & T_STATISTICS))
1639     {
1640       if (share->state.changed & STATE_NOT_ANALYZED)
1641       {
1642         optimize_done= 1;
1643         thd_proc_info(thd, "Analyzing");
1644         error= maria_chk_key(param, file);
1645       }
1646       else
1647         local_testflag &= ~T_STATISTICS;        // Don't update statistics
1648     }
1649   }
1650   thd_proc_info(thd, "Saving state");
1651   if (full_repair_done && !error &&
1652       !(param->testflag & T_NO_CREATE_RENAME_LSN))
1653   {
1654     /* Set trid (needed if the table was moved from another system) */
1655     share->state.create_trid= trnman_get_min_safe_trid();
1656   }
1657   mysql_mutex_lock(&share->intern_lock);
1658   if (!error)
1659   {
1660     if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
1661     {
1662       DBUG_PRINT("info", ("Reseting crashed state"));
1663       share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
1664                                STATE_IN_REPAIR | STATE_MOVED);
1665       file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1666     }
1667     /*
1668       repair updates share->state.state. Ensure that file->state is up to date
1669     */
1670     if (file->state != &share->state.state)
1671       *file->state= share->state.state;
1672 
1673     if (share->base.auto_key)
1674       _ma_update_auto_increment_key(param, file, 1);
1675     if (optimize_done)
1676       error= maria_update_state_info(param, file,
1677                                      UPDATE_TIME | UPDATE_OPEN_COUNT |
1678                                      (local_testflag &
1679                                       T_STATISTICS ? UPDATE_STAT : 0));
1680     /* File is repaired; Mark the file as moved to this system */
1681     (void) _ma_set_uuid(share, 0);
1682 
1683     info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
1684          HA_STATUS_CONST);
1685     if (rows != file->state->records && !(param->testflag & T_VERY_SILENT))
1686     {
1687       char llbuff[22], llbuff2[22];
1688       _ma_check_print_warning(param, "Number of rows changed from %s to %s",
1689                               llstr(rows, llbuff),
1690                               llstr(file->state->records, llbuff2));
1691     }
1692   }
1693   else
1694   {
1695     maria_mark_crashed_on_repair(file);
1696     file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
1697     maria_update_state_info(param, file, 0);
1698   }
1699   mysql_mutex_unlock(&share->intern_lock);
1700   thd_proc_info(thd, old_proc_info);
1701   thd_progress_end(thd);                        // Mark done
1702   if (locking)
1703     maria_lock_database(file, F_UNLCK);
1704 
1705   /* Reset trn, that may have been set by repair */
1706   if (old_trn && old_trn != file->trn)
1707     _ma_set_trn_for_table(file, old_trn);
1708   error= error ? HA_ADMIN_FAILED :
1709     (optimize_done ?
1710      (write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED :
1711       HA_ADMIN_OK) : HA_ADMIN_ALREADY_DONE);
1712   DBUG_RETURN(error);
1713 }
1714 
1715 
1716 /*
1717   Assign table indexes to a specific key cache.
1718 */
1719 
assign_to_keycache(THD * thd,HA_CHECK_OPT * check_opt)1720 int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
1721 {
1722 #if 0 && NOT_IMPLEMENTED
1723   PAGECACHE *new_pagecache= check_opt->pagecache;
1724   const char *errmsg= 0;
1725   int error= HA_ADMIN_OK;
1726   ulonglong map;
1727   TABLE_LIST *table_list= table->pos_in_table_list;
1728   DBUG_ENTER("ha_maria::assign_to_keycache");
1729 
1730   table->keys_in_use_for_query.clear_all();
1731 
1732   if (table_list->process_index_hints(table))
1733     DBUG_RETURN(HA_ADMIN_FAILED);
1734   map= ~(ulonglong) 0;
1735   if (!table->keys_in_use_for_query.is_clear_all())
1736     /* use all keys if there's no list specified by the user through hints */
1737     map= table->keys_in_use_for_query.to_ulonglong();
1738 
1739   if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
1740   {
1741     char buf[STRING_BUFFER_USUAL_SIZE];
1742     my_snprintf(buf, sizeof(buf),
1743                 "Failed to flush to index file (errno: %d)", error);
1744     errmsg= buf;
1745     error= HA_ADMIN_CORRUPT;
1746   }
1747 
1748   if (error != HA_ADMIN_OK)
1749   {
1750     /* Send error to user */
1751     HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1752     if (!param)
1753       return HA_ADMIN_INTERNAL_ERROR;
1754 
1755     maria_chk_init(param);
1756     param->thd= thd;
1757     param->op_name= "assign_to_keycache";
1758     param->db_name= table->s->db.str;
1759     param->table_name= table->s->table_name.str;
1760     param->testflag= 0;
1761     _ma_check_print_error(param, errmsg);
1762   }
1763   DBUG_RETURN(error);
1764 #else
1765   return  HA_ADMIN_NOT_IMPLEMENTED;
1766 #endif
1767 }
1768 
1769 
1770 /*
1771   Preload pages of the index file for a table into the key cache.
1772 */
1773 
preload_keys(THD * thd,HA_CHECK_OPT * check_opt)1774 int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
1775 {
1776   ulonglong map;
1777   TABLE_LIST *table_list= table->pos_in_table_list;
1778 
1779   DBUG_ENTER("ha_maria::preload_keys");
1780 
1781   table->keys_in_use_for_query.clear_all();
1782 
1783   if (table_list->process_index_hints(table))
1784     DBUG_RETURN(HA_ADMIN_FAILED);
1785 
1786   map= ~(ulonglong) 0;
1787   /* Check validity of the index references */
1788   if (!table->keys_in_use_for_query.is_clear_all())
1789     /* use all keys if there's no list specified by the user through hints */
1790     map= table->keys_in_use_for_query.to_ulonglong();
1791 
1792   maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
1793               (void*) &thd->variables.preload_buff_size);
1794 
1795   int error;
1796 
1797   if ((error= maria_preload(file, map, table_list->ignore_leaves)))
1798   {
1799     char buf[MYSQL_ERRMSG_SIZE+20];
1800     const char *errmsg;
1801 
1802     switch (error) {
1803     case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
1804       errmsg= "Indexes use different block sizes";
1805       break;
1806     case HA_ERR_OUT_OF_MEM:
1807       errmsg= "Failed to allocate buffer";
1808       break;
1809     default:
1810       my_snprintf(buf, sizeof(buf),
1811                   "Failed to read from index file (errno: %d)", my_errno);
1812       errmsg= buf;
1813     }
1814 
1815     HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
1816     if (!param)
1817       return HA_ADMIN_INTERNAL_ERROR;
1818 
1819     maria_chk_init(param);
1820     param->thd= thd;
1821     param->op_name= "preload_keys";
1822     param->db_name= table->s->db.str;
1823     param->table_name= table->s->table_name.str;
1824     param->testflag= 0;
1825     _ma_check_print_error(param, "%s", errmsg);
1826     DBUG_RETURN(HA_ADMIN_FAILED);
1827   }
1828   DBUG_RETURN(HA_ADMIN_OK);
1829 }
1830 
1831 
1832 /*
1833   Disable indexes, making it persistent if requested.
1834 
1835   SYNOPSIS
1836     disable_indexes()
1837     mode        mode of operation:
1838                 HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
1839                 HA_KEY_SWITCH_ALL          disable all keys
1840                 HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
1841                 HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent
1842 
1843   IMPLEMENTATION
1844     HA_KEY_SWITCH_NONUNIQ       is not implemented.
1845     HA_KEY_SWITCH_ALL_SAVE      is not implemented.
1846 
1847   RETURN
1848     0  ok
1849     HA_ERR_WRONG_COMMAND  mode not implemented.
1850 */
1851 
disable_indexes(uint mode)1852 int ha_maria::disable_indexes(uint mode)
1853 {
1854   int error;
1855 
1856   if (mode == HA_KEY_SWITCH_ALL)
1857   {
1858     /* call a storage engine function to switch the key map */
1859     error= maria_disable_indexes(file);
1860   }
1861   else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
1862   {
1863     maria_extra(file, HA_EXTRA_NO_KEYS, 0);
1864     info(HA_STATUS_CONST);                      // Read new key info
1865     error= 0;
1866   }
1867   else
1868   {
1869     /* mode not implemented */
1870     error= HA_ERR_WRONG_COMMAND;
1871   }
1872   return error;
1873 }
1874 
1875 
/*
  Enable indexes, making it persistent if requested.

  SYNOPSIS
    enable_indexes()
    mode        mode of operation:
                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
                HA_KEY_SWITCH_ALL          enable all keys
                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent

  DESCRIPTION
    Enable indexes, which might have been disabled by disable_indexes()
    before.
    The modes without _SAVE work only if both data and indexes are empty,
    since the MARIA repair would enable them persistently.
    To be sure in these cases, call handler::delete_all_rows() before.

  IMPLEMENTATION
    HA_KEY_SWITCH_NONUNIQ       is not implemented.
    HA_KEY_SWITCH_ALL_SAVE      is not implemented.
    HA_KEY_SWITCH_NONUNIQ_SAVE  recreates the missing keys through repair().
                                If that fails and param->retry_repair is
                                set, repair() is run once more without
                                T_REP_BY_SORT.

  RETURN
    0  ok (also when all indexes were already active)
    !=0  Error, among others:
    HA_ERR_CRASHED  data or index is non-empty. Delete all rows and retry.
    HA_ERR_WRONG_COMMAND  mode not implemented.
    1  repair() did not return HA_ADMIN_OK (HA_KEY_SWITCH_NONUNIQ_SAVE)
    HA_ADMIN_INTERNAL_ERROR  the HA_CHECK block could not be allocated
*/

int ha_maria::enable_indexes(uint mode)
{
  int error;
  /* Row count on entry; restored before a second repair attempt */
  ha_rows start_rows= file->state->records;
  DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));
  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
  {
    /* All indexes are enabled already. */
    return 0;
  }

  if (mode == HA_KEY_SWITCH_ALL)
  {
    error= maria_enable_indexes(file);
    /*
       Do not try to repair on error,
       as this could make the enabled state persistent,
       but mode==HA_KEY_SWITCH_ALL forbids it.
    */
  }
  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
  {
    THD *thd= table->in_use;
    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
    if (!param)
      return HA_ADMIN_INTERNAL_ERROR;

    /* Remember the current proc_info so it can be put back when done */
    const char *save_proc_info= thd_proc_info(thd, "Creating index");

    maria_chk_init(param);
    param->op_name= "recreating_index";
    param->testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
                     T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
    /*
      Don't lock and unlock table if it's locked.
      Normally table should be locked.  This test is mostly for safety.
    */
    if (likely(file->lock_type != F_UNLCK))
      param->testflag|= T_NO_LOCKS;

    if (file->create_unique_index_by_sort)
      param->testflag|= T_CREATE_UNIQUE_BY_SORT;

    if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
    {
      /* Record that a repair is now part of this bulk insert */
      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
      /*
        Don't bump create_rename_lsn, because UNDO_BULK_INSERT
        should not be skipped in case of crash during repair.
      */
      param->testflag|= T_NO_CREATE_RENAME_LSN;
    }

    param->myf_rw &= ~MY_WAIT_IF_FULL;
    param->sort_buffer_length= THDVAR(thd,sort_buffer_size);
    param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
    param->tmpdir= &mysql_tmpdir_list;
    /* error becomes 0 or 1 here: it only records whether repair() was ok */
    if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair)
    {
      sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
                        "retrying",
                        my_errno, param->db_name, param->table_name);
      /* This should never fail normally */
      DBUG_ASSERT(thd->killed != 0);
      /* Repairing by sort failed. Now try standard repair method. */
      param->testflag &= ~T_REP_BY_SORT;
      /* Start the retry from the row count seen on entry */
      file->state->records= start_rows;
      error= (repair(thd, param, 0) != HA_ADMIN_OK);
      /*
        If the standard repair succeeded, clear all error messages which
        might have been set by the first repair. They can still be seen
        with SHOW WARNINGS then.
      */
      if (!error)
        thd->clear_error();
    }
    /* Re-read constant information (key info) after the repair attempt */
    info(HA_STATUS_CONST);
    thd_proc_info(thd, save_proc_info);
  }
  else
  {
    /* mode not implemented */
    error= HA_ERR_WRONG_COMMAND;
  }
  /* Debug-only hooks: flush the whole log / crash on purpose for testing */
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_enable_index",
                  {
                    DBUG_PRINT("maria_crash_enable_index", ("now"));
                    DBUG_SUICIDE();
                  });
  return error;
}
2000 
2001 
2002 /*
2003   Test if indexes are disabled.
2004 
2005 
2006   SYNOPSIS
2007     indexes_are_disabled()
2008       no parameters
2009 
2010 
2011   RETURN
2012     0  indexes are not disabled
2013     1  all indexes are disabled
2014    [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
2015 */
2016 
indexes_are_disabled(void)2017 int ha_maria::indexes_are_disabled(void)
2018 {
2019   return maria_indexes_are_disabled(file);
2020 }
2021 
2022 
2023 /*
2024   prepare for a many-rows insert operation
2025   e.g. - disable indexes (if they can be recreated fast) or
2026   activate special bulk-insert optimizations
2027 
2028   SYNOPSIS
2029    start_bulk_insert(rows, flags)
2030    rows        Rows to be inserted
2031                 0 if we don't know
2032    flags       Flags to control index creation
2033 
2034   NOTICE
2035     Do not forget to call end_bulk_insert() later!
2036 */
2037 
start_bulk_insert(ha_rows rows,uint flags)2038 void ha_maria::start_bulk_insert(ha_rows rows, uint flags)
2039 {
2040   DBUG_ENTER("ha_maria::start_bulk_insert");
2041   THD *thd= table->in_use;
2042   MARIA_SHARE *share= file->s;
2043   DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));
2044 
2045   /* don't enable row cache if too few rows */
2046   if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE))
2047   {
2048     ulonglong size= thd->variables.read_buff_size, tmp;
2049     if (rows)
2050     {
2051       if (file->state->records)
2052       {
2053         MARIA_INFO maria_info;
2054         maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
2055         set_if_smaller(size, maria_info.mean_reclength * rows);
2056       }
2057       else if (table->s->avg_row_length)
2058         set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
2059     }
2060     tmp= (ulong) size;                          // Safe becasue of limits
2061     maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &tmp);
2062   }
2063 
2064   can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
2065                                                 share->base.keys));
2066   bulk_insert_single_undo= BULK_INSERT_NONE;
2067 
2068   if (!(specialflag & SPECIAL_SAFE_MODE))
2069   {
2070     /*
2071        Only disable old index if the table was empty and we are inserting
2072        a lot of rows.
2073        We should not do this for only a few rows as this is slower and
2074        we don't want to update the key statistics based of only a few rows.
2075        Index file rebuild requires an exclusive lock, so if versioning is on
2076        don't do it (see how ha_maria::store_lock() tries to predict repair).
2077        We can repair index only if we have an exclusive (TL_WRITE) lock or
2078        if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK.
2079 
2080        To see if table is empty, we shouldn't rely on the old record
2081        count from our transaction's start (if that old count is 0 but
2082        now there are records in the table, we would wrongly destroy
2083        them).  So we need to look at share->state.state.records.  As a
2084        safety net for now, we don't remove the test of
2085        file->state->records, because there is uncertainty on what will
2086        happen during repair if the two states disagree.
2087 
2088        We also have to check in case of transactional tables that the
2089        user has not used LOCK TABLE on the table twice.
2090     */
2091     if ((file->state->records == 0) &&
2092         (share->state.state.records == 0) && can_enable_indexes &&
2093         (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
2094         (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK) &&
2095         (!share->have_versioning || !share->now_transactional ||
2096          file->used_tables->use_count == 1))
2097     {
2098       /**
2099          @todo for a single-row INSERT SELECT, we will go into repair, which
2100          is more costly (flushes, syncs) than a row write.
2101       */
2102       if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
2103       {
2104         /* Internal table; If we get a duplicate something is very wrong */
2105         file->update|= HA_STATE_CHANGED;
2106         maria_clear_all_keys_active(file->s->state.key_map);
2107       }
2108       else
2109       {
2110         my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
2111         maria_disable_indexes_for_rebuild(file, rows, all_keys);
2112       }
2113       if (share->now_transactional)
2114       {
2115         bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
2116         write_log_record_for_bulk_insert(file);
2117         _ma_tmp_disable_logging_for_table(file, TRUE);
2118         /*
2119           Pages currently in the page cache have type PAGECACHE_LSN_PAGE, we
2120           are not allowed to overwrite them with PAGECACHE_PLAIN_PAGE, so
2121           throw them away. It is not losing data, because we just wrote and
2122           forced an UNDO which will for sure empty the table if we crash. The
2123           upcoming unique-key insertions however need a proper index, so we
2124           cannot leave the corrupted on-disk index file, thus we truncate it.
2125         */
2126         maria_delete_all_rows(file);
2127       }
2128     }
2129     else if (!file->bulk_insert &&
2130              (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
2131     {
2132       maria_init_bulk_insert(file,
2133                              (size_t) thd->variables.bulk_insert_buff_size,
2134                              rows);
2135     }
2136   }
2137   DBUG_VOID_RETURN;
2138 }
2139 
2140 
2141 /*
2142   end special bulk-insert optimizations,
2143   which have been activated by start_bulk_insert().
2144 
2145   SYNOPSIS
2146     end_bulk_insert()
2147     no arguments
2148 
2149   RETURN
2150     0     OK
2151     != 0  Error
2152 */
2153 
end_bulk_insert()2154 int ha_maria::end_bulk_insert()
2155 {
2156   int first_error, error;
2157   my_bool abort= file->s->deleting;
2158   DBUG_ENTER("ha_maria::end_bulk_insert");
2159 
2160   if ((first_error= maria_end_bulk_insert(file, abort)))
2161     abort= 1;
2162 
2163   if ((error= maria_extra(file, HA_EXTRA_NO_CACHE, 0)))
2164   {
2165     first_error= first_error ? first_error : error;
2166     abort= 1;
2167   }
2168 
2169   if (!abort && can_enable_indexes)
2170     if ((error= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE)))
2171       first_error= first_error ? first_error : error;
2172 
2173   if (bulk_insert_single_undo != BULK_INSERT_NONE)
2174   {
2175     DBUG_ASSERT(can_enable_indexes);
2176     /*
2177       Table was transactional just before start_bulk_insert().
2178       No need to flush pages if we did a repair (which already flushed).
2179     */
2180     if ((error= _ma_reenable_logging_for_table(file,
2181                                                bulk_insert_single_undo ==
2182                                                BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)))
2183       first_error= first_error ? first_error : error;
2184     bulk_insert_single_undo= BULK_INSERT_NONE;  // Safety
2185   }
2186   DBUG_RETURN(first_error);
2187 }
2188 
2189 
check_and_repair(THD * thd)2190 bool ha_maria::check_and_repair(THD *thd)
2191 {
2192   int error, crashed;
2193   HA_CHECK_OPT check_opt;
2194   const CSET_STRING query_backup= thd->query_string;
2195   DBUG_ENTER("ha_maria::check_and_repair");
2196 
2197   check_opt.init();
2198   check_opt.flags= T_MEDIUM | T_AUTO_REPAIR;
2199 
2200   error= 1;
2201   if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
2202       STATE_MOVED)
2203   {
2204     /* Remove error about crashed table */
2205     thd->get_stmt_da()->clear_warning_info(thd->query_id);
2206     push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
2207                         ER_CRASHED_ON_USAGE,
2208                         "Zerofilling moved table %s", table->s->path.str);
2209     sql_print_information("Zerofilling moved table:  '%s'",
2210                           table->s->path.str);
2211     if (!(error= zerofill(thd, &check_opt)))
2212       DBUG_RETURN(0);
2213   }
2214 
2215   /*
2216     if we got this far - the table is crashed.
2217     but don't auto-repair if maria_recover_options is not set
2218   */
2219   if (!maria_recover_options)
2220     DBUG_RETURN(error);
2221 
2222   error= 0;
2223   // Don't use quick if deleted rows
2224   if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK))
2225     check_opt.flags |= T_QUICK;
2226 
2227   thd->set_query((char*) table->s->table_name.str,
2228                  (uint) table->s->table_name.length, system_charset_info);
2229 
2230   if (!(crashed= maria_is_crashed(file)))
2231   {
2232     sql_print_warning("Checking table:   '%s'", table->s->path.str);
2233     crashed= check(thd, &check_opt);
2234   }
2235 
2236   if (crashed)
2237   {
2238     bool save_log_all_errors;
2239     sql_print_warning("Recovering table: '%s'", table->s->path.str);
2240     save_log_all_errors= thd->log_all_errors;
2241     thd->log_all_errors|= (thd->variables.log_warnings > 2);
2242     check_opt.flags=
2243       ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) |
2244        (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) |
2245        T_AUTO_REPAIR);
2246     if (repair(thd, &check_opt))
2247       error= 1;
2248     thd->log_all_errors= save_log_all_errors;
2249   }
2250   thd->set_query(query_backup);
2251   DBUG_RETURN(error);
2252 }
2253 
2254 
is_crashed() const2255 bool ha_maria::is_crashed() const
2256 {
2257   return (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED) ||
2258           (my_disable_locking && file->s->state.open_count));
2259 }
2260 
/*
  Guard for row-changing operations that are not yet supported under a
  concurrent-insert lock.

  When file->lock.type is TL_WRITE_CONCURRENT_INSERT and the table is not
  a sequence, raise ER_CHECK_NOT_IMPLEMENTED with 'msg' and make the
  ENCLOSING function return 1.  Otherwise do nothing.
  Must be used inside a ha_maria method (it reads 'file' and 'table')
  whose return type accepts 1.
*/
#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
  do { \
    if (file->lock.type == TL_WRITE_CONCURRENT_INSERT && !table->s->sequence) \
    { \
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
      return 1; \
    } \
  } while(0)
2269 
update_row(const uchar * old_data,const uchar * new_data)2270 int ha_maria::update_row(const uchar * old_data, const uchar * new_data)
2271 {
2272   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
2273   return maria_update(file, old_data, new_data);
2274 }
2275 
2276 
delete_row(const uchar * buf)2277 int ha_maria::delete_row(const uchar * buf)
2278 {
2279   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
2280   return maria_delete(file, buf);
2281 }
2282 
index_read_map(uchar * buf,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2283 int ha_maria::index_read_map(uchar * buf, const uchar * key,
2284 			     key_part_map keypart_map,
2285 			     enum ha_rkey_function find_flag)
2286 {
2287   DBUG_ASSERT(inited == INDEX);
2288   int error= maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
2289   return error;
2290 }
2291 
2292 
index_read_idx_map(uchar * buf,uint index,const uchar * key,key_part_map keypart_map,enum ha_rkey_function find_flag)2293 int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
2294 				 key_part_map keypart_map,
2295 				 enum ha_rkey_function find_flag)
2296 {
2297   int error;
2298   /* Use the pushed index condition if it matches the index we're scanning */
2299   end_range= NULL;
2300   if (index == pushed_idx_cond_keyno)
2301     ma_set_index_cond_func(file, handler_index_cond_check, this);
2302 
2303   error= maria_rkey(file, buf, index, key, keypart_map, find_flag);
2304 
2305   ma_set_index_cond_func(file, NULL, 0);
2306   return error;
2307 }
2308 
2309 
index_read_last_map(uchar * buf,const uchar * key,key_part_map keypart_map)2310 int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
2311 				  key_part_map keypart_map)
2312 {
2313   DBUG_ENTER("ha_maria::index_read_last_map");
2314   DBUG_ASSERT(inited == INDEX);
2315   int error= maria_rkey(file, buf, active_index, key, keypart_map,
2316                         HA_READ_PREFIX_LAST);
2317   DBUG_RETURN(error);
2318 }
2319 
2320 
index_next(uchar * buf)2321 int ha_maria::index_next(uchar * buf)
2322 {
2323   DBUG_ASSERT(inited == INDEX);
2324   int error= maria_rnext(file, buf, active_index);
2325   return error;
2326 }
2327 
2328 
index_prev(uchar * buf)2329 int ha_maria::index_prev(uchar * buf)
2330 {
2331   DBUG_ASSERT(inited == INDEX);
2332   int error= maria_rprev(file, buf, active_index);
2333   return error;
2334 }
2335 
2336 
index_first(uchar * buf)2337 int ha_maria::index_first(uchar * buf)
2338 {
2339   DBUG_ASSERT(inited == INDEX);
2340   int error= maria_rfirst(file, buf, active_index);
2341   return error;
2342 }
2343 
2344 
index_last(uchar * buf)2345 int ha_maria::index_last(uchar * buf)
2346 {
2347   DBUG_ASSERT(inited == INDEX);
2348   int error= maria_rlast(file, buf, active_index);
2349   return error;
2350 }
2351 
2352 
index_next_same(uchar * buf,const uchar * key,uint length)2353 int ha_maria::index_next_same(uchar * buf,
2354                               const uchar *key __attribute__ ((unused)),
2355                               uint length __attribute__ ((unused)))
2356 {
2357   int error;
2358   DBUG_ASSERT(inited == INDEX);
2359   /*
2360     TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
2361     happens
2362   */
2363   do
2364   {
2365     error= maria_rnext_same(file,buf);
2366   } while (error == HA_ERR_RECORD_DELETED);
2367   return error;
2368 }
2369 
2370 
index_init(uint idx,bool sorted)2371 int ha_maria::index_init(uint idx, bool sorted)
2372 {
2373   active_index=idx;
2374   if (pushed_idx_cond_keyno == idx)
2375     ma_set_index_cond_func(file, handler_index_cond_check, this);
2376   return 0;
2377 }
2378 
2379 
index_end()2380 int ha_maria::index_end()
2381 {
2382   active_index=MAX_KEY;
2383   ma_set_index_cond_func(file, NULL, 0);
2384   in_range_check_pushed_down= FALSE;
2385   ds_mrr.dsmrr_close();
2386   return 0;
2387 }
2388 
2389 
rnd_init(bool scan)2390 int ha_maria::rnd_init(bool scan)
2391 {
2392   if (scan)
2393     return maria_scan_init(file);
2394   return maria_reset(file);                        // Free buffers
2395 }
2396 
2397 
rnd_end()2398 int ha_maria::rnd_end()
2399 {
2400   ds_mrr.dsmrr_close();
2401   /* Safe to call even if we don't have started a scan */
2402   maria_scan_end(file);
2403   return 0;
2404 }
2405 
2406 
rnd_next(uchar * buf)2407 int ha_maria::rnd_next(uchar *buf)
2408 {
2409   int error= maria_scan(file, buf);
2410   return error;
2411 }
2412 
2413 
remember_rnd_pos()2414 int ha_maria::remember_rnd_pos()
2415 {
2416   return (*file->s->scan_remember_pos)(file, &remember_pos);
2417 }
2418 
2419 
restart_rnd_next(uchar * buf)2420 int ha_maria::restart_rnd_next(uchar *buf)
2421 {
2422   int error;
2423   if ((error= (*file->s->scan_restore_pos)(file, remember_pos)))
2424     return error;
2425   return rnd_next(buf);
2426 }
2427 
2428 
rnd_pos(uchar * buf,uchar * pos)2429 int ha_maria::rnd_pos(uchar *buf, uchar *pos)
2430 {
2431   int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
2432   return error;
2433 }
2434 
2435 
position(const uchar * record)2436 void ha_maria::position(const uchar *record)
2437 {
2438   my_off_t row_position= maria_position(file);
2439   my_store_ptr(ref, ref_length, row_position);
2440 }
2441 
2442 
info(uint flag)2443 int ha_maria::info(uint flag)
2444 {
2445   MARIA_INFO maria_info;
2446   char name_buff[FN_REFLEN];
2447 
2448   (void) maria_status(file, &maria_info, flag);
2449   if (flag & HA_STATUS_VARIABLE)
2450   {
2451     stats.records=           maria_info.records;
2452     stats.deleted=           maria_info.deleted;
2453     stats.data_file_length=  maria_info.data_file_length;
2454     stats.index_file_length= maria_info.index_file_length;
2455     stats.delete_length=     maria_info.delete_length;
2456     stats.check_time=        maria_info.check_time;
2457     stats.mean_rec_length=   maria_info.mean_reclength;
2458     stats.checksum=          file->state->checksum;
2459   }
2460   if (flag & HA_STATUS_CONST)
2461   {
2462     TABLE_SHARE *share= table->s;
2463     stats.max_data_file_length=  maria_info.max_data_file_length;
2464     stats.max_index_file_length= maria_info.max_index_file_length;
2465     stats.create_time= maria_info.create_time;
2466     ref_length= maria_info.reflength;
2467     share->db_options_in_use= maria_info.options;
2468     stats.block_size= maria_block_size;
2469     stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
2470 
2471     /* Update share */
2472     share->keys_in_use.set_prefix(share->keys);
2473     share->keys_in_use.intersect_extended(maria_info.key_map);
2474     share->keys_for_keyread.intersect(share->keys_in_use);
2475     share->db_record_offset= maria_info.record_offset;
2476     if (share->key_parts)
2477     {
2478       ulong *to= table->key_info[0].rec_per_key, *end;
2479       double *from= maria_info.rec_per_key;
2480       for (end= to+ share->key_parts ; to < end ; to++, from++)
2481         *to= (ulong) (*from + 0.5);
2482     }
2483 
2484     /*
2485        Set data_file_name and index_file_name to point at the symlink value
2486        if table is symlinked (Ie;  Real name is not same as generated name)
2487     */
2488     data_file_name= index_file_name= 0;
2489     fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
2490               MY_APPEND_EXT | MY_UNPACK_FILENAME);
2491     if (strcmp(name_buff, maria_info.data_file_name))
2492       data_file_name =maria_info.data_file_name;
2493     fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
2494               MY_APPEND_EXT | MY_UNPACK_FILENAME);
2495     if (strcmp(name_buff, maria_info.index_file_name))
2496       index_file_name=maria_info.index_file_name;
2497   }
2498   if (flag & HA_STATUS_ERRKEY)
2499   {
2500     errkey= maria_info.errkey;
2501     my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
2502   }
2503   if (flag & HA_STATUS_TIME)
2504     stats.update_time= maria_info.update_time;
2505   if (flag & HA_STATUS_AUTO)
2506     stats.auto_increment_value= maria_info.auto_increment;
2507 
2508   return 0;
2509 }
2510 
2511 
extra(enum ha_extra_function operation)2512 int ha_maria::extra(enum ha_extra_function operation)
2513 {
2514   int tmp;
2515   TRN *old_trn= file->trn;
2516   if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
2517     return 0;
2518 #ifdef NOT_USED
2519   if (operation == HA_EXTRA_MMAP && !opt_maria_use_mmap)
2520     return 0;
2521 #endif
2522 
2523   /*
2524     We have to set file->trn here because in some cases we call
2525     extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
2526     without calling commit/rollback in between.  If file->trn is not set
2527     we can't remove file->share from the transaction list in the extra() call.
2528 
2529     In current code we don't have to do this for HA_EXTRA_PREPARE_FOR_RENAME
2530     as this is only used the intermediate table used by ALTER TABLE which
2531     is not part of the transaction (it's not in the TRN list). Better to
2532     keep this for now, to not break anything in a stable release.
2533     When HA_EXTRA_PREPARE_FOR_RENAME is not handled below, we can change
2534     the warnings in _ma_remove_table_from_trnman() to asserts.
2535 
2536     table->in_use is not set in the case this is a done as part of closefrm()
2537     as part of drop table.
2538   */
2539 
2540   if (file->s->now_transactional && table->in_use &&
2541       (operation == HA_EXTRA_PREPARE_FOR_DROP ||
2542        operation == HA_EXTRA_PREPARE_FOR_RENAME ||
2543        operation == HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
2544   {
2545     THD *thd= table->in_use;
2546     TRN *trn= THD_TRN;
2547     _ma_set_tmp_trn_for_table(file, trn);
2548   }
2549   DBUG_ASSERT(file->s->base.born_transactional || file->trn == 0 ||
2550               file->trn == &dummy_transaction_object);
2551 
2552   tmp= maria_extra(file, operation, 0);
2553   file->trn= old_trn;                           // Reset trn if was used
2554   return tmp;
2555 }
2556 
reset(void)2557 int ha_maria::reset(void)
2558 {
2559   ma_set_index_cond_func(file, NULL, 0);
2560   ds_mrr.dsmrr_close();
2561   if (file->trn)
2562   {
2563     /* Next statement is a new statement. Ensure it's logged */
2564     trnman_set_flags(file->trn,
2565                      trnman_get_flags(file->trn) & ~TRN_STATE_INFO_LOGGED);
2566   }
2567   return maria_reset(file);
2568 }
2569 
2570 /* To be used with WRITE_CACHE and EXTRA_CACHE */
2571 
extra_opt(enum ha_extra_function operation,ulong cache_size)2572 int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
2573 {
2574   if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
2575     return 0;
2576   return maria_extra(file, operation, (void*) &cache_size);
2577 }
2578 
2579 
delete_all_rows()2580 int ha_maria::delete_all_rows()
2581 {
2582   THD *thd= table->in_use;
2583   TRN *trn= file->trn;
2584   CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("TRUNCATE in WRITE CONCURRENT");
2585 #ifdef EXTRA_DEBUG
2586   if (trn && ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
2587   {
2588     trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
2589                      TRN_STATE_TABLES_CAN_CHANGE);
2590     (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2591                                    (uchar*) thd->query(), thd->query_length());
2592   }
2593 #endif
2594   /*
2595     If we are under LOCK TABLES, we have to do a commit as
2596     delete_all_rows() can't be rolled back
2597   */
2598   if (table->in_use->locked_tables_mode && trn &&
2599       trnman_has_locked_tables(trn))
2600   {
2601     int error;
2602     if ((error= implicit_commit(thd, 1)))
2603       return error;
2604   }
2605 
2606   /* Note that this can't be rolled back */
2607   return maria_delete_all_rows(file);
2608 }
2609 
2610 
delete_table(const char * name)2611 int ha_maria::delete_table(const char *name)
2612 {
2613   THD *thd= current_thd;
2614   (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
2615                                  (uchar*) thd->query(), thd->query_length());
2616   return maria_delete_table(name);
2617 }
2618 
2619 
2620 /* This is mainly for temporary tables, so no logging necessary */
2621 
drop_table(const char * name)2622 void ha_maria::drop_table(const char *name)
2623 {
2624   DBUG_ASSERT(file->s->temporary);
2625   (void) ha_close();
2626   (void) maria_delete_table_files(name, 1, 0);
2627 }
2628 
2629 
change_table_ptr(TABLE * table_arg,TABLE_SHARE * share)2630 void ha_maria::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
2631 {
2632   handler::change_table_ptr(table_arg, share);
2633   if (file)
2634     file->external_ref= table_arg;
2635 }
2636 
2637 
/*
  Lock the table for the connection 'thd', or unlock it when
  lock_type == F_UNLCK.

  For tables that were born transactional this also maintains the
  transaction's locked-tables counter, disables/re-enables logging for the
  table and, when the last locked table of the transaction is unlocked,
  ends the transaction (see the MARIA_CANNOT_ROLLBACK branches below).

  Returns 1 if re-enabling logging fails or (with MARIA_CANNOT_ROLLBACK)
  if the commit fails; otherwise the result of maria_lock_database().
*/

int ha_maria::external_lock(THD *thd, int lock_type)
{
  DBUG_ENTER("ha_maria::external_lock");
  file->external_ref= (void*) table;            // For ma_killed()
  /*
    We don't test now_transactional because it may vary between lock/unlock
    and thus confuse our reference counting.
    It is critical to skip non-transactional tables: user-visible temporary
    tables get an external_lock() when read/written for the first time, but no
    corresponding unlock (they just stay locked and are later dropped while
    locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp"
    would never commit as its "locked_tables" count would stay 1.
    When Maria has has_transactions()==TRUE, open_temporary_table()
    (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
    external_lock(F_UNLCK) will happen and we can then allow the user to
    create transactional temporary tables.
  */
  if (file->s->base.born_transactional)
  {
    /* Transactional table */
    if (lock_type != F_UNLCK)
    {
      /* Locking: count this table in the transaction it is attached to */
      if (file->trn)
      {
        /* This can only happen with tables created with clone() */
        DBUG_PRINT("info",("file->trn: %p", file->trn));
        trnman_increment_locked_tables(file->trn);
      }

      if (!thd->transaction.on)
      {
        /*
          No need to log REDOs/UNDOs. If this is an internal temporary table
          which will be renamed to a permanent table (like in ALTER TABLE),
          the rename happens after unlocking so will be durable (and the table
          will get its create_rename_lsn).
          Note: if we wanted to enable users to have an old backup and apply
          tons of archived logs to roll-forward, we could then not disable
          REDOs/UNDOs in this case.
        */
        DBUG_PRINT("info", ("Disabling logging for table"));
        _ma_tmp_disable_logging_for_table(file, TRUE);
      }
    }
    else
    {
      /*
        Unlocking.
        We have to test for THD_TRN to protect against implicit commits.
        trn is 0 when the handle uses the dummy transaction object or when
        the connection has no transaction.
      */
      TRN *trn= (file->trn != &dummy_transaction_object && THD_TRN ? file->trn : 0);
      /* End of transaction */

      /*
        We always re-enable, don't rely on thd->transaction.on as it is
        sometimes reset to true after unlocking (see mysql_truncate() for a
        partitioned table based on Maria).
        Note that we can come here without having an exclusive lock on the
        table, for example in this case:
        external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
        abortion; external_lock(F_UNLCK). Fortunately, the re-enabling happens
        only if we were the thread which disabled logging.
      */
      if (_ma_reenable_logging_for_table(file, TRUE))
        DBUG_RETURN(1);
      _ma_reset_trn_for_table(file);
      /*
        Ensure that file->state points to the current number of rows. This
        is needed if someone calls maria_info() without first doing an
        external lock of the table
      */
      file->state= &file->s->state.state;
      if (trn)
      {
        DBUG_PRINT("info",
                   ("locked_tables: %u", trnman_has_locked_tables(trn)));
        DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
        /* True only when this was the last locked table of the transaction */
        if (trnman_has_locked_tables(trn) &&
            !trnman_decrement_locked_tables(trn))
        {
          /*
            OK should not have been sent to client yet (ACID).
            This is a bit excessive, ACID requires this only if there are some
            changes to commit (rollback shouldn't be tested).
          */
          DBUG_ASSERT(!thd->get_stmt_da()->is_sent() ||
                      thd->killed);
          /* autocommit ? rollback a transaction */
#ifdef MARIA_CANNOT_ROLLBACK
          if (ma_commit(trn))
            DBUG_RETURN(1);
          THD_TRN= 0;
#else
          if (!(thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
          {
            trnman_rollback_trn(trn);
            DBUG_PRINT("info", ("THD_TRN set to 0x0"));
            THD_TRN= 0;
          }
#endif
        }
        /*
          NOTE(review): trn is still used here after ma_commit() /
          trnman_rollback_trn() may have ended it above; confirm that the
          TRN object remains valid at this point.
        */
        trnman_set_flags(trn, trnman_get_flags(trn) & ~ TRN_STATE_INFO_LOGGED);
      }
    }
  } /* if transactional table */
  /*
    For tmp tables (table->s->tmp_table set) every lock request other than
    F_UNLCK is passed down as F_EXTRA_LCK.
  */
  int result = maria_lock_database(file, !table->s->tmp_table ?
                                  lock_type : ((lock_type == F_UNLCK) ?
                                               F_UNLCK : F_EXTRA_LCK));
  if (!file->s->base.born_transactional)
    file->state= &file->s->state.state;         // Restore state if clone
  DBUG_RETURN(result);
}
2747 
start_stmt(THD * thd,thr_lock_type lock_type)2748 int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
2749 {
2750   TRN *trn;
2751   if (file->s->base.born_transactional)
2752   {
2753     trn= THD_TRN;
2754     DBUG_ASSERT(trn); // this may be called only after external_lock()
2755     DBUG_ASSERT(trnman_has_locked_tables(trn));
2756     DBUG_ASSERT(lock_type != TL_UNLOCK);
2757     DBUG_ASSERT(file->trn == trn);
2758 
2759     /*
2760       As external_lock() was already called, don't increment locked_tables.
2761       Note that we call the function below possibly several times when
2762       statement starts (once per table). This is ok as long as that function
2763       does cheap operations. Otherwise, we will need to do it only on first
2764       call to start_stmt().
2765     */
2766     trnman_new_statement(trn);
2767 
2768 #ifdef EXTRA_DEBUG
2769     if (!(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED) &&
2770         trnman_get_flags(trn) & TRN_STATE_TABLES_CAN_CHANGE)
2771     {
2772       trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED);
2773       (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
2774                                      (uchar*) thd->query(),
2775                                      thd->query_length());
2776     }
2777 #endif
2778   }
2779   return 0;
2780 }
2781 
2782 
2783 /*
2784   Reset THD_TRN and all file->trn related to the transaction
2785   This is needed as some calls, like extra() or external_lock() may access
2786   it before next transaction is started
2787 */
2788 
reset_thd_trn(THD * thd,MARIA_HA * first_table)2789 static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
2790 {
2791   DBUG_ENTER("reset_thd_trn");
2792   THD_TRN= NULL;
2793   for (MARIA_HA *table= first_table; table ;
2794        table= table->trn_next)
2795   {
2796     _ma_reset_trn_for_table(table);
2797 
2798     /*
2799       If table has changed by this statement, invalidate it from the query
2800       cache
2801     */
2802     if (table->row_changes != table->start_row_changes)
2803     {
2804       table->start_row_changes= table->row_changes;
2805       DBUG_ASSERT(table->s->chst_invalidator != NULL);
2806       (*table->s->chst_invalidator)(table->s->data_file_name.str);
2807     }
2808   }
2809   DBUG_VOID_RETURN;
2810 }
2811 
2812 
/**
  Performs an implicit commit of the Maria transaction and creates a new
  one.

  This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
  be participant in the connection's transaction and so the implicit commits
  (ha_commit()) (like in end_active_trans()) will do the implicit commit
  without need to call this function which can then be removed.

  @param  thd              THD object
  @param  new_trn          if a new transaction should be created; a new
                           transaction is not needed when we know that the
                           tables will be unlocked very soon.

  @return 0 on success, or when there was nothing to do (Maria not
          initialized, no transaction for this connection, or no new
          transaction requested while under LOCK TABLES);
          1 if ma_commit() failed;
          HA_ERR_OUT_OF_MEM if the new transaction or a table's live state
          could not be set up (this overrides an earlier commit error).
*/

int ha_maria::implicit_commit(THD *thd, bool new_trn)
{
#ifndef MARIA_CANNOT_ROLLBACK
#error this method should be removed
#endif
  TRN *trn;
  int error;
  uint locked_tables;
  extern my_bool plugins_are_initialized;
  MARIA_HA *used_tables, *trn_next;
  DBUG_ENTER("ha_maria::implicit_commit");

  if (!maria_hton || !plugins_are_initialized || !(trn= THD_TRN))
    DBUG_RETURN(0);
  if (!new_trn && (thd->locked_tables_mode == LTM_LOCK_TABLES ||
                   thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
  {
    /*
      No commit inside LOCK TABLES.

      Note that we come here only at the end of the top statement
      (dispatch_command()), we are never committing inside a sub-statement.
    */
    DBUG_PRINT("info", ("locked_tables, skipping"));
    DBUG_RETURN(0);
  }

  /* Remember the lock count so it can be carried over to the new trn */
  locked_tables= trnman_has_locked_tables(trn);

  /* Read the list of used handlers before the transaction is committed */
  used_tables= (MARIA_HA*) trn->used_instances;
  error= 0;
  if (unlikely(ma_commit(trn)))
    error= 1;
  if (!new_trn)
  {
    reset_thd_trn(thd, used_tables);
    goto end;
  }

  /*
    We need to create a new transaction and put it in THD_TRN. Indeed,
    tables may be under LOCK TABLES, and so they will start the next
    statement assuming they have a trn (see ha_maria::start_stmt()).
  */
  trn= trnman_new_trn(& thd->transaction.wt);
  THD_TRN= trn;
  if (unlikely(trn == NULL))
  {
    reset_thd_trn(thd, used_tables);
    error= HA_ERR_OUT_OF_MEM;
    goto end;
  }
  /*
    Move all locked tables to the new transaction
    We must do it here as otherwise file->thd and file->state may be
    stale pointers. We can't do this in start_stmt() as we don't know
    when we should call _ma_setup_live_state() and in some cases, like
    in check table, we use the table without calling start_stmt().
  */

  for (MARIA_HA *handler= used_tables; handler ;
       handler= trn_next)
  {
    /* Save the link first; the handler is moved to another trn below */
    trn_next= handler->trn_next;
    DBUG_ASSERT(handler->s->base.born_transactional);

    /* If handler uses versioning */
    if (handler->s->lock_key_trees)
    {
      /* _ma_set_trn_for_table() will be called indirectly */
      if (_ma_setup_live_state(handler))
        error= HA_ERR_OUT_OF_MEM;
    }
    else
      _ma_set_trn_for_table(handler, trn);
  }
  /* This is just a commit, tables stay locked if they were: */
  trnman_reset_locked_tables(trn, locked_tables);

end:
  DBUG_RETURN(error);
}
2910 
2911 
store_lock(THD * thd,THR_LOCK_DATA ** to,enum thr_lock_type lock_type)2912 THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
2913                                      THR_LOCK_DATA **to,
2914                                      enum thr_lock_type lock_type)
2915 {
2916   /* Test if we can fix test below */
2917   DBUG_ASSERT(lock_type != TL_UNLOCK &&
2918               (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));
2919   if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
2920   {
2921     const enum enum_sql_command sql_command= thd->lex->sql_command;
2922     /*
2923       We have to disable concurrent inserts for INSERT ... SELECT or
2924       INSERT/UPDATE/DELETE with sub queries if we are using statement based
2925       logging.  We take the safe route here and disable this for all commands
2926       that only does reading that are not SELECT.
2927     */
2928     if (lock_type <= TL_READ_HIGH_PRIORITY &&
2929         !thd->is_current_stmt_binlog_format_row() &&
2930         (sql_command != SQLCOM_SELECT &&
2931          sql_command != SQLCOM_LOCK_TABLES) &&
2932         (thd->variables.option_bits & OPTION_BIN_LOG) &&
2933         mysql_bin_log.is_open())
2934       lock_type= TL_READ_NO_INSERT;
2935     else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
2936     {
2937       const enum enum_duplicates duplicates= thd->lex->duplicates;
2938       /*
2939         Explanation for the 3 conditions below, in order:
2940 
2941         - Bulk insert may use repair, which will cause problems if other
2942         threads try to read/insert to the table: disable versioning.
2943         Note that our read of file->state->records is incorrect, as such
2944         variable may have changed when we come to start_bulk_insert() (worse
2945         case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
2946         uses repair). This is prevented because start_bulk_insert() will not
2947         try repair if we enabled versioning.
2948         - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
2949         TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
2950         update/delete of a row and versioning doesn't support that
2951         - same for LOAD DATA CONCURRENT REPLACE.
2952       */
2953       if ((file->state->records == 0) ||
2954           (sql_command == SQLCOM_INSERT_SELECT && duplicates == DUP_UPDATE) ||
2955           (sql_command == SQLCOM_LOAD && duplicates == DUP_REPLACE))
2956         lock_type= TL_WRITE;
2957     }
2958     file->lock.type= lock_type;
2959   }
2960   *to++= &file->lock;
2961   return to;
2962 }
2963 
2964 
update_create_info(HA_CREATE_INFO * create_info)2965 void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
2966 {
2967   ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
2968   if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
2969   {
2970     create_info->auto_increment_value= stats.auto_increment_value;
2971   }
2972   create_info->data_file_name= data_file_name;
2973   create_info->index_file_name= index_file_name;
2974   /*
2975     Keep user-specified row_type for ALTER,
2976     but show the actually used one in SHOW
2977   */
2978   if (create_info->row_type != ROW_TYPE_DEFAULT &&
2979       !(thd_sql_command(ha_thd()) == SQLCOM_ALTER_TABLE))
2980     create_info->row_type= get_row_type();
2981   /*
2982     Show always page checksums, as this can be forced with
2983     maria_page_checksums variable
2984   */
2985   if (create_info->page_checksum == HA_CHOICE_UNDEF)
2986     create_info->page_checksum=
2987       (file->s->options & HA_OPTION_PAGE_CHECKSUM) ? HA_CHOICE_YES :
2988       HA_CHOICE_NO;
2989 }
2990 
2991 
get_row_type() const2992 enum row_type ha_maria::get_row_type() const
2993 {
2994   switch (file->s->data_file_type) {
2995   case STATIC_RECORD:     return ROW_TYPE_FIXED;
2996   case DYNAMIC_RECORD:    return ROW_TYPE_DYNAMIC;
2997   case BLOCK_RECORD:      return ROW_TYPE_PAGE;
2998   case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED;
2999   default:                return ROW_TYPE_NOT_USED;
3000   }
3001 }
3002 
3003 
maria_row_type(HA_CREATE_INFO * info)3004 static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
3005 {
3006   if (info->transactional == HA_CHOICE_YES)
3007     return BLOCK_RECORD;
3008   switch (info->row_type) {
3009   case ROW_TYPE_FIXED:   return STATIC_RECORD;
3010   case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD;
3011   default:               return BLOCK_RECORD;
3012   }
3013 }
3014 
3015 
/*
  Create a new Maria table.

  @param name             Table name; passed through fn_format() before
                          being handed to maria_create()
  @param table_arg        TABLE describing columns and keys
  @param ha_create_info   Create options.  Note that 'transactional' is
                          overwritten with HA_CHOICE_NO for sequences.

  @return 0 on success, otherwise the error from table2maria() or
          maria_create()
*/

int ha_maria::create(const char *name, TABLE *table_arg,
                     HA_CREATE_INFO *ha_create_info)
{
  int error;
  uint create_flags= 0, record_count= 0, i;
  char buff[FN_REFLEN];
  MARIA_KEYDEF *keydef;
  MARIA_COLUMNDEF *recinfo;
  MARIA_CREATE_INFO create_info;
  TABLE_SHARE *share= table_arg->s;
  uint options= share->db_options_in_use;
  enum data_file_type row_type;
  THD *thd= current_thd;
  DBUG_ENTER("ha_maria::create");

  /* One key using a parser is enough to set the flag */
  for (i= 0; i < share->keys; i++)
  {
    if (table_arg->key_info[i].flags & HA_USES_PARSER)
    {
      create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
      break;
    }
  }
  /* Note: BLOCK_RECORD is used if table is transactional */
  row_type= maria_row_type(ha_create_info);
  /* Tell the user when an explicit row format is overridden */
  if (ha_create_info->transactional == HA_CHOICE_YES &&
      ha_create_info->row_type != ROW_TYPE_PAGE &&
      ha_create_info->row_type != ROW_TYPE_NOT_USED &&
      ha_create_info->row_type != ROW_TYPE_DEFAULT)
    push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
                 ER_ILLEGAL_HA_CREATE_OPTION,
                 "Row format set to PAGE because of TRANSACTIONAL=1 option");

  if (share->table_type == TABLE_TYPE_SEQUENCE)
  {
    /* For sequences, the simplest record type is appropriate */
    row_type= STATIC_RECORD;
    ha_create_info->transactional= HA_CHOICE_NO;
  }

  bzero((char*) &create_info, sizeof(create_info));
  if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
                          &record_count, &create_info)))
    DBUG_RETURN(error); /* purecov: inspected */
  create_info.max_rows= share->max_rows;
  create_info.reloc_rows= share->min_rows;
  create_info.with_auto_increment= share->next_number_key_offset == 0;
  /* Stored as one less than the requested value; 0 if none was requested */
  create_info.auto_increment= (ha_create_info->auto_increment_value ?
                               ha_create_info->auto_increment_value -1 :
                               (ulonglong) 0);
  create_info.data_file_length= ((ulonglong) share->max_rows *
                                 share->avg_row_length);
  create_info.data_file_name= ha_create_info->data_file_name;
  create_info.index_file_name= ha_create_info->index_file_name;
  create_info.language= share->table_charset->number;

  /*
    Table is transactional:
    - If the user specify that table is transactional (in this case
      row type is forced to BLOCK_RECORD)
    - If they specify BLOCK_RECORD without specifying transactional behaviour

    Shouldn't this test be pushed down to maria_create()? Because currently,
    ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
    born_transactional==1, which confuses some recovery-related code.
  */
  create_info.transactional= (row_type == BLOCK_RECORD &&
                              ha_create_info->transactional != HA_CHOICE_NO);

  /* Temporary tables are never transactional and use delayed key writes */
  if (ha_create_info->tmp_table())
  {
    create_flags|= HA_CREATE_TMP_TABLE | HA_CREATE_DELAY_KEY_WRITE;
    create_info.transactional= 0;
  }
  if (ha_create_info->options & HA_CREATE_KEEP_FILES)
    create_flags|= HA_CREATE_KEEP_FILES;
  if (options & HA_OPTION_PACK_RECORD)
    create_flags|= HA_PACK_RECORD;
  if (options & HA_OPTION_CHECKSUM)
    create_flags|= HA_CREATE_CHECKSUM;
  if (options & HA_OPTION_DELAY_KEY_WRITE)
    create_flags|= HA_CREATE_DELAY_KEY_WRITE;
  /* Page checksums: explicit YES, or unspecified with the global default on */
  if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
       maria_page_checksums) ||
       ha_create_info->page_checksum ==  HA_CHOICE_YES)
    create_flags|= HA_CREATE_PAGE_CHECKSUM;

  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
                                 (uchar*) thd->query(), thd->query_length());

  /* TODO: Check that the following fn_format is really needed */
  error=
    maria_create(fn_format(buff, name, "", "",
                           MY_UNPACK_FILENAME | MY_APPEND_EXT),
                 row_type, share->keys, keydef,
                 record_count,  recinfo,
                 0, (MARIA_UNIQUEDEF *) 0,
                 &create_info, create_flags);

  /*
    NOTE(review): only recinfo is freed here; presumably keydef lives in
    the same allocation made by table2maria() -- confirm there.
  */
  my_free(recinfo);
  DBUG_RETURN(error);
}
3118 
3119 
rename_table(const char * from,const char * to)3120 int ha_maria::rename_table(const char *from, const char *to)
3121 {
3122   THD *thd= current_thd;
3123   (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
3124                                  (uchar*) thd->query(), thd->query_length());
3125   return maria_rename(from, to);
3126 }
3127 
3128 
get_auto_increment(ulonglong offset,ulonglong increment,ulonglong nb_desired_values,ulonglong * first_value,ulonglong * nb_reserved_values)3129 void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
3130                                   ulonglong nb_desired_values,
3131                                   ulonglong *first_value,
3132                                   ulonglong *nb_reserved_values)
3133 {
3134   ulonglong nr;
3135   int error;
3136   uchar key[MARIA_MAX_KEY_BUFF];
3137 
3138   if (!table->s->next_number_key_offset)
3139   {                                             // Autoincrement at key-start
3140     ha_maria::info(HA_STATUS_AUTO);
3141     *first_value= stats.auto_increment_value;
3142     /* Maria has only table-level lock for now, so reserves to +inf */
3143     *nb_reserved_values= ULONGLONG_MAX;
3144     return;
3145   }
3146 
3147   /* it's safe to call the following if bulk_insert isn't on */
3148   maria_flush_bulk_insert(file, table->s->next_number_index);
3149 
3150   (void) extra(HA_EXTRA_KEYREAD);
3151   key_copy(key, table->record[0],
3152            table->key_info + table->s->next_number_index,
3153            table->s->next_number_key_offset);
3154   error= maria_rkey(file, table->record[1], (int) table->s->next_number_index,
3155                     key, make_prev_keypart_map(table->s->next_number_keypart),
3156                     HA_READ_PREFIX_LAST);
3157   if (error)
3158     nr= 1;
3159   else
3160   {
3161     /* Get data from record[1] */
3162     nr= ((ulonglong) table->next_number_field->
3163          val_int_offset(table->s->rec_buff_length) + 1);
3164   }
3165   extra(HA_EXTRA_NO_KEYREAD);
3166   *first_value= nr;
3167   /*
3168     MySQL needs to call us for next row: assume we are inserting ("a",null)
3169     here, we return 3, and next this statement will want to insert ("b",null):
3170     there is no reason why ("b",3+1) would be the good row to insert: maybe it
3171     already exists, maybe 3+1 is too large...
3172   */
3173   *nb_reserved_values= 1;
3174 }
3175 
3176 
/*
  Find out how many rows there are in the given range

  SYNOPSIS
    records_in_range()
    inx                 Index to use
    min_key             Start of range.  Null pointer if from first key
    max_key             End of range. Null pointer if to last key

  NOTES
    min_key.flag can have one of the following values:
      HA_READ_KEY_EXACT         Include the key in the range
      HA_READ_AFTER_KEY         Don't include key in range

    max_key.flag can have one of the following values:
      HA_READ_BEFORE_KEY        Don't include key in range
      HA_READ_AFTER_KEY         Include all 'end_key' values in the range

  RETURN
   HA_POS_ERROR         Something is wrong with the index tree.
   0                    There are no matching keys in the given range
   number > 0           There are approximately 'number' matching rows in
                        the range.
*/
3201 
records_in_range(uint inx,key_range * min_key,key_range * max_key)3202 ha_rows ha_maria::records_in_range(uint inx, key_range *min_key,
3203                                    key_range *max_key)
3204 {
3205   return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key);
3206 }
3207 
3208 
ft_read(uchar * buf)3209 int ha_maria::ft_read(uchar * buf)
3210 {
3211   int error;
3212 
3213   if (!ft_handler)
3214     return -1;
3215 
3216   thread_safe_increment(table->in_use->status_var.ha_read_next_count,
3217                         &LOCK_status);  // why ?
3218 
3219   error= ft_handler->please->read_next(ft_handler, (char*) buf);
3220 
3221   return error;
3222 }
3223 
3224 
check_if_incompatible_data(HA_CREATE_INFO * create_info,uint table_changes)3225 bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
3226                                           uint table_changes)
3227 {
3228   DBUG_ENTER("check_if_incompatible_data");
3229   uint options= table->s->db_options_in_use;
3230   enum ha_choice page_checksum= table->s->page_checksum;
3231 
3232   if (page_checksum == HA_CHOICE_UNDEF)
3233     page_checksum= file->s->options & HA_OPTION_PAGE_CHECKSUM ? HA_CHOICE_YES
3234                                                               : HA_CHOICE_NO;
3235 
3236   if (create_info->auto_increment_value != stats.auto_increment_value ||
3237       create_info->data_file_name != data_file_name ||
3238       create_info->index_file_name != index_file_name ||
3239       create_info->page_checksum != page_checksum ||
3240       create_info->transactional != table->s->transactional ||
3241       (maria_row_type(create_info) != data_file_type &&
3242        create_info->row_type != ROW_TYPE_DEFAULT) ||
3243       table_changes == IS_EQUAL_NO ||
3244       (table_changes & IS_EQUAL_PACK_LENGTH)) // Not implemented yet
3245     DBUG_RETURN(COMPATIBLE_DATA_NO);
3246 
3247   if ((options & (HA_OPTION_CHECKSUM |
3248                   HA_OPTION_DELAY_KEY_WRITE)) !=
3249       (create_info->table_options & (HA_OPTION_CHECKSUM |
3250                               HA_OPTION_DELAY_KEY_WRITE)))
3251     DBUG_RETURN(COMPATIBLE_DATA_NO);
3252   DBUG_RETURN(COMPATIBLE_DATA_YES);
3253 }
3254 
3255 
maria_hton_panic(handlerton * hton,ha_panic_function flag)3256 static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
3257 {
3258   /* If no background checkpoints, we need to do one now */
3259   int ret=0;
3260 
3261   if (!checkpoint_interval)
3262     ret= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);
3263 
3264   ret|= maria_panic(flag);
3265 
3266   maria_hton= 0;
3267   return ret;
3268 }
3269 
3270 
maria_commit(handlerton * hton,THD * thd,bool all)3271 static int maria_commit(handlerton *hton __attribute__ ((unused)),
3272                         THD *thd, bool all)
3273 {
3274   TRN *trn= THD_TRN;
3275   int res;
3276   MARIA_HA *used_instances= (MARIA_HA*) trn->used_instances;
3277   DBUG_ENTER("maria_commit");
3278 
3279   trnman_reset_locked_tables(trn, 0);
3280   trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
3281 
3282   /* statement or transaction ? */
3283   if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
3284       !all)
3285     DBUG_RETURN(0); // end of statement
3286   res= ma_commit(trn);
3287   reset_thd_trn(thd, used_instances);
3288   DBUG_RETURN(res);
3289 }
3290 
3291 
maria_rollback(handlerton * hton,THD * thd,bool all)3292 static int maria_rollback(handlerton *hton __attribute__ ((unused)),
3293                           THD *thd, bool all)
3294 {
3295   TRN *trn= THD_TRN;
3296   DBUG_ENTER("maria_rollback");
3297   trnman_reset_locked_tables(trn, 0);
3298   /* statement or transaction ? */
3299   if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all)
3300   {
3301     trnman_rollback_statement(trn);
3302     DBUG_RETURN(0); // end of statement
3303   }
3304   reset_thd_trn(thd, (MARIA_HA*) trn->used_instances);
3305   DBUG_RETURN(trnman_rollback_trn(trn) ?
3306               HA_ERR_OUT_OF_MEM : 0); // end of transaction
3307 }
3308 
3309 
3310 
3311 /**
3312   @brief flush log handler
3313 
3314   @param hton            maria handlerton (unused)
3315 
3316   @retval FALSE OK
3317   @retval TRUE  Error
3318 */
3319 
maria_flush_logs(handlerton * hton)3320 bool maria_flush_logs(handlerton *hton)
3321 {
3322   return MY_TEST(translog_purge_at_flush());
3323 }
3324 
3325 
maria_checkpoint_state(handlerton * hton,bool disabled)3326 int maria_checkpoint_state(handlerton *hton, bool disabled)
3327 {
3328   maria_checkpoint_disabled= (my_bool) disabled;
3329   return 0;
3330 }
3331 
3332 
3333 
3334 #define SHOW_MSG_LEN (FN_REFLEN + 20)
3335 /**
3336   @brief show status handler
3337 
3338   @param hton            maria handlerton
3339   @param thd             thread handler
3340   @param print           print function
3341   @param stat            type of status
3342 */
3343 
maria_show_status(handlerton * hton,THD * thd,stat_print_fn * print,enum ha_stat_type stat)3344 bool maria_show_status(handlerton *hton,
3345                        THD *thd,
3346                        stat_print_fn *print,
3347                        enum ha_stat_type stat)
3348 {
3349   const LEX_CSTRING *engine_name= hton_name(hton);
3350   switch (stat) {
3351   case HA_ENGINE_LOGS:
3352   {
3353     TRANSLOG_ADDRESS horizon= translog_get_horizon();
3354     uint32 last_file= LSN_FILE_NO(horizon);
3355     uint32 first_needed= translog_get_first_needed_file();
3356     uint32 first_file= translog_get_first_file(horizon);
3357     uint32 i;
3358     const char unknown[]= "unknown";
3359     const char needed[]= "in use";
3360     const char unneeded[]= "free";
3361     char path[FN_REFLEN];
3362 
3363     if (first_file == 0)
3364     {
3365       const char error[]= "error";
3366       print(thd, engine_name->str, engine_name->length,
3367             STRING_WITH_LEN(""), error, sizeof(error) - 1);
3368       break;
3369     }
3370 
3371     for (i= first_file; i <= last_file; i++)
3372     {
3373       char *file;
3374       const char *status;
3375       size_t length, status_len;
3376       MY_STAT stat_buff, *stat;
3377       const char error[]= "can't stat";
3378       char object[SHOW_MSG_LEN];
3379       file= translog_filename_by_fileno(i, path);
3380       if (!(stat= mysql_file_stat(key_file_translog, file, &stat_buff, MYF(0))))
3381       {
3382         status= error;
3383         status_len= sizeof(error) - 1;
3384         length= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s", file);
3385       }
3386       else
3387       {
3388         if (first_needed == 0)
3389         {
3390           status= unknown;
3391           status_len= sizeof(unknown) - 1;
3392         }
3393         else if (i < first_needed)
3394         {
3395           status= unneeded;
3396           status_len= sizeof(unneeded) - 1;
3397         }
3398         else
3399         {
3400           status= needed;
3401           status_len= sizeof(needed) - 1;
3402         }
3403         length= my_snprintf(object, SHOW_MSG_LEN, "Size %12llu ; %s",
3404                             (ulonglong) stat->st_size, file);
3405       }
3406 
3407       print(thd, engine_name->str, engine_name->length,
3408             object, length, status, status_len);
3409     }
3410     break;
3411   }
3412   case HA_ENGINE_STATUS:
3413   case HA_ENGINE_MUTEX:
3414   default:
3415     break;
3416   }
3417   return 0;
3418 }
3419 
3420 
3421 /**
3422   Callback to delete all logs in directory. This is lower-level than other
3423   functions in ma_loghandler.c which delete logs, as it does not rely on
3424   translog_init() having been called first.
3425 
3426   @param  directory        directory where file is
3427   @param  filename         base name of the file to delete
3428 */
3429 
translog_callback_delete_all(const char * directory,const char * filename)3430 static my_bool translog_callback_delete_all(const char *directory,
3431                                             const char *filename)
3432 {
3433   char complete_name[FN_REFLEN];
3434   fn_format(complete_name, filename, directory, "", MYF(MY_UNPACK_FILENAME));
3435   return mysql_file_delete(key_file_translog, complete_name, MYF(MY_WME));
3436 }
3437 
3438 
3439 /**
3440   Helper function for option aria-force-start-after-recovery-failures.
3441   Deletes logs if too many failures. Otherwise, increments the counter of
3442   failures in the control file.
3443   Notice how this has to be called _before_ translog_init() (if log is
3444   corrupted, translog_init() might crash the server, so we need to remove logs
3445   before).
3446 
3447   @param  log_dir          directory where logs to be deleted are
3448 */
3449 
mark_recovery_start(const char * log_dir)3450 static int mark_recovery_start(const char* log_dir)
3451 {
3452   int res;
3453   DBUG_ENTER("mark_recovery_start");
3454   if (!(maria_recover_options & HA_RECOVER_ANY))
3455     ma_message_no_user(ME_JUST_WARNING, "Please consider using option"
3456                        " --aria-recover-options[=...] to automatically check and"
3457                        " repair tables when logs are removed by option"
3458                        " --aria-force-start-after-recovery-failures=#");
3459   if (recovery_failures >= force_start_after_recovery_failures)
3460   {
3461     /*
3462       Remove logs which cause the problem; keep control file which has
3463       critical info like uuid, max_trid (removing control file may make
3464       correct tables look corrupted!).
3465     */
3466     char msg[100];
3467     res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
3468     my_snprintf(msg, sizeof(msg),
3469                 "%s logs after %u consecutive failures of"
3470                 " recovery from logs",
3471                 (res ? "failed to remove some" : "removed all"),
3472                 recovery_failures);
3473     ma_message_no_user((res ? 0 : ME_JUST_WARNING), msg);
3474   }
3475   else
3476     res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3477                                          max_trid_in_control_file,
3478                                          recovery_failures + 1);
3479   DBUG_RETURN(res);
3480 }
3481 
3482 
3483 /**
3484   Helper function for option aria-force-start-after-recovery-failures.
3485   Records in the control file that recovery was a success, so that it's not
3486   counted for aria-force-start-after-recovery-failures.
3487 */
3488 
mark_recovery_success(void)3489 static int mark_recovery_success(void)
3490 {
3491   /* success of recovery, reset recovery_failures: */
3492   int res;
3493   DBUG_ENTER("mark_recovery_success");
3494   res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
3495                                        max_trid_in_control_file, 0);
3496   DBUG_RETURN(res);
3497 }
3498 
3499 
3500 /*
3501   Return 1 if table has changed during the current transaction
3502 */
3503 
is_changed() const3504 bool ha_maria::is_changed() const
3505 {
3506   return file->state->changed;
3507 }
3508 
3509 
ha_maria_init(void * p)3510 static int ha_maria_init(void *p)
3511 {
3512   int res;
3513   const char *log_dir= maria_data_root;
3514 
3515 #ifdef HAVE_PSI_INTERFACE
3516   init_aria_psi_keys();
3517 #endif
3518 
3519   maria_hton= (handlerton *)p;
3520   maria_hton->state= SHOW_OPTION_YES;
3521   maria_hton->db_type= DB_TYPE_ARIA;
3522   maria_hton->create= maria_create_handler;
3523   maria_hton->panic= maria_hton_panic;
3524   maria_hton->tablefile_extensions= ha_maria_exts;
3525   maria_hton->commit= maria_commit;
3526   maria_hton->rollback= maria_rollback;
3527   maria_hton->checkpoint_state= maria_checkpoint_state;
3528 #ifdef MARIA_CANNOT_ROLLBACK
3529   maria_hton->commit= 0;
3530 #endif
3531   maria_hton->flush_logs= maria_flush_logs;
3532   maria_hton->show_status= maria_show_status;
3533   /* TODO: decide if we support Maria being used for log tables */
3534   maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
3535   bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
3536   maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
3537   res= maria_upgrade() || maria_init() || ma_control_file_open(TRUE, TRUE) ||
3538     ((force_start_after_recovery_failures != 0) &&
3539      mark_recovery_start(log_dir)) ||
3540     !init_pagecache(maria_pagecache,
3541                     (size_t) pagecache_buffer_size, pagecache_division_limit,
3542                     pagecache_age_threshold, maria_block_size, pagecache_file_hash_size,
3543                     0) ||
3544     !init_pagecache(maria_log_pagecache,
3545                     TRANSLOG_PAGECACHE_SIZE, 0, 0,
3546                     TRANSLOG_PAGE_SIZE, 0, 0) ||
3547     translog_init(maria_data_root, log_file_size,
3548                   MYSQL_VERSION_ID, server_id, maria_log_pagecache,
3549                   TRANSLOG_DEFAULT_FLAGS, 0) ||
3550     maria_recovery_from_log() ||
3551     ((force_start_after_recovery_failures != 0 ||
3552       maria_recovery_changed_data || recovery_failures) &&
3553      mark_recovery_success()) ||
3554     ma_checkpoint_init(checkpoint_interval);
3555   maria_multi_threaded= maria_in_ha_maria= TRUE;
3556   maria_create_trn_hook= maria_create_trn_for_mysql;
3557   maria_pagecache->extra_debug= 1;
3558   maria_assert_if_crashed_table= debug_assert_if_crashed_table;
3559 
3560   if (res)
3561     maria_hton= 0;
3562 
3563   ma_killed= ma_killed_in_mariadb;
3564 
3565   return res ? HA_ERR_INITIALIZATION : 0;
3566 }
3567 
3568 
3569 #ifdef HAVE_QUERY_CACHE
3570 /**
3571   @brief Register a named table with a call back function to the query cache.
3572 
3573   @param thd The thread handle
3574   @param table_key A pointer to the table name in the table cache
3575   @param key_length The length of the table name
3576   @param[out] engine_callback The pointer to the storage engine call back
3577     function, currently 0
3578   @param[out] engine_data Engine data will be set to 0.
3579 
3580   @note Despite the name of this function, it is used to check each statement
3581     before it is cached and not to register a table or callback function.
3582 
3583   @see handler::register_query_cache_table
3584 
  @return Whether the current statement may be cached. The engine_data and
    engine_callback will be set to 0.
    @retval TRUE The statement may be cached
    @retval FALSE The statement must not be cached
3588 */
3589 
register_query_cache_table(THD * thd,const char * table_name,uint table_name_len,qc_engine_callback * engine_callback,ulonglong * engine_data)3590 my_bool ha_maria::register_query_cache_table(THD *thd, const char *table_name,
3591 					     uint table_name_len,
3592 					     qc_engine_callback
3593 					     *engine_callback,
3594 					     ulonglong *engine_data)
3595 {
3596   ulonglong actual_data_file_length;
3597   ulonglong current_data_file_length;
3598   DBUG_ENTER("ha_maria::register_query_cache_table");
3599 
3600   /*
3601     No call back function is needed to determine if a cached statement
3602     is valid or not.
3603   */
3604   *engine_callback= 0;
3605 
3606   /*
3607     No engine data is needed.
3608   */
3609   *engine_data= 0;
3610 
3611   if (file->s->now_transactional && file->s->have_versioning)
3612     DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn);
3613 
3614   /*
3615     If a concurrent INSERT has happened just before the currently processed
3616     SELECT statement, the total size of the table is unknown.
3617 
3618     To determine if the table size is known, the current thread's snap shot of
3619     the table size with the actual table size are compared.
3620 
3621     If the table size is unknown the SELECT statement can't be cached.
3622   */
3623 
3624   /*
3625     POSIX visibility rules specify that "2. Whatever memory values a
3626     thread can see when it unlocks a mutex <...> can also be seen by any
3627     thread that later locks the same mutex". In this particular case,
3628     concurrent insert thread had modified the data_file_length in
3629     MYISAM_SHARE before it has unlocked (or even locked)
3630     structure_guard_mutex. So, here we're guaranteed to see at least that
3631     value after we've locked the same mutex. We can see a later value
3632     (modified by some other thread) though, but it's ok, as we only want
3633     to know if the variable was changed, the actual new value doesn't matter
3634   */
3635   actual_data_file_length= file->s->state.state.data_file_length;
3636   current_data_file_length= file->state->data_file_length;
3637 
3638   /* Return whether is ok to try to cache current statement. */
3639   DBUG_RETURN(!(file->s->non_transactional_concurrent_insert &&
3640                 current_data_file_length != actual_data_file_length));
3641 }
3642 #endif
3643 
/*
  System variables of the engine, referenced from the plugin declaration
  at the end of this file. The list is terminated by a NULL entry.
*/
struct st_mysql_sys_var* system_variables[]= {
  MYSQL_SYSVAR(block_size),
  MYSQL_SYSVAR(checkpoint_interval),
  MYSQL_SYSVAR(checkpoint_log_activity),
  MYSQL_SYSVAR(force_start_after_recovery_failures),
  MYSQL_SYSVAR(group_commit),
  MYSQL_SYSVAR(group_commit_interval),
  MYSQL_SYSVAR(log_dir_path),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_purge_type),
  MYSQL_SYSVAR(max_sort_file_size),
  MYSQL_SYSVAR(page_checksum),
  MYSQL_SYSVAR(pagecache_age_threshold),
  MYSQL_SYSVAR(pagecache_buffer_size),
  MYSQL_SYSVAR(pagecache_division_limit),
  MYSQL_SYSVAR(pagecache_file_hash_size),
  MYSQL_SYSVAR(recover_options),
  MYSQL_SYSVAR(repair_threads),
  MYSQL_SYSVAR(sort_buffer_size),
  MYSQL_SYSVAR(stats_method),
  MYSQL_SYSVAR(sync_log_dir),
  MYSQL_SYSVAR(used_for_temp_tables),
  MYSQL_SYSVAR(encrypt_tables),
  NULL
};
3669 
3670 
3671 /**
3672    @brief Updates the checkpoint interval and restarts the background thread.
3673 */
3674 
update_checkpoint_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3675 static void update_checkpoint_interval(MYSQL_THD thd,
3676                                         struct st_mysql_sys_var *var,
3677                                         void *var_ptr, const void *save)
3678 {
3679   ma_checkpoint_end();
3680   ma_checkpoint_init(*(ulong *)var_ptr= (ulong)(*(long *)save));
3681 }
3682 
3683 
3684 /**
3685    @brief Updates group commit mode
3686 */
3687 
update_maria_group_commit(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3688 static void update_maria_group_commit(MYSQL_THD thd,
3689                                       struct st_mysql_sys_var *var,
3690                                       void *var_ptr, const void *save)
3691 {
3692   ulong value= (ulong)*((long *)var_ptr);
3693   DBUG_ENTER("update_maria_group_commit");
3694   DBUG_PRINT("enter", ("old value: %lu  new value %lu  rate %lu",
3695                        value, (ulong)(*(long *)save),
3696                        maria_group_commit_interval));
3697   /* old value */
3698   switch (value) {
3699   case TRANSLOG_GCOMMIT_NONE:
3700     break;
3701   case TRANSLOG_GCOMMIT_HARD:
3702     translog_hard_group_commit(FALSE);
3703     break;
3704   case TRANSLOG_GCOMMIT_SOFT:
3705     translog_soft_sync(FALSE);
3706     if (maria_group_commit_interval)
3707       translog_soft_sync_end();
3708     break;
3709   default:
3710     DBUG_ASSERT(0); /* impossible */
3711   }
3712   value= *(ulong *)var_ptr= (ulong)(*(long *)save);
3713   translog_sync();
3714   /* new value */
3715   switch (value) {
3716   case TRANSLOG_GCOMMIT_NONE:
3717     break;
3718   case TRANSLOG_GCOMMIT_HARD:
3719     translog_hard_group_commit(TRUE);
3720     break;
3721   case TRANSLOG_GCOMMIT_SOFT:
3722     translog_soft_sync(TRUE);
3723     /* variable change made under global lock so we can just read it */
3724     if (maria_group_commit_interval)
3725       translog_soft_sync_start();
3726     break;
3727   default:
3728     DBUG_ASSERT(0); /* impossible */
3729   }
3730   DBUG_VOID_RETURN;
3731 }
3732 
3733 /**
3734    @brief Updates group commit interval
3735 */
3736 
update_maria_group_commit_interval(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3737 static void update_maria_group_commit_interval(MYSQL_THD thd,
3738                                                struct st_mysql_sys_var *var,
3739                                                void *var_ptr, const void *save)
3740 {
3741   ulong new_value= (ulong)*((long *)save);
3742   ulong *value_ptr= (ulong*) var_ptr;
3743   DBUG_ENTER("update_maria_group_commit_interval");
3744   DBUG_PRINT("enter", ("old value: %lu  new value %lu  group commit %lu",
3745                         *value_ptr, new_value, maria_group_commit));
3746 
3747   /* variable change made under global lock so we can just read it */
3748   switch (maria_group_commit) {
3749     case TRANSLOG_GCOMMIT_NONE:
3750       *value_ptr= new_value;
3751       translog_set_group_commit_interval(new_value);
3752       break;
3753     case TRANSLOG_GCOMMIT_HARD:
3754       *value_ptr= new_value;
3755       translog_set_group_commit_interval(new_value);
3756       break;
3757     case TRANSLOG_GCOMMIT_SOFT:
3758       if (*value_ptr)
3759         translog_soft_sync_end();
3760       translog_set_group_commit_interval(new_value);
3761       if ((*value_ptr= new_value))
3762         translog_soft_sync_start();
3763       break;
3764     default:
3765       DBUG_ASSERT(0); /* impossible */
3766   }
3767   DBUG_VOID_RETURN;
3768 }
3769 
3770 /**
3771    @brief Updates the transaction log file limit.
3772 */
3773 
update_log_file_size(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)3774 static void update_log_file_size(MYSQL_THD thd,
3775                                  struct st_mysql_sys_var *var,
3776                                  void *var_ptr, const void *save)
3777 {
3778   uint32 size= (uint32)((ulong)(*(long *)save));
3779   translog_set_file_size(size);
3780   *(ulong *)var_ptr= size;
3781 }
3782 
3783 
/*
  Status variables of the engine, referenced from the plugin declaration
  at the end of this file. Each entry gives the variable name, the address
  of the counter and its SHOW type; the list ends with a NullS entry.
*/
SHOW_VAR status_variables[]= {
  {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG},
  {"pagecache_blocks_unused",      (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG},
  {"pagecache_blocks_used",        (char*) &maria_pagecache_var.blocks_used, SHOW_LONG},
  {"pagecache_read_requests",      (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
  {"pagecache_reads",              (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
  {"pagecache_write_requests",     (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
  {"pagecache_writes",             (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
  {"transaction_log_syncs",        (char*) &translog_syncs, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
3795 
3796 /****************************************************************************
3797  * Maria MRR implementation: use DS-MRR
3798  ***************************************************************************/
3799 
multi_range_read_init(RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)3800 int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
3801                                     uint n_ranges, uint mode,
3802                                     HANDLER_BUFFER *buf)
3803 {
3804   return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
3805 }
3806 
multi_range_read_next(range_id_t * range_info)3807 int ha_maria::multi_range_read_next(range_id_t *range_info)
3808 {
3809   return ds_mrr.dsmrr_next(range_info);
3810 }
3811 
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)3812 ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
3813                                                void *seq_init_param,
3814                                                uint n_ranges, uint *bufsz,
3815                                                uint *flags, Cost_estimate *cost)
3816 {
3817   /*
3818     This call is here because there is no location where this->table would
3819     already be known.
3820     TODO: consider moving it into some per-query initialization call.
3821   */
3822   ds_mrr.init(this, table);
3823   return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
3824                                  flags, cost);
3825 }
3826 
multi_range_read_info(uint keyno,uint n_ranges,uint keys,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)3827 ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
3828                                        uint key_parts, uint *bufsz,
3829                                        uint *flags, Cost_estimate *cost)
3830 {
3831   ds_mrr.init(this, table);
3832   return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
3833 }
3834 
multi_range_read_explain_info(uint mrr_mode,char * str,size_t size)3835 int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
3836                                             size_t size)
3837 {
3838   return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
3839 }
/* Maria MRR implementation ends */
3841 
3842 
3843 /* Index condition pushdown implementation*/
3844 
3845 
idx_cond_push(uint keyno_arg,Item * idx_cond_arg)3846 Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
3847 {
3848   /*
3849     Check if the key contains a blob field. If it does then MyISAM
3850     should not accept the pushed index condition since MyISAM will not
3851     read the blob field from the index entry during evaluation of the
3852     pushed index condition and the BLOB field might be part of the
3853     range evaluation done by the ICP code.
3854   */
3855   const KEY *key= &table_share->key_info[keyno_arg];
3856 
3857   for (uint k= 0; k < key->user_defined_key_parts; ++k)
3858   {
3859     const KEY_PART_INFO *key_part= &key->key_part[k];
3860     if (key_part->key_part_flag & HA_BLOB_PART)
3861     {
3862       /* Let the server handle the index condition */
3863       return idx_cond_arg;
3864     }
3865   }
3866 
3867   pushed_idx_cond_keyno= keyno_arg;
3868   pushed_idx_cond= idx_cond_arg;
3869   in_range_check_pushed_down= TRUE;
3870   if (active_index == pushed_idx_cond_keyno)
3871     ma_set_index_cond_func(file, handler_index_cond_check, this);
3872   return NULL;
3873 }
3874 
3875 /**
  Find record by unique constraint (used in temporary tables)

  @param record          (IN|OUT) the record to find
  @param constrain_no    (IN) number of the constraint (for this engine)

  @note It is like hp_search but uses functions for rows where hp_search
        uses functions for indexes.
3883 
3884   @retval  0 OK
3885   @retval  1 Not found
3886   @retval -1 Error
3887 */
3888 
find_unique_row(uchar * record,uint constrain_no)3889 int ha_maria::find_unique_row(uchar *record, uint constrain_no)
3890 {
3891   int rc;
3892   if (file->s->state.header.uniques)
3893   {
3894     DBUG_ASSERT(file->s->state.header.uniques > constrain_no);
3895     MARIA_UNIQUEDEF *def= file->s->uniqueinfo + constrain_no;
3896     ha_checksum unique_hash= _ma_unique_hash(def, record);
3897     rc= _ma_check_unique(file, def, record, unique_hash, HA_OFFSET_ERROR);
3898     if (rc)
3899     {
3900       file->cur_row.lastpos= file->dup_key_pos;
3901       if ((*file->read_record)(file, record, file->cur_row.lastpos))
3902         return -1;
3903       file->update|= HA_STATE_AKTIV;                     /* Record is read */
3904     }
3905     // invert logic
3906     rc= !MY_TEST(rc);
3907   }
3908   else
3909   {
3910     /*
3911      It is case when just unique index used instead unicue constrain
3912      (conversion from heap table).
3913      */
3914     DBUG_ASSERT(file->s->state.header.keys > constrain_no);
3915     MARIA_KEY key;
3916     file->once_flags|= USE_PACKED_KEYS;
3917     (*file->s->keyinfo[constrain_no].make_key)
3918       (file, &key, constrain_no, file->lastkey_buff2, record, 0, 0);
3919     rc= maria_rkey(file, record, constrain_no, key.data, key.data_length,
3920                    HA_READ_KEY_EXACT);
3921     rc= MY_TEST(rc);
3922   }
3923   return rc;
3924 }
3925 
/*
  Storage engine descriptor referenced from the plugin declaration below;
  it carries the handlerton interface version.
*/
struct st_mysql_storage_engine maria_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
3928 
/*
  Plugin declaration for the "Aria" storage engine: ties together the
  storage engine descriptor, the init function and the status/system
  variable tables defined in this file. No deinit function is registered.
*/
maria_declare_plugin(aria)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &maria_storage_engine,
  "Aria",
  "Monty Program Ab",
  "Crash-safe tables with MyISAM heritage",
  PLUGIN_LICENSE_GPL,
  ha_maria_init,                /* Plugin Init      */
  NULL,                         /* Plugin Deinit    */
  0x0105,                       /* 1.5              */
  status_variables,             /* status variables */
  system_variables,             /* system variables */
  "1.5",                        /* string version   */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity         */
}
maria_declare_plugin_end;
3946