1 #ifndef HA_PARTITION_INCLUDED 2 #define HA_PARTITION_INCLUDED 3 4 /* 5 Copyright (c) 2005, 2012, Oracle and/or its affiliates. 6 Copyright (c) 2009, 2021, MariaDB Corporation. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; version 2 of the License. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ 20 21 #include "sql_partition.h" /* part_id_range, partition_element */ 22 #include "queues.h" /* QUEUE */ 23 24 struct Ordered_blob_storage 25 { 26 String blob; 27 bool set_read_value; Ordered_blob_storageOrdered_blob_storage28 Ordered_blob_storage() : set_read_value(false) 29 {} 30 }; 31 32 #define PARTITION_BYTES_IN_POS 2 33 #define ORDERED_PART_NUM_OFFSET sizeof(Ordered_blob_storage **) 34 #define ORDERED_REC_OFFSET (ORDERED_PART_NUM_OFFSET + PARTITION_BYTES_IN_POS) 35 36 37 /** Struct used for partition_name_hash */ 38 typedef struct st_part_name_def 39 { 40 uchar *partition_name; 41 uint length; 42 uint32 part_id; 43 my_bool is_subpart; 44 } PART_NAME_DEF; 45 46 /** class where to save partitions Handler_share's */ 47 class Parts_share_refs 48 { 49 public: 50 uint num_parts; /**< Size of ha_share array */ 51 Handler_share **ha_shares; /**< Storage for each part */ Parts_share_refs()52 Parts_share_refs() 53 { 54 num_parts= 0; 55 ha_shares= NULL; 56 } ~Parts_share_refs()57 ~Parts_share_refs() 58 { 59 uint i; 60 for (i= 0; i < num_parts; i++) 61 delete ha_shares[i]; 62 delete[] ha_shares; 63 } init(uint arg_num_parts)64 bool init(uint 
arg_num_parts) 65 { 66 DBUG_ASSERT(!num_parts && !ha_shares); 67 num_parts= arg_num_parts; 68 /* Allocate an array of Handler_share pointers */ 69 ha_shares= new Handler_share *[num_parts]; 70 if (!ha_shares) 71 { 72 num_parts= 0; 73 return true; 74 } 75 memset(ha_shares, 0, sizeof(Handler_share*) * num_parts); 76 return false; 77 } 78 }; 79 80 class ha_partition; 81 82 /* Partition Full Text Search info */ 83 struct st_partition_ft_info 84 { 85 struct _ft_vft *please; 86 st_partition_ft_info *next; 87 ha_partition *file; 88 FT_INFO **part_ft_info; 89 }; 90 91 92 #ifdef HAVE_PSI_MUTEX_INTERFACE 93 extern PSI_mutex_key key_partition_auto_inc_mutex; 94 #endif 95 96 /** 97 Partition specific Handler_share. 98 */ 99 class Partition_share : public Handler_share 100 { 101 public: 102 bool auto_inc_initialized; 103 mysql_mutex_t auto_inc_mutex; /**< protecting auto_inc val */ 104 ulonglong next_auto_inc_val; /**< first non reserved value */ 105 /** 106 Hash of partition names. Initialized in the first ha_partition::open() 107 for the table_share. After that it is read-only, i.e. no locking required. 108 */ 109 bool partition_name_hash_initialized; 110 HASH partition_name_hash; 111 /** Storage for each partitions Handler_share */ 112 Parts_share_refs partitions_share_refs; Partition_share()113 Partition_share() 114 : auto_inc_initialized(false), 115 next_auto_inc_val(0), 116 partition_name_hash_initialized(false), 117 partition_names(NULL) 118 { 119 mysql_mutex_init(key_partition_auto_inc_mutex, 120 &auto_inc_mutex, 121 MY_MUTEX_INIT_FAST); 122 } 123 ~Partition_share()124 ~Partition_share() 125 { 126 mysql_mutex_destroy(&auto_inc_mutex); 127 if (partition_names) 128 { 129 my_free(partition_names); 130 } 131 if (partition_name_hash_initialized) 132 { 133 my_hash_free(&partition_name_hash); 134 } 135 } 136 137 bool init(uint num_parts); 138 139 /** 140 Release reserved auto increment values not used. 141 @param thd Thread. 
142 @param table_share Table Share 143 @param next_insert_id Next insert id (first non used auto inc value). 144 @param max_reserved End of reserved auto inc range. 145 */ 146 void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share, 147 const ulonglong next_insert_id, 148 const ulonglong max_reserved); 149 150 /** lock mutex protecting auto increment value next_auto_inc_val. */ lock_auto_inc()151 inline void lock_auto_inc() 152 { 153 mysql_mutex_lock(&auto_inc_mutex); 154 } 155 /** unlock mutex protecting auto increment value next_auto_inc_val. */ unlock_auto_inc()156 inline void unlock_auto_inc() 157 { 158 mysql_mutex_unlock(&auto_inc_mutex); 159 } 160 /** 161 Populate partition_name_hash with partition and subpartition names 162 from part_info. 163 @param part_info Partition info containing all partitions metadata. 164 165 @return Operation status. 166 @retval false Success. 167 @retval true Failure. 168 */ 169 bool populate_partition_name_hash(partition_info *part_info); 170 /** Get partition name. 171 172 @param part_id Partition id (for subpartitioned table only subpartition 173 names will be returned.) 174 175 @return partition name or NULL if error. 176 */ 177 const char *get_partition_name(size_t part_id) const; 178 private: 179 const uchar **partition_names; 180 /** 181 Insert [sub]partition name into partition_name_hash 182 @param name Partition name. 183 @param part_id Partition id. 184 @param is_subpart True if subpartition else partition. 185 186 @return Operation status. 187 @retval false Success. 188 @retval true Failure. 189 */ 190 bool insert_partition_name_in_hash(const char *name, 191 uint part_id, 192 bool is_subpart); 193 }; 194 195 196 /* 197 List of ranges to be scanned by ha_partition's MRR implementation 198 199 This object is 200 - A KEY_MULTI_RANGE structure (the MRR range) 201 - Storage for the range endpoints that the KEY_MULTI_RANGE has pointers to 202 - list of such ranges (connected through the "next" pointer). 
203 */ 204 205 typedef struct st_partition_key_multi_range 206 { 207 /* 208 Number of the range. The ranges are numbered in the order RANGE_SEQ_IF has 209 emitted them, starting from 1. The numbering in used by ordered MRR scans. 210 */ 211 uint id; 212 uchar *key[2]; 213 /* 214 Sizes of allocated memory in key[]. These may be larger then the actual 215 values as this structure is reused across MRR scans 216 */ 217 uint length[2]; 218 219 /* 220 The range. 221 key_multi_range.ptr is a pointer to the this PARTITION_KEY_MULTI_RANGE 222 object 223 */ 224 KEY_MULTI_RANGE key_multi_range; 225 226 // Range id from the SQL layer 227 range_id_t ptr; 228 229 // The next element in the list of MRR ranges. 230 st_partition_key_multi_range *next; 231 } PARTITION_KEY_MULTI_RANGE; 232 233 234 /* 235 List of ranges to be scanned in a certain [sub]partition 236 237 The idea is that there's a list of ranges to be scanned in the table 238 (formed by PARTITION_KEY_MULTI_RANGE structures), 239 and for each [sub]partition, we only need to scan a subset of that list. 240 241 PKMR1 --> PKMR2 --> PKMR3 -->... // list of PARTITION_KEY_MULTI_RANGE 242 ^ ^ 243 | | 244 PPKMR1 ----------> PPKMR2 -->... // list of PARTITION_PART_KEY_MULTI_RANGE 245 246 This way, per-partition lists of PARTITION_PART_KEY_MULTI_RANGE have pointers 247 to the elements of the global list of PARTITION_KEY_MULTI_RANGE. 248 */ 249 250 typedef struct st_partition_part_key_multi_range 251 { 252 PARTITION_KEY_MULTI_RANGE *partition_key_multi_range; 253 st_partition_part_key_multi_range *next; 254 } PARTITION_PART_KEY_MULTI_RANGE; 255 256 257 class ha_partition; 258 259 /* 260 The structure holding information about range sequence to be used with one 261 partition. 
262 (pointer to this is used as seq_init_param for RANGE_SEQ_IF structure when 263 invoking MRR for an individual partition) 264 */ 265 266 typedef struct st_partition_part_key_multi_range_hld 267 { 268 /* Owner object */ 269 ha_partition *partition; 270 271 /* id of the the partition this structure is for */ 272 uint32 part_id; 273 274 /* Current range we're iterating through */ 275 PARTITION_PART_KEY_MULTI_RANGE *partition_part_key_multi_range; 276 } PARTITION_PART_KEY_MULTI_RANGE_HLD; 277 278 279 extern "C" int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); 280 extern "C" int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); 281 282 class ha_partition :public handler 283 { 284 private: 285 enum partition_index_scan_type 286 { 287 partition_index_read= 0, 288 partition_index_first= 1, 289 partition_index_last= 3, 290 partition_index_read_last= 4, 291 partition_read_range = 5, 292 partition_no_index_scan= 6, 293 partition_read_multi_range = 7, 294 partition_ft_read= 8 295 }; 296 /* Data for the partition handler */ 297 int m_mode; // Open mode 298 uint m_open_test_lock; // Open test_if_locked 299 uchar *m_file_buffer; // Content of the .par file 300 char *m_name_buffer_ptr; // Pointer to first partition name 301 MEM_ROOT m_mem_root; 302 plugin_ref *m_engine_array; // Array of types of the handlers 303 handler **m_file; // Array of references to handler inst. 304 uint m_file_tot_parts; // Debug 305 handler **m_new_file; // Array of references to new handlers 306 handler **m_reorged_file; // Reorganised partitions 307 handler **m_added_file; // Added parts kept for errors 308 LEX_CSTRING *m_connect_string; 309 partition_info *m_part_info; // local reference to partition 310 Field **m_part_field_array; // Part field array locally to save acc 311 uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan 312 st_partition_ft_info *ft_first; 313 st_partition_ft_info *ft_current; 314 /* 315 Current index. 
316 When used in key_rec_cmp: If clustered pk, index compare 317 must compare pk if given index is same for two rows. 318 So normally m_curr_key_info[0]= current index and m_curr_key[1]= NULL, 319 and if clustered pk, [0]= current index, [1]= pk, [2]= NULL 320 */ 321 KEY *m_curr_key_info[3]; // Current index 322 uchar *m_rec0; // table->record[0] 323 const uchar *m_err_rec; // record which gave error 324 QUEUE m_queue; // Prio queue used by sorted read 325 326 /* 327 Length of an element in m_ordered_rec_buffer. The elements are composed of 328 329 [part_no] [table->record copy] [underlying_table_rowid] 330 331 underlying_table_rowid is only stored when the table has no extended keys. 332 */ 333 size_t m_priority_queue_rec_len; 334 335 /* 336 If true, then sorting records by key value also sorts them by their 337 underlying_table_rowid. 338 */ 339 bool m_using_extended_keys; 340 341 /* 342 Since the partition handler is a handler on top of other handlers, it 343 is necessary to keep information about what the underlying handler 344 characteristics is. It is not possible to keep any handler instances 345 for this since the MySQL Server sometimes allocating the handler object 346 without freeing them. 
347 */ 348 enum enum_handler_status 349 { 350 handler_not_initialized= 0, 351 handler_initialized, 352 handler_opened, 353 handler_closed 354 }; 355 enum_handler_status m_handler_status; 356 357 uint m_reorged_parts; // Number of reorganised parts 358 uint m_tot_parts; // Total number of partitions; 359 uint m_num_locks; // For engines like ha_blackhole, which needs no locks 360 uint m_last_part; // Last file that we update,write,read 361 part_id_range m_part_spec; // Which parts to scan 362 uint m_scan_value; // Value passed in rnd_init 363 // call 364 uint m_ref_length; // Length of position in this 365 // handler object 366 key_range m_start_key; // index read key range 367 enum partition_index_scan_type m_index_scan_type;// What type of index 368 // scan 369 uint m_top_entry; // Which partition is to 370 // deliver next result 371 uint m_rec_length; // Local copy of record length 372 373 bool m_ordered; // Ordered/Unordered index scan 374 bool m_pkey_is_clustered; // Is primary key clustered 375 bool m_create_handler; // Handler used to create table 376 bool m_is_sub_partitioned; // Is subpartitioned 377 bool m_ordered_scan_ongoing; 378 bool m_rnd_init_and_first; 379 bool m_ft_init_and_first; 380 381 /* 382 If set, this object was created with ha_partition::clone and doesn't 383 "own" the m_part_info structure. 384 */ 385 ha_partition *m_is_clone_of; 386 MEM_ROOT *m_clone_mem_root; 387 388 /* 389 We keep track if all underlying handlers are MyISAM since MyISAM has a 390 great number of extra flags not needed by other handlers. 391 */ 392 bool m_myisam; // Are all underlying handlers 393 // MyISAM 394 /* 395 We keep track of InnoDB handlers below since it requires proper setting 396 of query_id in fields at index_init and index_read calls. 397 */ 398 bool m_innodb; // Are all underlying handlers 399 // InnoDB 400 /* 401 When calling extra(HA_EXTRA_CACHE) we do not pass this to the underlying 402 handlers immediately. 
Instead we cache it and call the underlying 403 immediately before starting the scan on the partition. This is to 404 prevent allocating a READ CACHE for each partition in parallel when 405 performing a full table scan on MyISAM partitioned table. 406 This state is cleared by extra(HA_EXTRA_NO_CACHE). 407 */ 408 bool m_extra_cache; 409 uint m_extra_cache_size; 410 /* The same goes for HA_EXTRA_PREPARE_FOR_UPDATE */ 411 bool m_extra_prepare_for_update; 412 /* Which partition has active cache */ 413 uint m_extra_cache_part_id; 414 415 void init_handler_variables(); 416 /* 417 Variables for lock structures. 418 */ 419 420 bool auto_increment_lock; /**< lock reading/updating auto_inc */ 421 /** 422 Flag to keep the auto_increment lock through out the statement. 423 This to ensure it will work with statement based replication. 424 */ 425 bool auto_increment_safe_stmt_log_lock; 426 /** For optimizing ha_start_bulk_insert calls */ 427 MY_BITMAP m_bulk_insert_started; 428 ha_rows m_bulk_inserted_rows; 429 /** used for prediction of start_bulk_insert rows */ 430 enum_monotonicity_info m_part_func_monotonicity_info; 431 part_id_range m_direct_update_part_spec; 432 bool m_pre_calling; 433 bool m_pre_call_use_parallel; 434 /* Keep track of bulk access requests */ 435 bool bulk_access_executing; 436 437 /** keep track of locked partitions */ 438 MY_BITMAP m_locked_partitions; 439 /** Stores shared auto_increment etc. */ 440 Partition_share *part_share; 441 /** Temporary storage for new partitions Handler_shares during ALTER */ 442 List<Parts_share_refs> m_new_partitions_share_refs; 443 /** Sorted array of partition ids in descending order of number of rows. */ 444 uint32 *m_part_ids_sorted_by_num_of_records; 445 /* Compare function for my_qsort2, for reversed order. 
*/ 446 static int compare_number_of_records(ha_partition *me, 447 const uint32 *a, 448 const uint32 *b); 449 /** keep track of partitions to call ha_reset */ 450 MY_BITMAP m_partitions_to_reset; 451 /** partitions that returned HA_ERR_KEY_NOT_FOUND. */ 452 MY_BITMAP m_key_not_found_partitions; 453 bool m_key_not_found; 454 List<String> *m_partitions_to_open; 455 MY_BITMAP m_opened_partitions; 456 /** This is one of the m_file-s that it guaranteed to be opened. */ 457 /** It is set in open_read_partitions() */ 458 handler *m_file_sample; 459 public: get_child_handlers()460 handler **get_child_handlers() 461 { 462 return m_file; 463 } get_part_spec()464 virtual part_id_range *get_part_spec() 465 { 466 return &m_part_spec; 467 } get_no_current_part_id()468 virtual uint get_no_current_part_id() 469 { 470 return NO_CURRENT_PART_ID; 471 } get_part_share()472 Partition_share *get_part_share() { return part_share; } 473 handler *clone(const char *name, MEM_ROOT *mem_root); set_part_info(partition_info * part_info)474 virtual void set_part_info(partition_info *part_info) 475 { 476 m_part_info= part_info; 477 m_is_sub_partitioned= part_info->is_sub_partitioned(); 478 } 479 480 virtual void return_record_by_parent(); 481 vers_can_native(THD * thd)482 virtual bool vers_can_native(THD *thd) 483 { 484 if (thd->lex->part_info) 485 { 486 // PARTITION BY SYSTEM_TIME is not supported for now 487 return thd->lex->part_info->part_type != VERSIONING_PARTITION; 488 } 489 else 490 { 491 bool can= true; 492 for (uint i= 0; i < m_tot_parts && can; i++) 493 can= can && m_file[i]->vers_can_native(thd); 494 return can; 495 } 496 } 497 498 /* 499 ------------------------------------------------------------------------- 500 MODULE create/delete handler object 501 ------------------------------------------------------------------------- 502 Object create/delete method. Normally called when a table object 503 exists. 
There is also a method to create the handler object with only 504 partition information. This is used from mysql_create_table when the 505 table is to be created and the engine type is deduced to be the 506 partition handler. 507 ------------------------------------------------------------------------- 508 */ 509 ha_partition(handlerton *hton, TABLE_SHARE * table); 510 ha_partition(handlerton *hton, partition_info * part_info); 511 ha_partition(handlerton *hton, TABLE_SHARE *share, 512 partition_info *part_info_arg, 513 ha_partition *clone_arg, 514 MEM_ROOT *clone_mem_root_arg); 515 ~ha_partition(); 516 void ha_partition_init(); 517 /* 518 A partition handler has no characteristics in itself. It only inherits 519 those from the underlying handlers. Here we set-up those constants to 520 enable later calls of the methods to retrieve constants from the under- 521 lying handlers. Returns false if not successful. 522 */ 523 bool initialize_partition(MEM_ROOT *mem_root); 524 525 /* 526 ------------------------------------------------------------------------- 527 MODULE meta data changes 528 ------------------------------------------------------------------------- 529 Meta data routines to CREATE, DROP, RENAME table and often used at 530 ALTER TABLE (update_create_info used from ALTER TABLE and SHOW ..). 531 532 create_partitioning_metadata is called before opening a new handler object 533 with openfrm to call create. 
It is used to create any local handler 534 object needed in opening the object in openfrm 535 ------------------------------------------------------------------------- 536 */ 537 virtual int delete_table(const char *from); 538 virtual int rename_table(const char *from, const char *to); 539 virtual int create(const char *name, TABLE *form, 540 HA_CREATE_INFO *create_info); 541 virtual int create_partitioning_metadata(const char *name, 542 const char *old_name, int action_flag); 543 virtual void update_create_info(HA_CREATE_INFO *create_info); 544 virtual int change_partitions(HA_CREATE_INFO *create_info, 545 const char *path, 546 ulonglong * const copied, 547 ulonglong * const deleted, 548 const uchar *pack_frm_data, 549 size_t pack_frm_len); 550 virtual int drop_partitions(const char *path); 551 virtual int rename_partitions(const char *path); get_no_parts(const char * name,uint * num_parts)552 bool get_no_parts(const char *name, uint *num_parts) 553 { 554 DBUG_ENTER("ha_partition::get_no_parts"); 555 *num_parts= m_tot_parts; 556 DBUG_RETURN(0); 557 } 558 virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); 559 virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, 560 uint table_changes); update_part_create_info(HA_CREATE_INFO * create_info,uint part_id)561 void update_part_create_info(HA_CREATE_INFO *create_info, uint part_id) 562 { 563 m_file[part_id]->update_create_info(create_info); 564 } 565 private: 566 int copy_partitions(ulonglong * const copied, ulonglong * const deleted); 567 void cleanup_new_partition(uint part_count); 568 int prepare_new_partition(TABLE *table, HA_CREATE_INFO *create_info, 569 handler *file, const char *part_name, 570 partition_element *p_elem, 571 uint disable_non_uniq_indexes); 572 /* 573 delete_table and rename_table uses very similar logic which 574 is packed into this routine. 
575 */ 576 uint del_ren_table(const char *from, const char *to); 577 /* 578 One method to create the table_name.par file containing the names of the 579 underlying partitions, their engine and the number of partitions. 580 And one method to read it in. 581 */ 582 bool create_handler_file(const char *name); 583 bool setup_engine_array(MEM_ROOT *mem_root); 584 bool read_par_file(const char *name); 585 bool get_from_handler_file(const char *name, MEM_ROOT *mem_root, 586 bool is_clone); 587 bool new_handlers_from_part_info(MEM_ROOT *mem_root); 588 bool create_handlers(MEM_ROOT *mem_root); 589 void clear_handler_file(); 590 int set_up_table_before_create(TABLE *table_arg, 591 const char *partition_name_with_path, 592 HA_CREATE_INFO *info, 593 partition_element *p_elem); 594 partition_element *find_partition_element(uint part_id); 595 bool insert_partition_name_in_hash(const char *name, uint part_id, 596 bool is_subpart); 597 bool populate_partition_name_hash(); 598 Partition_share *get_share(); 599 bool set_ha_share_ref(Handler_share **ha_share); 600 void fix_data_dir(char* path); 601 bool init_partition_bitmaps(); 602 void free_partition_bitmaps(); 603 604 public: 605 606 /* 607 ------------------------------------------------------------------------- 608 MODULE open/close object 609 ------------------------------------------------------------------------- 610 Open and close handler object to ensure all underlying files and 611 objects allocated and deallocated for query handling is handled 612 properly. 613 ------------------------------------------------------------------------- 614 615 A handler object is opened as part of its initialisation and before 616 being used for normal queries (not before meta-data changes always. 617 If the object was opened it will also be closed before being deleted. 
618 */ 619 virtual int open(const char *name, int mode, uint test_if_locked); 620 virtual int close(void); 621 622 /* 623 ------------------------------------------------------------------------- 624 MODULE start/end statement 625 ------------------------------------------------------------------------- 626 This module contains methods that are used to understand start/end of 627 statements, transaction boundaries, and aid for proper concurrency 628 control. 629 The partition handler need not implement abort and commit since this 630 will be handled by any underlying handlers implementing transactions. 631 There is only one call to each handler type involved per transaction 632 and these go directly to the handlers supporting transactions 633 ------------------------------------------------------------------------- 634 */ 635 virtual THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, 636 enum thr_lock_type lock_type); 637 virtual int external_lock(THD * thd, int lock_type); engine_name()638 LEX_CSTRING *engine_name() { return hton_name(partition_ht()); } 639 /* 640 When table is locked a statement is started by calling start_stmt 641 instead of external_lock 642 */ 643 virtual int start_stmt(THD * thd, thr_lock_type lock_type); 644 /* 645 Lock count is number of locked underlying handlers (I assume) 646 */ 647 virtual uint lock_count(void) const; 648 /* 649 Call to unlock rows not to be updated in transaction 650 */ 651 virtual void unlock_row(); 652 /* 653 Check if semi consistent read 654 */ 655 virtual bool was_semi_consistent_read(); 656 /* 657 Call to hint about semi consistent read 658 */ 659 virtual void try_semi_consistent_read(bool); 660 661 /* 662 NOTE: due to performance and resource issues with many partitions, 663 we only use the m_psi on the ha_partition handler, excluding all 664 partitions m_psi. 665 */ 666 #ifdef HAVE_M_PSI_PER_PARTITION 667 /* 668 Bind the table/handler thread to track table i/o. 
669 */ 670 virtual void unbind_psi(); 671 virtual void rebind_psi(); 672 #endif 673 /* 674 ------------------------------------------------------------------------- 675 MODULE change record 676 ------------------------------------------------------------------------- 677 This part of the handler interface is used to change the records 678 after INSERT, DELETE, UPDATE, REPLACE method calls but also other 679 special meta-data operations as ALTER TABLE, LOAD DATA, TRUNCATE. 680 ------------------------------------------------------------------------- 681 682 These methods are used for insert (write_row), update (update_row) 683 and delete (delete_row). All methods to change data always work on 684 one row at a time. update_row and delete_row also contains the old 685 row. 686 delete_all_rows will delete all rows in the table in one call as a 687 special optimisation for DELETE from table; 688 689 Bulk inserts are supported if all underlying handlers support it. 690 start_bulk_insert and end_bulk_insert is called before and after a 691 number of calls to write_row. 
692 */ 693 virtual int write_row(uchar * buf); 694 virtual bool start_bulk_update(); 695 virtual int exec_bulk_update(ha_rows *dup_key_found); 696 virtual int end_bulk_update(); 697 virtual int bulk_update_row(const uchar *old_data, const uchar *new_data, 698 ha_rows *dup_key_found); 699 virtual int update_row(const uchar * old_data, const uchar * new_data); 700 virtual int direct_update_rows_init(List<Item> *update_fields); 701 virtual int pre_direct_update_rows_init(List<Item> *update_fields); 702 virtual int direct_update_rows(ha_rows *update_rows); 703 virtual int pre_direct_update_rows(); 704 virtual bool start_bulk_delete(); 705 virtual int end_bulk_delete(); 706 virtual int delete_row(const uchar * buf); 707 virtual int direct_delete_rows_init(); 708 virtual int pre_direct_delete_rows_init(); 709 virtual int direct_delete_rows(ha_rows *delete_rows); 710 virtual int pre_direct_delete_rows(); 711 virtual int delete_all_rows(void); 712 virtual int truncate(); 713 virtual void start_bulk_insert(ha_rows rows, uint flags); 714 virtual int end_bulk_insert(); 715 private: 716 ha_rows guess_bulk_insert_rows(); 717 void start_part_bulk_insert(THD *thd, uint part_id); 718 long estimate_read_buffer_size(long original_size); 719 public: 720 721 /* 722 Method for truncating a specific partition. 723 (i.e. ALTER TABLE t1 TRUNCATE PARTITION p). 724 725 @remark This method is a partitioning-specific hook 726 and thus not a member of the general SE API. 
727 */ 728 int truncate_partition(Alter_info *, bool *binlog_stmt); 729 is_fatal_error(int error,uint flags)730 virtual bool is_fatal_error(int error, uint flags) 731 { 732 if (!handler::is_fatal_error(error, flags) || 733 error == HA_ERR_NO_PARTITION_FOUND || 734 error == HA_ERR_NOT_IN_LOCK_PARTITIONS) 735 return FALSE; 736 return TRUE; 737 } 738 739 740 /* 741 ------------------------------------------------------------------------- 742 MODULE full table scan 743 ------------------------------------------------------------------------- 744 This module is used for the most basic access method for any table 745 handler. This is to fetch all data through a full table scan. No 746 indexes are needed to implement this part. 747 It contains one method to start the scan (rnd_init) that can also be 748 called multiple times (typical in a nested loop join). Then proceeding 749 to the next record (rnd_next) and closing the scan (rnd_end). 750 To remember a record for later access there is a method (position) 751 and there is a method used to retrieve the record based on the stored 752 position. 753 The position can be a file position, a primary key, a ROWID dependent 754 on the handler below. 755 ------------------------------------------------------------------------- 756 */ 757 /* 758 unlike index_init(), rnd_init() can be called two times 759 without rnd_end() in between (it only makes sense if scan=1). 
760 then the second call should prepare for the new table scan 761 (e.g if rnd_init allocates the cursor, second call should 762 position it to the start of the table, no need to deallocate 763 and allocate it again 764 */ 765 virtual int rnd_init(bool scan); 766 virtual int rnd_end(); 767 virtual int rnd_next(uchar * buf); 768 virtual int rnd_pos(uchar * buf, uchar * pos); 769 virtual int rnd_pos_by_record(uchar *record); 770 virtual void position(const uchar * record); 771 772 /* 773 ------------------------------------------------------------------------- 774 MODULE index scan 775 ------------------------------------------------------------------------- 776 This part of the handler interface is used to perform access through 777 indexes. The interface is defined as a scan interface but the handler 778 can also use key lookup if the index is a unique index or a primary 779 key index. 780 Index scans are mostly useful for SELECT queries but are an important 781 part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT 782 and so forth. 783 Naturally an index is needed for an index scan and indexes can either 784 be ordered, hash based. Some ordered indexes can return data in order 785 but not necessarily all of them. 786 There are many flags that define the behavior of indexes in the 787 various handlers. These methods are found in the optimizer module. 788 ------------------------------------------------------------------------- 789 790 index_read is called to start a scan of an index. The find_flag defines 791 the semantics of the scan. These flags are defined in 792 include/my_base.h 793 index_read_idx is the same but also initializes index before calling doing 794 the same thing as index_read. Thus it is similar to index_init followed 795 by index_read. This is also how we implement it. 796 797 index_read/index_read_idx does also return the first row. 
Thus for 798 key lookups, the index_read will be the only call to the handler in 799 the index scan. 800 801 index_init initializes an index before using it and index_end does 802 any end processing needed. 803 */ 804 virtual int index_read_map(uchar * buf, const uchar * key, 805 key_part_map keypart_map, 806 enum ha_rkey_function find_flag); 807 virtual int index_init(uint idx, bool sorted); 808 virtual int index_end(); 809 810 /** 811 @breif 812 Positions an index cursor to the index specified in the handle. Fetches the 813 row if available. If the key value is null, begin at first key of the 814 index. 815 */ 816 virtual int index_read_idx_map(uchar *buf, uint index, const uchar *key, 817 key_part_map keypart_map, 818 enum ha_rkey_function find_flag); 819 /* 820 These methods are used to jump to next or previous entry in the index 821 scan. There are also methods to jump to first and last entry. 822 */ 823 virtual int index_next(uchar * buf); 824 virtual int index_prev(uchar * buf); 825 virtual int index_first(uchar * buf); 826 virtual int index_last(uchar * buf); 827 virtual int index_next_same(uchar * buf, const uchar * key, uint keylen); 828 829 int index_read_last_map(uchar *buf, 830 const uchar *key, 831 key_part_map keypart_map); 832 833 /* 834 read_first_row is virtual method but is only implemented by 835 handler.cc, no storage engine has implemented it so neither 836 will the partition handler. 
837 838 virtual int read_first_row(uchar *buf, uint primary_key); 839 */ 840 841 842 virtual int read_range_first(const key_range * start_key, 843 const key_range * end_key, 844 bool eq_range, bool sorted); 845 virtual int read_range_next(); 846 847 848 HANDLER_BUFFER *m_mrr_buffer; 849 uint *m_mrr_buffer_size; 850 uchar *m_mrr_full_buffer; 851 uint m_mrr_full_buffer_size; 852 uint m_mrr_new_full_buffer_size; 853 MY_BITMAP m_mrr_used_partitions; 854 uint *m_stock_range_seq; 855 /* not used: uint m_current_range_seq; */ 856 857 /* Value of mrr_mode passed to ha_partition::multi_range_read_init */ 858 uint m_mrr_mode; 859 860 /* Value of n_ranges passed to ha_partition::multi_range_read_init */ 861 uint m_mrr_n_ranges; 862 863 /* 864 Ordered MRR mode: m_range_info[N] has the range_id of the last record that 865 we've got from partition N 866 */ 867 range_id_t *m_range_info; 868 869 /* 870 TRUE <=> This ha_partition::multi_range_read_next() call is the first one 871 */ 872 bool m_multi_range_read_first; 873 874 /* not used: uint m_mrr_range_init_flags; */ 875 876 /* Number of elements in the list pointed by m_mrr_range_first. Not used */ 877 uint m_mrr_range_length; 878 879 /* Linked list of ranges to scan */ 880 PARTITION_KEY_MULTI_RANGE *m_mrr_range_first; 881 PARTITION_KEY_MULTI_RANGE *m_mrr_range_current; 882 883 /* 884 For each partition: number of ranges MRR scan will scan in the partition 885 */ 886 uint *m_part_mrr_range_length; 887 888 /* For each partition: List of ranges to scan in this partition */ 889 PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_first; 890 PARTITION_PART_KEY_MULTI_RANGE **m_part_mrr_range_current; 891 PARTITION_PART_KEY_MULTI_RANGE_HLD *m_partition_part_key_multi_range_hld; 892 893 /* 894 Sequence of ranges to be scanned (TODO: why not store this in 895 handler::mrr_{iter,funcs}?) 
896 */ 897 range_seq_t m_seq; 898 RANGE_SEQ_IF *m_seq_if; 899 900 /* Range iterator structure to be supplied to partitions */ 901 RANGE_SEQ_IF m_part_seq_if; 902 903 virtual int multi_range_key_create_key( 904 RANGE_SEQ_IF *seq, 905 range_seq_t seq_it 906 ); 907 virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, 908 void *seq_init_param, 909 uint n_ranges, uint *bufsz, 910 uint *mrr_mode, 911 Cost_estimate *cost); 912 virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, 913 uint key_parts, uint *bufsz, 914 uint *mrr_mode, Cost_estimate *cost); 915 virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, 916 uint n_ranges, uint mrr_mode, 917 HANDLER_BUFFER *buf); 918 virtual int multi_range_read_next(range_id_t *range_info); 919 virtual int multi_range_read_explain_info(uint mrr_mode, char *str, 920 size_t size); last_part()921 uint last_part() { return m_last_part; } 922 923 private: 924 bool init_record_priority_queue(); 925 void destroy_record_priority_queue(); 926 int common_index_read(uchar * buf, bool have_start_key); 927 int common_first_last(uchar * buf); 928 int partition_scan_set_up(uchar * buf, bool idx_read_flag); 929 bool check_parallel_search(); 930 int handle_pre_scan(bool reverse_order, bool use_parallel); 931 int handle_unordered_next(uchar * buf, bool next_same); 932 int handle_unordered_scan_next_partition(uchar * buf); 933 int handle_ordered_index_scan(uchar * buf, bool reverse_order); 934 int handle_ordered_index_scan_key_not_found(); 935 int handle_ordered_next(uchar * buf, bool next_same); 936 int handle_ordered_prev(uchar * buf); 937 void return_top_record(uchar * buf); 938 void swap_blobs(uchar* rec_buf, Ordered_blob_storage ** storage, bool restore); 939 public: 940 /* 941 ------------------------------------------------------------------------- 942 MODULE information calls 943 ------------------------------------------------------------------------- 944 This calls are used to 
inform the handler of specifics of the ongoing 945 scans and other actions. Most of these are used for optimisation 946 purposes. 947 ------------------------------------------------------------------------- 948 */ 949 virtual int info(uint); 950 void get_dynamic_partition_info(PARTITION_STATS *stat_info, 951 uint part_id); 952 void set_partitions_to_open(List<String> *partition_names); 953 int change_partitions_to_open(List<String> *partition_names); 954 int open_read_partitions(char *name_buff, size_t name_buff_size); 955 virtual int extra(enum ha_extra_function operation); 956 virtual int extra_opt(enum ha_extra_function operation, ulong arg); 957 virtual int reset(void); 958 virtual uint count_query_cache_dependant_tables(uint8 *tables_type); 959 virtual my_bool 960 register_query_cache_dependant_tables(THD *thd, 961 Query_cache *cache, 962 Query_cache_block_table **block, 963 uint *n); 964 965 private: 966 typedef int handler_callback(handler *, void *); 967 968 my_bool reg_query_cache_dependant_table(THD *thd, 969 char *engine_key, 970 uint engine_key_len, 971 char *query_key, uint query_key_len, 972 uint8 type, 973 Query_cache *cache, 974 Query_cache_block_table 975 **block_table, 976 handler *file, uint *n); 977 static const uint NO_CURRENT_PART_ID= NOT_A_PARTITION_ID; 978 int loop_partitions(handler_callback callback, void *param); 979 int loop_extra_alter(enum ha_extra_function operations); 980 void late_extra_cache(uint partition_id); 981 void late_extra_no_cache(uint partition_id); 982 void prepare_extra_cache(uint cachesize); get_open_file_sample()983 handler *get_open_file_sample() const { return m_file_sample; } 984 public: 985 986 /* 987 ------------------------------------------------------------------------- 988 MODULE optimiser support 989 ------------------------------------------------------------------------- 990 ------------------------------------------------------------------------- 991 */ 992 993 /* 994 NOTE !!!!!! 
995 ------------------------------------------------------------------------- 996 ------------------------------------------------------------------------- 997 One important part of the public handler interface that is not depicted in 998 the methods is the attribute records 999 1000 which is defined in the base class. This is looked upon directly and is 1001 set by calling info(HA_STATUS_INFO) ? 1002 ------------------------------------------------------------------------- 1003 */ 1004 1005 private: 1006 /* Helper functions for optimizer hints. */ 1007 ha_rows min_rows_for_estimate(); 1008 uint get_biggest_used_partition(uint *part_index); 1009 public: 1010 1011 /* 1012 keys_to_use_for_scanning can probably be implemented as the 1013 intersection of all underlying handlers if mixed handlers are used. 1014 This method is used to derive whether an index can be used for 1015 index-only scanning when performing an ORDER BY query. 1016 Only called from one place in sql_select.cc 1017 */ 1018 virtual const key_map *keys_to_use_for_scanning(); 1019 1020 /* 1021 Called in test_quick_select to determine if indexes should be used. 1022 */ 1023 virtual double scan_time(); 1024 1025 /* 1026 The next method will never be called if you do not implement indexes. 1027 */ 1028 virtual double read_time(uint index, uint ranges, ha_rows rows); 1029 /* 1030 For the given range how many records are estimated to be in this range. 1031 Used by optimiser to calculate cost of using a particular index. 1032 */ 1033 virtual ha_rows records_in_range(uint inx, key_range * min_key, 1034 key_range * max_key); 1035 1036 /* 1037 Upper bound of number records returned in scan is sum of all 1038 underlying handlers. 1039 */ 1040 virtual ha_rows estimate_rows_upper_bound(); 1041 1042 /* 1043 table_cache_type is implemented by the underlying handler but all 1044 underlying handlers must have the same implementation for it to work. 
1045 */ 1046 virtual uint8 table_cache_type(); 1047 virtual ha_rows records(); 1048 1049 /* Calculate hash value for PARTITION BY KEY tables. */ 1050 static uint32 calculate_key_hash_value(Field **field_array); 1051 1052 /* 1053 ------------------------------------------------------------------------- 1054 MODULE print messages 1055 ------------------------------------------------------------------------- 1056 This module contains various methods that returns text messages for 1057 table types, index type and error messages. 1058 ------------------------------------------------------------------------- 1059 */ 1060 /* 1061 The name of the index type that will be used for display 1062 Here we must ensure that all handlers use the same index type 1063 for each index created. 1064 */ 1065 virtual const char *index_type(uint inx); 1066 1067 /* The name of the table type that will be used for display purposes */ 1068 virtual const char *table_type() const; 1069 1070 /* The name of the row type used for the underlying tables. */ 1071 virtual enum row_type get_row_type() const; 1072 1073 /* 1074 Handler specific error messages 1075 */ 1076 virtual void print_error(int error, myf errflag); 1077 virtual bool get_error_message(int error, String * buf); 1078 /* 1079 ------------------------------------------------------------------------- 1080 MODULE handler characteristics 1081 ------------------------------------------------------------------------- 1082 This module contains a number of methods defining limitations and 1083 characteristics of the handler. The partition handler will calculate 1084 this characteristics based on underlying handler characteristics. 1085 ------------------------------------------------------------------------- 1086 1087 This is a list of flags that says what the storage engine 1088 implements. 
The current table flags are documented in handler.h
    The partition handler will support whatever the underlying handlers
    support except when specifically mentioned below about exceptions
    to this rule.
    NOTE: This cannot be cached since it can depend on TRANSACTION ISOLATION
    LEVEL which is dynamic, see bug#39084.

    HA_READ_RND_SAME:
    Not currently used. (Means that the handler supports the rnd_same() call)
    (MyISAM, HEAP)

    HA_TABLE_SCAN_ON_INDEX:
    Used to avoid scanning full tables on an index. If this flag is set then
    the handler always has a primary key (hidden if not defined) and this
    index is used for scanning rather than a full table scan in all
    situations.
    (InnoDB, Federated)

    HA_REC_NOT_IN_SEQ:
    This flag is set for handlers that cannot guarantee that the rows are
    returned according to incremental positions (0, 1, 2, 3...).
    This also means that rnd_next() should return HA_ERR_RECORD_DELETED
    if it finds a deleted row.
    (MyISAM (not fixed length row), HEAP, InnoDB)

    HA_CAN_GEOMETRY:
    Can the storage engine handle spatial data.
    Used to check that no spatial attributes are declared unless
    the storage engine is capable of handling them.
    (MyISAM)

    HA_FAST_KEY_READ:
    Setting this flag indicates that the handler is equally fast in
    finding a row by key as by position.
    This flag is used in a very special situation in conjunction with
    filesorts. For further explanation see intro to init_read_record.
    (HEAP, InnoDB)

    HA_NULL_IN_KEY:
    Are NULL values allowed in indexes.
    If this is not allowed then it is not possible to use an index on a
    NULLable field.
    (HEAP, MyISAM, InnoDB)

    HA_DUPLICATE_POS:
    Tells that the position of the conflicting duplicate key
    record is stored in table->file->dupp_ref.
(insert uses rnd_pos() on
    this to find the duplicated row)
    (MyISAM)

    HA_CAN_INDEX_BLOBS:
    Is the storage engine capable of defining an index of a prefix on
    a BLOB attribute.
    (Federated, MyISAM, InnoDB)

    HA_AUTO_PART_KEY:
    Auto increment fields can be part of a multi-part key. For second part
    auto-increment keys, the auto_incrementing is done in handler.cc
    (Federated, MyISAM)

    HA_REQUIRE_PRIMARY_KEY:
    Can't define a table without primary key (and cannot handle a table
    with hidden primary key)
    (No handler has this limitation currently)

    HA_WANTS_PRIMARY_KEY:
    Can't define a table without primary key except sequences
    (Only InnoDB has this when using innodb_force_primary_key == ON)

    HA_STATS_RECORDS_IS_EXACT:
    Does the counter of records after the info call specify an exact
    value or not. If it does, this flag is set.
    Only MyISAM and HEAP use exact count.

    HA_CAN_INSERT_DELAYED:
    Can the storage engine support delayed inserts.
    To start with the partition handler will not support delayed inserts.
    Further investigation needed.
    (HEAP, MyISAM)

    HA_PRIMARY_KEY_IN_READ_INDEX:
    This parameter is set when the handler will also return the primary key
    when doing read-only-key on another index.

    HA_NOT_DELETE_WITH_CACHE:
    Seems to be an old MyISAM feature that is no longer used. No handler
    has it defined but it is checked in init_read_record.
    Further investigation needed.
    (No handler defines it)

    HA_NO_PREFIX_CHAR_KEYS:
    Indexes on prefixes of character fields are not allowed.
    (Federated)

    HA_CAN_FULLTEXT:
    Does the storage engine support fulltext indexes.
    The partition handler will start by not supporting fulltext indexes.
    (MyISAM)

    HA_CAN_SQL_HANDLER:
    Can the HANDLER interface in the MySQL API be used towards this
    storage engine.
1190 (MyISAM, InnoDB) 1191 1192 HA_NO_AUTO_INCREMENT: 1193 Set if the storage engine does not support auto increment fields. 1194 (Currently not set by any handler) 1195 1196 HA_HAS_CHECKSUM: 1197 Special MyISAM feature. Has special SQL support in CREATE TABLE. 1198 No special handling needed by partition handler. 1199 (MyISAM) 1200 1201 HA_FILE_BASED: 1202 Should file names always be in lower case (used by engines 1203 that map table names to file names. 1204 Since partition handler has a local file this flag is set. 1205 (Federated, MyISAM) 1206 1207 HA_CAN_BIT_FIELD: 1208 Is the storage engine capable of handling bit fields? 1209 (MyISAM) 1210 1211 HA_NEED_READ_RANGE_BUFFER: 1212 Is Read Multi-Range supported => need multi read range buffer 1213 This parameter specifies whether a buffer for read multi range 1214 is needed by the handler. Whether the handler supports this 1215 feature or not is dependent of whether the handler implements 1216 read_multi_range* calls or not. The only handler currently 1217 supporting this feature is NDB so the partition handler need 1218 not handle this call. There are methods in handler.cc that will 1219 transfer those calls into index_read and other calls in the 1220 index scan module. 1221 (No handler defines it) 1222 1223 HA_PRIMARY_KEY_REQUIRED_FOR_POSITION: 1224 Does the storage engine need a PK for position? 1225 (InnoDB) 1226 1227 HA_FILE_BASED is always set for partition handler since we use a 1228 special file for handling names of partitions, engine types. 1229 HA_REC_NOT_IN_SEQ is always set for partition handler since we cannot 1230 guarantee that the records will be returned in sequence. 1231 HA_DUPLICATE_POS, 1232 HA_CAN_INSERT_DELAYED, HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is disabled 1233 until further investigated. 1234 */ 1235 virtual Table_flags table_flags() const; 1236 1237 /* 1238 This is a bitmap of flags that says how the storage engine 1239 implements indexes. 
The current index flags are documented in
    handler.h. If you do not implement indexes, just return zero
    here.

    part is the key part to check. First key part is 0
    If all_parts is set, MySQL wants to know the flags for the combined
    index up to and including 'part'.

    HA_READ_NEXT:
    Does the index support read next, this is assumed in the server
    code and never checked so all indexes must support this.
    Note that the handler can be used even if it doesn't have any index.
    (HEAP, MyISAM, Federated, InnoDB)

    HA_READ_PREV:
    Can the index be used to scan backwards.
    (HEAP, MyISAM, InnoDB)

    HA_READ_ORDER:
    Can the index deliver its records in index order. Typically true for
    all ordered indexes and not true for hash indexes.
    In first step this is not true for partition handler until a merge
    sort has been implemented in partition handler.
    Used to set keymap part_of_sortkey
    This keymap is only used to find indexes usable for resolving an ORDER BY
    in the query. Thus in most cases index_read will work just fine without
    order in result production. When this flag is set it is however safe to
    order all output started by index_read since most engines do this. With
    read_multi_range calls there is a specific flag setting order or not
    order so in those cases ordering of index output can be avoided.
    (InnoDB, HEAP, MyISAM)

    HA_READ_RANGE:
    Specify whether index can handle ranges, typically true for all
    ordered indexes and not true for hash indexes.
    Used by optimiser to check if ranges (as key >= 5) can be optimised
    by index.
    (InnoDB, MyISAM, HEAP)

    HA_ONLY_WHOLE_INDEX:
    Can't use part key searches. This is typically true for hash indexes
    and typically not true for ordered indexes.
    (Federated, HEAP)

    HA_KEYREAD_ONLY:
    Does the storage engine support index-only scans on this index.
1285 Enables use of HA_EXTRA_KEYREAD and HA_EXTRA_NO_KEYREAD 1286 Used to set key_map keys_for_keyread and to check in optimiser for 1287 index-only scans. When doing a read under HA_EXTRA_KEYREAD the handler 1288 only have to fill in the columns the key covers. If 1289 HA_PRIMARY_KEY_IN_READ_INDEX is set then also the PRIMARY KEY columns 1290 must be updated in the row. 1291 (InnoDB, MyISAM) 1292 */ index_flags(uint inx,uint part,bool all_parts)1293 virtual ulong index_flags(uint inx, uint part, bool all_parts) const 1294 { 1295 /* 1296 The following code is not safe if you are using different 1297 storage engines or different index types per partition. 1298 */ 1299 return m_file[0]->index_flags(inx, part, all_parts); 1300 } 1301 1302 /** 1303 wrapper function for handlerton alter_table_flags, since 1304 the ha_partition_hton cannot know all its capabilities 1305 */ 1306 virtual alter_table_operations alter_table_flags(alter_table_operations flags); 1307 /* 1308 unireg.cc will call the following to make sure that the storage engine 1309 can handle the data it is about to send. 1310 1311 The maximum supported values is the minimum of all handlers in the table 1312 */ 1313 uint min_of_the_max_uint(uint (handler::*operator_func)(void) const) const; 1314 virtual uint max_supported_record_length() const; 1315 virtual uint max_supported_keys() const; 1316 virtual uint max_supported_key_parts() const; 1317 virtual uint max_supported_key_length() const; 1318 virtual uint max_supported_key_part_length() const; 1319 virtual uint min_record_length(uint options) const; 1320 1321 /* 1322 Primary key is clustered can only be true if all underlying handlers have 1323 this feature. 
1324 */ primary_key_is_clustered()1325 virtual bool primary_key_is_clustered() 1326 { return m_pkey_is_clustered; } 1327 1328 /* 1329 ------------------------------------------------------------------------- 1330 MODULE compare records 1331 ------------------------------------------------------------------------- 1332 cmp_ref checks if two references are the same. For most handlers this is 1333 a simple memcmp of the reference. However some handlers use primary key 1334 as reference and this can be the same even if memcmp says they are 1335 different. This is due to character sets and end spaces and so forth. 1336 For the partition handler the reference is first two bytes providing the 1337 partition identity of the referred record and then the reference of the 1338 underlying handler. 1339 Thus cmp_ref for the partition handler always returns FALSE for records 1340 not in the same partition and uses cmp_ref on the underlying handler 1341 to check whether the rest of the reference part is also the same. 1342 ------------------------------------------------------------------------- 1343 */ 1344 virtual int cmp_ref(const uchar * ref1, const uchar * ref2); 1345 /* 1346 ------------------------------------------------------------------------- 1347 MODULE auto increment 1348 ------------------------------------------------------------------------- 1349 This module is used to handle the support of auto increments. 1350 1351 This variable in the handler is used as part of the handler interface 1352 It is maintained by the parent handler object and should not be 1353 touched by child handler objects (see handler.cc for its use). 
1354 1355 auto_increment_column_changed 1356 ------------------------------------------------------------------------- 1357 */ 1358 virtual bool need_info_for_auto_inc(); 1359 virtual bool can_use_for_auto_inc_init(); 1360 virtual void get_auto_increment(ulonglong offset, ulonglong increment, 1361 ulonglong nb_desired_values, 1362 ulonglong *first_value, 1363 ulonglong *nb_reserved_values); 1364 virtual void release_auto_increment(); 1365 private: 1366 virtual int reset_auto_increment(ulonglong value); 1367 void update_next_auto_inc_val(); lock_auto_increment()1368 virtual void lock_auto_increment() 1369 { 1370 /* lock already taken */ 1371 if (auto_increment_safe_stmt_log_lock) 1372 return; 1373 if (table_share->tmp_table == NO_TMP_TABLE) 1374 { 1375 part_share->lock_auto_inc(); 1376 DBUG_ASSERT(!auto_increment_lock); 1377 auto_increment_lock= TRUE; 1378 } 1379 } unlock_auto_increment()1380 virtual void unlock_auto_increment() 1381 { 1382 /* 1383 If auto_increment_safe_stmt_log_lock is true, we have to keep the lock. 1384 It will be set to false and thus unlocked at the end of the statement by 1385 ha_partition::release_auto_increment. 1386 */ 1387 if (auto_increment_lock && !auto_increment_safe_stmt_log_lock) 1388 { 1389 auto_increment_lock= FALSE; 1390 part_share->unlock_auto_inc(); 1391 } 1392 } set_auto_increment_if_higher(Field * field)1393 virtual void set_auto_increment_if_higher(Field *field) 1394 { 1395 ulonglong nr= (((Field_num*) field)->unsigned_flag || 1396 field->val_int() > 0) ? 
field->val_int() : 0; 1397 lock_auto_increment(); 1398 DBUG_ASSERT(part_share->auto_inc_initialized || 1399 !can_use_for_auto_inc_init()); 1400 /* must check when the mutex is taken */ 1401 if (nr >= part_share->next_auto_inc_val) 1402 part_share->next_auto_inc_val= nr + 1; 1403 unlock_auto_increment(); 1404 } 1405 check_insert_autoincrement()1406 void check_insert_autoincrement() 1407 { 1408 /* 1409 If we INSERT into the table having the AUTO_INCREMENT column, 1410 we have to read all partitions for the next autoincrement value 1411 unless we already did it. 1412 */ 1413 if (!part_share->auto_inc_initialized && 1414 ha_thd()->lex->sql_command == SQLCOM_INSERT && 1415 table->found_next_number_field) 1416 bitmap_set_all(&m_part_info->read_partitions); 1417 } 1418 1419 public: 1420 1421 /* 1422 ------------------------------------------------------------------------- 1423 MODULE initialize handler for HANDLER call 1424 ------------------------------------------------------------------------- 1425 This method is a special InnoDB method called before a HANDLER query. 1426 ------------------------------------------------------------------------- 1427 */ 1428 virtual void init_table_handle_for_HANDLER(); 1429 1430 /* 1431 The remainder of this file defines the handler methods not implemented 1432 by the partition handler 1433 */ 1434 1435 /* 1436 ------------------------------------------------------------------------- 1437 MODULE foreign key support 1438 ------------------------------------------------------------------------- 1439 The following methods are used to implement foreign keys as supported by 1440 InnoDB. Implement this ?? 1441 get_foreign_key_create_info is used by SHOW CREATE TABLE to get a textual 1442 description of how the CREATE TABLE part to define FOREIGN KEY's is done. 1443 free_foreign_key_create_info is used to free the memory area that provided 1444 this description. 
1445 can_switch_engines checks if it is ok to switch to a new engine based on 1446 the foreign key info in the table. 1447 ------------------------------------------------------------------------- 1448 1449 virtual char* get_foreign_key_create_info() 1450 virtual void free_foreign_key_create_info(char* str) 1451 1452 virtual int get_foreign_key_list(THD *thd, 1453 List<FOREIGN_KEY_INFO> *f_key_list) 1454 virtual uint referenced_by_foreign_key() 1455 */ 1456 virtual bool can_switch_engines(); 1457 /* 1458 ------------------------------------------------------------------------- 1459 MODULE fulltext index 1460 ------------------------------------------------------------------------- 1461 */ 1462 void ft_close_search(FT_INFO *handler); 1463 virtual int ft_init(); 1464 virtual int pre_ft_init(); 1465 virtual void ft_end(); 1466 virtual int pre_ft_end(); 1467 virtual FT_INFO *ft_init_ext(uint flags, uint inx, String *key); 1468 virtual int ft_read(uchar *buf); 1469 virtual int pre_ft_read(bool use_parallel); 1470 1471 /* 1472 ------------------------------------------------------------------------- 1473 MODULE restart full table scan at position (MyISAM) 1474 ------------------------------------------------------------------------- 1475 The following method is only used by MyISAM when used as 1476 temporary tables in a join. 1477 virtual int restart_rnd_next(uchar *buf, uchar *pos); 1478 */ 1479 1480 /* 1481 ------------------------------------------------------------------------- 1482 MODULE in-place ALTER TABLE 1483 ------------------------------------------------------------------------- 1484 These methods are in the handler interface. 
(used by innodb-plugin) 1485 They are used for in-place alter table: 1486 ------------------------------------------------------------------------- 1487 */ 1488 virtual enum_alter_inplace_result 1489 check_if_supported_inplace_alter(TABLE *altered_table, 1490 Alter_inplace_info *ha_alter_info); 1491 virtual bool prepare_inplace_alter_table(TABLE *altered_table, 1492 Alter_inplace_info *ha_alter_info); 1493 virtual bool inplace_alter_table(TABLE *altered_table, 1494 Alter_inplace_info *ha_alter_info); 1495 virtual bool commit_inplace_alter_table(TABLE *altered_table, 1496 Alter_inplace_info *ha_alter_info, 1497 bool commit); 1498 virtual void notify_table_changed(); 1499 1500 /* 1501 ------------------------------------------------------------------------- 1502 MODULE tablespace support 1503 ------------------------------------------------------------------------- 1504 Admin of table spaces is not applicable to the partition handler (InnoDB) 1505 This means that the following method is not implemented: 1506 ------------------------------------------------------------------------- 1507 virtual int discard_or_import_tablespace(my_bool discard) 1508 */ 1509 1510 /* 1511 ------------------------------------------------------------------------- 1512 MODULE admin MyISAM 1513 ------------------------------------------------------------------------- 1514 1515 ------------------------------------------------------------------------- 1516 OPTIMIZE TABLE, CHECK TABLE, ANALYZE TABLE and REPAIR TABLE are 1517 mapped to a routine that handles looping over a given set of 1518 partitions and those routines send a flag indicating to execute on 1519 all partitions. 
1520 ------------------------------------------------------------------------- 1521 */ 1522 virtual int optimize(THD* thd, HA_CHECK_OPT *check_opt); 1523 virtual int analyze(THD* thd, HA_CHECK_OPT *check_opt); 1524 virtual int check(THD* thd, HA_CHECK_OPT *check_opt); 1525 virtual int repair(THD* thd, HA_CHECK_OPT *check_opt); 1526 virtual bool check_and_repair(THD *thd); 1527 virtual bool auto_repair(int error) const; 1528 virtual bool is_crashed() const; 1529 virtual int check_for_upgrade(HA_CHECK_OPT *check_opt); 1530 1531 /* 1532 ------------------------------------------------------------------------- 1533 MODULE condition pushdown 1534 ------------------------------------------------------------------------- 1535 */ 1536 virtual const COND *cond_push(const COND *cond); 1537 virtual void cond_pop(); 1538 virtual void clear_top_table_fields(); 1539 virtual int info_push(uint info_type, void *info); 1540 1541 private: 1542 int handle_opt_partitions(THD *thd, HA_CHECK_OPT *check_opt, uint flags); 1543 int handle_opt_part(THD *thd, HA_CHECK_OPT *check_opt, uint part_id, 1544 uint flag); 1545 /** 1546 Check if the rows are placed in the correct partition. If the given 1547 argument is true, then move the rows to the correct partition. 1548 */ 1549 int check_misplaced_rows(uint read_part_id, bool repair); 1550 void append_row_to_str(String &str); 1551 public: 1552 1553 /* Enabled keycache for performance reasons, WL#4571 */ 1554 virtual int assign_to_keycache(THD* thd, HA_CHECK_OPT *check_opt); 1555 virtual int preload_keys(THD* thd, HA_CHECK_OPT* check_opt); 1556 virtual TABLE_LIST *get_next_global_for_child(); 1557 1558 /* 1559 ------------------------------------------------------------------------- 1560 MODULE enable/disable indexes 1561 ------------------------------------------------------------------------- 1562 Enable/Disable Indexes are only supported by HEAP and MyISAM. 
1563 ------------------------------------------------------------------------- 1564 */ 1565 virtual int disable_indexes(uint mode); 1566 virtual int enable_indexes(uint mode); 1567 virtual int indexes_are_disabled(void); 1568 1569 /* 1570 ------------------------------------------------------------------------- 1571 MODULE append_create_info 1572 ------------------------------------------------------------------------- 1573 append_create_info is only used by MyISAM MERGE tables and the partition 1574 handler will not support this handler as underlying handler. 1575 Implement this?? 1576 ------------------------------------------------------------------------- 1577 virtual void append_create_info(String *packet) 1578 */ 1579 1580 /* 1581 the following heavily relies on the fact that all partitions 1582 are in the same storage engine. 1583 1584 When this limitation is lifted, the following hack should go away, 1585 and a proper interface for engines needs to be introduced: 1586 1587 an PARTITION_SHARE structure that has a pointer to the TABLE_SHARE. 1588 is given to engines everywhere where TABLE_SHARE is used now 1589 has members like option_struct, ha_data 1590 perhaps TABLE needs to be split the same way too... 1591 1592 this can also be done before partition will support a mix of engines, 1593 but preferably together with other incompatible API changes. 1594 */ partition_ht()1595 virtual handlerton *partition_ht() const 1596 { 1597 handlerton *h= m_file[0]->ht; 1598 for (uint i=1; i < m_tot_parts; i++) 1599 DBUG_ASSERT(h == m_file[i]->ht); 1600 return h; 1601 } 1602 part_records(partition_element * part_elem)1603 ha_rows part_records(partition_element *part_elem) 1604 { 1605 DBUG_ASSERT(m_part_info); 1606 uint32 sub_factor= m_part_info->num_subparts ? 
m_part_info->num_subparts : 1; 1607 uint32 part_id= part_elem->id * sub_factor; 1608 uint32 part_id_end= part_id + sub_factor; 1609 DBUG_ASSERT(part_id_end <= m_tot_parts); 1610 ha_rows part_recs= 0; 1611 for (; part_id < part_id_end; ++part_id) 1612 { 1613 handler *file= m_file[part_id]; 1614 DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); 1615 file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); 1616 part_recs+= file->stats.records; 1617 } 1618 return part_recs; 1619 } 1620 1621 friend int cmp_key_rowid_part_id(void *ptr, uchar *ref1, uchar *ref2); 1622 friend int cmp_key_part_id(void *key_p, uchar *ref1, uchar *ref2); 1623 }; 1624 #endif /* HA_PARTITION_INCLUDED */ 1625