1 #ifndef PARTITION_HANDLER_INCLUDED
2 #define PARTITION_HANDLER_INCLUDED
3 
4 /*
5    Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License, version 2.0,
9    as published by the Free Software Foundation.
10 
11    This program is also distributed with certain software (including
12    but not limited to OpenSSL) that is licensed under separate terms,
13    as designated in a particular file or component or in included license
14    documentation.  The authors of MySQL hereby grant you an additional
15    permission to link the program and your derivative works with the
16    separately licensed software that they have included with MySQL.
17 
18    This program is distributed in the hope that it will be useful,
19    but WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21    GNU General Public License, version 2.0, for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
26 */
27 
28 #include "my_global.h"            // uint etc.
29 #include "my_base.h"              // ha_rows.
30 #include "handler.h"              // Handler_share
31 #include "sql_partition.h"        // part_id_range
32 #include "mysqld_error.h"         // ER_ILLEGAL_HA
33 #include "priority_queue.h"
34 #include "key.h"                  // key_rec_cmp
35 #include <vector>
36 
37 #define PARTITION_BYTES_IN_POS 2
38 
39 /* forward declarations */
40 typedef struct st_ha_create_information HA_CREATE_INFO;
41 typedef struct st_mem_root MEM_ROOT;
42 
43 static const uint NO_CURRENT_PART_ID= UINT_MAX32;
44 
45 /**
46   bits in Partition_handler::alter_flags():
47 
48   HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
49   supported at all.
  HA_FAST_CHANGE_PARTITION means that optimized variants of the changes
  exist but they are not necessarily done online.
52 
53   HA_ONLINE_DOUBLE_WRITE means that the handler supports writing to both
54   the new partition and to the old partitions when updating through the
55   old partitioning schema while performing a change of the partitioning.
56   This means that we can support updating of the table while performing
57   the copy phase of the change. For no lock at all also a double write
58   from new to old must exist and this is not required when this flag is
59   set.
60   This is actually removed even before it was introduced the first time.
61   The new idea is that handlers will handle the lock level already in
62   store_lock for ALTER TABLE partitions.
63   TODO: Implement this via the alter-inplace api.
64 */
65 #define HA_PARTITION_FUNCTION_SUPPORTED         (1L << 0)
66 #define HA_FAST_CHANGE_PARTITION                (1L << 1)
67 
/**
  Identifier for an operation applied to partitions.

  NOTE(review): the enumerator names suggest the admin statements
  OPTIMIZE/ANALYZE/CHECK/REPAIR and the key cache operations - confirm
  against the callers. The numeric values are spelled out explicitly and
  are the same as the implicit ones (0..5).
*/
enum enum_part_operation
{
  OPTIMIZE_PARTS= 0,
  ANALYZE_PARTS= 1,
  CHECK_PARTS= 2,
  REPAIR_PARTS= 3,
  ASSIGN_KEYCACHE_PARTS= 4,
  PRELOAD_KEYS_PARTS= 5
};
76 
77 /** Struct used for partition_name_hash */
typedef struct st_part_name_def
{
  /* [Sub]partition name; presumably the hash key - verify against the hash setup. */
  uchar *partition_name;
  /* Presumably the length of partition_name in bytes - TODO confirm. */
  uint length;
  /* Id of the partition or subpartition the name refers to. */
  uint32 part_id;
  /* Non-zero if the entry describes a subpartition, zero for a partition. */
  my_bool is_subpart;
} PART_NAME_DEF;
85 
86 
87 /**
88   Initialize partitioning (currently only PSI keys).
89 */
90 void partitioning_init();
91 
92 
93 /**
94   Partition specific Handler_share.
95 */
96 class Partition_share : public Handler_share
97 {
98 public:
99   Partition_share();
100   ~Partition_share();
101 
102   /** Set if auto increment is used an initialized. */
103   bool auto_inc_initialized;
104   /**
105     Mutex protecting next_auto_inc_val.
106     Initialized if table uses auto increment.
107   */
108   mysql_mutex_t *auto_inc_mutex;
109   /** First non reserved auto increment value. */
110   ulonglong next_auto_inc_val;
111   /**
112     Hash of partition names. Initialized by the first handler instance of a
113     table_share calling populate_partition_name_hash().
114     After that it is read-only, i.e. no locking required for reading.
115   */
116   HASH partition_name_hash;
117   /** flag that the name hash is initialized, so it only will do it once. */
118   bool partition_name_hash_initialized;
119 
120   /**
121     Initializes and sets auto_inc_mutex.
122     Only needed to be called if the table have an auto increment.
123     Must hold TABLE_SHARE::LOCK_ha_data when calling.
124   */
125   bool init_auto_inc_mutex(TABLE_SHARE *table_share);
126   /**
127     Release reserved auto increment values not used.
128     @param thd             Thread.
129     @param table_share     Table Share
130     @param next_insert_id  Next insert id (first non used auto inc value).
131     @param max_reserved    End of reserved auto inc range.
132   */
133   void release_auto_inc_if_possible(THD *thd, TABLE_SHARE *table_share,
134                                     const ulonglong next_insert_id,
135                                     const ulonglong max_reserved);
136 
137   /** lock mutex protecting auto increment value next_auto_inc_val. */
lock_auto_inc()138   inline void lock_auto_inc()
139   {
140     DBUG_ASSERT(auto_inc_mutex);
141     mysql_mutex_lock(auto_inc_mutex);
142   }
143   /** unlock mutex protecting auto increment value next_auto_inc_val. */
unlock_auto_inc()144   inline void unlock_auto_inc()
145   {
146     DBUG_ASSERT(auto_inc_mutex);
147     mysql_mutex_unlock(auto_inc_mutex);
148   }
149   /**
150     Populate partition_name_hash with partition and subpartition names
151     from part_info.
152     @param part_info  Partition info containing all partitions metadata.
153 
154     @return Operation status.
155       @retval false Success.
156       @retval true  Failure.
157   */
158   bool populate_partition_name_hash(partition_info *part_info);
159   /** Get partition name.
160 
161   @param part_id  Partition id (for subpartitioned table only subpartition
162                   names will be returned.)
163 
164   @return partition name or NULL if error.
165   */
166   const char *get_partition_name(size_t part_id) const;
167 private:
168   const uchar **partition_names;
169   /**
170     Insert [sub]partition name into  partition_name_hash
171     @param name        Partition name.
172     @param part_id     Partition id.
173     @param is_subpart  True if subpartition else partition.
174 
175     @return Operation status.
176       @retval false Success.
177       @retval true  Failure.
178   */
179   bool insert_partition_name_in_hash(const char *name,
180                                      uint part_id,
181                                      bool is_subpart);
182 };
183 
184 
185 /**
186   Class for partitioning specific operations.
187 
188   Returned from handler::get_partition_handler().
189 */
190 class Partition_handler :public Sql_alloc
191 {
192 public:
Partition_handler()193   Partition_handler() {}
~Partition_handler()194   ~Partition_handler() {}
195 
196   /**
197     Get dynamic table information from partition.
198 
199     @param[out] stat_info  Statistics struct to fill in.
200     @param[out] check_sum  Check sum value to fill in if supported.
201     @param[in]  part_id    Partition to report for.
202 
203     @note stat_info and check_sum are initialized by caller.
204     check_sum is only expected to be updated if HA_HAS_CHECKSUM.
205   */
206   virtual void get_dynamic_partition_info(ha_statistics *stat_info,
207                                           ha_checksum *check_sum,
208                                           uint part_id) = 0;
209   /**
210     Get default number of partitions.
211 
212     Used during creating a partitioned table.
213 
214     @param info  Create info.
215     @return Number of default partitions.
216   */
get_default_num_partitions(HA_CREATE_INFO * info)217   virtual int get_default_num_partitions(HA_CREATE_INFO *info) { return 1;}
218   /**
219     Setup auto partitioning.
220 
221     Called for engines with HA_USE_AUTO_PARTITION to setup the partition info
222     object
223 
224     @param[in,out] part_info  Partition object to setup.
225   */
set_auto_partitions(partition_info * part_info)226   virtual void set_auto_partitions(partition_info *part_info) { return; }
227   /**
228     Get number of partitions for table in SE
229 
230     @param name normalized path(same as open) to the table
231 
232     @param[out] num_parts Number of partitions
233 
234     @retval false for success
235     @retval true for failure, for example table didn't exist in engine
236   */
get_num_parts(const char * name,uint * num_parts)237   virtual bool get_num_parts(const char *name,
238                             uint *num_parts)
239   {
240     *num_parts= 0;
241     return false;
242   }
243   /**
244     Set the partition info object to be used by the handler.
245 
246     @param part_info  Partition info to be used by the handler.
247     @param early      True if called when part_info only created and parsed,
248                       but not setup, checked or fixed.
249   */
250   virtual void set_part_info(partition_info *part_info, bool early) = 0;
251   /**
252     Initialize partition.
253 
254     @param mem_root  Memory root for memory allocations.
255 
256     @return Operation status
257       @retval false  Success.
258       @retval true   Failure.
259   */
initialize_partition(MEM_ROOT * mem_root)260   virtual bool initialize_partition(MEM_ROOT *mem_root) {return false;}
261 
262 
263   /**
264     Truncate partitions.
265 
266     Truncate all partitions matching table->part_info->read_partitions.
267     Handler level wrapper for truncating partitions, will ensure that
268     mark_trx_read_write() is called and also checks locking assertions.
269 
270     @return Operation status.
271       @retval    0  Success.
272       @retval != 0  Error code.
273   */
truncate_partition()274   int truncate_partition()
275   {
276     handler *file= get_handler();
277     if (!file)
278     {
279       return HA_ERR_WRONG_COMMAND;
280     }
281     DBUG_ASSERT(file->table_share->tmp_table != NO_TMP_TABLE ||
282                 file->m_lock_type == F_WRLCK);
283     file->mark_trx_read_write();
284     return truncate_partition_low();
285   }
286   /**
287     Change partitions.
288 
289     Change partitions according to their partition_element::part_state set up
290     in prep_alter_part_table(). Will create new partitions and copy requested
291     partitions there. Also updating part_state to reflect current state.
292 
293     Handler level wrapper for changing partitions.
294     This is the reason for having Partition_handler a friend class of handler,
295     mark_trx_read_write() is called and also checks locking assertions.
296     to ensure that mark_trx_read_write() is called and checking the asserts.
297 
298     @param[in]     create_info  Table create info.
299     @param[in]     path         Path including table name.
300     @param[out]    copied       Number of rows copied.
301     @param[out]    deleted      Number of rows deleted.
302   */
change_partitions(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted)303   int change_partitions(HA_CREATE_INFO *create_info,
304                         const char *path,
305                         ulonglong * const copied,
306                         ulonglong * const deleted)
307   {
308     handler *file= get_handler();
309     if (!file)
310     {
311       my_error(ER_ILLEGAL_HA, MYF(0), create_info->alias);
312       return HA_ERR_WRONG_COMMAND;
313     }
314     DBUG_ASSERT(file->table_share->tmp_table != NO_TMP_TABLE ||
315                 file->m_lock_type != F_UNLCK);
316     file->mark_trx_read_write();
317     return change_partitions_low(create_info, path, copied, deleted);
318   }
319   /**
320     Alter flags.
321 
322     Given a set of alter table flags, return which is supported.
323 
324     @param flags  Alter table operation flags.
325 
326     @return Supported alter table flags.
327   */
alter_flags(uint flags)328   virtual uint alter_flags(uint flags) const
329   { return 0; }
330 
331   /**
332     Get partition row type from SE
333     @param       part_id    Id of partition for which row type to be retrieved
334     @return      Partition row type.
335   */
get_partition_row_type(uint part_id)336   virtual enum row_type get_partition_row_type(uint part_id) {
337     return ROW_TYPE_NOT_USED;
338   }
339 
340 private:
341   /**
342     Truncate partition.
343 
344     Low-level primitive for handler, implementing
345     Partition_handler::truncate_partition().
346 
347     @return Operation status
348       @retval    0  Success.
349       @retval != 0  Error code.
350   */
truncate_partition_low()351   virtual int truncate_partition_low()
352   { return HA_ERR_WRONG_COMMAND; }
353   /**
354     Truncate partition.
355 
356     Low-level primitive for handler, implementing
357     Partition_handler::change_partitions().
358 
359     @param[in]     create_info  Table create info.
360     @param[in]     path         Path including table name.
361     @param[out]    copied       Number of rows copied.
362     @param[out]    deleted      Number of rows deleted.
363 
364     @return Operation status
365       @retval    0  Success.
366       @retval != 0  Error code.
367   */
change_partitions_low(HA_CREATE_INFO * create_info,const char * path,ulonglong * const copied,ulonglong * const deleted)368   virtual int change_partitions_low(HA_CREATE_INFO *create_info,
369                                     const char *path,
370                                     ulonglong * const copied,
371                                     ulonglong * const deleted)
372   {
373     my_error(ER_ILLEGAL_HA, MYF(0), create_info->alias);
374     return HA_ERR_WRONG_COMMAND;
375   }
376   /**
377     Return the table handler.
378 
379     For some partitioning specific functions it is still needed to access
380     the handler directly for transaction handling (mark_trx_read_write())
381     and to assert correct locking.
382 
383     @return handler or NULL if not supported.
384   */
get_handler()385   virtual handler *get_handler()
386   { return NULL; }
387 };
388 
389 
390 /// Maps compare function to strict weak ordering required by Priority_queue.
391 struct Key_rec_less
392 {
393   typedef int (*key_compare_fun)(KEY**, uchar *, uchar *);
394 
Key_rec_lessKey_rec_less395   explicit Key_rec_less(KEY **keys)
396     : m_keys(keys), m_fun(key_rec_cmp), m_max_at_top(false)
397   {
398   }
399 
operatorKey_rec_less400   bool operator()(uchar *first, uchar *second)
401   {
402     const int cmpval=
403      (*m_fun)(m_keys, first + m_rec_offset, second + m_rec_offset);
404     return m_max_at_top ? cmpval < 0 : cmpval > 0;
405   }
406 
407   KEY **m_keys;
408   key_compare_fun m_fun;
409   uint m_rec_offset;
410   bool m_max_at_top;
411 };
412 
413 
414 /**
415   Partition_helper is a helper class that implements most generic partitioning
416   functionality such as:
417   table scan, index scan (both ordered and non-ordered),
418   insert (write_row()), delete and update.
419   And includes ALTER TABLE ... ADD/COALESCE/DROP/REORGANIZE/... PARTITION
420   support.
421   It also implements a cache for the auto increment value and check/repair for
422   rows in wrong partition.
423 
424   How to use it:
425   Inherit it and implement:
426   - *_in_part() functions for row operations.
427   - prepare_for_new_partitions(), create_new_partition(), close_new_partitions()
428     write_row_in_new_part() for handling 'fast' alter partition.
429 */
430 class Partition_helper : public Sql_alloc
431 {
432   typedef Priority_queue<uchar *, std::vector<uchar*>, Key_rec_less> Prio_queue;
433 public:
434   Partition_helper(handler *main_handler);
435   ~Partition_helper();
436 
437   /**
438     Set partition info.
439 
440     To be called from Partition_handler.
441 
442     @param  part_info  Partition info to use.
443     @param  early      True if called when part_info only created and parsed,
444                        but not setup, checked or fixed.
445   */
446   virtual void set_part_info_low(partition_info *part_info, bool early);
447   /**
448     Initialize variables used before the table is opened.
449 
450     @param mem_root  Memory root to allocate things from (not yet used).
451 
452     @return Operation status.
453       @retval false success.
454       @retval true  failure.
455   */
init_partitioning(MEM_ROOT * mem_root)456   inline bool init_partitioning(MEM_ROOT *mem_root)
457   {
458 #ifndef DBUG_OFF
459     m_key_not_found_partitions.bitmap= NULL;
460 #endif
461     return false;
462   }
463 
464 
465   /**
466     INSERT/UPDATE/DELETE functions.
467     @see handler.h
468     @{
469   */
470 
471   /**
472     Insert a row to the partitioned table.
473 
474     @param buf The row in MySQL Row Format.
475 
476     @return Operation status.
477       @retval    0 Success
478       @retval != 0 Error code
479   */
480   int ph_write_row(uchar *buf);
481   /**
482     Update an existing row in the partitioned table.
483 
484     Yes, update_row() does what you expect, it updates a row. old_data will
485     have the previous row record in it, while new_data will have the newest
486     data in it.
487     Keep in mind that the server can do updates based on ordering if an
488     ORDER BY clause was used. Consecutive ordering is not guaranteed.
489 
490     If the new record belongs to a different partition than the old record
491     then it will be inserted into the new partition and deleted from the old.
492 
493     new_data is always record[0]
494     old_data is always record[1]
495 
496     @param old_data  The old record in MySQL Row Format.
497     @param new_data  The new record in MySQL Row Format.
498 
499     @return Operation status.
500       @retval    0 Success
501       @retval != 0 Error code
502   */
503   int ph_update_row(const uchar *old_data, uchar *new_data);
504   /**
505     Delete an existing row in the partitioned table.
506 
507     This will delete a row. buf will contain a copy of the row to be deleted.
508     The server will call this right after the current row has been read
509     (from either a previous rnd_xxx() or index_xxx() call).
510     If you keep a pointer to the last row or can access a primary key it will
511     make doing the deletion quite a bit easier.
    Keep in mind that the server does not guarantee consecutive deletions.
513     ORDER BY clauses can be used.
514 
515     buf is either record[0] or record[1]
516 
517     @param buf  The record in MySQL Row Format.
518 
519     @return Operation status.
520       @retval    0 Success
521       @retval != 0 Error code
522   */
523   int ph_delete_row(const uchar *buf);
524 
525   /** @} */
526 
527   /** Release unused auto increment values. */
528   void ph_release_auto_increment();
529   /**
    Calculate key hash value from a null-terminated array of fields.
531     Support function for KEY partitioning.
532 
533     @param field_array   An array of the fields in KEY partitioning
534 
535     @return hash_value calculated
536 
537     @note Uses the hash function on the character set of the field.
538     Integer and floating point fields use the binary character set by default.
539   */
540   static uint32 ph_calculate_key_hash_value(Field **field_array);
541   /** Get checksum for table.
542     @return Checksum or 0 if not supported (which also may be a correct checksum!).
543   */
544   ha_checksum ph_checksum() const;
545 
546   /**
547     MODULE full table scan
548 
549     This module is used for the most basic access method for any table
550     handler. This is to fetch all data through a full table scan. No
551     indexes are needed to implement this part.
552     It contains one method to start the scan (rnd_init) that can also be
553     called multiple times (typical in a nested loop join). Then proceeding
554     to the next record (rnd_next) and closing the scan (rnd_end).
555     To remember a record for later access there is a method (position)
556     and there is a method used to retrieve the record based on the stored
557     position.
558     The position can be a file position, a primary key, a ROWID dependent
559     on the handler below.
560 
561     unlike index_init(), rnd_init() can be called two times
562     without rnd_end() in between (it only makes sense if scan=1).
563     then the second call should prepare for the new table scan
564     (e.g if rnd_init allocates the cursor, second call should
565     position it to the start of the table, no need to deallocate
566     and allocate it again.
567     @see handler.h
568     @{
569   */
570 
571   int ph_rnd_init(bool scan);
572   int ph_rnd_end();
573   int ph_rnd_next(uchar *buf);
574   void ph_position(const uchar *record);
575   int ph_rnd_pos(uchar *buf, uchar *pos);
576 
577   /** @} */
578 
579   /**
580     MODULE index scan
581 
582     This part of the handler interface is used to perform access through
583     indexes. The interface is defined as a scan interface but the handler
584     can also use key lookup if the index is a unique index or a primary
585     key index.
586     Index scans are mostly useful for SELECT queries but are an important
587     part also of UPDATE, DELETE, REPLACE and CREATE TABLE table AS SELECT
588     and so forth.
589     Naturally an index is needed for an index scan and indexes can either
590     be ordered, hash based. Some ordered indexes can return data in order
591     but not necessarily all of them.
592     There are many flags that define the behavior of indexes in the
593     various handlers. These methods are found in the optimizer module.
594     -------------------------------------------------------------------------
595 
596     index_read is called to start a scan of an index. The find_flag defines
597     the semantics of the scan. These flags are defined in
598     include/my_base.h
    index_read_idx is the same but also initializes the index before doing
    the same thing as index_read. Thus it is similar to index_init followed
    by index_read. This is also how we implement it.
602 
603     index_read/index_read_idx does also return the first row. Thus for
604     key lookups, the index_read will be the only call to the handler in
605     the index scan.
606 
607     index_init initializes an index before using it and index_end does
608     any end processing needed.
609     @{
610   */
611 
612   int ph_index_init_setup(uint key_nr, bool sorted);
613   int ph_index_init(uint key_nr, bool sorted);
614   int ph_index_end();
615   /*
616     These methods are used to jump to next or previous entry in the index
617     scan. There are also methods to jump to first and last entry.
618   */
619   int ph_index_first(uchar *buf);
620   int ph_index_last(uchar *buf);
621   int ph_index_next(uchar *buf);
622   int ph_index_next_same(uchar *buf, const uchar *key, uint keylen);
623   int ph_index_prev(uchar *buf);
624   int ph_index_read_map(uchar *buf,
625                         const uchar *key,
626                         key_part_map keypart_map,
627                         enum ha_rkey_function find_flag);
628   int ph_index_read_last_map(uchar *buf,
629                              const uchar *key,
630                              key_part_map keypart_map);
631   int ph_index_read_idx_map(uchar *buf,
632                             uint index,
633                             const uchar *key,
634                             key_part_map keypart_map,
635                             enum ha_rkey_function find_flag);
636   int ph_read_range_first(const key_range *start_key,
637                           const key_range *end_key,
638                           bool eq_range_arg,
639                           bool sorted);
640   int ph_read_range_next();
641   /** @} */
642 
643   /**
644     Functions matching Partition_handler API.
645     @{
646   */
647 
648   /**
649     Get statistics from a specific partition.
650     @param[out] stat_info  Area to report values into.
651     @param[out] check_sum  Check sum of partition.
652     @param[in]  part_id    Partition to report from.
653   */
654   virtual void get_dynamic_partition_info_low(ha_statistics *stat_info,
655                                               ha_checksum *check_sum,
656                                               uint part_id);
657 
658   /**
659     Implement the partition changes defined by ALTER TABLE of partitions.
660 
661     Add and copy if needed a number of partitions, during this operation
662     only read operation is ongoing in the server. This is used by
663     ADD PARTITION all types as well as by REORGANIZE PARTITION. For
664     one-phased implementations it is used also by DROP and COALESCE
665     PARTITIONs.
666     One-phased implementation needs the new frm file, other handlers will
667     get zero length and a NULL reference here.
668 
669     @param[in]  create_info       HA_CREATE_INFO object describing all
670                                   fields and indexes in table
671     @param[in]  path              Complete path of db and table name
672     @param[out] copied            Output parameter where number of copied
673                                   records are added
674     @param[out] deleted           Output parameter where number of deleted
675                                   records are added
676 
677     @return Operation status
678       @retval    0 Success
679       @retval != 0 Failure
680   */
681   virtual int change_partitions(HA_CREATE_INFO *create_info,
682                                 const char *path,
683                                 ulonglong * const copied,
684                                 ulonglong * const deleted);
685   /** @} */
686 
687 protected:
688   /* Common helper functions to be used by inheriting engines. */
689 
690   /*
691     open/close functions.
692   */
693 
694   /**
695     Set m_part_share, Allocate internal bitmaps etc. used by open tables.
696 
    @param part_share  Partition share to use (set as m_part_share).
698 
699     @return Operation status.
700       @retval false success.
701       @retval true  failure.
702   */
703   bool open_partitioning(Partition_share *part_share);
704   /**
705     Close partitioning for a table.
706 
707     Frees memory and release other resources.
708   */
709   void close_partitioning();
710 
711   /**
712     Lock auto increment value if needed.
713   */
lock_auto_increment()714   inline void lock_auto_increment()
715   {
716     /* lock already taken */
717     if (m_auto_increment_safe_stmt_log_lock)
718       return;
719     DBUG_ASSERT(!m_auto_increment_lock);
720     if(m_table->s->tmp_table == NO_TMP_TABLE)
721     {
722       m_auto_increment_lock= true;
723       m_part_share->lock_auto_inc();
724     }
725   }
726   /**
727     unlock auto increment.
728   */
unlock_auto_increment()729   inline void unlock_auto_increment()
730   {
731     /*
732       If m_auto_increment_safe_stmt_log_lock is true, we have to keep the lock.
733       It will be set to false and thus unlocked at the end of the statement by
734       ha_partition::release_auto_increment.
735     */
736     if(m_auto_increment_lock && !m_auto_increment_safe_stmt_log_lock)
737     {
738       m_part_share->unlock_auto_inc();
739       m_auto_increment_lock= false;
740     }
741   }
742   /**
743     Get auto increment.
744 
745     Only to be used for auto increment values that are the first field in
746     an unique index.
747 
748     @param[in]  increment           Increment between generated numbers.
749     @param[in]  nb_desired_values   Number of values requested.
750     @param[out] first_value         First reserved value (ULLONG_MAX on error).
751     @param[out] nb_reserved_values  Number of values reserved.
752   */
753   void get_auto_increment_first_field(ulonglong increment,
754                                       ulonglong nb_desired_values,
755                                       ulonglong *first_value,
756                                       ulonglong *nb_reserved_values);
757 
758   /**
759     Initialize the record priority queue used for sorted index scans.
760     @return Operation status.
761       @retval    0   Success.
762       @retval != 0   Error code.
763   */
764   int init_record_priority_queue();
765   /**
766     Destroy the record priority queue used for sorted index scans.
767   */
768   void destroy_record_priority_queue();
769   /*
770     Administrative support functions.
771   */
772 
773   /** Print partitioning specific error.
774     @param error   Error code.
775     @param errflag Error flag.
776     @return false if error is printed else true.
777   */
778   bool print_partition_error(int error, myf errflag);
779   /**
780     Print a message row formatted for ANALYZE/CHECK/OPTIMIZE/REPAIR TABLE.
781 
782     Modeled after mi_check_print_msg.
783 
784     @param thd         Thread context.
785     @param len         Needed length for message buffer.
786     @param msg_type    Message type.
787     @param db_name     Database name.
788     @param table_name  Table name.
789     @param op_name     Operation name.
790     @param fmt         Message (in printf format with additional arguments).
791 
792     @return Operation status.
793       @retval false for success else true.
794   */
795   bool print_admin_msg(THD *thd,
796                        uint len,
797                        const char *msg_type,
798                        const char *db_name,
799                        const char *table_name,
800                        const char *op_name,
801                        const char *fmt,
802                        ...);
803   /**
804     Check/fix misplaced rows.
805 
806     @param part_id  Partition to check/fix.
807     @param repair   If true, move misplaced rows to correct partition.
808 
809     @return Operation status.
810       @retval    0  Success
811       @retval != 0  Error
812   */
813   int check_misplaced_rows(uint part_id, bool repair);
814   /**
815     Set used partitions bitmap from Alter_info.
816 
817     @return false if success else true.
818   */
819   bool set_altered_partitions();
820 
821 private:
822   enum partition_index_scan_type
823   {
824     PARTITION_INDEX_READ= 1,
825     PARTITION_INDEX_FIRST,
826     PARTITION_INDEX_FIRST_UNORDERED,
827     PARTITION_INDEX_LAST,
828     PARTITION_INDEX_READ_LAST,
829     PARTITION_READ_RANGE,
830     PARTITION_NO_INDEX_SCAN
831   };
832 
833   /** handler to use (ha_partition, ha_innopart etc.) */
834   handler *m_handler;
835 
836   /*
837     Access methods to protected areas in handler to avoid adding
838     friend class Partition_helper in class handler.
839   */
840   virtual THD *get_thd() const = 0;
841   virtual TABLE *get_table() const = 0;
842   virtual bool get_eq_range() const = 0;
843   virtual void set_eq_range(bool eq_range) = 0;
844   virtual void set_range_key_part(KEY_PART_INFO *key_part) = 0;
845 
846   /*
847     Implementation of per partition operation by instantiated engine.
848     These must be implemented in the 'real' partition_helper subclass.
849   */
850 
851   /**
852     Write a row in the specified partition.
853 
854     @see handler::write_row().
855 
856     @param  part_id  Partition to write to.
857     @param  buf      Buffer with data to write.
858 
859     @return Operation status.
860       @retval    0  Success.
861       @retval != 0  Error code.
862   */
863   virtual int write_row_in_part(uint part_id, uchar *buf) = 0;
864   /**
865     Update a row in the specified partition (engine-specific, pure virtual).
866 
867     @see handler::update_row().
868 
869     @param  new_part_id  Partition to update in.
870     @param  old_data     Buffer containing old row.
871     @param  new_data     Buffer containing new row.
872 
873     @return Operation status.
874       @retval    0  Success.
875       @retval != 0  Error code.
876   */
877   virtual int update_row_in_part(uint new_part_id,
878                                  const uchar *old_data,
879                                  uchar *new_data) = 0;
880   /**
881     Delete an existing row in the specified partition
882     (engine-specific, pure virtual).
883     @see handler::delete_row().
884 
885     @param  part_id  Partition to delete from.
886     @param  buf      Buffer containing row to delete.
887 
888     @return Operation status.
889       @retval    0  Success.
890       @retval != 0  Error code.
891   */
892   virtual int delete_row_in_part(uint part_id, const uchar *buf) = 0;
893   /**
894     Initialize the shared auto increment value.
895     Also sets stats.auto_increment_value.
896 
897     @param no_lock  If HA_STATUS_NO_LOCK should be used in info(HA_STATUS_AUTO).
898     @return Operation status (NOTE(review): presumably 0 on success -- confirm).
899   */
900   virtual int initialize_auto_increment(bool no_lock) = 0;
901   /** Release auto_increment in all underlying partitions. No-op by default. */
902   virtual void release_auto_increment_all_parts() {}
903   /** Save or persist the current max auto increment. No-op by default. */
904   virtual void save_auto_increment(ulonglong nr) {}
905   /**
906     Per-partition equivalents of the handler rnd_* and index_* functions.
907 
908     @see class handler.
909   */
910   virtual int rnd_init_in_part(uint part_id, bool table_scan) = 0;
911   int ph_rnd_next_in_part(uint part_id, uchar *buf);  // Not virtual, unlike rnd_next_in_part() below.
912   virtual int rnd_next_in_part(uint part_id, uchar *buf) = 0;
913   virtual int rnd_end_in_part(uint part_id, bool scan) = 0;
914   virtual void position_in_last_part(uchar *ref, const uchar *row) = 0;
915   /* If ph_rnd_pos is used this must be overridden: the default asserts and returns HA_ERR_WRONG_COMMAND. */
916   virtual int rnd_pos_in_part(uint part_id, uchar *buf, uchar *pos)
917   { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
index_init_in_part(uint part,uint keynr,bool sorted)918   virtual int index_init_in_part(uint part, uint keynr, bool sorted)
919   { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
index_end_in_part(uint part)920   virtual int index_end_in_part(uint part)
921   { DBUG_ASSERT(0); return HA_ERR_WRONG_COMMAND; }
922   virtual int index_first_in_part(uint part, uchar *buf) = 0;  // Per-partition index_first.
923   virtual int index_last_in_part(uint part, uchar *buf) = 0;  // Per-partition index_last.
924   virtual int index_prev_in_part(uint part, uchar *buf) = 0;  // Per-partition index_prev.
925   virtual int index_next_in_part(uint part, uchar *buf) = 0;  // Per-partition index_next.
926   virtual int index_next_same_in_part(uint part,  // Per-partition index_next_same.
927                                       uchar *buf,
928                                       const uchar *key,
929                                       uint length) = 0;
930   virtual int index_read_map_in_part(uint part,  // Per-partition index_read_map.
931                                      uchar *buf,
932                                      const uchar *key,
933                                      key_part_map keypart_map,
934                                      enum ha_rkey_function find_flag) = 0;
935   virtual int index_read_last_map_in_part(uint part,  // Per-partition index_read_last_map.
936                                           uchar *buf,
937                                           const uchar *key,
938                                           key_part_map keypart_map) = 0;
939   /**
940     Do read_range_first in the specified partition (pure virtual).
941     If buf is set, the result is copied there instead of to table->record[0].
942   */
943   virtual int read_range_first_in_part(uint part,
944                                        uchar *buf,
945                                        const key_range *start_key,
946                                        const key_range *end_key,
947                                        bool eq_range,
948                                        bool sorted) = 0;
949   /**
950     Do read_range_next in the specified partition (pure virtual).
951     If buf is set, the result is copied there instead of to table->record[0].
952   */
953   virtual int read_range_next_in_part(uint part, uchar *buf) = 0;
954   virtual int index_read_idx_map_in_part(uint part,  // Per-partition index_read_idx_map.
955                                          uchar *buf,
956                                          uint index,
957                                          const uchar *key,
958                                          key_part_map keypart_map,
959                                          enum ha_rkey_function find_flag) = 0;
960   /**
961     Initialize engine specific resources for the record priority queue
962     used during ordered index reads for multiple partitions.
963 
964     @param used_parts  Number of partitions used in query
965                        (number of set bits in m_part_info->read_partitions).
966 
967     @return Operation status (the default implementation always returns 0).
968       @retval    0   Success.
969       @retval != 0   Error code.
970   */
971   virtual int init_record_priority_queue_for_parts(uint used_parts)
972   {
973     return 0;
974   }
975   /**
976     Destroy and release engine specific resources used by the record
977     priority queue. No-op by default.
978   */
979   virtual void destroy_record_priority_queue_for_parts() {}
980   /**
981     Checksum for a partition; the default asserts (DBUG_ASSERT(0)) and returns 0.
982 
983     @param part_id  Partition to checksum.
984   */
985   virtual ha_checksum checksum_in_part(uint part_id) const
986   { DBUG_ASSERT(0); return 0; }
987   /**
988     Copy a cached row.
989 
990     Used when copying a row from the record priority queue to the return buffer.
991     For some engines, like InnoDB, only marked columns must be copied,
992     to preserve non-read columns; this default copies m_rec_length bytes.
993 
994     @param[out] to_rec    Buffer to copy to.
995     @param[in]  from_rec  Buffer to copy from.
996   */
997   virtual void copy_cached_row(uchar *to_rec, const uchar *from_rec)
998   { memcpy(to_rec, from_rec, m_rec_length); }
999   /**
1000     Prepare for creating new partitions during ALTER TABLE ... PARTITION.
1001     @param  num_partitions  Number of new partitions to be created.
1002     @param  only_create     True if only creating the partitions
1003                             (no open/lock is needed).
1004 
1005     @return Operation status.
1006       @retval    0  Success.
1007       @retval != 0  Error code.
1008   */
1009   virtual int prepare_for_new_partitions(uint num_partitions,
1010                                          bool only_create) = 0;
1011   /**
1012     Create a new partition to be filled during ALTER TABLE ... PARTITION.
1013     @param   table         Table to create the partition in.
1014     @param   create_info   Table/partition specific create info.
1015     @param   part_name     Name of the partition.
1016     @param   new_part_id   Id of the partition in the new table.
1017     @param   part_elem     Partition element.
1018 
1019     @return Operation status.
1020       @retval    0  Success.
1021       @retval != 0  Error code.
1022   */
1023   virtual int create_new_partition(TABLE *table,
1024                                    HA_CREATE_INFO *create_info,
1025                                    const char *part_name,
1026                                    uint new_part_id,
1027                                    partition_element *part_elem) = 0;
1028   /**
1029     Close and finalize the new partitions (pure virtual).
1030   */
1031   virtual void close_new_partitions() = 0;
1032   /**
1033     Write a row to a new partition.
1034     @param  new_part   New partition to write to.
1035     NOTE(review): no row buffer is passed; which buffer is written needs confirming.
1036     @return Operation status.
1037       @retval    0  Success.
1038       @retval != 0  Error code.
1039   */
1040   virtual int write_row_in_new_part(uint new_part) = 0;
1041 
1042   /* Internal helper functions. */
1043   /**
1044     Update the auto increment value if the current row contains a higher value.
1045   */
1046   inline void set_auto_increment_if_higher();
1047   /**
1048     Common routine to set up index scans.
1049 
1050     Find out which partitions we'll need to read when scanning the specified
1051     range.
1052 
1053     If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
1054     as we will not need to do merge ordering.
1055 
1056     @param buf            Buffer to later return record in (this function
1057                           needs it to calculate partitioning function values)
1058 
1059     @param idx_read_flag  True <=> m_start_key has a range start endpoint which
1060                           can probably be used to determine the set of
1061                           partitions to scan.
1062                           False <=> there is no start endpoint.
1063 
1064     @return Operation status.
1065       @retval   0  Success
1066       @retval !=0  Error code
1067   */
1068   int partition_scan_set_up(uchar *buf, bool idx_read_flag);
1069   /**
1070     Common routine to handle index_next with unordered results.
1071 
1072     These routines are used to scan partitions without considering order.
1073     This is performed in two situations.
1074     1) In read_multi_range this is the normal case
1075     2) When performing any type of index_read, index_first, index_last where
1076     all fields in the partition function are bound. In this case the index
1077     scan is performed on only one partition and thus it isn't necessary to
1078     perform any sort.
1079 
1080     @param[out] buf           Read row in MySQL Row Format.
1081     @param[in]  is_next_same  Called from index_next_same.
1082 
1083     @return Operation status.
1084       @retval HA_ERR_END_OF_FILE  End of scan
1085       @retval 0                   Success
1086       @retval other               Error code
1087   */
1088   int handle_unordered_next(uchar *buf, bool is_next_same);
1089   /**
1090     Handle index_next when changing to a new partition.
1091 
1092     This routine is used to start the index scan on the next partition,
1093     both at the initial start and after completing the scan of one partition.
1094 
1095     @param[out] buf  Read row in MySQL Row Format
1096 
1097     @return Operation status.
1098       @retval HA_ERR_END_OF_FILE  End of scan
1099       @retval 0                   Success
1100       @retval other               Error code
1101   */
1102   int handle_unordered_scan_next_partition(uchar *buf);
1103   /**
1104     Common routine to start an index scan with ordered results.
1105 
1106     @param[out] buf  Read row in MySQL Row Format.
1107 
1108     @return Operation status.
1109       @retval HA_ERR_END_OF_FILE    End of scan
1110       @retval HA_ERR_KEY_NOT_FOUND  End of scan
1111       @retval 0                     Success
1112       @retval other                 Error code
1113   */
1114   int handle_ordered_index_scan(uchar *buf);
1115   /**
1116     Add index_next/prev results from partitions without exact match.
1117 
1118     If there were any partitions that returned HA_ERR_KEY_NOT_FOUND when
1119     ha_index_read_map was done, those partitions must be included in the
1120     following index_next/prev call.
1121 
1122     @return Operation status.
1123       @retval HA_ERR_END_OF_FILE    End of scan
1124       @retval 0                     Success
1125       @retval other                 Error code
1126   */
1127   int handle_ordered_index_scan_key_not_found();
1128   /**
1129     Common routine to handle index_prev with ordered results.
1130 
1131     @param[out] buf  Read row in MySQL Row Format.
1132 
1133     @return Operation status.
1134       @retval HA_ERR_END_OF_FILE  End of scan.
1135       @retval 0                   Success.
1136       @retval other               Error code.
1137   */
1138   int handle_ordered_prev(uchar *buf);
1139   /**
1140     Common routine to handle index_next with ordered results.
1141 
1142     @param[out] buf           Read row in MySQL Row Format.
1143     @param[in]  is_next_same  Called from index_next_same.
1144 
1145     @return Operation status.
1146       @retval HA_ERR_END_OF_FILE  End of scan
1147       @retval 0                   Success
1148       @retval other               Error code
1149   */
1150   int handle_ordered_next(uchar *buf, bool is_next_same);
1151   /**
1152     Common routine for a number of index_read variants.
1153 
1154     @param[out] buf             Buffer where the record should be returned.
1155     @param[in]  have_start_key  TRUE <=> the left endpoint is available, i.e.
1156                                 we're in an index_read call or in a
1157                                 read_range_first call with a left endpoint.
1158                                 FALSE <=> there is no left endpoint (we're in
1159                                 a read_range_first() call and the range has no
1160                                 left endpoint).
1161 
1162     @return Operation status.
1163       @retval 0                    OK
1164       @retval HA_ERR_END_OF_FILE   Whole index scanned, without finding the record.
1165       @retval HA_ERR_KEY_NOT_FOUND Record not found, but index cursor positioned.
1166       @retval other                Error code.
1167   */
1168   int common_index_read(uchar *buf, bool have_start_key);
1169   /**
1170     Common routine shared by index_first and index_last.
1171 
1172     @param[out] buf  Read row in MySQL Row Format.
1173 
1174     @return Operation status.
1175       @retval    0  Success.
1176       @retval != 0  Error code.
1177   */
1178   int common_first_last(uchar *buf);
1179   /**
1180     Return the top record in sort order.
1181 
1182     @param[out] buf  Buffer receiving the row in MySQL Row Format.
1183   */
1184   void return_top_record(uchar *buf);
1185   /**
1186     Copy partitions as part of ALTER TABLE of partitions.
1187 
1188     change_partitions has done all the preparations, now it is time to
1189     actually copy the data from the reorganized partitions to the new
1190     partitions. Virtual but not pure, so a derived class may override it.
1191 
1192     @param[out] copied   Number of records copied.
1193     @param[out] deleted  Number of records deleted.
1194 
1195     @return Operation status.
1196       @retval  0  Success
1197       @retval >0  Error code
1198   */
1199   virtual int copy_partitions(ulonglong * const copied,
1200                               ulonglong * const deleted);
1201 
1202   /**
1203     Set table->read_set, taking the partitioning expressions into account.
1204   */
1205   void set_partition_read_set();
1206 
1207   /*
1208     These could be private as well,
1209     but it is easier to expose them for derived classes to use.
1210   */
1211 protected:
1212 
1213   /** Convenience pointer to table from m_handler (i.e. m_handler->table). */
1214   TABLE *m_table;
1215   /** All internal partitioning data! @{ */
1216   /** Table's partitioning info (same as table->part_info). */
1217   partition_info *m_part_info;
1218   /** True if the primary key is clustered. */
1219   bool m_pkey_is_clustered;
1220   /** Cached value of m_part_info->is_sub_partitioned(). */
1221   bool m_is_sub_partitioned;
1222   /** Partition share for auto_inc handling. */
1223   Partition_share *m_part_share;
1224   /** Total number of partitions. */
1225   uint m_tot_parts;
1226   uint m_last_part;                      // Last accessed partition.
1227   const uchar *m_err_rec;                // Record which gave error.
1228   bool m_auto_increment_safe_stmt_log_lock;  // NOTE(review): undocumented; presumably auto-increment lock state -- confirm.
1229   bool m_auto_increment_lock;            // NOTE(review): undocumented; presumably auto-increment lock state -- confirm.
1230   part_id_range m_part_spec;             // Which parts to scan.
1231   uint m_scan_value;                     // Value passed in the rnd_init
1232                                          // call.
1233   key_range m_start_key;                 // Index read key range.
1234   enum partition_index_scan_type m_index_scan_type;// Type of index
1235                                                    // scan.
1236   uint m_rec_length;                     // Local copy of record length.
1237 
1238   bool m_ordered;                        // Ordered/unordered index scan.
1239   bool m_ordered_scan_ongoing;           // Ordered index scan ongoing.
1240   bool m_reverse_order;                  // Scanning in reverse order (prev).
1241   /** Row and key buffer for ordered index scan. */
1242   uchar *m_ordered_rec_buffer;
1243   /** Priority queue used by sorted reads. */
1244   Prio_queue *m_queue;
1245   /** Which partition is to deliver the next result. */
1246   uint m_top_entry;
1247   /** Offset in m_ordered_rec_buffer from part buffer to its record buffer. */
1248   uint m_rec_offset;
1249   /**
1250     Current index used for sorting.
1251     If a clustered PK exists, it is used as a secondary index to
1252     sort on when the first index compares equal in key_rec_cmp.
1253     So with a clustered PK: m_curr_key_info[0]= current index,
1254     m_curr_key_info[1]= pk and [2]= NULL.
1255     Otherwise [0]= current index, [1]= NULL, and we will
1256     sort by rowid as the secondary sort key if the first key is equal.
1257   */
1258   KEY *m_curr_key_info[3];
1259   enum enum_using_ref {  // Ways handler::ref can take part in the priority queue (PQ).
1260     /** handler::ref is not copied to the PQ. */
1261     REF_NOT_USED= 0,
1262     /**
1263       handler::ref is copied to the PQ but is not needed for sorting.
1264     */
1265     REF_STORED_IN_PQ,
1266     /** handler::ref is copied to the PQ and must be used during sorting. */
1267     REF_USED_FOR_SORT};
1268   /** How handler::ref is used in the priority queue. */
1269   enum_using_ref m_ref_usage;
1270   /** Set if the previous index_* call returned HA_ERR_KEY_NOT_FOUND. */
1271   bool m_key_not_found;
1272   /** Bitmap of the partitions that returned HA_ERR_KEY_NOT_FOUND. */
1273   MY_BITMAP m_key_not_found_partitions;
1274   /** @} */
1275 };
1276 #endif /* PARTITION_HANDLER_INCLUDED */
1277