1 /*****************************************************************************
2 
3 Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/row0pread-histogram.h
28 Parallel read histogram interface.
29 
30 Created 2019-04-20 by Darshan M N. */
31 
32 #ifndef row0pread_histogram_h
33 #define row0pread_histogram_h
34 
#include <atomic>
#include <random>
#include "row0pread.h"
#include "ut0counter.h"
38 
39 class Histogram_sampler {
40  public:
41   /** Constructor.
42   @param[in]  max_threads         Maximum number of threads to use.
43   @param[in]  sampling_seed       seed to be used for sampling
44   @param[in]  sampling_percentage percentage of sampling that needs to be done
45   @param[in]  sampling_method     sampling method to be used for sampling */
46   explicit Histogram_sampler(size_t max_threads, int sampling_seed,
47                              double sampling_percentage,
48                              enum_sampling_method sampling_method);
49 
50   /** Destructor. */
51   ~Histogram_sampler();
52 
53   /** Initialize the sampler context.
54   @param[in]  trx   Transaction used for parallel read.
55   @param[in]  index clustered index.
56   @param[in]  prebuilt  prebuilt info
57   @retval true on success. */
58   bool init(trx_t *trx, dict_index_t *index, row_prebuilt_t *prebuilt);
59 
60   /** Buffer next row.
61   @return error code */
62   dberr_t buffer_next();
63 
64   /** End parallel read in case the reader thread is still active and wait for
65   its exit. This can happen if we're ending sampling prematurely. */
66   void buffer_end();
67 
68   /** Set the buffer.
69   @param[in]  buf buffer to be used to store the row converted to MySQL
70   format. */
set(byte * buf)71   void set(byte *buf) { m_buf = buf; }
72 
73   /** Start the sampling process.
74   @return DB_SUCCESS or error code. */
75   dberr_t run();
76 
77   /** Check if the processing of the record needs to be skipped.
78   In case of record belonging to non-leaf page, we decide if the child page
79   pertaining to the record needs to be skipped.
80   In case of record belonging to leaf page, we read the page regardless.
81   @return true if it needs to be skipped, else false. */
82   bool skip();
83 
84  private:
85   /** Wait till there is a request to buffer the next row. */
86   void wait_for_start_of_buffering();
87 
88   /** Wait till the buffering of the row is complete. */
89   void wait_for_end_of_buffering();
90 
91   /** Signal that the next row needs to be buffered. */
92   void signal_start_of_buffering();
93 
94   /** Signal that the buffering of the row is complete. */
95   void signal_end_of_buffering();
96 
97   /** Set the error state.
98   @param[in] err                Error state to set to. */
set_error_state(dberr_t err)99   void set_error_state(dberr_t err) { m_err = err; }
100 
101   /** @return true if in error state. */
is_error_set()102   bool is_error_set() const MY_ATTRIBUTE((warn_unused_result)) {
103     return (m_err != DB_SUCCESS);
104   }
105 
106   /** Each parallel reader thread's init function.
107   @param[in]  reader_thread_ctx  context information related to the thread
108   @return DB_SUCCESS or error code. */
109   dberr_t start_callback(Parallel_reader::Thread_ctx *reader_thread_ctx)
110       MY_ATTRIBUTE((warn_unused_result));
111 
112   /** Each parallel reader thread's end function.
113   @param[in]  reader_thread_ctx  context information related to the thread
114   @return DB_SUCCESS or error code. */
115   dberr_t finish_callback(Parallel_reader::Thread_ctx *reader_thread_ctx)
116       MY_ATTRIBUTE((warn_unused_result));
117 
118   /** Convert the row in InnoDB format to MySQL format and store in the buffer
119   for server to use.
120   @param[in]  ctx       Parallel read context.
121   @param[in]  rec       record that needs to be converted
122   @param[in]  offsets   offsets belonging to the record
123   @param[in]  index     index of the record
124   @param[in]  prebuilt  Row meta-data cache.
125   @return DB_SUCCESS or error code. */
126   dberr_t sample_rec(const Parallel_reader::Ctx *ctx, const rec_t *rec,
127                      ulint *offsets, const dict_index_t *index,
128                      row_prebuilt_t *prebuilt);
129 
130   /** For each record in a non-leaf block at level 1 (if leaf level is 0)
131   check if the child page needs to be sampled and if so sample all the rows in
132   the child page.
133   @param[in]  ctx       Parallel read context.
134   @param[in]  prebuilt  Row meta-data cache.
135   @return error code */
136   dberr_t process_non_leaf_rec(const Parallel_reader::Ctx *ctx,
137                                row_prebuilt_t *prebuilt)
138       MY_ATTRIBUTE((warn_unused_result));
139 
140   /** Process the record in the leaf page. This would happen only when the root
141   page is the leaf page and in such a case we process the page regardless of
142   the sampling percentage.
143   @param[in]  ctx       Parallel read context.
144   @param[in]  prebuilt  Row meta-data cache.
145   @return error code */
146   dberr_t process_leaf_rec(const Parallel_reader::Ctx *ctx,
147                            row_prebuilt_t *prebuilt)
148       MY_ATTRIBUTE((warn_unused_result));
149 
150  private:
151   /** Buffer to store the sampled row which is in the MySQL format. */
152   byte *m_buf{nullptr};
153 
154   /** Event to notify if the next row needs to be buffered. */
155   os_event_t m_start_buffer_event;
156 
157   /** Event to notify if the next row has been buffered. */
158   os_event_t m_end_buffer_event;
159 
160   /** Error code when the row was buffered. */
161   dberr_t m_err{DB_SUCCESS};
162 
163   /** The parallel reader. */
164   Parallel_reader m_parallel_reader;
165 
166   /** Random generator engine used to provide us random uniformly distrubuted
167   values required to decide if the row in question needs to be sampled or
168   not. */
169   std::mt19937 m_random_generator;
170 
171   /** Uniform distribution used by the random generator. */
172   static std::uniform_real_distribution<double> m_distribution;
173 
174   /** Sampling method to be used for sampling. */
175   enum_sampling_method m_sampling_method{enum_sampling_method::NONE};
176 
177   /** Sampling percentage to be used for sampling */
178   double m_sampling_percentage{};
179 
180   /** Sampling seed to be used for sampling */
181   int m_sampling_seed{};
182 
183   /** Number of rows sampled */
184   std::atomic_size_t m_n_sampled;
185 };
186 
187 #endif /* !row0pread_histogram_h */
188