1 /*
2    Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #ifndef THRMAN_H
26 #define THRMAN_H
27 
28 #include <SimulatedBlock.hpp>
29 #include <LocalProxy.hpp>
30 #include <NdbGetRUsage.h>
31 #include <NdbTick.h>
32 #include <mt.hpp>
33 #include <NdbMutex.h>
34 #include <NdbCondition.h>
35 #include <signaldata/Sync.hpp>
36 
37 #define JAM_FILE_ID 340
38 
39 //#define DEBUG_CPU_USAGE 1
40 class Thrman : public SimulatedBlock
41 {
42   friend class ThrmanProxy;
43 public:
44   Thrman(Block_context& ctx, Uint32 instanceNumber = 0);
45   virtual ~Thrman();
46   BLOCK_DEFINES(Thrman);
47 
48   void execDBINFO_SCANREQ(Signal*);
49   void execCONTINUEB(Signal*);
50   void execGET_CPU_USAGE_REQ(Signal*);
51   void execOVERLOAD_STATUS_REP(Signal*);
52   void execNODE_OVERLOAD_STATUS_ORD(Signal*);
53   void execREAD_CONFIG_REQ(Signal*);
54   void execSEND_THREAD_STATUS_REP(Signal*);
55   void execSET_WAKEUP_THREAD_ORD(Signal*);
56   void execWAKEUP_THREAD_ORD(Signal*);
57   void execSEND_WAKEUP_THREAD_ORD(Signal*);
58   void execFREEZE_THREAD_REQ(Signal*);
59   void execFREEZE_ACTION_CONF(Signal*);
60   void execSTTOR(Signal*);
61   void execMEASURE_WAKEUP_TIME_ORD(Signal*);
62   void execDUMP_STATE_ORD(Signal*);
63 
64 public:
65   /* Normally called locally, but can be called from mt.cpp as well. */
66   void check_spintime(bool local_call);
67 protected:
68 
69 private:
70 
71   /* Private variables */
72   Uint32 m_num_send_threads;
73   Uint32 m_num_threads;
74   Uint32 m_send_thread_percentage;
75   Uint32 m_node_overload_level;
76 
77   Uint32 m_spin_time_change_count;
78   bool m_recv_thread;
79   bool m_tc_thread;
80   bool m_ldm_thread;
81 
82   const char *m_thread_name;
83   const char *m_send_thread_name;
84   const char *m_thread_description;
85   const char *m_send_thread_description;
86 
87   struct ndb_rusage m_last_50ms_rusage;
88   struct ndb_rusage m_last_1sec_rusage;
89   struct ndb_rusage m_last_20sec_rusage;
90 
91   NDB_TICKS prev_50ms_tick;
92   NDB_TICKS prev_1sec_tick;
93   NDB_TICKS prev_20sec_tick;
94 
95   static const Uint32 ZCONTINUEB_MEASURE_CPU_USAGE = 1;
96   static const Uint32 ZWAIT_ALL_STOP = 2;
97   static const Uint32 ZWAIT_ALL_START = 3;
98   static const Uint32 ZCONTINUEB_CHECK_SPINTIME = 4
99 ;
100   static const Uint32 default_cpu_load = 95;
101 
102   /**
103    * Variables and methods used to synchronize all threads
104    * in the data node to perform a synchronized action.
105    */
106   void wait_freeze(bool ret);
107   void wait_all_stop(Signal*);
108   void wait_all_start(Signal*);
109 
110   FreezeThreadReq m_freeze_req;
111 
112   struct MeasurementRecord
113   {
MeasurementRecordThrman::MeasurementRecord114     MeasurementRecord()
115       : m_first_measure_done(false)
116     {}
117 
118     /**
119      * This represents one measurement and we collect the following
120      * information:
121      *
122      * User time as reported by GetRUsage
123      * Kernel time as reported by GetRUsage
124      * Idle time as calculated by the above two
125      *
126      * Sleep time as reported by calls in thread itself
127      * Send time as reported by calls in thread itself
128      * Execution time as reported by calls in thread itself
129      * Also time spent waiting for buffer full condition to
130      * disappear.
131      *
132      * Elapsed time for this measurement
133      */
134     Uint64 m_user_time_os;
135     Uint64 m_kernel_time_os;
136     Uint64 m_idle_time_os;
137 
138     Uint64 m_exec_time_thread;
139     Uint64 m_spin_time_thread;
140     Uint64 m_sleep_time_thread;
141     Uint64 m_send_time_thread;
142     Uint64 m_buffer_full_time_thread;
143 
144     Uint64 m_elapsed_time;
145     bool m_first_measure_done;
146 
147     union {
148       Uint32 nextPool;
149       Uint32 nextList;
150     };
151     Uint32 prevList;
152   };
153   typedef Ptr<MeasurementRecord> MeasurementRecordPtr;
154   typedef ArrayPool<MeasurementRecord> MeasurementRecord_pool;
155   typedef DLCFifoList<MeasurementRecord_pool> MeasurementRecord_fifo;
156 
157   MeasurementRecord_pool c_measurementRecordPool;
158 
159   MeasurementRecord_fifo c_next_50ms_measure;
160   MeasurementRecord_fifo c_next_1sec_measure;
161   MeasurementRecord_fifo c_next_20sec_measure;
162 
163   MeasurementRecord m_last_50ms_base_measure;
164   MeasurementRecord m_last_1sec_base_measure;
165   MeasurementRecord m_last_20sec_base_measure;
166 
167   struct SendThreadMeasurement
168   {
SendThreadMeasurementThrman::SendThreadMeasurement169     SendThreadMeasurement()
170       : m_first_measure_done(false)
171     {}
172 
173     bool m_first_measure_done;
174     Uint64 m_elapsed_time;
175     Uint64 m_exec_time;
176     Uint64 m_spin_time;
177     Uint64 m_sleep_time;
178     Uint64 m_user_time_os;
179     Uint64 m_kernel_time_os;
180     Uint64 m_idle_time_os;
181     Uint64 m_elapsed_time_os;
182     union {
183       Uint32 nextPool;
184       Uint32 nextList;
185     };
186     Uint32 prevList;
187   };
188   typedef Ptr<SendThreadMeasurement> SendThreadMeasurementPtr;
189   typedef ArrayPool<SendThreadMeasurement> SendThreadMeasurement_pool;
190   typedef DLCFifoList<SendThreadMeasurement_pool> SendThreadMeasurement_fifo;
191   typedef LocalDLCFifoList<SendThreadMeasurement_pool>
192                                Local_SendThreadMeasurement_fifo;
193 
194   SendThreadMeasurement_pool c_sendThreadMeasurementPool;
195 
196   struct SendThreadRecord
197   {
198     SendThreadMeasurement m_last_50ms_send_thread_measure;
199     SendThreadMeasurement m_last_1sec_send_thread_measure;
200     SendThreadMeasurement m_last_20sec_send_thread_measure;
201 
202     SendThreadMeasurement_fifo::Head m_send_thread_50ms_measurements;
203     SendThreadMeasurement_fifo::Head m_send_thread_1sec_measurements;
204     SendThreadMeasurement_fifo::Head m_send_thread_20sec_measurements;
205 
206     union {
207       Uint32 nextPool;
208       Uint32 nextList;
209     };
210   };
211   typedef Ptr<SendThreadRecord> SendThreadPtr;
212   typedef ArrayPool<SendThreadRecord> SendThreadRecord_pool;
213 
214   SendThreadRecord_pool c_sendThreadRecordPool;
215 
216   struct MeasureStats
217   {
218     Uint64 min_os_percentage;
219     Uint64 min_next_os_percentage;
220     Uint64 max_os_percentage;
221     Uint64 max_next_os_percentage;
222     Uint64 avg_os_percentage;
223 
224     Uint64 min_thread_percentage;
225     Uint64 min_next_thread_percentage;
226     Uint64 max_thread_percentage;
227     Uint64 max_next_thread_percentage;
228     Uint64 avg_thread_percentage;
229 
230     Uint64 avg_send_percentage;
231   };
232 
233   Uint32 m_configured_spintime;
234   Uint32 m_current_spintime;
235   Uint32 m_gain_spintime_in_us;
236   Uint32 m_current_cpu_usage;
237 
238   NDB_TICKS m_measured_wait_time;
239   Uint64 m_tot_nanos_wait;
240   bool m_phase2_done;
241   bool m_is_idle;
242   Uint32 m_failed_wakeup_measurements;
243 
244   /* Private variables used for handling overload control */
245   bool m_shared_environment;
246   bool m_overload_handling_activated;
247   bool m_enable_adaptive_spinning;
248   Uint32 m_allowed_spin_overhead;
249   Int32 m_warning_level;
250   Uint32 m_max_warning_level;
251   Uint32 m_burstiness;
252   OverloadStatus m_current_overload_status;
253 
254   struct ThreadOverloadStatus
255   {
256     OverloadStatus overload_status;
257     Uint32 wakeup_instance;
258   };
259 
260   ThreadOverloadStatus m_thread_overload_status[MAX_BLOCK_THREADS + 1];
261 
262   MeasureStats c_1sec_stats;
263   MeasureStats c_20sec_stats;
264   MeasureStats c_400sec_stats;
265   MeasureStats *m_current_decision_stats;
266 
267   /* Private methods */
268   void sendSTTORRY(Signal*, bool);
269   void sendNextCONTINUEB(Signal*, Uint32 delay, Uint32 type);
270   void measure_cpu_usage(Signal*);
271   void mark_measurements_not_done();
272   void check_overload_status(Signal*, bool, bool);
273   void set_spin_stat(Uint32, bool);
274   Uint32 calc_new_spin(ndb_spin_stat*);
275   void measure_wakeup_time(Signal*, Uint32);
276 
277   void set_configured_spintime(Uint32 val, bool specific);
278   void set_allowed_spin_overhead(Uint32 val);
279   void set_enable_adaptive_spinning(bool val);
280   void set_spintime_per_call(Uint32 val);
281 
282   Uint32 calculate_mean_send_thread_load();
283   void calculate_measurement(MeasurementRecordPtr measurePtr,
284                              struct ndb_rusage *curr_rusage,
285                              struct ndb_rusage *base_rusage,
286                              MeasurementRecord *curr_measure,
287                              MeasurementRecord *base_measure,
288                              Uint64 elapsed_micros);
289 
290   void calculate_send_measurement(
291     SendThreadMeasurementPtr sendThreadMeasurementPtr,
292     SendThreadMeasurement *curr_send_thread_measure,
293     SendThreadMeasurement *last_send_thread_measure,
294     Uint64 elapsed_time,
295     Uint32 send_instance);
296 
297   void sum_measures(MeasurementRecord *dest, MeasurementRecord *source);
298   void calc_stats(MeasureStats *stats, MeasurementRecord *measure);
299   void calc_avgs(MeasureStats *stats, Uint32 num_stats);
300   void init_stats(MeasureStats *stats);
301 
302   void handle_decisions();
303   void check_burstiness();
304 
305   void inc_warning(Uint32 inc_factor);
306   void dec_warning(Uint32 dec_factor);
307   void down_warning(Uint32 down_factor);
308 
309   Int32 get_load_status(Uint32 load, Uint32 send_load);
310   Uint32 calculate_load(MeasureStats & stats, Uint32 & burstiness);
311   void change_warning_level(Int32 diff_status, Uint32 factor);
312   void handle_overload_stats_1sec();
313   void handle_overload_stats_20sec();
314   void handle_overload_stats_400sec();
315 
316   void handle_state_change(Signal *signal);
317   void sendOVERLOAD_STATUS_REP(Signal *signal);
318   void sendSEND_THREAD_STATUS_REP(Signal *signal, Uint32 send_pct);
319   void sendSET_WAKEUP_THREAD_ORD(Signal *signal,
320                                  Uint32 instance_no,
321                                  Uint32 wakeup_instance);
322   void get_idle_block_threads(Uint32 *thread_list,
323                               Uint32 & num_threads_found);
324   void assign_wakeup_threads(Signal*, Uint32*, Uint32);
325   void update_current_wakeup_instance(Uint32 * threads_list,
326                                       Uint32 num_threads_found,
327                                       Uint32 & index,
328                                       Uint32 & current_wakeup_instance);
329 
330   bool calculate_stats_last_400seconds(MeasureStats *stats);
331   bool calculate_stats_last_20seconds(MeasureStats *stats);
332   bool calculate_stats_last_second(MeasureStats *stats);
333   bool calculate_stats_last_100ms(MeasureStats *stats);
334 
335   bool calculate_cpu_load_last_second(MeasurementRecord *measure);
336   bool calculate_cpu_load_last_20seconds(MeasurementRecord *measure);
337   bool calculate_cpu_load_last_400seconds(MeasurementRecord *measure);
338 
339   bool calculate_send_thread_load_last_second(Uint32 send_instance,
340                                               SendThreadMeasurement *measure);
341 };
342 
343 class ThrmanProxy : public LocalProxy
344 {
345 public:
346   ThrmanProxy(Block_context& ctx);
347   virtual ~ThrmanProxy();
348   BLOCK_DEFINES(ThrmanProxy);
349   void execFREEZE_THREAD_REQ(Signal*);
350 
351 protected:
352   virtual SimulatedBlock* newWorker(Uint32 instanceNo);
353 
354 };
355 #undef JAM_FILE_ID
356 
357 #endif
358