1 /* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
2 
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License, version 2.0,
5   as published by the Free Software Foundation.
6 
7   This program is also distributed with certain software (including
8   but not limited to OpenSSL) that is licensed under separate terms,
9   as designated in a particular file or component or in included license
10   documentation.  The authors of MySQL hereby grant you an additional
11   permission to link the program and your derivative works with the
12   separately licensed software that they have included with MySQL.
13 
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License, version 2.0, for more details.
18 
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software Foundation,
21   51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
22 
23 #ifndef PFS_INSTR_H
24 #define PFS_INSTR_H
25 
26 /**
27   @file storage/perfschema/pfs_instr.h
28   Performance schema instruments (declarations).
29 */
30 
/*
  Forward declarations of the instrument class and table share types.
  The instance structures declared in this header only hold pointers
  to these types (see the m_class / m_share members).
*/
struct PFS_mutex_class;
struct PFS_rwlock_class;
struct PFS_cond_class;
struct PFS_file_class;
struct PFS_table_share;
struct PFS_thread_class;
struct PFS_socket_class;
38 
39 #include "my_global.h"
40 #ifdef __WIN__
41 #include <winsock2.h>
42 #else
43 #include <arpa/inet.h>
44 #endif
45 #include "my_compiler.h"
46 #include "pfs_lock.h"
47 #include "pfs_stat.h"
48 #include "pfs_instr_class.h"
49 #include "pfs_events_waits.h"
50 #include "pfs_events_stages.h"
51 #include "pfs_events_statements.h"
52 #include "pfs_server.h"
53 #include "lf.h"
54 #include "pfs_con_slice.h"
55 
56 /**
57   @addtogroup Performance_schema_buffers
58   @{
59 */
60 
/*
  Forward declarations.
  PFS_thread is fully defined later in this header, but is referenced
  by pointer before that point (for example as a mutex owner).
  PFS_host, PFS_user and PFS_account are only used through pointers here.
*/
struct PFS_thread;
struct PFS_host;
struct PFS_user;
struct PFS_account;
65 
/**
  Base structure for wait instruments.
  This is the common base of PFS_mutex, PFS_rwlock, PFS_cond,
  PFS_file and PFS_socket.
*/
struct PFS_instr
{
  /** Internal lock. */
  pfs_lock m_lock;
  /** Enabled flag. */
  bool m_enabled;
  /** Timed flag. */
  bool m_timed;
};
76 
/**
  Instrumented mutex implementation.
  Adds the mutex identity, class, statistics and ownership data
  to the common PFS_instr base.
  @see PSI_mutex.
*/
struct PFS_ALIGNED PFS_mutex : public PFS_instr
{
  /**
    Mutex identity, typically a pthread_mutex_t.
    Kept as an untyped const pointer.
  */
  const void *m_identity;
  /** Mutex class. */
  PFS_mutex_class *m_class;
  /** Instrument statistics. */
  PFS_mutex_stat m_mutex_stat;
  /** Current owner. */
  PFS_thread *m_owner;
  /**
    Timestamp of the last lock.
    This statistic is not exposed in user visible tables yet.
    NOTE(review): the time unit is not visible from this header.
  */
  ulonglong m_last_locked;
};
94 
/**
  Instrumented rwlock implementation.
  Adds the rwlock identity, class, statistics and reader / writer
  tracking data to the common PFS_instr base.
  @see PSI_rwlock.
*/
struct PFS_ALIGNED PFS_rwlock : public PFS_instr
{
  /**
    RWLock identity, typically a pthread_rwlock_t.
    Kept as an untyped const pointer.
  */
  const void *m_identity;
  /** RWLock class. */
  PFS_rwlock_class *m_class;
  /** Instrument statistics. */
  PFS_rwlock_stat m_rwlock_stat;
  /** Current writer thread. */
  PFS_thread *m_writer;
  /** Current count of readers. */
  uint m_readers;
  /**
    Timestamp of the last write.
    This statistic is not exposed in user visible tables yet.
    NOTE(review): the time unit is not visible from this header.
  */
  ulonglong m_last_written;
  /**
    Timestamp of the last read.
    This statistic is not exposed in user visible tables yet.
    NOTE(review): the time unit is not visible from this header.
  */
  ulonglong m_last_read;
};
119 
/**
  Instrumented condition implementation.
  Adds the condition identity, class and two sets of statistics
  (wait statistics and usage statistics) to the common PFS_instr base.
  @see PSI_cond.
*/
struct PFS_ALIGNED PFS_cond : public PFS_instr
{
  /**
    Condition identity, typically a pthread_cond_t.
    Kept as an untyped const pointer.
  */
  const void *m_identity;
  /** Condition class. */
  PFS_cond_class *m_class;
  /** Instrument wait statistics. */
  PFS_single_stat m_wait_stat;
  /** Condition instance usage statistics. */
  PFS_cond_stat m_cond_stat;
};
132 
133 /** Instrumented File and FILE implementation. @see PSI_file. */
134 struct PFS_ALIGNED PFS_file : public PFS_instr
135 {
136   uint32 get_version()
137   { return m_lock.get_version(); }
138 
139   /** File identity */
140   const void *m_identity;
141   /** File name. */
142   char m_filename[FN_REFLEN];
143   /** File name length in bytes. */
144   uint m_filename_length;
145   /** File class. */
146   PFS_file_class *m_class;
147   /** File usage statistics. */
148   PFS_file_stat m_file_stat;
149 };
150 
151 /** Instrumented table implementation. @see PSI_table. */
152 struct PFS_ALIGNED PFS_table
153 {
154   /**
155     True if table io instrumentation is enabled.
156     This flag is computed.
157   */
158   bool m_io_enabled;
159   /**
160     True if table lock instrumentation is enabled.
161     This flag is computed.
162   */
163   bool m_lock_enabled;
164   /**
165     True if table io instrumentation is timed.
166     This flag is computed.
167   */
168   bool m_io_timed;
169   /**
170     True if table lock instrumentation is timed.
171     This flag is computed.
172   */
173   bool m_lock_timed;
174 
175   /** True if table io statistics have been collected. */
176   bool m_has_io_stats;
177 
178   /** True if table lock statistics have been collected. */
179   bool m_has_lock_stats;
180 
181 public:
182   /**
183     Aggregate this table handle statistics to the parents.
184     Only use this method for handles owned by the calling code.
185     @sa sanitized_aggregate.
186   */
187   void aggregate(void)
188   {
189     if (m_has_io_stats && m_has_lock_stats)
190     {
191       safe_aggregate(& m_table_stat, m_share);
192       m_has_io_stats= false;
193       m_has_lock_stats= false;
194     }
195     else if (m_has_io_stats)
196     {
197       safe_aggregate_io(& m_table_stat, m_share);
198       m_has_io_stats= false;
199     }
200     else if (m_has_lock_stats)
201     {
202       safe_aggregate_lock(& m_table_stat, m_share);
203       m_has_lock_stats= false;
204     }
205   }
206 
207   /**
208     Aggregate this table handle statistics to the parents.
209     This method is safe to call on handles not owned by the calling code.
210     @sa aggregate
211     @sa sanitized_aggregate_io
212     @sa sanitized_aggregate_lock
213   */
214   void sanitized_aggregate(void);
215 
216   /**
217     Aggregate this table handle io statistics to the parents.
218     This method is safe to call on handles not owned by the calling code.
219   */
220   void sanitized_aggregate_io(void);
221 
222   /**
223     Aggregate this table handle lock statistics to the parents.
224     This method is safe to call on handles not owned by the calling code.
225   */
226   void sanitized_aggregate_lock(void);
227 
228   /** Internal lock. */
229   pfs_lock m_lock;
230   /** Owner. */
231   PFS_thread *m_thread_owner;
232   /** Table share. */
233   PFS_table_share *m_share;
234   /** Table identity, typically a handler. */
235   const void *m_identity;
236   /** Table statistics. */
237   PFS_table_stat m_table_stat;
238 
239 private:
240   static void safe_aggregate(PFS_table_stat *stat,
241                              PFS_table_share *safe_share);
242   static void safe_aggregate_io(PFS_table_stat *stat,
243                                 PFS_table_share *safe_share);
244   static void safe_aggregate_lock(PFS_table_stat *stat,
245                                   PFS_table_share *safe_share);
246 };
247 
248 /** Instrumented socket implementation. @see PSI_socket. */
249 struct PFS_ALIGNED PFS_socket : public PFS_instr
250 {
251   uint32 get_version()
252   { return m_lock.get_version(); }
253 
254   /** Socket identity, typically int */
255   const void *m_identity;
256   /** Owning thread, if applicable */
257   PFS_thread *m_thread_owner;
258   /** Socket file descriptor */
259   uint m_fd;
260   /** Raw socket address */
261   struct sockaddr_storage  m_sock_addr;
262   /** Length of address */
263   socklen_t m_addr_len;
264   /** Idle flag. */
265   bool m_idle;
266   /** Socket class. */
267   PFS_socket_class *m_class;
268   /** Socket usage statistics. */
269   PFS_socket_stat m_socket_stat;
270 };
271 
/**
  @def WAIT_STACK_LOGICAL_SIZE
  Maximum number of nested waits.
  Some waits, such as:
  - "wait/io/table/sql/handler"
  - "wait/lock/table/sql/handler"
  are implemented by calling code in a storage engine,
  that can cause nested waits (file io, mutex, ...)
  Because of partitioned tables, a table io event (on the whole table)
  can contain a nested table io event (on a partition).
  Because of additional debug instrumentation,
  waiting on what looks like a "mutex" (safe_mutex, innodb sync0sync, ...)
  can cause nested waits to be recorded.
  For example, a wait on innodb mutexes can lead to:
  - wait/sync/mutex/innobase/some_mutex
    - wait/sync/mutex/innobase/sync0sync
      - wait/sync/mutex/innobase/os0sync
  The max depth of the event stack must be sufficient
  for these low level details to be visible.
*/
#define WAIT_STACK_LOGICAL_SIZE 5
/**
  @def WAIT_STACK_BOTTOM
  Maximum number of dummy wait records.
  One dummy record is reserved for the parent stage / statement,
  at the bottom of the wait stack.
*/
#define WAIT_STACK_BOTTOM 1
/**
  @def WAIT_STACK_SIZE
  Physical size of the waits stack:
  the dummy bottom record(s) plus the logical stack,
  which is 1 + 5 = 6 records with the values defined above.
*/
#define WAIT_STACK_SIZE (WAIT_STACK_BOTTOM + WAIT_STACK_LOGICAL_SIZE)

/** Max size of the statements stack. */
extern uint statement_stack_max;
/** Max size of the digests token array. */
extern size_t pfs_max_digest_length;

/**
  @def PFS_MAX_ALLOC_RETRY
  Maximum number of times the code attempts to allocate an item
  from internal buffers, before giving up.
*/
#define PFS_MAX_ALLOC_RETRY 1000

/**
  The maximum number of passes in a scan.
  This is also the size of the per-pass arrays in PFS_scan.
  @sa PFS_scan
*/
#define PFS_MAX_SCAN_PASS 2
320 
321 /**
322   Helper to scan circular buffers.
323   Given a buffer of size [0, max_size - 1],
324   and a random starting point in the buffer,
325   this helper returns up to two [first, last -1] intervals that:
326   - fit into the [0, max_size - 1] range,
327   - have a maximum combined length of at most PFS_MAX_ALLOC_RETRY.
328 */
329 struct PFS_scan
330 {
331 public:
332   /**
333     Initialize a new scan.
334     @param random a random index to start from
335     @param max_size the max size of the interval to scan
336   */
337   void init(uint random, uint max_size);
338 
339   /**
340     Predicate, has a next pass.
341     @return true if there is a next pass to perform.
342   */
343   bool has_pass() const
344   { return (m_pass < m_pass_max); }
345 
346   /**
347     Iterator, proceed to the next pass.
348   */
349   void next_pass()
350   { m_pass++; }
351 
352   /** First index for this pass. */
353   uint first() const
354   { return m_first[m_pass]; }
355 
356   /** Last index for this pass. */
357   uint last() const
358   { return m_last[m_pass]; }
359 
360 private:
361   /** Current pass. */
362   uint m_pass;
363   /** Maximum number of passes. */
364   uint m_pass_max;
365   /** First element for each pass. */
366   uint m_first[PFS_MAX_SCAN_PASS];
367   /** Last element for each pass. */
368   uint m_last[PFS_MAX_SCAN_PASS];
369 };
370 
371 
/**
  Instrumented thread implementation.
  Beyond the data inherited from PFS_connection_slice, a thread record holds:
  - the wait event stack and the waits / stages / statements history buffers,
  - session attributes (user, host, connection attributes),
    protected by @c m_session_lock,
  - statement attributes (database, processlist info),
    protected by @c m_stmt_lock,
  - the pins used by this thread for the various hashes.
  @see PSI_thread.
*/
struct PFS_ALIGNED PFS_thread : PFS_connection_slice
{
  /** Get the instrumentation record of the current thread. */
  static PFS_thread* get_current_thread(void);

  /** Thread instrumentation flag. */
  bool m_enabled;
  /** Current wait event in the event stack. */
  PFS_events_waits *m_events_waits_current;
  /** Event ID counter */
  ulonglong m_event_id;
  /**
    Internal lock.
    This lock is exclusively used to protect against races
    when creating and destroying PFS_thread.
    Do not use this lock to protect thread attributes,
    use one of @c m_stmt_lock or @c m_session_lock instead.
  */
  pfs_lock m_lock;
  /** Pins for filename_hash. */
  LF_PINS *m_filename_hash_pins;
  /** Pins for table_share_hash. */
  LF_PINS *m_table_share_hash_pins;
  /** Pins for setup_actor_hash. */
  LF_PINS *m_setup_actor_hash_pins;
  /** Pins for setup_object_hash. */
  LF_PINS *m_setup_object_hash_pins;
  /** Pins for host_hash. */
  LF_PINS *m_host_hash_pins;
  /** Pins for user_hash. */
  LF_PINS *m_user_hash_pins;
  /** Pins for account_hash. */
  LF_PINS *m_account_hash_pins;
  /** Pins for digest_hash. */
  LF_PINS *m_digest_hash_pins;
  /** Internal thread identifier, unique. */
  ulonglong m_thread_internal_id;
  /** Parent internal thread identifier. */
  ulonglong m_parent_thread_internal_id;
  /** External (SHOW PROCESSLIST) thread identifier, not unique. */
  ulong m_processlist_id;
  /** Thread class. */
  PFS_thread_class *m_class;
  /**
    Stack of events waits.
    This member holds the data for the table PERFORMANCE_SCHEMA.EVENTS_WAITS_CURRENT.
    Note that stack[0] is a dummy record that represents the parent stage/statement.
    For example, assuming the following tree:
    - STAGE ID 100
      - WAIT ID 101, parent STAGE 100
        - WAIT ID 102, parent wait 101
    the data in the stack will be:
    stack[0].m_event_id= 100, set by the stage instrumentation
    stack[0].m_event_type= STAGE, set by the stage instrumentation
    stack[0].m_nesting_event_id= unused
    stack[0].m_nesting_event_type= unused
    stack[1].m_event_id= 101
    stack[1].m_event_type= WAIT
    stack[1].m_nesting_event_id= stack[0].m_event_id= 100
    stack[1].m_nesting_event_type= stack[0].m_event_type= STAGE
    stack[2].m_event_id= 102
    stack[2].m_event_type= WAIT
    stack[2].m_nesting_event_id= stack[1].m_event_id= 101
    stack[2].m_nesting_event_type= stack[1].m_event_type= WAIT

    The whole point of the stack[0] record is to allow this optimization
    in the code, in the instrumentation for wait events:
      wait->m_nesting_event_id= (wait-1)->m_event_id;
      wait->m_nesting_event_type= (wait-1)->m_event_type;
    This code works for both the top level wait, and nested waits,
    and works without if conditions, which helps performance.
  */
  PFS_events_waits m_events_waits_stack[WAIT_STACK_SIZE];
  /** True if the circular buffer @c m_waits_history is full. */
  bool m_waits_history_full;
  /** Current index in the circular buffer @c m_waits_history. */
  uint m_waits_history_index;
  /**
    Waits history circular buffer.
    This member holds the data for the table
    PERFORMANCE_SCHEMA.EVENTS_WAITS_HISTORY.
  */
  PFS_events_waits *m_waits_history;

  /** True if the circular buffer @c m_stages_history is full. */
  bool m_stages_history_full;
  /** Current index in the circular buffer @c m_stages_history. */
  uint m_stages_history_index;
  /**
    Stages history circular buffer.
    This member holds the data for the table
    PERFORMANCE_SCHEMA.EVENTS_STAGES_HISTORY.
  */
  PFS_events_stages *m_stages_history;

  /** True if the circular buffer @c m_statements_history is full. */
  bool m_statements_history_full;
  /** Current index in the circular buffer @c m_statements_history. */
  uint m_statements_history_index;
  /**
    Statements history circular buffer.
    This member holds the data for the table
    PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_HISTORY.
  */
  PFS_events_statements *m_statements_history;

  /**
    Internal lock, for session attributes.
    Session attributes are expected to be updated infrequently,
    typically per session execution.
  */
  pfs_lock m_session_lock;
  /**
    User name.
    Protected by @c m_session_lock.
  */
  char m_username[USERNAME_LENGTH];
  /**
    Length of @c m_username.
    Protected by @c m_session_lock.
  */
  uint m_username_length;
  /**
    Host name.
    Protected by @c m_session_lock.
  */
  char m_hostname[HOSTNAME_LENGTH];
  /**
    Length of @c m_hostname.
    Protected by @c m_session_lock.
  */
  uint m_hostname_length;
  /**
    Database name.
    Protected by @c m_stmt_lock.
  */
  char m_dbname[NAME_LEN];
  /**
    Length of @c m_dbname.
    Protected by @c m_stmt_lock.
  */
  uint m_dbname_length;
  /** Current command. */
  int m_command;
  /** Start time. */
  time_t m_start_time;
  /**
    Internal lock, for statement attributes.
    Statement attributes are expected to be updated frequently,
    typically per statement execution.
  */
  pfs_lock m_stmt_lock;
  /** Processlist state (derived from stage). */
  PFS_stage_key m_stage;
  /**
    Processlist info.
    Protected by @c m_stmt_lock.
  */
  char m_processlist_info[COL_INFO_SIZE];
  /**
    Length of @c m_processlist_info.
    Protected by @c m_stmt_lock.
  */
  uint m_processlist_info_length;

  /** Current stage event. */
  PFS_events_stages m_stage_current;

  /** Size of @c m_statement_stack. */
  uint m_events_statements_count;
  /**
    Stack of statement events.
    NOTE(review): presumably sized by @c statement_stack_max, confirm
    against the allocation code.
  */
  PFS_events_statements *m_statement_stack;

  /** Host record associated with this thread. */
  PFS_host *m_host;
  /** User record associated with this thread. */
  PFS_user *m_user;
  /** Account record associated with this thread. */
  PFS_account *m_account;

  /** Reset session connect attributes */
  void reset_session_connect_attrs();

  /**
    Buffer for the connection attributes.
    Protected by @c m_session_lock.
  */
  char *m_session_connect_attrs;
  /**
    Length used by @c m_session_connect_attrs.
    Protected by @c m_session_lock.
  */
  uint m_session_connect_attrs_length;
  /**
    Character set in which @c m_session_connect_attrs are encoded.
    Protected by @c m_session_lock.
  */
  uint m_session_connect_attrs_cs_number;
};
566 
/** Global array of stage statistics. */
extern PFS_stage_stat *global_instr_class_stages_array;
/** Global array of statement statistics. */
extern PFS_statement_stat *global_instr_class_statements_array;

/*
  Sanitize helpers, one per instrument type.
  Each takes a pointer that is not trusted (unsafe),
  and returns a pointer of the same type.
  NOTE(review): presumably the result is NULL when the input does not
  designate a valid record, confirm against the implementation.
*/
PFS_mutex *sanitize_mutex(PFS_mutex *unsafe);
PFS_rwlock *sanitize_rwlock(PFS_rwlock *unsafe);
PFS_cond *sanitize_cond(PFS_cond *unsafe);
PFS_thread *sanitize_thread(PFS_thread *unsafe);
PFS_file *sanitize_file(PFS_file *unsafe);
PFS_socket *sanitize_socket(PFS_socket *unsafe);
576 
/*
  Initialization and cleanup of the instrument buffers and of the file hash.
  NOTE(review): the meaning of the int return values is not visible
  from this header, confirm against the implementation.
*/
int init_instruments(const PFS_global_param *param);
void cleanup_instruments();
int init_file_hash();
void cleanup_file_hash();

/*
  Creation and destruction of instrument instances.
  The create functions take the instrument class and the identity
  of the instrumented object, and return the instrumentation record.
  The destroy functions take the record previously returned.
*/
PFS_mutex* create_mutex(PFS_mutex_class *mutex_class, const void *identity);
void destroy_mutex(PFS_mutex *pfs);
PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity);
void destroy_rwlock(PFS_rwlock *pfs);
PFS_cond* create_cond(PFS_cond_class *klass, const void *identity);
void destroy_cond(PFS_cond *pfs);

/*
  NOTE(review): processlist_id is a ulonglong here,
  while PFS_thread::m_processlist_id is a ulong.
*/
PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
                          ulonglong processlist_id);

void destroy_thread(PFS_thread *pfs);

/*
  Files are looked up by name (filename, len) on behalf of a thread.
  NOTE(review): presumably the create flag controls whether a missing
  file record is created, confirm against the implementation.
*/
PFS_file* find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
                              const char *filename, uint len, bool create);

void release_file(PFS_file *pfs);
void destroy_file(PFS_thread *thread, PFS_file *pfs);
PFS_table* create_table(PFS_table_share *share, PFS_thread *opening_thread,
                        const void *identity);
void destroy_table(PFS_table *pfs);

/*
  The socket descriptor is passed by pointer,
  together with the socket address and the address length.
*/
PFS_socket* create_socket(PFS_socket_class *socket_class,
                          const my_socket *fd,
                          const struct sockaddr *addr,
                          socklen_t addr_len);
void destroy_socket(PFS_socket *pfs);
607 
/*
  For iterators and show status.
  The counters come in pairs: <name>_max and <name>_lost.
  NOTE(review): presumably <name>_max is the buffer size and <name>_lost
  counts records that could not be instrumented, confirm against the
  implementation.
*/

extern ulong mutex_max;
extern ulong mutex_lost;
extern ulong rwlock_max;
extern ulong rwlock_lost;
extern ulong cond_max;
extern ulong cond_lost;
extern ulong thread_max;
extern ulong thread_lost;
extern ulong file_max;
extern ulong file_lost;
/* NOTE(review): signed long, unlike the other *_max counters. */
extern long file_handle_max;
extern ulong file_handle_lost;
extern ulong table_max;
extern ulong table_lost;
extern ulong socket_max;
extern ulong socket_lost;
extern ulong events_waits_history_per_thread;
extern ulong events_stages_history_per_thread;
extern ulong events_statements_history_per_thread;
extern ulong locker_lost;
extern ulong statement_lost;
extern ulong session_connect_attrs_lost;
extern ulong session_connect_attrs_size_per_thread;

/*
  Exposing the data directly, for iterators.
  Note that file_handle_array is an array of pointers to PFS_file,
  while the other arrays hold the records themselves.
*/

extern PFS_mutex *mutex_array;
extern PFS_rwlock *rwlock_array;
extern PFS_cond *cond_array;
extern PFS_thread *thread_array;
extern PFS_file *file_array;
extern PFS_file **file_handle_array;
extern PFS_table *table_array;
extern PFS_socket *socket_array;
644 
/* Reset helpers for per instance statistics. */
void reset_events_waits_by_instance();
void reset_file_instance_io();
void reset_socket_instance_io();

/*
  Aggregation helpers for arrays of statistics.
  Each helper comes in two overloads:
  - from_array to a single destination array (to_array),
  - from_array to two destination arrays (to_array_1 and to_array_2).
  NOTE(review): whether from_array is reset after aggregation is not
  visible from this header, confirm against the implementation.
*/
void aggregate_all_event_names(PFS_single_stat *from_array,
                               PFS_single_stat *to_array);
void aggregate_all_event_names(PFS_single_stat *from_array,
                               PFS_single_stat *to_array_1,
                               PFS_single_stat *to_array_2);

void aggregate_all_stages(PFS_stage_stat *from_array,
                          PFS_stage_stat *to_array);
void aggregate_all_stages(PFS_stage_stat *from_array,
                          PFS_stage_stat *to_array_1,
                          PFS_stage_stat *to_array_2);

void aggregate_all_statements(PFS_statement_stat *from_array,
                              PFS_statement_stat *to_array);
void aggregate_all_statements(PFS_statement_stat *from_array,
                              PFS_statement_stat *to_array_1,
                              PFS_statement_stat *to_array_2);
666 
/*
  Aggregation of a thread's statistics.
  Each function takes the thread, plus the account, user and host
  records to use (safe_account, safe_user, safe_host).
  Separate entry points exist for waits, stages and statements.
  NOTE(review): presumably aggregate_thread covers all three,
  confirm against the implementation.
*/
void aggregate_thread(PFS_thread *thread,
                      PFS_account *safe_account,
                      PFS_user *safe_user,
                      PFS_host *safe_host);
void aggregate_thread_waits(PFS_thread *thread,
                            PFS_account *safe_account,
                            PFS_user *safe_user,
                            PFS_host *safe_host);
void aggregate_thread_stages(PFS_thread *thread,
                             PFS_account *safe_account,
                             PFS_user *safe_user,
                             PFS_host *safe_host);
void aggregate_thread_statements(PFS_thread *thread,
                                 PFS_account *safe_account,
                                 PFS_user *safe_user,
                                 PFS_host *safe_host);
/*
  Clear / set the account related data of a thread.
  NOTE(review): presumably these maintain PFS_thread::m_account,
  m_user and m_host, confirm against the implementation.
*/
void clear_thread_account(PFS_thread *thread);
void set_thread_account(PFS_thread *thread);
685 
/** Update derived flags for all mutex instances. */
void update_mutex_derived_flags();
/** Update derived flags for all rwlock instances. */
void update_rwlock_derived_flags();
/** Update derived flags for all condition instances. */
void update_cond_derived_flags();
/** Update derived flags for all file handles. */
void update_file_derived_flags();
/** Update derived flags for all table handles. */
void update_table_derived_flags();
/** Update derived flags for all socket instances. */
void update_socket_derived_flags();
/** Update derived flags for all instruments. */
void update_instruments_derived_flags();

/**
  File name hash.
  Threads access it with the pins kept in
  PFS_thread::m_filename_hash_pins.
*/
extern LF_HASH filename_hash;
702 
703 /** @} */
704 #endif
705 
706