1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2 
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License, version 2.0,
5   as published by the Free Software Foundation.
6 
7   This program is also distributed with certain software (including
8   but not limited to OpenSSL) that is licensed under separate terms,
9   as designated in a particular file or component or in included license
10   documentation.  The authors of MySQL hereby grant you an additional
11   permission to link the program and your derivative works with the
12   separately licensed software that they have included with MySQL.
13 
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License, version 2.0, for more details.
18 
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software Foundation,
21   51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 /**
24   @file storage/perfschema/pfs.cc
25   The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sql_acl.h"
49 #include "sp_head.h"
50 #include "pfs_digest.h"
51 
52 using std::min;
53 /**
54   @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
55   MySQL PERFORMANCE_SCHEMA implementation.
56 
57   @section INTRO Introduction
58   The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
59   the server at runtime.
60   The performance schema focuses primarily on performance data,
61   as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
62 
63   From a user point of view, the performance schema consists of:
64   - a dedicated database schema, named PERFORMANCE_SCHEMA,
65   - SQL tables, used to query the server internal state or change
66   configuration settings.
67 
68   From an implementation point of view, the performance schema is a dedicated
69   Storage Engine which exposes data collected by 'Instrumentation Points'
70   placed in the server code.
71 
72   @section INTERFACES Multiple interfaces
73 
74   The performance schema exposes many different interfaces,
75   for different components, and for different purposes.
76 
77   @subsection INT_INSTRUMENTING Instrumenting interface
78 
79   All the data representing the server internal state exposed
80   in the performance schema must be first collected:
81   this is the role of the instrumenting interface.
82   The instrumenting interface is a coding interface provided
83   by implementors (of the performance schema) to implementors
84   (of the server or server components).
85 
86   This interface is available to:
87   - C implementations
88   - C++ implementations
89   - the core SQL layer (/sql)
90   - the mysys library (/mysys)
91   - MySQL plugins, including storage engines,
92   - third party plugins, including third party storage engines.
93 
94   For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
95   "instrumentation interface page".
96 
97   @subsection INT_COMPILING Compiling interface
98 
99   The implementation of the performance schema can be enabled or disabled at
100   build time, when building MySQL from the source code.
101 
102   When building with the performance schema code, some compilation flags
103   are available to change the default values used in the code, if required.
104 
105   For more details, see:
106   @verbatim ./configure --help @endverbatim
107 
108   To compile with the performance schema:
109   @verbatim ./configure --with-perfschema @endverbatim
110 
111   The implementation of all the compiling options is located in
112   @verbatim ./storage/perfschema/plug.in @endverbatim
113 
114   @subsection INT_STARTUP Server startup interface
115 
116   The server startup interface consists of the "./mysqld ..."
117   command line used to start the server.
118   When the performance schema is compiled in the server binary,
119   extra command line options are available.
120 
121   These extra start options allow the DBA to:
122   - enable or disable the performance schema
123   - specify some sizing parameters.
124 
125   To see help for the performance schema startup options, see:
126   @verbatim ./sql/mysqld --verbose --help  @endverbatim
127 
128   The implementation of all the startup options is located in
129   @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
130 
131   @subsection INT_BOOTSTRAP Server bootstrap interface
132 
133   The bootstrap interface is a private interface exposed by
134   the performance schema, and used by the SQL layer.
135   Its role is to advertise all the SQL tables natively
136   supported by the performance schema to the SQL server.
137   The code consists of creating MySQL tables for the
138   performance schema itself, and is used in './mysqld --bootstrap'
139   mode when a server is installed.
140 
141   The implementation of the database creation script is located in
142   @verbatim ./scripts/mysql_system_tables.sql @endverbatim
143 
144   @subsection INT_CONFIG Runtime configuration interface
145 
146   When the performance schema is used at runtime, various configuration
147   parameters can be used to specify what kind of data is collected,
148   what kind of aggregations are computed, what kind of timers are used,
149   what events are timed, etc.
150 
151   For all these capabilities, not a single statement or special syntax
152   was introduced in the parser.
153   Instead of new SQL statements, the interface consists of DML
154   (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
155 
156   For example:
157   @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
158     set ENABLED='YES', TIMED='YES';
159   Query OK, 234 rows affected (0.00 sec)
160   Rows matched: 234  Changed: 234  Warnings: 0 @endverbatim
161 
162   @subsection INT_STATUS Internal audit interface
163 
164   The internal audit interface is provided to the DBA to inspect if the
165   performance schema code itself is functioning properly.
166   This interface is necessary because a failure caused while
167   instrumenting code in the server should not cause failures in the
168   MySQL server itself, so that the performance schema implementation
169   never raises errors during runtime execution.
170 
171   This auditing interface consists of:
172   @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
173   It displays data related to the memory usage of the performance schema,
174   as well as statistics about lost events, if any.
175 
176   The SHOW ENGINE PERFORMANCE_SCHEMA STATUS command is implemented in
177   @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
178 
179   @subsection INT_QUERY Query interface
180 
181   The query interface is used to query the internal state of a running server.
182   It is provided as SQL tables.
183 
184   For example:
185   @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
186   @endverbatim
187 
188   @section DESIGN_PRINCIPLES Design principles
189 
190   @subsection PRINCIPLE_BEHAVIOR No behavior changes
191 
192   The primary goal of the performance schema is to measure (instrument) the
193   execution of the server. A good measure should not cause any change
194   in behavior.
195 
196   To achieve this, the overall design of the performance schema complies
197   with the following very severe design constraints:
198 
199   The parser is unchanged. There are no new keywords, no new statements.
200   This guarantees that existing applications will run the same way with or
201   without the performance schema.
202 
203   All the instrumentation points return "void", there are no error codes.
204   Even if the performance schema internally fails, execution of the server
205   code will proceed.
206 
207   None of the instrumentation points allocate memory.
208   All the memory used by the performance schema is pre-allocated at startup,
209   and is considered "static" during the server life time.
210 
211   None of the instrumentation points use any pthread_mutex, pthread_rwlock,
212   or pthread_cond (or platform equivalents).
213   Executing the instrumentation point should not cause thread scheduling to
214   change in the server.
215 
216   In other words, the implementation of the instrumentation points,
217   including all the code called by the instrumentation points, is:
218   - malloc free
219   - mutex free
220   - rwlock free
221 
222   TODO: All the code located in storage/perfschema is malloc free,
223   but unfortunately the usage of LF_HASH introduces some memory allocation.
224   This should be revised if possible, to use a lock-free,
225   malloc-free hash code table.
226 
227   @subsection PRINCIPLE_PERFORMANCE No performance hit
228 
229   The instrumentation of the server should be as fast as possible.
230   In cases when there are choices between:
231   - doing some processing when recording the performance data
232   in the instrumentation,
233   - doing some processing when retrieving the performance data,
234 
235   priority is given in the design to make the instrumentation faster,
236   pushing some complexity to data retrieval.
237 
238   As a result, some parts of the design, related to:
239   - the setup code path,
240   - the query code path,
241 
242   might appear to be sub-optimal.
243 
244   The criterion used here is to optimize primarily the critical path (data
245   collection), possibly at the expense of non-critical code paths.
246 
247   @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
248 
249   For the performance schema in general to be successful, the barrier
250   of entry for a developer should be low, so it's easy to instrument code.
251 
252   In particular, the instrumentation interface:
253   - is available for C and C++ code (so it's a C interface),
254   - does not require parameters that the calling code can't easily provide,
255   - supports partial instrumentation (for example, instrumenting mutexes does
256   not require that every mutex is instrumented)
257 
258   @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
259 
260   As the content of the performance schema improves,
261   with more tables exposed and more data collected,
262   the instrumentation interface will also be augmented
263   to support instrumenting new concepts.
264   Existing instrumentations should not be affected when additional
265   instrumentation is made available, and making a new instrumentation
266   available should not require existing instrumented code to support it.
267 
268   @subsection PRINCIPLE_VERSIONED Versioned instrumentation
269 
270   Given that the instrumentation offered by the performance schema will
271   be augmented with time, when more features are implemented,
272   the interface itself should be versioned, to keep compatibility
273   with previous instrumented code.
274 
275   For example, after both plugin-A and plugin-B have been instrumented for
276   mutexes, read write locks and conditions, using the instrumentation
277   interface, we can anticipate that the instrumentation interface
278   is expanded to support file based operations.
279 
280   Plugin-A, a file based storage engine, will most likely use the expanded
281   interface and instrument its file usage, using the version 2
282   interface, while Plugin-B, a network based storage engine, will not change
283   its code and not release a new binary.
284 
285   When later the instrumentation interface is expanded to support network
286   based operations (which will define interface version 3), the Plugin-B code
287   can then be changed to make use of it.
288 
289   Note, this is just an example to illustrate the design concept here.
290   Both mutexes and file instrumentation are already available
291   since version 1 of the instrumentation interface.
292 
293   @subsection PRINCIPLE_DEPLOYMENT Easy deployment
294 
295   Internally, we might want every plugin implementation to upgrade the
296   instrumented code to the latest available, but this will cause additional
297   work and this is not practical if the code change is monolithic.
298 
299   Externally, for third party plugin implementors, asking implementors to
300   always stay aligned to the latest instrumentation and make new releases,
301   even when the change does not provide new functionality for them,
302   is a bad idea.
303 
304   For example, requiring a network based engine to re-release because the
305   instrumentation interface changed for file based operations, will create
306   too many deployment issues.
307 
308   So, the performance schema implementation must support concurrently,
309   in the same deployment, multiple versions of the instrumentation
310   interface, and ensure binary compatibility with each version.
311 
312   In addition to this, the performance schema can be included or excluded
313   from the server binary, using build time configuration options.
314 
315   Regardless, the following types of deployment are valid:
316   - a server supporting the performance schema + a storage engine
317   that is not instrumented
318   - a server not supporting the performance schema + a storage engine
319   that is instrumented
320 */
321 
322 /**
323   @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
324   MySQL performance schema instrumentation interface.
325 
326   @section INTRO Introduction
327 
328   The instrumentation interface consists of two layers:
329   - a raw ABI (Application Binary Interface) layer, that exposes the primitive
330   instrumentation functions exported by the performance schema instrumentation
331   - an API (Application Programming Interface) layer,
332   that provides many helpers for a developer instrumenting some code,
333   to make the instrumentation as easy as possible.
334 
335   The ABI layer consists of:
336 @code
337 #include "mysql/psi/psi.h"
338 @endcode
339 
340   The API layer consists of:
341 @code
342 #include "mysql/psi/mysql_thread.h"
343 #include "mysql/psi/mysql_file.h"
344 @endcode
345 
346   The first helper is for mutexes, rwlocks and conditions,
347   the second for file io.
348 
349   The API layer exposes C macros and typedefs which will expand:
350   - either to non-instrumented code, when compiled without the performance
351   schema instrumentation
352   - or to instrumented code, that will issue the raw calls to the ABI layer
353   so that the implementation can collect data.
354 
355   Note that all the names introduced (for example, @c mysql_mutex_lock) do not
356   collide with any other namespace.
357   In particular, the macro @c mysql_mutex_lock is on purpose not named
358   @c pthread_mutex_lock.
359   This is to:
360   - avoid overloading @c pthread_mutex_lock with yet another macro,
361   which is dangerous as it can affect user code and pollute
362   the end-user namespace.
363   - allow the developer instrumenting code to selectively instrument
364   some code but not all.
365 
366   @section PRINCIPLES Design principles
367 
368   The ABI part is designed as a facade, that exposes basic primitives.
369   The expectation is that each primitive will be very stable over time,
370   but the list will constantly grow when more instruments are supported.
371   To support binary compatibility with plugins compiled with a different
372   version of the instrumentation, the ABI itself is versioned
373   (see @c PSI_v1, @c PSI_v2).
374 
375   For a given instrumentation point in the API, the basic coding pattern
376   used is:
377   - (a) notify the performance schema of the operation
378   about to be performed.
379   - (b) execute the instrumented code.
380   - (c) notify the performance schema that the operation
381   is completed.
382 
383   An opaque "locker" pointer is returned by (a), that is given to (c).
384   This pointer helps the implementation to keep context, for performance.
385 
386   The following code fragment is annotated to show in detail how this pattern
387   is implemented, when the instrumentation is compiled in:
388 
389 @verbatim
390 static inline int mysql_mutex_lock(
391   mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
392 {
393   int result;
394   struct PSI_mutex_locker_state state;
395   struct PSI_mutex_locker *locker= NULL;
396 
397   ............... (a)
398   locker= PSI_server->start_mutex_wait(&state, that->p_psi,
399                                        PSI_MUTEX_LOCK, locker, src_file, src_line);
400 
401   ............... (b)
402   result= pthread_mutex_lock(&that->m_mutex);
403 
404   ............... (c)
405   PSI_server->end_mutex_wait(locker, result);
406 
407   return result;
408 }
409 @endverbatim
410 
411   When the performance schema instrumentation is not compiled in,
412   the code becomes simply a wrapper, expanded in line by the compiler:
413 
414 @verbatim
415 static inline int mysql_mutex_lock(...)
416 {
417   int result;
418 
419   ............... (b)
420   result= pthread_mutex_lock(&that->m_mutex);
421 
422   return result;
423 }
424 @endverbatim
425 */
426 
427 /**
428   @page PAGE_AGGREGATES Performance schema: the aggregates page.
429   Performance schema aggregates.
430 
431   @section INTRO Introduction
432 
433   Aggregates tables are tables that can be formally defined as
434   SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
435 
436   Each group clause defines a different kind of aggregate, and corresponds to
437   a different table exposed by the performance schema.
438 
439   Aggregates can be either:
440   - computed on the fly,
441   - computed on demand, based on other available data.
442 
443   'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
444   the best approximation is EVENTS_WAITS_HISTORY_LONG.
445   Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
446   while aggregates computed on demand are based on other
447   EVENTS_WAITS_SUMMARY_BY_xxx tables.
448 
449   To better understand the implementation itself, a bit of math is
450   required first, to understand the model behind the code:
451   the code is deceptively simple, the real complexity resides
452   in the flyweight of pointers between various performance schema buffers.
453 
454   @section DIMENSION Concept of dimension
455 
456   An event measured by the instrumentation has many attributes.
457   An event is represented as a data point P(x1, x2, ..., xN),
458   where each x_i coordinate represents a given attribute value.
459 
460   Examples of attributes are:
461   - the time waited
462   - the object waited on
463   - the instrument waited on
464   - the thread that waited
465   - the operation performed
466   - per object or per operation additional attributes, such as spins,
467   number of bytes, etc.
468 
469   Computing an aggregate per thread is fundamentally different from
470   computing an aggregate by instrument, so the "_BY_THREAD" and
471   "_BY_EVENT_NAME" aggregates are different dimensions,
472   operating on different x_i and x_j coordinates.
473   These aggregates are "orthogonal".
474 
475   @section PROJECTION Concept of projection
476 
477   A given x_i attribute value can convey either just one basic piece of information,
478   such as a number of bytes, or can convey implied information,
479   such as an object fully qualified name.
480 
481   For example, from the value "test.t1", the name of the object schema
482   "test" can be separated from the object name "t1", so that now aggregates
483   by object schema can be implemented.
484 
485   In math terms, that corresponds to defining a function:
486   F_i (x): x --> y
487   Applying this function to our point P gives another point P':
488 
489   F_i (P):
490   P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
491   --> P' (x1, x2, ..., x{i-1}, F_i(x_i), x{i+1}, ..., x_N)
492 
493   That function defines in fact an aggregate !
494   In SQL terms, this aggregate would look like the following table:
495 
496 @verbatim
497   CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
498   SELECT col_1, col_2, ..., col_{i-1},
499          Func_i(col_i),
500          COUNT(col_i),
501          MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
502          col_{i+1}, ..., col_N
503          FROM EVENTS_WAITS_HISTORY_INFINITE
504          group by col_1, col_2, ..., col_{i-1}, col_{i+1}, ..., col_N.
505 @endverbatim
506 
507   Note that not all columns have to be included,
508   in particular some columns that are dependent on the x_i column should
509   be removed, so that in practice, MySQL's aggregation method tends to
510   remove many attributes at each aggregation step.
511 
512   For example, when aggregating wait events by object instances,
513   - the wait_time and number_of_bytes can be summed,
514   and sum(wait_time) now becomes an object instance attribute.
515   - the source, timer_start, timer_end columns are not in the
516   _BY_INSTANCE table, because these attributes are only
517   meaningful for a wait.
518 
519   @section COMPOSITION Concept of composition
520 
521   Now, the "test.t1" --> "test" example was purely theory,
522   just to explain the concept, and does not lead very far.
523   Let's look at a more interesting example of data that can be derived
524   from the row event.
525 
526   An event creates a transient object, PFS_wait_locker, per operation.
527   This object's life cycle is extremely short: it's created just
528   before the start_wait() instrumentation call, and is destroyed in
529   the end_wait() call.
530 
531   The wait locker itself contains a pointer to the object instance
532   waited on.
533   That allows implementing a wait_locker --> object instance projection,
534   with m_target.
535   The object instance life cycle depends on _init and _destroy calls
536   from the code, such as mysql_mutex_init()
537   and mysql_mutex_destroy() for a mutex.
538 
539   The object instance waited on contains a pointer to the object class,
540   which is represented by the instrument name.
541   That allows implementing an object instance --> object class projection.
542   The object class life cycle is permanent, as instruments are loaded in
543   the server and never removed.
544 
545   The object class is named in such a way
546   (for example, "wait/sync/mutex/sql/LOCK_open",
547   "wait/io/file/maria/data_file") that the component ("sql", "maria")
548   that it belongs to can be inferred.
549   That allows implementing an object class --> server component projection.
550 
551   Back to math again, we have, for example for mutexes:
552 
553   F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
554 
555   F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
556 
557   F2_to_3 (i) : PFS_mutex_class i --> const char *component =
558                                         substring(i->m_name, ...)
559 
560   Per component aggregates are not implemented, this is just an illustration.
561 
562   F1 alone defines this aggregate:
563 
564   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
565   (or MUTEX_INSTANCE)
566 
567   F1_to_2 alone could define this aggregate:
568 
569   EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
570 
571   Alternatively, using function composition, with
572   F2 = F1_to_2 o F1, F2 defines:
573 
574   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
575 
576   Likewise, F2_to_3 defines:
577 
578   EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
579 
580   and F3 = F2_to_3 o F1_to_2 o F1 defines:
581 
582   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
583 
584   What has all this to do with the code ?
585 
586   Functions (or aggregates) such as F3 are not implemented as is.
587   Instead, they are decomposed into F2_to_3 o F1_to_2 o F1,
588   and each intermediate aggregate is stored into an internal buffer.
589   This allows supporting all of the F1, F2, F3 aggregates from shared
590   internal buffers, where computation already performed to compute F2
591   is reused when computing F3.
592 
593   @section OBJECT_GRAPH Object graph
594 
595   In terms of object instances, or records, pointers between
596   different buffers define an object instance graph.
597 
598   For example, assuming the following scenario:
599   - A mutex class "M" is instrumented, the instrument name
600   is "wait/sync/mutex/sql/M"
601   - This mutex instrument has been instantiated twice,
602   mutex instances are noted M-1 and M-2
603   - Threads T-A and T-B are locking mutex instance M-1
604   - Threads T-C and T-D are locking mutex instance M-2
605 
606   The performance schema will record the following data:
607   - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
608   - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
609   - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
610 
611   The graph of structures will look like:
612 
613 @verbatim
614   PFS_wait_locker (T-A, M-1) ----------
615                                       |
616                                       v
617                                  PFS_mutex (M-1)
618                                  - m_wait_stat    ------------
619                                       ^                      |
620                                       |                      |
621   PFS_wait_locker (T-B, M-1) ----------                      |
622                                                              v
623                                                         PFS_mutex_class (M)
624                                                         - m_wait_stat
625   PFS_wait_locker (T-C, M-2) ----------                      ^
626                                       |                      |
627                                       v                      |
628                                  PFS_mutex (M-2)             |
629                                  - m_wait_stat    ------------
630                                       ^
631                                       |
632   PFS_wait_locker (T-D, M-2) ----------
633 
634             ||                        ||                     ||
635             ||                        ||                     ||
636             vv                        vv                     vv
637 
638   EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
639 @endverbatim
640 
641   @section ON_THE_FLY On the fly aggregates
642 
643   'On the fly' aggregates are computed during the code execution.
644   This is necessary because the data the aggregate is based on is volatile,
645   and can not be kept indefinitely.
646 
647   With on the fly aggregates:
648   - the writer thread does all the computation
649   - the reader thread accesses the result directly
650 
651   This model is to be avoided if possible, due to the overhead
652   caused when instrumenting code.
653 
654   @section HIGHER_LEVEL Higher level aggregates
655 
656   'Higher level' aggregates are implemented on demand only.
657   The code executing a SELECT from the aggregate table is
658   collecting data from multiple internal buffers to produce the result.
659 
660   With higher level aggregates:
661   - the reader thread does all the computation
662   - the writer thread has no overhead.
663 
664   @section MIXED Mixed level aggregates
665 
666   The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
667   aggregates, for internal buffers that are not permanent.
668 
669   While an object is present in a buffer, the higher level model is used.
670   When an object is about to be destroyed, statistics are saved into
671   a 'parent' buffer with a longer life cycle, to follow the on the fly model.
672 
673   With mixed aggregates:
674   - the reader thread does a lot of complex computation,
675   - the writer thread has minimal overhead, on destroy events.
676 
677   @section IMPL_WAIT Implementation for waits aggregates
678 
679   For waits, the tables that contain aggregated wait data are:
680   - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
681   - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
682   - EVENTS_WAITS_SUMMARY_BY_INSTANCE
683   - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
684   - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
685   - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
686   - FILE_SUMMARY_BY_EVENT_NAME
687   - FILE_SUMMARY_BY_INSTANCE
688   - SOCKET_SUMMARY_BY_INSTANCE
689   - SOCKET_SUMMARY_BY_EVENT_NAME
690   - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
691 
692   The instrumented code that generates wait events consists of:
693   - mutexes (mysql_mutex_t)
694   - rwlocks (mysql_rwlock_t)
695   - conditions (mysql_cond_t)
696   - file io (MYSQL_FILE)
697   - socket io (MYSQL_SOCKET)
698   - table io
699   - table lock
700   - idle
701 
702   The flow of data between aggregates tables varies for each instrumentation.
703 
704   @subsection IMPL_WAIT_MUTEX Mutex waits
705 
706 @verbatim
707   mutex_locker(T, M)
708    |
709    | [1]
710    |
711    |-> pfs_mutex(M)                           =====>> [B], [C]
712    |    |
713    |    | [2]
714    |    |
715    |    |-> pfs_mutex_class(M.class)          =====>> [C]
716    |
717    |-> pfs_thread(T).event_name(M)            =====>> [A], [D], [E], [F]
718         |
719         | [3]
720         |
721      3a |-> pfs_account(U, H).event_name(M)   =====>> [D], [E], [F]
722         .    |
723         .    | [4-RESET]
724         .    |
725      3b .....+-> pfs_user(U).event_name(M)    =====>> [E]
726         .    |
727      3c .....+-> pfs_host(H).event_name(M)    =====>> [F]
728 @endverbatim
729 
730   How to read this diagram:
731   - events that occur during the instrumented code execution are noted with numbers,
732   as in [1]. Code executed by these events has an impact on overhead.
733   - events that occur during TRUNCATE TABLE operations are noted with numbers,
734   followed by "-RESET", as in [4-RESET].
735   Code executed by these events has no impact on overhead,
736   since they are executed by independent monitoring sessions.
737   - events that occur when a reader extracts data from a performance schema table
738   are noted with letters, as in [A]. The name of the table involved,
739   and the method that builds a row are documented. Code executed by these events
740   has no impact on the instrumentation overhead. Note that the table
741   implementation may pull data from different buffers.
742   - nominal code paths are in plain lines. A "nominal" code path corresponds to
743   cases where the performance schema buffers are sized so that no records are lost.
744   - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
745   to edge cases where parent buffers are full, which forces the code to aggregate to
746   grand parents directly.
747 
748   Implemented as:
749   - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
750   - [2] @c destroy_mutex_v1()
751   - [3] @c aggregate_thread_waits()
752   - [4-RESET] @c PFS_account::aggregate_waits()
753   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
754         @c table_ews_by_thread_by_event_name::make_row()
755   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
756         @c table_events_waits_summary_by_instance::make_mutex_row()
757   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
758         @c table_ews_global_by_event_name::make_mutex_row()
759   - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
760         @c table_ews_by_account_by_event_name::make_row()
761   - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
762         @c table_ews_by_user_by_event_name::make_row()
763   - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
764         @c table_ews_by_host_by_event_name::make_row()
765 
  Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is an 'on the fly' aggregate,
767   because the data is collected on the fly by (1) and stored into a buffer,
768   pfs_mutex. The table implementation [B] simply reads the results directly
769   from this buffer.
770 
771   Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
772   because some data is collected on the fly (1),
773   some data is preserved with (2) at a later time in the life cycle,
774   and two different buffers pfs_mutex and pfs_mutex_class are used to store the
775   statistics collected. The table implementation [C] is more complex, since
776   it reads from two buffers pfs_mutex and pfs_mutex_class.
777 
778   @subsection IMPL_WAIT_RWLOCK Rwlock waits
779 
780 @verbatim
781   rwlock_locker(T, R)
782    |
783    | [1]
784    |
785    |-> pfs_rwlock(R)                          =====>> [B], [C]
786    |    |
787    |    | [2]
788    |    |
789    |    |-> pfs_rwlock_class(R.class)         =====>> [C]
790    |
791    |-> pfs_thread(T).event_name(R)            =====>> [A]
792         |
793        ...
794 @endverbatim
795 
796   Implemented as:
797   - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
798   - [2] @c destroy_rwlock_v1()
799   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
800         @c table_ews_by_thread_by_event_name::make_row()
801   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
802         @c table_events_waits_summary_by_instance::make_rwlock_row()
803   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
804         @c table_ews_global_by_event_name::make_rwlock_row()
805 
806   @subsection IMPL_WAIT_COND Cond waits
807 
808 @verbatim
809   cond_locker(T, C)
810    |
811    | [1]
812    |
813    |-> pfs_cond(C)                            =====>> [B], [C]
814    |    |
815    |    | [2]
816    |    |
817    |    |-> pfs_cond_class(C.class)           =====>> [C]
818    |
819    |-> pfs_thread(T).event_name(C)            =====>> [A]
820         |
821        ...
822 @endverbatim
823 
824   Implemented as:
825   - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
826   - [2] @c destroy_cond_v1()
827   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
828         @c table_ews_by_thread_by_event_name::make_row()
829   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
830         @c table_events_waits_summary_by_instance::make_cond_row()
831   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
832         @c table_ews_global_by_event_name::make_cond_row()
833 
834   @subsection IMPL_WAIT_FILE File waits
835 
836 @verbatim
837   file_locker(T, F)
838    |
839    | [1]
840    |
841    |-> pfs_file(F)                            =====>> [B], [C], [D], [E]
842    |    |
843    |    | [2]
844    |    |
845    |    |-> pfs_file_class(F.class)           =====>> [C], [D]
846    |
847    |-> pfs_thread(T).event_name(F)            =====>> [A]
848         |
849        ...
850 @endverbatim
851 
852   Implemented as:
853   - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
854         @c end_file_wait_v1(), ...
855   - [2] @c close_file_v1()
856   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
857         @c table_ews_by_thread_by_event_name::make_row()
858   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
859         @c table_events_waits_summary_by_instance::make_file_row()
860   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
861         @c table_ews_global_by_event_name::make_file_row()
862   - [D] FILE_SUMMARY_BY_EVENT_NAME,
863         @c table_file_summary_by_event_name::make_row()
864   - [E] FILE_SUMMARY_BY_INSTANCE,
865         @c table_file_summary_by_instance::make_row()
866 
867   @subsection IMPL_WAIT_SOCKET Socket waits
868 
869 @verbatim
870   socket_locker(T, S)
871    |
872    | [1]
873    |
874    |-> pfs_socket(S)                            =====>> [A], [B], [C], [D], [E]
875         |
876         | [2]
877         |
878         |-> pfs_socket_class(S.class)           =====>> [C], [D]
879         |
880         |-> pfs_thread(T).event_name(S)         =====>> [A]
881         |
882         | [3]
883         |
884      3a |-> pfs_account(U, H).event_name(S)     =====>> [F], [G], [H]
885         .    |
886         .    | [4-RESET]
887         .    |
888      3b .....+-> pfs_user(U).event_name(S)      =====>> [G]
889         .    |
890      3c .....+-> pfs_host(H).event_name(S)      =====>> [H]
891 @endverbatim
892 
893   Implemented as:
894   - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
895   - [2] @c close_socket_v1()
896   - [3] @c aggregate_thread_waits()
897   - [4] @c PFS_account::aggregate_waits()
898   - [5] @c PFS_host::aggregate_waits()
899   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
900         @c table_ews_by_thread_by_event_name::make_row()
901   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
902         @c table_events_waits_summary_by_instance::make_socket_row()
903   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
904         @c table_ews_global_by_event_name::make_socket_row()
905   - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
906         @c table_socket_summary_by_event_name::make_row()
907   - [E] SOCKET_SUMMARY_BY_INSTANCE,
908         @c table_socket_summary_by_instance::make_row()
909   - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
910         @c table_ews_by_account_by_event_name::make_row()
911   - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
912         @c table_ews_by_user_by_event_name::make_row()
913   - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
914         @c table_ews_by_host_by_event_name::make_row()
915 
916   @subsection IMPL_WAIT_TABLE Table waits
917 
918 @verbatim
919   table_locker(Thread Th, Table Tb, Event = io or lock)
920    |
921    | [1]
922    |
923 1a |-> pfs_table(Tb)                          =====>> [A], [B], [C]
924    |    |
925    |    | [2]
926    |    |
927    |    |-> pfs_table_share(Tb.share)         =====>> [B], [C]
928    |         |
929    |         | [3]
930    |         |
931    |         |-> global_table_io_stat         =====>> [C]
932    |         |
933    |         |-> global_table_lock_stat       =====>> [C]
934    |
935 1b |-> pfs_thread(Th).event_name(E)           =====>> [D], [E], [F], [G]
936    |    |
   |    | [4-RESET]
938    |    |
939    |    |-> pfs_account(U, H).event_name(E)   =====>> [E], [F], [G]
940    |    .    |
941    |    .    | [5-RESET]
942    |    .    |
943    |    .....+-> pfs_user(U).event_name(E)    =====>> [F]
944    |    .    |
945    |    .....+-> pfs_host(H).event_name(E)    =====>> [G]
946    |
947 1c |-> pfs_thread(Th).waits_current(W)        =====>> [H]
948    |
949 1d |-> pfs_thread(Th).waits_history(W)        =====>> [I]
950    |
951 1e |-> waits_history_long(W)                  =====>> [J]
952 @endverbatim
953 
954   Implemented as:
955   - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
956   - [2] @c close_table_v1()
957   - [3] @c drop_table_share_v1()
958   - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
959   - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
960   - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
961         @c table_events_waits_summary_by_instance::make_table_row()
962   - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
963         @c table_os_global_by_type::make_row()
964   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
965         @c table_ews_global_by_event_name::make_table_io_row(),
966         @c table_ews_global_by_event_name::make_table_lock_row()
967   - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
968         @c table_ews_by_thread_by_event_name::make_row()
969   - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
        @c table_ews_by_account_by_event_name::make_row()
971   - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
972         @c table_ews_by_user_by_event_name::make_row()
973   - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
974         @c table_ews_by_host_by_event_name::make_row()
975   - [H] EVENTS_WAITS_CURRENT,
976         @c table_events_waits_common::make_row()
977   - [I] EVENTS_WAITS_HISTORY,
978         @c table_events_waits_common::make_row()
979   - [J] EVENTS_WAITS_HISTORY_LONG,
980         @c table_events_waits_common::make_row()
981 
982   @section IMPL_STAGE Implementation for stages aggregates
983 
  For stages, the tables that contain aggregated data are:
985   - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
986   - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
987   - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
988   - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
989   - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
990 
991 @verbatim
992   start_stage(T, S)
993    |
994    | [1]
995    |
996 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
997    |    |
998    |    | [2]
999    |    |
1000    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
1001    |    .    |
1002    |    .    | [3-RESET]
1003    |    .    |
1004    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1005    |    .    |
1006    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1007    |    .    .    |
1008    |    .    .    | [4-RESET]
1009    | 2d .    .    |
1010 1b |----+----+----+-> pfs_stage_class(S)      =====>> [E]
1011 
1012 @endverbatim
1013 
1014   Implemented as:
1015   - [1] @c start_stage_v1()
1016   - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1017   - [3] @c PFS_account::aggregate_stages()
1018   - [4] @c PFS_host::aggregate_stages()
1019   - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1020         @c table_esgs_by_thread_by_event_name::make_row()
1021   - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1022         @c table_esgs_by_account_by_event_name::make_row()
1023   - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1024         @c table_esgs_by_user_by_event_name::make_row()
1025   - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1026         @c table_esgs_by_host_by_event_name::make_row()
1027   - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1028         @c table_esgs_global_by_event_name::make_row()
1029 
1030 @section IMPL_STATEMENT Implementation for statements consumers
1031 
  For statements, the tables that contain individual event data are:
1033   - EVENTS_STATEMENTS_CURRENT
1034   - EVENTS_STATEMENTS_HISTORY
1035   - EVENTS_STATEMENTS_HISTORY_LONG
1036 
  For statements, the tables that contain aggregated data are:
1038   - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1039   - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1040   - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1041   - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1042   - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1043   - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1044 
1045 @verbatim
1046   statement_locker(T, S)
1047    |
1048    | [1]
1049    |
1050 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
1051    |    |
1052    |    | [2]
1053    |    |
1054    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
1055    |    .    |
1056    |    .    | [3-RESET]
1057    |    .    |
1058    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1059    |    .    |
1060    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1061    |    .    .    |
1062    |    .    .    | [4-RESET]
1063    | 2d .    .    |
1064 1b |----+----+----+-> pfs_statement_class(S)  =====>> [E]
1065    |
1066 1c |-> pfs_thread(T).statement_current(S)     =====>> [F]
1067    |
1068 1d |-> pfs_thread(T).statement_history(S)     =====>> [G]
1069    |
1070 1e |-> statement_history_long(S)              =====>> [H]
1071    |
1072 1f |-> statement_digest(S)                    =====>> [I]
1073 
1074 @endverbatim
1075 
1076   Implemented as:
  - [1] @c start_statement_v1(), @c end_statement_v1()
        (1a, 1b) is an aggregation by EVENT_NAME,
1079         (1c, 1d, 1e) is an aggregation by TIME,
1080         (1f) is an aggregation by DIGEST
1081         all of these are orthogonal,
1082         and implemented in end_statement_v1().
1083   - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1084   - [3] @c PFS_account::aggregate_statements()
1085   - [4] @c PFS_host::aggregate_statements()
1086   - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1087         @c table_esms_by_thread_by_event_name::make_row()
1088   - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1089         @c table_esms_by_account_by_event_name::make_row()
1090   - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1091         @c table_esms_by_user_by_event_name::make_row()
1092   - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1093         @c table_esms_by_host_by_event_name::make_row()
1094   - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1095         @c table_esms_global_by_event_name::make_row()
1096   - [F] EVENTS_STATEMENTS_CURRENT,
1097         @c table_events_statements_current::rnd_next(),
1098         @c table_events_statements_common::make_row()
1099   - [G] EVENTS_STATEMENTS_HISTORY,
1100         @c table_events_statements_history::rnd_next(),
1101         @c table_events_statements_common::make_row()
1102   - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1103         @c table_events_statements_history_long::rnd_next(),
1104         @c table_events_statements_common::make_row()
  - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST,
1106         @c table_esms_by_digest::make_row()
1107 */
1108 
1109 /**
1110   @defgroup Performance_schema Performance Schema
1111   The performance schema component.
1112   For details, see the
1113   @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1114 
1115   @defgroup Performance_schema_implementation Performance Schema Implementation
1116   @ingroup Performance_schema
1117 
1118   @defgroup Performance_schema_tables Performance Schema Tables
1119   @ingroup Performance_schema_implementation
1120 */
1121 
/*
  Thread specific key holding the PFS_thread pointer of the calling thread.
  The interface functions in this file read it with
  my_pthread_getspecific_ptr(PFS_thread*, THR_PFS) and treat a NULL
  result as "this thread is not instrumented".
*/
pthread_key(PFS_thread*, THR_PFS);
/*
  Starts out false.
  NOTE(review): presumably set to true once THR_PFS has been created;
  confirm against the initialization code, which is not in this part
  of the file.
*/
bool THR_PFS_initialized= false;
1124 
/**
  Conversion map from PSI_mutex_operation to enum_operation_type.
  Indexed by enum PSI_mutex_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type mutex_operation_map[]=
{
  OPERATION_TYPE_LOCK,
  OPERATION_TYPE_TRYLOCK
};
1134 
/**
  Conversion map from PSI_rwlock_operation to enum_operation_type.
  Indexed by enum PSI_rwlock_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type rwlock_operation_map[]=
{
  OPERATION_TYPE_READLOCK,
  OPERATION_TYPE_WRITELOCK,
  OPERATION_TYPE_TRYREADLOCK,
  OPERATION_TYPE_TRYWRITELOCK
};
1146 
/**
  Conversion map from PSI_cond_operation to enum_operation_type.
  Indexed by enum PSI_cond_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type cond_operation_map[]=
{
  OPERATION_TYPE_WAIT,
  OPERATION_TYPE_TIMEDWAIT
};
1156 
/**
  Conversion map from PSI_file_operation to enum_operation_type.
  Indexed by enum PSI_file_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type file_operation_map[]=
{
  OPERATION_TYPE_FILECREATE,
  OPERATION_TYPE_FILECREATETMP,
  OPERATION_TYPE_FILEOPEN,
  OPERATION_TYPE_FILESTREAMOPEN,
  OPERATION_TYPE_FILECLOSE,
  OPERATION_TYPE_FILESTREAMCLOSE,
  OPERATION_TYPE_FILEREAD,
  OPERATION_TYPE_FILEWRITE,
  OPERATION_TYPE_FILESEEK,
  OPERATION_TYPE_FILETELL,
  OPERATION_TYPE_FILEFLUSH,
  OPERATION_TYPE_FILESTAT,
  OPERATION_TYPE_FILEFSTAT,
  OPERATION_TYPE_FILECHSIZE,
  OPERATION_TYPE_FILEDELETE,
  OPERATION_TYPE_FILERENAME,
  OPERATION_TYPE_FILESYNC
};
1181 
/**
  Conversion map from PSI_table_io_operation to enum_operation_type.
  Indexed by enum PSI_table_io_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type table_io_operation_map[]=
{
  OPERATION_TYPE_TABLE_FETCH,
  OPERATION_TYPE_TABLE_WRITE_ROW,
  OPERATION_TYPE_TABLE_UPDATE_ROW,
  OPERATION_TYPE_TABLE_DELETE_ROW
};
1193 
/**
  Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
  Indexed by enum PFS_TL_LOCK_TYPE: the position of an entry is its
  lookup key, so the order of the entries below is significant.
  The trailing comment on each entry names the PFS_TL_LOCK_TYPE value
  the entry is meant for.
*/
static enum_operation_type table_lock_operation_map[]=
{
  OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
  OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
  OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
  OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
  OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
  OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
  OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
  OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
  OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
  OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
  OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
};
1212 
/**
  Conversion map from PSI_socket_operation to enum_operation_type.
  Indexed by enum PSI_socket_operation: the position of an entry is its
  lookup key, so the order of the entries below is significant.
*/
static enum_operation_type socket_operation_map[]=
{
  OPERATION_TYPE_SOCKETCREATE,
  OPERATION_TYPE_SOCKETCONNECT,
  OPERATION_TYPE_SOCKETBIND,
  OPERATION_TYPE_SOCKETCLOSE,
  OPERATION_TYPE_SOCKETSEND,
  OPERATION_TYPE_SOCKETRECV,
  OPERATION_TYPE_SOCKETSENDTO,
  OPERATION_TYPE_SOCKETRECVFROM,
  OPERATION_TYPE_SOCKETSENDMSG,
  OPERATION_TYPE_SOCKETRECVMSG,
  OPERATION_TYPE_SOCKETSEEK,
  OPERATION_TYPE_SOCKETOPT,
  OPERATION_TYPE_SOCKETSTAT,
  OPERATION_TYPE_SOCKETSHUTDOWN,
  OPERATION_TYPE_SOCKETSELECT
};
1235 
1236 /**
1237   Build the prefix name of a class of instruments in a category.
1238   For example, this function builds the string 'wait/sync/mutex/sql/' from
1239   a prefix 'wait/sync/mutex' and a category 'sql'.
1240   This prefix is used later to build each instrument name, such as
1241   'wait/sync/mutex/sql/LOCK_open'.
1242   @param prefix               Prefix for this class of instruments
1243   @param category             Category name
1244   @param [out] output         Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1245   @param [out] output_length  Length of the resulting output string.
1246   @return 0 for success, non zero for errors
1247 */
build_prefix(const LEX_STRING * prefix,const char * category,char * output,int * output_length)1248 static int build_prefix(const LEX_STRING *prefix, const char *category,
1249                         char *output, int *output_length)
1250 {
1251   int len= strlen(category);
1252   char *out_ptr= output;
1253   int prefix_length= prefix->length;
1254 
1255   if (unlikely((prefix_length + len + 1) >=
1256                PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1257   {
1258     pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1259                     prefix->str, category);
1260     return 1;
1261   }
1262 
1263   if (unlikely(strchr(category, '/') != NULL))
1264   {
1265     pfs_print_error("build_prefix: invalid category <%s>\n",
1266                     category);
1267     return 1;
1268   }
1269 
1270   /* output = prefix + category + '/' */
1271   memcpy(out_ptr, prefix->str, prefix_length);
1272   out_ptr+= prefix_length;
1273   memcpy(out_ptr, category, len);
1274   out_ptr+= len;
1275   *out_ptr= '/';
1276   out_ptr++;
1277   *output_length= out_ptr - output;
1278 
1279   return 0;
1280 }
1281 
/**
  Common body of the register_xxx_v1() functions.
  The macro expects the enclosing function to return void and to have
  parameters named @c category, @c info and @c count.
  It builds the category prefix with build_prefix(), then for each of the
  @c count entries of @c info it appends m_name to the prefix, calls
  REGISTER_FUNC(name, name_length, m_flags) and stores the resulting key
  in *(info->m_key). The name passed to REGISTER_FUNC is not NUL
  terminated, only its length is given.
  The key is set to 0 for every entry when the prefix cannot be built,
  and for a single entry when its full name is longer than
  PFS_MAX_INFO_NAME_LENGTH.
  The expansion ends with a return statement, so nothing placed after the
  macro in the enclosing function is executed.
  @param KEY_T          Type of the key returned by REGISTER_FUNC
  @param PREFIX         LEX_STRING holding the prefix for this class of
                        instruments
  @param REGISTER_FUNC  Function that registers one instrument class
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC)                \
  KEY_T key;                                                          \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH];                      \
  int prefix_length;                                                  \
  int len;                                                            \
  int full_length;                                                    \
                                                                      \
  DBUG_ASSERT(category != NULL);                                      \
  DBUG_ASSERT(info != NULL);                                          \
  if (unlikely(build_prefix(&PREFIX, category,                        \
                   formatted_name, &prefix_length)))                  \
  {                                                                   \
    for (; count>0; count--, info++)                                  \
      *(info->m_key)= 0;                                              \
    return ;                                                          \
  }                                                                   \
                                                                      \
  for (; count>0; count--, info++)                                    \
  {                                                                   \
    DBUG_ASSERT(info->m_key != NULL);                                 \
    DBUG_ASSERT(info->m_name != NULL);                                \
    len= strlen(info->m_name);                                        \
    full_length= prefix_length + len;                                 \
    if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))              \
    {                                                                 \
      memcpy(formatted_name + prefix_length, info->m_name, len);      \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    }                                                                 \
    else                                                              \
    {                                                                 \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n",  \
                      category, info->m_name);                        \
      key= 0;                                                         \
    }                                                                 \
                                                                      \
    *(info->m_key)= key;                                              \
  }                                                                   \
  return;
1320 
1321 /* Use C linkage for the interface functions. */
1322 
1323 C_MODE_START
1324 
/**
  Implementation of the mutex instrumentation interface.
  Registers @c count mutex instruments under @c category; the whole body
  is generated by REGISTER_BODY_V1.
  @sa PSI_v1::register_mutex.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_mutex_v1(const char *category,
                              PSI_mutex_info_v1 *info,
                              int count)
{
  REGISTER_BODY_V1(PSI_mutex_key,
                   mutex_instrument_prefix,
                   register_mutex_class)
}
1337 
/**
  Implementation of the rwlock instrumentation interface.
  Registers @c count rwlock instruments under @c category; the whole body
  is generated by REGISTER_BODY_V1.
  @sa PSI_v1::register_rwlock.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_rwlock_v1(const char *category,
                               PSI_rwlock_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_rwlock_key,
                   rwlock_instrument_prefix,
                   register_rwlock_class)
}
1350 
/**
  Implementation of the cond instrumentation interface.
  Registers @c count condition instruments under @c category; the whole
  body is generated by REGISTER_BODY_V1.
  @sa PSI_v1::register_cond.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_cond_v1(const char *category,
                             PSI_cond_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_cond_key,
                   cond_instrument_prefix,
                   register_cond_class)
}
1363 
/**
  Implementation of the thread instrumentation interface.
  Registers @c count thread instruments under @c category; the whole body
  is generated by REGISTER_BODY_V1.
  @sa PSI_v1::register_thread.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_thread_v1(const char *category,
                               PSI_thread_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_thread_key,
                   thread_instrument_prefix,
                   register_thread_class)
}
1376 
/**
  Implementation of the file instrumentation interface.
  Registers @c count file instruments under @c category; the whole body
  is generated by REGISTER_BODY_V1.
  @sa PSI_v1::register_file.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_file_v1(const char *category,
                             PSI_file_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_file_key,
                   file_instrument_prefix,
                   register_file_class)
}
1389 
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1390 static void register_stage_v1(const char *category,
1391                               PSI_stage_info_v1 **info_array,
1392                               int count)
1393 {
1394   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1395   int prefix_length;
1396   int len;
1397   int full_length;
1398   PSI_stage_info_v1 *info;
1399 
1400   DBUG_ASSERT(category != NULL);
1401   DBUG_ASSERT(info_array != NULL);
1402   if (unlikely(build_prefix(&stage_instrument_prefix, category,
1403                formatted_name, &prefix_length)))
1404   {
1405     for (; count>0; count--, info_array++)
1406       (*info_array)->m_key= 0;
1407     return ;
1408   }
1409 
1410   for (; count>0; count--, info_array++)
1411   {
1412     info= *info_array;
1413     DBUG_ASSERT(info != NULL);
1414     DBUG_ASSERT(info->m_name != NULL);
1415     len= strlen(info->m_name);
1416     full_length= prefix_length + len;
1417     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1418     {
1419       memcpy(formatted_name + prefix_length, info->m_name, len);
1420       info->m_key= register_stage_class(formatted_name,
1421                                         prefix_length,
1422                                         full_length,
1423                                         info->m_flags);
1424     }
1425     else
1426     {
1427       pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1428                       category, info->m_name);
1429       info->m_key= 0;
1430     }
1431   }
1432   return;
1433 }
1434 
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1435 static void register_statement_v1(const char *category,
1436                                   PSI_statement_info_v1 *info,
1437                                   int count)
1438 {
1439   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1440   int prefix_length;
1441   int len;
1442   int full_length;
1443 
1444   DBUG_ASSERT(category != NULL);
1445   DBUG_ASSERT(info != NULL);
1446   if (unlikely(build_prefix(&statement_instrument_prefix,
1447                             category, formatted_name, &prefix_length)))
1448   {
1449     for (; count>0; count--, info++)
1450       info->m_key= 0;
1451     return ;
1452   }
1453 
1454   for (; count>0; count--, info++)
1455   {
1456     DBUG_ASSERT(info->m_name != NULL);
1457     len= strlen(info->m_name);
1458     full_length= prefix_length + len;
1459     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1460     {
1461       memcpy(formatted_name + prefix_length, info->m_name, len);
1462       info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1463     }
1464     else
1465     {
1466       pfs_print_error("register_statement_v1: name too long <%s>\n",
1467                       info->m_name);
1468       info->m_key= 0;
1469     }
1470   }
1471   return;
1472 }
1473 
/**
  Implementation of the socket instrumentation interface.
  Registers @c count socket instruments under @c category; the whole body
  is generated by REGISTER_BODY_V1.
  @param category  Category name, must not be NULL
  @param info      Array of @c count instrument descriptions; the key of
                   each entry is written through its m_key pointer
                   (0 when the registration of that entry failed)
  @param count     Number of entries in @c info
*/
static void register_socket_v1(const char *category,
                             PSI_socket_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_socket_key,
                   socket_instrument_prefix,
                   register_socket_class)
}
1482 
/**
  Common body of the init_xxx_v1() functions.
  The token T is pasted into the names PFS_T_class, PFS_T, find_T_class(),
  create_T() and PSI_T.
  The instrument class is looked up with find_T_class(KEY); the expansion
  returns NULL when no class is found or when the class is not enabled.
  Otherwise it returns the result of create_T(klass, ID), cast to PSI_T*.
  The expansion ends with a return statement that has no trailing
  semicolon; the caller supplies it.
  @param T    Instrument type token (mutex, rwlock, cond in this file)
  @param KEY  Instrument key to look up
  @param ID   Identity of the instrumented object, passed to create_T()
*/
#define INIT_BODY_V1(T, KEY, ID)                                            \
  PFS_##T##_class *klass;                                                   \
  PFS_##T *pfs;                                                             \
  klass= find_##T##_class(KEY);                                             \
  if (unlikely(klass == NULL))                                              \
    return NULL;                                                            \
  if (! klass->m_enabled)                                                   \
    return NULL;                                                            \
  pfs= create_##T(klass, ID);                                               \
  return reinterpret_cast<PSI_##T *> (pfs)
1493 
/**
  Implementation of the mutex instrumentation interface.
  The whole body is generated by INIT_BODY_V1.
  @sa PSI_v1::init_mutex.
  @param key       Key of the mutex instrument class
  @param identity  Identity of the mutex, passed to create_mutex()
  @return the result of create_mutex() as a PSI_mutex pointer, or NULL
    when the class is not found or is not enabled
*/
static PSI_mutex*
init_mutex_v1(PSI_mutex_key key, const void *identity)
{
  INIT_BODY_V1(mutex, key, identity);
}
1503 
/**
  Implementation of the mutex instrumentation interface.
  Converts the opaque interface pointer back to a PFS_mutex and hands it
  to destroy_mutex().
  @sa PSI_v1::destroy_mutex.
  @param mutex  Instrumented mutex, asserted to be not NULL
*/
static void destroy_mutex_v1(PSI_mutex* mutex)
{
  PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);

  DBUG_ASSERT(pfs != NULL);

  destroy_mutex(pfs);
}
1516 
/**
  Implementation of the rwlock instrumentation interface.
  The whole body is generated by INIT_BODY_V1.
  @sa PSI_v1::init_rwlock.
  @param key       Key of the rwlock instrument class
  @param identity  Identity of the rwlock, passed to create_rwlock()
  @return the result of create_rwlock() as a PSI_rwlock pointer, or NULL
    when the class is not found or is not enabled
*/
static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key, const void *identity)
{
  INIT_BODY_V1(rwlock, key, identity);
}
1526 
/**
  Implementation of the rwlock instrumentation interface.
  Converts the opaque interface pointer back to a PFS_rwlock and hands it
  to destroy_rwlock().
  @sa PSI_v1::destroy_rwlock.
  @param rwlock  Instrumented rwlock, asserted to be not NULL
*/
static void destroy_rwlock_v1(PSI_rwlock* rwlock)
{
  PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);

  DBUG_ASSERT(pfs != NULL);

  destroy_rwlock(pfs);
}
1539 
/**
  Implementation of the cond instrumentation interface.
  The whole body is generated by INIT_BODY_V1.
  @sa PSI_v1::init_cond.
  @param key       Key of the condition instrument class
  @param identity  Identity of the condition, passed to create_cond()
  @return the result of create_cond() as a PSI_cond pointer, or NULL
    when the class is not found or is not enabled
*/
static PSI_cond*
init_cond_v1(PSI_cond_key key, const void *identity)
{
  INIT_BODY_V1(cond, key, identity);
}
1549 
/**
  Implementation of the cond instrumentation interface.
  Converts the opaque interface pointer back to a PFS_cond and hands it
  to destroy_cond().
  @sa PSI_v1::destroy_cond.
  @param cond  Instrumented condition, asserted to be not NULL
*/
static void destroy_cond_v1(PSI_cond* cond)
{
  PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);

  DBUG_ASSERT(pfs != NULL);

  destroy_cond(pfs);
}
1562 
1563 /**
1564   Implementation of the table instrumentation interface.
1565   @sa PSI_v1::get_table_share.
1566 */
1567 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1568 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1569 {
1570   /* Ignore temporary tables and views. */
1571   if (temporary || share->is_view)
1572     return NULL;
1573   /* An instrumented thread is required, for LF_PINS. */
1574   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1575   if (unlikely(pfs_thread == NULL))
1576     return NULL;
1577   PFS_table_share* pfs_share;
1578   pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1579   return reinterpret_cast<PSI_table_share*> (pfs_share);
1580 }
1581 
1582 /**
1583   Implementation of the table instrumentation interface.
1584   @sa PSI_v1::release_table_share.
1585 */
release_table_share_v1(PSI_table_share * share)1586 static void release_table_share_v1(PSI_table_share* share)
1587 {
1588   PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1589 
1590   if (unlikely(pfs == NULL))
1591     return;
1592 
1593   release_table_share(pfs);
1594 }
1595 
1596 /**
1597   Implementation of the table instrumentation interface.
1598   @sa PSI_v1::drop_table_share.
1599 */
1600 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1601 drop_table_share_v1(my_bool temporary,
1602                     const char *schema_name, int schema_name_length,
1603                     const char *table_name, int table_name_length)
1604 {
1605   /* Ignore temporary tables. */
1606   if (temporary)
1607     return;
1608   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1609   if (unlikely(pfs_thread == NULL))
1610     return;
1611   /* TODO: temporary tables */
1612   drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1613                    table_name, table_name_length);
1614 }
1615 
1616 /**
1617   Implementation of the table instrumentation interface.
1618   @sa PSI_v1::open_table.
1619 */
1620 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1621 open_table_v1(PSI_table_share *share, const void *identity)
1622 {
1623   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1624 
1625   if (unlikely(pfs_table_share == NULL))
1626     return NULL;
1627 
1628   /* This object is not to be instrumented. */
1629   if (! pfs_table_share->m_enabled)
1630     return NULL;
1631 
1632   /* This object is instrumented, but all table instruments are disabled. */
1633   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1634     return NULL;
1635 
1636   /*
1637     When the performance schema is off, do not instrument anything.
1638     Table handles have short life cycle, instrumentation will happen
1639     again if needed during the next open().
1640   */
1641   if (! flag_global_instrumentation)
1642     return NULL;
1643 
1644   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1645   if (unlikely(thread == NULL))
1646     return NULL;
1647 
1648   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1649   return reinterpret_cast<PSI_table *> (pfs_table);
1650 }
1651 
1652 /**
1653   Implementation of the table instrumentation interface.
1654   @sa PSI_v1::unbind_table.
1655 */
unbind_table_v1(PSI_table * table)1656 static void unbind_table_v1(PSI_table *table)
1657 {
1658   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1659   if (likely(pfs != NULL))
1660   {
1661     pfs->m_thread_owner= NULL;
1662   }
1663 }
1664 
1665 /**
1666   Implementation of the table instrumentation interface.
1667   @sa PSI_v1::rebind_table.
1668 */
1669 static PSI_table *
rebind_table_v1(PSI_table_share * share,const void * identity,PSI_table * table)1670 rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
1671 {
1672   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1673   if (likely(pfs != NULL))
1674   {
1675     PFS_thread *thread;
1676     DBUG_ASSERT(pfs->m_thread_owner == NULL);
1677 
1678     /* The table handle was already instrumented, reuse it for this thread. */
1679     thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1680 
1681     if (unlikely(! pfs->m_share->m_enabled))
1682     {
1683       destroy_table(pfs);
1684       return NULL;
1685     }
1686 
1687     if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
1688     {
1689       destroy_table(pfs);
1690       return NULL;
1691     }
1692 
1693     if (unlikely(! flag_global_instrumentation))
1694     {
1695       destroy_table(pfs);
1696       return NULL;
1697     }
1698 
1699     pfs->m_thread_owner= thread;
1700     return table;
1701   }
1702 
1703   /* See open_table_v1() */
1704 
1705   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1706 
1707   if (unlikely(pfs_table_share == NULL))
1708     return NULL;
1709 
1710   if (! pfs_table_share->m_enabled)
1711     return NULL;
1712 
1713   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1714     return NULL;
1715 
1716   if (! flag_global_instrumentation)
1717     return NULL;
1718 
1719   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1720   if (unlikely(thread == NULL))
1721     return NULL;
1722 
1723   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1724   return reinterpret_cast<PSI_table *> (pfs_table);
1725 }
1726 
1727 /**
1728   Implementation of the table instrumentation interface.
1729   @sa PSI_v1::close_table.
1730 */
close_table_v1(PSI_table * table)1731 static void close_table_v1(PSI_table *table)
1732 {
1733   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1734   if (unlikely(pfs == NULL))
1735     return;
1736   pfs->aggregate();
1737   destroy_table(pfs);
1738 }
1739 
1740 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1741 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1742                const struct sockaddr *addr, socklen_t addr_len)
1743 {
1744   PFS_socket_class *klass;
1745   PFS_socket *pfs;
1746   klass= find_socket_class(key);
1747   if (unlikely(klass == NULL))
1748     return NULL;
1749   if (! klass->m_enabled)
1750     return NULL;
1751   pfs= create_socket(klass, fd, addr, addr_len);
1752   return reinterpret_cast<PSI_socket *> (pfs);
1753 }
1754 
destroy_socket_v1(PSI_socket * socket)1755 static void destroy_socket_v1(PSI_socket *socket)
1756 {
1757   PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1758 
1759   DBUG_ASSERT(pfs != NULL);
1760 
1761   destroy_socket(pfs);
1762 }
1763 
1764 /**
1765   Implementation of the file instrumentation interface.
1766   @sa PSI_v1::create_file.
1767 */
create_file_v1(PSI_file_key key,const char * name,File file)1768 static void create_file_v1(PSI_file_key key, const char *name, File file)
1769 {
1770   if (! flag_global_instrumentation)
1771     return;
1772   int index= (int) file;
1773   if (unlikely(index < 0))
1774     return;
1775   PFS_file_class *klass= find_file_class(key);
1776   if (unlikely(klass == NULL))
1777     return;
1778   if (! klass->m_enabled)
1779     return;
1780 
1781   /* A thread is needed for LF_PINS */
1782   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1783   if (unlikely(pfs_thread == NULL))
1784     return;
1785 
1786   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
1787     return;
1788 
1789   /*
1790     We want this check after pfs_thread->m_enabled,
1791     to avoid reporting false loss.
1792   */
1793   if (unlikely(index >= file_handle_max))
1794   {
1795     file_handle_lost++;
1796     return;
1797   }
1798 
1799   uint len= strlen(name);
1800   PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
1801 
1802   file_handle_array[index]= pfs_file;
1803 }
1804 
1805 /**
1806   Arguments given from a parent to a child thread, packaged in one structure.
1807   This data is used when spawning a new instrumented thread.
1808   @sa pfs_spawn_thread.
1809 */
1810 struct PFS_spawn_thread_arg
1811 {
1812   ulonglong m_thread_internal_id;
1813   char m_username[USERNAME_LENGTH];
1814   uint m_username_length;
1815   char m_hostname[HOSTNAME_LENGTH];
1816   uint m_hostname_length;
1817 
1818   PSI_thread_key m_child_key;
1819   const void *m_child_identity;
1820   void *(*m_user_start_routine)(void*);
1821   void *m_user_arg;
1822 };
1823 
pfs_spawn_thread(void * arg)1824 void* pfs_spawn_thread(void *arg)
1825 {
1826   PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
1827   void *user_arg;
1828   void *(*user_start_routine)(void*);
1829 
1830   PFS_thread *pfs;
1831 
1832   /* First, attach instrumentation to this newly created pthread. */
1833   PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
1834   if (likely(klass != NULL))
1835   {
1836     pfs= create_thread(klass, typed_arg->m_child_identity, 0);
1837     if (likely(pfs != NULL))
1838     {
1839       clear_thread_account(pfs);
1840 
1841       pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;
1842 
1843       memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
1844       pfs->m_username_length= typed_arg->m_username_length;
1845 
1846       memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
1847       pfs->m_hostname_length= typed_arg->m_hostname_length;
1848 
1849       set_thread_account(pfs);
1850     }
1851   }
1852   else
1853   {
1854     pfs= NULL;
1855   }
1856   my_pthread_setspecific_ptr(THR_PFS, pfs);
1857 
1858   /*
1859     Secondly, free the memory allocated in spawn_thread_v1().
1860     It is preferable to do this before invoking the user
1861     routine, to avoid memory leaks at shutdown, in case
1862     the server exits without waiting for this thread.
1863   */
1864   user_start_routine= typed_arg->m_user_start_routine;
1865   user_arg= typed_arg->m_user_arg;
1866   my_free(typed_arg);
1867 
1868   /* Then, execute the user code for this thread. */
1869   (*user_start_routine)(user_arg);
1870 
1871   return NULL;
1872 }
1873 
1874 /**
1875   Implementation of the thread instrumentation interface.
1876   @sa PSI_v1::spawn_thread.
1877 */
spawn_thread_v1(PSI_thread_key key,pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)1878 static int spawn_thread_v1(PSI_thread_key key,
1879                            pthread_t *thread, const pthread_attr_t *attr,
1880                            void *(*start_routine)(void*), void *arg)
1881 {
1882   PFS_spawn_thread_arg *psi_arg;
1883   PFS_thread *parent;
1884 
1885   /* psi_arg can not be global, and can not be a local variable. */
1886   psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
1887                                              MYF(MY_WME));
1888   if (unlikely(psi_arg == NULL))
1889     return EAGAIN;
1890 
1891   psi_arg->m_child_key= key;
1892   psi_arg->m_child_identity= (arg ? arg : thread);
1893   psi_arg->m_user_start_routine= start_routine;
1894   psi_arg->m_user_arg= arg;
1895 
1896   parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1897   if (parent != NULL)
1898   {
1899     /*
1900       Make a copy of the parent attributes.
1901       This is required, because instrumentation for this thread (the parent)
1902       may be destroyed before the child thread instrumentation is created.
1903     */
1904     psi_arg->m_thread_internal_id= parent->m_thread_internal_id;
1905 
1906     memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
1907     psi_arg->m_username_length= parent->m_username_length;
1908 
1909     memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
1910     psi_arg->m_hostname_length= parent->m_hostname_length;
1911   }
1912   else
1913   {
1914     psi_arg->m_thread_internal_id= 0;
1915     psi_arg->m_username_length= 0;
1916     psi_arg->m_hostname_length= 0;
1917   }
1918 
1919   int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
1920   if (unlikely(result != 0))
1921     my_free(psi_arg);
1922   return result;
1923 }
1924 
1925 /**
1926   Implementation of the thread instrumentation interface.
1927   @sa PSI_v1::new_thread.
1928 */
1929 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1930 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1931 {
1932   PFS_thread *pfs;
1933 
1934   PFS_thread_class *klass= find_thread_class(key);
1935   if (likely(klass != NULL))
1936     pfs= create_thread(klass, identity, processlist_id);
1937   else
1938     pfs= NULL;
1939 
1940   return reinterpret_cast<PSI_thread*> (pfs);
1941 }
1942 
1943 /**
1944   Implementation of the thread instrumentation interface.
1945   @sa PSI_v1::set_thread_id.
1946 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1947 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1948 {
1949   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1950   if (unlikely(pfs == NULL))
1951     return;
1952   pfs->m_processlist_id= processlist_id;
1953 }
1954 
1955 /**
1956   Implementation of the thread instrumentation interface.
1957   @sa PSI_v1::get_thread_id.
1958 */
1959 static PSI_thread*
get_thread_v1(void)1960 get_thread_v1(void)
1961 {
1962   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1963   return reinterpret_cast<PSI_thread*> (pfs);
1964 }
1965 
1966 /**
1967   Implementation of the thread instrumentation interface.
1968   @sa PSI_v1::set_thread_user.
1969 */
set_thread_user_v1(const char * user,int user_len)1970 static void set_thread_user_v1(const char *user, int user_len)
1971 {
1972   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1973 
1974   DBUG_ASSERT((user != NULL) || (user_len == 0));
1975   DBUG_ASSERT(user_len >= 0);
1976   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
1977 
1978   if (unlikely(pfs == NULL))
1979     return;
1980 
1981   aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
1982 
1983   pfs->m_session_lock.allocated_to_dirty();
1984 
1985   clear_thread_account(pfs);
1986 
1987   if (user_len > 0)
1988     memcpy(pfs->m_username, user, user_len);
1989   pfs->m_username_length= user_len;
1990 
1991   set_thread_account(pfs);
1992 
1993   bool enabled= true;
1994   if (flag_thread_instrumentation)
1995   {
1996     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
1997     {
1998       /*
1999         TODO: performance improvement.
2000         Once performance_schema.USERS is exposed,
2001         we can use PFS_user::m_enabled instead of looking up
2002         SETUP_ACTORS every time.
2003       */
2004       lookup_setup_actor(pfs,
2005                          pfs->m_username, pfs->m_username_length,
2006                          pfs->m_hostname, pfs->m_hostname_length,
2007                          &enabled);
2008     }
2009   }
2010 
2011   pfs->m_enabled= enabled;
2012 
2013   pfs->m_session_lock.dirty_to_allocated();
2014 }
2015 
2016 /**
2017   Implementation of the thread instrumentation interface.
2018   @sa PSI_v1::set_thread_account.
2019 */
set_thread_account_v1(const char * user,int user_len,const char * host,int host_len)2020 static void set_thread_account_v1(const char *user, int user_len,
2021                                     const char *host, int host_len)
2022 {
2023   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2024 
2025   DBUG_ASSERT((user != NULL) || (user_len == 0));
2026   DBUG_ASSERT(user_len >= 0);
2027   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
2028   DBUG_ASSERT((host != NULL) || (host_len == 0));
2029   DBUG_ASSERT(host_len >= 0);
2030 
2031   host_len= min<size_t>(host_len, sizeof(pfs->m_hostname));
2032 
2033   if (unlikely(pfs == NULL))
2034     return;
2035 
2036   pfs->m_session_lock.allocated_to_dirty();
2037 
2038   clear_thread_account(pfs);
2039 
2040   if (acl_is_utility_user(user, host, NULL)) {
2041     /* We do not want the utility user to show up in any PFS statistics,
2042     so we keep this pfs session dirty. This fixes many, but not all tables.
2043     The remaining seems to honor m_enabled, so we also set that to false. */
2044     pfs->m_enabled= false;
2045     pfs->m_disable_instrumentation = true;
2046     return;
2047   }
2048 
2049   if (host_len > 0)
2050     memcpy(pfs->m_hostname, host, host_len);
2051   pfs->m_hostname_length= host_len;
2052 
2053   if (user_len > 0)
2054     memcpy(pfs->m_username, user, user_len);
2055   pfs->m_username_length= user_len;
2056 
2057   set_thread_account(pfs);
2058 
2059   bool enabled= true;
2060   if (flag_thread_instrumentation)
2061   {
2062     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
2063     {
2064       /*
2065         TODO: performance improvement.
2066         Once performance_schema.USERS is exposed,
2067         we can use PFS_user::m_enabled instead of looking up
2068         SETUP_ACTORS every time.
2069       */
2070       lookup_setup_actor(pfs,
2071                          pfs->m_username, pfs->m_username_length,
2072                          pfs->m_hostname, pfs->m_hostname_length,
2073                          &enabled);
2074     }
2075   }
2076   pfs->m_enabled= enabled;
2077 
2078   pfs->m_session_lock.dirty_to_allocated();
2079 }
2080 
2081 /**
2082   Implementation of the thread instrumentation interface.
2083   @sa PSI_v1::set_thread_db.
2084 */
set_thread_db_v1(const char * db,int db_len)2085 static void set_thread_db_v1(const char* db, int db_len)
2086 {
2087   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2088 
2089   DBUG_ASSERT((db != NULL) || (db_len == 0));
2090   DBUG_ASSERT(db_len >= 0);
2091   DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2092 
2093   if (likely(pfs != NULL))
2094   {
2095     pfs->m_stmt_lock.allocated_to_dirty();
2096     if (db_len > 0)
2097       memcpy(pfs->m_dbname, db, db_len);
2098     pfs->m_dbname_length= db_len;
2099     pfs->m_stmt_lock.dirty_to_allocated();
2100   }
2101 }
2102 
2103 /**
2104   Implementation of the thread instrumentation interface.
2105   @sa PSI_v1::set_thread_command.
2106 */
set_thread_command_v1(int command)2107 static void set_thread_command_v1(int command)
2108 {
2109   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2110 
2111   DBUG_ASSERT(command >= 0);
2112   DBUG_ASSERT(command <= (int) COM_END);
2113 
2114   if (likely(pfs != NULL))
2115   {
2116     pfs->m_command= command;
2117   }
2118 }
2119 
2120 /**
2121   Implementation of the thread instrumentation interface.
2122   @sa PSI_v1::set_thread_start_time.
2123 */
set_thread_start_time_v1(time_t start_time)2124 static void set_thread_start_time_v1(time_t start_time)
2125 {
2126   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2127 
2128   if (likely(pfs != NULL))
2129   {
2130     pfs->m_start_time= start_time;
2131   }
2132 }
2133 
/**
  Implementation of the thread instrumentation interface.
  Deprecated: this implementation does nothing.
  @param state  ignored
  @sa PSI_v1::set_thread_state.
*/
static void set_thread_state_v1(const char *state)
{
  /* DEPRECATED. */
}
2142 
2143 /**
2144   Implementation of the thread instrumentation interface.
2145   @sa PSI_v1::set_thread_info.
2146 */
set_thread_info_v1(const char * info,uint info_len)2147 static void set_thread_info_v1(const char* info, uint info_len)
2148 {
2149   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2150 
2151   DBUG_ASSERT((info != NULL) || (info_len == 0));
2152 
2153   if (likely(pfs != NULL))
2154   {
2155     if ((info != NULL) && (info_len > 0))
2156     {
2157       if (info_len > sizeof(pfs->m_processlist_info))
2158         info_len= sizeof(pfs->m_processlist_info);
2159 
2160       pfs->m_stmt_lock.allocated_to_dirty();
2161       memcpy(pfs->m_processlist_info, info, info_len);
2162       pfs->m_processlist_info_length= info_len;
2163       pfs->m_stmt_lock.dirty_to_allocated();
2164     }
2165     else
2166     {
2167       pfs->m_stmt_lock.allocated_to_dirty();
2168       pfs->m_processlist_info_length= 0;
2169       pfs->m_stmt_lock.dirty_to_allocated();
2170     }
2171   }
2172 }
2173 
2174 /**
2175   Implementation of the thread instrumentation interface.
2176   @sa PSI_v1::set_thread.
2177 */
set_thread_v1(PSI_thread * thread)2178 static void set_thread_v1(PSI_thread* thread)
2179 {
2180   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2181   my_pthread_setspecific_ptr(THR_PFS, pfs);
2182 }
2183 
2184 /**
2185   Implementation of the thread instrumentation interface.
2186   @sa PSI_v1::delete_current_thread.
2187 */
delete_current_thread_v1(void)2188 static void delete_current_thread_v1(void)
2189 {
2190   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2191   if (thread != NULL)
2192   {
2193     aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2194     my_pthread_setspecific_ptr(THR_PFS, NULL);
2195     destroy_thread(thread);
2196   }
2197 }
2198 
2199 /**
2200   Implementation of the thread instrumentation interface.
2201   @sa PSI_v1::delete_thread.
2202 */
delete_thread_v1(PSI_thread * thread)2203 static void delete_thread_v1(PSI_thread *thread)
2204 {
2205   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2206 
2207   if (pfs != NULL)
2208   {
2209     aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2210     destroy_thread(pfs);
2211   }
2212 }
2213 
2214 /**
2215   Implementation of the mutex instrumentation interface.
2216   @sa PSI_v1::start_mutex_wait.
2217 */
2218 static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state * state,PSI_mutex * mutex,PSI_mutex_operation op,const char * src_file,uint src_line)2219 start_mutex_wait_v1(PSI_mutex_locker_state *state,
2220                     PSI_mutex *mutex, PSI_mutex_operation op,
2221                     const char *src_file, uint src_line)
2222 {
2223   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
2224   DBUG_ASSERT((int) op >= 0);
2225   DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
2226   DBUG_ASSERT(state != NULL);
2227 
2228   DBUG_ASSERT(pfs_mutex != NULL);
2229   DBUG_ASSERT(pfs_mutex->m_class != NULL);
2230 
2231   if (! pfs_mutex->m_enabled)
2232     return NULL;
2233 
2234   uint flags;
2235   ulonglong timer_start= 0;
2236 
2237   if (flag_thread_instrumentation)
2238   {
2239     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2240     if (unlikely(pfs_thread == NULL))
2241       return NULL;
2242     if (! pfs_thread->m_enabled)
2243       return NULL;
2244     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2245     flags= STATE_FLAG_THREAD;
2246 
2247     if (pfs_mutex->m_timed)
2248     {
2249       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2250       state->m_timer_start= timer_start;
2251       flags|= STATE_FLAG_TIMED;
2252     }
2253 
2254     if (flag_events_waits_current)
2255     {
2256       if (unlikely(pfs_thread->m_events_waits_current >=
2257                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2258       {
2259         locker_lost++;
2260         return NULL;
2261       }
2262       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2263       state->m_wait= wait;
2264       flags|= STATE_FLAG_EVENT;
2265 
2266       PFS_events_waits *parent_event= wait - 1;
2267       wait->m_event_type= EVENT_TYPE_WAIT;
2268       wait->m_nesting_event_id= parent_event->m_event_id;
2269       wait->m_nesting_event_type= parent_event->m_event_type;
2270 
2271       wait->m_thread= pfs_thread;
2272       wait->m_class= pfs_mutex->m_class;
2273       wait->m_timer_start= timer_start;
2274       wait->m_timer_end= 0;
2275       wait->m_object_instance_addr= pfs_mutex->m_identity;
2276       wait->m_event_id= pfs_thread->m_event_id++;
2277       wait->m_end_event_id= 0;
2278       wait->m_operation= mutex_operation_map[(int) op];
2279       wait->m_source_file= src_file;
2280       wait->m_source_line= src_line;
2281       wait->m_wait_class= WAIT_CLASS_MUTEX;
2282 
2283       pfs_thread->m_events_waits_current++;
2284     }
2285   }
2286   else
2287   {
2288     if (pfs_mutex->m_timed)
2289     {
2290       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2291       state->m_timer_start= timer_start;
2292       flags= STATE_FLAG_TIMED;
2293       state->m_thread= NULL;
2294     }
2295     else
2296     {
2297       /*
2298         Complete shortcut.
2299       */
2300       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2301       pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
2302       return NULL;
2303     }
2304   }
2305 
2306   state->m_flags= flags;
2307   state->m_mutex= mutex;
2308   return reinterpret_cast<PSI_mutex_locker*> (state);
2309 }
2310 
2311 /**
2312   Implementation of the rwlock instrumentation interface.
2313   @sa PSI_v1::start_rwlock_rdwait
2314   @sa PSI_v1::start_rwlock_wrwait
2315 */
2316 static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state * state,PSI_rwlock * rwlock,PSI_rwlock_operation op,const char * src_file,uint src_line)2317 start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
2318                      PSI_rwlock *rwlock,
2319                      PSI_rwlock_operation op,
2320                      const char *src_file, uint src_line)
2321 {
2322   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
2323   DBUG_ASSERT(static_cast<int> (op) >= 0);
2324   DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
2325   DBUG_ASSERT(state != NULL);
2326   DBUG_ASSERT(pfs_rwlock != NULL);
2327   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
2328 
2329   if (! pfs_rwlock->m_enabled)
2330     return NULL;
2331 
2332   uint flags;
2333   ulonglong timer_start= 0;
2334 
2335   if (flag_thread_instrumentation)
2336   {
2337     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2338     if (unlikely(pfs_thread == NULL))
2339       return NULL;
2340     if (! pfs_thread->m_enabled)
2341       return NULL;
2342     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2343     flags= STATE_FLAG_THREAD;
2344 
2345     if (pfs_rwlock->m_timed)
2346     {
2347       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2348       state->m_timer_start= timer_start;
2349       flags|= STATE_FLAG_TIMED;
2350     }
2351 
2352     if (flag_events_waits_current)
2353     {
2354       if (unlikely(pfs_thread->m_events_waits_current >=
2355                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2356       {
2357         locker_lost++;
2358         return NULL;
2359       }
2360       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2361       state->m_wait= wait;
2362       flags|= STATE_FLAG_EVENT;
2363 
2364       PFS_events_waits *parent_event= wait - 1;
2365       wait->m_event_type= EVENT_TYPE_WAIT;
2366       wait->m_nesting_event_id= parent_event->m_event_id;
2367       wait->m_nesting_event_type= parent_event->m_event_type;
2368 
2369       wait->m_thread= pfs_thread;
2370       wait->m_class= pfs_rwlock->m_class;
2371       wait->m_timer_start= timer_start;
2372       wait->m_timer_end= 0;
2373       wait->m_object_instance_addr= pfs_rwlock->m_identity;
2374       wait->m_event_id= pfs_thread->m_event_id++;
2375       wait->m_end_event_id= 0;
2376       wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
2377       wait->m_source_file= src_file;
2378       wait->m_source_line= src_line;
2379       wait->m_wait_class= WAIT_CLASS_RWLOCK;
2380 
2381       pfs_thread->m_events_waits_current++;
2382     }
2383   }
2384   else
2385   {
2386     if (pfs_rwlock->m_timed)
2387     {
2388       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2389       state->m_timer_start= timer_start;
2390       flags= STATE_FLAG_TIMED;
2391       state->m_thread= NULL;
2392     }
2393     else
2394     {
2395       /*
2396         Complete shortcut.
2397       */
2398       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2399       pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
2400       return NULL;
2401     }
2402   }
2403 
2404   state->m_flags= flags;
2405   state->m_rwlock= rwlock;
2406   return reinterpret_cast<PSI_rwlock_locker*> (state);
2407 }
2408 
2409 /**
2410   Implementation of the cond instrumentation interface.
2411   @sa PSI_v1::start_cond_wait.
2412 */
2413 static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state * state,PSI_cond * cond,PSI_mutex * mutex,PSI_cond_operation op,const char * src_file,uint src_line)2414 start_cond_wait_v1(PSI_cond_locker_state *state,
2415                    PSI_cond *cond, PSI_mutex *mutex,
2416                    PSI_cond_operation op,
2417                    const char *src_file, uint src_line)
2418 {
2419   /*
2420     Note about the unused PSI_mutex *mutex parameter:
2421     In the pthread library, a call to pthread_cond_wait()
2422     causes an unlock() + lock() on the mutex associated with the condition.
2423     This mutex operation is not instrumented, so the mutex will still
2424     appear as locked when a thread is waiting on a condition.
2425     This has no impact now, as unlock_mutex() is not recording events.
2426     When unlock_mutex() is implemented by later work logs,
2427     this parameter here will be used to adjust the mutex state,
2428     in start_cond_wait_v1() and end_cond_wait_v1().
2429   */
2430   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
2431   DBUG_ASSERT(static_cast<int> (op) >= 0);
2432   DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
2433   DBUG_ASSERT(state != NULL);
2434   DBUG_ASSERT(pfs_cond != NULL);
2435   DBUG_ASSERT(pfs_cond->m_class != NULL);
2436 
2437   if (! pfs_cond->m_enabled)
2438     return NULL;
2439 
2440   uint flags;
2441   ulonglong timer_start= 0;
2442 
2443   if (flag_thread_instrumentation)
2444   {
2445     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2446     if (unlikely(pfs_thread == NULL))
2447       return NULL;
2448     if (! pfs_thread->m_enabled)
2449       return NULL;
2450     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2451     flags= STATE_FLAG_THREAD;
2452 
2453     if (pfs_cond->m_timed)
2454     {
2455       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2456       state->m_timer_start= timer_start;
2457       flags|= STATE_FLAG_TIMED;
2458     }
2459 
2460     if (flag_events_waits_current)
2461     {
2462       if (unlikely(pfs_thread->m_events_waits_current >=
2463                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2464       {
2465         locker_lost++;
2466         return NULL;
2467       }
2468       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2469       state->m_wait= wait;
2470       flags|= STATE_FLAG_EVENT;
2471 
2472       PFS_events_waits *parent_event= wait - 1;
2473       wait->m_event_type= EVENT_TYPE_WAIT;
2474       wait->m_nesting_event_id= parent_event->m_event_id;
2475       wait->m_nesting_event_type= parent_event->m_event_type;
2476 
2477       wait->m_thread= pfs_thread;
2478       wait->m_class= pfs_cond->m_class;
2479       wait->m_timer_start= timer_start;
2480       wait->m_timer_end= 0;
2481       wait->m_object_instance_addr= pfs_cond->m_identity;
2482       wait->m_event_id= pfs_thread->m_event_id++;
2483       wait->m_end_event_id= 0;
2484       wait->m_operation= cond_operation_map[static_cast<int> (op)];
2485       wait->m_source_file= src_file;
2486       wait->m_source_line= src_line;
2487       wait->m_wait_class= WAIT_CLASS_COND;
2488 
2489       pfs_thread->m_events_waits_current++;
2490     }
2491   }
2492   else
2493   {
2494     if (pfs_cond->m_timed)
2495     {
2496       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2497       state->m_timer_start= timer_start;
2498       flags= STATE_FLAG_TIMED;
2499     }
2500     else
2501     {
2502       /*
2503         Complete shortcut.
2504       */
2505       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2506       pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
2507       return NULL;
2508     }
2509   }
2510 
2511   state->m_flags= flags;
2512   state->m_cond= cond;
2513   state->m_mutex= mutex;
2514   return reinterpret_cast<PSI_cond_locker*> (state);
2515 }
2516 
lock_flags_to_lock_type(uint flags)2517 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2518 {
2519   enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2520 
2521   switch (value)
2522   {
2523     case TL_READ:
2524       return PFS_TL_READ;
2525     case TL_READ_WITH_SHARED_LOCKS:
2526       return PFS_TL_READ_WITH_SHARED_LOCKS;
2527     case TL_READ_HIGH_PRIORITY:
2528       return PFS_TL_READ_HIGH_PRIORITY;
2529     case TL_READ_NO_INSERT:
2530       return PFS_TL_READ_NO_INSERT;
2531     case TL_WRITE_ALLOW_WRITE:
2532       return PFS_TL_WRITE_ALLOW_WRITE;
2533     case TL_WRITE_CONCURRENT_INSERT:
2534       return PFS_TL_WRITE_CONCURRENT_INSERT;
2535     case TL_WRITE_DELAYED:
2536       return PFS_TL_WRITE_DELAYED;
2537     case TL_WRITE_LOW_PRIORITY:
2538       return PFS_TL_WRITE_LOW_PRIORITY;
2539     case TL_WRITE:
2540       return PFS_TL_WRITE;
2541 
2542     case TL_WRITE_ONLY:
2543     case TL_IGNORE:
2544     case TL_UNLOCK:
2545     case TL_READ_DEFAULT:
2546     case TL_WRITE_DEFAULT:
2547     default:
2548       DBUG_ASSERT(false);
2549   }
2550 
2551   /* Dead code */
2552   return PFS_TL_READ;
2553 }
2554 
external_lock_flags_to_lock_type(uint flags)2555 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2556 {
2557   DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2558   return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2559 }
2560 
2561 /**
2562   Implementation of the table instrumentation interface.
2563   @sa PSI_v1::start_table_io_wait_v1
2564 */
2565 static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_io_operation op,uint index,const char * src_file,uint src_line)2566 start_table_io_wait_v1(PSI_table_locker_state *state,
2567                        PSI_table *table,
2568                        PSI_table_io_operation op,
2569                        uint index,
2570                        const char *src_file, uint src_line)
2571 {
2572   DBUG_ASSERT(static_cast<int> (op) >= 0);
2573   DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
2574   DBUG_ASSERT(state != NULL);
2575   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2576   DBUG_ASSERT(pfs_table != NULL);
2577   DBUG_ASSERT(pfs_table->m_share != NULL);
2578 
2579   if (! pfs_table->m_io_enabled)
2580     return NULL;
2581 
2582   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2583 
2584   uint flags;
2585   ulonglong timer_start= 0;
2586 
2587   if (flag_thread_instrumentation)
2588   {
2589     if (pfs_thread == NULL)
2590       return NULL;
2591     if (! pfs_thread->m_enabled)
2592       return NULL;
2593     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2594     flags= STATE_FLAG_THREAD;
2595 
2596     if (pfs_table->m_io_timed)
2597     {
2598       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2599       state->m_timer_start= timer_start;
2600       flags|= STATE_FLAG_TIMED;
2601     }
2602 
2603     if (flag_events_waits_current)
2604     {
2605       if (unlikely(pfs_thread->m_events_waits_current >=
2606                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2607       {
2608         locker_lost++;
2609         return NULL;
2610       }
2611       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2612       state->m_wait= wait;
2613       flags|= STATE_FLAG_EVENT;
2614 
2615       PFS_events_waits *parent_event= wait - 1;
2616       wait->m_event_type= EVENT_TYPE_WAIT;
2617       wait->m_nesting_event_id= parent_event->m_event_id;
2618       wait->m_nesting_event_type= parent_event->m_event_type;
2619 
2620       PFS_table_share *share= pfs_table->m_share;
2621       wait->m_thread= pfs_thread;
2622       wait->m_class= &global_table_io_class;
2623       wait->m_timer_start= timer_start;
2624       wait->m_timer_end= 0;
2625       wait->m_object_instance_addr= pfs_table->m_identity;
2626       wait->m_event_id= pfs_thread->m_event_id++;
2627       wait->m_end_event_id= 0;
2628       wait->m_operation= table_io_operation_map[static_cast<int> (op)];
2629       wait->m_flags= 0;
2630       wait->m_object_type= share->get_object_type();
2631       wait->m_weak_table_share= share;
2632       wait->m_weak_version= share->get_version();
2633       wait->m_index= index;
2634       wait->m_source_file= src_file;
2635       wait->m_source_line= src_line;
2636       wait->m_wait_class= WAIT_CLASS_TABLE;
2637 
2638       pfs_thread->m_events_waits_current++;
2639     }
2640   }
2641   else
2642   {
2643     if (pfs_table->m_io_timed)
2644     {
2645       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2646       state->m_timer_start= timer_start;
2647       flags= STATE_FLAG_TIMED;
2648     }
2649     else
2650     {
2651       /* TODO: consider a shortcut here */
2652       flags= 0;
2653     }
2654   }
2655 
2656   state->m_flags= flags;
2657   state->m_table= table;
2658   state->m_io_operation= op;
2659   state->m_index= index;
2660   return reinterpret_cast<PSI_table_locker*> (state);
2661 }
2662 
2663 /**
2664   Implementation of the table instrumentation interface.
2665   @sa PSI_v1::start_table_lock_wait.
2666 */
2667 static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_lock_operation op,ulong op_flags,const char * src_file,uint src_line)2668 start_table_lock_wait_v1(PSI_table_locker_state *state,
2669                          PSI_table *table,
2670                          PSI_table_lock_operation op,
2671                          ulong op_flags,
2672                          const char *src_file, uint src_line)
2673 {
2674   DBUG_ASSERT(state != NULL);
2675   DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));
2676 
2677   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2678 
2679   DBUG_ASSERT(pfs_table != NULL);
2680   DBUG_ASSERT(pfs_table->m_share != NULL);
2681 
2682   if (! pfs_table->m_lock_enabled)
2683     return NULL;
2684 
2685   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2686 
2687   PFS_TL_LOCK_TYPE lock_type;
2688 
2689   switch (op)
2690   {
2691     case PSI_TABLE_LOCK:
2692       lock_type= lock_flags_to_lock_type(op_flags);
2693       break;
2694     case PSI_TABLE_EXTERNAL_LOCK:
2695       /*
2696         See the handler::external_lock() API design,
2697         there is no handler::external_unlock().
2698       */
2699       if (op_flags == F_UNLCK)
2700         return NULL;
2701       lock_type= external_lock_flags_to_lock_type(op_flags);
2702       break;
2703     default:
2704       lock_type= PFS_TL_READ;
2705       DBUG_ASSERT(false);
2706   }
2707 
2708   DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));
2709 
2710   uint flags;
2711   ulonglong timer_start= 0;
2712 
2713   if (flag_thread_instrumentation)
2714   {
2715     if (pfs_thread == NULL)
2716       return NULL;
2717     if (! pfs_thread->m_enabled)
2718       return NULL;
2719     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2720     flags= STATE_FLAG_THREAD;
2721 
2722     if (pfs_table->m_lock_timed)
2723     {
2724       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2725       state->m_timer_start= timer_start;
2726       flags|= STATE_FLAG_TIMED;
2727     }
2728 
2729     if (flag_events_waits_current)
2730     {
2731       if (unlikely(pfs_thread->m_events_waits_current >=
2732                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2733       {
2734         locker_lost++;
2735         return NULL;
2736       }
2737       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2738       state->m_wait= wait;
2739       flags|= STATE_FLAG_EVENT;
2740 
2741       PFS_events_waits *parent_event= wait - 1;
2742       wait->m_event_type= EVENT_TYPE_WAIT;
2743       wait->m_nesting_event_id= parent_event->m_event_id;
2744       wait->m_nesting_event_type= parent_event->m_event_type;
2745 
2746       PFS_table_share *share= pfs_table->m_share;
2747       wait->m_thread= pfs_thread;
2748       wait->m_class= &global_table_lock_class;
2749       wait->m_timer_start= timer_start;
2750       wait->m_timer_end= 0;
2751       wait->m_object_instance_addr= pfs_table->m_identity;
2752       wait->m_event_id= pfs_thread->m_event_id++;
2753       wait->m_end_event_id= 0;
2754       wait->m_operation= table_lock_operation_map[lock_type];
2755       wait->m_flags= 0;
2756       wait->m_object_type= share->get_object_type();
2757       wait->m_weak_table_share= share;
2758       wait->m_weak_version= share->get_version();
2759       wait->m_index= 0;
2760       wait->m_source_file= src_file;
2761       wait->m_source_line= src_line;
2762       wait->m_wait_class= WAIT_CLASS_TABLE;
2763 
2764       pfs_thread->m_events_waits_current++;
2765     }
2766   }
2767   else
2768   {
2769     if (pfs_table->m_lock_timed)
2770     {
2771       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2772       state->m_timer_start= timer_start;
2773       flags= STATE_FLAG_TIMED;
2774     }
2775     else
2776     {
2777       /* TODO: consider a shortcut here */
2778       flags= 0;
2779     }
2780   }
2781 
2782   state->m_flags= flags;
2783   state->m_table= table;
2784   state->m_index= lock_type;
2785   return reinterpret_cast<PSI_table_locker*> (state);
2786 }
2787 
2788 /**
2789   Implementation of the file instrumentation interface.
2790   @sa PSI_v1::get_thread_file_name_locker.
2791 */
2792 static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state * state,PSI_file_key key,PSI_file_operation op,const char * name,const void * identity)2793 get_thread_file_name_locker_v1(PSI_file_locker_state *state,
2794                                PSI_file_key key,
2795                                PSI_file_operation op,
2796                                const char *name, const void *identity)
2797 {
2798   DBUG_ASSERT(static_cast<int> (op) >= 0);
2799   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2800   DBUG_ASSERT(state != NULL);
2801 
2802   if (! flag_global_instrumentation)
2803     return NULL;
2804   PFS_file_class *klass= find_file_class(key);
2805   if (unlikely(klass == NULL))
2806     return NULL;
2807   if (! klass->m_enabled)
2808     return NULL;
2809 
2810   /* Needed for the LF_HASH */
2811   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2812   if (unlikely(pfs_thread == NULL))
2813     return NULL;
2814 
2815   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
2816     return NULL;
2817 
2818   uint flags;
2819 
2820   state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2821   flags= STATE_FLAG_THREAD;
2822 
2823   if (klass->m_timed)
2824     flags|= STATE_FLAG_TIMED;
2825 
2826   if (flag_events_waits_current)
2827   {
2828     if (unlikely(pfs_thread->m_events_waits_current >=
2829                  & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2830     {
2831       locker_lost++;
2832       return NULL;
2833     }
2834     PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2835     state->m_wait= wait;
2836     flags|= STATE_FLAG_EVENT;
2837 
2838     PFS_events_waits *parent_event= wait - 1;
2839     wait->m_event_type= EVENT_TYPE_WAIT;
2840     wait->m_nesting_event_id= parent_event->m_event_id;
2841     wait->m_nesting_event_type= parent_event->m_event_type;
2842 
2843     wait->m_thread= pfs_thread;
2844     wait->m_class= klass;
2845     wait->m_timer_start= 0;
2846     wait->m_timer_end= 0;
2847     wait->m_object_instance_addr= NULL;
2848     wait->m_weak_file= NULL;
2849     wait->m_weak_version= 0;
2850     wait->m_event_id= pfs_thread->m_event_id++;
2851     wait->m_end_event_id= 0;
2852     wait->m_operation= file_operation_map[static_cast<int> (op)];
2853     wait->m_wait_class= WAIT_CLASS_FILE;
2854 
2855     pfs_thread->m_events_waits_current++;
2856   }
2857 
2858   state->m_flags= flags;
2859   state->m_file= NULL;
2860   state->m_name= name;
2861   state->m_class= klass;
2862   state->m_operation= op;
2863   return reinterpret_cast<PSI_file_locker*> (state);
2864 }
2865 
2866 /**
2867   Implementation of the file instrumentation interface.
2868   @sa PSI_v1::get_thread_file_stream_locker.
2869 */
2870 static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state * state,PSI_file * file,PSI_file_operation op)2871 get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
2872                                  PSI_file *file, PSI_file_operation op)
2873 {
2874   PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
2875   DBUG_ASSERT(static_cast<int> (op) >= 0);
2876   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2877   DBUG_ASSERT(state != NULL);
2878 
2879   if (unlikely(pfs_file == NULL))
2880     return NULL;
2881   DBUG_ASSERT(pfs_file->m_class != NULL);
2882   PFS_file_class *klass= pfs_file->m_class;
2883 
2884   if (! pfs_file->m_enabled)
2885     return NULL;
2886 
2887   uint flags;
2888 
2889   if (flag_thread_instrumentation)
2890   {
2891     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2892     if (unlikely(pfs_thread == NULL))
2893       return NULL;
2894     if (! pfs_thread->m_enabled)
2895       return NULL;
2896     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2897     flags= STATE_FLAG_THREAD;
2898 
2899     if (pfs_file->m_timed)
2900       flags|= STATE_FLAG_TIMED;
2901 
2902     if (flag_events_waits_current)
2903     {
2904       if (unlikely(pfs_thread->m_events_waits_current >=
2905                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2906       {
2907         locker_lost++;
2908         return NULL;
2909       }
2910       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2911       state->m_wait= wait;
2912       flags|= STATE_FLAG_EVENT;
2913 
2914       PFS_events_waits *parent_event= wait - 1;
2915       wait->m_event_type= EVENT_TYPE_WAIT;
2916       wait->m_nesting_event_id= parent_event->m_event_id;
2917       wait->m_nesting_event_type= parent_event->m_event_type;
2918 
2919       wait->m_thread= pfs_thread;
2920       wait->m_class= klass;
2921       wait->m_timer_start= 0;
2922       wait->m_timer_end= 0;
2923       wait->m_object_instance_addr= pfs_file;
2924       wait->m_weak_file= pfs_file;
2925       wait->m_weak_version= pfs_file->get_version();
2926       wait->m_event_id= pfs_thread->m_event_id++;
2927       wait->m_end_event_id= 0;
2928       wait->m_operation= file_operation_map[static_cast<int> (op)];
2929       wait->m_wait_class= WAIT_CLASS_FILE;
2930 
2931       pfs_thread->m_events_waits_current++;
2932     }
2933   }
2934   else
2935   {
2936     state->m_thread= NULL;
2937     if (pfs_file->m_timed)
2938     {
2939       flags= STATE_FLAG_TIMED;
2940     }
2941     else
2942     {
2943       /* TODO: consider a shortcut. */
2944       flags= 0;
2945     }
2946   }
2947 
2948   state->m_flags= flags;
2949   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
2950   state->m_operation= op;
2951   state->m_name= NULL;
2952   state->m_class= klass;
2953   return reinterpret_cast<PSI_file_locker*> (state);
2954 }
2955 
2956 /**
2957   Implementation of the file instrumentation interface.
2958   @sa PSI_v1::get_thread_file_descriptor_locker.
2959 */
2960 static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state * state,File file,PSI_file_operation op)2961 get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
2962                                      File file, PSI_file_operation op)
2963 {
2964   int index= static_cast<int> (file);
2965   DBUG_ASSERT(static_cast<int> (op) >= 0);
2966   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2967   DBUG_ASSERT(state != NULL);
2968 
2969   if (unlikely((index < 0) || (index >= file_handle_max)))
2970     return NULL;
2971 
2972   PFS_file *pfs_file= file_handle_array[index];
2973   if (unlikely(pfs_file == NULL))
2974     return NULL;
2975 
2976   /*
2977     We are about to close a file by descriptor number,
2978     and the calling code still holds the descriptor.
2979     Cleanup the file descriptor <--> file instrument association.
2980     Remove the instrumentation *before* the close to avoid race
2981     conditions with another thread opening a file
2982     (that could be given the same descriptor).
2983   */
2984   if (op == PSI_FILE_CLOSE)
2985     file_handle_array[index]= NULL;
2986 
2987   if (! pfs_file->m_enabled)
2988     return NULL;
2989 
2990   DBUG_ASSERT(pfs_file->m_class != NULL);
2991   PFS_file_class *klass= pfs_file->m_class;
2992 
2993   uint flags;
2994 
2995   if (flag_thread_instrumentation)
2996   {
2997     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2998     if (unlikely(pfs_thread == NULL))
2999       return NULL;
3000     if (! pfs_thread->m_enabled)
3001       return NULL;
3002     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3003     flags= STATE_FLAG_THREAD;
3004 
3005     if (pfs_file->m_timed)
3006       flags|= STATE_FLAG_TIMED;
3007 
3008     if (flag_events_waits_current)
3009     {
3010       if (unlikely(pfs_thread->m_events_waits_current >=
3011                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3012       {
3013         locker_lost++;
3014         return NULL;
3015       }
3016       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3017       state->m_wait= wait;
3018       flags|= STATE_FLAG_EVENT;
3019 
3020       PFS_events_waits *parent_event= wait - 1;
3021       wait->m_event_type= EVENT_TYPE_WAIT;
3022       wait->m_nesting_event_id= parent_event->m_event_id;
3023       wait->m_nesting_event_type= parent_event->m_event_type;
3024 
3025       wait->m_thread= pfs_thread;
3026       wait->m_class= klass;
3027       wait->m_timer_start= 0;
3028       wait->m_timer_end= 0;
3029       wait->m_object_instance_addr= pfs_file;
3030       wait->m_weak_file= pfs_file;
3031       wait->m_weak_version= pfs_file->get_version();
3032       wait->m_event_id= pfs_thread->m_event_id++;
3033       wait->m_end_event_id= 0;
3034       wait->m_operation= file_operation_map[static_cast<int> (op)];
3035       wait->m_wait_class= WAIT_CLASS_FILE;
3036 
3037       pfs_thread->m_events_waits_current++;
3038     }
3039   }
3040   else
3041   {
3042     state->m_thread= NULL;
3043     if (pfs_file->m_timed)
3044     {
3045       flags= STATE_FLAG_TIMED;
3046     }
3047     else
3048     {
3049       /* TODO: consider a shortcut. */
3050       flags= 0;
3051     }
3052   }
3053 
3054   state->m_flags= flags;
3055   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3056   state->m_operation= op;
3057   state->m_name= NULL;
3058   state->m_class= klass;
3059   return reinterpret_cast<PSI_file_locker*> (state);
3060 }
3061 
3062 /** Socket locker */
3063 
3064 static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state * state,PSI_socket * socket,PSI_socket_operation op,size_t count,const char * src_file,uint src_line)3065 start_socket_wait_v1(PSI_socket_locker_state *state,
3066                      PSI_socket *socket,
3067                      PSI_socket_operation op,
3068                      size_t count,
3069                      const char *src_file, uint src_line)
3070 {
3071   DBUG_ASSERT(static_cast<int> (op) >= 0);
3072   DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
3073   DBUG_ASSERT(state != NULL);
3074   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);
3075 
3076   DBUG_ASSERT(pfs_socket != NULL);
3077   DBUG_ASSERT(pfs_socket->m_class != NULL);
3078 
3079   if (!pfs_socket->m_enabled || pfs_socket->m_idle)
3080     return NULL;
3081 
3082   uint flags= 0;
3083   ulonglong timer_start= 0;
3084 
3085   if (flag_thread_instrumentation)
3086   {
3087     /*
3088        Do not use pfs_socket->m_thread_owner here,
3089        as different threads may use concurrently the same socket,
3090        for example during a KILL.
3091     */
3092     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3093 
3094     if (unlikely(pfs_thread == NULL))
3095       return NULL;
3096 
3097     if (!pfs_thread->m_enabled)
3098       return NULL;
3099 
3100     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3101     flags= STATE_FLAG_THREAD;
3102 
3103     if (pfs_socket->m_timed)
3104     {
3105       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3106       state->m_timer_start= timer_start;
3107       flags|= STATE_FLAG_TIMED;
3108     }
3109 
3110     if (flag_events_waits_current)
3111     {
3112       if (unlikely(pfs_thread->m_events_waits_current >=
3113                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3114       {
3115         locker_lost++;
3116         return NULL;
3117       }
3118       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3119       state->m_wait= wait;
3120       flags|= STATE_FLAG_EVENT;
3121 
3122       PFS_events_waits *parent_event= wait - 1;
3123       wait->m_event_type= EVENT_TYPE_WAIT;
3124       wait->m_nesting_event_id=   parent_event->m_event_id;
3125       wait->m_nesting_event_type= parent_event->m_event_type;
3126       wait->m_thread=       pfs_thread;
3127       wait->m_class=        pfs_socket->m_class;
3128       wait->m_timer_start=  timer_start;
3129       wait->m_timer_end=    0;
3130       wait->m_object_instance_addr= pfs_socket->m_identity;
3131       wait->m_weak_socket=  pfs_socket;
3132       wait->m_weak_version= pfs_socket->get_version();
3133       wait->m_event_id=     pfs_thread->m_event_id++;
3134       wait->m_end_event_id= 0;
3135       wait->m_operation=    socket_operation_map[static_cast<int>(op)];
3136       wait->m_source_file= src_file;
3137       wait->m_source_line= src_line;
3138       wait->m_number_of_bytes= count;
3139       wait->m_wait_class=   WAIT_CLASS_SOCKET;
3140 
3141       pfs_thread->m_events_waits_current++;
3142     }
3143   }
3144   else
3145   {
3146     if (pfs_socket->m_timed)
3147     {
3148       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3149       state->m_timer_start= timer_start;
3150       flags= STATE_FLAG_TIMED;
3151     }
3152     else
3153     {
3154       /*
3155         Even if timing is disabled, end_socket_wait() still needs a locker to
3156         capture the number of bytes sent or received by the socket operation.
3157         For operations that do not have a byte count, then just increment the
3158         event counter and return a NULL locker.
3159       */
3160       switch (op)
3161       {
3162         case PSI_SOCKET_CONNECT:
3163         case PSI_SOCKET_CREATE:
3164         case PSI_SOCKET_BIND:
3165         case PSI_SOCKET_SEEK:
3166         case PSI_SOCKET_OPT:
3167         case PSI_SOCKET_STAT:
3168         case PSI_SOCKET_SHUTDOWN:
3169         case PSI_SOCKET_CLOSE:
3170         case PSI_SOCKET_SELECT:
3171           pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
3172           return NULL;
3173         default:
3174           break;
3175       }
3176     }
3177   }
3178 
3179   state->m_flags= flags;
3180   state->m_socket= socket;
3181   state->m_operation= op;
3182   return reinterpret_cast<PSI_socket_locker*> (state);
3183 }
3184 
3185 /**
3186   Implementation of the mutex instrumentation interface.
3187   @sa PSI_v1::unlock_mutex.
3188 */
unlock_mutex_v1(PSI_mutex * mutex)3189 static void unlock_mutex_v1(PSI_mutex *mutex)
3190 {
3191   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3192 
3193   DBUG_ASSERT(pfs_mutex != NULL);
3194 
3195   /*
3196     Note that this code is still protected by the instrumented mutex,
3197     and therefore is thread safe. See inline_mysql_mutex_unlock().
3198   */
3199 
3200   /* Always update the instrumented state */
3201   pfs_mutex->m_owner= NULL;
3202   pfs_mutex->m_last_locked= 0;
3203 
3204 #ifdef LATER_WL2333
3205   /*
3206     See WL#2333: SHOW ENGINE ... LOCK STATUS.
3207     PFS_mutex::m_lock_stat is not exposed in user visible tables
3208     currently, so there is no point spending time computing it.
3209   */
3210   if (! pfs_mutex->m_enabled)
3211     return;
3212 
3213   if (! pfs_mutex->m_timed)
3214     return;
3215 
3216   ulonglong locked_time;
3217   locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3218   pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3219 #endif
3220 }
3221 
3222 /**
3223   Implementation of the rwlock instrumentation interface.
3224   @sa PSI_v1::unlock_rwlock.
3225 */
unlock_rwlock_v1(PSI_rwlock * rwlock)3226 static void unlock_rwlock_v1(PSI_rwlock *rwlock)
3227 {
3228   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
3229   DBUG_ASSERT(pfs_rwlock != NULL);
3230   DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
3231   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
3232   DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());
3233 
3234   bool last_writer= false;
3235   bool last_reader= false;
3236 
3237   /*
3238     Note that this code is still protected by the instrumented rwlock,
3239     and therefore is:
3240     - thread safe for write locks
3241     - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
3242     See inline_mysql_rwlock_unlock()
3243   */
3244 
3245   /* Always update the instrumented state */
3246   if (pfs_rwlock->m_writer != NULL)
3247   {
3248     /* Nominal case, a writer is unlocking. */
3249     last_writer= true;
3250     pfs_rwlock->m_writer= NULL;
3251     /* Reset the readers stats, they could be off */
3252     pfs_rwlock->m_readers= 0;
3253   }
3254   else if (likely(pfs_rwlock->m_readers > 0))
3255   {
3256     /* Nominal case, a reader is unlocking. */
3257     if (--(pfs_rwlock->m_readers) == 0)
3258       last_reader= true;
3259   }
3260   else
3261   {
3262     /*
3263       Edge case, we have no writer and no readers,
3264       on an unlock event.
3265       This is possible for:
3266       - partial instrumentation
3267       - instrumentation disabled at runtime,
3268         see when get_thread_rwlock_locker_v1() returns NULL
3269       No further action is taken here, the next
3270       write lock will put the statistics is a valid state.
3271     */
3272   }
3273 
3274 #ifdef LATER_WL2333
3275   /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
3276 
3277   if (! pfs_rwlock->m_enabled)
3278     return;
3279 
3280   if (! pfs_rwlock->m_timed)
3281     return;
3282 
3283   ulonglong locked_time;
3284   if (last_writer)
3285   {
3286     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
3287     pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
3288   }
3289   else if (last_reader)
3290   {
3291     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
3292     pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
3293   }
3294 #else
3295   (void) last_reader;
3296   (void) last_writer;
3297 #endif
3298 }
3299 
3300 /**
3301   Implementation of the cond instrumentation interface.
3302   @sa PSI_v1::signal_cond.
3303 */
signal_cond_v1(PSI_cond * cond)3304 static void signal_cond_v1(PSI_cond* cond)
3305 {
3306   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3307 
3308   DBUG_ASSERT(pfs_cond != NULL);
3309 
3310   pfs_cond->m_cond_stat.m_signal_count++;
3311 }
3312 
3313 /**
3314   Implementation of the cond instrumentation interface.
3315   @sa PSI_v1::broadcast_cond.
3316 */
broadcast_cond_v1(PSI_cond * cond)3317 static void broadcast_cond_v1(PSI_cond* cond)
3318 {
3319   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3320 
3321   DBUG_ASSERT(pfs_cond != NULL);
3322 
3323   pfs_cond->m_cond_stat.m_broadcast_count++;
3324 }
3325 
3326 /**
3327   Implementation of the idle instrumentation interface.
3328   @sa PSI_v1::start_idle_wait.
3329 */
3330 static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state * state,const char * src_file,uint src_line)3331 start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
3332 {
3333   DBUG_ASSERT(state != NULL);
3334 
3335   if (!flag_global_instrumentation)
3336     return NULL;
3337 
3338   if (!global_idle_class.m_enabled)
3339     return NULL;
3340 
3341   uint flags= 0;
3342   ulonglong timer_start= 0;
3343 
3344   if (flag_thread_instrumentation)
3345   {
3346     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3347     if (unlikely(pfs_thread == NULL))
3348       return NULL;
3349     if (!pfs_thread->m_enabled)
3350       return NULL;
3351     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3352     flags= STATE_FLAG_THREAD;
3353 
3354     DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);
3355 
3356     if (global_idle_class.m_timed)
3357     {
3358       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3359       state->m_timer_start= timer_start;
3360       flags|= STATE_FLAG_TIMED;
3361     }
3362 
3363     if (flag_events_waits_current)
3364     {
3365       if (unlikely(pfs_thread->m_events_waits_current >=
3366                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3367       {
3368         locker_lost++;
3369         return NULL;
3370       }
3371       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3372       state->m_wait= wait;
3373       flags|= STATE_FLAG_EVENT;
3374 
3375       wait->m_event_type= EVENT_TYPE_WAIT;
3376       /*
3377         IDLE events are waits, but by definition we know that
3378         such waits happen outside of any STAGE and STATEMENT,
3379         so they have no parents.
3380       */
3381       wait->m_nesting_event_id= 0;
3382       /* no need to set wait->m_nesting_event_type */
3383 
3384       wait->m_thread= pfs_thread;
3385       wait->m_class= &global_idle_class;
3386       wait->m_timer_start= timer_start;
3387       wait->m_timer_end= 0;
3388       wait->m_event_id= pfs_thread->m_event_id++;
3389       wait->m_end_event_id= 0;
3390       wait->m_operation= OPERATION_TYPE_IDLE;
3391       wait->m_source_file= src_file;
3392       wait->m_source_line= src_line;
3393       wait->m_wait_class= WAIT_CLASS_IDLE;
3394 
3395       pfs_thread->m_events_waits_current++;
3396     }
3397   }
3398   else
3399   {
3400     if (global_idle_class.m_timed)
3401     {
3402       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3403       state->m_timer_start= timer_start;
3404       flags= STATE_FLAG_TIMED;
3405     }
3406   }
3407 
3408   state->m_flags= flags;
3409   return reinterpret_cast<PSI_idle_locker*> (state);
3410 }
3411 
3412 /**
  Implementation of the idle instrumentation interface.
3414   @sa PSI_v1::end_idle_wait.
3415 */
end_idle_wait_v1(PSI_idle_locker * locker)3416 static void end_idle_wait_v1(PSI_idle_locker* locker)
3417 {
3418   PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
3419   DBUG_ASSERT(state != NULL);
3420   ulonglong timer_end= 0;
3421   ulonglong wait_time= 0;
3422 
3423   uint flags= state->m_flags;
3424 
3425   if (flags & STATE_FLAG_TIMED)
3426   {
3427     timer_end= state->m_timer();
3428     wait_time= timer_end - state->m_timer_start;
3429   }
3430 
3431   if (flags & STATE_FLAG_THREAD)
3432   {
3433     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3434     PFS_single_stat *event_name_array;
3435     event_name_array= thread->m_instr_class_waits_stats;
3436 
3437     if (flags & STATE_FLAG_TIMED)
3438     {
3439       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3440       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
3441     }
3442     else
3443     {
3444       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3445       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
3446     }
3447 
3448     if (flags & STATE_FLAG_EVENT)
3449     {
3450       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3451       DBUG_ASSERT(wait != NULL);
3452 
3453       wait->m_timer_end= timer_end;
3454       wait->m_end_event_id= thread->m_event_id;
3455       if (flag_events_waits_history)
3456         insert_events_waits_history(thread, wait);
3457       if (flag_events_waits_history_long)
3458         insert_events_waits_history_long(wait);
3459       thread->m_events_waits_current--;
3460 
3461       DBUG_ASSERT(wait == thread->m_events_waits_current);
3462     }
3463   }
3464 
3465   if (flags & STATE_FLAG_TIMED)
3466   {
3467     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
3468     global_idle_stat.aggregate_value(wait_time);
3469   }
3470   else
3471   {
3472     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
3473     global_idle_stat.aggregate_counted();
3474   }
3475 }
3476 
3477 /**
3478   Implementation of the mutex instrumentation interface.
3479   @sa PSI_v1::end_mutex_wait.
3480 */
end_mutex_wait_v1(PSI_mutex_locker * locker,int rc)3481 static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
3482 {
3483   PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
3484   DBUG_ASSERT(state != NULL);
3485 
3486   ulonglong timer_end= 0;
3487   ulonglong wait_time= 0;
3488 
3489   PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
3490   DBUG_ASSERT(mutex != NULL);
3491   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3492 
3493   uint flags= state->m_flags;
3494 
3495   if (flags & STATE_FLAG_TIMED)
3496   {
3497     timer_end= state->m_timer();
3498     wait_time= timer_end - state->m_timer_start;
3499     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3500     mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
3501   }
3502   else
3503   {
3504     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3505     mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
3506   }
3507 
3508   if (likely(rc == 0))
3509   {
3510     mutex->m_owner= thread;
3511     mutex->m_last_locked= timer_end;
3512   }
3513 
3514   if (flags & STATE_FLAG_THREAD)
3515   {
3516     PFS_single_stat *event_name_array;
3517     event_name_array= thread->m_instr_class_waits_stats;
3518     uint index= mutex->m_class->m_event_name_index;
3519 
3520     if (flags & STATE_FLAG_TIMED)
3521     {
3522       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3523       event_name_array[index].aggregate_value(wait_time);
3524     }
3525     else
3526     {
3527       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3528       event_name_array[index].aggregate_counted();
3529     }
3530 
3531     if (flags & STATE_FLAG_EVENT)
3532     {
3533       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3534       DBUG_ASSERT(wait != NULL);
3535 
3536       wait->m_timer_end= timer_end;
3537       wait->m_end_event_id= thread->m_event_id;
3538       if (flag_events_waits_history)
3539         insert_events_waits_history(thread, wait);
3540       if (flag_events_waits_history_long)
3541         insert_events_waits_history_long(wait);
3542       thread->m_events_waits_current--;
3543 
3544       DBUG_ASSERT(wait == thread->m_events_waits_current);
3545     }
3546   }
3547 }
3548 
3549 /**
3550   Implementation of the rwlock instrumentation interface.
3551   @sa PSI_v1::end_rwlock_rdwait.
3552 */
end_rwlock_rdwait_v1(PSI_rwlock_locker * locker,int rc)3553 static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
3554 {
3555   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3556   DBUG_ASSERT(state != NULL);
3557 
3558   ulonglong timer_end= 0;
3559   ulonglong wait_time= 0;
3560 
3561   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3562   DBUG_ASSERT(rwlock != NULL);
3563 
3564   if (state->m_flags & STATE_FLAG_TIMED)
3565   {
3566     timer_end= state->m_timer();
3567     wait_time= timer_end - state->m_timer_start;
3568     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3569     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3570   }
3571   else
3572   {
3573     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3574     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3575   }
3576 
3577   if (rc == 0)
3578   {
3579     /*
3580       Warning:
3581       Multiple threads can execute this section concurrently
3582       (since multiple readers can execute in parallel).
3583       The statistics generated are not safe, which is why they are
3584       just statistics, not facts.
3585     */
3586     if (rwlock->m_readers == 0)
3587       rwlock->m_last_read= timer_end;
3588     rwlock->m_writer= NULL;
3589     rwlock->m_readers++;
3590   }
3591 
3592   if (state->m_flags & STATE_FLAG_THREAD)
3593   {
3594     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3595     DBUG_ASSERT(thread != NULL);
3596 
3597     PFS_single_stat *event_name_array;
3598     event_name_array= thread->m_instr_class_waits_stats;
3599     uint index= rwlock->m_class->m_event_name_index;
3600 
3601     if (state->m_flags & STATE_FLAG_TIMED)
3602     {
3603       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3604       event_name_array[index].aggregate_value(wait_time);
3605     }
3606     else
3607     {
3608       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3609       event_name_array[index].aggregate_counted();
3610     }
3611 
3612     if (state->m_flags & STATE_FLAG_EVENT)
3613     {
3614       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3615       DBUG_ASSERT(wait != NULL);
3616 
3617       wait->m_timer_end= timer_end;
3618       wait->m_end_event_id= thread->m_event_id;
3619       if (flag_events_waits_history)
3620         insert_events_waits_history(thread, wait);
3621       if (flag_events_waits_history_long)
3622         insert_events_waits_history_long(wait);
3623       thread->m_events_waits_current--;
3624 
3625       DBUG_ASSERT(wait == thread->m_events_waits_current);
3626     }
3627   }
3628 }
3629 
3630 /**
3631   Implementation of the rwlock instrumentation interface.
3632   @sa PSI_v1::end_rwlock_wrwait.
3633 */
end_rwlock_wrwait_v1(PSI_rwlock_locker * locker,int rc)3634 static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
3635 {
3636   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3637   DBUG_ASSERT(state != NULL);
3638 
3639   ulonglong timer_end= 0;
3640   ulonglong wait_time= 0;
3641 
3642   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3643   DBUG_ASSERT(rwlock != NULL);
3644   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3645 
3646   if (state->m_flags & STATE_FLAG_TIMED)
3647   {
3648     timer_end= state->m_timer();
3649     wait_time= timer_end - state->m_timer_start;
3650     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3651     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3652   }
3653   else
3654   {
3655     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3656     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3657   }
3658 
3659   if (likely(rc == 0))
3660   {
3661     /* Thread safe : we are protected by the instrumented rwlock */
3662     rwlock->m_writer= thread;
3663     rwlock->m_last_written= timer_end;
3664     /* Reset the readers stats, they could be off */
3665     rwlock->m_readers= 0;
3666     rwlock->m_last_read= 0;
3667   }
3668 
3669   if (state->m_flags & STATE_FLAG_THREAD)
3670   {
3671     PFS_single_stat *event_name_array;
3672     event_name_array= thread->m_instr_class_waits_stats;
3673     uint index= rwlock->m_class->m_event_name_index;
3674 
3675     if (state->m_flags & STATE_FLAG_TIMED)
3676     {
3677       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3678       event_name_array[index].aggregate_value(wait_time);
3679     }
3680     else
3681     {
3682       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3683       event_name_array[index].aggregate_counted();
3684     }
3685 
3686     if (state->m_flags & STATE_FLAG_EVENT)
3687     {
3688       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3689       DBUG_ASSERT(wait != NULL);
3690 
3691       wait->m_timer_end= timer_end;
3692       wait->m_end_event_id= thread->m_event_id;
3693       if (flag_events_waits_history)
3694         insert_events_waits_history(thread, wait);
3695       if (flag_events_waits_history_long)
3696         insert_events_waits_history_long(wait);
3697       thread->m_events_waits_current--;
3698 
3699       DBUG_ASSERT(wait == thread->m_events_waits_current);
3700     }
3701   }
3702 }
3703 
3704 /**
3705   Implementation of the cond instrumentation interface.
3706   @sa PSI_v1::end_cond_wait.
3707 */
end_cond_wait_v1(PSI_cond_locker * locker,int rc)3708 static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
3709 {
3710   PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
3711   DBUG_ASSERT(state != NULL);
3712 
3713   ulonglong timer_end= 0;
3714   ulonglong wait_time= 0;
3715 
3716   PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
3717   /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */
3718 
3719   if (state->m_flags & STATE_FLAG_TIMED)
3720   {
3721     timer_end= state->m_timer();
3722     wait_time= timer_end - state->m_timer_start;
3723     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3724     cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
3725   }
3726   else
3727   {
3728     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3729     cond->m_cond_stat.m_wait_stat.aggregate_counted();
3730   }
3731 
3732   if (state->m_flags & STATE_FLAG_THREAD)
3733   {
3734     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3735     DBUG_ASSERT(thread != NULL);
3736 
3737     PFS_single_stat *event_name_array;
3738     event_name_array= thread->m_instr_class_waits_stats;
3739     uint index= cond->m_class->m_event_name_index;
3740 
3741     if (state->m_flags & STATE_FLAG_TIMED)
3742     {
3743       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3744       event_name_array[index].aggregate_value(wait_time);
3745     }
3746     else
3747     {
3748       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3749       event_name_array[index].aggregate_counted();
3750     }
3751 
3752     if (state->m_flags & STATE_FLAG_EVENT)
3753     {
3754       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3755       DBUG_ASSERT(wait != NULL);
3756 
3757       wait->m_timer_end= timer_end;
3758       wait->m_end_event_id= thread->m_event_id;
3759       if (flag_events_waits_history)
3760         insert_events_waits_history(thread, wait);
3761       if (flag_events_waits_history_long)
3762         insert_events_waits_history_long(wait);
3763       thread->m_events_waits_current--;
3764 
3765       DBUG_ASSERT(wait == thread->m_events_waits_current);
3766     }
3767   }
3768 }
3769 
3770 /**
3771   Implementation of the table instrumentation interface.
3772   @sa PSI_v1::end_table_io_wait.
3773 */
end_table_io_wait_v1(PSI_table_locker * locker)3774 static void end_table_io_wait_v1(PSI_table_locker* locker)
3775 {
3776   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3777   DBUG_ASSERT(state != NULL);
3778 
3779   ulonglong timer_end= 0;
3780   ulonglong wait_time= 0;
3781 
3782   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3783   DBUG_ASSERT(table != NULL);
3784 
3785   PFS_single_stat *stat;
3786   PFS_table_io_stat *table_io_stat;
3787 
3788   DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
3789               (state->m_index == MAX_INDEXES));
3790 
3791   table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
3792   table_io_stat->m_has_data= true;
3793 
3794   switch (state->m_io_operation)
3795   {
3796   case PSI_TABLE_FETCH_ROW:
3797     stat= & table_io_stat->m_fetch;
3798     break;
3799   case PSI_TABLE_WRITE_ROW:
3800     stat= & table_io_stat->m_insert;
3801     break;
3802   case PSI_TABLE_UPDATE_ROW:
3803     stat= & table_io_stat->m_update;
3804     break;
3805   case PSI_TABLE_DELETE_ROW:
3806     stat= & table_io_stat->m_delete;
3807     break;
3808   default:
3809     DBUG_ASSERT(false);
3810     stat= NULL;
3811     break;
3812   }
3813 
3814   uint flags= state->m_flags;
3815 
3816   if (flags & STATE_FLAG_TIMED)
3817   {
3818     timer_end= state->m_timer();
3819     wait_time= timer_end - state->m_timer_start;
3820     stat->aggregate_value(wait_time);
3821   }
3822   else
3823   {
3824     stat->aggregate_counted();
3825   }
3826 
3827   if (flags & STATE_FLAG_THREAD)
3828   {
3829     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3830     DBUG_ASSERT(thread != NULL);
3831 
3832     PFS_single_stat *event_name_array;
3833     event_name_array= thread->m_instr_class_waits_stats;
3834 
3835     /*
3836       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3837       (for wait/io/table/sql/handler)
3838     */
3839     if (flags & STATE_FLAG_TIMED)
3840     {
3841       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
3842     }
3843     else
3844     {
3845       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
3846     }
3847 
3848     if (flags & STATE_FLAG_EVENT)
3849     {
3850       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3851       DBUG_ASSERT(wait != NULL);
3852 
3853       wait->m_timer_end= timer_end;
3854       wait->m_end_event_id= thread->m_event_id;
3855       if (flag_events_waits_history)
3856         insert_events_waits_history(thread, wait);
3857       if (flag_events_waits_history_long)
3858         insert_events_waits_history_long(wait);
3859       thread->m_events_waits_current--;
3860 
3861       DBUG_ASSERT(wait == thread->m_events_waits_current);
3862     }
3863   }
3864 
3865   table->m_has_io_stats= true;
3866 }
3867 
3868 /**
3869   Implementation of the table instrumentation interface.
3870   @sa PSI_v1::end_table_lock_wait.
3871 */
end_table_lock_wait_v1(PSI_table_locker * locker)3872 static void end_table_lock_wait_v1(PSI_table_locker* locker)
3873 {
3874   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3875   DBUG_ASSERT(state != NULL);
3876 
3877   ulonglong timer_end= 0;
3878   ulonglong wait_time= 0;
3879 
3880   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3881   DBUG_ASSERT(table != NULL);
3882 
3883   PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];
3884 
3885   uint flags= state->m_flags;
3886 
3887   if (flags & STATE_FLAG_TIMED)
3888   {
3889     timer_end= state->m_timer();
3890     wait_time= timer_end - state->m_timer_start;
3891     stat->aggregate_value(wait_time);
3892   }
3893   else
3894   {
3895     stat->aggregate_counted();
3896   }
3897 
3898   if (flags & STATE_FLAG_THREAD)
3899   {
3900     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3901     DBUG_ASSERT(thread != NULL);
3902 
3903     PFS_single_stat *event_name_array;
3904     event_name_array= thread->m_instr_class_waits_stats;
3905 
3906     /*
3907       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3908       (for wait/lock/table/sql/handler)
3909     */
3910     if (flags & STATE_FLAG_TIMED)
3911     {
3912       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
3913     }
3914     else
3915     {
3916       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
3917     }
3918 
3919     if (flags & STATE_FLAG_EVENT)
3920     {
3921       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3922       DBUG_ASSERT(wait != NULL);
3923 
3924       wait->m_timer_end= timer_end;
3925       wait->m_end_event_id= thread->m_event_id;
3926       if (flag_events_waits_history)
3927         insert_events_waits_history(thread, wait);
3928       if (flag_events_waits_history_long)
3929         insert_events_waits_history_long(wait);
3930       thread->m_events_waits_current--;
3931 
3932       DBUG_ASSERT(wait == thread->m_events_waits_current);
3933     }
3934   }
3935 
3936   table->m_has_lock_stats= true;
3937 }
3938 
3939 static void start_file_wait_v1(PSI_file_locker *locker,
3940                                size_t count,
3941                                const char *src_file,
3942                                uint src_line);
3943 
3944 static void end_file_wait_v1(PSI_file_locker *locker,
3945                              size_t count);
3946 
3947 /**
3948   Implementation of the file instrumentation interface.
3949   @sa PSI_v1::start_file_open_wait.
3950 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3951 static void start_file_open_wait_v1(PSI_file_locker *locker,
3952                                     const char *src_file,
3953                                     uint src_line)
3954 {
3955   start_file_wait_v1(locker, 0, src_file, src_line);
3956 
3957   return;
3958 }
3959 
3960 /**
3961   Implementation of the file instrumentation interface.
3962   @sa PSI_v1::end_file_open_wait.
3963 */
end_file_open_wait_v1(PSI_file_locker * locker,void * result)3964 static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
3965                                        void *result)
3966 {
3967   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3968   DBUG_ASSERT(state != NULL);
3969 
3970   switch (state->m_operation)
3971   {
3972   case PSI_FILE_STAT:
3973   case PSI_FILE_RENAME:
3974     break;
3975   case PSI_FILE_STREAM_OPEN:
3976   case PSI_FILE_CREATE:
3977   case PSI_FILE_OPEN:
3978     if (result != NULL)
3979     {
3980       PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
3981       PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
3982       const char *name= state->m_name;
3983       uint len= strlen(name);
3984       PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
3985       state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3986     }
3987     break;
3988   default:
3989     DBUG_ASSERT(false);
3990     break;
3991   }
3992 
3993   end_file_wait_v1(locker, 0);
3994 
3995   return state->m_file;
3996 }
3997 
3998 /**
3999   Implementation of the file instrumentation interface.
4000   @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
4001 */
end_file_open_wait_and_bind_to_descriptor_v1(PSI_file_locker * locker,File file)4002 static void end_file_open_wait_and_bind_to_descriptor_v1
4003   (PSI_file_locker *locker, File file)
4004 {
4005   PFS_file *pfs_file= NULL;
4006   int index= (int) file;
4007   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4008   DBUG_ASSERT(state != NULL);
4009 
4010   if (index >= 0)
4011   {
4012     PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
4013     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4014     const char *name= state->m_name;
4015     uint len= strlen(name);
4016     pfs_file= find_or_create_file(thread, klass, name, len, true);
4017     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4018   }
4019 
4020   end_file_wait_v1(locker, 0);
4021 
4022   if (likely(index >= 0))
4023   {
4024     if (likely(index < file_handle_max))
4025       file_handle_array[index]= pfs_file;
4026     else
4027     {
4028       if (pfs_file != NULL)
4029         release_file(pfs_file);
4030       file_handle_lost++;
4031     }
4032   }
4033 }
4034 
4035 /**
4036   Implementation of the file instrumentation interface.
4037   @sa PSI_v1::start_file_wait.
4038 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4039 static void start_file_wait_v1(PSI_file_locker *locker,
4040                                size_t count,
4041                                const char *src_file,
4042                                uint src_line)
4043 {
4044   ulonglong timer_start= 0;
4045   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4046   DBUG_ASSERT(state != NULL);
4047 
4048   uint flags= state->m_flags;
4049 
4050   if (flags & STATE_FLAG_TIMED)
4051   {
4052     timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4053     state->m_timer_start= timer_start;
4054   }
4055 
4056   if (flags & STATE_FLAG_EVENT)
4057   {
4058     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4059     DBUG_ASSERT(wait != NULL);
4060 
4061     wait->m_timer_start= timer_start;
4062     wait->m_source_file= src_file;
4063     wait->m_source_line= src_line;
4064     wait->m_number_of_bytes= count;
4065   }
4066 }
4067 
4068 /**
4069   Implementation of the file instrumentation interface.
4070   @sa PSI_v1::end_file_wait.
4071 */
end_file_wait_v1(PSI_file_locker * locker,size_t byte_count)4072 static void end_file_wait_v1(PSI_file_locker *locker,
4073                              size_t byte_count)
4074 {
4075   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4076   DBUG_ASSERT(state != NULL);
4077   PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
4078   PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
4079   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4080 
4081   ulonglong timer_end= 0;
4082   ulonglong wait_time= 0;
4083   PFS_byte_stat *byte_stat;
4084   uint flags= state->m_flags;
4085   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
4086 
4087   PFS_file_stat *file_stat;
4088 
4089   if (file != NULL)
4090   {
4091     file_stat= & file->m_file_stat;
4092   }
4093   else
4094   {
4095     file_stat= & klass->m_file_stat;
4096   }
4097 
4098   switch (state->m_operation)
4099   {
4100     /* Group read operations */
4101     case PSI_FILE_READ:
4102       byte_stat= &file_stat->m_io_stat.m_read;
4103       break;
4104     /* Group write operations */
4105     case PSI_FILE_WRITE:
4106       byte_stat= &file_stat->m_io_stat.m_write;
4107       break;
4108     /* Group remaining operations as miscellaneous */
4109     case PSI_FILE_CREATE:
4110     case PSI_FILE_CREATE_TMP:
4111     case PSI_FILE_OPEN:
4112     case PSI_FILE_STREAM_OPEN:
4113     case PSI_FILE_STREAM_CLOSE:
4114     case PSI_FILE_SEEK:
4115     case PSI_FILE_TELL:
4116     case PSI_FILE_FLUSH:
4117     case PSI_FILE_FSTAT:
4118     case PSI_FILE_CHSIZE:
4119     case PSI_FILE_DELETE:
4120     case PSI_FILE_RENAME:
4121     case PSI_FILE_SYNC:
4122     case PSI_FILE_STAT:
4123     case PSI_FILE_CLOSE:
4124       byte_stat= &file_stat->m_io_stat.m_misc;
4125       break;
4126     default:
4127       DBUG_ASSERT(false);
4128       byte_stat= NULL;
4129       break;
4130   }
4131 
4132   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
4133   if (flags & STATE_FLAG_TIMED)
4134   {
4135     timer_end= state->m_timer();
4136     wait_time= timer_end - state->m_timer_start;
4137     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
4138     byte_stat->aggregate(wait_time, bytes);
4139   }
4140   else
4141   {
4142     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
4143     byte_stat->aggregate_counted(bytes);
4144   }
4145 
4146   if (flags & STATE_FLAG_THREAD)
4147   {
4148     DBUG_ASSERT(thread != NULL);
4149 
4150     PFS_single_stat *event_name_array;
4151     event_name_array= thread->m_instr_class_waits_stats;
4152     uint index= klass->m_event_name_index;
4153 
4154     if (flags & STATE_FLAG_TIMED)
4155     {
4156       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4157       event_name_array[index].aggregate_value(wait_time);
4158     }
4159     else
4160     {
4161       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4162       event_name_array[index].aggregate_counted();
4163     }
4164 
4165     if (state->m_flags & STATE_FLAG_EVENT)
4166     {
4167       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4168       DBUG_ASSERT(wait != NULL);
4169 
4170       wait->m_timer_end= timer_end;
4171       wait->m_number_of_bytes= bytes;
4172       wait->m_end_event_id= thread->m_event_id;
4173       wait->m_object_instance_addr= file;
4174       wait->m_weak_file= file;
4175       wait->m_weak_version= (file ? file->get_version() : 0);
4176 
4177       if (flag_events_waits_history)
4178         insert_events_waits_history(thread, wait);
4179       if (flag_events_waits_history_long)
4180         insert_events_waits_history_long(wait);
4181       thread->m_events_waits_current--;
4182 
4183       DBUG_ASSERT(wait == thread->m_events_waits_current);
4184     }
4185   }
4186 }
4187 
4188 /**
4189   Implementation of the file instrumentation interface.
4190   @sa PSI_v1::start_file_close_wait.
4191 */
start_file_close_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)4192 static void start_file_close_wait_v1(PSI_file_locker *locker,
4193                                      const char *src_file,
4194                                      uint src_line)
4195 {
4196   PFS_thread *thread;
4197   const char *name;
4198   uint len;
4199   PFS_file *pfs_file;
4200   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4201   DBUG_ASSERT(state != NULL);
4202 
4203   switch (state->m_operation)
4204   {
4205   case PSI_FILE_DELETE:
4206     thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4207     name= state->m_name;
4208     len= strlen(name);
4209     pfs_file= find_or_create_file(thread, NULL, name, len, false);
4210     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4211     break;
4212   case PSI_FILE_STREAM_CLOSE:
4213   case PSI_FILE_CLOSE:
4214     break;
4215   default:
4216     DBUG_ASSERT(false);
4217     break;
4218   }
4219 
4220   start_file_wait_v1(locker, 0, src_file, src_line);
4221 
4222   return;
4223 }
4224 
4225 /**
4226   Implementation of the file instrumentation interface.
4227   @sa PSI_v1::end_file_close_wait.
4228 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4229 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4230 {
4231   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4232   DBUG_ASSERT(state != NULL);
4233 
4234   end_file_wait_v1(locker, 0);
4235 
4236   if (rc == 0)
4237   {
4238     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4239     PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4240 
4241     /* Release or destroy the file if necessary */
4242     switch(state->m_operation)
4243     {
4244     case PSI_FILE_CLOSE:
4245     case PSI_FILE_STREAM_CLOSE:
4246       if (file != NULL)
4247         release_file(file);
4248       break;
4249     case PSI_FILE_DELETE:
4250       if (file != NULL)
4251         destroy_file(thread, file);
4252       break;
4253     default:
4254       DBUG_ASSERT(false);
4255       break;
4256     }
4257   }
4258   return;
4259 }
4260 
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4261 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4262 {
4263   ulonglong timer_value= 0;
4264 
4265   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4266   if (unlikely(pfs_thread == NULL))
4267     return;
4268 
4269   /* Always update column threads.processlist_state. */
4270   pfs_thread->m_stage= key;
4271 
4272   if (! flag_global_instrumentation)
4273     return;
4274 
4275   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4276     return;
4277 
4278   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4279   PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4280   PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4281 
4282   PFS_instr_class *old_class= pfs->m_class;
4283   if (old_class != NULL)
4284   {
4285     PFS_stage_stat *event_name_array;
4286     event_name_array= pfs_thread->m_instr_class_stages_stats;
4287     uint index= old_class->m_event_name_index;
4288 
4289     /* Finish old event */
4290     if (old_class->m_timed)
4291     {
4292       timer_value= get_timer_raw_value(stage_timer);;
4293       pfs->m_timer_end= timer_value;
4294 
4295       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4296       ulonglong stage_time= timer_value - pfs->m_timer_start;
4297       event_name_array[index].aggregate_value(stage_time);
4298     }
4299     else
4300     {
4301       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4302       event_name_array[index].aggregate_counted();
4303     }
4304 
4305     if (flag_events_stages_current)
4306     {
4307       pfs->m_end_event_id= pfs_thread->m_event_id;
4308       if (flag_events_stages_history)
4309         insert_events_stages_history(pfs_thread, pfs);
4310       if (flag_events_stages_history_long)
4311         insert_events_stages_history_long(pfs);
4312     }
4313 
4314     /* This stage event is now complete. */
4315     pfs->m_class= NULL;
4316 
4317     /* New waits will now be attached directly to the parent statement. */
4318     child_wait->m_event_id= parent_statement->m_event_id;
4319     child_wait->m_event_type= parent_statement->m_event_type;
4320     /* See below for new stages, that may overwrite this. */
4321   }
4322 
4323   /* Start new event */
4324 
4325   PFS_stage_class *new_klass= find_stage_class(key);
4326   if (unlikely(new_klass == NULL))
4327     return;
4328 
4329   if (! new_klass->m_enabled)
4330     return;
4331 
4332   pfs->m_class= new_klass;
4333   if (new_klass->m_timed)
4334   {
4335     /*
4336       Do not call the timer again if we have a
4337       TIMER_END for the previous stage already.
4338     */
4339     if (timer_value == 0)
4340       timer_value= get_timer_raw_value(stage_timer);
4341     pfs->m_timer_start= timer_value;
4342   }
4343   else
4344     pfs->m_timer_start= 0;
4345   pfs->m_timer_end= 0;
4346 
4347   if (flag_events_stages_current)
4348   {
4349     /* m_thread_internal_id is immutable and already set */
4350     DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4351     pfs->m_event_id= pfs_thread->m_event_id++;
4352     pfs->m_end_event_id= 0;
4353     pfs->m_source_file= src_file;
4354     pfs->m_source_line= src_line;
4355 
4356     /* New wait events will have this new stage as parent. */
4357     child_wait->m_event_id= pfs->m_event_id;
4358     child_wait->m_event_type= EVENT_TYPE_STAGE;
4359   }
4360 }
4361 
end_stage_v1()4362 static void end_stage_v1()
4363 {
4364   ulonglong timer_value= 0;
4365 
4366   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4367   if (unlikely(pfs_thread == NULL))
4368     return;
4369 
4370   pfs_thread->m_stage= 0;
4371 
4372   if (! flag_global_instrumentation)
4373     return;
4374 
4375   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4376     return;
4377 
4378   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4379 
4380   PFS_instr_class *old_class= pfs->m_class;
4381   if (old_class != NULL)
4382   {
4383     PFS_stage_stat *event_name_array;
4384     event_name_array= pfs_thread->m_instr_class_stages_stats;
4385     uint index= old_class->m_event_name_index;
4386 
4387     /* Finish old event */
4388     if (old_class->m_timed)
4389     {
4390       timer_value= get_timer_raw_value(stage_timer);;
4391       pfs->m_timer_end= timer_value;
4392 
4393       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4394       ulonglong stage_time= timer_value - pfs->m_timer_start;
4395       event_name_array[index].aggregate_value(stage_time);
4396     }
4397     else
4398     {
4399       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4400       event_name_array[index].aggregate_counted();
4401     }
4402 
4403     if (flag_events_stages_current)
4404     {
4405       pfs->m_end_event_id= pfs_thread->m_event_id;
4406       if (flag_events_stages_history)
4407         insert_events_stages_history(pfs_thread, pfs);
4408       if (flag_events_stages_history_long)
4409         insert_events_stages_history_long(pfs);
4410     }
4411 
4412     /* New waits will now be attached directly to the parent statement. */
4413     PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4414     PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4415     child_wait->m_event_id= parent_statement->m_event_id;
4416     child_wait->m_event_type= parent_statement->m_event_type;
4417 
4418     /* This stage is completed */
4419     pfs->m_class= NULL;
4420   }
4421 }
4422 
/**
  Implementation of the statement instrumentation interface.
  Decide whether the statement identified by @c key is instrumented and,
  when it is, initialize @c state so that it can be used as a locker.

  @param state    caller provided locker state, filled in by this function
  @param key      statement instrument key, resolved with find_statement_class()
  @param charset  read as a CHARSET_INFO; its number is saved in the state
  @return @c state cast to a statement locker, or NULL when nothing is
    instrumented: global instrumentation is off, the class is unknown or
    disabled, or (with thread instrumentation) the thread is missing or
    disabled, or the statement stack is already full.
*/
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
                               PSI_statement_key key,
                               const void *charset)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(charset != NULL);

  if (! flag_global_instrumentation)
    return NULL;
  PFS_statement_class *klass= find_statement_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  /* Assigned on every path below before it is read. */
  uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (klass->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_statements_current)
    {
      /*
        The event id is taken before the stack depth check,
        so it is consumed even when the statement is rejected below.
      */
      ulonglong event_id= pfs_thread->m_event_id++;

      if (pfs_thread->m_events_statements_count >= statement_stack_max)
      {
        return NULL;
      }

      /* The statement slot is initialized between the two m_stmt_lock calls. */
      pfs_thread->m_stmt_lock.allocated_to_dirty();
      PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
      /* m_thread_internal_id is immutable and already set */
      DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
      pfs->m_event_id= event_id;
      pfs->m_end_event_id= 0;
      pfs->m_class= klass;
      pfs->m_timer_start= 0;
      pfs->m_timer_end= 0;
      pfs->m_lock_time= 0;
      pfs->m_current_schema_name_length= 0;
      pfs->m_sqltext_length= 0;
      pfs->m_sqltext_truncated= false;
      pfs->m_sqltext_cs_number= system_charset_info->number; /* default */

      /* Diagnostics, filled in by end_statement_v1(). */
      pfs->m_message_text[0]= '\0';
      pfs->m_sql_errno= 0;
      pfs->m_sqlstate[0]= '\0';
      pfs->m_error_count= 0;
      pfs->m_warning_count= 0;
      pfs->m_rows_affected= 0;

      /* Per statement counters, updated by the set_/inc_statement_xxx_v1() calls. */
      pfs->m_rows_sent= 0;
      pfs->m_rows_examined= 0;
      pfs->m_created_tmp_disk_tables= 0;
      pfs->m_created_tmp_tables= 0;
      pfs->m_select_full_join= 0;
      pfs->m_select_full_range_join= 0;
      pfs->m_select_range= 0;
      pfs->m_select_range_check= 0;
      pfs->m_select_scan= 0;
      pfs->m_sort_merge_passes= 0;
      pfs->m_sort_range= 0;
      pfs->m_sort_rows= 0;
      pfs->m_sort_scan= 0;
      pfs->m_no_index_used= 0;
      pfs->m_no_good_index_used= 0;
      pfs->m_digest_storage.reset();

      /* New stages will have this statement as parent */
      PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
      child_stage->m_nesting_event_id= event_id;
      child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      /* New waits will have this statement as parent, if no stage is instrumented */
      PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
      child_wait->m_nesting_event_id= event_id;
      child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      state->m_statement= pfs;
      flags|= STATE_FLAG_EVENT;

      /* Push: the slot just initialized becomes part of the statement stack. */
      pfs_thread->m_events_statements_count++;
      pfs_thread->m_stmt_lock.dirty_to_allocated();
    }
  }
  else
  {
    /* No thread instrumentation: only the timed attribute is tracked. */
    if (klass->m_timed)
      flags= STATE_FLAG_TIMED;
    else
      flags= 0;
  }

  if (flag_statements_digest)
  {
    flags|= STATE_FLAG_DIGEST;
  }

  state->m_discarded= false;
  state->m_class= klass;
  state->m_flags= flags;

  /* Reset the counters kept in the locker state itself. */
  state->m_lock_time= 0;
  state->m_rows_sent= 0;
  state->m_rows_examined= 0;
  state->m_created_tmp_disk_tables= 0;
  state->m_created_tmp_tables= 0;
  state->m_select_full_join= 0;
  state->m_select_full_range_join= 0;
  state->m_select_range= 0;
  state->m_select_range_check= 0;
  state->m_select_scan= 0;
  state->m_sort_merge_passes= 0;
  state->m_sort_range= 0;
  state->m_sort_rows= 0;
  state->m_sort_scan= 0;
  state->m_no_index_used= 0;
  state->m_no_good_index_used= 0;

  /* No digest yet, see pfs_digest_end_v1(). */
  state->m_digest= NULL;

  state->m_schema_name_length= 0;
  state->m_cs_number= ((CHARSET_INFO *)charset)->number;

  return reinterpret_cast<PSI_statement_locker*> (state);
}
4560 
4561 static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker * locker,PSI_statement_key key)4562 refine_statement_v1(PSI_statement_locker *locker,
4563                     PSI_statement_key key)
4564 {
4565   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4566   if (state == NULL)
4567     return NULL;
4568   DBUG_ASSERT(state->m_class != NULL);
4569   PFS_statement_class *klass;
4570   /* Only refine statements for mutable instrumentation */
4571   klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
4572   DBUG_ASSERT(klass->is_mutable());
4573   klass= find_statement_class(key);
4574 
4575   uint flags= state->m_flags;
4576 
4577   if (unlikely(klass == NULL) || !klass->m_enabled)
4578   {
4579     /* pop statement stack */
4580     if (flags & STATE_FLAG_THREAD)
4581     {
4582       PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4583       DBUG_ASSERT(pfs_thread != NULL);
4584       if (pfs_thread->m_events_statements_count > 0)
4585         pfs_thread->m_events_statements_count--;
4586     }
4587 
4588     state->m_discarded= true;
4589     return NULL;
4590   }
4591 
4592   if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
4593     flags= flags & ~STATE_FLAG_TIMED;
4594 
4595   if (flags & STATE_FLAG_EVENT)
4596   {
4597     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4598     DBUG_ASSERT(pfs != NULL);
4599 
4600     /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
4601     pfs->m_class= klass;
4602   }
4603 
4604   state->m_class= klass;
4605   state->m_flags= flags;
4606   return reinterpret_cast<PSI_statement_locker*> (state);
4607 }
4608 
start_statement_v1(PSI_statement_locker * locker,const char * db,uint db_len,const char * src_file,uint src_line)4609 static void start_statement_v1(PSI_statement_locker *locker,
4610                                const char *db, uint db_len,
4611                                const char *src_file, uint src_line)
4612 {
4613   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4614   DBUG_ASSERT(state != NULL);
4615 
4616   uint flags= state->m_flags;
4617   ulonglong timer_start= 0;
4618 
4619   if (flags & STATE_FLAG_TIMED)
4620   {
4621     timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
4622     state->m_timer_start= timer_start;
4623   }
4624 
4625   compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
4626   DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
4627 
4628   if (db_len > 0)
4629     memcpy(state->m_schema_name, db, db_len);
4630   state->m_schema_name_length= db_len;
4631 
4632   if (flags & STATE_FLAG_EVENT)
4633   {
4634     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4635     DBUG_ASSERT(pfs != NULL);
4636 
4637     pfs->m_timer_start= timer_start;
4638     pfs->m_source_file= src_file;
4639     pfs->m_source_line= src_line;
4640 
4641     DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
4642     if (db_len > 0)
4643       memcpy(pfs->m_current_schema_name, db, db_len);
4644     pfs->m_current_schema_name_length= db_len;
4645   }
4646 }
4647 
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4648 static void set_statement_text_v1(PSI_statement_locker *locker,
4649                                   const char *text, uint text_len)
4650 {
4651   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4652   DBUG_ASSERT(state != NULL);
4653 
4654   if (state->m_discarded)
4655     return;
4656 
4657   if (state->m_flags & STATE_FLAG_EVENT)
4658   {
4659     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4660     DBUG_ASSERT(pfs != NULL);
4661     if (text_len > sizeof (pfs->m_sqltext))
4662     {
4663       text_len= sizeof(pfs->m_sqltext);
4664       pfs->m_sqltext_truncated= true;
4665     }
4666     if (text_len)
4667       memcpy(pfs->m_sqltext, text, text_len);
4668     pfs->m_sqltext_length= text_len;
4669     pfs->m_sqltext_cs_number= state->m_cs_number;
4670   }
4671 
4672   return;
4673 }
4674 
/**
  Complete body of a void function that sets one statement attribute.
  @c LOCKER is cast to a PSI_statement_locker_state. When it is NULL or
  discarded, the expansion returns without doing anything. Otherwise
  @c VALUE is assigned to member @c ATTR of the locker state and, when
  STATE_FLAG_EVENT is set, to the member of the same name in the statement
  event record (state->m_statement).
  The expansion declares locals named @c state and @c pfs and ends with
  @c return, so it must be the only content of the function using it.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state;                                    \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER);        \
  if (unlikely(state == NULL))                                          \
    return;                                                             \
  if (state->m_discarded)                                               \
    return;                                                             \
  state->ATTR= VALUE;                                                   \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs;                                         \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR= VALUE;                                                   \
  }                                                                     \
  return;
4691 
/**
  Complete body of a void function that increments one statement attribute.
  @c LOCKER is cast to a PSI_statement_locker_state. When it is NULL or
  discarded, the expansion returns without doing anything. Otherwise
  @c VALUE is added to member @c ATTR of the locker state and, when
  STATE_FLAG_EVENT is set, to the member of the same name in the statement
  event record (state->m_statement).
  The expansion declares locals named @c state and @c pfs and ends with
  @c return, so it must be the only content of the function using it.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state;                                    \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER);        \
  if (unlikely(state == NULL))                                          \
    return;                                                             \
  if (state->m_discarded)                                               \
    return;                                                             \
  state->ATTR+= VALUE;                                                  \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs;                                         \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR+= VALUE;                                                  \
  }                                                                     \
  return;
4708 
/**
  Implementation of the statement instrumentation interface.
  Set m_lock_time for the statement tracked by @c locker.
  Expands SET_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the value assigned to m_lock_time
*/
static void set_statement_lock_time_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
}
4714 
/**
  Implementation of the statement instrumentation interface.
  Set m_rows_sent for the statement tracked by @c locker.
  Expands SET_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the value assigned to m_rows_sent
*/
static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
}
4720 
/**
  Implementation of the statement instrumentation interface.
  Set m_rows_examined for the statement tracked by @c locker.
  Expands SET_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the value assigned to m_rows_examined
*/
static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
                                           ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
}
4726 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_created_tmp_disk_tables for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
}
4732 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_created_tmp_tables for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
}
4738 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_select_full_join for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
                                              ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
}
4744 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_select_full_range_join for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
}
4750 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_select_range for the statement tracked by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_select_range_v1(PSI_statement_locker *locker,
                                          ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
}
4756 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_select_range_check for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
}
4762 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_select_scan for the statement tracked by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
                                         ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
}
4768 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_sort_merge_passes for the statement tracked
  by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
                                               ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
}
4774 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_sort_range for the statement tracked by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
                                        ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
}
4780 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_sort_rows for the statement tracked by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
}
4786 
/**
  Implementation of the statement instrumentation interface.
  Add @c count to m_sort_scan for the statement tracked by @c locker.
  Expands INC_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
  @param count   the increment
*/
static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
}
4792 
/**
  Implementation of the statement instrumentation interface.
  Set m_no_index_used to 1 for the statement tracked by @c locker.
  Expands SET_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
*/
static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
}
4797 
/**
  Implementation of the statement instrumentation interface.
  Set m_no_good_index_used to 1 for the statement tracked by @c locker.
  Expands SET_STATEMENT_ATTR_BODY: a NULL or discarded locker is ignored.
  @param locker  the statement locker
*/
static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
}
4802 
/**
  Implementation of the statement instrumentation interface.
  Finish the statement tracked by @c locker. The statement is aggregated
  into the per thread statistics (STATE_FLAG_THREAD) or into the global
  statistics otherwise, and into the digest statistics when a digest was
  computed. With STATE_FLAG_EVENT, the statement event record is completed
  from the diagnostics area, copied to the enabled history consumers, and
  popped from the statement stack of the thread.
  Nothing is done for a discarded locker.

  @param locker   the statement locker, must not be NULL
  @param stmt_da  the statement diagnostics area, read as a Diagnostics_area
*/
static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(da != NULL);

  if (state->m_discarded)
    return;

  PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
  DBUG_ASSERT(klass != NULL);

  /* Both stay 0 when the statement is not timed. */
  ulonglong timer_end= 0;
  ulonglong wait_time= 0;
  uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* state->m_timer was set by start_statement_v1(). */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
  }

  PFS_statement_stat *event_name_array;
  uint index= klass->m_event_name_index;
  /* The statistics to aggregate to, per thread or global. */
  PFS_statement_stat *stat;

  /*
   Capture statement stats by digest.
  */
  const sql_digest_storage *digest_storage= NULL;
  /* Stays NULL when there is no digest to aggregate to. */
  PFS_statement_stat *digest_stat= NULL;

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);
    event_name_array= thread->m_instr_class_statements_stats;
    /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
    stat= & event_name_array[index];

    if (flags & STATE_FLAG_DIGEST)
    {
      /* Set by pfs_digest_end_v1(), NULL when no digest was reported. */
      digest_storage= state->m_digest;

      if (digest_storage != NULL)
      {
        /* Populate PFS_statements_digest_stat with computed digest information.*/
        digest_stat= find_or_create_digest(thread, digest_storage,
                                           state->m_schema_name,
                                           state->m_schema_name_length);
      }
    }

    if (flags & STATE_FLAG_EVENT)
    {
      PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
      DBUG_ASSERT(pfs != NULL);

      /* The event record is updated between the two m_stmt_lock calls. */
      thread->m_stmt_lock.allocated_to_dirty();

      /* Copy the statement outcome from the diagnostics area. */
      switch(da->status())
      {
        case Diagnostics_area::DA_EMPTY:
          break;
        case Diagnostics_area::DA_OK:
          /*
            Assumes both buffers hold at least MYSQL_ERRMSG_SIZE bytes, and
            m_message_text one more for the terminator -- TODO confirm.
          */
          memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
          pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
          pfs->m_rows_affected= da->affected_rows();
          pfs->m_warning_count= da->statement_warn_count();
          memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
          break;
        case Diagnostics_area::DA_EOF:
          pfs->m_warning_count= da->statement_warn_count();
          break;
        case Diagnostics_area::DA_ERROR:
          memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
          pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
          pfs->m_sql_errno= da->sql_errno();
          pfs->m_error_count++;
          memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
          break;
        case Diagnostics_area::DA_DISABLED:
          break;
      }

      pfs->m_timer_end= timer_end;
      pfs->m_end_event_id= thread->m_event_id;

      if (digest_storage != NULL)
      {
        /*
          The following columns in events_statement_current:
          - DIGEST,
          - DIGEST_TEXT
          are computed from the digest storage.
        */
        pfs->m_digest_storage.copy(digest_storage);
      }

      if (flag_events_statements_history)
        insert_events_statements_history(thread, pfs);
      if (flag_events_statements_history_long)
        insert_events_statements_history_long(pfs);

      /* Pop the statement pushed by get_thread_statement_locker_v1(). */
      DBUG_ASSERT(thread->m_events_statements_count > 0);
      thread->m_events_statements_count--;
      thread->m_stmt_lock.dirty_to_allocated();
    }
  }
  else
  {
    if (flags & STATE_FLAG_DIGEST)
    {
      /* The locker state has no thread here, use the calling thread. */
      PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

      /* An instrumented thread is required, for LF_PINS. */
      if (thread != NULL)
      {
        /* Set digest stat. */
        digest_storage= state->m_digest;

        if (digest_storage != NULL)
        {
          /* Populate statements_digest_stat with computed digest information. */
          digest_stat= find_or_create_digest(thread, digest_storage,
                                             state->m_schema_name,
                                             state->m_schema_name_length);
        }
      }
    }

    event_name_array= global_instr_class_statements_array;
    /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
    stat= & event_name_array[index];
  }

  if (flags & STATE_FLAG_TIMED)
  {
    /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
    stat->aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
    stat->aggregate_counted();
  }

  /* Add the counters collected in the locker state during the statement. */
  stat->m_lock_time+= state->m_lock_time;
  stat->m_rows_sent+= state->m_rows_sent;
  stat->m_rows_examined+= state->m_rows_examined;
  stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
  stat->m_created_tmp_tables+= state->m_created_tmp_tables;
  stat->m_select_full_join+= state->m_select_full_join;
  stat->m_select_full_range_join+= state->m_select_full_range_join;
  stat->m_select_range+= state->m_select_range;
  stat->m_select_range_check+= state->m_select_range_check;
  stat->m_select_scan+= state->m_select_scan;
  stat->m_sort_merge_passes+= state->m_sort_merge_passes;
  stat->m_sort_range+= state->m_sort_range;
  stat->m_sort_rows+= state->m_sort_rows;
  stat->m_sort_scan+= state->m_sort_scan;
  stat->m_no_index_used+= state->m_no_index_used;
  stat->m_no_good_index_used+= state->m_no_good_index_used;

  /* Same aggregation again, to the digest statistics. */
  if (digest_stat != NULL)
  {
    if (flags & STATE_FLAG_TIMED)
    {
      digest_stat->aggregate_value(wait_time);
    }
    else
    {
      digest_stat->aggregate_counted();
    }

    digest_stat->m_lock_time+= state->m_lock_time;
    digest_stat->m_rows_sent+= state->m_rows_sent;
    digest_stat->m_rows_examined+= state->m_rows_examined;
    digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
    digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
    digest_stat->m_select_full_join+= state->m_select_full_join;
    digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
    digest_stat->m_select_range+= state->m_select_range;
    digest_stat->m_select_range_check+= state->m_select_range_check;
    digest_stat->m_select_scan+= state->m_select_scan;
    digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
    digest_stat->m_sort_range+= state->m_sort_range;
    digest_stat->m_sort_rows+= state->m_sort_rows;
    digest_stat->m_sort_scan+= state->m_sort_scan;
    digest_stat->m_no_index_used+= state->m_no_index_used;
    digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
  }

  /* Aggregate the statement outcome, to both statistics. */
  switch (da->status())
  {
    case Diagnostics_area::DA_EMPTY:
      break;
    case Diagnostics_area::DA_OK:
      stat->m_rows_affected+= da->affected_rows();
      stat->m_warning_count+= da->statement_warn_count();
      if (digest_stat != NULL)
      {
        digest_stat->m_rows_affected+= da->affected_rows();
        digest_stat->m_warning_count+= da->statement_warn_count();
      }
      break;
    case Diagnostics_area::DA_EOF:
      stat->m_warning_count+= da->statement_warn_count();
      if (digest_stat != NULL)
      {
        digest_stat->m_warning_count+= da->statement_warn_count();
      }
      break;
    case Diagnostics_area::DA_ERROR:
      stat->m_error_count++;
      if (digest_stat != NULL)
      {
        digest_stat->m_error_count++;
      }
      break;
    case Diagnostics_area::DA_DISABLED:
      break;
  }
}
5028 
5029 /**
5030   Implementation of the socket instrumentation interface.
5031   @sa PSI_v1::end_socket_wait.
5032 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5033 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5034 {
5035   PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5036   DBUG_ASSERT(state != NULL);
5037 
5038   PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5039   DBUG_ASSERT(socket != NULL);
5040 
5041   ulonglong timer_end= 0;
5042   ulonglong wait_time= 0;
5043   PFS_byte_stat *byte_stat;
5044   uint flags= state->m_flags;
5045   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5046 
5047   switch (state->m_operation)
5048   {
5049     /* Group read operations */
5050     case PSI_SOCKET_RECV:
5051     case PSI_SOCKET_RECVFROM:
5052     case PSI_SOCKET_RECVMSG:
5053       byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5054       break;
5055     /* Group write operations */
5056     case PSI_SOCKET_SEND:
5057     case PSI_SOCKET_SENDTO:
5058     case PSI_SOCKET_SENDMSG:
5059       byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5060       break;
5061     /* Group remaining operations as miscellaneous */
5062     case PSI_SOCKET_CONNECT:
5063     case PSI_SOCKET_CREATE:
5064     case PSI_SOCKET_BIND:
5065     case PSI_SOCKET_SEEK:
5066     case PSI_SOCKET_OPT:
5067     case PSI_SOCKET_STAT:
5068     case PSI_SOCKET_SHUTDOWN:
5069     case PSI_SOCKET_SELECT:
5070     case PSI_SOCKET_CLOSE:
5071       byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5072       break;
5073     default:
5074       DBUG_ASSERT(false);
5075       byte_stat= NULL;
5076       break;
5077   }
5078 
5079   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5080   if (flags & STATE_FLAG_TIMED)
5081   {
5082     timer_end= state->m_timer();
5083     wait_time= timer_end - state->m_timer_start;
5084 
5085     /* Aggregate to the socket instrument for now (timed) */
5086     byte_stat->aggregate(wait_time, bytes);
5087   }
5088   else
5089   {
5090     /* Aggregate to the socket instrument (event count and byte count) */
5091     byte_stat->aggregate_counted(bytes);
5092   }
5093 
5094   /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5095   if (flags & STATE_FLAG_EVENT)
5096   {
5097     PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5098     DBUG_ASSERT(thread != NULL);
5099     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5100     DBUG_ASSERT(wait != NULL);
5101 
5102     wait->m_timer_end= timer_end;
5103     wait->m_end_event_id= thread->m_event_id;
5104     wait->m_number_of_bytes= bytes;
5105 
5106     if (flag_events_waits_history)
5107       insert_events_waits_history(thread, wait);
5108     if (flag_events_waits_history_long)
5109       insert_events_waits_history_long(wait);
5110     thread->m_events_waits_current--;
5111 
5112     DBUG_ASSERT(wait == thread->m_events_waits_current);
5113   }
5114 }
5115 
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5116 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5117 {
5118   DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5119   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5120   DBUG_ASSERT(pfs != NULL);
5121   DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5122   DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5123   pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5124 }
5125 
5126 /**
5127   Set socket descriptor and address info.
5128 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5129 static void set_socket_info_v1(PSI_socket *socket,
5130                                const my_socket *fd,
5131                                const struct sockaddr *addr,
5132                                socklen_t addr_len)
5133 {
5134   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5135   DBUG_ASSERT(pfs != NULL);
5136 
5137   /** Set socket descriptor */
5138   if (fd != NULL)
5139     pfs->m_fd= *fd;
5140 
5141   /** Set raw socket address and length */
5142   if (likely(addr != NULL && addr_len > 0))
5143   {
5144     pfs->m_addr_len= addr_len;
5145 
5146     /** Restrict address length to size of struct */
5147     if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5148       pfs->m_addr_len= sizeof(struct sockaddr_storage);
5149 
5150     memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5151   }
5152 }
5153 
5154 /**
5155   Implementation of the socket instrumentation interface.
5156   @sa PSI_v1::set_socket_info.
5157 */
set_socket_thread_owner_v1(PSI_socket * socket)5158 static void set_socket_thread_owner_v1(PSI_socket *socket)
5159 {
5160   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
5161   DBUG_ASSERT(pfs_socket != NULL);
5162   pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5163 }
5164 
5165 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5166 pfs_digest_start_v1(PSI_statement_locker *locker)
5167 {
5168   PSI_statement_locker_state *statement_state;
5169   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5170   DBUG_ASSERT(statement_state != NULL);
5171 
5172   if (statement_state->m_discarded)
5173     return NULL;
5174 
5175   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5176   {
5177     return reinterpret_cast<PSI_digest_locker*> (locker);
5178   }
5179 
5180   return NULL;
5181 }
5182 
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5183 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5184 {
5185   PSI_statement_locker_state *statement_state;
5186   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5187   DBUG_ASSERT(statement_state != NULL);
5188   DBUG_ASSERT(digest != NULL);
5189 
5190   if (statement_state->m_discarded)
5191     return;
5192 
5193   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5194   {
5195     statement_state->m_digest= digest;
5196   }
5197 }
5198 
5199 /**
5200   Implementation of the thread attribute connection interface
5201   @sa PSI_v1::set_thread_connect_attr.
5202 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5203 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5204                                        const void *from_cs)
5205 {
5206 
5207   PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5208 
5209   DBUG_ASSERT(buffer != NULL);
5210 
5211   if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5212   {
5213     const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5214 
5215     /* copy from the input buffer as much as we can fit */
5216     uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5217                            length : session_connect_attrs_size_per_thread);
5218     thd->m_session_lock.allocated_to_dirty();
5219     memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5220     thd->m_session_connect_attrs_length= copy_size;
5221     thd->m_session_connect_attrs_cs_number= cs->number;
5222     thd->m_session_lock.dirty_to_allocated();
5223 
5224     if (copy_size == length)
5225       return 0;
5226 
5227     session_connect_attrs_lost++;
5228     return 1;
5229   }
5230   return 0;
5231 }
5232 
5233 
5234 /**
5235   Implementation of the instrumentation interface.
5236   @sa PSI_v1.
5237 */
5238 PSI_v1 PFS_v1=
5239 {
5240   register_mutex_v1,
5241   register_rwlock_v1,
5242   register_cond_v1,
5243   register_thread_v1,
5244   register_file_v1,
5245   register_stage_v1,
5246   register_statement_v1,
5247   register_socket_v1,
5248   init_mutex_v1,
5249   destroy_mutex_v1,
5250   init_rwlock_v1,
5251   destroy_rwlock_v1,
5252   init_cond_v1,
5253   destroy_cond_v1,
5254   init_socket_v1,
5255   destroy_socket_v1,
5256   get_table_share_v1,
5257   release_table_share_v1,
5258   drop_table_share_v1,
5259   open_table_v1,
5260   unbind_table_v1,
5261   rebind_table_v1,
5262   close_table_v1,
5263   create_file_v1,
5264   spawn_thread_v1,
5265   new_thread_v1,
5266   set_thread_id_v1,
5267   get_thread_v1,
5268   set_thread_user_v1,
5269   set_thread_account_v1,
5270   set_thread_db_v1,
5271   set_thread_command_v1,
5272   set_thread_start_time_v1,
5273   set_thread_state_v1,
5274   set_thread_info_v1,
5275   set_thread_v1,
5276   delete_current_thread_v1,
5277   delete_thread_v1,
5278   get_thread_file_name_locker_v1,
5279   get_thread_file_stream_locker_v1,
5280   get_thread_file_descriptor_locker_v1,
5281   unlock_mutex_v1,
5282   unlock_rwlock_v1,
5283   signal_cond_v1,
5284   broadcast_cond_v1,
5285   start_idle_wait_v1,
5286   end_idle_wait_v1,
5287   start_mutex_wait_v1,
5288   end_mutex_wait_v1,
5289   start_rwlock_wait_v1, /* read */
5290   end_rwlock_rdwait_v1,
5291   start_rwlock_wait_v1, /* write */
5292   end_rwlock_wrwait_v1,
5293   start_cond_wait_v1,
5294   end_cond_wait_v1,
5295   start_table_io_wait_v1,
5296   end_table_io_wait_v1,
5297   start_table_lock_wait_v1,
5298   end_table_lock_wait_v1,
5299   start_file_open_wait_v1,
5300   end_file_open_wait_v1,
5301   end_file_open_wait_and_bind_to_descriptor_v1,
5302   start_file_wait_v1,
5303   end_file_wait_v1,
5304   start_file_close_wait_v1,
5305   end_file_close_wait_v1,
5306   start_stage_v1,
5307   end_stage_v1,
5308   get_thread_statement_locker_v1,
5309   refine_statement_v1,
5310   start_statement_v1,
5311   set_statement_text_v1,
5312   set_statement_lock_time_v1,
5313   set_statement_rows_sent_v1,
5314   set_statement_rows_examined_v1,
5315   inc_statement_created_tmp_disk_tables_v1,
5316   inc_statement_created_tmp_tables_v1,
5317   inc_statement_select_full_join_v1,
5318   inc_statement_select_full_range_join_v1,
5319   inc_statement_select_range_v1,
5320   inc_statement_select_range_check_v1,
5321   inc_statement_select_scan_v1,
5322   inc_statement_sort_merge_passes_v1,
5323   inc_statement_sort_range_v1,
5324   inc_statement_sort_rows_v1,
5325   inc_statement_sort_scan_v1,
5326   set_statement_no_index_used_v1,
5327   set_statement_no_good_index_used_v1,
5328   end_statement_v1,
5329   start_socket_wait_v1,
5330   end_socket_wait_v1,
5331   set_socket_state_v1,
5332   set_socket_info_v1,
5333   set_socket_thread_owner_v1,
5334   pfs_digest_start_v1,
5335   pfs_digest_end_v1,
5336   set_thread_connect_attrs_v1,
5337 };
5338 
get_interface(int version)5339 static void* get_interface(int version)
5340 {
5341   switch (version)
5342   {
5343   case PSI_VERSION_1:
5344     return &PFS_v1;
5345   default:
5346     return NULL;
5347   }
5348 }
5349 
5350 C_MODE_END
5351 
5352 struct PSI_bootstrap PFS_bootstrap=
5353 {
5354   get_interface
5355 };
5356