1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2 
3   This program is free software; you can redistribute it and/or modify
4   it under the terms of the GNU General Public License, version 2.0,
5   as published by the Free Software Foundation.
6 
7   This program is also distributed with certain software (including
8   but not limited to OpenSSL) that is licensed under separate terms,
9   as designated in a particular file or component or in included license
10   documentation.  The authors of MySQL hereby grant you an additional
11   permission to link the program and your derivative works with the
12   separately licensed software that they have included with MySQL.
13 
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License, version 2.0, for more details.
18 
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software Foundation,
21   51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 /**
24   @file storage/perfschema/pfs.cc
25   The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sp_head.h"
49 #include "pfs_digest.h"
50 
51 using std::min;
52 /**
53   @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
54   MySQL PERFORMANCE_SCHEMA implementation.
55 
56   @section INTRO Introduction
57   The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
58   the server at runtime.
59   The performance schema focuses primarily on performance data,
60   as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
61 
62   From a user point of view, the performance schema consists of:
63   - a dedicated database schema, named PERFORMANCE_SCHEMA,
64   - SQL tables, used to query the server internal state or change
65   configuration settings.
66 
67   From an implementation point of view, the performance schema is a dedicated
68   Storage Engine which exposes data collected by 'Instrumentation Points'
69   placed in the server code.
70 
71   @section INTERFACES Multiple interfaces
72 
73   The performance schema exposes many different interfaces,
74   for different components, and for different purposes.
75 
76   @subsection INT_INSTRUMENTING Instrumenting interface
77 
78   All the data representing the server internal state exposed
79   in the performance schema must be first collected:
80   this is the role of the instrumenting interface.
81   The instrumenting interface is a coding interface provided
82   by implementors (of the performance schema) to implementors
83   (of the server or server components).
84 
85   This interface is available to:
86   - C implementations
87   - C++ implementations
88   - the core SQL layer (/sql)
89   - the mysys library (/mysys)
90   - MySQL plugins, including storage engines,
91   - third party plugins, including third party storage engines.
92 
93   For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
94   "instrumentation interface page".
95 
96   @subsection INT_COMPILING Compiling interface
97 
98   The implementation of the performance schema can be enabled or disabled at
99   build time, when building MySQL from the source code.
100 
101   When building with the performance schema code, some compilation flags
102   are available to change the default values used in the code, if required.
103 
104   For more details, see:
105   @verbatim ./configure --help @endverbatim
106 
107   To compile with the performance schema:
108   @verbatim ./configure --with-perfschema @endverbatim
109 
110   The implementation of all the compiling options is located in
111   @verbatim ./storage/perfschema/plug.in @endverbatim
112 
113   @subsection INT_STARTUP Server startup interface
114 
115   The server startup interface consists of the "./mysqld ..."
116   command line used to start the server.
117   When the performance schema is compiled in the server binary,
118   extra command line options are available.
119 
120   These extra start options allow the DBA to:
121   - enable or disable the performance schema
122   - specify some sizing parameters.
123 
124   To see help for the performance schema startup options, see:
125   @verbatim ./sql/mysqld --verbose --help  @endverbatim
126 
127   The implementation of all the startup options is located in
128   @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
129 
130   @subsection INT_BOOTSTRAP Server bootstrap interface
131 
132   The bootstrap interface is a private interface exposed by
133   the performance schema, and used by the SQL layer.
134   Its role is to advertise all the SQL tables natively
135   supported by the performance schema to the SQL server.
136   The code consists of creating MySQL tables for the
137   performance schema itself, and is used in './mysqld --bootstrap'
138   mode when a server is installed.
139 
140   The implementation of the database creation script is located in
141   @verbatim ./scripts/mysql_system_tables.sql @endverbatim
142 
143   @subsection INT_CONFIG Runtime configuration interface
144 
145   When the performance schema is used at runtime, various configuration
146   parameters can be used to specify what kind of data is collected,
147   what kind of aggregations are computed, what kind of timers are used,
148   what events are timed, etc.
149 
150   For all these capabilities, not a single statement or special syntax
151   was introduced in the parser.
152   Instead of new SQL statements, the interface consists of DML
153   (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
154 
155   For example:
156   @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
157     set ENABLED='YES', TIMED='YES';
158   Query OK, 234 rows affected (0.00 sec)
159   Rows matched: 234  Changed: 234  Warnings: 0 @endverbatim
160 
161   @subsection INT_STATUS Internal audit interface
162 
163   The internal audit interface is provided to the DBA to inspect if the
164   performance schema code itself is functioning properly.
165   This interface is necessary because a failure caused while
166   instrumenting code in the server should not cause failures in the
167   MySQL server itself, so that the performance schema implementation
168   never raises errors during runtime execution.
169 
170   This auditing interface consists of:
171   @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
172   It displays data related to the memory usage of the performance schema,
173   as well as statistics about lost events, if any.
174 
175   The SHOW ENGINE PERFORMANCE_SCHEMA STATUS command is implemented in
176   @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
177 
178   @subsection INT_QUERY Query interface
179 
180   The query interface is used to query the internal state of a running server.
181   It is provided as SQL tables.
182 
183   For example:
184   @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
185   @endverbatim
186 
187   @section DESIGN_PRINCIPLES Design principles
188 
189   @subsection PRINCIPLE_BEHAVIOR No behavior changes
190 
191   The primary goal of the performance schema is to measure (instrument) the
192   execution of the server. A good measure should not cause any change
193   in behavior.
194 
195   To achieve this, the overall design of the performance schema complies
196   with the following very severe design constraints:
197 
198   The parser is unchanged. There are no new keywords, no new statements.
199   This guarantees that existing applications will run the same way with or
200   without the performance schema.
201 
202   All the instrumentation points return "void", there are no error codes.
203   Even if the performance schema internally fails, execution of the server
204   code will proceed.
205 
206   None of the instrumentation points allocate memory.
207   All the memory used by the performance schema is pre-allocated at startup,
208   and is considered "static" during the server life time.
209 
210   None of the instrumentation points use any pthread_mutex, pthread_rwlock,
211   or pthread_cond (or platform equivalents).
212   Executing the instrumentation point should not cause thread scheduling to
213   change in the server.
214 
215   In other words, the implementation of the instrumentation points,
216   including all the code called by the instrumentation points, is:
217   - malloc free
218   - mutex free
219   - rwlock free
220 
221   TODO: All the code located in storage/perfschema is malloc free,
222   but unfortunately the usage of LF_HASH introduces some memory allocation.
223   This should be revised if possible, to use a lock-free,
224   malloc-free hash code table.
225 
226   @subsection PRINCIPLE_PERFORMANCE No performance hit
227 
228   The instrumentation of the server should be as fast as possible.
229   In cases when there are choices between:
230   - doing some processing when recording the performance data
231   in the instrumentation,
232   - doing some processing when retrieving the performance data,
233 
234   priority is given in the design to make the instrumentation faster,
235   pushing some complexity to data retrieval.
236 
237   As a result, some parts of the design, related to:
238   - the setup code path,
239   - the query code path,
240 
241   might appear to be sub-optimal.
242 
243   The criterion used here is to optimize primarily the critical path (data
244   collection), possibly at the expense of non-critical code paths.
245 
246   @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
247 
248   For the performance schema in general to be successful, the barrier
249   of entry for a developer should be low, so it's easy to instrument code.
250 
251   In particular, the instrumentation interface:
252   - is available for C and C++ code (so it's a C interface),
253   - does not require parameters that the calling code can't easily provide,
254   - supports partial instrumentation (for example, instrumenting mutexes does
255   not require that every mutex is instrumented)
256 
257   @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
258 
259   As the content of the performance schema improves,
260   with more tables exposed and more data collected,
261   the instrumentation interface will also be augmented
262   to support instrumenting new concepts.
263   Existing instrumentations should not be affected when additional
264   instrumentation is made available, and making a new instrumentation
265   available should not require existing instrumented code to support it.
266 
267   @subsection PRINCIPLE_VERSIONED Versioned instrumentation
268 
269   Given that the instrumentation offered by the performance schema will
270   be augmented with time, when more features are implemented,
271   the interface itself should be versioned, to keep compatibility
272   with previous instrumented code.
273 
274   For example, after both plugin-A and plugin-B have been instrumented for
275   mutexes, read write locks and conditions, using the instrumentation
276   interface, we can anticipate that the instrumentation interface
277   is expanded to support file based operations.
278 
279   Plugin-A, a file based storage engine, will most likely use the expanded
280   interface and instrument its file usage, using the version 2
281   interface, while Plugin-B, a network based storage engine, will not change
282   its code and not release a new binary.
283 
284   When later the instrumentation interface is expanded to support network
285   based operations (which will define interface version 3), the Plugin-B code
286   can then be changed to make use of it.
287 
288   Note, this is just an example to illustrate the design concept here.
289   Both mutexes and file instrumentation are already available
290   since version 1 of the instrumentation interface.
291 
292   @subsection PRINCIPLE_DEPLOYMENT Easy deployment
293 
294   Internally, we might want every plugin implementation to upgrade the
295   instrumented code to the latest available, but this will cause additional
296   work and this is not practical if the code change is monolithic.
297 
298   Externally, for third party plugin implementors, asking implementors to
299   always stay aligned to the latest instrumentation and make new releases,
300   even when the change does not provide new functionality for them,
301   is a bad idea.
302 
303   For example, requiring a network based engine to re-release because the
304   instrumentation interface changed for file based operations, will create
305   too many deployment issues.
306 
307   So, the performance schema implementation must support concurrently,
308   in the same deployment, multiple versions of the instrumentation
309   interface, and ensure binary compatibility with each version.
310 
311   In addition to this, the performance schema can be included or excluded
312   from the server binary, using build time configuration options.
313 
314   Regardless, the following types of deployment are valid:
315   - a server supporting the performance schema + a storage engine
316   that is not instrumented
317   - a server not supporting the performance schema + a storage engine
318   that is instrumented
319 */
320 
321 /**
322   @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
323   MySQL performance schema instrumentation interface.
324 
325   @section INTRO Introduction
326 
327   The instrumentation interface consists of two layers:
328   - a raw ABI (Application Binary Interface) layer, that exposes the primitive
329   instrumentation functions exported by the performance schema instrumentation
330   - an API (Application Programming Interface) layer,
331   that provides many helpers for a developer instrumenting some code,
332   to make the instrumentation as easy as possible.
333 
334   The ABI layer consists of:
335 @code
336 #include "mysql/psi/psi.h"
337 @endcode
338 
339   The API layer consists of:
340 @code
341 #include "mysql/psi/mysql_thread.h"
342 #include "mysql/psi/mysql_file.h"
343 @endcode
344 
345   The first helper is for mutexes, rwlocks and conditions,
346   the second for file io.
347 
348   The API layer exposes C macros and typedefs which will expand:
349   - either to non-instrumented code, when compiled without the performance
350   schema instrumentation
351   - or to instrumented code, that will issue the raw calls to the ABI layer
352   so that the implementation can collect data.
353 
354   Note that all the names introduced (for example, @c mysql_mutex_lock) do not
355   collide with any other namespace.
356   In particular, the macro @c mysql_mutex_lock is on purpose not named
357   @c pthread_mutex_lock.
358   This is to:
359   - avoid overloading @c pthread_mutex_lock with yet another macro,
360   which is dangerous as it can affect user code and pollute
361   the end-user namespace.
362   - allow the developer instrumenting code to selectively instrument
363   some code but not all.
364 
365   @section PRINCIPLES Design principles
366 
367   The ABI part is designed as a facade, that exposes basic primitives.
368   The expectation is that each primitive will be very stable over time,
369   but the list will constantly grow when more instruments are supported.
370   To support binary compatibility with plugins compiled with a different
371   version of the instrumentation, the ABI itself is versioned
372   (see @c PSI_v1, @c PSI_v2).
373 
374   For a given instrumentation point in the API, the basic coding pattern
375   used is:
376   - (a) notify the performance schema of the operation
377   about to be performed.
378   - (b) execute the instrumented code.
379   - (c) notify the performance schema that the operation
380   is completed.
381 
382   An opaque "locker" pointer is returned by (a), that is given to (c).
383   This pointer helps the implementation to keep context, for performance.
384 
385   The following code fragment is annotated to show in detail how this pattern
386   is implemented, when the instrumentation is compiled in:
387 
388 @verbatim
389 static inline int mysql_mutex_lock(
390   mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
391 {
392   int result;
393   struct PSI_mutex_locker_state state;
394   struct PSI_mutex_locker *locker= NULL;
395 
396   ............... (a)
397   locker= PSI_server->start_mutex_wait(&state, that->p_psi,
398                                        PSI_MUTEX_LOCK, locker, src_file, src_line);
399 
400   ............... (b)
401   result= pthread_mutex_lock(&that->m_mutex);
402 
403   ............... (c)
404   PSI_server->end_mutex_wait(locker, result);
405 
406   return result;
407 }
408 @endverbatim
409 
410   When the performance schema instrumentation is not compiled in,
411   the code becomes simply a wrapper, expanded in line by the compiler:
412 
413 @verbatim
414 static inline int mysql_mutex_lock(...)
415 {
416   int result;
417 
418   ............... (b)
419   result= pthread_mutex_lock(&that->m_mutex);
420 
421   return result;
422 }
423 @endverbatim
424 */
425 
426 /**
427   @page PAGE_AGGREGATES Performance schema: the aggregates page.
428   Performance schema aggregates.
429 
430   @section INTRO Introduction
431 
432   Aggregates tables are tables that can be formally defined as
433   SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
434 
435   Each group clause defines a different kind of aggregate, and corresponds to
436   a different table exposed by the performance schema.
437 
438   Aggregates can be either:
439   - computed on the fly,
440   - computed on demand, based on other available data.
441 
442   'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
443   the best approximation is EVENTS_WAITS_HISTORY_LONG.
444   Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
445   while aggregates computed on demand are based on other
446   EVENTS_WAITS_SUMMARY_BY_xxx tables.
447 
448   To better understand the implementation itself, a bit of math is
449   required first, to understand the model behind the code:
450   the code is deceptively simple, the real complexity resides
451   in the flyweight of pointers between various performance schema buffers.
452 
453   @section DIMENSION Concept of dimension
454 
455   An event measured by the instrumentation has many attributes.
456   An event is represented as a data point P(x1, x2, ..., xN),
457   where each x_i coordinate represents a given attribute value.
458 
459   Examples of attributes are:
460   - the time waited
461   - the object waited on
462   - the instrument waited on
463   - the thread that waited
464   - the operation performed
465   - per object or per operation additional attributes, such as spins,
466   number of bytes, etc.
467 
468   Computing an aggregate per thread is fundamentally different from
469   computing an aggregate by instrument, so the "_BY_THREAD" and
470   "_BY_EVENT_NAME" aggregates are different dimensions,
471   operating on different x_i and x_j coordinates.
472   These aggregates are "orthogonal".
473 
474   @section PROJECTION Concept of projection
475 
476   A given x_i attribute value can convey either just one basic information,
477   such as a number of bytes, or can convey implied information,
478   such as an object fully qualified name.
479 
480   For example, from the value "test.t1", the name of the object schema
481   "test" can be separated from the object name "t1", so that now aggregates
482   by object schema can be implemented.
483 
484   In math terms, that corresponds to defining a function:
485   F_i (x): x --> y
486   Applying this function to our point P gives another point P':
487 
488   F_i (P):
489   P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
490   --> P' (x1, x2, ..., x{i-1}, f_i(x_i), x{i+1}, ..., x_N)
491 
492   That function defines in fact an aggregate !
493   In SQL terms, this aggregate would look like the following table:
494 
495 @verbatim
496   CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
497   SELECT col_1, col_2, ..., col_{i-1},
498          Func_i(col_i),
499          COUNT(col_i),
500          MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
501          col_{i+1}, ..., col_N
502          FROM EVENTS_WAITS_HISTORY_INFINITE
503          group by col_1, col_2, ..., col_{i-1}, col_{i+1}, ..., col_N.
504 @endverbatim
505 
506   Note that not all columns have to be included,
507   in particular some columns that are dependent on the x_i column should
508   be removed, so that in practice, MySQL's aggregation method tends to
509   remove many attributes at each aggregation step.
510 
511   For example, when aggregating wait events by object instances,
512   - the wait_time and number_of_bytes can be summed,
513   and sum(wait_time) now becomes an object instance attribute.
514   - the source, timer_start, timer_end columns are not in the
515   _BY_INSTANCE table, because these attributes are only
516   meaningful for a wait.
517 
518   @section COMPOSITION Concept of composition
519 
520   Now, the "test.t1" --> "test" example was purely theory,
521   just to explain the concept, and does not lead very far.
522   Let's look at a more interesting example of data that can be derived
523   from the row event.
524 
525   An event creates a transient object, PFS_wait_locker, per operation.
526   This object's life cycle is extremely short: it's created just
527   before the start_wait() instrumentation call, and is destroyed in
528   the end_wait() call.
529 
530   The wait locker itself contains a pointer to the object instance
531   waited on.
532   That allows implementing a wait_locker --> object instance projection,
533   with m_target.
534   The object instance life cycle depends on _init and _destroy calls
535   from the code, such as mysql_mutex_init()
536   and mysql_mutex_destroy() for a mutex.
537 
538   The object instance waited on contains a pointer to the object class,
539   which is represented by the instrument name.
540   That allows implementing an object instance --> object class projection.
541   The object class life cycle is permanent, as instruments are loaded in
542   the server and never removed.
543 
544   The object class is named in such a way
545   (for example, "wait/sync/mutex/sql/LOCK_open",
546   "wait/io/file/maria/data_file") that the component ("sql", "maria")
547   that it belongs to can be inferred.
548   That allows implementing an object class --> server component projection.
549 
550   Back to math again, we have, for example for mutexes:
551 
552   F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
553 
554   F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
555 
556   F2_to_3 (i) : PFS_mutex_class i --> const char *component =
557                                         substring(i->m_name, ...)
558 
559   Per components aggregates are not implemented, this is just an illustration.
560 
561   F1 alone defines this aggregate:
562 
563   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
564   (or MUTEX_INSTANCE)
565 
566   F1_to_2 alone could define this aggregate:
567 
568   EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
569 
570   Alternatively, using function composition, with
571   F2 = F1_to_2 o F1, F2 defines:
572 
573   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
574 
575   Likewise, F2_to_3 defines:
576 
577   EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
578 
579   and F3 = F2_to_3 o F1_to_2 o F1 defines:
580 
581   EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
582 
583   What has all this to do with the code ?
584 
585   Functions (or aggregates) such as F3 are not implemented as is.
586   Instead, they are decomposed into F2_to_3 o F1_to_2 o F1,
587   and each intermediate aggregate is stored into an internal buffer.
588   This allows supporting all of the F1, F2, F3 aggregates from shared
589   internal buffers, where computation already performed to compute F2
590   is reused when computing F3.
591 
592   @section OBJECT_GRAPH Object graph
593 
594   In terms of object instances, or records, pointers between
595   different buffers define an object instance graph.
596 
597   For example, assuming the following scenario:
598   - A mutex class "M" is instrumented, the instrument name
599   is "wait/sync/mutex/sql/M"
600   - This mutex instrument has been instantiated twice,
601   mutex instances are noted M-1 and M-2
602   - Threads T-A and T-B are locking mutex instance M-1
603   - Threads T-C and T-D are locking mutex instance M-2
604 
605   The performance schema will record the following data:
606   - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
607   - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
608   - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
609 
610   The graph of structures will look like:
611 
612 @verbatim
613   PFS_wait_locker (T-A, M-1) ----------
614                                       |
615                                       v
616                                  PFS_mutex (M-1)
617                                  - m_wait_stat    ------------
618                                       ^                      |
619                                       |                      |
620   PFS_wait_locker (T-B, M-1) ----------                      |
621                                                              v
622                                                         PFS_mutex_class (M)
623                                                         - m_wait_stat
624   PFS_wait_locker (T-C, M-2) ----------                      ^
625                                       |                      |
626                                       v                      |
627                                  PFS_mutex (M-2)             |
628                                  - m_wait_stat    ------------
629                                       ^
630                                       |
631   PFS_wait_locker (T-D, M-2) ----------
632 
633             ||                        ||                     ||
634             ||                        ||                     ||
635             vv                        vv                     vv
636 
637   EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
638 @endverbatim
639 
640   @section ON_THE_FLY On the fly aggregates
641 
642   'On the fly' aggregates are computed during the code execution.
643   This is necessary because the data the aggregate is based on is volatile,
644   and can not be kept indefinitely.
645 
646   With on the fly aggregates:
647   - the writer thread does all the computation
648   - the reader thread accesses the result directly
649 
650   This model is to be avoided if possible, due to the overhead
651   caused when instrumenting code.
652 
653   @section HIGHER_LEVEL Higher level aggregates
654 
655   'Higher level' aggregates are implemented on demand only.
656   The code executing a SELECT from the aggregate table is
657   collecting data from multiple internal buffers to produce the result.
658 
659   With higher level aggregates:
660   - the reader thread does all the computation
661   - the writer thread has no overhead.
662 
663   @section MIXED Mixed level aggregates
664 
665   The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
666   aggregates, for internal buffers that are not permanent.
667 
668   While an object is present in a buffer, the higher level model is used.
669   When an object is about to be destroyed, statistics are saved into
670   a 'parent' buffer with a longer life cycle, to follow the on the fly model.
671 
672   With mixed aggregates:
673   - the reader thread does a lot of complex computation,
674   - the writer thread has minimal overhead, on destroy events.
675 
676   @section IMPL_WAIT Implementation for waits aggregates
677 
678   For waits, the tables that contain aggregated wait data are:
679   - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
680   - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
681   - EVENTS_WAITS_SUMMARY_BY_INSTANCE
682   - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
683   - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
684   - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
685   - FILE_SUMMARY_BY_EVENT_NAME
686   - FILE_SUMMARY_BY_INSTANCE
687   - SOCKET_SUMMARY_BY_INSTANCE
688   - SOCKET_SUMMARY_BY_EVENT_NAME
689   - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
690 
691   The instrumented code that generates wait events consists of:
692   - mutexes (mysql_mutex_t)
693   - rwlocks (mysql_rwlock_t)
694   - conditions (mysql_cond_t)
695   - file io (MYSQL_FILE)
696   - socket io (MYSQL_SOCKET)
697   - table io
698   - table lock
699   - idle
700 
701   The flow of data between aggregates tables varies for each instrumentation.
702 
703   @subsection IMPL_WAIT_MUTEX Mutex waits
704 
705 @verbatim
706   mutex_locker(T, M)
707    |
708    | [1]
709    |
710    |-> pfs_mutex(M)                           =====>> [B], [C]
711    |    |
712    |    | [2]
713    |    |
714    |    |-> pfs_mutex_class(M.class)          =====>> [C]
715    |
716    |-> pfs_thread(T).event_name(M)            =====>> [A], [D], [E], [F]
717         |
718         | [3]
719         |
720      3a |-> pfs_account(U, H).event_name(M)   =====>> [D], [E], [F]
721         .    |
722         .    | [4-RESET]
723         .    |
724      3b .....+-> pfs_user(U).event_name(M)    =====>> [E]
725         .    |
726      3c .....+-> pfs_host(H).event_name(M)    =====>> [F]
727 @endverbatim
728 
729   How to read this diagram:
730   - events that occur during the instrumented code execution are noted with numbers,
731   as in [1]. Code executed by these events has an impact on overhead.
732   - events that occur during TRUNCATE TABLE operations are noted with numbers,
733   followed by "-RESET", as in [4-RESET].
734   Code executed by these events has no impact on overhead,
735   since they are executed by independent monitoring sessions.
736   - events that occur when a reader extracts data from a performance schema table
737   are noted with letters, as in [A]. The name of the table involved,
738   and the method that builds a row are documented. Code executed by these events
739   has no impact on the instrumentation overhead. Note that the table
740   implementation may pull data from different buffers.
741   - nominal code paths are in plain lines. A "nominal" code path corresponds to
742   cases where the performance schema buffers are sized so that no records are lost.
743   - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
744   to edge cases where parent buffers are full, which forces the code to aggregate to
745   grand parents directly.
746 
747   Implemented as:
748   - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
749   - [2] @c destroy_mutex_v1()
750   - [3] @c aggregate_thread_waits()
751   - [4-RESET] @c PFS_account::aggregate_waits()
752   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
753         @c table_ews_by_thread_by_event_name::make_row()
754   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
755         @c table_events_waits_summary_by_instance::make_mutex_row()
756   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
757         @c table_ews_global_by_event_name::make_mutex_row()
758   - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
759         @c table_ews_by_account_by_event_name::make_row()
760   - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
761         @c table_ews_by_user_by_event_name::make_row()
762   - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
763         @c table_ews_by_host_by_event_name::make_row()
764 
  Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is an 'on the fly' aggregate,
  because the data is collected on the fly by [1] and stored into a buffer,
767   pfs_mutex. The table implementation [B] simply reads the results directly
768   from this buffer.
769 
770   Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
  because some data is collected on the fly [1],
  some data is preserved with [2] at a later time in the life cycle,
773   and two different buffers pfs_mutex and pfs_mutex_class are used to store the
774   statistics collected. The table implementation [C] is more complex, since
775   it reads from two buffers pfs_mutex and pfs_mutex_class.
776 
777   @subsection IMPL_WAIT_RWLOCK Rwlock waits
778 
779 @verbatim
780   rwlock_locker(T, R)
781    |
782    | [1]
783    |
784    |-> pfs_rwlock(R)                          =====>> [B], [C]
785    |    |
786    |    | [2]
787    |    |
788    |    |-> pfs_rwlock_class(R.class)         =====>> [C]
789    |
790    |-> pfs_thread(T).event_name(R)            =====>> [A]
791         |
792        ...
793 @endverbatim
794 
795   Implemented as:
796   - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
797   - [2] @c destroy_rwlock_v1()
798   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
799         @c table_ews_by_thread_by_event_name::make_row()
800   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
801         @c table_events_waits_summary_by_instance::make_rwlock_row()
802   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
803         @c table_ews_global_by_event_name::make_rwlock_row()
804 
805   @subsection IMPL_WAIT_COND Cond waits
806 
807 @verbatim
808   cond_locker(T, C)
809    |
810    | [1]
811    |
812    |-> pfs_cond(C)                            =====>> [B], [C]
813    |    |
814    |    | [2]
815    |    |
816    |    |-> pfs_cond_class(C.class)           =====>> [C]
817    |
818    |-> pfs_thread(T).event_name(C)            =====>> [A]
819         |
820        ...
821 @endverbatim
822 
823   Implemented as:
824   - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
825   - [2] @c destroy_cond_v1()
826   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
827         @c table_ews_by_thread_by_event_name::make_row()
828   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
829         @c table_events_waits_summary_by_instance::make_cond_row()
830   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
831         @c table_ews_global_by_event_name::make_cond_row()
832 
833   @subsection IMPL_WAIT_FILE File waits
834 
835 @verbatim
836   file_locker(T, F)
837    |
838    | [1]
839    |
840    |-> pfs_file(F)                            =====>> [B], [C], [D], [E]
841    |    |
842    |    | [2]
843    |    |
844    |    |-> pfs_file_class(F.class)           =====>> [C], [D]
845    |
846    |-> pfs_thread(T).event_name(F)            =====>> [A]
847         |
848        ...
849 @endverbatim
850 
851   Implemented as:
852   - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
853         @c end_file_wait_v1(), ...
854   - [2] @c close_file_v1()
855   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
856         @c table_ews_by_thread_by_event_name::make_row()
857   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
858         @c table_events_waits_summary_by_instance::make_file_row()
859   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
860         @c table_ews_global_by_event_name::make_file_row()
861   - [D] FILE_SUMMARY_BY_EVENT_NAME,
862         @c table_file_summary_by_event_name::make_row()
863   - [E] FILE_SUMMARY_BY_INSTANCE,
864         @c table_file_summary_by_instance::make_row()
865 
866   @subsection IMPL_WAIT_SOCKET Socket waits
867 
868 @verbatim
869   socket_locker(T, S)
870    |
871    | [1]
872    |
873    |-> pfs_socket(S)                            =====>> [A], [B], [C], [D], [E]
874         |
875         | [2]
876         |
877         |-> pfs_socket_class(S.class)           =====>> [C], [D]
878         |
879         |-> pfs_thread(T).event_name(S)         =====>> [A]
880         |
881         | [3]
882         |
883      3a |-> pfs_account(U, H).event_name(S)     =====>> [F], [G], [H]
884         .    |
885         .    | [4-RESET]
886         .    |
887      3b .....+-> pfs_user(U).event_name(S)      =====>> [G]
888         .    |
889      3c .....+-> pfs_host(H).event_name(S)      =====>> [H]
890 @endverbatim
891 
892   Implemented as:
893   - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
894   - [2] @c close_socket_v1()
895   - [3] @c aggregate_thread_waits()
896   - [4] @c PFS_account::aggregate_waits()
897   - [5] @c PFS_host::aggregate_waits()
898   - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
899         @c table_ews_by_thread_by_event_name::make_row()
900   - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
901         @c table_events_waits_summary_by_instance::make_socket_row()
902   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
903         @c table_ews_global_by_event_name::make_socket_row()
904   - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
905         @c table_socket_summary_by_event_name::make_row()
906   - [E] SOCKET_SUMMARY_BY_INSTANCE,
907         @c table_socket_summary_by_instance::make_row()
908   - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
909         @c table_ews_by_account_by_event_name::make_row()
910   - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
911         @c table_ews_by_user_by_event_name::make_row()
912   - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
913         @c table_ews_by_host_by_event_name::make_row()
914 
915   @subsection IMPL_WAIT_TABLE Table waits
916 
917 @verbatim
918   table_locker(Thread Th, Table Tb, Event = io or lock)
919    |
920    | [1]
921    |
922 1a |-> pfs_table(Tb)                          =====>> [A], [B], [C]
923    |    |
924    |    | [2]
925    |    |
926    |    |-> pfs_table_share(Tb.share)         =====>> [B], [C]
927    |         |
928    |         | [3]
929    |         |
930    |         |-> global_table_io_stat         =====>> [C]
931    |         |
932    |         |-> global_table_lock_stat       =====>> [C]
933    |
934 1b |-> pfs_thread(Th).event_name(E)           =====>> [D], [E], [F], [G]
935    |    |
   |    | [4-RESET]
937    |    |
938    |    |-> pfs_account(U, H).event_name(E)   =====>> [E], [F], [G]
939    |    .    |
940    |    .    | [5-RESET]
941    |    .    |
942    |    .....+-> pfs_user(U).event_name(E)    =====>> [F]
943    |    .    |
944    |    .....+-> pfs_host(H).event_name(E)    =====>> [G]
945    |
946 1c |-> pfs_thread(Th).waits_current(W)        =====>> [H]
947    |
948 1d |-> pfs_thread(Th).waits_history(W)        =====>> [I]
949    |
950 1e |-> waits_history_long(W)                  =====>> [J]
951 @endverbatim
952 
953   Implemented as:
954   - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
955   - [2] @c close_table_v1()
956   - [3] @c drop_table_share_v1()
957   - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
958   - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
959   - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
960         @c table_events_waits_summary_by_instance::make_table_row()
961   - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
962         @c table_os_global_by_type::make_row()
963   - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
964         @c table_ews_global_by_event_name::make_table_io_row(),
965         @c table_ews_global_by_event_name::make_table_lock_row()
966   - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
967         @c table_ews_by_thread_by_event_name::make_row()
968   - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
        @c table_ews_by_account_by_event_name::make_row()
970   - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
971         @c table_ews_by_user_by_event_name::make_row()
972   - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
973         @c table_ews_by_host_by_event_name::make_row()
974   - [H] EVENTS_WAITS_CURRENT,
975         @c table_events_waits_common::make_row()
976   - [I] EVENTS_WAITS_HISTORY,
977         @c table_events_waits_common::make_row()
978   - [J] EVENTS_WAITS_HISTORY_LONG,
979         @c table_events_waits_common::make_row()
980 
981   @section IMPL_STAGE Implementation for stages aggregates
982 
  For stages, the tables that contain aggregated data are:
984   - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
985   - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
986   - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
987   - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
988   - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
989 
990 @verbatim
991   start_stage(T, S)
992    |
993    | [1]
994    |
995 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
996    |    |
997    |    | [2]
998    |    |
999    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
1000    |    .    |
1001    |    .    | [3-RESET]
1002    |    .    |
1003    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1004    |    .    |
1005    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1006    |    .    .    |
1007    |    .    .    | [4-RESET]
1008    | 2d .    .    |
1009 1b |----+----+----+-> pfs_stage_class(S)      =====>> [E]
1010 
1011 @endverbatim
1012 
1013   Implemented as:
1014   - [1] @c start_stage_v1()
1015   - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1016   - [3] @c PFS_account::aggregate_stages()
1017   - [4] @c PFS_host::aggregate_stages()
1018   - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1019         @c table_esgs_by_thread_by_event_name::make_row()
1020   - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1021         @c table_esgs_by_account_by_event_name::make_row()
1022   - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1023         @c table_esgs_by_user_by_event_name::make_row()
1024   - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1025         @c table_esgs_by_host_by_event_name::make_row()
1026   - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1027         @c table_esgs_global_by_event_name::make_row()
1028 
1029 @section IMPL_STATEMENT Implementation for statements consumers
1030 
  For statements, the tables that contain individual event data are:
1032   - EVENTS_STATEMENTS_CURRENT
1033   - EVENTS_STATEMENTS_HISTORY
1034   - EVENTS_STATEMENTS_HISTORY_LONG
1035 
  For statements, the tables that contain aggregated data are:
1037   - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1038   - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1039   - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1040   - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1041   - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1042   - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1043 
1044 @verbatim
1045   statement_locker(T, S)
1046    |
1047    | [1]
1048    |
1049 1a |-> pfs_thread(T).event_name(S)            =====>> [A], [B], [C], [D], [E]
1050    |    |
1051    |    | [2]
1052    |    |
1053    | 2a |-> pfs_account(U, H).event_name(S)   =====>> [B], [C], [D], [E]
1054    |    .    |
1055    |    .    | [3-RESET]
1056    |    .    |
1057    | 2b .....+-> pfs_user(U).event_name(S)    =====>> [C]
1058    |    .    |
1059    | 2c .....+-> pfs_host(H).event_name(S)    =====>> [D], [E]
1060    |    .    .    |
1061    |    .    .    | [4-RESET]
1062    | 2d .    .    |
1063 1b |----+----+----+-> pfs_statement_class(S)  =====>> [E]
1064    |
1065 1c |-> pfs_thread(T).statement_current(S)     =====>> [F]
1066    |
1067 1d |-> pfs_thread(T).statement_history(S)     =====>> [G]
1068    |
1069 1e |-> statement_history_long(S)              =====>> [H]
1070    |
1071 1f |-> statement_digest(S)                    =====>> [I]
1072 
1073 @endverbatim
1074 
1075   Implemented as:
  - [1] @c start_statement_v1(), @c end_statement_v1()
        (1a, 1b) is an aggregation by EVENT_NAME,
        (1c, 1d, 1e) is an aggregation by TIME,
        (1f) is an aggregation by DIGEST,
        all of these are orthogonal,
        and implemented in end_statement_v1().
1082   - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1083   - [3] @c PFS_account::aggregate_statements()
1084   - [4] @c PFS_host::aggregate_statements()
1085   - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1086         @c table_esms_by_thread_by_event_name::make_row()
1087   - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1088         @c table_esms_by_account_by_event_name::make_row()
1089   - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1090         @c table_esms_by_user_by_event_name::make_row()
1091   - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1092         @c table_esms_by_host_by_event_name::make_row()
1093   - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1094         @c table_esms_global_by_event_name::make_row()
1095   - [F] EVENTS_STATEMENTS_CURRENT,
1096         @c table_events_statements_current::rnd_next(),
1097         @c table_events_statements_common::make_row()
1098   - [G] EVENTS_STATEMENTS_HISTORY,
1099         @c table_events_statements_history::rnd_next(),
1100         @c table_events_statements_common::make_row()
1101   - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1102         @c table_events_statements_history_long::rnd_next(),
1103         @c table_events_statements_common::make_row()
  - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST,
1105         @c table_esms_by_digest::make_row()
1106 */
1107 
1108 /**
1109   @defgroup Performance_schema Performance Schema
1110   The performance schema component.
1111   For details, see the
1112   @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1113 
1114   @defgroup Performance_schema_implementation Performance Schema Implementation
1115   @ingroup Performance_schema
1116 
1117   @defgroup Performance_schema_tables Performance Schema Tables
1118   @ingroup Performance_schema_implementation
1119 */
1120 
1121 pthread_key(PFS_thread*, THR_PFS);
1122 bool THR_PFS_initialized= false;
1123 
1124 /**
1125   Conversion map from PSI_mutex_operation to enum_operation_type.
1126   Indexed by enum PSI_mutex_operation.
1127 */
1128 static enum_operation_type mutex_operation_map[]=
1129 {
1130   OPERATION_TYPE_LOCK,
1131   OPERATION_TYPE_TRYLOCK
1132 };
1133 
1134 /**
1135   Conversion map from PSI_rwlock_operation to enum_operation_type.
1136   Indexed by enum PSI_rwlock_operation.
1137 */
1138 static enum_operation_type rwlock_operation_map[]=
1139 {
1140   OPERATION_TYPE_READLOCK,
1141   OPERATION_TYPE_WRITELOCK,
1142   OPERATION_TYPE_TRYREADLOCK,
1143   OPERATION_TYPE_TRYWRITELOCK
1144 };
1145 
1146 /**
1147   Conversion map from PSI_cond_operation to enum_operation_type.
1148   Indexed by enum PSI_cond_operation.
1149 */
1150 static enum_operation_type cond_operation_map[]=
1151 {
1152   OPERATION_TYPE_WAIT,
1153   OPERATION_TYPE_TIMEDWAIT
1154 };
1155 
1156 /**
1157   Conversion map from PSI_file_operation to enum_operation_type.
1158   Indexed by enum PSI_file_operation.
1159 */
1160 static enum_operation_type file_operation_map[]=
1161 {
1162   OPERATION_TYPE_FILECREATE,
1163   OPERATION_TYPE_FILECREATETMP,
1164   OPERATION_TYPE_FILEOPEN,
1165   OPERATION_TYPE_FILESTREAMOPEN,
1166   OPERATION_TYPE_FILECLOSE,
1167   OPERATION_TYPE_FILESTREAMCLOSE,
1168   OPERATION_TYPE_FILEREAD,
1169   OPERATION_TYPE_FILEWRITE,
1170   OPERATION_TYPE_FILESEEK,
1171   OPERATION_TYPE_FILETELL,
1172   OPERATION_TYPE_FILEFLUSH,
1173   OPERATION_TYPE_FILESTAT,
1174   OPERATION_TYPE_FILEFSTAT,
1175   OPERATION_TYPE_FILECHSIZE,
1176   OPERATION_TYPE_FILEDELETE,
1177   OPERATION_TYPE_FILERENAME,
1178   OPERATION_TYPE_FILESYNC
1179 };
1180 
1181 /**
1182   Conversion map from PSI_table_operation to enum_operation_type.
1183   Indexed by enum PSI_table_io_operation.
1184 */
1185 static enum_operation_type table_io_operation_map[]=
1186 {
1187   OPERATION_TYPE_TABLE_FETCH,
1188   OPERATION_TYPE_TABLE_WRITE_ROW,
1189   OPERATION_TYPE_TABLE_UPDATE_ROW,
1190   OPERATION_TYPE_TABLE_DELETE_ROW
1191 };
1192 
1193 /**
1194   Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
1195   Indexed by enum PFS_TL_LOCK_TYPE.
1196 */
1197 static enum_operation_type table_lock_operation_map[]=
1198 {
1199   OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
1200   OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
1201   OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
1202   OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
1203   OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
1204   OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
1205   OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
1206   OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
1207   OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
1208   OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
1209   OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
1210 };
1211 
1212 /**
1213   Conversion map from PSI_socket_operation to enum_operation_type.
1214   Indexed by enum PSI_socket_operation.
1215 */
1216 static enum_operation_type socket_operation_map[]=
1217 {
1218   OPERATION_TYPE_SOCKETCREATE,
1219   OPERATION_TYPE_SOCKETCONNECT,
1220   OPERATION_TYPE_SOCKETBIND,
1221   OPERATION_TYPE_SOCKETCLOSE,
1222   OPERATION_TYPE_SOCKETSEND,
1223   OPERATION_TYPE_SOCKETRECV,
1224   OPERATION_TYPE_SOCKETSENDTO,
1225   OPERATION_TYPE_SOCKETRECVFROM,
1226   OPERATION_TYPE_SOCKETSENDMSG,
1227   OPERATION_TYPE_SOCKETRECVMSG,
1228   OPERATION_TYPE_SOCKETSEEK,
1229   OPERATION_TYPE_SOCKETOPT,
1230   OPERATION_TYPE_SOCKETSTAT,
1231   OPERATION_TYPE_SOCKETSHUTDOWN,
1232   OPERATION_TYPE_SOCKETSELECT
1233 };
1234 
1235 /**
1236   Build the prefix name of a class of instruments in a category.
1237   For example, this function builds the string 'wait/sync/mutex/sql/' from
1238   a prefix 'wait/sync/mutex' and a category 'sql'.
1239   This prefix is used later to build each instrument name, such as
1240   'wait/sync/mutex/sql/LOCK_open'.
1241   @param prefix               Prefix for this class of instruments
1242   @param category             Category name
1243   @param [out] output         Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1244   @param [out] output_length  Length of the resulting output string.
1245   @return 0 for success, non zero for errors
1246 */
build_prefix(const LEX_STRING * prefix,const char * category,char * output,int * output_length)1247 static int build_prefix(const LEX_STRING *prefix, const char *category,
1248                         char *output, int *output_length)
1249 {
1250   int len= strlen(category);
1251   char *out_ptr= output;
1252   int prefix_length= prefix->length;
1253 
1254   if (unlikely((prefix_length + len + 1) >=
1255                PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1256   {
1257     pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1258                     prefix->str, category);
1259     return 1;
1260   }
1261 
1262   if (unlikely(strchr(category, '/') != NULL))
1263   {
1264     pfs_print_error("build_prefix: invalid category <%s>\n",
1265                     category);
1266     return 1;
1267   }
1268 
1269   /* output = prefix + category + '/' */
1270   memcpy(out_ptr, prefix->str, prefix_length);
1271   out_ptr+= prefix_length;
1272   memcpy(out_ptr, category, len);
1273   out_ptr+= len;
1274   *out_ptr= '/';
1275   out_ptr++;
1276   *output_length= out_ptr - output;
1277 
1278   return 0;
1279 }
1280 
/*
  Shared body of the register_<instrument>_v1() functions.
  The enclosing function must provide the parameters 'category', 'info'
  and 'count'.
  KEY_T is the key type, PREFIX the instrument prefix given to
  build_prefix(), and REGISTER_FUNC the function registering one
  instrument class from (name, name length, flags).
  Every entry of 'info' gets *(info->m_key) set, either to the key returned
  by REGISTER_FUNC, or to 0 when the prefix can not be built or when the
  full instrument name exceeds PFS_MAX_INFO_NAME_LENGTH.
  The macro ends with a complete 'return;' statement, so it is used as the
  last statement of a function, without a trailing semicolon.
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC)                \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH];                      \
  int prefix_length;                                                  \
                                                                      \
  DBUG_ASSERT(category != NULL);                                      \
  DBUG_ASSERT(info != NULL);                                          \
  if (unlikely(build_prefix(&PREFIX, category,                        \
                            formatted_name, &prefix_length)))         \
  {                                                                   \
    while (count > 0)                                                 \
    {                                                                 \
      *(info->m_key)= 0;                                              \
      info++;                                                         \
      count--;                                                        \
    }                                                                 \
    return ;                                                          \
  }                                                                   \
                                                                      \
  while (count > 0)                                                   \
  {                                                                   \
    DBUG_ASSERT(info->m_key != NULL);                                 \
    DBUG_ASSERT(info->m_name != NULL);                                \
    const int name_length= strlen(info->m_name);                      \
    const int full_length= prefix_length + name_length;               \
    KEY_T key= 0;                                                     \
                                                                      \
    if (unlikely(full_length > PFS_MAX_INFO_NAME_LENGTH))             \
    {                                                                 \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n",  \
                      category, info->m_name);                        \
    }                                                                 \
    else                                                              \
    {                                                                 \
      memcpy(formatted_name + prefix_length, info->m_name,            \
             name_length);                                            \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    }                                                                 \
                                                                      \
    *(info->m_key)= key;                                              \
    info++;                                                           \
    count--;                                                          \
  }                                                                   \
  return;
1319 
1320 /* Use C linkage for the interface functions. */
1321 
1322 C_MODE_START
1323 
1324 /**
1325   Implementation of the mutex instrumentation interface.
1326   @sa PSI_v1::register_mutex.
1327 */
register_mutex_v1(const char * category,PSI_mutex_info_v1 * info,int count)1328 static void register_mutex_v1(const char *category,
1329                               PSI_mutex_info_v1 *info,
1330                               int count)
1331 {
1332   REGISTER_BODY_V1(PSI_mutex_key,
1333                    mutex_instrument_prefix,
1334                    register_mutex_class)
1335 }
1336 
1337 /**
1338   Implementation of the rwlock instrumentation interface.
1339   @sa PSI_v1::register_rwlock.
1340 */
register_rwlock_v1(const char * category,PSI_rwlock_info_v1 * info,int count)1341 static void register_rwlock_v1(const char *category,
1342                                PSI_rwlock_info_v1 *info,
1343                                int count)
1344 {
1345   REGISTER_BODY_V1(PSI_rwlock_key,
1346                    rwlock_instrument_prefix,
1347                    register_rwlock_class)
1348 }
1349 
1350 /**
1351   Implementation of the cond instrumentation interface.
1352   @sa PSI_v1::register_cond.
1353 */
register_cond_v1(const char * category,PSI_cond_info_v1 * info,int count)1354 static void register_cond_v1(const char *category,
1355                              PSI_cond_info_v1 *info,
1356                              int count)
1357 {
1358   REGISTER_BODY_V1(PSI_cond_key,
1359                    cond_instrument_prefix,
1360                    register_cond_class)
1361 }
1362 
1363 /**
1364   Implementation of the thread instrumentation interface.
1365   @sa PSI_v1::register_thread.
1366 */
register_thread_v1(const char * category,PSI_thread_info_v1 * info,int count)1367 static void register_thread_v1(const char *category,
1368                                PSI_thread_info_v1 *info,
1369                                int count)
1370 {
1371   REGISTER_BODY_V1(PSI_thread_key,
1372                    thread_instrument_prefix,
1373                    register_thread_class)
1374 }
1375 
1376 /**
1377   Implementation of the file instrumentation interface.
1378   @sa PSI_v1::register_file.
1379 */
register_file_v1(const char * category,PSI_file_info_v1 * info,int count)1380 static void register_file_v1(const char *category,
1381                              PSI_file_info_v1 *info,
1382                              int count)
1383 {
1384   REGISTER_BODY_V1(PSI_file_key,
1385                    file_instrument_prefix,
1386                    register_file_class)
1387 }
1388 
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1389 static void register_stage_v1(const char *category,
1390                               PSI_stage_info_v1 **info_array,
1391                               int count)
1392 {
1393   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1394   int prefix_length;
1395   int len;
1396   int full_length;
1397   PSI_stage_info_v1 *info;
1398 
1399   DBUG_ASSERT(category != NULL);
1400   DBUG_ASSERT(info_array != NULL);
1401   if (unlikely(build_prefix(&stage_instrument_prefix, category,
1402                formatted_name, &prefix_length)))
1403   {
1404     for (; count>0; count--, info_array++)
1405       (*info_array)->m_key= 0;
1406     return ;
1407   }
1408 
1409   for (; count>0; count--, info_array++)
1410   {
1411     info= *info_array;
1412     DBUG_ASSERT(info != NULL);
1413     DBUG_ASSERT(info->m_name != NULL);
1414     len= strlen(info->m_name);
1415     full_length= prefix_length + len;
1416     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1417     {
1418       memcpy(formatted_name + prefix_length, info->m_name, len);
1419       info->m_key= register_stage_class(formatted_name,
1420                                         prefix_length,
1421                                         full_length,
1422                                         info->m_flags);
1423     }
1424     else
1425     {
1426       pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1427                       category, info->m_name);
1428       info->m_key= 0;
1429     }
1430   }
1431   return;
1432 }
1433 
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1434 static void register_statement_v1(const char *category,
1435                                   PSI_statement_info_v1 *info,
1436                                   int count)
1437 {
1438   char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1439   int prefix_length;
1440   int len;
1441   int full_length;
1442 
1443   DBUG_ASSERT(category != NULL);
1444   DBUG_ASSERT(info != NULL);
1445   if (unlikely(build_prefix(&statement_instrument_prefix,
1446                             category, formatted_name, &prefix_length)))
1447   {
1448     for (; count>0; count--, info++)
1449       info->m_key= 0;
1450     return ;
1451   }
1452 
1453   for (; count>0; count--, info++)
1454   {
1455     DBUG_ASSERT(info->m_name != NULL);
1456     len= strlen(info->m_name);
1457     full_length= prefix_length + len;
1458     if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1459     {
1460       memcpy(formatted_name + prefix_length, info->m_name, len);
1461       info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1462     }
1463     else
1464     {
1465       pfs_print_error("register_statement_v1: name too long <%s>\n",
1466                       info->m_name);
1467       info->m_key= 0;
1468     }
1469   }
1470   return;
1471 }
1472 
register_socket_v1(const char * category,PSI_socket_info_v1 * info,int count)1473 static void register_socket_v1(const char *category,
1474                              PSI_socket_info_v1 *info,
1475                              int count)
1476 {
1477   REGISTER_BODY_V1(PSI_socket_key,
1478                    socket_instrument_prefix,
1479                    register_socket_class)
1480 }
1481 
/*
  Shared body of the init_<instrument>_v1() functions.
  T is the instrument kind (mutex, rwlock, cond, ...), pasted into the
  names PFS_<T>_class, PFS_<T>, find_<T>_class(), create_<T>() and PSI_<T>.
  KEY is the instrument key to look up, ID the identity given to create_<T>().
  The enclosing function returns NULL when no class is found for KEY or
  when the class found is not enabled; otherwise it returns the object
  built by create_<T>(), cast to PSI_<T>*.
  The macro expands to an unterminated return statement: the caller writes
  the trailing semicolon.
*/
#define INIT_BODY_V1(T, KEY, ID)                                            \
  PFS_##T##_class *klass= find_##T##_class(KEY);                            \
  if (unlikely(klass == NULL) || ! klass->m_enabled)                        \
    return NULL;                                                            \
  PFS_##T *pfs= create_##T(klass, ID);                                      \
  return reinterpret_cast<PSI_##T *> (pfs)
1492 
1493 /**
1494   Implementation of the mutex instrumentation interface.
1495   @sa PSI_v1::init_mutex.
1496 */
1497 static PSI_mutex*
init_mutex_v1(PSI_mutex_key key,const void * identity)1498 init_mutex_v1(PSI_mutex_key key, const void *identity)
1499 {
1500   INIT_BODY_V1(mutex, key, identity);
1501 }
1502 
1503 /**
1504   Implementation of the mutex instrumentation interface.
1505   @sa PSI_v1::destroy_mutex.
1506 */
destroy_mutex_v1(PSI_mutex * mutex)1507 static void destroy_mutex_v1(PSI_mutex* mutex)
1508 {
1509   PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
1510 
1511   DBUG_ASSERT(pfs != NULL);
1512 
1513   destroy_mutex(pfs);
1514 }
1515 
1516 /**
1517   Implementation of the rwlock instrumentation interface.
1518   @sa PSI_v1::init_rwlock.
1519 */
1520 static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key,const void * identity)1521 init_rwlock_v1(PSI_rwlock_key key, const void *identity)
1522 {
1523   INIT_BODY_V1(rwlock, key, identity);
1524 }
1525 
1526 /**
1527   Implementation of the rwlock instrumentation interface.
1528   @sa PSI_v1::destroy_rwlock.
1529 */
destroy_rwlock_v1(PSI_rwlock * rwlock)1530 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
1531 {
1532   PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
1533 
1534   DBUG_ASSERT(pfs != NULL);
1535 
1536   destroy_rwlock(pfs);
1537 }
1538 
1539 /**
1540   Implementation of the cond instrumentation interface.
1541   @sa PSI_v1::init_cond.
1542 */
1543 static PSI_cond*
init_cond_v1(PSI_cond_key key,const void * identity)1544 init_cond_v1(PSI_cond_key key, const void *identity)
1545 {
1546   INIT_BODY_V1(cond, key, identity);
1547 }
1548 
1549 /**
1550   Implementation of the cond instrumentation interface.
1551   @sa PSI_v1::destroy_cond.
1552 */
destroy_cond_v1(PSI_cond * cond)1553 static void destroy_cond_v1(PSI_cond* cond)
1554 {
1555   PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
1556 
1557   DBUG_ASSERT(pfs != NULL);
1558 
1559   destroy_cond(pfs);
1560 }
1561 
1562 /**
1563   Implementation of the table instrumentation interface.
1564   @sa PSI_v1::get_table_share.
1565 */
1566 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1567 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1568 {
1569   /* Ignore temporary tables and views. */
1570   if (temporary || share->is_view)
1571     return NULL;
1572   /* An instrumented thread is required, for LF_PINS. */
1573   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1574   if (unlikely(pfs_thread == NULL))
1575     return NULL;
1576   PFS_table_share* pfs_share;
1577   pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1578   return reinterpret_cast<PSI_table_share*> (pfs_share);
1579 }
1580 
1581 /**
1582   Implementation of the table instrumentation interface.
1583   @sa PSI_v1::release_table_share.
1584 */
release_table_share_v1(PSI_table_share * share)1585 static void release_table_share_v1(PSI_table_share* share)
1586 {
1587   PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1588 
1589   if (unlikely(pfs == NULL))
1590     return;
1591 
1592   release_table_share(pfs);
1593 }
1594 
1595 /**
1596   Implementation of the table instrumentation interface.
1597   @sa PSI_v1::drop_table_share.
1598 */
1599 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1600 drop_table_share_v1(my_bool temporary,
1601                     const char *schema_name, int schema_name_length,
1602                     const char *table_name, int table_name_length)
1603 {
1604   /* Ignore temporary tables. */
1605   if (temporary)
1606     return;
1607   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1608   if (unlikely(pfs_thread == NULL))
1609     return;
1610   /* TODO: temporary tables */
1611   drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1612                    table_name, table_name_length);
1613 }
1614 
1615 /**
1616   Implementation of the table instrumentation interface.
1617   @sa PSI_v1::open_table.
1618 */
1619 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1620 open_table_v1(PSI_table_share *share, const void *identity)
1621 {
1622   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1623 
1624   if (unlikely(pfs_table_share == NULL))
1625     return NULL;
1626 
1627   /* This object is not to be instrumented. */
1628   if (! pfs_table_share->m_enabled)
1629     return NULL;
1630 
1631   /* This object is instrumented, but all table instruments are disabled. */
1632   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1633     return NULL;
1634 
1635   /*
1636     When the performance schema is off, do not instrument anything.
1637     Table handles have short life cycle, instrumentation will happen
1638     again if needed during the next open().
1639   */
1640   if (! flag_global_instrumentation)
1641     return NULL;
1642 
1643   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1644   if (unlikely(thread == NULL))
1645     return NULL;
1646 
1647   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1648   return reinterpret_cast<PSI_table *> (pfs_table);
1649 }
1650 
1651 /**
1652   Implementation of the table instrumentation interface.
1653   @sa PSI_v1::unbind_table.
1654 */
unbind_table_v1(PSI_table * table)1655 static void unbind_table_v1(PSI_table *table)
1656 {
1657   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1658   if (likely(pfs != NULL))
1659   {
1660     pfs->m_thread_owner= NULL;
1661   }
1662 }
1663 
1664 /**
1665   Implementation of the table instrumentation interface.
1666   @sa PSI_v1::rebind_table.
1667 */
1668 static PSI_table *
rebind_table_v1(PSI_table_share * share,const void * identity,PSI_table * table)1669 rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
1670 {
1671   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1672   if (likely(pfs != NULL))
1673   {
1674     PFS_thread *thread;
1675     DBUG_ASSERT(pfs->m_thread_owner == NULL);
1676 
1677     /* The table handle was already instrumented, reuse it for this thread. */
1678     thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1679 
1680     if (unlikely(! pfs->m_share->m_enabled))
1681     {
1682       destroy_table(pfs);
1683       return NULL;
1684     }
1685 
1686     if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
1687     {
1688       destroy_table(pfs);
1689       return NULL;
1690     }
1691 
1692     if (unlikely(! flag_global_instrumentation))
1693     {
1694       destroy_table(pfs);
1695       return NULL;
1696     }
1697 
1698     pfs->m_thread_owner= thread;
1699     return table;
1700   }
1701 
1702   /* See open_table_v1() */
1703 
1704   PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1705 
1706   if (unlikely(pfs_table_share == NULL))
1707     return NULL;
1708 
1709   if (! pfs_table_share->m_enabled)
1710     return NULL;
1711 
1712   if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1713     return NULL;
1714 
1715   if (! flag_global_instrumentation)
1716     return NULL;
1717 
1718   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1719   if (unlikely(thread == NULL))
1720     return NULL;
1721 
1722   PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1723   return reinterpret_cast<PSI_table *> (pfs_table);
1724 }
1725 
1726 /**
1727   Implementation of the table instrumentation interface.
1728   @sa PSI_v1::close_table.
1729 */
close_table_v1(PSI_table * table)1730 static void close_table_v1(PSI_table *table)
1731 {
1732   PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1733   if (unlikely(pfs == NULL))
1734     return;
1735   pfs->aggregate();
1736   destroy_table(pfs);
1737 }
1738 
1739 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1740 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1741                const struct sockaddr *addr, socklen_t addr_len)
1742 {
1743   PFS_socket_class *klass;
1744   PFS_socket *pfs;
1745   klass= find_socket_class(key);
1746   if (unlikely(klass == NULL))
1747     return NULL;
1748   if (! klass->m_enabled)
1749     return NULL;
1750   pfs= create_socket(klass, fd, addr, addr_len);
1751   return reinterpret_cast<PSI_socket *> (pfs);
1752 }
1753 
destroy_socket_v1(PSI_socket * socket)1754 static void destroy_socket_v1(PSI_socket *socket)
1755 {
1756   PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1757 
1758   DBUG_ASSERT(pfs != NULL);
1759 
1760   destroy_socket(pfs);
1761 }
1762 
1763 /**
1764   Implementation of the file instrumentation interface.
1765   @sa PSI_v1::create_file.
1766 */
create_file_v1(PSI_file_key key,const char * name,File file)1767 static void create_file_v1(PSI_file_key key, const char *name, File file)
1768 {
1769   if (! flag_global_instrumentation)
1770     return;
1771   int index= (int) file;
1772   if (unlikely(index < 0))
1773     return;
1774   PFS_file_class *klass= find_file_class(key);
1775   if (unlikely(klass == NULL))
1776     return;
1777   if (! klass->m_enabled)
1778     return;
1779 
1780   /* A thread is needed for LF_PINS */
1781   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1782   if (unlikely(pfs_thread == NULL))
1783     return;
1784 
1785   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
1786     return;
1787 
1788   /*
1789     We want this check after pfs_thread->m_enabled,
1790     to avoid reporting false loss.
1791   */
1792   if (unlikely(index >= file_handle_max))
1793   {
1794     file_handle_lost++;
1795     return;
1796   }
1797 
1798   uint len= strlen(name);
1799   PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
1800 
1801   file_handle_array[index]= pfs_file;
1802 }
1803 
1804 /**
1805   Arguments given from a parent to a child thread, packaged in one structure.
1806   This data is used when spawning a new instrumented thread.
1807   @sa pfs_spawn_thread.
1808 */
1809 struct PFS_spawn_thread_arg
1810 {
1811   ulonglong m_thread_internal_id;
1812   char m_username[USERNAME_LENGTH];
1813   uint m_username_length;
1814   char m_hostname[HOSTNAME_LENGTH];
1815   uint m_hostname_length;
1816 
1817   PSI_thread_key m_child_key;
1818   const void *m_child_identity;
1819   void *(*m_user_start_routine)(void*);
1820   void *m_user_arg;
1821 };
1822 
pfs_spawn_thread(void * arg)1823 void* pfs_spawn_thread(void *arg)
1824 {
1825   PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
1826   void *user_arg;
1827   void *(*user_start_routine)(void*);
1828 
1829   PFS_thread *pfs;
1830 
1831   /* First, attach instrumentation to this newly created pthread. */
1832   PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
1833   if (likely(klass != NULL))
1834   {
1835     pfs= create_thread(klass, typed_arg->m_child_identity, 0);
1836     if (likely(pfs != NULL))
1837     {
1838       clear_thread_account(pfs);
1839 
1840       pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;
1841 
1842       memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
1843       pfs->m_username_length= typed_arg->m_username_length;
1844 
1845       memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
1846       pfs->m_hostname_length= typed_arg->m_hostname_length;
1847 
1848       set_thread_account(pfs);
1849     }
1850   }
1851   else
1852   {
1853     pfs= NULL;
1854   }
1855   my_pthread_setspecific_ptr(THR_PFS, pfs);
1856 
1857   /*
1858     Secondly, free the memory allocated in spawn_thread_v1().
1859     It is preferable to do this before invoking the user
1860     routine, to avoid memory leaks at shutdown, in case
1861     the server exits without waiting for this thread.
1862   */
1863   user_start_routine= typed_arg->m_user_start_routine;
1864   user_arg= typed_arg->m_user_arg;
1865   my_free(typed_arg);
1866 
1867   /* Then, execute the user code for this thread. */
1868   (*user_start_routine)(user_arg);
1869 
1870   return NULL;
1871 }
1872 
1873 /**
1874   Implementation of the thread instrumentation interface.
1875   @sa PSI_v1::spawn_thread.
1876 */
spawn_thread_v1(PSI_thread_key key,pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)1877 static int spawn_thread_v1(PSI_thread_key key,
1878                            pthread_t *thread, const pthread_attr_t *attr,
1879                            void *(*start_routine)(void*), void *arg)
1880 {
1881   PFS_spawn_thread_arg *psi_arg;
1882   PFS_thread *parent;
1883 
1884   /* psi_arg can not be global, and can not be a local variable. */
1885   psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
1886                                              MYF(MY_WME));
1887   if (unlikely(psi_arg == NULL))
1888     return EAGAIN;
1889 
1890   psi_arg->m_child_key= key;
1891   psi_arg->m_child_identity= (arg ? arg : thread);
1892   psi_arg->m_user_start_routine= start_routine;
1893   psi_arg->m_user_arg= arg;
1894 
1895   parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1896   if (parent != NULL)
1897   {
1898     /*
1899       Make a copy of the parent attributes.
1900       This is required, because instrumentation for this thread (the parent)
1901       may be destroyed before the child thread instrumentation is created.
1902     */
1903     psi_arg->m_thread_internal_id= parent->m_thread_internal_id;
1904 
1905     memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
1906     psi_arg->m_username_length= parent->m_username_length;
1907 
1908     memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
1909     psi_arg->m_hostname_length= parent->m_hostname_length;
1910   }
1911   else
1912   {
1913     psi_arg->m_thread_internal_id= 0;
1914     psi_arg->m_username_length= 0;
1915     psi_arg->m_hostname_length= 0;
1916   }
1917 
1918   int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
1919   if (unlikely(result != 0))
1920     my_free(psi_arg);
1921   return result;
1922 }
1923 
1924 /**
1925   Implementation of the thread instrumentation interface.
1926   @sa PSI_v1::new_thread.
1927 */
1928 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1929 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1930 {
1931   PFS_thread *pfs;
1932 
1933   PFS_thread_class *klass= find_thread_class(key);
1934   if (likely(klass != NULL))
1935     pfs= create_thread(klass, identity, processlist_id);
1936   else
1937     pfs= NULL;
1938 
1939   return reinterpret_cast<PSI_thread*> (pfs);
1940 }
1941 
1942 /**
1943   Implementation of the thread instrumentation interface.
1944   @sa PSI_v1::set_thread_id.
1945 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1946 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1947 {
1948   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1949   if (unlikely(pfs == NULL))
1950     return;
1951   pfs->m_processlist_id= processlist_id;
1952 }
1953 
1954 /**
1955   Implementation of the thread instrumentation interface.
1956   @sa PSI_v1::get_thread_id.
1957 */
1958 static PSI_thread*
get_thread_v1(void)1959 get_thread_v1(void)
1960 {
1961   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1962   return reinterpret_cast<PSI_thread*> (pfs);
1963 }
1964 
1965 /**
1966   Implementation of the thread instrumentation interface.
1967   @sa PSI_v1::set_thread_user.
1968 */
set_thread_user_v1(const char * user,int user_len)1969 static void set_thread_user_v1(const char *user, int user_len)
1970 {
1971   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1972 
1973   DBUG_ASSERT((user != NULL) || (user_len == 0));
1974   DBUG_ASSERT(user_len >= 0);
1975   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
1976 
1977   if (unlikely(pfs == NULL))
1978     return;
1979 
1980   aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
1981 
1982   pfs->m_session_lock.allocated_to_dirty();
1983 
1984   clear_thread_account(pfs);
1985 
1986   if (user_len > 0)
1987     memcpy(pfs->m_username, user, user_len);
1988   pfs->m_username_length= user_len;
1989 
1990   set_thread_account(pfs);
1991 
1992   bool enabled= true;
1993   if (flag_thread_instrumentation)
1994   {
1995     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
1996     {
1997       /*
1998         TODO: performance improvement.
1999         Once performance_schema.USERS is exposed,
2000         we can use PFS_user::m_enabled instead of looking up
2001         SETUP_ACTORS every time.
2002       */
2003       lookup_setup_actor(pfs,
2004                          pfs->m_username, pfs->m_username_length,
2005                          pfs->m_hostname, pfs->m_hostname_length,
2006                          &enabled);
2007     }
2008   }
2009 
2010   pfs->m_enabled= enabled;
2011 
2012   pfs->m_session_lock.dirty_to_allocated();
2013 }
2014 
2015 /**
2016   Implementation of the thread instrumentation interface.
2017   @sa PSI_v1::set_thread_account.
2018 */
set_thread_account_v1(const char * user,int user_len,const char * host,int host_len)2019 static void set_thread_account_v1(const char *user, int user_len,
2020                                     const char *host, int host_len)
2021 {
2022   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2023 
2024   DBUG_ASSERT((user != NULL) || (user_len == 0));
2025   DBUG_ASSERT(user_len >= 0);
2026   DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
2027   DBUG_ASSERT((host != NULL) || (host_len == 0));
2028   DBUG_ASSERT(host_len >= 0);
2029 
2030   host_len= min<size_t>(host_len, sizeof(pfs->m_hostname));
2031 
2032   if (unlikely(pfs == NULL))
2033     return;
2034 
2035   pfs->m_session_lock.allocated_to_dirty();
2036 
2037   clear_thread_account(pfs);
2038 
2039   if (host_len > 0)
2040     memcpy(pfs->m_hostname, host, host_len);
2041   pfs->m_hostname_length= host_len;
2042 
2043   if (user_len > 0)
2044     memcpy(pfs->m_username, user, user_len);
2045   pfs->m_username_length= user_len;
2046 
2047   set_thread_account(pfs);
2048 
2049   bool enabled= true;
2050   if (flag_thread_instrumentation)
2051   {
2052     if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
2053     {
2054       /*
2055         TODO: performance improvement.
2056         Once performance_schema.USERS is exposed,
2057         we can use PFS_user::m_enabled instead of looking up
2058         SETUP_ACTORS every time.
2059       */
2060       lookup_setup_actor(pfs,
2061                          pfs->m_username, pfs->m_username_length,
2062                          pfs->m_hostname, pfs->m_hostname_length,
2063                          &enabled);
2064     }
2065   }
2066   pfs->m_enabled= enabled;
2067 
2068   pfs->m_session_lock.dirty_to_allocated();
2069 }
2070 
2071 /**
2072   Implementation of the thread instrumentation interface.
2073   @sa PSI_v1::set_thread_db.
2074 */
set_thread_db_v1(const char * db,int db_len)2075 static void set_thread_db_v1(const char* db, int db_len)
2076 {
2077   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2078 
2079   DBUG_ASSERT((db != NULL) || (db_len == 0));
2080   DBUG_ASSERT(db_len >= 0);
2081   DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2082 
2083   if (likely(pfs != NULL))
2084   {
2085     pfs->m_stmt_lock.allocated_to_dirty();
2086     if (db_len > 0)
2087       memcpy(pfs->m_dbname, db, db_len);
2088     pfs->m_dbname_length= db_len;
2089     pfs->m_stmt_lock.dirty_to_allocated();
2090   }
2091 }
2092 
2093 /**
2094   Implementation of the thread instrumentation interface.
2095   @sa PSI_v1::set_thread_command.
2096 */
set_thread_command_v1(int command)2097 static void set_thread_command_v1(int command)
2098 {
2099   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2100 
2101   DBUG_ASSERT(command >= 0);
2102   DBUG_ASSERT(command <= (int) COM_END);
2103 
2104   if (likely(pfs != NULL))
2105   {
2106     pfs->m_command= command;
2107   }
2108 }
2109 
2110 /**
2111   Implementation of the thread instrumentation interface.
2112   @sa PSI_v1::set_thread_start_time.
2113 */
set_thread_start_time_v1(time_t start_time)2114 static void set_thread_start_time_v1(time_t start_time)
2115 {
2116   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2117 
2118   if (likely(pfs != NULL))
2119   {
2120     pfs->m_start_time= start_time;
2121   }
2122 }
2123 
/**
  Implementation of the thread instrumentation interface.
  This entry point is deprecated, and does nothing.
  @param state ignored
  @sa PSI_v1::set_thread_state.
*/
static void set_thread_state_v1(const char *state)
{
  /* DEPRECATED. The parameter is intentionally unused. */
  (void) state;
}
2132 
2133 /**
2134   Implementation of the thread instrumentation interface.
2135   @sa PSI_v1::set_thread_info.
2136 */
set_thread_info_v1(const char * info,uint info_len)2137 static void set_thread_info_v1(const char* info, uint info_len)
2138 {
2139   PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2140 
2141   DBUG_ASSERT((info != NULL) || (info_len == 0));
2142 
2143   if (likely(pfs != NULL))
2144   {
2145     if ((info != NULL) && (info_len > 0))
2146     {
2147       if (info_len > sizeof(pfs->m_processlist_info))
2148         info_len= sizeof(pfs->m_processlist_info);
2149 
2150       pfs->m_stmt_lock.allocated_to_dirty();
2151       memcpy(pfs->m_processlist_info, info, info_len);
2152       pfs->m_processlist_info_length= info_len;
2153       pfs->m_stmt_lock.dirty_to_allocated();
2154     }
2155     else
2156     {
2157       pfs->m_stmt_lock.allocated_to_dirty();
2158       pfs->m_processlist_info_length= 0;
2159       pfs->m_stmt_lock.dirty_to_allocated();
2160     }
2161   }
2162 }
2163 
2164 /**
2165   Implementation of the thread instrumentation interface.
2166   @sa PSI_v1::set_thread.
2167 */
set_thread_v1(PSI_thread * thread)2168 static void set_thread_v1(PSI_thread* thread)
2169 {
2170   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2171   my_pthread_setspecific_ptr(THR_PFS, pfs);
2172 }
2173 
2174 /**
2175   Implementation of the thread instrumentation interface.
2176   @sa PSI_v1::delete_current_thread.
2177 */
delete_current_thread_v1(void)2178 static void delete_current_thread_v1(void)
2179 {
2180   PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2181   if (thread != NULL)
2182   {
2183     aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2184     my_pthread_setspecific_ptr(THR_PFS, NULL);
2185     destroy_thread(thread);
2186   }
2187 }
2188 
2189 /**
2190   Implementation of the thread instrumentation interface.
2191   @sa PSI_v1::delete_thread.
2192 */
delete_thread_v1(PSI_thread * thread)2193 static void delete_thread_v1(PSI_thread *thread)
2194 {
2195   PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2196 
2197   if (pfs != NULL)
2198   {
2199     aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2200     destroy_thread(pfs);
2201   }
2202 }
2203 
2204 /**
2205   Implementation of the mutex instrumentation interface.
2206   @sa PSI_v1::start_mutex_wait.
2207 */
2208 static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state * state,PSI_mutex * mutex,PSI_mutex_operation op,const char * src_file,uint src_line)2209 start_mutex_wait_v1(PSI_mutex_locker_state *state,
2210                     PSI_mutex *mutex, PSI_mutex_operation op,
2211                     const char *src_file, uint src_line)
2212 {
2213   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
2214   DBUG_ASSERT((int) op >= 0);
2215   DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
2216   DBUG_ASSERT(state != NULL);
2217 
2218   DBUG_ASSERT(pfs_mutex != NULL);
2219   DBUG_ASSERT(pfs_mutex->m_class != NULL);
2220 
2221   if (! pfs_mutex->m_enabled)
2222     return NULL;
2223 
2224   register uint flags;
2225   ulonglong timer_start= 0;
2226 
2227   if (flag_thread_instrumentation)
2228   {
2229     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2230     if (unlikely(pfs_thread == NULL))
2231       return NULL;
2232     if (! pfs_thread->m_enabled)
2233       return NULL;
2234     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2235     flags= STATE_FLAG_THREAD;
2236 
2237     if (pfs_mutex->m_timed)
2238     {
2239       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2240       state->m_timer_start= timer_start;
2241       flags|= STATE_FLAG_TIMED;
2242     }
2243 
2244     if (flag_events_waits_current)
2245     {
2246       if (unlikely(pfs_thread->m_events_waits_current >=
2247                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2248       {
2249         locker_lost++;
2250         return NULL;
2251       }
2252       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2253       state->m_wait= wait;
2254       flags|= STATE_FLAG_EVENT;
2255 
2256       PFS_events_waits *parent_event= wait - 1;
2257       wait->m_event_type= EVENT_TYPE_WAIT;
2258       wait->m_nesting_event_id= parent_event->m_event_id;
2259       wait->m_nesting_event_type= parent_event->m_event_type;
2260 
2261       wait->m_thread= pfs_thread;
2262       wait->m_class= pfs_mutex->m_class;
2263       wait->m_timer_start= timer_start;
2264       wait->m_timer_end= 0;
2265       wait->m_object_instance_addr= pfs_mutex->m_identity;
2266       wait->m_event_id= pfs_thread->m_event_id++;
2267       wait->m_end_event_id= 0;
2268       wait->m_operation= mutex_operation_map[(int) op];
2269       wait->m_source_file= src_file;
2270       wait->m_source_line= src_line;
2271       wait->m_wait_class= WAIT_CLASS_MUTEX;
2272 
2273       pfs_thread->m_events_waits_current++;
2274     }
2275   }
2276   else
2277   {
2278     if (pfs_mutex->m_timed)
2279     {
2280       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2281       state->m_timer_start= timer_start;
2282       flags= STATE_FLAG_TIMED;
2283       state->m_thread= NULL;
2284     }
2285     else
2286     {
2287       /*
2288         Complete shortcut.
2289       */
2290       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2291       pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
2292       return NULL;
2293     }
2294   }
2295 
2296   state->m_flags= flags;
2297   state->m_mutex= mutex;
2298   return reinterpret_cast<PSI_mutex_locker*> (state);
2299 }
2300 
2301 /**
2302   Implementation of the rwlock instrumentation interface.
2303   @sa PSI_v1::start_rwlock_rdwait
2304   @sa PSI_v1::start_rwlock_wrwait
2305 */
2306 static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state * state,PSI_rwlock * rwlock,PSI_rwlock_operation op,const char * src_file,uint src_line)2307 start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
2308                      PSI_rwlock *rwlock,
2309                      PSI_rwlock_operation op,
2310                      const char *src_file, uint src_line)
2311 {
2312   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
2313   DBUG_ASSERT(static_cast<int> (op) >= 0);
2314   DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
2315   DBUG_ASSERT(state != NULL);
2316   DBUG_ASSERT(pfs_rwlock != NULL);
2317   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
2318 
2319   if (! pfs_rwlock->m_enabled)
2320     return NULL;
2321 
2322   register uint flags;
2323   ulonglong timer_start= 0;
2324 
2325   if (flag_thread_instrumentation)
2326   {
2327     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2328     if (unlikely(pfs_thread == NULL))
2329       return NULL;
2330     if (! pfs_thread->m_enabled)
2331       return NULL;
2332     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2333     flags= STATE_FLAG_THREAD;
2334 
2335     if (pfs_rwlock->m_timed)
2336     {
2337       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2338       state->m_timer_start= timer_start;
2339       flags|= STATE_FLAG_TIMED;
2340     }
2341 
2342     if (flag_events_waits_current)
2343     {
2344       if (unlikely(pfs_thread->m_events_waits_current >=
2345                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2346       {
2347         locker_lost++;
2348         return NULL;
2349       }
2350       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2351       state->m_wait= wait;
2352       flags|= STATE_FLAG_EVENT;
2353 
2354       PFS_events_waits *parent_event= wait - 1;
2355       wait->m_event_type= EVENT_TYPE_WAIT;
2356       wait->m_nesting_event_id= parent_event->m_event_id;
2357       wait->m_nesting_event_type= parent_event->m_event_type;
2358 
2359       wait->m_thread= pfs_thread;
2360       wait->m_class= pfs_rwlock->m_class;
2361       wait->m_timer_start= timer_start;
2362       wait->m_timer_end= 0;
2363       wait->m_object_instance_addr= pfs_rwlock->m_identity;
2364       wait->m_event_id= pfs_thread->m_event_id++;
2365       wait->m_end_event_id= 0;
2366       wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
2367       wait->m_source_file= src_file;
2368       wait->m_source_line= src_line;
2369       wait->m_wait_class= WAIT_CLASS_RWLOCK;
2370 
2371       pfs_thread->m_events_waits_current++;
2372     }
2373   }
2374   else
2375   {
2376     if (pfs_rwlock->m_timed)
2377     {
2378       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2379       state->m_timer_start= timer_start;
2380       flags= STATE_FLAG_TIMED;
2381       state->m_thread= NULL;
2382     }
2383     else
2384     {
2385       /*
2386         Complete shortcut.
2387       */
2388       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2389       pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
2390       return NULL;
2391     }
2392   }
2393 
2394   state->m_flags= flags;
2395   state->m_rwlock= rwlock;
2396   return reinterpret_cast<PSI_rwlock_locker*> (state);
2397 }
2398 
2399 /**
2400   Implementation of the cond instrumentation interface.
2401   @sa PSI_v1::start_cond_wait.
2402 */
2403 static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state * state,PSI_cond * cond,PSI_mutex * mutex,PSI_cond_operation op,const char * src_file,uint src_line)2404 start_cond_wait_v1(PSI_cond_locker_state *state,
2405                    PSI_cond *cond, PSI_mutex *mutex,
2406                    PSI_cond_operation op,
2407                    const char *src_file, uint src_line)
2408 {
2409   /*
2410     Note about the unused PSI_mutex *mutex parameter:
2411     In the pthread library, a call to pthread_cond_wait()
2412     causes an unlock() + lock() on the mutex associated with the condition.
2413     This mutex operation is not instrumented, so the mutex will still
2414     appear as locked when a thread is waiting on a condition.
2415     This has no impact now, as unlock_mutex() is not recording events.
2416     When unlock_mutex() is implemented by later work logs,
2417     this parameter here will be used to adjust the mutex state,
2418     in start_cond_wait_v1() and end_cond_wait_v1().
2419   */
2420   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
2421   DBUG_ASSERT(static_cast<int> (op) >= 0);
2422   DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
2423   DBUG_ASSERT(state != NULL);
2424   DBUG_ASSERT(pfs_cond != NULL);
2425   DBUG_ASSERT(pfs_cond->m_class != NULL);
2426 
2427   if (! pfs_cond->m_enabled)
2428     return NULL;
2429 
2430   register uint flags;
2431   ulonglong timer_start= 0;
2432 
2433   if (flag_thread_instrumentation)
2434   {
2435     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2436     if (unlikely(pfs_thread == NULL))
2437       return NULL;
2438     if (! pfs_thread->m_enabled)
2439       return NULL;
2440     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2441     flags= STATE_FLAG_THREAD;
2442 
2443     if (pfs_cond->m_timed)
2444     {
2445       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2446       state->m_timer_start= timer_start;
2447       flags|= STATE_FLAG_TIMED;
2448     }
2449 
2450     if (flag_events_waits_current)
2451     {
2452       if (unlikely(pfs_thread->m_events_waits_current >=
2453                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2454       {
2455         locker_lost++;
2456         return NULL;
2457       }
2458       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2459       state->m_wait= wait;
2460       flags|= STATE_FLAG_EVENT;
2461 
2462       PFS_events_waits *parent_event= wait - 1;
2463       wait->m_event_type= EVENT_TYPE_WAIT;
2464       wait->m_nesting_event_id= parent_event->m_event_id;
2465       wait->m_nesting_event_type= parent_event->m_event_type;
2466 
2467       wait->m_thread= pfs_thread;
2468       wait->m_class= pfs_cond->m_class;
2469       wait->m_timer_start= timer_start;
2470       wait->m_timer_end= 0;
2471       wait->m_object_instance_addr= pfs_cond->m_identity;
2472       wait->m_event_id= pfs_thread->m_event_id++;
2473       wait->m_end_event_id= 0;
2474       wait->m_operation= cond_operation_map[static_cast<int> (op)];
2475       wait->m_source_file= src_file;
2476       wait->m_source_line= src_line;
2477       wait->m_wait_class= WAIT_CLASS_COND;
2478 
2479       pfs_thread->m_events_waits_current++;
2480     }
2481   }
2482   else
2483   {
2484     if (pfs_cond->m_timed)
2485     {
2486       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2487       state->m_timer_start= timer_start;
2488       flags= STATE_FLAG_TIMED;
2489     }
2490     else
2491     {
2492       /*
2493         Complete shortcut.
2494       */
2495       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
2496       pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
2497       return NULL;
2498     }
2499   }
2500 
2501   state->m_flags= flags;
2502   state->m_cond= cond;
2503   state->m_mutex= mutex;
2504   return reinterpret_cast<PSI_cond_locker*> (state);
2505 }
2506 
lock_flags_to_lock_type(uint flags)2507 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2508 {
2509   enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2510 
2511   switch (value)
2512   {
2513     case TL_READ:
2514       return PFS_TL_READ;
2515     case TL_READ_WITH_SHARED_LOCKS:
2516       return PFS_TL_READ_WITH_SHARED_LOCKS;
2517     case TL_READ_HIGH_PRIORITY:
2518       return PFS_TL_READ_HIGH_PRIORITY;
2519     case TL_READ_NO_INSERT:
2520       return PFS_TL_READ_NO_INSERT;
2521     case TL_WRITE_ALLOW_WRITE:
2522       return PFS_TL_WRITE_ALLOW_WRITE;
2523     case TL_WRITE_CONCURRENT_INSERT:
2524       return PFS_TL_WRITE_CONCURRENT_INSERT;
2525     case TL_WRITE_DELAYED:
2526       return PFS_TL_WRITE_DELAYED;
2527     case TL_WRITE_LOW_PRIORITY:
2528       return PFS_TL_WRITE_LOW_PRIORITY;
2529     case TL_WRITE:
2530       return PFS_TL_WRITE;
2531 
2532     case TL_WRITE_ONLY:
2533     case TL_IGNORE:
2534     case TL_UNLOCK:
2535     case TL_READ_DEFAULT:
2536     case TL_WRITE_DEFAULT:
2537     default:
2538       DBUG_ASSERT(false);
2539   }
2540 
2541   /* Dead code */
2542   return PFS_TL_READ;
2543 }
2544 
external_lock_flags_to_lock_type(uint flags)2545 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2546 {
2547   DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2548   return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2549 }
2550 
2551 /**
2552   Implementation of the table instrumentation interface.
2553   @sa PSI_v1::start_table_io_wait_v1
2554 */
2555 static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_io_operation op,uint index,const char * src_file,uint src_line)2556 start_table_io_wait_v1(PSI_table_locker_state *state,
2557                        PSI_table *table,
2558                        PSI_table_io_operation op,
2559                        uint index,
2560                        const char *src_file, uint src_line)
2561 {
2562   DBUG_ASSERT(static_cast<int> (op) >= 0);
2563   DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
2564   DBUG_ASSERT(state != NULL);
2565   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2566   DBUG_ASSERT(pfs_table != NULL);
2567   DBUG_ASSERT(pfs_table->m_share != NULL);
2568 
2569   if (! pfs_table->m_io_enabled)
2570     return NULL;
2571 
2572   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2573 
2574   register uint flags;
2575   ulonglong timer_start= 0;
2576 
2577   if (flag_thread_instrumentation)
2578   {
2579     if (pfs_thread == NULL)
2580       return NULL;
2581     if (! pfs_thread->m_enabled)
2582       return NULL;
2583     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2584     flags= STATE_FLAG_THREAD;
2585 
2586     if (pfs_table->m_io_timed)
2587     {
2588       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2589       state->m_timer_start= timer_start;
2590       flags|= STATE_FLAG_TIMED;
2591     }
2592 
2593     if (flag_events_waits_current)
2594     {
2595       if (unlikely(pfs_thread->m_events_waits_current >=
2596                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2597       {
2598         locker_lost++;
2599         return NULL;
2600       }
2601       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2602       state->m_wait= wait;
2603       flags|= STATE_FLAG_EVENT;
2604 
2605       PFS_events_waits *parent_event= wait - 1;
2606       wait->m_event_type= EVENT_TYPE_WAIT;
2607       wait->m_nesting_event_id= parent_event->m_event_id;
2608       wait->m_nesting_event_type= parent_event->m_event_type;
2609 
2610       PFS_table_share *share= pfs_table->m_share;
2611       wait->m_thread= pfs_thread;
2612       wait->m_class= &global_table_io_class;
2613       wait->m_timer_start= timer_start;
2614       wait->m_timer_end= 0;
2615       wait->m_object_instance_addr= pfs_table->m_identity;
2616       wait->m_event_id= pfs_thread->m_event_id++;
2617       wait->m_end_event_id= 0;
2618       wait->m_operation= table_io_operation_map[static_cast<int> (op)];
2619       wait->m_flags= 0;
2620       wait->m_object_type= share->get_object_type();
2621       wait->m_weak_table_share= share;
2622       wait->m_weak_version= share->get_version();
2623       wait->m_index= index;
2624       wait->m_source_file= src_file;
2625       wait->m_source_line= src_line;
2626       wait->m_wait_class= WAIT_CLASS_TABLE;
2627 
2628       pfs_thread->m_events_waits_current++;
2629     }
2630   }
2631   else
2632   {
2633     if (pfs_table->m_io_timed)
2634     {
2635       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2636       state->m_timer_start= timer_start;
2637       flags= STATE_FLAG_TIMED;
2638     }
2639     else
2640     {
2641       /* TODO: consider a shortcut here */
2642       flags= 0;
2643     }
2644   }
2645 
2646   state->m_flags= flags;
2647   state->m_table= table;
2648   state->m_io_operation= op;
2649   state->m_index= index;
2650   return reinterpret_cast<PSI_table_locker*> (state);
2651 }
2652 
2653 /**
2654   Implementation of the table instrumentation interface.
2655   @sa PSI_v1::start_table_lock_wait.
2656 */
2657 static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state * state,PSI_table * table,PSI_table_lock_operation op,ulong op_flags,const char * src_file,uint src_line)2658 start_table_lock_wait_v1(PSI_table_locker_state *state,
2659                          PSI_table *table,
2660                          PSI_table_lock_operation op,
2661                          ulong op_flags,
2662                          const char *src_file, uint src_line)
2663 {
2664   DBUG_ASSERT(state != NULL);
2665   DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));
2666 
2667   PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
2668 
2669   DBUG_ASSERT(pfs_table != NULL);
2670   DBUG_ASSERT(pfs_table->m_share != NULL);
2671 
2672   if (! pfs_table->m_lock_enabled)
2673     return NULL;
2674 
2675   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2676 
2677   PFS_TL_LOCK_TYPE lock_type;
2678 
2679   switch (op)
2680   {
2681     case PSI_TABLE_LOCK:
2682       lock_type= lock_flags_to_lock_type(op_flags);
2683       break;
2684     case PSI_TABLE_EXTERNAL_LOCK:
2685       /*
2686         See the handler::external_lock() API design,
2687         there is no handler::external_unlock().
2688       */
2689       if (op_flags == F_UNLCK)
2690         return NULL;
2691       lock_type= external_lock_flags_to_lock_type(op_flags);
2692       break;
2693     default:
2694       lock_type= PFS_TL_READ;
2695       DBUG_ASSERT(false);
2696   }
2697 
2698   DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));
2699 
2700   register uint flags;
2701   ulonglong timer_start= 0;
2702 
2703   if (flag_thread_instrumentation)
2704   {
2705     if (pfs_thread == NULL)
2706       return NULL;
2707     if (! pfs_thread->m_enabled)
2708       return NULL;
2709     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2710     flags= STATE_FLAG_THREAD;
2711 
2712     if (pfs_table->m_lock_timed)
2713     {
2714       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2715       state->m_timer_start= timer_start;
2716       flags|= STATE_FLAG_TIMED;
2717     }
2718 
2719     if (flag_events_waits_current)
2720     {
2721       if (unlikely(pfs_thread->m_events_waits_current >=
2722                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2723       {
2724         locker_lost++;
2725         return NULL;
2726       }
2727       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2728       state->m_wait= wait;
2729       flags|= STATE_FLAG_EVENT;
2730 
2731       PFS_events_waits *parent_event= wait - 1;
2732       wait->m_event_type= EVENT_TYPE_WAIT;
2733       wait->m_nesting_event_id= parent_event->m_event_id;
2734       wait->m_nesting_event_type= parent_event->m_event_type;
2735 
2736       PFS_table_share *share= pfs_table->m_share;
2737       wait->m_thread= pfs_thread;
2738       wait->m_class= &global_table_lock_class;
2739       wait->m_timer_start= timer_start;
2740       wait->m_timer_end= 0;
2741       wait->m_object_instance_addr= pfs_table->m_identity;
2742       wait->m_event_id= pfs_thread->m_event_id++;
2743       wait->m_end_event_id= 0;
2744       wait->m_operation= table_lock_operation_map[lock_type];
2745       wait->m_flags= 0;
2746       wait->m_object_type= share->get_object_type();
2747       wait->m_weak_table_share= share;
2748       wait->m_weak_version= share->get_version();
2749       wait->m_index= 0;
2750       wait->m_source_file= src_file;
2751       wait->m_source_line= src_line;
2752       wait->m_wait_class= WAIT_CLASS_TABLE;
2753 
2754       pfs_thread->m_events_waits_current++;
2755     }
2756   }
2757   else
2758   {
2759     if (pfs_table->m_lock_timed)
2760     {
2761       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
2762       state->m_timer_start= timer_start;
2763       flags= STATE_FLAG_TIMED;
2764     }
2765     else
2766     {
2767       /* TODO: consider a shortcut here */
2768       flags= 0;
2769     }
2770   }
2771 
2772   state->m_flags= flags;
2773   state->m_table= table;
2774   state->m_index= lock_type;
2775   return reinterpret_cast<PSI_table_locker*> (state);
2776 }
2777 
2778 /**
2779   Implementation of the file instrumentation interface.
2780   @sa PSI_v1::get_thread_file_name_locker.
2781 */
2782 static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state * state,PSI_file_key key,PSI_file_operation op,const char * name,const void * identity)2783 get_thread_file_name_locker_v1(PSI_file_locker_state *state,
2784                                PSI_file_key key,
2785                                PSI_file_operation op,
2786                                const char *name, const void *identity)
2787 {
2788   DBUG_ASSERT(static_cast<int> (op) >= 0);
2789   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2790   DBUG_ASSERT(state != NULL);
2791 
2792   if (! flag_global_instrumentation)
2793     return NULL;
2794   PFS_file_class *klass= find_file_class(key);
2795   if (unlikely(klass == NULL))
2796     return NULL;
2797   if (! klass->m_enabled)
2798     return NULL;
2799 
2800   /* Needed for the LF_HASH */
2801   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2802   if (unlikely(pfs_thread == NULL))
2803     return NULL;
2804 
2805   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
2806     return NULL;
2807 
2808   register uint flags;
2809 
2810   state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2811   flags= STATE_FLAG_THREAD;
2812 
2813   if (klass->m_timed)
2814     flags|= STATE_FLAG_TIMED;
2815 
2816   if (flag_events_waits_current)
2817   {
2818     if (unlikely(pfs_thread->m_events_waits_current >=
2819                  & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2820     {
2821       locker_lost++;
2822       return NULL;
2823     }
2824     PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2825     state->m_wait= wait;
2826     flags|= STATE_FLAG_EVENT;
2827 
2828     PFS_events_waits *parent_event= wait - 1;
2829     wait->m_event_type= EVENT_TYPE_WAIT;
2830     wait->m_nesting_event_id= parent_event->m_event_id;
2831     wait->m_nesting_event_type= parent_event->m_event_type;
2832 
2833     wait->m_thread= pfs_thread;
2834     wait->m_class= klass;
2835     wait->m_timer_start= 0;
2836     wait->m_timer_end= 0;
2837     wait->m_object_instance_addr= NULL;
2838     wait->m_weak_file= NULL;
2839     wait->m_weak_version= 0;
2840     wait->m_event_id= pfs_thread->m_event_id++;
2841     wait->m_end_event_id= 0;
2842     wait->m_operation= file_operation_map[static_cast<int> (op)];
2843     wait->m_wait_class= WAIT_CLASS_FILE;
2844 
2845     pfs_thread->m_events_waits_current++;
2846   }
2847 
2848   state->m_flags= flags;
2849   state->m_file= NULL;
2850   state->m_name= name;
2851   state->m_class= klass;
2852   state->m_operation= op;
2853   return reinterpret_cast<PSI_file_locker*> (state);
2854 }
2855 
2856 /**
2857   Implementation of the file instrumentation interface.
2858   @sa PSI_v1::get_thread_file_stream_locker.
2859 */
2860 static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state * state,PSI_file * file,PSI_file_operation op)2861 get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
2862                                  PSI_file *file, PSI_file_operation op)
2863 {
2864   PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
2865   DBUG_ASSERT(static_cast<int> (op) >= 0);
2866   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2867   DBUG_ASSERT(state != NULL);
2868 
2869   if (unlikely(pfs_file == NULL))
2870     return NULL;
2871   DBUG_ASSERT(pfs_file->m_class != NULL);
2872   PFS_file_class *klass= pfs_file->m_class;
2873 
2874   if (! pfs_file->m_enabled)
2875     return NULL;
2876 
2877   register uint flags;
2878 
2879   if (flag_thread_instrumentation)
2880   {
2881     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2882     if (unlikely(pfs_thread == NULL))
2883       return NULL;
2884     if (! pfs_thread->m_enabled)
2885       return NULL;
2886     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2887     flags= STATE_FLAG_THREAD;
2888 
2889     if (pfs_file->m_timed)
2890       flags|= STATE_FLAG_TIMED;
2891 
2892     if (flag_events_waits_current)
2893     {
2894       if (unlikely(pfs_thread->m_events_waits_current >=
2895                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
2896       {
2897         locker_lost++;
2898         return NULL;
2899       }
2900       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
2901       state->m_wait= wait;
2902       flags|= STATE_FLAG_EVENT;
2903 
2904       PFS_events_waits *parent_event= wait - 1;
2905       wait->m_event_type= EVENT_TYPE_WAIT;
2906       wait->m_nesting_event_id= parent_event->m_event_id;
2907       wait->m_nesting_event_type= parent_event->m_event_type;
2908 
2909       wait->m_thread= pfs_thread;
2910       wait->m_class= klass;
2911       wait->m_timer_start= 0;
2912       wait->m_timer_end= 0;
2913       wait->m_object_instance_addr= pfs_file;
2914       wait->m_weak_file= pfs_file;
2915       wait->m_weak_version= pfs_file->get_version();
2916       wait->m_event_id= pfs_thread->m_event_id++;
2917       wait->m_end_event_id= 0;
2918       wait->m_operation= file_operation_map[static_cast<int> (op)];
2919       wait->m_wait_class= WAIT_CLASS_FILE;
2920 
2921       pfs_thread->m_events_waits_current++;
2922     }
2923   }
2924   else
2925   {
2926     state->m_thread= NULL;
2927     if (pfs_file->m_timed)
2928     {
2929       flags= STATE_FLAG_TIMED;
2930     }
2931     else
2932     {
2933       /* TODO: consider a shortcut. */
2934       flags= 0;
2935     }
2936   }
2937 
2938   state->m_flags= flags;
2939   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
2940   state->m_operation= op;
2941   state->m_name= NULL;
2942   state->m_class= klass;
2943   return reinterpret_cast<PSI_file_locker*> (state);
2944 }
2945 
2946 /**
2947   Implementation of the file instrumentation interface.
2948   @sa PSI_v1::get_thread_file_descriptor_locker.
2949 */
2950 static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state * state,File file,PSI_file_operation op)2951 get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
2952                                      File file, PSI_file_operation op)
2953 {
2954   int index= static_cast<int> (file);
2955   DBUG_ASSERT(static_cast<int> (op) >= 0);
2956   DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
2957   DBUG_ASSERT(state != NULL);
2958 
2959   if (unlikely((index < 0) || (index >= file_handle_max)))
2960     return NULL;
2961 
2962   PFS_file *pfs_file= file_handle_array[index];
2963   if (unlikely(pfs_file == NULL))
2964     return NULL;
2965 
2966   /*
2967     We are about to close a file by descriptor number,
2968     and the calling code still holds the descriptor.
2969     Cleanup the file descriptor <--> file instrument association.
2970     Remove the instrumentation *before* the close to avoid race
2971     conditions with another thread opening a file
2972     (that could be given the same descriptor).
2973   */
2974   if (op == PSI_FILE_CLOSE)
2975     file_handle_array[index]= NULL;
2976 
2977   if (! pfs_file->m_enabled)
2978     return NULL;
2979 
2980   DBUG_ASSERT(pfs_file->m_class != NULL);
2981   PFS_file_class *klass= pfs_file->m_class;
2982 
2983   register uint flags;
2984 
2985   if (flag_thread_instrumentation)
2986   {
2987     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2988     if (unlikely(pfs_thread == NULL))
2989       return NULL;
2990     if (! pfs_thread->m_enabled)
2991       return NULL;
2992     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
2993     flags= STATE_FLAG_THREAD;
2994 
2995     if (pfs_file->m_timed)
2996       flags|= STATE_FLAG_TIMED;
2997 
2998     if (flag_events_waits_current)
2999     {
3000       if (unlikely(pfs_thread->m_events_waits_current >=
3001                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3002       {
3003         locker_lost++;
3004         return NULL;
3005       }
3006       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3007       state->m_wait= wait;
3008       flags|= STATE_FLAG_EVENT;
3009 
3010       PFS_events_waits *parent_event= wait - 1;
3011       wait->m_event_type= EVENT_TYPE_WAIT;
3012       wait->m_nesting_event_id= parent_event->m_event_id;
3013       wait->m_nesting_event_type= parent_event->m_event_type;
3014 
3015       wait->m_thread= pfs_thread;
3016       wait->m_class= klass;
3017       wait->m_timer_start= 0;
3018       wait->m_timer_end= 0;
3019       wait->m_object_instance_addr= pfs_file;
3020       wait->m_weak_file= pfs_file;
3021       wait->m_weak_version= pfs_file->get_version();
3022       wait->m_event_id= pfs_thread->m_event_id++;
3023       wait->m_end_event_id= 0;
3024       wait->m_operation= file_operation_map[static_cast<int> (op)];
3025       wait->m_wait_class= WAIT_CLASS_FILE;
3026 
3027       pfs_thread->m_events_waits_current++;
3028     }
3029   }
3030   else
3031   {
3032     state->m_thread= NULL;
3033     if (pfs_file->m_timed)
3034     {
3035       flags= STATE_FLAG_TIMED;
3036     }
3037     else
3038     {
3039       /* TODO: consider a shortcut. */
3040       flags= 0;
3041     }
3042   }
3043 
3044   state->m_flags= flags;
3045   state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3046   state->m_operation= op;
3047   state->m_name= NULL;
3048   state->m_class= klass;
3049   return reinterpret_cast<PSI_file_locker*> (state);
3050 }
3051 
3052 /** Socket locker */
3053 
3054 static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state * state,PSI_socket * socket,PSI_socket_operation op,size_t count,const char * src_file,uint src_line)3055 start_socket_wait_v1(PSI_socket_locker_state *state,
3056                      PSI_socket *socket,
3057                      PSI_socket_operation op,
3058                      size_t count,
3059                      const char *src_file, uint src_line)
3060 {
3061   DBUG_ASSERT(static_cast<int> (op) >= 0);
3062   DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
3063   DBUG_ASSERT(state != NULL);
3064   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);
3065 
3066   DBUG_ASSERT(pfs_socket != NULL);
3067   DBUG_ASSERT(pfs_socket->m_class != NULL);
3068 
3069   if (!pfs_socket->m_enabled || pfs_socket->m_idle)
3070     return NULL;
3071 
3072   register uint flags= 0;
3073   ulonglong timer_start= 0;
3074 
3075   if (flag_thread_instrumentation)
3076   {
3077     /*
3078        Do not use pfs_socket->m_thread_owner here,
3079        as different threads may use concurrently the same socket,
3080        for example during a KILL.
3081     */
3082     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3083 
3084     if (unlikely(pfs_thread == NULL))
3085       return NULL;
3086 
3087     if (!pfs_thread->m_enabled)
3088       return NULL;
3089 
3090     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3091     flags= STATE_FLAG_THREAD;
3092 
3093     if (pfs_socket->m_timed)
3094     {
3095       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3096       state->m_timer_start= timer_start;
3097       flags|= STATE_FLAG_TIMED;
3098     }
3099 
3100     if (flag_events_waits_current)
3101     {
3102       if (unlikely(pfs_thread->m_events_waits_current >=
3103                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3104       {
3105         locker_lost++;
3106         return NULL;
3107       }
3108       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3109       state->m_wait= wait;
3110       flags|= STATE_FLAG_EVENT;
3111 
3112       PFS_events_waits *parent_event= wait - 1;
3113       wait->m_event_type= EVENT_TYPE_WAIT;
3114       wait->m_nesting_event_id=   parent_event->m_event_id;
3115       wait->m_nesting_event_type= parent_event->m_event_type;
3116       wait->m_thread=       pfs_thread;
3117       wait->m_class=        pfs_socket->m_class;
3118       wait->m_timer_start=  timer_start;
3119       wait->m_timer_end=    0;
3120       wait->m_object_instance_addr= pfs_socket->m_identity;
3121       wait->m_weak_socket=  pfs_socket;
3122       wait->m_weak_version= pfs_socket->get_version();
3123       wait->m_event_id=     pfs_thread->m_event_id++;
3124       wait->m_end_event_id= 0;
3125       wait->m_operation=    socket_operation_map[static_cast<int>(op)];
3126       wait->m_source_file= src_file;
3127       wait->m_source_line= src_line;
3128       wait->m_number_of_bytes= count;
3129       wait->m_wait_class=   WAIT_CLASS_SOCKET;
3130 
3131       pfs_thread->m_events_waits_current++;
3132     }
3133   }
3134   else
3135   {
3136     if (pfs_socket->m_timed)
3137     {
3138       timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
3139       state->m_timer_start= timer_start;
3140       flags= STATE_FLAG_TIMED;
3141     }
3142     else
3143     {
3144       /*
3145         Even if timing is disabled, end_socket_wait() still needs a locker to
3146         capture the number of bytes sent or received by the socket operation.
3147         For operations that do not have a byte count, then just increment the
3148         event counter and return a NULL locker.
3149       */
3150       switch (op)
3151       {
3152         case PSI_SOCKET_CONNECT:
3153         case PSI_SOCKET_CREATE:
3154         case PSI_SOCKET_BIND:
3155         case PSI_SOCKET_SEEK:
3156         case PSI_SOCKET_OPT:
3157         case PSI_SOCKET_STAT:
3158         case PSI_SOCKET_SHUTDOWN:
3159         case PSI_SOCKET_CLOSE:
3160         case PSI_SOCKET_SELECT:
3161           pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
3162           return NULL;
3163         default:
3164           break;
3165       }
3166     }
3167   }
3168 
3169   state->m_flags= flags;
3170   state->m_socket= socket;
3171   state->m_operation= op;
3172   return reinterpret_cast<PSI_socket_locker*> (state);
3173 }
3174 
3175 /**
3176   Implementation of the mutex instrumentation interface.
3177   @sa PSI_v1::unlock_mutex.
3178 */
unlock_mutex_v1(PSI_mutex * mutex)3179 static void unlock_mutex_v1(PSI_mutex *mutex)
3180 {
3181   PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3182 
3183   DBUG_ASSERT(pfs_mutex != NULL);
3184 
3185   /*
3186     Note that this code is still protected by the instrumented mutex,
3187     and therefore is thread safe. See inline_mysql_mutex_unlock().
3188   */
3189 
3190   /* Always update the instrumented state */
3191   pfs_mutex->m_owner= NULL;
3192   pfs_mutex->m_last_locked= 0;
3193 
3194 #ifdef LATER_WL2333
3195   /*
3196     See WL#2333: SHOW ENGINE ... LOCK STATUS.
3197     PFS_mutex::m_lock_stat is not exposed in user visible tables
3198     currently, so there is no point spending time computing it.
3199   */
3200   if (! pfs_mutex->m_enabled)
3201     return;
3202 
3203   if (! pfs_mutex->m_timed)
3204     return;
3205 
3206   ulonglong locked_time;
3207   locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3208   pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3209 #endif
3210 }
3211 
3212 /**
3213   Implementation of the rwlock instrumentation interface.
3214   @sa PSI_v1::unlock_rwlock.
3215 */
unlock_rwlock_v1(PSI_rwlock * rwlock)3216 static void unlock_rwlock_v1(PSI_rwlock *rwlock)
3217 {
3218   PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
3219   DBUG_ASSERT(pfs_rwlock != NULL);
3220   DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
3221   DBUG_ASSERT(pfs_rwlock->m_class != NULL);
3222   DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());
3223 
3224   bool last_writer= false;
3225   bool last_reader= false;
3226 
3227   /*
3228     Note that this code is still protected by the instrumented rwlock,
3229     and therefore is:
3230     - thread safe for write locks
3231     - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
3232     See inline_mysql_rwlock_unlock()
3233   */
3234 
3235   /* Always update the instrumented state */
3236   if (pfs_rwlock->m_writer != NULL)
3237   {
3238     /* Nominal case, a writer is unlocking. */
3239     last_writer= true;
3240     pfs_rwlock->m_writer= NULL;
3241     /* Reset the readers stats, they could be off */
3242     pfs_rwlock->m_readers= 0;
3243   }
3244   else if (likely(pfs_rwlock->m_readers > 0))
3245   {
3246     /* Nominal case, a reader is unlocking. */
3247     if (--(pfs_rwlock->m_readers) == 0)
3248       last_reader= true;
3249   }
3250   else
3251   {
3252     /*
3253       Edge case, we have no writer and no readers,
3254       on an unlock event.
3255       This is possible for:
3256       - partial instrumentation
3257       - instrumentation disabled at runtime,
3258         see when get_thread_rwlock_locker_v1() returns NULL
3259       No further action is taken here, the next
3260       write lock will put the statistics is a valid state.
3261     */
3262   }
3263 
3264 #ifdef LATER_WL2333
3265   /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
3266 
3267   if (! pfs_rwlock->m_enabled)
3268     return;
3269 
3270   if (! pfs_rwlock->m_timed)
3271     return;
3272 
3273   ulonglong locked_time;
3274   if (last_writer)
3275   {
3276     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
3277     pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
3278   }
3279   else if (last_reader)
3280   {
3281     locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
3282     pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
3283   }
3284 #else
3285   (void) last_reader;
3286   (void) last_writer;
3287 #endif
3288 }
3289 
3290 /**
3291   Implementation of the cond instrumentation interface.
3292   @sa PSI_v1::signal_cond.
3293 */
signal_cond_v1(PSI_cond * cond)3294 static void signal_cond_v1(PSI_cond* cond)
3295 {
3296   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3297 
3298   DBUG_ASSERT(pfs_cond != NULL);
3299 
3300   pfs_cond->m_cond_stat.m_signal_count++;
3301 }
3302 
3303 /**
3304   Implementation of the cond instrumentation interface.
3305   @sa PSI_v1::broadcast_cond.
3306 */
broadcast_cond_v1(PSI_cond * cond)3307 static void broadcast_cond_v1(PSI_cond* cond)
3308 {
3309   PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3310 
3311   DBUG_ASSERT(pfs_cond != NULL);
3312 
3313   pfs_cond->m_cond_stat.m_broadcast_count++;
3314 }
3315 
3316 /**
3317   Implementation of the idle instrumentation interface.
3318   @sa PSI_v1::start_idle_wait.
3319 */
3320 static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state * state,const char * src_file,uint src_line)3321 start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
3322 {
3323   DBUG_ASSERT(state != NULL);
3324 
3325   if (!flag_global_instrumentation)
3326     return NULL;
3327 
3328   if (!global_idle_class.m_enabled)
3329     return NULL;
3330 
3331   register uint flags= 0;
3332   ulonglong timer_start= 0;
3333 
3334   if (flag_thread_instrumentation)
3335   {
3336     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
3337     if (unlikely(pfs_thread == NULL))
3338       return NULL;
3339     if (!pfs_thread->m_enabled)
3340       return NULL;
3341     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
3342     flags= STATE_FLAG_THREAD;
3343 
3344     DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);
3345 
3346     if (global_idle_class.m_timed)
3347     {
3348       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3349       state->m_timer_start= timer_start;
3350       flags|= STATE_FLAG_TIMED;
3351     }
3352 
3353     if (flag_events_waits_current)
3354     {
3355       if (unlikely(pfs_thread->m_events_waits_current >=
3356                    & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
3357       {
3358         locker_lost++;
3359         return NULL;
3360       }
3361       PFS_events_waits *wait= pfs_thread->m_events_waits_current;
3362       state->m_wait= wait;
3363       flags|= STATE_FLAG_EVENT;
3364 
3365       wait->m_event_type= EVENT_TYPE_WAIT;
3366       /*
3367         IDLE events are waits, but by definition we know that
3368         such waits happen outside of any STAGE and STATEMENT,
3369         so they have no parents.
3370       */
3371       wait->m_nesting_event_id= 0;
3372       /* no need to set wait->m_nesting_event_type */
3373 
3374       wait->m_thread= pfs_thread;
3375       wait->m_class= &global_idle_class;
3376       wait->m_timer_start= timer_start;
3377       wait->m_timer_end= 0;
3378       wait->m_event_id= pfs_thread->m_event_id++;
3379       wait->m_end_event_id= 0;
3380       wait->m_operation= OPERATION_TYPE_IDLE;
3381       wait->m_source_file= src_file;
3382       wait->m_source_line= src_line;
3383       wait->m_wait_class= WAIT_CLASS_IDLE;
3384 
3385       pfs_thread->m_events_waits_current++;
3386     }
3387   }
3388   else
3389   {
3390     if (global_idle_class.m_timed)
3391     {
3392       timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
3393       state->m_timer_start= timer_start;
3394       flags= STATE_FLAG_TIMED;
3395     }
3396   }
3397 
3398   state->m_flags= flags;
3399   return reinterpret_cast<PSI_idle_locker*> (state);
3400 }
3401 
3402 /**
3403   Implementation of the mutex instrumentation interface.
3404   @sa PSI_v1::end_idle_wait.
3405 */
end_idle_wait_v1(PSI_idle_locker * locker)3406 static void end_idle_wait_v1(PSI_idle_locker* locker)
3407 {
3408   PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
3409   DBUG_ASSERT(state != NULL);
3410   ulonglong timer_end= 0;
3411   ulonglong wait_time= 0;
3412 
3413   register uint flags= state->m_flags;
3414 
3415   if (flags & STATE_FLAG_TIMED)
3416   {
3417     timer_end= state->m_timer();
3418     wait_time= timer_end - state->m_timer_start;
3419   }
3420 
3421   if (flags & STATE_FLAG_THREAD)
3422   {
3423     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3424     PFS_single_stat *event_name_array;
3425     event_name_array= thread->m_instr_class_waits_stats;
3426 
3427     if (flags & STATE_FLAG_TIMED)
3428     {
3429       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3430       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
3431     }
3432     else
3433     {
3434       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3435       event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
3436     }
3437 
3438     if (flags & STATE_FLAG_EVENT)
3439     {
3440       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3441       DBUG_ASSERT(wait != NULL);
3442 
3443       wait->m_timer_end= timer_end;
3444       wait->m_end_event_id= thread->m_event_id;
3445       if (flag_events_waits_history)
3446         insert_events_waits_history(thread, wait);
3447       if (flag_events_waits_history_long)
3448         insert_events_waits_history_long(wait);
3449       thread->m_events_waits_current--;
3450 
3451       DBUG_ASSERT(wait == thread->m_events_waits_current);
3452     }
3453   }
3454 
3455   if (flags & STATE_FLAG_TIMED)
3456   {
3457     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
3458     global_idle_stat.aggregate_value(wait_time);
3459   }
3460   else
3461   {
3462     /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
3463     global_idle_stat.aggregate_counted();
3464   }
3465 }
3466 
3467 /**
3468   Implementation of the mutex instrumentation interface.
3469   @sa PSI_v1::end_mutex_wait.
3470 */
end_mutex_wait_v1(PSI_mutex_locker * locker,int rc)3471 static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
3472 {
3473   PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
3474   DBUG_ASSERT(state != NULL);
3475 
3476   ulonglong timer_end= 0;
3477   ulonglong wait_time= 0;
3478 
3479   PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
3480   DBUG_ASSERT(mutex != NULL);
3481   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3482 
3483   register uint flags= state->m_flags;
3484 
3485   if (flags & STATE_FLAG_TIMED)
3486   {
3487     timer_end= state->m_timer();
3488     wait_time= timer_end - state->m_timer_start;
3489     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3490     mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
3491   }
3492   else
3493   {
3494     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3495     mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
3496   }
3497 
3498   if (likely(rc == 0))
3499   {
3500     mutex->m_owner= thread;
3501     mutex->m_last_locked= timer_end;
3502   }
3503 
3504   if (flags & STATE_FLAG_THREAD)
3505   {
3506     PFS_single_stat *event_name_array;
3507     event_name_array= thread->m_instr_class_waits_stats;
3508     uint index= mutex->m_class->m_event_name_index;
3509 
3510     if (flags & STATE_FLAG_TIMED)
3511     {
3512       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3513       event_name_array[index].aggregate_value(wait_time);
3514     }
3515     else
3516     {
3517       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3518       event_name_array[index].aggregate_counted();
3519     }
3520 
3521     if (flags & STATE_FLAG_EVENT)
3522     {
3523       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3524       DBUG_ASSERT(wait != NULL);
3525 
3526       wait->m_timer_end= timer_end;
3527       wait->m_end_event_id= thread->m_event_id;
3528       if (flag_events_waits_history)
3529         insert_events_waits_history(thread, wait);
3530       if (flag_events_waits_history_long)
3531         insert_events_waits_history_long(wait);
3532       thread->m_events_waits_current--;
3533 
3534       DBUG_ASSERT(wait == thread->m_events_waits_current);
3535     }
3536   }
3537 }
3538 
3539 /**
3540   Implementation of the rwlock instrumentation interface.
3541   @sa PSI_v1::end_rwlock_rdwait.
3542 */
end_rwlock_rdwait_v1(PSI_rwlock_locker * locker,int rc)3543 static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
3544 {
3545   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3546   DBUG_ASSERT(state != NULL);
3547 
3548   ulonglong timer_end= 0;
3549   ulonglong wait_time= 0;
3550 
3551   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3552   DBUG_ASSERT(rwlock != NULL);
3553 
3554   if (state->m_flags & STATE_FLAG_TIMED)
3555   {
3556     timer_end= state->m_timer();
3557     wait_time= timer_end - state->m_timer_start;
3558     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3559     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3560   }
3561   else
3562   {
3563     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3564     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3565   }
3566 
3567   if (rc == 0)
3568   {
3569     /*
3570       Warning:
3571       Multiple threads can execute this section concurrently
3572       (since multiple readers can execute in parallel).
3573       The statistics generated are not safe, which is why they are
3574       just statistics, not facts.
3575     */
3576     if (rwlock->m_readers == 0)
3577       rwlock->m_last_read= timer_end;
3578     rwlock->m_writer= NULL;
3579     rwlock->m_readers++;
3580   }
3581 
3582   if (state->m_flags & STATE_FLAG_THREAD)
3583   {
3584     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3585     DBUG_ASSERT(thread != NULL);
3586 
3587     PFS_single_stat *event_name_array;
3588     event_name_array= thread->m_instr_class_waits_stats;
3589     uint index= rwlock->m_class->m_event_name_index;
3590 
3591     if (state->m_flags & STATE_FLAG_TIMED)
3592     {
3593       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3594       event_name_array[index].aggregate_value(wait_time);
3595     }
3596     else
3597     {
3598       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3599       event_name_array[index].aggregate_counted();
3600     }
3601 
3602     if (state->m_flags & STATE_FLAG_EVENT)
3603     {
3604       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3605       DBUG_ASSERT(wait != NULL);
3606 
3607       wait->m_timer_end= timer_end;
3608       wait->m_end_event_id= thread->m_event_id;
3609       if (flag_events_waits_history)
3610         insert_events_waits_history(thread, wait);
3611       if (flag_events_waits_history_long)
3612         insert_events_waits_history_long(wait);
3613       thread->m_events_waits_current--;
3614 
3615       DBUG_ASSERT(wait == thread->m_events_waits_current);
3616     }
3617   }
3618 }
3619 
3620 /**
3621   Implementation of the rwlock instrumentation interface.
3622   @sa PSI_v1::end_rwlock_wrwait.
3623 */
end_rwlock_wrwait_v1(PSI_rwlock_locker * locker,int rc)3624 static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
3625 {
3626   PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
3627   DBUG_ASSERT(state != NULL);
3628 
3629   ulonglong timer_end= 0;
3630   ulonglong wait_time= 0;
3631 
3632   PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
3633   DBUG_ASSERT(rwlock != NULL);
3634   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3635 
3636   if (state->m_flags & STATE_FLAG_TIMED)
3637   {
3638     timer_end= state->m_timer();
3639     wait_time= timer_end - state->m_timer_start;
3640     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3641     rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
3642   }
3643   else
3644   {
3645     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3646     rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
3647   }
3648 
3649   if (likely(rc == 0))
3650   {
3651     /* Thread safe : we are protected by the instrumented rwlock */
3652     rwlock->m_writer= thread;
3653     rwlock->m_last_written= timer_end;
3654     /* Reset the readers stats, they could be off */
3655     rwlock->m_readers= 0;
3656     rwlock->m_last_read= 0;
3657   }
3658 
3659   if (state->m_flags & STATE_FLAG_THREAD)
3660   {
3661     PFS_single_stat *event_name_array;
3662     event_name_array= thread->m_instr_class_waits_stats;
3663     uint index= rwlock->m_class->m_event_name_index;
3664 
3665     if (state->m_flags & STATE_FLAG_TIMED)
3666     {
3667       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3668       event_name_array[index].aggregate_value(wait_time);
3669     }
3670     else
3671     {
3672       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3673       event_name_array[index].aggregate_counted();
3674     }
3675 
3676     if (state->m_flags & STATE_FLAG_EVENT)
3677     {
3678       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3679       DBUG_ASSERT(wait != NULL);
3680 
3681       wait->m_timer_end= timer_end;
3682       wait->m_end_event_id= thread->m_event_id;
3683       if (flag_events_waits_history)
3684         insert_events_waits_history(thread, wait);
3685       if (flag_events_waits_history_long)
3686         insert_events_waits_history_long(wait);
3687       thread->m_events_waits_current--;
3688 
3689       DBUG_ASSERT(wait == thread->m_events_waits_current);
3690     }
3691   }
3692 }
3693 
3694 /**
3695   Implementation of the cond instrumentation interface.
3696   @sa PSI_v1::end_cond_wait.
3697 */
end_cond_wait_v1(PSI_cond_locker * locker,int rc)3698 static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
3699 {
3700   PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
3701   DBUG_ASSERT(state != NULL);
3702 
3703   ulonglong timer_end= 0;
3704   ulonglong wait_time= 0;
3705 
3706   PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
3707   /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */
3708 
3709   if (state->m_flags & STATE_FLAG_TIMED)
3710   {
3711     timer_end= state->m_timer();
3712     wait_time= timer_end - state->m_timer_start;
3713     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
3714     cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
3715   }
3716   else
3717   {
3718     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
3719     cond->m_cond_stat.m_wait_stat.aggregate_counted();
3720   }
3721 
3722   if (state->m_flags & STATE_FLAG_THREAD)
3723   {
3724     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3725     DBUG_ASSERT(thread != NULL);
3726 
3727     PFS_single_stat *event_name_array;
3728     event_name_array= thread->m_instr_class_waits_stats;
3729     uint index= cond->m_class->m_event_name_index;
3730 
3731     if (state->m_flags & STATE_FLAG_TIMED)
3732     {
3733       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
3734       event_name_array[index].aggregate_value(wait_time);
3735     }
3736     else
3737     {
3738       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
3739       event_name_array[index].aggregate_counted();
3740     }
3741 
3742     if (state->m_flags & STATE_FLAG_EVENT)
3743     {
3744       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3745       DBUG_ASSERT(wait != NULL);
3746 
3747       wait->m_timer_end= timer_end;
3748       wait->m_end_event_id= thread->m_event_id;
3749       if (flag_events_waits_history)
3750         insert_events_waits_history(thread, wait);
3751       if (flag_events_waits_history_long)
3752         insert_events_waits_history_long(wait);
3753       thread->m_events_waits_current--;
3754 
3755       DBUG_ASSERT(wait == thread->m_events_waits_current);
3756     }
3757   }
3758 }
3759 
3760 /**
3761   Implementation of the table instrumentation interface.
3762   @sa PSI_v1::end_table_io_wait.
3763 */
end_table_io_wait_v1(PSI_table_locker * locker)3764 static void end_table_io_wait_v1(PSI_table_locker* locker)
3765 {
3766   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3767   DBUG_ASSERT(state != NULL);
3768 
3769   ulonglong timer_end= 0;
3770   ulonglong wait_time= 0;
3771 
3772   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3773   DBUG_ASSERT(table != NULL);
3774 
3775   PFS_single_stat *stat;
3776   PFS_table_io_stat *table_io_stat;
3777 
3778   DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
3779               (state->m_index == MAX_INDEXES));
3780 
3781   table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
3782   table_io_stat->m_has_data= true;
3783 
3784   switch (state->m_io_operation)
3785   {
3786   case PSI_TABLE_FETCH_ROW:
3787     stat= & table_io_stat->m_fetch;
3788     break;
3789   case PSI_TABLE_WRITE_ROW:
3790     stat= & table_io_stat->m_insert;
3791     break;
3792   case PSI_TABLE_UPDATE_ROW:
3793     stat= & table_io_stat->m_update;
3794     break;
3795   case PSI_TABLE_DELETE_ROW:
3796     stat= & table_io_stat->m_delete;
3797     break;
3798   default:
3799     DBUG_ASSERT(false);
3800     stat= NULL;
3801     break;
3802   }
3803 
3804   register uint flags= state->m_flags;
3805 
3806   if (flags & STATE_FLAG_TIMED)
3807   {
3808     timer_end= state->m_timer();
3809     wait_time= timer_end - state->m_timer_start;
3810     stat->aggregate_value(wait_time);
3811   }
3812   else
3813   {
3814     stat->aggregate_counted();
3815   }
3816 
3817   if (flags & STATE_FLAG_THREAD)
3818   {
3819     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3820     DBUG_ASSERT(thread != NULL);
3821 
3822     PFS_single_stat *event_name_array;
3823     event_name_array= thread->m_instr_class_waits_stats;
3824 
3825     /*
3826       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3827       (for wait/io/table/sql/handler)
3828     */
3829     if (flags & STATE_FLAG_TIMED)
3830     {
3831       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
3832     }
3833     else
3834     {
3835       event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
3836     }
3837 
3838     if (flags & STATE_FLAG_EVENT)
3839     {
3840       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3841       DBUG_ASSERT(wait != NULL);
3842 
3843       wait->m_timer_end= timer_end;
3844       wait->m_end_event_id= thread->m_event_id;
3845       if (flag_events_waits_history)
3846         insert_events_waits_history(thread, wait);
3847       if (flag_events_waits_history_long)
3848         insert_events_waits_history_long(wait);
3849       thread->m_events_waits_current--;
3850 
3851       DBUG_ASSERT(wait == thread->m_events_waits_current);
3852     }
3853   }
3854 
3855   table->m_has_io_stats= true;
3856 }
3857 
3858 /**
3859   Implementation of the table instrumentation interface.
3860   @sa PSI_v1::end_table_lock_wait.
3861 */
end_table_lock_wait_v1(PSI_table_locker * locker)3862 static void end_table_lock_wait_v1(PSI_table_locker* locker)
3863 {
3864   PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
3865   DBUG_ASSERT(state != NULL);
3866 
3867   ulonglong timer_end= 0;
3868   ulonglong wait_time= 0;
3869 
3870   PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
3871   DBUG_ASSERT(table != NULL);
3872 
3873   PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];
3874 
3875   register uint flags= state->m_flags;
3876 
3877   if (flags & STATE_FLAG_TIMED)
3878   {
3879     timer_end= state->m_timer();
3880     wait_time= timer_end - state->m_timer_start;
3881     stat->aggregate_value(wait_time);
3882   }
3883   else
3884   {
3885     stat->aggregate_counted();
3886   }
3887 
3888   if (flags & STATE_FLAG_THREAD)
3889   {
3890     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
3891     DBUG_ASSERT(thread != NULL);
3892 
3893     PFS_single_stat *event_name_array;
3894     event_name_array= thread->m_instr_class_waits_stats;
3895 
3896     /*
3897       Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
3898       (for wait/lock/table/sql/handler)
3899     */
3900     if (flags & STATE_FLAG_TIMED)
3901     {
3902       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
3903     }
3904     else
3905     {
3906       event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
3907     }
3908 
3909     if (flags & STATE_FLAG_EVENT)
3910     {
3911       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
3912       DBUG_ASSERT(wait != NULL);
3913 
3914       wait->m_timer_end= timer_end;
3915       wait->m_end_event_id= thread->m_event_id;
3916       if (flag_events_waits_history)
3917         insert_events_waits_history(thread, wait);
3918       if (flag_events_waits_history_long)
3919         insert_events_waits_history_long(wait);
3920       thread->m_events_waits_current--;
3921 
3922       DBUG_ASSERT(wait == thread->m_events_waits_current);
3923     }
3924   }
3925 
3926   table->m_has_lock_stats= true;
3927 }
3928 
3929 static void start_file_wait_v1(PSI_file_locker *locker,
3930                                size_t count,
3931                                const char *src_file,
3932                                uint src_line);
3933 
3934 static void end_file_wait_v1(PSI_file_locker *locker,
3935                              size_t count);
3936 
3937 /**
3938   Implementation of the file instrumentation interface.
3939   @sa PSI_v1::start_file_open_wait.
3940 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3941 static void start_file_open_wait_v1(PSI_file_locker *locker,
3942                                     const char *src_file,
3943                                     uint src_line)
3944 {
3945   start_file_wait_v1(locker, 0, src_file, src_line);
3946 
3947   return;
3948 }
3949 
3950 /**
3951   Implementation of the file instrumentation interface.
3952   @sa PSI_v1::end_file_open_wait.
3953 */
end_file_open_wait_v1(PSI_file_locker * locker,void * result)3954 static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
3955                                        void *result)
3956 {
3957   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3958   DBUG_ASSERT(state != NULL);
3959 
3960   switch (state->m_operation)
3961   {
3962   case PSI_FILE_STAT:
3963   case PSI_FILE_RENAME:
3964     break;
3965   case PSI_FILE_STREAM_OPEN:
3966   case PSI_FILE_CREATE:
3967   case PSI_FILE_OPEN:
3968     if (result != NULL)
3969     {
3970       PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
3971       PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
3972       const char *name= state->m_name;
3973       uint len= strlen(name);
3974       PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
3975       state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3976     }
3977     break;
3978   default:
3979     DBUG_ASSERT(false);
3980     break;
3981   }
3982 
3983   end_file_wait_v1(locker, 0);
3984 
3985   return state->m_file;
3986 }
3987 
3988 /**
3989   Implementation of the file instrumentation interface.
3990   @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
3991 */
end_file_open_wait_and_bind_to_descriptor_v1(PSI_file_locker * locker,File file)3992 static void end_file_open_wait_and_bind_to_descriptor_v1
3993   (PSI_file_locker *locker, File file)
3994 {
3995   PFS_file *pfs_file= NULL;
3996   int index= (int) file;
3997   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3998   DBUG_ASSERT(state != NULL);
3999 
4000   if (index >= 0)
4001   {
4002     PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
4003     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4004     const char *name= state->m_name;
4005     uint len= strlen(name);
4006     pfs_file= find_or_create_file(thread, klass, name, len, true);
4007     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4008   }
4009 
4010   end_file_wait_v1(locker, 0);
4011 
4012   if (likely(index >= 0))
4013   {
4014     if (likely(index < file_handle_max))
4015       file_handle_array[index]= pfs_file;
4016     else
4017     {
4018       if (pfs_file != NULL)
4019         release_file(pfs_file);
4020       file_handle_lost++;
4021     }
4022   }
4023 }
4024 
4025 /**
4026   Implementation of the file instrumentation interface.
4027   @sa PSI_v1::start_file_wait.
4028 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4029 static void start_file_wait_v1(PSI_file_locker *locker,
4030                                size_t count,
4031                                const char *src_file,
4032                                uint src_line)
4033 {
4034   ulonglong timer_start= 0;
4035   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4036   DBUG_ASSERT(state != NULL);
4037 
4038   register uint flags= state->m_flags;
4039 
4040   if (flags & STATE_FLAG_TIMED)
4041   {
4042     timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4043     state->m_timer_start= timer_start;
4044   }
4045 
4046   if (flags & STATE_FLAG_EVENT)
4047   {
4048     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4049     DBUG_ASSERT(wait != NULL);
4050 
4051     wait->m_timer_start= timer_start;
4052     wait->m_source_file= src_file;
4053     wait->m_source_line= src_line;
4054     wait->m_number_of_bytes= count;
4055   }
4056 }
4057 
4058 /**
4059   Implementation of the file instrumentation interface.
4060   @sa PSI_v1::end_file_wait.
4061 */
end_file_wait_v1(PSI_file_locker * locker,size_t byte_count)4062 static void end_file_wait_v1(PSI_file_locker *locker,
4063                              size_t byte_count)
4064 {
4065   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4066   DBUG_ASSERT(state != NULL);
4067   PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
4068   PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
4069   PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4070 
4071   ulonglong timer_end= 0;
4072   ulonglong wait_time= 0;
4073   PFS_byte_stat *byte_stat;
4074   register uint flags= state->m_flags;
4075   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
4076 
4077   PFS_file_stat *file_stat;
4078 
4079   if (file != NULL)
4080   {
4081     file_stat= & file->m_file_stat;
4082   }
4083   else
4084   {
4085     file_stat= & klass->m_file_stat;
4086   }
4087 
4088   switch (state->m_operation)
4089   {
4090     /* Group read operations */
4091     case PSI_FILE_READ:
4092       byte_stat= &file_stat->m_io_stat.m_read;
4093       break;
4094     /* Group write operations */
4095     case PSI_FILE_WRITE:
4096       byte_stat= &file_stat->m_io_stat.m_write;
4097       break;
4098     /* Group remaining operations as miscellaneous */
4099     case PSI_FILE_CREATE:
4100     case PSI_FILE_CREATE_TMP:
4101     case PSI_FILE_OPEN:
4102     case PSI_FILE_STREAM_OPEN:
4103     case PSI_FILE_STREAM_CLOSE:
4104     case PSI_FILE_SEEK:
4105     case PSI_FILE_TELL:
4106     case PSI_FILE_FLUSH:
4107     case PSI_FILE_FSTAT:
4108     case PSI_FILE_CHSIZE:
4109     case PSI_FILE_DELETE:
4110     case PSI_FILE_RENAME:
4111     case PSI_FILE_SYNC:
4112     case PSI_FILE_STAT:
4113     case PSI_FILE_CLOSE:
4114       byte_stat= &file_stat->m_io_stat.m_misc;
4115       break;
4116     default:
4117       DBUG_ASSERT(false);
4118       byte_stat= NULL;
4119       break;
4120   }
4121 
4122   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
4123   if (flags & STATE_FLAG_TIMED)
4124   {
4125     timer_end= state->m_timer();
4126     wait_time= timer_end - state->m_timer_start;
4127     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
4128     byte_stat->aggregate(wait_time, bytes);
4129   }
4130   else
4131   {
4132     /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
4133     byte_stat->aggregate_counted(bytes);
4134   }
4135 
4136   if (flags & STATE_FLAG_THREAD)
4137   {
4138     DBUG_ASSERT(thread != NULL);
4139 
4140     PFS_single_stat *event_name_array;
4141     event_name_array= thread->m_instr_class_waits_stats;
4142     uint index= klass->m_event_name_index;
4143 
4144     if (flags & STATE_FLAG_TIMED)
4145     {
4146       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4147       event_name_array[index].aggregate_value(wait_time);
4148     }
4149     else
4150     {
4151       /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4152       event_name_array[index].aggregate_counted();
4153     }
4154 
4155     if (state->m_flags & STATE_FLAG_EVENT)
4156     {
4157       PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4158       DBUG_ASSERT(wait != NULL);
4159 
4160       wait->m_timer_end= timer_end;
4161       wait->m_number_of_bytes= bytes;
4162       wait->m_end_event_id= thread->m_event_id;
4163       wait->m_object_instance_addr= file;
4164       wait->m_weak_file= file;
4165       wait->m_weak_version= (file ? file->get_version() : 0);
4166 
4167       if (flag_events_waits_history)
4168         insert_events_waits_history(thread, wait);
4169       if (flag_events_waits_history_long)
4170         insert_events_waits_history_long(wait);
4171       thread->m_events_waits_current--;
4172 
4173       DBUG_ASSERT(wait == thread->m_events_waits_current);
4174     }
4175   }
4176 }
4177 
4178 /**
4179   Implementation of the file instrumentation interface.
4180   @sa PSI_v1::start_file_close_wait.
4181 */
start_file_close_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)4182 static void start_file_close_wait_v1(PSI_file_locker *locker,
4183                                      const char *src_file,
4184                                      uint src_line)
4185 {
4186   PFS_thread *thread;
4187   const char *name;
4188   uint len;
4189   PFS_file *pfs_file;
4190   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4191   DBUG_ASSERT(state != NULL);
4192 
4193   switch (state->m_operation)
4194   {
4195   case PSI_FILE_DELETE:
4196     thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4197     name= state->m_name;
4198     len= strlen(name);
4199     pfs_file= find_or_create_file(thread, NULL, name, len, false);
4200     state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4201     break;
4202   case PSI_FILE_STREAM_CLOSE:
4203   case PSI_FILE_CLOSE:
4204     break;
4205   default:
4206     DBUG_ASSERT(false);
4207     break;
4208   }
4209 
4210   start_file_wait_v1(locker, 0, src_file, src_line);
4211 
4212   return;
4213 }
4214 
4215 /**
4216   Implementation of the file instrumentation interface.
4217   @sa PSI_v1::end_file_close_wait.
4218 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4219 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4220 {
4221   PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4222   DBUG_ASSERT(state != NULL);
4223 
4224   end_file_wait_v1(locker, 0);
4225 
4226   if (rc == 0)
4227   {
4228     PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4229     PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4230 
4231     /* Release or destroy the file if necessary */
4232     switch(state->m_operation)
4233     {
4234     case PSI_FILE_CLOSE:
4235     case PSI_FILE_STREAM_CLOSE:
4236       if (file != NULL)
4237         release_file(file);
4238       break;
4239     case PSI_FILE_DELETE:
4240       if (file != NULL)
4241         destroy_file(thread, file);
4242       break;
4243     default:
4244       DBUG_ASSERT(false);
4245       break;
4246     }
4247   }
4248   return;
4249 }
4250 
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4251 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4252 {
4253   ulonglong timer_value= 0;
4254 
4255   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4256   if (unlikely(pfs_thread == NULL))
4257     return;
4258 
4259   /* Always update column threads.processlist_state. */
4260   pfs_thread->m_stage= key;
4261 
4262   if (! flag_global_instrumentation)
4263     return;
4264 
4265   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4266     return;
4267 
4268   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4269   PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4270   PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4271 
4272   PFS_instr_class *old_class= pfs->m_class;
4273   if (old_class != NULL)
4274   {
4275     PFS_stage_stat *event_name_array;
4276     event_name_array= pfs_thread->m_instr_class_stages_stats;
4277     uint index= old_class->m_event_name_index;
4278 
4279     /* Finish old event */
4280     if (old_class->m_timed)
4281     {
4282       timer_value= get_timer_raw_value(stage_timer);;
4283       pfs->m_timer_end= timer_value;
4284 
4285       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4286       ulonglong stage_time= timer_value - pfs->m_timer_start;
4287       event_name_array[index].aggregate_value(stage_time);
4288     }
4289     else
4290     {
4291       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4292       event_name_array[index].aggregate_counted();
4293     }
4294 
4295     if (flag_events_stages_current)
4296     {
4297       pfs->m_end_event_id= pfs_thread->m_event_id;
4298       if (flag_events_stages_history)
4299         insert_events_stages_history(pfs_thread, pfs);
4300       if (flag_events_stages_history_long)
4301         insert_events_stages_history_long(pfs);
4302     }
4303 
4304     /* This stage event is now complete. */
4305     pfs->m_class= NULL;
4306 
4307     /* New waits will now be attached directly to the parent statement. */
4308     child_wait->m_event_id= parent_statement->m_event_id;
4309     child_wait->m_event_type= parent_statement->m_event_type;
4310     /* See below for new stages, that may overwrite this. */
4311   }
4312 
4313   /* Start new event */
4314 
4315   PFS_stage_class *new_klass= find_stage_class(key);
4316   if (unlikely(new_klass == NULL))
4317     return;
4318 
4319   if (! new_klass->m_enabled)
4320     return;
4321 
4322   pfs->m_class= new_klass;
4323   if (new_klass->m_timed)
4324   {
4325     /*
4326       Do not call the timer again if we have a
4327       TIMER_END for the previous stage already.
4328     */
4329     if (timer_value == 0)
4330       timer_value= get_timer_raw_value(stage_timer);
4331     pfs->m_timer_start= timer_value;
4332   }
4333   else
4334     pfs->m_timer_start= 0;
4335   pfs->m_timer_end= 0;
4336 
4337   if (flag_events_stages_current)
4338   {
4339     /* m_thread_internal_id is immutable and already set */
4340     DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4341     pfs->m_event_id= pfs_thread->m_event_id++;
4342     pfs->m_end_event_id= 0;
4343     pfs->m_source_file= src_file;
4344     pfs->m_source_line= src_line;
4345 
4346     /* New wait events will have this new stage as parent. */
4347     child_wait->m_event_id= pfs->m_event_id;
4348     child_wait->m_event_type= EVENT_TYPE_STAGE;
4349   }
4350 }
4351 
end_stage_v1()4352 static void end_stage_v1()
4353 {
4354   ulonglong timer_value= 0;
4355 
4356   PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4357   if (unlikely(pfs_thread == NULL))
4358     return;
4359 
4360   pfs_thread->m_stage= 0;
4361 
4362   if (! flag_global_instrumentation)
4363     return;
4364 
4365   if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4366     return;
4367 
4368   PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4369 
4370   PFS_instr_class *old_class= pfs->m_class;
4371   if (old_class != NULL)
4372   {
4373     PFS_stage_stat *event_name_array;
4374     event_name_array= pfs_thread->m_instr_class_stages_stats;
4375     uint index= old_class->m_event_name_index;
4376 
4377     /* Finish old event */
4378     if (old_class->m_timed)
4379     {
4380       timer_value= get_timer_raw_value(stage_timer);;
4381       pfs->m_timer_end= timer_value;
4382 
4383       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4384       ulonglong stage_time= timer_value - pfs->m_timer_start;
4385       event_name_array[index].aggregate_value(stage_time);
4386     }
4387     else
4388     {
4389       /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4390       event_name_array[index].aggregate_counted();
4391     }
4392 
4393     if (flag_events_stages_current)
4394     {
4395       pfs->m_end_event_id= pfs_thread->m_event_id;
4396       if (flag_events_stages_history)
4397         insert_events_stages_history(pfs_thread, pfs);
4398       if (flag_events_stages_history_long)
4399         insert_events_stages_history_long(pfs);
4400     }
4401 
4402     /* New waits will now be attached directly to the parent statement. */
4403     PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4404     PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4405     child_wait->m_event_id= parent_statement->m_event_id;
4406     child_wait->m_event_type= parent_statement->m_event_type;
4407 
4408     /* This stage is completed */
4409     pfs->m_class= NULL;
4410   }
4411 }
4412 
4413 static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state * state,PSI_statement_key key,const void * charset)4414 get_thread_statement_locker_v1(PSI_statement_locker_state *state,
4415                                PSI_statement_key key,
4416                                const void *charset)
4417 {
4418   DBUG_ASSERT(state != NULL);
4419   DBUG_ASSERT(charset != NULL);
4420 
4421   if (! flag_global_instrumentation)
4422     return NULL;
4423   PFS_statement_class *klass= find_statement_class(key);
4424   if (unlikely(klass == NULL))
4425     return NULL;
4426   if (! klass->m_enabled)
4427     return NULL;
4428 
4429   register uint flags;
4430 
4431   if (flag_thread_instrumentation)
4432   {
4433     PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4434     if (unlikely(pfs_thread == NULL))
4435       return NULL;
4436     if (! pfs_thread->m_enabled)
4437       return NULL;
4438     state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
4439     flags= STATE_FLAG_THREAD;
4440 
4441     if (klass->m_timed)
4442       flags|= STATE_FLAG_TIMED;
4443 
4444     if (flag_events_statements_current)
4445     {
4446       ulonglong event_id= pfs_thread->m_event_id++;
4447 
4448       if (pfs_thread->m_events_statements_count >= statement_stack_max)
4449       {
4450         return NULL;
4451       }
4452 
4453       pfs_thread->m_stmt_lock.allocated_to_dirty();
4454       PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
4455       /* m_thread_internal_id is immutable and already set */
4456       DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4457       pfs->m_event_id= event_id;
4458       pfs->m_end_event_id= 0;
4459       pfs->m_class= klass;
4460       pfs->m_timer_start= 0;
4461       pfs->m_timer_end= 0;
4462       pfs->m_lock_time= 0;
4463       pfs->m_current_schema_name_length= 0;
4464       pfs->m_sqltext_length= 0;
4465       pfs->m_sqltext_truncated= false;
4466       pfs->m_sqltext_cs_number= system_charset_info->number; /* default */
4467 
4468       pfs->m_message_text[0]= '\0';
4469       pfs->m_sql_errno= 0;
4470       pfs->m_sqlstate[0]= '\0';
4471       pfs->m_error_count= 0;
4472       pfs->m_warning_count= 0;
4473       pfs->m_rows_affected= 0;
4474 
4475       pfs->m_rows_sent= 0;
4476       pfs->m_rows_examined= 0;
4477       pfs->m_created_tmp_disk_tables= 0;
4478       pfs->m_created_tmp_tables= 0;
4479       pfs->m_select_full_join= 0;
4480       pfs->m_select_full_range_join= 0;
4481       pfs->m_select_range= 0;
4482       pfs->m_select_range_check= 0;
4483       pfs->m_select_scan= 0;
4484       pfs->m_sort_merge_passes= 0;
4485       pfs->m_sort_range= 0;
4486       pfs->m_sort_rows= 0;
4487       pfs->m_sort_scan= 0;
4488       pfs->m_no_index_used= 0;
4489       pfs->m_no_good_index_used= 0;
4490       pfs->m_digest_storage.reset();
4491 
4492       /* New stages will have this statement as parent */
4493       PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
4494       child_stage->m_nesting_event_id= event_id;
4495       child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;
4496 
4497       /* New waits will have this statement as parent, if no stage is instrumented */
4498       PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4499       child_wait->m_nesting_event_id= event_id;
4500       child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;
4501 
4502       state->m_statement= pfs;
4503       flags|= STATE_FLAG_EVENT;
4504 
4505       pfs_thread->m_events_statements_count++;
4506       pfs_thread->m_stmt_lock.dirty_to_allocated();
4507     }
4508   }
4509   else
4510   {
4511     if (klass->m_timed)
4512       flags= STATE_FLAG_TIMED;
4513     else
4514       flags= 0;
4515   }
4516 
4517   if (flag_statements_digest)
4518   {
4519     flags|= STATE_FLAG_DIGEST;
4520   }
4521 
4522   state->m_discarded= false;
4523   state->m_class= klass;
4524   state->m_flags= flags;
4525 
4526   state->m_lock_time= 0;
4527   state->m_rows_sent= 0;
4528   state->m_rows_examined= 0;
4529   state->m_created_tmp_disk_tables= 0;
4530   state->m_created_tmp_tables= 0;
4531   state->m_select_full_join= 0;
4532   state->m_select_full_range_join= 0;
4533   state->m_select_range= 0;
4534   state->m_select_range_check= 0;
4535   state->m_select_scan= 0;
4536   state->m_sort_merge_passes= 0;
4537   state->m_sort_range= 0;
4538   state->m_sort_rows= 0;
4539   state->m_sort_scan= 0;
4540   state->m_no_index_used= 0;
4541   state->m_no_good_index_used= 0;
4542 
4543   state->m_digest= NULL;
4544 
4545   state->m_schema_name_length= 0;
4546   state->m_cs_number= ((CHARSET_INFO *)charset)->number;
4547 
4548   return reinterpret_cast<PSI_statement_locker*> (state);
4549 }
4550 
4551 static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker * locker,PSI_statement_key key)4552 refine_statement_v1(PSI_statement_locker *locker,
4553                     PSI_statement_key key)
4554 {
4555   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4556   if (state == NULL)
4557     return NULL;
4558   DBUG_ASSERT(state->m_class != NULL);
4559   PFS_statement_class *klass;
4560   /* Only refine statements for mutable instrumentation */
4561   klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
4562   DBUG_ASSERT(klass->is_mutable());
4563   klass= find_statement_class(key);
4564 
4565   uint flags= state->m_flags;
4566 
4567   if (unlikely(klass == NULL) || !klass->m_enabled)
4568   {
4569     /* pop statement stack */
4570     if (flags & STATE_FLAG_THREAD)
4571     {
4572       PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4573       DBUG_ASSERT(pfs_thread != NULL);
4574       if (pfs_thread->m_events_statements_count > 0)
4575         pfs_thread->m_events_statements_count--;
4576     }
4577 
4578     state->m_discarded= true;
4579     return NULL;
4580   }
4581 
4582   if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
4583     flags= flags & ~STATE_FLAG_TIMED;
4584 
4585   if (flags & STATE_FLAG_EVENT)
4586   {
4587     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4588     DBUG_ASSERT(pfs != NULL);
4589 
4590     /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
4591     pfs->m_class= klass;
4592   }
4593 
4594   state->m_class= klass;
4595   state->m_flags= flags;
4596   return reinterpret_cast<PSI_statement_locker*> (state);
4597 }
4598 
start_statement_v1(PSI_statement_locker * locker,const char * db,uint db_len,const char * src_file,uint src_line)4599 static void start_statement_v1(PSI_statement_locker *locker,
4600                                const char *db, uint db_len,
4601                                const char *src_file, uint src_line)
4602 {
4603   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4604   DBUG_ASSERT(state != NULL);
4605 
4606   register uint flags= state->m_flags;
4607   ulonglong timer_start= 0;
4608 
4609   if (flags & STATE_FLAG_TIMED)
4610   {
4611     timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
4612     state->m_timer_start= timer_start;
4613   }
4614 
4615   compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
4616   DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
4617 
4618   if (db_len > 0)
4619     memcpy(state->m_schema_name, db, db_len);
4620   state->m_schema_name_length= db_len;
4621 
4622   if (flags & STATE_FLAG_EVENT)
4623   {
4624     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4625     DBUG_ASSERT(pfs != NULL);
4626 
4627     pfs->m_timer_start= timer_start;
4628     pfs->m_source_file= src_file;
4629     pfs->m_source_line= src_line;
4630 
4631     DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
4632     if (db_len > 0)
4633       memcpy(pfs->m_current_schema_name, db, db_len);
4634     pfs->m_current_schema_name_length= db_len;
4635   }
4636 }
4637 
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4638 static void set_statement_text_v1(PSI_statement_locker *locker,
4639                                   const char *text, uint text_len)
4640 {
4641   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4642   DBUG_ASSERT(state != NULL);
4643 
4644   if (state->m_discarded)
4645     return;
4646 
4647   if (state->m_flags & STATE_FLAG_EVENT)
4648   {
4649     PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4650     DBUG_ASSERT(pfs != NULL);
4651     if (text_len > sizeof (pfs->m_sqltext))
4652     {
4653       text_len= sizeof(pfs->m_sqltext);
4654       pfs->m_sqltext_truncated= true;
4655     }
4656     if (text_len)
4657       memcpy(pfs->m_sqltext, text, text_len);
4658     pfs->m_sqltext_length= text_len;
4659     pfs->m_sqltext_cs_number= state->m_cs_number;
4660   }
4661 
4662   return;
4663 }
4664 
/**
  Complete body of a void function that assigns a statement attribute.

  LOCKER is reinterpreted as a PSI_statement_locker_state pointer.
  A NULL or discarded locker is ignored. Otherwise VALUE is assigned to
  member ATTR of the locker state and, when STATE_FLAG_EVENT is set, to the
  member of the same name in the attached PFS_events_statements.
  VALUE is expanded once per assignment. The expansion ends with a return.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state=                                    \
    reinterpret_cast<PSI_statement_locker_state*> (LOCKER);             \
  if (unlikely(state == NULL) || state->m_discarded)                    \
    return;                                                             \
  state->ATTR= VALUE;                                                   \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs=                                         \
      reinterpret_cast<PFS_events_statements*> (state->m_statement);    \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR= VALUE;                                                   \
  }                                                                     \
  return;
4681 
/**
  Complete body of a void function that increments a statement attribute.

  LOCKER is reinterpreted as a PSI_statement_locker_state pointer.
  A NULL or discarded locker is ignored. Otherwise VALUE is added to
  member ATTR of the locker state and, when STATE_FLAG_EVENT is set, to the
  member of the same name in the attached PFS_events_statements.
  VALUE is expanded once per addition. The expansion ends with a return.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE)                    \
  PSI_statement_locker_state *state=                                    \
    reinterpret_cast<PSI_statement_locker_state*> (LOCKER);             \
  if (unlikely(state == NULL) || state->m_discarded)                    \
    return;                                                             \
  state->ATTR+= VALUE;                                                  \
  if (state->m_flags & STATE_FLAG_EVENT)                                \
  {                                                                     \
    PFS_events_statements *pfs=                                         \
      reinterpret_cast<PFS_events_statements*> (state->m_statement);    \
    DBUG_ASSERT(pfs != NULL);                                           \
    pfs->ATTR+= VALUE;                                                  \
  }                                                                     \
  return;
4698 
set_statement_lock_time_v1(PSI_statement_locker * locker,ulonglong count)4699 static void set_statement_lock_time_v1(PSI_statement_locker *locker,
4700                                        ulonglong count)
4701 {
4702   SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
4703 }
4704 
set_statement_rows_sent_v1(PSI_statement_locker * locker,ulonglong count)4705 static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
4706                                        ulonglong count)
4707 {
4708   SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
4709 }
4710 
set_statement_rows_examined_v1(PSI_statement_locker * locker,ulonglong count)4711 static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
4712                                            ulonglong count)
4713 {
4714   SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
4715 }
4716 
inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker * locker,ulong count)4717 static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
4718                                                     ulong count)
4719 {
4720   INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
4721 }
4722 
inc_statement_created_tmp_tables_v1(PSI_statement_locker * locker,ulong count)4723 static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
4724                                                 ulong count)
4725 {
4726   INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
4727 }
4728 
inc_statement_select_full_join_v1(PSI_statement_locker * locker,ulong count)4729 static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
4730                                               ulong count)
4731 {
4732   INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
4733 }
4734 
inc_statement_select_full_range_join_v1(PSI_statement_locker * locker,ulong count)4735 static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
4736                                                     ulong count)
4737 {
4738   INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
4739 }
4740 
inc_statement_select_range_v1(PSI_statement_locker * locker,ulong count)4741 static void inc_statement_select_range_v1(PSI_statement_locker *locker,
4742                                           ulong count)
4743 {
4744   INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
4745 }
4746 
inc_statement_select_range_check_v1(PSI_statement_locker * locker,ulong count)4747 static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
4748                                                 ulong count)
4749 {
4750   INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
4751 }
4752 
inc_statement_select_scan_v1(PSI_statement_locker * locker,ulong count)4753 static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
4754                                          ulong count)
4755 {
4756   INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
4757 }
4758 
inc_statement_sort_merge_passes_v1(PSI_statement_locker * locker,ulong count)4759 static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
4760                                                ulong count)
4761 {
4762   INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
4763 }
4764 
inc_statement_sort_range_v1(PSI_statement_locker * locker,ulong count)4765 static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
4766                                         ulong count)
4767 {
4768   INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
4769 }
4770 
inc_statement_sort_rows_v1(PSI_statement_locker * locker,ulong count)4771 static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
4772                                        ulong count)
4773 {
4774   INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
4775 }
4776 
inc_statement_sort_scan_v1(PSI_statement_locker * locker,ulong count)4777 static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
4778                                        ulong count)
4779 {
4780   INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
4781 }
4782 
set_statement_no_index_used_v1(PSI_statement_locker * locker)4783 static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
4784 {
4785   SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
4786 }
4787 
set_statement_no_good_index_used_v1(PSI_statement_locker * locker)4788 static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
4789 {
4790   SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
4791 }
4792 
end_statement_v1(PSI_statement_locker * locker,void * stmt_da)4793 static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
4794 {
4795   PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4796   Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
4797   DBUG_ASSERT(state != NULL);
4798   DBUG_ASSERT(da != NULL);
4799 
4800   if (state->m_discarded)
4801     return;
4802 
4803   PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
4804   DBUG_ASSERT(klass != NULL);
4805 
4806   ulonglong timer_end= 0;
4807   ulonglong wait_time= 0;
4808   register uint flags= state->m_flags;
4809 
4810   if (flags & STATE_FLAG_TIMED)
4811   {
4812     timer_end= state->m_timer();
4813     wait_time= timer_end - state->m_timer_start;
4814   }
4815 
4816   PFS_statement_stat *event_name_array;
4817   uint index= klass->m_event_name_index;
4818   PFS_statement_stat *stat;
4819 
4820   /*
4821    Capture statement stats by digest.
4822   */
4823   const sql_digest_storage *digest_storage= NULL;
4824   PFS_statement_stat *digest_stat= NULL;
4825 
4826   if (flags & STATE_FLAG_THREAD)
4827   {
4828     PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4829     DBUG_ASSERT(thread != NULL);
4830     event_name_array= thread->m_instr_class_statements_stats;
4831     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
4832     stat= & event_name_array[index];
4833 
4834     if (flags & STATE_FLAG_DIGEST)
4835     {
4836       digest_storage= state->m_digest;
4837 
4838       if (digest_storage != NULL)
4839       {
4840         /* Populate PFS_statements_digest_stat with computed digest information.*/
4841         digest_stat= find_or_create_digest(thread, digest_storage,
4842                                            state->m_schema_name,
4843                                            state->m_schema_name_length);
4844       }
4845     }
4846 
4847     if (flags & STATE_FLAG_EVENT)
4848     {
4849       PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4850       DBUG_ASSERT(pfs != NULL);
4851 
4852       thread->m_stmt_lock.allocated_to_dirty();
4853 
4854       switch(da->status())
4855       {
4856         case Diagnostics_area::DA_EMPTY:
4857           break;
4858         case Diagnostics_area::DA_OK:
4859           memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4860           pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4861           pfs->m_rows_affected= da->affected_rows();
4862           pfs->m_warning_count= da->statement_warn_count();
4863           memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
4864           break;
4865         case Diagnostics_area::DA_EOF:
4866           pfs->m_warning_count= da->statement_warn_count();
4867           break;
4868         case Diagnostics_area::DA_ERROR:
4869           memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4870           pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4871           pfs->m_sql_errno= da->sql_errno();
4872           pfs->m_error_count++;
4873           memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
4874           break;
4875         case Diagnostics_area::DA_DISABLED:
4876           break;
4877       }
4878 
4879       pfs->m_timer_end= timer_end;
4880       pfs->m_end_event_id= thread->m_event_id;
4881 
4882       if (digest_storage != NULL)
4883       {
4884         /*
4885           The following columns in events_statement_current:
4886           - DIGEST,
4887           - DIGEST_TEXT
4888           are computed from the digest storage.
4889         */
4890         pfs->m_digest_storage.copy(digest_storage);
4891       }
4892 
4893       if (flag_events_statements_history)
4894         insert_events_statements_history(thread, pfs);
4895       if (flag_events_statements_history_long)
4896         insert_events_statements_history_long(pfs);
4897 
4898       DBUG_ASSERT(thread->m_events_statements_count > 0);
4899       thread->m_events_statements_count--;
4900       thread->m_stmt_lock.dirty_to_allocated();
4901     }
4902   }
4903   else
4904   {
4905     if (flags & STATE_FLAG_DIGEST)
4906     {
4907       PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4908 
4909       /* An instrumented thread is required, for LF_PINS. */
4910       if (thread != NULL)
4911       {
4912         /* Set digest stat. */
4913         digest_storage= state->m_digest;
4914 
4915         if (digest_storage != NULL)
4916         {
4917           /* Populate statements_digest_stat with computed digest information. */
4918           digest_stat= find_or_create_digest(thread, digest_storage,
4919                                              state->m_schema_name,
4920                                              state->m_schema_name_length);
4921         }
4922       }
4923     }
4924 
4925     event_name_array= global_instr_class_statements_array;
4926     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
4927     stat= & event_name_array[index];
4928   }
4929 
4930   if (flags & STATE_FLAG_TIMED)
4931   {
4932     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
4933     stat->aggregate_value(wait_time);
4934   }
4935   else
4936   {
4937     /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
4938     stat->aggregate_counted();
4939   }
4940 
4941   stat->m_lock_time+= state->m_lock_time;
4942   stat->m_rows_sent+= state->m_rows_sent;
4943   stat->m_rows_examined+= state->m_rows_examined;
4944   stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4945   stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4946   stat->m_select_full_join+= state->m_select_full_join;
4947   stat->m_select_full_range_join+= state->m_select_full_range_join;
4948   stat->m_select_range+= state->m_select_range;
4949   stat->m_select_range_check+= state->m_select_range_check;
4950   stat->m_select_scan+= state->m_select_scan;
4951   stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4952   stat->m_sort_range+= state->m_sort_range;
4953   stat->m_sort_rows+= state->m_sort_rows;
4954   stat->m_sort_scan+= state->m_sort_scan;
4955   stat->m_no_index_used+= state->m_no_index_used;
4956   stat->m_no_good_index_used+= state->m_no_good_index_used;
4957 
4958   if (digest_stat != NULL)
4959   {
4960     if (flags & STATE_FLAG_TIMED)
4961     {
4962       digest_stat->aggregate_value(wait_time);
4963     }
4964     else
4965     {
4966       digest_stat->aggregate_counted();
4967     }
4968 
4969     digest_stat->m_lock_time+= state->m_lock_time;
4970     digest_stat->m_rows_sent+= state->m_rows_sent;
4971     digest_stat->m_rows_examined+= state->m_rows_examined;
4972     digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4973     digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4974     digest_stat->m_select_full_join+= state->m_select_full_join;
4975     digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
4976     digest_stat->m_select_range+= state->m_select_range;
4977     digest_stat->m_select_range_check+= state->m_select_range_check;
4978     digest_stat->m_select_scan+= state->m_select_scan;
4979     digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4980     digest_stat->m_sort_range+= state->m_sort_range;
4981     digest_stat->m_sort_rows+= state->m_sort_rows;
4982     digest_stat->m_sort_scan+= state->m_sort_scan;
4983     digest_stat->m_no_index_used+= state->m_no_index_used;
4984     digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
4985   }
4986 
4987   switch (da->status())
4988   {
4989     case Diagnostics_area::DA_EMPTY:
4990       break;
4991     case Diagnostics_area::DA_OK:
4992       stat->m_rows_affected+= da->affected_rows();
4993       stat->m_warning_count+= da->statement_warn_count();
4994       if (digest_stat != NULL)
4995       {
4996         digest_stat->m_rows_affected+= da->affected_rows();
4997         digest_stat->m_warning_count+= da->statement_warn_count();
4998       }
4999       break;
5000     case Diagnostics_area::DA_EOF:
5001       stat->m_warning_count+= da->statement_warn_count();
5002       if (digest_stat != NULL)
5003       {
5004         digest_stat->m_warning_count+= da->statement_warn_count();
5005       }
5006       break;
5007     case Diagnostics_area::DA_ERROR:
5008       stat->m_error_count++;
5009       if (digest_stat != NULL)
5010       {
5011         digest_stat->m_error_count++;
5012       }
5013       break;
5014     case Diagnostics_area::DA_DISABLED:
5015       break;
5016   }
5017 }
5018 
5019 /**
5020   Implementation of the socket instrumentation interface.
5021   @sa PSI_v1::end_socket_wait.
5022 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5023 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5024 {
5025   PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5026   DBUG_ASSERT(state != NULL);
5027 
5028   PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5029   DBUG_ASSERT(socket != NULL);
5030 
5031   ulonglong timer_end= 0;
5032   ulonglong wait_time= 0;
5033   PFS_byte_stat *byte_stat;
5034   register uint flags= state->m_flags;
5035   size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5036 
5037   switch (state->m_operation)
5038   {
5039     /* Group read operations */
5040     case PSI_SOCKET_RECV:
5041     case PSI_SOCKET_RECVFROM:
5042     case PSI_SOCKET_RECVMSG:
5043       byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5044       break;
5045     /* Group write operations */
5046     case PSI_SOCKET_SEND:
5047     case PSI_SOCKET_SENDTO:
5048     case PSI_SOCKET_SENDMSG:
5049       byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5050       break;
5051     /* Group remaining operations as miscellaneous */
5052     case PSI_SOCKET_CONNECT:
5053     case PSI_SOCKET_CREATE:
5054     case PSI_SOCKET_BIND:
5055     case PSI_SOCKET_SEEK:
5056     case PSI_SOCKET_OPT:
5057     case PSI_SOCKET_STAT:
5058     case PSI_SOCKET_SHUTDOWN:
5059     case PSI_SOCKET_SELECT:
5060     case PSI_SOCKET_CLOSE:
5061       byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5062       break;
5063     default:
5064       DBUG_ASSERT(false);
5065       byte_stat= NULL;
5066       break;
5067   }
5068 
5069   /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5070   if (flags & STATE_FLAG_TIMED)
5071   {
5072     timer_end= state->m_timer();
5073     wait_time= timer_end - state->m_timer_start;
5074 
5075     /* Aggregate to the socket instrument for now (timed) */
5076     byte_stat->aggregate(wait_time, bytes);
5077   }
5078   else
5079   {
5080     /* Aggregate to the socket instrument (event count and byte count) */
5081     byte_stat->aggregate_counted(bytes);
5082   }
5083 
5084   /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5085   if (flags & STATE_FLAG_EVENT)
5086   {
5087     PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5088     DBUG_ASSERT(thread != NULL);
5089     PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5090     DBUG_ASSERT(wait != NULL);
5091 
5092     wait->m_timer_end= timer_end;
5093     wait->m_end_event_id= thread->m_event_id;
5094     wait->m_number_of_bytes= bytes;
5095 
5096     if (flag_events_waits_history)
5097       insert_events_waits_history(thread, wait);
5098     if (flag_events_waits_history_long)
5099       insert_events_waits_history_long(wait);
5100     thread->m_events_waits_current--;
5101 
5102     DBUG_ASSERT(wait == thread->m_events_waits_current);
5103   }
5104 }
5105 
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5106 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5107 {
5108   DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5109   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5110   DBUG_ASSERT(pfs != NULL);
5111   DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5112   DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5113   pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5114 }
5115 
5116 /**
5117   Set socket descriptor and address info.
5118 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5119 static void set_socket_info_v1(PSI_socket *socket,
5120                                const my_socket *fd,
5121                                const struct sockaddr *addr,
5122                                socklen_t addr_len)
5123 {
5124   PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5125   DBUG_ASSERT(pfs != NULL);
5126 
5127   /** Set socket descriptor */
5128   if (fd != NULL)
5129     pfs->m_fd= *fd;
5130 
5131   /** Set raw socket address and length */
5132   if (likely(addr != NULL && addr_len > 0))
5133   {
5134     pfs->m_addr_len= addr_len;
5135 
5136     /** Restrict address length to size of struct */
5137     if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5138       pfs->m_addr_len= sizeof(struct sockaddr_storage);
5139 
5140     memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5141   }
5142 }
5143 
5144 /**
5145   Implementation of the socket instrumentation interface.
5146   @sa PSI_v1::set_socket_info.
5147 */
set_socket_thread_owner_v1(PSI_socket * socket)5148 static void set_socket_thread_owner_v1(PSI_socket *socket)
5149 {
5150   PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
5151   DBUG_ASSERT(pfs_socket != NULL);
5152   pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5153 }
5154 
5155 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5156 pfs_digest_start_v1(PSI_statement_locker *locker)
5157 {
5158   PSI_statement_locker_state *statement_state;
5159   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5160   DBUG_ASSERT(statement_state != NULL);
5161 
5162   if (statement_state->m_discarded)
5163     return NULL;
5164 
5165   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5166   {
5167     return reinterpret_cast<PSI_digest_locker*> (locker);
5168   }
5169 
5170   return NULL;
5171 }
5172 
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5173 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5174 {
5175   PSI_statement_locker_state *statement_state;
5176   statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5177   DBUG_ASSERT(statement_state != NULL);
5178   DBUG_ASSERT(digest != NULL);
5179 
5180   if (statement_state->m_discarded)
5181     return;
5182 
5183   if (statement_state->m_flags & STATE_FLAG_DIGEST)
5184   {
5185     statement_state->m_digest= digest;
5186   }
5187 }
5188 
5189 /**
5190   Implementation of the thread attribute connection interface
5191   @sa PSI_v1::set_thread_connect_attr.
5192 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5193 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5194                                        const void *from_cs)
5195 {
5196 
5197   PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5198 
5199   DBUG_ASSERT(buffer != NULL);
5200 
5201   if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5202   {
5203     const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5204 
5205     /* copy from the input buffer as much as we can fit */
5206     uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5207                            length : session_connect_attrs_size_per_thread);
5208     thd->m_session_lock.allocated_to_dirty();
5209     memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5210     thd->m_session_connect_attrs_length= copy_size;
5211     thd->m_session_connect_attrs_cs_number= cs->number;
5212     thd->m_session_lock.dirty_to_allocated();
5213 
5214     if (copy_size == length)
5215       return 0;
5216 
5217     session_connect_attrs_lost++;
5218     return 1;
5219   }
5220   return 0;
5221 }
5222 
5223 
/**
  Implementation of the instrumentation interface.
  This table is a positional aggregate initializer: each entry fills the
  next member of PSI_v1 in declaration order, so entries must not be
  reordered, removed, or inserted without matching the PSI_v1 declaration.
  get_interface() returns the address of this table for PSI_VERSION_1.
  @sa PSI_v1.
*/
PSI_v1 PFS_v1=
{
  /* register_* entries */
  register_mutex_v1,
  register_rwlock_v1,
  register_cond_v1,
  register_thread_v1,
  register_file_v1,
  register_stage_v1,
  register_statement_v1,
  register_socket_v1,
  /* init_* / destroy_* entries */
  init_mutex_v1,
  destroy_mutex_v1,
  init_rwlock_v1,
  destroy_rwlock_v1,
  init_cond_v1,
  destroy_cond_v1,
  init_socket_v1,
  destroy_socket_v1,
  /* table share and table entries */
  get_table_share_v1,
  release_table_share_v1,
  drop_table_share_v1,
  open_table_v1,
  unbind_table_v1,
  rebind_table_v1,
  close_table_v1,
  create_file_v1,
  /* thread entries */
  spawn_thread_v1,
  new_thread_v1,
  set_thread_id_v1,
  get_thread_v1,
  set_thread_user_v1,
  set_thread_account_v1,
  set_thread_db_v1,
  set_thread_command_v1,
  set_thread_start_time_v1,
  set_thread_state_v1,
  set_thread_info_v1,
  set_thread_v1,
  delete_current_thread_v1,
  delete_thread_v1,
  /* file locker entries */
  get_thread_file_name_locker_v1,
  get_thread_file_stream_locker_v1,
  get_thread_file_descriptor_locker_v1,
  /* unlock / signal / broadcast entries */
  unlock_mutex_v1,
  unlock_rwlock_v1,
  signal_cond_v1,
  broadcast_cond_v1,
  /* start_* / end_* wait entries */
  start_idle_wait_v1,
  end_idle_wait_v1,
  start_mutex_wait_v1,
  end_mutex_wait_v1,
  /*
    The same start function fills both the read and the write slot;
    only the end functions differ (rdwait / wrwait).
  */
  start_rwlock_wait_v1, /* read */
  end_rwlock_rdwait_v1,
  start_rwlock_wait_v1, /* write */
  end_rwlock_wrwait_v1,
  start_cond_wait_v1,
  end_cond_wait_v1,
  start_table_io_wait_v1,
  end_table_io_wait_v1,
  start_table_lock_wait_v1,
  end_table_lock_wait_v1,
  start_file_open_wait_v1,
  end_file_open_wait_v1,
  end_file_open_wait_and_bind_to_descriptor_v1,
  start_file_wait_v1,
  end_file_wait_v1,
  start_file_close_wait_v1,
  end_file_close_wait_v1,
  /* stage entries */
  start_stage_v1,
  end_stage_v1,
  /* statement entries */
  get_thread_statement_locker_v1,
  refine_statement_v1,
  start_statement_v1,
  set_statement_text_v1,
  set_statement_lock_time_v1,
  set_statement_rows_sent_v1,
  set_statement_rows_examined_v1,
  inc_statement_created_tmp_disk_tables_v1,
  inc_statement_created_tmp_tables_v1,
  inc_statement_select_full_join_v1,
  inc_statement_select_full_range_join_v1,
  inc_statement_select_range_v1,
  inc_statement_select_range_check_v1,
  inc_statement_select_scan_v1,
  inc_statement_sort_merge_passes_v1,
  inc_statement_sort_range_v1,
  inc_statement_sort_rows_v1,
  inc_statement_sort_scan_v1,
  set_statement_no_index_used_v1,
  set_statement_no_good_index_used_v1,
  end_statement_v1,
  /* socket entries */
  start_socket_wait_v1,
  end_socket_wait_v1,
  set_socket_state_v1,
  set_socket_info_v1,
  set_socket_thread_owner_v1,
  /* digest entries */
  pfs_digest_start_v1,
  pfs_digest_end_v1,
  /* session connect attributes entry */
  set_thread_connect_attrs_v1,
};
5328 
get_interface(int version)5329 static void* get_interface(int version)
5330 {
5331   switch (version)
5332   {
5333   case PSI_VERSION_1:
5334     return &PFS_v1;
5335   default:
5336     return NULL;
5337   }
5338 }
5339 
5340 C_MODE_END
5341 
/**
  Bootstrap object of the performance schema instrumentation.
  Its single entry is get_interface, which returns the PFS_v1 table for
  PSI_VERSION_1 and NULL for any other version.
*/
struct PSI_bootstrap PFS_bootstrap=
{
  get_interface
};
5346