1 /* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
22
23 /**
24 @file storage/perfschema/pfs.cc
25 The performance schema implementation of all instruments.
26 */
27 #include "my_global.h"
28 #include "thr_lock.h"
29 #include "mysql/psi/psi.h"
30 #include "mysql/psi/mysql_thread.h"
31 #include "my_pthread.h"
32 #include "sql_const.h"
33 #include "pfs.h"
34 #include "pfs_instr_class.h"
35 #include "pfs_instr.h"
36 #include "pfs_host.h"
37 #include "pfs_user.h"
38 #include "pfs_account.h"
39 #include "pfs_global.h"
40 #include "pfs_column_values.h"
41 #include "pfs_timer.h"
42 #include "pfs_events_waits.h"
43 #include "pfs_events_stages.h"
44 #include "pfs_events_statements.h"
45 #include "pfs_setup_actor.h"
46 #include "pfs_setup_object.h"
47 #include "sql_error.h"
48 #include "sp_head.h"
49 #include "pfs_digest.h"
50
51 /**
52 @page PAGE_PERFORMANCE_SCHEMA The Performance Schema main page
53 MySQL PERFORMANCE_SCHEMA implementation.
54
55 @section INTRO Introduction
56 The PERFORMANCE_SCHEMA is a way to introspect the internal execution of
57 the server at runtime.
58 The performance schema focuses primarily on performance data,
59 as opposed to the INFORMATION_SCHEMA whose purpose is to inspect metadata.
60
61 From a user point of view, the performance schema consists of:
62 - a dedicated database schema, named PERFORMANCE_SCHEMA,
63 - SQL tables, used to query the server internal state or change
64 configuration settings.
65
66 From an implementation point of view, the performance schema is a dedicated
67 Storage Engine which exposes data collected by 'Instrumentation Points'
68 placed in the server code.
69
70 @section INTERFACES Multiple interfaces
71
72 The performance schema exposes many different interfaces,
73 for different components, and for different purposes.
74
75 @subsection INT_INSTRUMENTING Instrumenting interface
76
77 All the data representing the server internal state exposed
78 in the performance schema must be first collected:
79 this is the role of the instrumenting interface.
80 The instrumenting interface is a coding interface provided
81 by implementors (of the performance schema) to implementors
82 (of the server or server components).
83
84 This interface is available to:
85 - C implementations
86 - C++ implementations
87 - the core SQL layer (/sql)
88 - the mysys library (/mysys)
89 - MySQL plugins, including storage engines,
90 - third party plugins, including third party storage engines.
91
92 For details, see the @ref PAGE_INSTRUMENTATION_INTERFACE
93 "instrumentation interface page".
94
95 @subsection INT_COMPILING Compiling interface
96
97 The implementation of the performance schema can be enabled or disabled at
98 build time, when building MySQL from the source code.
99
100 When building with the performance schema code, some compilation flags
101 are available to change the default values used in the code, if required.
102
103 For more details, see:
104 @verbatim ./configure --help @endverbatim
105
106 To compile with the performance schema:
107 @verbatim ./configure --with-perfschema @endverbatim
108
109 The implementation of all the compiling options is located in
110 @verbatim ./storage/perfschema/plug.in @endverbatim
111
112 @subsection INT_STARTUP Server startup interface
113
114 The server startup interface consists of the "./mysqld ..."
115 command line used to start the server.
116 When the performance schema is compiled in the server binary,
117 extra command line options are available.
118
119 These extra start options allow the DBA to:
120 - enable or disable the performance schema
121 - specify some sizing parameters.
122
123 To see help for the performance schema startup options, see:
124 @verbatim ./sql/mysqld --verbose --help @endverbatim
125
126 The implementation of all the startup options is located in
127 @verbatim ./sql/mysqld.cc, my_long_options[] @endverbatim
128
129 @subsection INT_BOOTSTRAP Server bootstrap interface
130
131 The bootstrap interface is a private interface exposed by
132 the performance schema, and used by the SQL layer.
133 Its role is to advertise all the SQL tables natively
134 supported by the performance schema to the SQL server.
135 The code consists of creating MySQL tables for the
136 performance schema itself, and is used in './mysqld --bootstrap'
137 mode when a server is installed.
138
139 The implementation of the database creation script is located in
140 @verbatim ./scripts/mysql_performance_tables.sql @endverbatim
141
142 @subsection INT_CONFIG Runtime configuration interface
143
144 When the performance schema is used at runtime, various configuration
145 parameters can be used to specify what kind of data is collected,
146 what kind of aggregations are computed, what kind of timers are used,
147 what events are timed, etc.
148
149 For all these capabilities, not a single statement or special syntax
150 was introduced in the parser.
151 Instead of new SQL statements, the interface consists of DML
152 (SELECT, INSERT, UPDATE, DELETE) against special "SETUP" tables.
153
154 For example:
155 @verbatim mysql> update performance_schema.SETUP_INSTRUMENTS
156 set ENABLED='YES', TIMED='YES';
157 Query OK, 234 rows affected (0.00 sec)
158 Rows matched: 234 Changed: 234 Warnings: 0 @endverbatim
159
160 @subsection INT_STATUS Internal audit interface
161
162 The internal audit interface is provided to the DBA to inspect if the
163 performance schema code itself is functioning properly.
164 This interface is necessary because a failure caused while
165 instrumenting code in the server should not cause failures in the
166 MySQL server itself, so that the performance schema implementation
167 never raises errors during runtime execution.
168
169 This auditing interface consists of:
170 @verbatim SHOW ENGINE PERFORMANCE_SCHEMA STATUS; @endverbatim
171 It displays data related to the memory usage of the performance schema,
172 as well as statistics about lost events, if any.
173
174 The SHOW STATUS command is implemented in
175 @verbatim ./storage/perfschema/pfs_engine_table.cc @endverbatim
176
177 @subsection INT_QUERY Query interface
178
179 The query interface is used to query the internal state of a running server.
180 It is provided as SQL tables.
181
182 For example:
183 @verbatim mysql> select * from performance_schema.EVENTS_WAITS_CURRENT;
184 @endverbatim
185
186 @section DESIGN_PRINCIPLES Design principles
187
188 @subsection PRINCIPLE_BEHAVIOR No behavior changes
189
190 The primary goal of the performance schema is to measure (instrument) the
191 execution of the server. A good measure should not cause any change
192 in behavior.
193
194 To achieve this, the overall design of the performance schema complies
195 with the following very severe design constraints:
196
197 The parser is unchanged. There are no new keywords, no new statements.
198 This guarantees that existing applications will run the same way with or
199 without the performance schema.
200
201 All the instrumentation points return "void", there are no error codes.
202 Even if the performance schema internally fails, execution of the server
203 code will proceed.
204
205 None of the instrumentation points allocate memory.
206 All the memory used by the performance schema is pre-allocated at startup,
207 and is considered "static" during the server life time.
208
209 None of the instrumentation points use any pthread_mutex, pthread_rwlock,
210 or pthread_cond (or platform equivalents).
211 Executing the instrumentation point should not cause thread scheduling to
212 change in the server.
213
214 In other words, the implementation of the instrumentation points,
215 including all the code called by the instrumentation points, is:
216 - malloc free
217 - mutex free
218 - rwlock free
219
220 TODO: All the code located in storage/perfschema is malloc free,
221 but unfortunately the usage of LF_HASH introduces some memory allocation.
222 This should be revised if possible, to use a lock-free,
223 malloc-free hash code table.
224
225 @subsection PRINCIPLE_PERFORMANCE No performance hit
226
227 The instrumentation of the server should be as fast as possible.
228 In cases when there are choices between:
229 - doing some processing when recording the performance data
230 in the instrumentation,
231 - doing some processing when retrieving the performance data,
232
233 priority is given in the design to make the instrumentation faster,
234 pushing some complexity to data retrieval.
235
236 As a result, some parts of the design, related to:
237 - the setup code path,
238 - the query code path,
239
240 might appear to be sub-optimal.
241
242 The criterion used here is to optimize primarily the critical path (data
243 collection), possibly at the expense of non-critical code paths.
244
245 @subsection PRINCIPLE_NOT_INTRUSIVE Unintrusive instrumentation
246
247 For the performance schema in general to be successful, the barrier
248 of entry for a developer should be low, so it's easy to instrument code.
249
250 In particular, the instrumentation interface:
251 - is available for C and C++ code (so it's a C interface),
252 - does not require parameters that the calling code can't easily provide,
253 - supports partial instrumentation (for example, instrumenting mutexes does
254 not require that every mutex is instrumented)
255
256 @subsection PRINCIPLE_EXTENDABLE Extendable instrumentation
257
258 As the content of the performance schema improves,
259 with more tables exposed and more data collected,
260 the instrumentation interface will also be augmented
261 to support instrumenting new concepts.
262 Existing instrumentations should not be affected when additional
263 instrumentation is made available, and making a new instrumentation
264 available should not require existing instrumented code to support it.
265
266 @subsection PRINCIPLE_VERSIONED Versioned instrumentation
267
268 Given that the instrumentation offered by the performance schema will
269 be augmented with time, when more features are implemented,
270 the interface itself should be versioned, to keep compatibility
271 with previous instrumented code.
272
273 For example, after both plugin-A and plugin-B have been instrumented for
274 mutexes, read write locks and conditions, using the instrumentation
275 interface, we can anticipate that the instrumentation interface
276 is expanded to support file based operations.
277
278 Plugin-A, a file based storage engine, will most likely use the expanded
279 interface and instrument its file usage, using the version 2
280 interface, while Plugin-B, a network based storage engine, will not change
281 its code and not release a new binary.
282
283 When later the instrumentation interface is expanded to support network
284 based operations (which will define interface version 3), the Plugin-B code
285 can then be changed to make use of it.
286
287 Note, this is just an example to illustrate the design concept here.
288 Both mutexes and file instrumentation are already available
289 since version 1 of the instrumentation interface.
290
291 @subsection PRINCIPLE_DEPLOYMENT Easy deployment
292
293 Internally, we might want every plugin implementation to upgrade the
294 instrumented code to the latest available, but this will cause additional
295 work and this is not practical if the code change is monolithic.
296
297 Externally, for third party plugin implementors, asking implementors to
298 always stay aligned to the latest instrumentation and make new releases,
299 even when the change does not provide new functionality for them,
300 is a bad idea.
301
302 For example, requiring a network based engine to re-release because the
303 instrumentation interface changed for file based operations, will create
304 too many deployment issues.
305
306 So, the performance schema implementation must support concurrently,
307 in the same deployment, multiple versions of the instrumentation
308 interface, and ensure binary compatibility with each version.
309
310 In addition to this, the performance schema can be included or excluded
311 from the server binary, using build time configuration options.
312
313 Regardless, the following types of deployment are valid:
314 - a server supporting the performance schema + a storage engine
315 that is not instrumented
316 - a server not supporting the performance schema + a storage engine
317 that is instrumented
318 */
319
320 /**
321 @page PAGE_INSTRUMENTATION_INTERFACE Performance schema: instrumentation interface page.
322 MySQL performance schema instrumentation interface.
323
324 @section INTRO Introduction
325
326 The instrumentation interface consists of two layers:
327 - a raw ABI (Application Binary Interface) layer, that exposes the primitive
328 instrumentation functions exported by the performance schema instrumentation
329 - an API (Application Programming Interface) layer,
330 that provides many helpers for a developer instrumenting some code,
331 to make the instrumentation as easy as possible.
332
333 The ABI layer consists of:
334 @code
335 #include "mysql/psi/psi.h"
336 @endcode
337
338 The API layer consists of:
339 @code
340 #include "mysql/psi/mysql_thread.h"
341 #include "mysql/psi/mysql_file.h"
342 @endcode
343
344 The first helper is for mutexes, rwlocks and conditions,
345 the second for file io.
346
347 The API layer exposes C macros and typedefs which will expand:
348 - either to non-instrumented code, when compiled without the performance
349 schema instrumentation
350 - or to instrumented code, that will issue the raw calls to the ABI layer
351 so that the implementation can collect data.
352
353 Note that all the names introduced (for example, @c mysql_mutex_lock) do not
354 collide with any other namespace.
355 In particular, the macro @c mysql_mutex_lock is on purpose not named
356 @c pthread_mutex_lock.
357 This is to:
358 - avoid overloading @c pthread_mutex_lock with yet another macro,
359 which is dangerous as it can affect user code and pollute
360 the end-user namespace.
361 - allow the developer instrumenting code to selectively instrument
362 some code but not all.
363
364 @section PRINCIPLES Design principles
365
366 The ABI part is designed as a facade, that exposes basic primitives.
367 The expectation is that each primitive will be very stable over time,
368 but the list will constantly grow when more instruments are supported.
369 To support binary compatibility with plugins compiled with a different
370 version of the instrumentation, the ABI itself is versioned
371 (see @c PSI_v1, @c PSI_v2).
372
373 For a given instrumentation point in the API, the basic coding pattern
374 used is:
375 - (a) notify the performance schema of the operation
376 about to be performed.
377 - (b) execute the instrumented code.
378 - (c) notify the performance schema that the operation
379 is completed.
380
381 An opaque "locker" pointer is returned by (a), that is given to (c).
382 This pointer helps the implementation to keep context, for performance.
383
384 The following code fragment is annotated to show how in detail this pattern
385 is implemented, when the instrumentation is compiled in:
386
387 @verbatim
388 static inline int mysql_mutex_lock(
389 mysql_mutex_t *that, myf flags, const char *src_file, uint src_line)
390 {
391 int result;
392 struct PSI_mutex_locker_state state;
393 struct PSI_mutex_locker *locker= NULL;
394
395 ............... (a)
396 locker= PSI_server->start_mutex_wait(&state, that->p_psi,
397 PSI_MUTEX_LOCK, locker, src_file, src_line);
398
399 ............... (b)
400 result= pthread_mutex_lock(&that->m_mutex);
401
402 ............... (c)
403 PSI_server->end_mutex_wait(locker, result);
404
405 return result;
406 }
407 @endverbatim
408
409 When the performance schema instrumentation is not compiled in,
410 the code becomes simply a wrapper, expanded in line by the compiler:
411
412 @verbatim
413 static inline int mysql_mutex_lock(...)
414 {
415 int result;
416
417 ............... (b)
418 result= pthread_mutex_lock(&that->m_mutex);
419
420 return result;
421 }
422 @endverbatim
423 */
424
425 /**
426 @page PAGE_AGGREGATES Performance schema: the aggregates page.
427 Performance schema aggregates.
428
429 @section INTRO Introduction
430
431 Aggregates tables are tables that can be formally defined as
432 SELECT ... from EVENTS_WAITS_HISTORY_INFINITE ... group by 'group clause'.
433
434 Each group clause defines a different kind of aggregate, and corresponds to
435 a different table exposed by the performance schema.
436
437 Aggregates can be either:
438 - computed on the fly,
439 - computed on demand, based on other available data.
440
441 'EVENTS_WAITS_HISTORY_INFINITE' is a table that does not exist,
442 the best approximation is EVENTS_WAITS_HISTORY_LONG.
443 Aggregates computed on the fly in fact are based on EVENTS_WAITS_CURRENT,
444 while aggregates computed on demand are based on other
445 EVENTS_WAITS_SUMMARY_BY_xxx tables.
446
447 To better understand the implementation itself, a bit of math is
448 required first, to understand the model behind the code:
449 the code is deceptively simple, the real complexity resides
450 in the flyweight of pointers between various performance schema buffers.
451
452 @section DIMENSION Concept of dimension
453
454 An event measured by the instrumentation has many attributes.
455 An event is represented as a data point P(x1, x2, ..., xN),
456 where each x_i coordinate represents a given attribute value.
457
458 Examples of attributes are:
459 - the time waited
460 - the object waited on
461 - the instrument waited on
462 - the thread that waited
463 - the operation performed
464 - per object or per operation additional attributes, such as spins,
465 number of bytes, etc.
466
467 Computing an aggregate per thread is fundamentally different from
468 computing an aggregate by instrument, so the "_BY_THREAD" and
469 "_BY_EVENT_NAME" aggregates are different dimensions,
470 operating on different x_i and x_j coordinates.
471 These aggregates are "orthogonal".
472
473 @section PROJECTION Concept of projection
474
475 A given x_i attribute value can convey either just one basic information,
476 such as a number of bytes, or can convey implied information,
477 such as an object fully qualified name.
478
479 For example, from the value "test.t1", the name of the object schema
480 "test" can be separated from the object name "t1", so that now aggregates
481 by object schema can be implemented.
482
483 In math terms, that corresponds to defining a function:
484 F_i (x): x --> y
485 Applying this function to our point P gives another point P':
486
487 F_i (P):
488 P(x1, x2, ..., x{i-1}, x_i, x{i+1}, ..., x_N)
489 --> P' (x1, x2, ..., x{i-1}, f_i(x_i), x{i+1}, ..., x_N)
490
491 That function in fact defines an aggregate!
492 In SQL terms, this aggregate would look like the following table:
493
494 @verbatim
495 CREATE VIEW EVENTS_WAITS_SUMMARY_BY_Func_i AS
496 SELECT col_1, col_2, ..., col_{i-1},
497 Func_i(col_i),
498 COUNT(col_i),
499 MIN(col_i), AVG(col_i), MAX(col_i), -- if col_i is a numeric value
500 col_{i+1}, ..., col_N
501 FROM EVENTS_WAITS_HISTORY_INFINITE
502 group by col_1, col_2, ..., col_{i-1}, col{i+1}, ..., col_N.
503 @endverbatim
504
505 Note that not all columns have to be included,
506 in particular some columns that are dependent on the x_i column should
507 be removed, so that in practice, MySQL's aggregation method tends to
508 remove many attributes at each aggregation steps.
509
510 For example, when aggregating wait events by object instances,
511 - the wait_time and number_of_bytes can be summed,
512 and sum(wait_time) now becomes an object instance attribute.
513 - the source, timer_start, timer_end columns are not in the
514 _BY_INSTANCE table, because these attributes are only
515 meaningful for a wait.
516
517 @section COMPOSITION Concept of composition
518
519 Now, the "test.t1" --> "test" example was purely theory,
520 just to explain the concept, and does not lead very far.
521 Let's look at a more interesting example of data that can be derived
522 from the row event.
523
524 An event creates a transient object, PFS_wait_locker, per operation.
525 This object's life cycle is extremely short: it's created just
526 before the start_wait() instrumentation call, and is destroyed in
527 the end_wait() call.
528
529 The wait locker itself contains a pointer to the object instance
530 waited on.
531 That allows to implement a wait_locker --> object instance projection,
532 with m_target.
533 The object instance life cycle depends on _init and _destroy calls
534 from the code, such as mysql_mutex_init()
535 and mysql_mutex_destroy() for a mutex.
536
537 The object instance waited on contains a pointer to the object class,
538 which is represented by the instrument name.
539 That allows to implement an object instance --> object class projection.
540 The object class life cycle is permanent, as instruments are loaded in
541 the server and never removed.
542
543 The object class is named in such a way
544 (for example, "wait/sync/mutex/sql/LOCK_open",
545 "wait/io/file/maria/data_file") that the component ("sql", "maria")
546 that it belongs to can be inferred.
547 That allows to implement an object class --> server component projection.
548
549 Back to math again, we have, for example for mutexes:
550
551 F1 (l) : PFS_wait_locker l --> PFS_mutex m = l->m_target.m_mutex
552
553 F1_to_2 (m) : PFS_mutex m --> PFS_mutex_class i = m->m_class
554
555 F2_to_3 (i) : PFS_mutex_class i --> const char *component =
556 substring(i->m_name, ...)
557
558 Per components aggregates are not implemented, this is just an illustration.
559
560 F1 alone defines this aggregate:
561
562 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_INSTANCE
563 (or MUTEX_INSTANCE)
564
565 F1_to_2 alone could define this aggregate:
566
567 EVENTS_WAITS_SUMMARY_BY_INSTANCE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
568
569 Alternatively, using function composition, with
570 F2 = F1_to_2 o F1, F2 defines:
571
572 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_EVENT_NAME
573
574 Likewise, F_2_to_3 defines:
575
576 EVENTS_WAITS_SUMMARY_BY_EVENT_NAME --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
577
578 and F3 = F_2_to_3 o F_1_to_2 o F1 defines:
579
580 EVENTS_WAITS_HISTORY_INFINITE --> EVENTS_WAITS_SUMMARY_BY_COMPONENT
581
582 What has all this to do with the code ?
583
584 Functions (or aggregates) such as F_3 are not implemented as is.
585 Instead, they are decomposed into F_2_to_3 o F_1_to_2 o F1,
586 and each intermediate aggregate is stored into an internal buffer.
587 This allows to support every F1, F2, F3 aggregates from shared
588 internal buffers, where computation already performed to compute F2
589 is reused when computing F3.
590
591 @section OBJECT_GRAPH Object graph
592
593 In terms of object instances, or records, pointers between
594 different buffers define an object instance graph.
595
596 For example, assuming the following scenario:
597 - A mutex class "M" is instrumented, the instrument name
598 is "wait/sync/mutex/sql/M"
599 - This mutex instrument has been instantiated twice,
600 mutex instances are noted M-1 and M-2
601 - Threads T-A and T-B are locking mutex instance M-1
602 - Threads T-C and T-D are locking mutex instance M-2
603
604 The performance schema will record the following data:
605 - EVENTS_WAITS_CURRENT has 4 rows, one for each mutex locker
606 - EVENTS_WAITS_SUMMARY_BY_INSTANCE shows 2 rows, for M-1 and M-2
607 - EVENTS_WAITS_SUMMARY_BY_EVENT_NAME shows 1 row, for M
608
609 The graph of structures will look like:
610
611 @verbatim
612 PFS_wait_locker (T-A, M-1) ----------
613 |
614 v
615 PFS_mutex (M-1)
616 - m_wait_stat ------------
617 ^ |
618 | |
619 PFS_wait_locker (T-B, M-1) ---------- |
620 v
621 PFS_mutex_class (M)
622 - m_wait_stat
623 PFS_wait_locker (T-C, M-2) ---------- ^
624 | |
625 v |
626 PFS_mutex (M-2) |
627 - m_wait_stat ------------
628 ^
629 |
630 PFS_wait_locker (T-D, M-2) ----------
631
632 || || ||
633 || || ||
634 vv vv vv
635
636 EVENTS_WAITS_CURRENT ..._SUMMARY_BY_INSTANCE ..._SUMMARY_BY_EVENT_NAME
637 @endverbatim
638
639 @section ON_THE_FLY On the fly aggregates
640
641 'On the fly' aggregates are computed during the code execution.
642 This is necessary because the data the aggregate is based on is volatile,
643 and can not be kept indefinitely.
644
645 With on the fly aggregates:
646 - the writer thread does all the computation
647 - the reader thread accesses the result directly
648
649 This model is to be avoided if possible, due to the overhead
650 caused when instrumenting code.
651
652 @section HIGHER_LEVEL Higher level aggregates
653
654 'Higher level' aggregates are implemented on demand only.
655 The code executing a SELECT from the aggregate table is
656 collecting data from multiple internal buffers to produce the result.
657
658 With higher level aggregates:
659 - the reader thread does all the computation
660 - the writer thread has no overhead.
661
662 @section MIXED Mixed level aggregates
663
664 The 'Mixed' model is a compromise between 'On the fly' and 'Higher level'
665 aggregates, for internal buffers that are not permanent.
666
667 While an object is present in a buffer, the higher level model is used.
668 When an object is about to be destroyed, statistics are saved into
669 a 'parent' buffer with a longer life cycle, to follow the on the fly model.
670
671 With mixed aggregates:
672 - the reader thread does a lot of complex computation,
673 - the writer thread has minimal overhead, on destroy events.
674
675 @section IMPL_WAIT Implementation for waits aggregates
676
677 For waits, the tables that contains aggregated wait data are:
678 - EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
679 - EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME
680 - EVENTS_WAITS_SUMMARY_BY_INSTANCE
681 - EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
682 - EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME
683 - EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME
684 - FILE_SUMMARY_BY_EVENT_NAME
685 - FILE_SUMMARY_BY_INSTANCE
686 - SOCKET_SUMMARY_BY_INSTANCE
687 - SOCKET_SUMMARY_BY_EVENT_NAME
688 - OBJECTS_SUMMARY_GLOBAL_BY_TYPE
689
690 The instrumented code that generates waits events consists of:
691 - mutexes (mysql_mutex_t)
692 - rwlocks (mysql_rwlock_t)
693 - conditions (mysql_cond_t)
694 - file io (MYSQL_FILE)
695 - socket io (MYSQL_SOCKET)
696 - table io
697 - table lock
698 - idle
699
700 The flow of data between aggregates tables varies for each instrumentation.
701
702 @subsection IMPL_WAIT_MUTEX Mutex waits
703
704 @verbatim
705 mutex_locker(T, M)
706 |
707 | [1]
708 |
709 |-> pfs_mutex(M) =====>> [B], [C]
710 | |
711 | | [2]
712 | |
713 | |-> pfs_mutex_class(M.class) =====>> [C]
714 |
715 |-> pfs_thread(T).event_name(M) =====>> [A], [D], [E], [F]
716 |
717 | [3]
718 |
719 3a |-> pfs_account(U, H).event_name(M) =====>> [D], [E], [F]
720 . |
721 . | [4-RESET]
722 . |
723 3b .....+-> pfs_user(U).event_name(M) =====>> [E]
724 . |
725 3c .....+-> pfs_host(H).event_name(M) =====>> [F]
726 @endverbatim
727
728 How to read this diagram:
729 - events that occur during the instrumented code execution are noted with numbers,
730 as in [1]. Code executed by these events has an impact on overhead.
731 - events that occur during TRUNCATE TABLE operations are noted with numbers,
732 followed by "-RESET", as in [4-RESET].
733 Code executed by these events has no impact on overhead,
734 since they are executed by independent monitoring sessions.
735 - events that occur when a reader extracts data from a performance schema table
736 are noted with letters, as in [A]. The name of the table involved,
737 and the method that builds a row are documented. Code executed by these events
738 has no impact on the instrumentation overhead. Note that the table
739 implementation may pull data from different buffers.
740 - nominal code paths are in plain lines. A "nominal" code path corresponds to
741 cases where the performance schema buffers are sized so that no records are lost.
742 - degenerated code paths are in dotted lines. A "degenerated" code path corresponds
743 to edge cases where parent buffers are full, which forces the code to aggregate to
744 grand parents directly.
745
746 Implemented as:
747 - [1] @c start_mutex_wait_v1(), @c end_mutex_wait_v1()
748 - [2] @c destroy_mutex_v1()
749 - [3] @c aggregate_thread_waits()
750 - [4] @c PFS_account::aggregate_waits()
751 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
752 @c table_ews_by_thread_by_event_name::make_row()
753 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
754 @c table_events_waits_summary_by_instance::make_mutex_row()
755 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
756 @c table_ews_global_by_event_name::make_mutex_row()
757 - [D] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
758 @c table_ews_by_account_by_event_name::make_row()
759 - [E] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
760 @c table_ews_by_user_by_event_name::make_row()
761 - [F] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
762 @c table_ews_by_host_by_event_name::make_row()
763
764 Table EVENTS_WAITS_SUMMARY_BY_INSTANCE is a 'on the fly' aggregate,
765 because the data is collected on the fly by (1) and stored into a buffer,
766 pfs_mutex. The table implementation [B] simply reads the results directly
767 from this buffer.
768
769 Table EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME is a 'mixed' aggregate,
770 because some data is collected on the fly (1),
771 some data is preserved with (2) at a later time in the life cycle,
772 and two different buffers pfs_mutex and pfs_mutex_class are used to store the
773 statistics collected. The table implementation [C] is more complex, since
774 it reads from two buffers pfs_mutex and pfs_mutex_class.
775
776 @subsection IMPL_WAIT_RWLOCK Rwlock waits
777
778 @verbatim
779 rwlock_locker(T, R)
780 |
781 | [1]
782 |
783 |-> pfs_rwlock(R) =====>> [B], [C]
784 | |
785 | | [2]
786 | |
787 | |-> pfs_rwlock_class(R.class) =====>> [C]
788 |
789 |-> pfs_thread(T).event_name(R) =====>> [A]
790 |
791 ...
792 @endverbatim
793
794 Implemented as:
795 - [1] @c start_rwlock_rdwait_v1(), @c end_rwlock_rdwait_v1(), ...
796 - [2] @c destroy_rwlock_v1()
797 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
798 @c table_ews_by_thread_by_event_name::make_row()
799 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
800 @c table_events_waits_summary_by_instance::make_rwlock_row()
801 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
802 @c table_ews_global_by_event_name::make_rwlock_row()
803
804 @subsection IMPL_WAIT_COND Cond waits
805
806 @verbatim
807 cond_locker(T, C)
808 |
809 | [1]
810 |
811 |-> pfs_cond(C) =====>> [B], [C]
812 | |
813 | | [2]
814 | |
815 | |-> pfs_cond_class(C.class) =====>> [C]
816 |
817 |-> pfs_thread(T).event_name(C) =====>> [A]
818 |
819 ...
820 @endverbatim
821
822 Implemented as:
823 - [1] @c start_cond_wait_v1(), @c end_cond_wait_v1()
824 - [2] @c destroy_cond_v1()
825 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
826 @c table_ews_by_thread_by_event_name::make_row()
827 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
828 @c table_events_waits_summary_by_instance::make_cond_row()
829 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
830 @c table_ews_global_by_event_name::make_cond_row()
831
832 @subsection IMPL_WAIT_FILE File waits
833
834 @verbatim
835 file_locker(T, F)
836 |
837 | [1]
838 |
839 |-> pfs_file(F) =====>> [B], [C], [D], [E]
840 | |
841 | | [2]
842 | |
843 | |-> pfs_file_class(F.class) =====>> [C], [D]
844 |
845 |-> pfs_thread(T).event_name(F) =====>> [A]
846 |
847 ...
848 @endverbatim
849
850 Implemented as:
851 - [1] @c get_thread_file_name_locker_v1(), @c start_file_wait_v1(),
852 @c end_file_wait_v1(), ...
853 - [2] @c close_file_v1()
854 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
855 @c table_ews_by_thread_by_event_name::make_row()
856 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
857 @c table_events_waits_summary_by_instance::make_file_row()
858 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
859 @c table_ews_global_by_event_name::make_file_row()
860 - [D] FILE_SUMMARY_BY_EVENT_NAME,
861 @c table_file_summary_by_event_name::make_row()
862 - [E] FILE_SUMMARY_BY_INSTANCE,
863 @c table_file_summary_by_instance::make_row()
864
865 @subsection IMPL_WAIT_SOCKET Socket waits
866
867 @verbatim
868 socket_locker(T, S)
869 |
870 | [1]
871 |
872 |-> pfs_socket(S) =====>> [A], [B], [C], [D], [E]
873 |
874 | [2]
875 |
876 |-> pfs_socket_class(S.class) =====>> [C], [D]
877 |
878 |-> pfs_thread(T).event_name(S) =====>> [A]
879 |
880 | [3]
881 |
882 3a |-> pfs_account(U, H).event_name(S) =====>> [F], [G], [H]
883 . |
884 . | [4-RESET]
885 . |
886 3b .....+-> pfs_user(U).event_name(S) =====>> [G]
887 . |
888 3c .....+-> pfs_host(H).event_name(S) =====>> [H]
889 @endverbatim
890
891 Implemented as:
892 - [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
893 - [2] @c close_socket_v1()
894 - [3] @c aggregate_thread_waits()
895 - [4] @c PFS_account::aggregate_waits()
896 - [5] @c PFS_host::aggregate_waits()
897 - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
898 @c table_ews_by_thread_by_event_name::make_row()
899 - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
900 @c table_events_waits_summary_by_instance::make_socket_row()
901 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
902 @c table_ews_global_by_event_name::make_socket_row()
903 - [D] SOCKET_SUMMARY_BY_EVENT_NAME,
904 @c table_socket_summary_by_event_name::make_row()
905 - [E] SOCKET_SUMMARY_BY_INSTANCE,
906 @c table_socket_summary_by_instance::make_row()
907 - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
908 @c table_ews_by_account_by_event_name::make_row()
909 - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
910 @c table_ews_by_user_by_event_name::make_row()
911 - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
912 @c table_ews_by_host_by_event_name::make_row()
913
914 @subsection IMPL_WAIT_TABLE Table waits
915
916 @verbatim
917 table_locker(Thread Th, Table Tb, Event = io or lock)
918 |
919 | [1]
920 |
921 1a |-> pfs_table(Tb) =====>> [A], [B], [C]
922 | |
923 | | [2]
924 | |
925 | |-> pfs_table_share(Tb.share) =====>> [B], [C]
926 | |
927 | | [3]
928 | |
929 | |-> global_table_io_stat =====>> [C]
930 | |
931 | |-> global_table_lock_stat =====>> [C]
932 |
933 1b |-> pfs_thread(Th).event_name(E) =====>> [D], [E], [F], [G]
934 | |
935 | | [ 4-RESET]
936 | |
937 | |-> pfs_account(U, H).event_name(E) =====>> [E], [F], [G]
938 | . |
939 | . | [5-RESET]
940 | . |
941 | .....+-> pfs_user(U).event_name(E) =====>> [F]
942 | . |
943 | .....+-> pfs_host(H).event_name(E) =====>> [G]
944 |
945 1c |-> pfs_thread(Th).waits_current(W) =====>> [H]
946 |
947 1d |-> pfs_thread(Th).waits_history(W) =====>> [I]
948 |
949 1e |-> waits_history_long(W) =====>> [J]
950 @endverbatim
951
952 Implemented as:
953 - [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
954 - [2] @c close_table_v1()
955 - [3] @c drop_table_share_v1()
956 - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
957 - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
958 - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
959 @c table_events_waits_summary_by_instance::make_table_row()
960 - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
961 @c table_os_global_by_type::make_row()
962 - [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
963 @c table_ews_global_by_event_name::make_table_io_row(),
964 @c table_ews_global_by_event_name::make_table_lock_row()
965 - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
966 @c table_ews_by_thread_by_event_name::make_row()
  - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
        @c table_ews_by_account_by_event_name::make_row()
969 - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
970 @c table_ews_by_user_by_event_name::make_row()
971 - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
972 @c table_ews_by_host_by_event_name::make_row()
973 - [H] EVENTS_WAITS_CURRENT,
974 @c table_events_waits_common::make_row()
975 - [I] EVENTS_WAITS_HISTORY,
976 @c table_events_waits_common::make_row()
977 - [J] EVENTS_WAITS_HISTORY_LONG,
978 @c table_events_waits_common::make_row()
979
980 @section IMPL_STAGE Implementation for stages aggregates
981
  For stages, the tables that contain aggregated data are:
983 - EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
984 - EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME
985 - EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME
986 - EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME
987 - EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME
988
989 @verbatim
990 start_stage(T, S)
991 |
992 | [1]
993 |
994 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
995 | |
996 | | [2]
997 | |
998 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
999 | . |
1000 | . | [3-RESET]
1001 | . |
1002 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1003 | . |
1004 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1005 | . . |
1006 | . . | [4-RESET]
1007 | 2d . . |
1008 1b |----+----+----+-> pfs_stage_class(S) =====>> [E]
1009
1010 @endverbatim
1011
1012 Implemented as:
1013 - [1] @c start_stage_v1()
1014 - [2] @c delete_thread_v1(), @c aggregate_thread_stages()
1015 - [3] @c PFS_account::aggregate_stages()
1016 - [4] @c PFS_host::aggregate_stages()
1017 - [A] EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1018 @c table_esgs_by_thread_by_event_name::make_row()
1019 - [B] EVENTS_STAGES_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1020 @c table_esgs_by_account_by_event_name::make_row()
1021 - [C] EVENTS_STAGES_SUMMARY_BY_USER_BY_EVENT_NAME,
1022 @c table_esgs_by_user_by_event_name::make_row()
1023 - [D] EVENTS_STAGES_SUMMARY_BY_HOST_BY_EVENT_NAME,
1024 @c table_esgs_by_host_by_event_name::make_row()
1025 - [E] EVENTS_STAGES_SUMMARY_GLOBAL_BY_EVENT_NAME,
1026 @c table_esgs_global_by_event_name::make_row()
1027
1028 @section IMPL_STATEMENT Implementation for statements consumers
1029
  For statements, the tables that contain individual event data are:
1031 - EVENTS_STATEMENTS_CURRENT
1032 - EVENTS_STATEMENTS_HISTORY
1033 - EVENTS_STATEMENTS_HISTORY_LONG
1034
  For statements, the tables that contain aggregated data are:
1036 - EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
1037 - EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME
1038 - EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME
1039 - EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME
1040 - EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME
1041 - EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1042
1043 @verbatim
1044 statement_locker(T, S)
1045 |
1046 | [1]
1047 |
1048 1a |-> pfs_thread(T).event_name(S) =====>> [A], [B], [C], [D], [E]
1049 | |
1050 | | [2]
1051 | |
1052 | 2a |-> pfs_account(U, H).event_name(S) =====>> [B], [C], [D], [E]
1053 | . |
1054 | . | [3-RESET]
1055 | . |
1056 | 2b .....+-> pfs_user(U).event_name(S) =====>> [C]
1057 | . |
1058 | 2c .....+-> pfs_host(H).event_name(S) =====>> [D], [E]
1059 | . . |
1060 | . . | [4-RESET]
1061 | 2d . . |
1062 1b |----+----+----+-> pfs_statement_class(S) =====>> [E]
1063 |
1064 1c |-> pfs_thread(T).statement_current(S) =====>> [F]
1065 |
1066 1d |-> pfs_thread(T).statement_history(S) =====>> [G]
1067 |
1068 1e |-> statement_history_long(S) =====>> [H]
1069 |
1070 1f |-> statement_digest(S) =====>> [I]
1071
1072 @endverbatim
1073
1074 Implemented as:
1075 - [1] @c start_statement_v1(), end_statement_v1()
1076 (1a, 1b) is an aggregation by EVENT_NAME,
1077 (1c, 1d, 1e) is an aggregation by TIME,
1078 (1f) is an aggregation by DIGEST
1079 all of these are orthogonal,
1080 and implemented in end_statement_v1().
1081 - [2] @c delete_thread_v1(), @c aggregate_thread_statements()
1082 - [3] @c PFS_account::aggregate_statements()
1083 - [4] @c PFS_host::aggregate_statements()
1084 - [A] EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
1085 @c table_esms_by_thread_by_event_name::make_row()
1086 - [B] EVENTS_STATEMENTS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
1087 @c table_esms_by_account_by_event_name::make_row()
1088 - [C] EVENTS_STATEMENTS_SUMMARY_BY_USER_BY_EVENT_NAME,
1089 @c table_esms_by_user_by_event_name::make_row()
1090 - [D] EVENTS_STATEMENTS_SUMMARY_BY_HOST_BY_EVENT_NAME,
1091 @c table_esms_by_host_by_event_name::make_row()
1092 - [E] EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME,
1093 @c table_esms_global_by_event_name::make_row()
1094 - [F] EVENTS_STATEMENTS_CURRENT,
1095 @c table_events_statements_current::rnd_next(),
1096 @c table_events_statements_common::make_row()
1097 - [G] EVENTS_STATEMENTS_HISTORY,
1098 @c table_events_statements_history::rnd_next(),
1099 @c table_events_statements_common::make_row()
1100 - [H] EVENTS_STATEMENTS_HISTORY_LONG,
1101 @c table_events_statements_history_long::rnd_next(),
1102 @c table_events_statements_common::make_row()
1103 - [I] EVENTS_STATEMENTS_SUMMARY_BY_DIGEST
1104 @c table_esms_by_digest::make_row()
1105 */
1106
1107 /**
1108 @defgroup Performance_schema Performance Schema
1109 The performance schema component.
1110 For details, see the
1111 @ref PAGE_PERFORMANCE_SCHEMA "performance schema main page".
1112
1113 @defgroup Performance_schema_implementation Performance Schema Implementation
1114 @ingroup Performance_schema
1115
1116 @defgroup Performance_schema_tables Performance Schema Tables
1117 @ingroup Performance_schema_implementation
1118 */
1119
/** Pthread-specific key holding the PFS_thread instrumentation of the current thread. */
pthread_key(PFS_thread*, THR_PFS);
/** True once the THR_PFS pthread key has been created. */
bool THR_PFS_initialized= false;
1122
/**
  Conversion map from PSI_mutex_operation to enum_operation_type.
  Indexed by enum PSI_mutex_operation.
  The element order must match the order of the PSI_mutex_operation enum.
*/
static enum_operation_type mutex_operation_map[]=
{
  OPERATION_TYPE_LOCK,
  OPERATION_TYPE_TRYLOCK
};
1132
/**
  Conversion map from PSI_rwlock_operation to enum_operation_type.
  Indexed by enum PSI_rwlock_operation.
  The element order must match the order of the PSI_rwlock_operation enum.
*/
static enum_operation_type rwlock_operation_map[]=
{
  OPERATION_TYPE_READLOCK,
  OPERATION_TYPE_WRITELOCK,
  OPERATION_TYPE_TRYREADLOCK,
  OPERATION_TYPE_TRYWRITELOCK
};
1144
/**
  Conversion map from PSI_cond_operation to enum_operation_type.
  Indexed by enum PSI_cond_operation.
  The element order must match the order of the PSI_cond_operation enum.
*/
static enum_operation_type cond_operation_map[]=
{
  OPERATION_TYPE_WAIT,
  OPERATION_TYPE_TIMEDWAIT
};
1154
/**
  Conversion map from PSI_file_operation to enum_operation_type.
  Indexed by enum PSI_file_operation.
  The element order must match the order of the PSI_file_operation enum.
*/
static enum_operation_type file_operation_map[]=
{
  OPERATION_TYPE_FILECREATE,
  OPERATION_TYPE_FILECREATETMP,
  OPERATION_TYPE_FILEOPEN,
  OPERATION_TYPE_FILESTREAMOPEN,
  OPERATION_TYPE_FILECLOSE,
  OPERATION_TYPE_FILESTREAMCLOSE,
  OPERATION_TYPE_FILEREAD,
  OPERATION_TYPE_FILEWRITE,
  OPERATION_TYPE_FILESEEK,
  OPERATION_TYPE_FILETELL,
  OPERATION_TYPE_FILEFLUSH,
  OPERATION_TYPE_FILESTAT,
  OPERATION_TYPE_FILEFSTAT,
  OPERATION_TYPE_FILECHSIZE,
  OPERATION_TYPE_FILEDELETE,
  OPERATION_TYPE_FILERENAME,
  OPERATION_TYPE_FILESYNC
};
1179
/**
  Conversion map from PSI_table_io_operation to enum_operation_type.
  Indexed by enum PSI_table_io_operation.
  The element order must match the order of the PSI_table_io_operation enum.
*/
static enum_operation_type table_io_operation_map[]=
{
  OPERATION_TYPE_TABLE_FETCH,
  OPERATION_TYPE_TABLE_WRITE_ROW,
  OPERATION_TYPE_TABLE_UPDATE_ROW,
  OPERATION_TYPE_TABLE_DELETE_ROW
};
1191
/**
  Conversion map from enum PFS_TL_LOCK_TYPE to enum_operation_type.
  Indexed by enum PFS_TL_LOCK_TYPE.
  The element order must match the order of the PFS_TL_LOCK_TYPE enum,
  as noted by the per-element comments below.
*/
static enum_operation_type table_lock_operation_map[]=
{
  OPERATION_TYPE_TL_READ_NORMAL, /* PFS_TL_READ */
  OPERATION_TYPE_TL_READ_WITH_SHARED_LOCKS, /* PFS_TL_READ_WITH_SHARED_LOCKS */
  OPERATION_TYPE_TL_READ_HIGH_PRIORITY, /* PFS_TL_READ_HIGH_PRIORITY */
  OPERATION_TYPE_TL_READ_NO_INSERTS, /* PFS_TL_READ_NO_INSERT */
  OPERATION_TYPE_TL_WRITE_ALLOW_WRITE, /* PFS_TL_WRITE_ALLOW_WRITE */
  OPERATION_TYPE_TL_WRITE_CONCURRENT_INSERT, /* PFS_TL_WRITE_CONCURRENT_INSERT */
  OPERATION_TYPE_TL_WRITE_DELAYED, /* PFS_TL_WRITE_DELAYED */
  OPERATION_TYPE_TL_WRITE_LOW_PRIORITY, /* PFS_TL_WRITE_LOW_PRIORITY */
  OPERATION_TYPE_TL_WRITE_NORMAL, /* PFS_TL_WRITE */
  OPERATION_TYPE_TL_READ_EXTERNAL, /* PFS_TL_READ_EXTERNAL */
  OPERATION_TYPE_TL_WRITE_EXTERNAL /* PFS_TL_WRITE_EXTERNAL */
};
1210
/**
  Conversion map from PSI_socket_operation to enum_operation_type.
  Indexed by enum PSI_socket_operation.
  The element order must match the order of the PSI_socket_operation enum.
*/
static enum_operation_type socket_operation_map[]=
{
  OPERATION_TYPE_SOCKETCREATE,
  OPERATION_TYPE_SOCKETCONNECT,
  OPERATION_TYPE_SOCKETBIND,
  OPERATION_TYPE_SOCKETCLOSE,
  OPERATION_TYPE_SOCKETSEND,
  OPERATION_TYPE_SOCKETRECV,
  OPERATION_TYPE_SOCKETSENDTO,
  OPERATION_TYPE_SOCKETRECVFROM,
  OPERATION_TYPE_SOCKETSENDMSG,
  OPERATION_TYPE_SOCKETRECVMSG,
  OPERATION_TYPE_SOCKETSEEK,
  OPERATION_TYPE_SOCKETOPT,
  OPERATION_TYPE_SOCKETSTAT,
  OPERATION_TYPE_SOCKETSHUTDOWN,
  OPERATION_TYPE_SOCKETSELECT
};
1233
1234 /**
1235 Build the prefix name of a class of instruments in a category.
1236 For example, this function builds the string 'wait/sync/mutex/sql/' from
1237 a prefix 'wait/sync/mutex' and a category 'sql'.
1238 This prefix is used later to build each instrument name, such as
1239 'wait/sync/mutex/sql/LOCK_open'.
1240 @param prefix Prefix for this class of instruments
1241 @param category Category name
1242 @param [out] output Buffer of length PFS_MAX_INFO_NAME_LENGTH.
1243 @param [out] output_length Length of the resulting output string.
1244 @return 0 for success, non zero for errors
1245 */
build_prefix(const LEX_CSTRING * prefix,const char * category,char * output,int * output_length)1246 static int build_prefix(const LEX_CSTRING *prefix, const char *category,
1247 char *output, int *output_length)
1248 {
1249 size_t len= strlen(category);
1250 char *out_ptr= output;
1251 size_t prefix_length= prefix->length;
1252
1253 if (unlikely((prefix_length + len + 1) >=
1254 PFS_MAX_FULL_PREFIX_NAME_LENGTH))
1255 {
1256 pfs_print_error("build_prefix: prefix+category is too long <%s> <%s>\n",
1257 prefix->str, category);
1258 return 1;
1259 }
1260
1261 if (unlikely(strchr(category, '/') != NULL))
1262 {
1263 pfs_print_error("build_prefix: invalid category <%s>\n",
1264 category);
1265 return 1;
1266 }
1267
1268 /* output = prefix + category + '/' */
1269 memcpy(out_ptr, prefix->str, prefix_length);
1270 out_ptr+= prefix_length;
1271 memcpy(out_ptr, category, len);
1272 out_ptr+= len;
1273 *out_ptr= '/';
1274 out_ptr++;
1275 *output_length= (int)(out_ptr - output);
1276
1277 return 0;
1278 }
1279
/**
  Common body of all the register_<instrument>_v1() functions.
  For each element of the info array, builds the full instrument name
  "<PREFIX><category>/<m_name>", registers it with REGISTER_FUNC, and
  stores the resulting key (0 on any failure) in *info->m_key.
  Expects variables 'category', 'info' and 'count' in the caller's scope.
*/
#define REGISTER_BODY_V1(KEY_T, PREFIX, REGISTER_FUNC) \
  KEY_T key; \
  char formatted_name[PFS_MAX_INFO_NAME_LENGTH]; \
  int prefix_length; \
  int len; \
  int full_length; \
  \
  DBUG_ASSERT(category != NULL); \
  DBUG_ASSERT(info != NULL); \
  if (unlikely(build_prefix(&PREFIX, category, \
                            formatted_name, &prefix_length))) \
  { \
    for (; count>0; count--, info++) \
      *(info->m_key)= 0; \
    return ; \
  } \
  \
  for (; count>0; count--, info++) \
  { \
    DBUG_ASSERT(info->m_key != NULL); \
    DBUG_ASSERT(info->m_name != NULL); \
    len= (int)strlen(info->m_name); \
    full_length= prefix_length + len; \
    if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH)) \
    { \
      memcpy(formatted_name + prefix_length, info->m_name, len); \
      key= REGISTER_FUNC(formatted_name, full_length, info->m_flags); \
    } \
    else \
    { \
      pfs_print_error("REGISTER_BODY_V1: name too long <%s> <%s>\n", \
                      category, info->m_name); \
      key= 0; \
    } \
    \
    *(info->m_key)= key; \
  } \
  return;
1318
1319 /* Use C linkage for the interface functions. */
1320
1321 C_MODE_START
1322
/**
  Implementation of the mutex instrumentation interface.
  @sa PSI_v1::register_mutex.
  @param category instrument category name, without '/'
  @param info array of mutex instruments to register
  @param count number of elements in info
*/
static void register_mutex_v1(const char *category,
                              PSI_mutex_info_v1 *info,
                              int count)
{
  REGISTER_BODY_V1(PSI_mutex_key,
                   mutex_instrument_prefix,
                   register_mutex_class)
}
1335
/**
  Implementation of the rwlock instrumentation interface.
  @sa PSI_v1::register_rwlock.
  @param category instrument category name, without '/'
  @param info array of rwlock instruments to register
  @param count number of elements in info
*/
static void register_rwlock_v1(const char *category,
                               PSI_rwlock_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_rwlock_key,
                   rwlock_instrument_prefix,
                   register_rwlock_class)
}
1348
/**
  Implementation of the cond instrumentation interface.
  @sa PSI_v1::register_cond.
  @param category instrument category name, without '/'
  @param info array of condition instruments to register
  @param count number of elements in info
*/
static void register_cond_v1(const char *category,
                             PSI_cond_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_cond_key,
                   cond_instrument_prefix,
                   register_cond_class)
}
1361
/**
  Implementation of the thread instrumentation interface.
  @sa PSI_v1::register_thread.
  @param category instrument category name, without '/'
  @param info array of thread instruments to register
  @param count number of elements in info
*/
static void register_thread_v1(const char *category,
                               PSI_thread_info_v1 *info,
                               int count)
{
  REGISTER_BODY_V1(PSI_thread_key,
                   thread_instrument_prefix,
                   register_thread_class)
}
1374
/**
  Implementation of the file instrumentation interface.
  @sa PSI_v1::register_file.
  @param category instrument category name, without '/'
  @param info array of file instruments to register
  @param count number of elements in info
*/
static void register_file_v1(const char *category,
                             PSI_file_info_v1 *info,
                             int count)
{
  REGISTER_BODY_V1(PSI_file_key,
                   file_instrument_prefix,
                   register_file_class)
}
1387
register_stage_v1(const char * category,PSI_stage_info_v1 ** info_array,int count)1388 static void register_stage_v1(const char *category,
1389 PSI_stage_info_v1 **info_array,
1390 int count)
1391 {
1392 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1393 int prefix_length;
1394 int len;
1395 int full_length;
1396 PSI_stage_info_v1 *info;
1397
1398 DBUG_ASSERT(category != NULL);
1399 DBUG_ASSERT(info_array != NULL);
1400 if (unlikely(build_prefix(&stage_instrument_prefix, category,
1401 formatted_name, &prefix_length)))
1402 {
1403 for (; count>0; count--, info_array++)
1404 (*info_array)->m_key= 0;
1405 return ;
1406 }
1407
1408 for (; count>0; count--, info_array++)
1409 {
1410 info= *info_array;
1411 DBUG_ASSERT(info != NULL);
1412 DBUG_ASSERT(info->m_name != NULL);
1413 len= (int)strlen(info->m_name);
1414 full_length= prefix_length + len;
1415 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1416 {
1417 memcpy(formatted_name + prefix_length, info->m_name, len);
1418 info->m_key= register_stage_class(formatted_name,
1419 prefix_length,
1420 full_length,
1421 info->m_flags);
1422 }
1423 else
1424 {
1425 pfs_print_error("register_stage_v1: name too long <%s> <%s>\n",
1426 category, info->m_name);
1427 info->m_key= 0;
1428 }
1429 }
1430 return;
1431 }
1432
register_statement_v1(const char * category,PSI_statement_info_v1 * info,int count)1433 static void register_statement_v1(const char *category,
1434 PSI_statement_info_v1 *info,
1435 int count)
1436 {
1437 char formatted_name[PFS_MAX_INFO_NAME_LENGTH];
1438 int prefix_length;
1439 int len;
1440 int full_length;
1441
1442 DBUG_ASSERT(category != NULL);
1443 DBUG_ASSERT(info != NULL);
1444 if (unlikely(build_prefix(&statement_instrument_prefix,
1445 category, formatted_name, &prefix_length)))
1446 {
1447 for (; count>0; count--, info++)
1448 info->m_key= 0;
1449 return ;
1450 }
1451
1452 for (; count>0; count--, info++)
1453 {
1454 if (info->m_name == NULL)
1455 continue;
1456
1457 len= (int)strlen(info->m_name);
1458 full_length= prefix_length + len;
1459 if (likely(full_length <= PFS_MAX_INFO_NAME_LENGTH))
1460 {
1461 memcpy(formatted_name + prefix_length, info->m_name, len);
1462 info->m_key= register_statement_class(formatted_name, full_length, info->m_flags);
1463 }
1464 else
1465 {
1466 pfs_print_error("register_statement_v1: name too long <%s>\n",
1467 info->m_name);
1468 info->m_key= 0;
1469 }
1470 }
1471 return;
1472 }
1473
register_socket_v1(const char * category,PSI_socket_info_v1 * info,int count)1474 static void register_socket_v1(const char *category,
1475 PSI_socket_info_v1 *info,
1476 int count)
1477 {
1478 REGISTER_BODY_V1(PSI_socket_key,
1479 socket_instrument_prefix,
1480 register_socket_class)
1481 }
1482
/**
  Common body of the init_<instrument>_v1() functions.
  Looks up the instrument class registered under KEY; when the class
  exists and is enabled, creates an instrumented T bound to ID.
  Evaluates to a 'return' of the new PSI_T*, or NULL when the
  object is not instrumented.
*/
#define INIT_BODY_V1(T, KEY, ID) \
  PFS_##T##_class *klass; \
  PFS_##T *pfs; \
  klass= find_##T##_class(KEY); \
  if (unlikely(klass == NULL)) \
    return NULL; \
  if (! klass->m_enabled) \
    return NULL; \
  pfs= create_##T(klass, ID); \
  return reinterpret_cast<PSI_##T *> (pfs)
1493
/**
  Implementation of the mutex instrumentation interface.
  @sa PSI_v1::init_mutex.
  @param key registered mutex instrument key
  @param identity address of the mutex being instrumented
  @return the mutex instrumentation, or NULL when not instrumented
*/
static PSI_mutex*
init_mutex_v1(PSI_mutex_key key, void *identity)
{
  INIT_BODY_V1(mutex, key, identity);
}
1503
1504 /**
1505 Implementation of the mutex instrumentation interface.
1506 @sa PSI_v1::destroy_mutex.
1507 */
destroy_mutex_v1(PSI_mutex * mutex)1508 static void destroy_mutex_v1(PSI_mutex* mutex)
1509 {
1510 PFS_mutex *pfs= reinterpret_cast<PFS_mutex*> (mutex);
1511
1512 DBUG_ASSERT(pfs != NULL);
1513
1514 destroy_mutex(pfs);
1515 }
1516
/**
  Implementation of the rwlock instrumentation interface.
  @sa PSI_v1::init_rwlock.
  @param key registered rwlock instrument key
  @param identity address of the rwlock being instrumented
  @return the rwlock instrumentation, or NULL when not instrumented
*/
static PSI_rwlock*
init_rwlock_v1(PSI_rwlock_key key, void *identity)
{
  INIT_BODY_V1(rwlock, key, identity);
}
1526
1527 /**
1528 Implementation of the rwlock instrumentation interface.
1529 @sa PSI_v1::destroy_rwlock.
1530 */
destroy_rwlock_v1(PSI_rwlock * rwlock)1531 static void destroy_rwlock_v1(PSI_rwlock* rwlock)
1532 {
1533 PFS_rwlock *pfs= reinterpret_cast<PFS_rwlock*> (rwlock);
1534
1535 DBUG_ASSERT(pfs != NULL);
1536
1537 destroy_rwlock(pfs);
1538 }
1539
/**
  Implementation of the cond instrumentation interface.
  @sa PSI_v1::init_cond.
  @param key registered condition instrument key
  @param identity address of the condition being instrumented
  @return the condition instrumentation, or NULL when not instrumented
*/
static PSI_cond*
init_cond_v1(PSI_cond_key key, void *identity)
{
  INIT_BODY_V1(cond, key, identity);
}
1549
1550 /**
1551 Implementation of the cond instrumentation interface.
1552 @sa PSI_v1::destroy_cond.
1553 */
destroy_cond_v1(PSI_cond * cond)1554 static void destroy_cond_v1(PSI_cond* cond)
1555 {
1556 PFS_cond *pfs= reinterpret_cast<PFS_cond*> (cond);
1557
1558 DBUG_ASSERT(pfs != NULL);
1559
1560 destroy_cond(pfs);
1561 }
1562
1563 /**
1564 Implementation of the table instrumentation interface.
1565 @sa PSI_v1::get_table_share.
1566 */
1567 static PSI_table_share*
get_table_share_v1(my_bool temporary,TABLE_SHARE * share)1568 get_table_share_v1(my_bool temporary, TABLE_SHARE *share)
1569 {
1570 /* Ignore temporary tables and views. */
1571 if (temporary || share->is_view)
1572 return NULL;
1573 /* An instrumented thread is required, for LF_PINS. */
1574 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1575 if (unlikely(pfs_thread == NULL))
1576 return NULL;
1577 PFS_table_share* pfs_share;
1578 pfs_share= find_or_create_table_share(pfs_thread, temporary, share);
1579 return reinterpret_cast<PSI_table_share*> (pfs_share);
1580 }
1581
1582 /**
1583 Implementation of the table instrumentation interface.
1584 @sa PSI_v1::release_table_share.
1585 */
release_table_share_v1(PSI_table_share * share)1586 static void release_table_share_v1(PSI_table_share* share)
1587 {
1588 PFS_table_share* pfs= reinterpret_cast<PFS_table_share*> (share);
1589
1590 if (unlikely(pfs == NULL))
1591 return;
1592
1593 release_table_share(pfs);
1594 }
1595
1596 /**
1597 Implementation of the table instrumentation interface.
1598 @sa PSI_v1::drop_table_share.
1599 */
1600 static void
drop_table_share_v1(my_bool temporary,const char * schema_name,int schema_name_length,const char * table_name,int table_name_length)1601 drop_table_share_v1(my_bool temporary,
1602 const char *schema_name, int schema_name_length,
1603 const char *table_name, int table_name_length)
1604 {
1605 /* Ignore temporary tables. */
1606 if (temporary)
1607 return;
1608 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1609 if (unlikely(pfs_thread == NULL))
1610 return;
1611 /* TODO: temporary tables */
1612 drop_table_share(pfs_thread, temporary, schema_name, schema_name_length,
1613 table_name, table_name_length);
1614 }
1615
1616 /**
1617 Implementation of the table instrumentation interface.
1618 @sa PSI_v1::open_table.
1619 */
1620 static PSI_table*
open_table_v1(PSI_table_share * share,const void * identity)1621 open_table_v1(PSI_table_share *share, const void *identity)
1622 {
1623 PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);
1624
1625 /*
1626 When the performance schema is off, do not instrument anything.
1627 Table handles have short life cycle, instrumentation will happen
1628 again if needed during the next open().
1629 */
1630 if (psi_unlikely(! flag_global_instrumentation))
1631 return NULL;
1632
1633 if (unlikely(pfs_table_share == NULL))
1634 return NULL;
1635
1636 /* This object is not to be instrumented. */
1637 if (! pfs_table_share->m_enabled)
1638 return NULL;
1639
1640 /* This object is instrumented, but all table instruments are disabled. */
1641 if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
1642 return NULL;
1643
1644 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1645 if (unlikely(thread == NULL))
1646 return NULL;
1647
1648 PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
1649 return reinterpret_cast<PSI_table *> (pfs_table);
1650 }
1651
1652 /**
1653 Implementation of the table instrumentation interface.
1654 @sa PSI_v1::unbind_table.
1655 */
unbind_table_v1(PSI_table * table)1656 static void unbind_table_v1(PSI_table *table)
1657 {
1658 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1659 if (likely(pfs != NULL))
1660 {
1661 pfs->m_thread_owner= NULL;
1662 }
1663 }
1664
/**
  Implementation of the table instrumentation interface.
  Re-attaches an instrumented table handle to the current thread,
  or creates new instrumentation if the handle had none.
  @sa PSI_v1::rebind_table.
  @param share the table share instrumentation, used when table is NULL
  @param identity address of the table handle
  @param table the existing table instrumentation, may be NULL
  @return the table instrumentation to use, or NULL when not instrumented
*/
static PSI_table *
rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
{
  PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
  if (likely(pfs != NULL))
  {
    PFS_thread *thread;
    /* The handle must have been unbound (see unbind_table_v1()). */
    DBUG_ASSERT(pfs->m_thread_owner == NULL);

    /* Instrumentation was turned off since open: drop the handle. */
    if (psi_unlikely(! flag_global_instrumentation))
    {
      destroy_table(pfs);
      return NULL;
    }

    /* The table handle was already instrumented, reuse it for this thread. */
    thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

    /* The share was disabled since open: drop the handle. */
    if (unlikely(! pfs->m_share->m_enabled))
    {
      destroy_table(pfs);
      return NULL;
    }

    /* All table instruments were disabled since open: drop the handle. */
    if (unlikely(! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled))
    {
      destroy_table(pfs);
      return NULL;
    }

    pfs->m_thread_owner= thread;
    return table;
  }

  /* No existing instrumentation: create it from scratch, if enabled. */
  if (psi_unlikely(! flag_global_instrumentation))
    return NULL;

  /* See open_table_v1() */

  PFS_table_share *pfs_table_share= reinterpret_cast<PFS_table_share*> (share);

  if (unlikely(pfs_table_share == NULL))
    return NULL;

  if (! pfs_table_share->m_enabled)
    return NULL;

  if (! global_table_io_class.m_enabled && ! global_table_lock_class.m_enabled)
    return NULL;

  PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(thread == NULL))
    return NULL;

  PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
  return reinterpret_cast<PSI_table *> (pfs_table);
}
1726
1727 /**
1728 Implementation of the table instrumentation interface.
1729 @sa PSI_v1::close_table.
1730 */
close_table_v1(PSI_table * table)1731 static void close_table_v1(PSI_table *table)
1732 {
1733 PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
1734 if (unlikely(pfs == NULL))
1735 return;
1736 pfs->aggregate();
1737 destroy_table(pfs);
1738 }
1739
1740 static PSI_socket*
init_socket_v1(PSI_socket_key key,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)1741 init_socket_v1(PSI_socket_key key, const my_socket *fd,
1742 const struct sockaddr *addr, socklen_t addr_len)
1743 {
1744 PFS_socket_class *klass;
1745 PFS_socket *pfs;
1746 klass= find_socket_class(key);
1747 if (unlikely(klass == NULL))
1748 return NULL;
1749 if (! klass->m_enabled)
1750 return NULL;
1751 pfs= create_socket(klass, fd, addr, addr_len);
1752 return reinterpret_cast<PSI_socket *> (pfs);
1753 }
1754
destroy_socket_v1(PSI_socket * socket)1755 static void destroy_socket_v1(PSI_socket *socket)
1756 {
1757 PFS_socket *pfs= reinterpret_cast<PFS_socket*> (socket);
1758
1759 DBUG_ASSERT(pfs != NULL);
1760
1761 destroy_socket(pfs);
1762 }
1763
/**
  Implementation of the file instrumentation interface.
  Instruments a newly created file and records it in the
  file handle array, indexed by file descriptor.
  @sa PSI_v1::create_file.
  @param key registered file instrument key
  @param name name of the created file
  @param file descriptor of the created file, used as handle index
*/
static void create_file_v1(PSI_file_key key, const char *name, File file)
{
  if (psi_unlikely(! flag_global_instrumentation))
    return;
  int index= (int) file;
  if (unlikely(index < 0))
    return;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return;
  if (! klass->m_enabled)
    return;

  /* A thread is needed for LF_PINS */
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return;

  if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
    return;

  /*
    We want this check after pfs_thread->m_enabled,
    to avoid reporting false loss.
  */
  if (unlikely(index >= file_handle_max))
  {
    file_handle_lost++;
    return;
  }

  uint len= (uint)strlen(name);
  PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);

  file_handle_array[index]= pfs_file;
}
1804
/**
  Arguments given from a parent to a child thread, packaged in one structure.
  This data is used when spawning a new instrumented thread.
  @sa pfs_spawn_thread.
*/
struct PFS_spawn_thread_arg
{
  /* Internal id of the parent thread, copied to the child. */
  ulonglong m_thread_internal_id;
  /* Account (user, host) inherited by the child thread. */
  char m_username[USERNAME_LENGTH];
  uint m_username_length;
  char m_hostname[HOSTNAME_LENGTH];
  uint m_hostname_length;

  /* Instrument key for the child thread. */
  PSI_thread_key m_child_key;
  /* Identity of the child thread instrumentation. */
  const void *m_child_identity;
  /* User start routine to run in the child thread, and its argument. */
  void *(*m_user_start_routine)(void*);
  void *m_user_arg;
};
1823
/**
  Trampoline executed by the spawned pthread.
  Attaches performance schema instrumentation to the new thread,
  frees the spawn argument, then runs the user start routine.
  @param arg  a PFS_spawn_thread_arg, allocated by spawn_thread_v1()
  @return always NULL
*/
void* pfs_spawn_thread(void *arg)
{
  PFS_spawn_thread_arg *typed_arg= (PFS_spawn_thread_arg*) arg;
  void *user_arg;
  void *(*user_start_routine)(void*);

  PFS_thread *pfs;

  /* First, attach instrumentation to this newly created pthread. */
  PFS_thread_class *klass= find_thread_class(typed_arg->m_child_key);
  if (likely(klass != NULL))
  {
    pfs= create_thread(klass, typed_arg->m_child_identity, 0);
    if (likely(pfs != NULL))
    {
      clear_thread_account(pfs);

      pfs->m_parent_thread_internal_id= typed_arg->m_thread_internal_id;

      /* Inherit the parent account, copied out in spawn_thread_v1(). */
      memcpy(pfs->m_username, typed_arg->m_username, sizeof(pfs->m_username));
      pfs->m_username_length= typed_arg->m_username_length;

      memcpy(pfs->m_hostname, typed_arg->m_hostname, sizeof(pfs->m_hostname));
      pfs->m_hostname_length= typed_arg->m_hostname_length;

      set_thread_account(pfs);
    }
  }
  else
  {
    pfs= NULL;
  }
  /* Publish the instrumentation (possibly NULL) in thread-local storage. */
  my_pthread_setspecific_ptr(THR_PFS, pfs);

  /*
    Secondly, free the memory allocated in spawn_thread_v1().
    It is preferable to do this before invoking the user
    routine, to avoid memory leaks at shutdown, in case
    the server exits without waiting for this thread.
  */
  user_start_routine= typed_arg->m_user_start_routine;
  user_arg= typed_arg->m_user_arg;
  my_free(typed_arg);

  /* Then, execute the user code for this thread. */
  (*user_start_routine)(user_arg);

  return NULL;
}
1873
1874 /**
1875 Implementation of the thread instrumentation interface.
1876 @sa PSI_v1::spawn_thread.
1877 */
static int spawn_thread_v1(PSI_thread_key key,
                           pthread_t *thread, const pthread_attr_t *attr,
                           void *(*start_routine)(void*), void *arg)
{
  PFS_spawn_thread_arg *psi_arg;
  PFS_thread *parent;

  /* psi_arg can not be global, and can not be a local variable. */
  psi_arg= (PFS_spawn_thread_arg*) my_malloc(sizeof(PFS_spawn_thread_arg),
                                             MYF(MY_WME));
  if (unlikely(psi_arg == NULL))
    return EAGAIN;

  psi_arg->m_child_key= key;
  psi_arg->m_child_identity= (arg ? arg : thread);
  psi_arg->m_user_start_routine= start_routine;
  psi_arg->m_user_arg= arg;

  parent= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (parent != NULL)
  {
    /*
      Make a copy of the parent attributes.
      This is required, because instrumentation for this thread (the parent)
      may be destroyed before the child thread instrumentation is created.
    */
    psi_arg->m_thread_internal_id= parent->m_thread_internal_id;

    memcpy(psi_arg->m_username, parent->m_username, sizeof(psi_arg->m_username));
    psi_arg->m_username_length= parent->m_username_length;

    memcpy(psi_arg->m_hostname, parent->m_hostname, sizeof(psi_arg->m_hostname));
    psi_arg->m_hostname_length= parent->m_hostname_length;
  }
  else
  {
    /* No instrumented parent: the child starts with an empty account. */
    psi_arg->m_thread_internal_id= 0;
    psi_arg->m_username_length= 0;
    psi_arg->m_hostname_length= 0;
  }

  /* pfs_spawn_thread() owns psi_arg on success and frees it itself. */
  int result= pthread_create(thread, attr, pfs_spawn_thread, psi_arg);
  if (unlikely(result != 0))
    my_free(psi_arg);
  return result;
}
1924
1925 /**
1926 Implementation of the thread instrumentation interface.
1927 @sa PSI_v1::new_thread.
1928 */
1929 static PSI_thread*
new_thread_v1(PSI_thread_key key,const void * identity,ulonglong processlist_id)1930 new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
1931 {
1932 PFS_thread *pfs;
1933
1934 PFS_thread_class *klass= find_thread_class(key);
1935 if (likely(klass != NULL))
1936 pfs= create_thread(klass, identity, processlist_id);
1937 else
1938 pfs= NULL;
1939
1940 return reinterpret_cast<PSI_thread*> (pfs);
1941 }
1942
1943 /**
1944 Implementation of the thread instrumentation interface.
1945 @sa PSI_v1::set_thread_id.
1946 */
set_thread_id_v1(PSI_thread * thread,ulonglong processlist_id)1947 static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
1948 {
1949 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
1950 if (unlikely(pfs == NULL))
1951 return;
1952 pfs->m_processlist_id= (ulong)processlist_id;
1953 }
1954
1955 /**
1956 Implementation of the thread instrumentation interface.
1957 @sa PSI_v1::get_thread_id.
1958 */
1959 static PSI_thread*
get_thread_v1(void)1960 get_thread_v1(void)
1961 {
1962 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1963 return reinterpret_cast<PSI_thread*> (pfs);
1964 }
1965
1966 /**
1967 Implementation of the thread instrumentation interface.
1968 @sa PSI_v1::set_thread_user.
1969 */
set_thread_user_v1(const char * user,int user_len)1970 static void set_thread_user_v1(const char *user, int user_len)
1971 {
1972 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
1973
1974 DBUG_ASSERT((user != NULL) || (user_len == 0));
1975 DBUG_ASSERT(user_len >= 0);
1976 DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
1977
1978 if (unlikely(pfs == NULL))
1979 return;
1980
1981 aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
1982
1983 pfs->m_session_lock.allocated_to_dirty();
1984
1985 clear_thread_account(pfs);
1986
1987 if (user_len > 0)
1988 memcpy(pfs->m_username, user, user_len);
1989 pfs->m_username_length= user_len;
1990
1991 set_thread_account(pfs);
1992
1993 bool enabled= true;
1994 if (flag_thread_instrumentation)
1995 {
1996 if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
1997 {
1998 /*
1999 TODO: performance improvement.
2000 Once performance_schema.USERS is exposed,
2001 we can use PFS_user::m_enabled instead of looking up
2002 SETUP_ACTORS every time.
2003 */
2004 lookup_setup_actor(pfs,
2005 pfs->m_username, pfs->m_username_length,
2006 pfs->m_hostname, pfs->m_hostname_length,
2007 &enabled);
2008 }
2009 }
2010
2011 pfs->m_enabled= enabled;
2012
2013 pfs->m_session_lock.dirty_to_allocated();
2014 }
2015
2016 /**
2017 Implementation of the thread instrumentation interface.
2018 @sa PSI_v1::set_thread_account.
2019 */
set_thread_account_v1(const char * user,int user_len,const char * host,int host_len)2020 static void set_thread_account_v1(const char *user, int user_len,
2021 const char *host, int host_len)
2022 {
2023 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2024
2025 DBUG_ASSERT((user != NULL) || (user_len == 0));
2026 DBUG_ASSERT(user_len >= 0);
2027 DBUG_ASSERT((uint) user_len <= sizeof(pfs->m_username));
2028 DBUG_ASSERT((host != NULL) || (host_len == 0));
2029 DBUG_ASSERT(host_len >= 0);
2030
2031 host_len= MY_MIN(host_len, static_cast<int>(sizeof(pfs->m_hostname)));
2032
2033 if (unlikely(pfs == NULL))
2034 return;
2035
2036 pfs->m_session_lock.allocated_to_dirty();
2037
2038 clear_thread_account(pfs);
2039
2040 if (host_len > 0)
2041 memcpy(pfs->m_hostname, host, host_len);
2042 pfs->m_hostname_length= host_len;
2043
2044 if (user_len > 0)
2045 memcpy(pfs->m_username, user, user_len);
2046 pfs->m_username_length= user_len;
2047
2048 set_thread_account(pfs);
2049
2050 bool enabled= true;
2051 if (flag_thread_instrumentation)
2052 {
2053 if ((pfs->m_username_length > 0) && (pfs->m_hostname_length > 0))
2054 {
2055 /*
2056 TODO: performance improvement.
2057 Once performance_schema.USERS is exposed,
2058 we can use PFS_user::m_enabled instead of looking up
2059 SETUP_ACTORS every time.
2060 */
2061 lookup_setup_actor(pfs,
2062 pfs->m_username, pfs->m_username_length,
2063 pfs->m_hostname, pfs->m_hostname_length,
2064 &enabled);
2065 }
2066 }
2067 pfs->m_enabled= enabled;
2068
2069 pfs->m_session_lock.dirty_to_allocated();
2070 }
2071
2072 /**
2073 Implementation of the thread instrumentation interface.
2074 @sa PSI_v1::set_thread_db.
2075 */
set_thread_db_v1(const char * db,int db_len)2076 static void set_thread_db_v1(const char* db, int db_len)
2077 {
2078 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2079
2080 DBUG_ASSERT((db != NULL) || (db_len == 0));
2081 DBUG_ASSERT(db_len >= 0);
2082 DBUG_ASSERT((uint) db_len <= sizeof(pfs->m_dbname));
2083
2084 if (likely(pfs != NULL))
2085 {
2086 pfs->m_stmt_lock.allocated_to_dirty();
2087 if (db_len > 0)
2088 memcpy(pfs->m_dbname, db, db_len);
2089 pfs->m_dbname_length= db_len;
2090 pfs->m_stmt_lock.dirty_to_allocated();
2091 }
2092 }
2093
2094 /**
2095 Implementation of the thread instrumentation interface.
2096 @sa PSI_v1::set_thread_command.
2097 */
set_thread_command_v1(int command)2098 static void set_thread_command_v1(int command)
2099 {
2100 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2101
2102 DBUG_ASSERT(command >= 0);
2103 DBUG_ASSERT(command <= (int) COM_END);
2104
2105 if (likely(pfs != NULL))
2106 {
2107 pfs->m_command= command;
2108 }
2109 }
2110
2111 /**
2112 Implementation of the thread instrumentation interface.
2113 @sa PSI_v1::set_thread_start_time.
2114 */
set_thread_start_time_v1(time_t start_time)2115 static void set_thread_start_time_v1(time_t start_time)
2116 {
2117 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2118
2119 if (likely(pfs != NULL))
2120 {
2121 pfs->m_start_time= start_time;
2122 }
2123 }
2124
2125 /**
2126 Implementation of the thread instrumentation interface.
2127 @sa PSI_v1::set_thread_state.
2128 */
static void set_thread_state_v1(const char* state)
{
  /* DEPRECATED. */
  /* Intentionally a no-op: the state argument is ignored. */
}
2133
2134 /**
2135 Implementation of the thread instrumentation interface.
2136 @sa PSI_v1::set_thread_info.
2137 */
set_thread_info_v1(const char * info,uint info_len)2138 static void set_thread_info_v1(const char* info, uint info_len)
2139 {
2140 PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2141
2142 DBUG_ASSERT((info != NULL) || (info_len == 0));
2143
2144 if (likely(pfs != NULL))
2145 {
2146 if ((info != NULL) && (info_len > 0))
2147 {
2148 if (info_len > sizeof(pfs->m_processlist_info))
2149 info_len= sizeof(pfs->m_processlist_info);
2150
2151 pfs->m_stmt_lock.allocated_to_dirty();
2152 memcpy(pfs->m_processlist_info, info, info_len);
2153 pfs->m_processlist_info_length= info_len;
2154 pfs->m_stmt_lock.dirty_to_allocated();
2155 }
2156 else
2157 {
2158 pfs->m_stmt_lock.allocated_to_dirty();
2159 pfs->m_processlist_info_length= 0;
2160 pfs->m_stmt_lock.dirty_to_allocated();
2161 }
2162 }
2163 }
2164
2165 /**
2166 Implementation of the thread instrumentation interface.
2167 @sa PSI_v1::set_thread.
2168 */
set_thread_v1(PSI_thread * thread)2169 static void set_thread_v1(PSI_thread* thread)
2170 {
2171 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2172 my_pthread_setspecific_ptr(THR_PFS, pfs);
2173 }
2174
2175 /**
2176 Implementation of the thread instrumentation interface.
2177 @sa PSI_v1::delete_current_thread.
2178 */
delete_current_thread_v1(void)2179 static void delete_current_thread_v1(void)
2180 {
2181 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
2182 if (thread != NULL)
2183 {
2184 aggregate_thread(thread, thread->m_account, thread->m_user, thread->m_host);
2185 my_pthread_setspecific_ptr(THR_PFS, NULL);
2186 destroy_thread(thread);
2187 }
2188 }
2189
2190 /**
2191 Implementation of the thread instrumentation interface.
2192 @sa PSI_v1::delete_thread.
2193 */
delete_thread_v1(PSI_thread * thread)2194 static void delete_thread_v1(PSI_thread *thread)
2195 {
2196 PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
2197
2198 if (pfs != NULL)
2199 {
2200 aggregate_thread(pfs, pfs->m_account, pfs->m_user, pfs->m_host);
2201 destroy_thread(pfs);
2202 }
2203 }
2204
2205 /**
2206 Implementation of the mutex instrumentation interface.
2207 @sa PSI_v1::start_mutex_wait.
2208 */
static PSI_mutex_locker*
start_mutex_wait_v1(PSI_mutex_locker_state *state,
                    PSI_mutex *mutex, PSI_mutex_operation op,
                    const char *src_file, uint src_line)
{
  PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
  DBUG_ASSERT((int) op >= 0);
  DBUG_ASSERT((uint) op < array_elements(mutex_operation_map));
  DBUG_ASSERT(state != NULL);

  DBUG_ASSERT(pfs_mutex != NULL);
  DBUG_ASSERT(pfs_mutex->m_class != NULL);

  /* A NULL locker means: do not instrument this wait at all. */
  if (! pfs_mutex->m_enabled)
    return NULL;

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_mutex->m_timed)
    {
      /* Read the timer once here; end_mutex_wait computes the delta. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Wait-stack overflow: record the loss instead of an event. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous slot on the wait stack is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_mutex->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_mutex->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= mutex_operation_map[(int) op];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_MUTEX;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_mutex->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  state->m_flags= flags;
  state->m_mutex= mutex;
  return reinterpret_cast<PSI_mutex_locker*> (state);
}
2301
2302 /**
2303 Implementation of the rwlock instrumentation interface.
2304 @sa PSI_v1::start_rwlock_rdwait
2305 @sa PSI_v1::start_rwlock_wrwait
2306 */
static PSI_rwlock_locker*
start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
                     PSI_rwlock *rwlock,
                     PSI_rwlock_operation op,
                     const char *src_file, uint src_line)
{
  PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(rwlock_operation_map));
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(pfs_rwlock != NULL);
  DBUG_ASSERT(pfs_rwlock->m_class != NULL);

  /* A NULL locker means: do not instrument this wait at all. */
  if (! pfs_rwlock->m_enabled)
    return NULL;

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_rwlock->m_timed)
    {
      /* Read the timer once here; the end-wait call computes the delta. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Wait-stack overflow: record the loss instead of an event. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous slot on the wait stack is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_rwlock->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_rwlock->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= rwlock_operation_map[static_cast<int> (op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_RWLOCK;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_rwlock->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
      state->m_thread= NULL;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  state->m_flags= flags;
  state->m_rwlock= rwlock;
  return reinterpret_cast<PSI_rwlock_locker*> (state);
}
2399
2400 /**
2401 Implementation of the cond instrumentation interface.
2402 @sa PSI_v1::start_cond_wait.
2403 */
static PSI_cond_locker*
start_cond_wait_v1(PSI_cond_locker_state *state,
                   PSI_cond *cond, PSI_mutex *mutex,
                   PSI_cond_operation op,
                   const char *src_file, uint src_line)
{
  /*
    Note about the unused PSI_mutex *mutex parameter:
    In the pthread library, a call to pthread_cond_wait()
    causes an unlock() + lock() on the mutex associated with the condition.
    This mutex operation is not instrumented, so the mutex will still
    appear as locked when a thread is waiting on a condition.
    This has no impact now, as unlock_mutex() is not recording events.
    When unlock_mutex() is implemented by later work logs,
    this parameter here will be used to adjust the mutex state,
    in start_cond_wait_v1() and end_cond_wait_v1().
  */
  PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(cond_operation_map));
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(pfs_cond != NULL);
  DBUG_ASSERT(pfs_cond->m_class != NULL);

  /* A NULL locker means: do not instrument this wait at all. */
  if (! pfs_cond->m_enabled)
    return NULL;

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_cond->m_timed)
    {
      /* Read the timer once here; the end-wait call computes the delta. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Wait-stack overflow: record the loss instead of an event. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous slot on the wait stack is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= pfs_cond->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_cond->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= cond_operation_map[static_cast<int> (op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_COND;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_cond->m_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /*
        Complete shortcut.
      */
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
      pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
      return NULL;
    }
  }

  state->m_flags= flags;
  state->m_cond= cond;
  state->m_mutex= mutex;
  return reinterpret_cast<PSI_cond_locker*> (state);
}
2507
lock_flags_to_lock_type(uint flags)2508 static inline PFS_TL_LOCK_TYPE lock_flags_to_lock_type(uint flags)
2509 {
2510 enum thr_lock_type value= static_cast<enum thr_lock_type> (flags);
2511
2512 switch (value)
2513 {
2514 case TL_READ:
2515 return PFS_TL_READ;
2516 case TL_READ_WITH_SHARED_LOCKS:
2517 return PFS_TL_READ_WITH_SHARED_LOCKS;
2518 case TL_READ_HIGH_PRIORITY:
2519 return PFS_TL_READ_HIGH_PRIORITY;
2520 case TL_READ_NO_INSERT:
2521 return PFS_TL_READ_NO_INSERT;
2522 case TL_WRITE_ALLOW_WRITE:
2523 return PFS_TL_WRITE_ALLOW_WRITE;
2524 case TL_WRITE_CONCURRENT_INSERT:
2525 return PFS_TL_WRITE_CONCURRENT_INSERT;
2526 case TL_WRITE_DELAYED:
2527 return PFS_TL_WRITE_DELAYED;
2528 case TL_WRITE_LOW_PRIORITY:
2529 return PFS_TL_WRITE_LOW_PRIORITY;
2530 case TL_WRITE:
2531 return PFS_TL_WRITE;
2532
2533 case TL_WRITE_ONLY:
2534 case TL_IGNORE:
2535 case TL_UNLOCK:
2536 case TL_READ_DEFAULT:
2537 case TL_WRITE_DEFAULT:
2538 default:
2539 DBUG_ASSERT(false);
2540 }
2541
2542 /* Dead code */
2543 return PFS_TL_READ;
2544 }
2545
external_lock_flags_to_lock_type(uint flags)2546 static inline PFS_TL_LOCK_TYPE external_lock_flags_to_lock_type(uint flags)
2547 {
2548 DBUG_ASSERT(flags == F_RDLCK || flags == F_WRLCK);
2549 return (flags == F_RDLCK ? PFS_TL_READ_EXTERNAL : PFS_TL_WRITE_EXTERNAL);
2550 }
2551
2552 /**
2553 Implementation of the table instrumentation interface.
2554 @sa PSI_v1::start_table_io_wait_v1
2555 */
static PSI_table_locker*
start_table_io_wait_v1(PSI_table_locker_state *state,
                       PSI_table *table,
                       PSI_table_io_operation op,
                       uint index,
                       const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(table_io_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);
  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  /* A NULL locker means: do not instrument this table io wait. */
  if (! pfs_table->m_io_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_io_timed)
    {
      /* Read the timer once here; the end-wait call computes the delta. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Wait-stack overflow: record the loss instead of an event. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous slot on the wait stack is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      /*
        The table share is referenced weakly (pointer + version),
        since the share may be destroyed while this event is kept in history.
      */
      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_io_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_io_operation_map[static_cast<int> (op)];
      wait->m_flags= 0;
      wait->m_object_type= share->get_object_type();
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= index;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_io_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_table= table;
  state->m_io_operation= op;
  state->m_index= index;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2653
2654 /**
2655 Implementation of the table instrumentation interface.
2656 @sa PSI_v1::start_table_lock_wait.
2657 */
static PSI_table_locker*
start_table_lock_wait_v1(PSI_table_locker_state *state,
                         PSI_table *table,
                         PSI_table_lock_operation op,
                         ulong op_flags,
                         const char *src_file, uint src_line)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT((op == PSI_TABLE_LOCK) || (op == PSI_TABLE_EXTERNAL_LOCK));

  PFS_table *pfs_table= reinterpret_cast<PFS_table*> (table);

  DBUG_ASSERT(pfs_table != NULL);
  DBUG_ASSERT(pfs_table->m_share != NULL);

  /* A NULL locker means: do not instrument this table lock wait. */
  if (! pfs_table->m_lock_enabled)
    return NULL;

  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

  /* Normalize the operation flags to a PFS lock type first. */
  PFS_TL_LOCK_TYPE lock_type;

  switch (op)
  {
  case PSI_TABLE_LOCK:
    lock_type= lock_flags_to_lock_type(op_flags);
    break;
  case PSI_TABLE_EXTERNAL_LOCK:
    /*
      See the handler::external_lock() API design,
      there is no handler::external_unlock().
    */
    if (op_flags == F_UNLCK)
      return NULL;
    lock_type= external_lock_flags_to_lock_type(op_flags);
    break;
  default:
    lock_type= PFS_TL_READ;
    DBUG_ASSERT(false);
  }

  DBUG_ASSERT((uint) lock_type < array_elements(table_lock_operation_map));

  uint flags;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    if (pfs_thread == NULL)
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_table->m_lock_timed)
    {
      /* Read the timer once here; the end-wait call computes the delta. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Wait-stack overflow: record the loss instead of an event. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous slot on the wait stack is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      /*
        The table share is referenced weakly (pointer + version),
        since the share may be destroyed while this event is kept in history.
      */
      PFS_table_share *share= pfs_table->m_share;
      wait->m_thread= pfs_thread;
      wait->m_class= &global_table_lock_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_table->m_identity;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= table_lock_operation_map[lock_type];
      wait->m_flags= 0;
      wait->m_object_type= share->get_object_type();
      wait->m_weak_table_share= share;
      wait->m_weak_version= share->get_version();
      wait->m_index= 0;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_TABLE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_table->m_lock_timed)
    {
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut here */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_table= table;
  /* The lock type is carried in m_index for the end-wait call. */
  state->m_index= lock_type;
  return reinterpret_cast<PSI_table_locker*> (state);
}
2778
2779 /**
2780 Implementation of the file instrumentation interface.
2781 @sa PSI_v1::get_thread_file_name_locker.
2782 */
static PSI_file_locker*
get_thread_file_name_locker_v1(PSI_file_locker_state *state,
                               PSI_file_key key,
                               PSI_file_operation op,
                               const char *name, const void *identity)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  /* A NULL locker means: do not instrument this file operation. */
  if (psi_unlikely(! flag_global_instrumentation))
    return NULL;
  PFS_file_class *klass= find_file_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  /* Needed for the LF_HASH */
  PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
  if (unlikely(pfs_thread == NULL))
    return NULL;

  if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
    return NULL;

  uint flags;

  state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
  flags= STATE_FLAG_THREAD;

  if (klass->m_timed)
    flags|= STATE_FLAG_TIMED;

  if (flag_events_waits_current)
  {
    /* Wait-stack overflow: record the loss instead of an event. */
    if (unlikely(pfs_thread->m_events_waits_current >=
                 & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
    {
      locker_lost++;
      return NULL;
    }
    PFS_events_waits *wait= pfs_thread->m_events_waits_current;
    state->m_wait= wait;
    flags|= STATE_FLAG_EVENT;

    /* The previous slot on the wait stack is the nesting (parent) event. */
    PFS_events_waits *parent_event= wait - 1;
    wait->m_event_type= EVENT_TYPE_WAIT;
    wait->m_nesting_event_id= parent_event->m_event_id;
    wait->m_nesting_event_type= parent_event->m_event_type;

    /* The PFS_file is not known yet; it is resolved at start/end of the wait. */
    wait->m_thread= pfs_thread;
    wait->m_class= klass;
    wait->m_timer_start= 0;
    wait->m_timer_end= 0;
    wait->m_object_instance_addr= NULL;
    wait->m_weak_file= NULL;
    wait->m_weak_version= 0;
    wait->m_event_id= pfs_thread->m_event_id++;
    wait->m_end_event_id= 0;
    wait->m_operation= file_operation_map[static_cast<int> (op)];
    wait->m_wait_class= WAIT_CLASS_FILE;

    pfs_thread->m_events_waits_current++;
  }

  state->m_flags= flags;
  state->m_file= NULL;
  state->m_name= name;
  state->m_class= klass;
  state->m_operation= op;
  return reinterpret_cast<PSI_file_locker*> (state);
}
2856
/**
  Implementation of the file instrumentation interface.
  Builds a locker for an operation on an already opened file stream.
  @sa PSI_v1::get_thread_file_stream_locker.
*/
static PSI_file_locker*
get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
                                 PSI_file *file, PSI_file_operation op)
{
  PFS_file *pfs_file= reinterpret_cast<PFS_file*> (file);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  /* Not instrumented: no locker. */
  if (unlikely(pfs_file == NULL))
    return NULL;
  DBUG_ASSERT(pfs_file->m_class != NULL);
  PFS_file_class *klass= pfs_file->m_class;

  /* Per-instance ENABLED flag, from setup_instruments. */
  if (! pfs_file->m_enabled)
    return NULL;

  uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_file->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_waits_current)
    {
      /* Guard against overflow of the per-thread wait event stack. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      /* Push a new WAIT record on the per-thread wait stack. */
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= klass;
      wait->m_timer_start= 0;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_file;
      /* Weak reference: checked against m_weak_version before later use. */
      wait->m_weak_file= pfs_file;
      wait->m_weak_version= pfs_file->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= file_operation_map[static_cast<int> (op)];
      wait->m_wait_class= WAIT_CLASS_FILE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    /* No thread instrumentation: aggregate to the instance only. */
    state->m_thread= NULL;
    if (pfs_file->m_timed)
    {
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut. */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
  state->m_operation= op;
  state->m_name= NULL;
  state->m_class= klass;
  return reinterpret_cast<PSI_file_locker*> (state);
}
2946
/**
  Implementation of the file instrumentation interface.
  Builds a locker for an operation on a file identified by descriptor.
  @sa PSI_v1::get_thread_file_descriptor_locker.
*/
static PSI_file_locker*
get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
                                     File file, PSI_file_operation op)
{
  int index= static_cast<int> (file);
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(file_operation_map));
  DBUG_ASSERT(state != NULL);

  /* Reject descriptors outside of the instrumented handle array. */
  if (unlikely((index < 0) || (index >= file_handle_max)))
    return NULL;

  PFS_file *pfs_file= file_handle_array[index];
  if (unlikely(pfs_file == NULL))
    return NULL;

  /*
    We are about to close a file by descriptor number,
    and the calling code still holds the descriptor.
    Cleanup the file descriptor <--> file instrument association.
    Remove the instrumentation *before* the close to avoid race
    conditions with another thread opening a file
    (that could be given the same descriptor).
  */
  if (op == PSI_FILE_CLOSE)
    file_handle_array[index]= NULL;

  /* Per-instance ENABLED flag, from setup_instruments. */
  if (! pfs_file->m_enabled)
    return NULL;

  DBUG_ASSERT(pfs_file->m_class != NULL);
  PFS_file_class *klass= pfs_file->m_class;

  uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_file->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_waits_current)
    {
      /* Guard against overflow of the per-thread wait event stack. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      /* Push a new WAIT record on the per-thread wait stack. */
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;

      wait->m_thread= pfs_thread;
      wait->m_class= klass;
      wait->m_timer_start= 0;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_file;
      /* Weak reference: checked against m_weak_version before later use. */
      wait->m_weak_file= pfs_file;
      wait->m_weak_version= pfs_file->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= file_operation_map[static_cast<int> (op)];
      wait->m_wait_class= WAIT_CLASS_FILE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    /* No thread instrumentation: aggregate to the instance only. */
    state->m_thread= NULL;
    if (pfs_file->m_timed)
    {
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /* TODO: consider a shortcut. */
      flags= 0;
    }
  }

  state->m_flags= flags;
  state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
  state->m_operation= op;
  state->m_name= NULL;
  state->m_class= klass;
  return reinterpret_cast<PSI_file_locker*> (state);
}
3052
/** Socket locker */

/**
  Implementation of the socket instrumentation interface.
  Starts a socket wait event and, when timing is enabled,
  records the wait start time in @c state.
  @sa PSI_v1::start_socket_wait.
*/
static PSI_socket_locker*
start_socket_wait_v1(PSI_socket_locker_state *state,
                     PSI_socket *socket,
                     PSI_socket_operation op,
                     size_t count,
                     const char *src_file, uint src_line)
{
  DBUG_ASSERT(static_cast<int> (op) >= 0);
  DBUG_ASSERT(static_cast<uint> (op) < array_elements(socket_operation_map));
  DBUG_ASSERT(state != NULL);
  PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*> (socket);

  DBUG_ASSERT(pfs_socket != NULL);
  DBUG_ASSERT(pfs_socket->m_class != NULL);

  /* No locker when disabled, or when the socket is in the idle state. */
  if (!pfs_socket->m_enabled || pfs_socket->m_idle)
    return NULL;

  uint flags= 0;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    /*
      Do not use pfs_socket->m_thread_owner here,
      as different threads may use concurrently the same socket,
      for example during a KILL.
    */
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);

    if (unlikely(pfs_thread == NULL))
      return NULL;

    if (!pfs_thread->m_enabled)
      return NULL;

    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (pfs_socket->m_timed)
    {
      /* Capture the start time and the timer function for the end event. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Guard against overflow of the per-thread wait event stack. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      /* Push a new WAIT record on the per-thread wait stack. */
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      /* The previous stack entry is the nesting (parent) event. */
      PFS_events_waits *parent_event= wait - 1;
      wait->m_event_type= EVENT_TYPE_WAIT;
      wait->m_nesting_event_id= parent_event->m_event_id;
      wait->m_nesting_event_type= parent_event->m_event_type;
      wait->m_thread= pfs_thread;
      wait->m_class= pfs_socket->m_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_object_instance_addr= pfs_socket->m_identity;
      /* Weak reference: checked against m_weak_version before later use. */
      wait->m_weak_socket= pfs_socket;
      wait->m_weak_version= pfs_socket->get_version();
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= socket_operation_map[static_cast<int>(op)];
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_number_of_bytes= count;
      wait->m_wait_class= WAIT_CLASS_SOCKET;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (pfs_socket->m_timed)
    {
      /* Capture the start time and the timer function for the end event. */
      timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
    else
    {
      /*
        Even if timing is disabled, end_socket_wait() still needs a locker to
        capture the number of bytes sent or received by the socket operation.
        For operations that do not have a byte count, then just increment the
        event counter and return a NULL locker.
      */
      switch (op)
      {
        case PSI_SOCKET_CONNECT:
        case PSI_SOCKET_CREATE:
        case PSI_SOCKET_BIND:
        case PSI_SOCKET_SEEK:
        case PSI_SOCKET_OPT:
        case PSI_SOCKET_STAT:
        case PSI_SOCKET_SHUTDOWN:
        case PSI_SOCKET_CLOSE:
        case PSI_SOCKET_SELECT:
          pfs_socket->m_socket_stat.m_io_stat.m_misc.aggregate_counted();
          return NULL;
        default:
          break;
      }
    }
  }

  state->m_flags= flags;
  state->m_socket= socket;
  state->m_operation= op;
  return reinterpret_cast<PSI_socket_locker*> (state);
}
3175
3176 /**
3177 Implementation of the mutex instrumentation interface.
3178 @sa PSI_v1::unlock_mutex.
3179 */
unlock_mutex_v1(PSI_mutex * mutex)3180 static void unlock_mutex_v1(PSI_mutex *mutex)
3181 {
3182 PFS_mutex *pfs_mutex= reinterpret_cast<PFS_mutex*> (mutex);
3183
3184 DBUG_ASSERT(pfs_mutex != NULL);
3185
3186 /*
3187 Note that this code is still protected by the instrumented mutex,
3188 and therefore is thread safe. See inline_mysql_mutex_unlock().
3189 */
3190
3191 /* Always update the instrumented state */
3192 pfs_mutex->m_owner= NULL;
3193 pfs_mutex->m_last_locked= 0;
3194
3195 #ifdef LATER_WL2333
3196 /*
3197 See WL#2333: SHOW ENGINE ... LOCK STATUS.
3198 PFS_mutex::m_lock_stat is not exposed in user visible tables
3199 currently, so there is no point spending time computing it.
3200 */
3201 if (! pfs_mutex->m_enabled)
3202 return;
3203
3204 if (! pfs_mutex->m_timed)
3205 return;
3206
3207 ulonglong locked_time;
3208 locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
3209 pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
3210 #endif
3211 }
3212
3213 /**
3214 Implementation of the rwlock instrumentation interface.
3215 @sa PSI_v1::unlock_rwlock.
3216 */
unlock_rwlock_v1(PSI_rwlock * rwlock)3217 static void unlock_rwlock_v1(PSI_rwlock *rwlock)
3218 {
3219 PFS_rwlock *pfs_rwlock= reinterpret_cast<PFS_rwlock*> (rwlock);
3220 DBUG_ASSERT(pfs_rwlock != NULL);
3221 DBUG_ASSERT(pfs_rwlock == sanitize_rwlock(pfs_rwlock));
3222 DBUG_ASSERT(pfs_rwlock->m_class != NULL);
3223 DBUG_ASSERT(pfs_rwlock->m_lock.is_populated());
3224
3225 bool last_writer= false;
3226 bool last_reader= false;
3227
3228 /*
3229 Note that this code is still protected by the instrumented rwlock,
3230 and therefore is:
3231 - thread safe for write locks
3232 - almost thread safe for read locks (pfs_rwlock->m_readers is unsafe).
3233 See inline_mysql_rwlock_unlock()
3234 */
3235
3236 /* Always update the instrumented state */
3237 if (pfs_rwlock->m_writer != NULL)
3238 {
3239 /* Nominal case, a writer is unlocking. */
3240 last_writer= true;
3241 pfs_rwlock->m_writer= NULL;
3242 /* Reset the readers stats, they could be off */
3243 pfs_rwlock->m_readers= 0;
3244 }
3245 else if (likely(pfs_rwlock->m_readers > 0))
3246 {
3247 /* Nominal case, a reader is unlocking. */
3248 if (--(pfs_rwlock->m_readers) == 0)
3249 last_reader= true;
3250 }
3251 else
3252 {
3253 /*
3254 Edge case, we have no writer and no readers,
3255 on an unlock event.
3256 This is possible for:
3257 - partial instrumentation
3258 - instrumentation disabled at runtime,
3259 see when get_thread_rwlock_locker_v1() returns NULL
3260 No further action is taken here, the next
3261 write lock will put the statistics is a valid state.
3262 */
3263 }
3264
3265 #ifdef LATER_WL2333
3266 /* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
3267
3268 if (! pfs_rwlock->m_enabled)
3269 return;
3270
3271 if (! pfs_rwlock->m_timed)
3272 return;
3273
3274 ulonglong locked_time;
3275 if (last_writer)
3276 {
3277 locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
3278 pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
3279 }
3280 else if (last_reader)
3281 {
3282 locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
3283 pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
3284 }
3285 #else
3286 (void) last_reader;
3287 (void) last_writer;
3288 #endif
3289 }
3290
3291 /**
3292 Implementation of the cond instrumentation interface.
3293 @sa PSI_v1::signal_cond.
3294 */
signal_cond_v1(PSI_cond * cond)3295 static void signal_cond_v1(PSI_cond* cond)
3296 {
3297 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3298
3299 DBUG_ASSERT(pfs_cond != NULL);
3300
3301 pfs_cond->m_cond_stat.m_signal_count++;
3302 }
3303
3304 /**
3305 Implementation of the cond instrumentation interface.
3306 @sa PSI_v1::broadcast_cond.
3307 */
broadcast_cond_v1(PSI_cond * cond)3308 static void broadcast_cond_v1(PSI_cond* cond)
3309 {
3310 PFS_cond *pfs_cond= reinterpret_cast<PFS_cond*> (cond);
3311
3312 DBUG_ASSERT(pfs_cond != NULL);
3313
3314 pfs_cond->m_cond_stat.m_broadcast_count++;
3315 }
3316
/**
  Implementation of the idle instrumentation interface.
  Starts an idle wait event and, when timing is enabled,
  records the wait start time in @c state.
  @sa PSI_v1::start_idle_wait.
*/
static PSI_idle_locker*
start_idle_wait_v1(PSI_idle_locker_state* state, const char *src_file, uint src_line)
{
  DBUG_ASSERT(state != NULL);

  /* No locker when the instrumentation is disabled globally. */
  if (psi_unlikely(! flag_global_instrumentation))
    return NULL;

  if (!global_idle_class.m_enabled)
    return NULL;

  uint flags= 0;
  ulonglong timer_start= 0;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (!pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    /* An idle wait can not happen in the middle of a statement. */
    DBUG_ASSERT(pfs_thread->m_events_statements_count == 0);

    if (global_idle_class.m_timed)
    {
      /* Capture the start time and the timer function for the end event. */
      timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
      state->m_timer_start= timer_start;
      flags|= STATE_FLAG_TIMED;
    }

    if (flag_events_waits_current)
    {
      /* Guard against overflow of the per-thread wait event stack. */
      if (unlikely(pfs_thread->m_events_waits_current >=
                   & pfs_thread->m_events_waits_stack[WAIT_STACK_SIZE]))
      {
        locker_lost++;
        return NULL;
      }
      /* Push a new WAIT record on the per-thread wait stack. */
      PFS_events_waits *wait= pfs_thread->m_events_waits_current;
      state->m_wait= wait;
      flags|= STATE_FLAG_EVENT;

      wait->m_event_type= EVENT_TYPE_WAIT;
      /*
        IDLE events are waits, but by definition we know that
        such waits happen outside of any STAGE and STATEMENT,
        so they have no parents.
      */
      wait->m_nesting_event_id= 0;
      /* no need to set wait->m_nesting_event_type */

      wait->m_thread= pfs_thread;
      wait->m_class= &global_idle_class;
      wait->m_timer_start= timer_start;
      wait->m_timer_end= 0;
      wait->m_event_id= pfs_thread->m_event_id++;
      wait->m_end_event_id= 0;
      wait->m_operation= OPERATION_TYPE_IDLE;
      wait->m_source_file= src_file;
      wait->m_source_line= src_line;
      wait->m_wait_class= WAIT_CLASS_IDLE;

      pfs_thread->m_events_waits_current++;
    }
  }
  else
  {
    if (global_idle_class.m_timed)
    {
      /* Capture the start time and the timer function for the end event. */
      timer_start= get_timer_raw_value_and_function(idle_timer, &state->m_timer);
      state->m_timer_start= timer_start;
      flags= STATE_FLAG_TIMED;
    }
  }

  state->m_flags= flags;
  return reinterpret_cast<PSI_idle_locker*> (state);
}
3402
/**
  Implementation of the idle instrumentation interface.
  Ends an idle wait event and aggregates the statistics.
  @sa PSI_v1::end_idle_wait.
*/
static void end_idle_wait_v1(PSI_idle_locker* locker)
{
  PSI_idle_locker_state *state= reinterpret_cast<PSI_idle_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);
  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    if (flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  if (flags & STATE_FLAG_TIMED)
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
    global_idle_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
    global_idle_stat.aggregate_counted();
  }
}
3467
/**
  Implementation of the mutex instrumentation interface.
  Ends a mutex wait event and aggregates the statistics.
  @param locker the locker built by the matching start call
  @param rc 0 when the lock was acquired, non zero otherwise
  @sa PSI_v1::end_mutex_wait.
*/
static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
{
  PSI_mutex_locker_state *state= reinterpret_cast<PSI_mutex_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex);
  DBUG_ASSERT(mutex != NULL);
  PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);

  uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
  }

  if (likely(rc == 0))
  {
    /* The lock was acquired: record the new owner. */
    mutex->m_owner= thread;
    mutex->m_last_locked= timer_end;
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= mutex->m_class->m_event_name_index;

    if (flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3539
/**
  Implementation of the rwlock instrumentation interface.
  Ends a rwlock read wait event and aggregates the statistics.
  @param locker the locker built by the matching start call
  @param rc 0 when the read lock was acquired, non zero otherwise
  @sa PSI_v1::end_rwlock_rdwait.
*/
static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
  DBUG_ASSERT(rwlock != NULL);

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
  }

  if (rc == 0)
  {
    /*
      Warning:
      Multiple threads can execute this section concurrently
      (since multiple readers can execute in parallel).
      The statistics generated are not safe, which is why they are
      just statistics, not facts.
    */
    if (rwlock->m_readers == 0)
      rwlock->m_last_read= timer_end;
    rwlock->m_writer= NULL;
    rwlock->m_readers++;
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= rwlock->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3620
/**
  Implementation of the rwlock instrumentation interface.
  Ends a rwlock write wait event and aggregates the statistics.
  @param locker the locker built by the matching start call
  @param rc 0 when the write lock was acquired, non zero otherwise
  @sa PSI_v1::end_rwlock_wrwait.
*/
static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
{
  PSI_rwlock_locker_state *state= reinterpret_cast<PSI_rwlock_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_rwlock *rwlock= reinterpret_cast<PFS_rwlock *> (state->m_rwlock);
  DBUG_ASSERT(rwlock != NULL);
  PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
  }

  if (likely(rc == 0))
  {
    /* Thread safe : we are protected by the instrumented rwlock */
    rwlock->m_writer= thread;
    rwlock->m_last_written= timer_end;
    /* Reset the readers stats, they could be off */
    rwlock->m_readers= 0;
    rwlock->m_last_read= 0;
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= rwlock->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3694
/**
  Implementation of the cond instrumentation interface.
  Ends a condition wait event and aggregates the statistics.
  @param locker the locker built by the matching start call
  @param rc return code of the underlying condition wait (unused)
  @sa PSI_v1::end_cond_wait.
*/
static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
{
  PSI_cond_locker_state *state= reinterpret_cast<PSI_cond_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_cond *cond= reinterpret_cast<PFS_cond *> (state->m_cond);
  /* PFS_mutex *mutex= reinterpret_cast<PFS_mutex *> (state->m_mutex); */

  if (state->m_flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
    cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
  }
  else
  {
    /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
    cond->m_cond_stat.m_wait_stat.aggregate_counted();
  }

  if (state->m_flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;
    uint index= cond->m_class->m_event_name_index;

    if (state->m_flags & STATE_FLAG_TIMED)
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
      event_name_array[index].aggregate_value(wait_time);
    }
    else
    {
      /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
      event_name_array[index].aggregate_counted();
    }

    if (state->m_flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }
}
3760
/**
  Implementation of the table instrumentation interface.
  Ends a table io wait event and aggregates the statistics,
  per operation (fetch/insert/update/delete) and per index.
  @sa PSI_v1::end_table_io_wait.
*/
static void end_table_io_wait_v1(PSI_table_locker* locker)
{
  PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
  DBUG_ASSERT(table != NULL);

  PFS_single_stat *stat;
  PFS_table_io_stat *table_io_stat;

  /* m_index == MAX_INDEXES denotes the table-level (no index) slot. */
  DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
              (state->m_index == MAX_INDEXES));

  table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
  table_io_stat->m_has_data= true;

  /* Select the statistic bucket matching the io operation. */
  switch (state->m_io_operation)
  {
  case PSI_TABLE_FETCH_ROW:
    stat= & table_io_stat->m_fetch;
    break;
  case PSI_TABLE_WRITE_ROW:
    stat= & table_io_stat->m_insert;
    break;
  case PSI_TABLE_UPDATE_ROW:
    stat= & table_io_stat->m_update;
    break;
  case PSI_TABLE_DELETE_ROW:
    stat= & table_io_stat->m_delete;
    break;
  default:
    DBUG_ASSERT(false);
    stat= NULL;
    break;
  }

  uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    stat->aggregate_value(wait_time);
  }
  else
  {
    stat->aggregate_counted();
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    /*
      Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
      (for wait/io/table/sql/handler)
    */
    if (flags & STATE_FLAG_TIMED)
    {
      event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  table->m_has_io_stats= true;
}
3858
/**
  Implementation of the table instrumentation interface.
  Ends a table lock wait event and aggregates the statistics,
  per lock type.
  @sa PSI_v1::end_table_lock_wait.
*/
static void end_table_lock_wait_v1(PSI_table_locker* locker)
{
  PSI_table_locker_state *state= reinterpret_cast<PSI_table_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  ulonglong timer_end= 0;
  ulonglong wait_time= 0;

  PFS_table *table= reinterpret_cast<PFS_table *> (state->m_table);
  DBUG_ASSERT(table != NULL);

  /* Here m_index is the lock type slot, set by the start call. */
  PFS_single_stat *stat= & table->m_table_stat.m_lock_stat.m_stat[state->m_index];

  uint flags= state->m_flags;

  if (flags & STATE_FLAG_TIMED)
  {
    /* Read the end time with the same timer used at start. */
    timer_end= state->m_timer();
    wait_time= timer_end - state->m_timer_start;
    stat->aggregate_value(wait_time);
  }
  else
  {
    stat->aggregate_counted();
  }

  if (flags & STATE_FLAG_THREAD)
  {
    PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
    DBUG_ASSERT(thread != NULL);

    PFS_single_stat *event_name_array;
    event_name_array= thread->m_instr_class_waits_stats;

    /*
      Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
      (for wait/lock/table/sql/handler)
    */
    if (flags & STATE_FLAG_TIMED)
    {
      event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
    }
    else
    {
      event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
    }

    if (flags & STATE_FLAG_EVENT)
    {
      /* Complete the wait record, publish it to history, and pop it. */
      PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
      DBUG_ASSERT(wait != NULL);

      wait->m_timer_end= timer_end;
      wait->m_end_event_id= thread->m_event_id;
      if (flag_events_waits_history)
        insert_events_waits_history(thread, wait);
      if (flag_events_waits_history_long)
        insert_events_waits_history_long(wait);
      thread->m_events_waits_current--;

      /* The record just ended must be the top of the wait stack. */
      DBUG_ASSERT(wait == thread->m_events_waits_current);
    }
  }

  table->m_has_lock_stats= true;
}
3929
3930 static void start_file_wait_v1(PSI_file_locker *locker,
3931 size_t count,
3932 const char *src_file,
3933 uint src_line);
3934
3935 static void end_file_wait_v1(PSI_file_locker *locker,
3936 size_t count);
3937
3938 /**
3939 Implementation of the file instrumentation interface.
3940 @sa PSI_v1::start_file_open_wait.
3941 */
start_file_open_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)3942 static void start_file_open_wait_v1(PSI_file_locker *locker,
3943 const char *src_file,
3944 uint src_line)
3945 {
3946 start_file_wait_v1(locker, 0, src_file, src_line);
3947
3948 return;
3949 }
3950
3951 /**
3952 Implementation of the file instrumentation interface.
3953 @sa PSI_v1::end_file_open_wait.
3954 */
end_file_open_wait_v1(PSI_file_locker * locker,void * result)3955 static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
3956 void *result)
3957 {
3958 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3959 DBUG_ASSERT(state != NULL);
3960
3961 switch (state->m_operation)
3962 {
3963 case PSI_FILE_STAT:
3964 case PSI_FILE_RENAME:
3965 break;
3966 case PSI_FILE_STREAM_OPEN:
3967 case PSI_FILE_CREATE:
3968 case PSI_FILE_OPEN:
3969 if (result != NULL)
3970 {
3971 PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
3972 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
3973 const char *name= state->m_name;
3974 uint len= (uint)strlen(name);
3975 PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
3976 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
3977 }
3978 break;
3979 default:
3980 DBUG_ASSERT(false);
3981 break;
3982 }
3983
3984 end_file_wait_v1(locker, 0);
3985
3986 return state->m_file;
3987 }
3988
3989 /**
3990 Implementation of the file instrumentation interface.
3991 @sa PSI_v1::end_file_open_wait_and_bind_to_descriptor.
3992 */
end_file_open_wait_and_bind_to_descriptor_v1(PSI_file_locker * locker,File file)3993 static void end_file_open_wait_and_bind_to_descriptor_v1
3994 (PSI_file_locker *locker, File file)
3995 {
3996 PFS_file *pfs_file= NULL;
3997 int index= (int) file;
3998 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
3999 DBUG_ASSERT(state != NULL);
4000
4001 if (index >= 0)
4002 {
4003 PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
4004 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4005 const char *name= state->m_name;
4006 uint len= (uint)strlen(name);
4007 pfs_file= find_or_create_file(thread, klass, name, len, true);
4008 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4009 }
4010
4011 end_file_wait_v1(locker, 0);
4012
4013 if (likely(index >= 0))
4014 {
4015 if (likely(index < file_handle_max))
4016 file_handle_array[index]= pfs_file;
4017 else
4018 {
4019 if (pfs_file != NULL)
4020 release_file(pfs_file);
4021 file_handle_lost++;
4022 }
4023 }
4024 }
4025
4026 /**
4027 Implementation of the file instrumentation interface.
4028 @sa PSI_v1::start_file_wait.
4029 */
start_file_wait_v1(PSI_file_locker * locker,size_t count,const char * src_file,uint src_line)4030 static void start_file_wait_v1(PSI_file_locker *locker,
4031 size_t count,
4032 const char *src_file,
4033 uint src_line)
4034 {
4035 ulonglong timer_start= 0;
4036 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4037 DBUG_ASSERT(state != NULL);
4038
4039 uint flags= state->m_flags;
4040
4041 if (flags & STATE_FLAG_TIMED)
4042 {
4043 timer_start= get_timer_raw_value_and_function(wait_timer, & state->m_timer);
4044 state->m_timer_start= timer_start;
4045 }
4046
4047 if (flags & STATE_FLAG_EVENT)
4048 {
4049 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4050 DBUG_ASSERT(wait != NULL);
4051
4052 wait->m_timer_start= timer_start;
4053 wait->m_source_file= src_file;
4054 wait->m_source_line= src_line;
4055 wait->m_number_of_bytes= count;
4056 }
4057 }
4058
4059 /**
4060 Implementation of the file instrumentation interface.
4061 @sa PSI_v1::end_file_wait.
4062 */
end_file_wait_v1(PSI_file_locker * locker,size_t byte_count)4063 static void end_file_wait_v1(PSI_file_locker *locker,
4064 size_t byte_count)
4065 {
4066 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4067 DBUG_ASSERT(state != NULL);
4068 PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
4069 PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
4070 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4071
4072 ulonglong timer_end= 0;
4073 ulonglong wait_time= 0;
4074 PFS_byte_stat *byte_stat;
4075 uint flags= state->m_flags;
4076 size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
4077
4078 PFS_file_stat *file_stat;
4079
4080 if (file != NULL)
4081 {
4082 file_stat= & file->m_file_stat;
4083 }
4084 else
4085 {
4086 file_stat= & klass->m_file_stat;
4087 }
4088
4089 switch (state->m_operation)
4090 {
4091 /* Group read operations */
4092 case PSI_FILE_READ:
4093 byte_stat= &file_stat->m_io_stat.m_read;
4094 break;
4095 /* Group write operations */
4096 case PSI_FILE_WRITE:
4097 byte_stat= &file_stat->m_io_stat.m_write;
4098 break;
4099 /* Group remaining operations as miscellaneous */
4100 case PSI_FILE_CREATE:
4101 case PSI_FILE_CREATE_TMP:
4102 case PSI_FILE_OPEN:
4103 case PSI_FILE_STREAM_OPEN:
4104 case PSI_FILE_STREAM_CLOSE:
4105 case PSI_FILE_SEEK:
4106 case PSI_FILE_TELL:
4107 case PSI_FILE_FLUSH:
4108 case PSI_FILE_FSTAT:
4109 case PSI_FILE_CHSIZE:
4110 case PSI_FILE_DELETE:
4111 case PSI_FILE_RENAME:
4112 case PSI_FILE_SYNC:
4113 case PSI_FILE_STAT:
4114 case PSI_FILE_CLOSE:
4115 byte_stat= &file_stat->m_io_stat.m_misc;
4116 break;
4117 default:
4118 DBUG_ASSERT(false);
4119 byte_stat= NULL;
4120 break;
4121 }
4122
4123 /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
4124 if (flags & STATE_FLAG_TIMED)
4125 {
4126 timer_end= state->m_timer();
4127 wait_time= timer_end - state->m_timer_start;
4128 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
4129 byte_stat->aggregate(wait_time, bytes);
4130 }
4131 else
4132 {
4133 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
4134 byte_stat->aggregate_counted(bytes);
4135 }
4136
4137 if (flags & STATE_FLAG_THREAD)
4138 {
4139 DBUG_ASSERT(thread != NULL);
4140
4141 PFS_single_stat *event_name_array;
4142 event_name_array= thread->m_instr_class_waits_stats;
4143 uint index= klass->m_event_name_index;
4144
4145 if (flags & STATE_FLAG_TIMED)
4146 {
4147 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4148 event_name_array[index].aggregate_value(wait_time);
4149 }
4150 else
4151 {
4152 /* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4153 event_name_array[index].aggregate_counted();
4154 }
4155
4156 if (state->m_flags & STATE_FLAG_EVENT)
4157 {
4158 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
4159 DBUG_ASSERT(wait != NULL);
4160
4161 wait->m_timer_end= timer_end;
4162 wait->m_number_of_bytes= bytes;
4163 wait->m_end_event_id= thread->m_event_id;
4164 wait->m_object_instance_addr= file;
4165 wait->m_weak_file= file;
4166 wait->m_weak_version= (file ? file->get_version() : 0);
4167
4168 if (flag_events_waits_history)
4169 insert_events_waits_history(thread, wait);
4170 if (flag_events_waits_history_long)
4171 insert_events_waits_history_long(wait);
4172 thread->m_events_waits_current--;
4173
4174 DBUG_ASSERT(wait == thread->m_events_waits_current);
4175 }
4176 }
4177 }
4178
4179 /**
4180 Implementation of the file instrumentation interface.
4181 @sa PSI_v1::start_file_close_wait.
4182 */
start_file_close_wait_v1(PSI_file_locker * locker,const char * src_file,uint src_line)4183 static void start_file_close_wait_v1(PSI_file_locker *locker,
4184 const char *src_file,
4185 uint src_line)
4186 {
4187 PFS_thread *thread;
4188 const char *name;
4189 uint len;
4190 PFS_file *pfs_file;
4191 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4192 DBUG_ASSERT(state != NULL);
4193
4194 switch (state->m_operation)
4195 {
4196 case PSI_FILE_DELETE:
4197 thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4198 name= state->m_name;
4199 len= (uint)strlen(name);
4200 pfs_file= find_or_create_file(thread, NULL, name, len, false);
4201 state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
4202 break;
4203 case PSI_FILE_STREAM_CLOSE:
4204 case PSI_FILE_CLOSE:
4205 break;
4206 default:
4207 DBUG_ASSERT(false);
4208 break;
4209 }
4210
4211 start_file_wait_v1(locker, 0, src_file, src_line);
4212
4213 return;
4214 }
4215
4216 /**
4217 Implementation of the file instrumentation interface.
4218 @sa PSI_v1::end_file_close_wait.
4219 */
end_file_close_wait_v1(PSI_file_locker * locker,int rc)4220 static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
4221 {
4222 PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
4223 DBUG_ASSERT(state != NULL);
4224
4225 end_file_wait_v1(locker, 0);
4226
4227 if (rc == 0)
4228 {
4229 PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
4230 PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
4231
4232 /* Release or destroy the file if necessary */
4233 switch(state->m_operation)
4234 {
4235 case PSI_FILE_CLOSE:
4236 case PSI_FILE_STREAM_CLOSE:
4237 if (file != NULL)
4238 release_file(file);
4239 break;
4240 case PSI_FILE_DELETE:
4241 if (file != NULL)
4242 destroy_file(thread, file);
4243 break;
4244 default:
4245 DBUG_ASSERT(false);
4246 break;
4247 }
4248 }
4249 return;
4250 }
4251
start_stage_v1(PSI_stage_key key,const char * src_file,int src_line)4252 static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
4253 {
4254 ulonglong timer_value= 0;
4255
4256 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4257 if (unlikely(pfs_thread == NULL))
4258 return;
4259
4260 /* Always update column threads.processlist_state. */
4261 pfs_thread->m_stage= key;
4262
4263 if (psi_unlikely(! flag_global_instrumentation))
4264 return;
4265
4266 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4267 return;
4268
4269 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4270 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4271 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4272
4273 PFS_instr_class *old_class= pfs->m_class;
4274 if (old_class != NULL)
4275 {
4276 PFS_stage_stat *event_name_array;
4277 event_name_array= pfs_thread->m_instr_class_stages_stats;
4278 uint index= old_class->m_event_name_index;
4279
4280 /* Finish old event */
4281 if (old_class->m_timed)
4282 {
4283 timer_value= get_timer_raw_value(stage_timer);;
4284 pfs->m_timer_end= timer_value;
4285
4286 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4287 ulonglong stage_time= timer_value - pfs->m_timer_start;
4288 event_name_array[index].aggregate_value(stage_time);
4289 }
4290 else
4291 {
4292 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4293 event_name_array[index].aggregate_counted();
4294 }
4295
4296 if (flag_events_stages_current)
4297 {
4298 pfs->m_end_event_id= pfs_thread->m_event_id;
4299 if (flag_events_stages_history)
4300 insert_events_stages_history(pfs_thread, pfs);
4301 if (flag_events_stages_history_long)
4302 insert_events_stages_history_long(pfs);
4303 }
4304
4305 /* This stage event is now complete. */
4306 pfs->m_class= NULL;
4307
4308 /* New waits will now be attached directly to the parent statement. */
4309 child_wait->m_event_id= parent_statement->m_event_id;
4310 child_wait->m_event_type= parent_statement->m_event_type;
4311 /* See below for new stages, that may overwrite this. */
4312 }
4313
4314 /* Start new event */
4315
4316 PFS_stage_class *new_klass= find_stage_class(key);
4317 if (unlikely(new_klass == NULL))
4318 return;
4319
4320 if (! new_klass->m_enabled)
4321 return;
4322
4323 pfs->m_class= new_klass;
4324 if (new_klass->m_timed)
4325 {
4326 /*
4327 Do not call the timer again if we have a
4328 TIMER_END for the previous stage already.
4329 */
4330 if (timer_value == 0)
4331 timer_value= get_timer_raw_value(stage_timer);
4332 pfs->m_timer_start= timer_value;
4333 }
4334 else
4335 pfs->m_timer_start= 0;
4336 pfs->m_timer_end= 0;
4337
4338 if (flag_events_stages_current)
4339 {
4340 /* m_thread_internal_id is immutable and already set */
4341 DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
4342 pfs->m_event_id= pfs_thread->m_event_id++;
4343 pfs->m_end_event_id= 0;
4344 pfs->m_source_file= src_file;
4345 pfs->m_source_line= src_line;
4346
4347 /* New wait events will have this new stage as parent. */
4348 child_wait->m_event_id= pfs->m_event_id;
4349 child_wait->m_event_type= EVENT_TYPE_STAGE;
4350 }
4351 }
4352
end_stage_v1()4353 static void end_stage_v1()
4354 {
4355 ulonglong timer_value= 0;
4356
4357 PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4358 if (unlikely(pfs_thread == NULL))
4359 return;
4360
4361 pfs_thread->m_stage= 0;
4362
4363 if (psi_unlikely(! flag_global_instrumentation))
4364 return;
4365
4366 if (flag_thread_instrumentation && ! pfs_thread->m_enabled)
4367 return;
4368
4369 PFS_events_stages *pfs= & pfs_thread->m_stage_current;
4370
4371 PFS_instr_class *old_class= pfs->m_class;
4372 if (old_class != NULL)
4373 {
4374 PFS_stage_stat *event_name_array;
4375 event_name_array= pfs_thread->m_instr_class_stages_stats;
4376 uint index= old_class->m_event_name_index;
4377
4378 /* Finish old event */
4379 if (old_class->m_timed)
4380 {
4381 timer_value= get_timer_raw_value(stage_timer);;
4382 pfs->m_timer_end= timer_value;
4383
4384 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
4385 ulonglong stage_time= timer_value - pfs->m_timer_start;
4386 event_name_array[index].aggregate_value(stage_time);
4387 }
4388 else
4389 {
4390 /* Aggregate to EVENTS_STAGES_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
4391 event_name_array[index].aggregate_counted();
4392 }
4393
4394 if (flag_events_stages_current)
4395 {
4396 pfs->m_end_event_id= pfs_thread->m_event_id;
4397 if (flag_events_stages_history)
4398 insert_events_stages_history(pfs_thread, pfs);
4399 if (flag_events_stages_history_long)
4400 insert_events_stages_history_long(pfs);
4401 }
4402
4403 /* New waits will now be attached directly to the parent statement. */
4404 PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
4405 PFS_events_statements *parent_statement= & pfs_thread->m_statement_stack[0];
4406 child_wait->m_event_id= parent_statement->m_event_id;
4407 child_wait->m_event_type= parent_statement->m_event_type;
4408
4409 /* This stage is completed */
4410 pfs->m_class= NULL;
4411 }
4412 }
4413
/**
  Implementation of the statement instrumentation interface.
  Allocates and initializes a statement locker for instrument @c key:
  applies the global / per-thread / per-class enabled filters, pushes a
  new row on the thread's statement stack when EVENTS_STATEMENTS_CURRENT
  is collected, and resets all per-statement counters in @c state.
  @sa PSI_v1::get_thread_statement_locker.
  @param charset the connection character set (CHARSET_INFO*), used to
         tag the statement text
  @return the locker, or NULL when the statement is not instrumented
*/
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
                               PSI_statement_key key,
                               const void *charset)
{
  DBUG_ASSERT(state != NULL);
  DBUG_ASSERT(charset != NULL);

  if (psi_unlikely(! flag_global_instrumentation))
    return NULL;
  PFS_statement_class *klass= find_statement_class(key);
  if (unlikely(klass == NULL))
    return NULL;
  if (! klass->m_enabled)
    return NULL;

  uint flags;

  if (flag_thread_instrumentation)
  {
    PFS_thread *pfs_thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
    if (unlikely(pfs_thread == NULL))
      return NULL;
    if (! pfs_thread->m_enabled)
      return NULL;
    state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
    flags= STATE_FLAG_THREAD;

    if (klass->m_timed)
      flags|= STATE_FLAG_TIMED;

    if (flag_events_statements_current)
    {
      ulonglong event_id= pfs_thread->m_event_id++;

      /* Refuse to nest deeper than the statement stack allows. */
      if (pfs_thread->m_events_statements_count >= statement_stack_max)
      {
        return NULL;
      }

      /* Mark the row dirty while it is being written, so concurrent
         readers of EVENTS_STATEMENTS_CURRENT skip inconsistent data. */
      pfs_thread->m_stmt_lock.allocated_to_dirty();
      PFS_events_statements *pfs= & pfs_thread->m_statement_stack[pfs_thread->m_events_statements_count];
      /* m_thread_internal_id is immutable and already set */
      DBUG_ASSERT(pfs->m_thread_internal_id == pfs_thread->m_thread_internal_id);
      pfs->m_event_id= event_id;
      pfs->m_end_event_id= 0;
      pfs->m_class= klass;
      pfs->m_timer_start= 0;
      pfs->m_timer_end= 0;
      pfs->m_lock_time= 0;
      pfs->m_current_schema_name_length= 0;
      pfs->m_sqltext_length= 0;
      pfs->m_sqltext_truncated= false;
      pfs->m_sqltext_cs_number= system_charset_info->number; /* default */

      /* Reset the diagnostics columns. */
      pfs->m_message_text[0]= '\0';
      pfs->m_sql_errno= 0;
      pfs->m_sqlstate[0]= '\0';
      pfs->m_error_count= 0;
      pfs->m_warning_count= 0;
      pfs->m_rows_affected= 0;

      /* Reset the optimizer/executor statistics columns. */
      pfs->m_rows_sent= 0;
      pfs->m_rows_examined= 0;
      pfs->m_created_tmp_disk_tables= 0;
      pfs->m_created_tmp_tables= 0;
      pfs->m_select_full_join= 0;
      pfs->m_select_full_range_join= 0;
      pfs->m_select_range= 0;
      pfs->m_select_range_check= 0;
      pfs->m_select_scan= 0;
      pfs->m_sort_merge_passes= 0;
      pfs->m_sort_range= 0;
      pfs->m_sort_rows= 0;
      pfs->m_sort_scan= 0;
      pfs->m_no_index_used= 0;
      pfs->m_no_good_index_used= 0;
      pfs->m_digest_storage.reset();

      /* New stages will have this statement as parent */
      PFS_events_stages *child_stage= & pfs_thread->m_stage_current;
      child_stage->m_nesting_event_id= event_id;
      child_stage->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      /* New waits will have this statement as parent, if no stage is instrumented */
      PFS_events_waits *child_wait= & pfs_thread->m_events_waits_stack[0];
      child_wait->m_nesting_event_id= event_id;
      child_wait->m_nesting_event_type= EVENT_TYPE_STATEMENT;

      state->m_statement= pfs;
      flags|= STATE_FLAG_EVENT;

      pfs_thread->m_events_statements_count++;
      pfs_thread->m_stmt_lock.dirty_to_allocated();
    }
  }
  else
  {
    if (klass->m_timed)
      flags= STATE_FLAG_TIMED;
    else
      flags= 0;
  }

  if (flag_statements_digest)
  {
    flags|= STATE_FLAG_DIGEST;
  }

  state->m_discarded= false;
  state->m_class= klass;
  state->m_flags= flags;

  /* Reset the per-statement counters mirrored in the locker state. */
  state->m_lock_time= 0;
  state->m_rows_sent= 0;
  state->m_rows_examined= 0;
  state->m_created_tmp_disk_tables= 0;
  state->m_created_tmp_tables= 0;
  state->m_select_full_join= 0;
  state->m_select_full_range_join= 0;
  state->m_select_range= 0;
  state->m_select_range_check= 0;
  state->m_select_scan= 0;
  state->m_sort_merge_passes= 0;
  state->m_sort_range= 0;
  state->m_sort_rows= 0;
  state->m_sort_scan= 0;
  state->m_no_index_used= 0;
  state->m_no_good_index_used= 0;

  state->m_digest= NULL;

  state->m_schema_name_length= 0;
  state->m_cs_number= ((CHARSET_INFO *)charset)->number;

  return reinterpret_cast<PSI_statement_locker*> (state);
}
4551
/**
  Implementation of the statement instrumentation interface.
  Re-classifies a statement locker created with a mutable instrument
  (e.g. statement/abstract/*) once the real statement type is known.
  Discards the locker when the refined class is unknown or disabled.
  @sa PSI_v1::refine_statement.
  @return the same locker, or NULL when the statement is discarded
*/
static PSI_statement_locker*
refine_statement_v1(PSI_statement_locker *locker,
                    PSI_statement_key key)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  if (state == NULL)
    return NULL;
  DBUG_ASSERT(state->m_class != NULL);
  PFS_statement_class *klass;
  /* Only refine statements for mutable instrumentation */
  klass= reinterpret_cast<PFS_statement_class*> (state->m_class);
  DBUG_ASSERT(klass->is_mutable());
  klass= find_statement_class(key);

  uint flags= state->m_flags;

  if (unlikely(klass == NULL) || !klass->m_enabled)
  {
    /* pop statement stack */
    if (flags & STATE_FLAG_THREAD)
    {
      PFS_thread *pfs_thread= reinterpret_cast<PFS_thread *> (state->m_thread);
      DBUG_ASSERT(pfs_thread != NULL);
      if (pfs_thread->m_events_statements_count > 0)
        pfs_thread->m_events_statements_count--;
    }

    state->m_discarded= true;
    return NULL;
  }

  /* Drop timing if the refined instrument is not timed. */
  if ((flags & STATE_FLAG_TIMED) && ! klass->m_timed)
    flags= flags & ~STATE_FLAG_TIMED;

  if (flags & STATE_FLAG_EVENT)
  {
    PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
    DBUG_ASSERT(pfs != NULL);

    /* mutate EVENTS_STATEMENTS_CURRENT.EVENT_NAME */
    pfs->m_class= klass;
  }

  state->m_class= klass;
  state->m_flags= flags;
  return reinterpret_cast<PSI_statement_locker*> (state);
}
4599
/**
  Implementation of the statement instrumentation interface.
  Starts a statement event: records the start timestamp (when timed),
  and copies the current schema name and source location into both the
  locker state and the EVENTS_STATEMENTS_CURRENT row (when collected).
  @sa PSI_v1::start_statement.
  @param db current schema name (not necessarily NUL-terminated;
         db_len gives its length)
*/
static void start_statement_v1(PSI_statement_locker *locker,
                               const char *db, uint db_len,
                               const char *src_file, uint src_line)
{
  PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
  DBUG_ASSERT(state != NULL);

  uint flags= state->m_flags;
  ulonglong timer_start= 0;

  if (flags & STATE_FLAG_TIMED)
  {
    timer_start= get_timer_raw_value_and_function(statement_timer, & state->m_timer);
    state->m_timer_start= timer_start;
  }

  /* The locker state buffer must be able to hold any schema name. */
  compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
  DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));

  if (db_len > 0)
    memcpy(state->m_schema_name, db, db_len);
  state->m_schema_name_length= db_len;

  if (flags & STATE_FLAG_EVENT)
  {
    PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
    DBUG_ASSERT(pfs != NULL);

    pfs->m_timer_start= timer_start;
    pfs->m_source_file= src_file;
    pfs->m_source_line= src_line;

    DBUG_ASSERT(db_len <= sizeof(pfs->m_current_schema_name));
    if (db_len > 0)
      memcpy(pfs->m_current_schema_name, db, db_len);
    pfs->m_current_schema_name_length= db_len;
  }
}
4638
set_statement_text_v1(PSI_statement_locker * locker,const char * text,uint text_len)4639 static void set_statement_text_v1(PSI_statement_locker *locker,
4640 const char *text, uint text_len)
4641 {
4642 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4643 DBUG_ASSERT(state != NULL);
4644
4645 if (state->m_discarded)
4646 return;
4647
4648 if (state->m_flags & STATE_FLAG_EVENT)
4649 {
4650 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4651 DBUG_ASSERT(pfs != NULL);
4652 if (text_len > sizeof (pfs->m_sqltext))
4653 {
4654 text_len= sizeof(pfs->m_sqltext);
4655 pfs->m_sqltext_truncated= true;
4656 }
4657 if (text_len)
4658 memcpy(pfs->m_sqltext, text, text_len);
4659 pfs->m_sqltext_length= text_len;
4660 pfs->m_sqltext_cs_number= state->m_cs_number;
4661 }
4662
4663 return;
4664 }
4665
/*
  Shared body for the set_statement_*_v1 functions below:
  assigns VALUE to the locker state attribute ATTR, and mirrors it
  into the EVENTS_STATEMENTS_CURRENT row when one is instrumented.
*/
#define SET_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR= VALUE; \
  } \
  return;

/*
  Shared body for the inc_statement_*_v1 functions below:
  adds VALUE to the locker state attribute ATTR, and mirrors the
  increment into the EVENTS_STATEMENTS_CURRENT row when one is
  instrumented.
*/
#define INC_STATEMENT_ATTR_BODY(LOCKER, ATTR, VALUE) \
  PSI_statement_locker_state *state; \
  state= reinterpret_cast<PSI_statement_locker_state*> (LOCKER); \
  if (unlikely(state == NULL)) \
    return; \
  if (state->m_discarded) \
    return; \
  state->ATTR+= VALUE; \
  if (state->m_flags & STATE_FLAG_EVENT) \
  { \
    PFS_events_statements *pfs; \
    pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement); \
    DBUG_ASSERT(pfs != NULL); \
    pfs->ATTR+= VALUE; \
  } \
  return;
4699
/** @sa PSI_v1::set_statement_lock_time. */
static void set_statement_lock_time_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_lock_time, count);
}

/** @sa PSI_v1::set_statement_rows_sent. */
static void set_statement_rows_sent_v1(PSI_statement_locker *locker,
                                       ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_sent, count);
}

/** @sa PSI_v1::set_statement_rows_examined. */
static void set_statement_rows_examined_v1(PSI_statement_locker *locker,
                                           ulonglong count)
{
  SET_STATEMENT_ATTR_BODY(locker, m_rows_examined, count);
}

/** @sa PSI_v1::inc_statement_created_tmp_disk_tables. */
static void inc_statement_created_tmp_disk_tables_v1(PSI_statement_locker *locker,
                                                     ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_disk_tables, count);
}

/** @sa PSI_v1::inc_statement_created_tmp_tables. */
static void inc_statement_created_tmp_tables_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_created_tmp_tables, count);
}

/** @sa PSI_v1::inc_statement_select_full_join. */
static void inc_statement_select_full_join_v1(PSI_statement_locker *locker,
                                              ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_join, count);
}

/** @sa PSI_v1::inc_statement_select_full_range_join. */
static void inc_statement_select_full_range_join_v1(PSI_statement_locker *locker,
                                                    ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_full_range_join, count);
}

/** @sa PSI_v1::inc_statement_select_range. */
static void inc_statement_select_range_v1(PSI_statement_locker *locker,
                                          ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range, count);
}

/** @sa PSI_v1::inc_statement_select_range_check. */
static void inc_statement_select_range_check_v1(PSI_statement_locker *locker,
                                                ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_range_check, count);
}

/** @sa PSI_v1::inc_statement_select_scan. */
static void inc_statement_select_scan_v1(PSI_statement_locker *locker,
                                         ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_select_scan, count);
}

/** @sa PSI_v1::inc_statement_sort_merge_passes. */
static void inc_statement_sort_merge_passes_v1(PSI_statement_locker *locker,
                                               ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_merge_passes, count);
}

/** @sa PSI_v1::inc_statement_sort_range. */
static void inc_statement_sort_range_v1(PSI_statement_locker *locker,
                                        ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_range, count);
}

/** @sa PSI_v1::inc_statement_sort_rows. */
static void inc_statement_sort_rows_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_rows, count);
}

/** @sa PSI_v1::inc_statement_sort_scan. */
static void inc_statement_sort_scan_v1(PSI_statement_locker *locker,
                                       ulong count)
{
  INC_STATEMENT_ATTR_BODY(locker, m_sort_scan, count);
}

/** @sa PSI_v1::set_statement_no_index_used. */
static void set_statement_no_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_index_used, 1);
}

/** @sa PSI_v1::set_statement_no_good_index_used. */
static void set_statement_no_good_index_used_v1(PSI_statement_locker *locker)
{
  SET_STATEMENT_ATTR_BODY(locker, m_no_good_index_used, 1);
}
4793
end_statement_v1(PSI_statement_locker * locker,void * stmt_da)4794 static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
4795 {
4796 PSI_statement_locker_state *state= reinterpret_cast<PSI_statement_locker_state*> (locker);
4797 Diagnostics_area *da= reinterpret_cast<Diagnostics_area*> (stmt_da);
4798 DBUG_ASSERT(state != NULL);
4799 DBUG_ASSERT(da != NULL);
4800
4801 if (state->m_discarded)
4802 return;
4803
4804 PFS_statement_class *klass= reinterpret_cast<PFS_statement_class *> (state->m_class);
4805 DBUG_ASSERT(klass != NULL);
4806
4807 ulonglong timer_end= 0;
4808 ulonglong wait_time= 0;
4809 uint flags= state->m_flags;
4810
4811 if (flags & STATE_FLAG_TIMED)
4812 {
4813 timer_end= state->m_timer();
4814 wait_time= timer_end - state->m_timer_start;
4815 }
4816
4817 PFS_statement_stat *event_name_array;
4818 uint index= klass->m_event_name_index;
4819 PFS_statement_stat *stat;
4820
4821 /*
4822 Capture statement stats by digest.
4823 */
4824 const sql_digest_storage *digest_storage= NULL;
4825 PFS_statement_stat *digest_stat= NULL;
4826
4827 if (flags & STATE_FLAG_THREAD)
4828 {
4829 PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
4830 DBUG_ASSERT(thread != NULL);
4831 event_name_array= thread->m_instr_class_statements_stats;
4832 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_BY_THREAD_BY_EVENT_NAME */
4833 stat= & event_name_array[index];
4834
4835 if (flags & STATE_FLAG_DIGEST)
4836 {
4837 digest_storage= state->m_digest;
4838
4839 if (digest_storage != NULL)
4840 {
4841 /* Populate PFS_statements_digest_stat with computed digest information.*/
4842 digest_stat= find_or_create_digest(thread, digest_storage,
4843 state->m_schema_name,
4844 state->m_schema_name_length);
4845 }
4846 }
4847
4848 if (flags & STATE_FLAG_EVENT)
4849 {
4850 PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
4851 DBUG_ASSERT(pfs != NULL);
4852
4853 thread->m_stmt_lock.allocated_to_dirty();
4854
4855 switch(da->status())
4856 {
4857 case Diagnostics_area::DA_OK_BULK:
4858 case Diagnostics_area::DA_EMPTY:
4859 break;
4860 case Diagnostics_area::DA_OK:
4861 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4862 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4863 pfs->m_rows_affected= da->affected_rows();
4864 pfs->m_warning_count= da->statement_warn_count();
4865 memcpy(pfs->m_sqlstate, "00000", SQLSTATE_LENGTH);
4866 break;
4867 case Diagnostics_area::DA_EOF:
4868 pfs->m_warning_count= da->statement_warn_count();
4869 break;
4870 case Diagnostics_area::DA_ERROR:
4871 memcpy(pfs->m_message_text, da->message(), MYSQL_ERRMSG_SIZE);
4872 pfs->m_message_text[MYSQL_ERRMSG_SIZE]= 0;
4873 pfs->m_sql_errno= da->sql_errno();
4874 pfs->m_error_count++;
4875 memcpy(pfs->m_sqlstate, da->get_sqlstate(), SQLSTATE_LENGTH);
4876 break;
4877 case Diagnostics_area::DA_DISABLED:
4878 break;
4879 }
4880
4881 pfs->m_timer_end= timer_end;
4882 pfs->m_end_event_id= thread->m_event_id;
4883
4884 if (digest_storage != NULL)
4885 {
4886 /*
4887 The following columns in events_statement_current:
4888 - DIGEST,
4889 - DIGEST_TEXT
4890 are computed from the digest storage.
4891 */
4892 pfs->m_digest_storage.copy(digest_storage);
4893 }
4894
4895 if (flag_events_statements_history)
4896 insert_events_statements_history(thread, pfs);
4897 if (flag_events_statements_history_long)
4898 insert_events_statements_history_long(pfs);
4899
4900 DBUG_ASSERT(thread->m_events_statements_count > 0);
4901 thread->m_events_statements_count--;
4902 thread->m_stmt_lock.dirty_to_allocated();
4903 }
4904 }
4905 else
4906 {
4907 if (flags & STATE_FLAG_DIGEST)
4908 {
4909 PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
4910
4911 /* An instrumented thread is required, for LF_PINS. */
4912 if (thread != NULL)
4913 {
4914 /* Set digest stat. */
4915 digest_storage= state->m_digest;
4916
4917 if (digest_storage != NULL)
4918 {
4919 /* Populate statements_digest_stat with computed digest information. */
4920 digest_stat= find_or_create_digest(thread, digest_storage,
4921 state->m_schema_name,
4922 state->m_schema_name_length);
4923 }
4924 }
4925 }
4926
4927 event_name_array= global_instr_class_statements_array;
4928 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_GLOBAL_BY_EVENT_NAME */
4929 stat= & event_name_array[index];
4930 }
4931
4932 if (flags & STATE_FLAG_TIMED)
4933 {
4934 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (timed) */
4935 stat->aggregate_value(wait_time);
4936 }
4937 else
4938 {
4939 /* Aggregate to EVENTS_STATEMENTS_SUMMARY_..._BY_EVENT_NAME (counted) */
4940 stat->aggregate_counted();
4941 }
4942
4943 stat->m_lock_time+= state->m_lock_time;
4944 stat->m_rows_sent+= state->m_rows_sent;
4945 stat->m_rows_examined+= state->m_rows_examined;
4946 stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4947 stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4948 stat->m_select_full_join+= state->m_select_full_join;
4949 stat->m_select_full_range_join+= state->m_select_full_range_join;
4950 stat->m_select_range+= state->m_select_range;
4951 stat->m_select_range_check+= state->m_select_range_check;
4952 stat->m_select_scan+= state->m_select_scan;
4953 stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4954 stat->m_sort_range+= state->m_sort_range;
4955 stat->m_sort_rows+= state->m_sort_rows;
4956 stat->m_sort_scan+= state->m_sort_scan;
4957 stat->m_no_index_used+= state->m_no_index_used;
4958 stat->m_no_good_index_used+= state->m_no_good_index_used;
4959
4960 if (digest_stat != NULL)
4961 {
4962 if (flags & STATE_FLAG_TIMED)
4963 {
4964 digest_stat->aggregate_value(wait_time);
4965 }
4966 else
4967 {
4968 digest_stat->aggregate_counted();
4969 }
4970
4971 digest_stat->m_lock_time+= state->m_lock_time;
4972 digest_stat->m_rows_sent+= state->m_rows_sent;
4973 digest_stat->m_rows_examined+= state->m_rows_examined;
4974 digest_stat->m_created_tmp_disk_tables+= state->m_created_tmp_disk_tables;
4975 digest_stat->m_created_tmp_tables+= state->m_created_tmp_tables;
4976 digest_stat->m_select_full_join+= state->m_select_full_join;
4977 digest_stat->m_select_full_range_join+= state->m_select_full_range_join;
4978 digest_stat->m_select_range+= state->m_select_range;
4979 digest_stat->m_select_range_check+= state->m_select_range_check;
4980 digest_stat->m_select_scan+= state->m_select_scan;
4981 digest_stat->m_sort_merge_passes+= state->m_sort_merge_passes;
4982 digest_stat->m_sort_range+= state->m_sort_range;
4983 digest_stat->m_sort_rows+= state->m_sort_rows;
4984 digest_stat->m_sort_scan+= state->m_sort_scan;
4985 digest_stat->m_no_index_used+= state->m_no_index_used;
4986 digest_stat->m_no_good_index_used+= state->m_no_good_index_used;
4987 }
4988
4989 switch (da->status())
4990 {
4991 case Diagnostics_area::DA_OK_BULK:
4992 case Diagnostics_area::DA_EMPTY:
4993 break;
4994 case Diagnostics_area::DA_OK:
4995 stat->m_rows_affected+= da->affected_rows();
4996 stat->m_warning_count+= da->statement_warn_count();
4997 if (digest_stat != NULL)
4998 {
4999 digest_stat->m_rows_affected+= da->affected_rows();
5000 digest_stat->m_warning_count+= da->statement_warn_count();
5001 }
5002 break;
5003 case Diagnostics_area::DA_EOF:
5004 stat->m_warning_count+= da->statement_warn_count();
5005 if (digest_stat != NULL)
5006 {
5007 digest_stat->m_warning_count+= da->statement_warn_count();
5008 }
5009 break;
5010 case Diagnostics_area::DA_ERROR:
5011 stat->m_error_count++;
5012 if (digest_stat != NULL)
5013 {
5014 digest_stat->m_error_count++;
5015 }
5016 break;
5017 case Diagnostics_area::DA_DISABLED:
5018 break;
5019 }
5020 }
5021
5022 /**
5023 Implementation of the socket instrumentation interface.
5024 @sa PSI_v1::end_socket_wait.
5025 */
end_socket_wait_v1(PSI_socket_locker * locker,size_t byte_count)5026 static void end_socket_wait_v1(PSI_socket_locker *locker, size_t byte_count)
5027 {
5028 PSI_socket_locker_state *state= reinterpret_cast<PSI_socket_locker_state*> (locker);
5029 DBUG_ASSERT(state != NULL);
5030
5031 PFS_socket *socket= reinterpret_cast<PFS_socket *>(state->m_socket);
5032 DBUG_ASSERT(socket != NULL);
5033
5034 ulonglong timer_end= 0;
5035 ulonglong wait_time= 0;
5036 PFS_byte_stat *byte_stat;
5037 uint flags= state->m_flags;
5038 size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
5039
5040 switch (state->m_operation)
5041 {
5042 /* Group read operations */
5043 case PSI_SOCKET_RECV:
5044 case PSI_SOCKET_RECVFROM:
5045 case PSI_SOCKET_RECVMSG:
5046 byte_stat= &socket->m_socket_stat.m_io_stat.m_read;
5047 break;
5048 /* Group write operations */
5049 case PSI_SOCKET_SEND:
5050 case PSI_SOCKET_SENDTO:
5051 case PSI_SOCKET_SENDMSG:
5052 byte_stat= &socket->m_socket_stat.m_io_stat.m_write;
5053 break;
5054 /* Group remaining operations as miscellaneous */
5055 case PSI_SOCKET_CONNECT:
5056 case PSI_SOCKET_CREATE:
5057 case PSI_SOCKET_BIND:
5058 case PSI_SOCKET_SEEK:
5059 case PSI_SOCKET_OPT:
5060 case PSI_SOCKET_STAT:
5061 case PSI_SOCKET_SHUTDOWN:
5062 case PSI_SOCKET_SELECT:
5063 case PSI_SOCKET_CLOSE:
5064 byte_stat= &socket->m_socket_stat.m_io_stat.m_misc;
5065 break;
5066 default:
5067 DBUG_ASSERT(false);
5068 byte_stat= NULL;
5069 break;
5070 }
5071
5072 /* Aggregation for EVENTS_WAITS_SUMMARY_BY_INSTANCE */
5073 if (flags & STATE_FLAG_TIMED)
5074 {
5075 timer_end= state->m_timer();
5076 wait_time= timer_end - state->m_timer_start;
5077
5078 /* Aggregate to the socket instrument for now (timed) */
5079 byte_stat->aggregate(wait_time, bytes);
5080 }
5081 else
5082 {
5083 /* Aggregate to the socket instrument (event count and byte count) */
5084 byte_stat->aggregate_counted(bytes);
5085 }
5086
5087 /* Aggregate to EVENTS_WAITS_HISTORY and EVENTS_WAITS_HISTORY_LONG */
5088 if (flags & STATE_FLAG_EVENT)
5089 {
5090 PFS_thread *thread= reinterpret_cast<PFS_thread *>(state->m_thread);
5091 DBUG_ASSERT(thread != NULL);
5092 PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
5093 DBUG_ASSERT(wait != NULL);
5094
5095 wait->m_timer_end= timer_end;
5096 wait->m_end_event_id= thread->m_event_id;
5097 wait->m_number_of_bytes= bytes;
5098
5099 if (flag_events_waits_history)
5100 insert_events_waits_history(thread, wait);
5101 if (flag_events_waits_history_long)
5102 insert_events_waits_history_long(wait);
5103 thread->m_events_waits_current--;
5104
5105 DBUG_ASSERT(wait == thread->m_events_waits_current);
5106 }
5107 }
5108
set_socket_state_v1(PSI_socket * socket,PSI_socket_state state)5109 static void set_socket_state_v1(PSI_socket *socket, PSI_socket_state state)
5110 {
5111 DBUG_ASSERT((state == PSI_SOCKET_STATE_IDLE) || (state == PSI_SOCKET_STATE_ACTIVE));
5112 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5113 DBUG_ASSERT(pfs != NULL);
5114 DBUG_ASSERT(pfs->m_idle || (state == PSI_SOCKET_STATE_IDLE));
5115 DBUG_ASSERT(!pfs->m_idle || (state == PSI_SOCKET_STATE_ACTIVE));
5116 pfs->m_idle= (state == PSI_SOCKET_STATE_IDLE);
5117 }
5118
5119 /**
5120 Set socket descriptor and address info.
5121 */
set_socket_info_v1(PSI_socket * socket,const my_socket * fd,const struct sockaddr * addr,socklen_t addr_len)5122 static void set_socket_info_v1(PSI_socket *socket,
5123 const my_socket *fd,
5124 const struct sockaddr *addr,
5125 socklen_t addr_len)
5126 {
5127 PFS_socket *pfs= reinterpret_cast<PFS_socket*>(socket);
5128 DBUG_ASSERT(pfs != NULL);
5129
5130 /** Set socket descriptor */
5131 if (fd != NULL)
5132 pfs->m_fd= (uint)*fd;
5133
5134 /** Set raw socket address and length */
5135 if (likely(addr != NULL && addr_len > 0))
5136 {
5137 pfs->m_addr_len= addr_len;
5138
5139 /** Restrict address length to size of struct */
5140 if (unlikely(pfs->m_addr_len > sizeof(sockaddr_storage)))
5141 pfs->m_addr_len= sizeof(struct sockaddr_storage);
5142
5143 memcpy(&pfs->m_sock_addr, addr, pfs->m_addr_len);
5144 }
5145 }
5146
5147 /**
5148 Implementation of the socket instrumentation interface.
5149 @sa PSI_v1::set_socket_info.
5150 */
set_socket_thread_owner_v1(PSI_socket * socket)5151 static void set_socket_thread_owner_v1(PSI_socket *socket)
5152 {
5153 PFS_socket *pfs_socket= reinterpret_cast<PFS_socket*>(socket);
5154 DBUG_ASSERT(pfs_socket != NULL);
5155 pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5156 }
5157
5158 struct PSI_digest_locker*
pfs_digest_start_v1(PSI_statement_locker * locker)5159 pfs_digest_start_v1(PSI_statement_locker *locker)
5160 {
5161 PSI_statement_locker_state *statement_state;
5162 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5163 DBUG_ASSERT(statement_state != NULL);
5164
5165 if (statement_state->m_discarded)
5166 return NULL;
5167
5168 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5169 {
5170 return reinterpret_cast<PSI_digest_locker*> (locker);
5171 }
5172
5173 return NULL;
5174 }
5175
pfs_digest_end_v1(PSI_digest_locker * locker,const sql_digest_storage * digest)5176 void pfs_digest_end_v1(PSI_digest_locker *locker, const sql_digest_storage *digest)
5177 {
5178 PSI_statement_locker_state *statement_state;
5179 statement_state= reinterpret_cast<PSI_statement_locker_state*> (locker);
5180 DBUG_ASSERT(statement_state != NULL);
5181 DBUG_ASSERT(digest != NULL);
5182
5183 if (statement_state->m_discarded)
5184 return;
5185
5186 if (statement_state->m_flags & STATE_FLAG_DIGEST)
5187 {
5188 statement_state->m_digest= digest;
5189 }
5190 }
5191
5192 /**
5193 Implementation of the thread attribute connection interface
5194 @sa PSI_v1::set_thread_connect_attr.
5195 */
set_thread_connect_attrs_v1(const char * buffer,uint length,const void * from_cs)5196 static int set_thread_connect_attrs_v1(const char *buffer, uint length,
5197 const void *from_cs)
5198 {
5199
5200 PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
5201
5202 DBUG_ASSERT(buffer != NULL);
5203
5204 if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
5205 {
5206 const CHARSET_INFO *cs = static_cast<const CHARSET_INFO *> (from_cs);
5207
5208 /* copy from the input buffer as much as we can fit */
5209 uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
5210 length : session_connect_attrs_size_per_thread);
5211 thd->m_session_lock.allocated_to_dirty();
5212 memcpy(thd->m_session_connect_attrs, buffer, copy_size);
5213 thd->m_session_connect_attrs_length= copy_size;
5214 thd->m_session_connect_attrs_cs_number= cs->number;
5215 thd->m_session_lock.dirty_to_allocated();
5216
5217 if (copy_size == length)
5218 return 0;
5219
5220 session_connect_attrs_lost++;
5221 return 1;
5222 }
5223 return 0;
5224 }
5225
5226
5227 /**
5228 Implementation of the instrumentation interface.
5229 @sa PSI_v1.
5230 */
5231 PSI_v1 PFS_v1=
5232 {
5233 register_mutex_v1,
5234 register_rwlock_v1,
5235 register_cond_v1,
5236 register_thread_v1,
5237 register_file_v1,
5238 register_stage_v1,
5239 register_statement_v1,
5240 register_socket_v1,
5241 init_mutex_v1,
5242 destroy_mutex_v1,
5243 init_rwlock_v1,
5244 destroy_rwlock_v1,
5245 init_cond_v1,
5246 destroy_cond_v1,
5247 init_socket_v1,
5248 destroy_socket_v1,
5249 get_table_share_v1,
5250 release_table_share_v1,
5251 drop_table_share_v1,
5252 open_table_v1,
5253 unbind_table_v1,
5254 rebind_table_v1,
5255 close_table_v1,
5256 create_file_v1,
5257 spawn_thread_v1,
5258 new_thread_v1,
5259 set_thread_id_v1,
5260 get_thread_v1,
5261 set_thread_user_v1,
5262 set_thread_account_v1,
5263 set_thread_db_v1,
5264 set_thread_command_v1,
5265 set_thread_start_time_v1,
5266 set_thread_state_v1,
5267 set_thread_info_v1,
5268 set_thread_v1,
5269 delete_current_thread_v1,
5270 delete_thread_v1,
5271 get_thread_file_name_locker_v1,
5272 get_thread_file_stream_locker_v1,
5273 get_thread_file_descriptor_locker_v1,
5274 unlock_mutex_v1,
5275 unlock_rwlock_v1,
5276 signal_cond_v1,
5277 broadcast_cond_v1,
5278 start_idle_wait_v1,
5279 end_idle_wait_v1,
5280 start_mutex_wait_v1,
5281 end_mutex_wait_v1,
5282 start_rwlock_wait_v1, /* read */
5283 end_rwlock_rdwait_v1,
5284 start_rwlock_wait_v1, /* write */
5285 end_rwlock_wrwait_v1,
5286 start_cond_wait_v1,
5287 end_cond_wait_v1,
5288 start_table_io_wait_v1,
5289 end_table_io_wait_v1,
5290 start_table_lock_wait_v1,
5291 end_table_lock_wait_v1,
5292 start_file_open_wait_v1,
5293 end_file_open_wait_v1,
5294 end_file_open_wait_and_bind_to_descriptor_v1,
5295 start_file_wait_v1,
5296 end_file_wait_v1,
5297 start_file_close_wait_v1,
5298 end_file_close_wait_v1,
5299 start_stage_v1,
5300 end_stage_v1,
5301 get_thread_statement_locker_v1,
5302 refine_statement_v1,
5303 start_statement_v1,
5304 set_statement_text_v1,
5305 set_statement_lock_time_v1,
5306 set_statement_rows_sent_v1,
5307 set_statement_rows_examined_v1,
5308 inc_statement_created_tmp_disk_tables_v1,
5309 inc_statement_created_tmp_tables_v1,
5310 inc_statement_select_full_join_v1,
5311 inc_statement_select_full_range_join_v1,
5312 inc_statement_select_range_v1,
5313 inc_statement_select_range_check_v1,
5314 inc_statement_select_scan_v1,
5315 inc_statement_sort_merge_passes_v1,
5316 inc_statement_sort_range_v1,
5317 inc_statement_sort_rows_v1,
5318 inc_statement_sort_scan_v1,
5319 set_statement_no_index_used_v1,
5320 set_statement_no_good_index_used_v1,
5321 end_statement_v1,
5322 start_socket_wait_v1,
5323 end_socket_wait_v1,
5324 set_socket_state_v1,
5325 set_socket_info_v1,
5326 set_socket_thread_owner_v1,
5327 pfs_digest_start_v1,
5328 pfs_digest_end_v1,
5329 set_thread_connect_attrs_v1,
5330 };
5331
get_interface(int version)5332 static void* get_interface(int version)
5333 {
5334 switch (version)
5335 {
5336 case PSI_VERSION_1:
5337 return &PFS_v1;
5338 default:
5339 return NULL;
5340 }
5341 }
5342
5343 C_MODE_END
5344
/** Performance schema bootstrap hook, exposing get_interface(). */
struct PSI_bootstrap PFS_bootstrap=
{
  get_interface
};
5349