1 /*
2  * Copyright (c) 2016-2018 Inria.  All rights reserved.
3  * $COPYRIGHT$
4  *
5  * Additional copyrights may follow
6  *
7  * $HEADER$
8  */
9 
10 #ifndef MCA_COMMON_MONITORING_H
11 #define MCA_COMMON_MONITORING_H
12 
#include <ompi_config.h>

#include <string.h>

#include <ompi/proc/proc.h>
#include <ompi/group/group.h>
#include <ompi/communicator/communicator.h>
#include <opal/class/opal_hash_table.h>
#include <opal/mca/base/mca_base_pvar.h>

/* Open the C linkage block only after every include, so that it is never
 * used before its defining header has been pulled in and never wraps the
 * declarations of other headers. */
BEGIN_C_DECLS
21 
/* Forwards the Open MPI major/minor/release version macros to
 * MCA_BASE_MAKE_VERSION, with `component` as its first argument. */
#define MCA_MONITORING_MAKE_VERSION                                     \
    MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, OMPI_RELEASE_VERSION)

/* Emit a message at verbosity level `x` on the monitoring output stream
 * (mca_common_monitoring_output_stream_id). The remaining arguments are
 * forwarded unchanged; the doubled parentheses hand the whole argument
 * list to OPAL_OUTPUT_VERBOSE as a single macro argument. */
#define OPAL_MONITORING_VERBOSE(x, ...)                                 \
    OPAL_OUTPUT_VERBOSE((x, mca_common_monitoring_output_stream_id, __VA_ARGS__))

/* When built in debug mode, always display error messages */
/* Errors are emitted at verbosity level 0 in debug builds and at level 1
 * otherwise. */
#if OPAL_ENABLE_DEBUG
#define OPAL_MONITORING_PRINT_ERR(...)          \
    OPAL_MONITORING_VERBOSE(0, __VA_ARGS__)
#else /* if( ! OPAL_ENABLE_DEBUG ) */
#define OPAL_MONITORING_PRINT_ERR(...)          \
    OPAL_MONITORING_VERBOSE(1, __VA_ARGS__)
#endif /* OPAL_ENABLE_DEBUG */

/* Warnings are emitted at verbosity level 5. */
#define OPAL_MONITORING_PRINT_WARN(...)         \
    OPAL_MONITORING_VERBOSE(5, __VA_ARGS__)

/* Informational messages are emitted at verbosity level 10. */
#define OPAL_MONITORING_PRINT_INFO(...)         \
    OPAL_MONITORING_VERBOSE(10, __VA_ARGS__)
42 
/* Output stream id that OPAL_MONITORING_VERBOSE passes to
 * OPAL_OUTPUT_VERBOSE. */
extern int mca_common_monitoring_output_stream_id;
/* NOTE(review): presumably non-zero while monitoring is enabled; not read
 * anywhere in this header -- confirm against the definition. */
extern int mca_common_monitoring_enabled;
/* Current monitoring state. mca_common_monitoring_filter() reports
 * filtering as active when this value is greater than 1. */
extern int mca_common_monitoring_current_state;
/* Hash table queried by mca_common_monitoring_get_world_rank(): it is
 * looked up with a 64-bit key built from a process name, and the stored
 * value is used as that process' world rank. */
extern opal_hash_table_t *common_monitoring_translation_ht;
47 
/* Entry points of the common monitoring layer. Only the declarations are
 * visible in this header.
 * NOTE(review): judging by the names, these register the monitoring
 * parameters for the given PML monitoring component, set the layer up,
 * tear it down, and make `nprocs` processes from `procs` known to it --
 * confirm against the implementation. */
OMPI_DECLSPEC void mca_common_monitoring_register(void*pml_monitoring_component);
OMPI_DECLSPEC int mca_common_monitoring_init( void );
OMPI_DECLSPEC void mca_common_monitoring_finalize( void );
OMPI_DECLSPEC int mca_common_monitoring_add_procs(struct ompi_proc_t **procs, size_t nprocs);

/* Records PML communication */
/* NOTE(review): `world_rank` is presumably the peer's rank in
 * MPI_COMM_WORLD (as produced by mca_common_monitoring_get_world_rank)
 * and `data_size` a byte count; how `tag` is interpreted is not visible
 * here -- TODO confirm. */
OMPI_DECLSPEC void mca_common_monitoring_record_pml(int world_rank, size_t data_size, int tag);
55 
/* Direction of an OSC transfer, as seen from the current process:
 *   SEND - the data goes from the current process to the given one;
 *   RECV - the data goes from the given process to the current one.
 */
enum mca_monitoring_osc_direction {
    SEND = 0,
    RECV = 1
};
61 
/* Records OSC communications. */
/* `dir` tells whether the data was emitted by the current process (SEND)
 * or by the given one (RECV).
 * NOTE(review): `world_rank` is presumably the peer's rank in
 * MPI_COMM_WORLD and `data_size` a byte count -- confirm against the
 * implementation. */
OMPI_DECLSPEC void mca_common_monitoring_record_osc(int world_rank, size_t data_size,
                                                    enum mca_monitoring_osc_direction dir);

/* Records COLL communications. */
/* NOTE(review): same assumptions on `world_rank` and `data_size` as for
 * the OSC variant -- TODO confirm. */
OMPI_DECLSPEC void mca_common_monitoring_record_coll(int world_rank, size_t data_size);
68 
69 /* Translate the rank from the given rank of a process to its rank in MPI_COMM_RANK. */
mca_common_monitoring_get_world_rank(int dest,ompi_group_t * group,int * world_rank)70 static inline int mca_common_monitoring_get_world_rank(int dest, ompi_group_t *group,
71                                                            int *world_rank)
72 {
73     opal_process_name_t tmp;
74 
75     /* find the processor of the destination */
76     ompi_proc_t *proc = ompi_group_get_proc_ptr(group, dest, true);
77     if( ompi_proc_is_sentinel(proc) ) {
78         tmp = ompi_proc_sentinel_to_name((uintptr_t)proc);
79     } else {
80         tmp = proc->super.proc_name;
81     }
82 
83     /* find its name*/
84     uint64_t rank, key = *((uint64_t*)&tmp);
85     /**
86      * If this fails the destination is not part of my MPI_COM_WORLD
87      * Lookup its name in the rank hastable to get its MPI_COMM_WORLD rank
88      */
89     int ret = opal_hash_table_get_value_uint64(common_monitoring_translation_ht,
90                                                key, (void *)&rank);
91 
92     /* Use intermediate variable to avoid overwriting while looking up in the hashtbale. */
93     if( ret == OPAL_SUCCESS ) *world_rank = (int)rank;
94     return ret;
95 }
96 
97 /* Return the current status of the monitoring system 0 if off or the
98  * seperation between internal tags and external tags is disabled. Any
99  * other positive value if the segregation between point-to-point and
100  * collective is enabled.
101  */
mca_common_monitoring_filter(void)102 static inline int mca_common_monitoring_filter( void )
103 {
104     return 1 < mca_common_monitoring_current_state;
105 }
106 
/* Collective operation monitoring */
/* The data type is opaque in this header: only a forward declaration and
 * its class declaration are provided. */
struct mca_monitoring_coll_data_t;
typedef struct mca_monitoring_coll_data_t mca_monitoring_coll_data_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_monitoring_coll_data_t);

/* NOTE(review): only the declarations are visible here. Judging by the
 * names, _new/_release create and drop the monitoring data attached to a
 * communicator, _cache_name stores the communicator's name, and
 * _o2a/_a2o/_a2a account `size` for one-to-all, all-to-one and all-to-all
 * collectives respectively -- confirm against the implementation. */
OMPI_DECLSPEC mca_monitoring_coll_data_t*mca_common_monitoring_coll_new(ompi_communicator_t*comm);
OMPI_DECLSPEC int  mca_common_monitoring_coll_cache_name(ompi_communicator_t*comm);
OMPI_DECLSPEC void mca_common_monitoring_coll_release(mca_monitoring_coll_data_t*data);
OMPI_DECLSPEC void mca_common_monitoring_coll_o2a(size_t size, mca_monitoring_coll_data_t*data);
OMPI_DECLSPEC void mca_common_monitoring_coll_a2o(size_t size, mca_monitoring_coll_data_t*data);
OMPI_DECLSPEC void mca_common_monitoring_coll_a2a(size_t size, mca_monitoring_coll_data_t*data);
118 
119 END_C_DECLS
120 
121 #endif  /* MCA_COMMON_MONITORING_H */
122