1#!/bin/sh
2#
3# Copyright (C) Mellanox Technologies Ltd. 2001-2015.  ALL RIGHTS RESERVED.
4#
5# See file LICENSE for terms.
6#
7
8#
9# Convenience script to run MPI applications with UCX
10#
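# Usage: run_mpi.sh <options> <executable> <arguments> -- <additional arguments to launcher>
#
# Note: the script uses bash arrays and [[ ]], so the interpreter must be bash-compatible.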
12#
13
#
# Print the arguments as one line, but only when verbose mode is enabled.
#
# Globals:   VERBOSE (read) - non-zero enables output; unset or empty counts as 0
# Arguments: the words to print
# Returns:   0 whether or not anything was printed, so that a silent call is
#            not mistaken for a failure by the caller
#
verbose()
{
	if [ "${VERBOSE:-0}" -ne 0 ]
	then
		# printf instead of echo: a leading word such as "-n" is printed, not
		# interpreted as an option
		printf '%s\n' "$*"
	fi
}
18
#
# Pick up the node list and process geometry from a SLURM allocation.
#
# If SLURM_JOBID is not set, the first job that squeue lists for $USER is
# used. When no job is found the function returns without touching anything.
#
# Globals written: SLURM_JOBID, SLURM_NNODES, NNODES, TOTAL_CPUS,
#                  HOSTS (exported), PPN (exported)
# Globals read:    USER, SLURM_JOBID, SLURM_JOB_CPUS_PER_NODE
# Returns:         0 when there is no job or the job was inspected,
#                  1 when squeue did not report a usable node count
#
check_slurm_env()
{
	local nodelist nnodes

	if [ -z "$SLURM_JOBID" ]
	then
		# Search for jobs of the current user
		SLURM_JOBID=$(squeue -h -u "$USER" -o "%i" | head -n 1)
	fi

	if [ -z "$SLURM_JOBID" ]
	then
		# Skip slurm
		return 0
	fi

	# Nodes to run on. The compact node list (e.g. "node[1-4]") is quoted so
	# the shell does not try to glob it before hostlist expands it.
	nodelist=$(squeue -j "${SLURM_JOBID}" -h -o "%N")
	HOSTS=$(hostlist -e "$nodelist")
	export HOSTS

	# Only accept a positive integer: anything else would break the
	# arithmetic below and the later NNODES * PPN computation
	nnodes=$(squeue -j "${SLURM_JOBID}" -h -o "%D")
	case "$nnodes" in
	''|*[!0-9]*|0)
		echo "Cannot determine the node count of SLURM job $SLURM_JOBID" >&2
		return 1
		;;
	esac
	SLURM_NNODES=$nnodes
	NNODES=$SLURM_NNODES

	if [ -n "$SLURM_JOB_CPUS_PER_NODE" ]
	then
		# The value may look like "16", "16(x2)" or "16(x2),8": keep the
		# leading count only
		PPN=${SLURM_JOB_CPUS_PER_NODE%%[(,]*}
	else
		TOTAL_CPUS=$(squeue -j "${SLURM_JOBID}" -h -o "%C")
		case "$TOTAL_CPUS" in
		''|*[!0-9]*)
			# Unknown CPU count: keep the current PPN
			;;
		*)
			PPN=$((TOTAL_CPUS / SLURM_NNODES))
			;;
		esac
	fi
	export PPN
}
47
#
# Print the command-line help to standard output.
#
# The current values of NNODES, PPN and MPI_LOG_LEVEL are shown in
# parentheses next to the options that set them.
#
usage()
{
	# One argument per output line; an empty argument yields a blank line
	printf '%s\n' \
		"Usage: run_mpi.sh <options> <executable> <arguments> -- <additional arguments to launcher>" \
		"" \
		"  -h|--help                     Show this help message" \
		"  -v|--verbose                  Turn on verbosity" \
		"  -c|--config  <name>=<value>   Set UCX configuration" \
		"  -N|--nnodes  <count>          Number of nodes to run on ($NNODES)" \
		"  --ppn  <count>                Number of processes per node ($PPN)" \
		"  --mpi-log-level <level>       Log level for MPI UCX component ($MPI_LOG_LEVEL)" \
		"  --valgrind                    Run with valgrind" \
		"  --valgrind-args \"<args>\"      Extra arguments to valgrind" \
		""
}
62
#
# Set the default value of every setting used by the script.
#
# The @...@ placeholders are left as written here; they are expected to be
# replaced with real paths when the script is generated.
#
# Globals written (exported): MPIRUN, LIBUCS, LIBUCT, LIBUCP, VERBOSE, EXE,
#                             EXTRA_MPI_ARGS, NNODES, PPN, CONFIG,
#                             MPI_LOG_LEVEL, VALGRIND, VALGRIND_ARGS
# Globals written (array):    EXE_ARGS
#
initialize()
{
	export MPIRUN="@MPIRUN@"
	export LIBUCS="@abs_top_builddir@/src/ucs/.libs/libucs.so"
	export LIBUCT="@abs_top_builddir@/src/uct/.libs/libuct.so"
	export LIBUCP="@abs_top_builddir@/src/ucp/.libs/libucp.so"
	export VERBOSE=0
	export EXE=""
	# EXE_ARGS is used as an array. Assigning "" to it would create a
	# one-element array holding an empty string, which would then be passed
	# to the application as a bogus first argument.
	EXE_ARGS=()
	export EXTRA_MPI_ARGS=""
	export NNODES=1
	export PPN=1
	export CONFIG=""
	export MPI_LOG_LEVEL=0
	export VALGRIND=0
	export VALGRIND_ARGS=""
}
80
#
# Parse the command line.
#
# Words up to the first one that does not start with "-" are launcher
# options; that word is the executable. The words after it are the
# executable's arguments, up to an optional "--"; everything after "--" is
# kept for the MPI launcher.
#
# Globals written: VERBOSE, CONFIG, NNODES, PPN, MPI_LOG_LEVEL, VALGRIND,
#                  VALGRIND_ARGS, EXE, EXTRA_MPI_ARGS (all exported),
#                  EXE_ARGS (array, appended to)
# Arguments:       the script's command-line arguments
# Exits:           0 after -h/--help; 254 on an unknown option or when an
#                  option is missing its value
#
parse_args()
{
	local key

	# Launcher options, up to and including the executable name
	while [[ $# -gt 0 ]]
	do
		key="$1"

		# Options that take a value must actually be followed by one;
		# otherwise an empty setting would silently slip through
		case "$key" in
		-c|--config|-N|--nnodes|--ppn|--mpi-log-level|--valgrind-args)
			if [[ $# -lt 2 ]]
			then
				echo "Option $key requires a value" >&2
				usage
				exit 254
			fi
			;;
		esac

		case "$key" in
		-h|--help)
			usage
			exit 0
			;;
		-v|--verbose)
			export VERBOSE=1
			;;
		-c|--config)
			# Settings accumulate as a space-separated list
			export CONFIG="$CONFIG $2"
			shift
			;;
		-N|--nnodes)
			export NNODES="$2"
			shift
			;;
		--ppn)
			export PPN="$2"
			shift
			;;
		--mpi-log-level)
			export MPI_LOG_LEVEL="$2"
			shift
			;;
		--valgrind)
			export VALGRIND=1
			;;
		--valgrind-args)
			export VALGRIND_ARGS="$2"
			shift
			;;
		[!-]*)
			# First word that is not an option: the executable
			export EXE="$key"
			shift
			break
			;;
		*)
			usage
			# 254 is the status bash reports for the non-portable "exit -2"
			exit 254
			;;
		esac
		shift
	done

	# Arguments of the executable, then (after "--") those of the launcher
	while [[ $# -gt 0 ]]
	do
		if [[ "$1" == "--" ]]
		then
			shift
			# Joined into one space-separated string on purpose
			export EXTRA_MPI_ARGS="$*"
			break
		fi
		EXE_ARGS+=("$1")
		shift
	done
}
146
#
# Derive the launch geometry from the parsed settings.
#
# Reads NNODES, PPN and HOSTS; exports NP (NNODES times PPN) and HOSTLIST
# (the first NNODES entries of HOSTS, joined with commas).
#
adjust_run_params()
{
	NP=$((${NNODES} * ${PPN}))

	# HOSTS is left unquoted so that any whitespace between host names is
	# collapsed to single spaces before cut and tr see it
	HOSTLIST=$(
		echo $HOSTS |
		cut -d' ' -f 1-$NNODES |
		tr ' ' ','
	)

	export NP HOSTLIST
}
152
#
# Launch the application through Open MPI's mpirun using the UCX components.
#
# Globals read:    MPIRUN, LIBUCP, MPI_LOG_LEVEL, HOSTLIST, NP, CONFIG,
#                  VALGRIND, VALGRIND_ARGS, EXTRA_MPI_ARGS, EXE, EXE_ARGS,
#                  LD_PRELOAD, LD_LIBRARY_PATH
# Globals written: LD_LIBRARY_PATH (exported; extended when VALGRIND is on)
# Returns:         the exit status of mpirun
#
run_open_mpi()
{
	local -a ompi_args exe_cmd
	local preload lib c mpi_home valgrind_libpath

	ompi_args=(
		-mca pml ucx
		-mca pml_ucx_verbose "$MPI_LOG_LEVEL"
		-mca spml ucx
		-mca spml_ucx_verbose "$MPI_LOG_LEVEL"
	)

	# With no host list a bare "-H" would swallow the next option as its
	# value, so the option is only passed when there are hosts to name
	if [ -n "$HOSTLIST" ]
	then
		ompi_args+=(-H "$HOSTLIST")
	fi

	ompi_args+=(
		-n "$NP"
		--map-by node
		-mca ess_base_stream_buffering 0
		-mca mpi_abort_delay -1
	)

	# Preload UCP after whatever the caller already preloads; no separator
	# is emitted when LD_PRELOAD is empty
	preload="${LD_PRELOAD:+$LD_PRELOAD:}$LIBUCP"
	if [ "${VALGRIND:-0}" -ne 0 ]
	then
		# Preload valgrind-enabled libraries
		for lib in /usr/lib64/mlnx_ofed/valgrind/*.so
		do
			# An unmatched glob stays literal and is filtered out here
			[ -f "$lib" ] && preload="$preload:$lib"
		done
	fi
	ompi_args+=(-x "LD_PRELOAD=$preload")

	ompi_args+=(-x UCX_HANDLE_ERRORS=freeze)
	# CONFIG is a space-separated list of settings: splitting is intended
	for c in $CONFIG
	do
		ompi_args+=(-x "$c")
	done

	exe_cmd=("$EXE")
	if [ "${VALGRIND:-0}" -ne 0 ]
	then
		mpi_home=$(cd "$(dirname "$MPIRUN")/.." && pwd)
		# VALGRIND_ARGS is a flat option string: splitting is intended
		exe_cmd=(
			valgrind
			--fair-sched=try
			--track-origins=yes
			--leak-check=yes
			"--suppressions=${mpi_home}/share/openmpi/openmpi-valgrind.supp"
			"--suppressions=@abs_srcdir@/ompi.supp"
			$VALGRIND_ARGS
			"$EXE"
		)

		# Avoid empty entries in LD_LIBRARY_PATH: the dynamic loader treats
		# an empty entry as the current directory
		valgrind_libpath="@VALGRIND_LIBPATH@"
		if [ -n "$valgrind_libpath" ]
		then
			LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$valgrind_libpath"
		fi
	fi

	# Forward the launcher's own LD_LIBRARY_PATH to the ranks
	ompi_args+=(-x LD_LIBRARY_PATH)
	export LD_LIBRARY_PATH

	# EXTRA_MPI_ARGS is a flat option string: splitting is intended
	verbose "$MPIRUN" "${ompi_args[@]}" $EXTRA_MPI_ARGS "${exe_cmd[@]}" "${EXE_ARGS[@]}"
	"$MPIRUN" "${ompi_args[@]}" $EXTRA_MPI_ARGS "${exe_cmd[@]}" "${EXE_ARGS[@]}"
}
202
#
# Script entry point: set defaults, inspect the SLURM allocation, parse the
# command line and launch the application with the detected MPI flavor.
#
# Arguments: the script's command-line arguments
# Returns:   the status of the MPI launcher
# Exits:     253 when MPIRUN does not look like Open MPI's mpirun
#
main()
{
	initialize
	# Start the argument array empty only after the defaults are set, so
	# that nothing assigned to EXE_ARGS earlier ends up as a stray (empty)
	# argument of the application
	EXE_ARGS=()
	check_slurm_env
	parse_args "$@"
	adjust_run_params

	# Open MPI is recognized by an "orte" string inside the launcher binary
	# together with "Open MPI" in its help output
	if strings "$MPIRUN" | grep -qi orte && "$MPIRUN" -h | grep -q "Open MPI"
	then
		run_open_mpi
	else
		echo "Unrecognized MPI flavor ($MPIRUN)" >&2
		# 253 is the status bash reports for the non-portable "exit -3"
		exit 253
	fi
}
219
220main "$@"
221