#!/bin/bash
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

#
# Convenience script to run MPI applications with UCX
#
# Usage: run_mpi.sh <options> <executable> <arguments>
#
# The script relies on bash features ([[ ]], arrays), so it must be run by
# bash rather than a plain POSIX sh.
#

#
# Print the arguments to stdout, but only when verbose mode is on.
# Always returns success so it can safely be the last command of a function.
#
verbose()
{
    if [ "$VERBOSE" -ne 0 ]
    then
        echo "$@"
    fi
}

#
# Succeed if $1 is a positive decimal integer without leading zeros
# (a leading zero would be interpreted as octal by shell arithmetic).
#
is_positive_int()
{
    case $1 in
        ''|0*|*[!0-9]*)
            return 1
            ;;
    esac
    return 0
}

#
# Abort with a usage message unless the option in $1 is followed by a value.
# Call as: require_value "$@"
#
require_value()
{
    if [ "$#" -lt 2 ]
    then
        echo "Missing value for option '$1'" >&2
        usage
        exit 254
    fi
}

#
# Derive the default run parameters from a SLURM allocation, if there is one.
#
# Uses $SLURM_JOBID when set; otherwise takes the first job squeue reports for
# the current user. When no job is found the defaults are left untouched.
# Sets: HOSTS (expanded host names), NNODES, PPN.
#
check_slurm_env()
{
    if [ -z "$SLURM_JOBID" ]
    then
        # No squeue - nothing to search with, skip slurm quietly
        command -v squeue >/dev/null 2>&1 || return 0

        # Search for jobs of the current user
        SLURM_JOBID=$(squeue -h -u "$USER" -o "%i" | head -1)
    fi

    if [ -z "$SLURM_JOBID" ]
    then
        # Skip slurm
        return 0
    fi

    # Nodes to run on.
    # The squeue output is deliberately left unquoted: if it yields several
    # node expressions, each one is passed to hostlist as its own argument.
    # shellcheck disable=SC2046
    HOSTS=$(hostlist -e $(squeue -j "${SLURM_JOBID}" -h -o "%N"))
    export HOSTS

    SLURM_NNODES=$(squeue -j "${SLURM_JOBID}" -h -o "%D")
    if is_positive_int "$SLURM_NNODES"
    then
        NNODES=$SLURM_NNODES
    fi

    if [ -n "$SLURM_JOB_CPUS_PER_NODE" ]
    then
        # The value looks like "16", "16(x2)" or "16(x2),8": keep the first
        # count only, i.e. cut at the first '(' or ','.
        PPN=${SLURM_JOB_CPUS_PER_NODE%%[(,]*}
    else
        TOTAL_CPUS=$(squeue -j "${SLURM_JOBID}" -h -o "%C")
        # Guard the division against empty/zero/non-numeric squeue output
        if is_positive_int "$TOTAL_CPUS" && is_positive_int "$SLURM_NNODES"
        then
            PPN=$((TOTAL_CPUS / SLURM_NNODES))
        fi
    fi
    export PPN
}

#
# Print the help text. The current defaults (possibly taken from SLURM) are
# shown in parentheses.
#
usage()
{
    echo "Usage: run_mpi.sh <options> <executable> <arguments> -- <additional arguments to launcher>"
    echo
    echo "  -h|--help                    Show this help message"
    echo "  -v|--verbose                 Turn on verbosity"
    echo "  -c|--config <name>=<value>   Set UCX configuration"
    echo "  -N|--nnodes <count>          Number of nodes to run on ($NNODES)"
    echo "  --ppn <count>                Number of processes per node ($PPN)"
    echo "  --mpi-log-level <level>      Log level for MPI UCX component ($MPI_LOG_LEVEL)"
    echo "  --valgrind                   Run with valgrind"
    echo "  --valgrind-args \"<args>\"     Extra arguments to valgrind"
    echo
}

#
# Set the built-in defaults for all run parameters.
# The @...@ placeholders are substituted when the script is generated.
#
initialize()
{
    export MPIRUN=@MPIRUN@
    export LIBUCS=@abs_top_builddir@/src/ucs/.libs/libucs.so
    export LIBUCT=@abs_top_builddir@/src/uct/.libs/libuct.so
    export LIBUCP=@abs_top_builddir@/src/ucp/.libs/libucp.so
    export VERBOSE=0
    export EXE=""
    export NNODES=1
    export PPN=1
    export CONFIG=""
    export MPI_LOG_LEVEL=0
    export VALGRIND=0
    export VALGRIND_ARGS=""

    # These two are arrays so that arguments containing whitespace survive.
    # They must start out really empty: assigning "" to an array variable
    # would create one empty element, which would then be passed on as a
    # bogus empty argument.
    EXE_ARGS=()
    EXTRA_MPI_ARGS=()
}

#
# Parse the command line:
#   <options> <executable> <arguments> [-- <launcher arguments>]
#
# Options are consumed up to the first word which does not start with '-';
# that word is the executable. Everything after it, up to an optional "--",
# goes to EXE_ARGS, and everything after "--" goes to EXTRA_MPI_ARGS.
# Exits with status 254 on a usage error, and 0 after printing help.
#
parse_args()
{
    local key

    while [[ $# -gt 0 ]]
    do
        key="$1"
        case $key in
            -h|--help)
                usage
                exit 0
                ;;
            -v|--verbose)
                VERBOSE=1
                ;;
            -c|--config)
                require_value "$@"
                CONFIG="$CONFIG $2"
                shift
                ;;
            -N|--nnodes)
                require_value "$@"
                NNODES=$2
                shift
                ;;
            --ppn)
                require_value "$@"
                PPN=$2
                shift
                ;;
            --mpi-log-level)
                require_value "$@"
                MPI_LOG_LEVEL=$2
                shift
                ;;
            --valgrind)
                VALGRIND=1
                ;;
            --valgrind-args)
                require_value "$@"
                VALGRIND_ARGS="$2"
                shift
                ;;
            [!-]*)
                # First non-option word: the executable
                EXE=$key
                shift
                break
                ;;
            *)
                usage
                exit 254
                ;;
        esac
        shift
    done

    if [ -z "$EXE" ]
    then
        echo "No executable specified" >&2
        usage
        exit 254
    fi

    while [[ $# -gt 0 ]]
    do
        key="$1"
        case $key in
            --)
                shift
                EXTRA_MPI_ARGS=("$@")
                break
                ;;
            *)
                EXE_ARGS+=("$key")
                ;;
        esac
        shift
    done
}

#
# Compute the values derived from the final run parameters:
#   NP       - total number of processes (NNODES * PPN)
#   HOSTLIST - comma-separated list of the first NNODES hosts from HOSTS
#              (empty when no hosts are known)
# Exits with status 254 if NNODES or PPN is not a positive integer.
#
adjust_run_params()
{
    local name

    for name in NNODES PPN
    do
        if ! is_positive_int "${!name}"
        then
            echo "Invalid value for $name: '${!name}' (expected a positive integer)" >&2
            exit 254
        fi
    done

    export NP=$((NNODES * PPN))

    # $HOSTS is deliberately unquoted: the host names may be separated by
    # newlines, and word splitting turns them into a single space-separated
    # line for cut.
    # shellcheck disable=SC2086
    HOSTLIST=$(echo $HOSTS | cut -d' ' -f "1-$NNODES" | tr ' ' ',')
    export HOSTLIST
}

#
# Build the Open MPI command line and run the application.
# Returns the exit status of mpirun.
#
run_open_mpi()
{
    local -a ompi_args launch
    local preload lib cfg mpi_home

    ompi_args=(
        -mca pml ucx
        -mca pml_ucx_verbose "$MPI_LOG_LEVEL"
        -mca spml ucx
        -mca spml_ucx_verbose "$MPI_LOG_LEVEL"
    )

    # Without a host list (e.g. not running under slurm) leave the placement
    # to mpirun; an empty -H would swallow the next option as its value.
    if [ -n "$HOSTLIST" ]
    then
        ompi_args+=(-H "$HOSTLIST")
    fi

    ompi_args+=(
        -n "$NP"
        --map-by node
        -mca ess_base_stream_buffering 0
        -mca mpi_abort_delay -1
    )

    # Preload UCP after whatever is already preloaded (no stray leading ':')
    preload="${LD_PRELOAD:+$LD_PRELOAD:}$LIBUCP"
    if [ "$VALGRIND" -ne 0 ]
    then
        # Preload valgrind-enabled libraries
        for lib in /usr/lib64/mlnx_ofed/valgrind/*.so
        do
            # The -f test also skips the literal pattern when nothing matches
            if [ -f "$lib" ]
            then
                preload="$preload:$lib"
            fi
        done
    fi
    ompi_args+=(-x "LD_PRELOAD=$preload")

    ompi_args+=(-x UCX_HANDLE_ERRORS=freeze)

    # CONFIG is a space-separated list of <name>=<value> settings, so it is
    # split on whitespace on purpose.
    # shellcheck disable=SC2086
    for cfg in $CONFIG
    do
        ompi_args+=(-x "$cfg")
    done

    launch=()
    if [ "$VALGRIND" -ne 0 ]
    then
        mpi_home=$(cd "$(dirname "${MPIRUN}")/.." && pwd)

        # VALGRIND_ARGS holds several arguments in one string, so it is
        # split on whitespace on purpose.
        # shellcheck disable=SC2206
        launch=(
            valgrind
            --fair-sched=try
            --track-origins=yes
            --leak-check=yes
            "--suppressions=${mpi_home}/share/openmpi/openmpi-valgrind.supp"
            "--suppressions=@abs_srcdir@/ompi.supp"
            $VALGRIND_ARGS
        )

        # Do not create an empty path element when LD_LIBRARY_PATH is unset:
        # the dynamic loader would treat it as the current directory.
        LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}@VALGRIND_LIBPATH@"
    fi
    launch+=("$EXE")

    ompi_args+=(-x LD_LIBRARY_PATH)

    export LD_LIBRARY_PATH
    verbose "$MPIRUN" "${ompi_args[@]}" "${EXTRA_MPI_ARGS[@]}" "${launch[@]}" "${EXE_ARGS[@]}"
    "$MPIRUN" "${ompi_args[@]}" "${EXTRA_MPI_ARGS[@]}" "${launch[@]}" "${EXE_ARGS[@]}"
}

#
# Entry point: set the defaults, refine them from slurm, apply the command
# line, and launch with the detected MPI flavor.
# Exits with status 253 if the MPI flavor is not recognized.
#
main()
{
    initialize
    # Must run before parse_args so that usage shows the slurm defaults and
    # command line options override them
    check_slurm_env
    parse_args "$@"
    adjust_run_params

    if strings "$MPIRUN" | grep -qi orte && "$MPIRUN" -h | grep -q "Open MPI"
    then
        run_open_mpi
    else
        echo "Unrecognized MPI flavor ($MPIRUN)" >&2
        exit 253
    fi
}

main "$@"