#!/bin/bash

#   Copyright (c) 2014 John Biddiscombe
#   Adapted from stuff found originally somewhere on the internet
#
#   Distributed under the Boost Software License, Version 1.0. (See accompanying
#   file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

# Writes one SLURM batch script for the current parameter combination.
# Call it repeatedly with different global settings to create different
# experiments.
#
# The job gets its own directory (named after the parameters) below the
# current working directory, and a line that submits the job is appended to
# run_jobs.bash in the current working directory.
#
# Globals read:
#   NODES, TRANSFERSIZE, THREADS_PERTASK, PARCELTYPE  - form the job/dir name
#   TIME, QUEUE                                       - #SBATCH time/partition
#   LIB_PATH, LD_LIBRARY_PATH                         - library search path
#   PROCESSES_PERNODE, EXECUTABLE1, PROGRAM_PARAMS    - srun command line
# Globals written:
#   JOB_NAME, DIR_NAME, TASKS_PER_NODE
# Returns:
#   0 on success
#   1 if the job directory already exists (nothing is written)
#   2 if the directory or the batch script could not be created
function write_script
{
  JOB_NAME=$(printf 'hpx-N%04d-T%05d-t%02d-%s' \
    "${NODES}" "${TRANSFERSIZE}" "${THREADS_PERTASK}" "${PARCELTYPE}")
  # The job directory carries the same name as the job itself.
  DIR_NAME=${JOB_NAME}
  # Not referenced by the generated script; kept for anything that reads it.
  TASKS_PER_NODE=1

  if [ -d "${DIR_NAME}" ]; then
    # Directory already exists, skip generation of this job
    echo "Exists already : Skipping $DIR_NAME"
    return 1
  fi

  echo "Creating job $DIR_NAME"

  # Stop here on failure so that no submit line is queued for a job whose
  # script could not be written.
  if ! mkdir -p -- "${DIR_NAME}"; then
    echo "Error : could not create directory ${DIR_NAME}" >&2
    return 2
  fi

  # The here-document delimiter is unquoted on purpose: every expansion below,
  # including LD_LIBRARY_PATH and the srun task count, is evaluated now (at
  # generation time) and written into the job script as a literal value.
  if ! cat << _EOF_ > "${DIR_NAME}/submit-job.bash"
#!/bin/bash

#SBATCH --job-name=${JOB_NAME}
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --nodes=${NODES}
#SBATCH --time=${TIME}
#SBATCH --exclusive
#SBATCH --distribution=cyclic
#SBATCH --constraint=gpu
#SBATCH --partition=${QUEUE}

## #SBATCH --cpus-per-task=1
## #SBATCH --dependency=singleton

#======START=====
module load slurm

#
# mvapich settings used at CSCS
# not all are relevant for this test
#
export LD_LIBRARY_PATH=${LIB_PATH}:${LD_LIBRARY_PATH}

# slurm launch command
srun -n $(( PROCESSES_PERNODE * NODES )) ${EXECUTABLE1} ${PROGRAM_PARAMS}

_EOF_
  then
    echo "Error : could not write ${DIR_NAME}/submit-job.bash" >&2
    return 2
  fi

  # make the job script executable
  chmod 775 "${DIR_NAME}/submit-job.bash" || return 2

  # Append a line to run_jobs.bash that submits the job and records a matching
  # "scancel <jobid>" line in cancel_jobs.bash. The escaped \$ expansions are
  # evaluated when run_jobs.bash runs, not now.
  echo "cd ${DIR_NAME}; JOB=\$(sbatch submit-job.bash) ; echo \"\$JOB\" ; echo \"\$JOB\" | sed 's/Submitted batch job/scancel/g' >> \"\$BASEDIR/cancel_jobs.bash\"; cd \"\$BASEDIR\"" >> run_jobs.bash

}

# Resolve the absolute directory that contains this generator script.
# Works for most cases (relies on $0 pointing at the script). The lookup runs
# in a subshell, so the caller's working directory is left untouched; cd's
# output is discarded because it may print the directory when CDPATH is set.
BASEDIR=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd) || {
  echo "Error : cannot determine the directory of $0" >&2
  exit 1
}
echo "Generating jobs using base directory $BASEDIR"

# Create another script to submit all generated jobs to the scheduler.
# NOTE: run_jobs.bash is created in the current working directory, while the
# script it contains changes into BASEDIR before submitting, so this generator
# is expected to be run from its own directory.
# %q shell-quotes the path so a directory containing spaces or other special
# characters survives being re-read by the shell.
{
  printf '%s\n' '#!/bin/bash'
  printf 'BASEDIR=%q\n' "$BASEDIR"
  printf 'cd %q\n' "$BASEDIR"
} > run_jobs.bash
chmod 775 run_jobs.bash

#
# Settings shared by every generated job.
# The @NAME@ tokens look like configure-time placeholders (presumably replaced
# by the build system before this script is run - confirm). All of them are
# quoted so that a substituted value containing whitespace stays one word.
# MPIEXEC and JOB_OPTIONS1 are not referenced elsewhere in this file.
#
MPIEXEC="@MPIEXEC@"
EXECUTABLE1="@EXE_PATH@"
LIB_PATH="@LIB_PATH@"
JOB_OPTIONS1="@JOB_OPTIONS1@"
# Wall-clock limit passed to "#SBATCH --time"
TIME="00:10:00"
# Multiplied by the node count to get the "srun -n" task count
PROCESSES_PERNODE=1

# Loop through all the parameter combinations, generating one job for each.
# Every loop variable is a global that write_script reads.

# Full sweep: for NODES in 2 4 8 16 32 64 128 256 512 1024 2048 4096
for NODES in 32 64 128; do

  # Choose the SLURM partition from the node count.
  if [[ "$NODES" -eq 4096 ]]; then
    QUEUE=large
  elif [[ "$NODES" -lt 4 ]]; then
    QUEUE=debug
  else
    QUEUE=normal
  fi

  # Alternative: for PARCELTYPE in "mpi" "libfabric"
  for PARCELTYPE in "libfabric"; do

    # Start with every parcelport disabled, then enable only the selected one.
    TCP_ENABLE="-Ihpx.parcel.tcp.enable=0"
    MPI_ENABLE="-Ihpx.parcel.mpi.enable=0"
    FAB_ENABLE="-Ihpx.parcel.libfabric.enable=0"

    BOOTSTRAP="-Ihpx.parcel.bootstrap=$PARCELTYPE"

    case "$PARCELTYPE" in
      tcp)       TCP_ENABLE="-Ihpx.parcel.tcp.enable=1" ;;
      mpi)       MPI_ENABLE="-Ihpx.parcel.mpi.enable=1" ;;
      libfabric) FAB_ENABLE="-Ihpx.parcel.libfabric.enable=1" ;;
    esac

    # The trailing space is part of the value that ends up in the job script.
    HPX_ARGS="-Ihpx.parcel.message_handlers=0 --hpx:bind=balanced "
    # -Ihpx.max_busy_loop_count=10 --hpx:attach-debugger=exception"
    for TRANSFERSIZE in 256 512 1024 2048 4096 8192; do
      for THREADS_PERTASK in 12; do
        # Value for --localMB: 128 * transfer size * threads / 1024, rounded
        # to the nearest integer and capped at 1024. Pure shell arithmetic is
        # used so the result depends neither on an external 'bc' nor on the
        # locale's decimal separator. For the sizes swept here the quotient is
        # always a whole number; an exact .5 remainder would round up.
        LOCAL_SIZE=$(( (128 * TRANSFERSIZE * THREADS_PERTASK + 512) / 1024 ))
        if (( LOCAL_SIZE > 1024 )); then
          LOCAL_SIZE=1024
        fi
        PROGRAM_PARAMS="${BOOTSTRAP} ${TCP_ENABLE} ${MPI_ENABLE} ${FAB_ENABLE} --hpx:threads=${THREADS_PERTASK} ${HPX_ARGS} --localMB=${LOCAL_SIZE} --transferKB=${TRANSFERSIZE} --parceltype=${PARCELTYPE} --distribution=0 --all-to-all=1 --no-local=1 --iterations=100"
        write_script
      done
    done
  done
done

# Final line of run_jobs.bash: once all jobs are submitted, print a hint
# showing how to collect the CSVData lines from the job output files.
# The single-quoted literal (including its trailing space) is written as-is.
printf '%s\n' 'echo "Use find . -name \*.out -exec grep CSVData {} \;" ' >> run_jobs.bash