#!/bin/bash
#set -x
#
# set_rocm_bind_device - select one GPU for the calling rank, then exec the
# command given on the command line.
#
# Usage: set_rocm_bind_device <command> [args...]
#
# The rank layout is read from the Open MPI variables, falling back to the
# Slurm ones, so the script is presumably meant to be started once per rank by
# mpirun/srun (confirm against the job scripts that call it).
#
# Environment read:
#   NB_DEVICE                                      GPUs per node; when unset or
#                                                  empty it is the number of
#                                                  `rocm-smi -i` output lines
#                                                  containing "GPU"
#   OMPI_COMM_WORLD_LOCAL_RANK | SLURM_LOCALID     -> LIP (default 0)
#   OMPI_COMM_WORLD_RANK       | SLURM_PROCID      -> IP  (default 0)
#   OMPI_COMM_WORLD_SIZE       | SLURM_NTASKS      -> NP  (default 1)
#   OMPI_MCA_orte_num_nodes    | SLURM_NNODES      -> NN  (default 1)
#
# Environment exported to the command:
#   LIP IP NP NN NPN HN IG ROCR_VISIBLE_DEVICES
#
# One status line describing the binding is written to stdout.

# Slot index -> device id placed in ROCR_VISIBLE_DEVICES.
# NOTE(review): the ordering presumably mirrors the CPU/GPU topology of the
# target nodes; confirm before reusing on different hardware.
GPUSID=(4 5 2 3 6 7 0 1)

# Print the number of ranks per node: 1 + (np - 1) / nn  (integer division).
# Arguments: $1 - total number of ranks, $2 - number of nodes (must be > 0)
ranks_per_node() {
  local np=$1 nn=$2
  echo $(( 1 + (np - 1) / nn ))
}

# Print the GPU slot for a local rank: consecutive local ranks share a slot in
# groups of 1 + (npn - 1) / ngpu.
# Arguments: $1 - local rank, $2 - ranks per node, $3 - GPUs per node (> 0)
gpu_slot_for_rank() {
  local lip=$1 npn=$2 ngpu=$3
  echo $(( lip / (1 + (npn - 1) / ngpu) ))
}

# Command used to list the GPUs; kept as an array so it is never re-split.
gpu_info_cmd=(rocm-smi -i)
NB_DEVICE=${NB_DEVICE:-$("${gpu_info_cmd[@]}" | grep -c GPU)}
# No GPU detected (or rocm-smi unavailable): fall back to 1 so the divisions
# below stay defined.
(( NB_DEVICE > 0 )) || NB_DEVICE=1

# Defaults make the script usable outside a launcher; without them an unset
# node count would lead to a division by zero.
export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID:-0}}
export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID:-0}}
export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS:-1}}
export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES:-1}}
(( NN > 0 )) || NN=1

# Assign first, export second, so a failing command substitution is not hidden
# behind the exit status of `export`.
NPN=$(ranks_per_node "${NP}" "${NN}")
export NPN
HN=$(hostname)
export HN
IG=$(gpu_slot_for_rank "${LIP}" "${NPN}" "${NB_DEVICE}")
export IG

export ROCR_VISIBLE_DEVICES=${GPUSID[IG]}
if [[ -z "${ROCR_VISIBLE_DEVICES}" ]]; then
  # IG fell outside GPUSID; the (empty) value is still exported as before,
  # but no longer silently.
  echo "set_rocm_bind_device: no GPUSID entry for slot ${IG}" >&2
fi

echo "LIP=${LIP} IP=${IP} NP=${NP} NN=${NN} NPN=${NPN} NG=${NB_DEVICE} IG=${IG} GPU=${ROCR_VISIBLE_DEVICES} ${HN} $(taskset -pc "$$")"

# "$@" keeps every argument of the wrapped command intact (spaces, globs).
exec "$@"