From 6d8cadeab8d98681f4fc22e8a66855c4593f9404 Mon Sep 17 00:00:00 2001
From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr>
Date: Wed, 1 Feb 2023 11:42:03 +0100
Subject: [PATCH] Juan 01/02/2023:set_rocm_bind_device, new script for better binding of GPU & CPU on ADASTRA

---
 bin/set_rocm_bind_device | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100755 bin/set_rocm_bind_device

diff --git a/bin/set_rocm_bind_device b/bin/set_rocm_bind_device
new file mode 100755
index 000000000..cea4b811a
--- /dev/null
+++ b/bin/set_rocm_bind_device
@@ -0,0 +1,51 @@
+#!/bin/bash
+#set -x
+#
+# set_rocm_bind_device: wrapper to put in front of the real command of each
+# MPI task. It derives a GPU slot from the node-local rank, exports the
+# matching device id in ROCR_VISIBLE_DEVICES, prints the resulting binding
+# and finally exec's the command given as arguments.
+#
+# Usage: srun|mpirun ... set_rocm_bind_device <command> [args...]
+#
+# Environment read (the Open MPI variable wins over the Slurm one):
+#   NB_DEVICE                                    GPU count (default: detected)
+#   OMPI_COMM_WORLD_LOCAL_RANK / SLURM_LOCALID   node-local rank  (default 0)
+#   OMPI_COMM_WORLD_RANK       / SLURM_PROCID    global rank      (default 0)
+#   OMPI_COMM_WORLD_SIZE       / SLURM_NTASKS    number of tasks  (default 1)
+#   OMPI_MCA_orte_num_nodes    / SLURM_NNODES    number of nodes  (default 1)
+# Environment exported: LIP IP NP NN NPN HN IG ROCR_VISIBLE_DEVICES
+
+# Slot -> device id table. NOTE(review): the order presumably follows the
+# CPU/GPU affinity of the ADASTRA nodes -- confirm against the node topology.
+GPUSID=(4 5 2 3 6 7 0 1)
+
+# Without NB_DEVICE, count the lines of 'rocm-smi -i' that mention "GPU".
+# ${Gpuinfo} stays unquoted on purpose: it holds a command and its option.
+Gpuinfo='rocm-smi -i'
+NB_DEVICE=${NB_DEVICE:-$( ${Gpuinfo} | grep -c GPU )}
+# Keep the count inside 1..${#GPUSID[@]}: a value <= 0 breaks the division
+# below, and a larger one could index past the end of GPUSID.
+(( NB_DEVICE > 0 )) || NB_DEVICE=1
+(( NB_DEVICE <= ${#GPUSID[@]} )) || NB_DEVICE=${#GPUSID[@]}
+
+# Defaults let the wrapper also run outside of mpirun/srun (single task).
+export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID:-0}}
+export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID:-0}}
+export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS:-1}}
+export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES:-1}}
+(( NP > 0 )) || NP=1
+(( NN > 0 )) || NN=1
+
+# NPN: tasks per node, rounded up. IG: GPU slot of this task; consecutive
+# local ranks share a slot when there are more tasks than devices.
+export NPN=$(( 1 + (NP - 1) / NN ))
+HN=$( hostname )
+export HN
+export IG=$(( LIP / ( 1 + (NPN - 1) / NB_DEVICE ) ))
+export ROCR_VISIBLE_DEVICES=${GPUSID[IG]}
+echo "LIP=${LIP} IP=${IP} NP=${NP} NN=${NN} NPN=${NPN} NG=${NB_DEVICE} IG=${IG} GPU=${ROCR_VISIBLE_DEVICES} ${HN} $(taskset -pc $$)"
+
+# "$@" (not $*) hands every argument to the command as one unchanged word,
+# even when it contains blanks or glob characters.
+exec "$@"
-- 
GitLab