From 88d26376ff5d4092390a3f629dedc7623aaf0e78 Mon Sep 17 00:00:00 2001
From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr>
Date: Fri, 16 Dec 2022 15:45:02 +0100
Subject: [PATCH] Juan 16/12/2022:add set_rocm_device & Rocprof tools

---
 bin/Rocprof         | 18 ++++++++++++++++++
 bin/set_rocm_device | 23 +++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100755 bin/Rocprof
 create mode 100755 bin/set_rocm_device

diff --git a/bin/Rocprof b/bin/Rocprof
new file mode 100755
index 000000000..c0bc6ab32
--- /dev/null
+++ b/bin/Rocprof
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Run rocprof with one CSV output file per MPI rank; all arguments are passed on to rocprof.
+# The file name is built from the environment: XYZ, NP, NG, NC, CG, SLURM_JOBID and the rank.
+if [[ -n ${OMPI_COMM_WORLD_RANK+z} ]]; then
+   # Open MPI
+   export MPI_RANK=${OMPI_COMM_WORLD_RANK}
+elif [[ -n ${MV2_COMM_WORLD_RANK+z} ]]; then
+   # MVAPICH2
+   export MPI_RANK=${MV2_COMM_WORLD_RANK}
+elif [[ -n ${SLURM_PROCID+z} ]]; then
+   # srun
+   export MPI_RANK=${SLURM_PROCID}
+fi
+outfile="results_Rocprof-MNH${XYZ}_${NP}NP_${NG}NG_${NC}NC_${CG}CG.${MPI_RANK}IP.${SLURM_JOBID}"
+# "$@" hands every argument to rocprof unchanged; eval on "$*" re-split and re-expanded them.
+rocprof -o "${outfile}.csv" "$@"; status=$?
+rm -f -- "${outfile}".{db,json,sysinfo.txt}   # keep only the CSV among files with this base name
+exit "${status}"   # report the rocprof status, not the status of rm
diff --git a/bin/set_rocm_device b/bin/set_rocm_device
new file mode 100755
index 000000000..74833ee79
--- /dev/null
+++ b/bin/set_rocm_device
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Pick a GPU index for this task, export it as ROCR_VISIBLE_DEVICES, then exec the command.
+# Usage: <launcher> set_rocm_device command [args...]   Optional env: NB_DEVICE, GPU_OFFSET.
+#set -x
+
+# GPUs per node: NB_DEVICE if set, otherwise the number of "GPU" lines printed by rocm-smi -i.
+NB_DEVICE=${NB_DEVICE:-$( rocm-smi -i | grep -c GPU )}
+[[ "${NB_DEVICE}" -eq 0 ]] && NB_DEVICE=1
+
+export GPU_OFFSET=${GPU_OFFSET:-0}
+
+# Task layout from Open MPI, else Slurm; the defaults avoid a division by zero without either.
+export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID:-0}}
+export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID:-0}}
+export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS:-1}}
+export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES:-1}}
+export NPN=$(( 1 + (NP-1)/ NN ))   # tasks per node, rounded up
+export HN=$( hostname )
+# Consecutive local ranks share a device: ceil(NPN / NB_DEVICE) tasks per GPU index.
+export ROCR_VISIBLE_DEVICES=$(( GPU_OFFSET + LIP / ( 1 + (NPN-1) / NB_DEVICE ) ))
+echo LIP=${LIP} IP=${IP} NP=${NP} NN=${NN} NPN=${NPN} NG=${NB_DEVICE} GPU=${ROCR_VISIBLE_DEVICES} ${HN}
+
+exec "$@"   # "$@" keeps each argument as one word; $* re-split arguments containing spaces
-- 
GitLab