From 8d7712c1d5cd146561aa6a9f329fe794e186455a Mon Sep 17 00:00:00 2001
From: ESCOBAR Juan <escj@nuwa>
Date: Mon, 14 Sep 2020 16:00:12 +0200
Subject: [PATCH] Juan 14/09/2020: add script for binding core+gpu , with
 socket continu || pair/impair numbering

---
 bin/set_core_device        | 51 ++++++++++++++++++++++++++++++++++++++
 bin/set_core_device_impair | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100755 bin/set_core_device
 create mode 100755 bin/set_core_device_impair

diff --git a/bin/set_core_device b/bin/set_core_device
new file mode 100755
index 000000000..0a3803bd9
--- /dev/null
+++ b/bin/set_core_device
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Usage: set_core_device COMMAND [ARGS...]
+# Per-rank launcher wrapper. It reads the rank layout from Open MPI or SLURM
+# variables, exports ACC_DEVICE_NUM, then runs COMMAND pinned to one core
+# chosen with contiguous numbering: CORE = local rank times cores per rank.
+# ARGS are forwarded one word per original argument, never re-split.
+
+# Binder, split into words on purpose. Alternative: Numactl='numactl --physcpubind '
+Numactl='taskset -c '
+
+# REVIEW NOTE: both branches yield 1, so SLURM_HINT changes nothing for now.
+if [[ "${SLURM_HINT}" != *nomultithread* ]]
+then
+HYP_FAC=1
+else
+HYP_FAC=1
+fi
+
+# Count the GPUs listed by nvidia-smi, with a floor of 1 to keep the division valid.
+# Alternative: NB_DEVICE=$( echo ${SLURM_STEP_GPUS//,/ } | wc -w )
+NB_DEVICE=$( nvidia-smi -L | grep -c GPU )
+[[ "${NB_DEVICE}" -eq 0 ]] && NB_DEVICE=1
+
+# Open MPI values are preferred, SLURM values are the fallback.
+export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID}}          # global rank
+export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID}}  # node-local rank
+export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS}}          # total number of ranks
+export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES}}       # number of nodes
+
+export NPN=$(( NP / NN ))                      # ranks per node
+export NB_HYP=${SLURM_JOB_CPUS_PER_NODE/(*)/}  # removes the "(...)" part; alternative: SLURM_CPUS_ON_NODE
+export NB_CORE=$(( ${NB_HYP} / HYP_FAC ))      # CPUs divided by HYP_FAC
+export NPC=$(( NB_CORE / NPN ))                # cores per rank
+CORE=$(( LIP * NPC ))                          # core this rank is bound to
+
+# Alternating layout, only printed here. The set_core_device_impair script binds to it.
+export HALF=$(( 1+ ( NPN -1 ) / 2 ))           # NPN / 2 rounded up
+export SOC=$(( 1 *( LIP / HALF ) ))            # which group of HALF local ranks
+export RANK_SOC=$(( LIP % HALF ))              # position inside that group
+export CORE_IMP=$(( SOC + 2*NPC*RANK_SOC ))
+
+export SURBOOK=$(( NPN / NB_DEVICE ))          # ranks per GPU, 0 if GPUs outnumber ranks
+if [[ "${SURBOOK}" -eq 0 ]]
+then
+export ACC_DEVICE_NUM=$(( LIP * 2 ))           # NOTE(review): factor 2 is unexplained, confirm
+else
+export ACC_DEVICE_NUM=$(( LIP / SURBOOK ))
+fi
+echo IP=${IP} LIP=${LIP} NP=${NP} NN=${NN} NPN=${NPN} NPC=${NPC} HOST=$(hostname) NB_CORE=${NB_CORE} CORE=${CORE} CORE_IMP=${CORE_IMP} HALF=$HALF SOC=$SOC RS=${RANK_SOC} ND=${NB_DEVICE} CD=${ACC_DEVICE_NUM}
+# EXEC is taken from the environment if set, e.g. EXEC=exec replaces this shell.
+${EXEC} ${Numactl} ${CORE} "$@"
diff --git a/bin/set_core_device_impair b/bin/set_core_device_impair
new file mode 100755
index 000000000..fa32268b8
--- /dev/null
+++ b/bin/set_core_device_impair
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Usage: set_core_device_impair COMMAND [ARGS...]
+# Per-rank launcher wrapper. It reads the rank layout from Open MPI or SLURM
+# variables, exports ACC_DEVICE_NUM, then runs COMMAND pinned to core CORE_IMP,
+# which steps by 2*NPC inside a group of ranks and is offset by the group index.
+# ARGS are forwarded one word per original argument, never re-split.
+
+# Binder, split into words on purpose. Alternative: Numactl='numactl --physcpubind '
+Numactl='taskset -c '
+
+# REVIEW NOTE: both branches yield 1, so SLURM_HINT changes nothing for now.
+if [[ "${SLURM_HINT}" != *nomultithread* ]]
+then
+HYP_FAC=1
+else
+HYP_FAC=1
+fi
+
+# Count the GPUs listed by nvidia-smi, with a floor of 1 to keep the division valid.
+# Alternative: NB_DEVICE=$( echo ${SLURM_STEP_GPUS//,/ } | wc -w )
+NB_DEVICE=$( nvidia-smi -L | grep -c GPU )
+[[ "${NB_DEVICE}" -eq 0 ]] && NB_DEVICE=1
+
+# Open MPI values are preferred, SLURM values are the fallback.
+export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID}}          # global rank
+export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID}}  # node-local rank
+export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS}}          # total number of ranks
+export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES}}       # number of nodes
+
+export NPN=$(( NP / NN ))                      # ranks per node
+export NB_HYP=${SLURM_JOB_CPUS_PER_NODE/(*)/}  # removes the "(...)" part; alternative: SLURM_CPUS_ON_NODE
+export NB_CORE=$(( ${NB_HYP} / HYP_FAC ))      # CPUs divided by HYP_FAC
+export NPC=$(( NB_CORE / NPN ))                # cores per rank
+CORE=$(( LIP * NPC ))                          # contiguous choice, only printed here
+
+# Core used for binding. Presumably for nodes whose core ids alternate between two sockets, TODO confirm.
+export HALF=$(( 1+ ( NPN -1 ) / 2 ))           # NPN / 2 rounded up
+export SOC=$(( 1 *( LIP / HALF ) ))            # which group of HALF local ranks
+export RANK_SOC=$(( LIP % HALF ))              # position inside that group
+export CORE_IMP=$(( SOC + 2*NPC*RANK_SOC ))
+
+export SURBOOK=$(( NPN / NB_DEVICE ))          # ranks per GPU, 0 if GPUs outnumber ranks
+if [[ "${SURBOOK}" -eq 0 ]]
+then
+export ACC_DEVICE_NUM=$(( LIP * 2 ))           # NOTE(review): factor 2 is unexplained, confirm
+else
+export ACC_DEVICE_NUM=$(( LIP / SURBOOK ))
+fi
+echo IP=${IP} LIP=${LIP} NP=${NP} NN=${NN} NPN=${NPN} NPC=${NPC} HOST=$(hostname) NB_CORE=${NB_CORE} CORE=${CORE} CORE_IMP=${CORE_IMP} HALF=$HALF SOC=$SOC RS=${RANK_SOC} ND=${NB_DEVICE} CD=${ACC_DEVICE_NUM}
+# EXEC is taken from the environment if set, e.g. EXEC=exec replaces this shell.
+${EXEC} ${Numactl} ${CORE_IMP} "$@"
-- 
GitLab