#!/bin/bash
#
# set_core_device - per-rank launch wrapper that pins the calling MPI rank to
# one CPU core and selects one GPU for it, then runs the given command.
#
# Origin: commit 8d7712c1d5cd146561aa6a9f329fe794e186455a, ESCOBAR Juan,
# 14/09/2020: "add script for binding core+gpu, with socket continu ||
# pair/impair numbering" (contiguous vs. even/odd core numbering).
#
# Usage (one instance per rank, started by srun or mpirun):
#   set_core_device <command> [args...]
#
# This variant binds with contiguous numbering: local rank LIP is pinned to
# core LIP * NPC. CORE_IMP (even/odd numbering) is still computed and exported
# but is not used for the binding here.
#
# Environment read:
#   OMPI_COMM_WORLD_RANK / SLURM_PROCID         global rank        -> IP
#   OMPI_COMM_WORLD_LOCAL_RANK / SLURM_LOCALID  rank on this node  -> LIP
#   OMPI_COMM_WORLD_SIZE / SLURM_NTASKS         number of ranks    -> NP
#   OMPI_MCA_orte_num_nodes / SLURM_NNODES      number of nodes    -> NN
#   SLURM_JOB_CPUS_PER_NODE                     CPUs per node      -> NB_HYP
#   EXEC   optional command prefix; EXEC=exec replaces this shell with the
#          bound command instead of keeping the wrapper alive.
#
# Exported for the launched command: IP LIP NP NN NPN NB_HYP NB_CORE NPC HALF
# SOC RANK_SOC CORE_IMP SURBOOK ACC_DEVICE_NUM.

# Binding tool and its option; the core id is appended as the next argument.
# Alternative kept from the original: numactl --physcpubind
BIND_CMD=(taskset -c)

# Divisor turning the per-node CPU count into a core count.
# NOTE(review): the original tested whether SLURM_HINT contains
# "nomultithread" but assigned 1 on both branches, so the hint never had any
# effect. Confirm whether 2 was intended for hyper-threaded allocations.
HYP_FAC=1

# Print the number of GPUs reported by "nvidia-smi -L", never less than 1 so
# that later divisions by the device count are always defined.
count_devices() {
  local n=0
  if command -v nvidia-smi >/dev/null 2>&1; then
    n=$(nvidia-smi -L | grep -c GPU)
  fi
  ((n > 0)) || n=1
  printf '%s\n' "${n}"
}

# Derive the rank/core/device layout from the launcher environment and the
# global NB_DEVICE, exporting the results. Returns 1 (with a message on
# stderr) when a required input is missing or not a non-negative integer,
# instead of continuing with failed arithmetic.
compute_layout() {
  export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID}}
  export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID}}
  export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS}}
  export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES}}
  # Keep only the leading count: "40(x2)" -> 40, "40(x2),20" -> 40.
  export NB_HYP=${SLURM_JOB_CPUS_PER_NODE%%[(,]*}

  local name
  for name in LIP NP NN NB_HYP NB_DEVICE; do
    if ! [[ ${!name} =~ ^[0-9]+$ ]]; then
      printf '%s: %s must be a non-negative integer (got "%s")\n' \
        "${0##*/}" "${name}" "${!name}" >&2
      return 1
    fi
  done
  if ((NN == 0 || NP < NN || NB_DEVICE == 0)); then
    printf '%s: inconsistent layout NP=%s NN=%s NB_DEVICE=%s\n' \
      "${0##*/}" "${NP}" "${NN}" "${NB_DEVICE}" >&2
    return 1
  fi

  export NPN=$((NP / NN))           # ranks per node
  export NB_CORE=$((NB_HYP / HYP_FAC))
  export NPC=$((NB_CORE / NPN))     # cores available to each rank
  CORE=$((LIP * NPC))               # contiguous numbering

  # Split the node's ranks in two halves (HALF = ceil(NPN / 2)); SOC is the
  # half a rank falls in and RANK_SOC its index inside that half.
  # Presumably the halves map to the two sockets of a node whose core ids
  # alternate between sockets (even/odd) - confirm on the target machine.
  export HALF=$((1 + (NPN - 1) / 2))
  export SOC=$((LIP / HALF))
  export RANK_SOC=$((LIP % HALF))
  export CORE_IMP=$((SOC + 2 * NPC * RANK_SOC))

  # SURBOOK = ranks sharing one device; 0 means fewer ranks than devices, in
  # which case ranks are spread over every second device.
  export SURBOOK=$((NPN / NB_DEVICE))
  local device
  if ((SURBOOK == 0)); then
    device=$((LIP * 2))
  else
    device=$((LIP / SURBOOK))
  fi
  # Wrap so the index stays a valid device id when NPN is not a multiple of
  # NB_DEVICE (the unwrapped value could reach or exceed NB_DEVICE).
  export ACC_DEVICE_NUM=$((device % NB_DEVICE))
}

# Run "$2..." pinned to CPU core $1, preserving argument boundaries.
run_bound() {
  local core=$1
  shift
  # EXEC is intentionally unquoted: it is empty by default and must vanish.
  # shellcheck disable=SC2086
  ${EXEC} "${BIND_CMD[@]}" "${core}" "$@"
}

main() {
  NB_DEVICE=$(count_devices)
  compute_layout || return

  echo "IP=${IP} LIP=${LIP} NP=${NP} NN=${NN} NPN=${NPN} NPC=${NPC} HOST=$(hostname) NB_CORE=${NB_CORE} CORE=${CORE} CORE_IMP=${CORE_IMP} HALF=${HALF} SOC=${SOC} RS=${RANK_SOC} ND=${NB_DEVICE} CD=${ACC_DEVICE_NUM}"

  run_bound "${CORE}" "$@"
}

main "$@"
#!/bin/bash
#
# set_core_device_impair - per-rank launch wrapper that pins the calling MPI
# rank to one CPU core and selects one GPU for it, then runs the given command.
#
# Usage (one instance per rank, started by srun or mpirun):
#   set_core_device_impair <command> [args...]
#
# This variant binds with the even/odd ("pair/impair") numbering: the rank is
# pinned to core CORE_IMP = SOC + 2 * NPC * RANK_SOC. The contiguous value
# CORE = LIP * NPC is still computed and printed but is not used for the
# binding here.
#
# Environment read:
#   OMPI_COMM_WORLD_RANK / SLURM_PROCID         global rank        -> IP
#   OMPI_COMM_WORLD_LOCAL_RANK / SLURM_LOCALID  rank on this node  -> LIP
#   OMPI_COMM_WORLD_SIZE / SLURM_NTASKS         number of ranks    -> NP
#   OMPI_MCA_orte_num_nodes / SLURM_NNODES      number of nodes    -> NN
#   SLURM_JOB_CPUS_PER_NODE                     CPUs per node      -> NB_HYP
#   EXEC   optional command prefix; EXEC=exec replaces this shell with the
#          bound command instead of keeping the wrapper alive.
#
# Exported for the launched command: IP LIP NP NN NPN NB_HYP NB_CORE NPC HALF
# SOC RANK_SOC CORE_IMP SURBOOK ACC_DEVICE_NUM.

# Binding tool and its option; the core id is appended as the next argument.
# Alternative kept from the original: numactl --physcpubind
BIND_CMD=(taskset -c)

# Divisor turning the per-node CPU count into a core count.
# NOTE(review): the original tested whether SLURM_HINT contains
# "nomultithread" but assigned 1 on both branches, so the hint never had any
# effect. Confirm whether 2 was intended for hyper-threaded allocations.
HYP_FAC=1

# Print the number of GPUs reported by "nvidia-smi -L", never less than 1 so
# that later divisions by the device count are always defined.
count_devices() {
  local n=0
  if command -v nvidia-smi >/dev/null 2>&1; then
    n=$(nvidia-smi -L | grep -c GPU)
  fi
  ((n > 0)) || n=1
  printf '%s\n' "${n}"
}

# Derive the rank/core/device layout from the launcher environment and the
# global NB_DEVICE, exporting the results. Returns 1 (with a message on
# stderr) when a required input is missing or not a non-negative integer,
# instead of continuing with failed arithmetic.
compute_layout() {
  export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID}}
  export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID}}
  export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS}}
  export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES}}
  # Keep only the leading count: "40(x2)" -> 40, "40(x2),20" -> 40.
  export NB_HYP=${SLURM_JOB_CPUS_PER_NODE%%[(,]*}

  local name
  for name in LIP NP NN NB_HYP NB_DEVICE; do
    if ! [[ ${!name} =~ ^[0-9]+$ ]]; then
      printf '%s: %s must be a non-negative integer (got "%s")\n' \
        "${0##*/}" "${name}" "${!name}" >&2
      return 1
    fi
  done
  if ((NN == 0 || NP < NN || NB_DEVICE == 0)); then
    printf '%s: inconsistent layout NP=%s NN=%s NB_DEVICE=%s\n' \
      "${0##*/}" "${NP}" "${NN}" "${NB_DEVICE}" >&2
    return 1
  fi

  export NPN=$((NP / NN))           # ranks per node
  export NB_CORE=$((NB_HYP / HYP_FAC))
  export NPC=$((NB_CORE / NPN))     # cores available to each rank
  CORE=$((LIP * NPC))               # contiguous numbering (diagnostic only)

  # Split the node's ranks in two halves (HALF = ceil(NPN / 2)); SOC is the
  # half a rank falls in and RANK_SOC its index inside that half.
  # Presumably the halves map to the two sockets of a node whose core ids
  # alternate between sockets (even/odd) - confirm on the target machine.
  export HALF=$((1 + (NPN - 1) / 2))
  export SOC=$((LIP / HALF))
  export RANK_SOC=$((LIP % HALF))
  export CORE_IMP=$((SOC + 2 * NPC * RANK_SOC))

  # SURBOOK = ranks sharing one device; 0 means fewer ranks than devices, in
  # which case ranks are spread over every second device.
  export SURBOOK=$((NPN / NB_DEVICE))
  local device
  if ((SURBOOK == 0)); then
    device=$((LIP * 2))
  else
    device=$((LIP / SURBOOK))
  fi
  # Wrap so the index stays a valid device id when NPN is not a multiple of
  # NB_DEVICE (the unwrapped value could reach or exceed NB_DEVICE).
  export ACC_DEVICE_NUM=$((device % NB_DEVICE))
}

# Run "$2..." pinned to CPU core $1, preserving argument boundaries.
run_bound() {
  local core=$1
  shift
  # EXEC is intentionally unquoted: it is empty by default and must vanish.
  # shellcheck disable=SC2086
  ${EXEC} "${BIND_CMD[@]}" "${core}" "$@"
}

main() {
  NB_DEVICE=$(count_devices)
  compute_layout || return

  echo "IP=${IP} LIP=${LIP} NP=${NP} NN=${NN} NPN=${NPN} NPC=${NPC} HOST=$(hostname) NB_CORE=${NB_CORE} CORE=${CORE} CORE_IMP=${CORE_IMP} HALF=${HALF} SOC=${SOC} RS=${RANK_SOC} ND=${NB_DEVICE} CD=${ACC_DEVICE_NUM}"

  run_bound "${CORE_IMP}" "$@"
}

main "$@"