HEX
Server: Apache/2.4.52 (Ubuntu)
System: Linux WebLive 5.15.0-79-generic #86-Ubuntu SMP Mon Jul 10 16:07:21 UTC 2023 x86_64
User: ubuntu (1000)
PHP: 7.4.33
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
Upload Files
File: //proc/thread-self/root/usr/local/qcloud/nv/taco_setup.sh
#!/bin/bash

########################################################################
#
# 	This script sets up the execution environment for TACO Train. Usage:
#
# 		sudo bash taco_setup.sh
#
########################################################################

# Refuse to continue unless running as root. The error goes to stderr and the
# script exits non-zero so callers can detect the failure (a bare `exit` would
# return the status of the preceding echo, i.e. 0).
if [ "$EUID" -ne 0 ]; then
	echo "ERROR: Please run this script as root!" >&2
	exit 1
fi

# Fixed locations: helper-script directory and installation log file.
SH_DIR="/usr/local/qcloud/nv"
LOG_PATH="/var/log/taco_installation.log"

# Distribution name: the value of the NAME= line in /etc/os-release, kept
# verbatim (including any surrounding quotes).
OS=$(awk -F= '/^NAME=/{print $2}' /etc/os-release)

# TODO: there might be more robust way to get gpu or nic counts
# GPU_NUM: number of PCI devices with vendor ID 10de (NVIDIA) listed by lspci.
# NIC_NUM: number of lspci lines mentioning "ethernet" (case-insensitive).
GPU_NUM=$(lspci -d 10de: | wc -l)
NIC_NUM=$(lspci | grep -ci ethernet)

# Print a blank line and a timestamped message to stdout, append the message
# to $LOG_PATH, then terminate the script with exit status 1.
#   $1 - message text
# Globals: LOG_PATH (read)
log_exit()
{
	echo
	# LOG_PATH is quoted so a path containing whitespace still names one file.
	printf '%s\n' "[$(date)] $1" | tee -a "$LOG_PATH"
	exit 1
}

# Print a blank line and a timestamped message to stdout, and append the
# message (without the blank line) to $LOG_PATH.
#   $1 - message text
# Globals: LOG_PATH (read)
log()
{
	echo
	# LOG_PATH is quoted so a path containing whitespace still names one file.
	printf '%s\n' "[$(date)] $1" | tee -a "$LOG_PATH"
}

# Verify that the machine has more NICs than GPUs.
# Logs the check, then aborts through log_exit when GPU_NUM is greater than
# or equal to NIC_NUM; otherwise returns normally.
# Globals: GPU_NUM (read), NIC_NUM (read)
check_environment()
{
	log "Check whether assistant nics are attached and enough"

	# Strictly more NICs than GPUs: nothing to complain about.
	[ "$GPU_NUM" -ge "$NIC_NUM" ] || return 0

	log_exit "nic is less than gpu card count [nic:$NIC_NUM vs. gpu:$GPU_NUM], exit"
}

# Reconfigure harp after each reboot
#
# Makes sure $SH_DIR/harp_setup.sh exists (downloading it when missing) and
# appends a line invoking it to rc.local unless that file already mentions
# "harp".
#   $1 - rc.local file to update (optional, default: /etc/rc.local)
# Globals: SH_DIR (read)
reconfigure_each_reboot()
{
	local rc_local="${1:-/etc/rc.local}"
	local harp_script="$SH_DIR/harp_setup.sh"

	if [ ! -f "$harp_script" ]; then
		# mkdir -p is already a no-op when the directory exists.
		mkdir -p "$SH_DIR"
		# NOTE(review): the script is fetched over plain HTTP and then executed;
		# consider HTTPS or a checksum check if the mirror supports it.
		if ! wget http://mirrors.tencent.com/install/GPU/taco/harp_setup.sh -P "$SH_DIR" >> /dev/null 2>&1; then
			# Surface the failure instead of discarding it with the output.
			log "Failed to download harp_setup.sh to $SH_DIR"
		fi
	fi

	# Append the entry only once; an unreadable or missing rc.local counts as
	# "not registered yet".
	# NOTE(review): if rc.local ends with `exit 0`, an appended line never
	# runs - confirm against the target images.
	if ! grep -q harp "$rc_local" 2> /dev/null; then
		log "Modify $rc_local to auto re-configure harp after each reboot"
		echo "$SH_DIR/harp_setup.sh 2>&1" >> "$rc_local"
	fi
}

# Add 1GB hugepage boot parameters to /etc/default/grub and regenerate the
# grub configuration. Does nothing when the file already mentions "hugepages".
# Globals: GPU_NUM (read), OS (read)
# Returns: 0 when already configured; 1 when editing /etc/default/grub fails;
#          otherwise the status of the grub regeneration command.
enable_hugepages()
{
	# Local so the page count does not leak into the caller's scope.
	local hugepages

	if grep -q hugepages /etc/default/grub 2> /dev/null; then
		return 0
	fi

	# 5 pages per GPU plus 10 extra; an empty GPU_NUM evaluates to 0 here
	# instead of triggering an arithmetic syntax error.
	hugepages=$((GPU_NUM * 5 + 10))

	# NOTE(review): the address matches every line containing
	# GRUB_CMDLINE_LINUX, so GRUB_CMDLINE_LINUX_DEFAULT is edited as well -
	# confirm that is intended.
	if ! sed -i "/GRUB_CMDLINE_LINUX/ s/\"$/ default_hugepagesz=1GB hugepagesz=1GB hugepages=$hugepages\"/" /etc/default/grub; then
		# Do not claim success or regenerate grub from an unmodified file.
		log "Failed to update /etc/default/grub, hugepages not enabled"
		return 1
	fi
	log "Enable hugepages with size $hugepages"

	if [[ $OS == *"Ubuntu"* ]]; then
		sudo update-grub2
	else
		sudo grub2-mkconfig -o /boot/grub2/grub.cfg
	fi
}

# Entry point: run every setup step in order.
main()
{
	log "========== Start configuring environment for TACO-Training ========== "

	# Aborts the script when the NIC count does not exceed the GPU count.
	check_environment

	# Make sure harp_setup.sh is present and registered in rc.local ...
	reconfigure_each_reboot

	# ... then run it once right away.
	bash "$SH_DIR/harp_setup.sh"

	enable_hugepages
}

main