File: //proc/thread-self/root/usr/local/qcloud/nv/nv_cudnn_install.sh
#!/bin/bash
# Preflight checks: the host must expose at least one PCI device with NVIDIA's
# vendor ID (10de) and the script must run as root.
# Exit status 255 is what bash reports for `exit -1`; it is spelled out
# explicitly because negative exit statuses are not portable.
nvidia_devices=$(lspci -d 10de:)
if [[ -z "$nvidia_devices" ]]; then
  echo "make sure there is a nvidia pci device"
  exit 255
fi

# Quoted on purpose: if `id -u` prints nothing, the comparison still fails
# closed ("must be root") instead of turning into a test syntax error.
if [[ "$(id -u)" != "0" ]]; then
  echo "must be root"
  exit 255
fi

# Best effort: nothing visible in this script uses a relative path, so a
# failed cd is reported but does not abort the install.
cd /usr/local/qcloud/ || echo "warning: cannot cd to /usr/local/qcloud/" >&2

# is_ubuntu is 1 when /etc/os-release mentions "Ubuntu" (case-insensitive),
# 0 otherwise (including when the file is missing or unreadable).
is_ubuntu=0
if grep -qi "Ubuntu" /etc/os-release 2>/dev/null; then
  is_ubuntu=1
fi
# cuDNN v8.4.0 and later are published as .tar.xz archives, e.g.
#   cudnn-linux-x86_64-8.8.0.121_cuda12-archive.tar.xz
#
# Arguments:
#   Tarball mode (host is not Ubuntu, or $1 ends in .xz):
#     $1 - archive file name on the mirror
#   Deb mode (Ubuntu host and $1 does not end in .xz):
#     $1 - runtime .deb, $2 - dev .deb, $3 - doc .deb (file names on the mirror)
# Reads:  is_ubuntu ("1" on Ubuntu, "0" otherwise), set earlier in this script.
# Exits:  255 on bad input or download failure, 1 when the install fails.
cudnn_mirror_url="http://mirrors.tencentyun.com/install/GPU"

#######################################
# Download one file from the mirror, retrying once per second.
# Every call has its own attempt counter, so one slow download cannot
# consume the retry budget of the next one.
# Arguments: $1 - file name on the mirror
#            $2 - local destination path
#            $3 - maximum number of attempts
# Returns:   0 once wget succeeds and the file is non-empty, 1 otherwise
#######################################
cudnn_download() {
  local name=$1 dest=$2 max_attempts=$3
  local attempt=0
  while [[ "$attempt" -lt "$max_attempts" ]]; do
    # Success requires a zero wget status AND a non-empty file; a partial
    # file left behind by a failed transfer does not count.
    if wget "$cudnn_mirror_url/$name" -O "$dest" && [[ -s "$dest" ]]; then
      return 0
    fi
    attempt=$((attempt + 1))
    sleep 1
  done
  return 1
}

#######################################
# Report an installation failure and terminate the script with status 1.
# Arguments: $1 - optional detail, written to stderr
#######################################
cudnn_install_failed() {
  if [[ -n "$1" ]]; then
    echo "$1" >&2
  fi
  echo "install cudnn failed and exit."
  exit 1
}

cudnn_installer=$1
if [[ -z "$cudnn_installer" ]]; then
  # An empty name would make every "/tmp/<name>" style path resolve to the
  # temporary directory itself, including the paths that get removed.
  echo "usage: $0 <cudnn archive> | <runtime deb> <dev deb> <doc deb>" >&2
  exit 255
fi

# Private scratch directory: avoids stale content from a pre-existing
# /tmp/cuda and never deletes a directory this script did not create.
cudnn_workdir=$(mktemp -d /tmp/cudnn_install.XXXXXX) || {
  echo "cannot create a temporary directory under /tmp" >&2
  exit 255
}
trap 'rm -rf -- "${cudnn_workdir:?}"' EXIT
mkdir -p "$cudnn_workdir/download" "$cudnn_workdir/cuda"

if [[ "${cudnn_installer##*.}" == "xz" || "$is_ubuntu" == "0" ]]; then
  # Tarball mode: unpack the archive and copy headers/libraries into the
  # CUDA tree under /usr/local/cuda.
  cudnn_archive="$cudnn_workdir/download/${cudnn_installer##*/}"
  if ! cudnn_download "$cudnn_installer" "$cudnn_archive" 30; then
    echo "download cudnn file failed, please check the network"
    exit 255
  fi

  tar xf "$cudnn_archive" -C "$cudnn_workdir/cuda" --strip-components 1 \
    > /dev/null 2>&1 \
    || cudnn_install_failed "cannot extract $cudnn_installer"
  cp "$cudnn_workdir"/cuda/include/cudnn* /usr/local/cuda/include \
    || cudnn_install_failed "cannot copy cuDNN headers to /usr/local/cuda/include"
  # -P keeps the libcudnn*.so symlinks as symlinks instead of writing a full
  # copy of the library for every link name.
  cp -P "$cudnn_workdir"/cuda/lib*/libcudnn* /usr/local/cuda/lib64 \
    || cudnn_install_failed "cannot copy cuDNN libraries to /usr/local/cuda/lib64"
  chmod a+r /usr/local/cuda/include/cudnn* /usr/local/cuda/lib64/libcudnn*

  # Verify: the installed headers must define CUDNN_MAJOR.
  grep -qs CUDNN_MAJOR /usr/local/cuda/include/cudnn* \
    || cudnn_install_failed "CUDNN_MAJOR not found in /usr/local/cuda/include/cudnn*"
else
  # Deb mode: download all three packages first, then install them in the
  # order runtime, dev, doc.
  cudnn_debs=("$1" "$2" "$3")
  for cudnn_deb in "${cudnn_debs[@]}"; do
    # 10 attempts per package. The per-package counter replaces a single
    # shared counter that could leave later packages with no attempts at all.
    if [[ -z "$cudnn_deb" ]] \
      || ! cudnn_download "$cudnn_deb" "$cudnn_workdir/download/${cudnn_deb##*/}" 10; then
      echo "download cudnn file failed, please check the input,url oand network"
      exit 255
    fi
  done

  for cudnn_deb in "${cudnn_debs[@]}"; do
    # A dpkg failure is reported but not fatal on its own; the header check
    # below decides whether the installation succeeded.
    if ! dpkg -i "$cudnn_workdir/download/${cudnn_deb##*/}" > /dev/null 2>&1; then
      echo "warning: dpkg -i failed for $cudnn_deb" >&2
    fi
  done

  # Verify: the installed headers must define CUDNN_MAJOR.
  grep -qs CUDNN_MAJOR /usr/include/cudnn* \
    || cudnn_install_failed "CUDNN_MAJOR not found in /usr/include/cudnn*"
fi

# Announce success only after verification has passed.
echo "install nvidia cudnn finished"

# Clean up now and drop the trap so later parts of the script are unaffected.
rm -rf -- "${cudnn_workdir:?}"
trap - EXIT
# Restart the barad agent, but only when it is installed (its directory
# exists) and at least one process whose command line contains "barad_agent"
# is currently running.
# pgrep never reports itself, so the result does not depend on whether the
# matching tool happens to show up in the process list.
if [[ -d /usr/local/qcloud/monitor/barad/ ]] \
  && pgrep -f barad_agent > /dev/null 2>&1; then
  /usr/local/qcloud/monitor/barad/admin/stop.sh
  /usr/local/qcloud/monitor/barad/admin/trystart.sh
  echo "barad agent restarted"
fi