File: //usr/local/qcloud/irq/net_smp_affinity.sh
#!/bin/bash
##used to bind virtio-input interrupt to last cpu
# Append the usual system binary directories to PATH.
export PATH=$PATH:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin
# Absolute directory that contains this script (used later to locate a
# companion script). Everything is quoted so that a script path containing
# whitespace still resolves correctly.
dir=$(cd "$(dirname "$0")" && pwd)
# Separator plus timestamp so consecutive runs can be told apart in the output.
echo "--------------------------------------------"
date
#######################################
# Print the smp_affinity bitmask that selects exactly one CPU.
# The mask is hexadecimal; once more than 32 bits are needed it is written
# as comma-separated 32-bit words, most significant word first
# (e.g. CPU number 33 -> "1,00000000").
# Arguments: $1 - CPU number, 1-based (1 selects bit 0)
# Outputs:   the mask on stdout
# Returns:   0 on success, 1 if $1 is not a positive decimal integer
#######################################
get_highest_mask()
{
    local cpu_nums=$1
    local bit words mask i

    # A CPU number below 1 would turn into a negative shift count below.
    if ! [[ $cpu_nums =~ ^[0-9]+$ ]] || (( 10#$cpu_nums < 1 )); then
        echo "get_highest_mask: invalid cpu number '$cpu_nums'" >&2
        return 1
    fi
    cpu_nums=$((10#$cpu_nums))

    # Bit position inside the top word, and how many all-zero words follow it.
    bit=$(( (cpu_nums - 1) % 32 ))
    words=$(( (cpu_nums - 1) / 32 ))

    printf -v mask '%x' $(( 1 << bit ))
    for (( i = 0; i < words; i++ )); do
        mask="$mask,00000000"
    done
    echo "$mask"
}
#######################################
# Print the affinity mask for the N-th CPU slot.
# Globals:   gCpuCount (read), gReverse (read)
# Arguments: $1 - 1-based slot number; values above gCpuCount wrap around
# Outputs:   whatever get_highest_mask prints for the resolved CPU number
#######################################
get_smp_affinity_mask()
{
    local cpuNums=$1

    # Fold slot numbers past the last CPU back into 1..gCpuCount.
    if (( cpuNums > gCpuCount )); then
        cpuNums=$(( (cpuNums - 1) % gCpuCount + 1 ))
    fi

    # Reverse mode counts from the highest CPU downwards: slot 1 -> gCpuCount.
    if [[ $gReverse == 1 ]]; then
        cpuNums=$(( gCpuCount - cpuNums + 1 ))
    fi

    get_highest_mask $cpuNums
}
#######################################
# Pin every virtio "input" queue interrupt to a CPU.
#   * fewer than two CPUs per input queue: walk the CPU slots one by one
#     (wrapping) and write a mask obtained from get_smp_affinity_mask;
#   * otherwise: step through the CPUs two at a time, starting at the last
#     CPU and going down when there are 32 or more CPUs, or starting at
#     CPU 0 and going up when there are fewer.
# Globals:   gCpuCount (read); gReverse (read by get_smp_affinity_mask)
# Outputs:   one "[input]bind irq ..." line per interrupt
#######################################
input_irq_bind()
{
    local -a irqs
    local irq mask cpu step
    local slot=0
    local netQueueCount

    # IRQ number = first ':'-separated column of /proc/interrupts, blanks removed.
    mapfile -t irqs < <(grep -i ".*virtio.*input.*" /proc/interrupts \
        | awk -F ':' '{gsub(/[[:space:]]/, "", $1); print $1}')
    netQueueCount=${#irqs[@]}

    if (( gCpuCount < 2 * netQueueCount )); then
        for irq in "${irqs[@]}"; do
            mask=$(get_smp_affinity_mask $(( slot % gCpuCount + 1 )))
            echo "$mask" > "/proc/irq/$irq/smp_affinity"
            echo "[input]bind irq $irq with mask 0x$mask affinity"
            slot=$(( slot + 1 ))
        done
        return
    fi

    if (( gCpuCount >= 32 )); then
        cpu=$(( gCpuCount - 1 ))
        step=-2
    else
        cpu=0
        step=2
    fi
    for irq in "${irqs[@]}"; do
        echo "$cpu" > "/proc/irq/$irq/smp_affinity_list"
        echo "[input]bind irq $irq with mask 0x$(cat "/proc/irq/$irq/smp_affinity") affinity"
        # bash's % keeps the sign of the dividend; adding gCpuCount first
        # guarantees the next CPU index can never become negative.
        cpu=$(( (cpu + step + gCpuCount) % gCpuCount ))
    done
}
#######################################
# Pin every virtio "output" queue interrupt to a CPU.
# The strategy is chosen from the number of *input* queues (same test as
# input_irq_bind), but the interrupts that get bound are the output ones:
#   * fewer than two CPUs per input queue: walk the CPU slots one by one
#     (wrapping) and write a mask obtained from get_smp_affinity_mask;
#   * otherwise: step through the CPUs two at a time, starting at the last
#     CPU and going down when there are 32 or more CPUs, or starting at
#     CPU 0 and going up when there are fewer.
# Globals:   gCpuCount (read); gReverse (read by get_smp_affinity_mask)
# Outputs:   one "[output]bind irq ..." line per interrupt
#######################################
output_irq_bind()
{
    local -a irqs
    local irq mask cpu step
    local slot=0
    local netQueueCount

    netQueueCount=$(grep -i ".*virtio.*input.*" /proc/interrupts | wc -l)
    # IRQ number = first ':'-separated column of /proc/interrupts, blanks removed.
    mapfile -t irqs < <(grep -i ".*virtio.*output.*" /proc/interrupts \
        | awk -F ':' '{gsub(/[[:space:]]/, "", $1); print $1}')

    if (( gCpuCount < 2 * netQueueCount )); then
        for irq in "${irqs[@]}"; do
            mask=$(get_smp_affinity_mask $(( slot % gCpuCount + 1 )))
            echo "$mask" > "/proc/irq/$irq/smp_affinity"
            echo "[output]bind irq $irq with mask 0x$mask affinity"
            slot=$(( slot + 1 ))
        done
        return
    fi

    if (( gCpuCount >= 32 )); then
        cpu=$(( gCpuCount - 1 ))
        step=-2
    else
        cpu=0
        step=2
    fi
    for irq in "${irqs[@]}"; do
        echo "$cpu" > "/proc/irq/$irq/smp_affinity_list"
        echo "[output]bind irq $irq with mask 0x$(cat "/proc/irq/$irq/smp_affinity") affinity"
        # The number of output IRQs is not bounded by the input-queue count
        # used above, so the walk may run past CPU 0. bash's % keeps the sign
        # of the dividend; adding gCpuCount first keeps the index valid.
        cpu=$(( (cpu + step + gCpuCount) % gCpuCount ))
    done
}
# Per-NUMA-node state filled in by init_numa_support:
#   aff_nodeoffset[N] - position of the next CPU to hand out in node N's list
#   aff_nodecpu[N]    - newline-separated, numerically sorted CPU ids of node N
declare -A aff_nodeoffset
declare -A aff_nodecpu
#######################################
# Discover the NUMA layout and reset the per-node allocation state.
# Globals:   gCpuCount (read)
#            numa_support (written: 1 when the layout was read, else 0)
#            aff (reset to 0), numanr, numacpunr,
#            aff_nodeoffset, aff_nodecpu (written)
# Arguments: $1 - optional sysfs node directory,
#                 defaults to /sys/devices/system/node
# Outputs:   a one-line status message on stdout
#######################################
init_numa_support()
{
    local node_root=${1:-/sys/devices/system/node}
    local i node_dir cpu_dir cpus

    numa_support=0
    aff=0

    # Count node directories directly. The exit status of an "ls | grep | wc"
    # pipeline is always wc's, so it cannot reveal a missing node directory.
    numanr=0
    for node_dir in "$node_root"/node[0-9]*; do
        [ -d "$node_dir" ] && numanr=$((numanr + 1))
    done
    if (( numanr == 0 )); then
        # Also protects the division below from a zero divisor.
        echo "numa not support"
        return 0
    fi

    # Assumes the CPUs are spread evenly over the nodes.
    numacpunr=$((gCpuCount/numanr))
    for ((i = 0; i < numanr; i++)); do
        aff_nodeoffset[$i]=0
        cpus=""
        # cpuN entries only; cpulist/cpumap carry no CPU number.
        for cpu_dir in "$node_root/node$i"/cpu[0-9]*; do
            [ -e "$cpu_dir" ] || continue
            cpus+="${cpu_dir##*/cpu}"$'\n'
        done
        aff_nodecpu[$i]=$(printf '%s' "$cpus" | sort -n)
        if [ -z "${aff_nodecpu[$i]}" ]; then
            # node$i is missing or lists no CPUs.
            echo "numa not support"
            return 0
        fi
    done
    numa_support=1
    echo "numa supported numa($numanr) cpu per numa($numacpunr)"
}
# NUMA-unaware fallback: move the global cursor "aff" forward by two CPUs,
# wrapping at gCpuCount. Always finishes with status 0.
get_next_affinity_origin()
{
    local next_cpu=$(( aff + 2 ))
    aff=$(( next_cpu % gCpuCount ))
}
# State for get_irq_numa's fallback: the msi_irqs directory of the device that
# owned the previously resolved IRQ, and the NUMA node assigned to that device.
cloudinit_softpass_last_pci_addr=0
cloudinit_softpass_last_numa_id=0
#######################################
# Print the NUMA node an interrupt should be served on.
# Globals:   cloudinit_softpass_last_pci_addr,
#            cloudinit_softpass_last_numa_id (read and written)
# Arguments: $1 - IRQ number
# Outputs:   the node id on stdout; nothing if the IRQ belongs to no
#            msi_irqs directory under /sys/devices
#######################################
get_irq_numa()
{
    local irq=$1
    local node_file="/proc/irq/$irq/node"
    local node=""
    local msi_irqs numanr

    # Prefer the node reported for the IRQ, unless it is -1.
    if [ -f "$node_file" ]; then
        node=$(cat "$node_file")
    fi
    if [ -n "$node" ] && [ "$node" != -1 ]; then
        echo "$node"
        return
    fi

    # As a result of the FPGA & Virt teams' discussion:
    # the first nic is on numa $CLOUDINIT_SOFTPASS_NUMA,
    # the second nic is on the next numa, and so on.
    # NOTE(review): get_next_affinity calls this function through $(...),
    # i.e. in a subshell, so the updates to the two globals made below are
    # not visible to the next call - confirm whether that is intended.
    while IFS= read -r msi_irqs; do
        [ -f "$msi_irqs/$irq" ] || continue
        [ "$cloudinit_softpass_last_pci_addr" == 0 ] && cloudinit_softpass_last_pci_addr=$msi_irqs
        if [ "$msi_irqs" != "$cloudinit_softpass_last_pci_addr" ]; then
            # A different device than last time: advance to the next node.
            cloudinit_softpass_last_pci_addr=$msi_irqs
            numanr=$(ls /sys/devices/system/node/ 2>/dev/null | grep -c node)
            # Avoid a modulo by zero when no node directory is listed.
            (( numanr > 0 )) || numanr=1
            cloudinit_softpass_last_numa_id=$(( (cloudinit_softpass_last_numa_id + 1) % numanr ))
        fi
        # Report the node in both cases, including right after advancing.
        echo "$cloudinit_softpass_last_numa_id"
        break
    done < <(find /sys/devices/ -name "*msi_irqs")
    return
}
#######################################
# Choose the CPU for the next interrupt and store it in the global "aff".
# Uses get_next_affinity_origin when NUMA information is unavailable or the
# IRQ has no /proc/irq/<irq>/node file; otherwise takes the next CPU from the
# CPU list of the IRQ's NUMA node.
# Globals:   numa_support, numacpunr, IS_VM, gReverse, aff_nodecpu (read)
#            aff_nodeoffset, aff (written)
# Arguments: $1 - IRQ number
#######################################
get_next_affinity()
{
    local irq=$1

    if [[ ${numa_support:-0} == 0 ]] || [ ! -f /proc/irq/$irq/node ]; then
        get_next_affinity_origin
        return
    fi

    local node stride=1 slot next_slot
    node=$(get_irq_numa $irq)
    slot=${aff_nodeoffset[${node}]}
    # CVM always keep hyper-thread every 2 cores
    [[ ${IS_VM:-1} == 0 ]] && stride=2

    # Advance this node's cursor for the following IRQ, wrapping to the start.
    next_slot=$(( slot + stride ))
    if (( next_slot >= numacpunr )); then
        next_slot=0
    fi
    aff_nodeoffset[${node}]=$next_slot

    # Reverse mode indexes the node's CPU list from its end.
    if [[ ${gReverse:-0} == 1 ]]; then
        slot=$(( numacpunr - slot - stride ))
    fi

    # The unquoted expansion flattens the stored list into one line of fields.
    aff=$(echo ${aff_nodecpu[${node}]} | awk -v iter=$(( slot + 1 )) '{print $iter}')
}
#######################################
# NUMA-aware binding of the virtio "input" queue interrupts: re-initialises
# the NUMA state, then binds each IRQ to the CPU that get_next_affinity
# leaves in the global "aff".
# Globals:   aff (read after each get_next_affinity call)
# Outputs:   a header line and one "[input]bind irq ..." line per interrupt
#######################################
input_irq_bind_numaopt()
{
    local irq
    local irqSet

    irqSet=$(grep -i ".*virtio.*input.*" /proc/interrupts | awk -F ':' '{print $1}')
    init_numa_support
    echo "shuishan host rss input config"
    # Unquoted on purpose: word splitting drops the padding around IRQ numbers.
    for irq in $irqSet; do
        get_next_affinity $irq
        echo "$aff" > "/proc/irq/$irq/smp_affinity_list"
        echo "[input]bind irq $irq with mask 0x$(cat "/proc/irq/$irq/smp_affinity") affinity"
    done
}
#######################################
# NUMA-aware binding of the virtio "output" queue interrupts: re-initialises
# the NUMA state, then binds each IRQ to the CPU that get_next_affinity
# leaves in the global "aff".
# Globals:   aff (read after each get_next_affinity call)
# Outputs:   a header line and one "[output]bind irq ..." line per interrupt
#######################################
output_irq_bind_numaopt()
{
    local irq
    local irqSet

    irqSet=$(grep -i ".*virtio.*output.*" /proc/interrupts | awk -F ':' '{print $1}')
    init_numa_support
    echo "shuishan host rss output config"
    # Unquoted on purpose: word splitting drops the padding around IRQ numbers.
    for irq in $irqSet; do
        get_next_affinity $irq
        echo "$aff" > "/proc/irq/$irq/smp_affinity_list"
        echo "[output]bind irq $irq with mask 0x$(cat "/proc/irq/$irq/smp_affinity") affinity"
    done
}
#######################################
# For every eth* interface, raise the current "Combined" channel count to the
# pre-set maximum reported by `ethtool -l`.
# Interfaces for which ethtool reports no numeric values are skipped.
# Outputs:   one "Set [ethX] ..." line per interface that was changed
#######################################
ethConfig()
{
    local ethd eth ethtool channels pre_max cur_max

    # ethtool may only become reachable after the login profile extends PATH.
    if ! command -v ethtool &> /dev/null; then
        source /etc/profile
    fi
    ethtool=$(command -v ethtool) || return 0

    for ethd in /sys/class/net/eth*; do
        # An unmatched glob stays literal; skip it.
        [ -e "$ethd" ] || continue
        eth=${ethd##*/}

        # `ethtool -l` prints "Combined:" twice: first under the pre-set
        # maximums, last under the current hardware settings.
        channels=$("$ethtool" -l "$eth" 2>/dev/null | grep -i "combined" | awk '{print $2}')
        pre_max=$(head -n 1 <<< "$channels")
        cur_max=$(tail -n 1 <<< "$channels")

        # if ethtool not work. we have to deal with this situation.
        [[ $pre_max =~ ^[0-9]+$ && $cur_max =~ ^[0-9]+$ ]] || continue

        if [ "$pre_max" -ne "$cur_max" ]; then
            "$ethtool" -L "$eth" combined "$pre_max"
            echo "Set [$eth] Current Combined to <$pre_max>"
        fi
    done
}
# Bind each "LiquidIO ... rxtx" interrupt to consecutive CPU slots (wrapping
# at gCpuCount), using the mask printed by get_smp_affinity_mask.
smartnic_bind()
{
    irqSet=$(grep "LiquidIO.*rxtx" /proc/interrupts | awk -F ':' '{print $1}')
    i=0
    # Unquoted on purpose: word splitting drops the padding around IRQ numbers.
    for irq in $irqSet; do
        cpunum=$(( i % gCpuCount + 1 ))
        mask=$(get_smp_affinity_mask $cpunum)
        echo $mask > /proc/irq/$irq/smp_affinity
        printf '%s\n' "[smartnic]bind irq $irq with mask 0x$mask affinity"
        ((i++))
    done
}
# Decide whether this system is a KVM guest.
# return 0: yes, I'm a kvm vm
# return 1: no, I'm not
is_kvm_vm()
{
    local ret1
    local ret2

    # ARM devices have no CPU model name to inspect, so on aarch64 the
    # decision is taken from the DMI vendor strings alone.
    archtype=$(lscpu | grep Architecture | awk '{print $2}')
    if [[ $archtype == "aarch64" ]]; then
        sys_vendor=$(cat /sys/class/dmi/id/sys_vendor)
        chassis_vendor=$(cat /sys/class/dmi/id/chassis_vendor)
        [[ $sys_vendor == "Tencent Cloud" || $chassis_vendor == "QEMU" ]]
        return
    fi

    # Quirk for 2080ti:
    # a kvm:off parameter was added to the qemu command line, which hides
    # all kvm related features in the vm (cpu tag, kvm clock source).
    #
    # Only function 0 (vga) of the 2080ti is passed through to the vm, so a
    # 2080ti vga without its function 1 (audio) means a vm environment.
    local vga=$(lspci -d 10de:1e04)
    local audio=$(lspci -d 10de:10f7)
    if [[ -n $vga && -z $audio ]]; then
        return 0
    fi

    # For ARM CVM. Cpu model name would be masked.
    local modelname=$(grep "model name" /proc/cpuinfo | awk -F':' '{print $2}' | uniq | head -n 1)
    # Strip the leading whitespace left over after the ':' separator.
    modelname="${modelname#"${modelname%%[![:space:]]*}"}"
    case "$modelname" in
        ""|"Virtual")
            return 0
            ;;
    esac

    # Last resort: a KVM hypervisor line in lscpu, or a kvm clock source.
    lscpu 2>/dev/null | grep -i kvm | grep -i Hypervisor >/dev/null 2>&1
    ret1=$?
    grep -i kvm /sys/devices/system/clocksource/clocksource0/available_clocksource >/dev/null 2>&1
    ret2=$?
    [[ $ret1 == 0 || $ret2 == 0 ]]
}
# Checks /proc/interrupts for virtio input interrupts.
# return 0: yes shuishan bm (at least one virtio input interrupt exists)
# return 1: not shuishan
is_shuishan()
{
    virtionum=$(grep -i ".*virtio.*input.*" /proc/interrupts | wc -l)
    [ "$virtionum" != 0 ]
}
#######################################
# Legacy (non NUMA-aware) interrupt binding for a VM.
#   * LiquidIO smartnic present  -> smartnic_bind
#   * aarch64 with 120 CPUs and the companion script available
#                                -> run $dir/armvirt120c_set_irqaffinity.sh
#   * everything else            -> input_irq_bind + output_irq_bind
# Globals:   gCpuCount, dir (read)
# Outputs:   a "SET VM RSS ..." line naming the chosen path
#######################################
set_vm_net_legacy_affinity()
{
    local cpuarch

    # Stop irqbalance if it is running - presumably so it cannot rewrite the
    # affinities configured below.
    ps ax | grep -v grep | grep -q irqbalance && killall irqbalance 2>/dev/null

    if grep "LiquidIO.*rxtx" /proc/interrupts &>/dev/null; then #smartnic
        echo "SET VM RSS SMARTNIC"
        smartnic_bind
        return
    fi

    ethConfig
    cpuarch=$(lscpu | grep Architecture | awk '{print $2}')
    # One [[ ... && ... ]] test instead of "[ ... -a\" continuations: a
    # backslash-newline joins "-a" to the first token of the next line unless
    # that line starts with whitespace, which makes the whole test fail.
    if [[ $cpuarch == "aarch64" \
        && $gCpuCount == 120 \
        && -f $dir/armvirt120c_set_irqaffinity.sh ]]; then #ARM64 120C spec#
        echo "SET VM RSS AARCH64 120C spec.."
        "$dir/armvirt120c_set_irqaffinity.sh"
        return
    fi

    echo "SET VM RSS NORMAL"
    #X86 and ARM64 other#
    input_irq_bind
    output_irq_bind
}
#######################################
# Bind the eth0 interrupts of a bare-metal host.
#   * 32 or more CPUs: one CPU per IRQ via smp_affinity_list, counting down
#     from the last CPU and wrapping back to the last CPU before CPU 0 would
#     be used;
#   * fewer than 32 CPUs: one CPU per IRQ via a hexadecimal smp_affinity
#     mask, counting up from CPU 0 and wrapping after the last CPU, so a mask
#     never names a CPU that does not exist.
# Globals:   gCpuCount (read)
# Outputs:   one "<value> > /proc/irq/<irq>/<file>" line per interrupt
#######################################
set_eth_bm_net_affinity()
{
    local irq cpu mask

    if [ "$gCpuCount" -ge 32 ]; then
        cpu=$((gCpuCount-1))
        for irq in $(awk -F ":" '/eth0/{print $1}' /proc/interrupts); do
            echo "$cpu" > "/proc/irq/$irq/smp_affinity_list"
            echo "$cpu > /proc/irq/$irq/smp_affinity_list"
            cpu=$((cpu-1))
            if [ "$cpu" == 0 ]; then
                cpu=$((gCpuCount-1))
            fi
        done
        return 0
    fi

    cpu=0
    for irq in $(awk -F ":" '/eth0/{print $1}' /proc/interrupts); do
        # Single-bit mask for the current CPU, upper-case hex, built with
        # shell arithmetic so no external calculator is needed.
        printf -v mask '%X' $((1 << cpu))
        echo "$mask" > "/proc/irq/$irq/smp_affinity"
        echo "$mask > /proc/irq/$irq/smp_affinity"
        cpu=$(( (cpu + 1) % gCpuCount ))
    done
}
# Bare-metal host with virtio NICs: force reverse CPU order, then run the
# NUMA-aware input and output binders.
set_virtio_bm_net_affinity()
{
    local bind_step

    # assume that a bare-metal host always has more than 32 cpus.
    gReverse=1
    for bind_step in input_irq_bind_numaopt output_irq_bind_numaopt; do
        $bind_step
    done
}
# VM variant of the NUMA-aware binding: input queues first, then output
# queues, leaving gReverse as the caller set it.
set_numaopt_vm_net_affinity()
{
    local queue_binder

    for queue_binder in input_irq_bind_numaopt output_irq_bind_numaopt; do
        $queue_binder
    done
}
# Bare-metal entry point: refuse to run with a CPU count of 0, otherwise
# apply ethConfig and then the binder that matches the NIC type
# (virtio when is_shuishan succeeds, plain eth0 otherwise).
set_bm_net_affinity()
{
    if [[ $gCpuCount == 0 ]]; then
        echo cpunumber error
        return
    fi

    # The NIC type is probed before ethConfig runs, as ethConfig may change
    # the queue configuration.
    local binder=set_virtio_bm_net_affinity
    is_shuishan || binder=set_eth_bm_net_affinity

    ethConfig
    $binder
}
# VM entry point: use the legacy binding while CLOUDINIT_SOFTPASS_NUMA still
# holds -1; otherwise seed the NUMA id from it and use the NUMA-aware binding.
set_vm_net_affinity()
{
    case "$CLOUDINIT_SOFTPASS_NUMA" in
        -1)
            set_vm_net_legacy_affinity
            ;;
        *)
            cloudinit_softpass_last_numa_id=$CLOUDINIT_SOFTPASS_NUMA
            set_numaopt_vm_net_affinity
            ;;
    esac
}
# Set gReverse to 1 when lspci lists an NVMe ("Non-Volatile memory
# controller") device, and to 0 otherwise.
reverse_irq_for_nvme()
{
    if lspci -nn | grep -q "Non-Volatile memory controller"; then
        gReverse=1
    else
        gReverse=0
    fi
}
# Checks for an "AMD EPYC 7K83 64-Core Processor" (value of line 5 of
# /proc/cpuinfo) together with 128 or more processors.
# return 0: yes sa3 and cpu >= 128
# return 1: not
is_sa3_cross_node()
{
    # Line 5 of /proc/cpuinfo is expected to be the "model name" line.
    is_milan=$(awk -F ':' 'NR == 5 {print $2}' /proc/cpuinfo | grep "AMD EPYC 7K83 64-Core Processor")
    cpu_num=$(grep processor /proc/cpuinfo | wc -l)
    echo "is_milan:$is_milan cpu_num:$cpu_num"

    milan=0
    [ -z "${is_milan}" ] || milan=1

    cross_node=0
    [ $cpu_num -lt 128 ] || cross_node=1

    [ $milan -eq 1 ] && [ $cross_node -eq 1 ]
}
# NUMA node configured for the first NIC through a "nic_numa = 1/0" entry in
# qcloud_init.ini; -1 means no such entry was found.
CLOUDINIT_SOFTPASS_NUMA=-1
#######################################
# Load the nic_numa setting, if any, into CLOUDINIT_SOFTPASS_NUMA.
# The variable keeps its current value when the file is unreadable, has no
# nic_numa key, or the key carries no digits.
# Globals:   CLOUDINIT_SOFTPASS_NUMA (written)
# Arguments: $1 - optional config file,
#                 defaults to /usr/local/qcloud/qcloud_init.ini
# Outputs:   one informational line when a value was loaded
# Returns:   0
#######################################
is_VM_cloudinit_softpass_numa()
{
    local default_numa_file=${1:-/usr/local/qcloud/qcloud_init.ini}
    local numa_id

    [ -r "$default_numa_file" ] || return 0

    # First run of digits found in the value of the first nic_numa key.
    numa_id=$(awk -F '=' '$1 ~ /nic_numa/ {print $2}' "$default_numa_file" \
        | grep -Po "\d+" | head -n 1)
    # An empty value here would break the later comparison against -1.
    [ -n "$numa_id" ] || return 0

    CLOUDINIT_SOFTPASS_NUMA=$numa_id
    echo "Founding cloudinit config with nic_numa.. loading $CLOUDINIT_SOFTPASS_NUMA"
}
#######################################
# Top-level dispatcher: decide the CPU ordering (gReverse), detect whether
# this is a KVM guest, and run the VM or bare-metal binding accordingly.
# Globals:   gCpuCount (read)
#            sa3_cross_node, gReverse, IS_VM (written)
#######################################
set_net_affinity()
{
    sa3_cross_node=0
    is_sa3_cross_node && sa3_cross_node=1

    # 32 or more CPUs (except the sa3 cross-node case) always use reverse
    # order; otherwise the order depends on whether an NVMe device is present.
    if (( gCpuCount >= 32 && sa3_cross_node != 1 )); then
        gReverse=1
    else
        reverse_irq_for_nvme
    fi

    is_kvm_vm
    IS_VM=$?
    case $IS_VM in
        0) set_vm_net_affinity ;;
        *) set_bm_net_affinity ;;
    esac
}
# Number of "processor" lines in /proc/cpuinfo.
gCpuCount=$(grep processor /proc/cpuinfo | wc -l)
# Nothing to distribute with no detected CPU or with a single one; both
# cases end the script with status 0.
case $gCpuCount in
    0)
        echo "machine cpu count get error!"
        exit 0
        ;;
    1)
        echo "machine only have one cpu, needn't set affinity for net interrupt"
        exit 0
        ;;
esac
is_VM_cloudinit_softpass_numa
set_net_affinity