HEX
Server: Apache/2.4.52 (Ubuntu)
System: Linux WebLive 5.15.0-79-generic #86-Ubuntu SMP Mon Jul 10 16:07:21 UTC 2023 x86_64
User: ubuntu (1000)
PHP: 7.4.33
Disabled: pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare,
Upload Files
File: //usr/local/qcloud/irq/net_smp_affinity.sh
#!/bin/bash
## Sets the CPU affinity of network interrupts listed in /proc/interrupts
## (virtio input/output queues, LiquidIO rxtx, eth0) by writing to
## /proc/irq/<irq>/smp_affinity and /proc/irq/<irq>/smp_affinity_list.
## Original note: used to bind virtio-input interrupt to last cpu
export PATH="${PATH}:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin"
# Absolute directory of this script; helper scripts are looked up next to it.
dir=$(cd $(dirname $0); pwd)

# Separator and timestamp so that consecutive runs can be told apart in the output.
printf '%s\n' "--------------------------------------------"
date

# Print the smp_affinity bitmask that selects exactly one CPU.
#
# Arguments:
#   $1 - 1-based CPU ordinal (1 selects bit 0, 33 selects bit 32, ...)
# Outputs:
#   The mask on stdout as comma-separated hexadecimal 32-bit words, most
#   significant word first; the leading word is not zero padded
#   (e.g. 4 -> "8", 33 -> "1,00000000", 64 -> "80000000,00000000").
# Returns:
#   0 on success; 1 (nothing on stdout, message on stderr) when $1 is not a
#   positive integer.
get_highest_mask()
{
    # Everything is local so that direct (non-subshell) callers keep their own
    # "i" and "mask" variables intact.
    local cpu_nums=$1
    local mask zero_words bit i

    case $cpu_nums in
        ''|*[!0-9]*)
            echo "get_highest_mask: invalid cpu number '$cpu_nums'" >&2
            return 1
            ;;
    esac
    cpu_nums=$((10#$cpu_nums))   # force base 10 even with leading zeros
    if [ "$cpu_nums" -lt 1 ]; then
        echo "get_highest_mask: invalid cpu number '$cpu_nums'" >&2
        return 1
    fi

    # The selected bit lives in word (n-1)/32 at position (n-1)%32; every lower
    # word is all zeros and is appended after the leading word.
    zero_words=$(( (cpu_nums - 1) / 32 ))
    bit=$(( (cpu_nums - 1) % 32 ))

    printf -v mask '%x' $((1 << bit))
    for ((i = 0; i < zero_words; i++)); do
        mask="$mask,00000000"
    done

    echo "$mask"
}

# Turn a 1-based CPU ordinal into an smp_affinity mask.
#
# Globals:
#   gCpuCount (read) - number of CPUs
#   gReverse  (read) - when 1, ordinals are counted from the last CPU downwards
# Arguments:
#   $1 - 1-based CPU ordinal; ordinals above gCpuCount wrap around
# Outputs:
#   Whatever get_highest_mask prints for the resulting ordinal.
get_smp_affinity_mask()
{
    local slot=$1

    # Fold ordinals past the last CPU back into the range 1..gCpuCount.
    if (( slot > gCpuCount )); then
        slot=$(( (slot - 1) % gCpuCount + 1 ))
    fi

    # Mirror the ordinal so that 1 maps to the last CPU when reversing.
    if [[ $gReverse == 1 ]]; then
        slot=$(( gCpuCount + 1 - slot ))
    fi

    get_highest_mask $slot
}

# Distribute the virtio "input" queue interrupts over the CPUs.
#
# Globals:
#   gCpuCount (read); i, irq, cpunum, mask (written - they are not local)
# Three strategies, chosen from the CPU count and the number of input queues:
#   * fewer than two CPUs per queue: one single-CPU mask per IRQ, round robin,
#     written to smp_affinity (direction decided by get_smp_affinity_mask)
#   * otherwise with >= 32 CPUs: start at the last CPU and step down by two
#   * otherwise: start at CPU 0 and step up by two
input_irq_bind()
{
    local rxQueues=$(grep -ci ".*virtio.*input.*" /proc/interrupts)
    local irqList=$(grep -i ".*virtio.*input.*" /proc/interrupts | awk -F ':' '{print $1}')

    if (( gCpuCount < 2 * rxQueues )); then
        i=0
        for irq in $irqList; do
            cpunum=$(( i % gCpuCount + 1 ))
            mask=$(get_smp_affinity_mask $cpunum)
            echo $mask > /proc/irq/$irq/smp_affinity
            echo "[input]bind irq $irq with mask 0x$mask affinity"
            ((i++))
        done
    elif (( gCpuCount >= 32 )); then
        cpunum=$(( gCpuCount - 1 ))
        for irq in $irqList; do
            echo $cpunum > /proc/irq/$irq/smp_affinity_list
            echo "[input]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
            # NOTE(review): bash's % keeps the sign of the dividend, so this
            # goes negative once cpunum drops below 2 instead of wrapping.
            cpunum=$(( (cpunum - 2) % gCpuCount ))
        done
    else
        cpunum=0
        for irq in $irqList; do
            echo $cpunum > /proc/irq/$irq/smp_affinity_list
            echo "[input]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
            cpunum=$(( (cpunum + 2) % gCpuCount ))
        done
    fi
}

# Distribute the virtio "output" queue interrupts over the CPUs.
#
# Globals:
#   gCpuCount (read); i, irq, cpunum, mask (written - they are not local)
# The strategy is picked from the number of *input* queue lines, while the IRQs
# that get bound are the "output" ones.
# NOTE(review): presumably input and output queue counts always match - confirm.
#   * fewer than two CPUs per queue: one single-CPU mask per IRQ, round robin,
#     written to smp_affinity (direction decided by get_smp_affinity_mask)
#   * otherwise with >= 32 CPUs: start at the last CPU and step down by two
#   * otherwise: start at CPU 0 and step up by two
output_irq_bind()
{
    local rxQueues=$(grep -ci ".*virtio.*input.*" /proc/interrupts)
    local irqList=$(grep -i ".*virtio.*output.*" /proc/interrupts | awk -F ':' '{print $1}')

    if (( gCpuCount < 2 * rxQueues )); then
        i=0
        for irq in $irqList; do
            cpunum=$(( i % gCpuCount + 1 ))
            mask=$(get_smp_affinity_mask $cpunum)
            echo $mask > /proc/irq/$irq/smp_affinity
            echo "[output]bind irq $irq with mask 0x$mask affinity"
            ((i++))
        done
    elif (( gCpuCount >= 32 )); then
        cpunum=$(( gCpuCount - 1 ))
        for irq in $irqList; do
            echo $cpunum > /proc/irq/$irq/smp_affinity_list
            echo "[output]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
            # NOTE(review): bash's % keeps the sign of the dividend, so this
            # goes negative once cpunum drops below 2 instead of wrapping.
            cpunum=$(( (cpunum - 2) % gCpuCount ))
        done
    else
        cpunum=0
        for irq in $irqList; do
            echo $cpunum > /proc/irq/$irq/smp_affinity_list
            echo "[output]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
            cpunum=$(( (cpunum + 2) % gCpuCount ))
        done
    fi
}

# Per-NUMA-node bookkeeping filled in by init_numa_support:
#   aff_nodeoffset[node] - position of the next CPU to hand out inside the node
#   aff_nodecpu[node]    - numerically sorted CPU ids of the node, one per line
declare -A aff_nodeoffset
declare -A aff_nodecpu

# Detect the NUMA layout from sysfs and reset the per-node cursors.
#
# Globals:
#   gCpuCount (read)
#   numa_support, aff, numanr, numacpunr, aff_nodeoffset, aff_nodecpu (written)
# Arguments:
#   $1 - optional directory holding the nodeN entries
#        (default: /sys/devices/system/node)
# Outputs:
#   One status line on stdout: "numa not support" or the detected layout.
# Result:
#   numa_support is 1 only when at least one node exists and node0..node(N-1)
#   are all present; otherwise it stays 0 and callers use the non-NUMA path.
init_numa_support()
{
    local node_root=${1:-/sys/devices/system/node}
    local node_dir cpu_entry i

    numa_support=0
    aff=0

    # Count node directories directly. The previous `ls | grep | wc -l` always
    # "succeeded", so a count of zero slipped through to the division below.
    numanr=0
    for node_dir in "$node_root"/node[0-9]*; do
        [ -d "$node_dir" ] && numanr=$((numanr + 1))
    done
    if [ "$numanr" -eq 0 ]; then
        echo "numa not support"
        return
    fi

    numacpunr=$((gCpuCount / numanr))

    for ((i = 0; i < numanr; i++)); do
        node_dir=$node_root/node$i
        # Node ids are expected to be contiguous from 0; a gap means the layout
        # cannot be indexed the way get_next_affinity needs.
        if [ ! -d "$node_dir" ]; then
            echo "numa not support"
            return
        fi

        aff_nodeoffset[$i]=0
        # Only cpu<digits> entries are CPUs; cpulist/cpumap are skipped by the glob.
        aff_nodecpu[$i]=$(
            for cpu_entry in "$node_dir"/cpu[0-9]*; do
                [ -e "$cpu_entry" ] && echo "${cpu_entry##*/cpu}"
            done | sort -n
        )
    done

    numa_support=1
    echo "numa supported numa($numanr) cpu per numa($numacpunr)"
}

# Non-NUMA fallback: move the global cursor "aff" two CPUs ahead, wrapping
# around at gCpuCount. Always returns 0 (callers chain on that with &&).
get_next_affinity_origin()
{
    local stepped=$((aff + 2))
    aff=$((stepped % gCpuCount))
}

# State of the fallback below: the msi_irqs directory (i.e. the device) seen
# last, and the NUMA node id currently being handed out.
cloudinit_softpass_last_pci_addr=0
cloudinit_softpass_last_numa_id=0

# Print the NUMA node an IRQ should be served on.
#
# Globals:
#   cloudinit_softpass_last_pci_addr, cloudinit_softpass_last_numa_id
#   (read and written by the fallback path)
# Arguments:
#   $1 - IRQ number
# Outputs:
#   The node id on stdout; nothing when the kernel reports no node and the IRQ
#   is not found under any msi_irqs directory.
#
# NOTE(review): get_next_affinity calls this through $(...), i.e. in a
# subshell, so the two globals updated here do not survive the call there and
# every call starts from the initial state. Decide whether the "next nic on the
# next numa" rotation is really wanted before changing the calling convention.
get_irq_numa()
{
    local irq=$1
    local node_file=/proc/irq/$irq/node
    local node=""
    local msi_dir node_dir numanr

    # Prefer the kernel's own answer unless it is missing or -1 (unknown).
    if [ -f "$node_file" ]; then
        node=$(cat "$node_file")
    fi
    if [ -n "$node" ] && [ "$node" != -1 ]; then
        echo "$node"
        return
    fi

    # AS a result of FPGA & Virt Teams' discuss:
    # The first nic is on numa $CLOUDINIT_SOFTPASS_NUMA
    # The second nic is on next numa. And so on.
    for msi_dir in $(find /sys/devices/ -name "*msi_irqs"); do
        [ -f "$msi_dir/$irq" ] || continue

        [ "$cloudinit_softpass_last_pci_addr" == 0 ] && cloudinit_softpass_last_pci_addr=$msi_dir
        if [ "$msi_dir" != "$cloudinit_softpass_last_pci_addr" ]; then
            # A different device than last time: advance to the next node.
            cloudinit_softpass_last_pci_addr=$msi_dir
            numanr=0
            for node_dir in /sys/devices/system/node/node[0-9]*; do
                [ -d "$node_dir" ] && numanr=$((numanr + 1))
            done
            # Without any node directory keep the current id instead of
            # dividing by zero.
            if [ "$numanr" -gt 0 ]; then
                cloudinit_softpass_last_numa_id=$(((cloudinit_softpass_last_numa_id + 1) % numanr))
            fi
        fi

        # Report the node in both cases; previously the "device changed" case
        # updated the id but printed nothing.
        echo $cloudinit_softpass_last_numa_id
        return
    done
}

# Choose the CPU for the next IRQ and leave it in the global "aff".
#
# Globals:
#   numa_support, IS_VM, gReverse, numacpunr, aff_nodecpu (read)
#   aff_nodeoffset, aff (written)
# Arguments:
#   $1 - IRQ number
# When NUMA data is unavailable, or the IRQ has no /proc/irq/<irq>/node file,
# the plain get_next_affinity_origin walk is used. Otherwise the IRQ's node
# (from get_irq_numa) selects a per-node cursor that advances by "stride" CPUs
# per call and wraps at numacpunr; with gReverse=1 the position is counted from
# the end of the node's CPU list.
get_next_affinity()
{
    local irq=$1

    if [ "${numa_support:-0}" == 0 ]; then
        get_next_affinity_origin
        return
    fi

    if [ ! -f /proc/irq/$irq/node ] && get_next_affinity_origin; then
        return
    fi

    local node=$(get_irq_numa $irq)
    local cursor=${aff_nodeoffset[${node}]}
    local stride=1
    # IS_VM holds the exit status of is_kvm_vm, so 0 means "running in a VM".
    if [ "$IS_VM" == 0 ]; then
        stride=2 # CVM always keep hyper-thread every 2 cores
    fi

    # Store the cursor for the following call, wrapping at the node's CPU count.
    local advanced=$((cursor + stride))
    if [ $advanced -ge $numacpunr ]; then
        advanced=0
    fi
    aff_nodeoffset[${node}]=$advanced

    if [ "$gReverse" == 1 ]; then
        cursor=$((numacpunr - cursor - stride))
    fi

    # aff_nodecpu holds one CPU id per line; the unquoted echo joins them so awk
    # can pick the (cursor+1)-th field.
    aff=$(echo ${aff_nodecpu[${node}]} | awk -v iter=$((cursor + 1)) '{print $iter}')
}

# NUMA-aware binding of the virtio "input" queue interrupts: each IRQ is pinned
# to the single CPU that get_next_affinity leaves in the global "aff".
#
# Globals:
#   aff (read, set by get_next_affinity); irq (written - not local)
input_irq_bind_numaopt()
{
    # Number of input queue lines; computed here but not consulted below.
    local netQueueCount=$(grep -ci ".*virtio.*input.*" /proc/interrupts)
    local irqList=$(grep -i ".*virtio.*input.*" /proc/interrupts | awk -F ':' '{print $1}')

    # Re-reads the NUMA layout and resets the per-node cursors.
    init_numa_support

    echo "shuishan host rss input config"
    for irq in $irqList; do
        get_next_affinity $irq
        echo $aff > /proc/irq/$irq/smp_affinity_list
        # Read the mask back so the log shows what the kernel accepted.
        echo "[input]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
    done
}

# NUMA-aware binding of the virtio "output" queue interrupts: each IRQ is
# pinned to the single CPU that get_next_affinity leaves in the global "aff".
#
# Globals:
#   aff (read, set by get_next_affinity); irq (written - not local)
output_irq_bind_numaopt()
{
    # Number of input queue lines; computed here but not consulted below.
    local netQueueCount=$(grep -ci ".*virtio.*input.*" /proc/interrupts)
    local irqList=$(grep -i ".*virtio.*output.*" /proc/interrupts | awk -F ':' '{print $1}')

    # Re-reads the NUMA layout and resets the per-node cursors, so the output
    # IRQs walk the nodes from the start again.
    init_numa_support

    echo "shuishan host rss output config"
    for irq in $irqList; do
        get_next_affinity $irq
        echo $aff > /proc/irq/$irq/smp_affinity_list
        # Read the mask back so the log shows what the kernel accepted.
        echo "[output]bind irq $irq with mask 0x$(cat /proc/irq/$irq/smp_affinity) affinity"
    done
}

# Raise the number of combined channels of every eth* interface to the
# maximum reported by `ethtool -l`.
#
# Arguments:
#   $1 - optional directory containing the interface entries
#        (default: /sys/class/net)
# Outputs:
#   "Set [<if>] Current Combined to <N>" on stdout for each interface changed;
#   a note on stderr when ethtool cannot be found.
# Returns:
#   0 when ethtool is unavailable (nothing to do); otherwise the status of the
#   last command run.
ethConfig()
{
    local net_root=${1:-/sys/class/net}
    local ethtool_cmd ethd eth pre_max cur_max

    # ethtool may only be reachable through the login profile's PATH.
    if ! command -v ethtool &> /dev/null; then
        source /etc/profile
    fi
    ethtool_cmd=$(command -v ethtool)
    if [ -z "$ethtool_cmd" ]; then
        echo "ethtool not found, skip channel configuration" >&2
        return 0
    fi

    for ethd in "$net_root"/eth*; do
        # An unmatched glob stays literal; skip it.
        [ -e "$ethd" ] || continue
        eth=${ethd##*/}

        # `ethtool -l` prints "Combined:" twice: first under the pre-set
        # maximums, last under the current settings. Query once, keep both.
        read -r pre_max cur_max < <(
            "$ethtool_cmd" -l "$eth" 2>/dev/null |
                awk 'tolower($0) ~ /combined/ { if (!seen++) first = $2; last = $2 }
                     END { print first, last }'
        )

        # if ethtool not work. we have to deal with this situation.
        [[ "$pre_max" =~ ^[0-9]+$ && "$cur_max" =~ ^[0-9]+$ ]] || continue

        if [ "$pre_max" -ne "$cur_max" ]; then
            "$ethtool_cmd" -L "$eth" combined "$pre_max"
            echo "Set [$eth] Current Combined to <$pre_max>"
        fi
    done
}

# Bind every LiquidIO "rxtx" interrupt to one CPU, round robin over all CPUs
# (direction decided by get_smp_affinity_mask).
#
# Globals:
#   gCpuCount (read); irqSet, i, irq, cpunum, mask (written - not local)
smartnic_bind()
{
    irqSet=$(grep "LiquidIO.*rxtx" /proc/interrupts | awk -F ':' '{print $1}')

    i=0
    for irq in $irqSet; do
        # get_smp_affinity_mask takes a 1-based ordinal.
        cpunum=$(( i % gCpuCount + 1 ))
        mask=$(get_smp_affinity_mask $cpunum)
        echo $mask > /proc/irq/$irq/smp_affinity
        echo "[smartnic]bind irq $irq with mask 0x$mask affinity"
        ((i++))
    done
}

# Decide whether this machine is a KVM guest.
#
# return 0: yes, I'm a kvm vm
# return 1: no, I'm not
#
# Checks, in order:
#   1. aarch64: DMI sys_vendor "Tencent Cloud" or chassis_vendor "QEMU"
#   2. an NVIDIA 10de:1e04 VGA function without its 10de:10f7 audio function
#   3. empty or "Virtual" CPU model name in /proc/cpuinfo
#   4. lscpu reports a KVM hypervisor, or a kvm clocksource is available
is_kvm_vm()
{
    local archtype sys_vendor chassis_vendor
    local vga audio modelname

    # For ARM, ARM device not modelname
    archtype=$(lscpu | awk '/Architecture/ { print $2; exit }')
    # Quoted so that an empty value (lscpu missing or silent) is a plain
    # mismatch rather than a malformed test.
    if [ "$archtype" == "aarch64" ]; then
        sys_vendor=$(cat /sys/class/dmi/id/sys_vendor)
        chassis_vendor=$(cat /sys/class/dmi/id/chassis_vendor)

        [[ $sys_vendor == "Tencent Cloud" || $chassis_vendor == "QEMU" ]]
        return
    fi

    # Quirk for 2080ti:
    #    We added a kvm:off parameter in the qemu command line.
    #    This parameter hides all kvm related features in the vm,
    #    such as the cpu tag and the kvm clock source.
    #
    #    We pass through only 2080ti function 0 (vga) to the vm,
    #    so if there is a 2080ti vga without function 1 (audio),
    #    we know it is a vm environment.
    vga=$(lspci -d 10de:1e04)
    audio=$(lspci -d 10de:10f7)
    if [ -n "$vga" ] && [ -z "$audio" ]; then
        return 0
    fi

    # For ARM CVM. Cpu model name would be masked.
    modelname=$(awk -F':' '/model name/ { print $2; exit }' /proc/cpuinfo)
    # Strip leading whitespace left over from the "name : value" layout.
    modelname="${modelname#"${modelname%%[![:space:]]*}"}"
    [ -z "$modelname" ] && return 0
    [ "$modelname" == "Virtual" ] && return 0

    if lscpu 2>/dev/null | grep -i kvm | grep -qi Hypervisor; then
        return 0
    fi
    if grep -qi kvm /sys/devices/system/clocksource/clocksource0/available_clocksource 2>/dev/null; then
        return 0
    fi
    return 1
}

# Detect a "shuishan" bare-metal host by the presence of virtio input queue
# interrupts in /proc/interrupts.
#
# return 0: yes shuishan bm
# return 1: not shuishan
# Side effect: the global virtionum holds the number of matching lines.
is_shuishan()
{
    virtionum=$(cat /proc/interrupts | grep -ci ".*virtio.*input.*")
    if [ $virtionum != 0 ]; then
        return 0
    fi
    return 1
}

# Legacy (non NUMA-aware) affinity setup for a VM.
#
# Globals:
#   gCpuCount, dir (read)
# Steps: stop irqbalance if it is running, then
#   * LiquidIO rxtx interrupts present -> smartnic_bind
#   * aarch64 with 120 CPUs and the helper script next to this file -> run it
#   * anything else -> ethConfig followed by the virtio input/output binding
set_vm_net_legacy_affinity()
{
    if ps ax | grep -v grep | grep -q irqbalance; then
        killall irqbalance 2>/dev/null
    fi

    if cat /proc/interrupts | grep "LiquidIO.*rxtx" &>/dev/null; then
        #smartnic
        echo "SET VM RSS SMARTNIC"
        smartnic_bind
        return
    fi

    ethConfig

    local cpuarch=$(lscpu | grep Architecture | awk '{print $2}')
    #ARM64 120C spec#
    if [ "$cpuarch" == "aarch64" ] &&
       [ "$gCpuCount" == 120 ] &&
       [ -f $dir/armvirt120c_set_irqaffinity.sh ]; then
        echo "SET VM RSS AARCH64 120C spec.."
        $dir/armvirt120c_set_irqaffinity.sh
        return
    fi

    #X86 and ARM64 other#
    echo "SET VM RSS NORMAL"
    input_irq_bind
    output_irq_bind
}

# Bind the eth0 interrupts of a bare-metal host, one CPU per IRQ.
#
# Globals:
#   gCpuCount (read) - must be a positive integer
#   aff (written)    - value written for the last IRQ handled
# Outputs:
#   One "<value> > <path>" line per IRQ on stdout.
# Returns:
#   1 when gCpuCount is not a positive integer.
#
# With 32 or more CPUs the IRQs are pinned through smp_affinity_list, starting
# at the last CPU and walking down; CPU 0 is never used (the walk restarts at
# the last CPU instead). With fewer CPUs single-bit hex masks are written to
# smp_affinity, starting at CPU 0 and wrapping after the last CPU so that no
# mask ever names a CPU that does not exist.
set_eth_bm_net_affinity()
{
    local irq_list irq bit

    case $gCpuCount in
        ''|*[!0-9]*|0)
            echo "cpunumber error" >&2
            return 1
            ;;
    esac

    irq_list=$(awk -F ":" '/eth0/{print $1}' /proc/interrupts)

    if [ "$gCpuCount" -ge 32 ]; then
        aff=$((gCpuCount-1))
        for irq in $irq_list; do
            echo $aff > /proc/irq/$irq/smp_affinity_list
            echo "$aff > /proc/irq/$irq/smp_affinity_list"
            aff=$((aff-1))
            if [ "$aff" == 0 ]; then
                aff=$((gCpuCount-1))
            fi
        done
    else
        bit=0
        for irq in $irq_list; do
            # Shell arithmetic replaces the former bc call; a single-bit mask
            # prints identically in hex.
            printf -v aff '%x' $((1 << bit))
            echo $aff > /proc/irq/$irq/smp_affinity
            echo "$aff > /proc/irq/$irq/smp_affinity"
            bit=$(( (bit + 1) % gCpuCount ))
        done
    fi
}

# Bare-metal host with virtio queues: force reverse CPU order, then run the
# NUMA-aware input and output binding in that order.
# Globals: gReverse (written, set to 1)
set_virtio_bm_net_affinity()
{
    local bind_step

    # assume that bm cpu must larger than 32.
    gReverse=1
    for bind_step in input_irq_bind_numaopt output_irq_bind_numaopt; do
        $bind_step
    done
}

# VM with a NUMA hint: run the NUMA-aware input binding, then the output one.
# gReverse is left as the caller set it.
set_numaopt_vm_net_affinity()
{
    local bind_step

    for bind_step in input_irq_bind_numaopt output_irq_bind_numaopt; do
        $bind_step
    done
}

# Affinity setup for a bare-metal host.
#
# Globals: gCpuCount (read)
# Prints "cpunumber error" and stops when gCpuCount is 0. Otherwise the host
# type is probed first, ethConfig runs, and then the virtio (shuishan) or the
# eth0 binding is applied.
set_bm_net_affinity()
{
    if [[ $gCpuCount == 0 ]]; then
        echo "cpunumber error"
        return
    fi

    # Probe before ethConfig runs, so the decision is based on the interrupt
    # list as it was on entry.
    local bind_fn=set_eth_bm_net_affinity
    if is_shuishan; then
        bind_fn=set_virtio_bm_net_affinity
    fi

    ethConfig
    $bind_fn
}

# Affinity setup for a VM.
#
# Globals:
#   CLOUDINIT_SOFTPASS_NUMA (read)
#   cloudinit_softpass_last_numa_id (written on the NUMA-aware path)
# A value of -1 means no NUMA hint was configured and selects the legacy
# binding; any other value seeds the starting NUMA node for the NUMA-aware one.
set_vm_net_affinity()
{
    case $CLOUDINIT_SOFTPASS_NUMA in
        -1)
            set_vm_net_legacy_affinity
            ;;
        *)
            cloudinit_softpass_last_numa_id=$CLOUDINIT_SOFTPASS_NUMA
            set_numaopt_vm_net_affinity
            ;;
    esac
}

# Set gReverse to 1 when lspci lists an NVMe ("Non-Volatile memory
# controller") device, and to 0 otherwise.
# Globals: gReverse (written)
reverse_irq_for_nvme()
{
    if lspci -nn | grep -q "Non-Volatile memory controller"; then
        gReverse=1
    else
        gReverse=0
    fi
}


# Detect an SA3 instance spanning nodes: the CPU model is
# "AMD EPYC 7K83 64-Core Processor" and there are at least 128 CPUs.
#
# return 0: yes sa3 and cpu >= 128
# return 1: not
# Globals written: is_milan, cpu_num, milan, cross_node
# Outputs: one diagnostic line "is_milan:<model> cpu_num:<n>" on stdout.
is_sa3_cross_node()
{
    # The model name is taken from the second ':'-separated field of line 5 of
    # /proc/cpuinfo; FS is assigned while reading line 1, so it is in effect by
    # the time line 5 is split.
    is_milan=$(cat /proc/cpuinfo | awk '{FS=":";if (NR==5) {print $2}}' | grep "AMD EPYC 7K83 64-Core Processor")
    cpu_num=$(cat /proc/cpuinfo | grep -c processor)

    echo "is_milan:$is_milan cpu_num:$cpu_num"

    milan=0
    [ -n "${is_milan}" ] && milan=1

    cross_node=0
    [ $cpu_num -ge 128 ] && cross_node=1

    (( milan == 1 && cross_node == 1 ))
}

# NUMA node hint for the first NIC, read from the nic_numa key of
# qcloud_init.ini (e.g. "nic_numa = 1"). -1 means "no hint configured".
CLOUDINIT_SOFTPASS_NUMA=-1

# Load the nic_numa hint into CLOUDINIT_SOFTPASS_NUMA.
#
# Globals:
#   CLOUDINIT_SOFTPASS_NUMA (written only when a numeric value is found)
# Arguments:
#   $1 - optional path of the ini file
#        (default: /usr/local/qcloud/qcloud_init.ini)
# Outputs:
#   A one-line notice on stdout when a value was loaded.
# Returns:
#   0 always; a missing file, a missing key or a value without digits all leave
#   the -1 default untouched.
is_VM_cloudinit_softpass_numa()
{
    local default_numa_file=${1:-/usr/local/qcloud/qcloud_init.ini}
    local numa_id

    [ -r "$default_numa_file" ] || return 0

    # First run of digits in the value of the first line whose key mentions
    # nic_numa. Taking a single number keeps the result usable in the later
    # `[ ... == -1 ]` comparison.
    numa_id=$(awk -F '=' '$1 ~ /nic_numa/ && match($2, /[0-9]+/) {
                              print substr($2, RSTART, RLENGTH); exit
                          }' "$default_numa_file")
    [ -n "$numa_id" ] || return 0

    CLOUDINIT_SOFTPASS_NUMA=$numa_id
    echo "Founding cloudinit config with nic_numa.. loading $CLOUDINIT_SOFTPASS_NUMA"
}

# Top-level decision: choose the CPU walking direction, detect VM vs bare
# metal, and dispatch to the matching affinity routine.
#
# Globals:
#   gCpuCount (read)
#   sa3_cross_node, gReverse, IS_VM (written)
# Direction: with 32 or more CPUs on anything but a cross-node SA3 machine the
# CPUs are walked in reverse; otherwise reverse_irq_for_nvme decides.
# IS_VM is the exit status of is_kvm_vm, so 0 means "this is a VM".
set_net_affinity()
{
    sa3_cross_node=0
    if is_sa3_cross_node; then
        sa3_cross_node=1
    fi

    if (( gCpuCount >= 32 && sa3_cross_node != 1 )); then
        gReverse=1
    else
        reverse_irq_for_nvme
    fi

    is_kvm_vm
    IS_VM=$?
    case $IS_VM in
        0) set_vm_net_affinity ;;
        *) set_bm_net_affinity ;;
    esac
}

# Entry point: count the CPUs from the "processor" lines of /proc/cpuinfo and
# leave early (exit status 0) when the count is unusable or there is only one
# CPU to choose from.
gCpuCount=$(cat /proc/cpuinfo | grep -c processor)
case $gCpuCount in
    0)
        echo "machine cpu count get error!"
        exit 0
        ;;
    1)
        echo "machine only have one cpu, needn't set affinity for net interrupt"
        exit 0
        ;;
esac

# Pick up the optional NUMA hint first; set_net_affinity consumes it.
is_VM_cloudinit_softpass_numa
set_net_affinity