#!/bin/bash
# mlnx_affinity, version 2.0
# This version is supported in MLNX_OFED-2.0.x driver versions only
#
#
# Copyright (c) 2017 Mellanox Technologies. All rights reserved.
#
# This Software is licensed under one of the following licenses:
#
# 1) under the terms of the "Common Public License 1.0" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/cpl.php.
#
# 2) under the terms of the "The BSD License" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/bsd-license.php.
#
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
#    copy of which is available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/gpl-license.php.
#
# Licensee has the right to choose one of the above licenses.
#
# Redistributions of source code must retain the above copyright
# notice and one of the license notices.
#
# Redistributions in binary form must reproduce both the above copyright
# notice, one of the license notices in the documentation
# and/or other materials provided with the distribution.
#
# Usage: mlnx_affinity {start|stop|restart}
#
#   start   - turn the irqbalance service off at boot (where supported) and
#             run the set_irq_affinity helper scripts for every interface
#             reported by ibdev2netdev.
#   stop    - turn the irqbalance service back on at boot.
#   restart - stop followed by start.

ACTION=$1

# First line of /etc/issue; used only to recognise Ubuntu/Debian below.
ISSUE=$(head -n 1 /etc/issue)

# Name of the irqbalance service (empty when none was found) and the tool
# that reported it ("systemctl" or "chkconfig"), so that the same tool is
# used later to switch it on or off.
IRQBALANCER=""
IRQBALANCER_TOOL=""

if grep -qiE 'ubuntu|debian' <<<"$ISSUE"; then
  echo "INFO - This utility does not support irqbalance control in $ISSUE"
  echo "INFO - irqbalance status will not change"
else
  if [[ -f /bin/systemctl ]]; then
    # Only the first matching unit is kept so the value is a single word.
    IRQBALANCER=$(/bin/systemctl list-unit-files --type=service 2>/dev/null \
      | grep irq | grep balance | cut -d " " -f 1 | head -n 1)
    if [[ -n "$IRQBALANCER" ]]; then
      IRQBALANCER_TOOL="systemctl"
    fi
  fi
  if [[ -z "$IRQBALANCER" && -f /sbin/chkconfig ]]; then
    IRQBALANCER=$(/sbin/chkconfig --list 2>/dev/null \
      | grep irq | grep balance | cut -d " " -f 1 | head -n 1)
    if [[ -n "$IRQBALANCER" ]]; then
      IRQBALANCER_TOOL="chkconfig"
    fi
  fi
  if [[ -z "$IRQBALANCER" ]]; then
    echo "INFO - irqbalancer not found"
    echo "INFO - irqbalance status will not change"
  fi
fi

# External commands and sysfs path fragments used by start().
# LSCPU_CMD holds a command line and is expanded unquoted on purpose.
LSCPU_CMD="cat /proc/cpuinfo"
IBDEV2NETDEV="ibdev2netdev"
IFCONFIG="ifconfig"
NUMA_SUPPORT_PREFIX="/sys/class/net/"
NUMA_SUPPORT_SUFFIX="/device/numa_node"
PCI_SLOT_PREFIX="/sys/class/net/"
FATHER_PCI_SLOT_SUFFIX="/device/../uevent"
SET_IRQ_AFIINITY_BY_NODE="set_irq_affinity_bynode.sh"
SET_IRQ_AFIINITY_ALL="set_irq_affinity.sh"

#########################################################################

#######################################
# Tell whether the controlling terminal is something other than a pty.
# Globals:  CONSOLETYPE (read)
# Returns:  1 when CONSOLETYPE is 'pty' or tty(1) reports a pts device,
#           0 otherwise.
#######################################
is_not_pty() {
  if [[ "$CONSOLETYPE" == 'pty' ]]; then
    return 1
  fi
  # tty(1) prints the full device path (e.g. /dev/pts/0), so match that form
  # as well as the bare "pts*" spelling.
  case "$(tty)" in
    */pts/* | pts*)
      return 1
      ;;
  esac
  return 0
}

#######################################
# Tell whether the script is attached to the first serial console.
# Globals:  CONSOLETYPE (read)
# Returns:  0 when CONSOLETYPE is 'serial' or tty(1) reports ttyS0,
#           1 otherwise.
#######################################
is_serial() {
  if [[ "$CONSOLETYPE" == 'serial' ]]; then
    return 0
  fi
  # Accept both "/dev/ttyS0" (what tty(1) prints) and a bare "ttyS0".
  case "$(tty)" in
    */ttyS0 | ttyS0)
      return 0
      ;;
  esac
  return 1
}

# Get a sane screen width
if [[ -z "${COLUMNS:-}" ]]; then
  COLUMNS=80
fi

if [[ -f /etc/sysconfig/i18n && -z "${NOLOCALE:-}" ]]; then
  . /etc/sysconfig/i18n
  if is_not_pty; then
    # Outside a pty, switch messages to en_US for these CJK locales.
    case "${LANG:-}" in
      ja_JP* | ko_KR* | zh_CN* | zh_TW*)
        export LC_MESSAGES=en_US
        ;;
      *)
        export LANG
        ;;
    esac
  else
    export LANG
  fi
fi

# Read in our configuration
if [[ -z "${BOOTUP:-}" ]]; then
  if [[ -f /etc/sysconfig/init ]]; then
    . /etc/sysconfig/init
  else
    # This all seems confusing? Look in /etc/sysconfig/init,
    # or in /usr/doc/initscripts-*/sysconfig.txt
    BOOTUP=color
    RES_COL=60
    MOVE_TO_COL="echo -en \\033[${RES_COL}G"
    SETCOLOR_SUCCESS="echo -en \\033[1;32m"
    SETCOLOR_FAILURE="echo -en \\033[1;31m"
    SETCOLOR_WARNING="echo -en \\033[1;33m"
    SETCOLOR_NORMAL="echo -en \\033[0;39m"
    LOGLEVEL=1
  fi
  if is_serial; then
    # No cursor movement or colour escapes on a serial console.
    BOOTUP=serial
    MOVE_TO_COL=
    SETCOLOR_SUCCESS=
    SETCOLOR_FAILURE=
    SETCOLOR_WARNING=
    SETCOLOR_NORMAL=
  fi
fi

if [[ "${BOOTUP:-}" != "verbose" ]]; then
  INITLOG_ARGS="-q"
else
  INITLOG_ARGS=
fi

# The four status printers below share one layout: optional leading text,
# move to the result column, print a bracketed word, end the line with "\r".
# MOVE_TO_COL / SETCOLOR_* hold whole command lines, so they are expanded
# unquoted on purpose.

# Print "[ OK ]" in the result column. Returns 0.
echo_success() {
  echo -n "$@"
  [[ "$BOOTUP" == "color" ]] && $MOVE_TO_COL
  echo -n "[ "
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_SUCCESS
  echo -n $"OK"
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_NORMAL
  echo -n " ]"
  echo -e "\r"
  return 0
}

# Print "[ done ]" in the result column. Returns 0.
echo_done() {
  echo -n "$@"
  [[ "$BOOTUP" == "color" ]] && $MOVE_TO_COL
  echo -n "[ "
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_NORMAL
  echo -n $"done"
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_NORMAL
  echo -n " ]"
  echo -e "\r"
  return 0
}

# Print "[FAILED]" in the result column. Returns 1.
echo_failure() {
  echo -n "$@"
  [[ "$BOOTUP" == "color" ]] && $MOVE_TO_COL
  echo -n "["
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_FAILURE
  echo -n $"FAILED"
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_NORMAL
  echo -n "]"
  echo -e "\r"
  return 1
}

# Print "[WARNING]" in the result column. Returns 1.
echo_warning() {
  echo -n "$@"
  [[ "$BOOTUP" == "color" ]] && $MOVE_TO_COL
  echo -n "["
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_WARNING
  echo -n $"WARNING"
  [[ "$BOOTUP" == "color" ]] && $SETCOLOR_NORMAL
  echo -n "]"
  echo -e "\r"
  return 1
}

#######################################
# Switch the detected irqbalance service on or off at boot, using the tool
# that detected it.
# Globals:   IRQBALANCER, IRQBALANCER_TOOL (read)
# Arguments: $1 - "on" or "off"
# Returns:   exit status of the underlying systemctl/chkconfig call
#######################################
set_irqbalancer() {
  local state=$1
  local verb

  if [[ "$IRQBALANCER_TOOL" == "systemctl" ]]; then
    if [[ "$state" == "on" ]]; then
      verb=enable
    else
      verb=disable
    fi
    # Output is discarded on purpose: the caller is in the middle of printing
    # a status line and reports failure itself from the exit status.
    /bin/systemctl "$verb" "$IRQBALANCER" >/dev/null 2>&1
  else
    /sbin/chkconfig "$IRQBALANCER" "$state"
  fi
}

#######################################
# Choose a NUMA node for every interface listed by ibdev2netdev and run the
# matching IRQ affinity script for it.
# Globals:   IRQBALANCER, LSCPU_CMD, IBDEV2NETDEV, IFCONFIG,
#            NUMA_SUPPORT_PREFIX, NUMA_SUPPORT_SUFFIX, PCI_SLOT_PREFIX,
#            FATHER_PCI_SLOT_SUFFIX, SET_IRQ_AFIINITY_ALL,
#            SET_IRQ_AFIINITY_BY_NODE (all read)
# Outputs:   status line on stdout; a WARNING line when a node was guessed
#######################################
start() {
  local cpu_family cpu_model is_sandy_bridge numa_count
  local i iface iface_status numa_file numa_node
  local uevent dev_link father_pci father_bus father_slot
  local warned=0
  local -a interfaces=() interfaces_status=() interfaces_numa=()

  echo -n "mlnx_affinity started"

  # Check if the machine arch is Sandy Bridge (CPU family 6, model 45).
  # String comparison keeps this quiet when /proc/cpuinfo lacks the fields.
  cpu_family=$($LSCPU_CMD | grep -i -m 1 "CPU family" | awk '{print $NF}')
  cpu_model=$($LSCPU_CMD | grep -i -m 1 "Model" | awk '{print $NF}')
  if [[ "$cpu_family" == 6 && "$cpu_model" == 45 ]]; then
    is_sandy_bridge="true"
  else
    is_sandy_bridge="false"
  fi

  # Number of NUMA nodes: from numactl when installed, otherwise from lscpu.
  if command -v numactl >/dev/null 2>&1; then
    numa_count=$(numactl --hardware | grep -i available | awk '{print $2}')
  else
    numa_count=$(lscpu | grep NUMA | grep -c CPU)
  fi

  if [[ -n "$IRQBALANCER" ]]; then
    if ! set_irqbalancer off; then
      echo "Warning - can't disable $IRQBALANCER service"
    fi
  fi

  # Fields 5 and 6 of each ibdev2netdev line are the interface name and its
  # status. Reading them in one pass keeps the two arrays index-aligned.
  while read -r _ _ _ _ iface iface_status _; do
    [[ -n "$iface" ]] || continue
    interfaces+=("$iface")
    interfaces_status+=("$iface_status")
  done < <("$IBDEV2NETDEV")

  for i in "${!interfaces[@]}"; do
    iface=${interfaces[i]}
    iface_status=${interfaces_status[i]}

    # NUMA node as exported in sysfs; -1 when the file is missing or unusable.
    numa_file="${NUMA_SUPPORT_PREFIX}${iface}${NUMA_SUPPORT_SUFFIX}"
    numa_node=-1
    if [[ -r "$numa_file" ]]; then
      numa_node=$(<"$numa_file")
      [[ "$numa_node" =~ ^-?[0-9]+$ ]] || numa_node=-1
    fi

    # Set the interface up (if its status is down).
    case "$iface_status" in
      *"Down"*)
        $IFCONFIG "$iface" up
        ;;
    esac

    # sysfs gave a real node (BIOS support): use it as is.
    if [[ "$numa_node" != -1 ]]; then
      interfaces_numa[i]=$numa_node
      continue
    fi

    # No node information and not Sandy Bridge: spread IRQs over all cores.
    if [[ "$is_sandy_bridge" == "false" ]]; then
      interfaces_numa[i]=-1
      continue
    fi

    # Sandy Bridge without BIOS support: guess the node from the PCI address.
    # Prefer the parent device's PCI_SLOT_NAME; fall back to the first seven
    # characters of the interface's own device link target.
    uevent="${PCI_SLOT_PREFIX}${iface}${FATHER_PCI_SLOT_SUFFIX}"
    father_pci=""
    if [[ -r "$uevent" ]]; then
      father_pci=$(awk -F "=" '/PCI_SLOT_NAME/ {print $2; exit}' "$uevent")
    fi
    if [[ -z "$father_pci" ]]; then
      dev_link=$(readlink "${PCI_SLOT_PREFIX}${iface}/device")
      dev_link=${dev_link##*/}
      father_pci=${dev_link:0:7}
    fi

    # First two ':'-separated fields of the address.
    father_bus=""
    father_slot=""
    IFS=: read -r father_bus father_slot _ <<<"$father_pci"

    if [[ "$father_bus" == "0000" && "$father_slot" =~ ^[0-9a-fA-F]+$ ]] \
      && ((16#$father_slot < 32)); then
      interfaces_numa[i]=0
    elif [[ "$numa_count" == 2 ]]; then
      interfaces_numa[i]=1
    else
      echo_warning
      echo "WARNING - can't detect numa node for interface $iface, setting affinity to node 1."
      interfaces_numa[i]=1
      warned=1
    fi
  done

  # Run the set irq affinity scripts; their stdout is not shown.
  for i in "${!interfaces[@]}"; do
    if [[ "${interfaces_numa[i]}" == -1 ]]; then
      "$SET_IRQ_AFIINITY_ALL" "${interfaces[i]}" >/dev/null
    else
      "$SET_IRQ_AFIINITY_BY_NODE" "${interfaces_numa[i]}" "${interfaces[i]}" >/dev/null
    fi
  done

  # Only report OK when no warning has already closed the status line.
  if ((warned == 0)); then
    echo_success
  fi
}

#######################################
# Switch the irqbalance service back on at boot (when one was detected) and
# report success.
# Globals:  IRQBALANCER (read)
#######################################
stop() {
  if [[ -n "$IRQBALANCER" ]]; then
    set_irqbalancer on
  fi
  echo -n "mlnx_affinity stopped"
  echo_success
}

case "$ACTION" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    stop
    start
    ;;
  *)
    echo
    echo "Usage: $(basename "$0") {start|stop|restart}"
    echo
    exit 1
    ;;
esac