给 mellanox bf2网卡刷镜像, 并测试 DPI URL-filter 场景
本文试图在BF2上配置DPI功能中的URL-filter场景,网络流量从bf2上经过以后,bf2的dpi芯片会分析网络包,并根据规则进行拦截。
实验的大体过程是,宿主机用rocky linux,用官方的固件(ubuntu)刷bf2卡,把bf2卡配置好。然后在宿主机上做点测试。
本文里面有一段,介绍了如何在宿主机是 rocky linux 的情况下,给 bf2 卡刷官方的镜像。
install host with rocky 8.5
我们先在宿主机上安装 rocky linux 8.5
# install rocky 8.5
# VAR_HOST is the name component used in the rd.lvm.lv=<name>/swap kernel
# argument that is stripped below.
export VAR_HOST='rl_panlab104'

# Once the OS is installed, add kernel parameters (chiefly intel_iommu=on
# iommu=pt) and reboot afterwards. Keep a backup of the grub defaults first.
cp /etc/default/grub /etc/default/grub.bak

# A single sed pass applies three edits to the GRUB_CMDLINE_LINUX line, in order:
#   1. drop the resume=... argument
#   2. drop the rd.lvm.lv=${VAR_HOST}/swap argument
#   3. replace the 2nd double quote on the line with the extra arguments
#      followed by a double quote, i.e. append them inside the quoted value
# https://unix.stackexchange.com/questions/403706/sed-insert-text-after-nth-character-preceding-following-a-given-string
sed -i \
  -e "/GRUB_CMDLINE_LINUX/s/resume=[^[:space:]]*//" \
  -e "/GRUB_CMDLINE_LINUX/s/rd.lvm.lv=${VAR_HOST}\\/swap//" \
  -e '/GRUB_CMDLINE_LINUX/s/"/ intel_iommu=on iommu=pt pci=realloc default_hugepagesz=1G hugepagesz=1G hugepages=16 rdblacklist=nouveau"/2' \
  /etc/default/grub

# Regenerate both the EFI and the legacy grub configuration files.
for grub_cfg in /boot/efi/EFI/rocky/grub.cfg /boot/grub2/grub.cfg; do
  grub2-mkconfig -o "${grub_cfg}"
done
# Optional: add support for the KVM "host" CPU mode by writing kvm_intel
# module options to a modprobe drop-in file.
printf '%s\n' \
  'options kvm_intel nested=1' \
  'options kvm-intel enable_shadow_vmcs=1' \
  'options kvm-intel enable_apicv=1' \
  'options kvm-intel ept=1' \
  > /etc/modprobe.d/kvm-nested.conf
# The default OS install creates swap and home volumes. This is a test system,
# so remove both and hand the freed space to the root volume.
# Requires VAR_HOST to be set (it is used as the /dev/<VAR_HOST>/ path component).
umount /home
swapoff "/dev/${VAR_HOST}/swap"

# Back up fstab, then comment out every not-yet-commented line that mentions
# home or swap.
cp /etc/fstab /etc/fstab.bak
sed -i -e 's/^[^#]*home/#&/' -e 's/^[^#]*swap/#&/' /etc/fstab

# Remove the two logical volumes (home first, then swap).
for lv in home swap; do
  lvremove -f "/dev/${VAR_HOST}/${lv}"
done

# Grow the root LV into all free space, then grow the XFS filesystem on it.
lvextend -l +100%FREE "/dev/${VAR_HOST}/root"
xfs_growfs "/dev/${VAR_HOST}/root"
# on 104
# first, is console
# Base tooling for the host: EPEL repository, terminal utilities, the
# 'Development Tools' and "Server with GUI" package groups, and the
# powertools repository.
# https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed
dnf install -y epel-release
dnf install -y byobu htop
dnf groupinstall -y 'Development Tools'
dnf groupinstall -y "Server with GUI"
dnf config-manager --set-enabled powertools
# Reference kept from the original notes (presumably the reason
# kernel-modules-extra is installed — confirm):
# https://bugzilla.redhat.com/show_bug.cgi?id=1814682
dnf install -y kernel-modules-extra psmisc
# Create /data/down/ and work from there in the following steps.
mkdir -p /data/down/
cd /data/down/
# Next, install the BF2-specific packages and activate the serial device
# that the BF2 exposes to the host.
# https://docs.nvidia.com/doca/sdk/installation-guide/index.html
# wget https://developer.nvidia.com/networking/secure/doca-sdk/doca_1.2.0/doca_120_b215/rshim-2.0.6-3.ge329c69.el7.centos.x86_64.rpm
# Installs whatever rshim*.rpm files are present in the current directory
# (the download above is commented out, so the file must already be there).
yum install -y rshim*.rpm
dnf install -y rshim expect wget minicom rpm-build lshw
# Start the rshim service now and on every boot, then show its state.
systemctl enable --now rshim
systemctl status rshim --no-pager -l
dnf install -y openssl-devel
# Proxy settings exported before the git clone below.
export http_proxy="http://192.168.195.54:5085"
export https_proxy=${http_proxy}
# Build mstflint from source and install it.
git clone https://github.com/Mellanox/mstflint
cd mstflint
./autogen.sh
./configure --disable-inband
make && make install
# Next, configure the host as a NAT router so that the OS running on the BF2
# can reach the internet as well.
# nat router on host
# https://access.redhat.com/discussions/4642721

# Append the IPv4 forwarding setting and reload all sysctl configuration.
printf '%s\n' 'net.ipv4.ip_forward = 1' >> /etc/sysctl.d/99-wzh-sysctl.conf
sysctl --system

systemctl disable --now firewalld

# on host
# Append the masquerade rule (outgoing interface eno2) to rc.local, make
# rc.local executable and enable the rc-local unit.
printf '%s\n' 'iptables -t nat -A POSTROUTING -o eno2 -j MASQUERADE' >> /etc/rc.d/rc.local
chmod +x /etc/rc.d/rc.local
systemctl enable --now rc-local
flash bf2 with official image
If you want to flash the bf2 with the official DOCA Ubuntu image, follow the steps here.
# on host
mkdir -p /data/soft
cd /data/soft

# Plain-text password, stored in a file named "pwd" in the working directory.
printf '%s\n' 'panpan' > pwd

# bf.cfg holds the hash of that password (openssl passwd -1) as
# ubuntu_PASSWORD, wrapped in single quotes.
printf "ubuntu_PASSWORD='%s'\n" "$(openssl passwd -1 -in pwd)" > bf.cfg

dnf install -y pv

# Push the BFB image to the card through rshim0, using bf.cfg as its config.
# https://docs.nvidia.com/doca/sdk/installation-guide/index.html
bfb-install \
  --bfb /data/down/DOCA_v1.2.0_BlueField_OS_Ubuntu_20.04-5.4.0-1022-bluefield-5.5-1.0.3.2-3.8.0.11969-1.signed-aarch64.bfb \
  --config bf.cfg \
  --rshim rshim0
# console=hvc0 console=ttyAMA0 earlycon=pl011,0x01000000 fixrtc quiet
# on host
# Set an IP address on tmfifo_net0 so the host can connect to the bf2.
# nmcli conn add type tun mode tap con-name tmfifo_net0 ifname tmfifo_net0 autoconnect yes ip4 192.168.100.1
nmcli connection modify tmfifo_net0 ipv4.address 192.168.100.1/30
nmcli connection up tmfifo_net0

# Alternative: connect to the bf2 through the rshim serial console.
minicom -c on -b 115200 -D /dev/rshim0/console
# on bf2
# login using ubuntu / panpan
# Switch to a root login shell.
sudo -i
# Set a password interactively (run from the root shell opened above).
passwd
# Replace the commented-out default with "PermitRootLogin yes", then restart
# sshd so the change takes effect.
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
systemctl restart sshd
# set ip address to connect from host
# First write a cloud-init drop-in whose only content marks the network
# config as disabled.
printf '%s\n' 'network: {config: disabled}' > /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg
# Write the netplan configuration for the BF2:
#   - oob_net0    : DHCP
#   - tmfifo_net0 : static 192.168.100.2/30, DNS 172.21.1.1, default route
#                   via 192.168.100.1 with metric 1025
# YAML is indentation-sensitive: every key must be nested under its parent as
# shown, otherwise netplan cannot parse the file. The heredoc delimiter is
# quoted so the shell expands nothing inside the document.
cat << 'EOF' > /etc/netplan/50-netcfg-wzh.yaml
network:
  ethernets:
    oob_net0:
      dhcp4: true
    tmfifo_net0:
      addresses:
        - 192.168.100.2/30
      dhcp4: false
      nameservers:
        addresses:
          - 172.21.1.1
      routes:
        - metric: 1025
          to: 0.0.0.0/0
          via: 192.168.100.1
  renderer: NetworkManager
  version: 2
EOF
# Apply the new configuration and restart networking.
netplan apply
/etc/init.d/networking restart
# on host
# From now on you can comfortably ssh from the host to the bf2 card.
ssh root@192.168.100.2
dpi url-filter test
https://docs.nvidia.com/doca/sdk/url-filter/index.html
我们参考官方文档,做dpi URL-Filter的测试。
# on bf2
cd /opt/mellanox/doca/examples/url_filter/bin

# Set the number of 2048kB huge pages to 2048.
printf '%s\n' 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages

# Restart the mlx-regex service, then print its status.
for action in restart status; do
  systemctl "${action}" mlx-regex
done
# ● mlx-regex.service - Regex daemon for BlueField 2
# Loaded: loaded (/etc/systemd/system/mlx-regex.service; enabled; vendor preset: enabled)
# Active: active (running) since Thu 2021-12-16 11:47:01 UTC; 7s ago
# Main PID: 55816 (mlx-regex)
# Tasks: 1 (limit: 19083)
# Memory: 564.0K
# CGroup: /system.slice/mlx-regex.service
# └─55816 /usr/bin/mlx-regex
# Dec 16 11:47:01 localhost systemd[1]: Started Regex daemon for BlueField 2.
# Full path of the mlxdevm binary used for every command in this step.
mlxdevm=/opt/mellanox/iproute2/sbin/mlxdevm

# List the ports before anything is added.
"${mlxdevm}" port show
# pci/0000:03:00.0/294912: type eth netdev en3f0pf0sf0 flavour pcisf controller 0 pfnum 0 sfnum 0
# function:
# hw_addr 02:56:ae:76:cd:e9 state active opstate attached roce true max_uc_macs 128 trust off
# pci/0000:03:00.1/360448: type eth netdev en3f1pf1sf0 flavour pcisf controller 0 pfnum 1 sfnum 0
# function:
# hw_addr 02:26:61:34:13:9e state active opstate attached roce true max_uc_macs 128 trust off

# Add two pcisf ports on pci/0000:03:00.0 (pfnum 0): sfnum 4, then sfnum 5.
for sfnum in 4 5; do
  "${mlxdevm}" port add pci/0000:03:00.0 flavour pcisf pfnum 0 sfnum "${sfnum}"
done

# List again; the new ports show up as inactive/detached.
"${mlxdevm}" port show
# pci/0000:03:00.0/294912: type eth netdev en3f0pf0sf0 flavour pcisf controller 0 pfnum 0 sfnum 0
# function:
# hw_addr 02:56:ae:76:cd:e9 state active opstate attached roce true max_uc_macs 128 trust off
# pci/0000:03:00.0/294913: type eth netdev en3f0pf0sf4 flavour pcisf controller 0 pfnum 0 sfnum 4
# function:
# hw_addr 00:00:00:00:00:00 state inactive opstate detached roce true max_uc_macs 128 trust off
# pci/0000:03:00.0/294914: type eth netdev en3f0pf0sf5 flavour pcisf controller 0 pfnum 0 sfnum 5
# function:
# hw_addr 00:00:00:00:00:00 state inactive opstate detached roce true max_uc_macs 128 trust off
# pci/0000:03:00.1/360448: type eth netdev en3f1pf1sf0 flavour pcisf controller 0 pfnum 1 sfnum 0
# function:
# hw_addr 02:26:61:34:13:9e state active opstate attached roce true max_uc_macs 128 trust off

# Give each new port (indexes 294913 / 294914 from the listing above) a MAC
# address, turn trust on and set it active.
"${mlxdevm}" port function set pci/0000:03:00.0/294913 hw_addr 02:25:f2:8d:a2:4c trust on state active
"${mlxdevm}" port function set pci/0000:03:00.0/294914 hw_addr 02:25:f2:8d:a2:5c trust on state active
# Remove the existing ovsbr1 bridge and create the two bridges used for the test.
ovs-vsctl del-br ovsbr1
for bridge in sf_bridge1 sf_bridge2; do
  ovs-vsctl add-br "${bridge}"
done

# Attach the ports, written as bridge:port pairs:
#   sf_bridge1 <- p0, en3f0pf0sf4
#   sf_bridge2 <- pf0hpf, en3f0pf0sf5
for pair in \
  sf_bridge1:p0 \
  sf_bridge1:en3f0pf0sf4 \
  sf_bridge2:pf0hpf \
  sf_bridge2:en3f0pf0sf5; do
  ovs-vsctl add-port "${pair%%:*}" "${pair#*:}"
done

# Print the resulting layout.
ovs-vsctl show
# 04d25b73-2f63-4e47-b7d9-2362cc4d7fda
# Bridge ovsbr2
# Port p1
# Interface p1
# Port en3f1pf1sf0
# Interface en3f1pf1sf0
# Port ovsbr2
# Interface ovsbr2
# type: internal
# Port pf1hpf
# Interface pf1hpf
# Bridge sf_bridge2
# Port sf_bridge2
# Interface sf_bridge2
# type: internal
# Port en3f0pf0sf5
# Interface en3f0pf0sf5
# Port pf0hpf
# Interface pf0hpf
# Bridge sf_bridge1
# Port sf_bridge1
# Interface sf_bridge1
# type: internal
# Port en3f0pf0sf4
# Interface en3f0pf0sf4
# Port p0
# Interface p0
# ovs_version: "2.15.1"
# Bring both SF network interfaces up.
for sf_netdev in en3f0pf0sf4 en3f0pf0sf5; do
  ifconfig "${sf_netdev}" up
done

# For each SF auxiliary device: unbind it from the mlx5_core.sf_cfg driver,
# then bind it to the mlx5_core.sf driver.
aux_drivers=/sys/bus/auxiliary/drivers
for sf_dev in mlx5_core.sf.4 mlx5_core.sf.5; do
  echo "${sf_dev}" > "${aux_drivers}/mlx5_core.sf_cfg/unbind"
  echo "${sf_dev}" > "${aux_drivers}/mlx5_core.sf/bind"
done

# List the SF auxiliary devices.
ls /sys/bus/auxiliary/devices/mlx5_core.sf.*
# /sys/bus/auxiliary/devices/mlx5_core.sf.2:
# driver infiniband infiniband_mad infiniband_verbs mlx5_core.eth.2 mlx5_core.rdma.2 net power sfnum subsystem uevent
# /sys/bus/auxiliary/devices/mlx5_core.sf.3:
# driver infiniband infiniband_mad infiniband_verbs mlx5_core.eth.3 mlx5_core.rdma.3 net power sfnum subsystem uevent
# /sys/bus/auxiliary/devices/mlx5_core.sf.4:
# driver infiniband infiniband_mad infiniband_verbs mlx5_core.eth.4 mlx5_core.rdma.4 net power sfnum subsystem uevent
# /sys/bus/auxiliary/devices/mlx5_core.sf.5:
# driver infiniband infiniband_mad infiniband_verbs mlx5_core.eth.5 mlx5_core.rdma.5 net power sfnum subsystem uevent

# Check that device mlx5_core.sf.4 reports sfnum 4.
cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum
# 4
# on 104 host with bf2
# Take enp6s0f1 down and give enp6s0f0 the static test address
# 192.168.99.11/24 (the address the later tests connect to).
# nmcli con modify enp6s0f1 ipv4.method manual ipv4.addresses 192.168.99.11/24
nmcli connection down enp6s0f1
nmcli connection modify enp6s0f0 ipv4.method manual ipv4.addresses 192.168.99.11/24
nmcli connection up enp6s0f0
# on 104 bf2
# Create the url filter rules.
# Start the DOCA url_filter sample with the regex device (0000:03:00.0) and
# the two SF auxiliary devices (mlx5_core.sf.4 / mlx5_core.sf.5, sft_en=1).
# NOTE: the "URL FILTER>>" lines below appear to be entered at the
# application's own prompt rather than in the shell; the sample output that
# follows is the application's response to the commit.
/opt/mellanox/doca/examples/url_filter/bin/doca_url_filter -a 0000:03:00.0,class=regex -a auxiliary:mlx5_core.sf.4,sft_en=1 -a auxiliary:mlx5_core.sf.5,sft_en=1 -- -p
URL FILTER>> create database
URL FILTER>> filter http wzh_hits_msg wzhtest
URL FILTER>> commit database /tmp/signature.txt
# /tmp/104052/signatures.rules
# rules file is /tmp/104052/signatures.rules
# Info: Setting target hardware version to v5.7...done
# Info: Setting virtual prefix mode to 0...done
# Info: Setting prefix capacity to 32K...done
# Info: Setting compiler objective value to 5...done
# Info: Setting number of threads for compilation to 1...done
# Info: Reading ruleset...done
# Info: Detected 2 rules
# Info: Enabling global single-line mode...done
# Info: Setting maximum TPE data width to 4...done
# Info: Scanning rules...[==============================]...done
# Info: Analising possible prefix usage...[==============================]...done
# Info: Mapping prefixes, phase 1...[==============================]...done
# Info: Mapping prefixes, phase 2...[==============================]...done
# Info: Running rules analysis...[==============================]...done
# Info: Optimizing memory map...[==============================]...done
# Info: Analyzing memory map...[==============================]...done
# Info: Calculating thread instructions...[==============================]...done
# Info: Beginning to write memory map for ROF2...done
# Info: PPE total 1-byte prefix usage: 0/256 (0%)
# Info: PPE total 2-byte prefix usage: 0/2048 (0%)
# Info: PPE total 3-byte prefix usage: 0/2048 (0%)
# Info: PPE total 4-byte prefix usage: 1/32768 (0.00305176%)
# Info: TPE instruction RAM TCM partition usage: 2048/2048 (100%)
# Info: TPE instruction RAM external memory partition usage: 6207/13M (0.0455343%)
# Info: TPE class RAM usage: 1/256 (0.390625%)
# Info: Estimated threads/byte: 5.183e-10
# Info: Finalizing memory map for ROF2...done
# Info: Storing ROF2 data...done
# Info: Number of rules compiled = 2/2
# Info: Writing ROF2 file to /tmp/104052/rof/signatures_compiled.rof2
# Info: Writing binary ROF2 file to /tmp/104052/rof/signatures_compiled.rof2.binary...done
URL FILTER>> [12:36:50:606702][DOCA][I][UFLTR::Core]: SIG ID: 1, URL MSG: wzh_hits_msg, SFT_FID: 1
# on 101
# Request the site root through the bf2; the page is returned (tail of the
# response shown).
curl http://192.168.99.11
# ....
# <footer class="col-sm-12">
# <a href="https://apache.org">Apache™</a> is a registered trademark of <a href="https://apache.org">the Apache Software Foundation</a> in the United States and/or other countries.<br />
# <a href="https://nginx.org">NGINX™</a> is a registered trademark of <a href="https://">F5 Networks, Inc.</a>.
# </footer>
# </body>
# </html>
# A URL that does not contain the filtered string still reaches the web
# server (which answers 404 because the page does not exist).
curl http://192.168.99.11/test
# <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
# <html><head>
# <title>404 Not Found</title>
# </head><body>
# <h1>Not Found</h1>
# <p>The requested URL was not found on this server.</p>
# </body></html>
# The URL below matches the rule, and you can see that the request fails.
# URLs that do not match any rule (as above) can still reach the http service.
curl http://192.168.99.11/wzhtest
# curl: (56) Recv failure: Connection timed out
performance test
简单的测试一下性能,由于环境的物理设备条件所限,所以结果并不准确。
# on 104 host
# Install iperf3 and run it as a server listening on port 6666.
dnf install -y iperf3
iperf3 --server --port 6666
# on 101 host
# Run the client against the 104 host's test address on the same port.
iperf3 --client 192.168.99.11 --port 6666
# Connecting to host 192.168.99.11, port 6666
# [ 5] local 192.168.99.21 port 37060 connected to 192.168.99.11 port 6666
# [ ID] Interval Transfer Bitrate Retr Cwnd
# [ 5] 0.00-1.00 sec 1.40 GBytes 12.1 Gbits/sec 17 905 KBytes
# [ 5] 1.00-2.00 sec 1.46 GBytes 12.6 Gbits/sec 26 795 KBytes
# [ 5] 2.00-3.00 sec 1.41 GBytes 12.1 Gbits/sec 71 922 KBytes
# [ 5] 3.00-4.00 sec 1.49 GBytes 12.8 Gbits/sec 0 998 KBytes
# [ 5] 4.00-5.00 sec 1.44 GBytes 12.4 Gbits/sec 44 1010 KBytes
# [ 5] 5.00-6.00 sec 1.34 GBytes 11.5 Gbits/sec 101 796 KBytes
# [ 5] 6.00-7.00 sec 1.45 GBytes 12.5 Gbits/sec 9 925 KBytes
# [ 5] 7.00-8.00 sec 1.39 GBytes 11.9 Gbits/sec 0 1014 KBytes
# [ 5] 8.00-9.00 sec 1.45 GBytes 12.4 Gbits/sec 62 930 KBytes
# [ 5] 9.00-10.00 sec 1.44 GBytes 12.3 Gbits/sec 157 1.07 MBytes
# - - - - - - - - - - - - - - - - - - - - - - - - -
# [ ID] Interval Transfer Bitrate Retr
# [ 5] 0.00-10.00 sec 14.3 GBytes 12.3 Gbits/sec 487 sender
# [ 5] 0.00-10.04 sec 14.3 GBytes 12.2 Gbits/sec receiver
# iperf Done.
# Show the link settings of enp5s0f1 (sample output follows; it reports a
# 25000Mb/s full-duplex link).
ethtool enp5s0f1
# Settings for enp5s0f1:
# Supported ports: [ Backplane ]
# Supported link modes: 1000baseKX/Full
# 10000baseKR/Full
# 25000baseCR/Full
# 25000baseKR/Full
# 25000baseSR/Full
# Supported pause frame use: Symmetric
# Supports auto-negotiation: Yes
# Supported FEC modes: None RS BASER
# Advertised link modes: 1000baseKX/Full
# 10000baseKR/Full
# 25000baseCR/Full
# 25000baseKR/Full
# 25000baseSR/Full
# Advertised pause frame use: Symmetric
# Advertised auto-negotiation: Yes
# Advertised FEC modes: None RS BASER
# Link partner advertised link modes: Not reported
# Link partner advertised pause frame use: No
# Link partner advertised auto-negotiation: Yes
# Link partner advertised FEC modes: Not reported
# Speed: 25000Mb/s
# Duplex: Full
# Auto-negotiation: on
# Port: Direct Attach Copper
# PHYAD: 0
# Transceiver: internal
# Supports Wake-on: d
# Wake-on: d
# Current message level: 0x00000004 (4)
# link
# Link detected: yes
others
# firewall-cmd --permanent --direct --add-rule ipv4 nat POSTROUTING 0 -o eth_ext -j MASQUERADE
# firewall-cmd --permanent --direct --add-rule ipv4 filter FORWARD 0 -i eth_int -o eth_ext -j ACCEPT
# firewall-cmd --permanent --direct --add-rule ipv4 filter FORWARD 0 -i eth_ext -o eth_int -m state --state RELATED,ESTABLISHED -j ACCEPT
# firewall-cmd --permanent --add-port=80/tcp
# firewall-cmd --permanent --add-port=443/tcp
# firewall-cmd --permanent --add-port=53/tcp
# firewall-cmd --permanent --add-port=53/udp
# firewall-cmd --permanent --add-masquerade
# firewall-cmd --reload
# firewall-cmd --permanent --direct --remove-rule ipv4 nat POSTROUTING 0 -o eth_ext -j MASQUERADE
# firewall-cmd --permanent --direct --remove-rule ipv4 filter FORWARD 0 -i eth_int -o eth_ext -j ACCEPT
# firewall-cmd --permanent --direct --remove-rule ipv4 filter FORWARD 0 -i eth_ext -o eth_int -m state --state RELATED,ESTABLISHED -j ACCEPT
# firewall-cmd --permanent --remove-port=80/tcp
# firewall-cmd --permanent --remove-port=443/tcp
# firewall-cmd --permanent --remove-port=53/tcp
# firewall-cmd --permanent --remove-port=53/udp
# firewall-cmd --permanent --remove-masquerade
# firewall-cmd --reload