给 mellanox bf2网卡刷镜像, 并测试 DPI URL-filter 场景

本文试图在BF2上配置DPI功能中的URL-filter场景,网络流量从bf2上经过以后,bf2的dpi芯片会分析网络包,并根据规则进行拦截。

实验的大体过程是,宿主机用rocky linux,用官方的固件(ubuntu)刷bf2卡,把bf2卡配置好。然后在宿主机上做点测试。

本文里面有一段,是如何在宿主机是rocky linux的情况下,给bf2卡刷官方的镜像

install host with rocky 8.5

我们先在宿主机上安装 rocky linux 8.5

# install rocky 8.5

export VAR_HOST='rl_panlab104'

# 按照完了操作系统以后,添加kernel参数,主要是intel_iommu=on iommu=pt,然后重启
cp /etc/default/grub /etc/default/grub.bak
sed -i "/GRUB_CMDLINE_LINUX/s/resume=[^[:space:]]*//"  /etc/default/grub
sed -i "/GRUB_CMDLINE_LINUX/s/rd.lvm.lv=${VAR_HOST}\\/swap//"  /etc/default/grub
# https://unix.stackexchange.com/questions/403706/sed-insert-text-after-nth-character-preceding-following-a-given-string
sed -i '/GRUB_CMDLINE_LINUX/s/"/ intel_iommu=on iommu=pt pci=realloc  default_hugepagesz=1G hugepagesz=1G hugepages=16 rdblacklist=nouveau"/2' /etc/default/grub

grub2-mkconfig -o /boot/efi/EFI/rocky/grub.cfg

grub2-mkconfig -o /boot/grub2/grub.cfg

# 添加kvm cpu host mode模式的支持,可以不做
cat << EOF > /etc/modprobe.d/kvm-nested.conf
options kvm_intel nested=1  
options kvm-intel enable_shadow_vmcs=1   
options kvm-intel enable_apicv=1         
options kvm-intel ept=1                  
EOF

# 默认的操作系统安装,有swap, home分区,我们是测试系统,全都删了吧。
umount /home
swapoff  /dev/$VAR_HOST/swap

cp /etc/fstab /etc/fstab.bak
sed -i 's/^[^#]*home/#&/' /etc/fstab
sed -i 's/^[^#]*swap/#&/' /etc/fstab

lvremove -f /dev/$VAR_HOST/home
lvremove -f /dev/$VAR_HOST/swap

lvextend -l +100%FREE /dev/$VAR_HOST/root
xfs_growfs /dev/$VAR_HOST/root

# on 104
# first, is console
# https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed

dnf install -y epel-release
dnf install -y byobu htop
dnf groupinstall -y 'Development Tools'
dnf groupinstall -y "Server with GUI"
dnf config-manager --set-enabled powertools

# https://bugzilla.redhat.com/show_bug.cgi?id=1814682
dnf install -y kernel-modules-extra psmisc

mkdir -p /data/down/
cd /data/down/

# 接下来装一些bf2特殊的包,把bf2向主机暴露的串口设备给激活。
# https://docs.nvidia.com/doca/sdk/installation-guide/index.html
# wget https://developer.nvidia.com/networking/secure/doca-sdk/doca_1.2.0/doca_120_b215/rshim-2.0.6-3.ge329c69.el7.centos.x86_64.rpm
yum install -y rshim*.rpm

dnf install -y rshim expect wget minicom rpm-build lshw
systemctl enable --now rshim
systemctl status rshim --no-pager -l
dnf install -y openssl-devel

export http_proxy="http://192.168.195.54:5085"
export https_proxy=${http_proxy}

git clone https://github.com/Mellanox/mstflint
cd mstflint
./autogen.sh
./configure --disable-inband
make && make install

# 接下来,配置宿主机当作nat路由器,这样bf2上的操作系统,也能访问互联网了。
# nat router on host
# https://access.redhat.com/discussions/4642721
cat << EOF >> /etc/sysctl.d/99-wzh-sysctl.conf

net.ipv4.ip_forward = 1

EOF
sysctl --system

systemctl disable --now firewalld

# on host
cat << EOF >> /etc/rc.d/rc.local

iptables -t nat -A POSTROUTING -o eno2 -j MASQUERADE

EOF
chmod +x /etc/rc.d/rc.local
systemctl enable --now rc-local

flash bf2 with offical image

if you want to flash the bf2 to offical doca ubuntu image, follow steps here.

# on host
mkdir -p /data/soft
cd /data/soft

cat << EOF > pwd
panpan
EOF

cat << EOF > bf.cfg
ubuntu_PASSWORD='`openssl passwd -1 -in pwd`'
EOF

dnf install -y pv

# https://docs.nvidia.com/doca/sdk/installation-guide/index.html
bfb-install --bfb /data/down/DOCA_v1.2.0_BlueField_OS_Ubuntu_20.04-5.4.0-1022-bluefield-5.5-1.0.3.2-3.8.0.11969-1.signed-aarch64.bfb --config bf.cfg --rshim rshim0

# console=hvc0 console=ttyAMA0 earlycon=pl011,0x01000000 fixrtc quiet

# on host
# set ip address to connect to bf2
# nmcli conn add type tun mode tap con-name tmfifo_net0 ifname tmfifo_net0 autoconnect yes ip4 192.168.100.1
nmcli conn modify tmfifo_net0 ipv4.address 192.168.100.1/30
nmcli conn up tmfifo_net0
# if you want to connect to bf2 through serial console
minicom --color on --baudrate 115200 --device /dev/rshim0/console

# on bf2
# login using ubuntu / panpan

sudo -i
passwd

sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
systemctl restart sshd

# set ip address to connect from host
cat << EOF > /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg
network: {config: disabled}
EOF
cat << EOF > /etc/netplan/50-netcfg-wzh.yaml
network:
    ethernets:
        oob_net0:
            dhcp4: true
        tmfifo_net0:
            addresses:
            - 192.168.100.2/30
            dhcp4: false
            nameservers:
                addresses:
                - 172.21.1.1
            routes:
            -   metric: 1025
                to: 0.0.0.0/0
                via: 192.168.100.1
    renderer: NetworkManager
    version: 2
EOF
netplan apply
/etc/init.d/networking restart

# on host
# 接下来,就可以很舒适的从宿主机上ssh到bf2卡上了
ssh root@192.168.100.2

dpi url-filter test

https://docs.nvidia.com/doca/sdk/url-filter/index.html

我们参考官方文档,做dpi URL-Filter的测试。

# on bf2
cd /opt/mellanox/doca/examples/url_filter/bin

echo 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
systemctl restart mlx-regex
systemctl status mlx-regex
# ● mlx-regex.service - Regex daemon for BlueField 2
#      Loaded: loaded (/etc/systemd/system/mlx-regex.service; enabled; vendor preset: enabled)
#      Active: active (running) since Thu 2021-12-16 11:47:01 UTC; 7s ago
#    Main PID: 55816 (mlx-regex)
#       Tasks: 1 (limit: 19083)
#      Memory: 564.0K
#      CGroup: /system.slice/mlx-regex.service
#              └─55816 /usr/bin/mlx-regex

# Dec 16 11:47:01 localhost systemd[1]: Started Regex daemon for BlueField 2.

/opt/mellanox/iproute2/sbin/mlxdevm port show
# pci/0000:03:00.0/294912: type eth netdev en3f0pf0sf0 flavour pcisf controller 0 pfnum 0 sfnum 0
#   function:
#     hw_addr 02:56:ae:76:cd:e9 state active opstate attached roce true max_uc_macs 128 trust off
# pci/0000:03:00.1/360448: type eth netdev en3f1pf1sf0 flavour pcisf controller 0 pfnum 1 sfnum 0
#   function:
#     hw_addr 02:26:61:34:13:9e state active opstate attached roce true max_uc_macs 128 trust off

/opt/mellanox/iproute2/sbin/mlxdevm port add pci/0000:03:00.0 flavour pcisf pfnum 0 sfnum 4
/opt/mellanox/iproute2/sbin/mlxdevm port add pci/0000:03:00.0 flavour pcisf pfnum 0 sfnum 5 

/opt/mellanox/iproute2/sbin/mlxdevm port show
# pci/0000:03:00.0/294912: type eth netdev en3f0pf0sf0 flavour pcisf controller 0 pfnum 0 sfnum 0
#   function:
#     hw_addr 02:56:ae:76:cd:e9 state active opstate attached roce true max_uc_macs 128 trust off
# pci/0000:03:00.0/294913: type eth netdev en3f0pf0sf4 flavour pcisf controller 0 pfnum 0 sfnum 4
#   function:
#     hw_addr 00:00:00:00:00:00 state inactive opstate detached roce true max_uc_macs 128 trust off
# pci/0000:03:00.0/294914: type eth netdev en3f0pf0sf5 flavour pcisf controller 0 pfnum 0 sfnum 5
#   function:
#     hw_addr 00:00:00:00:00:00 state inactive opstate detached roce true max_uc_macs 128 trust off
# pci/0000:03:00.1/360448: type eth netdev en3f1pf1sf0 flavour pcisf controller 0 pfnum 1 sfnum 0
#   function:
#     hw_addr 02:26:61:34:13:9e state active opstate attached roce true max_uc_macs 128 trust off

/opt/mellanox/iproute2/sbin/mlxdevm port function set pci/0000:03:00.0/294913 hw_addr 02:25:f2:8d:a2:4c trust on state active
/opt/mellanox/iproute2/sbin/mlxdevm port function set pci/0000:03:00.0/294914 hw_addr 02:25:f2:8d:a2:5c trust on state active

ovs-vsctl del-br ovsbr1 

ovs-vsctl add-br sf_bridge1
ovs-vsctl add-br sf_bridge2
ovs-vsctl add-port sf_bridge1 p0
ovs-vsctl add-port sf_bridge1 en3f0pf0sf4
ovs-vsctl add-port sf_bridge2 pf0hpf
ovs-vsctl add-port sf_bridge2 en3f0pf0sf5 

ovs-vsctl show
# 04d25b73-2f63-4e47-b7d9-2362cc4d7fda
#     Bridge ovsbr2
#         Port p1
#             Interface p1
#         Port en3f1pf1sf0
#             Interface en3f1pf1sf0
#         Port ovsbr2
#             Interface ovsbr2
#                 type: internal
#         Port pf1hpf
#             Interface pf1hpf
#     Bridge sf_bridge2
#         Port sf_bridge2
#             Interface sf_bridge2
#                 type: internal
#         Port en3f0pf0sf5
#             Interface en3f0pf0sf5
#         Port pf0hpf
#             Interface pf0hpf
#     Bridge sf_bridge1
#         Port sf_bridge1
#             Interface sf_bridge1
#                 type: internal
#         Port en3f0pf0sf4
#             Interface en3f0pf0sf4
#         Port p0
#             Interface p0
#     ovs_version: "2.15.1"

ifconfig en3f0pf0sf4 up
ifconfig en3f0pf0sf5 up

echo mlx5_core.sf.4  > /sys/bus/auxiliary/drivers/mlx5_core.sf_cfg/unbind
echo mlx5_core.sf.4  > /sys/bus/auxiliary/drivers/mlx5_core.sf/bind
echo mlx5_core.sf.5  > /sys/bus/auxiliary/drivers/mlx5_core.sf_cfg/unbind
echo mlx5_core.sf.5  > /sys/bus/auxiliary/drivers/mlx5_core.sf/bind

ls /sys/bus/auxiliary/devices/mlx5_core.sf.*
# /sys/bus/auxiliary/devices/mlx5_core.sf.2:
# driver  infiniband  infiniband_mad  infiniband_verbs  mlx5_core.eth.2  mlx5_core.rdma.2  net  power  sfnum  subsystem  uevent

# /sys/bus/auxiliary/devices/mlx5_core.sf.3:
# driver  infiniband  infiniband_mad  infiniband_verbs  mlx5_core.eth.3  mlx5_core.rdma.3  net  power  sfnum  subsystem  uevent

# /sys/bus/auxiliary/devices/mlx5_core.sf.4:
# driver  infiniband  infiniband_mad  infiniband_verbs  mlx5_core.eth.4  mlx5_core.rdma.4  net  power  sfnum  subsystem  uevent

# /sys/bus/auxiliary/devices/mlx5_core.sf.5:
# driver  infiniband  infiniband_mad  infiniband_verbs  mlx5_core.eth.5  mlx5_core.rdma.5  net  power  sfnum  subsystem  uevent

cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum
# 4

# on 104 host with bf2
# nmcli con modify enp6s0f1 ipv4.method manual ipv4.addresses 192.168.99.11/24
nmcli con down enp6s0f1
nmcli con modify enp6s0f0 ipv4.method manual ipv4.addresses 192.168.99.11/24
nmcli con up enp6s0f0

# on 104 bf2
# 我们创建url filter规则。
/opt/mellanox/doca/examples/url_filter/bin/doca_url_filter -a 0000:03:00.0,class=regex -a auxiliary:mlx5_core.sf.4,sft_en=1 -a auxiliary:mlx5_core.sf.5,sft_en=1 -- -p
URL FILTER>> create database
URL FILTER>> filter http wzh_hits_msg wzhtest
URL FILTER>> commit database /tmp/signature.txt
# /tmp/104052/signatures.rules
# rules file is /tmp/104052/signatures.rules
# Info: Setting target hardware version to v5.7...done
# Info: Setting virtual prefix mode to 0...done
# Info: Setting prefix capacity to 32K...done
# Info: Setting compiler objective value to 5...done
# Info: Setting number of threads for compilation to 1...done
# Info: Reading ruleset...done
# Info: Detected 2 rules
# Info: Enabling global single-line mode...done
# Info: Setting maximum TPE data width to 4...done
# Info: Scanning rules...[==============================]...done
# Info: Analising possible prefix usage...[==============================]...done
# Info: Mapping prefixes, phase 1...[==============================]...done
# Info: Mapping prefixes, phase 2...[==============================]...done
# Info: Running rules analysis...[==============================]...done
# Info: Optimizing memory map...[==============================]...done
# Info: Analyzing memory map...[==============================]...done
# Info: Calculating thread instructions...[==============================]...done
# Info: Beginning to write memory map for ROF2...done
# Info: PPE total 1-byte prefix usage: 0/256 (0%)
# Info: PPE total 2-byte prefix usage: 0/2048 (0%)
# Info: PPE total 3-byte prefix usage: 0/2048 (0%)
# Info: PPE total 4-byte prefix usage: 1/32768 (0.00305176%)
# Info: TPE instruction RAM TCM partition usage: 2048/2048 (100%)
# Info: TPE instruction RAM external memory partition usage: 6207/13M (0.0455343%)
# Info: TPE class RAM usage: 1/256 (0.390625%)
# Info: Estimated threads/byte: 5.183e-10
# Info: Finalizing memory map for ROF2...done
# Info: Storing ROF2 data...done
# Info: Number of rules compiled = 2/2
# Info: Writing ROF2 file to /tmp/104052/rof/signatures_compiled.rof2
# Info: Writing binary ROF2 file to /tmp/104052/rof/signatures_compiled.rof2.binary...done
URL FILTER>> [12:36:50:606702][DOCA][I][UFLTR::Core]: SIG ID: 1, URL MSG: wzh_hits_msg, SFT_FID: 1

# on 101
curl http://192.168.99.11
# ....
#       <footer class="col-sm-12">
#       <a href="https://apache.org">Apache&trade;</a> is a registered trademark of <a href="https://apache.org">the Apache Software Foundation</a> in the United States and/or other countries.<br />
#       <a href="https://nginx.org">NGINX&trade;</a> is a registered trademark of <a href="https://">F5 Networks, Inc.</a>.
#       </footer>

#   </body>
# </html>

curl http://192.168.99.11/test
# <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
# <html><head>
# <title>404 Not Found</title>
# </head><body>
# <h1>Not Found</h1>
# <p>The requested URL was not found on this server.</p>
# </body></html>

# 一下url命中了规则,可以看到访问不成功。
# 其他没有命中的规则,就可以访问http服务。
curl http://192.168.99.11/wzhtest
# curl: (56) Recv failure: Connection timed out

performance test

简单的测试一下性能,由于环境的物理设备条件所限,所以结果并不准确。

# on 104 host
dnf install -y iperf3
iperf3 -s -p 6666

# on 101 host
iperf3 -c 192.168.99.11 -p 6666
# Connecting to host 192.168.99.11, port 6666
# [  5] local 192.168.99.21 port 37060 connected to 192.168.99.11 port 6666
# [ ID] Interval           Transfer     Bitrate         Retr  Cwnd
# [  5]   0.00-1.00   sec  1.40 GBytes  12.1 Gbits/sec   17    905 KBytes
# [  5]   1.00-2.00   sec  1.46 GBytes  12.6 Gbits/sec   26    795 KBytes
# [  5]   2.00-3.00   sec  1.41 GBytes  12.1 Gbits/sec   71    922 KBytes
# [  5]   3.00-4.00   sec  1.49 GBytes  12.8 Gbits/sec    0    998 KBytes
# [  5]   4.00-5.00   sec  1.44 GBytes  12.4 Gbits/sec   44   1010 KBytes
# [  5]   5.00-6.00   sec  1.34 GBytes  11.5 Gbits/sec  101    796 KBytes
# [  5]   6.00-7.00   sec  1.45 GBytes  12.5 Gbits/sec    9    925 KBytes
# [  5]   7.00-8.00   sec  1.39 GBytes  11.9 Gbits/sec    0   1014 KBytes
# [  5]   8.00-9.00   sec  1.45 GBytes  12.4 Gbits/sec   62    930 KBytes
# [  5]   9.00-10.00  sec  1.44 GBytes  12.3 Gbits/sec  157   1.07 MBytes
# - - - - - - - - - - - - - - - - - - - - - - - - -
# [ ID] Interval           Transfer     Bitrate         Retr
# [  5]   0.00-10.00  sec  14.3 GBytes  12.3 Gbits/sec  487             sender
# [  5]   0.00-10.04  sec  14.3 GBytes  12.2 Gbits/sec                  receiver

# iperf Done.

ethtool enp5s0f1
# Settings for enp5s0f1:
#         Supported ports: [ Backplane ]
#         Supported link modes:   1000baseKX/Full
#                                 10000baseKR/Full
#                                 25000baseCR/Full
#                                 25000baseKR/Full
#                                 25000baseSR/Full
#         Supported pause frame use: Symmetric
#         Supports auto-negotiation: Yes
#         Supported FEC modes: None        RS      BASER
#         Advertised link modes:  1000baseKX/Full
#                                 10000baseKR/Full
#                                 25000baseCR/Full
#                                 25000baseKR/Full
#                                 25000baseSR/Full
#         Advertised pause frame use: Symmetric
#         Advertised auto-negotiation: Yes
#         Advertised FEC modes: None       RS      BASER
#         Link partner advertised link modes:  Not reported
#         Link partner advertised pause frame use: No
#         Link partner advertised auto-negotiation: Yes
#         Link partner advertised FEC modes: Not reported
#         Speed: 25000Mb/s
#         Duplex: Full
#         Auto-negotiation: on
#         Port: Direct Attach Copper
#         PHYAD: 0
#         Transceiver: internal
#         Supports Wake-on: d
#         Wake-on: d
#         Current message level: 0x00000004 (4)
#                                link
#         Link detected: yes

others


# firewall-cmd --permanent --direct --add-rule ipv4 nat POSTROUTING 0 -o eth_ext -j MASQUERADE
# firewall-cmd --permanent --direct --add-rule ipv4 filter FORWARD 0 -i eth_int -o eth_ext -j ACCEPT
# firewall-cmd --permanent --direct --add-rule ipv4 filter FORWARD 0 -i eth_ext -o eth_int -m state --state RELATED,ESTABLISHED -j ACCEPT
# firewall-cmd --permanent --add-port=80/tcp
# firewall-cmd --permanent --add-port=443/tcp
# firewall-cmd --permanent --add-port=53/tcp
# firewall-cmd --permanent --add-port=53/udp
# firewall-cmd --permanent --add-masquerade
# firewall-cmd --reload

# firewall-cmd --permanent --direct --remove-rule ipv4 nat POSTROUTING 0 -o eth_ext -j MASQUERADE
# firewall-cmd --permanent --direct --remove-rule ipv4 filter FORWARD 0 -i eth_int -o eth_ext -j ACCEPT
# firewall-cmd --permanent --direct --remove-rule ipv4 filter FORWARD 0 -i eth_ext -o eth_int -m state --state RELATED,ESTABLISHED -j ACCEPT
# firewall-cmd --permanent --remove-port=80/tcp
# firewall-cmd --permanent --remove-port=443/tcp
# firewall-cmd --permanent --remove-port=53/tcp
# firewall-cmd --permanent --remove-port=53/udp
# firewall-cmd --permanent --remove-masquerade
# firewall-cmd --reload