
Enabling SNAP on the Mellanox BF2 NIC to Support NVMe over Fabrics

This article describes how to configure SNAP on a Mellanox BF2 NIC so that a remote NVMe device can be presented to the host machine, achieving NVMe over Fabrics.

Our lab environment consists of two physical machines, both running Rocky Linux 8.5 as the host OS. One machine has an NVMe device; the other has a BF2 NIC.

The lab architecture is as follows (a rough sketch, reconstructed from the addresses and devices used below):
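
        +------------------------+            +------------------------------+
        | 101: rocky linux 8.5   |            | 104: rocky linux 8.5 (host)  |
        | nvme-of target         |            |  +------------------------+  |
        | /dev/nvme0n1           |   rdma /   |  | bf2 dpu, running snap  |  |
        | enp5s0f1               | <--------> |  | sf enp3s0f0s0          |  |
        | 192.168.99.21/24       |  nvme-of   |  | 192.168.99.11/24       |  |
        +------------------------+            |  | emulated disk -> vda   |  |
                                              |  +------------------------+  |
                                              +------------------------------+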

Setting Up the Lab Environment

First, we need to flash the BF2 with Mellanox's official DOCA BFB image, which can loosely be understood as flashing the NIC firmware; follow the documentation here to do it.
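
For reference, the flash typically goes through the rshim interface on the machine hosting the BF2. A minimal sketch, assuming the rshim driver is loaded; the .bfb file name below is a placeholder:

        # on the 104 host; the image file name is a placeholder
        bfb-install --bfb DOCA_Ubuntu_20.04.bfb --rshim rshim0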

Next, we configure NVMe over Fabrics target capability on the host that has the NVMe device.


        # on 101
        # config nvme storage server side
        # https://access.redhat.com/documentation/zh-cn/red_hat_enterprise_linux/8/html/managing_storage_devices/overview-of-nvme-over-fabric-devicesmanaging-storage-devices

        nmcli con modify enp5s0f1 ipv4.method manual ipv4.addresses 192.168.99.21/24
        nmcli con up enp5s0f1

        yum install -y nvmetcli

        cd /data/down/

        # wget http://git.infradead.org/users/hch/nvmetcli.git/blob_plain/0a6b088db2dc2e5de11e6f23f1e890e4b54fee64:/rdma.json
        
        cat << EOF > /data/down/rdma.json
        {
          "hosts": [
            {
              "nqn": "hostnqn"
            }
          ],
          "ports": [
            {
              "addr": {
                "adrfam": "ipv4",
                "traddr": "192.168.99.21",
                "treq": "not specified",
                "trsvcid": "4420",
                "trtype": "rdma"
              },
              "portid": 2,
              "referrals": [],
              "subsystems": [
                "testnqn"
              ]
            }
          ],
          "subsystems": [
            {
              "allowed_hosts": [],
              "attr": {
                "allow_any_host": "1"
              },
              "namespaces": [
                {
                  "device": {
                    "nguid": "ef90689c-6c46-d44c-89c1-4067801309a8",
                    "path": "/dev/nvme0n1"
                  },
                  "enable": 1,
                  "nsid": 1
                }
              ],
              "nqn": "testnqn"
            }
          ]
        }
        EOF
        modprobe nvmet-rdma
        nvmetcli restore /data/down/rdma.json
        
        dmesg
        # ........
        # [32664.912901] nvmet: adding nsid 1 to subsystem testnqn
        # [32664.914013] nvmet_rdma: enabling port 2 (192.168.99.21:4420)

        # to clear the config
        
        nvmetcli clear
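        # note: nvmetcli clear empties the live config; to persist the target
        # config across reboots instead, nvmetcli can save it to a json file
        # (the path /etc/nvmet/config.json and the bundled nvmet.service are
        # assumptions based on nvmetcli's documentation)
        # nvmetcli save /etc/nvmet/config.json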
        
        nvme list
        # Node                  SN                   Model                                    Namespace Usage                      Format           FW Rev
        # --------------------- -------------------- ---------------------------------------- --------- -------------------------- ---------------- --------
        # /dev/nvme0n1          CVCQ726600A0400AGN   INTEL SSDPEDMW400G4                      1         400.09  GB / 400.09  GB    512   B +  0 B   8EV10171
        
        # a quick test

        yum install -y nvme-cli
        modprobe nvme-rdma
        nvme discover -t rdma -a 192.168.99.21 -s 4420
        # Discovery Log Number of Records 1, Generation counter 2
        # =====Discovery Log Entry 0======
        # trtype:  rdma
        # adrfam:  ipv4
        # subtype: nvme subsystem
        # treq:    not specified, sq flow control disable supported
        # portid:  2
        # trsvcid: 4420
        # subnqn:  testnqn
        # traddr:  192.168.99.21
        # rdma_prtype: not specified
        # rdma_qptype: connected
        # rdma_cms:    rdma-cm
        # rdma_pkey: 0x0000
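
To confirm the target works end to end before involving SNAP, you can also connect to it directly with nvme-cli from any RDMA-capable client. This is a minimal sketch; the namespace shows up as an ordinary /dev/nvme* block device until you disconnect.

        nvme connect -t rdma -a 192.168.99.21 -s 4420 -n testnqn
        nvme list
        nvme disconnect -n testnqn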

Next, we do some configuration on the BF2.


        # on 104 bf2

        # check the basic info
        ovs-vsctl show
        # 04d25b73-2f63-4e47-b7d9-2362cc4d7fda
        #     Bridge ovsbr2
        #         Port p1
        #             Interface p1
        #         Port en3f1pf1sf0
        #             Interface en3f1pf1sf0
        #         Port ovsbr2
        #             Interface ovsbr2
        #                 type: internal
        #         Port pf1hpf
        #             Interface pf1hpf
        #     Bridge ovsbr1
        #         Port en3f0pf0sf0
        #             Interface en3f0pf0sf0
        #         Port pf0hpf
        #             Interface pf0hpf
        #         Port p0
        #             Interface p0
        #         Port ovsbr1
        #             Interface ovsbr1
        #                 type: internal
        #     ovs_version: "2.15.1"
        
        # alternate, manual ways to assign the address:
        # nmcli con modify enp3s0f0s0 ipv4.method manual ipv4.addresses 192.168.99.11/24
        # nmcli con up enp3s0f0s0
        # ip addr add 192.168.99.11/24 dev enp3s0f0s0
        # ip addr del 192.168.99.11/24 dev enp3s0f0s0

        # give one SF port an IP address, so the BF2 can reach the remote
        # nvme target; we do it with a netplan file
        
        cat << EOF > /etc/netplan/70-wzh-mlnx.yaml
        network:
            ethernets:
                enp3s0f0s0:
                    addresses:
                    - 192.168.99.11/24
                    dhcp4: false
            renderer: NetworkManager
            version: 2
        
        EOF
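
        # apply the netplan file and verify connectivity to the target
        # (an assumption: the BF2's DOCA image uses netplan with the
        # NetworkManager renderer as configured above; a reboot would also
        # pick the file up)
        netplan apply
        ip addr show enp3s0f0s0
        ping -c 3 192.168.99.21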
        
        # configure the BF2 firmware parameters
        
        mlxconfig -y -d /dev/mst/mt41686_pciconf0 s \
                        PF_BAR2_ENABLE=0 \
                        PER_PF_NUM_SF=1
        mlxconfig -y -d /dev/mst/mt41686_pciconf0 s \
                        PCI_SWITCH_EMULATION_ENABLE=1 \
                        PCI_SWITCH_EMULATION_NUM_PORT=16 \
                        VIRTIO_NET_EMULATION_ENABLE=1 \
                        VIRTIO_NET_EMULATION_NUM_VF=0 \
                        VIRTIO_NET_EMULATION_NUM_PF=0 \
                        VIRTIO_NET_EMULATION_NUM_MSIX=16 \
                        ECPF_ESWITCH_MANAGER=1 \
                        ECPF_PAGE_SUPPLIER=1 \
                        SRIOV_EN=0 \
                        PF_SF_BAR_SIZE=8 \
                        PF_TOTAL_SF=64
        mlxconfig -y -d /dev/mst/mt41686_pciconf0.1 s \
                        PF_SF_BAR_SIZE=10 \
                        PF_TOTAL_SF=64
        mlxconfig -y -d /dev/mst/mt41686_pciconf0 s \
                        VIRTIO_BLK_EMULATION_ENABLE=1 \
                        VIRTIO_BLK_EMULATION_NUM_PF=0 \
                        VIRTIO_BLK_EMULATION_NUM_VF=0 \
                        VIRTIO_BLK_EMULATION_NUM_MSIX=16 \
                        EXP_ROM_VIRTIO_BLK_UEFI_x86_ENABLE=0
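
        # optionally verify the settings took effect (mlxconfig q prints
        # current and next-boot values; run mst start first if /dev/mst is
        # empty)
        mlxconfig -d /dev/mst/mt41686_pciconf0 q | grep -E 'VIRTIO_BLK_EMULATION|PF_TOTAL_SF'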
        
        # clear out the stock snap config
        # the system creates a demo nvme device by default; to keep the
        # experiment clean, we empty the default config files
        
        /bin/cp -f /etc/mlnx_snap/snap_rpc_init_bf2.conf /etc/mlnx_snap/snap_rpc_init_bf2.conf.wzh
        /bin/cp -f /etc/mlnx_snap/spdk_rpc_init.conf /etc/mlnx_snap/spdk_rpc_init.conf.wzh
        
        echo "" > /etc/mlnx_snap/snap_rpc_init_bf2.conf
        echo "" > /etc/mlnx_snap/spdk_rpc_init.conf
        
        # remember to COLD reboot
        
        poweroff
        
        # on bf2

        # after the reboot, we set up the snap service manually, step by
        # step, to get a deeper understanding of spdk and snap
        snap_rpc.py subsystem_nvme_create Mellanox_NVMe_SNAP "Mellanox NVMe SNAP Controller"
        # {
        #   "nqn": "nqn.2021-06.mlnx.snap:8b82f658f138ceaf83e3bfc261a7fb14:0",
        #   "subsys_id": 0
        # }
        
        snap_rpc.py controller_nvme_create mlx5_0 --subsys_id 0 --pf_id 0
        # {
        #   "name": "NvmeEmu0pf0",
        #   "cntlid": 0,
        #   "version": "1.3.0",
        #   "offload": false,
        #   "mempool": false,
        #   "max_nsid": 1024,
        #   "max_namespaces": 1024
        # }
        
        spdk_rpc.py bdev_nvme_attach_controller -b Nvme0 -t rdma -a 192.168.99.21 -f ipv4 -s 4420 -n testnqn
        # Nvme0n1

        snap_rpc.py controller_nvme_namespace_attach -c NvmeEmu0pf0 spdk Nvme0n1 1

        snap_rpc.py emulation_device_attach --num_msix 8 mlx5_0 virtio_blk
        # {
        #   "emulation_manager": "mlx5_0",
        #   "emulation_type": "virtio_blk",
        #   "pci_type": "physical function",
        #   "pci_index": 0
        # }

        snap_rpc.py controller_virtio_blk_create mlx5_0 --bdev_type spdk --bdev Nvme0n1 --pf_id 0 --num_queues 7
        # VblkEmu0pf0
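
        # note on sizing: --num_queues 7 is one less than the 8 MSI-X vectors
        # requested at hotplug time (an assumption: at least one vector has
        # to stay free for virtio config interrupts)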
        
        # configuration done; check the status
        snap_rpc.py controller_nvme_namespace_list -n nqn.2021-06.mlnx.snap:8b82f658f138ceaf83e3bfc261a7fb14:0 -i 0
        # {
        #   "name": "NvmeEmu0pf0",
        #   "cntlid": 0,
        #   "Namespaces": [
        #     {
        #       "nsid": 1,
        #       "bdev": "Nvme0n1",
        #       "bdev_type": "spdk",
        #       "qn": "",
        #       "protocol": "",
        #       "snap-direct": true
        #     }
        #   ]
        # }
        
        snap_rpc.py emulation_managers_list
        # [
        #   {
        #     "emulation_manager": "mlx5_0",
        #     "hotplug_support": true,
        #     "supported_types": [
        #       "nvme",
        #       "virtio_blk",
        #       "virtio_net"
        #     ]
        #   }
        # ]
        
        spdk_rpc.py bdev_nvme_get_controllers
        # [
        #   {
        #     "name": "Nvme0",
        #     "trid": {
        #       "trtype": "RDMA",
        #       "adrfam": "IPv4",
        #       "traddr": "192.168.99.21",
        #       "trsvcid": "4420",
        #       "subnqn": "testnqn"
        #     }
        #   }
        # ]
        
        snap_rpc.py controller_list
        # [
        #   {
        #     "mempool": false,
        #     "name": "VblkEmu0pf0",
        #     "emulation_manager": "mlx5_0",
        #     "type": "virtio_blk",
        #     "pci_index": 0,
        #     "pci_bdf": "07:00.0"
        #   },
        #   {
        #     "subnqn": "nqn.2021-06.mlnx.snap:8b82f658f138ceaf83e3bfc261a7fb14:0",
        #     "cntlid": 0,
        #     "version": "1.3.0",
        #     "offload": false,
        #     "mempool": false,
        #     "max_nsid": 1024,
        #     "max_namespaces": 1024,
        #     "name": "NvmeEmu0pf0",
        #     "emulation_manager": "mlx5_0",
        #     "type": "nvme",
        #     "pci_index": 0,
        #     "pci_bdf": "06:00.2"
        #   }
        # ]

Testing


        # on 101, rocky linux

        lsblk
        # NAME               MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
        # sda                  8:0    0 278.9G  0 disk
        # ├─sda1               8:1    0     1G  0 part /boot
        # └─sda2               8:2    0 277.9G  0 part
        #   └─rl_lab101-root 253:0    0 277.9G  0 lvm  /
        # sr0                 11:0    1  1024M  0 rom
        # nvme0n1            259:0    0 372.6G  0 disk
        # └─nvme-data        253:1    0 372.6G  0 lvm

        # on 104 host, rocky linux

        # before snap setting
        lsblk
        # NAME                  MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
        # sda                     8:0    0 278.9G  0 disk
        # ├─sda1                  8:1    0   600M  0 part /boot/efi
        # ├─sda2                  8:2    0     1G  0 part /boot
        # └─sda3                  8:3    0 277.3G  0 part
        #   └─rl_panlab104-root 253:0    0 277.3G  0 lvm  /

        # after snap setting
        lsblk
        # NAME                  MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
        # sda                     8:0    0 278.9G  0 disk
        # ├─sda1                  8:1    0   600M  0 part /boot/efi
        # ├─sda2                  8:2    0     1G  0 part /boot
        # └─sda3                  8:3    0 277.3G  0 part
        #   └─rl_panlab104-root 253:0    0 277.3G  0 lvm  /
        # vda                   252:0    0 372.6G  0 disk
        # └─nvme-data           253:1    0 372.6G  0 lvm
        
        mount /dev/mapper/nvme-data /mnt
        ls /mnt
        
        # bgp-router.qcow2  ocp4-master-0.qcow2  ocp4-windows.qcow2
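
        # optional: a quick sequential-read sanity check over the fabric;
        # iflag=direct bypasses the page cache (plain coreutils dd, safe to
        # run read-only against the emulated disk)
        dd if=/dev/vda of=/dev/null bs=1M count=1024 iflag=direct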

Persisting the Configuration

The configuration above was experimental, done step by step by hand. To pin it down so that it survives reboots, write the same RPC calls into the snap init files:


        # on 104, bf2

        cat << EOF > /etc/mlnx_snap/snap_rpc_init_bf2.conf
        subsystem_nvme_create Mellanox_NVMe_SNAP "Mellanox NVMe SNAP Controller"
        controller_nvme_create mlx5_0 --subsys_id 0 --pf_id 0
        controller_nvme_namespace_attach -c NvmeEmu0pf0 spdk Nvme0n1 1
        emulation_device_attach --num_msix 8 mlx5_0 virtio_blk
        controller_virtio_blk_create mlx5_0 --bdev_type spdk --bdev Nvme0n1 --pf_id 0 --num_queues 7
        EOF

        cat << EOF > /etc/mlnx_snap/spdk_rpc_init.conf
        bdev_nvme_attach_controller -b Nvme0 -t rdma -a 192.168.99.21 -f ipv4 -s 4420 -n testnqn
        EOF
        
        # cold reboot
        
        poweroff
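
After the cold reboot, the snap service replays both init files on startup, and the virtio disk should reappear on the host without any manual RPC calls. A minimal check, assuming the service on the DOCA image is named mlnx_snap:

        # on 104 bf2
        systemctl status mlnx_snap
        # on the 104 host
        lsblk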