← Back to Index

MetalLB BGP mode on openshift 4.8

openshift对外提供服务,默认是router的方式,里面是一个haproxy,但是默认只是支持http/https,定制一下,可以支持tcp。这种配置方法不是很直观,特别是tcp的支持也很鸡肋。我们希望的方式,是k8s service直接暴露一个对外服务ip,并且通过bgp广播出去。今天,我们就看看metalLB项目如何帮助我们达到这个目的。

本次实验部署架构图:

视频讲解:

安装 MetalLB

安装MetalLB非常简单

https://metallb.universe.tf/installation/clouds/#metallb-on-openshift-ocp


        # Fetch the upstream MetalLB v0.10.2 manifests into a working directory.
        mkdir -p /data/install/metallb
        cd /data/install/metallb
        
        wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/namespace.yaml
        wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/metallb.yaml
        
        # OpenShift assigns pod UIDs itself, so strip the hard-coded
        # runAsUser lines (this is the documented MetalLB-on-OCP tweak).
        sed -i '/runAsUser: 65534/d' ./metallb.yaml
        
        oc create -f namespace.yaml
        # The speaker DaemonSet runs on the host network to talk BGP, so its
        # service account must be granted the privileged SCC first.
        oc adm policy add-scc-to-user privileged -n metallb-system -z speaker
        oc create -f metallb.yaml

创建路由器

我们用一个 kvm 来模拟 bgp 路由器


        # to setup a router vm for testing
        
        # go to kvm host
        
        cd /data/kvm
        
        # Kickstart template used for an unattended Rocky 8 install.
        wget https://raw.githubusercontent.com/wangzheng422/docker_env/dev/redhat/ocp4/4.8/scripts/helper-ks-rocky.cfg
        
        # Patch only the FIRST matching line (the '0,/…/' address range) so
        # the VM gets a static address on the 172.21.6.0/24 management network.
        sed -i '0,/^network.*/s/^network.*/network  --bootproto=static --device=enp1s0 --gateway=172.21.6.254 --ip=172.21.6.10  --netmask=255.255.255.0 --nameserver=172.21.1.1  --ipv6=auto --activate/' helper-ks-rocky.cfg
        
        sed -i '0,/^network  --hostname.*/s/^network  --hostname.*/network  --hostname=bgp-router/' helper-ks-rocky.cfg
        
        # Create the router VM; the kickstart file is injected into the
        # installer initrd so the install runs fully unattended.
        virt-install --name="bgp-router" --vcpus=2 --ram=2048 \
        --cpu=host-model \
        --disk path=/data/nvme/bgp-router.qcow2,bus=virtio,size=30 \
        --os-variant rhel8.4 --network bridge=baremetal,model=virtio \
        --graphics vnc,port=49000 \
        --boot menu=on --location /data/kvm/Rocky-8.4-x86_64-minimal.iso \
        --initrd-inject helper-ks-rocky.cfg --extra-args "inst.ks=file:/helper-ks-rocky.cfg" 
        
        # in the bgp-router vm
        
        # Add a secondary IP on the cluster network so the router can peer
        # with the OpenShift nodes on 192.168.7.0/24.
        nmcli con mod enp1s0 +ipv4.addresses "192.168.7.10/24"
        nmcli con up enp1s0
        
        # Lab shortcut: disable the firewall so BGP (tcp/179) and the test
        # traffic are not blocked.
        systemctl disable --now firewalld
        
        dnf install -y frr
        
        # FRR ships with all protocol daemons off; enable bgpd, then start FRR.
        sed -i 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons
        systemctl enable --now frr
        
        # Enter the router's interactive configuration shell.
        
        vtysh
        
        # BGP router configuration below (run 'configure terminal' inside
        # vtysh before entering these lines).
        
        # AS 64512 is a private ASN; 'bgp listen range' accepts dynamic iBGP
        # peers from any address in 192.168.7.0/24 (up to the limit of 200),
        # so every cluster node's speaker can connect without being listed
        # individually.
        router bgp 64512
         neighbor metallb peer-group
         neighbor metallb remote-as 64512
         bgp listen limit 200
         bgp listen range 192.168.7.0/24 peer-group metallb

配置 MetalLB 和 bgp-router 进行配对


        # on helper
        
        # MetalLB v0.10 is configured via a ConfigMap named 'config' in the
        # metallb-system namespace.  my-asn == peer-asn (64512) makes this an
        # iBGP session to the router at 192.168.7.10.  avoid-buggy-ips skips
        # the .0 and .255 addresses of the pool; 198.51.100.0/24 is the range
        # handed out to LoadBalancer services and advertised over BGP.
        cat << EOF > /data/install/metal-bgp.yaml
        apiVersion: v1
        kind: ConfigMap
        metadata:
          namespace: metallb-system
          name: config
        data:
          config: |
            peers:
            - my-asn: 64512
              peer-asn: 64512
              peer-address: 192.168.7.10
            address-pools:
            - name: my-ip-space
              protocol: bgp
              avoid-buggy-ips: true
              addresses:
              - 198.51.100.0/24
        EOF
        oc create -f /data/install/metal-bgp.yaml
        
        # to restore
        
        oc delete -f /data/install/metal-bgp.yaml

回到 bgp-router 看看路由情况


        # back to bgp-router vm

        vtysh

        bgp-router# show ip bgp summary

        IPv4 Unicast Summary:
        BGP router identifier 192.168.7.10, local AS number 64512 vrf-id 0
        BGP table version 0
        RIB entries 0, using 0 bytes of memory
        Peers 2, using 43 KiB of memory
        Peer groups 1, using 64 bytes of memory

        Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt
        *192.168.7.13   4      64512         2         2        0    0    0 00:00:25            0        0
        *192.168.7.16   4      64512         2         2        0    0    0 00:00:25            0        0

        Total number of neighbors 2

        * - dynamic neighbor
        2 dynamic neighbor(s), limit 200

我们看到,集群里面的2个node,分别和路由器建立了peer关系。

创建测试应用


        # back to helper vm
        
        # Demo workload: two identical pods pinned to different nodes
        # (master-0 and worker-0) plus a LoadBalancer service selecting them
        # via the env=test label.  MetalLB allocates the external IP for the
        # service and the speakers advertise it to the BGP router.
        cat << EOF > /data/install/demo.yaml
        ---
        apiVersion: v1
        kind: Pod
        metadata:
          name: test-0
          labels:
            env: test
        spec:
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/hostname: 'master-0'
          containers:
          - name: php
            image: "quay.io/wangzheng422/php:demo.02"
        ---
        apiVersion: v1
        kind: Pod
        metadata:
          name: test-1
          labels:
            env: test
        spec:
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/hostname: 'worker-0'
          containers:
          - name: php
            image: "quay.io/wangzheng422/php:demo.02"
        ---
        kind: Service
        apiVersion: v1
        metadata:
          name: demo
        spec:
          type: LoadBalancer
          ports:
            - name: "http"
              protocol: TCP
              port: 80
              targetPort: 80
          selector:
            env: test
        EOF
        oc create -f /data/install/demo.yaml
        
        # to restore
        
        oc delete -f /data/install/demo.yaml
        
        oc get all
        
        # NAME                         READY   STATUS    RESTARTS   AGE
        
        # pod/mypod-787d79b456-4f4xr   1/1     Running   3          3d23h
        
        # pod/test-0                   1/1     Running   0          2m28s
        
        # pod/test-1                   1/1     Running   0          2m28s
        
        # NAME                 TYPE           CLUSTER-IP     EXTERNAL-IP                            PORT(S)        AGE
        
        # service/demo         LoadBalancer   172.30.82.87   198.51.100.1                           80:32203/TCP   2m28s
        
        # service/kubernetes   ClusterIP      172.30.0.1     <none>                                 443/TCP        4d22h
        
        # service/openshift    ExternalName   <none>         kubernetes.default.svc.cluster.local   <none>         4d22h
        
        # NAME                    READY   UP-TO-DATE   AVAILABLE   AGE
        
        # deployment.apps/mypod   1/1     1            1           3d23h
        
        # NAME                               DESIRED   CURRENT   READY   AGE
        
        # replicaset.apps/mypod-787d79b456   1         1         1       3d23h
        
        oc get pod -o wide
        
        # NAME                     READY   STATUS    RESTARTS   AGE     IP             NODE       NOMINATED NODE   READINESS GATES
        
        # mypod-787d79b456-4f4xr   1/1     Running   3          4d      10.254.1.2     worker-0   <none>           <none>
        
        # test-0                   1/1     Running   0          8m38s   10.254.0.66    master-0   <none>           <none>
        
        # test-1                   1/1     Running   0          8m38s   10.254.1.230   worker-0   <none>           <none>
        
        oc get svc/demo -o yaml
        
        # apiVersion: v1
        
        # kind: Service
        
        # metadata:
        
        #   creationTimestamp: "2021-08-30T12:42:21Z"
        
        #   name: demo
        
        #   namespace: default
        
        #   resourceVersion: "2046159"
        
        #   uid: 1af07435-5234-4062-994d-4715453118c6
        
        # spec:
        
        #   clusterIP: 172.30.82.87
        
        #   clusterIPs:
        
        #   - 172.30.82.87
        
        #   externalTrafficPolicy: Cluster
        
        #   ipFamilies:
        
        #   - IPv4
        
        #   ipFamilyPolicy: SingleStack
        
        #   ports:
        
        #   - name: http
        
        #     nodePort: 32203
        
        #     port: 80
        
        #     protocol: TCP
        
        #     targetPort: 80
        
        #   selector:
        
        #     env: test
        
        #   sessionAffinity: None
        
        #   type: LoadBalancer
        
        # status:
        
        #   loadBalancer:
        
        #     ingress:
        
        #     - ip: 198.51.100.1

回到 bgp-router 看看路由更新情况


        # back to bgp-router

        bgp-router# show ip bgp summary

        IPv4 Unicast Summary:
        BGP router identifier 192.168.7.10, local AS number 64512 vrf-id 0
        BGP table version 1
        RIB entries 1, using 192 bytes of memory
        Peers 2, using 43 KiB of memory
        Peer groups 1, using 64 bytes of memory

        Neighbor        V         AS   MsgRcvd   MsgSent   TblVer  InQ OutQ  Up/Down State/PfxRcd   PfxSnt
        *192.168.7.13   4      64512        73        72        0    0    0 00:35:16            1        0
        *192.168.7.16   4      64512        73        72        0    0    0 00:35:16            1        0

        Total number of neighbors 2

        * - dynamic neighbor
        2 dynamic neighbor(s), limit 200

        bgp-router# show ip bgp neighbors 192.168.7.13 routes
        BGP table version is 1, local router ID is 192.168.7.10, vrf id 0
        Default local pref 100, local AS 64512
        Status codes:  s suppressed, d damped, h history, * valid, > best, = multipath,
                       i internal, r RIB-failure, S Stale, R Removed
        Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self
        Origin codes:  i - IGP, e - EGP, ? - incomplete

           Network          Next Hop            Metric LocPrf Weight Path
        *>i198.51.100.1/32  192.168.7.13                    0      0 ?

        Displayed  1 routes and 2 total paths
        bgp-router#
        bgp-router# show ip bgp neighbors 192.168.7.16 routes
        BGP table version is 1, local router ID is 192.168.7.10, vrf id 0
        Default local pref 100, local AS 64512
        Status codes:  s suppressed, d damped, h history, * valid, > best, = multipath,
                       i internal, r RIB-failure, S Stale, R Removed
        Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self
        Origin codes:  i - IGP, e - EGP, ? - incomplete

           Network          Next Hop            Metric LocPrf Weight Path
        *=i198.51.100.1/32  192.168.7.16                    0      0 ?

        Displayed  1 routes and 2 total paths

在路由器的shell界面上看看

ip r
        
        # default via 172.21.6.254 dev enp1s0 proto static metric 100
        
        # 172.21.6.0/24 dev enp1s0 proto kernel scope link src 172.21.6.10 metric 100
        
        # 192.168.7.0/24 dev enp1s0 proto kernel scope link src 192.168.7.10 metric 100
        
        # 198.51.100.1 proto bgp metric 20
        
        #         nexthop via 192.168.7.13 dev enp1s0 weight 1
        
        #         nexthop via 192.168.7.16 dev enp1s0 weight 1
        
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.230
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.230
        [root@bgp-router ~]# curl 198.51.100.1 && echo
        Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66

到worker-0上,看看 nft 规则


        # go to worker-0 to analyze the nat rules
        
        # Find which kube-proxy chain handles the external IP: traffic to
        # 198.51.100.1:80 jumps to a KUBE-FW-* (LoadBalancer firewall) chain.
        nft list ruleset | grep 198.51
                        # meta l4proto tcp ip daddr 198.51.100.1  tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
        
        # The KUBE-FW chain marks for masquerade, then jumps to the
        # service's KUBE-SVC chain (MARK-DROP is the fallback).
        nft list ruleset | grep KUBE-FW-CTBMGJDNUDRWEDVR -A 5
        
        #                 meta l4proto tcp ip daddr 198.51.100.1  tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
        
        #                 meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.145.124  tcp dport 443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                 meta l4proto tcp ip daddr 172.30.145.124  tcp dport 443 counter packets 0 bytes 0 jump KUBE-SVC-L54HVQEJKTL2PXFK
        
        #                 meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.16.253  tcp dport 8443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                 meta l4proto tcp ip daddr 172.30.16.253  tcp dport 8443 counter packets 0 bytes 0 jump KUBE-SVC-YVQ2VVJT4ABSS56R
        
        #                 meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.185.119  tcp dport 9091 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        # --
        
        #         chain KUBE-FW-CTBMGJDNUDRWEDVR {
        
        #                  counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                  counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
        
        #                  counter packets 0 bytes 0 jump KUBE-MARK-DROP
        
        #         }
        
        
        # The KUBE-SVC chain load-balances: it jumps to one KUBE-SEP-*
        # (service endpoint) chain per backend pod.
        nft list ruleset | grep KUBE-SVC-CTBMGJDNUDRWEDVR -A 3
        
        #                 meta l4proto tcp ip daddr 172.30.82.87  tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
        
        #                 meta l4proto tcp ip daddr 198.51.100.1  tcp dport 80 counter packets 11 bytes 660 jump KUBE-FW-CTBMGJDNUDRWEDVR
        
        #                 meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.145.124  tcp dport 443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                 meta l4proto tcp ip daddr 172.30.145.124  tcp dport 443 counter packets 0 bytes 0 jump KUBE-SVC-L54HVQEJKTL2PXFK
        
        # --
        
        #                 meta l4proto tcp  tcp dport 32203 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
        
        #         }
        
        #         chain KUBE-SVC-DCLNKYLNAMROIJRV {
        
        # --
        
        #         chain KUBE-SVC-CTBMGJDNUDRWEDVR {
        
        #                   counter packets 9 bytes 540 jump KUBE-SEP-BKD3LMWAJNKW5GNU
        
        #                  counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
        
        #         }
        
        # --
        
        #                  counter packets 11 bytes 660 jump KUBE-SVC-CTBMGJDNUDRWEDVR
        
        #                  counter packets 0 bytes 0 jump KUBE-MARK-DROP
        
        #         }
        
        # First endpoint chain: DNAT to the pod on master-0 (10.254.0.66:80).
        nft list ruleset | grep KUBE-SEP-BKD3LMWAJNKW5GNU -A 3
        
        #                   counter packets 9 bytes 540 jump KUBE-SEP-BKD3LMWAJNKW5GNU
        
        #                  counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
        
        #         }
        
        # --
        
        #         chain KUBE-SEP-BKD3LMWAJNKW5GNU {
        
        #                 ip saddr 10.254.0.66  counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                 meta l4proto tcp   counter packets 9 bytes 540 dnat to 10.254.0.66:80
        
        #         }
        
        # Second endpoint chain: DNAT to the pod on worker-0 (10.254.1.230:80).
        nft list ruleset | grep KUBE-SEP-M5WVBCWAFJ2J2M2U -A 3
        
        #                  counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
        
        #         }
        
        #         chain KUBE-FW-CTBMGJDNUDRWEDVR {
        
        # --
        
        #         chain KUBE-SEP-M5WVBCWAFJ2J2M2U {
        
        #                 ip saddr 10.254.1.230  counter packets 0 bytes 0 jump KUBE-MARK-MASQ
        
        #                 meta l4proto tcp   counter packets 2 bytes 120 dnat to 10.254.1.230:80
        
        #         }