MetalLB BGP mode on OpenShift 4.8
OpenShift 对外提供服务,默认采用 router 的方式,其内部是一个 haproxy,默认只支持 http/https,经过定制后也可以支持 tcp。这种配置方法不够直观,而且对 tcp 的支持也比较鸡肋。我们希望的方式,是让 k8s service 直接暴露一个对外服务 ip,并通过 bgp 广播出去。今天,我们就来看看 MetalLB 项目如何帮助我们达到这个目的。
本次实验部署架构图:
视频讲解:
安装 MetalLB
安装 MetalLB 非常简单,按照下面的官方安装文档操作即可:
https://metallb.universe.tf/installation/clouds/#metallb-on-openshift-ocp
mkdir -p /data/install/metallb
cd /data/install/metallb
wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/namespace.yaml
wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/metallb.yaml
sed -i '/runAsUser: 65534/d' ./metallb.yaml
oc create -f namespace.yaml
oc adm policy add-scc-to-user privileged -n metallb-system -z speaker
oc create -f metallb.yaml
创建路由器
我们用一个 kvm 来模拟 bgp 路由器
- https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/configuring_and_managing_networking/setting-your-routing-protocols_configuring-and-managing-networking#intro-to-frr_setting-your-routing-protocols
- https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/iproute_bgp/configuration/xe-16/irg-xe-16-book/bgp-dynamic-neighbors.html
- https://ipbgp.com/2018/02/07/quagga/
- https://docs.frrouting.org/en/latest/bgp.html
# to setup a router vm for testing
# go to kvm host
cd /data/kvm
wget https://raw.githubusercontent.com/wangzheng422/docker_env/dev/redhat/ocp4/4.8/scripts/helper-ks-rocky.cfg
sed -i '0,/^network.*/s/^network.*/network --bootproto=static --device=enp1s0 --gateway=172.21.6.254 --ip=172.21.6.10 --netmask=255.255.255.0 --nameserver=172.21.1.1 --ipv6=auto --activate/' helper-ks-rocky.cfg
sed -i '0,/^network --hostname.*/s/^network --hostname.*/network --hostname=bgp-router/' helper-ks-rocky.cfg
virt-install --name="bgp-router" --vcpus=2 --ram=2048 \
--cpu=host-model \
--disk path=/data/nvme/bgp-router.qcow2,bus=virtio,size=30 \
--os-variant rhel8.4 --network bridge=baremetal,model=virtio \
--graphics vnc,port=49000 \
--boot menu=on --location /data/kvm/Rocky-8.4-x86_64-minimal.iso \
--initrd-inject helper-ks-rocky.cfg --extra-args "inst.ks=file:/helper-ks-rocky.cfg"
# in the bgp-router vm
nmcli con mod enp1s0 +ipv4.addresses "192.168.7.10/24"
nmcli con up enp1s0
systemctl disable --now firewalld
dnf install -y frr
sed -i 's/bgpd=no/bgpd=yes/g' /etc/frr/daemons
systemctl enable --now frr
# 进入路由器配置界面
vtysh
# 以下是 bgp 路由器配置(需要先在 vtysh 中执行 configure terminal 进入配置模式,再输入以下命令)
router bgp 64512
neighbor metallb peer-group
neighbor metallb remote-as 64512
bgp listen limit 200
bgp listen range 192.168.7.0/24 peer-group metallb
配置 MetalLB 和 bgp-router 进行配对
# on helper
cat << EOF > /data/install/metal-bgp.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: metallb-system
  name: config
data:
  config: |
    peers:
    - my-asn: 64512
      peer-asn: 64512
      peer-address: 192.168.7.10
    address-pools:
    - name: my-ip-space
      protocol: bgp
      avoid-buggy-ips: true
      addresses:
      - 198.51.100.0/24
EOF
oc create -f /data/install/metal-bgp.yaml
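# to restore (optional rollback: run this only when you want to remove the config; skip it to continue the walkthrough)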
oc delete -f /data/install/metal-bgp.yaml
回到 bgp-router 看看路由情况
# back to bgp-router vm
vtysh
bgp-router# show ip bgp summary
IPv4 Unicast Summary:
BGP router identifier 192.168.7.10, local AS number 64512 vrf-id 0
BGP table version 0
RIB entries 0, using 0 bytes of memory
Peers 2, using 43 KiB of memory
Peer groups 1, using 64 bytes of memory
Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt
*192.168.7.13 4 64512 2 2 0 0 0 00:00:25 0 0
*192.168.7.16 4 64512 2 2 0 0 0 00:00:25 0 0
Total number of neighbors 2
* - dynamic neighbor
2 dynamic neighbor(s), limit 200
我们看到,集群里面的2个node,分别和路由器建立了peer关系。
创建测试应用
# back to helper vm
cat << EOF > /data/install/demo.yaml
---
apiVersion: v1
kind: Pod
metadata:
  name: test-0
  labels:
    env: test
spec:
  restartPolicy: OnFailure
  nodeSelector:
    kubernetes.io/hostname: 'master-0'
  containers:
  - name: php
    image: "quay.io/wangzheng422/php:demo.02"
---
apiVersion: v1
kind: Pod
metadata:
  name: test-1
  labels:
    env: test
spec:
  restartPolicy: OnFailure
  nodeSelector:
    kubernetes.io/hostname: 'worker-0'
  containers:
  - name: php
    image: "quay.io/wangzheng422/php:demo.02"
---
kind: Service
apiVersion: v1
metadata:
  name: demo
spec:
  type: LoadBalancer
  ports:
    - name: "http"
      protocol: TCP
      port: 80
      targetPort: 80
  selector:
    env: test
EOF
oc create -f /data/install/demo.yaml
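# to restore (optional rollback: run this only when you want to remove the demo; skip it to continue the walkthrough)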
oc delete -f /data/install/demo.yaml
oc get all
# NAME READY STATUS RESTARTS AGE
# pod/mypod-787d79b456-4f4xr 1/1 Running 3 3d23h
# pod/test-0 1/1 Running 0 2m28s
# pod/test-1 1/1 Running 0 2m28s
# NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
# service/demo LoadBalancer 172.30.82.87 198.51.100.1 80:32203/TCP 2m28s
# service/kubernetes ClusterIP 172.30.0.1 <none> 443/TCP 4d22h
# service/openshift ExternalName <none> kubernetes.default.svc.cluster.local <none> 4d22h
# NAME READY UP-TO-DATE AVAILABLE AGE
# deployment.apps/mypod 1/1 1 1 3d23h
# NAME DESIRED CURRENT READY AGE
# replicaset.apps/mypod-787d79b456 1 1 1 3d23h
oc get pod -o wide
# NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
# mypod-787d79b456-4f4xr 1/1 Running 3 4d 10.254.1.2 worker-0 <none> <none>
# test-0 1/1 Running 0 8m38s 10.254.0.66 master-0 <none> <none>
# test-1 1/1 Running 0 8m38s 10.254.1.230 worker-0 <none> <none>
oc get svc/demo -o yaml
# apiVersion: v1
# kind: Service
# metadata:
# creationTimestamp: "2021-08-30T12:42:21Z"
# name: demo
# namespace: default
# resourceVersion: "2046159"
# uid: 1af07435-5234-4062-994d-4715453118c6
# spec:
# clusterIP: 172.30.82.87
# clusterIPs:
# - 172.30.82.87
# externalTrafficPolicy: Cluster
# ipFamilies:
# - IPv4
# ipFamilyPolicy: SingleStack
# ports:
# - name: http
# nodePort: 32203
# port: 80
# protocol: TCP
# targetPort: 80
# selector:
# env: test
# sessionAffinity: None
# type: LoadBalancer
# status:
# loadBalancer:
# ingress:
# - ip: 198.51.100.1
回到 bgp-router 看看路由更新情况
# back to bgp-router
bgp-router# show ip bgp summary
IPv4 Unicast Summary:
BGP router identifier 192.168.7.10, local AS number 64512 vrf-id 0
BGP table version 1
RIB entries 1, using 192 bytes of memory
Peers 2, using 43 KiB of memory
Peer groups 1, using 64 bytes of memory
Neighbor V AS MsgRcvd MsgSent TblVer InQ OutQ Up/Down State/PfxRcd PfxSnt
*192.168.7.13 4 64512 73 72 0 0 0 00:35:16 1 0
*192.168.7.16 4 64512 73 72 0 0 0 00:35:16 1 0
Total number of neighbors 2
* - dynamic neighbor
2 dynamic neighbor(s), limit 200
bgp-router# show ip bgp neighbors 192.168.7.13 routes
BGP table version is 1, local router ID is 192.168.7.10, vrf id 0
Default local pref 100, local AS 64512
Status codes: s suppressed, d damped, h history, * valid, > best, = multipath,
i internal, r RIB-failure, S Stale, R Removed
Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self
Origin codes: i - IGP, e - EGP, ? - incomplete
Network Next Hop Metric LocPrf Weight Path
*>i198.51.100.1/32 192.168.7.13 0 0 ?
Displayed 1 routes and 2 total paths
bgp-router#
bgp-router# show ip bgp neighbors 192.168.7.16 routes
BGP table version is 1, local router ID is 192.168.7.10, vrf id 0
Default local pref 100, local AS 64512
Status codes: s suppressed, d damped, h history, * valid, > best, = multipath,
i internal, r RIB-failure, S Stale, R Removed
Nexthop codes: @NNN nexthop's vrf id, < announce-nh-self
Origin codes: i - IGP, e - EGP, ? - incomplete
Network Next Hop Metric LocPrf Weight Path
*=i198.51.100.1/32 192.168.7.16 0 0 ?
Displayed 1 routes and 2 total paths
在路由器的shell界面上看看
ip r
# default via 172.21.6.254 dev enp1s0 proto static metric 100
# 172.21.6.0/24 dev enp1s0 proto kernel scope link src 172.21.6.10 metric 100
# 192.168.7.0/24 dev enp1s0 proto kernel scope link src 192.168.7.10 metric 100
# 198.51.100.1 proto bgp metric 20
# nexthop via 192.168.7.13 dev enp1s0 weight 1
# nexthop via 192.168.7.16 dev enp1s0 weight 1
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.230
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.230
[root@bgp-router ~]# curl 198.51.100.1 && echo
Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.66
到worker-0上,看看 nft 规则
# go to worker-0 to analyze the nat rules
nft list ruleset | grep 198.51
# meta l4proto tcp ip daddr 198.51.100.1 tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
nft list ruleset | grep KUBE-FW-CTBMGJDNUDRWEDVR -A 5
# meta l4proto tcp ip daddr 198.51.100.1 tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.145.124 tcp dport 443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.145.124 tcp dport 443 counter packets 0 bytes 0 jump KUBE-SVC-L54HVQEJKTL2PXFK
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.16.253 tcp dport 8443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.16.253 tcp dport 8443 counter packets 0 bytes 0 jump KUBE-SVC-YVQ2VVJT4ABSS56R
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.185.119 tcp dport 9091 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# --
# chain KUBE-FW-CTBMGJDNUDRWEDVR {
# counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# counter packets 0 bytes 0 jump KUBE-MARK-DROP
# }
nft list ruleset | grep KUBE-SVC-CTBMGJDNUDRWEDVR -A 3
# meta l4proto tcp ip daddr 172.30.82.87 tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# meta l4proto tcp ip daddr 198.51.100.1 tcp dport 80 counter packets 11 bytes 660 jump KUBE-FW-CTBMGJDNUDRWEDVR
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.145.124 tcp dport 443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.145.124 tcp dport 443 counter packets 0 bytes 0 jump KUBE-SVC-L54HVQEJKTL2PXFK
# --
# meta l4proto tcp tcp dport 32203 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# }
# chain KUBE-SVC-DCLNKYLNAMROIJRV {
# --
# chain KUBE-SVC-CTBMGJDNUDRWEDVR {
# counter packets 9 bytes 540 jump KUBE-SEP-BKD3LMWAJNKW5GNU
# counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
# }
# --
# counter packets 11 bytes 660 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# counter packets 0 bytes 0 jump KUBE-MARK-DROP
# }
nft list ruleset | grep KUBE-SEP-BKD3LMWAJNKW5GNU -A 3
# counter packets 9 bytes 540 jump KUBE-SEP-BKD3LMWAJNKW5GNU
# counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
# }
# --
# chain KUBE-SEP-BKD3LMWAJNKW5GNU {
# ip saddr 10.254.0.66 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp counter packets 9 bytes 540 dnat to 10.254.0.66:80
# }
nft list ruleset | grep KUBE-SEP-M5WVBCWAFJ2J2M2U -A 3
# counter packets 2 bytes 120 jump KUBE-SEP-M5WVBCWAFJ2J2M2U
# }
# chain KUBE-FW-CTBMGJDNUDRWEDVR {
# --
# chain KUBE-SEP-M5WVBCWAFJ2J2M2U {
# ip saddr 10.254.1.230 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp counter packets 2 bytes 120 dnat to 10.254.1.230:80
# }