MetalLB layer2 mode on openshift 4.8
openshift对外提供服务,默认是router的方式,里面是一个haproxy,但是默认只是支持http/https,定制一下,可以支持tcp。这种配置方法不是很直观,特别是tcp的支持也很鸡肋。
我们已经知道metalLB可以帮助service之间暴露external IP,并且通过BGP的方式广播出去,但是在PoC的时候,BGP路由器还是比较难搞,好在metalLB还提供了layer2的方式,更简单的对外暴露external IP.
本次实验部署架构图:
安装 MetalLB
安装MetalLB非常简单
https://metallb.universe.tf/installation/clouds/#metallb-on-openshift-ocp
mkdir -p /data/install/metallb
cd /data/install/metallb
wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/namespace.yaml
wget https://raw.githubusercontent.com/metallb/metallb/v0.10.2/manifests/metallb.yaml
sed -i '/runAsUser: 65534/d' ./metallb.yaml
oc create -f /data/install/metallb/namespace.yaml
oc adm policy add-scc-to-user privileged -n metallb-system -z speaker
oc create -f /data/install/metallb/metallb.yaml
# to restore
oc delete -f /data/install/metallb/metallb.yaml
配置 MetalLB
# on helper
cat << EOF > /data/install/metal-bgp.yaml
apiVersion: v1
kind: ConfigMap
metadata:
namespace: metallb-system
name: config
data:
config: |
address-pools:
- name: my-ip-space
protocol: layer2
addresses:
- 192.168.7.150-192.168.7.200
EOF
oc create -f /data/install/metal-bgp.yaml
# to restore
oc delete -f /data/install/metal-bgp.yaml
创建测试应用
# back to helper vm
cat << EOF > /data/install/demo.yaml
---
apiVersion: v1
kind: Pod
metadata:
name: test-0
labels:
env: test
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/hostname: 'master-0'
containers:
- name: php
image: "quay.io/wangzheng422/php:demo.02"
---
apiVersion: v1
kind: Pod
metadata:
name: test-1
labels:
env: test
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/hostname: 'worker-0'
containers:
- name: php
image: "quay.io/wangzheng422/php:demo.02"
---
kind: Service
apiVersion: v1
metadata:
name: demo
spec:
type: LoadBalancer
ports:
- name: "http"
protocol: TCP
port: 80
targetPort: 80
selector:
env: test
EOF
oc create -f /data/install/demo.yaml
# to restore
oc delete -f /data/install/demo.yaml
oc get all
# NAME READY STATUS RESTARTS AGE
# pod/mypod-787d79b456-4f4xr 1/1 Running 4 4d17h
# pod/test-0 0/1 ContainerCreating 0 4s
# pod/test-1 1/1 Running 0 4s
# NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
# service/demo LoadBalancer 172.30.178.14 192.168.7.150 80:30781/TCP 4s
# service/kubernetes ClusterIP 172.30.0.1 <none> 443/TCP 5d16h
# service/openshift ExternalName <none> kubernetes.default.svc.cluster.local <none> 5d16h
# NAME READY UP-TO-DATE AVAILABLE AGE
# deployment.apps/mypod 1/1 1 1 4d17h
# NAME DESIRED CURRENT READY AGE
# replicaset.apps/mypod-787d79b456 1 1 1 4d17h
oc get pod -o wide
# NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
# mypod-787d79b456-4f4xr 1/1 Running 4 4d17h 10.254.1.19 worker-0 <none> <none>
# test-0 1/1 Running 0 9m36s 10.254.0.74 master-0 <none> <none>
# test-1 1/1 Running 0 9m36s 10.254.1.65 worker-0 <none> <none>
oc get svc/demo -o yaml
# apiVersion: v1
# kind: Service
# metadata:
# creationTimestamp: "2021-08-31T06:39:39Z"
# name: demo
# namespace: default
# resourceVersion: "2277414"
# uid: 6f36e7a4-ee2e-4f86-802e-6053debecfb2
# spec:
# clusterIP: 172.30.178.14
# clusterIPs:
# - 172.30.178.14
# externalTrafficPolicy: Cluster
# ipFamilies:
# - IPv4
# ipFamilyPolicy: SingleStack
# ports:
# - name: http
# nodePort: 30781
# port: 80
# protocol: TCP
# targetPort: 80
# selector:
# env: test
# sessionAffinity: None
# type: LoadBalancer
# status:
# loadBalancer:
# ingress:
# - ip: 192.168.7.150
for i in {1..10}
do
curl 192.168.7.150 && echo
done
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.74
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.74
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.0.74
# Hello!<br>Welcome to RedHat Developer<br>Enjoy all of the ad-free articles<br>10.254.1.65
arp -a
# ? (10.88.0.3) at 9a:b9:62:83:0f:75 [ether] on cni-podman0
# master-2.ocp4.redhat.ren (192.168.7.15) at <incomplete> on enp1s0
# ? (10.88.0.2) at 4e:de:d9:d5:f8:f1 [ether] on cni-podman0
# master-1.ocp4.redhat.ren (192.168.7.14) at <incomplete> on enp1s0
# ? (192.168.7.150) at 52:54:00:d2:ba:43 [ether] on enp1s0
# worker-1.ocp4.redhat.ren (192.168.7.17) at <incomplete> on enp1s0
# _gateway (172.21.6.254) at 00:17:94:73:12:c2 [ether] on enp1s0
# master-0.ocp4.redhat.ren (192.168.7.13) at 52:54:00:d2:ba:43 [ether] on enp1s0
# worker-0.ocp4.redhat.ren (192.168.7.16) at 90:b1:1c:44:d6:0f [ether] on enp1s0
# bootstrap.ocp4.redhat.ren (192.168.7.12) at <incomplete> on enp1s0
到worker-0上,看看 nft 规则
# go to worker-0 to analyze the nat rules
nft list ruleset | grep 192.168.7.150
# meta l4proto tcp ip daddr 192.168.7.150 tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
nft list ruleset | grep KUBE-FW-CTBMGJDNUDRWEDVR -A 5
# meta l4proto tcp ip daddr 192.168.7.150 tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.35.8 tcp dport 80 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.35.8 tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-T3U64PSX3UGU57NF
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.152.93 tcp dport 80 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.152.93 tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-ZOXDBRX7A3I2MI4S
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.99.142 tcp dport 8443 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# --
# chain KUBE-FW-CTBMGJDNUDRWEDVR {
# counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# counter packets 0 bytes 0 jump KUBE-MARK-DROP
# }
nft list ruleset | grep KUBE-SVC-CTBMGJDNUDRWEDVR -A 3
# meta l4proto tcp ip daddr 172.30.178.14 tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# meta l4proto tcp ip daddr 192.168.7.150 tcp dport 80 counter packets 0 bytes 0 jump KUBE-FW-CTBMGJDNUDRWEDVR
# meta l4proto tcp @nh,96,16 != 2814 ip daddr 172.30.35.8 tcp dport 80 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp ip daddr 172.30.35.8 tcp dport 80 counter packets 0 bytes 0 jump KUBE-SVC-T3U64PSX3UGU57NF
# --
# meta l4proto tcp tcp dport 30781 counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# }
# chain KUBE-SVC-HH47JV2DWEPNMQEX {
# --
# chain KUBE-SVC-CTBMGJDNUDRWEDVR {
# counter packets 0 bytes 0 jump KUBE-SEP-CGMBWTJH33MIKSJY
# counter packets 0 bytes 0 jump KUBE-SEP-V5VBCVCJRZSWQ4D6
# }
# --
# counter packets 0 bytes 0 jump KUBE-SVC-CTBMGJDNUDRWEDVR
# counter packets 0 bytes 0 jump KUBE-MARK-DROP
# }
nft list ruleset | grep KUBE-SEP-CGMBWTJH33MIKSJY -A 3
# counter packets 0 bytes 0 jump KUBE-SEP-CGMBWTJH33MIKSJY
# counter packets 0 bytes 0 jump KUBE-SEP-V5VBCVCJRZSWQ4D6
# }
# --
# chain KUBE-SEP-CGMBWTJH33MIKSJY {
# ip saddr 10.254.0.74 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp counter packets 0 bytes 0 dnat to 10.254.0.74:80
# }
nft list ruleset | grep KUBE-SEP-V5VBCVCJRZSWQ4D6 -A 3
# counter packets 0 bytes 0 jump KUBE-SEP-V5VBCVCJRZSWQ4D6
# }
# chain KUBE-FW-CTBMGJDNUDRWEDVR {
# --
# chain KUBE-SEP-V5VBCVCJRZSWQ4D6 {
# ip saddr 10.254.1.65 counter packets 0 bytes 0 jump KUBE-MARK-MASQ
# meta l4proto tcp counter packets 0 bytes 0 dnat to 10.254.1.65:80
# }
nft --handle --numeric list ruleset | grep random
# counter packets 0 bytes 0 masquerade random-fully # handle 13
看看iptables的规则
iptables -L -v -n -t nat | grep 192.168.7.150
# 0 0 KUBE-FW-CTBMGJDNUDRWEDVR tcp -- * * 0.0.0.0/0 192.168.7.150 /* default/demo:http loadbalancer IP */ tcp dpt:80
iptables -L -v -n -t nat | grep KUBE-FW-CTBMGJDNUDRWEDVR -A 5
# 0 0 KUBE-FW-CTBMGJDNUDRWEDVR tcp -- * * 0.0.0.0/0 192.168.7.150 /* default/demo:http loadbalancer IP */ tcp dpt:80
# 0 0 KUBE-MARK-MASQ tcp -- * * !10.254.0.0/16 172.30.210.66 /* openshift-kube-scheduler-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-SVC-HH47JV2DWEPNMQEX tcp -- * * 0.0.0.0/0 172.30.210.66 /* openshift-kube-scheduler-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-MARK-MASQ tcp -- * * !10.254.0.0/16 172.30.55.237 /* openshift-apiserver-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-SVC-CIUYVLZDADCHPTYT tcp -- * * 0.0.0.0/0 172.30.55.237 /* openshift-apiserver-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-MARK-MASQ tcp -- * * !10.254.0.0/16 172.30.134.31 /* openshift-pipelines/tekton-pipelines-controller:probes cluster IP */ tcp dpt:8080
# --
# Chain KUBE-FW-CTBMGJDNUDRWEDVR (1 references)
# pkts bytes target prot opt in out source destination
# 0 0 KUBE-MARK-MASQ all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http loadbalancer IP */
# 0 0 KUBE-SVC-CTBMGJDNUDRWEDVR all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http loadbalancer IP */
# 0 0 KUBE-MARK-DROP all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http loadbalancer IP */
iptables -L -v -n -t nat | grep KUBE-SVC-CTBMGJDNUDRWEDVR -A 4
# 0 0 KUBE-SVC-CTBMGJDNUDRWEDVR tcp -- * * 0.0.0.0/0 172.30.178.14 /* default/demo:http cluster IP */ tcp dpt:80
# 0 0 KUBE-FW-CTBMGJDNUDRWEDVR tcp -- * * 0.0.0.0/0 192.168.7.150 /* default/demo:http loadbalancer IP */ tcp dpt:80
# 0 0 KUBE-MARK-MASQ tcp -- * * !10.254.0.0/16 172.30.210.66 /* openshift-kube-scheduler-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-SVC-HH47JV2DWEPNMQEX tcp -- * * 0.0.0.0/0 172.30.210.66 /* openshift-kube-scheduler-operator/metrics:https cluster IP */ tcp dpt:443
# 0 0 KUBE-MARK-MASQ tcp -- * * !10.254.0.0/16 172.30.55.237 /* openshift-apiserver-operator/metrics:https cluster IP */ tcp dpt:443
# --
# 0 0 KUBE-SVC-CTBMGJDNUDRWEDVR tcp -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */ tcp dpt:30781
# Chain KUBE-SVC-HH47JV2DWEPNMQEX (1 references)
# pkts bytes target prot opt in out source destination
# 0 0 KUBE-SEP-XIWZUKNCQE6LJCFA all -- * * 0.0.0.0/0 0.0.0.0/0 /* openshift-kube-scheduler-operator/metrics:https */
# --
# Chain KUBE-SVC-CTBMGJDNUDRWEDVR (3 references)
# pkts bytes target prot opt in out source destination
# 0 0 KUBE-SEP-CGMBWTJH33MIKSJY all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */ statistic mode random probability 0.50000000000
# 0 0 KUBE-SEP-V5VBCVCJRZSWQ4D6 all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */
# --
# 0 0 KUBE-SVC-CTBMGJDNUDRWEDVR all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http loadbalancer IP */
# 0 0 KUBE-MARK-DROP all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http loadbalancer IP */
# Chain KUBE-SEP-V5VBCVCJRZSWQ4D6 (1 references)
# pkts bytes target prot opt in out source destination
iptables -L -v -n -t nat | grep KUBE-SEP-CGMBWTJH33MIKSJY -A 3
# 0 0 KUBE-SEP-CGMBWTJH33MIKSJY all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */ statistic mode random probability 0.50000000000
# 0 0 KUBE-SEP-V5VBCVCJRZSWQ4D6 all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */
# Chain KUBE-FW-CTBMGJDNUDRWEDVR (1 references)
# --
# Chain KUBE-SEP-CGMBWTJH33MIKSJY (1 references)
# pkts bytes target prot opt in out source destination
# 0 0 KUBE-MARK-MASQ all -- * * 10.254.0.74 0.0.0.0/0 /* default/demo:http */
# 0 0 DNAT tcp -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */ tcp to:10.254.0.74:80
iptables -L -v -n -t nat | grep KUBE-SEP-V5VBCVCJRZSWQ4D6 -A 3
# 0 0 KUBE-SEP-V5VBCVCJRZSWQ4D6 all -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */
# Chain KUBE-FW-CTBMGJDNUDRWEDVR (1 references)
# pkts bytes target prot opt in out source destination
# --
# Chain KUBE-SEP-V5VBCVCJRZSWQ4D6 (1 references)
# pkts bytes target prot opt in out source destination
# 0 0 KUBE-MARK-MASQ all -- * * 10.254.1.65 0.0.0.0/0 /* default/demo:http */
# 0 0 DNAT tcp -- * * 0.0.0.0/0 0.0.0.0/0 /* default/demo:http */ tcp to:10.254.1.65:80