Real-Time Kernel for OpenShift 4.9
本次试验部署架构图
硬件部署
假设有专属的FPGA,每个FPGA可以连4个RRU,另外GPS连接也是在FPGA上。
先使用performance addon operator,这个是官方推荐的方法。
performance addon operator 是 OpenShift 4 里面的一个 operator,它的作用是:用户只需进行简单的 yaml 配置,operator 就会帮助用户完成复杂的 kernel parameter、kubelet、tuned 配置。
# Install the Performance Addon Operator (PAO) following the official document:
# https://docs.openshift.com/container-platform/4.9/scalability_and_performance/cnf-performance-addon-operator-for-low-latency-nodes.html
# First write the Namespace and OperatorGroup manifests, then create both.
cat << EOF > /data/install/pao-namespace.yaml
---
apiVersion: v1
kind: Namespace
metadata:
  name: openshift-performance-addon-operator
  annotations:
    workload.openshift.io/allowed: management
---
apiVersion: operators.coreos.com/v1
kind: OperatorGroup
metadata:
  name: openshift-performance-addon-operator
  namespace: openshift-performance-addon-operator
EOF
oc create -f /data/install/pao-namespace.yaml
# Next, install PAO into the project openshift-performance-addon-operator.
# Then create the worker-rt MachineConfigPool. Be careful: the metadata label
# machineconfiguration.openshift.io/role=worker-rt must be present, because the
# PerformanceProfile selects this pool through it (machineConfigPoolSelector).
cat << EOF > /data/install/worker-rt.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfigPool
metadata:
  name: worker-rt
  labels:
    machineconfiguration.openshift.io/role: worker-rt
spec:
  machineConfigSelector:
    matchExpressions:
      - {key: machineconfiguration.openshift.io/role, operator: In, values: [worker, worker-rt]}
  nodeSelector:
    matchLabels:
      node-role.kubernetes.io/worker-rt: ""
EOF
oc create -f /data/install/worker-rt.yaml
# Put worker-0 into the pool by giving it the role label the pool's nodeSelector matches.
oc label node worker-0 node-role.kubernetes.io/worker-rt=""
# to restore (run only when undoing the pool creation)
oc delete -f /data/install/worker-rt.yaml
# The configuration below reserves cores 0-1 for the system and leaves the
# remaining cores 2-19 for applications.
cat << EOF > /data/install/performance.yaml
apiVersion: performance.openshift.io/v2
kind: PerformanceProfile
metadata:
  name: wzh-performanceprofile
spec:
  additionalKernelArgs:
    - no_timer_check
    - clocksource=tsc
    - tsc=perfect
    - selinux=0
    - enforcing=0
    - nmi_watchdog=0
    - softlockup_panic=0
    - isolcpus=2-19
    - nohz_full=2-19
    - idle=poll
    - default_hugepagesz=1G
    - hugepagesz=1G
    - hugepages=16
    - skew_tick=1
    - rcu_nocbs=2-19
    - kthread_cpus=0-1
    - irqaffinity=0-1
    - rcu_nocb_poll
    - iommu=pt
    - intel_iommu=on
    # Arguments taken from the profile creator output. Its idle=poll and
    # nmi_watchdog=0 entries are already listed above, so they are not repeated.
    - audit=0
    - intel_idle.max_cstate=0
    - mce=off
    - nosmt
    - processor.max_cstate=1
  globallyDisableIrqLoadBalancing: true
  cpu:
    isolated: "2-19"
    reserved: "0-1"
  realTimeKernel:
    enabled: true
  numa:
    topologyPolicy: "single-numa-node"
  nodeSelector:
    node-role.kubernetes.io/worker-rt: ""
  machineConfigPoolSelector:
    machineconfiguration.openshift.io/role: worker-rt
EOF
oc create -f /data/install/performance.yaml
# Creating the profile produces the following objects:
#   runtimeClass: performance-wzh-performanceprofile
#   tuned: >-
#     openshift-cluster-node-tuning-operator/openshift-node-performance-wzh-performanceprofile
# Inspect the generated MachineConfig, RuntimeClass and Tuned objects.
oc get mc/50-nto-worker-rt -o yaml
oc get runtimeClass -o yaml
oc get -n openshift-cluster-node-tuning-operator tuned/openshift-node-performance-wzh-performanceprofile -o yaml
# restore (run only when undoing the profile)
oc delete -f /data/install/performance.yaml
# Enable SCTP on the worker-rt pool.
# https://docs.openshift.com/container-platform/4.9/networking/using-sctp.html
# The MachineConfig writes an empty modprobe blacklist file for sctp and a
# modules-load.d entry containing "sctp".
cat << EOF > /data/install/sctp-module.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfig
metadata:
  name: 99-worker-rt-load-sctp-module
  labels:
    machineconfiguration.openshift.io/role: worker-rt
spec:
  config:
    ignition:
      version: 3.2.0
    storage:
      files:
        - path: /etc/modprobe.d/sctp-blacklist.conf
          mode: 0644
          overwrite: true
          contents:
            source: data:,
        - path: /etc/modules-load.d/sctp-load.conf
          mode: 0644
          overwrite: true
          contents:
            source: data:,sctp
EOF
oc create -f /data/install/sctp-module.yaml
# Check the result on the node: log in to worker-0, print the running kernel,
# and look for the stalld process.
ssh core@worker-0
uname -a
# Sample output (note "PREEMPT RT" in the kernel string):
# Linux worker-0 4.18.0-193.51.1.rt13.101.el8_2.x86_64 #1 SMP PREEMPT RT Thu Apr 8 17:21:44 EDT 2021 x86_64 x86_64 x86_64 GNU/Linux
ps -ef | grep stalld
# Sample output (the first line is stalld itself, the second is the grep command):
# root 4416 1 0 14:04 ? 00:00:00 /usr/local/bin/stalld -p 1000000000 -r 10000 -d 3 -t 20 --log_syslog --log_kmsg --foreground --pidfile /run/stalld.pid
# core 6601 6478 0 14:08 pts/0 00:00:00 grep --color=auto stalld
create demo vBBU app
---
# Secondary network for the DU pod: uses the host-device CNI plugin on device
# ens18f1, with host-local IPAM handing out 192.168.12.105-192.168.12.106.
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
  name: host-device-du
spec:
  # The CNI configuration is a JSON document carried as a string.
  config: |-
    {
      "cniVersion": "0.3.0",
      "type": "host-device",
      "device": "ens18f1",
      "ipam": {
        "type": "host-local",
        "subnet": "192.168.12.0/24",
        "rangeStart": "192.168.12.105",
        "rangeEnd": "192.168.12.106",
        "routes": [{
          "dst": "0.0.0.0/0"
        }],
        "gateway": "192.168.12.1"
      }
    }
# Alternative (disabled): the same host-device attachment, but with static IPAM
# listing the two addresses explicitly instead of a host-local range.
# apiVersion: "k8s.cni.cncf.io/v1"
# kind: NetworkAttachmentDefinition
# metadata:
#   name: host-device-du
# spec:
#   config: '{
#     "cniVersion": "0.3.0",
#     "type": "host-device",
#     "device": "ens18f1",
#     "ipam": {
#       "type": "static",
#       "addresses": [
#         {
#           "address": "192.168.12.105/24"
#         },
#         {
#           "address": "192.168.12.106/24"
#         }
#       ],
#       "routes": [
#         {
#           "dst": "0.0.0.0/0",
#           "gw": "192.168.12.1"
#         }
#       ]
#     }
#   }'
---
# Demo vBBU (DU) workload: a single privileged pod pinned to the worker-rt
# node, attached to the host-device-du network and run under the runtime class
# performance-wzh-performanceprofile.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: du-deployment1
  labels:
    app: du-deployment1
spec:
  replicas: 1
  selector:
    matchLabels:
      app: du-pod1
  template:
    metadata:
      labels:
        app: du-pod1
      annotations:
        # Attach the host-device-du network; the interface appears as veth11 in the pod.
        k8s.v1.cni.cncf.io/networks: '[
          { "name": "host-device-du",
            "interface": "veth11" }
          ]'
        cpu-load-balancing.crio.io: "true"
    spec:
      runtimeClassName: performance-wzh-performanceprofile
      containers:
        - name: du-container1
          image: "registry.ocp4.redhat.ren:5443/ocp4/du:v1-wzh-shell-03"
          imagePullPolicy: IfNotPresent
          tty: true
          stdin: true
          env:
            - name: duNetProviderDriver
              value: "host-netdevice"
          # command:
          #   - sleep
          #   - infinity
          securityContext:
            privileged: true
            capabilities:
              add:
                # Kubernetes capability names omit the CAP_ prefix.
                - SYS_ADMIN
          volumeMounts:
            - mountPath: /hugepages
              name: hugepage
            - name: lib-modules
              mountPath: /lib/modules
            - name: src
              mountPath: /usr/src
            - name: dev
              mountPath: /dev
            - name: cache-volume
              mountPath: /dev/shm
          # Requests equal limits for cpu, memory and hugepages.
          resources:
            requests:
              cpu: 16
              memory: 48Gi
              hugepages-1Gi: 8Gi
            limits:
              cpu: 16
              memory: 48Gi
              hugepages-1Gi: 8Gi
      volumes:
        - name: hugepage
          emptyDir:
            medium: HugePages
        - name: lib-modules
          hostPath:
            path: /lib/modules
        - name: src
          hostPath:
            path: /usr/src
        - name: dev
          hostPath:
            path: "/dev"
        # Memory-backed emptyDir mounted at /dev/shm in the container.
        - name: cache-volume
          emptyDir:
            medium: Memory
            sizeLimit: 16Gi
      nodeSelector:
        node-role.kubernetes.io/worker-rt: ""
---
# Disabled: NodePort Service "du-http" exposing port 80 of the pods labelled
# app=du-pod1. Uncomment to enable.
# apiVersion: v1
# kind: Service
# metadata:
#   name: du-http
# spec:
#   ports:
#     - name: http
#       port: 80
#       targetPort: 80
#   type: NodePort
#   selector:
#     app: du-pod1
---
Troubleshooting
# Most important: kernel.sched_rt_runtime_us must be -1. This setting matters
# for the real-time workload and for stalld. Read it first, then set it.
sysctl kernel.sched_rt_runtime_us
sysctl -w kernel.sched_rt_runtime_us=-1
# List scheduling class, RT priority, priority and nice value of the relevant
# kernel threads and DU processes.
ps -e -o uid,pid,ppid,cls,rtprio,pri,ni,cmd | grep 'stalld\|rcuc\|softirq\|worker\|bin_read\|dumgr\|duoam'
# Collect diagnostics with the PAO must-gather image.
oc adm must-gather \
  --image=registry.redhat.io/openshift4/performance-addon-operator-must-gather-rhel8
# Save the profile, node and pool state to local files.
oc get performanceprofile wzh-performanceprofile -o yaml > wzh-performanceprofile.output.yaml
oc describe node/worker-0 > node.worker-0.output
oc describe mcp/worker-rt > mcp.worker-rt.output
# If the host-device network cannot release an IP address, remove its
# reservation file by hand. The rm only runs when the cd succeeded, so a
# missing directory cannot cause a delete in the wrong place.
cd /var/lib/cni/networks/host-device-du && rm -f 192.168.12.105
profile creator
https://docs.openshift.com/container-platform/4.9/scalability_and_performance/cnf-create-performance-profiles.html
# on helper
# Work from /data/install/tmp: must-gather is written to ./must-gather, and the
# profile creator container mounts /data/install/tmp/must-gather, so both must
# refer to the same directory.
mkdir -p /data/install/tmp
cd /data/install/tmp
oc adm must-gather --image=quay.io/openshift-kni/performance-addon-operator-must-gather:4.9-snapshot --dest-dir=must-gather
# Show the profile creator's options.
podman run --rm --entrypoint performance-profile-creator quay.io/openshift-kni/performance-addon-operator:4.9-snapshot -h
# Generate a PerformanceProfile from the collected must-gather data.
podman run --rm --entrypoint performance-profile-creator \
  -v /data/install/tmp/must-gather:/must-gather:z \
  quay.io/openshift-kni/performance-addon-operator:4.9-snapshot \
  --mcp-name=worker-rt \
  --reserved-cpu-count=2 \
  --topology-manager-policy=single-numa-node \
  --rt-kernel=true \
  --profile-name=wzh-performanceprofile \
  --power-consumption-mode=ultra-low-latency \
  --disable-ht=true \
  --must-gather-dir-path /must-gather > my-performance-profile.yaml
# Sample profile produced by the profile creator command above
# (written to my-performance-profile.yaml):
# ---
# apiVersion: performance.openshift.io/v2
# kind: PerformanceProfile
# metadata:
#   name: wzh-performanceprofile
# spec:
#   additionalKernelArgs:
#     - audit=0
#     - idle=poll
#     - intel_idle.max_cstate=0
#     - mce=off
#     - nmi_watchdog=0
#     - nosmt
#     - processor.max_cstate=1
#   cpu:
#     isolated: 2-19
#     reserved: 0-1
#   machineConfigPoolSelector:
#     machineconfiguration.openshift.io/role: worker-rt
#   nodeSelector:
#     node-role.kubernetes.io/worker-rt: ""
#   numa:
#     topologyPolicy: single-numa-node
#   realTimeKernel:
#     enabled: true