numa
https://docs.openshift.com/container-platform/4.3/scalability_and_performance/using-topology-manager.html#topology_manager_policies_using-topology-manager
https://www.sharcnet.ca/help/index.php/Using_numactl
video
- https://youtu.be/J2VQQZxk3eY
- https://www.bilibili.com/video/BV1HK4y1r7Di/
oc get featuregate/cluster -o yaml
oc patch featuregate/cluster -p '{"spec": { "featureSet": "LatencySensitive" } }' --type=merge
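# sanity check: the featureSet field should now read LatencySensitive
oc get featuregate/cluster -o jsonpath='{.spec.featureSet}'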
oc get KubeletConfig -o yaml
cat << EOF > cpumanager-kubeletconfig.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: KubeletConfig
metadata:
  name: cpumanager-enabled
spec:
  machineConfigPoolSelector:
    matchLabels:
      custom-kubelet: cpumanager-enabled
  kubeletConfig:
    cpuManagerPolicy: static
    cpuManagerReconcilePeriod: 5s
    topologyManagerPolicy: single-numa-node
EOF
oc apply -f cpumanager-kubeletconfig.yaml
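# The machineConfigPoolSelector above only matches pools carrying the
# custom-kubelet label, so label the target pool (worker, in this sketch):
oc label machineconfigpool worker custom-kubelet=cpumanager-enabled
# watch the pool roll the new kubelet config out to its nodes
oc get machineconfigpool worker -w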
oc project demo
cat << EOF > cpumanager-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  generateName: cpumanager-
spec:
  containers:
  - name: cpumanager
    image: gcr.io/google_containers/pause-amd64:3.0
    resources:
      requests:
        cpu: 1
        memory: "1G"
      limits:
        cpu: 1
        memory: "1G"
  nodeSelector:
    cpumanager: "true"
EOF
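# The nodeSelector above expects a node labeled cpumanager=true;
# label one worker first (node name below is an example):
oc label node worker-0.example.com cpumanager=true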
oc apply -f cpumanager-pod.yaml
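# With the static policy, the container should get an exclusive core; one way
# to confirm is the kubelet's state file on the node (node name is an example):
oc debug node/worker-0.example.com -- chroot /host cat /var/lib/kubelet/cpu_manager_state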
# on the worker node
yum install numactl
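# before binding anything, inspect the topology: nodes, their CPUs, memory sizes and distances
numactl --hardware
# show the NUMA policy of the current process/shell
numactl --show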
# Run COMMAND with its CPUs bound to NUMA node 0 and its memory allocated from node 0 only
numactl --cpunodebind=0 --membind=0 COMMAND
# Run COMMAND on node 1's CPUs, preferring memory from node 1; if node 1 runs out of memory, fall back to node 0
numactl --cpunodebind=1 --preferred=1 COMMAND
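# a third option: interleave pages round-robin across nodes, which trades peak
# locality for even memory bandwidth (useful for large shared buffers)
numactl --interleave=all COMMAND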
# Get a process's CPU affinity mask
taskset -p <pid>
# pid 26624's current affinity mask: ff   (ff = all 8 CPUs allowed, i.e. no restrictive mask is set)
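# the same affinity as a human-readable CPU list, and how to change it:
taskset -cp <pid>
taskset -cp 0-3 <pid>   # restrict the process to CPUs 0-3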
# A process's per-NUMA-node memory usage can be queried with numastat
numastat <pid>
# Per-node process memory usage (in MBs) for PID 26624 (firefox)
#                            Node 0           Total
#                   --------------- ---------------
# Huge                         0.00            0.00
# Heap                         0.00            0.00
# Stack                        0.08            0.08
# Private                    208.50          208.50
# ----------------  --------------- ---------------
# Total                      208.58          208.58
# i.e. how much memory the process holds on each NUMA node, broken down by category
# Find which NUMA node a PCI NIC sits on
cat /sys/class/net/<devicename>/device/numa_node
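# a value of -1 means the kernel has no NUMA locality info for the device
# (e.g. a single-node machine); the same attribute works for any PCI device
# (the device address below is just an example):
cat /sys/bus/pci/devices/0000:3b:00.0/numa_node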
# back to normal
cat << EOF > cpumanager-kubeletconfig.yaml
apiVersion: machineconfiguration.openshift.io/v1
kind: KubeletConfig
metadata:
  name: cpumanager-enabled
spec:
  machineConfigPoolSelector:
    matchLabels:
      custom-kubelet: cpumanager-enabled
  kubeletConfig:
    cpuManagerPolicy: static
    cpuManagerReconcilePeriod: 5s
    topologyManagerPolicy: none
EOF
oc apply -f cpumanager-kubeletconfig.yaml
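# as before, wait for the worker pool to finish rolling out, then confirm the
# policy on a node (node name is an example):
oc get machineconfigpool worker -w
oc debug node/worker-0.example.com -- chroot /host grep -i topologyManager /etc/kubernetes/kubelet.conf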
# or remove the custom KubeletConfig entirely
oc delete -f cpumanager-kubeletconfig.yaml