numa

https://docs.openshift.com/container-platform/4.3/scalability_and_performance/using-topology-manager.html#topology_manager_policies_using-topology-manager

https://www.sharcnet.ca/help/index.php/Using_numactl

video

  • https://youtu.be/J2VQQZxk3eY
  • https://www.bilibili.com/video/BV1HK4y1r7Di/
oc get featuregate/cluster -o yaml oc patch featuregate/cluster -p '{"spec": { "featureSet": "LatencySensitive" } }' --type=merge oc get KubeletConfig -o yaml cat << EOF > cpumanager-kubeletconfig.yaml apiVersion: machineconfiguration.openshift.io/v1 kind: KubeletConfig metadata: name: cpumanager-enabled spec: machineConfigPoolSelector: matchLabels: custom-kubelet: cpumanager-enabled kubeletConfig: cpuManagerPolicy: static cpuManagerReconcilePeriod: 5s topologyManagerPolicy: single-numa-node EOF oc apply -f cpumanager-kubeletconfig.yaml oc project demo cat << EOF > cpumanager-pod.yaml apiVersion: v1 kind: Pod metadata: generateName: cpumanager- spec: containers: - name: cpumanager image: gcr.io/google_containers/pause-amd64:3.0 resources: requests: cpu: 1 memory: "1G" limits: cpu: 1 memory: "1G" nodeSelector: cpumanager: "true" EOF oc apply -f cpumanager-pod.yaml # on the worker node yum install numactl # 指定命令运行在NUMA NODE0上(CPU,内存都来自NUMA NODE0) numactl --cpunodebind=0 --membind=0 COMMAND # 指定命令CPU来自NUMA NODE1,内存尽可能来自NUMA NODE1,如果NUMA NODE1没有足够的内存了,则使用NUMA NODE0上的内存 numactl --cpunodebind=1 --preferred=1 COMMAND # 获取进程cpu的mask taskset -p <pid> # pid 26624's current affinity mask: ff 这个是没设置掩码 # 进程的memory信息可以通过命令获取 numastat <pid> # Per-node process memory usage (in MBs) for PID 26624 (firefox) # Node 0 Total # --------------- --------------- # Huge 0.00 0.00 # Heap 0.00 0.00 # Stack 0.08 0.08 # Private 208.50 208.50 # ---------------- --------------- --------------- # Total 208.58 208.58 # 类似于进程,在某个NUMA Node上占用多少内存 # 查询PCI网卡设备所在numa node cat /sys/class/net/<devicename>/device/numa_node # back to normal cat << EOF > cpumanager-kubeletconfig.yaml apiVersion: machineconfiguration.openshift.io/v1 kind: KubeletConfig metadata: name: cpumanager-enabled spec: machineConfigPoolSelector: matchLabels: custom-kubelet: cpumanager-enabled kubeletConfig: cpuManagerPolicy: static cpuManagerReconcilePeriod: 5s topologyManagerPolicy: none EOF oc apply -f cpumanager-kubeletconfig.yaml # delete them all oc delete -f cpumanager-kubeletconfig.yaml